// STATA code for: "Which countries have graduated from each income group, // and when?" // Matt Juden, CGD, mjuden@cgdev.org // Written 2016-03-31 // Version 2 - Previous version was Version 1 (2016-02-15) // What's new? - Added lines 32-36 and 134-143 version 13 ******************************************************************************** // To run this file: /* 1. Locate the latest version of the World Bank's historical data for operational lending groups, currently hosted at: siteresources.worldbank.org/DATASTATISTICS/Resources/OGHIST.xls Change the url below to point to this data.*/ local wbdata = /// "http://siteresources.worldbank.org/DATASTATISTICS/Resources/OGHIST.xls" /* 2. Set up a working directory containing a subfolder called 'output' and one called 'input', then edit the file path "root" to point to this directory. */ global root "SomeDrive:\Set\Some\Path" global output $root\output global input $root\input /* 3. Change the local below to the value corresponding to the most recent bank financial year recorded in the World Bank's historical data for operational lending groups.*/ local lastYear = "fy2016" ******************************************************************************** // 1. Import and clean data import excel using `wbdata', /// sheet("Country Analytical History") cellrange(A5:AD226) /// firstrow case(lower) clear rename a iso3 rename b country drop if iso3 == "" * Relabel financial year variables with four-digit years forvalues i = 89/99 { local oldvar = "fy`i'" local newvar = "fy19`i'" rename `oldvar' `newvar' } forvalues i = 0/9 { local oldvar = "fy0`i'" local newvar = "fy200`i'" rename `oldvar' `newvar' } forvalues i = 10/16 { local oldvar = "fy`i'" local newvar = "fy20`i'" rename `oldvar' `newvar' } * Replace categorical country groups with integers forvalues i = 1989/2016 { local variable = "fy`i'" replace `variable' = "1" if `variable' == "L" replace `variable' = "2" if `variable' == "LM" replace `variable' = "3" if `variable' == "UM" replace `variable' = "4" if `variable' == "H" destring `variable', replace force } save "$input\WBHistoricalCountryClassifications", replace ******************************************************************************** // 2A. Find graduation dates use "$input\WBHistoricalCountryClassifications", replace * From low income forvalues i = 1990/2016 { local j = `i'-1 local newvar = "lowgrad`i'" local oldvar = "fy`i'" local oldervar = "fy`j'" local date = "`i'" gen `newvar' = `date' if `oldvar' > 1 & `oldervar' == 1 } * From lower-middle income forvalues i = 1990/2016 { local j = `i'-1 local newvar = "lowmidgrad`i'" local oldvar = "fy`i'" local oldervar = "fy`j'" local date = "`i'" gen `newvar' = `date' if `oldvar' > 2 & `oldervar' == 2 } * From upper-middle income forvalues i = 1990/2016 { local j = `i'-1 local newvar = "upmidgrad`i'" local oldvar = "fy`i'" local oldervar = "fy`j'" local date = "`i'" gen `newvar' = `date' if `oldvar' > 3 & `oldervar' == 3 } // 2B. Find the most recent graduation date for each country * N.B. Some countries have graduated, slipped back, and graduated again. egen lowGraduation = rowmax(lowgrad*) egen lowmidGraduation = rowmax(lowmidgrad*) egen upmidGraduation = rowmax(upmidgrad*) // 2C. Format and export replace lowGraduation = lowGraduation - 1 replace lowmidGraduation = lowmidGraduation - 1 replace upmidGraduation = upmidGraduation - 1 /* Graduation dates generated above are based on the World Bank financial year in which the announcement was made. As announcements are made in July, this is one year more than the calendar year. This modification changes graduation dates to the calendar year in which the announcement was made. The graduation is based on financial data from the calendar year before. So, China's graduation (2000) was announced in summer 2000 (Bank FY 2001), based on data from 1999. */ * Check for countries that have slipped back and not re-graduated gen slippedBack = 1 if (lowGraduation != . & `lastYear' < 2) /// | (lowmidGraduation != . & `lastYear' < 3) /// | (upmidGraduation != . & `lastYear' < 4) * At the time of writing, South Sudan is the only case, and is corrected here assert slippedBack == 1 if iso3 == "SSD" replace lowGraduation = . if iso3 == "SSD" replace slippedBack = . if iso3 == "SSD" * Catch problems and throw up error assert slippedBack == . * Drop variables and format for export drop if lowGraduation == . & lowmidGraduation == . & upmidGraduation == . keep country iso3 lowGraduation lowmidGraduation upmidGraduation label variable iso3 "ISO3 value for country" label variable country "World Bank country name" label variable lowGraduation /// "Most recent date of graduation from low income to a higher income group" label variable lowmidGraduation /// "Most recent date of graduation from lower-middle income to a higher income group" label variable upmidGraduation /// "Most recent date of graduation from upper-middle income to a higher income group" sort lowGraduation lowmidGraduation upmidGraduation country outsheet using "$output\graduationDates.csv", comma replace ******************************************************************************** // 3A. Generate a graph of income groups over time use "$input\WBHistoricalCountryClassifications", replace * Generate totals for each income group for each year forvalues i = 1989/2016 { local basevar = "fy`i'" local lvar = "l`i'" local lmvar = "lm`i'" local umvar = "um`i'" local hvar = "h`i'" gen `lvar' = 1 if `basevar' == 1 gen `lmvar' = 1 if `basevar' == 2 gen `umvar' = 1 if `basevar' == 3 gen `hvar' = 1 if `basevar' == 4 } collapse (sum) l1989-h2016 gen anchor = 1 reshape long l lm um h, i(anchor) j(year) drop anchor * Generate proportions of total for each income group /*N.B. Total number of countries changes each year as more countries are added, (never taken away) making proportions more informative than absolute numbers. */ gen sum = l + lm + um + h gen propL = 100 * l / sum gen propLM = 100 * lm / sum gen propUM = 100 * um / sum gen propH = 100 * h / sum * Generate cumulative proportions to force Stata to generate a stacked graph gen propLMgraph = propLM + propL gen propUMgraph = propLMgraph + propUM gen propHgraph = propUMgraph +propH label var propL "Low income" label var propLMgraph "Lower-middle income" label var propUMgraph "Upper-middle income" label var propHgraph "High income" * Export a .csv of the graph points keep year prop* outsheet using "$output\countryGroupProportions.csv", comma replace * Generate the graph and export twoway area propHgraph propUMgraph propLMgraph propL year, yscale(r(0 100)) /// xtitle("Year") ytitle("Proportion of total countries (stacked)") /// title("Proportion of countries in each World Bank income group") gr export "$output\propCountriesPerGroup.png", as(png) width(3000) replace