**************************************
* Life History Data Combined - Step2 *
**************************************
* Date created: Dec 13, 2007
* Last update:  December 30, 2010 12pm
*
* Continuation of DataStep1.do (02-27-08), which built the combined life
* history data set for migrants and nonmigrants over 1984-2000. This step
* attaches household- and village-level variables to the life history data.
* The July 08 changes only save more of the remote variables.

clear
set mem 500m
*cd "N:\Temp Stata Files\"
cd "/Users/fgarip/Desktop/Combined LH 51 Villages/Temp Stata Files/"

****************************************************************************
******************* 9. OBTAIN HH-LEVEL VARIABLES ***************************
****************************************************************************
use "Temp\comb_lh.dta", clear

*************************************************
* (a) Remittances reported by household members *
*************************************************
* The household's account of remittances is stored in the individual rosters
* (indiv94 and indiv00). Attach the 1994 roster items first; roster records
* with no life-history match (_merge==2) are discarded.
destring hhid94, replace
sort hhid94 cep94
merge hhid94 cep94 using "indiv94.dta", keep(q1 q27 q28 q29 q30* q31 q32 q33 q34*)
drop if _merge==2
drop _merge

* q1 is the type of person in the hh; 3 is a migrant (someone who moved out of
* the VILLAGE, not necessarily out of NR, 2 or more months ago).
* Codes 1, 2, 4 and 5 are treated as non-migrants.
gen mig = 1 if q1==3 & year==1994
replace mig = 0 if inlist(q1, 1, 2, 4, 5) & year==1994
label variable mig "person identified as current mig in hh roster (94 or 00)"

* q27: did the mig send money to the hh in the last 12 months (1 yes, 2 no)?
gen rm_hh = 1 if q27==1 & year==1994
replace rm_hh = 0 if q27==2 & year==1994
label variable rm_hh "mig sends money to hh? (hh's account)"

* q30a-q30e: did the mig send goods (clothing, food, hh appliances, electric
* appliances, vehicles)? The indicator is created empty here and filled next.
gen rg_hh = .
* Goods sent by the migrant, 1994 (rg_hh already exists, all missing).
* 1 = "yes" (code 1) on any of q30a-q30e; 0 = code 2 on every one of them.
replace rg_hh = 1 if (q30a==1 | q30b==1 | q30c==1 | q30d==1 | q30e==1) & year==1994
replace rg_hh = 0 if (q30a==2 & q30b==2 & q30c==2 & q30d==2 & q30e==2) & year==1994
lab var rg_hh "mig sends goods to hh? (hh's account)"

* Money OR goods: 1 if either indicator is 1, 0 only if both are 0.
gen rmg_hh = 1 if (rm_hh==1 | rg_hh==1) & year==1994
replace rmg_hh = 0 if (rm_hh==0 & rg_hh==0) & year==1994
lab var rmg_hh "mig sends money or goods to hh? (hh's account)"

* q28 = number of times the mig sent money (98/99 are set to missing)
* q29 = total amount of money the mig sent (coded in brackets, see below)
gen rt_hh = q28 if year==1994
replace rt_hh = . if (rt_hh==98 | rt_hh==99) & year==1994

* There are 17 obs where rt_hh>0 and rm_hh==0: a positive number of transfers
* is reported although q27 says "no money sent". Treat these as senders.
* FIX: this correction must run BEFORE rt_hh is zeroed for rm_hh==0 (it used to
* run after, so it could never match), and rmg_hh must be corrected before
* rm_hh is flipped, because both conditions test rm_hh==0.
replace rmg_hh = 1 if rt_hh>0 & rt_hh~=. & rm_hh==0 & year==1994
replace rm_hh  = 1 if rt_hh>0 & rt_hh~=. & rm_hh==0 & year==1994

* Remaining non-senders sent money zero times.
replace rt_hh = 0 if rm_hh==0 & year==1994
lab var rt_hh "times mig sent money to hh (hh's account)"

gen ra_hh = q29 if year==1994
replace ra_hh = . if (ra_hh==8 | ra_hh==9) & year==1994
replace ra_hh = 0 if rm_hh==0 & year==1994
lab var ra_hh "amount of money mig sent to hh (hh's account)"

* Amount sent is coded as: (1) <1K baht  (2) 1K-3K  (3) 3K-5K  (4) 5K-10K
* (5) 10K-20K  (6) >20K. Use the interval midpoint, and 30K for the open
* top bracket, as the estimated amount.
local baht 500 2000 4000 7500 15000 30000
forvalues k = 1/6 {
    local amt : word `k' of `baht'
    replace ra_hh = `amt' if ra_hh==`k' & year==1994
}

********************************************
* Remittances Sent By Household to Migrant *
********************************************
** q31 identifies if hh sends money to mig in the last 12 months
** q32 identifies the number of times hh sent money to mig
** q33 identifies the amount of money hh sent mig
** q34a-q34e identify if hh sends goods (clothing, food, hh appliances,
** electric appliances, vehicles) to mig.
* The indicator is created empty here and filled by the statements that follow.
gen hrm_hh = .
* Money sent by the hh to the migrant, 1994 (hrm_hh already exists, all missing).
replace hrm_hh = 1 if q31==1 & year==1994
replace hrm_hh = 0 if q31==2 & year==1994
lab var hrm_hh "hh sends money to mig? (hh's account)"

* Number of transfers. FIX: restricted to year==1994 like every other 1994
* remittance variable (the unrestricted version only got cleaned up by the
* "no replacements should occur" check further down).
gen hrt_hh = q32 if year==1994
replace hrt_hh = . if (hrt_hh==98 | hrt_hh==99) & year==1994

* There are 59 obs where hrt_hh>0 and hrm_hh==0 (or hrm_hh missing): treat them
* as senders. FIX: run the correction BEFORE zeroing hrt_hh for hrm_hh==0;
* in the old order the hrm_hh==0 cases had already been zeroed and were missed.
replace hrm_hh = 1 if hrt_hh>0 & hrt_hh~=. & (hrm_hh==0 | hrm_hh==.) & year==1994
replace hrt_hh = 0 if hrm_hh==0 & year==1994
lab var hrt_hh "times hh sent money to mig (hh's account)"

gen hra_hh = q33 if year==1994
replace hra_hh = . if (hra_hh==8 | hra_hh==9) & year==1994
replace hra_hh = 0 if hrm_hh==0 & year==1994
lab var hra_hh "amount of money hh sent to mig (hh's account)"

* Amount sent is coded as: (1) <1K baht  (2) 1K-3K  (3) 3K-5K  (4) 5K-10K
* (5) 10K-20K  (6) >20K. Use the interval midpoint, and 30K for the open
* top bracket, as the estimated amount.
local baht 500 2000 4000 7500 15000 30000
forvalues k = 1/6 {
    local amt : word `k' of `baht'
    replace hra_hh = `amt' if hra_hh==`k' & year==1994
}

* Goods sent by the hh: 1 = yes on any item, 0 = no (code 2) on every item.
gen hrg_hh = 1 if (q34a==1 | q34b==1 | q34c==1 | q34d==1 | q34e==1) & year==1994
replace hrg_hh = 0 if q34a==2 & q34b==2 & q34c==2 & q34d==2 & q34e==2 & year==1994
lab var hrg_hh "hh sends goods to mig? (hh's account)"

gen hrmg_hh = 1 if (hrm_hh==1 | hrg_hh==1) & year==1994
replace hrmg_hh = 0 if hrm_hh==0 & hrg_hh==0 & year==1994
lab var hrmg_hh "hh sends money or goods to mig? (hh's account)"

* Check that the remittance variables are only available in year 1994 -
* no replacements should occur.
foreach v in rm_hh ra_hh rt_hh rg_hh rmg_hh hrm_hh hra_hh hrt_hh hrg_hh hrmg_hh {
    replace `v' = . if year~=1994
}

*******************************************************
* Merging 2000 data - Keep the obs for only year 2000 *
*******************************************************
sort hhid00 cep00
merge hhid00 cep00 using "indiv00.dta", keep(x1 x25 x26 x27* x28 x29 x30*)
drop if _merge==2
drop _merge

replace mig = 1 if x1==3 & year==2000
replace mig = 0 if (x1==1 | x1==2 | x1==4) & year==2000

* x25 identifies if mig sends money OR GOODS to hh in the last 12 months
* x26 identifies the amount of money the migrant sent
* x27_1 - x27_5 identify if mig sends goods (clothing, food, hh appliances,
* electric appliances, vehicles) to hh.
replace rmg_hh = 1 if x25==1 & year==2000
replace rmg_hh = 0 if x25==2 & year==2000

replace rg_hh = 1 if (x27_1==1 | x27_2==1 | x27_3==1 | x27_4==1 | x27_5==1) & year==2000
replace rg_hh = 0 if (x27_1==2 & x27_2==2 & x27_3==2 & x27_4==2 & x27_5==2) & year==2000
replace rg_hh = 0 if rmg_hh==0 & year==2000

* There is no question about whether the mig sends money; infer it from
* rmg_hh and x26 (the reported amount sent).
* FIX: the "sender" rule now requires x26>0. Previously x26==0 was set to 0
* and then immediately overwritten with 1, because 0 also satisfies x26<8.
replace rm_hh = 0 if rmg_hh==0 & year==2000
replace rm_hh = 0 if x26==0 & year==2000
replace rm_hh = 1 if x26>0 & x26<8 & year==2000

* x26 identifies the total amount of money mig sent (bracket codes below 8)
replace ra_hh = x26 if x26<8 & year==2000
replace ra_hh = 0 if rm_hh==0 & year==2000

********************************************
* Remittances Sent By Household to Migrant *
********************************************
** Note that there is no variable in the 2000 roster that records the
** frequency of remittances from hh to individuals. So hrt_hh is missing in 2000.
replace hrmg_hh = 1 if x28==1 & year==2000
replace hrmg_hh = 0 if x28==2 & year==2000

replace hrg_hh = 1 if (x30_1==1 | x30_2==1 | x30_3==1 | x30_4==1 | x30_5==1) & year==2000
replace hrg_hh = 0 if (x30_1==2 & x30_2==2 & x30_3==2 & x30_4==2 & x30_5==2) & year==2000
replace hrg_hh = 0 if hrmg_hh==0 & year==2000

* There is no question about whether the hh sends money; it is inferred from
* hrmg_hh and x29 (the reported amount sent).
* Money sent by the hh to the migrant, 2000: inferred from hrmg_hh and the
* reported amount x29.
* FIX: the "sender" rule now requires x29>0. Previously x29==0 was set to 0
* and then immediately overwritten with 1, because 0 also satisfies x29<8.
replace hrm_hh = 0 if hrmg_hh==0 & year==2000
replace hrm_hh = 0 if x29==0 & year==2000
replace hrm_hh = 1 if x29>0 & x29<8 & year==2000

* There is one obs where hrm_hh>0 & hrmg_hh==0, correct
replace hrmg_hh = 1 if hrm_hh>0 & hrm_hh~=.

replace hra_hh = x29 if x29<8 & year==2000
replace hra_hh = 0 if hrm_hh==0 & year==2000

* Convert the remaining bracket codes (1-7) to baht amounts, for both
* directions. The statements are not restricted by year; amounts that are
* already in baht (500 and above) match none of the codes.
local baht 500 2000 4000 7500 15000 30000 50000
foreach v in ra_hh hra_hh {
    forvalues k = 1/7 {
        local amt : word `k' of `baht'
        replace `v' = `amt' if `v'==`k'
    }
}

drop q1 q27 q28 q29 q30* q31 q32 q33 q34* x1 x25 x26 x27* x28 x29 x30*

* Check that mig and remit variables are only available in years 1994 and
* 2000 - no replacements should occur.
foreach v in mig rm_hh ra_hh rt_hh rg_hh rmg_hh hrm_hh hra_hh hrt_hh hrg_hh hrmg_hh {
    replace `v' = . if year~=1994 & year~=2000
}

************************
** (b) Household Land **
************************
* IMPORTANT NOTE - "Don't know" codes are set to missing. We assume that hh
* land from 1984 to 1993 is captured in hh84.dta, from 1994 to 1999 in
* hh94.dta, and 2000 in plots00.dta. Assumptions about missing land values:
*   1. If land94 or land00 is available but land84 is missing, assume the hh
*      had zero land in 1984. Similarly, if land00 is available but land94 is
*      missing, assume the hh had zero land in 1994.
*   2. If land84 is available but land94 is missing, assume land84 stays
*      constant. Similarly, if land94 is available but land00 is missing,
*      assume land94 stays constant.
sort vill84 house84
merge vill84 house84 using "hh84.dta", keep(hh84_33)
drop if _merge==2
drop _merge

********
* 1984 *
********
gen land84 = hh84_33
* 999 is treated as missing.
replace land84 = . if land84==999
* Note - a recorded 0 means less than 1 rai. Assume those hh own 0.5 rai
* (the average of the 0-1 interval). The original note says "in 1994";
* the statement is applied to the 1984 variable.
replace land84 = 0.5 if land84==0
* Note - in 1984, no land is recorded as '998'
replace land84 = 0 if land84==998
lab var land84 "land (1984) owned in rai (incl house area)"
drop hh84_33

********
* 1994 *
********
sort hhid94
merge hhid94 using "hh94.dta", keep(q6_14)
drop if _merge==2
drop _merge

gen land94 = q6_14
* NOTE(review): 99995 and 99999 are treated as missing; confirm 99995 (rather
* than 99998) against the hh94 codebook.
replace land94 = . if land94==99995 | land94==99999
* Convert to rai (1rai=1600m2; 1wa=4m2; 1rai=400wa)
replace land94 = land94/400
lab var land94 "land (1994) owned in rai (incl house area)"
drop q6_14

sort hhid00
save "Temp\combt1.dta", replace

********
* 2000 *
********
use "plots00.dta", clear
* NOTE - hh00.dta records the number of plangs the hh owns (1-10). A plang is
* not a unit of measurement; it identifies a single contiguous piece of land,
* excluding the land where the respondent hh's house is located. hh00.dta does
* not record how many rai of land a hh owns. Instead, plots00.dta records an
* area measurement for each plang, so the area of all plangs owned by a hh is
* summed by hhid00. x6_14rai records the area in rai of each plang; when it is
* unknown (=9999) the respondent is asked to guess the area, recorded in
* x6_15rai. Both questions are combined below to construct land00.
* Plot-level data are in memory at this point.
* Where the plot area is unknown (x6_14rai==9999) fall back on the
* respondent's own estimate (x6_15rai); if that is also 9999, treat as missing.
replace x6_14rai = x6_15rai if x6_14rai==9999
replace x6_14rai = . if x6_14rai==9999

* Household total over plots, then reduce to one record per household.
* NOTE(review): total() counts missing areas as 0, so a household whose plots
* are all missing gets land00 = 0 here - confirm that this is intended.
egen land00 = total(x6_14rai), by(hhid00)
bysort hhid00: keep if _n==1
keep hhid00 land00

* NOTE - land00 so far excludes the area of the house location, which land84
* and land94 include. Bring in the house plot area from hh00.dta (x6_58rai,
* with the respondent's estimate in x6_59rai) and add it.
* All obs from both master and using data are kept: some hhs have no records
* in plots00.dta because they only own the land of their home (or no land).
merge hhid00 using "hh00.dta", keep(x6_58rai x6_59rai)
drop _merge

* Where the house area is unknown, use the respondent's estimate; then add the
* house area to the other land.
* NOTE(review): 2999 is treated like 9999 in the first statement only -
* presumably a data-specific code; verify against the hh00 codebook.
replace x6_58rai = x6_59rai if x6_58rai==2999 | x6_58rai==9999
replace x6_58rai = . if x6_58rai==9999
replace land00 = land00 + x6_58rai if land00~=. & x6_58rai~=.
replace land00 = x6_58rai if land00==. & x6_58rai~=.
lab var land00 "land (2000) owned in rai (incl house area)"

keep hhid00 land00
sort hhid00
save "Temp\land00.dta", replace

* Back to the life history data: attach land00.
use "Temp\combt1.dta", clear
sort hhid00
merge hhid00 using "Temp\land00.dta"
drop if _merge==2
drop _merge

* Fill missing land values from non-missing values of other members of the
* same household. Sorting on the land variable within household puts
* non-missing values first, so element [1] is non-missing whenever the group
* has any non-missing value. (Original note: no changes made!)
bysort hhid00 (land00): replace land00 = land00[1] if land00==. & land00[1]~=.
bysort hhid94 (land94): replace land94 = land94[1] if land94==. & land94[1]~=.
bysort vill84 house84 (land84): replace land84 = land84[1] if land84==. & land84[1]~=.

* If land94 or land00 is available, but land84 is missing, assume that the hh
* had zero land in 1984.
replace land84 = 0 if land84==. & (land94~=. | land00~=.)
* If land00 is available, but land94 is missing, assume that the hh had zero
* land in 1994.
replace land94 = 0 if land94==. & land00~=.

* Yearly land: the 1984 value for 1984-1993, the 1994 value for 1994-1999,
* the 2000 value for 2000.
gen land = land84 if year>=1984 & year<1994
replace land = land94 if year>=1994 & year<2000
replace land = land00 if year==2000

* If land84 is available, but land94 is missing, assume land84 stays constant.
replace land = land84 if year>=1994 & year<2000 & land==.
* If land94 is available, but land00 is missing, assume land94 stays constant;
* if land94 is missing as well, fall back on land84.
replace land = land94 if year==2000 & land==.
replace land = land84 if year==2000 & land==. & land94==.
lab var land "land owned in rai"

save "Temp\combt2.dta", replace

*******************************
**** (c) Household Durables ***
*******************************
* We need to obtain household durables item-by-item for the PCA analysis in
* the development paper. We also need to compute aggregate indicators for the
* remittances paper.

********
* 1984 *
********
* FIX: -use- accepts -clear-, not -replace-; this now matches the later
* `use "Temp\combt4.dta", clear`.
use "Temp\combt2.dta", clear
sort vill84 house84, stable
merge vill84 house84 using "hh84.dta",keep(hh84_05 hh84_20 hh84_21 hh84_22 hh84_23 hh84_24 ///
    hh84_25 hh84_26 hh84_27 hh84_28 hh84_29 hh84_30 ///
    hh84_31 hh84_32 hh84_38 hh84_40 hh84_42 hh84_44 ///
    hh84_46 hh84_48 hh84_50 hh84_08)
drop if _merge==2
drop _merge

* Note: when merged to the combined life history data, hh84 values for hh
* assets and durables have about 40K missing observations. But the number of
* missing for cases with non-missing remittance indicators (i.e., rm~=.) is
* close to zero. In short, the missing observations for hh durables do not
* cause us to lose more observations in the remittance and development papers.

*****************
* House Quality *
*****************
* 1 for house types 5, 6 or 7; missing when the type is 9 or missing;
* 0 otherwise.
gen hqual84 = 0
replace hqual84 = . if hh84_05==9 | hh84_05==.
replace hqual84 = 1 if inlist(hh84_05, 5, 6, 7)
lab var hqual84 "house quality 84 (1: Good - two stories wood or brick)"

****************
* Cooking Fuel *
****************
* Note in the variables below, "no" is coded as 8.
* Cooking fuel, 1984: 1 if any of hh84_22/23/24 equals 1; missing if any of
* the three items is missing; 0 otherwise.
* FIX: the variable is now spelled out (ckfuel84). The old `replace ckfuel ...`
* relied on variable-name abbreviation and breaks as soon as another variable
* starting with "ckfuel" exists.
gen ckfuel84 = 0
replace ckfuel84 = 1 if hh84_22==1 | hh84_23==1 | hh84_24==1
replace ckfuel84 = . if hh84_22==. | hh84_23==. | hh84_24==.
lab var ckfuel84 "cooking fuel 84 (1: gas or elec 0: coal, wood or husk)"

******************
* HH electricity *
******************
gen helec84 = 0
replace helec84 = 1 if hh84_25==1 | hh84_25==2
replace helec84 = . if hh84_25==. | hh84_25==9
lab var helec84 "hh has electricity 84"

*************
* HH Assets *
*************
* Counts of each asset; code 8 ("no") becomes 0 and code 9 becomes missing.
local assets tv fridge pump itan car mcyc
local items  26 27 28 29 30 31
forvalues i = 1/6 {
    local a : word `i' of `assets'
    local q : word `i' of `items'
    gen `a'84 = hh84_`q'
    replace `a'84 = 0 if hh84_`q'==8
    replace `a'84 = . if hh84_`q'==9
}
lab var tv84     "# of tv's in hh 84"
lab var fridge84 "# of fridges in hh 84"
lab var pump84   "# of water pumps in hh 84"
lab var itan84   "# of itans (agri cars) in hh 84"
lab var car84    "# of cars in hh 84"
lab var mcyc84   "# of motorcycles in hh 84"

* Latrine: codes 1-2 count as having one, codes 3, 4, 5 and 8 as not;
* anything else stays missing.
gen latrine84 = 0 if inlist(hh84_32, 3, 4, 5, 8)
replace latrine84 = 1 if inlist(hh84_32, 1, 2)
lab var latrine84 "latrine (wc) inside or outside the house 84"

**********
* Cattle *
**********
* Counts per animal; code 98 becomes 0 and code 99 becomes missing.
* The logged version is ln(count) for positive counts and 0 for a zero count.
local stock cow buff pig geese duck chick fish
local items 38 40 42 44 46 48 50
forvalues i = 1/7 {
    local a : word `i' of `stock'
    local q : word `i' of `items'
    gen `a'84 = hh84_`q'
    replace `a'84 = 0 if hh84_`q'==98
    replace `a'84 = . if hh84_`q'==99
    gen ln`a'84 = ln(`a'84) if `a'84>0
    replace ln`a'84 = 0 if `a'84==0
}
lab var cow84     "# of cows 84"
lab var lncow84   "logged # of cows 84"
lab var buff84    "# of buffalos 84"
lab var lnbuff84  "logged # of buffalo 84"
lab var pig84     "# of pigs 84"
lab var lnpig84   "logged # of pigs 84"
lab var geese84   "# of geese 84"
lab var lngeese84 "logged # of geese 84"
lab var duck84    "# of ducks 84"
lab var lnduck84  "logged # of ducks 84"
lab var chick84   "# of chicken 84"
lab var lnchick84 "logged # of chicken 84"
lab var fish84    "# of fish ponds 84"
lab var lnfish84  "logged # of fish ponds 84"

*********************
* Water Piped to HH *
*********************
gen water84 = 1 if hh84_08==1 | hh84_08==2 | hh84_08==3
replace water84 = 0 if hh84_08==8
lab var water84 "water piped to house 84"

* Drop the raw hh84 items. FIX: hh84_44 (geese) was merged in but missing from
* this list, so it was left behind in the saved data.
drop hh84_05 hh84_20 hh84_21 hh84_22 hh84_23 hh84_24 hh84_25 hh84_26 hh84_27 hh84_28 ///
     hh84_29 hh84_30 hh84_31 hh84_32 hh84_38 hh84_40 hh84_42 hh84_44 hh84_46 hh84_48 hh84_50 hh84_08

*sum hqual84 ckfuel84 helec84 tv84 fridge84 pump84 itan84 car84 mcyc84 latrine84 ///
*    cow84 buff84 pig84 geese84 duck84 chick84 fish84 lncow84 lnbuff84 lnpig84 ///
*    lngeese84 lnduck84 lnchick84 lnfish84 water84
*codebook hqual84 ckfuel84 helec84 tv84 fridge84 pump84 itan84 car84 mcyc84 latrine84 ///
*    cow84 buff84 pig84 geese84 duck84 chick84 fish84 lncow84 lnbuff84 lnpig84 ///
*    lngeese84 lnduck84 lnchick84 lnfish84 water84 if rm~=.
save "Temp\combt3.dta", replace

********
* 1994 *
********
* FIX: -use- accepts -clear-, not -replace-.
use "Temp\combt3.dta", clear
sort hhid94
merge hhid94 using "hh94.dta", keep(q6_44 q6_45a_1 q6_45c_* q6_5* window q6_1a q6_2a q6_2b q6_2c q6_2d q6_2e q6_3 ///
    q6_10a q6_10b q6_10c q6_5a1 q6_5b1 q6_5c1 q6_5d1 q6_5e1 q6_5f1 q6_5g1 q6_5h1 ///
    q6_13a2 q6_13b2 q6_13c2 q6_13d2 q6_13e2)
drop if _merge==2
drop _merge

***********
* HH Debt *
***********
* Note - hh debt information is only available in hh94, not in 1984 nor in 2000.
* A household may have a number of debts to different sources; 5 of these are
* recorded. All 5 entries are summed to compute the total debt.
* FIX: variable spelled out (debt94) instead of the abbreviation `debt`.
gen debt94 = q6_44
replace debt94 = 0 if debt94==2
lab var debt94 "hh has any debt in 94?"

* Set the 9999998 / 9999999 codes to missing in each of the five amounts.
* FIX: the 9999999 test used to look at q6_45c_1 for every entry (copy-paste),
* so that code was never cleaned in entries 2-5 and leaked into the total.
forvalues k = 1/5 {
    replace q6_45c_`k' = . if q6_45c_`k'==9999998 | q6_45c_`k'==9999999
}

egen mdebt94 = rowtotal(q6_45c_1 q6_45c_2 q6_45c_3 q6_45c_4 q6_45c_5)
* Because rowtotal() treats missing as 0, there are no missing obs for mdebt94.
* Set it to missing if debt94==.
replace mdebt94 = . if debt94==.

* There are 174 observations that have debt but the amount seems 0.
* Those debts may be in fertilizer or rice, not money.
replace debt94 = 0 if debt94==1 & mdebt94==0
replace mdebt94 = mdebt94/1000
lab var mdebt94 "amount of money owed (in 1000 baht)"

**********
* Window *
**********
* 0 for window codes below 4, 1 for codes 4-7, missing otherwise.
gen window94 = .
replace window94 = 0 if window<4
replace window94 = 1 if window>=4 & window<=7
lab var window94 "House has bug screens or glass panes 94"

***********
* HH Elec *
***********
gen helec94 = .
replace helec94 = 0 if q6_1a==2
replace helec94 = 1 if q6_1a==1
lab var helec94 "hh has electricity 94"

****************
* Cooking Fuel *
****************
* Created empty here; filled by the statements that follow.
gen ckfuel94 = .
* Cooking fuel, 1994 (ckfuel94 already exists, all missing): 0 if q6_2a or
* q6_2b is 1, then 1 if any of q6_2c/q6_2d/q6_2e is 1 (the later statement
* wins when both apply).
* FIX: the label said "cooking fuel 84" for the 1994 variable.
replace ckfuel94 = 0 if q6_2a==1 | q6_2b==1
replace ckfuel94 = 1 if q6_2c==1 | q6_2d==1 | q6_2e==1
lab var ckfuel94 "cooking fuel 94 (1: gas or elec 0: coal, wood or husk)"

************************
* Water Piped to House *
************************
gen water94 = .
replace water94 = 0 if q6_3==2
replace water94 = 1 if q6_3==1
lab var water94 "water piped to house 94"

**********
* Cattle *
**********
* If q6_10a begins with 3+, the number covers all the cows (for sale and
* consumption); 1+ codes cows for consumption, 2+ codes cows for sale. Strip
* the 'purpose' digit, we only need the actual number. The same applies to
* q6_10b (buffalos) and q6_10c (pigs). 9999 is set to missing.
local stock cow buff pig
local items a b c
forvalues i = 1/3 {
    local a : word `i' of `stock'
    local q : word `i' of `items'
    gen `a'94 = q6_10`q'
    replace `a'94 = . if q6_10`q'==9999
    forvalues p = 1/3 {
        local lo = `p'*1000
        local hi = `lo' + 1000
        replace `a'94 = `a'94 - `lo' if `a'94>=`lo' & `a'94<`hi'
    }
    * logged count: ln(count) for positive counts, 0 for a zero count
    gen ln`a'94 = .
    replace ln`a'94 = ln(`a'94) if `a'94>0
    replace ln`a'94 = 0 if `a'94==0
}
lab var cow94    "# of cows 94"
lab var lncow94  " logged # of cows 94"
lab var buff94   "# of buffalos 94"
lab var lnbuff94 " logged # of buffalos 94"
lab var pig94    "# of pigs 94"
lab var lnpig94  " logged # of pigs 94"

*************
* HH Assets *
*************
gen tv94 = q6_5a1 + q6_5b1
lab var tv94 "# of tvs in hh 94"
gen vcr94 = q6_5c1
lab var vcr94 "# of vcrs in hh 94"
gen fridge94 = q6_5d1
lab var fridge94 "# of refrigerators in hh 94"
gen itan94 = q6_5e1
lab var itan94 "# of itans in hh 94"
gen car94 = q6_5f1
lab var car94 "# of cars in hh 94"
gen mcyc94 = q6_5g1
lab var mcyc94 "# of motorcycles in hh 94"
gen sew94 = q6_5h1
lab var sew94 "# of sewing machines in hh 94"

*******************
* Farming devices *
*******************
* Assume N/A (=8) means no device. 2 is "no" as well. Both become 0.
* Note - different than 2000 data, 94 data does not ask the number of
* tractors, but rather asks "own or not?"
foreach v of varlist q6_13a2 q6_13b2 q6_13c2 q6_13d2 q6_13e2 {
    replace `v' = 0 if `v'==8 | `v'==2
}

gen tract94 = q6_13a2 + q6_13b2
lab var tract94 "# of large or small tractors 94"
* The 8 code has already been recoded to 0 above, so the ~=8 guards are
* redundant but harmless.
gen gen94 = q6_13c2 if q6_13c2~=8 & q6_13c2~=.
lab var gen94 "# of electric generators 94"
gen pump94 = q6_13d2 if q6_13d2~=8 & q6_13d2~=.
lab var pump94 "# of water pumps in 94"
gen thresh94 = q6_13e2 if q6_13e2~=8 & q6_13e2~=.
label variable thresh94 "# of rice threshers 94"

* Raw hh94 items are no longer needed.
drop window q6_1a q6_2a q6_2b q6_2c q6_2d q6_2e q6_3 q6_10a q6_10b q6_10c q6_5* q6_13*

*sum window94 helec94 ckfuel94 water94 cow94 buff94 pig94 lncow94 lnbuff94 lnpig94 ///
*    tv94 vcr94 fridge94 itan94 car94 mcyc94 sew94 tract94 gen94 pump94 thresh94
*codebook window94 helec94 ckfuel94 water94 cow94 buff94 pig94 lncow94 lnbuff94 lnpig94 ///
*    tv94 vcr94 fridge94 itan94 car94 mcyc94 sew94 tract94 gen94 pump94 thresh94

save "Temp\combt4.dta", replace

********
* 2000 *
********
use "Temp\combt4.dta", clear
sort hhid00, stable
merge hhid00 using "hh00.dta", keep(story window* x6_1_* x6_2 x6_4a* x6_10a* x6_10b* x6_76 x6_76t1 x6_76b1 x6_76r1 x6_76t2 x6_76b2 x6_76r2)
drop if _merge==2
drop _merge

*****************
* House Quality *
*****************
gen hqual00 = 1 if story==2
replace hqual00 = 0 if story==1
label variable hqual00 "house quality 00 (1: Good - two stories wood or brick)"

**********
* Window *
**********
* 1 if any of window4/window5/window6 is 1; missing when window1 is missing
* or 9; 0 otherwise.
gen window00 = 0
replace window00 = 1 if window4==1 | window5==1 | window6==1
replace window00 = . if window1==. | window1==9
label variable window00 "House has bug screens or glass panes 00"

****************
* Cooking Fuel *
****************
gen ckfuel00 = 0
replace ckfuel00 = 1 if x6_1_3==1 | x6_1_4==1 | x6_1_5==1
replace ckfuel00 = . if x6_1_1==9 | x6_1_1==.
label variable ckfuel00 "cooking fuel 00 (1: gas or elec 0: coal, wood or husk)"

************************
* Water Piped to House *
************************
gen water00 = 0
replace water00 = 1 if x6_2==1
replace water00 = . if x6_2==. | x6_2==9
label variable water00 "water piped into house 00"

*************
* HH Assets *
*************
* Code 99 is treated as missing throughout. For assets built from several
* items the raw items are cleaned first and then summed, so the sum is
* missing whenever any component is missing.
foreach k in 1 2 11 12 15 16 17 18 19 {
    replace x6_4a`k' = . if x6_4a`k'==99
}

gen tv00 = x6_4a1 + x6_4a2
label variable tv00 "# of tvs in hh 00"

gen vcr00 = x6_4a3 if x6_4a3~=99
label variable vcr00 "# of vcrs in hh 00"
gen cell00 = x6_4a4 if x6_4a4~=99
label variable cell00 "# of mobile phones in hh 00"
gen phone00 = x6_4a5 if x6_4a5~=99
label variable phone00 "# of telephones in hh 00"
gen comp00 = x6_4a6 if x6_4a6~=99
label variable comp00 "# of computer/laptops in hh 00"
gen sat00 = x6_4a7 if x6_4a7~=99
label variable sat00 "# of satellite dishes in hh 00"
gen micro00 = x6_4a8 if x6_4a8~=99
label variable micro00 "# of microwaves in hh 00"
gen wash00 = x6_4a9 if x6_4a9~=99
label variable wash00 "# of washing machines in hh 00"
gen ac00 = x6_4a10 if x6_4a10~=99
label variable ac00 "# of air conditioners in hh 00"

gen fridge00 = x6_4a11 + x6_4a12
label variable fridge00 "# of refrigerators (big and small) in hh 00"

gen itan00 = x6_4a13 if x6_4a13~=99
label variable itan00 "# of itans (agri cars) in hh 00"
gen bcyc00 = x6_4a14 if x6_4a14~=99
label variable bcyc00 "# of bicycles in hh 00"

gen mcyc00 = x6_4a15 + x6_4a16
label variable mcyc00 "# of motorcycles in hh 00"

gen car00 = x6_4a17 + x6_4a18 + x6_4a19
label variable car00 "# of cars in hh 00"

gen sew00 = x6_4a20 if x6_4a20~=99
label variable sew00 "# of sewing machines in hh 00"

**********
* Cattle *
**********
* In all the cattle variables, missing means zero: the count question
* (x6_10b#) was only asked of those answering 'yes' to the raise-or-not
* question (x6_10a#). So a missing count with a non-missing x6_10a# is set to
* 0; 9999 is set to missing. Logged version: ln(count) for positive counts,
* 0 for a zero count.
local stock  cow buff pig duck chick
local plural cows buffalos pigs ducks chicken
forvalues i = 1/5 {
    local a : word `i' of `stock'
    local p : word `i' of `plural'
    gen `a'00 = x6_10b`i'
    replace `a'00 = 0 if x6_10a`i'~=. & `a'00==.
    replace `a'00 = . if `a'00==9999
    gen ln`a'00 = ln(`a'00) if `a'00>0
    replace ln`a'00 = 0 if `a'00==0
    label variable ln`a'00 "# of logged `p' 00"
    label variable `a'00 "# of `p' 00"
}

* Fish: same construction, with 99999 also treated as missing.
gen fish00 = x6_10b6
replace fish00 = 0 if x6_10a6~=. & fish00==.
replace fish00 = . if fish00==9999
replace fish00 = . if fish00==99999
gen lnfish00 = ln(fish00) if fish00>0
replace lnfish00 = 0 if fish00==0
label variable lnfish00 "# of loggedfish raised in sq wa pond 00"
label variable fish00 "# of fish raised in sq wa pond 00"

*******************
* Farming Devices *
*******************
* In all the variables below, missing means zero: the questions were only
* asked of those answering 'yes' to the have-or-not question x6_76. Counts are
* set to 0 when x6_76==2 and when the count itself is coded 9.
foreach v of varlist x6_76t1 x6_76b1 x6_76r1 {
    replace `v' = 0 if x6_76==2 | `v'==9
}

* x6_76t2 records if the hh *owns* the number of equipment listed in x6_76t1.
* This variable is missing for hh's that report zero tractor in x6_76t1.
* Keep the number of equipment in x6_76t1 / x6_76b1 / x6_76r1 only if it is
* owned by the hh; when the matching ownership item (x6_76t2 / x6_76b2 /
* x6_76r2) equals 2 the count is set to 0.
replace x6_76t1 = 0 if x6_76t2==2
replace x6_76b1 = 0 if x6_76b2==2
replace x6_76r1 = 0 if x6_76r2==2

* Tractors = x6_76t1 + x6_76b1 (missing if either part is missing).
gen tract00 = x6_76t1 + x6_76b1
lab var tract00 "# of tractors (small or large) 00"
gen thresh00 = x6_76r1
lab var thresh00 "# of rice threshers 00"

drop story window* x6_1_* x6_2 x6_4a* x6_10a* x6_10b* x6_76 x6_76t1 x6_76b1 x6_76r1 x6_76t2 x6_76b2 x6_76r2

*sum hqual00 window00 ckfuel00 water00 tv00 vcr00 cell00 phone00 comp00 sat00 ///
*    micro00 wash00 ac00 fridge00 itan00 bcyc00 mcyc00 car00 sew00 cow00 buff00 ///
*    pig00 duck00 chick00 fish00 lncow00 lnbuff00 lnpig00 lnduck00 lnchick00 lnfish00 ///
*    tract00 thresh00
*codebook hqual00 window00 ckfuel00 water00 tv00 vcr00 cell00 phone00 comp00 sat00 ///
*    micro00 wash00 ac00 fridge00 itan00 bcyc00 mcyc00 car00 sew00 cow00 buff00 ///
*    pig00 duck00 chick00 fish00 lncow00 lnbuff00 lnpig00 lnduck00 lnchick00 lnfish00 ///
*    tract00 thresh00

**************
* Aggregates *
**************
* In all three waves, we count the hh goods as well as farming equipment as
* durables.
* For 1984, we count tv, fridge, pump, itan, car, mcyc, latrine.
* For 1994, we count tv, vcr, fridge, itan, car, mcyc, sewing machine,
*   tractor, generator, pump and thresher.
* For 2000, the sum below uses tv, vcr, cell, phone, computer, satellite,
*   microwave, washing machine, ac, fridge, itan, bicycle, mcyc, tractor,
*   thresher, car and sewing machine.
* Each index is a plain sum, so it is missing when any component is missing.
gen hhdi84 = tv84 + fridge84 + pump84 + itan84 + car84 + mcyc84 + latrine84
gen hhdi94 = tv94 + vcr94 + fridge94 + itan94 + car94 + mcyc94 + sew94 + tract94 + gen94 + pump94 + thresh94
gen hhdi00 = tv00 + vcr00 + cell00 + phone00 + comp00 + sat00 + micro00 + wash00 + ac00 + fridge00 + itan00 + bcyc00 + mcyc00 + ///
    tract00 + thresh00 + car00 + sew00

* FIX: hhdi94 was labelled "hh durables index 1984".
lab var hhdi84 "hh durables index 1984"
lab var hhdi94 "hh durables index 1994"
lab var hhdi00 "hh durables index 2000"

** NOTE - In most of the cases (3968/4274), hhdi00 index is higher than hhdi94.
** Overall durables indicator: takes the hhdi94 value in 1994 rows and the hhdi00 value in 2000 rows.
gen hhdi = hhdi94 if year==1994
replace hhdi = hhdi00 if year==2000
lab var hhdi "hh durables index in year"

** Lagged counterpart: previous wave's index (84 for 1994 rows, 94 for 2000 rows).
gen hhdilag = hhdi84 if year==1994
replace hhdilag = hhdi94 if year==2000
lab var hhdilag "hh durables index (lagged)"

save "Temp\combt5.dta", replace

******************************************
**** (d) Household Economic Activities ***
******************************************
* We obtain the types of economic activities hh is involved in (other than farming) such as
* silk weaving, silkworm raising, food preservation, bamboo and basket weaving, vegetable garden,
* or other activity. Note the measured economic activities are different across survey waves.
* In 1984 the above-mentioned activities are measured; in 1994 silk weaving, silkworm raising, cloth
* weaving and charcoal making are recorded. In 2000, the same four activities plus a fifth one
* (stored below as wood00) are recorded.

use "Temp\combt5.dta", clear

* 1984 *
********
sort vill84 house84, stable
merge vill84 house84 using "hh84.dta", keep(hh84_51 hh84_52 hh84_53 hh84_54 hh84_55 hh84_56)
drop if _merge==2
drop _merge

* Items hh84_51 ... hh84_56 map, in order, onto the six activity indicators.
* Codes 1-3 (or lower) set the indicator to 1, code 8 sets it to 0, anything else stays missing.
local item = 51
foreach act in cloth worm foodp bamboo garden othecon {
    gen `act'84 = 1 if hh84_`item'<=3
    replace `act'84 = 0 if hh84_`item'==8
    local ++item
}
drop hh84_51 hh84_52 hh84_53 hh84_54 hh84_55 hh84_56

* 1994 *
********
sort hhid94
merge hhid94 using "hh94.dta", keep(q6_4*)
drop if _merge==2
drop _merge

* Activity indicators: q6_4?a in 1-3 means involved, 0 means not involved.
forvalues i = 1/4 {
    local act  : word `i' of silk worm cloth coal
    local code : word `i' of s w o c
    gen `act'94 = 1 if q6_4`code'a>=1 & q6_4`code'a<=3
    replace `act'94 = 0 if q6_4`code'a==0
}

* Number of hh members involved in these activities (q6_4?b; code 8 is recoded to 0).
forvalues i = 1/4 {
    local act  : word `i' of silk worm cloth coal
    local code : word `i' of s w o c
    gen m`act'94 = q6_4`code'b if q6_4`code'b<=7
    replace m`act'94 = 0 if q6_4`code'b==8
}

* There are 251 observations with mcoal94=0 where coal94 is not 0. Correct them.
replace coal94 = 0 if mcoal94==0
drop q6_4*

* 2000 *
********
sort hhid00, stable
merge hhid00 using "hh00.dta", keep(x6_3*)
drop if _merge==2
drop _merge

* Activity indicators: x6_3a1 ... x6_3a5, in order silk, worm, cloth, coal, wood.
forvalues i = 1/5 {
    local act : word `i' of silk worm cloth coal wood
    gen `act'00 = 1 if x6_3a`i'>0 & x6_3a`i'<=3
    replace `act'00 = 0 if x6_3a`i'==0
}

* Number of hh members involved in these activities.
* This is only recorded for hhs involved in the above activities,
* so the member count is set to 0 whenever the activity indicator is 0.
forvalues i = 1/5 {
    local act : word `i' of silk worm cloth coal wood
    gen m`act'00 = x6_3b`i' if x6_3b`i'~=99
    replace m`act'00 = 0 if `act'00==0
}

* There are 81 observations with mworm00=0 where worm00 is not 0. Correct them
* (same correction for cloth, coal and wood; silk is left as is).
foreach act in worm cloth coal wood {
    replace `act'00 = 0 if m`act'00==0
}
drop x6_3*

* Aggregates *
**************
* We will compute the total number of economic activities hh's are involved in across the three waves.
* Note, silk weaving, cloth weaving and silkworm raising are common to the three questionnaires. (In 1984, silk and
* cloth weaving are lumped together.) Charcoal making is common to 1994 and 2000 waves, although it is not discriminatory
* as almost all hh's are involved in it (60% in 1994 and 70% in 2000). We will compute two aggregate measures,
* one for the current year and one lagged.
gen heco84 = cloth84 + worm84
gen heco94 = silk94 + cloth94 + worm94
gen heco00 = silk00 + cloth00 + worm00

* Next, we compute the total number of hh members involved. Double-counting is possible here, since an
* individual may be counted as involved in silk weaving as well as, say, silkworm raising. Therefore, we take
* the maximum number of individuals in any category to avoid double-counting. (Note the number of hh members
* in economic activities is not available in the 1984 data.)
egen mheco94 = rowmax(msilk94 mcloth94 mworm94)
egen mheco00 = rowmax(msilk00 mcloth00 mworm00)

* In the case of number of economic activities, we compute two aggregate measures, one capturing the current
* time period, and one lagged. For the members involved in economic activities, we cannot compute the lagged
* measure as it is not available in 1984.
* Current-year and lagged counts of economic activities, and current-year member count.
gen heco = heco94 if year==1994
replace heco = heco00 if year==2000
gen hecolag = heco84 if year==1994
replace hecolag = heco94 if year==2000
gen mheco = mheco94 if year==1994
replace mheco = mheco00 if year==2000

lab var heco84 "84: no of economic activities hh is involved in"
lab var heco94 "94: no of economic activities hh is involved in"
lab var heco00 "00: no of economic activities hh is involved in"
lab var mheco94 "94: no of hh members involved in econ activities"
lab var mheco00 "00: no of hh members involved in econ activities"
lab var heco "no of economic activities hh is involved in"
lab var hecolag "no of economic activities hh is involved in (lagged)"
lab var mheco "no of hh members involved in econ activities"

save "Temp\combt6.dta", replace

********************************************
**** (e) Household Structure Indicators ****
********************************************
** We need to compute hh structure variables using the individual-level data in the hh
** roster. We first compute aggregates for each household in the related hh roster (indiv84,
** indiv94 and indiv00), save a temporary file, and then merge this to the combined life history.

********
* 1984 *
********
* FIX: -use- accepts the option clear, not replace.
use "indiv84.dta", clear

** Note - Information on the number of migrants, number of children, location of spouse and
** parents is missing in the 1984 data.

* Dependency Ratio 84 *
***********************
* Note - age>=80 is recorded as 80. 8+#of months records the age of those<1.
* Assume all with age<1 are 1.
gen age = in84_06
replace age =. if age==99
replace age =1 if age>80 & age<=91

bys vill84 house84: gen hsize84 = _N

* Working-age indicator. Persons with missing age end up with ind1564==0
* (missing compares as larger than 64), i.e. they are counted among the dependents.
gen ind1564 = 0 if age<15 | age>64
replace ind1564 = 1 if age>=15 & age<=64
bys vill84 house84: egen age1564 = total(ind1564)
gen nodep84 = hsize84-age1564
* dratio84 is missing for households with no member aged 15-64 (division by zero).
gen dratio84 = (nodep84)/age1564

* FIX: exclude missing ages explicitly. In Stata a missing value is larger than any number,
* so "age>64" alone would count persons of unknown age as elderly dependents.
gen ind64 = 1 if age>64 & age<.
bys vill84 house84: egen nodep_ec84 = total(ind64)

lab var nodep84 "84: # of dependents in hh"
lab var nodep_ec84 "84: # of dependents excl children (64+) in hh"
lab var dratio84 "84: hh dependency ratio"

* Indicator of HH Head *
************************
gen head84 = 0 if in84_04~=99
replace head84 = 1 if in84_04==1
lab var head84 "84: Is ind hh head?"

* Sex Ratio *
*************
* sex: 1 = male, 0 = female (recoded from 2), missing if coded 9.
* NOTE - females is computed as hsize84 - males, so persons with missing sex count as female.
gen sex = in84_05
replace sex = 0 if sex==2
replace sex = . if sex==9
bys vill84 house84: egen males = total(sex)
gen females = hsize84 - males
gen fratio84 = females/hsize84
lab var fratio84 "84: female ratio in hh"

* Relation to HH Head *
***********************
gen daugh84 = 0 if in84_04~=99
* FIX: spell out daugh84 (the original "daugh8" only worked through variable abbreviation).
replace daugh84 = 1 if in84_04==3 & sex==0
gen son84 = 0 if in84_04~=99
replace son84 = 1 if in84_04==3 & sex==1
lab var daugh84 "84: Is ind a daughter (a potential heir)?"
lab var son84 "84: Is ind a son?"

* No of Potential Heirs *
*************************
bys vill84 house84: egen tdaugh84 = total(daugh84)
gen nodaugh84 = tdaugh84-daugh84
lab var nodaugh84 "84: No of other daughters (heirs) in hh"

* Identify the youngest daughter: within household and daughter status, sort by age
* (missing ages sort last) and flag the first observation among the daughters.
sort vill84 house84 daugh84 age
gen ydaugh84 = 0 if daugh84~=.
by vill84 house84 daugh84: replace ydaugh84 = 1 if daugh84==1 & tdaugh84>=1 & _n==1
lab var ydaugh84 "84: Is ind the youngest daughter (most likely heir)?"
* Number of other sons in the hh (excluding the index individual).
bys vill84 house84: egen noson84 = total(son84)
replace noson84 = noson84-son84
lab var noson84 "84: No of other sons in hh"

gen noheir84 = noson84 + nodaugh84
lab var noheir84 "84: No of heirs (son+daugh) in hh"

* Save the 1984 individual-level file, sorted on the merge keys.
sort vill84 house84 cep84, stable
keep vill84 house84 cep84 hsize84 dratio84 nodep84 nodep_ec84 fratio84 daugh84 ydaugh84 son84 nodaugh84 noson84 noheir84 head84
save "Temp\hhst84.dta", replace

********
* 1994 *
********
use "indiv94.dta", clear

* Dependency Ratio 94 *
***********************
* Ages 81-91 are recoded to 1; codes 98 and 99 are set to missing.
gen age = q2
replace age = 1 if age>80 & age<=91
replace age = . if age==98 | age==99

bys hhid94: gen hsize94=_N

* Working-age indicator; persons with missing age keep ind1564==0 and so count as dependents.
gen ind1564 = 0
replace ind1564 = 1 if age>=15 & age<=64
bys hhid94: egen age1564 = total(ind1564)
gen nodep94 = hsize94-age1564
* dratio94 is missing for households with no member aged 15-64 (division by zero).
gen dratio94 = nodep94/age1564

* FIX: exclude missing ages explicitly. In Stata a missing value is larger than any number,
* so "age>64" alone would count persons of unknown age (codes 98/99) as elderly dependents.
gen ind64 = 1 if age>64 & age<.
bys hhid94: egen nodep_ec94 = total(ind64)

lab var nodep94 "94: # of dependents in hh"
lab var nodep_ec94 "94: # of dependents excl children (64+) in hh"
lab var dratio94 "94: hh dependency ratio"

* Sex Ratio *
*************
* sex: 1 = male, 0 = female (recoded from 2), missing if coded 8.
* NOTE - females is computed as hsize94 - males, so persons with missing sex count as female.
gen sex = q3
replace sex = 0 if sex==2
replace sex = . if sex==8
bys hhid94: egen males = total(sex)
gen females = hsize94 - males
gen fratio94 = females/hsize94
lab var fratio94 "94: female ratio in hh"

* Indicator of HH Head *
************************
gen head94 = 0 if q4<98
replace head94 = 1 if q4==1
lab var head94 "94: Is ind hh head?"

* Relation to HH Head *
***********************
gen daugh94 = 0 if q4<98
replace daugh94 = 1 if q4==3 & sex==0
gen son94 = 0 if q4<98
replace son94 = 1 if q4==3 & sex==1
lab var daugh94 "94: Is ind a daughter (a potential heir)?"
lab var son94 "94: Is ind a son?"

* No of Potential Heirs *
*************************
bys hhid94: egen tdaugh94 = total(daugh94)
gen nodaugh94 = tdaugh94-daugh94
lab var nodaugh94 "94: No of other daughters (heirs) in hh"

** Identify the youngest daughter: sort by age within household and daughter status.
** The statement that flags the first daughter relies on this sort order, so do not re-sort in between.
sort hhid94 daugh94 age
gen ydaugh94 = 0 if daugh94~=.
* Flag the first (youngest) daughter in each household; requires data sorted by hhid94 daugh94 age.
by hhid94 daugh94: replace ydaugh94 = 1 if daugh94==1 & tdaugh94>=1 & _n==1
lab var ydaugh94 "94: Is ind the youngest daughter (most likely heir)?"

* Number of other sons in the hh (excluding the index individual).
bys hhid94: egen noson94 = total(son94)
replace noson94 = noson94-son94
lab var noson94 "94: No of other sons in hh"

gen noheir94 = noson94 + nodaugh94
lab var noheir94 "94: No of heirs (son+daugh) in hh"

* Location of Spouse *
**********************
* Location of spouse is coded in q6 (handled here as a string) - if the code starts with 1,
* the spouse lives in the current house. Take the first character and convert it to numeric.
gen den1 = substr(q6,1,1)
destring(den1), replace
gen spinhh94 = 0
* Check if person is married (q5==2) before assigning spouse's location
replace spinhh94 = 1 if den1==1 & q5==2
lab var spinhh94 "94: spouse lives in hh"

* Location of parents *
***********************
* Location of father is coded in q13: father lives in the house unless q13 is 997, 998 or 999.
* Codes 998 and 999 leave the indicator missing; 997 gives 0.
* Location of mother is coded the same way in q14.
gen finhh94 = 0 if q13~="998" & q13~="999"
replace finhh94 = 1 if q13~="997" & q13~="998" & q13~="999"
lab var finhh94 "94: father lives in hh?"

gen minhh94 = 0 if q14~="998" & q14~="999"
* FIX: test the mother's code (q14) throughout; the original checked q13 (father) for 998/999,
* so minhh94 depended on the father's location code.
replace minhh94 = 1 if q14~="997" & q14~="998" & q14~="999"
lab var minhh94 "94: mother lives in hh?"

* Any parent in hh: defined when at least one of the two indicators is non-missing.
gen prtinhh94 = 0 if finhh94~=. | minhh94~=.
replace prtinhh94 = 1 if finhh94==1 | minhh94==1
lab var prtinhh94 "94: any parent lives in hh?"
* Number of Current Migrants *
******************************
* q1==3 marks a current migrant; q1==9 leaves the indicator missing.
gen mig94 = (q1==3) if q1~=9
bys hhid94: egen tmig94 = total(mig94)
** exclude the mig himself/herself
gen nmig94 = tmig94-mig94
lab var nmig94 "94: no of migs (other than index ind) in the hh"

* Number of children in hh *
****************************
* We count the children living in the hh using two different methods: (1) we count
* individuals in the roster who are aged 12 or younger and who are children, adopted/step
* children or grandchildren of the hh head. (2) We use 'the number of living children'
* variable that is asked only of women, restricted to the 13-35 age range (our sample!),
* and create a hh level variable. Note that both of these variables are less than ideal,
* but the best that is available to us given the survey limitations.

* NOTE - Define child as 'child'(3) or 'adopted/step child'(5) or 'grand child'(8)
* as identified in q4 and age<=12
gen ch = (inlist(q4, 3, 5, 8) & age<=12) if q4<98
bys hhid94: egen hhch94 = total(ch)
lab var hhch94 "94: no of children<12 in hh"

* NOTE - Number of children by 13-35 year olds in the hh in 1994!
* If we computed this variable using everyone in the indiv94 roster, then we would be
* counting the children of the elderly too - who are currently adults!
* NOTE - We only know of women's living children, that is why we are computing a hh
* level variable instead of an individual level variable.
* Living children (q22) of individuals aged 13-35; codes 98/99 are treated as 0.
gen livch94 = 0
replace livch94 = q22 if age>=13 & age<=35
replace livch94 = 0 if q22==98 | q22==99
lab var livch94 "94: No of living children of the index woman"

bys hhid94: egen hhlivch94 = total(livch94)
lab var hhlivch94 "94: No of children by 13-35yr old women in the hh"

* Save the 1994 individual-level file, sorted on the merge keys.
sort hhid94 cep94, stable
keep hhid94 cep94 hsize94 dratio94 nodep94 nodep_ec94 nmig94 livch94 hhlivch94 hhch94 fratio94 daugh94 ydaugh94 son94 nodaugh94 ///
     noson94 noheir94 head94 spinhh94 finhh94 minhh94 prtinhh94
save "Temp\hhst94.dta", replace

********
* 2000 *
********
use "indiv00.dta", clear

* Dependency Ratio 00 *
***********************
gen age = x3
replace age = . if age==99

bys hhid00: gen hsize00=_N

* Working-age indicator; persons with missing age keep ind1564==0 and so count as dependents.
gen ind1564 = 0
replace ind1564 = 1 if age>=15 & age<=64
bys hhid00: egen age1564 = total(ind1564)
gen nodep00 = hsize00-age1564
* dratio00 is missing for households with no member aged 15-64 (division by zero).
gen dratio00 = nodep00/age1564

* FIX: exclude missing ages explicitly. In Stata a missing value is larger than any number,
* so "age>64" alone would count persons of unknown age (code 99) as elderly dependents.
gen ind64 = 1 if age>64 & age<.
bys hhid00: egen nodep_ec00 = total(ind64)

lab var nodep00 "00: # of dependents in hh"
lab var nodep_ec00 "00: # of dependents excl children (64+) in hh"
lab var dratio00 "00: hh dependency ratio"

* Sex Ratio in hh 00 *
**********************
* sex: 1 = male, 0 = female (recoded from 2), missing if coded 9.
* NOTE - females is computed as hsize00 - males, so persons with missing sex count as female.
gen sex = x4
replace sex = 0 if sex==2
replace sex = . if sex==9
bys hhid00: egen males = total(sex)
gen females = hsize00 - males
gen fratio00 = females/hsize00
lab var fratio00 "00: female ratio in hh"

* Indicator of HH Head *
************************
gen head00 = 0 if x13~=99
replace head00 = 1 if x13==1
lab var head00 "00: Is ind hh head?"

* Relation to HH Head *
***********************
gen daugh00 = 0 if x13~=99
replace daugh00 = 1 if x13==3 & sex==0
gen son00 = 0 if x13~=99
replace son00 = 1 if x13==3 & sex==1
lab var daugh00 "00: Is ind a daughter (a potential heir)?"
lab var son00 "00: Is ind a son?"
* No of Potential Heirs 00 *
****************************
bys hhid00: egen tdaugh00 = total(daugh00)
gen nodaugh00 = tdaugh00-daugh00
lab var nodaugh00 "00: No of other daughters (heirs) in hh"

* Identify the youngest daughter: within household and daughter status, sort by age
* (missing ages sort last) and flag the first observation among the daughters.
sort hhid00 daugh00 age
* FIX: refer to daugh00 by its full name (the original "daugh" relied on variable abbreviation).
gen ydaugh00 = 0 if daugh00~=.
* Use -by- (not -bysort-) as in the 1984 and 1994 sections, so the age ordering set above is
* guaranteed to be the one used when picking _n==1.
by hhid00 daugh00: replace ydaugh00 = 1 if daugh00==1 & tdaugh00>=1 & _n==1
lab var ydaugh00 "00: Is ind the youngest daughter (most likely heir)?"

* Number of other sons in the hh (excluding the index individual).
bys hhid00: egen noson00 = total(son00)
replace noson00 = noson00-son00
lab var noson00 "00: No of other sons in hh"

gen noheir00 = noson00 + nodaugh00
lab var noheir00 "00: No of heirs (son+daugh) in hh"

* IMP NOTE - Merge should be at an individual level - not hh level -
* since we have an ind-level measure of relation to hh head (daugh and son)

* Number of Current Migrants 00 *
*********************************
* x1==3 marks a current migrant; x1==9 leaves the indicator missing.
gen mig00 = 0 if x1~=9
replace mig00=1 if x1==3
bys hhid00: egen tmig00 = total(mig00)
** exclude the mig himself/herself
gen nmig00 = tmig00-mig00
lab var nmig00 "00: no of migs (other than index ind)in the hh"

* Number of children in hh 00 *
*******************************
* We count the children living in the hh using two different methods: (1) we count
* individuals in the roster who are aged 12 or younger and who are children, adopted/step
* children or grandchildren of the hh head. (2) We use 'the number of living children'
* variable that is asked only of women. We take only info of women who are 13-41 (our sample!)
* and create a hh level variable. Note that both of these variables are less than ideal,
* but the best that is available to us given the survey limitations.

* NOTE - Define child as 'child'(3) or 'adopted/step child'(5) or 'grand child'(8)
* as identified in x13 and age<=12
gen ch = 0 if x13~=99
replace ch = 1 if (x13==8 | x13==3 | x13==5) & age <=12
bys hhid00: egen hhch00 = total(ch)
lab var hhch00 "00: no of children<12 in hh"

* NOTE - Number of children by 13-41 year olds in the hh in 2000!
* NOTE - Only women's living children are known, which is why a hh level variable is
* computed rather than an individual level one.
* livch00 takes x23 for individuals aged 13-41 (unless x23 is coded 99) and 0 otherwise.
gen livch00 = cond(inrange(age, 13, 41) & x23~=99, x23, 0)
lab var livch00 "00: No of living children of the index woman"

bys hhid00: egen hhlivch00 = total(livch00)
lab var hhlivch00 "00: No of children by 13-41yr old women in the hh"

* Location of Spouse *
**********************
* Location of spouse is coded in x15 (handled here as a string) - a code starting with 1
* means the spouse lives in the current house. Take the first character and convert it to numeric.
gen den1 = substr(x15,1,1)
destring(den1), replace
* Spouse counts as co-resident only for married persons (x14==2).
gen spinhh00 = (den1==1 & x14==2)
lab var spinhh00 "00: spouse lives in hh"

* Location of parents *
***********************
* Location of father is coded in x17 and of mother in x18; the parent lives in the house if the
* code starts with 1. An empty code or "9999999999" leaves the indicator missing
* (unless the first character is 1).
local k = 2
foreach p in f m {
    local src = cond("`p'"=="f", "x17", "x18")
    gen den`k' = substr(`src',1,1)
    destring(den`k'), replace
    gen `p'inhh00 = 0 if `src'~="" & `src'~="9999999999"
    replace `p'inhh00 = 1 if den`k'==1
    local ++k
}
lab var finhh00 "00: father lives in hh?"
lab var minhh00 "00: mother lives in hh?"

* Any parent in hh: defined when at least one of the two indicators is non-missing.
gen prtinhh00 = (finhh00==1 | minhh00==1) if finhh00~=. | minhh00~=.
lab var prtinhh00 "00: any parent lives in hh?"
* Save the 2000 individual-level file, sorted on the merge keys.
keep hhid00 cep00 hsize00 dratio00 nodep00 nodep_ec00 nmig00 livch00 hhlivch00 hhch00 fratio00 daugh00 ydaugh00 ///
     son00 nodaugh00 noson00 noheir00 head00 spinhh00 finhh00 minhh00 prtinhh00
sort hhid00 cep00, stable
save "Temp\hhst00.dta", replace

******************************
* Merge to Life History Data *
******************************
* IMP NOTE - Merge should be at an individual level first - not hh level -
* since we have an ind-level measure of relation to hh head (daugh and son)

use "Temp\combt6.dta", clear

* --- Individual-level merges (keys include the person id cep*) ---
local ind84 head84 son84 daugh84 ydaugh84 nodaugh84 noson84 noheir84
local ind94 spinhh94 finhh94 minhh94 prtinhh94 nmig94 head94 son94 daugh94 ydaugh94 nodaugh94 noson94 noheir94
local ind00 spinhh00 finhh00 minhh00 prtinhh00 nmig00 head00 son00 daugh00 ydaugh00 nodaugh00 noson00 noheir00

sort vill84 house84 cep84, stable
merge vill84 house84 cep84 using "Temp\hhst84.dta", keep(`ind84')
drop if _merge==2
drop _merge

sort hhid94 cep94, stable
merge hhid94 cep94 using "Temp\hhst94.dta", keep(`ind94')
drop if _merge==2
drop _merge

sort hhid00 cep00, stable
merge hhid00 cep00 using "Temp\hhst00.dta", keep(`ind00')
drop if _merge==2
drop _merge

* --- Household-level merges ---
* Now, get the hh-level variables (the only reason for doing this separately is to avoid the
* missing values due to individual-level matching in the former step).
sort vill84 house84, stable
merge vill84 house84 using "Temp\hhst84.dta", keep(dratio* nodep* fratio* hsize*)
drop if _merge==2
drop _merge

local hhvars dratio* nodep* hhch* hhlivch* fratio* hsize*

sort hhid94, stable
merge hhid94 using "Temp\hhst94.dta", keep(`hhvars')
drop if _merge==2
drop _merge

sort hhid00, stable
merge hhid00 using "Temp\hhst00.dta", keep(`hhvars')
drop if _merge==2
drop _merge

* Correct mismatches - e.g. men who are listed as 'daugh'
* or women listed as 'son', or men with livch>0
* (I am suspecting that the mismatches are due to mistakes in cep94
* vars - they appear only after the merge)
* Here sex is the life-history variable (1 = male, 0 = female).
foreach w in 84 94 00 {
    replace daugh`w' = 0 if sex==1 & daugh`w'==1
}
foreach w in 84 94 00 {
    replace ydaugh`w' = 0 if sex==1 & ydaugh`w'==1
}
foreach w in 84 94 00 {
    replace son`w' = 0 if sex==0 & son`w'==1
}

* The life history data already has noch and livch variables, keep them, but correct the few
* inconsistent cases.
replace livch = 0 if sex==1 & livch>0 & livch~=.
replace noch = 0 if sex==1 & noch>0 & noch~=.

************************************
* Aggregate HH Structure Variables *
************************************
* Assume hh structure variables obtain their 84 values from 84-93, 94 values from 94-99, and 00 values in 00.
* Assume for year<1984, year 1984 values are valid - This is a questionable assumption
foreach v in dratio nodep nodep_ec fratio {
    gen `v' = `v'84 if year<1994
    replace `v' = `v'94 if year>=1994 & year<2000
    replace `v' = `v'00 if year==2000
}
lab var dratio "hh dependency ratio in year"
lab var nodep "# of dependents in hh in year"
lab var nodep_ec "# of dependents excl children (64+) in hh"
lab var fratio "female ratio in hh in year"

* nmig, hhch, hhlivch, spinhh, finhh, minhh, prtinhh are not measured in 84.
* Variables available only from 1994 on: 94 values apply to 1994-1999, 00 values to 2000;
* earlier years stay missing.
foreach v in nmig hhch hhlivch spinhh finhh minhh prtinhh {
    gen `v' = `v'94 if year>=1994 & year<2000
    replace `v' = `v'00 if year==2000
}
lab var nmig "no of migs (other than index ind) in hh in year"
lab var hhch "no of children<12 in hh in year"
lab var hhlivch "no of children by 18-35yr old women in hh in year"
lab var spinhh "spouse lives in hh in year?"
lab var finhh "father lives in hh in year?"
lab var minhh "mother lives in hh in year?"
lab var prtinhh "any parent lives in hh in year?"

* Head of hh is measured in all three waves.
gen head = head84 if year<1994
replace head = head94 if year>=1994 & year<2000
replace head = head00 if year==2000
lab var head "Is ind hh head in year?"

* Note there are inconsistencies in daugh84, daugh94 and daugh00 vars.
* For the overall 'daugh' variable, take the 84 value; where that is missing fall back
* on daugh94, and where that is missing too on daugh00.
gen daugh = cond(daugh84~=., daugh84, cond(daugh94~=., daugh94, daugh00))
lab var daugh "Is ind a daughter (a potential heir)?"

* For youngest daughter variable, it is slightly different - someone
* may be a youngest daughter in 94 and not be that in 00 and vice versa.
* So, take 'ydaugh' of each wave for the years it covers, and fill any value that is
* still missing from the next wave (ydaugh94, then ydaugh00).
gen ydaugh = ydaugh84 if year<1994
replace ydaugh = ydaugh94 if (year>=1994 & year<2000) | ydaugh==.
replace ydaugh = ydaugh00 if year==2000 | ydaugh==.
lab var ydaugh "Is ind the youngest daughter (most likely heir)?"

* Same fallback chain as for 'daugh': 84 value, else 94, else 00.
gen son = cond(son84~=., son84, cond(son94~=., son94, son00))
lab var son "Is ind a son?"

* Heir counts: 84 values for years before 1994, 94 values for 1994-1999, 00 values for 2000.
foreach v in nodaugh noson noheir {
    gen `v' = `v'84 if year<1994
    replace `v' = `v'94 if year>=1994 & year<2000
    replace `v' = `v'00 if year==2000
}
lab var nodaugh "no of other daughters (heirs) in hh in year"
lab var noson "no of other sons in hh in year"
lab var noheir "no of heirs (son+daugh) in hh in year"

save "Temp\comb_lh_hh.dta", replace

****************************************************************************
******************* 10. OBTAIN VILLAGE-LEVEL VARIABLES *********************
****************************************************************************
* IMPORTANT NOTE: We get the village characteristics from the 1994 and 2000 community
* data sets (vill94.dta and comm00.dta in the original note) for years 1994 and 2000
* respectively. We are using vill84 id's in matching. Because 84 villages were split in
* 1994 and then again in 2000, we typically have more than one observation for each vill84
* id in the later data sets. So, we need to collapse the different observations into a
* unique value for each vill84 id. (For water shortages, for instance, the maximum value
* is used instead of the mean.)

*************************************
*** (a) Water Shortage in Village ***
*************************************
use "Temp\comb_lh_hh.dta", clear

* Note - 1984 data only records whether water shortage is the number 1 problem in the village.
* In 40 out of 51 villages it is, so it may not be very discriminatory. 1994 and 2000 data, by
* contrast, ask for how many months there was water shortage.
* 1984 *
********
sort vill84
merge vill84 using "comm84.dta", keep(v84_136)
drop _merge

* Indicator: 1 when v84_136==1, 0 otherwise (including missing).
gen shortg84 = (v84_136==1)
lab var shortg84 "84: water shortage a problem (0/1)"

* 1994 *
********
* Note there are 310 communities interviewed in the 94 wave. Our original 51
* villages are now split into 76. We only keep these original villages, and
* drop the remaining community observations.
sort vill94
merge vill94 using "comm94.dta", keep(q4_54 q4_54_1)
drop if _merge==2
drop _merge

* Months of shortage = end month (q4_54_1) minus start month (q4_54).
gen temp94 = q4_54_1 - q4_54
* There is one village with -1 months of water shortage, it is probably miscoded.
* Correct to 1.
replace temp94 = 1 if temp94==-1

* The value can differ across the 1994 villages that descend from the same 1984
* village (because of village splits), so take the maximum within vill84.
bys vill84: egen shortg94 = max(temp94)
lab var shortg94 "94: months of water shortage in village"
drop temp94 q4_54 q4_54_1 v84_136

* 2000 *
********
* NOTE - In 2000 data, water shortage var is coded in a different way.
* Instead of asking from which month to which month the shortage lasted,
* as in the 1994 questionnaire, the 2000 data lists the actual months of
* water shortage.
* 2000 data also asks about last year's water shortage (in x65mnth*), which we record into shortg99.
sort vill00
merge vill00 using "comm00.dta", keep(x63mnth* x65mnth*)
drop if _merge==2
drop _merge

* Set missing values to zero - as missing means no months of shortage
forvalues m = 1/5 {
    replace x63mnth`m'=0 if x63mnth`m'==.
}
* Current-year months (x63mnth1-5): turn each recorded month value into a 0/1 indicator.
forvalues m = 1/5 {
    replace x63mnth`m'=1 if x63mnth`m'>0
}

* Last-year months (x65mnth1-5): missing means no month of shortage, so set it to zero,
* then turn each recorded month value into a 0/1 indicator.
forvalues m = 1/5 {
    replace x65mnth`m'=0 if x65mnth`m'==.
    replace x65mnth`m'=1 if x65mnth`m'>0
}

* Number of months with shortage = sum of the five indicators.
gen temp00 = x63mnth1 + x63mnth2 + x63mnth3 + x63mnth4 + x63mnth5
gen temp99 = x65mnth1 + x65mnth2 + x65mnth3 + x65mnth4 + x65mnth5

* The months of shortage can differ across the villages that descend from the same
* 1984 village (villages were split in 94 and 00), so take the maximum within vill84.
bys vill84: egen shortg00 = max(temp00)
bys vill84: egen shortg99 = max(temp99)
lab var shortg00 "00: months of water shortage in village"
lab var shortg99 "99: months of water shortage in village"

* Aggregate *
*************
* Assume that months of water shortage were constant from 1972-1984 (took 1984 value).
* Note this is a questionable assumption.
* NOTE - shortg84 is a 0/1 indicator while shortg94/shortg00 are months, so shortg and
* shortg_lag mix two scales across periods.
gen shortg = shortg84 if year<1994
replace shortg = shortg94 if year>=1994 & year<2000
replace shortg = shortg00 if year==2000
lab var shortg "Months of water shortage in vill in year"

gen shortg_lag = shortg84 if year>=1994 & year<2000
replace shortg_lag = shortg94 if year==2000
lab var shortg_lag "Lagged months of water shortage in vill in year"

drop x63mnth* temp00 x65mnth* temp99

**********************************************
*** (b) Year School Established in Village ***
**********************************************
** NOTE - Use the year school established variable in comm00 data - it is more
** or less consistent with vill94.dta (there are a few years of difference in school
** establishment dates). Use vill00 ID to merge b/c most vill84 ID values are missing in
** comm00.dta.
sort vill00, stable
merge vill00 using "comm00.dta", keep(x9year1)
drop if _merge==2
drop _merge

* convert from buddhist year (9999 = missing)
gen yrtemp = x9year1
replace yrtemp = . if yrtemp==9999
replace yrtemp = yrtemp-543
* compute years since school has been established, as of the row's year
replace yrtemp = year-yrtemp
* 0 stands for "no school (yet) or establishment year unknown".
* FIX: also clamp negative values (row year earlier than the establishment year) to 0,
* otherwise the school indicator below would be switched on before the school exists.
replace yrtemp = 0 if yrtemp==. | yrtemp<0

* There are different values of yrtemp for some villages. This is because in 1994
* some villages were split, so we take the maximum value of the possible years
* since school establishment variable.
* FIX: take the maximum within vill84 AND year. Grouping by vill84 alone took the maximum
* over all years of a village, which made yrsch constant over time (its latest-year value).
bys vill84 year: egen yrsch = max(yrtemp)
lab var yrsch "years since school established in village"
drop yrtemp x9year1

* generate an indicator school (pri or sec) in village
gen sch = 0
replace sch = 1 if yrsch~=0
lab var sch "is there a school (pri or sec) in vill?"

*************************************
*** (c) Remoteness of the Village ***
*************************************
* A village is considered remotely located if there are two (note - in curran's defn three!) or more
* obstacles to traveling to the district town. The obstacles are (1) the presence of a portion of the
* route to the district town that is a cart path (unpaved, rutted, and narrow), (2) the lack of public
* transportation to the district town, (3) travel to the district town takes an hour or more, (4) that
* during the year there are four or more months of difficult travel to leave the village, and (5) it is
* 20 or more kilometers to the district town.

* 1984 *
********
sort vill84, stable
merge vill84 using "comm84.dta", keep(v84_008 v84_009 v84_010 v84_011 v84_018 v84_019)
drop _merge

* For obstacle 1, see if there is a 'dirt' portion of the road.
gen obs1 = 1 if v84_009==3 | v84_009==5 | v84_009==6 | v84_009==7
gen obs2 = 1 if v84_011==2
* FIX (obs3, obs5): a missing value is larger than any number in Stata, so without the
* "<." guard a village with missing travel time or distance was counted as having the obstacle.
gen obs3 = 1 if v84_010>=60 & v84_010<.
gen obs4 = 1 if v84_019-v84_018>=4 & v84_019<98 & v84_018<99
gen obs5 = 1 if v84_008>=20 & v84_008<.
* rowtotal treats missing obstacle flags as 0.
egen obs = rowtotal(obs1 obs2 obs3 obs4 obs5)

* create a variable for each obstacle
gen rdpaved84 = 1 if v84_009==1 | v84_009==2 | v84_009==4
replace rdpaved84 = 0 if v84_009==3 | v84_009==5 | v84_009==6 | v84_009==7
lab var rdpaved84 "84: road to NR asphalt and/or gravel?"
gen bus84 = 0 if v84_011==2
replace bus84 = 1 if v84_011==1
lab var bus84 "84: is there a bus bw vill and NR?"
gen timetonr84 = v84_010
* note '97' means that travel takes more than 2 hours.
replace timetonr84 = 120 if timetonr84==97
lab var timetonr84 "84: how many minutes to NR?"
gen diffmo84 = v84_019-v84_018 if v84_019<98 & v84_018<99
lab var diffmo84 "84: # of months of difficult travel"
gen kmtonr84 = v84_008
lab var kmtonr84 "84: distance (km) to NR"

gen remote84 = 0
replace remote84 = 1 if obs>=2
lab var remote84 "84: two or more obstacles to trip to NR"
drop obs* v84_008 v84_009 v84_010 v84_011 v84_018 v84_019

* 1994 *
********
sort vill94, stable
merge vill94 using "comm94.dta", keep(q2_16_4 q2_16_1 q2_18 q2_19 q2_20 q2_20_1 q2_33 q33_1)
drop if _merge==2
drop _merge
* For obstacle 1, see if there is a 'dirt' portion of the road.
* Obstacles to travel, 1994 (see the list of five obstacles in the section header).
gen obs1 = 1 if q2_16_4==1
gen obs2 = 1 if q2_19==2
* FIX (obs3, obs5): a missing value is larger than any number in Stata, so without the
* "<." guard a village with missing travel time or distance (e.g. a row that found no match
* in comm94) was counted as having the obstacle.
gen obs3 = 1 if q2_18>=60 & q2_18<.
gen obs4 = 1 if q33_1-q2_33>=4 & q33_1<98 & q2_33<99
* q2_16_1 is in meters (it is divided by 1000 below to get km), so 20000 = 20 km.
gen obs5 = 1 if q2_16_1>=20000 & q2_16_1<.
* rowtotal treats missing obstacle flags as 0.
egen obs = rowtotal(obs1 obs2 obs3 obs4 obs5)

* create a variable for each obstacle
* compute the cost of the trip - divide by 2 if roundtrip
* (q2_20 is divided by q2_20_1; presumably q2_20_1 is 1 for one-way and 2 for roundtrip - verify in codebook)
gen costtonr94 = .
replace costtonr94 = q2_20/q2_20_1 if q2_20<98
lab var costtonr94 "cost of one-way bus ticket to NR"

* FIX: the original set rdpaved94 to 1 and then "replaced" it with 1 again, so the variable
* was constant. q2_16_4==1 is the dirt-portion flag used for obstacle 1, so those villages
* are not paved (0).
* NOTE(review): villages with missing q2_16_4 keep the default of 1 - confirm this is intended.
gen rdpaved94 = 1
replace rdpaved94 = 0 if q2_16_4==1
lab var rdpaved94 "94: road to NR asphalt and/or gravel?"

gen bus94 = 0
replace bus94 = 1 if q2_19==1
lab var bus94 "94: is there a bus bw vill and NR?"

gen timetonr94 = q2_18
lab var timetonr94 "94: how many minutes to NR?"

* Months of difficult travel = 'to' month minus 'from' month; code 98 in either is taken as 0 months.
gen diffmo94 = q33_1 - q2_33 if q33_1<98 & q2_33<98
replace diffmo94 = 0 if q33_1==98 | q2_33==98
lab var diffmo94 "94: # of months of difficult travel"
* There are two villages with negative months of difficult travel - probably
* a recording error - mixing of 'from' and 'to' entries - reverse them
replace diffmo94 = -1*diffmo94 if diffmo94~=. & diffmo94<0

gen kmtonr94 = q2_16_1/1000
lab var kmtonr94 "94: distance (km) to NR"

gen remote94 = 0
replace remote94 = 1 if obs>=2
lab var remote94 "94: two or more obstacles to trip to NR"
drop obs* q2_16_4 q2_16_1 q2_18 q2_19 q2_20* q2_33 q33_1

* 2000 *
********
* There is no information in the comm00.dta to compute remote variable.
* Only one obstacle to travel is noted (months where it is difficult to travel to
* NR - x39mnth1-6)
sort vill00, stable
merge vill00 using "comm00.dta", keep(x39diff x39mnth*)
drop if _merge==2
drop _merge
* Count how many of the six month slots are filled in.
egen diffmo00 = rownonmiss(x39mnth1 x39mnth2 x39mnth3 x39mnth4 x39mnth5 x39mnth6)
lab var diffmo00 "00: # of months of difficult travel"
drop x39diff x39mnth*

*************
* Aggregate *
*************
* Person-year values: years before 1994 take the 1984 measure, 1994 onward the 1994 measure
* (diffmo additionally switches to the 2000 measure in year 2000).
gen rdpaved = rdpaved84 if year<1994
replace rdpaved = rdpaved94 if year>=1994
lab var rdpaved "road to NR asphalt or gravel (84,94)"

gen bus = bus84 if year<1994
replace bus = bus94 if year>=1994
lab var bus "is there a bus bw vill and NR? (84,94)"

gen timetonr = timetonr84 if year<1994
replace timetonr = timetonr94 if year>=1994
lab var timetonr "how many minutes to NR? (84,94)"

gen kmtonr = kmtonr84 if year<1994
replace kmtonr = kmtonr94 if year>=1994
lab var kmtonr "distance (km) to NR? (84,94)"

gen diffmo = diffmo84 if year<1994
replace diffmo = diffmo94 if year>=1994 & year<2000
replace diffmo = diffmo00 if year==2000
lab var diffmo "# of months of difficult travel(84,94,00)"

* Lagged version: the 1984 value is carried through 1994, the 1994 value applies afterwards.
gen remote_lag = remote84 if year<=1994
replace remote_lag = remote94 if year>1994
lab var remote_lag "two or more obstacles to trip to NR (lagged)"

******************************************************************************
**** (d) Land, Temple, Telephones, Tvs, Newspaper Reading Ctr in Village *****
******************************************************************************

********
* 1984 *
********
sort vill84, stable
merge vill84 using "comm84.dta", keep(v84_003 v84_004 v84_140 v84_206 v84_207 v84_209 v84_211 v84_078)
drop _merge

gen nohh84 = v84_003
gen pop84 = v84_004
gen mills84 = v84_140 if v84_140~=8
* Recode 2 to 0 so the indicator is 0/1.
gen elec84 = v84_206
replace elec84 = 0 if v84_206==2
* A value of 98 is recoded to zero households.
gen hhelec84 = v84_209 if v84_209~=98
replace hhelec84 = 0 if v84_209==98
gen ptv84 = v84_211/nohh84
gen vagri84 = v84_078

lab var nohh84 "84: # of houses in vill"
lab var pop84 "84: population of vill"
* Label previously read "94:" although the variable holds the 1984 value.
lab var mills84 "84: # of rice mills in village"
lab var elec84 "84: village has electricity?"
lab var hhelec84 "84: # of hhs w electricity in vill"
lab var ptv84 "84: ratio of hhs with tv in village"
lab var vagri84 "84: land (rai) used for agriculture in vill"
* codebook nohh84 pop84 mills84 elec84 hhelec84 ptv84 vagri84
drop v84_003 v84_004 v84_140 v84_206 v84_207 v84_209 v84_211 v84_078

********
* 1994 *
********
sort vill94, stable
merge vill94 using "comm94.dta", keep(q10_110 q9_108 q9_109 q1_7 q1_8 q145 q6_87 q8_102 q8_103 q8_104 ///
    q8_105 q9_106)
drop if _merge==2
drop _merge

* Temporary (_t) versions at the vill94 level; yes/no items are recoded from 1/2 to 1/0 and
* the listed special codes are set to missing.
gen temple_t = q10_110
replace temple_t = 0 if temple_t==2
gen news_t = q9_108
replace news_t = 0 if news_t==2
* NOTE(review): subtracting 543 presumably converts a Buddhist Era year to a Gregorian year - confirm.
gen yrnews_t = q9_109-543 if q9_109~=9998
gen nohh_t = q1_7 if q1_7~=999
gen pop_t = q1_8
gen tv_t = q9_106 if q9_106~=998
gen vagri_t = q145 if q145~=9998
gen mills_t = q6_87
gen elec_t = q8_102
replace elec_t = 0 if elec_t==2
gen yrelec_t = q8_103-543 if q8_103~=9998
gen phone_t = q8_104
replace phone_t = 0 if phone_t==2
gen hhphone_t = q8_105 if q8_105~=98 & q8_105~=999
replace hhphone_t = 0 if hhphone_t==.

* Generate values based on vill84 IDs - B/c there are more villages in 94,
* we can have multiple values by vill84 ID (Take the max or min value depending
* on the variable)
bys vill84: egen temple94 = max(temple_t)
bys vill84: egen news94 = max(news_t)
bys vill84: egen yrnews = min(yrnews_t)
bys vill84: egen nohh94 = max(nohh_t)
bys vill84: egen pop94 = max(pop_t)
bys vill84: egen ptv94 = max(tv_t)
replace ptv94 = ptv94/nohh94
* Note there is 1 village for which ptv>1 - the reason is that the village in 84 (44005) has split
* into two in 1994. The nohh94 variable is missing for one of the split villages, while tv variable
* is not. (nohh=31 for the first village and tv_t=21, for the second village nohh=. and tv_t==75. Because
* we take the maximum for two villages, ptv ends up with 75/31. Manually correct this by setting it to 21/31
* instead. Also correct nohh.)
* Manual corrections for the 1984 village 44005 (split into two villages by 1994).
* NOTE(review): the 75 added to nohh94 is described in the preceding note as the second
* village's TV count, not a household count - confirm this is the intended value.
replace ptv94 = 21/31 if vill84=="44005"
replace nohh94 = 31+75 if vill84=="44005"

* Remaining 1994 measures collapsed to the vill84 level (max, or min for the year variable).
bys vill84: egen vagri94 = max(vagri_t)
bys vill84: egen mills94 = max(mills_t)
bys vill84: egen elec94 = max(elec_t)
bys vill84: egen yrelec = min(yrelec_t)
bys vill84: egen phone94 = max(phone_t)
bys vill84: egen nophone94 = max(hhphone_t)

* There is one village "34005" that has 85 phones recorded, while other villages have at most 5 phones.
* I assume that village also has 1 phone (Note - phone94==1 for that village. Also, the village has 2 phones
* in 2000, so cannot have higher than that in 1994.)
* The condition names nophone94 in full: the former "nophone" only resolved through
* variable-name abbreviation, which fails once nophone or nophone00 exist or when varabbrev is off.
replace nophone94 = 1 if nophone94==85
* Assume there is at least one telephone in villages with phone service.
replace nophone94 = 1 if phone94==1 & nophone94==0
gen pphone94 = nophone94/nohh94

lab var temple94 "94: is there a temple in vill?"
lab var news94 "94: is there a newspaper rdg ctr in vill?"
lab var yrnews "94: year newspaper rdg ctr est. in vill?"
lab var nohh94 "94: # of houses in vill"
lab var pop94 "94: population of vill"
lab var vagri94 "94: land (rai) used for agriculture in vill"
lab var ptv94 "94: ratio of hhs with tv in village"
lab var mills94 "94: # of rice mills in village"
lab var elec94 "94: is there electricity in vill?"
lab var yrelec "94: year of electrification in vill?"
lab var phone94 "94: is there a phone service in village"
lab var nophone94 "94: no of phones in village"
lab var pphone94 "94: no of phones per hh in village"

* Remove the raw 1994 survey items and the temporary (_t) variables.
drop q10_110 q9_108 q9_109 q1_7 q1_8 q145 q6_87 q8_102 q8_103 q8_104 q8_105 q9_106 temple_t news_t yrnews_t nohh_t tv_t vagri_t ///
    mills_t pop_t elec_t yrelec_t phone_t hhphone_t
* codebook temple94 yrnews news94 pop94 nohh94 ptv94 vagri94 ptv94 mills94 elec94 yrelec phone94 nophone94 pphone94

********
* 2000 *
********
sort vill00, stable
merge vill00 using "comm00.dta", keep(x4 x5 x6 x7 x19 x46 x47* x51yrs* x85 x95 x94num x95num x96num)
drop if _merge==2
drop _merge

* Temporary (_t) versions at the vill00 level; yes/no items are recoded from 1/2 to 1/0.
gen nohh_t = x4
gen pop_t = x5 + x6 if x5~=9999 & x6~=9999
gen vagri_t = x7
gen mills_t = x19
gen fact_t = x46
replace fact_t = 0 if fact_t==2
gen fact5km_t = x47
replace fact5km_t = 0 if fact5km_t==2
gen nofact_t = x47num
replace nofact_t = 0 if nofact_t==.

* Treat 99 as missing in the factory-age items, then keep the largest age (0 when none is reported).
replace x51yrs1 = . if x51yrs1==99
replace x51yrs2 = . if x51yrs2==99
replace x51yrs3 = . if x51yrs3==99
replace x51yrs4 = . if x51yrs4==99
replace x51yrs5 = . if x51yrs5==99
egen yrfact_t = rowmax(x51yrs1 x51yrs2 x51yrs3 x51yrs4 x51yrs5)
replace yrfact_t = 0 if yrfact_t==.

gen temple_t = x85
replace temple_t = 0 if temple_t==2
gen phone_t = x95
replace phone_t = 0 if phone_t==2

* Phones = sum(regular+cellphones+phone booth)
* rowtotal() counts a missing component as zero and returns 0 when all three are missing.
* The former "a + b + c" was missing whenever any one component was missing and was then
* recoded to 0, which discarded the counts that were present.
egen nophone_t = rowtotal(x94num x95num x96num)

* Generate values based on vill84 IDs - B/c there are more villages in 00,
* we can have multiple values by vill84 ID (Take the max or mean value depending
* on the variable).
bys vill84: egen nohh00 = max(nohh_t)
bys vill84: egen pop00 = max(pop_t)
bys vill84: egen vagri00 = max(vagri_t)
bys vill84: egen mills00 = max(mills_t)
bys vill84: egen factory00 = max(fact_t)
bys vill84: egen fact5km00 = max(fact5km_t)
bys vill84: egen nofact5km00 = max(nofact_t)
* Note - nofact5km00 records the 'other' factories not reported in the factory00.
* Consolidate both variables.
* Wherever a factory is reported inside the village (factory00 is 1), set both the
* within-5km count and the within-5km indicator to 1.
* NOTE(review): the count is set to 1 even where nofact5km00 already held a larger value - confirm intended.
replace nofact5km00 = 1 if factory00 == 1
replace fact5km00   = 1 if factory00 == 1

* Largest factory age and temple indicator within each vill84 group.
bysort vill84: egen yrfactory = max(yrfact_t)
bysort vill84: egen temple00  = max(temple_t)

* The data set already carries a household-level phone00; move it aside so the
* village-level measure can take that name.
rename phone00 hhphone00
bysort vill84: egen phone00   = max(phone_t)
bysort vill84: egen nophone00 = max(nophone_t)

* Phone service present in 1994 is assumed to persist to 2000
* (4 villages are miscoded otherwise).
replace phone00 = 1 if phone94 == 1
* The 2000 phone count may not fall below the 1994 count.
replace nophone00 = nophone94 if nophone00 < nophone94
* A village with phone service has at least one telephone.
replace nophone00 = 1 if nophone00 == 0 & phone00 == 1

* Phones per household in 2000.
generate pphone00 = nophone00 / nohh00

label variable nohh00      "00: # of houses in vill"
label variable pop00       "00: population of village"
label variable vagri00     "00: land (rai) used for agriculture in vill"
label variable mills00     "00: # of rice mills in vill"
label variable factory00   "00: is there a factory in vill"
label variable fact5km00   "00: is there a factory w/in 5km of vill"
label variable nofact5km00 "00: # of factories w/in 5km of vill"
label variable yrfactory   "00: How old is the oldest factory w/in 5km of village?"
label variable temple00    "00: is there a temple in vill?"
label variable phone00     "00: Phone service in village?"
label variable nophone00 "00: Number of phones in village"
label variable pphone00  "00: Number of phones per hh in village"

* Discard the temporary (_t) variables and the raw 2000 survey items.
drop nohh_t pop_t vagri_t mills_t fact_t fact5km_t nofact_t yrfact_t temple_t phone_t nophone_t
drop x4 x5 x6 x7 x19 x46 x47* x51yrs* x85 x95 x94num x95num x96num
* codebook nohh00 pop00 vagri00 mills00 factory00 fact5km00 nofact5km00 yrfactory temple00 nophone00 pphone00

*************
* Aggregate *
*************
* Person-year values of nohh, pop, vagri and mills: the 1984 measure before 1994,
* the 1994 measure for 1994-1999, and the 2000 measure in 2000.
foreach v in nohh pop vagri mills {
    generate `v' = `v'84 if year < 1994
    replace  `v' = `v'94 if year >= 1994 & year < 2000
    replace  `v' = `v'00 if year == 2000
}
label variable nohh  "no of houses in vill in year (from vill survey)"
label variable pop   "population of vill in year (from vill survey)"
label variable vagri "land (rai) used for agriculture in vill"
label variable mills "# of rice mills in vill in year"

* Factories are only surveyed in 2000; earlier years are filled in from the factory's age.
* The indicator is 1 when the year is later than 2000 minus yrfactory, and 0 otherwise.
generate factory = (year > 2000 - yrfactory)
label variable factory "Is there a factory w/in 5km of this village in year?"

* Temple and phone service were not measured in 1984: the 1994 value is used through 1994
* and the 2000 value afterwards.
foreach v in temple phone {
    generate `v' = `v'94 if year <= 1994
    replace  `v' = `v'00 if year > 1994
}
label variable temple "is there a temple in vill in year?"
label variable phone  "is there a phone service in vill in year?"
* Phone count and phones per household: the 1994 value through 1994, the 2000 value afterwards.
foreach v in nophone pphone {
    generate `v' = `v'94 if year <= 1994
    replace  `v' = `v'00 if year > 1994
}
label variable nophone "no of phones in vill in year"
label variable pphone  "no of phones per hh in vill"

******************
** Sample Cuts ***
******************
* (i) Keep only the migrant villages - note that we should keep all the data from migrant life
* histories (mlife94 and mlife00) - see by tab source and then tab source if migvill==1
* KEEP ALL 51 VILLAGES!
* keep if migvill==1

* (ii) Delete the observations w age outside 13-41 range. (If we delete the whole data for individuals
* with outside range age data, we'd be deleting 17,559 obs, and 630 unique individuals, which amounts
* to about 10% of data. Instead we only delete the observations, and lose 2514 cases.)
* Rows with a missing age are removed as well.
keep if inrange(age, 13, 41)

* (iii) Keep years 1972-2000 (41 yr olds in 2000 are 13 in 1972 - 239 obs dropped)
keep if inrange(year, 1972, 2000)

* (iv) Identify the source info
* One code per combination of the four source flags (l94, m94, l00, m00); combinations
* not listed below keep the initial value 0.
label define src 1 "1: l94" 2 "2: l94&m94" 3 "3:l94&l00" 4 "4:l94&m00" 5 "5:m94&l00" 6 "6:m94&m00" 7 "7:l00&m00" 8 "8:m94" 9 "9:l00" 10 "10:m00" ///
    11 "11: l94&m94&l00" 12 "12: l94&m94&m00" 13 "13: l94&l00&m00"
generate source = 0
replace source = 1  if l94 == 1 & m94 == . & l00 == . & m00 == .
replace source = 2  if l94 == 1 & m94 == 1 & l00 == . & m00 == .
replace source = 3  if l94 == 1 & m94 == . & l00 == 1 & m00 == .
replace source = 4  if l94 == 1 & m94 == . & l00 == . & m00 == 1
replace source = 5  if l94 == . & m94 == 1 & l00 == 1 & m00 == .
replace source = 6  if l94 == . & m94 == 1 & l00 == . & m00 == 1
replace source = 7  if l94 == . & m94 == . & l00 == 1 & m00 == 1
replace source = 8  if l94 == . & m94 == 1 & l00 == . & m00 == .
replace source = 9  if l94 == . & m94 == . & l00 == 1 & m00 == .
replace source = 10 if l94 == . & m94 == . & l00 == . & m00 == 1
replace source = 11 if l94 == 1 & m94 == 1 & l00 == 1 & m00 == .
replace source = 12 if l94 == 1 & m94 == 1 & l00 == . & m00 == 1
replace source = 13 if l94 == 1 & m94 == . & l00 == 1 & m00 == 1
label values source src

rename p1 place1

* Write the data set with household- and village-level variables attached.
save "Temp\comb_lh_hh_vill0.dta", replace