**********************************
*                                *
* California Body Donors Project *
*                                *
**********************************
* created: June 11, 2012
* last update: Sep 10, 2013 5pm

* This section loads the raw registrant file and derives the registration
* date, the conversion dates, age, and the education dummies.

* start with the original data set
* NOTE(review): the working directory is a hard-coded absolute path; it must
* be adjusted when the file is run on another machine.
cd "/Users/filizgarip/Desktop/Revisions Sep 13 for SSM"
use "UC_DDL_Registrants.dta", clear

* registeration_date is a string; characters 1-4, 6-7 and 9-10 are taken as
* year, month and day respectively.
codebook registeration_date
gen reg_yr = substr(registeration_date, 1,4)
gen reg_mo = substr(registeration_date, 6,2)
gen reg_da = substr(registeration_date, 9,2)
destring reg_yr reg_mo reg_da, replace
gen reg_date = mdy(reg_mo, reg_da, reg_yr)

* campus-specific conversion dates (missing for any other campus_code)
gen conversion_date = mdy(7,1,2009) if campus_code=="UCD"
replace conversion_date = mdy(1,22,2009) if campus_code=="UCI"
replace conversion_date = mdy(6,22,2009) if campus_code=="UCLA"
replace conversion_date = mdy(3,4,2009) if campus_code=="UCSF"
* a single main conversion date shared by all campuses
gen main_conversion_date = mdy(9,3,2008)
format reg_date conversion_date main_conversion_date %d

* age
* Birth years outside 1900-1995 are treated as invalid. The original
* condition used "&" (birth_year<1900 & birth_year>1995), which can never be
* true, so nothing was ever set to missing; "|" is what was intended.
* For registration years between 2008 and 2013 the age filter below already
* turned these cases into missing ages, so the analysis samples should be
* unaffected -- TODO confirm against the previous output.
replace birth_year = . if birth_year<1900 | birth_year>1995
gen age = reg_yr - birth_year
replace age = . if age<18 | age >100

* education level
* Dummies by code of ed: <15, 15-16, 17-18, 19, >19. Each dummy is left
* missing when ed is missing.
ren educationlevel_id ed
codebook ed
gen lths = (ed<15) if ed~=.
gen hs = (ed==15 | ed==16) if ed~=.
gen somecol = (ed==17 | ed==18) if ed~=.
gen col = (ed==19) if ed~=.
gen adv = (ed>19) if ed~=.
* sex (1 = male, 0 = female; missing for any other gender value)
gen sex = 0 if gender=="F"
replace sex = 1 if gender=="M"

* marital status
* married = 1 for codes 2 and 5; separated = 1 for codes 3 and 4.
recode marital_status_id (2 5=1) (1 3 4=0), gen(married)
recode marital_status_id (3 4=1) (1 2 5=0), gen(separated)
rename marital_status_id marital_status

* race
gen race=race_id
recode race 1=1 2=2 3/7=3 8/18=4 19=5
/*1=White; 2=African American; 3=Native and Islander; 4=Asian American; 5=other*/
gen hispanic= hispanic_id
recode hispanic 1=0 2/7=1 /*1=yes*/
label define dummy 1 "yes" 0 "no"
label value hispanic dummy
* Hispanic origin overrides the race code; the swap below then makes
* 5 = Hispanic and 6 = Others, matching the value label.
replace race=6 if hispanic==1
recode race 5=6 6=5
label define race 1 "White" 2 "Black" 3"Native" 4 "Asian" 5 "Hispanic" 6 "Others"
label value race race
tab race, gen(r)
ren r1 white
ren r2 black
ren r5 hisp
gen otr = (r3==1 | r4==1 | r6==1) if race~=.
* Fix: without the "if race~=." qualifier, observations with missing race
* were coded hispotr = 0 (not Hispanic/other) instead of missing, unlike otr.
gen hispotr = (hisp==1 | otr==1) if race~=.

* migrant status
* "US" if a birth state id is recorded or the country is "USA"; a non-empty
* country other than "USA" then overrides this with "nonUS".
gen bplace = "US" if birth_state_id~=. | birth_country=="USA"
replace bplace = "nonUS" if birth_country~="" & birth_country~="USA"
gen father_bplace = "US" if father_birth_state_id~=. | father_birth_country=="USA"
replace father_bplace = "nonUS" if father_birth_country~="" & father_birth_country~="USA"
gen mother_bplace = "US" if mother_birth_state_id~=. | mother_birth_country=="USA"
replace mother_bplace = "nonUS" if mother_birth_country~="" & mother_birth_country~="USA"
gen mig = 0 if bplace~=""
replace mig = 1 if bplace=="nonUS"
* migprt is defined when at least one parent's birthplace is known and is 1
* when at least one parent was born outside the US.
gen migprt = 0 if father_bplace~="" | mother_bplace~=""
replace migprt = 1 if father_bplace=="nonUS" | mother_bplace=="nonUS"

* geographic location
gen davis = campus_code=="UCD"
gen irvine = campus_code=="UCI"
gen la = campus_code=="UCLA"
gen sf = campus_code=="UCSF"
gen irvine_la = irvine==1 | la==1
gen sf_davis = sf==1 | davis==1

* us_armed forces
gen in_army = 1 if us_armed_forces=="Yes"
replace in_army = 0 if us_armed_forces=="No"
replace in_army = . if us_armed_forces=="Unknown" | us_armed_forces==""

* Keep observations after pre-need conversion for each campus (when
* registration dates were entered accurately)
* NOTE(review): Stata treats a missing value as larger than any number, so
* observations with a missing reg_date also pass this condition and are kept.
* Left unchanged because excluding them could change the sample and the ids
* that are matched to the manually coded occupation files -- confirm whether
* any reg_date is missing.
keep if reg_date>main_conversion_date // all dates after main conversion date are accurate
// with the exception of pre-need conversion dates
drop if reg_date==conversion_date // dropped here

* Compare all registrants to actual donors (deceased registrants)
* death_year is a string; a non-empty value marks a deceased registrant.
gen died = (death_year~="")
local cmpvars age sex married separated white black hispotr hs somecol col adv ///
    mig migprt davis irvine la sf irvine_la sf_davis
foreach v of local cmpvars {
    ttest `v', by(died)
}

* Keep only individuals who have died -- the same form was filled again
* So, we have few missing observations
keep if death_year~=""
* (campus_code was listed twice in the original keep list; once is enough)
keep zipcode campus_code death_year us_armed_forces reg_yr reg_mo reg_da age sex married separated ///
    white black hisp otr hispotr lths hs somecol col adv mig migprt ed ///
    race marital_status davis irvine la sf irvine_la sf_davis occupation in_army
save calif_data_temp.dta, replace

* Outsheet to excel to code occupation manually.
* The data in memory are preserved here and restored after the export.
preserve
drop if age==. | married==. | white==. | hs==. | mig==. | migprt==. | ed==.
* (continues inside the preserve block opened above)
* Fix: "stable" added. Without it Stata orders tied observations randomly,
* so id = _n could differ between runs and no longer line up with the ids in
* the manually coded occupation file.
* NOTE(review): the manually coded CSV must have been produced from the same
* ordering; verify a few occupations after the merge below.
sort age sex married separated white black hisp hs col mig migprt, stable
gen id = _n
save calif_data_no_occ.dta, replace
outsheet id age hs somecol col adv occupation in_army using calif_data_occ.csv, delim(",") replace
restore

* Insheet from excel
clear
insheet using calif_data_occ_manually_coded.csv
keep id occupation occ_cat1 occ_cat2
sort id
save calif_data_occ_manually_coded.dta, replace

* Merge the occupation data
use calif_data_no_occ.dta, clear
sort id
merge 1:1 id using calif_data_occ_manually_coded.dta, keepusing(occ_cat1 occ_cat2)
* NOTE(review): _merge is dropped without being inspected, so unmatched ids
* on either side pass silently.
drop id _merge
save calif_data_occ1.dta, replace

* Per reviewer's request, we need to see if the results are robust to excluding
* parent's migration status (and consequently to keeping the observations with missing
* values on that variable in). We need to code the occupation categories manually
* for that tiny sample.
* Fix: the documented option of -use- is "clear", not "replace".
use calif_data_temp.dta, clear
drop if age==. | married==. | white==. | hs==. | mig==. | ed==.
keep if migprt==.
* ids follow the current order of the data (no sort here, as in the original)
gen id = _n
save calif_data_no_occ2.dta, replace
outsheet id occupation using calif_data_occ2.csv, delim(",") replace

* Insheet the manually-coded data from excel
clear
insheet using calif_data_occ2_manually_coded.csv
keep id occupation occ_cat1 occ_cat2
sort id
save calif_data_occ2_manually_coded.dta, replace

use calif_data_no_occ2.dta, clear
sort id
merge 1:1 id using calif_data_occ2_manually_coded.dta, keepusing(occ_cat1 occ_cat2)
drop id _merge
save calif_data_occ1_migprt_missing.dta, replace

* Append to the larger data set (note the cases with missing migprt aren't
* in the occupation data)
use calif_data_occ1.dta, clear
append using calif_data_occ1_migprt_missing.dta
save calif_data_occ1_full.dta, replace //contains the observations w missing migprt

* Code the occupation categories
* Each dummy is 0 (not missing) when occ_cat1 is empty.
use calif_data_occ1_full.dta, clear
gen prof = (occ_cat1=="p")
gen mngr = (occ_cat1=="m")
gen clrk = (occ_cat1=="c")
gen serv = (occ_cat1=="s")
gen wrk = (occ_cat1=="w")
gen unemp = inlist(occ_cat1, "st", "r", "u", "h")
//unemp includes unemployed, retired, students and housewives.

* additional categorization
gen md = (occ_cat2=="md") //medical professionals
gen tchr = (occ_cat2=="t") //teachers
gen othprof = (md==0 & tchr==0 & prof==1) //other professionals

// Before dropping the missing observations on occupation, save the original
// data set.
preserve
drop if migprt==.
sort age sex married separated white black hisp hs col mig migprt, stable
gen id = _n
* agec: 0/1 indicator from a two-quantile split of age
xtile agec = age, nq(2)
recode agec 1=0 2=1
save "calif_data_new_orig.dta", replace
// Save the original data set only (no occupations, migprt included)
local origvars id age agec sex married separated white black hispotr hs somecol col adv mig migprt
outsheet `origvars' using "calif_data_new_orig.csv", nonames replace
outsheet `origvars' using "calif_data_new_orig.raw", replace
restore

* From here on only observations with a coded occupation are used.
* Note that this overwrites calif_data_occ1_full.dta saved above.
drop if occ_cat1== ""
save "calif_data_occ1_full.dta", replace
sort age sex married separated white black hisp hs col mig, stable
gen id = _n
xtile agec = age, nq(2)
recode agec 1=0 2=1
save "calif_data_new_full.dta", replace
local fullvars id age agec sex married separated white black hispotr hs somecol col adv mig ///
    prof mngr clrk serv wrk unemp irvine la davis sf
outsheet `fullvars' using "calif_data_new_full.csv", nonames replace
outsheet `fullvars' using "calif_data_new_full.raw", replace

// Save the data where the migprt is included (the original analysis)
use "calif_data_new_full.dta", clear
* id and agec are rebuilt after the sample is restricted
drop id agec
drop if migprt==.
* Rebuild id and agec for the sample with non-missing migprt.
* Fix: "stable" added. Without it Stata orders tied observations randomly,
* so id = _n could differ between runs and no longer match the ids in the
* externally produced cluster files read below.
sort age sex married separated white black hisp hs col mig migprt, stable
gen id = _n
* agec: 0/1 indicator from a two-quantile split of age
xtile agec = age, nq(2)
recode agec 1=0 2=1
save "calif_data_new.dta", replace
local outvars id age agec sex married separated white black hispotr hs somecol col adv mig migprt
outsheet `outvars' prof mngr clrk serv wrk unemp using "calif_data_new.csv", nonames replace
outsheet `outvars' prof mngr clrk serv wrk unemp using "calif_data_new.raw", replace

// Save a data set with alternative categorization of occupations (less refined)
* profmngr pools professionals, managers and clerks
gen profmngr = prof==1 | mngr==1 | clrk==1
save calif_data_new_temp.dta, replace
outsheet `outvars' profmngr serv wrk unemp using "calif_data_new_temp.csv", nonames replace
outsheet `outvars' profmngr serv wrk unemp using "calif_data_new_temp.raw", replace

* Variable lists shared by the analysis sections below
local demo age sex married separated white black hispotr hs somecol col adv
local occ prof mngr clrk serv wrk unemp
local extra md tchr othprof irvine la davis sf

*********************************
*          Analysis             *
*********************************
* Reads the cluster assignment (id, clid) from cluster.txt, merges it onto
* the data by id and compares the clusters.
* NOTE(review): _merge is dropped without being inspected.
set logtype text
clear
infile id clid using "cluster.txt"
sort id
save "cluster_id.dta", replace
use calif_data_new, clear
sort id
merge 1:1 id using cluster_id
drop _merge
* close any log left open by an earlier aborted run; otherwise -log using- fails
capture log close
log using "cluster_comparison_new.txt", replace
sum `demo' `occ' mig migprt, sep(25)
bys clid: sum `demo' `occ' mig migprt, sep(25)
local ttvars `demo' `occ' mig migprt `extra'
foreach v of local ttvars {
    ttest `v', by(clid)
}
log close
save "calif_data_w_clusters.dta", replace

*********************************
* Analysis - FULL (No migprt)   *
*********************************
set logtype text
clear
infile id clid using "cluster_full.txt"
sort id
save "cluster_id_full.dta", replace
use calif_data_new_full, clear
sort id
merge 1:1 id using cluster_id_full
drop _merge
capture log close
log using "cluster_comparison_new_full.txt", replace
sum `demo' `occ' mig, sep(25)
bys clid: sum `demo' `occ' mig, sep(25)
local ttvars `demo' `occ' mig `extra'
foreach v of local ttvars {
    ttest `v', by(clid)
}
log close
save "calif_data_w_clusters_full.dta", replace

*********************************
*Analysis - TEMP (alt cat of occ)*
*********************************
set logtype text
clear
infile id clid using "cluster_temp.txt"
sort id
save "cluster_id_temp.dta", replace
use calif_data_new_temp, clear
sort id
merge 1:1 id using cluster_id_temp
drop _merge
capture log close
log using "cluster_comparison_new_temp.txt", replace
sum `demo' `occ' mig migprt, sep(25)
bys clid: sum `demo' `occ' mig migprt, sep(25)
* first part of the t-tests for this section; the rest follow below
local ttvars age sex married separated
foreach v of local ttvars {
    ttest `v', by(clid)
}
* Remaining t-tests of the TEMP analysis (alternative occupation categories);
* the log for this section was opened above.
local ttvars white black hispotr hs somecol col adv profmngr serv wrk unemp ///
    mig migprt md tchr othprof irvine la davis sf
foreach v of local ttvars {
    ttest `v', by(clid)
}
log close
save "calif_data_w_clusters_temp.dta", replace

*********************************
*       Analysis - ORIG         *
*********************************
* Reads the cluster assignment (id, clid) from cluster_orig.txt, merges it
* onto calif_data_new_orig by id and compares the clusters.
* NOTE(review): _merge is dropped without being inspected.
* NOTE(review): in calif_data_new_orig the occupation dummies were generated
* before observations with an empty occ_cat1 were dropped, so they are 0 for
* those observations -- confirm that this is intended for the comparisons
* on prof ... othprof below.
set logtype text
clear
infile id clid using "cluster_orig.txt"
sort id
save "cluster_id_orig.dta", replace
use calif_data_new_orig, clear
sort id
merge 1:1 id using cluster_id_orig
drop _merge
* close any log left open by an earlier aborted run; otherwise -log using- fails
capture log close
log using "cluster_comparison_new_orig.txt", replace
local sumvars age sex married separated white black hispotr hs somecol col adv ///
    prof mngr clrk serv wrk unemp mig migprt
sum `sumvars', sep(25)
bys clid: sum `sumvars', sep(25)
local ttvars `sumvars' md tchr othprof irvine la davis sf
foreach v of local ttvars {
    ttest `v', by(clid)
}
log close
save "calif_data_w_clusters_orig.dta", replace