#========================================================= #Part 4 - code for Chapter 4 "Propensity Score Stratification" of book: #Leite, W. L. (2017). Practical propensity score methods using R. #Thousand Oaks, CA: Sage. # #this is the code that was used to generate the example results in the book #As the R software and the R packages used in this example are updated frequently #some incompatibilities between the current code and new R versions or package versions #may appear #Any updates to the code will be posted at: # http://www.practicalpropensityscore.com #========================================================= #this code is for estimating the effect of schools #having at least one full-time security guards #on the frequency of harsh punishment #using propensity score stratification #using data from the School Survey on Crime and Safety #=============================================================================== #Part 4 - marginal mean weights through stratification for the ATT #load data load(file="Chapter4_ssocs08_with_propensity_scores.Rdata") load(file="Chapter4_ssocs08_with_stratification.Rdata") #1. obtain the number of people per subclass for the treated subclassTreat <- data.frame(table(data.stratification$subclass[data.stratification$treat=="Treated"])) names(subclassTreat) <- c("subclass","N.1s") print(subclassTreat) #obtain the number of people per subclass for the untreated subclassUntreat <- data.frame(table(data.stratification$subclass[data.stratification$treat=="Untreated"])) names(subclassUntreat) <- c("subclass","N.0s") print(subclassUntreat) #merge treated and untreated (table.subclass <- merge(subclassTreat,subclassUntreat)) #merge table with the data data.stratification <- merge(data.stratification,table.subclass) #define survey design library(survey) surveyDesign <- svydesign(ids=~0,strata=~STRATA,weights=~FINALWGT, data=data.stratification) #obtain proportions treated and untreated (prop.treat <- svymean(~treat, surveyDesign)) #calculate the ATT MMWS data.stratification$mmwsATT <- with(data.stratification, ifelse(treat=="Treated", 1, (N.1s*prop.treat[1])/(N.0s*prop.treat[2]))) xtabs(~mmwsATT+subclass, data.stratification) #combine marginal mean weight and sampling weight, then normalize data.stratification$mmwsATTFinal <- data.stratification$mmwsATT*data.stratification$FINALWGT data.stratification$mmwsATTFinal <- data.stratification$mmwsATTFinal/mean(data.stratification$mmwsATTFinal) #============================================= #evaluate covariate balance for the ATT library(twang) #create a numeric treatment indicator, required by twang data.stratification$treat2 <- ifelse(data.stratification$treat == "Treated", 1,0) #balance evaluation balanceTableMMWSATT <- bal.stat( data=data.stratification, estimand="ATT", w.all=data.stratification$mmwsATTFinal,get.ks=F, vars=covariateNames, treat.var="treat2", sampw=data.stratification$FINALWGT, multinom=F) balanceTableMMWSATT <- balanceTableMMWSATT$results[,1:5] balanceTableMMWSATT$varRatio <- with(balanceTableMMWSATT, tx.sd^2/ct.sd^2) #examine balance statistics summary(balanceTableMMWSATT) summaryTableMMWSATT = c() for (var in 1:ncol(balanceTableMMWSATT)) { summaryTableMMWSATT = rbind(summaryTableMMWSATT,summary(balanceTableMMWSATT[,var])) } rownames(summaryTableMMWSATT) = colnames(balanceTableMMWSATT) write.csv(summaryTableMMWSATT[,c(1,4,6)], file="summary_balance_MMWSATT.csv") unbalanced.covariatesATT <- list( std.eff.sz = rownames(balanceTableMMWSATT)[abs(balanceTableMMWSATT$std.eff.sz)>0.1], varRatio = rownames(balanceTableMMWSATT)[balanceTableMMWSATT$varRatio<0.8 | balanceTableMMWSATT$varRatio>1.2 ]) #======================================================================= #estimate ATT using marginal mean weighting through stratification to adjust selection bias surveyDesignATT <- svydesign(ids=~0,strata=~STRATA,weights=~mmwsATTFinal, data=data.stratification) surveyDesignATT <- as.svrepdesign(surveyDesignATT, type=c("bootstrap"),replicates=1000) #fit a linear regression model to proportions modelATT1 <- svyglm(percHarsh~treat, surveyDesignATT, family=gaussian()) summary(modelATT1) #fit a logistic model to proportions with quasibinomial family to allow for overdispersion modelATT2 <- svyglm(percHarsh~treat, surveyDesignATT, family=quasibinomial()) summary(modelATT2) #center continuous predictors on the mean of the treated surveyDesignATT <- update(surveyDesignATT, C0540 = C0540 - mean(C0540[treat=="Treated"]), C0568 = C0568 - mean(C0568[treat=="Treated"]), C0544 = C0544 - mean(C0544[treat=="Treated"]), C0558 = C0558 - mean(C0558[treat=="Treated"])) modelATTDR <- svyglm(percHarsh~(treat+subclass+FR_SIZE+C0540+C0158+C0166+C0568+C0540+C0544+C0558)^2, surveyDesignATT, family=gaussian()) summary(modelATTDR)