#========================================================= #Part 3 - code for Chapter 4 "Propensity Score Stratification" of book: #Leite, W. L. (2017). Practical propensity score methods using R. #Thousand Oaks, CA: Sage. # #this is the code that was used to generate the example results in the book #As the R software and the R packages used in this example are updated frequently #some incompatibilities between the current code and new R versions or package versions #may appear #Any updates to the code will be posted at: # http://www.practicalpropensityscore.com #========================================================= #this code is for estimating the effect of schools #having at least one full-time security guards #on the frequency of harsh punishment #using propensity score stratification #using data from the School Survey on Crime and Safety #=============================================================================== #Part 3 - marginal mean weights through stratification for the ATE #obtain subclass for the ATE #load data load(file="Chapter4_ssocs08_with_propensity_scores.Rdata") load(file="Chapter4_ssocs08_with_stratification.Rdata") #obtain the number of people per subclass by treatment combination subclassbyTreat <- data.frame(xtabs(~treat+subclass,data.stratification)) names(subclassbyTreat)<-c("treat","subclass","N.zs") print(subclassbyTreat) #obtain the number of people per subclass table.subclass<- data.frame(xtabs(~subclass,data.stratification)) names(table.subclass)<-c("subclass","N.s") print(table.subclass) #merge tables (subclassbyTreat <- merge(subclassbyTreat,table.subclass)) #define survey design library(survey) surveyDesign <- svydesign(ids=~0,strata=~STRATA,weights=~FINALWGT, data=data.stratification) #obtain estimates of proportion treated and untreated (prop.treat <- svymean(~treat, surveyDesign)) #insert proportion treated into table subclassbyTreat$pr.Z <- ifelse(subclassbyTreat$treat=="Treated",prop.treat[2],prop.treat[1]) #obtain the mmwts for ATE subclassbyTreat$mmwsATE <- with(subclassbyTreat, N.s*pr.Z/N.zs) print(subclassbyTreat) data.stratification <- merge(data.stratification, subclassbyTreat[,c(1,2,6)]) #merge weights with data.stratification xtabs(~mmwsATE+subclass, data.stratification) #combine marginal mean weight and sampling weight, then normalize data.stratification$mmwsATEFinal <- data.stratification$mmwsATE*data.stratification$FINALWGT data.stratification$mmwsATEFinal <- data.stratification$mmwsATEFinal/mean(data.stratification$mmwsATEFinal) #================================================================ #Evaluate covariate balance for the ATE library(twang) #create a numeric treatment indicator, required by twang data.stratification$treat2 <- ifelse(data.stratification$treat == "Treated", 1,0) #balance evaluation balanceTableMMWS <- bal.stat( data=data.stratification, estimand="ATE", w.all=data.stratification$mmwsATEFinal,get.ks=F, vars=covariateNames, treat.var="treat2", sampw=data.stratification$FINALWGT, multinom=F) balanceTableMMWS <- balanceTableMMWS$results[,1:5] balanceTableMMWS$varRatio <- with(balanceTableMMWS, tx.sd^2/ct.sd^2) unbalanced.covariatesATE = list( std.eff.sz = rownames(balanceTableMMWS)[abs(balanceTableMMWS$std.eff.sz)>0.1], varRatio = rownames(balanceTableMMWS)[balanceTableMMWS$varRatio<0.8 | balanceTableMMWS$varRatio>1.2 ]) #examine balance statistics summary(balanceTableMMWS) summaryTableMMWS <- c() for (var in 1:ncol(balanceTableMMWS)) { summaryTableMMWS = rbind(summaryTableMMWS,summary(balanceTableMMWS[,var])) } rownames(summaryTableMMWS) = colnames(balanceTableMMWS) #write.csv(summaryTableMMWS[,c(1,4,6)], file="summary_balance_MMWSATE.csv") #======================================================================= #estimate ATE using marginal mean weighting through stratification to adjust selection bias surveyDesignATE <- svydesign(ids=~0,strata=~STRATA,weights=~mmwsATEFinal, data=data.stratification) surveyDesignATE <- as.svrepdesign(surveyDesignATE, type=c("bootstrap"),replicates=1000) #fit a linear regression model to proportions modelATE <- svyglm(percHarsh~treat, surveyDesignATE, family=gaussian()) summary(modelATE) #fit a logistic model to proportions with quasibinomial family to allow for overdispersion modelATE2 <- svyglm(percHarsh~treat, surveyDesignATE, family=quasibinomial()) summary(modelATE2)