#========================================================= #Part 2 - code for Chapter 4 "Propensity Score Stratification" of book: #Leite, W. L. (2017). Practical propensity score methods using R. #Thousand Oaks, CA: Sage. # #this is the code that was used to generate the example results in the book #As the R software and the R packages used in this example are updated frequently #some incompatibilities between the current code and new R versions or package versions #may appear #Any updates to the code will be posted at: # http://www.practicalpropensityscore.com #========================================================= #this code is for estimating the effect of schools #having at least one full-time security guards #on the frequency of harsh punishment #using propensity score stratification #using data from the School Survey on Crime and Safety #Part 2 - perform propensity score stratification #load data load(file="Chapter4_ssocs08_with_propensity_scores.Rdata") #cut propensity scores into five strata for estimating the ATT SSOCS.data$subclass <- cut(x=SSOCS.data$pScores, breaks=quantile(SSOCS.data$pScores, prob = seq(0, 1, 1/5)),include.lowest=T) #rename the strata labels levels(SSOCS.data$subclass) <- 1:length(levels(SSOCS.data$subclass)) #examine common support xtabs(~treat+subclass,SSOCS.data) #table of counts per stratum #========================================================================= #evaluate covariate balance within strata separately for categorical and continuous variables library(MatchIt) SSOCS.data2 <- SSOCS.data[,c("SCHID", "treat","percHarsh", "pScores","pScores", "STRATA", "FINALWGT",covariateNames)] balanceFormula <- paste(covariateNames, collapse="+") balanceFormula <- formula(paste("treat~",balanceFormula,sep="")) stratification <- matchit(balanceFormula,distance=SSOCS.data2$pScores, data = SSOCS.data2, method = "subclass", sub.by = "treat", subclass=5) #the code above gives a warning that does not apply data.stratification <- match.data(stratification) data.stratification$treat <- factor(data.stratification$treat, levels=c(0,1), labels=c("Untreated","Treated")) data.stratification$subclass <- factor(data.stratification$subclass) xtabs(~treat+subclass, data.stratification) #diagnose covariate balance balance.stratification <- summary(stratification, standardize=T) #obtain standardized mean differences for all strata strataDifferences <- data.frame(balance.stratification$q.table[,3,]) summaryStrataDifferences <- summary(strataDifferences) #write.csv(data.frame(summaryStrataDifferences), file="summary_strata_differences.csv") #obtain the summary of balance after stratification combining all strata summary(abs(balance.stratification$sum.subclass$"Std. Mean Diff.")) table(abs(balance.stratification$sum.subclass$"Std. Mean Diff.") > 0.1) #save dataset with strata membership save(data.stratification, file="Chapter4_ssocs08_with_stratification.Rdata") #================================================================================= #Estimating and Pooling Stratum-specific effects #obtain means for treated and untreated groups by stratum library(survey) #define survey design surveyDesign <- svydesign(ids=~0,strata=~STRATA,weights=~FINALWGT, data=data.stratification) #add replication weights for 1000 bootstrapped samples to the design object surveyDesignBoot <- as.svrepdesign(surveyDesign, type=c("bootstrap"),replicates=1000) #obtain means and standard errors by stratum and treatment group combinations (subclassMeans <- svyby(formula=~percHarsh, by=~treat+subclass, design=surveyDesignBoot,FUN=svymean, covmat=TRUE)) #obtain the ATE pooling strata-specific ATEs #the weights are for each group defined by combining stratum and treatment group #the weights are (stratum size)/(total sample size) (pooledEffects <- svycontrast(subclassMeans, list( ATE=c(-0.5,0.5,-0.18,0.18,-0.13,0.13,-0.10,0.10,-0.09,0.09), ATT=c(-0.2,0.2,-0.2,0.2,-0.2,0.2,-0.2,0.2,-0.2,0.2))))