#=========================================================
#Part 2 - code for Chapter 4 "Propensity Score Stratification" of book:
#Leite, W. L. (2017). Practical propensity score methods using R. 
#Thousand Oaks, CA: Sage. 
# 
#this is the code that was used to generate the example results in the book
#As the R software and the R packages used in this example are updated frequently
#some incompatibilities between the current code and new R versions or package versions
#may appear
#Any updates to the code will be posted at:
# http://www.practicalpropensityscore.com


#=========================================================
#this code is for estimating the effect of schools 
#having at least one full-time security guards
#on the frequency of harsh punishment
#using propensity score stratification
#using data from the School Survey on Crime and Safety

#Part 2 - perform propensity score stratification

#load data
load(file="Chapter4_ssocs08_with_propensity_scores.Rdata")


#cut propensity scores into five strata for estimating the ATT

SSOCS.data$subclass <- cut(x=SSOCS.data$pScores,
                              breaks=quantile(SSOCS.data$pScores, 
                              prob = seq(0, 1, 1/5)),include.lowest=T)
#rename the strata labels
levels(SSOCS.data$subclass) <- 1:length(levels(SSOCS.data$subclass))
#examine common support
xtabs(~treat+subclass,SSOCS.data) #table of counts per stratum




#=========================================================================
#evaluate covariate balance within  strata separately for categorical and continuous variables
library(MatchIt)

SSOCS.data2 <- SSOCS.data[,c("SCHID", "treat","percHarsh", "pScores","pScores",
                            "STRATA", "FINALWGT",covariateNames)]


balanceFormula <- paste(covariateNames, collapse="+")
balanceFormula <- formula(paste("treat~",balanceFormula,sep=""))
stratification <- matchit(balanceFormula,distance=SSOCS.data2$pScores, 
                          data = SSOCS.data2, 
                          method = "subclass", sub.by = "treat", subclass=5)
#the code above gives a warning that does not apply
data.stratification <- match.data(stratification)
data.stratification$treat <- factor(data.stratification$treat, levels=c(0,1),
                                   labels=c("Untreated","Treated"))
data.stratification$subclass <- factor(data.stratification$subclass)
xtabs(~treat+subclass, data.stratification)

#diagnose covariate balance
balance.stratification <- summary(stratification, standardize=T)

#obtain standardized mean differences for all strata
strataDifferences <- data.frame(balance.stratification$q.table[,3,])
summaryStrataDifferences <- summary(strataDifferences)
#write.csv(data.frame(summaryStrataDifferences), file="summary_strata_differences.csv")

#obtain the summary of balance after stratification combining all strata
summary(abs(balance.stratification$sum.subclass$"Std. Mean Diff."))
table(abs(balance.stratification$sum.subclass$"Std. Mean Diff.") > 0.1)


#save dataset with strata membership
save(data.stratification, file="Chapter4_ssocs08_with_stratification.Rdata")

#=================================================================================
#Estimating and Pooling Stratum-specific effects

#obtain means for treated and untreated groups by stratum
library(survey)

#define survey design

surveyDesign <- svydesign(ids=~0,strata=~STRATA,weights=~FINALWGT, data=data.stratification)
#add replication weights for 1000 bootstrapped samples to the design object
surveyDesignBoot <- as.svrepdesign(surveyDesign, type=c("bootstrap"),replicates=1000)

#obtain means and standard errors by stratum and treatment group combinations
(subclassMeans <- svyby(formula=~percHarsh, by=~treat+subclass,
                        design=surveyDesignBoot,FUN=svymean, covmat=TRUE))



#obtain the ATE pooling strata-specific ATEs
#the weights are for each group defined by combining stratum and treatment group
#the weights are (stratum size)/(total sample size)
(pooledEffects <- svycontrast(subclassMeans, list(
    ATE=c(-0.5,0.5,-0.18,0.18,-0.13,0.13,-0.10,0.10,-0.09,0.09),
    ATT=c(-0.2,0.2,-0.2,0.2,-0.2,0.2,-0.2,0.2,-0.2,0.2))))