#=========================================================
#Part 4 - code for Chapter 4 "Propensity Score Stratification" of book:
#Leite, W. L. (2017). Practical propensity score methods using R. 
#Thousand Oaks, CA: Sage. 
# 
#this is the code that was used to generate the example results in the book
#As the R software and the R packages used in this example are updated frequently
#some incompatibilities between the current code and new R versions or package versions
#may appear
#Any updates to the code will be posted at:
# http://www.practicalpropensityscore.com


#=========================================================
#this code is for estimating the effect of schools 
#having at least one full-time security guards
#on the frequency of harsh punishment
#using propensity score stratification
#using data from the School Survey on Crime and Safety



#===============================================================================
#Part 4 - marginal mean weights through stratification for the ATT


#load data
load(file="Chapter4_ssocs08_with_propensity_scores.Rdata")
load(file="Chapter4_ssocs08_with_stratification.Rdata")


#1. obtain the number of people per subclass for the treated
subclassTreat <- data.frame(table(data.stratification$subclass[data.stratification$treat=="Treated"]))
names(subclassTreat) <- c("subclass","N.1s")
print(subclassTreat)
#obtain the number of people per subclass for the untreated
subclassUntreat <-  data.frame(table(data.stratification$subclass[data.stratification$treat=="Untreated"]))
names(subclassUntreat) <- c("subclass","N.0s")
print(subclassUntreat)
#merge treated and untreated
(table.subclass <- merge(subclassTreat,subclassUntreat))

#merge table with the data
data.stratification <- merge(data.stratification,table.subclass)

#define survey design
library(survey)
surveyDesign <- svydesign(ids=~0,strata=~STRATA,weights=~FINALWGT, data=data.stratification)

#obtain proportions treated and untreated
(prop.treat <- svymean(~treat, surveyDesign))


#calculate the ATT MMWS
data.stratification$mmwsATT <- with(data.stratification,
        ifelse(treat=="Treated", 1, (N.1s*prop.treat[1])/(N.0s*prop.treat[2])))
xtabs(~mmwsATT+subclass, data.stratification)



#combine marginal mean weight and sampling weight, then normalize
data.stratification$mmwsATTFinal <- data.stratification$mmwsATT*data.stratification$FINALWGT 
data.stratification$mmwsATTFinal <- data.stratification$mmwsATTFinal/mean(data.stratification$mmwsATTFinal)


#=============================================
#evaluate covariate balance for the ATT
library(twang)
#create a numeric treatment indicator, required by twang
data.stratification$treat2 <- ifelse(data.stratification$treat == "Treated", 1,0) 

#balance evaluation 
balanceTableMMWSATT <- bal.stat(
    data=data.stratification, 
    estimand="ATT",  w.all=data.stratification$mmwsATTFinal,get.ks=F, vars=covariateNames, 
    treat.var="treat2", sampw=data.stratification$FINALWGT, multinom=F)
balanceTableMMWSATT <- balanceTableMMWSATT$results[,1:5]
balanceTableMMWSATT$varRatio <- with(balanceTableMMWSATT, tx.sd^2/ct.sd^2) 


#examine balance statistics
summary(balanceTableMMWSATT)

summaryTableMMWSATT = c()

for (var in 1:ncol(balanceTableMMWSATT)) {
    summaryTableMMWSATT = rbind(summaryTableMMWSATT,summary(balanceTableMMWSATT[,var])) }
rownames(summaryTableMMWSATT) = colnames(balanceTableMMWSATT)
write.csv(summaryTableMMWSATT[,c(1,4,6)], file="summary_balance_MMWSATT.csv")

unbalanced.covariatesATT <- list(
    std.eff.sz = rownames(balanceTableMMWSATT)[abs(balanceTableMMWSATT$std.eff.sz)>0.1],
    varRatio = rownames(balanceTableMMWSATT)[balanceTableMMWSATT$varRatio<0.8 |
                                              balanceTableMMWSATT$varRatio>1.2 ])

#=======================================================================

#estimate ATT using marginal mean weighting through stratification to adjust selection bias
surveyDesignATT <- svydesign(ids=~0,strata=~STRATA,weights=~mmwsATTFinal, data=data.stratification)
surveyDesignATT <- as.svrepdesign(surveyDesignATT, type=c("bootstrap"),replicates=1000)

#fit a linear regression model to proportions
modelATT1 <- svyglm(percHarsh~treat, surveyDesignATT, family=gaussian())
summary(modelATT1)

#fit a logistic model  to proportions with quasibinomial family to allow for overdispersion
modelATT2 <- svyglm(percHarsh~treat, surveyDesignATT, family=quasibinomial())
summary(modelATT2)

#center continuous predictors on the mean of the treated
surveyDesignATT <- update(surveyDesignATT, C0540 = C0540 - mean(C0540[treat=="Treated"]),
                         C0568 = C0568 - mean(C0568[treat=="Treated"]),
                         C0544 = C0544 - mean(C0544[treat=="Treated"]),
                         C0558 = C0558 - mean(C0558[treat=="Treated"]))
modelATTDR <- svyglm(percHarsh~(treat+subclass+FR_SIZE+C0540+C0158+C0166+C0568+C0540+C0544+C0558)^2, 
                    surveyDesignATT, family=gaussian())
summary(modelATTDR)