#Part 1 - code for Chapter 4 "Propensity Score Stratification" of book: #Leite, W. L. (2017). Practical propensity score methods using R. #Thousand Oaks, CA: Sage. # #this is the code that was used to generate the example results in the book #As the R software and the R packages used in this example are updated frequently #some incompatibilities between the current code and new R versions or package versions #may appear #Any updates to the code will be posted at: # http://www.practicalpropensityscore.com #========================================================= #this code is for estimating the effect of schools #having at least one full-time security guards #on the frequency of harsh punishment #using propensity score stratification #using data from the School Survey on Crime and Safety #Part 1 estimation of propensity scores #Read dataset load(file="Chapter4_ssocs08.Rdata") #Get predictors of treatment assignment #variable C0116 (metal detectors) was excluded because 97.4% of the schools #do not have students pass through metal detectors and only 1 of the untreated #school have them pass through metal detectors covariateNames <- names(SSOCS.data)[c(5:6,15:27,29,31,33,35:37,39:54,80:85,150:155,162:164,178, 197:200, 417:422)] #convert missing value indicators into binary variables for (var in 252:414) { SSOCS.data[,var] <- ifelse(SSOCS.data[,var] == 0, 0, 1) } #obtain proportions of missing data for variables of interest missingIndicatorNames <- paste("I", covariateNames, sep="") missingIndicatorNames <- missingIndicatorNames[-c(53:57)] #remove composite variables propMissing = apply(SSOCS.data[,missingIndicatorNames],2,mean) missingIndicatorNames <- missingIndicatorNames[propMissing > 0.05] #obtain imputation flags for variables of interest #check whether any dummy missing indicators are redundant because #variables have missing values for the same cases missingCorrelations <- cor(SSOCS.data[,missingIndicatorNames]) diag(missingCorrelations) <- 0 maxCorrelations <- apply(missingCorrelations,2,max) dummyNAnames <- names(maxCorrelations)[maxCorrelations < 0.8] maxCorrelationsHigh <- maxCorrelations[!duplicated(maxCorrelations)] dummyNAnames <- c(dummyNAnames,names(maxCorrelationsHigh)[maxCorrelationsHigh>=0.8]) #create propensity score estimation formula covariateNames <- c(covariateNames,dummyNAnames) psFormula <- paste(covariateNames, collapse="+") psFormula <- formula(paste("treat~",psFormula, sep="")) print(psFormula) #================================= #estimate propensity scores with logistic regression library(survey) surveyDesign <- svydesign(ids=~0,strata=~STRATA,weights=~FINALWGT, data=SSOCS.data) psModel <- svyglm(psFormula, surveyDesign, family=quasibinomial) #add propensity scores to dataset SSOCS.data$pScores <- fitted(psModel) #evaluate common support with plots tiff("Chapter4_figure4-1.tif", res=600, compression = "lzw", height=6, width=15, units="in") hist(SSOCS.data$pScores[SSOCS.data$treat==0], density = 10, angle = 45, main="Propensity Scores", xlab="Shaded = Untreated | Gray = Treated") hist(SSOCS.data$pScores[SSOCS.data$treat==1], col=gray(0.4,0.25), add=T) dev.off() #======================================== library(lattice) tiff("Chapter4_figure4-2.tif", res=600, compression = "lzw", height=6, width=15, units="in") bwplot( pScores~treat, data = SSOCS.data, ylab = "Propensity Scores", auto.key = TRUE) dev.off() #obtain descriptive statistics summary(SSOCS.data$pScores[SSOCS.data$treat==0]) summary(SSOCS.data$pScores[SSOCS.data$treat==1]) # #===================================================== #Estimate propensity scores with GBM library(twang) #change the treatment indicator from factor to numeric, which is required by GBM set.seed(2016) SSOCS.data$treat <- as.numeric(as.character(SSOCS.data$treat)) myGBM <- ps(psFormula, data = SSOCS.data, n.trees=10000, interaction.depth=4, shrinkage=0.01, stop.method=c("es.max"), estimand = "ATT", verbose=TRUE, sampw = SSOCS.data$FINALWGT) #obtain information about balance achieved and iteration numbers summary(myGBM) #obtain tiff image tiff("Chapter4_GBM_convergence.tif", res=600, compression = "lzw", height=6, width=15, units="in") plot(myGBM,type="b", color=F) dev.off() #extract estimated propensity scores from object pScoresGBM <- myGBM$ps names(pScoresGBM) = "pScoresGBM" SSOCS.data$pScoresGBM <- pScoresGBM #save dataset with propensity scores and the vector of covariate names save(list=c("SSOCS.data", "covariateNames"), file="Chapter4_ssocs08_with_propensity_scores.Rdata")