##code for Chapter 7 "Propensity Score Methods for Continuous Treatment Doses" of book: #Leite, W. L. (2017). Practical propensity score methods using R. #Thousand Oaks, CA: Sage. # #this is the code that was used to generate the example results in the book #As the R software and the R packages used in this example are updated frequently #some incompatibilities between the current code and new R versions or package versions #may appear #Any updates to the code will be posted at: # http://www.practicalpropensityscore.com #PART 1 GENERALIZED PROPENSITY SCORES AND DOSE RESPONSE FUNCTION FOR CONTINOUS TREATMENTS #this example estimates the effect of school participation #in the Algebra Nation virtual learning environment #on the mean student scores on Florida's Algebra I End-of-Course (EOC) Assessment. #load data data <- read.csv("Chapter7_data_Algebra_Nation.csv") #standardize continuous covariates #(this is to help interpret covariate balance measures because they will be in standard units) data$numOfStud2014 <- scale(data$numOfStud2014) data$meanScale2012 <- scale(data$meanScale2012) data$lev1Perc2012 <- scale(data$lev1Perc2012) data$lev5Perc2012 <- scale(data$lev5Perc2012) data$perc.free.lunch <- scale(data$perc.free.lunch) data$perc.reduced.lunch <- scale(data$perc.reduced.lunch) #convert binary variables to factors data$SeniorHigh <- factor(data$SeniorHigh) data$middleHigh <- factor(data$middleHigh) #breakdown locale into size and type variables data$locationSize <- with(data, ifelse(Locale.=="City: Large" | Locale.=="Suburb: Large", "Large", ifelse( Locale.=="City: Midsize"| Locale.=="Suburb: Midsize", "Midsize", "Small"))) data$locationSize <- factor(data$locationSize) data$locationRural <- with(data, ifelse(Locale.=="Rural: Distant" | Locale.=="Rural: Fringe" | Locale.=="Rural: Remote", "Rural", "Urban")) data$locationRural <- factor(data$locationRural) #plot the treatment measure, which is total logins per examinee #and the log of total logins per examinee tiff("Chapter7_figure7-1.tif", res=600, compression = "lzw", height=6, width=15, units="in") with(data, hist(loginsPerExaminee, density = 10, angle = 45, main="", xlim=c(-10,26), ylim=c(0,300), xlab="Shaded = Treatment doses | Gray = Log Treatment Doses ") ) with(data, hist(logLoginsPerExaminee, col=gray(0.4,0.25), xlim=c(-10,26), ylim=c(0,250),add=T) ) dev.off() #===================================================== #define covariates covariateNames <- c("Charter", #dummy indicator of charter school "Magnet.", #dummy indicator of magnet school "Title.I.School.", #dummy indicator of title 1 school "locationRural", #rural location "locationSize", #size of location (large, midsize, small) "Students.", #total number of students in 2012 "SeniorHigh" , #dummy indicator of whether it is a senior high school (the reference group is high school "numOfStud2014", #number of test takers in 2014 "meanScale2012", #mean scaled scores in 2012 "lev1Perc2012",#percent achieving level 1 in 2012 "lev5Perc2012", #percent achieving level 5 in 2012 "perc.free.lunch", #percent free lunch in 2012 "perc.reduced.lunch") #percent reduced lunch in 2012 #define model formulaDose <- formula("logLoginsPerExaminee~Charter+Magnet.+ Title.I.School.+locationRural+locationSize+Students.+ SeniorHigh+numOfStud2014+meanScale2012+lev1Perc2012+ lev5Perc2012+perc.free.lunch+perc.reduced.lunch") #fit regression model for treatment doses modelDoses <- lm(formula=formulaDose, data=data) #estimate generalized propensity score #other densities could have been used instead of dnorm, for example: #dpois for density of poisson, dt for density of t distribution data$GPS <- dnorm(data$logLoginsPerExaminee, mean=modelDoses$fitted, sd=sd(modelDoses$residuals)) #====================================================== #evaluate covariate balance using strata of the GPS data$strataGPS <- with(data,cut(GPS, include.lowest = T,labels=1:5, breaks = quantile(GPS,probs=seq(0,1,0.2)))) balanceTable <- data.frame() #storage #loop through variables for (var in 1:length(covariateNames)) { #formula with treatment dose as a function of covariate balanceFormula <- paste("logLoginsPerExaminee~strataGPS+",covariateNames[var],sep="") #regress dose on covariate without weights maxEff <- max(abs(coef(lm(balanceFormula,data))[-(1:5)])) balanceTable <- rbind(balanceTable,c(var,maxEff)) } #close loop #put variable names on table names(balanceTable) <- c("variable","coef") balanceTable$variable <- covariateNames #standardize coefficients with respect to sd of outcome balanceTable$coef <- balanceTable$coef/sd(data$logLoginsPerExaminee) #fit the outcome model library(survey) designAN <- svydesign(id=~1,weights=~1,data =data) modelOutcome <- svyglm(formula="meanScale2014~logLoginsPerExaminee + I(logLoginsPerExaminee^2) + GPS + I(GPS^2) + logLoginsPerExaminee:GPS", design=designAN) summary(modelOutcome) #Estimate the treatment effect at percentiles of GPS all.effects <- data.frame()#storage #loop through percentiles of dosage from 1% to 99% for (dose in quantile(data$logLoginsPerExaminee,probs=seq(0.01,1,0.01)) ) { #predict outcome given all the GPS for a fixed value of dosage effects <- predict(modelOutcome,type="response", vcov=T, newdata=data.frame(logLoginsPerExaminee=dose, GPS=data$GPS) ) effect <- svycontrast(effects,rep(1/nrow(data),nrow(data))) all.effects <- rbind(all.effects,data.frame(effect)) #accumulate results } #close loop #create a dataset of percentile of doses and responses doseResponses <- data.frame(percentile = seq(1,100,1), logLoginsPerExaminee=quantile(data$logLoginsPerExaminee,probs=seq(0.01,1,0.01)), all.effects) names(doseResponses)[3:4] <- c("meanScale2014", "SE") #calculate confidence intervals doseResponses$lowerCL <- with(doseResponses, meanScale2014 - 1.96*SE) doseResponses$upperCL <- with(doseResponses, meanScale2014 + 1.96*SE) #save table with a selection of percentiles write.csv(doseResponses[seq(10,100,10),], file="Table71.csv",row.names=F) #plot dose-response function tiff("Chapter7_figure7-2.tif", res=600, compression = "lzw", height=6, width=15, units="in") with(doseResponses, plot(logLoginsPerExaminee,meanScale2014, main="", type="b", ylim=c(380,420), xlab="Log logins per examinee",ylab="Mean Algebra I 2014") ) with(doseResponses, lines(logLoginsPerExaminee,lowerCL, lty = 'dashed')) with(doseResponses, lines(logLoginsPerExaminee,upperCL, lty = 'dashed')) dev.off() #save data with GPS included save(data, file = "Chapter7_data_Algebra_Nation_with_GPS.Rdata", compress=T)