##code for Chapter 9 "Weighting Methods for Time-Varying Treatments" of book:
#Leite, W. L. (2016). Practical propensity score methods using R.
#Thousand Oaks, CA: Sage.
#
#this is the code that was used to generate the example results in the book
#As the R software and the R packages used in this example are updated frequently
#some incompatibilities between the current code and new R versions or package versions
#may appear
#Any updates to the code will be posted at:
# http://www.practicalpropensityscore.com

#this example estimates the effect of self-employment on job satisfaction
#using data from the National Longitudinal Survey of Youth 1979 (NLSY79)

#load the dataset
load("chapter_9_long_data_imputed.Rdata")

#=====================================
#obtain weight with covariate balancing propensity score
library(CBPS)

data.long$time = 1+ (data.long$time - 1994)/2 #for some reason it does not work if time is year


#run the estimation of weights, this takes a while to run
cbps <- CBMSM(formula = selfEmploy ~cumSE+age + female + mngr.m +
                rotter.index + white + degree +
                divorce + experience + married + urban,
              id = data.long$id, #define individual id
              time = data.long$time,#define time variable
              data= data.long,
              twostep=TRUE,
              type = "MSM", #define weights for marginal structural models (time-varying treatments)
              msm.variance = "approx",
              time.vary = F) #define that different coefficients are estimated at each time point


#========================================================
#grand mean center the continuous covariates that will be included in the outcome model
#to preserve interpretation of the effect as the ATE
data.long$cumSE <- with(data.long, cumSE - mean(cumSE))
data.long$experience <- with(data.long, experience - mean(experience))

#create the design object for IPTW weights
require(survey)
design.iptw <- svydesign(ids=~id,nest=F,weights =~finalWeight,data=data.long)

#obtain boostrapping of standard errors
# surveyDesignBoot = as.svrepdesign(design.iptw, type=c("bootstrap"),replicates=1000)

# #obtain the group means and standard errors
# (means.iptw=svyby(formula=~genSatis,by=~selfEmploy,design=design.iptw, FUN=svymean))
# #obtain group mean difference by contrast statement
# (contrast.iptw<- svycontrast(means.iptw, contrasts=c(-1,1)))#not self employed group is contrasted by -1
# (contrast.iptw.t=coef(contrast.iptw)/SE(contrast.iptw))#T statistic is estimated as coefficient over standard error
# (contrast.iptw.p=2*(1-pt(abs(contrast.iptw.t),nrow(data.long)-2)))#appropriate p value is obtained from the student's t distribution
#
# #==================================================

#estimate the treatment effect with weighted least squares regression with
#cluster effect adjustment of standard errors through Taylor series linearization
glm.iptw <- svyglm(genSatis~selfEmploy+cumSE+experience+timeRecoded+
                      selfEmploy:cumSE + selfEmploy:experience+selfEmploy:timeRecoded,design.iptw)
summary(glm.iptw)



#=========================================================
#generalized estimating equations
library(geepack)
gee.iptw <- geeglm(genSatis~selfEmploy+cumSE+experience+timeRecoded+
                       selfEmploy:cumSE + selfEmploy:experience+selfEmploy:timeRecoded,
                   data = data.long, weights = data.long$finalWeight, family = gaussian,
                   id = data.long$id, waves=data.long$time, corstr = "ar1")
summary(gee.iptw)

#save all results
#save(list=ls(), file="Chapter9_all_results.Rdata",compress=T)