# Optimizing matching and analysis combinations for estimating causal effects # K. Ellicott Colson, Kara E. Rudolph, Scott C. Zimmerman, Dana E. Goin, Elizabeth E. Stuart, Mark van der Laan, Jennifer Ahern # Scientific Reports # This script provides edited functions to implement targed minimum loss-based estimation (TMLE) to estimate the effect # of treatment among the treated (ATT) with weights. These functions are sourced from the primary sample code script, # sample_code.R. Please refer to this script (sample_code.R) for sample code demonstrating (I) data generating mechanisms, # (II) matching methods, and (III) analysis procedures implemented in this paper. # Functions here are: .setColnames, .bound, regress, predict.regress, tmle.att2, print.cte, tmle.cte, tmle.nde # Helpers ---------------------------- .setColnames <- function(x.colnames, x.ncols, firstChar){ if(is.null(x.colnames)) { if(x.ncols > 1){ x.colnames <- paste(firstChar,1:x.ncols, sep="") } else { x.colnames <- firstChar } } else { invalid.name <- nchar(x.colnames) == 0 if(any(invalid.name)){ x.colnames[invalid.name] <- paste(".internal",firstChar, which(invalid.name), sep="") } } return(x.colnames) } .bound <- function(x, bounds){ x[x>max(bounds)] <- max(bounds) x[x 1) { #only fluctuate g after Q has been fluctuated. H2 <- (Q.A1-Q.A0 - psi) g.up <- glm(Aa~-1+H2, offset=qlogis(g.Aa), family=binomial) g.eps <- coef(g.up) if (is.na(g.eps)) g.eps <- 0 #Fluctuate g g.Aa <- plogis(qlogis(g.Aa)+g.eps*H2) #bound g away from 0 and 1 so logit(g) can be calculated, and clever covars g.Aa <- .bound(g.Aa, gbound) if (a==1) { g.A1 <- g.Aa g.A0 <- 1-g.A1 } else { g.A0 <- g.Aa g.A1 <- 1-g.A0 } } else { g.eps <- 99 } #Calculate expected outcome under treatment recieved Q.AA <- A*Q.A1 + (1-A)*Q.A0 #Calculate covariates for fluctuation H1.A1 <- (1/gDelta.1)*(g.Aa/g.A1) H1.A0 <- (1/gDelta.1)*(-g.Aa/g.A0) H1 <- A*H1.A1 + (1-A)*H1.A0 Q.up <- suppressWarnings(glm(Y~-1+H1, offset=qlogis(Q.AA), family=binomial)) Q.eps <- coef(Q.up) Q.A1 <- plogis(qlogis(Q.A1)+Q.eps[1]*H1.A1) Q.A0 <- plogis(qlogis(Q.A0)+Q.eps[1]*H1.A0) #bound Q away from 0 and 1 so logit(Q) can be calculated Q.A1 <- .bound(Q.A1, Qbound) Q.A0 <- .bound(Q.A0, Qbound) #Calculate target parameter at current step psi <- mean(Q.A1[A==a] - Q.A0[A==a]) crit <- max(abs(c(Q.eps, g.eps))) if (crit <= tol || iter>=maxiter) { done=TRUE } #If not done and critical value has not improved (probably because of bounding on Q) #then convergence failed #Note that if Q is not bounded, logit(Q)=+/-Inf so convergence fails either way... if(done==FALSE && crit==prev.crit) { done=TRUE fail=TRUE warning("Convergence criterion failed to improve between iterations") } prev.crit <- crit if (verbose) cat("iter:", iter, "eps:", Q.eps, g.eps, "crit:", crit, "psi:", psi, "\n") } } if (fail) { res <- list(psi=NA, var.psi=NA, CI=NA, pvalue=NA, Qstar=NA, gstar=NA ) } if (!fail) { #Calculate final parameter of interest after convergence psi <- mean(Q.A1[A==a] - Q.A0[A==a]) Q.AA <- A*Q.A1 + (1-A)*Q.A0 p.Aa <- mean(Aa) #To simplify calculation of IC Y[Delta==0] <- 0 IC <- ((Delta/gDelta.1)*(A*g.Aa/g.A1 - (1-A)*g.Aa/g.A0)*(Y-Q.AA)+Aa*(Q.A1-Q.A0 - psi))/p.Aa if (verbose) { cat("Mean of the influence curve:", mean(IC), "\n") } var.psi <- var(IC)/length(Y) CI <- psi + c(-1.96, 1.96)*sqrt(var.psi) pvalue <- 2*(1-pnorm(abs(psi/sqrt(var.psi)))) estimand <- paste("E(E(Y|A=1,B)-E(Y|A=0,B)|A=", a, ")", sep="") res <- list(psi=psi, var.psi=var.psi, CI=CI, pvalue=pvalue, IC=IC, estimand=estimand, Qstar=cbind("Q.A0"=Q.A0, "Q.A1"=Q.A1), gstar=g.A1) } class(res) <- c("cte") res } # TMLE NDE ----------------------------------------- tmle.nde <- function(A, WZ, Y, RCT=FALSE, ...) { t <- tmle.cte(A, WZ, Y, a=0, ...) t$estimand <- ifelse(RCT, "Natural direct effect", "Natural direct effect among the untreated") return(t) }