###### Author: Ting Ye ###### ###### Last modified: Jan 1, 2023 ###### # Use null variance # # Add estimation # ######################### # Added Jan 1, 2023 # ######################### # covariate adjusted hazard ratio estimation proposed in Ye, Yi, Shao (2022) covariate_adjusted_logrank_estimation<-function(data.simu,p_trt){ n<-dim(data.simu)[1] data.simu$fz<-ind_to_factor(data.simu) data.rev<-data.sort(data.simu) T.seq<-data.rev$t T.rep<-c(0,T.seq)[1:n] T.diff<-T.seq-T.rep same.ind<-which(T.diff==0) n_col<-dim(data.rev)[2] for(ind in same.ind){ data.rev[ind,(n_col-2):n_col]<-data.rev[ind-1,(n_col-2):n_col] } score<-function(theta){ S.alt<-sum(data.rev$delta*(data.rev$I1-exp(theta)*data.rev$Y1/(exp(theta)*data.rev$Y1+data.rev$Y0)))/n return(S.alt) } theta_L<-uniroot(score,interval=c(-10,10))$root # get the unadjusted estimator x.model<-as.matrix(data.rev[,grepl("model",names(data.rev)),drop=FALSE]) #only those has model if(length(levels(data.rev$fz))==1){ x.mat<-model.matrix(~x.model,data=data.rev)[,-1,drop=FALSE] }else{ x.mat<-model.matrix(~factor(fz)+x.model,data=data.rev)[,-1,drop=FALSE] } x.centered<-sweep(x.mat, 2, colMeans(x.mat)) # calculating beta0 and beta1 data.rev$O.hat1<-data.rev$delta*data.rev$Y0/(exp(theta_L)*data.rev$Y1+data.rev$Y0) - exp(theta_L)*cumsum(data.rev$delta*data.rev$Y0/(exp(theta_L)*data.rev$Y1+data.rev$Y0)^2) data.rev$O.hat0<-exp(theta_L)*data.rev$delta*data.rev$Y1/(exp(theta_L)*data.rev$Y1+data.rev$Y0) - exp(theta_L)*cumsum(data.rev$delta*data.rev$Y1/(exp(theta_L)*data.rev$Y1+data.rev$Y0)^2) fit1.Ohat<-lm(data.rev$O.hat1[data.rev$I1==1]~x.mat[data.rev$I1==1,]) fit0.Ohat<-lm(data.rev$O.hat0[data.rev$I1==0]~x.mat[data.rev$I1==0,]) beta1.Ohat<-fit1.Ohat$coefficients[-1] beta0.Ohat<-fit0.Ohat$coefficients[-1] ind.na<-which(is.na(beta1.Ohat)) if(length(ind.na)!=0){ warning("Removing model variables that are linearly dependent with the stratification variables.") x.mat<-x.mat[,-ind.na,drop=FALSE] x.centered<-x.centered[,-ind.na,drop=FALSE] beta1.Ohat<-beta1.Ohat[-ind.na] beta0.Ohat<-beta0.Ohat[-ind.na] } score_CL<-function(theta){ res<-score(theta)-sum(data.rev$I1*(t((x.centered)%*% beta1.Ohat )) - data.rev$I0*(t((x.centered)%*% beta0.Ohat)))/n return(res) } theta_CL<-uniroot(score_CL,interval=c(-10,10))$root # get the adjusted estimator # Modification for making consistent with current code for testing (theta_L in place of theta_CL) sigma2_L<-exp(theta_L)*sum(data.rev$delta*data.rev$Y0*data.rev$Y1/(exp(theta_L)*data.rev$Y1+data.rev$Y0)^2)/n sigma2_CL<-sigma2_L-p_trt*(1-p_trt)*(beta1.Ohat+beta0.Ohat) %*% var(x.mat) %*% (beta1.Ohat+beta0.Ohat) var_est<-sigma2_CL/sigma2_L^2/n return(list(theta_L=theta_L,se.theta_L=sqrt(1/sigma2_L/n), theta_CL=theta_CL,se.theta_CL=sqrt(var_est))) } covariate_adjusted_stratified_logrank_estimation<-function(data.simu,p_trt){ n<-dim(data.simu)[1] data.simu$fz<-ind_to_factor(data.simu) fz.n<-unique(data.simu$fz) # obtain U_SL score_SL<-function(theta){ S.alt<-0 for(z in fz.n){ data.tmp<-data.sort(data.simu[data.simu$fz==z,]) T.seq<-data.tmp$t T.rep<-c(0,T.seq)[1:length(T.seq)] T.diff<-T.seq-T.rep same.ind<-which(T.diff==0) n_col<-dim(data.tmp)[2] for(ind in same.ind){ data.tmp[ind,(n_col-2):n_col]<-data.tmp[ind-1,(n_col-2):n_col] } S.alt<-S.alt+sum(data.tmp$delta*(data.tmp$I1-exp(theta)*data.tmp$Y1/(exp(theta)*data.tmp$Y1+data.tmp$Y0)))/n } return(S.alt) } theta_SL<-uniroot(score_SL,interval=c(-10,10))$root # get the unadjusted stratified estimator data.rev<-data.sort(data.simu[data.simu$fz==fz.n[1],]) T.seq<-data.rev$t T.rep<-c(0,T.seq)[1:length(T.seq)] T.diff<-T.seq-T.rep same.ind<-which(T.diff==0) n_col<-dim(data.rev)[2] for(ind in same.ind){ data.rev[ind,(n_col-2):n_col]<-data.rev[ind-1,(n_col-2):n_col] } data.rev$O.hat1<-data.rev$delta*data.rev$Y0/(exp(theta_SL)*data.rev$Y1+data.rev$Y0) - exp(theta_SL)*cumsum(data.rev$delta*data.rev$Y0/(exp(theta_SL)*data.rev$Y1+data.rev$Y0)^2) data.rev$O.hat0<-exp(theta_SL)*data.rev$delta*data.rev$Y1/(exp(theta_SL)*data.rev$Y1+data.rev$Y0) - exp(theta_SL)*cumsum(data.rev$delta*data.rev$Y1/(exp(theta_SL)*data.rev$Y1+data.rev$Y0)^2) # calculate stratum-specific O_zij for(z in fz.n[-1]){ data.tmp<-data.sort(data.simu[data.simu$fz==z,]) T.seq<-data.tmp$t T.rep<-c(0,T.seq)[1:length(T.seq)] T.diff<-T.seq-T.rep same.ind<-which(T.diff==0) n_col<-dim(data.tmp)[2] for(ind in same.ind){ data.tmp[ind,(n_col-2):n_col]<-data.tmp[ind-1,(n_col-2):n_col] } data.tmp$O.hat1<- data.tmp$O.hat1<-data.tmp$delta*data.tmp$Y0/(exp(theta_SL)*data.tmp$Y1+data.tmp$Y0) - exp(theta_SL)*cumsum(data.tmp$delta*data.tmp$Y0/(exp(theta_SL)*data.tmp$Y1+data.tmp$Y0)^2) data.tmp$O.hat0<-exp(theta_SL)*data.tmp$delta*data.tmp$Y1/(exp(theta_SL)*data.tmp$Y1+data.tmp$Y0) - exp(theta_SL)*cumsum(data.tmp$delta*data.tmp$Y1/(exp(theta_SL)*data.tmp$Y1+data.tmp$Y0)^2) data.rev<-rbind(data.rev,data.tmp) } n<-dim(data.rev)[1] x.model<-as.matrix(data.rev[,grepl("model",names(data.rev)),drop=FALSE]) #only those has model fit.anhc1<-lm(O.hat1~factor(fz)+x.model[data.rev$I1==1,],data=data.rev[data.rev$I1==1,]) fit.anhc0<-lm(O.hat0~factor(fz)+x.model[data.rev$I1==0,],data=data.rev[data.rev$I1==0,]) which.model<-which(grepl("x.model",names(fit.anhc1$coefficients))) which.model.notNA<-which(!is.na(fit.anhc1$coefficients[which.model])) if(length(which.model.notNA)==0){ stop("The model variables are linearly dependent on Z. The covariate-adjusted stratified log-rank is the same as the stratified log-rank.") }else{ fit.anhc1.coefx<-fit.anhc1$coefficients[which.model[which.model.notNA]] fit.anhc0.coefx<-fit.anhc0$coefficients[which.model[which.model.notNA]] x.coefx<-model.matrix(~factor(fz)+as.matrix(x.model),data=data.rev)[,which.model[which.model.notNA],drop=FALSE] for(z in fz.n){ x.coefx[data.rev$fz==z,]<-sweep(x.coefx[data.rev$fz==z,,drop=FALSE], 2, colMeans(x.coefx[data.rev$fz==z,,drop=FALSE])) } x.coefx.centered.byZ<-x.coefx score_CSL<-function(theta){ res<-score_SL(theta)- sum(data.rev$I1*as.vector(x.coefx.centered.byZ %*% fit.anhc1.coefx) - data.rev$I0*as.vector(x.coefx.centered.byZ %*% fit.anhc0.coefx))/n return(res) } theta_CSL<-uniroot(score_CSL,interval=c(-10,10))$root # get the adjusted estimator } ### calculate variance ### # Modification for making consistent with current code for testing (theta_SL in place of theta_CSL) sigma2_SL<-exp(theta_SL)*sum(data.rev$delta*data.rev$Y0*data.rev$Y1/(exp(theta_SL)*data.rev$Y1+data.rev$Y0)^2)/n sigma2_CSL<-sigma2_SL for(z in fz.n){ pz<-length(which(data.rev$fz==z))/n SigmaXz<-var(x.coefx[data.rev$fz==z,,drop=FALSE]) sigma2_CSL<-sigma2_CSL- p_trt*(1-p_trt)*(fit.anhc1.coefx+fit.anhc0.coefx) %*% SigmaXz %*%(fit.anhc1.coefx+fit.anhc0.coefx)*pz } var_est<-sigma2_CSL/sigma2_SL^2/n return(list(theta_SL=theta_SL,se.theta_SL=sqrt(1/sigma2_SL/n), theta_CSL=theta_CSL,se.theta_CSL=sqrt(var_est))) }