/**********************************************************************************/ /****This program is for the implementation of the Cox-type regression ******/ /****on cumulative incidence function with a time change-point, in the ******/ /****context of violation of the proportional assumption under the competing ******/ /****risks setting. Methods are described in the PH.D thesis of Xiaolin Fan. ******/ /**********************************************************************************/ /**********************************************************************************/ /****Author: Xiaolin Fan ******/ /****Updated: 8/1/2008 ******/ /****Documentation File: CHANGEPOINT_readme.pdf ******/ /****Questions or bug report can be sent to xfan@mcw.edu. ******/ /**********************************************************************************/ #include #include #include #include #include #include /********************************************************************************** data input ***********************************************************************************/ typedef struct cr_data{ float time; int cause; /**indicator for failure type or external censoring**/ float z; }CR_DATA; /********************************************************************************* declare the functions in this file **********************************************************************************/ float G_theta(float x,float mu); float lng_theta(float x,float mu); void simu_X(gsl_rng *r, float *Z,float *Z_star,double *X_star,int index,int m,float thetaz); float MH(double *X,double *X_star,float *Z,float *Z_star,float c,int m,int i,int k,float thetaz ); void prob_X(int J,int p2m,double *X1,double *X2,float *pX1,float *pX2); float likelihood(float *pX1,float *pX2,float mu1,float mu2,struct cr_data *data, float *coef1,float coef2,int n,int p2m,float p,float ct); float gen_mu(gsl_rng *,float sigma); float gen_coef(gsl_rng *r,float devi); float prior_mu(float ); float prior_coef(float coef); void pred0_cdf(float *t,int predn,float mu1,float *pX1,int p2m,float *pred0,float p); void pred1_cdf(float *t,int predn,float mu1,float *pX1,int p2m,float *pred1, float p,float ct,float *coef1); float likelihood_CPO(float *pX1,float *pX2,float mu1,float mu2,struct cr_data *data, float *coef1,float coef2,int n,int p2m,float p,float ct); /******************************************************************************** main program start *********************************************************************************/ int main() { FILE *fin_para; static dim=2; /***define read-in parameters***/ int J; /***level of Polya trees***/ float c; /***smoothing parameter in Polya trees***/ int n; /***sample size***/ int MC; /***number of MCMC chains***/ float thetaz1, thetaz2; /****tuning parameters for updating Polya trees***/ float sigma1,sigma2; /****tuning parameters for updating parameters in centering distribution of Polya trees***/ float devi1[dim],devi2; /****tuning parameters for regression coefficients****/ float grid; /****gap between two predictive points****/ float mu1,mu2; /****initial values for parameterd in centering dist.***/ float a1,a2; /****beta parameters for updating the normalized constant****/ float ct; /****time changing-point for cause of interest****/ fin_para=fopen("parameter.txt","r"); /***read in parameter file***/ fscanf(fin_para,"%d",&J); fscanf(fin_para,"%f",&c); fscanf(fin_para,"%d",&n); fscanf(fin_para,"%d",&MC); fscanf(fin_para,"%f %f",&thetaz1,&thetaz2); fscanf(fin_para,"%f %f",&sigma1,&sigma2); fscanf(fin_para,"%f %f",&devi1[0],&devi1[1]); fscanf(fin_para,"%f",&devi2); fscanf(fin_para,"%f %f", &a1,&a2); fscanf(fin_para,"%f",&grid); fscanf(fin_para,"%f %f",&mu1,&mu2); fscanf(fin_para,"%f\n",&ct); printf("-----------------------------------------------------\n"); printf("timedep.c: Bayesian Semiparametric Inference for\n"); printf("---- Time-depdent Coefficients under Regression on\n"); printf("---- Cumulative Incidence Function under Competing Risks\n"); printf("-----------------------------------------------------\n"); printf("Level in Polya Tree %d\n",J); printf("Smoothing Parameter in Polya Tree %10.4f\n",c); printf("Total Sample Size %d\n",n); printf("Number of MCMC iterations %d\n",MC); printf("Tuning Parameters for Polya Trees %10.4f\t %10.4f\n",thetaz1,thetaz2); printf("---------------- for Baseline CIF %10.4f\t %10.4f\n",sigma1,sigma2); printf("---------------- for Coefficients in Cause 1 %10.4f\t %10.4f\n",devi1[0],devi1[1]); printf("---------------- for Coefficiemts in Cause 2 %10.4f\n",devi2); printf("Parameters of beta distr. for generating normalizing const. %10.4f\t %10.4f\n",a1,a2); printf("Gap Between Two Predicted Points %10.4f\n",grid); printf("Initial Values for Centering Dist. %10.4f\t %10.4f\n",mu1,mu2); printf("Time Changing-point is Chosen at %10.4f\n",ct); fclose(fin_para); /****************************************************************/ /***********declare the rest of parameters and files*************/ /****************************************************************/ /***seed***/ const gsl_rng_type *T; gsl_rng *r; /***parameters for Polya trees***/ float m=1.0; int maxind=pow(2,J+1)-2; int p2m=pow(2,J); int index1,index2; float Z1[maxind],Z1_star[maxind],Z2[maxind],Z2_star[maxind]; double X1[maxind],X1_star[maxind],X2[maxind],X2_star[maxind]; float pX1[p2m],pX1_star[p2m],pX2[p2m],pX2_star[p2m]; float pD1_X[maxind],pD1_Xstar[maxind],pD2_X[maxind],pD2_Xstar[maxind]; /***parameters in the baseline cumulative incidence function ***/ float mu1p,tao1,tao1p; float mu2p,tao2,tao2p; float priormu1,priormu2,priormu1p,priormu2p; /***number of predictive points***/ int predn=100; /***define the proposed normalized constant**/ float p=0.5, p_p; /***define regression coefficients***/ float coef1_0[dim],coef1_1[dim]; float coef2_0,coef2_1; float priorcoef1[dim],priorcoef1_p[dim]; float priorcoef2,priorcoef2_p; /***monitor the acceptance rate ***/ int i,k,s, MCn; float q,logu; int num1[maxind],num2[maxind],p_num=0; int num_mu[]={0,0}; int num_coef1[]={0,0},num_coef2=0; /**define predicted CIF**/ float pred0[predn],pred1[predn],t[predn]; /**define LPML***/ double LPML; /***define read-in data**/ FILE *fin_data; /**define output files**/ FILE *accept; FILE *fpred1,*fpred2,*f_p,*f_coef1,*f_coef2,*f_mu; FILE *f_LPML; /***path for read-in data and output files***/ fin_data=fopen("data.txt","r"); accept=fopen("output/accept.txt","w"); fpred1=fopen("output/pred0.txt","w"); fpred2=fopen("output/pred1.txt","w"); f_p=fopen("output/p.txt","w"); f_coef1=fopen("output/coef1.txt","w"); f_coef2=fopen("output/coef2.txt","w"); f_mu=fopen("output/mu.txt","w"); f_LPML=fopen("output/LPML.txt","w"); /***data construction*****/ struct cr_data data[n]; /***read in data file***/ for(i=0;i0 || q>logu){ ++p_num; p=p_p; } fprintf(f_p,"%10.4f\n",p); /*****Step 2: updating Polya trees for cause 1********/ index1=0; for(i=J;i>0;i--){ index2=index1+pow(2,i); for(k=index1;k0 || q>logu){ ++num1[k]; X1[k]=X1_star[k]; X1[k+1]=X1_star[k+1]; Z1[k]=Z1_star[k]; Z1[k+1]=Z1_star[k+1]; } else{ X1_star[k]=X1[k]; X1_star[k+1]=X1[k+1]; } } index1=index2; } prob_X(J,p2m,X1_star,X1,pX1_star,pX1); /******Step 3: updating the parameter in centering distributions for cause 1****/ tao1p=tao1+gen_mu(r,sigma1); mu1p=exp(tao1p); priormu1p=prior_mu(mu1p); q=likelihood(pX1,pX2,mu1p,mu2,data,coef1_0,coef2_0,n,p2m,p,ct) -likelihood(pX1,pX2,mu1,mu2,data,coef1_0,coef2_0,n,p2m,p,ct)+priormu1p-priormu1; logu=log(gsl_ran_flat(r,0.0,1.0)); if (q>0 || q>logu){ ++num_mu[0]; tao1=tao1p; mu1=mu1p; priormu1=priormu1p; } fprintf(f_mu,"%10.4f\t",mu1); /******Step 4: updating regression coeficients for cause 1********/ for(i=0;i0 || q>logu){ ++num_coef1[i]; fprintf(f_coef1,"%10.4f\t",coef1_1[i]); coef1_0[i]=coef1_1[i]; priorcoef1[i]=priorcoef1_p[i]; } else {fprintf(f_coef1,"%10.4f\t",coef1_0[i]);} } fprintf(f_coef1,"\n"); /*****Step 5: updating Polya trees for cause 2********/ index1=0; for(i=J;i>0;i--){ index2=index1+pow(2,i); for(k=index1;k0 || q>logu){ ++num2[k]; X2[k]=X2_star[k]; X2[k+1]=X2_star[k+1]; Z2[k]=Z2_star[k]; Z2[k+1]=Z2_star[k+1]; } else{ X2_star[k]=X2[k]; X2_star[k+1]=X2[k+1]; } } index1=index2; } prob_X(J,p2m,X2,X2_star,pX2,pX2_star); /******Step 6: updating the parameter in centering distributions for cause 2****/ tao2p=tao2+gen_mu(r,sigma2); mu2p=exp(tao2p); priormu2p=prior_mu(mu2p); q=likelihood(pX1,pX2,mu1,mu2p,data,coef1_0,coef2_0,n,p2m,p,ct) -likelihood(pX1,pX2,mu1,mu2,data,coef1_0,coef2_0,n,p2m,p,ct)+priormu2p-priormu2; logu=log(gsl_ran_flat(r,0.0,1.0)); if (q>0 || q>logu){ ++num_mu[1]; tao2=tao2p; mu2=mu2p; priormu2=priormu2p; } fprintf(f_mu,"%10.4f\n",mu2); /*******Step 7: updating the regression coeficient for cause 2********/ coef2_1=coef2_0+gen_coef(r,devi2); priorcoef2_p=prior_coef(coef2_1); q=likelihood(pX1,pX2,mu1,mu2,data,coef1_0,coef2_1,n,p2m,p,ct) -likelihood(pX1,pX2,mu1,mu2,data,coef1_0,coef2_0,n,p2m,p,ct)+priorcoef2_p-priorcoef2; logu=log(gsl_ran_flat(r,0.0,1.0)); if (q>0 || q>logu){ ++num_coef2; coef2_0=coef2_1; priorcoef2=priorcoef2_p; } fprintf(f_coef2,"%10.4f\n",coef2_0); /******Calculate the baseline cumulative incidences function for cause 1*******/ pred0_cdf(t,predn,mu1,pX1,p2m,pred0,p); for(s=0;s>1 | p2m; pX1[k]=pX1[k] * X1[index]; } } for(k=0;k>1 | p2m; pX2[k]=pX2[k] * X2[index]; } } } /************************************************************************* funtion: likelihood function **************************************************************************/ float likelihood(float *pX1,float *pX2,float mu1,float mu2,struct cr_data *data, float *coef1,float coef2,int n,int p2m,float p,float ct){ int i,j,index1,index1_ct,index2; float p_data=0.0; float G_01,G_01_ct,G_02,lng1,lng2,temp; float expterm11,expterm12,expterm2,p2; index1_ct=p2m*G_theta(ct,mu1); if (index1_ct==p2m) {index1_ct=p2m-1;} G_01_ct=0.0; for(j=0;j0.9999){G_01=0.9999;} lng1=log(p2m)+log(pX1[index1])+lng_theta(data[i].time,mu1); if (data[i].time<=ct){p_data=p_data+log(expterm11)+(expterm11-1.0)*log(1-p*G_01)+log(p)+lng1;} else if (data[i].time>ct){ p_data=p_data+(expterm11-expterm12)*log(1-p*G_01_ct)+log(expterm12)+(expterm12-1.0)*log(1-p*G_01)+log(p)+lng1; } } else if (data[i].cause==2){ index2=p2m*G_theta(data[i].time,mu2); if (index2==p2m) {index2=p2m-1;} G_02=0.0; for(j=0;j0.9999){G_02=0.9999;} lng2=log(p2m)+log(pX2[index2])+lng_theta(data[i].time,mu2); p_data=p_data+log(p2)+log(expterm2)+(expterm2-1.0)*log(1-G_02)+lng2; } else if (data[i].cause==0){ index1=p2m*G_theta(data[i].time,mu1); if (index1==p2m) {index1=p2m-1;} G_01=0.0; for(j=0;j0.9999){G_02=0.9999;} if (G_02>0.9999){G_02=0.9999;} temp=p2*(1-pow(1-G_02,expterm2)); if (data[i].time<=ct){p_data=p_data+log(pow(1.0-p*G_01,expterm11)-temp);} else if (data[i].time>ct){ p_data=p_data+log(pow(1-p*G_01_ct,expterm11-expterm12)*pow(1-p*G_01,expterm12)-temp); } } } return p_data; } /************************************************************************ function:sampling the parameter in exponential distribution using random walk *************************************************************************/ float gen_mu(gsl_rng *r,float sigma){ float lnx; lnx=gsl_ran_gaussian(r,sigma); return lnx; } /************************************************************************ function: sampling the parameter for regression coeficients using random walk *************************************************************************/ float gen_coef(gsl_rng *r,float devi){ float lnx; lnx=gsl_ran_gaussian(r,devi); return lnx; } /************************************************************************ funtion: prior on mean paramter of exponential distribution *************************************************************************/ float prior_mu(float mu){ float priormu; float mean=100.0; priormu=log(gsl_ran_exponential_pdf(mu,mean)); return priormu; } /************************************************************************ funtion: prior on regression coefficients *************************************************************************/ float prior_coef(float coef){ float priorcoef; float sd=10.0; priorcoef=log(gsl_ran_gaussian_pdf(coef,sd)); return priorcoef; } /************************************************************************** function: caculate the predictive distribution at grid points for cause 1 with covariate=0 ***************************************************************************/ void pred0_cdf(float *t,int predn,float mu1,float *pX1,int p2m,float *pred0,float p){ int i,j,index; float log_likeli=0.0,temp; for(i=0;i0.9999){G_01=0.9999;} lng1=log(p2m)+log(pX1[index1])+lng_theta(data[i].time,mu1); if (data[i].time<=ct){ junk=log(expterm11)+(expterm11-1.0)*log(1-p*G_01)+log(p)+lng1-log(p1); LPML=LPML+junk; } else if (data[i].time>ct){ junk=(expterm11-expterm12)*log(1-p*G_01_ct)+log(expterm12)+(expterm12-1.0)*log(1-p*G_01)+log(p)+lng1-log(p1); LPML=LPML+junk; } } else if (data[i].cause==2){ index2=p2m*G_theta(data[i].time,mu2); if (index2==p2m) {index2=p2m-1;} G_02=0.0; for(j=0;j0.9999){G_02=0.9999;} lng2=log(p2m)+log(pX2[index2])+lng_theta(data[i].time,mu2); junk=log(expterm2)+(expterm2-1.0)*log(1-G_02)+lng2; LPML=LPML+junk; } else if (data[i].cause==0){ index1=p2m*G_theta(data[i].time,mu1); if (index1==p2m) {index1=p2m-1;} G_01=0.0; for(j=0;j0.9999){G_02=0.9999;} if (G_02>0.9999){G_02=0.9999;} temp=(1-p1)*(1-pow(1-G_02,expterm2)); if (data[i].time<=ct){ junk=log(pow(1.0-p*G_01,expterm11)-temp); LPML=LPML+junk; } else if (data[i].time>ct){ junk=log(pow(1-p*G_01_ct,expterm11-expterm12)*pow(1-p*G_01,expterm12)-temp); LPML=LPML+junk; } } } return LPML; }