# --- Jan. 31st, 2007 --- # --- cross validation --- print(Sys.time()); tau<-1.0; step<-1000; increase<-0.01; random<-read.table("random.dat")$V1; d1.y<-read.table("out-1.dat"); d2.y<-read.table("out-2.dat"); d3.y<-read.table("out-3.dat"); all.y<-rbind(d1.y, d2.y, d3.y); all.y<-all.y$V1; samplesize<-length(all.y); d1.x<-read.table("exp-1.dat"); d2.x<-read.table("exp-2.dat"); d3.x<-read.table("exp-3.dat"); all.x<-cbind(d1.x, d2.x, d3.x); size<-c(ncol(d1.x), ncol(d2.x), ncol(d3.x)); group<-rep(3, sum(size)); for(i in 1:size[1]) group[i]<-1; for(i in (size[1]+1):(size[1]+size[2])) group[i]<-2; # --- remember to perturbe group also --- random.w<-random[1:length(all.y)]; rank.w<-(1:length(all.y))[order(random.w)]; all.y<-all.y[rank.w]; all.x<-all.x[ ,rank.w]; group.status<-group[rank.w]; group.name<-sort(unique(group.status)); num.group<-length(unique(group.status)); group.size<-rep(0, num.group); for(i in 1:num.group){ work.group<-group.name[i]; group.size[i]<-length(group.status[group.status==work.group]); } aug.x<-t(cbind(rep(1, samplesize), t(all.x))); num.cov<-nrow(all.x); # --- prepare to cross validation --- V<-3; delete.size<-as.integer(samplesize/V); cv.score<-rep(0, step); for(v in 1:V){ gradient.mat.c<-matrix(0, (num.cov+1), num.group); beta.mat.c<-matrix(0, (num.cov+1), num.group); cross.y<-all.y[-(((v-1)*delete.size+1):(v*delete.size))]; cross.x<-aug.x[ ,-(((v-1)*delete.size+1):(v*delete.size))]; cross.group.status<-group.status[-(((v-1)*delete.size+1):(v*delete.size))]; cross.group.size<-rep(0, num.group); for(i in 1:num.group){ work.group<-group.name[i]; cross.group.size[i]<- length(cross.group.status[cross.group.status==work.group]); } evaluate.y<-all.y[(((v-1)*delete.size+1):(v*delete.size))]; evaluate.x<-aug.x[ ,(((v-1)*delete.size+1):(v*delete.size))]; evaluate.group.status<- group.status[(((v-1)*delete.size+1):(v*delete.size))]; # --- the following adapted from estimation --- aug.z.dat<-cross.x; indicator<-cross.y; for(s in 1:step){ # --- for data from each group --- for(g in 1:num.group){ work.z.dat<-aug.z.dat[ ,cross.group.status==group.name[g]]; work.mat<-matrix(0, nrow(work.z.dat), ncol(work.z.dat)); for(mm in 1:ncol(work.z.dat)) work.mat[ ,mm]<-work.z.dat[ ,mm]; work.beta<-beta.mat.c[ ,g]; work.y<-indicator[cross.group.status==group.name[g]]; beta.z<-t(work.mat)%*%work.beta; beta.p<-1/(1+exp(-beta.z)); term.1<-work.y/beta.p-(1-work.y)/(1-beta.p); term.2<-exp(beta.z)/((1+exp(beta.z))^2); work.gradient<-(t(term.1*term.2))%*%t(work.mat); ## New~~ Feb 1st gradient.mat.c[ ,g]<-work.gradient[1, ]; }# -- end of gradient computation -- ## New ~~ feb 1st sum.gradient<-abs(apply(gradient.mat.c, 1, sum)); max.gradient<-max(sum.gradient[2:length(sum.gradient)]); gradient<-gradient.mat.c/max.gradient; f.v<-ifelse(sum.gradient>=(tau*max(sum.gradient)), 1, 0); f.v[1]<-1; g.v<-gradient*f.v; beta.mat.c<-beta.mat.c+increase*g.v; # --- now evaluate --- for(m in 1:num.group){ beta.est.temp<-beta.mat.c[ ,m]; evaluate.x.temp<-evaluate.x[ ,evaluate.group.status== group.name[m]]; evaluate.y.temp<-evaluate.y[evaluate.group.status== group.name[m]]; evaluate.x.xt<-matrix(0, nrow(evaluate.x.temp), ncol(evaluate.x.temp)); for(ii in 1:ncol(evaluate.x.xt)) evaluate.x.xt[ ,ii]<-evaluate.x.temp[ ,ii]; likelihood.eva<-0; eva.score<-t(evaluate.x.xt)%*%beta.est.temp; eva.temp<-exp(eva.score); eva.p<-eva.temp/(1+eva.temp); likelihood.eva<-sum(log(eva.p)*evaluate.y.temp)+ sum(log(1-eva.p)*(1-evaluate.y.temp)); cv.score[s]<-cv.score[s]+likelihood.eva; } }# -- end of step iteration } se<-seq(from=1, to=step, by=1); se.sel<-se[cv.score==max(cv.score)]; result<-cbind(tau, increase, se.sel, max(cv.score)); postscript("cv.ps"); plot(lowess(se, cv.score, f=0.2), type="l"); graphics.off(); print(Sys.time());