ISIT = read.table("//yrzhibo.com/teachers/xirb/Courses/biostatistics/Biostatistics2014/ISIT.txt",header=T) attach(ISIT) #op <- par(mfrow=c(2,2),mar=c(6,4,1,2)) Sources16<-ISIT$Sources[ISIT$Station==16] Depth16<-ISIT$SampleDepth[ISIT$Station==16] plot(Depth16,Sources16,type="p") ISIT.tmp = ISIT[ISIT$Station==16,] ISIT.tmp$SampleDepthSq = (ISIT.tmp$SampleDepth)^2 ISIT.tmp$SampleDepthCB = (ISIT.tmp$SampleDepth)^3 m.poly = lm(Sources~SampleDepth+SampleDepthSq+SampleDepthCB,data=ISIT.tmp) x.new = data.frame(SampleDepth=seq(0,5000,by=1),SampleDepthSq=seq(0,5000,by=1)^2,SampleDepthCB=seq(0,5000,by=1)^3) y.new = predict(m.poly,newdata=x.new) plot(Depth16,Sources16,type="p") lines(x.new$SampleDepth,y.new,col="red") plot(m.poly) ### simulated data x.tmp = seq(0.1,10,by=0.05) y = sin(x.tmp)*x.tmp + rnorm(length(x.tmp),sd=0.5) plot(x.tmp,y,xlab="x",ylab="y") dta.sim = data.frame(x=x.tmp,y=y) ### loess Sources16<-ISIT$Sources[ISIT$Station==16] Depth16<-ISIT$SampleDepth[ISIT$Station==16] par(mfrow=c(2,2)) for(i in c(0:2)){ m.loess = loess(Sources~SampleDepth,data=ISIT.tmp,degree=i) y.pred = predict(m.loess) rk = order(ISIT.tmp$SampleDepth) plot(Depth16,Sources16,type="p",main=paste("degree=",i)) lines(ISIT.tmp$SampleDepth[rk],y.pred[rk],col="red",lwd=2) } par(mfrow=c(2,2)) for(h in c(0.1,0.3,0.5,0.75)){ m.loess = loess(Sources~SampleDepth,data=ISIT.tmp,degree=2,span=h) y.pred = predict(m.loess) rk = order(ISIT.tmp$SampleDepth) plot(Depth16,Sources16,type="p",main=paste("span=",h)) lines(ISIT.tmp$SampleDepth[rk],y.pred[rk],col="red",lwd=2) } ### residuals of the LOESS fit m.loess = loess(Sources~SampleDepth,data=ISIT.tmp) y.pred = predict(m.loess) plot(y.pred,Sources16-y.pred,type="p",xlab="Predicted",ylab="Residuals",main="Degree=2;Span=0.75") m.loess = loess(Sources~SampleDepth,data=ISIT.tmp,span=0.3) y.pred = predict(m.loess) plot(y.pred,Sources16-y.pred,type="p",xlab="Predicted",ylab="Residuals") #### mgcv library(mgcv) op <- par(mfrow = c(2, 2), mar = c(5, 4, 1, 2),lwd=2) Sources16 <- ISIT$Sources[ISIT$Station == 16] Depth16 <- ISIT$SampleDepth[ISIT$Station == 16] plot(Depth16, Sources16, type = "p") M3 <- gam(Sources16 ~ s(Depth16, fx = FALSE, k=-1,bs = "cr")) plot(M3, se = TRUE,col="red") M3pred <- predict(M3, se = TRUE, type = "response") plot(Depth16, Sources16, type = "p") I1 <- order(Depth16) lines(Depth16[I1], M3pred$fit[I1], lty=1,col="red") lines(Depth16[I1], M3pred$fit[I1]+2*M3pred$se[I1],lty=2,col="red") lines(Depth16[I1], M3pred$fit[I1]-2*M3pred$se[I1],lty=2,col="red") E2 <- resid(M3) F2 <- fitted(M3) par(mfrow=c(2,2), mar=c(5,4,2,2)) plot(x = F2, y=E2, xlab="Fitted values", ylab="Residuals") plot(x = Depth16, y=E2, xlab="Depth", ylab="Residuals") hist(E2,main="", xlab="Residuals") #### plot several function basd on the polynormial bases library(lattice) x<-seq(0,1,length=25) co<-matrix(nrow=6,ncol=4) co[1:6,1:4]<-rnorm(mean=0,sd=5,24) f1<-co[1,1]+co[1,2]*x+co[1,3]*x^2+co[1,4]*x^3 f2<-co[2,1]+co[2,2]*x+co[2,3]*x^2+co[2,4]*x^3 f3<-co[3,1]+co[3,2]*x+co[3,3]*x^2+co[3,4]*x^3 f4<-co[4,1]+co[4,2]*x+co[4,3]*x^2+co[4,4]*x^3 f5<-co[5,1]+co[5,2]*x+co[5,3]*x^2+co[5,4]*x^3 f6<-co[6,1]+co[6,2]*x+co[6,3]*x^2+co[6,4]*x^3 xall<-rep(x,6) ID<-rep(c(1,2,3,4,5,6),each=25) f<-c(f1,f2,f3,f4,f5,f6) xyplot(f~xall|factor(ID),col=1,type="l",xlab="X values",ylab="Function f") ### divide the x ranges to 4 segments and perform analysis on each segment Data = ISIT library(lattice) Sources<-Data$Sources[Data$Station==16] Depth<-Data$SampleDepth[Data$Station==16] Range<-max(Depth)-min(Depth) Bins<-Range/4 B1<-min(Depth)+Bins B2<-min(Depth)+2*Bins B3<-min(Depth)+3*Bins plot(Depth,Sources) abline(v=B1,lty=2) abline(v=B2,lty=2) abline(v=B3,lty=2) S1<-Sources[Depth<=B1] D1<-Depth[Depth B1 & Depth<=B2] D1<-Depth[Depth > B1 & Depth<=B2] tmp<-lm(S1~D1+I(D1^2) +I(D1^3)) F1<-fitted(tmp) I1<-order(D1) lines(D1[I1],F1[I1],lwd=2,col="red") S1<-Sources[Depth > B2 & Depth<=B3] D1<-Depth[Depth > B2 & Depth<=B3] tmp<-lm(S1~D1+I(D1^2) +I(D1^3)) F1<-fitted(tmp) I1<-order(D1) lines(D1[I1],F1[I1],lwd=2,col="red") S1<-Sources[Depth > B3 ] D1<-Depth[Depth > B3 ] tmp<-lm(S1~D1+I(D1^2) +I(D1^3)) F1<-fitted(tmp) I1<-order(D1) lines(D1[I1],F1[I1],lwd=2,col="red") #### look at the effect of the number of nodes library(lattice) Data=ISIT Sources19<-Data$Sources[Data$Station==19] Depth19<-Data$SampleDepth[Data$Station==19] Depth01<-Depth19 Depth01<-Depth01-min(Depth01) Depth01<-Depth01/max(Depth01) I<-order(Depth01) Depth01<-Depth01[I] Sources19<-Sources19[I] rk<-function(x,z){ ((z-0.5)^2-1/12)*((x-0.5)^2-1/12)/4 -((abs(x-z)-0.5)^4 -0.5*(abs(x-z)-0.5)^2 +7/240)/24 } x = c(0:100)/100 z = 0.3 y = sapply(x,FUN=function(xx){rk(xx,z)}) plot(x,y,type="l") spl.X<-function(x,xk){ q<-length(xk)+2 n<-length(x) X<-matrix(1,n,q) X[,2]<-x X[,3:q]<-outer(x,xk,FUN=rk) X } YALL<-vector(length=100*8) XALL<-vector(length=100*8) IDALL<-vector(length=100*8) a1<-1 a2<-100 XkALL<-0 IDxk<-0 for (knots in 1: 8){ xk<-1:knots/(knots+1) XkALL<-c(XkALL,xk) IDxk<-c(IDxk,rep(knots,length(xk))) X<-spl.X(Depth01,xk) tmp1<-lm(Sources19~X-1) xp<-1:100/100 Xp<-spl.X(xp,xk) plot(Depth01,Sources19) lines(xp,Xp%*%coef(tmp1)) YALL[a1:a2]<-Xp%*%coef(tmp1) XALL[a1:a2]<-xp IDALL[a1:a2]<-knots a1<-a1+100 a2<-a2+100 n<-length(xk) for (i in 1:n){ abline(v=xk[i],lty=2) } } XkALL<-XkALL[-1] IDxk<-IDxk[-1] library(lattice) IDALL2<-IDALL+2 xyplot(YALL~XALL|factor(IDALL2),type="l",col=1,xlab="Depth (scaled between 0 and 1)", ylab="Sources",a1=1, panel=function(x,y,subscripts,...){ panel.lines(x,y,col=1,lwd=2) panel.points(Depth01,Sources19,col=1,cex=0.5)# a<-IDALL2[subscripts] if (a[1] == 3){for (i in 1:1) {panel.abline(v=XkALL[i],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} } if (a[1] == 4){for (i in 1:2) {panel.abline(v=XkALL[i+1],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} } if (a[1] == 5){for (i in 1:3) {panel.abline(v=XkALL[i+1+2],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} } if (a[1] == 6){for (i in 1:4) {panel.abline(v=XkALL[i+1+2+3],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} } if (a[1] == 7){for (i in 1:5) {panel.abline(v=XkALL[i+1+2+3+4],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} } if (a[1] == 8){for (i in 1:6) {panel.abline(v=XkALL[i+1+2+3+4+5],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} } if (a[1] == 9){for (i in 1:7) {panel.abline(v=XkALL[i+1+2+3+4+5+6],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} } if (a[1] == 10){for (i in 1:8) {panel.abline(v=XkALL[i+1+2+3+4+5+6+7],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} } } ) #### look at the effect of the lambda parameter spl.S<-function(xk){ q<-length(xk)+2 S<-matrix(0,q,q) S[3:q,3:q]<-outer(xk,xk,FUN=rk) S } mat.sqrt<-function(S){ d<-eigen(S,symmetric=TRUE) d$values[d$values<0]<-0 rS<-d$vectors%*%diag(d$values^0.5)%*%t(d$vectors) rS } prs.fit<-function(y,x,xk,lambda){ print(lambda) q<-length(xk)+2 n<-length(x) Xa<-rbind(spl.X(x,xk),mat.sqrt(spl.S(xk))*sqrt(lambda)) y[(n+1):(n+q)]<-0 lm(y~Xa-1) } xk<-1:7/8 xp<-1:100/100 lambda<-1e-6 #Graph showing the effect of lambda YALL<-vector(length=100*8) XALL<-vector(length=100*8) IDALL<-vector(length=100*8) a1<-1 a2<-100 lambda<-10e-7 for (i in 1:8){ a1 mod.2<-prs.fit(Sources19,Depth01,xk,lambda) Xp<-spl.X(xp,xk) YALL[a1:a2]<-Xp%*%coef(mod.2) XALL[a1:a2]<-xp IDALL[a1:a2]<-rep(lambda,100) a1<-a1+100 a2<-a2+100 lambda<-lambda*10 } xyplot(YALL~XALL|factor(IDALL),type="l",col=1,xlab="Depth (scaled between 0 and 1)", ylab="Sources",a1=1, panel=function(x,y,subscripts,...){ panel.lines(x,y,col=1,lwd=2) panel.points(Depth01,Sources19,col=1,cex=0.5)} ) #### show GCV vs lambda lambda<-10e-6 n<-length(Depth01) V<-rep(0,60) Vl<-rep(0,60) for (i in 1:60){ mod<-prs.fit(Sources19,Depth01,xk,lambda) trA<-sum(influence(mod)$hat[1:n]) rss<-sum((Sources19-fitted(mod)[1:n])^2) V[i]<-n*rss/(n-trA)^2 Vl[i]<-lambda lambda<-lambda*1.09 } plot(x=Vl,y=V,type="l",xlab="lambda",ylab="GCV",lwd=2) abline(v=0.000407,lty="dotted",lwd=2) #### one factor and one nonparametric function S8 <- ISIT$Sources[ISIT$Station == 8] D8 <- ISIT$SampleDepth[ISIT$Station == 8] S13 <- ISIT$Sources[ISIT$Station == 13] D13 <- ISIT$SampleDepth[ISIT$Station == 13] So <- c(S8, S13); De <- c(D8, D13) ID <- rep(c(8, 13), c(length(S8), length(S13))) mi <- max(min(D8), min(D13)) ma <- min(max(D8), max(D13)) I1 <- De > mi & De < ma op <- par(mfrow = c(1, 2)) plot(D8[I1], S8[I1], pch = 16, xlab = "Depth", ylab = "Sources", col = 1, main = "Station 8", xlim = c(500, 3000), ylim = c(0, 40)) plot(D13[I1], S13[I1], pch = 16, xlab = "Depth", ylab = "Sources", col = 1, main = "Station 13", xlim = c(500, 3000), ylim = c(0, 40)) par(op) fID <-factor(ID) #This construction is needed for the vis.gam M4 <- gam(So ~ s(De) + fID, subset = I1) summary(M4) anova(M4) plot(M4) par(mar=c(2,2,2,2)) vis.gam(M4,theta=120,color="heat") gam.check(M4) ### with interaction M5<-gam(So ~ s(De)+ s(De, by = as.numeric(ID == 13)), subset = I1) anova(M5) plot(M5) gam.check(M5) anova(M4,M5, test="F") ### anotherway M6 <- gam(So~s(De,by = as.numeric(ID== 8))+ s(De,by = as.numeric(ID== 13))+factor(ID), subset=I1) #detach(package:gam) #detach(package:mgcv)