require(ggplot2) require(gam) SA <- read.table("http://www-stat.stanford.edu/~tibs/ElemStatLearn/datasets/SAheart.data",sep=",",head=TRUE,row.names=1) summary(SA) names(SA) ###Q6.1 SAGam <- gam(chd ~ s(sbp) + s(tobacco) + s(ldl) + s(adiposity) + s(typea) + s(obesity) + s(alcohol) + s(age) + famhist,data=SA,family=binomial) par(mfcol=c(3,3)) plot(SAGam,se=TRUE) ###Q6.2 ###For fast programming we rely on the cv.glm function in the ###boot library. library(boot) zeroOneCost <- function(r, pi=0) mean(abs(r-pi)>0.5) likelihoodCost <- function(r,pi=0) -mean(log(r*pi+(1-r)*(1-pi))) df <- seq(1,3,by=0.1) #Using cross-validation to select the df-parameter. The conclusion is #dependent upon the run (and random devisions of the dataset), but in a couple of runs #it seems to be df in the range 3-5 SAGamCv <- numeric(length(df)) for(i in seq(along=df)){ formGam <- as.formula(paste("chd~famhist+",paste("s(",names(SA[1,1:9])[-5], ",df=", df[i], ")",sep="",collapse="+"))) SAGam <- gam(formGam,family=binomial,data=SA) tmp <- cv.glm(SA,SAGam,zeroOneCost,7) set.seed(tmp$seed) SAGamCv[i] <- tmp$delta[1] } qplot(df,SAGamCv,geom="line") ###For a comparision, we can also use a (pseudo) AIC criteria. It is pseudo because ###there is a smoother involved and we use the effective degrees of freedom SAGamAic <- numeric(length(df)) for(i in seq(along=df)){ formGam <- as.formula(paste("chd~famhist+",paste("s(",names(SA[1,1:9])[-5], ",df=", df[i], ")",sep="",collapse="+"))) SAGam <- gam(formGam,family=binomial,data=SA) SAGamAic[i] <- SAGam$aic } qplot(df,SAGamAic,geom="line") ###Q6.3 for(i in seq(along=df)){ formGam <- as.formula(paste("chd~famhist+",paste("s(",names(SA[1,1:9])[-5], ",df=", df[i], ")",sep="",collapse="+"))) SAGam <- gam(formGam,family=binomial,data=SA) tmp <- cv.glm(SA,SAGam,likelihoodCost,7) set.seed(tmp$seed) SAGamCv[i] <- tmp$delta[1] } qplot(df,SAGamCv,geom="line") ###Q6.4 Close R and restart require(mgcv) SA <- read.table("http://www-stat.stanford.edu/~tibs/ElemStatLearn/datasets/SAheart.data",sep=",",head=TRUE,row.names=1) formGam <- as.formula(paste("chd~famhist+",paste("s(",names(SA[1,1:9])[-5],")",sep="",collapse="+"))) SAGam <- gam(formGam,family=binomial,data=SA) summary(SAGam) par(mfcol=c(3,3)) plot(SAGam,se=TRUE)