require(splines) ### Loading the data. There are 330 observations in this dataset. LAozone <- read.table("http://www-stat-class.stanford.edu/%7Etibs/ElemStatLearn/datasets/LAozone.data", sep = ",", header = TRUE) rangedpg <- range(LAozone[, "dpg"]) ### For later use nrTrain <- 220 nrTest <- 330 - nrTrain set.seed(222) train <- sample(1:330, nrTrain) LAozoneTrain <- LAozone[train, ] LAozoneTest <- LAozone[!(1:330 %in% train), ] ################################################################################ ### Q6.1 ################################################################################ plotSmoothOzone <- function(df = 10, root = 1/3) { ozoneSmooth <- smooth.spline(LAozoneTrain[, "dpg"], LAozoneTrain[, "ozone"]^root, df = df) plot(ozoneSmooth, xlim=c(-60, 110), ylim = range(LAozoneTrain[, "ozone"]^root), type = "l") points(LAozoneTrain[, "dpg"], LAozoneTrain[, "ozone"]^root, pch = 20) } plotSmoothOzone() ################################################################################ ### Q6.2 ################################################################################ plotSmoothEffect <- function(df = 10, root = 1/3){ LAozoneLm <- lm(ozone^root ~ bs(dpg, df) + vh + wind + humidity + temp + ibh + ibt + vis + doy, data = LAozoneTrain) r <- range(LAozoneTrain[, "dpg"]) dpg <- seq(r[1], r[2], length.out = 100) ## Creating a data frame of artificial data for prediction You have ## to be a little careful here with 'bs' if you extract the ## coefficients and do the prediction of the term yourself because ## 'bs' needs to be called in the prediction with the same knots as ## in the model fitting. This is automatic when using 'predict'. newdata <- cbind(dpg, LAozoneTrain[rep(1, 100), -7, drop = FALSE]) plot(dpg, predict(LAozoneLm, newdata, type = "terms", terms = 1), type = "l") rug(LAozoneTrain[,"dpg"]) } plotSmoothEffect() ################################################################################ ### 5.3 ################################################################################ anovaSmoothEffect <- function(df = 10, root = 1/3){ LAozoneLm <- lm(ozone^root ~ dpg + vh + wind + humidity + temp + ibh + ibt + vis + doy, data=LAozoneTrain) LAozoneLmSmooth <- lm(ozone^root ~ bs(dpg, df) + vh + wind + humidity + temp + ibh + ibt + vis + doy, data=LAozoneTrain) anova(LAozoneLm, LAozoneLmSmooth) } anovaSmoothEffect() anovaSmoothEffect(20) anovaSmoothEffect(30) ###5.4 RSStrain <- function(df = 10, root = 1/3){ ## rangedpg is a globally defined variable that specifies the interval of where we will do LAozoneLmSmooth <- lm(ozone^root ~ bs(dpg, df, Boundary.knots = rangedpg) + vh + wind + humidity + temp + ibh + ibt + vis + doy, data=LAozoneTrain) return(sum((LAozoneTrain[, "ozone"]^root - predict(LAozoneLmSmooth))^2)/nrTrain) } RSStest <- function(df = 10, root = 1/3){ LAozoneLmSmooth <- lm(ozone^root ~ bs(dpg, df, Boundary.knots = rangedpg) + vh + wind + humidity + temp + ibh + ibt + vis + doy, data = LAozoneTrain) return(sum((LAozoneTest[, "ozone"]^root - predict(LAozoneLmSmooth, LAozoneTest))^2)/nrTest) } rssTrain <- sapply(3:20, RSStrain) rssTest <- sapply(3:20, RSStest) rng <- range(c(rssTrain, rssTest)) plot(3:20, rssTrain, type = "l", col = "red", lwd = 2, ylim = rng + c(-1, 1) * (rng[2] - rng[1]) * 0.05) lines(3:20, rssTest, col = "blue", lwd = 2)