require(rpart)    ## For trees
require(gam)      ## Estimation of generalized additive models
require(ggplot2)
require(reshape2) ## For 'melt', used below to reshape data for ggplot2

### Illustration of the relation to naive Bayes on the genetic
### fingerprint data
load("prac7.RData")

### For comparison purposes we reorder the columns.
prac7Train <- prac7Train[, c(sort(names(prac7Train)[-16]), "population")]

### The logistic regression model
summary(glm(population ~ ., data = prac7Train, family = binomial))

### Using generalized additive models on the genetic fingerprint data
df <- 6
form <- paste("population ~",
              paste("s(", names(prac7Train)[-16], ",", df, ")",
                    sep = "", collapse = "+"))
form <- as.formula(form)
prac7Gam <- gam(form, data = prac7Train, family = binomial)
par(mfrow = c(4, 4), mar = c(4, 1, 1, 1))
plot(prac7Gam, se = TRUE)
summary(prac7Gam)

require(VGAM) ## Alternative to gam. More general; contains multinomial regression.
prac7Vgam <- vgam(form, data = prac7Train, family = binomialff)
par(mfrow = c(4, 4), mar = c(4, 1, 1, 1))
plot(prac7Vgam, se = TRUE)
summary(prac7Vgam)

### Extracting information to produce a similar plot with ggplot2
pred <- melt(predict(prac7Gam, type = "terms"))$value
termPredict <- cbind(data.frame(predict = pred),
                     melt(prac7Train[, -16]))
qplot(value, predict, data = termPredict, geom = "line",
      ylim = c(-10, 10)) +
  facet_wrap(~ variable, scales = "free_x")

### Automatic selection of tuning parameters via the package
### mgcv. There are conflicts with the gam and perhaps also the VGAM
### package, so we detach them. Alternatively, you can restart R to
### make a clean start.
detach(package:gam)
detach(package:VGAM)
require(mgcv)
form <- paste("population ~",
              paste("s(", names(prac7Train)[-16], ")",
                    sep = "", collapse = "+"))
form <- as.formula(form)

### The actual gam fit using the mgcv package. It uses more or less
### the same syntax as 'gam' from the gam package, but the formulae
### are a little different. For one, there are no degrees of freedom
### in the specification of the terms we smooth, because the tuning
### parameters are selected automatically. This is done by a
### simultaneous optimization of the likelihood over the parameter
### space and of a suitable error criterion over the tuning parameter
### space.
prac7Gam <- gam(form, data = prac7Train, family = binomial)
pred <- melt(predict(prac7Gam, type = "terms"))$value
termPredict <- cbind(data.frame(predict = pred),
                     melt(prac7Train[, -16]))
qplot(value, predict, data = termPredict, geom = "line",
      ylim = c(-10, 10)) +
  facet_wrap(~ variable, scales = "free_x")
summary(prac7Gam)

### Note that for those variables whose effective degrees of freedom
### equal 1.00 in the table above, the smoother has been reduced to a
### linear function.
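### A sketch (not in the original practical) of how one might assess
### the mgcv fit on the test data. It assumes that prac7Test, which
### is loaded from prac7.RData and used below, has the same columns
### as prac7Train and that 'population' is a two-level factor. Then
### predict(..., type = "response") returns fitted probabilities of
### the second factor level, which we threshold at 0.5.
pHat <- predict(prac7Gam, newdata = prac7Test, type = "response")
yHatGam <- factor(levels(prac7Train$population)[1 + (pHat > 0.5)],
                  levels = levels(prac7Train$population))
table(yHatGam, prac7Test$population)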
################################################################################
### Trees using 'rpart' - the R implementation of CART
################################################################################

### Simple example with the iris data set.
data(iris)
irisTree <- rpart(Species ~ ., data = iris)
plot(irisTree, margin = 0.1)
text(irisTree, use.n = TRUE)
printcp(irisTree)

### Then returning to the data from prac 7
prac7Tree <- rpart(population ~ ., data = prac7Train)
plot(prac7Tree, uniform = TRUE, margin = 0.1)
text(prac7Tree)

### Different choice of complexity parameter
prac7Tree <- rpart(population ~ ., data = prac7Train,
                   control = list(cp = 0.0001))
plot(prac7Tree, uniform = TRUE, margin = 0.1)
text(prac7Tree)
prac7Tree <- prune(prac7Tree, cp = 0.01)
plot(prac7Tree, uniform = TRUE, margin = 0.1)
text(prac7Tree)

### Computing the training error
yHat <- predict(prac7Tree, type = "class")
y <- prac7Train$population
pred <- table(yHat, y)
print(pred)
print(pred / sum(pred), digits = 3)

### Computing the test error
yHat <- predict(prac7Tree, prac7Test, type = "class")
yTest <- prac7Test$population
pred <- table(yHat, yTest)
print(pred)
print(pred / sum(pred), digits = 3)
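### A sketch (not part of the original script) of selecting the
### complexity parameter from the cross-validation results reported
### by printcp/plotcp, instead of fixing cp = 0.01 by hand. We regrow
### a large tree and prune back to the cp value minimizing the
### cross-validated error 'xerror'. Since 'xerror' is based on a
### random cross-validation split, the selected cp can vary between
### runs.
prac7TreeFull <- rpart(population ~ ., data = prac7Train,
                       control = list(cp = 0.0001))
plotcp(prac7TreeFull)
cpTab <- prac7TreeFull$cptable
cpOpt <- cpTab[which.min(cpTab[, "xerror"]), "CP"]
prac7TreeOpt <- prune(prac7TreeFull, cp = cpOpt)
yHatOpt <- predict(prac7TreeOpt, prac7Test, type = "class")
mean(yHatOpt != prac7Test$population) ## Test misclassification rate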