################################################################################
### Logistic regression and multinomial regression
################################################################################

### Standard estimation of a logistic regression model.
SAheart <- read.table("http://www-stat.stanford.edu/~tibs/ElemStatLearn/datasets/SAheart.data",
                      sep = ",", header = TRUE, row.names = 1)
SAheart[1, ]
SAheartGlm <- glm(chd ~ ., data = SAheart, family = binomial)
summary(SAheartGlm)

### Below, we fit a multinomial model to the vowel data.
require(VGAM)     ### Contains an implementation of multinomial regression
require(ggplot2)
vowels <- read.table("http://www-stat-class.stanford.edu/%7Etibs/ElemStatLearn/datasets/vowel.train",
                     row.names = 1, header = TRUE, sep = ",")
vowelsVglm <- vglm(y ~ ., data = vowels, family = multinomial)
summary(vowelsVglm)

### The following plots make pair-wise comparisons of the linear functions used
### for discrimination in the multinomial logit model.
predictor <- predict(vowelsVglm)  ### The linear predictors on the training data
select <- function(i, j) vowels[, 1] %in% c(i, j)  ### Selects observations from groups i and j

### Scatter plot of linear predictors k and l for observations in groups i and j.
sepPlot <- function(i, j, k = i, l = j) {
  selected <- select(i, j)
  qplot(predictor[selected, k], predictor[selected, l],
        colour = as.factor(vowels[selected, 1]), shape = I(20)) +
    geom_abline(intercept = 0, slope = 1, colour = I("black")) +
    scale_colour_discrete("Group") + xlab("") + ylab("")
}
sepPlot(1, 4)
sepPlot(2, 4)

### Density of the difference between linear predictors k and l for groups i and j.
densPlot <- function(i, j, k = i, l = j) {
  selected <- select(i, j)
  qplot(predictor[selected, k] - predictor[selected, l], geom = "density",
        alpha = I(0.6), fill = as.factor(vowels[selected, 1])) +
    geom_vline(xintercept = 0, colour = "black") +
    scale_fill_discrete("Group") + xlab("") + ylab("")
}
densPlot(1, 4)
densPlot(2, 4)

################################################################################
### Penalized logistic regression and high-dimensional data
################################################################################

require(stepPlr)  ### Implements logistic regression with quadratic penalization
require(glmnet)   ### Elastic net penalty for glm models
require(ggplot2)
SRBCTxtrain <- read.table("http://www-stat-class.stanford.edu/~tibs/ElemStatLearn/datasets/khan.xtrain")
SRBCTytrain <- read.table("http://www-stat-class.stanford.edu/~tibs/ElemStatLearn/datasets/khan.ytrain")
SRBCTxtest <- read.table("http://www-stat-class.stanford.edu/~tibs/ElemStatLearn/datasets/khan.xtest")
SRBCTytest <- read.table("http://www-stat-class.stanford.edu/~tibs/ElemStatLearn/datasets/khan.ytest")

### Elastic net -- with the default alpha = 1 this is the lasso -- using the
### multinomial model. The data files store samples as columns, hence the
### transposition.
SRBCTglmnet <- glmnet(t(SRBCTxtrain), as.factor(t(SRBCTytrain)), family = "multinomial")
par(mfcol = c(2, 2))
plot(SRBCTglmnet)  ### Coefficient profiles, one panel per class
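### The lasso path above is computed over a grid of lambda values, but nothing
### so far selects a penalty. As an illustrative sketch (not part of the
### original analysis), cross-validation with cv.glmnet can be used to choose
### lambda; the object name SRBCTcv and the choice of 5 folds are assumptions
### made here for the example.
SRBCTcv <- cv.glmnet(t(SRBCTxtrain), as.factor(t(SRBCTytrain)),
                     family = "multinomial", nfolds = 5)
plot(SRBCTcv)       ### Cross-validated deviance as a function of log(lambda)
SRBCTcv$lambda.min  ### The lambda value minimizing the cross-validation error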
### Computing the predictions on the training and test data and plotting the
### error curves as functions of lambda.
SRBCTpredictTrain <- predict(SRBCTglmnet, t(SRBCTxtrain), type = "class")
TrainErrors <- apply(apply(SRBCTpredictTrain, 2, "!=", SRBCTytrain), 2, sum, na.rm = TRUE)
qplot(SRBCTglmnet$lambda, TrainErrors, geom = "line")

SRBCTpredict <- predict(SRBCTglmnet, t(SRBCTxtest), type = "class")
TestErrors <- apply(apply(SRBCTpredict, 2, "!=", SRBCTytest), 2, sum, na.rm = TRUE)
qplot(SRBCTglmnet$lambda, TestErrors, geom = "line")

### Slightly nicer plot with both error curves in one figure.
require(reshape2)  ### Provides melt
Errors <- melt(data.frame(lambda = SRBCTglmnet$lambda,
                          TrainErrors = TrainErrors,
                          TestErrors = TestErrors),
               id = "lambda")
qplot(lambda, value, data = Errors, colour = variable, geom = "line")

### Extracting the non-null coefficients for s = 0.1.
SRBCTcoef <- coefficients(SRBCTglmnet, s = 0.1)
SRBCTcoef <- as.data.frame(lapply(SRBCTcoef, as.matrix))
SRBCTcoef[apply(abs(SRBCTcoef), 1, sum) > 0, ]
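### As a further illustrative sketch (not in the original script): the test-set
### confusion matrix for the penalty s = 0.1 used above. The object names
### SRBCTclass and SRBCTconfusion are assumptions made here for the example.
SRBCTclass <- predict(SRBCTglmnet, t(SRBCTxtest), type = "class", s = 0.1)
SRBCTconfusion <- table(predicted = SRBCTclass, observed = unlist(SRBCTytest))
SRBCTconfusion  ### Rows: predicted class; columns: observed class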