require(MASS)
require(ggplot2)
require(reshape2)  ## provides melt(), used in Q3.2 below
data(iris)
newData <- iris[51:150, ]
### Explicitly drop the unused setosa level from the factor. This matters
### for glm(): with the setosa level kept, the binomial family would code
### the response as setosa vs. the rest, i.e. all observations as successes.
newData[, 5] <- as.factor(as.vector(newData[, 5]))
irisGlm <- glm(Species ~ ., data = newData, family = binomial)
irisLda <- lda(Species ~ ., data = newData)
newObs <- data.frame(Sepal.Length = 5.9, Sepal.Width = 2.7,
                     Petal.Length = 4.6, Petal.Width = 1.3)
predict(irisGlm, newObs, type = "response")  ### predicted class probability
### The automatic encoding in the logistic regression model codes
### virginica as 1, so the probability above is the estimated
### probability of virginica. It is close to 0, hence we classify
### the observation as versicolor.
predict(irisLda, newObs)
### You can try various model reductions.
summary(irisGlm)
summary(glm(Species ~ ., data = newData[, -1], family = binomial))
### Here Sepal.Length is dropped, say; other reductions can be tried
### in the same way ...

### Q3.2
trainPosterior <- data.frame(logis.virginica = predict(irisGlm, type = "response"),
                             lda.virginica = predict(irisLda)$posterior[, 2])
round(trainPosterior, digits = 4)
### Plot both sets of posterior probabilities together with their
### difference (shifted by 0.5 so it fits on the same scale).
qplot(x = index, y = value, colour = variable,
      data = melt(cbind(data.frame(index = 1:100,
                                   diff = trainPosterior[, 1] - trainPosterior[, 2] + 0.5),
                        trainPosterior),
                  id.vars = "index"))

### Q3.3
mu1Hat <- irisLda$means[2, ]  ## virginica group mean
mu0Hat <- irisLda$means[1, ]  ## versicolor group mean
A <- cbind(matrix(c(rep(1, 50), rep(0, 50))),
           matrix(c(rep(0, 50), rep(1, 50))))
resid <- as.matrix(newData[, -5] - A %*% irisLda$means)
SigmaHat <- t(resid) %*% resid / 98  ## pooled covariance estimate, n - 2 = 98
### Under the LDA model the log-odds of virginica are linear in x, with
### beta = SigmaHat^{-1} (mu1 - mu0) and
### beta_0 = (mu0' SigmaHat^{-1} mu0 - mu1' SigmaHat^{-1} mu1) / 2;
### the equal (50/50) priors contribute log(1) = 0 to the intercept.
tauHatBeta0 <- (t(mu0Hat) %*% solve(SigmaHat, mu0Hat) -
                t(mu1Hat) %*% solve(SigmaHat, mu1Hat)) / 2
tauHatBeta <- solve(SigmaHat, mu1Hat - mu0Hat)
hat <- data.frame(tauHat = c(tauHatBeta0, tauHatBeta),
                  betaHat = coef(irisGlm))
row.names(hat) <- c("beta_0", "beta_1", "beta_2", "beta_3", "beta_4")
print(hat)
### Checking the correctness of the computations; the differences below
### should all be numerically zero.
h <- function(z) exp(z) / (1 + exp(z))
h(as.numeric(tauHatBeta0) + as.matrix(newData[, -5]) %*% tauHatBeta) -
  trainPosterior[, 2]
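
### A small additional check (a sketch, not part of the solution above)
### of the coding claim: glm() with family = binomial uses the first
### factor level as "failure" and the second as "success", so virginica
### should be the level coded as 1.
levels(newData$Species)     ## "versicolor" "virginica"
contrasts(newData$Species)  ## dummy coding: virginica -> 1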
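
### A sketch of a more systematic model reduction than refitting by hand;
### drop1() with likelihood ratio tests and stepwise AIC selection are
### standard tools, only hinted at above.
drop1(irisGlm, test = "Chisq")              ## LR test for dropping each predictor
irisGlmReduced <- step(irisGlm, trace = 0)  ## backward selection by AIC
summary(irisGlmReduced)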
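
### Finally, a sketch comparing the two classifiers on the training data.
### Thresholding the logistic probabilities at 0.5 is an assumption, but
### it matches the LDA rule under the equal (50/50) priors.
glmClass <- ifelse(predict(irisGlm, type = "response") > 0.5,
                   "virginica", "versicolor")
table(glm = glmClass, lda = predict(irisLda)$class)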