require(MASS)
require(ggplot2)
require(reshape2)  ## provides melt(), used in Q3.2 below
data(iris)
newData <- iris[51:150, ]
### Explicitly drop the unused setosa level from the factor. This matters
### for glm(): with the setosa level kept, the binomial family would code
### the response as setosa vs. the rest, i.e. all observations as successes.
newData[, 5] <- as.factor(as.vector(newData[, 5]))
irisGlm <- glm(Species ~ ., data = newData, family = binomial)
irisLda <- lda(Species ~ ., data = newData)
newObs <- data.frame(Sepal.Length = 5.9, Sepal.Width = 2.7,
                     Petal.Length = 4.6, Petal.Width = 1.3)
predict(irisGlm, newObs, type = "response")  ### predicted class probability
### The automatic encoding in the logistic regression model codes
### virginica as 1, so the probability above is the estimated
### probability of virginica. It is close to 0, hence we classify
### the observation as versicolor.
predict(irisLda, newObs)
### You can try various model reductions.
summary(irisGlm)
summary(glm(Species ~ ., data = newData[, -1], family = binomial))
### Here Sepal.Length is dropped, say; other reductions can be tried
### in the same way ...

### Q3.2
trainPosterior <- data.frame(logis.virginica = predict(irisGlm, type = "response"),
                             lda.virginica = predict(irisLda)$posterior[, 2])
round(trainPosterior, digits = 4)
### Plot both sets of posterior probabilities together with their
### difference (shifted by 0.5 so it fits on the same scale).
qplot(x = index, y = value, colour = variable,
      data = melt(cbind(data.frame(index = 1:100,
                                   diff = trainPosterior[, 1] - trainPosterior[, 2] + 0.5),
                        trainPosterior),
                  id.vars = "index"))

### Q3.3
mu1Hat <- irisLda$means[2, ]  ## virginica group mean
mu0Hat <- irisLda$means[1, ]  ## versicolor group mean
A <- cbind(matrix(c(rep(1, 50), rep(0, 50))),
           matrix(c(rep(0, 50), rep(1, 50))))
resid <- as.matrix(newData[, -5] - A %*% irisLda$means)
SigmaHat <- t(resid) %*% resid / 98  ## pooled covariance estimate, n - 2 = 98
### Under the LDA model the log-odds of virginica are linear in x, with
### beta = SigmaHat^{-1} (mu1 - mu0) and
### beta_0 = (mu0' SigmaHat^{-1} mu0 - mu1' SigmaHat^{-1} mu1) / 2;
### the equal (50/50) priors contribute log(1) = 0 to the intercept.
tauHatBeta0 <- (t(mu0Hat) %*% solve(SigmaHat, mu0Hat) -
                t(mu1Hat) %*% solve(SigmaHat, mu1Hat)) / 2
tauHatBeta <- solve(SigmaHat, mu1Hat - mu0Hat)
hat <- data.frame(tauHat = c(tauHatBeta0, tauHatBeta),
                  betaHat = coef(irisGlm))
row.names(hat) <- c("beta_0", "beta_1", "beta_2", "beta_3", "beta_4")
print(hat)
### Checking the correctness of the computations; the differences below
### should all be numerically zero.
h <- function(z) exp(z) / (1 + exp(z))
h(as.numeric(tauHatBeta0) + as.matrix(newData[, -5]) %*% tauHatBeta) -
  trainPosterior[, 2]
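
### A small additional check (a sketch, not part of the solution above)
### of the coding claim: glm() with family = binomial uses the first
### factor level as "failure" and the second as "success", so virginica
### should be the level coded as 1.
levels(newData$Species)     ## "versicolor" "virginica"
contrasts(newData$Species)  ## dummy coding: virginica -> 1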
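
### A sketch of a more systematic model reduction than refitting by hand;
### drop1() with likelihood ratio tests and stepwise AIC selection are
### standard tools, only hinted at above.
drop1(irisGlm, test = "Chisq")              ## LR test for dropping each predictor
irisGlmReduced <- step(irisGlm, trace = 0)  ## backward selection by AIC
summary(irisGlmReduced)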
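
### Finally, a sketch comparing the two classifiers on the training data.
### Thresholding the logistic probabilities at 0.5 is an assumption, but
### it matches the LDA rule under the equal (50/50) priors.
glmClass <- ifelse(predict(irisGlm, type = "response") > 0.5,
                   "virginica", "versicolor")
table(glm = glmClass, lda = predict(irisLda)$class)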