# Linear discriminant analysis (LDA) of the iris data set: fitting,
# prediction, and visualisation of the discriminant projection (Q2.1 - Q2.4).
#
# library() is used rather than require() so that a missing package stops the
# script immediately instead of failing later with an obscure error.
library(MASS)
library(ggplot2)

data(iris)

# The four numeric measurement columns (everything except the class label).
predictors <- setdiff(names(iris), "Species")

# Project observations onto the discriminant axes of an lda fit.
#
# obs:    data frame or matrix with one row per observation and one column per
#         predictor, in the same column order as the data used for the fit.
# fit:    object returned by MASS::lda(); only its `scaling` matrix is used.
# center: numeric vector with one value per predictor, subtracted column-wise
#         before projecting.
#
# Returns a matrix with one row per observation and one column per
# discriminant axis (column names are taken from `fit$scaling`).
projectLda <- function(obs, fit, center) {
  # sweep() subtracts center[j] from column j for any number of rows; a plain
  # `matrix - vector` recycles down the columns and is only right for one row.
  centered <- sweep(as.matrix(obs), 2, center, FUN = "-")
  centered %*% fit$scaling
}

### Q2.1 ----

# Fit LDA with the x/grouping interface ...
irisLda <- lda(iris[, predictors], iris[, "Species"])

# ... and with the formula interface (this fit is the one used below).
irisLda <- lda(Species ~ ., data = iris)

# Number of misclassified observations on the training data.
sum(predict(irisLda)$class != iris$Species)

# Classify a single new observation.
newObs <- data.frame(
  Sepal.Length = 5.9,
  Sepal.Width = 2.7,
  Petal.Length = 4.6,
  Petal.Width = 1.3
)
predict(irisLda, newObs)

### Q2.2 ----

# The default plot method is hard to read ...
plot(irisLda)

# ... so project the mean-centred training data onto LD1/LD2 and plot that
# with ggplot2 instead.
irisCenter <- colMeans(iris[, predictors])
irisProjection <- data.frame(
  projectLda(iris[, predictors], irisLda, irisCenter),
  Species = iris$Species
)
p <- ggplot(data = irisProjection, aes(x = LD1, y = LD2, col = Species))
p + geom_point()

# Add the new observation to the plot, centred with the training means.
# Selecting columns by name guards against a different column order in newObs.
newProj <- projectLda(newObs[, predictors, drop = FALSE], irisLda, irisCenter)

# The highlight layer gets its own one-row data set; inheriting the plot data
# would draw one red point per training row on top of each other.
p +
  geom_point() +
  geom_point(
    data = as.data.frame(newProj),
    aes(x = LD1, y = LD2),
    inherit.aes = FALSE,
    colour = "red",
    size = 4
  )

### Q2.3 ----

# Refit with unequal class priors; the values follow the order of
# levels(iris$Species).
irisLda <- lda(Species ~ ., data = iris, prior = c(0.20, 0.05, 0.75))

# Number of misclassified observations on the training data.
sum(predict(irisLda)$class != iris$Species)

# Filled points show the true species, the surrounding circles show the class
# predicted under the new priors (drawn on the projection computed in Q2.2).
priorClass <- predict(irisLda)$class
p +
  geom_point() +
  geom_point(aes(col = priorClass), shape = 1, size = 4)

### Q2.4 ----

# It can be worth transforming the data if they do not look Gaussian.
# Long format: one row per (observation, measurement) pair. Built with base R
# because melt() is not provided by either of the packages loaded above.
irisMelted <- data.frame(
  Species = rep(iris$Species, times = length(predictors)),
  variable = factor(rep(predictors, each = nrow(iris)), levels = predictors),
  value = unlist(iris[, predictors], use.names = FALSE)
)

# Scaled density of every measurement per species, raw and log-transformed.
ggplot(irisMelted, aes(x = value, y = after_stat(scaled), fill = variable)) +
  geom_density() +
  facet_grid(Species ~ .)
ggplot(
  irisMelted,
  aes(x = log(value), y = after_stat(scaled), fill = variable)
) +
  geom_density() +
  facet_grid(Species ~ .)

# The log transformation does not work miracles, but let's see how it does.
irisLog <- iris
irisLog[, predictors] <- log(iris[, predictors])
irisLogLda <- lda(Species ~ ., data = irisLog)

# Plot the projection of the log-transformed data.
irisLogProjection <- data.frame(
  projectLda(
    irisLog[, predictors],
    irisLogLda,
    colMeans(irisLog[, predictors])
  ),
  Species = irisLog$Species
)
ggplot(data = irisLogProjection, aes(x = LD1, y = LD2, col = Species)) +
  geom_point()

# Number of misclassified observations for the log-transformed fit.
sum(predict(irisLogLda)$class != iris$Species)