### Attach the dependencies with library() so that a missing package stops the
### script here, rather than require() returning FALSE and the script failing
### later with "could not find function".
library(MASS)
library(ggplot2)
data(iris)


### Q2.1
### Fit LDA on the iris data through the matrix/data-frame interface:
### predictors first, class labels second.
irisLda <- lda(x = iris[, -5], grouping = iris$Species)

### Fit again through the formula interface; this replaces the fit above.
irisLda <- lda(Species ~ ., data = iris)


### Number of training observations whose predicted class differs from the
### true species.
sum(iris$Species != predict(irisLda)$class)


### A single new observation, classified with the fitted model.
newObs <- data.frame(
  Sepal.Length = 5.9,
  Sepal.Width = 2.7,
  Petal.Length = 4.6,
  Petal.Width = 1.3
)
predict(irisLda, newdata = newObs)


### Q2.2

plot(irisLda)      ### The default MASS plot is quite hard to read.

### Project the centred measurements onto the discriminant axes and keep the
### species label alongside for plotting.
irisProjection <- cbind(
  scale(as.matrix(iris[, -5]), scale = FALSE) %*% irisLda$scaling,
  iris[, 5, drop = FALSE]
)
p <- ggplot(data = irisProjection, aes(x = LD1, y = LD2, colour = Species))
p + geom_point()   ### This is better.

### Add the new point that we will predict to the plot.

### Centre the new observation(s) with the training column means before
### projecting. sweep() subtracts the means column by column, so this stays
### correct when newObs holds more than one row.
newProj <- sweep(as.matrix(newObs), 2, colMeans(iris[, -5])) %*%
  irisLda$scaling

### annotate() draws the marker once per new observation; a geom_point() layer
### with constant aesthetics would inherit the plot data and overplot the same
### marker once for every row of irisProjection.
p + geom_point() +
  annotate("point", x = newProj[, "LD1"], y = newProj[, "LD2"],
           colour = "red", size = 4)


### Q2.3

### Refit with unequal class priors (given in the order of the Species levels).
irisLda <- lda(Species ~ ., data = iris, prior = c(0.20, 0.05, 0.75))

### Number of training observations misclassified under the new priors.
sum(iris$Species != predict(irisLda)$class)

### Open circles around the points show the predicted class.
p +
  geom_point() +
  geom_point(aes(colour = predict(irisLda)$class), shape = 1, size = 4)

### Q2.4
### It can always be an idea to transform the data if they don't look Gaussian.
### Reshape iris to long format (Species, variable, value) in base R: melt() is
### not provided by MASS or ggplot2, the packages loaded at the top of this
### script.
irisMelted <- data.frame(
  Species = rep(iris$Species, times = ncol(iris) - 1L),
  variable = factor(
    rep(names(iris)[-5], each = nrow(iris)),
    levels = names(iris)[-5]
  ),
  value = unlist(iris[, -5], use.names = FALSE)
)

### Scaled density of every measurement, one facet row per species, first on
### the raw scale and then on the log scale. Written with ggplot() and
### after_stat() because qplot() and the ..scaled.. notation are deprecated in
### current ggplot2 releases.
ggplot(irisMelted, aes(x = value, y = after_stat(scaled), fill = variable)) +
  geom_density() +
  facet_grid(Species ~ .)
ggplot(
  irisMelted,
  aes(x = log(value), y = after_stat(scaled), fill = variable)
) +
  geom_density() +
  facet_grid(Species ~ .)

### The log transformation does not work miracles, but let's see how it performs.

### Copy iris, replace the four measurement columns by their logarithms, and
### fit LDA on the transformed data.
irisLog <- iris
irisLog[1:4] <- lapply(iris[1:4], log)
irisLogLda <- lda(Species ~ ., data = irisLog)

### Plotting the new projection

### Centre the log measurements column-wise, project them onto the
### discriminant axes of the log-scale fit, and attach the species labels.
irisLogProjection <- cbind(
  sweep(as.matrix(irisLog[, -5]), 2, colMeans(irisLog[, -5])) %*%
    irisLogLda$scaling,
  irisLog["Species"]
)
ggplot(irisLogProjection, aes(x = LD1, y = LD2, colour = Species)) +
  geom_point()

### Number of training observations misclassified after the log transform.

sum(iris$Species != predict(irisLogLda)$class)