require(MASS) #Contains the dataset iris used require(ggplot2) vowels <- read.table("http://www-stat-class.stanford.edu/%7Etibs/ElemStatLearn/datasets/vowel.train",row.names=1,head=TRUE,sep=",") vowelsScaled <- vowels vowelsScaled[,-1] <- scale(vowels[,-1],scale=FALSE) vowelsLda <- lda(y~.,data=vowels) ###Computations of the group means X <- model.matrix(~y-1,data=data.frame(y=factor(vowels[,1]))) GM <- X %*% solve(t(X) %*% X, t(X) %*% as.matrix(vowels[,-1])) ###Computation of the singular value decomposition for the residuals ###and the scalings via a singular value decomposition of the ###centroid matrix W <- svd(vowels[,-1] - GM) scalings <- W$v %*% diag(1/W$d) %*% svd(scale(GM,scale=FALSE) %*% W$v %*% diag(1/W$d))$v scalings/vowelsLda$scaling ###Scalings er orthogonal in the inner product given by hat{Sigma}!?? t(scalings) %*% t(as.matrix(vowels[,-1])-GM) %*% (as.matrix(vowels[,-1])-GM) %*% scalings ###Investigations of the principal components and the discrimination directions. ###This plot shows the projection onto the first two principal components computed using ###the V-matrix in the singular value decomposition for the projection as shown in the lecture. vowelsSvd <- svd(vowelsScaled[,-1]) p <- qplot(as.matrix(vowelsScaled[,-1]) %*% vowelsSvd$v[,1],as.matrix(vowelsScaled[,-1]) %*% vowelsSvd$v[,2]) p <- p + geom_point(aes(col=factor(vowelsScaled[,1]))) p ###This plot is identical but relying on the U and D in the singular value decomposition. p <- qplot(vowelSvd$u[,1]*vowelsSvd$d[1],vowelsSvd$u[,2]*vowelsSvd$d[2]) p <- p + geom_point(aes(col=factor(vowelsScaled[,1]))) p ###The following two commands plot the projected means onto the figure. ###It results in a warning on my computer, ignore that. tmp <- scale(vowelsLda$mean,scale=FALSE) %*% vowelsSvd$v[,c(1,2)] p + geom_point(aes(x=tmp[,1],y=tmp[,2],fill=factor(vowelsScaled[,1])),shape=23,size=8) ###Plot of the projection onto the canonical coordinates instead using the results computed by ###the lda function. Both for this plot and the following both the data and the group means ###have been centered before the projection. This does not change the image except for a translation. ###It seems to be the standard that one plots the projection of the centered values. p <- qplot(as.matrix(vowelsScaled[,-1]) %*% vowelsLda$scaling[,1],as.matrix(vowelsScaled[,-1]) %*% vowelsLda$scaling[,2]) p <- p + geom_point(aes(col=factor(vowelsScaled[,1]))) tmp <- scale(vowelsLda$mean,scale=FALSE) %*% vowelsLda$scaling[,c(1,2)] p + geom_point(aes(x=tmp[,1],y=tmp[,2],fill=factor(vowelsScaled[,1])),shape=23,size=8) ###Similar plot based on the scalings computed directly based on the singular value decomposition. ###The axes are scaled differently but the directions are the same. p <- qplot(as.matrix(vowelsScaled[,-1]) %*% scalings[,1],as.matrix(vowelsScaled[,-1]) %*% scalings[,2]) p <- p + geom_point(aes(col=factor(vowelsScaled[,1]))) tmp <- scale(vowelsLda$mean,scale=FALSE) %*% scalings[,c(1,2)] p + geom_point(aes(x=tmp[,1],y=tmp[,2],fill=factor(vowelsScaled[,1])),shape=23,size=8)