###Q1.1 regress <- function(N=1000,p=100) { y <- rnorm(N) x <- matrix(rnorm(N*p),ncol=p) return(lm.fit(x,y)$coef) } plot(sort(abs(regress(p=100)),decreasing=TRUE)) lines(c(1,100),2/sqrt(1000)*c(1,1),col="red",lty=2) ###We use 1/sqrt(1000) as a proxy for the standard deviation of each coordinate. ###The following computation will show that it is not completely off, though ###the correct conditional standard deviations are found as square roots of ### the diagonal elements in (X^TX)^{-1}. x <- matrix(rnorm(1000*100),ncol=100) sqrt(diag(solve(t(x)%*%x))) 1/sqrt(1000) ###Q1.2 M <- 100 regressMeasures <- function(N=1000,p=100){ beta <- regress(N,p) value <- c(max=max(beta),L2=t(beta)%*%beta,L1=sum(abs(beta))) return(value) } empRegress <- replicate(M,regressMeasures(p=500)) hist(empRegress[1,]) hist(empRegress[2,]) hist(empRegress[3,]) ###Q1.3 M <- 100 empRegressList <- list() p <- seq(100,500,by=100) for(i in seq(along=p)){ empRegressList[[i]] <- replicate(M,regressMeasures(p=p[i])) } tmp <- as.matrix(as.data.frame(lapply(empRegressList,function(x) apply(x,1,mean)))) tmp[1,] plot(tmp["L1",],type="b",col="blue",ylim=c(0,18)) points(tmp["max",],type="b") points(tmp["L2",],type="b",col="red") plot(log(tmp["L1",]),type="b",col="blue",ylim=c(-3,4)) points(log(tmp["max",]),type="b") points(log(tmp["L2",]),type="b",col="red")