library(ggplot2)
library(glmnet)
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-10
library(cowplot)
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave
set.seed(113)
Coef = rnorm(5, 0, sd=1)
X = matrix(rnorm(500, 0, sd=1), nrow = 5, ncol = 100)
Y = apply(X, 2, function(x) Coef%*%x)
df = data.frame(x = c("a", "b", "c", "d", "e"), coef = Coef, Category="True")
ggplot(df, aes(x=x, y=coef, fill=Category)) + geom_bar(stat = "Identity") + theme_bw() +
  xlab("") + ylab("Coefficients")

Reg_analysis <- function(Xtrain, Ytrain, Xtest, Ytest, Ronly = FALSE) {
  model = glmnet(t(Xtrain), Ytrain, alpha=0)
  Ypred = predict.glmnet(model, newx = t(Xtest), s=0)

  df = data.frame(True = Ytest, Predict = as.numeric(Ypred))
  dfcoef = rbind(
    data.frame(Var = c("a", "b", "c", "d", "e"), Coef = Coef, Category="True"),
    data.frame(Var = c("a", "b", "c", "d", "e"), Coef = coef.glmnet(model, s=0)[2:6], Category="Trained")
            )
  dfcoef$Category = factor(dfcoef$Category)
  plots = list()

  err = max(1-sum((Ypred-Ytest)^2)/sum((Ypred-mean(Ypred))^2), 0)
  if (!Ronly) {
    plots[[1]] = ggplot(df, aes(y=True, x=Predict)) + geom_point() + xlab("Prected Y") + ylab("True Y") + 
      annotate("text", x=min(df$Predict)*0.5, y=max(df$True), label=paste0("R squared = ",as.character(round(err, digits = 2)))) + coord_cartesian(xlim = c(min(Ytest), max(Ytest)))
    plots[[2]] = ggplot(dfcoef, aes(x=Var, y=Coef, fill=Category)) + geom_bar(stat = "Identity", position = "dodge") +
      ylab("Coefficients")
    return(plots)
  } else{
    return(err)
  }
}

Training model without noise but with increasing dimensions (random variables)

Only 10 training data and 200 validation data is used.

Nfeats = c(0,2,3,4,5,6)
Rs = list()

Xtrain = matrix(rnorm(50, 0, sd=1), nrow = 5, ncol = 10)
Ytrain = apply(Xtrain, 2, function(x) Coef%*%x)
Xextra = matrix(rnorm(1000, 0, sd=1), nrow = 100, ncol = 10)

Xtest = matrix(rnorm(1000, 0, sd=1), nrow = 5, ncol = 200)
Ytest = apply(Xtest, 2, function(x) Coef%*%x)
Xextra_test = matrix(rnorm(100*200, 0, sd=1), nrow = 100, ncol=200)

for (Nfeat in Nfeats) {
  
  R <- Reg_analysis(rbind(Xtrain,Xextra[1:Nfeat,]), Ytrain, rbind(Xtest,Xextra_test[1:Nfeat, ]), Ytest)[[1]] + 
    ggtitle(paste0("dimension = ", as.character(Nfeat + 5))) + theme(aspect.ratio=1)
  Rs[[length(Rs)+1]] = R
#  plot(R)
}

plot_grid(plotlist = Rs, ncol=2)