##################################################################################
# Loading libraries and data
library(MBCbook)
data(wine27)

# Extract data and scale for visualization purposes
X = scale(wine27[,1:27])
cls = as.numeric(wine27$Type)

# Visualization of the data
pairs(X[,1:7],col=cls,pch=(15:17)[cls])
matplot(t(X),col=1,type='l',lty=2,xlab='Dimension',ylab='')
matplot(t(X),col=cls,type='l',lty=cls,xlab='Dimension',ylab='')

# Clustering with Mclust (not HD-ready)
res.mclust = Mclust(X,3)
table(cls,res.mclust$classification)

# Clustering with HDDC (HD-ready)
library(HDclassif)
res.hddc = hddc(X,3)
table(cls,res.hddc$class)

##################################################################################
# Loading libraries and data
library(MBCbook)
data(usps358)
X = usps358[,-1]; cls = usps358$cls

# Computing the group means
m1 = colMeans(X[cls==1,]); m2 = colMeans(X[cls==2,]); m3 = colMeans(X[cls==3,])

# Plotting the means as vectors
par(mfrow=c(2,3))
matplot(m1,type='h',col=1,lty=1,ylim=c(0,2),ylab='')
matplot(m2,type='h',col=1,lty=1,ylim=c(0,2),ylab='')
matplot(m3,type='h',col=1,lty=1,ylim=c(0,2),ylab='')

# Plotting the means as 16 x 16 images
image(t(matrix(t(m1),ncol=16,byrow=TRUE)[16:1,]),col=gray(255:0/255),axes=FALSE); box()
image(t(matrix(t(m2),ncol=16,byrow=TRUE)[16:1,]),col=gray(255:0/255),axes=FALSE); box()
image(t(matrix(t(m3),ncol=16,byrow=TRUE)[16:1,]),col=gray(255:0/255),axes=FALSE); box()

##################################################################################
# Loading libraries and data
library(MBCbook)
data(usps358)
X = usps358[,-1]; cls = usps358$cls

# Clustering with Mclust (not HD-ready; initialized on a random subsample
# of 300 observations to keep the computation tractable)
res.mclust = Mclust(X,3,initialization=list(subset=sample(nrow(X),300)))
table(cls,res.mclust$classification)

# Clustering with HDDC (HD-ready)
library(HDclassif)
res.hddc = hddc(X,3)
table(cls,res.hddc$class)

##################################################################################
# Volume of the unit hypersphere: V(d) = pi^(d/2) / Gamma(d/2 + 1)
d = 1:100
V = pi^(d/2) / gamma(d/2+1)
plot(d,V,type='b',xlab='Dimension',ylab='Volume')

##################################################################################
# Loading libraries
library(MASS)    # for mvrnorm()
library(mvtnorm)
library(Rmixmod)

# Bayes classifier for a mixture of Gaussian distributions
BayesClass <- function(X,tau,m,S){
  G = length(m)
  P = matrix(NA,nrow(X),G)
  for (g in 1:G) P[,g] = tau[g] * dmvnorm(X,m[[g]],S[[g]])
  P = P / rowSums(P) %*% matrix(1,1,G)  # normalize the posterior probabilities
  list(P = P, cls = max.col(P))
}

# Simulation
n = 120; nbrep = 25
dims = seq(10,210,20)
err = err2 = matrix(NA,length(dims),nbrep)
for (i in 1:length(dims)){
  for (j in 1:nbrep){
    # Data simulation: three spherical Gaussians that differ
    # only on the first two dimensions
    d = dims[i]
    m1 = c(0,0,rep(0,d-2)); m2 = c(0,-2,rep(0,d-2)); m3 = c(2,0,rep(0,d-2))
    S1 = diag(d); S2 = 0.8 * diag(d); S3 = 0.9 * diag(d)
    X = as.data.frame(rbind(mvrnorm(n/3,m1,S1),mvrnorm(n/3,m2,S2),mvrnorm(n/3,m3,S3)))
    X2 = as.data.frame(rbind(mvrnorm(10*n/3,m1,S1),mvrnorm(10*n/3,m2,S2),mvrnorm(10*n/3,m3,S3)))
    cls = rep(1:3,rep(n/3,3)); cls2 = rep(1:3,rep(10*n/3,3))

    # Classification with the Bayes classifier (true parameters)
    res1 = BayesClass(X2,rep(1/3,3),list(m1,m2,m3),list(S1,S2,S3))$cls

    # Classification with EDDA (parameters estimated on X)
    mod = mixmodLearn(X,cls,models=mixmodGaussianModel(listModels='Gaussian_pk_Lk_I'))
    res2 = mixmodPredict(X2,mod["bestResult"])@partition

    # Computing the error rates
    err[i,j] = sum(res1 != cls2) / length(cls2)
    err2[i,j] = sum(res2 != cls2) / length(cls2)
  }
}
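# As an optional numeric complement to the boxplots drawn below, the error
# rates can be averaged over the nbrep Monte Carlo replications
# (this summary is an illustrative addition, not part of the original script)
round(cbind(dimension = dims, bayes = rowMeans(err), edda = rowMeans(err2)), 3)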
# Plotting the error rates
boxplot(t(err),ylim=c(0.1,0.33),names=dims,xlab='Dimension',ylab='Classification error rate',col=3)
boxplot(t(err2),names=dims,xlab='Dimension',ylab='Classification error rate',col=4,add=TRUE)
legend("bottomleft",legend=c('Bayes classifier','EDDA'),col=c(3,4),lty=1,pch=19)

##################################################################################
# Loading libraries and data
library(MBCbook)
data(NIR)
Y = NIR[,-1]
cls = NIR$cls

# PCA on the NIR data (through SVD since n < d here)
U = svd(Y)$v
X = as.matrix(Y) %*% U[,1:2]
plot(X,col=cls,pch=(15:17)[cls],cex=1.25,xlab='PC 1',ylab='PC 2')

##################################################################################
# Loading libraries and data
library(MASS)    # for qda()
library(MBCbook)
data(NIR)
Y = NIR[,-1]
cls = NIR$cls

# PCA on the NIR data (through SVD since n < d here)
U = svd(Y)$v
X = as.matrix(Y) %*% U
pairs(X[,1:4],col=cls,pch=(15:17)[cls],cex=1.25,
      labels=c('PC 1','PC 2','PC 3','PC 4'))

# Classification on different pairs of PCA axes
n = nrow(Y); nb = 50
Err = matrix(NA,2,nb)
for (i in 1:nb){
  ind = sample(nrow(Y),round(n/2))
  U = svd(Y[ind,])$v
  X = as.matrix(Y) %*% U
  Err[1,i] = sum(predict(qda(X[-ind,1:2],cls[-ind]),X[ind,1:2])$class != cls[ind]) / length(ind)
  Err[2,i] = sum(predict(qda(X[-ind,3:4],cls[-ind]),X[ind,3:4])$class != cls[ind]) / length(ind)
}
boxplot(t(Err),col=2:3,names=c('QDA on PC axes 1/2','QDA on PC axes 3/4'),
        ylim=c(0,0.7),ylab='Error rate')

##################################################################################
# Loading libraries and data
library(MASS)    # for lda()
library(MBCbook)
data(usps358)
Y = usps358[,-1]
cls = usps358$cls

# PCA on the usps358 data
U = svd(Y)$v
X.pca = as.matrix(Y) %*% U[,1:2]
plot(X.pca,type='n',xlab='PC 1',ylab='PC 2')
text(X.pca,col=cls,labels=c(3,5,8)[cls])

# FDA on the usps358 data
V = lda(Y,cls)$scaling
X.fda = as.matrix(Y) %*% V
plot(X.fda,type='n',xlab='FDA axis 1',ylab='FDA axis 2')
text(X.fda,col=cls,labels=c(3,5,8)[cls])

##################################################################################
# Loading libraries and data
library(pls)
library(MBCbook)
data(usps358)
Y = usps358[,-1]
cls = usps358$cls

# Dummy recoding of the labels
n = nrow(Y); G = max(cls)
Z = matrix(0,n,G)
for (g in 1:G) Z[cls==g,g] = 1

# PLS-DA on the usps358 data
pl = plsr(Z ~ as.matrix(Y),ncomp=2,method='oscorespls')
W = unclass(pl$loadings)
X.pls = as.matrix(Y) %*% W

# Plotting the resulting projection
plot(X.pls,type='n',xlab='PLS axis 1',ylab='PLS axis 2')
text(X.pls,col=cls,labels=c(3,5,8)[cls])

##################################################################################
# Loading libraries and data
library(pgmm)
library(MBCbook)
data(wine27)
Y = wine27[,1:27]
cls = wine27$Type

# Selecting the number of factors by model selection
res.mfa = pgmmEM(Y,rG=3,rq=1:6,modelSubset='CCU')
plot(1:6,res.mfa$bic$CCU,type='b',xlab='Nb of factors',ylab='BIC')

# Clustering results
table(cls,res.mfa$map)

##################################################################################
# Loading libraries and data
library(EMMIXmfa)
library(MBCbook)
data(wine27)
Y = wine27[,1:27]
cls = as.numeric(wine27$Type)

# Learning the common FA subspace and comparing it with PCA
res.mcfa <- mcfa(Y,g=3,q=2)
par(mfrow=c(1,2))
plot(predict(princomp(Y)),col=cls+1,pch=c(17,18,19)[cls],xlab='PC axis 1',ylab='PC axis 2')
plot(res.mcfa$Uscores,col=cls+1,pch=c(17,18,19)[cls],xlab='MCFA axis 1',ylab='MCFA axis 2')

# Clustering results
table(cls,res.mcfa$clust)
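# Optional sanity check (an illustrative addition): since confusion tables are
# read up to label switching, the adjusted Rand index summarizes the agreement
# between the MCFA partition and the true wine types in a single score
# (1 = perfect match, values near 0 = random labeling)
library(mclust)
adjustedRandIndex(cls,res.mcfa$clust)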
##################################################################################
# Loading libraries and data
library(pgmm)
library(MBCbook)
data(wine27)
Y = wine27[,1:27]
cls = wine27$Type

# Clustering with PGMM
res.pgmm = pgmmEM(Y,rG=1:5,rq=1:10,modelSubset='UUC')

# Plotting the results
plot(res.pgmm)

# Classification table
table(cls,res.pgmm$map)

##################################################################################
# Loading libraries and data
library(HDclassif)
library(MBCbook)
data(usps358)
Y = usps358[,-1]
cls = usps358$cls

# Clustering with HDDC
# (notice that HDclassif uses the index 'k' for the groups in model names)
res = hddc(Y,3,model='AkjBkQkDk',threshold=0.1)

# Number of selected dimensions per group
res$d

# Clustering results
table(cls,res$class)

##################################################################################
# Loading libraries and data
library(FisherEM)
library(MBCbook)
data(wine27)
Y = scale(wine27[,1:27])
cls = wine27$Type

# Clustering with Fisher-EM
res = fem(Y,K=3,model='AkjBk')

# Loading matrix
class(res$U) = 'loadings'
print(res$U)

# Clustering results
table(cls,res$cls)

##################################################################################
# Loading libraries and data
library(FisherEM)
library(MBCbook)
data(wine27)
Y = scale(wine27[,1:27])
cls = wine27$Type

# Clustering with the sparse version of Fisher-EM
res = sfem(Y,K=3,model='AkjBk',l1=0.1)

# Loading matrix
class(res$U) = 'loadings'
print(res$U)

# Clustering results
table(cls,res$cls)
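# Optional follow-up (an illustrative addition): the l1 penalty in sfem() is
# meant to zero out loadings, so counting the non-zero entries of U per
# discriminative axis makes the induced sparsity visible
# (1e-8 is used here as an arbitrary numerical zero)
colSums(abs(res$U) > 1e-8)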