################################################################################## # Estimate parameters of 1d finite normal mixture model with 2 components # for Waiting time observations from the Old Faithful dataset: library(mclust) waiting <- faithful$waiting n <- length (waiting) waiting.Mclust <- Mclust (waiting,model="V",G=2) # Plot densities: x <- seq (from=min(waiting),to=max(waiting),length=1000) den1 <- dnorm (x,mean=waiting.Mclust$parameters$mean[1], sd=sqrt(waiting.Mclust$parameters$variance$sigmasq[1])) den2 <- dnorm (x,mean=waiting.Mclust$parameters$mean[2], sd=sqrt(waiting.Mclust$parameters$variance$sigmasq[2])) tau1 <- waiting.Mclust$parameters$pro[1] tau2 <- waiting.Mclust$parameters$pro[2] dens <- tau1*den1 + tau2*den2 plot (x,dens,type="l",xlab="y",ylab="Probability Density", ylim=c(-max(dens)/10,1.05*max(dens)), main="Density for 1-dim 2-component normal mixture model",lwd=2) lines (x,tau1*den1,col="red") lines (x,tau2*den2, col="blue") legend (x=min(x),y=max(dens),legend=c("Mixture","Component 1","Component 2"), col=c("black","red","blue"),lty=c(1,1,1),lwd=c(2,1,1)) # Simulate and plot a sample from the distribution: sim.results <- simV (waiting.Mclust$parameters,n,seed=0) ysim <- sim.results[,2] groupsim <- sim.results[,"group"] ysim1 <- ysim[groupsim==1] ysim2 <- ysim[groupsim==2] points (ysim1,rep(-max(dens)/20,length(ysim1)),col="red",pch=19) points (ysim2,rep(-max(dens)/20,length(ysim2)),col="blue",pch=19) ################################################################################## # Estimate parameters of 2-component mixture model for the # 2d Old Faithful data and plot the density library(mclust) n <- length (faithful[,1]) faithful.Mclust2den <- densityMclust (faithful, model="VVV",G=2) # Simulate a sample from the density sim.results <- simVVV (faithful.Mclust2den$parameters,n,seed=0) ysim <- sim.results[,c(2,3)] groupsim <- sim.results[,"group"] ysim1 <- ysim[groupsim==1,] ysim2 <- ysim[groupsim==2,] # Plot the density and simulated points plot (faithful.Mclust2den, col="black",xlab="Dimension 1",ylab="Dimension 2", xlim=c(min(ysim[,1]),max(ysim[,1])), ylim = c(min(ysim[,2]),max(ysim[,2])), levels = round (quantile(faithful.Mclust2den$density, probs = c(0.05, 0.25, 0.5, 0.75, 0.95, 0.99)),3)) points (ysim1,col="red",pch=19) points (ysim2,col="blue",pch=19) legend (x=1.5,y=100, legend=c("Density contours","Component 1","Component 2"), col=c("black","red","blue"),lty=c(1,0,0), lwd=c(2,0,0), pch=c(NA,19,19) ) ################################################################################## # Loading the mclust library library(mclust) # Random initial partition n <- length (faithful[,1]) set.seed (0) z.init <- rep (1,n) n2 <- rbinom (n=1,size=n,prob=0.5) s2 <- sample (1:n, size=n2) z.init[s2] <- 2 # EM algorithm step-by-step. # Initialize EM algorithm itmax <- 50 G <- 2 zmat <- matrix (rep (0,n*G), ncol=G) for (g in (1:G)) zmat[,g] <- z.init==g mstep.out <- vector ("list",itmax) estep.out <- vector ("list",itmax) # EM iterations for (iter in 1:itmax) { # M step mstep.tmp <- mstep (modelName="VVV",data=faithful,z=zmat) mstep.out[[iter]] <- mstep.tmp # E step estep.tmp <- estep (modelName="VVV",data=faithful,parameters=mstep.tmp$parameters) estep.out[[iter]] <- estep.tmp zmat <- estep.tmp$z } # Extract classification EMclass <- rep (NA,n) for (i in 1:n) { zmati <- zmat[i,] zmat.max <- zmati==max(zmati) zmat.argmax <- (1:G)[zmat.max] EMclass[i] <- min(zmat.argmax) } # Two-dimensional trace plot of component means # First, extract the EM iteration estimates of mu_{g,d} d <- length (faithful[1,]) # data dimension mu1 <- matrix (rep(NA,itmax*d),ncol=2) mu2 <- matrix (rep(NA,itmax*d),ncol=2) for (iter in (1:itmax)) { mu1[iter,] <- estep.out[[iter]]$parameters$mean[,1] mu2[iter,] <- estep.out[[iter]]$parameters$mean[,2] } # Plot the traces of mu_{g,d} plot (faithful, type="n", xlab="Eruptions", ylab="Waiting", main="Mean Parameters EM Trace") y1 <- faithful[EMclass==2,] y2 <- faithful[EMclass==1,] lines (mu1[,1],mu1[,2],type="l",lwd=3,col="red") lines (mu2[,1],mu2[,2],type="l",lwd=3,col="blue") points (y1, col="red", pch=1) points (y2, col="blue", pch=1) points (mu1[itmax,1],mu1[itmax,2],col="red",pch=19) points (mu2[itmax,1],mu2[itmax,2],col="blue",pch=19) legend (x=min(faithful[,1]),y=max(faithful[,2]), legend=c("Mean 1","Mean 2"), col=c("red","blue"),lty=c(1,1), lwd=c(3,3)) ################################################################################## # Loading the mclust library library(mclust) data(faithful) # Clustering with Mclust faithful.Mclust2 <- Mclust (faithful , model="VVV",G=2) # Plotting the densities as contour surfacePlot (faithful,faithful.Mclust2$parameters,type="contour", what="density") points (faithful) # Alternative ways of plotting the densities surfacePlot (faithful,faithful.Mclust2$parameters,type="image", what="density") surfacePlot (faithful,faithful.Mclust2$parameters,type="persp", what="density") ################################################################################## # Loading the mclust library library(mclust) data(faithful) # Clustering with Mclust faithful.Mclust2 <- Mclust (faithful , model="VVV",G=2) # Display uncertainty on cluster assignements plot(faithful.Mclust2,what="uncertainty") surfacePlot(faithful,faithful.Mclust2$parameters,type="contour", what="uncertainty") points (faithful) ################################################################################## # Calling hc for initializing faithful.hmc <- hc(modelName="VVV",faithful) faithful.hmclass <- c(hclass (faithful.hmc,2)) ################################################################################## # Initialization by sub-sampling S <- sample (1:nrow(faithful), size=100) Mclust (faithful, initialization=list(subset=S)) ################################################################################## # Load the Rmixmod library library(Rmixmod) # Run six smallEMs (here limited to two iterations) nb = 6 par(mfrow=c(2,3)) res = list(); likelihood = c() myModel = mixmodGaussianModel(listModels='Gaussian_pk_Lk_Ck') for (i in 1:nb){ res[[i]] = mixmodCluster(faithful,nbCluster=2,model=myModel,strategy=mixmodStrategy(initMethod='random' ,nbTryInInit=1,nbIterationInInit=1,nbIterationInAlgo=1)) likelihood[i] = res[[i]]@bestResult@likelihood plot(faithful,col=res[[i]]@bestResult@partition+1,pch=c(17,19)[res[[i]]@bestResult@partition]) title(main=paste('Likelihood =',round(likelihood[i]))) } # Run EM initialized with the best smallEM solution par(mfrow=c(1,1)) final.res = mixmodCluster(faithful,nbCluster=2,model=myModel) plot(faithful,col=final.res@bestResult@partition+1,pch=c(17,19)[final.res@bestResult@partition]) title(main=paste('Likelihood =',round(final.res@bestResult@likelihood))) ################################################################################## # Initial setup library (mclust) data (diabetes) diabetes.data <- diabetes[2:4] diabetes.class <- unlist (diabetes[1]) # Model-based clustering fitting and plotting diabetes.Mclust <- Mclust (diabetes.data,modelNames="VVV",G=3) plot(diabetes.Mclust, what = "classification") # Classification error measures classError (diabetes.Mclust$classification, truth=diabetes.class) adjustedRandIndex (diabetes.Mclust$classification, diabetes.class) # Plot clinical classification coordProj (diabetes.data, dimens=c(2,3), what="classification", classification=diabetes.class, col=c("red2","dodgerblue2","green3"), symbols=c(0,16,17), sub="(a) Clinical classification") # Plot mclust classification coordProj (data=diabetes.data, dimens=c(2,3), what="classification", classification=diabetes.Mclust$classification, sub="(b) Model-Based Clustering") # Uncertainty plot uncerPlot (z=diabetes.Mclust$z,truth=diabetes.class) # Plot mclust uncertainty coordProj (data=diabetes.data, dimens=c(2,3), what="uncertainty", parameters=diabetes.Mclust$parameters, z=diabetes.Mclust$z, truth=diabetes.class) ################################################################################## # Loading libraries and data library(MBCbook) data(diabetes) diabetes.data = diabetes[,-1] diabetes.class = diabetes$class # Single link clustering tmp <- hclust (dist(diabetes.data),method="single") diabetes.single <- rect.hclust (tmp, k=3) n.diabetes <- length (diabetes.data[,1]) diabetes.single.class <- rep (NA, n.diabetes) for (g in 1:3) diabetes.single.class[diabetes.single[[g]]] <- g coordProj (diabetes.data, dimens=c(2,3), what="classification", col=c("dodgerblue2","green3","red2"), symbols=c(16,17,0), classification=diabetes.single.class, sub="(c) Single link") classError (diabetes.single.class, truth=diabetes.class) adjustedRandIndex (diabetes.single.class, diabetes.class) # Average link clustering tmp <- hclust (dist(diabetes.data),method="average") diabetes.average <- rect.hclust (tmp, k=3) n.diabetes <- length (diabetes.data[,1]) diabetes.average.class <- rep (NA, n.diabetes) for (g in 1:3) diabetes.average.class[diabetes.average[[g]]] <- g coordProj (diabetes.data, dimens=c(2,3), what="classification", classification=diabetes.average.class, sub = "(d) Average link") coordProj (diabetes.data, dimens=c(2,3), what="errors", classification=diabetes.average.class, truth=diabetes.class) classError (diabetes.average.class, truth=diabetes.class) adjustedRandIndex (diabetes.average.class, diabetes.class) # Complete link clustering tmp <- hclust (dist(diabetes.data),method="complete") diabetes.complete <- rect.hclust (tmp, k=3) n.diabetes <- length (diabetes.data[,1]) diabetes.complete.class <- rep (NA, n.diabetes) for (g in 1:3) diabetes.complete.class[diabetes.complete[[g]]] <- g coordProj (diabetes.data, dimens=c(2,3), what="classification", classification=diabetes.complete.class, col=c("dodgerblue2","green3","red2"), symbols=c(16,17,0), sub="(e) Complete link") classError (diabetes.complete.class, truth=diabetes.class) adjustedRandIndex (diabetes.complete.class, diabetes.class) # k means clustering diabetes.kmeans <- kmeans (diabetes.data, centers=3, nstart=20) coordProj (diabetes.data, dimens=c(2,3), what="classification", classification=diabetes.kmeans$cluster, col=c("green3","red2","dodgerblue2"), symbols=c(17,0,16), sub="(f) k means") classError (diabetes.kmeans$cluster, truth=diabetes.class) adjustedRandIndex (diabetes.kmeans$cluster, diabetes.class) ################################################################################## # Loading the mclust library library(mclust) # Clustering with Mclust and displaying BIC selection faithful.Mclust <- Mclust (faithful) plot (faithful.Mclust, what="BIC") # Clustering with Mclust and displaying ICL selection faithful.mclustICL <- mclustICL (faithful) plot (faithful.mclustICL)