##################################################################################
# Loading libraries
library(mclust)
library(prabclus)

# Simulating data with noise
simdata1 <- sim(modelName = "EII", n = 180,
                parameters = list(pro = c(.67, .33),
                                  mean = matrix(c(1, 1, -1, -1), ncol = 2),
                                  variance = list(sigmasq = 0.09)))
x1N <- rnorm(60, sd = 2); x2N <- rnorm(60, sd = 2)

# Remove noise points that are close to the cluster centers (1,1) and (-1,-1)
dist1 <- rep(NA, 60); dist2 <- rep(NA, 60)
for (k in 1:60) dist1[k] <- sqrt((x1N[k] - 1)^2 + (x2N[k] - 1)^2)
for (k in 1:60) dist2[k] <- sqrt((x1N[k] + 1)^2 + (x2N[k] + 1)^2)
close <- dist1 < 1.4 | dist2 < 1.4
noise <- cbind(x1N, x2N)
noise <- noise[!close, ]
Nnoise <- dim(noise)[[1]]
simdata <- rbind(simdata1[, c(2, 3)], noise)
n <- dim(simdata)[[1]]
plot(simdata, type = "n")
points(simdata[1:180, ])
points(simdata[181:n, ], col = "red")

# Nearest-neighbor cleaning to flag candidate noise points (z == 0)
NNclean.out <- NNclean(scale(simdata), k = 12)

# Clustering with Mclust, initialized with the flagged noise
MclustN <- Mclust(simdata, initialization = list(noise = (NNclean.out$z == 0)))

# Plotting results
plot(simdata, col = 1 + NNclean.out$z)
mclust2Dplot(simdata, parameters = MclustN$parameters, z = MclustN$z,
             what = "classification", PCH = 20)
plot(MclustN, what = "uncertainty")

##################################################################################
# Loading libraries
library(covRobust)

# Robust covariance estimation and clustering
# (using the simulated data of the previous listing)
nnve.out <- cov.nnve(scale(simdata))
simdata.MclustN.NNVE <- Mclust(simdata,
  initialization = list(noise = (nnve.out$classification == 0)))

# Plotting results
plot(simdata, col = 1 + nnve.out$classification)
mclust2Dplot(simdata, parameters = simdata.MclustN.NNVE$parameters,
             z = simdata.MclustN.NNVE$z, what = "classification", PCH = 20)

##################################################################################
# Loading libraries
library(tclust)
library(MBCbook)

# Data simulation (class 1 is for outliers)
n = 200; alpha = 0.1
m1 = c(-1, 2); m2 = 3 * c(1, -1)
S1 = 0.25 * diag(2); S2 = 0.25 * diag(2)
X = rbind(mvrnorm(n/2, m1, S1), mvrnorm(n/2, m2, S2),
          matrix(runif(alpha * n * 2, -5, 5), ncol = 2))
cls = rep(3:1, c(n/2, n/2, alpha * n))

# tclust with the actual G and percentage of outliers
out = tclust(X, k = 2, alpha = alpha)

# Plotting results
par(mfrow = c(1, 2))
plot(X, col = cls, pch = cls, xlab = '', ylab = '',
     main = "Data with actual labels")
plot(out, xlab = '', ylab = '', main = 'tclust clustering')

##################################################################################
# Clustering for various G and alpha (using data of the previous example)
out = ctlcurves(X, k = 2:4, alpha = seq(0, 0.2, len = 21))

# Plotting results
par(mfrow = c(1, 2))
plot(out)
plot(seq(0, 0.2, len = 21), out$obj[1, ] - out$obj[2, ], type = 'b',
     xlab = 'alpha', ylab = '', main = 'Difference between G=2 and G=3')
abline(h = 0, lty = 2)

##################################################################################
# Loading libraries and data
library(MBCbook)
data(diabetes)
X = diabetes[, -1]

# Without a prior, this setting may yield spurious singular solutions:
Mclust(X, modelNames = "VVV", control = emControl(eps = 0, tol = c(1e-3, 1e-3)))

# Including a prior regularizes the estimation:
Mclust(X, prior = priorControl(), modelNames = "VVV")

##################################################################################
# Loading libraries and data
library(mclust)
data(Baudry_etal_2010_JCGS_examples)

# Clustering and merging
output <- clustCombi(data = ex4.4.1)

# Plots the hierarchy of combined solutions, then some "entropy plots" which
# may help one to select the number of classes
plot(output)
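
# A minimal sketch, not part of the original listing: inspecting a combined
# solution from clustCombi. The components used here ($classification, a list
# indexed by the number of classes, and $MclustOutput, the underlying Mclust
# fit) are as documented in ?clustCombi; the choice K = 3 is purely
# hypothetical, standing in for a number read off the entropy plots.
K <- 3
table(output$classification[[K]])           # class sizes after combining to K
table(output$MclustOutput$classification,   # cross-table: BIC solution vs
      output$classification[[K]])           # combined solution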
##################################################################################
# Loading libraries and data
library(mclust)
library(fpc)
data(Baudry_etal_2010_JCGS_examples)

# Clustering with Mclust, then merging Gaussian components with the
# Bhattacharyya distance criterion
ex4.1.Mclust <- Mclust(ex4.1)
ex4.1.bhat <- mergenormals(ex4.1, method = "bhat",
                           clustering = ex4.1.Mclust$classification,
                           probs = ex4.1.Mclust$parameters$pro,
                           muarray = ex4.1.Mclust$parameters$mean,
                           Sigmaarray = ex4.1.Mclust$parameters$variance$sigma,
                           z = ex4.1.Mclust$z)
summary(ex4.1.bhat)
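
# A minimal sketch, not part of the original listing: mergenormals() supports
# other merging criteria besides "bhat", e.g. method = "demp" (directly
# estimated misclassification probabilities); see ?fpc::mergenormals. The
# $clustering component holding the merged labels is assumed from that help page.
ex4.1.demp <- mergenormals(ex4.1, method = "demp",
                           clustering = ex4.1.Mclust$classification,
                           probs = ex4.1.Mclust$parameters$pro,
                           muarray = ex4.1.Mclust$parameters$mean,
                           Sigmaarray = ex4.1.Mclust$parameters$variance$sigma,
                           z = ex4.1.Mclust$z)
# Compare the merged solutions with the original Mclust classification
table(ex4.1.Mclust$classification, ex4.1.bhat$clustering)
table(ex4.1.bhat$clustering, ex4.1.demp$clustering)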