################################################################################## # Loading of the libraries and data library(FactoMineR) library(MBCbook) data(credit) X = credit X$Age = as.factor(X$Age) # Clustering with LCM through Rmixmod res = mixmodCluster(X,nbCluster=1:8,dataType="qualitative" ,model=mixmodMultinomialModel(listModels="Binary_pk_Ekj"), criterion = c("BIC","ICL")) # Visualization of the best result according to BIC par(mfrow=c(1,2)) lbl = res@results[[1]]@partition acm = mca(X,abbrev = TRUE) plot(predict(acm,X),col=lbl,pch=c(17,15,18,19)[lbl], xlab='',ylab='',main="Factor map of observations") plot(acm,rows = FALSE,cex=0.75,main="Factor map of levels") # Barplot of cluster-conditional level frequency par(mfrow=c(1,1)) barplot(res) ################################################################################## # Loading of libraries and data library(clustMD) library(MBCbook) data(Byar) # Transformation of skewed variables and ordering according to variable types Byar$Size.of.primary.tumour <- sqrt(Byar$Size.of.primary.tumour) Byar$Serum.prostatic.acid.phosphatase <- log(Byar$Serum.prostatic.acid.phosphatase) Y <- as.matrix(Byar[,c(1,2,5,6,8,9,10,11,3,4,12,7)]) Y[, 9:12] <- Y[, 9:12] + 1 Y[, 1:8] <- scale(Y[, 1:8]) Yekg <- rep(NA, nrow(Y)) Yekg[Y[,12]==1] <- 1 Yekg[(Y[,12]==2)|(Y[,12]==3)|(Y[,12]==4)] <- 2 Yekg[(Y[,12]==5)|(Y[,12]==6)|(Y[,12]==7)] <- 3 Y[, 12] <- Yekg # Clustering with clustMD res <- clustMD(X=Y, G=3, CnsIndx=8, OrdIndx=11, Nnorms=20000, MaxIter=500, model="EVI", store.params=FALSE, scale=TRUE, startCL="kmeans") colnames(res$means) = paste('Grp.',1:3) rownames(res$means) = c(colnames(Y)[1:11],paste(colnames(Y)[12],1:2)) dotchart(t(res$means),col=1:3,pch=19) # Clustering with Rmixmod Y = as.data.frame(Y) Y[,9] = as.factor(Y[,9]); Y[,10] = as.factor(Y[,10]) Y[,11] = as.factor(Y[,11]); Y[,12] = as.factor(Y[,12]) out = mixmodCluster(Y,1:4,criterion='BIC') means = matrix(NA,ncol(Y),4) means[1:8,] = t(out@bestResult@parameters@g_parameter@mean) means[9:12,] = t(out@bestResult@parameters@m_parameter@center) colnames(means) = paste('Grp.',1:4); rownames(means) = colnames(Y) dotchart(t(means),col=1:3,pch=19) ################################################################################## # Loading libraries and data library(HTSCluster) library(MBCbook) data(velibCount) X = velibCount$data # Visualization of some stations Sys.setlocale("LC_TIME", "en_US.UTF-8") dates = strftime(as.POSIXct(velibCount$dates,origin = "1970-01-01"),format="%a-%I%P") matplot(t(X[1:5,]),type='l',lwd=2,main='The Velib data set',xaxt='n',xlab='',ylab='Number of available bikes') axis(1,at=seq(5,181,6),labels=dates[seq(5,181,6)],las=2) # Saturdays and Sundays are removed days = strftime(as.POSIXct(velibCount$dates,origin = "1970-01-01"),format="%u") X = X[,days %in% 1:5] # An hour effect is defined conds = strftime(as.POSIXct(velibCount$dates,origin = "1970-01-01"),format="%H") conds = conds[days %in% 1:5] # Clustering with HTSCluster (may take several minutes!) run <- PoisMixClusWrapper(as.matrix(X),gmin=2,gmax=15,conds=conds) # The slope heuristic (DDSE) suggests G=10 summary(run) G = 10 # Display of the cluster proportions run$all.results[[G-1]]$pi # Plot of the Estimated lambdas matplot(run$all.results[[G-1]]$lambda,type='l',xaxt='n',lwd=2,lty=1,xlab='Hour',ylab='Estimated lambdas') axis(1,at=1:24,labels=rownames(run$lambda),las=1) # Map of the results with leaflet (no API key needed) library(leaflet); library(RColorBrewer) df = velibCount$position colors = brewer.pal(12,'Paired') leaflet(df) %>% addTiles() %>% addCircleMarkers(color = colors[run$all.results[[G-1]]$lab], radius = 5, fillOpacity = 1, stroke = FALSE)