################################################################################
# Functional clustering of the Velib data with funcy
################################################################################

# Loading libraries and data
library(funcy)
library(funFEM)
data(velib)

# Clustering with the "fitfclust" method of funcit() (6 groups).
# velib$data is transposed before being handed to funcit().
res <- funcit(t(velib$data), k = 6, methods = "fitfclust")

# Visualization of group means (one curve per group)
means <- res@models$fitfclust@centers
matplot(means, col = seq_len(res@k), type = "l", xaxt = "n", lwd = 2)
# Label every 6th time point, starting at the 5th, with its date
axis(1, at = seq(5, 181, 6), labels = velib$dates[seq(5, 181, 6)], las = 2)

# Map of the results with ggmap (requires a Google API key)
library(ggmap)
Mymap <- get_map(location = "Paris", zoom = 12, maptype = "terrain")
ggmap(Mymap) +
  geom_point(
    data = velib$position, aes(longitude, latitude),
    colour = I(res@allClusters), size = I(3)
  )

# Map of the results with leaflet (no API key needed)
library(leaflet)
library(RColorBrewer)
df <- velib$position
colors <- brewer.pal(12, "Paired")
leaflet(df) %>%
  addTiles() %>%
  addCircleMarkers(
    color = colors[res@allClusters], radius = 5,
    fillOpacity = 1, stroke = FALSE
  )

################################################################################
# Functional clustering of the Velib data with funFEM
################################################################################

# Loading libraries and data
library(funFEM)
data(velib)

# Transformation of the raw data as curves (Fourier basis, 25 functions)
basis <- create.fourier.basis(c(0, 181), nbasis = 25)
fdobj <- smooth.basis(1:181, t(velib$data), basis)$fd

# Clustering with funFEM (6 groups)
res <- funFEM(fdobj, K = 6)

# Visualization of group means: reuse the structure of fdobj and replace its
# coefficients by the transposed group means estimated by funFEM
fdmeans <- fdobj
fdmeans$coefs <- t(res$prms$my)
plot(fdmeans, col = seq_len(res$K), xaxt = "n", lwd = 2)
axis(1, at = seq(5, 181, 6), labels = velib$dates[seq(5, 181, 6)], las = 2)

# Map of the results with leaflet
library(leaflet)
library(RColorBrewer)
df <- velib$position
colors <- brewer.pal(12, "Paired")
leaflet(df) %>%
  addTiles() %>%
  addCircleMarkers(
    color = colors[res$cls], radius = 5,
    fillOpacity = 1, stroke = FALSE
  )

################################################################################
# Projection of the curves into the subspace estimated by funFEM
# (uses fdobj and res from the funFEM section above)
################################################################################

# Projection into the subspace
fdproj <- t(fdobj$coefs) %*% res$U
plot(
  fdproj, type = "p", col = res$cls, pch = 19,
  xlab = "Disc axis 1", ylab = "Disc axis 2",
  main = "Discriminative subspace"
)

# Add the names of some specific bike stations (sel holds their row indices)
sel <- c(200, 301, 384, 918, 936, 1024)
names <- c(
  "NATION", "CHAMPS ELYSEES", "MONTMARTRE",
  "TOUR EIFFEL", "GARE DE LYON", "HOTEL DE VILLE"
)
text(fdproj[sel, ], labels = names)

################################################################################
# Clustering of bivariate functional data (Velib 2D) with funHDDC
################################################################################

# Loading libraries and data
library(funHDDC)
library(MBCbook)
data(velib2D)

# Keeping only the stations with no missing value in brokenDocks
ind <- which(rowSums(is.na(velib2D$brokenDocks)) == 0)
X <- velib2D$availableBikes[ind, ]
Y <- velib2D$brokenDocks[ind, ]

# Transformation of the raw data as curves (Fourier basis, 51 functions)
basis <- create.fourier.basis(c(0, 181), nbasis = 51)
fd1 <- smooth.basis(1:181, t(X), basis)$fd
fd2 <- smooth.basis(1:181, t(Y), basis)$fd
fdobj <- list(fd1, fd2)

# Clustering the multivariate functions with funHDDC (3 groups)
res <- funHDDC(fdobj, K = 3)

# Map of the results with leaflet
library(leaflet)
library(RColorBrewer)
df <- velib2D$position[ind, ]
colors <- brewer.pal(12, "Paired")
leaflet(df) %>%
  addTiles() %>%
  addCircleMarkers(
    color = colors[res$class], radius = 5,
    fillOpacity = 1, stroke = FALSE
  )

################################################################################
# Topic modeling of three text documents with LDA
################################################################################

# Loading libraries
library(topicmodels)
# NOTE(review): Corpus(), VectorSource() and DocumentTermMatrix() are tm
# functions; confirm that tm is attached in your session (add library(tm) at
# this point if it is not).

# Loading data and lexicalization
# (the 3 texts that are used here can be replaced by any text file)
A <- scan("MBC-chapter5.txt", what = "character")
B <- scan("MBC-chapter9.txt", what = "character")
C <- scan("MBC-chapter12.txt", what = "character")
# One character string per document
Docs <- c(
  paste(A, collapse = " "),
  paste(B, collapse = " "),
  paste(C, collapse = " ")
)
x <- Corpus(VectorSource(Docs))
# Control names must match tm's spelling exactly, otherwise they are ignored:
# "stemming" (previously misspelt "steming") and "wordLengths" (which replaces
# the obsolete "minWordLength"; c(3, Inf) keeps words of 3+ characters).
# NOTE(review): stemming presumably needs the SnowballC package installed.
dtm <- DocumentTermMatrix(
  x,
  control = list(
    stemming = TRUE, stopwords = TRUE, wordLengths = c(3, Inf),
    removeNumbers = TRUE, removePunctuation = TRUE
  )
)

# Running LDA with K topics; W is the number of terms displayed per topic
K <- 5
W <- 10
out <- LDA(dtm, K)

# Visualization of the results: an empty K x W canvas is drawn first, then the
# W top terms of each topic are written column by column
image(
  x = 1:K, y = 1:W, z = matrix(0, K, W), col = 0,
  xlab = "", ylab = "", xaxt = "n", yaxt = "n",
  main = "Estimated topics by LDA"
)
axis(1, at = 1:K, labels = paste("Topic", 1:K))
# Columns are reversed (W:1) so that the first term ends up at the top
text(
  expand.grid(x = 0.75:(K - 0.25), y = 1:W),
  labels = t(terms(out, W))[, W:1],
  col = rep((1:K), W), pos = 4
)
################################################################################
# Image segmentation by clustering the pixel colours
################################################################################

# Loading libraries
library(jpeg)
library(MBCbook)

# Loading of the image
# (the image can be downloaded on the book website
# or replaced by any JPEG image)
# NOTE(review): the code below indexes Im along a third dimension, so it
# assumes a multi-channel (3-D array) image.
Im <- readJPEG("CubaBeach.jpg")
# One row per pixel (column-major order), one column per slice of dimension 3
X <- as.data.frame(apply(Im, 3, rbind))

# Clustering: fit on a random subsample of at most 10000 pixels (capped so
# that small images do not make sample() fail), then predict for all pixels
ind <- sample(nrow(X), min(10000, nrow(X)))
out <- mixmodCluster(X[ind, ], 4)
res <- mixmodPredict(X, out@bestResult)
P <- res@proba

# Going back to the image format: rows of P follow the column-major pixel
# order of the image, so P can be reshaped directly into nr x nc x G
G <- out@bestResult@nbCluster
nr <- nrow(Im)
nc <- ncol(Im)
ImP <- array(as.matrix(P), c(nr, nc, G))

# Display of posterior probabilities as images
imshow(Im[, , 1], main = "Original image")
par(mfrow = c(2, 2))
for (g in seq_len(G)) {
  imshow(ImP[, , g], main = paste("Posterior proba. for group", g))
}

# Export the segmented image: each pixel is replaced by the mean of its most
# probable group; apply() returns the mean vector along the first dimension,
# hence the aperm() to get back to rows x columns x channels
centers <- out@bestResult@parameters@mean
ImCl <- apply(ImP, c(1, 2), function(x) centers[which.max(x), ])
ImCl <- aperm(ImCl, c(2, 3, 1))
writeJPEG(ImCl, "CubaBeach-Clustered.jpg", quality = 0.95)

################################################################################
# Image denoising by clustering 8x8 patches
################################################################################

# Loading libraries
library(jpeg)
library(MBCbook)

# Loading and addition of noise on an image (first channel, rescaled by 255)
Im <- 255 * readJPEG("CubaBeach.jpg")[, , 1]
sigma <- 20
ImNoise <- Im + matrix(rnorm(nrow(Im) * ncol(Im), 0, sigma), nrow = nrow(Im))
# Clip the noisy values back into [0, 255]
ImNoise[ImNoise > 255] <- 255
ImNoise[ImNoise < 0] <- 0
par(mfrow = c(1, 2))
imshow(Im, main = "Original")
imshow(ImNoise, main = "Noisy")

# Decomposition of the image as 8x8 patches and clustering
# (it may take several minutes to run!)
patch_size <- 8
n_groups <- 40
X <- as.data.frame(imageToPatch(ImNoise, patch_size))
# Fit on at most 10000 patches (capped so that sample() cannot fail)
ind <- sample(nrow(X), min(10000, nrow(X)))
out <- mixmodCluster(
  X[ind, ], n_groups,
  model = mixmodGaussianModel(family = c("spherical"))
)
res <- mixmodPredict(X, out@bestResult)

# Display group means as small images
# NOTE(review): a 4x5 layout holds 20 panels, so the 40 means span two pages.
par(mfrow = c(4, 5))
mu <- out@bestResult@parameters@mean
for (g in seq_len(n_groups)) {
  # Each mean vector is reversed and folded back into a patch-sized matrix
  imshow(t(matrix(mu[g, (patch_size^2):1], ncol = patch_size, byrow = FALSE)))
}

# Final denoising and image reconstruction
# (it may take several minutes to run!)
Xdenoised <- denoisePatches(X, out, P = res@proba, sigma = sigma)
ImRec <- reconstructImage(Xdenoised, nrow(Im), ncol(Im))
# Clip the reconstructed values back into [0, 255]
ImRec[ImRec > 255] <- 255
ImRec[ImRec < 0] <- 0
par(mfrow = c(1, 3))
imshow(Im, main = "Original")
imshow(ImNoise, main = "Noisy")
imshow(ImRec, main = "Denoised")

################################################################################
# Co-clustering of the Amazon Fine Foods data with blockcluster
################################################################################

# Loading libraries
library(blockcluster)
library(MBCbook)
data(amazonFineFoods)

# Co-clustering with blockcluster (5 row groups, 8 column groups)
X <- as.matrix(amazonFineFoods)
out <- coclusterBinary(X, nbcocluster = c(5, 8))

# Plotting original and reorganized matrices; the matrix X is used so that
# image() always receives a matrix, whatever the class of amazonFineFoods
image(X, main = "Original data matrix", xaxt = "n", yaxt = "n")
image(
  X[order(out@rowclass), order(out@colclass)],
  main = "Data matrix sorted by groups", xaxt = "n", yaxt = "n"
)