##################################################################################
# Loading libraries and data
library(funcy)
library(funFEM)
data(velib)

# Cluster the transposed velib data into 6 groups using the fitfclust method
res <- funcit(t(velib$data), k = 6, methods = "fitfclust")

# Plot the group centers, one line per cluster, with dates on the x axis
means <- res@models$fitfclust@centers
tick_at <- seq(5, 181, 6)
matplot(means, col = 1:res@k, type = "l", xaxt = "n", lwd = 2)
axis(1, at = tick_at, labels = velib$dates[tick_at], las = 2)

# Map of the clustering with ggmap (requires a Google API key)
library(ggmap)
Mymap <- get_map(location = "Paris", zoom = 12, maptype = "terrain")
station_layer <- geom_point(
  data = velib$position,
  aes(longitude, latitude),
  colour = I(res@allClusters),
  size = I(3)
)
ggmap(Mymap) + station_layer

# Same map with leaflet (no API key needed)
library(leaflet)
library(RColorBrewer)
df <- velib$position
colors <- brewer.pal(12, "Paired")
tiled_map <- addTiles(leaflet(df))
addCircleMarkers(
  tiled_map,
  color = colors[res@allClusters],
  radius = 5, fillOpacity = 1, stroke = FALSE
)

##################################################################################
# Loading libraries and data
library(funFEM)
data(velib)

# Smooth the raw observations on a Fourier basis of 25 functions over [0, 181]
basis <- create.fourier.basis(c(0, 181), nbasis = 25)
smoothed <- smooth.basis(1:181, t(velib$data), basis)
fdobj <- smoothed$fd

# Cluster the smoothed curves into 6 groups with funFEM
res <- funFEM(fdobj, K = 6)

# Group means: reuse the functional object and swap in the transposed
# mean coefficients stored in res$prms$my
fdmeans <- fdobj
fdmeans$coefs <- t(res$prms$my)
tick_at <- seq(5, 181, 6)
plot(fdmeans, col = 1:res$K, xaxt = "n", lwd = 2)
axis(1, at = tick_at, labels = velib$dates[tick_at], las = 2)

# Map of the clustering with leaflet
library(leaflet)
library(RColorBrewer)
df <- velib$position
colors <- brewer.pal(12, "Paired")
tiled_map <- addTiles(leaflet(df))
addCircleMarkers(
  tiled_map,
  color = colors[res$cls],
  radius = 5, fillOpacity = 1, stroke = FALSE
)

##################################################################################
# Projection into the subspace
# Multiply the (transposed) basis coefficients of the curves by res$U and
# plot the result, colored by cluster label
fdproj <- t(fdobj$coefs) %*% res$U
plot(
  fdproj,
  type = "p", col = res$cls, pch = 19,
  xlab = "Disc axis 1", ylab = "Disc axis 2",
  main = "Discriminative subspace"
)

# Annotate a handful of bike stations, selected by row index
sel <- c(200, 301, 384, 918, 936, 1024)
station_labels <- c(
  "NATION", "CHAMPS ELYSEES", "MONTMARTRE",
  "TOUR EIFFEL", "GARE DE LYON", "HOTEL DE VILLE"
)
text(fdproj[sel, ], labels = station_labels)

##################################################################################
# Loading libraries and data
library(funHDDC)
library(MBCbook)
data(velib2D)

# Keep only the stations (rows) with no missing value in either series, so
# that both smoothing steps below receive complete observations
has_na <- rowSums(is.na(velib2D$brokenDocks)) > 0 |
  rowSums(is.na(velib2D$availableBikes)) > 0
ind <- which(!has_na)
X <- velib2D$availableBikes[ind, ]
Y <- velib2D$brokenDocks[ind, ]

# Transformation of the raw data as curves: both series are smoothed on the
# same Fourier basis of 51 functions over [0, 181]
basis <- create.fourier.basis(c(0, 181), nbasis = 51)
fd1 <- smooth.basis(1:181, t(X), basis)$fd
fd2 <- smooth.basis(1:181, t(Y), basis)$fd
fdobj <- list(fd1, fd2)

# Clustering the multivariate functions with funHDDC
res <- funHDDC(fdobj, K = 3)

# Map of the results with leaflet (positions restricted to the kept rows)
library(leaflet)
library(RColorBrewer)
df <- velib2D$position[ind, ]
colors <- brewer.pal(12, "Paired")
leaflet(df) %>%
  addTiles() %>%
  addCircleMarkers(
    color = colors[res$class],
    radius = 5, fillOpacity = 1, stroke = FALSE
  )

##################################################################################
# Loading libraries
library(topicmodels)
# Corpus(), VectorSource() and DocumentTermMatrix() are tm functions; attach
# tm explicitly instead of relying on topicmodels to expose them
library(tm)

# Loading data and lexicalization
# (the 3 texts that are used here can be replaced by any text file)
# quote = "" stops scan() from treating apostrophes and double quotes in
# running text as string delimiters
A <- scan("MBC-chapter5.txt", what = "character", quote = "")
B <- scan("MBC-chapter9.txt", what = "character", quote = "")
C <- scan("MBC-chapter12.txt", what = "character", quote = "")
Docs <- c(
  paste(A, collapse = " "),
  paste(B, collapse = " "),
  paste(C, collapse = " ")
)
x <- Corpus(VectorSource(Docs))

# Control options use the names tm expects: `stemming` (the misspelled
# `steming` was silently ignored) and `wordLengths` (replaces the obsolete
# `minWordLength`; keeps terms of at least 3 characters).
# NOTE(review): stemming in tm relies on the SnowballC package being
# installed -- confirm it is available.
dtm <- DocumentTermMatrix(
  x,
  control = list(
    stemming = TRUE,
    stopwords = TRUE,
    wordLengths = c(3, Inf),
    removeNumbers = TRUE,
    removePunctuation = TRUE
  )
)

# Running LDA with K topics; W is the number of top terms shown per topic
K <- 5
W <- 10
out <- LDA(dtm, K)

# Visualization of the results: an empty K x W canvas, one column per topic,
# filled with the W top terms of each topic
image(
  x = 1:K, y = 1:W, z = matrix(0, K, W), col = 0,
  xlab = "", ylab = "", xaxt = "n", yaxt = "n",
  main = "Estimated topics by LDA"
)
axis(1, at = 1:K, labels = paste("Topic", 1:K))
text(
  expand.grid(x = 0.75:(K - 0.25), y = 1:W),
  labels = t(terms(out, W))[, W:1],
  col = rep((1:K), W), pos = 4
)

##################################################################################
# Loading libraries
library(jpeg)
library(MBCbook)

# Loading of the image
# (the image can be downloaded on the book website
#  or replaced by any JPEG image)
Im <- readJPEG("CubaBeach.jpg")
# One row per pixel, one column per slice of the third dimension of Im
X <- as.data.frame(apply(Im, 3, rbind))

# Clustering: fit the mixture on a random subsample of pixels (capped at the
# number of pixels so that small images do not make sample() fail), then
# compute posterior probabilities for every pixel
ind <- sample(nrow(X), min(10000, nrow(X)))
out <- mixmodCluster(X[ind, ], 4)
res <- mixmodPredict(X, out@bestResult)
P <- res@proba

# Going back to the image format: rows of P follow the column-major pixel
# order of the image, so a direct reshape yields one nr x nc probability
# map per group
G <- out@bestResult@nbCluster
nr <- nrow(Im)
nc <- ncol(Im)
ImP <- array(P, dim = c(nr, nc, G))

# Display of posterior probabilities as images
imshow(Im[, , 1], main = "Original image")
par(mfrow = c(2, 2))
for (g in seq_len(G)) {
  imshow(ImP[, , g], main = paste("Posterior proba. for group", g))
}

# Export the segmented image: each pixel takes the mean of its most probable
# group; apply() puts the mean components first, so aperm() moves them to the
# last dimension
ImCl <- apply(ImP, c(1, 2), function(x) {
  out@bestResult@parameters@mean[which.max(x), ]
})
ImCl <- aperm(ImCl, c(2, 3, 1))
writeJPEG(ImCl, "CubaBeach-Clustered.jpg", quality = 0.95)

##################################################################################
# Loading libraries
library(jpeg)
library(MBCbook)

# Loading and addition of noise on an image: first slice of the JPEG rescaled
# by 255, plus Gaussian noise of standard deviation sigma, clipped to [0, 255]
Im <- 255 * readJPEG("CubaBeach.jpg")[, , 1]
sigma <- 20
ImNoise <- Im + matrix(rnorm(nrow(Im) * ncol(Im), 0, sigma), nrow = nrow(Im))
ImNoise[ImNoise > 255] <- 255
ImNoise[ImNoise < 0] <- 0
par(mfrow = c(1, 2))
imshow(Im, main = "Original")
imshow(ImNoise, main = "Noisy")

# Decomposition of the image as 8x8 patches and clustering
# (it may take several minutes to run!)
X <- as.data.frame(imageToPatch(ImNoise, 8))
# Cap the subsample at the number of patches so sample() cannot fail on
# small images
ind <- sample(nrow(X), min(10000, nrow(X)))
# `models` is spelled out in full rather than relying on partial matching
out <- mixmodCluster(
  X[ind, ], 40,
  models = mixmodGaussianModel(family = c("spherical"))
)
res <- mixmodPredict(X, out@bestResult)

# Display group means as small images, one per row of the mean matrix
# NOTE(review): a 4x5 layout holds 20 panels, so 40 means span two pages --
# confirm this is intended
par(mfrow = c(4, 5))
mu <- out@bestResult@parameters@mean
for (g in seq_len(nrow(mu))) {
  imshow(t(matrix(mu[g, 64:1], ncol = 8, byrow = FALSE)))
}

# Final denoising and image reconstruction (it may take several minutes to run!)
Xdenoised <- denoisePatches(X, out, P = res@proba, sigma = sigma)
ImRec <- reconstructImage(Xdenoised, nrow(Im), ncol(Im))
ImRec[ImRec > 255] <- 255
ImRec[ImRec < 0] <- 0
par(mfrow = c(1, 3))
imshow(Im, main = "Original")
imshow(ImNoise, main = "Noisy")
imshow(ImRec, main = "Denoised")

##################################################################################
# Loading libraries
library(blockcluster)
library(MBCbook)
data(amazonFineFoods)

# Co-clustering with blockcluster: 5 row groups and 8 column groups
X <- as.matrix(amazonFineFoods)
out <- coclusterBinary(X, nbcocluster = c(5, 8))

# Original matrix first, then the same matrix with rows and columns
# reordered by their estimated group labels
image(
  amazonFineFoods,
  main = "Original data matrix",
  xaxt = "n", yaxt = "n"
)
row_order <- order(out@rowclass)
col_order <- order(out@colclass)
image(
  amazonFineFoods[row_order, col_order],
  main = "Data matrix sorted by groups",
  xaxt = "n", yaxt = "n"
)