#######################################
### R scripts for Classical MDS
### MBE 2015
### R. Castilho || CCMAR || 2025-02-13

#######################################
### PREPARATION STEPS
# install packages
install.packages("clusterSim")
install.packages("MASS)
install.packages("calibrate")

# load packages 
library(clusterSim)
library(MASS)
library(calibrate)

#######################################
### LOAD DATA
# set working directory... 
# change this to the directory in which the species' data is stored and where you want to save output
setwd()

# by now you know how to load an excel and or text file into R.
data.tab <- XXXX

# now you should have the data.tab object in your environment
# check how many columns and rows it has
dim(...)

## Classical MDS
data.mat = as.matrix(data.tab[,:::]) #transform data.frame into matrix so that can be transposed
dim(data.mat) #X Y
tdata.mat<-t(data.mat) #transposing matrix
dim(tdata.mat) #Y X

## Obtaining distance matrix BC=Bray-Curtis
data.bcdist <- dist.BC(tdata.mat)
fit <- cmdscale(data.bcdis,eig=TRUE, k=2) # k is the number of dim
fit # view results
# plot solution 
x <- fit$points[,1]
y <- fit$points[,2]
plot(x, y, xlab="Coordinate 1", ylab="Coordinate 2", main="Bray-Curtis MDS",   type="p", pch=16)
textxy(x, y, labs = row.names(tdata.mat), cx = 0.5, dcol = "black", m = c(0, 0))

#Obtaining distance matrix Euclidean distances
# "euclidean", "maximum", "manhattan", "canberra", "binary" or "minkowski". Any unambiguous substring can be given.
d <- dist(tdata.mat,method = "euclidean") # euclidean distances between the rows
fit <- cmdscale(d,eig=TRUE, k=2) # k is the number of dimensions 

fit # view results
# plot solution 
x <- fit$points[,1]
y <- fit$points[,2]
plot(x, y, xlab="Coordinate 1", ylab="Coordinate 2", main="Euclidean MDS",   type="p", pch=16)
textxy(x, y, labs = row.names(tdata.mat), cx = 0.5, dcol = "black", m = c(-20, 0))

# as a BONUS
# you may want to have a slightely nicer plot...
#######################################
install.packages("ggfortify")
library(ggfortify)
install.packages("ggplot2")
library(ggplot2)
install.packages("arules")
library(arules)
install.packages("ggdendro")
library(ggdendro)
install.packages("dplyr")
library(dplyr)
install.packages("vegan")
library(vegan)

# the default index in vegdist is Bray-Curtis
d <- vegdist(tdat.mat)
# hierarchical cluster analysis
hc <- hclust(di, method="single")
cut <- as.data.frame(cutree(hc, k=13)) #k number of areas
names(cut) <- "cut"
cut$names <- rownames(cut)
hcdata <- dendro_data(hc, type="rectangle")

ggplot(hcdata$segments) + 
  geom_segment(aes(x = x, y = y, xend = xend, yend = yend))+
  geom_text(data = hcdata$labels, aes(x, y, label = label), 
            hjust = 1, size = 3) + scale_colour_discrete(name = "clusters") +
  labs(x="", y="") +  coord_flip() + ylim(-0.10, 0.4) + xlim(0,15)  + theme_bw()


# MDS
mds.cmdscale <- as.data.frame(cmdscale(as.matrix(d)))
mds.cmdscale$names <- rownames(mds.cmdscale)

ggplot(mds.cmdscale, aes(V1, V2, label=names)) + 
  geom_point(size=2.3) +
  geom_text(check_overlap = TRUE, size=4, 
            hjust = "center", vjust = "bottom", nudge_x = 0, nudge_y = 0.005) + 
  scale_colour_discrete(name = "clusters") +
  labs(x="", y="", title="MDS by Jaccard and cmdscale") + theme_bw()

#-------------------------------------------------
# end
#-------------------------------------------------