####################################### ### R scripts for Classical MDS ### MBE 2015 ### R. Castilho || CCMAR || 2025-02-13 ####################################### ### PREPARATION STEPS # install packages install.packages("clusterSim") install.packages("MASS) install.packages("calibrate") # load packages library(clusterSim) library(MASS) library(calibrate) ####################################### ### LOAD DATA # set working directory... # change this to the directory in which the species' data is stored and where you want to save output setwd() # by now you know how to load an excel and or text file into R. data.tab <- XXXX # now you should have the data.tab object in your environment # check how many columns and rows it has dim(...) ## Classical MDS data.mat = as.matrix(data.tab[,:::]) #transform data.frame into matrix so that can be transposed dim(data.mat) #X Y tdata.mat<-t(data.mat) #transposing matrix dim(tdata.mat) #Y X ## Obtaining distance matrix BC=Bray-Curtis data.bcdist <- dist.BC(tdata.mat) fit <- cmdscale(data.bcdis,eig=TRUE, k=2) # k is the number of dim fit # view results # plot solution x <- fit$points[,1] y <- fit$points[,2] plot(x, y, xlab="Coordinate 1", ylab="Coordinate 2", main="Bray-Curtis MDS", type="p", pch=16) textxy(x, y, labs = row.names(tdata.mat), cx = 0.5, dcol = "black", m = c(0, 0)) #Obtaining distance matrix Euclidean distances # "euclidean", "maximum", "manhattan", "canberra", "binary" or "minkowski". Any unambiguous substring can be given. d <- dist(tdata.mat,method = "euclidean") # euclidean distances between the rows fit <- cmdscale(d,eig=TRUE, k=2) # k is the number of dimensions fit # view results # plot solution x <- fit$points[,1] y <- fit$points[,2] plot(x, y, xlab="Coordinate 1", ylab="Coordinate 2", main="Euclidean MDS", type="p", pch=16) textxy(x, y, labs = row.names(tdata.mat), cx = 0.5, dcol = "black", m = c(-20, 0)) # as a BONUS # you may want to have a slightely nicer plot... ####################################### install.packages("ggfortify") library(ggfortify) install.packages("ggplot2") library(ggplot2) install.packages("arules") library(arules) install.packages("ggdendro") library(ggdendro) install.packages("dplyr") library(dplyr) install.packages("vegan") library(vegan) # the default index in vegdist is Bray-Curtis d <- vegdist(tdat.mat) # hierarchical cluster analysis hc <- hclust(di, method="single") cut <- as.data.frame(cutree(hc, k=13)) #k number of areas names(cut) <- "cut" cut$names <- rownames(cut) hcdata <- dendro_data(hc, type="rectangle") ggplot(hcdata$segments) + geom_segment(aes(x = x, y = y, xend = xend, yend = yend))+ geom_text(data = hcdata$labels, aes(x, y, label = label), hjust = 1, size = 3) + scale_colour_discrete(name = "clusters") + labs(x="", y="") + coord_flip() + ylim(-0.10, 0.4) + xlim(0,15) + theme_bw() # MDS mds.cmdscale <- as.data.frame(cmdscale(as.matrix(d))) mds.cmdscale$names <- rownames(mds.cmdscale) ggplot(mds.cmdscale, aes(V1, V2, label=names)) + geom_point(size=2.3) + geom_text(check_overlap = TRUE, size=4, hjust = "center", vjust = "bottom", nudge_x = 0, nudge_y = 0.005) + scale_colour_discrete(name = "clusters") + labs(x="", y="", title="MDS by Jaccard and cmdscale") + theme_bw() #------------------------------------------------- # end #-------------------------------------------------