####################################### ### R scripts for Classical MDS ### BEMO 2015 ### R. Castilho || CCMAR || 2015-02-17 ####################################### ### PREPARATION STEPS # install packages install.packages("clustsig") library(clustsig) ####################################### ### LOAD DATA # set working directory... change this to the directory in which the species' data is stored and where you want to save output # setwd("D:/UTILIZADORES/user-en/Desktop/BEMO") setwd("~/Desktop") data.tab = read.table("data_andre.txt", header=TRUE, sep = "\t") data.mat = as.matrix(data.tab[,1:13]) # The agglomeration method to be used: # This should be (an unambiguous abbreviation of) one of "ward.D", "ward.D2", "single", "complete", # "average" (= UPGMA), # "mcquitty" (= WPGMA), # "median" (= WPGMC) or # "centroid" (= UPGMC) # The distance index to be used: # "euclidean", # "maximum", # "manhattan", # "canberra", # "binary" # "minkowski", # "braycurtis" or "czekanowski" for Czekanowski Dissimilarity # (referred to as Bray-Curtis Dissimilarity in some fields, particularly marine biology), # "actual-braycurtis" for the true Bray-Curtis Dissimilarity where the data are # standardized before the dissimilarity is calculated. # This value can also be any function which returns a "dist" object. BC=simprof(data.mat, num.expected=10, num.simulated=9, method.cluster="average", method.distance="braycurtis", alpha=0.05, sample.orientation="column", const=0, silent=FALSE, increment=100) simprof.plot(BC, leafcolors=NA, plot=TRUE, fill=TRUE, leaflab="perpendicular", siglinetype=1) # we can now obtain the number of groups which are found to be statistically significant BC$numgroups # obtain a list of length numgroups with each element containing the sample IDs (row/column numbers in #the corresponding original data) that are in each significant cluster BC$significantclusters # p-values for testing whether the two groups in that row are statistically different. The null hypothesis is # that there is no a priori group structure, so if p<0.05 means that there is structure BC$pval ################ library(cluster) # row = observation (sites), column = variable (species) tdata.mat <- t(data.mat) UPGMA <- agnes(tdata.mat, diss = inherits(data.mat, "dist"), metric = "euclidean", stand = FALSE, method = "average", keep.diss = FALSE, keep.data = FALSE, trace.lev = 0) plot(UPGMA) ################ library(vegan) # the default index in vegdist is Bray–Curtis d <- vegdist(tdata.mat) csin <- hclust(d, method="single") plot(csin) plot(csin, hang=-1) ####### Ward Hierarchical Clustering with Bootstrapped p values # install.packages("pvclust") library(pvclust) # pvclust # method: "euclidean", "maximum", "manhattan", "canberra", "binary" or "minkowski" # matrix: row=species; columns=sites fit <- pvclust(data.mat, method.hclust="ward.D", method.dist="euclidean") plot(fit) # dendogram with p values # add rectangles around groups highly supported by the data pvrect(fit, alpha=.95, pv="bp") ################