################################################################################################################### ## Miguel Gandra || m3gandra@gmail.com || April 2015 ############################################################## ################################################################################################################### # Script to plot a distribution map of a chosen species from GBIF, OBIS and FishBase records. # http://www.gbif.org # ---> no prior file manipulation required # ---> automatically distinguishes points on land and sea, plotting them with different colors # ---> automatically saves a pdf file with the map on the chosen directory # ---> exports a csv file with the fetched coordinates # ---> prints a summary of the records # 1. Define the genus and species name in the variables field (below). # 2. Set the directory containing the data files (txt from GBIF and csv from OBIS) - user's desktop predefined. # GBIF data: the script first looks for the "occurrence.txt" file in the chosen directory, # if it's not available or contains wrong species, the data is downloaded directly from the GBIF portal. # The "marginal.tolerance" variable sets the margin at which records are considered to be in land. # A positive value will consider records to be on land, even if they are up to x degrees outside the nearest coast. # A negative value will constraint land records to the ones that are at least x degrees within the coastline. # User can automatically delete points in land by setting the "remove.land.points" variable to TRUE, # or delete points in water by setting the "remove.ocean.points" variable to TRUE. # for instance if you use an intertidal species, a tolerance of -1 is approapriate ###################################################################################### # Automatically install required libraries ########################################## # (check http://tlocoh.r-forge.r-project.org/mac_rgeos_rgdal.html # if rgeos and rgdal installation fails on a Mac) if(!require(dismo)){install.packages("dismo"); library(dismo)} if(!require(XML)){install.packages("XML"); library(XML)} if(!require(jsonlite)){install.packages("jsonlite"); library(jsonlite)} if(!require(graphics)){install.packages("graphics"); library(graphics)} if(!require(maps)){install.packages("maps"); library(maps)} if(!require(maptools)){install.packages("maptools"); library(maptools)} if(!require(rgeos)){install.packages("rgeos"); library(rgeos)} if(!require(rgdal)){install.packages("rgdal"); library(rgdal)} if(!require(spThin)){install.packages("spThin"); library(spThin)} ###################################################################################### # Variables ########################################################################## # define the species of YOUR choice genus <- "Dicentrarchus" species <- "labrax" # define the directory where files are to be saved # YOU NEED TO CHANGE LINES 57 and 58 setwd("C:/Users/rcastil/Desktop") directory <- setwd("C:/Users/rcastil/Desktop") # the following are variables for some tasks marginal.tolerance <- 0 remove.land.points <- TRUE # if TRUE points in land are removed remove.ocean.points <- FALSE # if FALSE points in ocean are kept export.csv <- TRUE # this is to save a coordinates file thinning <-TRUE # this is to thin values, spatial thinning helps to #reduce the effect of uneven, or biased, species occurrence collections # on spatial model outcomes ####################################################################################### # Get GBIF records from txt file or download them directly from the portal ############ gbif.file<-file.path(directory,"occurrence.txt") gbif.data <- try(gbif(genus,species,geo=TRUE,removeZeros=TRUE)) if(class(gbif.data)=="try-error"){ gbif.coordinates<-data.frame(matrix(ncol=2, nrow=0)) stop("GBIF data download failed, check internet connection") }else{ gbif.coordinates <- data.frame(gbif.data$lon,gbif.data$lat) gbif.coordinates <- na.omit(gbif.coordinates)} ####################################################################################### # Remove duplicates ################################################# colnames(gbif.coordinates)<-c("long","lat") coordinates <- rbind(gbif.coordinates) total <- nrow(coordinates) dups <- duplicated(coordinates[,1:2]) dups <- dups[dups==TRUE] coordinates <- unique(coordinates) ####################################################################################### # Set geographical area ############################################################### x <- coordinates[,1] y <- coordinates[,2] xmin=min(x)-5 xmax=max(x)+5 ymin=min(y)-5 ymax=max(y)+5 ######################################################################################## # Plot Map ############################################################################ map("world", xlim=c(xmin,xmax), ylim=c(ymin,ymax), col="gray60", border="gray60", fill=TRUE, resolution=0) box(which = "plot", lty = "solid", lwd=0.25) axis(side=1,cex.axis=0.4,lwd=0.25) axis(side=2,cex.axis=0.4, lwd=0.25) ######################################################################################## # Compute land and ocean points ####################################################### data(wrld_simpl) specie.pts <- SpatialPoints(coordinates, proj4string=CRS(proj4string(wrld_simpl))) min.distances <- c() pts.on.land <- !is.na(over(specie.pts, wrld_simpl)$FIPS) if (marginal.tolerance==0){ x.ocean <- x[pts.on.land==FALSE] y.ocean <- y[pts.on.land==FALSE] x.land <- x[pts.on.land==TRUE] y.land <- y[pts.on.land==TRUE] } else if (marginal.tolerance>0){ distances <- gDistance(specie.pts,wrld_simpl,byid=TRUE) for (i in 1:length(specie.pts)) {min.distances[i] <- min(distances[,i])} x.ocean <- x[min.distances>marginal.tolerance] x.land <- x[min.distances<=marginal.tolerance] y.ocean <- y[min.distances>marginal.tolerance] y.land <- y[min.distances<=marginal.tolerance] } else if (marginal.tolerance<0) { specie.pts <- specie.pts[pts.on.land==TRUE] mp <- map("world", xlim=c(xmin,xmax), ylim=c(ymin,ymax), col="gray60", border="gray60", fill=TRUE, resolution=0) coastline <- cbind(mp$x, mp$y)[!is.na(mp$x),] coast.pts <- SpatialPoints(coastline, proj4string=CRS(proj4string(wrld_simpl))) distances <- gDistance(specie.pts,coast.pts,byid=TRUE) for (i in 1:length(specie.pts)) {min.distances[i] <- min(distances[,i])} x.ocean <- c(x[pts.on.land==FALSE],specie.pts@coords[min.distances<=abs(marginal.tolerance),1]) y.ocean <- c(y[pts.on.land==FALSE],specie.pts@coords[min.distances<=abs(marginal.tolerance),2]) x.land <- specie.pts@coords[min.distances>abs(marginal.tolerance),1] y.land <- specie.pts@coords[min.distances>abs(marginal.tolerance),2] } ######################################################################################## # Plot ocean points ? ################################################################# if (remove.ocean.points==FALSE){ points(x.ocean, y.ocean, pch=21, col='black', bg='blue', cex=0.2, lwd=0.2) } ######################################################################################## # Plot land points ? ################################################################## if (remove.land.points==FALSE){ points(x.land, y.land, pch=21, col='black', bg='red', cex=0.2, lwd=0.2) } ############################################################################################## # Thinning ################################################################################### # Can take several hours, depending on the number of occurrences. # Function automatically exports a csv file with the thinned dataset containing the maximum # number of records. x.thinned <- c() if (thinning==TRUE) { occurrences <- data.frame(x.ocean,y.ocean, paste(genus, species)) colnames(occurrences) <- c("LONG","LAT","SPEC") thinning.results <- thin(occurrences, thin.par=5, rep=10, write.files=T, out.dir = "./results", locs.thinned.list.return=T) i <- which.max(sapply(thinning.results, nrow)) x.thinned <- thinning.results[[i]][,1] y.thinned <- thinning.results[[i]][,2] xmin=min(x.thinned)-5 xmax=max(x.thinned)+5 ymin=min(y.thinned)-5 ymax=max(y.thinned)+5 map("world", xlim=c(xmin,xmax), ylim=c(ymin,ymax), col="gray60", border="gray60", fill=TRUE, resolution=0) box(which = "plot", lty = "solid", lwd=0.25) axis(side=1,cex.axis=0.8,lwd=0.25) axis(side=2,cex.axis=0.8, lwd=0.25) points(x.thinned, y.thinned, pch=21, col='black', bg='blue', cex=0.6, lwd=0.2) } ######################################################################################## # Save pdf ############################################################################# pdf.name <- paste(genus,'_',species,".pdf",sep="") pdf.file <- file.path(directory,pdf.name) dev.copy2pdf(file=pdf.file) ######################################################################################## # Export data as csv ################################################################### if(export.csv==TRUE){ occurrences <- data.frame(x.ocean,y.ocean) colnames(occurrences) <- c("lon","lat") csv.name <- paste(genus,'_',species,".csv",sep="") csv.file <- file.path(directory,csv.name) write.csv(occurrences, file=csv.file, row.names=FALSE) } if(export.csv==TRUE){ x.thinning <- data.frame(x.thinned) csv.name <- "x.thinned" csv.file <- file.path(directory,csv.name) write.csv(x.thinning, file=csv.file, row.names=FALSE) } if(export.csv==TRUE){ y.thinning <- data.frame(y.thinned) csv.name <- "y.thinned" csv.file <- file.path(directory,csv.name) write.csv(y.thinning, file=csv.file, row.names=FALSE) } x1.thinned <-read.csv("x.thinned") y1.thinned <-read.csv("y.thinned") ######################################################################################## # Print summary table ################################################################## r.summary <- data.frame(c(total,nrow(gbif.coordinates), length(dups),sum(length(x.ocean),length(x.land)), length(x.ocean),length(x.land))) rownames(r.summary) <- c("Total","GBIF","Duplicated","Plotted","Ocean","Land") colnames(r.summary) <- c("RECORDS") r.summary ## END #'############################################################################################################## #'# R scripts from "An Introduction to Best Practices in Species Distribution Modeling" ######################## #'# from Adam B. Smith || Missouri Botanical Garden || 2012-10-15 ############################################# #'# modified and extended by R. Castilho || CCMAR || 2014-09-15 ############################################### #'# modified and extended by M. Gandra || UALG || 2015-07-10 ################################################## #'############################################################################################################## #' MAXENT (Maximum Entropy Modelling) #' Goals: #' 1. Demonstrate how to handle spatial data (points, shapefiles, and rasters) in R #' 2. Demonstrate how to prepare species' and environmental data #' 3. Demonstrate "raster-points" method for training Maxent model in R #' 4. Make a map of predictions ####################################################################################################### ## Read dependences ################################################################################### ####################################################################################################### ## Check maxent.jar file ############################################################################## ## only proceed if the maxent.jar file is available, on the right folder jar <- file.path(system.file(package="dismo"), "/java/maxent.jar") if (file.exists(jar)==FALSE) {stop("maxent.jar file not available")} # download Maxent (www.cs.princeton.edu/~schapire/maxent/) # use your file system to cut and paste the Maxent jar file into the folder returned by this command: system.file('java', package='dismo') #Sys.setenv(NOAWT=TRUE) # only for certain macs before loading rJava # options(java.parameters = "-Xmx1g" ) # optional, to set the memory allocated to java, hence maxent (has to be done before loading dismo) ####################################################################################################### ## Automatically install required libraries ########################################################## # (check http://tlocoh.r-forge.r-project.org/mac_rgeos_rgdal.html # if rgeos and rgdal installation fail on a Mac) if(!require(rJava)){install.packages("rJava"); library(rJava)} if(!require(dismo)){install.packages("dismo"); library(dismo)} if(!require(rgeos)){install.packages("rgeos"); library(rgeos)} if(!require(rgdal)){install.packages("rgdal"); library(rgdal)} if(!require(raster)){install.packages("raster"); library(raster)} if(!require(sp)){install.packages("sp"); library(sp)} if(!require(rgl)){install.packages("rgl"); library(rgl)} if(!require(virtualspecies)){install.packages("virtualspecies"); library(virtualspecies)} if(!require(leaflet)){install.packages("leaflet"); library(leaflet)} if(!require(sdmpredictors)){install.packages("sdmpredictors"); library(sdmpredictors)} # set the directory containing the data files - user's desktop SDM folder predefined # YOU NEED TO CHANGE LINE 309 setwd("C:/Users/rcastil/Desktop") ####################################################################################################### ## Load species occurrences ########################################################################### # DO YOU HAVE x.thinned and y.thinned in the global environment? # IF NOT run lines XX and XX #x.thinned <-read.csv("~/Desktop/x.thinned") #y.thinned <-read.csv("~/Desktop/y.thinned") # convert sample points to spatial points spoints <- SpatialPoints(cbind(x.thinned,y.thinned)) projection(spoints) <- "+proj=longlat +ellps=WGS84 +datum=WGS84" ####################################################################################################### ## Load predictor rasters ############################################################################# # 1. download layers from https://www.dropbox.com/sh/3la2tp881a7dmhy/AADysQiomFKSAtvT75RwlPN8a?dl=0 # 2 .extract zip file and place asc files on a directory named oracle on you Desktop/SDM directory list.rasters<-(list.files("which directory", full.names=T, pattern=".asc")) list.rasters # make raster "stack" with raster for each predictor rasters <- stack(list.rasters) names(rasters) # set the coordinate reference system for the raster stack # this is not absolutely necessary if the rasters are unprojected (e.g., WGS84), # but we'll do it to avoid warning messages below projection(rasters) <- CRS("+proj=longlat +datum=WGS84") # crop rasters from script Occurrence_data.R xmin=min(x.thinned)-5 xmax=max(x.thinned)+5 ymin=min(y.thinned)-5 ymax=max(y.thinned)+5 limits <- c(xmin, xmax, ymin, ymax) rasters.crop <- crop(rasters,limits) #what is the crop function doing? #it is best to save your cropped rasters now, so you do not need to run the whole script again if something goes wrong. #writeRaster(rasters.crop, file="raster.crop.grd", format="raster") ####################################################################################################### ## Plot each raster with the sampling points ########################################################## #k <- length(list.rasters) k <- length(list.rasters)-22 k par(mar = c(0.1, 0.1, 0.1, 0.1), mfrow = c(5,5)) for (i in 1:k) { plot(rasters.crop[[i]],axes=FALSE,legend=FALSE, asp = 1 ,maxnl=23) points(spoints, col='red', pch=20, cex=0.4) mtext(names(rasters.crop[[i]]), side=4, font=0, line=0, cex=0.6) } par(mfrow=c(1,1)) ####################################################################################################### ## Co-linearity between predictors ################################################################### # PART1 rasters.crop.reduced <- removeCollinearity(rasters.crop, multicollinearity.cutoff = 0.85, plot= TRUE, select.variables = FALSE, sample.points = FALSE) groups <- which(sapply(rasters.crop.reduced, length)>1) groups rasters.crop.reduced rasters.crop.reduced[groups] # THIS IS VERY IMPORTANT # manually select predictors to retain from each group of co-linear predictors # from each red box, #rasters.crop.reduced[1] <-"Present.Surface.Chlorophyll.Range" rasters.crop.reduced[2] <-"Present.Surface.Chlorophyll.Range" rasters.crop.reduced[3] <-"Present.Surface.Phytoplankton.Mean" #rasters.crop.reduced[4] <-"XXX" #rasters.crop.reduced[5] <-"XXX" #rasters.crop.reduced[6] <-"XXX" #rasters.crop.reduced[7] <-"XXX" #rasters.crop.reduced[8] <-"XXX" rasters.crop.reduced[9] <-"Present.Surface.Temperature.Min" rasters.crop.reduced[10] <-"Present.Surface.Temperature.Mean" #rasters.crop.reduced[11] <-"XXX" rasters.crop.reduced[12]<-"Present.Surface.Nitrate.Range" rasters.crop.reduced[13] <-"Present.Surface.Nitrate.Mean" #rasters.crop.reduced[14]<-"Present.Surface.Iron.Mean" rasters.crop.reduced[15]<-"Present.Surface.Phosphate.Mean" #rasters.crop.reduced[16]<-"XXX" #rasters.crop.reduced[17] <-"XXX" rasters.crop.reduced[18] <-"Present.Surface.Phytoplankton.Min" rasters.crop.reduced[19]<-"Present.Surface.Salinity.Mean" #rasters.crop.reduced[20] <-"XXX" rasters.crop.reduced[21]<-"Present.Surface.Silicate.Mean" #rasters.crop.reduced[22]<-"XXX" #rasters.crop.reduced[23]<-"XXX" rasters.crop.reduced rasters.crop.reduced <- unlist(rasters.crop.reduced) # subset selected layers from the raster stack rasters.selected <- subset(rasters.crop, rasters.crop.reduced) # PART2 rasters.crop.reduced.1 <- removeCollinearity(rasters.selected, multicollinearity.cutoff = 0.75, plot= TRUE, select.variables = FALSE, sample.points = FALSE) groups.1 <- which(sapply(rasters.crop.reduced.1, length)>1) groups.1 rasters.crop.reduced.1 rasters.crop.reduced.1[2] <-"Present.Surface.Phytoplankton.Mean" rasters.crop.reduced.1[3] <-"Present.Surface.Phytoplankton.Min" rasters.crop.reduced.1[8] <-"Present.Surface.Temperature.Mean" rasters.crop.reduced.1[9] <-"Present.Surface.Dissolved.oxygen.Range" rasters.crop.reduced.1[10] <-"Present.Surface.Nitrate.Range" rasters.crop.reduced.1[13] <-"Present.Surface.Phosphate.Mean" rasters.crop.reduced.1 rasters.crop.reduced.1 <- unlist(rasters.crop.reduced.1) # subset selected layers from the raster stack rasters.selected.1 <- subset(rasters.crop, rasters.crop.reduced.1) # Check result # PART3 rasters.crop.reduced.2 <- removeCollinearity(rasters.selected.1, multicollinearity.cutoff = 0.75, plot= TRUE, select.variables = FALSE, sample.points = FALSE) groups.2 <- which(sapply(rasters.crop.reduced.2, length)>1) groups.2 rasters.crop.reduced.2 rasters.crop.reduced.2[9] <-"Present.Surface.Temperature.Range" rasters.crop.reduced.2[10] <-"Present.Surface.Nitrate.Range" rasters.crop.reduced.2 rasters.crop.reduced.2 <- unlist(rasters.crop.reduced.2) # subset selected layers from the raster stack rasters.selected.2 <- subset(rasters.crop, rasters.crop.reduced.2) rasters.selected <- rasters.selected.2 ####################################################################################################### ## Extract values from rasters ######################################################################## # presence values presvals <- extract(rasters.selected, spoints) # setting random seed to always create the same random set of points for this example set.seed(1963) # background values backgr <- randomPoints(rasters.selected, 1000) absvals <- extract(rasters.selected, backgr) # create dataframe with presence/background values # the first column (variable ’pb’) indicates whether this is a presence or a background point pb <- c(rep(1, nrow(presvals)), rep(0, nrow(absvals))) sdmdata.present <- data.frame(cbind(pb, rbind(presvals, absvals))) sdmdata.present <- sdmdata.present[complete.cases(sdmdata.present),] ####################################################################################################### ## Generalized linear model (GLM) ##################################################################### # a significant intercept in a model only means that there is also a constant in the model, # in addition to all other explanatory variables. # model with all variables model.present <- glm(pb ~ .,data=sdmdata.present) summary(model.present) # automatically selects significant variables significant.vars <- summary(model.present)$coeff[-1,4] < 0.05 significant.vars <- names(significant.vars)[significant.vars == TRUE] significant.vars # if the number of variables is still too high, change < 0.05 to 0.001 significant.vars <- summary(model.present)$coeff[-1,4] < 0.01 significant.vars <- names(significant.vars)[significant.vars == TRUE] significant.vars # uncomment the following lines if you want to manually add or delete variables to customize predictors set #significant.vars <- c(significant.vars, 'sstmin') #significant.vars<-significant.vars[-c(3,4,7)] #significant.vars reduced.sdmdata.present <- subset(sdmdata.present, select=c('pb', significant.vars)) # run the regression with only significant variables reduced.model.present<-glm(pb ~ .,data=reduced.sdmdata.present) summary(reduced.model.present) ####################################################################################################### ## Model selection - AIC (Akaike information criterion) ############################################### # the preferred model is the one with the minimum AIC value aic <- AIC(model.present, reduced.model.present) aic selected.model <- row.names(aic)[which.min(aic$AIC)] selected.model ####################################################################################################### ## Maxent model ####################################################################################### # Call Maxent using the "raster-points" format (x=a raster stack and p=a two-column matrix of coordinates). # Only x and p are really required, but I'm showing many of the commands in case you want to tweak some later. # All of the "args" are set to their default values except for "randomtestpoints" which says to randomly # select 30% of the species' records and use these as test sites (the default for this is 0). # All other arguments are set to the defaults except "threads" which can be set up to the number of cores # you have on your computer to speed things up... to see more "args" see bottom of "Help" file from the Maxent program. if(selected.model=='reduced.model.present'){rasters.final<-subset(rasters.selected, significant.vars)} if(selected.model=='model.present') {rasters.final<-rasters.selected} model.maxent <- maxent( x=rasters.final, p=spoints, a=backgr, args=c( 'randomtestpoints=30', 'betamultiplier=1', 'linear=true', 'quadratic=true', 'product=true', 'threshold=true', 'hinge=true', 'threads=2', 'responsecurves=true', 'jackknife=true', 'askoverwrite=false' ) ) # look at model output (HTML page) model.maxent # variable contribution plot(model.maxent) # model evaluation evaluate(model.maxent, x=rasters.final, p=spoints, a=backgr) ####################################################################################################### ## Maxent prediction ################################################################################## # Note that you could save the prediction raster in any number of formats (see ?writeFormats), # but GeoTiffs are small and can be read by ArcMap... # ASCIIs can also be read by other programs but are large... # The default raster format (GRD) sometimes can't be read by ArcMap... # If you don't specifiy a file name then the results are written to R's memory global <- extent(-180, 180, -90, 90) rasters.pred <- subset(rasters, rasters.crop.reduced) rasters.pred <- subset(rasters.pred, significant.vars) map.maxent <- predict( object=rasters.pred, model=model.maxent, na.rm=TRUE, #format='GTiff', #filename= "./results/Prediction_raster.GTiff", #overwrite=TRUE, progress='text', ext=global ) pdf("./results/Prediction_global.pdf", width = 16, height = 9) plot(map.maxent, mar=c(0,0,0,0), zlim = c(0,1), axes=F, maxpixels=ncell(map.maxent)) #scalebar(d=2000, xy=c(140, -60), type="bar", below="km", lonlat=TRUE, cex=0.7) dev.off() pdf("./results/Prediction_original1.pdf") plot(map.maxent, mar=c(0,0,0,0), zlim = c(0,1), axes=F, maxpixels=ncell(map.maxent), ext=limits) points(x, y, pch=21, col=adjustcolor("black", alpha=0.6), bg=adjustcolor("blue", alpha=0.8), cex=0.6, lwd=0.2) dev.off() e1<-extent(130, 180, -55, -20) pdf("./results/Prediction_local1.pdf") plot(map.maxent, mar=c(0,0,0,0), zlim = c(0,1), axes=F, maxpixels=ncell(map.maxent), ext=e1) dev.off() e2<-extent(-85, -48, -58, -25) pdf("./results/Prediction_local2.pdf") plot(map.maxent, mar=c(0,0,0,0), zlim = c(0,1), axes=F, maxpixels=ncell(map.maxent), ext=e2) dev.off() ################################################################################################################ ################################################################################################################ ################################################################################################################