################################################################################################################### ## Miguel Gandra || m3gandra@gmail.com || April 2015 ############################################################## ################################################################################################################### # Script to plot a distribution map of a chosen species from GBIF, OBIS and FishBase records. # http://www.gbif.org # http://iobis.org # http://www.fishbase.org # ---> no prior file manipulation required # ---> automatically distinguishes points on land and sea, plotting them with different colors # ---> automatically saves a pdf file with the map on the chosen directory # ---> exports a csv file with the fetched coordinates # ---> prints a summary of the records # 1. Define the genus and species name in the variables field (below). # 2. Set the directory containing the data files (txt from GBIF and csv from OBIS) - user's desktop predefined. # GBIF data: the script first looks for the "occurrence.txt" file in the chosen directory, # if it's not available or contains wrong species, the data is downloaded directly from the GBIF portal. # The "marginal.tolerance" variable sets the margin at which records are considered to be in land. # A positive value will consider records to be on land, even if they are up to x degrees outside the nearest coast. # A negative value will constraint land records to the ones that are at least x degrees within the coastline. # User can automatically delete points in land by setting the "remove.land.points" variable to TRUE, # or delete points in water by setting the "remove.ocean.points" variable to TRUE. ###################################################################################### # Automatically install required libraries ########################################## # (check http://tlocoh.r-forge.r-project.org/mac_rgeos_rgdal.html # if rgeos and rgdal installation fails on a Mac) if(!require(dismo)){install.packages("dismo"); library(dismo)} if(!require(XML)){install.packages("XML"); library(XML)} if(!require(jsonlite)){install.packages("jsonlite"); library(jsonlite)} if(!require(graphics)){install.packages("graphics"); library(graphics)} if(!require(maps)){install.packages("maps"); library(maps)} if(!require(maptools)){install.packages("maptools"); library(maptools)} if(!require(rgeos)){install.packages("rgeos"); library(rgeos)} if(!require(rgdal)){install.packages("rgdal"); library(rgdal)} if(!require(spThin)){install.packages("spThin"); library(spThin)} ###################################################################################### # Variables ########################################################################## # define the species of YOUR choice genus <- "Lotella" species <- "rhacina" # define the directory where files are to be saved # i.e. c:/Users/axxxx/Desktop setwd("~/Desktop") directory <- setwd("~/Desktop") # the following are variables for some tasks marginal.tolerance <- 0 remove.land.points <- TRUE # if TRUE points in land are removed remove.ocean.points <- FALSE # if FALSE points in ocean are kept export.csv <- TRUE # this is to save a coordinates file thinning <-TRUE # this is to thin values, spatial thinning helps to #reduce the effect of uneven, or biased, species occurrence collections # on spatial model outcomes ####################################################################################### # Get GBIF records from txt file or download them directly from the portal ############ gbif.file<-file.path(directory,"occurrence.txt") txt <- TRUE if(file.exists(gbif.file)==TRUE){ gbif.data <- read.table(gbif.file, sep="\t", header=TRUE, fill=TRUE, quote=NULL, comment='') if(length(grep(genus,species,gbif.data$scientificName))==0){ txt <- FALSE stop("occurrence.txt file from a different species, downloading data from GBIF") }else{ gbif.coordinates <- data.frame(gbif.data$decimalLongitude,gbif.data$decimalLatitude) gbif.coordinates <- na.omit(gbif.coordinates)} }else{ txt <- FALSE stop("occurrence.txt file not available, downloading data from GBIF, KEEP going to the next command lines") } if(txt==FALSE){ gbif.data <- try(gbif(genus,species,geo=TRUE,removeZeros=TRUE)) if(class(gbif.data)=="try-error"){ gbif.coordinates<-data.frame(matrix(ncol=2, nrow=0)) stop("GBIF data download failed, check internet connection") }else{ gbif.coordinates <- data.frame(gbif.data$lon,gbif.data$lat) gbif.coordinates <- na.omit(gbif.coordinates)} } ####################################################################################### # Remove duplicates ################################################# colnames(gbif.coordinates)<-c("long","lat") coordinates <- rbind(gbif.coordinates) total <- nrow(coordinates) dups <- duplicated(coordinates[,1:2]) dups <- dups[dups==TRUE] coordinates <- unique(coordinates) ####################################################################################### # Set geographical area ############################################################### x <- coordinates[,1] y <- coordinates[,2] xmin=min(x)-5 xmax=max(x)+5 ymin=min(y)-5 ymax=max(y)+5 ######################################################################################## # Plot Map ############################################################################ map("world", xlim=c(xmin,xmax), ylim=c(ymin,ymax), col="gray60", border="gray60", fill=TRUE, resolution=0) box(which = "plot", lty = "solid", lwd=0.25) axis(side=1,cex.axis=0.4,lwd=0.25) axis(side=2,cex.axis=0.4, lwd=0.25) ######################################################################################## # Compute land and ocean points ####################################################### data(wrld_simpl) specie.pts <- SpatialPoints(coordinates, proj4string=CRS(proj4string(wrld_simpl))) min.distances <- c() pts.on.land <- !is.na(over(specie.pts, wrld_simpl)$FIPS) if (marginal.tolerance==0){ x.ocean <- x[pts.on.land==FALSE] y.ocean <- y[pts.on.land==FALSE] x.land <- x[pts.on.land==TRUE] y.land <- y[pts.on.land==TRUE] } else if (marginal.tolerance>0){ distances <- gDistance(specie.pts,wrld_simpl,byid=TRUE) for (i in 1:length(specie.pts)) {min.distances[i] <- min(distances[,i])} x.ocean <- x[min.distances>marginal.tolerance] x.land <- x[min.distances<=marginal.tolerance] y.ocean <- y[min.distances>marginal.tolerance] y.land <- y[min.distances<=marginal.tolerance] } else if (marginal.tolerance<0) { specie.pts <- specie.pts[pts.on.land==TRUE] mp <- map("world", xlim=c(xmin,xmax), ylim=c(ymin,ymax), col="gray60", border="gray60", fill=TRUE, resolution=0) coastline <- cbind(mp$x, mp$y)[!is.na(mp$x),] coast.pts <- SpatialPoints(coastline, proj4string=CRS(proj4string(wrld_simpl))) distances <- gDistance(specie.pts,coast.pts,byid=TRUE) for (i in 1:length(specie.pts)) {min.distances[i] <- min(distances[,i])} x.ocean <- c(x[pts.on.land==FALSE],specie.pts@coords[min.distances<=abs(marginal.tolerance),1]) y.ocean <- c(y[pts.on.land==FALSE],specie.pts@coords[min.distances<=abs(marginal.tolerance),2]) x.land <- specie.pts@coords[min.distances>abs(marginal.tolerance),1] y.land <- specie.pts@coords[min.distances>abs(marginal.tolerance),2] } ######################################################################################## # Plot ocean points ? ################################################################# if (remove.ocean.points==FALSE){ points(x.ocean, y.ocean, pch=21, col='black', bg='blue', cex=0.2, lwd=0.2) } ######################################################################################## # Plot land points ? ################################################################## if (remove.land.points==FALSE){ points(x.land, y.land, pch=21, col='black', bg='red', cex=0.2, lwd=0.2) } ############################################################################################## # Thinning ################################################################################### # Can take several hours, depending on the number of occurrences. # Function automatically exports a csv file with the thinned dataset containing the maximum # number of records. x.thinned <- c() if (thinning==TRUE) { occurrences <- data.frame(x.ocean,y.ocean, paste(genus, species)) colnames(occurrences) <- c("LONG","LAT","SPEC") thinning.results <- thin(occurrences, thin.par=5, rep=10, write.files=T, out.dir = "./results", locs.thinned.list.return=T) i <- which.max(sapply(thinning.results, nrow)) x.thinned <- thinning.results[[i]][,1] y.thinned <- thinning.results[[i]][,2] xmin=min(x.thinned)-5 xmax=max(x.thinned)+5 ymin=min(y.thinned)-5 ymax=max(y.thinned)+5 map("world", xlim=c(xmin,xmax), ylim=c(ymin,ymax), col="gray60", border="gray60", fill=TRUE, resolution=0) box(which = "plot", lty = "solid", lwd=0.25) axis(side=1,cex.axis=0.8,lwd=0.25) axis(side=2,cex.axis=0.8, lwd=0.25) points(x.thinned, y.thinned, pch=21, col='black', bg='blue', cex=0.6, lwd=0.2) } ######################################################################################## # Save pdf ############################################################################# pdf.name <- paste(genus,'_',species,".pdf",sep="") pdf.file <- file.path(directory,pdf.name) dev.copy2pdf(file=pdf.file) ######################################################################################## # Export data as csv ################################################################### if(export.csv==TRUE){ occurrences <- data.frame(x.ocean,y.ocean) colnames(occurrences) <- c("lon","lat") csv.name <- paste(genus,'_',species,".csv",sep="") csv.file <- file.path(directory,csv.name) write.csv(occurrences, file=csv.file, row.names=FALSE) } if(export.csv==TRUE){ x.thinning <- data.frame(x.thinned) csv.name <- "x.thinned" csv.file <- file.path(directory,csv.name) write.csv(x.thinning, file=csv.file, row.names=FALSE) } if(export.csv==TRUE){ y.thinning <- data.frame(y.thinned) csv.name <- "y.thinned" csv.file <- file.path(directory,csv.name) write.csv(y.thinning, file=csv.file, row.names=FALSE) } x1.thinned <-read.csv("x.thinned") y1.thinned <-read.csv("y.thinned") ######################################################################################## # Print summary table ################################################################## r.summary <- data.frame(c(total,nrow(gbif.coordinates), length(dups),sum(length(x.ocean),length(x.land)), length(x.ocean),length(x.land))) rownames(r.summary) <- c("Total","GBIF","Duplicated","Plotted","Ocean","Land") colnames(r.summary) <- c("RECORDS") r.summary ## END