#------------------------------------------------------------------- # Miguel Gandra || m3gandra@gmail.com || July 2015 # modified by Rita Castilho #------------------------------------------------------------------- # Script to plot a distribution map of a chosen species from GBIF, OBIS and FishBase records. # http://www.gbif.org # http://iobis.org # http://www.fishbase.org # ---> no prior file manipulation required # ---> automatically distinguishes points georeferenced within land and ocean surfaces # ---> possibility to manually remove points with an interactive function # ---> includes of a spatial thinning algorithm to reduce sampling bias # ---> possibility to save a map with the curated records # ---> prints a summary of the records # 1. Define the genus and species name in the variables field (below). # 2. Set the directory containing the data files (txt from GBIF and csv from OBIS) - user's desktop predefined. # GBIF data: the script first looks for the "occurrence.txt" file in the chosen directory, # if it's not available or contains wrong species, the data is downloaded directly from the GBIF portal. # OBIS data: the script looks for a csv file in the chosen directory (if multiple csv files are found # it uses the first one listed). # FishBase data: the script downloads the data directly from the web. # The "marginal.tolerance" variable sets the margin at which records are considered to be in land. # A positive value will consider records to be on land, even if they are up to x degrees outside the nearest coast. # A negative value will constraint land records to the ones that are at least x degrees within the coastline. # User can automatically delete points in land by setting the "remove.land.points" variable to TRUE, # or delete points in water by setting the "remove.ocean.points" variable to TRUE. #--------------------------------------------------------------------------------------------- # Automatically install required libraries ################################################## # (check http://tlocoh.r-forge.r-project.org/mac_rgeos_rgdal.html # if rgeos and rgdal installation fails on a Mac) if(!require(dismo)){install.packages("dismo"); library(dismo)} if(!require(XML)){install.packages("XML"); library(XML)} if(!require(jsonlite)){install.packages("jsonlite"); library(jsonlite)} if(!require(graphics)){install.packages("graphics"); library(graphics)} if(!require(maps)){install.packages("maps"); library(maps)} if(!require(mapdata)){install.packages("mapdata"); library(mapdata)} if(!require(maptools)){install.packages("maptools"); library(maptools)} if(!require(rgeos)){install.packages("rgeos"); library(rgeos)} if(!require(rgdal)){install.packages("rgdal"); library(rgdal)} # set working directory XXXXX #------------------------------------------------------------------- # Variables ######################################################## genus <- "Salaria" species <- "pavo" marginal.tolerance <- 0 remove.land.points <- TRUE remove.ocean.points <- FALSE #------------------------------------------------------------------- # Get GBIF records from txt file or download them directly from the portal ################### gbif.data <- try(gbif(genus,species,geo=TRUE,removeZeros=TRUE)) gbif.coordinates<-data.frame(matrix(ncol=2, nrow=0)) gbif.coordinates <- data.frame(gbif.data$lon,gbif.data$lat) gbif.coordinates <- na.omit(gbif.coordinates) #------------------------------------------------------------------- ############################################################################################## # Merge records and remove duplicates ######################################################## colnames(gbif.coordinates)<-c("long","lat") coordinates <- rbind(gbif.coordinates) dups <- duplicated(coordinates[,1:2]) dups <- dups[dups==TRUE] coordinates <- unique(coordinates) #------------------------------------------------------------------- # Set geographical area ###################################################################### x <- coordinates[,1] y <- coordinates[,2] xmin=round(min(x)-5) xmax=round(max(x)+5) ymin=round(min(y)-5) ymax=round(max(y)+5) #------------------------------------------------------------------- # Identify points georeferenced in land and ocean surfaces ################################## mp <- map("worldHires", xlim=c(xmin,xmax), ylim=c(ymin,ymax), fill=TRUE, plot=FALSE, resolution=0) IDs <- sapply(strsplit(mp$names, ":"), function(x) x[1]) mp <- map2SpatialPolygons(mp, IDs=IDs, proj4string=CRS("+proj=longlat +datum=WGS84")) # due to the computational power required to calculate distances, # the lower resolution 'world' dataset was preferred mp.lowres <- map("worldHires", xlim=c(xmin,xmax), ylim=c(ymin,ymax), fill=TRUE, plot=FALSE, resolution=0) coastline <- cbind(mp.lowres$x, mp.lowres$y)[!is.na(mp.lowres$x),] IDs <- sapply(strsplit(mp.lowres$names, ":"), function(x) x[1]) mp.lowres <- map2SpatialPolygons(mp.lowres, IDs=IDs, proj4string=CRS("+proj=longlat +datum=WGS84")) species.pts <- SpatialPoints(coordinates, proj4string=CRS(proj4string(mp))) min.distances <- c() pts.on.land <- !is.na(over(species.pts, mp)) if (marginal.tolerance==0){ x.ocean <- x[pts.on.land==FALSE] y.ocean <- y[pts.on.land==FALSE] x.land <- x[pts.on.land==TRUE] y.land <- y[pts.on.land==TRUE] } else if (marginal.tolerance>0){ distances <- gDistance(species.pts, mp.lowres, byid=TRUE) for (i in 1:length(species.pts)) {min.distances[i] <- min(distances[,i])} x.ocean <- x[min.distances>marginal.tolerance] x.land <- x[min.distances<=marginal.tolerance] y.ocean <- y[min.distances>marginal.tolerance] y.land <- y[min.distances<=marginal.tolerance] } else if (marginal.tolerance<0) { species.pts <- species.pts[pts.on.land==TRUE] coast.pts <- SpatialPoints(coastline, proj4string=CRS(proj4string(mp))) distances <- gDistance(species.pts,coast.pts,byid=TRUE) for (i in 1:length(species.pts)) {min.distances[i] <- min(distances[,i])} x.ocean <- c(x[pts.on.land==FALSE],species.pts@coords[min.distances<=abs(marginal.tolerance),1]) y.ocean <- c(y[pts.on.land==FALSE],species.pts@coords[min.distances<=abs(marginal.tolerance),2]) x.land <- species.pts@coords[min.distances>abs(marginal.tolerance),1] y.land <- species.pts@coords[min.distances>abs(marginal.tolerance),2] } # ocean xmin.ocean=round(min(x.ocean)-5) xmax.ocean=round(max(x.ocean)+5) ymin.ocean=round(min(y.ocean)-5) ymax.ocean=round(max(y.ocean)+5) #------------------------------------------------------------------- # Plot Map ################################################################################## # ocean map("worldHires", xlim=c(xmin.ocean,xmax.ocean), ylim=c(ymin.ocean,ymax.ocean), col="gray60", border="gray60", fill=TRUE, resolution=0) box(which = "plot", lty = "solid", lwd=0.25) axis(side=1,cex.axis=0.8,lwd=0.25) axis(side=2,cex.axis=0.8, lwd=0.25) #scalebar(d=500, xy=c(-5, 63), type="bar", below="km", lonlat=TRUE, cex=0.6) if (remove.land.points==FALSE){ points(x.land, y.land, pch=21, col='black', bg='darkred', cex=0.6, lwd=0.2) } if (remove.ocean.points==FALSE){ points(x.ocean, y.ocean, pch=21, col='black', bg='blue2', cex=0.6, lwd=0.2) } #------------------------------------------------------------------- # Print summary table ---------------------------------------------- r.summary <- data.frame(c(nrow(gbif.coordinates), length(dups), length(x.ocean), length(x.land))) row.names(r.summary) <- c("GBIF","Duplicated","Ocean","Land") colnames(r.summary) <- c("RECORDS") write.table(r.summary, file="r.summary.txt", sep="\t", col.names=TRUE, row.names=TRUE) ## for next class xy <- rbind(x.ocean,y.ocean) xy <- t(xy) col.names=c("long","lat") write.table(xy, file = "curated_xy.csv", sep = ",", col.names=col.names,row.names = FALSE, quote=FALSE) #------------------------------------------------------------------- #------------------------------------------------------------------- #-------------------------------------------------------------------