###################################################################################################################
## Miguel Gandra || m3gandra@gmail.com || April 2015 ##############################################################
###################################################################################################################

# Script to plot a distribution map of a chosen species from GBIF, OBIS and FishBase records.
# http://www.gbif.org


#   ---> no prior file manipulation required
#   ---> automatically distinguishes points on land and sea, plotting them with different colors
#   ---> automatically saves a pdf file with the map on the chosen directory
#   ---> exports a csv file with the fetched coordinates
#   ---> prints a summary of the records

# 1. Define the genus and species name in the variables field (below).
# 2. Set the directory containing the data files (txt from GBIF and csv from OBIS) - user's desktop predefined.

# GBIF data: the script first looks for the "occurrence.txt" file in the chosen directory,
# if it's not available or contains wrong species, the data is downloaded directly from the GBIF portal.


# The "marginal.tolerance" variable sets the margin at which records are considered to be in land.
# A positive value will consider records to be on land, even if they are up to x degrees outside the nearest coast.
# A negative value will constraint land records to the ones that are at least x degrees within the coastline.
# User can automatically delete points in land by setting the "remove.land.points" variable to TRUE,
# or delete points in water by setting the "remove.ocean.points" variable to TRUE.

# for instance if you use an intertidal species, a tolerance of -1 is approapriate

######################################################################################
# Automatically install required libraries  ##########################################
# (check http://tlocoh.r-forge.r-project.org/mac_rgeos_rgdal.html
# if rgeos and rgdal installation fails on a Mac)

if(!require(dismo)){install.packages("dismo"); library(dismo)}
if(!require(XML)){install.packages("XML"); library(XML)}
if(!require(jsonlite)){install.packages("jsonlite"); library(jsonlite)}
if(!require(graphics)){install.packages("graphics"); library(graphics)}
if(!require(maps)){install.packages("maps"); library(maps)}
if(!require(maptools)){install.packages("maptools"); library(maptools)}
if(!require(rgeos)){install.packages("rgeos"); library(rgeos)}
if(!require(rgdal)){install.packages("rgdal"); library(rgdal)}
if(!require(spThin)){install.packages("spThin"); library(spThin)}


######################################################################################
# Variables ##########################################################################

# define the species of YOUR choice
genus <- "Dicentrarchus"
species <- "labrax"



# define the directory where files are to be saved
# YOU NEED TO CHANGE LINES 57 and  58 
setwd("C:/Users/rcastil/Desktop")
directory <- setwd("C:/Users/rcastil/Desktop")

# the following are variables for some tasks
marginal.tolerance <- 0    
remove.land.points <- TRUE # if TRUE points in land are removed
remove.ocean.points <- FALSE # if FALSE points in ocean are kept
export.csv <- TRUE # this is to save a coordinates file
thinning <-TRUE # this is to thin values, spatial thinning helps to 
#reduce the effect of uneven, or biased, species occurrence collections 
# on spatial model outcomes

#######################################################################################
# Get GBIF records from txt file or download them directly from the portal ############

gbif.file<-file.path(directory,"occurrence.txt")

gbif.data <- try(gbif(genus,species,geo=TRUE,removeZeros=TRUE))
  if(class(gbif.data)=="try-error"){
    gbif.coordinates<-data.frame(matrix(ncol=2, nrow=0))
    stop("GBIF data download failed, check internet connection")
  }else{
    gbif.coordinates <- data.frame(gbif.data$lon,gbif.data$lat)
    gbif.coordinates <- na.omit(gbif.coordinates)}



#######################################################################################
# Remove duplicates #################################################

colnames(gbif.coordinates)<-c("long","lat")
coordinates <- rbind(gbif.coordinates)
total <- nrow(coordinates)
dups <- duplicated(coordinates[,1:2])
dups <- dups[dups==TRUE]
coordinates <- unique(coordinates)


#######################################################################################
# Set geographical area ###############################################################

x <- coordinates[,1]
y <- coordinates[,2]

xmin=min(x)-5
xmax=max(x)+5
ymin=min(y)-5
ymax=max(y)+5


########################################################################################
# Plot Map  ############################################################################

map("world", xlim=c(xmin,xmax), ylim=c(ymin,ymax), col="gray60",  border="gray60", fill=TRUE, resolution=0)
box(which = "plot", lty = "solid", lwd=0.25)
axis(side=1,cex.axis=0.4,lwd=0.25)
axis(side=2,cex.axis=0.4, lwd=0.25)


########################################################################################
# Compute land and ocean points  #######################################################

data(wrld_simpl)

specie.pts <- SpatialPoints(coordinates, proj4string=CRS(proj4string(wrld_simpl)))
min.distances <- c()

pts.on.land <- !is.na(over(specie.pts, wrld_simpl)$FIPS)

if (marginal.tolerance==0){
  x.ocean <- x[pts.on.land==FALSE]
  y.ocean <- y[pts.on.land==FALSE]
  x.land <- x[pts.on.land==TRUE]
  y.land <- y[pts.on.land==TRUE]
} else if (marginal.tolerance>0){
  distances <- gDistance(specie.pts,wrld_simpl,byid=TRUE)
  for (i in 1:length(specie.pts)) {min.distances[i] <- min(distances[,i])}
  x.ocean <- x[min.distances>marginal.tolerance]
  x.land <- x[min.distances<=marginal.tolerance]
  y.ocean <- y[min.distances>marginal.tolerance]
  y.land <- y[min.distances<=marginal.tolerance]
} else if (marginal.tolerance<0) {
  specie.pts <- specie.pts[pts.on.land==TRUE]
  mp <- map("world", xlim=c(xmin,xmax), ylim=c(ymin,ymax), col="gray60",  border="gray60", fill=TRUE, resolution=0)
  coastline <- cbind(mp$x, mp$y)[!is.na(mp$x),]
  coast.pts <- SpatialPoints(coastline, proj4string=CRS(proj4string(wrld_simpl)))
  distances <- gDistance(specie.pts,coast.pts,byid=TRUE)
  for (i in 1:length(specie.pts)) {min.distances[i] <- min(distances[,i])}
  x.ocean <- c(x[pts.on.land==FALSE],specie.pts@coords[min.distances<=abs(marginal.tolerance),1])
  y.ocean <- c(y[pts.on.land==FALSE],specie.pts@coords[min.distances<=abs(marginal.tolerance),2])
  x.land <- specie.pts@coords[min.distances>abs(marginal.tolerance),1]
  y.land <- specie.pts@coords[min.distances>abs(marginal.tolerance),2]
}


########################################################################################
# Plot ocean points ?  #################################################################

if (remove.ocean.points==FALSE){
  points(x.ocean, y.ocean, pch=21, col='black', bg='blue', cex=0.2, lwd=0.2) 
}

########################################################################################
# Plot land points ?  ##################################################################

if (remove.land.points==FALSE){
  points(x.land, y.land, pch=21, col='black', bg='red', cex=0.2, lwd=0.2) 
}



##############################################################################################
# Thinning ###################################################################################

# Can take several hours, depending on the number of occurrences.
# Function automatically exports a csv file with the thinned dataset containing the maximum
# number of records.

x.thinned <- c()

if (thinning==TRUE)  {
  
  occurrences <- data.frame(x.ocean,y.ocean, paste(genus, species))
  colnames(occurrences) <- c("LONG","LAT","SPEC")
  
  thinning.results <- thin(occurrences, thin.par=5, rep=10, write.files=T, 
                           out.dir = "./results", locs.thinned.list.return=T)
  
  i <- which.max(sapply(thinning.results, nrow))
  
  x.thinned <- thinning.results[[i]][,1]
  y.thinned <- thinning.results[[i]][,2]
  
  xmin=min(x.thinned)-5
  xmax=max(x.thinned)+5
  ymin=min(y.thinned)-5
  ymax=max(y.thinned)+5
  
  map("world", xlim=c(xmin,xmax), ylim=c(ymin,ymax), col="gray60", border="gray60", fill=TRUE, resolution=0)
  box(which = "plot", lty = "solid", lwd=0.25)
  axis(side=1,cex.axis=0.8,lwd=0.25)
  axis(side=2,cex.axis=0.8, lwd=0.25)
  points(x.thinned, y.thinned, pch=21, col='black', bg='blue', cex=0.6, lwd=0.2) 
  
  
}

########################################################################################
# Save pdf #############################################################################

pdf.name <- paste(genus,'_',species,".pdf",sep="")
pdf.file <- file.path(directory,pdf.name)
dev.copy2pdf(file=pdf.file)


########################################################################################
# Export data as csv ###################################################################

if(export.csv==TRUE){
  occurrences <- data.frame(x.ocean,y.ocean)
  colnames(occurrences) <- c("lon","lat")
  csv.name <- paste(genus,'_',species,".csv",sep="")
  csv.file <- file.path(directory,csv.name)
  write.csv(occurrences, file=csv.file, row.names=FALSE)
}


if(export.csv==TRUE){
  x.thinning <- data.frame(x.thinned)
  csv.name <- "x.thinned"
  csv.file <- file.path(directory,csv.name)
  write.csv(x.thinning, file=csv.file, row.names=FALSE)
}

if(export.csv==TRUE){
  y.thinning <- data.frame(y.thinned)
  csv.name <- "y.thinned"
  csv.file <- file.path(directory,csv.name)
  write.csv(y.thinning, file=csv.file, row.names=FALSE)
}

x1.thinned <-read.csv("x.thinned")
y1.thinned <-read.csv("y.thinned")

########################################################################################
# Print summary table ##################################################################

r.summary <- data.frame(c(total,nrow(gbif.coordinates),
                          length(dups),sum(length(x.ocean),length(x.land)), length(x.ocean),length(x.land)))
rownames(r.summary) <- c("Total","GBIF","Duplicated","Plotted","Ocean","Land")
colnames(r.summary) <- c("RECORDS")
r.summary

## END


#'##############################################################################################################
#'# R scripts from "An Introduction to Best Practices in Species Distribution Modeling" ########################
#'# from Adam B. Smith || Missouri Botanical Garden || 2012-10-15  #############################################
#'# modified and extended by R. Castilho || CCMAR || 2014-09-15  ###############################################
#'# modified and extended by M. Gandra || UALG || 2015-07-10  ##################################################
#'##############################################################################################################

#' MAXENT (Maximum Entropy Modelling)          

#' Goals:
#' 1. Demonstrate how to handle spatial data (points, shapefiles, and rasters) in R
#' 2. Demonstrate how to prepare species' and environmental data
#' 3. Demonstrate "raster-points" method for training Maxent model in R
#' 4. Make a map of predictions


#######################################################################################################
## Read dependences ###################################################################################

#######################################################################################################
## Check maxent.jar file ##############################################################################
## only proceed if the maxent.jar file is available, on the right folder
jar <- file.path(system.file(package="dismo"), "/java/maxent.jar")
if (file.exists(jar)==FALSE) {stop("maxent.jar file not available")}

# download Maxent (www.cs.princeton.edu/~schapire/maxent/)
# use your file system to cut and paste the Maxent jar file into the folder returned by this command:
system.file('java', package='dismo')

#Sys.setenv(NOAWT=TRUE)
# only for certain macs before loading rJava

# options(java.parameters = "-Xmx1g" )
# optional, to set the memory allocated to java, hence maxent (has to be done before loading dismo)



#######################################################################################################
## Automatically install required libraries  ##########################################################
# (check http://tlocoh.r-forge.r-project.org/mac_rgeos_rgdal.html
# if rgeos and rgdal installation fail on a Mac)

if(!require(rJava)){install.packages("rJava"); library(rJava)}
if(!require(dismo)){install.packages("dismo"); library(dismo)}
if(!require(rgeos)){install.packages("rgeos"); library(rgeos)}
if(!require(rgdal)){install.packages("rgdal"); library(rgdal)}
if(!require(raster)){install.packages("raster"); library(raster)}
if(!require(sp)){install.packages("sp"); library(sp)}
if(!require(rgl)){install.packages("rgl"); library(rgl)}
if(!require(virtualspecies)){install.packages("virtualspecies"); library(virtualspecies)}
if(!require(leaflet)){install.packages("leaflet"); library(leaflet)}
if(!require(sdmpredictors)){install.packages("sdmpredictors"); library(sdmpredictors)}



# set the directory containing the data files - user's desktop SDM folder predefined
# YOU NEED TO CHANGE LINE 309 
setwd("C:/Users/rcastil/Desktop")



#######################################################################################################
## Load species occurrences ###########################################################################

# DO YOU HAVE x.thinned and y.thinned in the global environment?
# IF NOT run lines XX and XX

#x.thinned <-read.csv("~/Desktop/x.thinned")
#y.thinned <-read.csv("~/Desktop/y.thinned")

# convert sample points to spatial points 
spoints <- SpatialPoints(cbind(x.thinned,y.thinned))
projection(spoints) <- "+proj=longlat +ellps=WGS84 +datum=WGS84"


#######################################################################################################
## Load predictor rasters #############################################################################
# 1. download layers from https://www.dropbox.com/sh/3la2tp881a7dmhy/AADysQiomFKSAtvT75RwlPN8a?dl=0
# 2 .extract zip file and place asc files on a directory named oracle on you Desktop/SDM directory


list.rasters<-(list.files("which directory", full.names=T, pattern=".asc"))
list.rasters


# make raster "stack" with raster for each predictor 
rasters <- stack(list.rasters)
names(rasters)


# set the coordinate reference system for the raster stack
# this is not absolutely necessary if the rasters are unprojected (e.g., WGS84),
# but we'll do it to avoid warning messages below
projection(rasters) <- CRS("+proj=longlat +datum=WGS84")

# crop rasters from script Occurrence_data.R
xmin=min(x.thinned)-5
xmax=max(x.thinned)+5
ymin=min(y.thinned)-5
ymax=max(y.thinned)+5
limits <- c(xmin, xmax, ymin, ymax)
rasters.crop <- crop(rasters,limits)
#what is the crop function doing?

#it is best to save your cropped rasters now, so you do not need to run the whole script again if something goes wrong.
#writeRaster(rasters.crop, file="raster.crop.grd", format="raster")


#######################################################################################################
## Plot each raster with the sampling points ##########################################################
#k <- length(list.rasters)
k <- length(list.rasters)-22
k
par(mar = c(0.1, 0.1, 0.1, 0.1), mfrow = c(5,5)) 
for (i in 1:k) {
  plot(rasters.crop[[i]],axes=FALSE,legend=FALSE, asp = 1 ,maxnl=23)
  points(spoints, col='red', pch=20, cex=0.4)
  mtext(names(rasters.crop[[i]]), side=4, font=0, line=0, cex=0.6)
}

par(mfrow=c(1,1))


#######################################################################################################
## Co-linearity between  predictors ###################################################################
# PART1
rasters.crop.reduced <- removeCollinearity(rasters.crop, multicollinearity.cutoff = 0.85, plot= TRUE,
                                           select.variables = FALSE, sample.points = FALSE)
groups <- which(sapply(rasters.crop.reduced, length)>1)
groups
rasters.crop.reduced
rasters.crop.reduced[groups]

# THIS IS VERY IMPORTANT
# manually select predictors to retain from each group of co-linear predictors
# from each red box, 
#rasters.crop.reduced[1] <-"Present.Surface.Chlorophyll.Range"
rasters.crop.reduced[2] <-"Present.Surface.Chlorophyll.Range"
rasters.crop.reduced[3] <-"Present.Surface.Phytoplankton.Mean"   
#rasters.crop.reduced[4] <-"XXX"
#rasters.crop.reduced[5] <-"XXX"
#rasters.crop.reduced[6] <-"XXX"
#rasters.crop.reduced[7] <-"XXX"   
#rasters.crop.reduced[8] <-"XXX"
rasters.crop.reduced[9] <-"Present.Surface.Temperature.Min"   
rasters.crop.reduced[10] <-"Present.Surface.Temperature.Mean"     
#rasters.crop.reduced[11] <-"XXX"  
rasters.crop.reduced[12]<-"Present.Surface.Nitrate.Range"
rasters.crop.reduced[13] <-"Present.Surface.Nitrate.Mean"  
#rasters.crop.reduced[14]<-"Present.Surface.Iron.Mean"
rasters.crop.reduced[15]<-"Present.Surface.Phosphate.Mean"  
#rasters.crop.reduced[16]<-"XXX"   
#rasters.crop.reduced[17] <-"XXX"  
rasters.crop.reduced[18] <-"Present.Surface.Phytoplankton.Min"  
rasters.crop.reduced[19]<-"Present.Surface.Salinity.Mean"
#rasters.crop.reduced[20] <-"XXX"  
rasters.crop.reduced[21]<-"Present.Surface.Silicate.Mean"  
#rasters.crop.reduced[22]<-"XXX"   
#rasters.crop.reduced[23]<-"XXX"   



rasters.crop.reduced
rasters.crop.reduced <- unlist(rasters.crop.reduced)

# subset selected layers from the raster stack
rasters.selected <- subset(rasters.crop, rasters.crop.reduced)

# PART2
rasters.crop.reduced.1 <- removeCollinearity(rasters.selected, multicollinearity.cutoff = 0.75, plot= TRUE,
                                             select.variables = FALSE, sample.points = FALSE)
groups.1 <- which(sapply(rasters.crop.reduced.1, length)>1)
groups.1
rasters.crop.reduced.1
rasters.crop.reduced.1[2] <-"Present.Surface.Phytoplankton.Mean"
rasters.crop.reduced.1[3] <-"Present.Surface.Phytoplankton.Min"   
rasters.crop.reduced.1[8] <-"Present.Surface.Temperature.Mean"
rasters.crop.reduced.1[9] <-"Present.Surface.Dissolved.oxygen.Range"
rasters.crop.reduced.1[10] <-"Present.Surface.Nitrate.Range"
rasters.crop.reduced.1[13] <-"Present.Surface.Phosphate.Mean"

rasters.crop.reduced.1
rasters.crop.reduced.1 <- unlist(rasters.crop.reduced.1)

# subset selected layers from the raster stack
rasters.selected.1 <- subset(rasters.crop, rasters.crop.reduced.1)

# Check result
# PART3

rasters.crop.reduced.2 <- removeCollinearity(rasters.selected.1, multicollinearity.cutoff = 0.75, plot= TRUE,
                                             select.variables = FALSE, sample.points = FALSE)

groups.2 <- which(sapply(rasters.crop.reduced.2, length)>1)
groups.2
rasters.crop.reduced.2
rasters.crop.reduced.2[9] <-"Present.Surface.Temperature.Range"
rasters.crop.reduced.2[10] <-"Present.Surface.Nitrate.Range"   

rasters.crop.reduced.2
rasters.crop.reduced.2 <- unlist(rasters.crop.reduced.2)

# subset selected layers from the raster stack
rasters.selected.2 <- subset(rasters.crop, rasters.crop.reduced.2)

rasters.selected <- rasters.selected.2
#######################################################################################################
## Extract values from rasters ########################################################################

# presence values
presvals <- extract(rasters.selected, spoints)

# setting random seed to always create the same random set of points for this example
set.seed(1963)

# background values
backgr <- randomPoints(rasters.selected, 1000)
absvals <- extract(rasters.selected, backgr)

# create dataframe with presence/background values 
# the first column (variable â€™pbâ€™) indicates whether this is a presence or a background point
pb <- c(rep(1, nrow(presvals)), rep(0, nrow(absvals)))
sdmdata.present <- data.frame(cbind(pb, rbind(presvals, absvals)))
sdmdata.present <- sdmdata.present[complete.cases(sdmdata.present),]


#######################################################################################################
## Generalized linear model (GLM) #####################################################################

# a significant intercept in a model only means that there is also a constant in the model, 
# in addition to all other explanatory variables.

# model with all variables
model.present <- glm(pb ~ .,data=sdmdata.present) 
summary(model.present)

# automatically selects significant variables
significant.vars <- summary(model.present)$coeff[-1,4] < 0.05
significant.vars <- names(significant.vars)[significant.vars == TRUE] 
significant.vars

# if the number of variables is still too high, change < 0.05 to 0.001
significant.vars <- summary(model.present)$coeff[-1,4] < 0.01
significant.vars <- names(significant.vars)[significant.vars == TRUE] 
significant.vars

# uncomment the following lines if you want to manually add or delete variables to customize predictors set
#significant.vars <- c(significant.vars, 'sstmin')
#significant.vars<-significant.vars[-c(3,4,7)]
#significant.vars

reduced.sdmdata.present <- subset(sdmdata.present, select=c('pb', significant.vars))

# run the regression with only significant variables
reduced.model.present<-glm(pb ~ .,data=reduced.sdmdata.present)
summary(reduced.model.present)


#######################################################################################################
## Model selection - AIC (Akaike information criterion) ###############################################
# the preferred model is the one with the minimum AIC value

aic <- AIC(model.present, reduced.model.present)
aic
selected.model <- row.names(aic)[which.min(aic$AIC)]
selected.model


#######################################################################################################
## Maxent model #######################################################################################

# Call Maxent using the "raster-points" format (x=a raster stack and p=a two-column matrix of coordinates).
# Only x and p are really required, but I'm showing many of the commands in case you want to tweak some later.  

# All of the "args" are set to their default values except for "randomtestpoints" which says to randomly 
# select 30% of the species' records and use these as test sites (the default for this is 0). 

# All other arguments are set to the defaults except "threads" which can be set up to the number of cores 
# you have on your computer to speed things up... to see more "args" see bottom of "Help" file from the Maxent program.


if(selected.model=='reduced.model.present'){rasters.final<-subset(rasters.selected, significant.vars)}
if(selected.model=='model.present') {rasters.final<-rasters.selected} 


model.maxent <- maxent(
  x=rasters.final,
  p=spoints,
  a=backgr,
  args=c(
    'randomtestpoints=30',
    'betamultiplier=1',
    'linear=true',
    'quadratic=true',
    'product=true',
    'threshold=true',
    'hinge=true',
    'threads=2',
    'responsecurves=true',
    'jackknife=true',
    'askoverwrite=false'
  )
)

# look at model output (HTML page)
model.maxent

# variable contribution
plot(model.maxent)

# model evaluation
evaluate(model.maxent, x=rasters.final, p=spoints, a=backgr)


#######################################################################################################
## Maxent prediction ##################################################################################

# Note that you could save the prediction raster in any number of formats (see ?writeFormats),
# but GeoTiffs are small and can be read by ArcMap... 
# ASCIIs can also be read by other programs but are large... 
# The default raster format (GRD) sometimes can't be read by ArcMap...
# If you don't specifiy a file name then the results are written to R's memory

global <- extent(-180, 180, -90, 90)

rasters.pred <- subset(rasters, rasters.crop.reduced)
rasters.pred <- subset(rasters.pred, significant.vars)

map.maxent <- predict(
  object=rasters.pred,
  model=model.maxent,
  na.rm=TRUE,
  #format='GTiff',
  #filename= "./results/Prediction_raster.GTiff",
  #overwrite=TRUE,
  progress='text',
  ext=global
)


pdf("./results/Prediction_global.pdf", width = 16, height = 9)
plot(map.maxent, mar=c(0,0,0,0), zlim = c(0,1), axes=F, maxpixels=ncell(map.maxent))
#scalebar(d=2000, xy=c(140, -60), type="bar", below="km", lonlat=TRUE, cex=0.7)
dev.off()

pdf("./results/Prediction_original1.pdf")
plot(map.maxent, mar=c(0,0,0,0), zlim = c(0,1), axes=F, maxpixels=ncell(map.maxent), ext=limits)
points(x, y, pch=21, col=adjustcolor("black", alpha=0.6), bg=adjustcolor("blue", alpha=0.8), cex=0.6, lwd=0.2) 
dev.off()

e1<-extent(130, 180, -55, -20)
pdf("./results/Prediction_local1.pdf")
plot(map.maxent, mar=c(0,0,0,0), zlim = c(0,1), axes=F, maxpixels=ncell(map.maxent), ext=e1)
dev.off()

e2<-extent(-85, -48, -58, -25)
pdf("./results/Prediction_local2.pdf")
plot(map.maxent, mar=c(0,0,0,0), zlim = c(0,1), axes=F, maxpixels=ncell(map.maxent), ext=e2)
dev.off()


################################################################################################################
################################################################################################################
################################################################################################################