#'##############################################################################################################
#'# R scripts from "An Introduction to Best Practices in Species Distribution Modeling" ########################
#'# from Adam B. Smith || Missouri Botanical Garden || 2012-10-15  #############################################
#'# modified and extended by R. Castilho || CCMAR || 2014-09-15  ###############################################
#'# modified and extended by M. Gandra || UALG || 2015-07-10  ##################################################
#'##############################################################################################################

#' MAXENT (Maximum Entropy Modelling)          

#' Goals:
#' 1. Demonstrate how to handle spatial data (points, shapefiles, and rasters) in R
#' 2. Demonstrate how to prepare species' and environmental data
#' 3. Demonstrate "raster-points" method for training Maxent model in R
#' 4. Make a map of predictions


#######################################################################################################
## Read dependences ###################################################################################

# install Sun's Java JRE from www.oracle.com/technetwork/java/javase/downloads/
# download Maxent (www.cs.princeton.edu/~schapire/maxent/)
# use your file system to cut and paste the Maxent jar file into the folder returned by this command:
system.file('java', package='dismo')

# Sys.setenv(NOAWT=TRUE)
# only for certain macs before loading rJava


#######################################################################################################
## Check maxent.jar file ##############################################################################
## only proceed if the maxent.jar file is available, on the right folder

jar <- file.path(system.file(package="dismo"), "/java/maxent.jar")
if (file.exists(jar)==FALSE) {stop("maxent.jar file not available")}


#######################################################################################################
## Automatically install required libraries  ##########################################################
# (check http://tlocoh.r-forge.r-project.org/mac_rgeos_rgdal.html
# if rgeos and rgdal installation fail on a Mac)

check.packages <- function(pkg){
  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
  if (length(new.pkg))
    install.packages(new.pkg, dependencies = TRUE)
  sapply(pkg, require, character.only = TRUE)
}

packages<-c("rJava", "dismo", "rgeos", "rgdal", "raster", 
            "sp", "rgl", "virtualspecies", "leaflet", 
            "sdmpredictors", "usdm","sf","tidyverse","caret")

check.packages(packages)

# set the directory containing the data files - user's desktop SDM folder predefined
root <- setwd("~/Desktop/maxent")
list.files()

#######################################################################################################
## Load species occurrences ###########################################################################

coordinates <-read.csv("Diplodus_bellottii.csv")
x <- coordinates$lat
y <- coordinates$lon

# convert sample points to spatial points 
spoints <- SpatialPoints(cbind(y,x))


projection(spoints) <- "+proj=longlat +ellps=WGS84 +datum=WGS84"


#######################################################################################################
## Load predictor rasters #############################################################################
# 1. download layers from http://www.bio-oracle.org/downloads-to-email.php
# 2 .extract zip file and place asc files on a directory named oracle on you Desktop/SDM directory

# pay attention where you have your data!!
list.rasters<-(list.files("./layers", full.names=T, pattern=".tif"))
list.rasters

# make raster "stack" with raster for each predictor 
rasters <- stack(list.rasters)
names(rasters)

# set the coordinate reference system for the raster stack
# this is not absolutely necessary if the rasters are unprojected (e.g., WGS84),
# but we'll do it to avoid warning messages below
projection(rasters) <- CRS("+proj=longlat +datum=WGS84")

# crop rasters from script Occurrence_data.R
xmin=min(x)-5
xmax=max(x)+5
ymin=min(y)-5
ymax=max(y)+5
limits <- c(ymin, ymax,xmin, xmax)
rasters.crop <- crop(rasters,limits)
#what is the crop function doing?

#it is best to save your cropped rasters now, so you do not need to run the whole script again if something goes wrong.
writeRaster(rasters.crop, file="raster.crop.grd", format="raster")

#######################################################################################################
## Plot each raster with the sampling points ##########################################################
# !! be aware, if you have LOTS od rasters this will prodice a graph that is not helpfull
k <- length(list.rasters)
k
par(mar = c(0.1, 0.1, 0.1, 0.1), mfrow = c(5,5)) 
for (i in 1:k) {
  plot(rasters.crop[[i]],axes=FALSE,legend=FALSE, asp = 1 ,maxnl=23)
  points(spoints, col='red', pch=20, cex=0.4)
  mtext(names(rasters.crop[[i]]), side=4, font=0, line=0, cex=0.6)
}

#par(mfrow=c(1,1))

#######################################################################################################
## Co-linearity between  predictors ###################################################################
# this part of the script needs to be edited according to results
# VIF (usdm)
# vifcor finds a pair of variables which has the maximum linear correlation (greater than th), 
# and excludes one of them which has greater VIF

raster.vectors <- as.matrix(rasters.crop)
VIF <- vifstep(raster.vectors,th=2)
VIF
rasters.VIF <- exclude(rasters.crop,VIF)
rasters.selected <- rasters.VIF

#######################################################################################################
## Extract values from rasters ########################################################################

# presence values
presvals <- raster::extract(rasters.selected, spoints)
# setting random seed to always create the same random set of points for this example
set.seed(1963)

# background values
backgr <- randomPoints(rasters.selected, 1000)
absvals <- raster::extract(rasters.selected, backgr)

# create dataframe with presence/background values 
# the first column (variable ) indicates whether this is a presence or a background point
pb <- c(rep(1, nrow(presvals)), rep(0, nrow(absvals)))
sdmdata.present <- data.frame(cbind(pb, rbind(presvals, absvals)))
sdmdata.present <- sdmdata.present[complete.cases(sdmdata.present),]


#######################################################################################################
## some variables may not significantly contribute to the model, therefore lets choose!
## Generalized linear model (GLM) #####################################################################

# a significant intercept in a model only means that there is also a constant in the model, 
# in addition to all other explanatory variables.

# model with all variables
model.present <- glm(pb ~ .,data=sdmdata.present) 
summary(model.present)

# automatically selects significant variables
significant.vars <- summary(model.present)$coeff[-1,4] < 0.05
#[-1,4] means all rows except the first one, and only the 4th column.

significant.vars <- names(significant.vars)[significant.vars == TRUE] 
significant.vars

# if the number of variables is still too high, change < 0.05 to 0.001
#significant.vars <- summary(model.present)$coeff[-1,4] < 0.0001
#significant.vars <- names(significant.vars)[significant.vars == TRUE] 
#significant.vars

# uncomment the following lines if you want to manually add or delete variables to customize predictors set
#significant.vars <- c(significant.vars, 'sstmin')
#significant.vars<-significant.vars[-c(3,4,7)]
#significant.vars

reduced.sdmdata.present <- subset(sdmdata.present, select=c('pb', significant.vars))

# run the regression with only significant variables
reduced.model.present<-glm(pb ~ .,data=reduced.sdmdata.present)
summary(reduced.model.present)


#######################################################################################################
## Model selection - AIC (Akaike information criterion) ###############################################
# the preferred model is the one with the minimum AIC value

aic <- AIC(model.present, reduced.model.present)
aic
selected.model <- row.names(aic)[which.min(aic$AIC)]
selected.model


#######################################################################################################
## Maxent model #######################################################################################

# Call Maxent using the "raster-points" format (x=a raster stack and p=a two-column matrix of coordinates).
# Only x and p are really required, but I'm showing many of the commands in case you want to tweak some later.  

# All of the "args" are set to their default values except for "randomtestpoints" which says to randomly 
# select 30% of the species' records and use these as test sites (the default for this is 0). 

# All other arguments are set to the defaults except "threads" which can be set up to the number of cores 
# you have on your computer to speed things up... to see more "args" see bottom of "Help" file from the Maxent program.


if(selected.model=='reduced.model.present'){rasters.final<-subset(rasters.selected, significant.vars)}
if(selected.model=='model.present') {rasters.final<-rasters.selected} 


model.maxent <- maxent(
  x=rasters.final,
  p=spoints,
  a=backgr,
  args=c(
    'randomtestpoints=30',
    'betamultiplier=1',
    'linear=true',
    'quadratic=true',
    'product=true',
    'threshold=true',
    'hinge=true',
    'threads=2',
    'responsecurves=true',
    'jackknife=true',
    'askoverwrite=false'
  )
)

# look at model output (HTML page)
model.maxent

# variable contribution
plot(model.maxent)

# model evaluation
evaluate(model.maxent, x=rasters.final, p=spoints, a=backgr)


#######################################################################################################
## Maxent prediction ##################################################################################

# Note that you could save the prediction raster in any number of formats (see ?writeFormats),
# but GeoTiffs are small and can be read by ArcMap... 
# ASCIIs can also be read by other programs but are large... 
# The default raster format (GRD) sometimes can't be read by ArcMap...
# If you don't specifiy a file name then the results are written to R's memory

global <- extent(-180, 180, -90, 90)

map.maxent <- predict(
  object=rasters.final,
  model=model.maxent,
  na.rm=TRUE,
  #format='GTiff',
  #filename= "./results/Prediction_raster.GTiff",
  #overwrite=TRUE,
  progress='text',
  ext=global
)


pdf("Prediction_global.pdf", width = 16, height = 9)
plot(map.maxent, mar=c(0,0,0,0), zlim = c(0,1), axes=F, maxpixels=ncell(map.maxent))
#scalebar(d=2000, xy=c(140, -60), type="bar", below="km", lonlat=TRUE, cex=0.7)
dev.off()


################################################################################################################
################################################################################################################
################################################################################################################