4 Generalized Additive Models

Data used in this analysis can be found in the github repo in the following files: FileS4_Impact_ProxyData.xlsx, FileS5_Reference_ProxyData.xlsx. Access the files here. Each proxy should have a seperate file containing z_score data, which I used in the analysis.

4.1 Getting Started

Install the following packages

mgcv, analogue, ggplot2, mvnfast, gratia, viridis

If you’re using ubuntu, run this first to install the dependency RCPArmadillo and mvnfast for gratia

sudo apt-get update -y 
sudo apt-get install -y r-cran-rcpparmadillo
sudo apt-get install -y r-cran-mvnfast

Load packages

library(mgcv) # For gams
library(ggplot2) # For plotting
library(analogue) # For general use and support of other packages
library(gratia) # For derivatives to identify significant periods of change
library(viridis) # Package to use colours

4.2 Fitting GAMs using mgcv

Here I use the mixed model approach via restricted maximum likelihood (REML).

Start by loading a local function for first derivatives

signifD <- function(x, d, upper, lower, eval = 0) {
  miss <- upper > eval & lower < eval
  incr <- decr <- x
  want <- d > eval
  incr[!want | miss] <- NA
  want <- d < eval
  decr[!want | miss] <- NA
  list(incr = incr, decr = decr)
}

4.2.1 Chlorophyll a data

Read the chlorophyll a data into R:

pd_chlA <-read.csv("chlA.csv")
head(pd_chlA)

Check that data looks correct

4.2.1.1 Apply the REML function to your data

chlA_gam<-gam(chlA~s(year, k = 26), data= pd_chlA, method="REML") 
gam.check(chlA_gam)

Plot your results

plot(chlA_gam)

And now add standard error

fit_gcv<- predict(chlA_gam, data = pd_chlA, se.fit = TRUE, length.out=200)
crit.t <- qt(0.975, df.residual(chlA_gam))
chlA_new <- data.frame(Year = pd_chlA[["year"]],
                        fit = fit_gcv$fit,
                        se.fit = fit_gcv$se.fit)

chlA_new <- transform(chlA_new,
                       upper = fit + (crit.t * se.fit),
                       lower = fit - (crit.t * se.fit))

4.2.1.2 Transform your data

You need equal lengths everywhere (i.e., 200 rows) for the following functions to work

chlA_new_sigperiod<-data.frame(approx(chlA_new$Year, chlA_new$fit, n=200)) 

colnames(chlA_new_sigperiod)<-c("Year", "fit")

Check that it looks right using head(chlA_new_sigperiod)

4.2.1.3 Identify significant changes

Identify derivatives of significant value

chlA_der<-derivatives(chlA_gam, type="central", n=200)
draw(chlA_der)

chlA_sig<-signifD(chlA_new_sigperiod$fit, d= chlA_der$derivative, chlA_der$upper, chlA_der$lower)

chlA_sig

4.2.1.4 Plot the results

chlA<-ggplot()+
  geom_ribbon(data=chlA_new, mapping= aes(x= Year, ymax= upper, ymin=lower), fill="#b3e0ff", inherit.aes = FALSE, alpha=0.5)+
  geom_line(data= chlA_new, mapping=aes(y= fit, x= Year), colour="lightblue4")+
  geom_line(data= chlA_new_sigperiod, mapping=aes(y=BTP_chlA_sig$decr, x=Year), colour="deepskyblue4", size=1)+ ## Periods of increase
  geom_line(data=chlA_new_sigperiod, mapping=aes(y=BTP_chlA_sig$incr, x=Year), colour="deepskyblue4", size=1)+ ## Periods of decrease
  geom_point(data=pd_chlA, aes(y=chlA, x= year), shape= 21, fill= "#0099cc", colour="lightblue4", size = 0.5)+
  scale_x_continuous(breaks=seq(1000,2022, by= 100), limits = c(1000,2022))+
  ylim(c(-0.01, 0.16))+
  labs(x=NULL, y=NULL) +
  theme_classic() + 
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.line.x=element_blank(), text = element_text(size = 10, face = "bold"))

chlA

Now do the same for every other proxy, here my dataset includes data for d15N, d13C, Phosphorus, Zinc/Al, Cadmium/Al, and relative abundance of two dominant diatom species.

4.2.2 d15N

pd_d15N <-read.csv("d15N.csv")
head(pd_d15N) #Check that data looks correct

d15N_gam<-gam(d15N~s(year, k = 20), data= pd_d15N, method="REML") 
gam.check(d15N_gam)
plot(d15N_gam)

fit_gcv<- predict(d15N_gam, data = pd_d15N, se.fit = TRUE, length.out=200)
crit.t <- qt(0.975, df.residual(d15N_gam))
d15N_new <- data.frame(Year = pd_d15N[["year"]],
                      fit = fit_gcv$fit,
                      se.fit = fit_gcv$se.fit)

d15N_new <- transform(d15N_new,
                     upper = fit + (crit.t * se.fit),
                     lower = fit - (crit.t * se.fit))


d15N_new_sigperiod<-data.frame(approx(d15N_new$Year, d15N_new$fit, n=200)) 
colnames(d15N_new_sigperiod)<-c("Year", "fit")
head(d15N_new_sigperiod) # Check that it looks right. 

BTP_d15N_der<-derivatives(d15N_gam, type="central", n=200)
draw(BTP_d15N_der)

BTP_d15N_sig<-signifD(d15N_new_sigperiod$fit, d= BTP_d15N_der$derivative, BTP_d15N_der$upper, BTP_d15N_der$lower)
BTP_d15N_sig

d15N<-ggplot()+
  geom_ribbon(data=d15N_new, mapping= aes(x= Year, ymax= upper, ymin=lower), fill="#b3e0ff", inherit.aes = FALSE, alpha=0.5)+
  geom_line(data= d15N_new, mapping=aes(y= fit, x= Year), colour="lightblue4")+
  geom_line(data= d15N_new_sigperiod, mapping=aes(y=BTP_d15N_sig$decr, x=Year), colour="deepskyblue4", size=1)+ ## Periods of increase
  geom_line(data=d15N_new_sigperiod, mapping=aes(y=BTP_d15N_sig$incr, x=Year), colour="deepskyblue4", size=1)+ ## Periods of decrease
  geom_point(data=pd_d15N, aes(y= d15N, x= year), shape= 21, fill= "#0099cc", colour="lightblue4", size = 0.5)+
  scale_x_continuous(breaks=seq(1000,2022, by= 100), limits = c(1000,2022))+
  ylim(c(-2, 13))+
  labs(x=NULL, y=NULL) +
  theme_classic() + 
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.line.x=element_blank(), text = element_text(size = 10, face = "bold"))

d15N

4.2.3 d13C

pd_d13C <-read.csv("d13C.csv")
head(pd_d13C) #Check that data looks correct

d13C_gam<-gam(d13C~s(year, k = 19), data= pd_d13C, method="REML") 
gam.check(d13C_gam)
plot(d13C_gam)

fit_gcv<- predict(d13C_gam, data = pd_d13C, se.fit = TRUE, length.out=200)
crit.t <- qt(0.975, df.residual(d13C_gam))
d13C_new <- data.frame(Year = pd_d13C[["year"]],
                        fit = fit_gcv$fit,
                        se.fit = fit_gcv$se.fit)

d13C_new <- transform(d13C_new,
                       upper = fit + (crit.t * se.fit),
                       lower = fit - (crit.t * se.fit))

d13C_new_sigperiod<-data.frame(approx(d13C_new$Year, d13C_new$fit, n=200)) 
colnames(d13C_new_sigperiod)<-c("Year", "fit")
head(d13C_new_sigperiod) 

BTP_d13C_der<-derivatives(d13C_gam, type="central", n=200)
draw(BTP_d13C_der)

BTP_d13C_sig<-signifD(d13C_new_sigperiod$fit, d= BTP_d13C_der$derivative, BTP_d13C_der$upper, BTP_d13C_der$lower)
BTP_d13C_sig

d13C<-ggplot()+
  geom_ribbon(data=d13C_new, mapping= aes(x= Year, ymax= upper, ymin=lower), fill="#b3e0ff", inherit.aes = FALSE, alpha=0.5)+
  geom_line(data= d13C_new, mapping=aes(y= fit, x= Year), colour="lightblue4")+
  geom_line(data= d13C_new_sigperiod, mapping=aes(y=BTP_d13C_sig$decr, x=Year), colour="deepskyblue4", size=1)+ ## Periods of increase
  geom_line(data=d13C_new_sigperiod, mapping=aes(y=BTP_d13C_sig$incr, x=Year), colour="deepskyblue4", size=1)+ ## Periods of decrease
  geom_point(data=pd_d13C, aes(y= d13C, x= year), shape= 21, fill= "#0099cc", colour="lightblue4", size = 0.5)+
  theme(text = element_text(size = 10, face = "bold")) +
  ylab("d13C")+
  xlab("year Year (cm)")+
  scale_x_continuous(breaks=seq(1000,2022, by= 100), limits = c(1000,2022))+
  ylim(c(-28, -23))+
  labs(x=NULL, y=NULL) +
  theme_classic() + 
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.line.x=element_blank(), text = element_text(size = 10, face = "bold"))

d13C

4.2.4 Phosphorus

pd_metals <-read.csv("metals.csv")
head(pd_metals) 

P_gam<-gam(P~s(year, k = 15), data= pd_metals, method="REML") 
gam.check(P_gam)
plot(P_gam)

fit_gcv<- predict(P_gam, data = pd_metals, se.fit = TRUE, length.out=200)
crit.t <- qt(0.975, df.residual(P_gam))
P_new <- data.frame(Year = pd_metals[["year"]],
                        fit = fit_gcv$fit,
                        se.fit = fit_gcv$se.fit)

P_new <- transform(P_new,
                       upper = fit + (crit.t * se.fit),
                       lower = fit - (crit.t * se.fit))

P_new_sigperiod<-data.frame(approx(P_new$Year, P_new$fit, n=200)) 
colnames(P_new_sigperiod)<-c("Year", "fit")

BTP_P_der<-derivatives(P_gam, type="central", n=200)
draw(BTP_P_der)

BTP_P_sig<-signifD(P_new_sigperiod$fit, d= BTP_P_der$derivative, BTP_P_der$upper, BTP_P_der$lower)
BTP_P_sig

P<-ggplot()+
  geom_ribbon(data=P_new, mapping= aes(x= Year, ymax= upper, ymin=lower), fill="#b3e0ff", inherit.aes = FALSE, alpha=0.5)+
  geom_line(data= P_new, mapping=aes(y= fit, x= Year), colour="lightblue4")+
  geom_line(data= P_new_sigperiod, mapping=aes(y=BTP_P_sig$decr, x=Year), colour="deepskyblue4", size=1)+ ## Periods of increase
  geom_line(data=P_new_sigperiod, mapping=aes(y=BTP_P_sig$incr, x=Year), colour="deepskyblue4", size=1)+ ## Periods of decrease
  geom_point(data=pd_metals, aes(y=P, x= year), shape= 21, fill= "#0099cc", colour="lightblue4", size = 0.5)+
  theme(text = element_text(size = 10, face = "bold")) +
  ylab("")+
  xlab("year")+
  scale_x_continuous(breaks=seq(1000,2022, by= 100), limits = c(1000,2022))+
  ylim(c(-3000, 15000))+
  labs(x=NULL, y=NULL) +
  theme_classic() + 
  theme(text = element_text(size = 10, face = "bold"))

P

4.2.5 Zinc/Al and Cd/Al

pd_metals <-read.csv("metals.csv")
head(pd_metals)

Zn_gam<-gam(Zn~s(year, k = 15), data= pd_metals, method="REML") 
gam.check(Zn_gam)
plot(Zn_gam)

fit_gcv<- predict(Zn_gam, data = pd_metals, se.fit = TRUE, length.out=200)
crit.t <- qt(0.975, df.residual(Zn_gam))
Zn_new <- data.frame(Year = pd_metals[["year"]],
                     fit = fit_gcv$fit,
                     se.fit = fit_gcv$se.fit)

Zn_new <- transform(Zn_new,
                    upper = fit + (crit.t * se.fit),
                    lower = fit - (crit.t * se.fit))


Zn_new_sigperiod<-data.frame(approx(Zn_new$Year, Zn_new$fit, n=200)) 
colnames(Zn_new_sigperiod)<-c("Year", "fit")
head(Zn_new_sigperiod)

BTP_Zn_der<-derivatives(Zn_gam, type="central", n=200)
draw(BTP_Zn_der)

BTP_Zn_sig<-signifD(Zn_new_sigperiod$fit, d= BTP_Zn_der$derivative, BTP_Zn_der$upper, BTP_Zn_der$lower)
BTP_Zn_sig

Zn <-ggplot() +
  geom_ribbon(data=Zn_new, mapping= aes(x= Year, ymax= upper, ymin=lower), fill="#b3e0ff", inherit.aes = FALSE, alpha=0.5) +
  geom_line(data= Zn_new, mapping=aes(y= fit, x= Year), colour="lightblue4") +
  geom_line(data= Zn_new_sigperiod, mapping=aes(y=BTP_Zn_sig$decr, x=Year), colour="deepskyblue4", size=1) + ## Periods of increase
  geom_line(data=Zn_new_sigperiod, mapping=aes(y=BTP_Zn_sig$incr, x=Year), colour="deepskyblue4", size=1) + ## Periods of decrease
  geom_point(data=pd_metals, aes(y= Zn, x= year), shape= 21, fill= "#0099cc", colour="lightblue4", size = 0.5) +
  theme(text = element_text(size = 10, face = "bold")) +
  ylab("")+
  xlab("")+
  scale_x_continuous(breaks=seq(1000,2022, by= 100), limits = c(1000,2022))+
  ylim(c(0.00006, 0.015)) +
  labs(x=NULL, y=NULL) +
  theme_classic() +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.line.x=element_blank(), text = element_text(size = 10, face = "bold"))

Zn

pd_metals <-read.csv("metals.csv")
head(pd_metals)

Cd_gam<-gam(Cd~s(year, k = 15), data= pd_metals, method="REML") 
gam.check(Cd_gam)
plot(Cd_gam)

fit_gcv<- predict(Cd_gam, data = pd_metals, se.fit = TRUE, length.out=200)
crit.t <- qt(0.975, df.residual(Cd_gam))
Cd_new <- data.frame(Year = pd_metals[["year"]],
                      fit = fit_gcv$fit,
                      se.fit = fit_gcv$se.fit)

Cd_new <- transform(Cd_new,
                     upper = fit + (crit.t * se.fit),
                     lower = fit - (crit.t * se.fit))

Cd_new_sigperiod<-data.frame(approx(Cd_new$Year, Cd_new$fit, n=200)) 
colnames(Cd_new_sigperiod)<-c("Year", "fit")
head(Cd_new_sigperiod) 

BTP_Cd_der<-derivatives(Cd_gam, type="central", n=200)
draw(BTP_Cd_der)

BTP_Cd_sig<-signifD(Cd_new_sigperiod$fit, d= BTP_Cd_der$derivative, BTP_Cd_der$upper, BTP_Cd_der$lower)
BTP_Cd_sig

Cd<-ggplot()+
geom_ribbon(data=Cd_new, mapping= aes(x= Year, ymax= upper, ymin=lower), fill="#b3e0ff", inherit.aes = FALSE, alpha=0.5)+
  geom_line(data= Cd_new, mapping=aes(y= fit, x= Year), colour="lightblue4")+
  geom_line(data= Cd_new_sigperiod, mapping=aes(y=BTP_Cd_sig$decr, x=Year), colour="deepskyblue4", size=1)+ ## Periods of increase
  geom_line(data=Cd_new_sigperiod, mapping=aes(y=BTP_Cd_sig$incr, x=Year), colour="deepskyblue4", size=1)+ ## Periods of decrease
  geom_point(data=pd_metals, aes(y= Cd, x= year), shape= 21, fill= "#0099cc", colour="lightblue4", size = 0.5)+
  theme(text = element_text(size = 10, face = "bold")) +
  scale_x_continuous(breaks=seq(1000,2022, by= 100), limits = c(1000,2022))+
  ylim(c(0.05e-05, 3e-04))+
  labs(x=NULL, y=NULL) +
  theme_classic()+
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.line.x=element_blank(), text = element_text(size = 10, face = "bold"))


Cd

4.2.6 Diatom (S.construens)

pd_diatoms <-read.csv("diatoms.csv")
head(pd_diatoms)

S.cons_gam<-gam(S.construens~s(year, k = 17), data= pd_diatoms, method="REML") 
gam.check(S.cons_gam)
plot(S.cons_gam)

fit_gcv<- predict(S.cons_gam, data = pd_diatoms, se.fit = TRUE, length.out=200)
crit.t <- qt(0.975, df.residual(S.cons_gam))
S.cons_new <- data.frame(Year = pd_diatoms[["year"]],
                         fit = fit_gcv$fit,
                         se.fit = fit_gcv$se.fit)

S.cons_new <- transform(S.cons_new,
                        upper = fit + (crit.t * se.fit),
                        lower = fit - (crit.t * se.fit))

S.cons_new_sigperiod<-data.frame(approx(S.cons_new$Year, S.cons_new$fit, n=200)) 
colnames(S.cons_new_sigperiod)<-c("Year", "fit")
head(S.cons_new_sigperiod) 

BTP_S.cons_der<-derivatives(S.cons_gam, type="central", n=200)
draw(BTP_S.cons_der)

BTP_S.cons_sig<-signifD(S.cons_new_sigperiod$fit, d= BTP_S.cons_der$derivative, BTP_S.cons_der$upper, BTP_S.cons_der$lower)
BTP_S.cons_sig

S.cons<-ggplot()+
  geom_ribbon(data=S.cons_new, mapping= aes(x= Year, ymax= upper, ymin=lower), fill="#b3e0ff", inherit.aes = FALSE, alpha=0.5)+
  geom_line(data= S.cons_new, mapping=aes(y= fit, x= Year), colour="lightblue4")+
  geom_line(data= S.cons_new_sigperiod, mapping=aes(y=BTP_S.cons_sig$decr, x=Year), colour="deepskyblue4", size=1)+ ## Periods of increase
  geom_line(data=S.cons_new_sigperiod, mapping=aes(y=BTP_S.cons_sig$incr, x=Year), colour="deepskyblue4", size=1)+ ## Periods of decrease
  geom_point(data=pd_diatoms, aes(y= S.construens, x= year), shape= 21, fill= "#0099cc", colour="lightblue4", size = 0.5)+
  scale_x_continuous(breaks=seq(1000,2022, by= 100), limits = c(1000,2022))+
  ylim(c(-15, 115))+
  labs(x=NULL, y=NULL) +
  theme_classic()+
  theme(text = element_text(size = 10, face = "bold"))
        
S.cons

4.3 Plotting the final GAM

library(ggpubr)
ggarrange(chlA, Zn, d13C, Cd, d15N, P, S.cons, ncol = 2, nrow = 4, heights=5, widths=5)

4.4 Final Figure

[Bosch et al. 2024]: Proxy data from the impact core fitted with a generalized additive model (GAM) and aligned with extrapolated 210Pb-dates. Bolded blue lines represent significant periods of change and shaded blue areas represent the confidence intervals.

Figure 4.1: [Bosch et al. 2024]: Proxy data from the impact core fitted with a generalized additive model (GAM) and aligned with extrapolated 210Pb-dates. Bolded blue lines represent significant periods of change and shaded blue areas represent the confidence intervals.