5 Multiproxy Analysis vs. Nesting Pairs

Data used in this analysis can be found in the github repo in the following files: FileS2_MonitoringData.csv and FileS9_ZScores_CSM-IMP.csv.

5.1 Getting Started

Required pkgs for the analysis:

library(ggplot2)
library(dplyr)
library(viridis)
library(ggpubr)
library(tidyverse)

5.2 Example for calculating Z-Scores

Here is one example of how you can calculate the average d15N and standard deviation of d15N:

# Define your data

d15N <- IMP2_isotopes$d15N
percentN <- IMP2_isotopes$N.percent

d13C <- IMP2_isotopes$d15N
percentC <- IMP2_isotopes$C.percent
# calculate z-scores for d15N and d13C

Zscore_isotopes <- IMP2_isotopes %>% 
  mutate(d15N_z = (d15N - mean(d15N))/sd(d15N), 
         d13C_z = (d13C - mean(d13C))/sd(d13C),
         percentN_z = (percentN - mean(percentN))/sd(percentN),
         percentN_z = (percentN - mean(percentN))/sd(percentN))

Export the values to a .csv or excel file

write.csv(Zscore_isotopes, "filename")

5.3 Load the data

Here I load data for the impact site only. After calculating z-scores for the d15N, d15C, chlorophyll a, phosphorus, cadmium , zinc and diatom abundance data (only for S.construens), I arranged it into a file called z-scores.csv.

The .csv file I am working is called z-scores.csv, set up like this:

midpoint d15N z_d15N chla z_chla etc… Mean
- - - - - - -

NOTE: Make sure the file contains a column to plot arithmetic mean.

Load the data in R

zscores <- read.csv("Data/FileS7_ZScores_CSM-IMP.csv")

The colony population data can be found in the Census data file, and contains counts for each species by year.

Year Northern gannet Black-legged kittiwake Common murre
1883
2023

Read the population data into R from another .csv file, mine is in a file called /Data

popn <- read.csv("Data/FileS2_MonitoringData.csv")

5.4 Plot the proxy data

Plot each of your proxies using both geom_line and geom_point to plot a line graph for your data.

z_plot <- ggplot() +
  geom_smooth(zscores, mapping=aes(x=year, y=mean), method="lm", na.rm=TRUE, formula = y ~ poly(x, 2), color="lightgrey", fill="lightgrey", linetype = 0) +
  scale_x_continuous(breaks=seq(1800,2023, by=10), limits = c(1800,2025), position="top") +
  
  #plot mean line (move this to the last line if you want the mean line to lay overtop your proxy data)
  geom_line(data=zscores[!is.na(zscores$mean),], mapping=aes(x=year, y=mean), color="black", linewidth=1,) +
  
  #plot chlorophyll data 
  geom_line(data=zscores[!is.na(zscores$z_chla),], mapping=aes(x=year, y=z_chla), color="green") +
  geom_point(data=zscores, mapping=aes(x=year, y=z_chla), color="green", shape=16, size=2) + 
  
  #plot d15N data
  geom_line(data=zscores[!is.na(zscores$z_d15N),], mapping=aes(x=year, y=z_d15N), color="blue3") +
  geom_point(data=zscores, mapping=aes(x=year, y=z_d15N), color="blue3", shape=4, size=2) +
  
  #plot Phosphorus data
  geom_line(data=zscores[!is.na(zscores$z_P),], mapping=aes(x=year, y=z_P), color="orange") +
  geom_point(data=zscores, mapping=aes(x=year, y=z_P), color="orange", shape=15, size=2) +
  
  #plot Zinc data
  geom_line(data=zscores[!is.na(zscores$z_Zn),], mapping=aes(x=year, y=z_Zn), color="firebrick1") +
  geom_point(data=zscores, mapping=aes(x=year, y=z_Zn), color="firebrick1", shape=25, size=2) +
  
  #plot Cadmium data
  geom_line(data=zscores[!is.na(zscores$Z_Cd),], mapping=aes(x=year, y=Z_Cd), color="red4") +
  geom_point(data=zscores, mapping=aes(x=year, y=Z_Cd), color="red4", shape=17, size=2) +
  
  #plot diatom data
  geom_line(data=zscores[!is.na(zscores$z_S.cons),], mapping=aes(x=year, y=z_S.cons), color="mediumaquamarine") +
  geom_point(data=zscores, mapping=aes(x=year, y=z_S.cons), color="mediumaquamarine", shape=20, size=2) +
  
  #set a theme
  theme_classic()+
  theme(panel.grid.major.x=element_line(), 
        text = element_text(size = 12),
        axis.text.x.top= element_text(vjust=0.5, angle=90, color = "black"),
        axis.text.y = element_text(color = "black"),
        axis.title.x = element_blank())+
  
  #set axis labels
  xlab(NULL)+
  ylab("z-score")

Run z_plot to view the figure. Here I used !is.na to ignore all NA values in the dataset. You can use HTML color codes in place for R colors for more options.

5.5 Plot the population data

Define each variable

NOGA <- popn$Northern.gannet
COMU <- popn$Common.murre
BLKI <- popn$Black.legged.kittiwake
year <- popn$Year

Plot using ggplot

popn_plot <- ggplot(popn, (aes(x=year))) +

  geom_point(aes(y=BLKI), width=1, color="hotpink3", size=2) +
  geom_line(aes(y=BLKI), position = "stack", color = "hotpink3", size = 0.5, linetype="dotted") +

  geom_point(aes(y=NOGA), width=1, color="slateblue4", size=2) +
  geom_line(aes(y=NOGA), position = "stack", color = "slateblue4", size = 0.5, linetype="dotted") +  
  
  geom_point(aes(y=COMU), width=1, color="olivedrab4", size=2) +
  geom_line(aes(y=COMU), position = "stack", color = "olivedrab4", size = 0.5, linetype="dotted") +
  
  scale_x_continuous(breaks=seq(1800,2023, by=10), limits = c(1800,2025), position="bottom") +
  scale_y_continuous(breaks=seq(0,16000, by=4000)) +
  
  ylab("nesting pairs")+
  
  theme_classic()+
  theme(panel.grid.major.x=element_line(), 
        text = element_text(size = 12, color = "black"), 
        axis.text.x = element_text(margin = margin(t=1), color = "black"),
        axis.text.y = element_text(margin = margin(t=1), color = "black"),
        axis.title.x = element_blank(), 
        axis.text.x.bottom = element_text(vjust=0.5, angle=90))

5.6 Arrange the figures together

Plot both your population counts and proxy data onto the same figure

ggarrange(z_plot, popn_plot, 
          ncol = 1, nrow = 2,
          heights=c(15,12),
          align="hv")

5.7 Final Figure

[Bosch et al. 2024]: Summary of ornithogenic proxies measured in the impact pond, with Z-score values represented as points [top figure]. X-axis dates are extrapolated from 210Pb dates via polynomial regression. The thick black line shows the calculated arithmetic mean, fitted with a polynomial regression using ggplot2 in R. and the shaded grey area represents the confidence interval. A cut-off was set at ca. 1808 CE (~11.25 cm) to ensure data continuity due to variations in proxy data depth. Historical monitoring data for Northern gannets (Morus bassanus), common murres (Uria aalge) and black-legged kittiwakes (Rissa tridactyla) nesting on Bird Rock and surrounding mainland areas (File S1) [bottom figure].

Figure 5.1: [Bosch et al. 2024]: Summary of ornithogenic proxies measured in the impact pond, with Z-score values represented as points [top figure]. X-axis dates are extrapolated from 210Pb dates via polynomial regression. The thick black line shows the calculated arithmetic mean, fitted with a polynomial regression using ggplot2 in R. and the shaded grey area represents the confidence interval. A cut-off was set at ca. 1808 CE (~11.25 cm) to ensure data continuity due to variations in proxy data depth. Historical monitoring data for Northern gannets (Morus bassanus), common murres (Uria aalge) and black-legged kittiwakes (Rissa tridactyla) nesting on Bird Rock and surrounding mainland areas (File S1) [bottom figure].