8 Visualizing Metrics

From Katie's Notes:

Within each 2hr period we need to assess:
•   Proportion wet or time wet ((Count wets in 2hr period * 30sec) / 7200sec)
•   Number of switches between 0 and non-zero value

•   Total time when entire 10min sampling interval was dry (count 0s * 10min)
•   Total time when entire 10min sampling interval was wet (count >=1 * 10min)

•   Longest continuous time dry (longest number of consecutive rows with wets==0  * 10min)
•   Longest continuous time wet (longest number of consecutive rows with wets>=1 * 10min)

I start by setting up some plotting functions so we can look at different subsets of our data easily. Then I apply those plotting functions to various sets of the DEG files (6 files) to visualize the metrics we discussed (for Baccalieu, Kent Island, and random files).


Let’s see what we have to work with per colony/deployment_periods

# Print the table of file counts per deployment period and colony
deployment_summary
## # A tibble: 7 × 8
##   deployment_period `Baccalieu - Ned Walsh` `Bon Portage` Country  Gull  Kent
##   <chr>                               <int>         <int>   <int> <int> <int>
## 1 2017-2018                               6             0       0     0     0
## 2 2018-2019                               0             5       6    10    10
## 3 2019-2020                              10            17      17    23    16
## 4 2021-2022                              20             0      16    16    27
## 5 2022-2023                               7            13       0    11     0
## 6 Other                                   0             0       0     2     0
## 7 TOTAL                                  43            35      39    62    53
## # ℹ 2 more variables: `Middle Lawn` <int>, TOTAL <dbl>

8.1 Baccalieu Island 2017-2018

Let’s start by visualizing switches/proportions for 2017-2018 files from Baccalieu (6 files total)

8.1.1 Running plotting functions

Proportions:

# Narrow the 2hr proportion table down to the Baccalieu 2017-2018 deployment
# and keep only the columns needed for plotting.
bacc_2017_props <- deg_props_2hrs %>%
  filter(colony == "Baccalieu - Ned Walsh") %>%
  filter(deployment_period == "2017-2018") %>%
  select(
    bird_id,
    date,
    start_time,
    end_time,
    prop_wet
  )

# Preview the first rows of the subset
bacc_2017_props %>%
  head()
## # A tibble: 6 × 5
##   bird_id date       start_time end_time prop_wet
##   <chr>   <date>     <chr>      <chr>       <dbl>
## 1 BH584   2017-09-20 00:00:00   01:59:59    0    
## 2 BH584   2017-09-20 02:00:00   03:59:59    0    
## 3 BH584   2017-09-20 04:00:00   05:59:59    0.133
## 4 BH584   2017-09-20 06:00:00   07:59:59    0.325
## 5 BH584   2017-09-20 08:00:00   09:59:59    0.15 
## 6 BH584   2017-09-20 10:00:00   11:59:59    0.075
# Pass the Baccalieu 2017-2018 2hr proportions to plot_props() and keep the
# result for display under "Final Plots".
# The title previously read "Per 2hr bins)" with no opening parenthesis; the
# missing "(" is restored so the rendered title is balanced.
plot_bacc2017_props <- plot_props(
  data = bacc_2017_props,
  y_var = "prop_wet",
  y_label = "Proportion",
  plot_title = "Proportion of Wet Time Across Deployments (Per 2hr bins) - (Baccalieu Island 2017-2018)"
)

This kind of hurts to look at, so we can also try aggregating the data to days by taking the mean of prop_wet.

Daily Proportions:

# Collapse the 2hr bins to one row per bird per day: the mean of prop_wet,
# ignoring missing values.
bacc_2017_daily_props <- bacc_2017_props %>%
  group_by(bird_id, date) %>%
  summarise(
    daily_prop_wet = mean(prop_wet, na.rm = TRUE),
    .groups = "drop"
  )

# Hand the daily table to plot_daily_props() and keep the result for display
# under "Final Plots".
plot_bacc2017_daily_props <- plot_daily_props(
  data = bacc_2017_daily_props,
  y_var = "daily_prop_wet",
  y_label = "Proportion",
  plot_title = "Mean Proportion of Wet Periods (2hr bins) Per Day (Baccalieu Island 2017-2018)"
)

Daily Switches:

# Restrict the daily switch counts to the Baccalieu 2017-2018 deployment and
# keep only the bird, date and count columns.
bacc_2017_switches <- daily_switches %>%
  filter(colony == "Baccalieu - Ned Walsh") %>%
  filter(deployment_period == "2017-2018") %>%
  select(
    bird_id,
    date,
    daily_switches
  )

# Preview the first rows of the subset
bacc_2017_switches %>%
  head()
## # A tibble: 6 × 3
##   bird_id date       daily_switches
##   <chr>   <date>              <dbl>
## 1 BH584   2017-09-20             45
## 2 BH584   2017-09-21             38
## 3 BH584   2017-09-22             43
## 4 BH584   2017-09-23             33
## 5 BH584   2017-09-24             25
## 6 BH584   2017-09-25             28
# Pass the Baccalieu 2017-2018 daily switch counts to plot_daily_switches()
# and keep the result for display under "Final Plots".
plot_bacc2017_daily_switches <- plot_daily_switches(
  data = bacc_2017_switches,
  plot_title = "Daily Sum of Switches between Wet (wet>0) and Dry (wet=0) States for Baccalieu Island (2017-2018)",
  y_label = "Sum of Switches",
  y_var = "daily_switches"
)

Longest Wet/Dry Periods

# Longest dry/wet period columns (duration, start, end) for each bird in the
# Baccalieu 2017-2018 deployment.
bacc_2017_longest_prds <- deg_longest_prds %>%
  filter(colony == "Baccalieu - Ned Walsh") %>%
  filter(deployment_period == "2017-2018") %>%
  select(
    bird_id,
    longest_dry_time, longest_dry_start, longest_dry_end,
    longest_wet_time, longest_wet_start, longest_wet_end
  )

# Keep the result of plot_longest_prds() for display under "Final Plots"
plot_bacc_2017_longest_prds <- plot_longest_prds(
  plot_title = "Longest Wet and Dry Periods - Baccalieu Island 2017-2018",
  data = bacc_2017_longest_prds
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

Total Wet/Dry

# Total wet and dry time columns for each bird in the Baccalieu 2017-2018
# deployment.
bacc_2017_total_wetdry <- deg_total_wetdry %>%
  filter(colony == "Baccalieu - Ned Walsh") %>%
  filter(deployment_period == "2017-2018") %>%
  select(bird_id, total_time_wet, total_time_dry)

# Keep the result of plot_total_times() for display under "Final Plots"
plot_bacc_2017_total_times <- plot_total_times(
  bacc_2017_total_wetdry,
  "Total Wet and Dry Times (Baccalieu Island, 2017-2018)"
)

8.1.2 Final Plots

# Print each object returned by the plotting helpers in section 8.1.1
plot_bacc2017_props
plot_bacc2017_daily_props
plot_bacc2017_daily_switches
plot_bacc_2017_total_times
plot_bacc_2017_longest_prds

8.2 Kent Island 2018-2019

Let’s visualize the switches/proportions for 2018-2019 files from Kent Island (10 files total). Make sure you clear out your environment so you don’t accidentally run this on Baccalieu-related objects or other sites you may have tried out.

# Remove every object in the current environment whose name starts with
# "bacc" (names starting with "plot_" do not match and are kept).
rm(list = grep("^bacc", ls(), value = TRUE))

8.2.1 Running plotting functions

Let’s start by subsetting the metadata file so we can look at Kent Island

# Print the table of file counts per deployment period and colony
deployment_summary
## # A tibble: 7 × 8
##   deployment_period `Baccalieu - Ned Walsh` `Bon Portage` Country  Gull  Kent
##   <chr>                               <int>         <int>   <int> <int> <int>
## 1 2017-2018                               6             0       0     0     0
## 2 2018-2019                               0             5       6    10    10
## 3 2019-2020                              10            17      17    23    16
## 4 2021-2022                              20             0      16    16    27
## 5 2022-2023                               7            13       0    11     0
## 6 Other                                   0             0       0     2     0
## 7 TOTAL                                  43            35      39    62    53
## # ℹ 2 more variables: `Middle Lawn` <int>, TOTAL <dbl>

Let’s use the 10 files from 2018-2019.

Proportions:

# Narrow the 2hr proportion table down to the Kent 2018-2019 deployment and
# keep only the columns needed for plotting.
kent_2018_props <- deg_props_2hrs %>%
  filter(colony == "Kent") %>%
  filter(deployment_period == "2018-2019") %>%
  select(
    bird_id,
    date,
    start_time,
    end_time,
    prop_wet
  )

# Print the subset
kent_2018_props
## # A tibble: 38,209 × 5
##    bird_id date       start_time end_time prop_wet
##    <chr>   <date>     <chr>      <chr>       <dbl>
##  1 BP488   2018-09-12 00:00:00   01:59:59        0
##  2 BP488   2018-09-12 02:00:00   03:59:59        0
##  3 BP488   2018-09-12 04:00:00   05:59:59        0
##  4 BP488   2018-09-12 06:00:00   07:59:59        0
##  5 BP488   2018-09-12 08:00:00   09:59:59        0
##  6 BP488   2018-09-12 10:00:00   11:59:59        0
##  7 BP488   2018-09-12 12:00:00   13:59:59        0
##  8 BP488   2018-09-12 14:00:00   15:59:59        0
##  9 BP488   2018-09-12 16:00:00   17:59:59        0
## 10 BP488   2018-09-12 18:00:00   19:59:59        0
## # ℹ 38,199 more rows
# Pass the Kent 2018-2019 2hr proportions to plot_props() and keep the result
# for display under "Final Plots".
# The title previously read "Per 2hr bins)" with no opening parenthesis; the
# missing "(" is restored so the rendered title is balanced.
plot_kent2018_props <- plot_props(
  data = kent_2018_props,
  y_var = "prop_wet",
  y_label = "Proportion",
  plot_title = "Proportion of Wet Time Across Deployments (Per 2hr bins) - (Kent Island 2018-2019)"
)

Daily Proportions:

# Collapse the 2hr bins to one row per bird per day: the mean of prop_wet,
# ignoring missing values.
kent_2018_daily_props <- kent_2018_props %>%
  group_by(bird_id, date) %>%
  summarise(
    daily_prop_wet = mean(prop_wet, na.rm = TRUE),
    .groups = "drop"
  )

# Hand the daily table to plot_daily_props() and keep the result for display
# under "Final Plots".
plot_kent2018_daily_props <- plot_daily_props(
  data = kent_2018_daily_props,
  y_var = "daily_prop_wet",
  y_label = "Proportion",
  plot_title = "Mean Proportion of Wet Periods (2hr bins) Per Day (Kent Island 2018-2019)"
)

Daily Switches:

# Restrict the daily switch counts to the Kent 2018-2019 deployment and keep
# only the bird, date and count columns.
kent_2018_switches <- daily_switches %>%
  filter(colony == "Kent") %>%
  filter(deployment_period == "2018-2019") %>%
  select(
    bird_id,
    date,
    daily_switches
  )

# Preview the first rows of the subset
kent_2018_switches %>%
  head()
## # A tibble: 6 × 3
##   bird_id date       daily_switches
##   <chr>   <date>              <dbl>
## 1 BP488   2018-09-12              0
## 2 BP488   2018-09-13             36
## 3 BP488   2018-09-14             25
## 4 BP488   2018-09-15             39
## 5 BP488   2018-09-16             36
## 6 BP488   2018-09-17             38
# Pass the Kent 2018-2019 daily switch counts to plot_daily_switches() and
# keep the result for display under "Final Plots".
plot_kent2018_daily_switches <- plot_daily_switches(
  data = kent_2018_switches,
  plot_title = "Daily Sum of Switches between Wet (wet>0) and Dry (wet=0) States for Kent Island 2018-2019",
  y_label = "Sum of Switches",
  y_var = "daily_switches"
)

Longest Wet/Dry Periods

# Longest dry/wet period columns (duration, start, end) for each bird in the
# Kent 2018-2019 deployment.
kent_2018_longest_prds <- deg_longest_prds %>%
  filter(colony == "Kent") %>%
  filter(deployment_period == "2018-2019") %>%
  select(
    bird_id,
    longest_dry_time, longest_dry_start, longest_dry_end,
    longest_wet_time, longest_wet_start, longest_wet_end
  )

# Keep the result of plot_longest_prds() for display under "Final Plots"
plot_kent_2018_longest_prds <- plot_longest_prds(
  plot_title = "Longest Wet and Dry Periods for Kent Island 2018-2019",
  data = kent_2018_longest_prds
)

Total Wet/Dry

# Total wet and dry time columns for each bird in the Kent 2018-2019
# deployment.
kent_2018_total_wetdry <- deg_total_wetdry %>%
  filter(colony == "Kent") %>%
  filter(deployment_period == "2018-2019") %>%
  select(bird_id, total_time_wet, total_time_dry)

# Keep the result of plot_total_times() for display under "Final Plots"
plot_kent_2018_total_times <- plot_total_times(
  kent_2018_total_wetdry,
  "Total Wet and Dry Times (Kent Island 2018-2019)"
)

8.2.2 Final Plots

# Print each object returned by the plotting helpers in section 8.2.1
plot_kent2018_props
plot_kent2018_daily_props
plot_kent2018_daily_switches
plot_kent_2018_total_times
plot_kent_2018_longest_prds

8.3 Random subset

Now let’s visualize the switches/proportions for 10 random files. Make sure you clear out your environment so you don’t accidentally run this on Baccalieu- or Kent-related objects or other sites you may have tried out.

# Drop the Kent and Baccalieu working objects before building the random
# subset. ls() and rm() are both pointed at the global environment so that the
# names that get listed are exactly the names that get removed; previously
# ls() listed the calling environment while rm() targeted .GlobalEnv.
# Names starting with "plot_" do not match this pattern and are kept.
rm(
  list = ls(pattern = "^(kent|bacc)", envir = .GlobalEnv),
  envir = .GlobalEnv
)

8.3.1 Running plotting functions

Let’s start by subsetting the metadata file so we can look at 10 files from any random year and place.

# Draw 10 bird IDs at random from the distinct IDs in the metrics metadata.
# slice_sample() replaces the superseded sample_n(); unlike sample_n() it
# returns every row instead of erroring when fewer than 10 distinct IDs exist.
# NOTE(review): no seed is set, so a different set of birds is drawn on each
# run -- call set.seed() beforehand if this section must be reproducible.
random_bird_ids <- metrics_md %>%
  distinct(bird_id) %>%
  slice_sample(n = 10) %>%
  pull(bird_id)

# Print the sampled IDs
random_bird_ids
##  [1] "CD606" "CH713" "CD489" "BU831" "CH023" "CD474" "BU468" "CD454" "CH789"
## [10] "BU752"

Proportions:

# Keep only the 2hr proportion rows that belong to the sampled birds, along
# with the columns needed for plotting.
random_props <- deg_props_2hrs %>%
  filter(bird_id %in% random_bird_ids) %>%
  select(
    bird_id,
    colony,
    date,
    start_time,
    end_time,
    prop_wet
  )

# Check which bird IDs made it through the filter
print(unique(random_props[["bird_id"]]))
##  [1] "BU468" "BU752" "BU831" "CD454" "CD474" "CD489" "CD606" "CH023" "CH713"
## [10] "CH789"
# Pass the 2hr proportions of the sampled birds to plot_props() and keep the
# result for display under "Final Plots".
# The title previously read "Per 2hr bins)" with no opening parenthesis; the
# missing "(" is restored so the rendered title is balanced.
plot_random_props <- plot_props(
  data = random_props,
  y_var = "prop_wet",
  y_label = "Proportion",
  plot_title = "Proportion of Wet Time Across Deployments (Per 2hr bins) - (10 random files)"
)

Daily Proportions:

# Collapse the 2hr bins to one row per bird per day: the mean of prop_wet,
# ignoring missing values.
random_daily_props <- random_props %>%
  group_by(bird_id, date) %>%
  summarise(
    daily_prop_wet = mean(prop_wet, na.rm = TRUE),
    .groups = "drop"
  )

# Hand the daily table to plot_daily_props() and keep the result for display
# under "Final Plots".
plot_random_daily_props <- plot_daily_props(
  data = random_daily_props,
  y_var = "daily_prop_wet",
  y_label = "Proportion",
  plot_title = "Mean Proportion of Wet Periods (2hr bins) Per Day (10 random files)"
)

Daily Switches:

# Keep only the daily switch counts that belong to the sampled birds.
random_switches <- daily_switches %>%
  filter(bird_id %in% random_bird_ids) %>%
  select(
    bird_id,
    date,
    daily_switches
  )

# Check which bird IDs made it through the filter
print(unique(random_switches[["bird_id"]]))
##  [1] "BU468" "BU752" "BU831" "CD454" "CD474" "CD489" "CD606" "CH023" "CH713"
## [10] "CH789"
# Pass the daily switch counts of the sampled birds to plot_daily_switches()
# and keep the result for display under "Final Plots".
plot_random_daily_switches <- plot_daily_switches(
  data = random_switches,
  plot_title = "Daily Sum of Switches between Wet (wet>0) and Dry (wet=0) States for 10 Random Files",
  y_label = "Sum of Switches",
  y_var = "daily_switches"
)

Longest Wet/Dry Periods

# Longest dry/wet period columns (duration, start, end) for the sampled birds.
random_longest_prds <- deg_longest_prds %>%
  filter(bird_id %in% random_bird_ids) %>%
  select(
    bird_id,
    longest_dry_time, longest_dry_start, longest_dry_end,
    longest_wet_time, longest_wet_start, longest_wet_end
  )

# Keep the result of plot_longest_prds() for display under "Final Plots"
plot_random_longest_prds <- plot_longest_prds(
  plot_title = "Longest Wet and Dry Periods for 10 Random Files",
  data = random_longest_prds
)

Total Wet/Dry

# Total wet and dry time columns for the sampled birds.
random_total_wetdry <- deg_total_wetdry %>%
  filter(bird_id %in% random_bird_ids) %>%
  select(bird_id, total_time_wet, total_time_dry)

# Keep the result of plot_total_times() for display under "Final Plots"
plot_random_total_times <- plot_total_times(
  random_total_wetdry,
  "Total Wet and Dry Times for 10 Random Files"
)

8.3.2 Final Plots

# Print each object returned by the plotting helpers in section 8.3.1
plot_random_props
plot_random_daily_props
plot_random_daily_switches
plot_random_total_times
plot_random_longest_prds