8 Visualizing Metrics
From Katie's Notes:
Within each 2hr period we need to assess:
• Proportion wet or time wet ((Count wets in 2hr period * 30sec) / 72000sec)
• Number of switches between 0 and non-zero value
• Total time when entire 10min sampling interval was dry (count 0s * 10min)
• Total time when entire 10min sampling interval was wet (count >=1 * 10min)
• Longest continuous time dry (longest number of consecutive rows with wets==0 * 10min)
• Longest continuous time wet (longest number of consecutive rows with wets>=1 * 10min)
I start by setting up some plotting functions so we can look at different subsets of our data easily. Then I apply those plotting functions to various sets of the DEG files to visualize the metrics we discussed (for Baccalieu Island, Kent Island, and a random set of files).
Let’s see how many files we have to work with per colony and deployment period:
## # A tibble: 7 × 8
## deployment_period `Baccalieu - Ned Walsh` `Bon Portage` Country Gull Kent
## <chr> <int> <int> <int> <int> <int>
## 1 2017-2018 6 0 0 0 0
## 2 2018-2019 0 5 6 10 10
## 3 2019-2020 10 17 17 23 16
## 4 2021-2022 20 0 16 16 27
## 5 2022-2023 7 13 0 11 0
## 6 Other 0 0 0 2 0
## 7 TOTAL 43 35 39 62 53
## # ℹ 2 more variables: `Middle Lawn` <int>, TOTAL <dbl>
8.1 Baccalieu Island 2017-2018
Let’s start by visualizing switches/proportions for 2017-2018 files from Baccalieu (6 files total)
8.1.1 Running plotting functions
Proportions:
# Subset the 2hr-bin proportions to the Baccalieu 2017-2018 deployments so we
# have less data to work with, keeping only the listed columns.
bacc_2017_props <- deg_props_2hrs %>%
  filter(
    colony == "Baccalieu - Ned Walsh",
    deployment_period == "2017-2018"
  ) %>%
  select(bird_id, date, start_time, end_time, prop_wet)
head(bacc_2017_props)
## # A tibble: 6 × 5
## bird_id date start_time end_time prop_wet
## <chr> <date> <chr> <chr> <dbl>
## 1 BH584 2017-09-20 00:00:00 01:59:59 0
## 2 BH584 2017-09-20 02:00:00 03:59:59 0
## 3 BH584 2017-09-20 04:00:00 05:59:59 0.133
## 4 BH584 2017-09-20 06:00:00 07:59:59 0.325
## 5 BH584 2017-09-20 08:00:00 09:59:59 0.15
## 6 BH584 2017-09-20 10:00:00 11:59:59 0.075
# Plot prop_wet per 2hr bin for the Baccalieu 2017-2018 subset.
# The title's parentheses were unbalanced ("Per 2hr bins) - ("); fixed here.
plot_bacc2017_props <- plot_props(
  data = bacc_2017_props,
  y_var = "prop_wet",
  y_label = "Proportion",
  plot_title = "Proportion of Wet Time Across Deployments (Per 2hr bins) - (Baccalieu Island 2017-2018)"
)
This kind of hurts to look at, so we can also try aggregating the data to days by taking the mean of prop_wet.
Daily Proportions:
# Collapse the 2hr bins to one row per bird per day: the mean of prop_wet,
# with NA bins ignored. `.groups = "drop"` returns an ungrouped tibble.
bacc_2017_daily_props <- bacc_2017_props %>%
  group_by(bird_id, date) %>%
  summarize(daily_prop_wet = mean(prop_wet, na.rm = TRUE), .groups = "drop")

# Plot the daily mean proportion for the same subset.
plot_bacc2017_daily_props <- plot_daily_props(
  data = bacc_2017_daily_props,
  y_var = "daily_prop_wet",
  y_label = "Proportion",
  plot_title = "Mean Proportion of Wet Periods (2hr bins) Per Day (Baccalieu Island 2017-2018)"
)
Daily Switches:
# Subset the daily switch counts to the Baccalieu 2017-2018 deployments so we
# have less data to work with.
bacc_2017_switches <- daily_switches %>%
  filter(
    colony == "Baccalieu - Ned Walsh",
    deployment_period == "2017-2018"
  ) %>%
  select(bird_id, date, daily_switches)
head(bacc_2017_switches)
## # A tibble: 6 × 3
## bird_id date daily_switches
## <chr> <date> <dbl>
## 1 BH584 2017-09-20 45
## 2 BH584 2017-09-21 38
## 3 BH584 2017-09-22 43
## 4 BH584 2017-09-23 33
## 5 BH584 2017-09-24 25
## 6 BH584 2017-09-25 28
# Plot the per-day switch totals for the Baccalieu 2017-2018 subset.
plot_bacc2017_daily_switches <- plot_daily_switches(
  data = bacc_2017_switches,
  y_var = "daily_switches",
  y_label = "Sum of Switches",
  plot_title = "Daily Sum of Switches between Wet (wet>0) and Dry (wet=0) States for Baccalieu Island (2017-2018)"
)
Longest Wet/Dry Periods
# Subset the longest wet/dry period table to the Baccalieu 2017-2018
# deployments, keeping the duration and start/end columns for each state.
bacc_2017_longest_prds <- deg_longest_prds %>%
  filter(
    colony == "Baccalieu - Ned Walsh",
    deployment_period == "2017-2018"
  ) %>%
  select(
    bird_id,
    longest_dry_time,
    longest_dry_start,
    longest_dry_end,
    longest_wet_time,
    longest_wet_start,
    longest_wet_end
  )

# NOTE(review): the deprecation warning printed below is raised while this
# call runs; presumably plot_longest_prds() passes `size` to a line geom and
# should use `linewidth` instead -- confirm in its definition.
plot_bacc_2017_longest_prds <- plot_longest_prds(
  data = bacc_2017_longest_prds,
  plot_title = "Longest Wet and Dry Periods - Baccalieu Island 2017-2018"
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
Total Wet/Dry
# Subset the total wet/dry time table to the Baccalieu 2017-2018 deployments.
bacc_2017_total_wetdry <- deg_total_wetdry %>%
  filter(
    colony == "Baccalieu - Ned Walsh",
    deployment_period == "2017-2018"
  ) %>%
  select(bird_id, total_time_wet, total_time_dry)

# Arguments are passed positionally: the data first, then the plot title.
plot_bacc_2017_total_times <- plot_total_times(
  bacc_2017_total_wetdry,
  "Total Wet and Dry Times (Baccalieu Island, 2017-2018)"
)
8.2 Kent Island 2018-2019
Let’s visualize the switches/proportions for the 2018-2019 files from Kent Island (10 files total). Make sure you clear out your environment first so you don’t accidentally run this on Baccalieu-related objects or objects from other sites you may have tried out.
8.2.1 Running plotting functions
Let’s start by subsetting the metadata file so we can look at Kent Island
## # A tibble: 7 × 8
## deployment_period `Baccalieu - Ned Walsh` `Bon Portage` Country Gull Kent
## <chr> <int> <int> <int> <int> <int>
## 1 2017-2018 6 0 0 0 0
## 2 2018-2019 0 5 6 10 10
## 3 2019-2020 10 17 17 23 16
## 4 2021-2022 20 0 16 16 27
## 5 2022-2023 7 13 0 11 0
## 6 Other 0 0 0 2 0
## 7 TOTAL 43 35 39 62 53
## # ℹ 2 more variables: `Middle Lawn` <int>, TOTAL <dbl>
Let’s use the 10 files from 2018-2019.
Proportions:
# Subset the 2hr-bin proportions to the Kent 2018-2019 deployments so we have
# less data to work with, keeping only the listed columns.
kent_2018_props <- deg_props_2hrs %>%
  filter(
    colony == "Kent",
    deployment_period == "2018-2019"
  ) %>%
  select(bird_id, date, start_time, end_time, prop_wet)
kent_2018_props
## # A tibble: 38,209 × 5
## bird_id date start_time end_time prop_wet
## <chr> <date> <chr> <chr> <dbl>
## 1 BP488 2018-09-12 00:00:00 01:59:59 0
## 2 BP488 2018-09-12 02:00:00 03:59:59 0
## 3 BP488 2018-09-12 04:00:00 05:59:59 0
## 4 BP488 2018-09-12 06:00:00 07:59:59 0
## 5 BP488 2018-09-12 08:00:00 09:59:59 0
## 6 BP488 2018-09-12 10:00:00 11:59:59 0
## 7 BP488 2018-09-12 12:00:00 13:59:59 0
## 8 BP488 2018-09-12 14:00:00 15:59:59 0
## 9 BP488 2018-09-12 16:00:00 17:59:59 0
## 10 BP488 2018-09-12 18:00:00 19:59:59 0
## # ℹ 38,199 more rows
# Plot prop_wet per 2hr bin for the Kent 2018-2019 subset.
# The title's parentheses were unbalanced ("Per 2hr bins) - ("); fixed here.
plot_kent2018_props <- plot_props(
  data = kent_2018_props,
  y_var = "prop_wet",
  y_label = "Proportion",
  plot_title = "Proportion of Wet Time Across Deployments (Per 2hr bins) - (Kent Island 2018-2019)"
)
Daily Proportions:
# Collapse the 2hr bins to one row per bird per day: the mean of prop_wet,
# with NA bins ignored. `.groups = "drop"` returns an ungrouped tibble.
kent_2018_daily_props <- kent_2018_props %>%
  group_by(bird_id, date) %>%
  summarize(daily_prop_wet = mean(prop_wet, na.rm = TRUE), .groups = "drop")

# Plot the daily mean proportion for the same subset.
plot_kent2018_daily_props <- plot_daily_props(
  data = kent_2018_daily_props,
  y_var = "daily_prop_wet",
  y_label = "Proportion",
  plot_title = "Mean Proportion of Wet Periods (2hr bins) Per Day (Kent Island 2018-2019)"
)
Daily Switches:
# Subset the daily switch counts to the Kent 2018-2019 deployments so we have
# less data to work with.
kent_2018_switches <- daily_switches %>%
  filter(
    colony == "Kent",
    deployment_period == "2018-2019"
  ) %>%
  select(bird_id, date, daily_switches)
head(kent_2018_switches)
## # A tibble: 6 × 3
## bird_id date daily_switches
## <chr> <date> <dbl>
## 1 BP488 2018-09-12 0
## 2 BP488 2018-09-13 36
## 3 BP488 2018-09-14 25
## 4 BP488 2018-09-15 39
## 5 BP488 2018-09-16 36
## 6 BP488 2018-09-17 38
# Plot the per-day switch totals for the Kent 2018-2019 subset.
plot_kent2018_daily_switches <- plot_daily_switches(
  data = kent_2018_switches,
  y_var = "daily_switches",
  y_label = "Sum of Switches",
  plot_title = "Daily Sum of Switches between Wet (wet>0) and Dry (wet=0) States for Kent Island 2018-2019"
)
Longest Wet/Dry Periods
# Subset the longest wet/dry period table to the Kent 2018-2019 deployments,
# keeping the duration and start/end columns for each state.
kent_2018_longest_prds <- deg_longest_prds %>%
  filter(
    colony == "Kent",
    deployment_period == "2018-2019"
  ) %>%
  select(
    bird_id,
    longest_dry_time,
    longest_dry_start,
    longest_dry_end,
    longest_wet_time,
    longest_wet_start,
    longest_wet_end
  )

# Plot the longest wet and dry periods for the same subset.
plot_kent_2018_longest_prds <- plot_longest_prds(
  data = kent_2018_longest_prds,
  plot_title = "Longest Wet and Dry Periods for Kent Island 2018-2019"
)
Total Wet/Dry
8.3 Random subset
Now let’s visualize the switches/proportions for 10 random files. Make sure you clear out your environment so you don’t accidentally run this on Baccalieu- or Kent-related objects or objects from other sites you may have tried out.
# Remove the Kent and Baccalieu subset objects from the global environment.
# ls() is pointed at .GlobalEnv explicitly so the names it lists come from the
# same environment rm() deletes from, even if this chunk is evaluated elsewhere.
# NOTE(review): objects named plot_kent* / plot_bacc* do not match these
# patterns and are left in place -- confirm that is intended.
rm(list = ls(pattern = "^kent", envir = .GlobalEnv), envir = .GlobalEnv)
rm(list = ls(pattern = "^bacc", envir = .GlobalEnv), envir = .GlobalEnv)
8.3.1 Running plotting functions
Let’s start by subsetting the metadata file so we can look at 10 files from any random year and place.
# Draw 10 distinct bird IDs at random from the metadata.
# slice_sample() replaces the superseded sample_n(); unlike sample_n() it
# returns all available rows, rather than erroring, if fewer than 10 exist.
# The draw is not seeded, so the IDs printed below change on every run; call
# set.seed() beforehand if a reproducible subset is needed.
random_bird_ids <- metrics_md %>%
  distinct(bird_id) %>%
  slice_sample(n = 10) %>%
  pull(bird_id)
random_bird_ids
## [1] "CD606" "CH713" "CD489" "BU831" "CH023" "CD474" "BU468" "CD454" "CH789"
## [10] "BU752"
Proportions:
# Subset the 2hr-bin proportions to the randomly drawn birds so we have less
# data to work with; colony is kept here alongside the other columns.
random_props <- deg_props_2hrs %>%
  filter(bird_id %in% random_bird_ids) %>%
  select(bird_id, colony, date, start_time, end_time, prop_wet)

# Make sure the filter kept the same random files.
print(unique(random_props$bird_id))
## [1] "BU468" "BU752" "BU831" "CD454" "CD474" "CD489" "CD606" "CH023" "CH713"
## [10] "CH789"
# Plot prop_wet per 2hr bin for the random subset.
# The title's parentheses were unbalanced ("Per 2hr bins) - ("); fixed here.
plot_random_props <- plot_props(
  data = random_props,
  y_var = "prop_wet",
  y_label = "Proportion",
  plot_title = "Proportion of Wet Time Across Deployments (Per 2hr bins) - (10 random files)"
)
Daily Proportions:
# Collapse the 2hr bins to one row per bird per day: the mean of prop_wet,
# with NA bins ignored. `.groups = "drop"` returns an ungrouped tibble.
random_daily_props <- random_props %>%
  group_by(bird_id, date) %>%
  summarize(daily_prop_wet = mean(prop_wet, na.rm = TRUE), .groups = "drop")

# Plot the daily mean proportion for the random subset.
plot_random_daily_props <- plot_daily_props(
  data = random_daily_props,
  y_var = "daily_prop_wet",
  y_label = "Proportion",
  plot_title = "Mean Proportion of Wet Periods (2hr bins) Per Day (10 random files)"
)
Daily Switches:
# Subset the daily switch counts to the randomly drawn birds so we have less
# data to work with.
random_switches <- daily_switches %>%
  filter(bird_id %in% random_bird_ids) %>%
  select(bird_id, date, daily_switches)

# Make sure the filter kept the same random files.
print(unique(random_switches$bird_id))
## [1] "BU468" "BU752" "BU831" "CD454" "CD474" "CD489" "CD606" "CH023" "CH713"
## [10] "CH789"
# Plot the per-day switch totals for the random subset.
plot_random_daily_switches <- plot_daily_switches(
  data = random_switches,
  y_var = "daily_switches",
  y_label = "Sum of Switches",
  plot_title = "Daily Sum of Switches between Wet (wet>0) and Dry (wet=0) States for 10 Random Files"
)
Longest Wet/Dry Periods
# Subset the longest wet/dry period table to the randomly drawn birds,
# keeping the duration and start/end columns for each state.
random_longest_prds <- deg_longest_prds %>%
  filter(bird_id %in% random_bird_ids) %>%
  select(
    bird_id,
    longest_dry_time,
    longest_dry_start,
    longest_dry_end,
    longest_wet_time,
    longest_wet_start,
    longest_wet_end
  )

# Plot the longest wet and dry periods for the random subset.
plot_random_longest_prds <- plot_longest_prds(
  data = random_longest_prds,
  plot_title = "Longest Wet and Dry Periods for 10 Random Files"
)
Total Wet/Dry