4 Longest Wet/Dry Periods
• Longest continuous time dry (longest number of consecutive rows with wets==0 * 10min)
• Longest continuous time wet (longest number of consecutive rows with wets>=1 * 10min)
Here I use rle() (run-length encoding) to find consecutive runs of rows where wets is equal to 0 (dry) or at least 1 (wet).
rle() returns two vectors: values, which holds the TRUE/FALSE value of each run, and lengths, which holds the number of consecutive rows in each run.
Example: vec <- c(TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE) gives values = c(TRUE, FALSE, TRUE, FALSE) and lengths = c(2, 3, 3, 1).
Without looking at when the periods are occurring
# Longest continuous dry and wet periods for a single deg file.
#
# file_path: path to a delimited deg file readable by read_csv() that has a
#   `wets0-20` column; the file name is expected to contain "filtered_"
#   followed by the five-character bird ID.
# Returns a one-row tibble with bird_id, longest_dry_time and
#   longest_wet_time (run length in rows * 10, i.e. minutes). A state that
#   never occurs in the file is reported as 0 rather than -Inf.
calc_longest_prds <- function(file_path) {
  deg_data <- read_csv(file_path, show_col_types = FALSE)

  # define bird IDs
  bird_id <- str_extract(basename(file_path), "(?<=filtered_)[A-Z-0-9]{5}")

  # Length (in rows) of the longest run of TRUE values in `condition`.
  longest_run <- function(condition) {
    rle_data <- rle(condition) # run-length encode the condition
    # which() keeps only runs whose value is TRUE and drops NA runs
    true_runs <- rle_data$lengths[which(rle_data$values)]
    if (length(true_runs) == 0) {
      # max() of an empty vector would give -Inf with a warning
      return(0)
    }
    max(true_runs)
  }

  # of dry times
  longest_dry_intervals <- longest_run(deg_data$`wets0-20` == 0)
  longest_dry_time <- longest_dry_intervals * 10 # convert to mins

  # of wet times
  longest_wet_intervals <- longest_run(deg_data$`wets0-20` >= 1)
  longest_wet_time <- longest_wet_intervals * 10 # convert to mins

  # return results
  tibble(
    bird_id = bird_id,
    longest_dry_time = longest_dry_time,
    longest_wet_time = longest_wet_time
  )
}
# define deg files to use
# Gather the deg files to process (tighten `pattern` to work on a subset).
# NOTE(review): `pattern` is interpreted as a regular expression, not a glob;
# confirm that "*.deg" selects exactly the intended files.
deg_files <- list.files(
  path = output.dir,
  pattern = "*.deg",
  full.names = TRUE
)

# Process the files under a multisession plan with four workers.
plan(multisession, workers = 4)

# run on gull 2017-2018 files: one summary row per file, row-bound together
longest_prds <- future_map_dfr(.x = deg_files, .f = calc_longest_prds)
longest_prds
## # A tibble: 298 × 3
## bird_id longest_dry_time longest_wet_time
## <chr> <dbl> <dbl>
## 1 BH584 4550 1140
## 2 BH594 5690 840
## 3 BH595 5800 2600
## 4 BH596 4580 920
## 5 BH602 6060 1150
## 6 BH603 7520 2300
## 7 BP488 1920 1410
## 8 BP490 4630 5800
## 9 BP491 6040 1660
## 10 BP492 7060 3780
## # ℹ 288 more rows
If we want to know when the periods are occurring
# Longest continuous dry and wet periods for a single deg file, together
# with the timestamps at which each period starts and ends.
#
# file_path: path to a delimited deg file readable by read_csv() that has
#   `date`, `time` and `wets0-20` columns; the file name is expected to
#   contain "filtered_" followed by the five-character bird ID.
# Returns a one-row tibble: bird_id, longest_dry_time, longest_dry_start,
#   longest_dry_end, longest_wet_time, longest_wet_start, longest_wet_end.
#   Durations are run length in rows * 10 (minutes). When a state never
#   occurs its duration is 0 and its start/end are NA.
find_longest_prds <- function(file_path) {
  deg_data <- read_csv(file_path, show_col_types = FALSE)

  # define bird IDs
  bird_id <- str_extract(basename(file_path), "(?<=filtered_)[A-Z-0-9]{5}")

  # One timestamp per row, parsed in the session time zone.
  datetime <- as.POSIXct(
    paste(deg_data$date, deg_data$time),
    format = "%Y-%m-%d %H:%M:%S"
  )

  # Find the longest run of TRUE values and its first/last row.
  longest_run <- function(condition) {
    rle_data <- rle(condition) # run-length encode the condition
    true_runs <- which(rle_data$values) # runs that are TRUE (NA runs dropped)
    if (length(true_runs) == 0) {
      # No matching run at all: report zero length and unknown position.
      return(list(length = 0, start = NA_integer_, end = NA_integer_))
    }
    # First longest TRUE run (ties resolve to the earliest run).
    best <- true_runs[which.max(rle_data$lengths[true_runs])]
    max_length <- rle_data$lengths[best]
    # Working back from the run's last row avoids indexing position 0 when
    # the longest run is the very first run in the file.
    end_row <- cumsum(rle_data$lengths)[best]
    start_row <- end_row - max_length + 1
    list(length = max_length, start = start_row, end = end_row)
  }

  # Dry times
  dry_run <- longest_run(deg_data$`wets0-20` == 0)
  # Wet times
  wet_run <- longest_run(deg_data$`wets0-20` >= 1)

  # Return results (lengths are converted to minutes)
  tibble(
    bird_id = bird_id,
    longest_dry_time = dry_run$length * 10,
    longest_dry_start = datetime[dry_run$start],
    longest_dry_end = datetime[dry_run$end],
    longest_wet_time = wet_run$length * 10,
    longest_wet_start = datetime[wet_run$start],
    longest_wet_end = datetime[wet_run$end]
  )
}
# define deg files to use
# Gather the deg files to process (tighten `pattern` to work on a subset).
# NOTE(review): `pattern` is interpreted as a regular expression, not a glob;
# confirm that "*.deg" selects exactly the intended files.
deg_files <- list.files(output.dir, pattern = "*.deg", full.names = TRUE)
plan(multisession, workers = 4)

# run on all deg files
deg_longest_prds_time <- future_map_dfr(
  deg_files,
  find_longest_prds,
  .progress = TRUE
)

# Attach deployment period and colony for each bird, coerce the period
# boundaries with as.POSIXct(..., tz = "UTC"), and drop the 2023-2024
# deployment period.
deg_longest_prds <- deg_longest_prds_time %>%
  left_join(
    metrics_md %>% select(bird_id, deployment_period, colony),
    by = "bird_id"
  ) %>%
  mutate(
    longest_dry_start = as.POSIXct(longest_dry_start, format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
    longest_dry_end = as.POSIXct(longest_dry_end, format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
    longest_wet_start = as.POSIXct(longest_wet_start, format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
    longest_wet_end = as.POSIXct(longest_wet_end, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
  ) %>%
  filter(deployment_period != "2023-2024")