4 Longest Wet/Dry Periods

•   Longest continuous time dry (longest number of consecutive rows with wets==0  * 10min)
•   Longest continuous time wet (longest number of consecutive rows with wets>=1 * 10min)

Here I use rle() (run length encoding) to find consecutive runs of rows where wets is equal to 0 (dry) or at least 1 (wet).

rle() returns two vectors: values, which holds the TRUE/FALSE value of each run, and lengths, which holds the number of consecutive rows in each run.

example // vec <- c(TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE) gives lengths = c(2, 3, 3, 1) and values = c(TRUE, FALSE, TRUE, FALSE)

Without looking at when the periods are occurring

#' Longest continuous dry and wet periods for one deg file
#'
#' Reads the file, run-length encodes the `wets0-20` column and reports the
#' longest unbroken stretch of dry rows (`wets0-20 == 0`) and of wet rows
#' (`wets0-20 >= 1`), each converted to minutes.
#'
#' @param file_path Path to a deg file readable by `read_csv()`. The bird ID
#'   is the five characters that follow "filtered_" in the file name.
#' @param interval_mins Minutes represented by one row (default 10).
#' @return A one-row tibble with `bird_id`, `longest_dry_time` and
#'   `longest_wet_time` (both in minutes). A state that never occurs in the
#'   file is reported as 0 minutes.
calc_longest_prds <- function(file_path, interval_mins = 10) {

  deg_data <- read_csv(file_path, show_col_types = FALSE)

  # define bird IDs
  bird_id <- str_extract(basename(file_path), "(?<=filtered_)[A-Z-0-9]{5}")

  # Number of rows in the longest run of TRUE values in `condition`.
  longest_run <- function(condition) {
    runs <- rle(condition)
    # which() keeps the TRUE runs only, dropping FALSE runs and NA runs
    true_lengths <- runs$lengths[which(runs$values)]
    if (length(true_lengths) == 0L) {
      # the state never occurs: avoid max() returning -Inf with a warning
      return(0L)
    }
    max(true_lengths)
  }

  wet_counts <- deg_data$`wets0-20`

  # return results, converting row counts to minutes
  tibble(
    bird_id = bird_id,
    longest_dry_time = longest_run(wet_counts == 0) * interval_mins,
    longest_wet_time = longest_run(wet_counts >= 1) * interval_mins
  )
}

# define deg files to use
# `pattern` is a regular expression (not a glob), so match a literal ".deg"
# extension at the end of the file name
deg_files <- list.files(output.dir, pattern = "\\.deg$", full.names = TRUE) #add if you want to subset

# process files in parallel on 4 background R sessions
plan(multisession, workers = 4)

# run on gull 2017-2018 files; one row per file, row-bound into one tibble
longest_prds <- future_map_dfr(deg_files, calc_longest_prds)


longest_prds

## # A tibble: 298 × 3
##    bird_id longest_dry_time longest_wet_time
##    <chr>              <dbl>            <dbl>
##  1 BH584               4550             1140
##  2 BH594               5690              840
##  3 BH595               5800             2600
##  4 BH596               4580              920
##  5 BH602               6060             1150
##  6 BH603               7520             2300
##  7 BP488               1920             1410
##  8 BP490               4630             5800
##  9 BP491               6040             1660
## 10 BP492               7060             3780
## # ℹ 288 more rows

If we want to know when the periods are occurring

#' Longest continuous dry and wet periods, with start and end times
#'
#' Reads the file, builds a timestamp from its `date` and `time` columns,
#' run-length encodes the `wets0-20` column and reports the longest unbroken
#' stretch of dry rows (`wets0-20 == 0`) and of wet rows (`wets0-20 >= 1`)
#' together with the timestamps of the first and last row of each stretch.
#' When several runs tie for longest, the earliest one is reported.
#'
#' @param file_path Path to a deg file readable by `read_csv()`. The bird ID
#'   is the five characters that follow "filtered_" in the file name.
#' @param interval_mins Minutes represented by one row (default 10).
#' @return A one-row tibble with `bird_id`, `longest_dry_time`,
#'   `longest_dry_start`, `longest_dry_end`, `longest_wet_time`,
#'   `longest_wet_start` and `longest_wet_end`. Times are in minutes; a state
#'   that never occurs has time 0 and NA start/end.
find_longest_prds <- function(file_path, interval_mins = 10) {

  deg_data <- read_csv(file_path, show_col_types = FALSE)

  # define bird IDs
  bird_id <- str_extract(basename(file_path), "(?<=filtered_)[A-Z-0-9]{5}")

  # Parse with an explicit time zone so the result does not depend on the
  # machine's locale or daylight-saving gaps.
  # NOTE(review): assumes the logger clock is UTC -- confirm.
  datetime <- as.POSIXct(
    paste(deg_data$date, deg_data$time),
    format = "%Y-%m-%d %H:%M:%S",
    tz = "UTC"
  )

  # Find the longest run of TRUE values and the rows where it starts and ends
  longest_run <- function(condition) {
    runs <- rle(condition)
    # which() keeps the TRUE runs only, dropping FALSE runs and NA runs
    true_runs <- which(runs$values)
    if (length(true_runs) == 0L) {
      # the state never occurs in this file
      return(list(length = 0L, start = NA_integer_, end = NA_integer_))
    }
    # which.max() returns the first maximum, i.e. the earliest longest run
    best <- true_runs[which.max(runs$lengths[true_runs])]
    run_length <- runs$lengths[best]
    # Derive the start from the run's own end row; this also works when the
    # longest run is the very first run in the file.
    end_row <- cumsum(runs$lengths)[best]
    start_row <- end_row - run_length + 1L
    list(length = run_length, start = start_row, end = end_row)
  }

  wet_counts <- deg_data$`wets0-20`
  dry_run <- longest_run(wet_counts == 0)
  wet_run <- longest_run(wet_counts >= 1)

  # Return results (indexing with NA yields an NA timestamp)
  tibble(
    bird_id = bird_id,
    longest_dry_time = dry_run$length * interval_mins,
    longest_dry_start = datetime[dry_run$start],
    longest_dry_end = datetime[dry_run$end],
    longest_wet_time = wet_run$length * interval_mins,
    longest_wet_start = datetime[wet_run$start],
    longest_wet_end = datetime[wet_run$end]
  )
}

# define deg files to use
# `pattern` is a regular expression (not a glob), so match a literal ".deg"
# extension at the end of the file name
deg_files <- list.files(output.dir, pattern = "\\.deg$", full.names = TRUE) #add if you want to subset

# process files in parallel on 4 background R sessions
plan(multisession, workers = 4)

# run on all deg files
deg_longest_prds_time <- future_map_dfr(deg_files, find_longest_prds, .progress = TRUE)

deg_longest_prds <- deg_longest_prds_time %>%
  # attach deployment period and colony for each bird
  left_join(metrics_md %>% select(bird_id, deployment_period, colony), by = "bird_id") %>%
  # apply the same UTC conversion to all four start/end columns
  mutate(
    across(
      c(longest_dry_start, longest_dry_end, longest_wet_start, longest_wet_end),
      ~ as.POSIXct(.x, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
    )
  ) %>%
  # drop the 2023-2024 deployment period; filter() also drops rows whose
  # deployment_period is NA (birds with no match in metrics_md)
  filter(deployment_period != "2023-2024")