5 Processing count results

For now, we are interested in calculating the following parameters for the count results:

The number of birds within every PPI pixel.
The average mass of the birds within every PPI pixel.

We can derive the total numbers of birds comparatively easily from the bird counts by Sovon, but to calculate the average mass of the birds we need to link a database of life-history traits. For the latter we first need to translate the vernacular (modern) names of bird species (used by Sovon) to the scientific ones, so we can link both.

5.1 Processing environment

library(rgbif)
library(stringr)
library(readxl)
library(dplyr)
library(tidyr)
library(readr)
library(kableExtra)

5.2 Loading the Sovon count data

The count data is spread over a few sheets in an xlsx file, which we load here. For clarity, we rename all the columns to English and we filter out all counts (i.e. areas) where birds are not positively identified to species level. Although it would be possible to ‘fill’ these uncertain counts based on proportions, determining how to do that is not necessary for our purposes. Instead, we will just remove these counts altogether. Finally, subspecies identifiers for these species are removed, as we assume variation is limited and the database of life-history traits does not contain parameters for subspecies separately.

sovon_data <- "data/raw/sovon/tel_dec_jan_1718.xlsx"

data <- data.frame()
sheets <- excel_sheets(sovon_data)
sheets <- sheets[-c(1, 5)]  # Sheet 1 and 5 contain PTT and roost counts respectively, so we ignore these for now, as they have to be processed differently

# Explicit column types to suppress warnings thrown because of lacking euring codes for records with no birds
coltypes <- c("numeric", "text", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "text", "numeric", "numeric", "numeric")

for (i in seq_along(sheets)) {
  data <- rbind(data, read_excel(sovon_data, sheet = sheets[i], col_types = coltypes))
}

data %>%
  drop_na() %>%  # A few rows somehow contain no birds or species 'geen vogels'
  rename(count_id = TELLING_ID, area_nr = GEBIEDSCODE, year = JAAR, month = MAAND, day = DAG, start_time = BEGINTIJD, end_time = EINDTIJD,
         "euring" = "EURING", species = SOORT, number = Aantal, xcoor = XCOOR, ycoor = YCOOR) %>%
  group_by(area_nr) %>%
  filter(!any(str_ends(species, "spec."))) %>%  # Filter out all counts with unidentified birds
  filter(!any(length(str_subset(species, " of ")) > 0)) %>%  # Filter out all counts with either/or totals
  filter(!any(str_starts(species, "hybride"))) %>%  # Filter out all counts with hybrids
  ungroup() %>%
  rowwise() %>%
  mutate(species = str_split(species, "\\(")[[1]][1] %>% str_trim()) -> wb_data  # And remove all subspecies identifications

head(wb_data, 10) %>%
  kable(format = "html", col.names = colnames(wb_data)) %>%
  kable_styling() %>%
  kableExtra::scroll_box(width = "100%", height = "400px")

count_id	area_nr	year	month	day	start_time	end_time	euring	species	number	xcoor	ycoor
1060908	BR1111	2017	12	19	1045	1108	1610	Grauwe Gans	280	127723	426233
1060908	BR1111	2017	12	19	1045	1108	1700	Nijlgans	2	127723	426233
1060909	BR1112	2017	12	19	1108	1126	1610	Grauwe Gans	76	125082	426701
1060909	BR1112	2017	12	19	1108	1126	1661	Grote Canadese Gans	50	125082	426701
1060909	BR1112	2017	12	19	1108	1126	1700	Nijlgans	22	125082	426701
1060906	BR1122	2017	12	19	1020	1037	1610	Grauwe Gans	250	125313	426061
1060906	BR1122	2017	12	19	1020	1037	1619	Soepgans	5	125313	426061
1060906	BR1122	2017	12	19	1020	1037	1661	Grote Canadese Gans	35	125313	426061
1060911	BR1130	2017	12	19	1145	1200	1610	Grauwe Gans	200	121962	426420
1060911	BR1130	2017	12	19	1145	1200	1661	Grote Canadese Gans	40	121962	426420

Following this logic, we can process the PTT counts similarly and see which species are contained in those.

read_excel(sovon_data, sheet = 1) %>%
  rename(count_id = tellingid, route = route, count_point = telpunt, season = seizoen, year = teljaar, month = maand, 
         day = dag, euring = euring, species = soort, number = aantal, xcoor = xcoor, ycoor = ycoor) %>%
  group_by(route, count_point) %>%
  filter(!any(str_ends(species, "spec."))) %>%  # Filter out all counts with unidentified birds
  filter(!any(length(str_subset(species, " of ")) > 0)) %>%  # Filter out all counts with either/or totals
  filter(!any(str_starts(species, "hybride"))) %>%  # Filter out all counts with hybrids
  ungroup() %>%
  rowwise() %>%
  mutate(species = str_split(species, "\\(")[[1]][1] %>% str_trim()) -> ptt_data  # And remove all subspecies identifications

head(ptt_data, 10) %>%
  kable(format = "html", col.names = colnames(ptt_data)) %>%
  kable_styling() %>%
  kableExtra::scroll_box(width = "100%", height = "400px")

count_id	route	count_point	season	year	month	day	euring	species	number	xcoor	ycoor
80917	4	1	2017	2017	12	23	720	Aalscholver	2	246342	520763
80917	4	1	2017	2017	12	23	5920	Zilvermeeuw	18	246342	520763
80917	4	1	2017	2017	12	23	6700	Houtduif	1	246342	520763
80917	4	1	2017	2017	12	23	11870	Merel	4	246342	520763
80917	4	1	2017	2017	12	23	15630	Roek	24	246342	520763
80917	4	2	2017	2017	12	23	6700	Houtduif	1	246357	522178
80917	4	2	2017	2017	12	23	11870	Merel	1	246357	522178
80917	4	2	2017	2017	12	23	15600	Kauw	6	246357	522178
80917	4	2	2017	2017	12	23	15630	Roek	78	246357	522178
80917	4	3	2017	2017	12	23	14620	Pimpelmees	2	246692	523139

5.3 Filtering/preprocessing species names

The following section is the result of an iterative process aimed at matching species in our count data with species in the database of life-history characteristics. Unfortunately, automatic tools only get us so far, a bit of tweaking has to be done by hand. To reduce manual corrections needed, only species to which >1% of the birds belong in a single count have been corrected manually. In other words: if a species which cannot be matched always accounts for less than 1% of the total number of birds within a count, this species is discarded from the whole dataset.

Besides the 1% criteria, both the waterbird and PTT counts contain some exotics (e.g. ‘Helmparelhoen’/Helmeted guineafowl) for which our life-history characteristics dataset does not contain any measurements anyways (there are others that we do have measurements of), some species that are very unlikely to ever take flight during NYE (e.g. ‘Kip’/Domesticated chicken) and some mammals for which the same applies. We will remove these from the dataset manually.

exotics <- c("Helmparelhoen", "Kaapse Casarca", "Manengans", "Ringtaling", "Buffelkopeend", "Kokardezaagbek", "Chileense Flamingo",
             "Kaapse Taling", "Bahamapijlstaart", "Muskuseend", "Zwarte Zwaan", "Knobbelgans", "Zwaangans")
unlikely_flight_candidate <- c("Kip")
mammals <- c("Damhert", "Haas", "Ree", "Bever", "Bruine Rat", "Muskusrat", "Mol", "Vos", "Kat", "Otter", "Grijze Zeehond", "Konijn",
             "Eekhoorn", "Edelhert", "Gewone Zeehond", "Wild Zwijn", "Steenmarter", "Moeflon")
input_error <- c("Steltstrandloper")
remove_species <- c(exotics, unlikely_flight_candidate, mammals, input_error)

ptt_data %>%
  filter(!species %in% remove_species) -> ptt_data

wb_data %>%
  filter(!species %in% remove_species) -> wb_data

With all these species removed or adjusted, we can create a species lookup table. We fetch the scientific names and corresponding GBIF species IDs from the Checklist Dutch Species Register, which is the GBIF dataset with key 4dd32523-a3a3-43b7-84df-4cda02f15cf7. We furthermore remove all unnecessary information, such as subspecies from the scientific names as well.

unique_species <- unique(c(wb_data$species, ptt_data$species))

build_species_lut <- function(specieslist, datasetKey = NULL, class_name = NULL) {
  # As this function can possibly return many different records, we pick the scientific name and GBIF ID (nubKey) that are the most
  # common in the returned results. This should most often result in an OK result of the name lookup function.
  Mode <- function(x) {
    ux <- unique(na.omit(x))
    ux[which.max(tabulate(match(x, ux)))]
  }
  
  n <- length(specieslist)
  
  species_lut <- data.frame(lookupname = character(n), 
                            scientificname = character(n), 
                            gbif_key = numeric(n), stringsAsFactors = FALSE)
  
  # Somehow the higherTaxonKey changes regularly, so we have to query this first
  higherTaxonKey <- NULL
  if (!is.null(class_name)) {
    class_record <- name_lookup(class_name, datasetKey = datasetKey)
    higherTaxonKey <- class_record$data$classKey
  }
  
  for(i in seq_along(specieslist)) {
    gbif_data <- tryCatch({
      gbif <- name_lookup(specieslist[i], 
                          datasetKey = datasetKey, 
                          higherTaxonKey = higherTaxonKey, 
                          return = "data")
      list(paste(str_split(Mode(gbif$data$scientificName), pattern = " ")[[1]][1:2], collapse = " "), Mode(gbif$data$nubKey))
    }, error = function(e) {
      list("", NaN)
    })
    
    species_lut[i, ] <- c(specieslist[i], gbif_data[1], as.numeric(as.character(gbif_data[2])))
  }
  
  return(species_lut)
}

species_lut <- build_species_lut(unique_species, datasetKey = "4dd32523-a3a3-43b7-84df-4cda02f15cf7", class_name = "Aves")

## Warning: `return` param in `name_lookup` function is defunct as of rgbif v3.0.0, and is ignored
## See `?rgbif` for more information.
## This warning will be thrown once per R session.

## Warning: Unknown or uninitialised column: `scientificName`.

head(species_lut, 10) %>%
  kable(format = "html", col.names = colnames(species_lut)) %>%
  kable_styling() %>%
  kableExtra::scroll_box(width = "100%", height = "400px")

lookupname	scientificname	gbif_key
Grauwe Gans	Anser anser	6178331
Nijlgans	Alopochen aegyptiaca	2498252
Grote Canadese Gans	Branta canadensis	5232437
Soepgans	Anser anser	9384117
Brandgans	Branta leucopsis	5232464
Knobbelzwaan	Cygnus olor	2498343
Kolgans	Anser albifrons	2498017
Canadese Gans	Branta canadensis	5232437
Toendrarietgans	Anser serrirostris	6085485
Kleine Zwaan	Cygnus bewickii	4409105

Finally, the lookup table also contains some scientific names which unfortunately will not match with the life-history characteristics dataset, so these too we will adjust manually.

# Change to similar species which is in the LHT database
species_lut[species_lut$lookupname == "Toendrarietgans", "scientificname"] <- "Anser fabalis"  # Taiga Bean Goose
species_lut[species_lut$lookupname == "Kleine Canadese Gans", "scientificname"] <- "Branta leucopsis"  # Barnacle Goose
species_lut[species_lut$lookupname == "Kleine Barmsijs", "scientificname"] <- "Acanthis flammea"  # Redpoll
species_lut[species_lut$lookupname == "Pontische Meeuw", "scientificname"] <- "Larus argentatus"  # Herring Gull
species_lut[species_lut$lookupname == "Indische Gans", "scientificname"] <- "Anser albifrons"  # Greater White-fronted Goose
species_lut[species_lut$lookupname == "Canadese Gans", "scientificname"] <- "Branta canadensis"  # Canada Goose
species_lut[species_lut$lookupname == "Topper", "scientificname"] <- "Aythya marila"  # Greater Scaup
species_lut[species_lut$lookupname == "Tafeleend", "scientificname"] <- "Aythya ferina"  # Greater Scaup

# Change scientific name for same species to match with the LHT database
species_lut[species_lut$lookupname == "Kleine Zwaan", "scientificname"] <- "Cygnus columbianus"  # Bewick's Swan
species_lut[species_lut$lookupname == "Kokmeeuw", "scientificname"] <- "Larus ridibundus"  # Black-headed Gull
species_lut[species_lut$lookupname == "Smient", "scientificname"] <- "Mareca penelope"  # Wigeon
species_lut[species_lut$lookupname == "Krakeend", "scientificname"] <- "Mareca strepera"  # Gadwall
species_lut[species_lut$lookupname == "Slobeend", "scientificname"] <- "Spatula clypeata"  # Northern Shoveler
species_lut[species_lut$lookupname == "Winterkoning", "scientificname"] <- "Troglodytes troglodytes"  # Wren
species_lut[species_lut$lookupname == "Grote Jager", "scientificname"] <- "Catharacta skua"  # Great Skua
species_lut[species_lut$lookupname == "Roodborsttapuit", "scientificname"] <- "Saxicola torquatus"  # Stonechat
species_lut[species_lut$lookupname == "Strandleeuwerik", "scientificname"] <- "Eremophila alpestris"  # Horned Lark

5.4 Adding vernacular family names

Fetching vernacular names of scientific families from GBIF is even more convoluted than the name lookup we have done above, so instead we have manually derived a list of vernacular names for families based on the generated species lookup table.

read_delim("data/raw/sovon/familyvernacular_lut.csv", delim = ";", col_types = cols(lookupname = col_character(), familyvernacular = col_character())) %>%
  right_join(species_lut, by = "lookupname") -> species_lut

5.5 Linking life-history traits to species

We use the Life-history characteristics of European birds-dataset (Storchová and Hořák 2018) to calculate the mean mass of all birds in a PPI pixel. This dataset is stored on Dryad and we can download it there. Unfortunately at the time of writing the rdryad package is severely out-of-date with the new Dryad API, so we cannot nicely automate this download yet. Anyways, the files should be downloaded manually and added to data/raw/life-history-characteristics/.

We calculate the mean of the mass of both sexed and unsexed birds and assume that represents the mean mass for a species.

read_tsv("data/raw/life-history-characteristics/Life-history characteristics of European birds.txt", 
         col_types = cols_only('Species' = col_character(), 'WeightU_MEAN' = col_double(), 'WeightM_MEAN' = col_double(), 'WeightF_MEAN' = col_double())) %>%
  rowwise %>%
  mutate(mean_weight = mean(c(WeightU_MEAN, WeightF_MEAN, WeightM_MEAN))) %>%
  dplyr::select(Species, mean_weight) %>%
  rename(species = Species) %>%
  filter(!any(str_ends(species, "ssp"))) %>%  # Filter out birds not identified to species
  rowwise() %>%
  mutate(species = paste(str_split(species, pattern = " ")[[1]][1:2], collapse = " ")) %>%
  ungroup() %>%
  group_by(species) %>%
  summarise(mean_weight = mean(mean_weight), .groups = "drop_last",
            mean_crs = mean_weight ^ (2/3)) %>%
  drop_na() -> lhc

lhc[lhc$species == "Aquila nipalenis", "species"] <- "Aquila nipalensis"  # Small error in dataset -> notified author

head(lhc, 10) %>%
  kable(format = "html", col.names = colnames(lhc)) %>%
  kable_styling() %>%
  kableExtra::scroll_box(width = "100%", height = "400px")

species	mean_weight	mean_crs
Acanthis flammea	13.40	5.641612
Accipiter brevipes	221.50	36.608547
Accipiter gentilis	931.50	95.379543
Accipiter nisus	204.00	34.654006
Acridotheres cristatellus	124.35	24.913258
Acridotheres tristis	124.35	24.913258
Acrocephalus agricola	10.50	4.795047
Acrocephalus arundinaceus	30.30	9.719153
Acrocephalus dumetorum	12.00	5.241483
Acrocephalus melanopogon	11.70	5.153757

Now we can try to link the names once again with what can be found in GBIF.

unique_species_lhc <- unique(lhc$species)

lhc_species_lut <- build_species_lut(unique_species_lhc, dataset = NULL)

head(lhc_species_lut, 10) %>%
  kable(format = "html", col.names = colnames(lhc_species_lut)) %>%
  kable_styling() %>%
  kableExtra::scroll_box(width = "100%", height = "400px")

lookupname	scientificname	gbif_key
Acanthis flammea	Acanthis flammea	5231630
Accipiter brevipes	Accipiter brevipes	2480578
Accipiter gentilis	Accipiter gentilis	2480589
Accipiter nisus	Accipiter nisus	2480637
Acridotheres cristatellus	Acridotheres cristatellus	2489010
Acridotheres tristis	Acridotheres tristis	2489005
Acrocephalus agricola	Acrocephalus agricola	2493137
Acrocephalus arundinaceus	Acrocephalus arundinaceus	2493128
Acrocephalus dumetorum	Acrocephalus dumetorum	2493145
Acrocephalus melanopogon	Acrocephalus melanopogon	2493124

With the GBIF IDs/keys in place for both the life-history characteristics, as well as the Sovon counts, we can now link the datasets together. We start with the waterbirds.

lhc %>%
  left_join(lhc_species_lut, by = c("species" = "scientificname")) %>%
  dplyr::select(species, mean_weight, mean_crs, gbif_key) -> lhc

wb_data %>%
  left_join(species_lut, by = c("species" = "lookupname")) %>%
  left_join(dplyr::select(lhc, mean_weight, mean_crs, gbif_key), by = c("gbif_key" = "gbif_key")) %>%
  left_join(dplyr::select(lhc, mean_weight, mean_crs, species), by = c("scientificname" = "species")) %>%
  dplyr::select(-c(mean_weight.x, mean_crs.x)) %>%
  rename(mean_weight = mean_weight.y, mean_crs = mean_crs.y) -> wb_data_lhc

head(wb_data_lhc, 10) %>%
  kable(format = "html", col.names = colnames(wb_data_lhc)) %>%
  kable_styling() %>%
  kableExtra::scroll_box(width = "100%", height = "400px")

count_id	area_nr	year	month	day	start_time	end_time	euring	species	number	xcoor	ycoor	familyvernacular	scientificname	gbif_key	mean_weight	mean_crs
1060908	BR1111	2017	12	19	1045	1108	1610	Grauwe Gans	280	127723	426233	Geese	Anser anser	6178331	3347	223.7538
1060908	BR1111	2017	12	19	1045	1108	1700	Nijlgans	2	127723	426233	Geese	Alopochen aegyptiaca	2498252	2270	172.7232
1060909	BR1112	2017	12	19	1108	1126	1610	Grauwe Gans	76	125082	426701	Geese	Anser anser	6178331	3347	223.7538
1060909	BR1112	2017	12	19	1108	1126	1661	Grote Canadese Gans	50	125082	426701	Geese	Branta canadensis	5232437	4635	277.9926
1060909	BR1112	2017	12	19	1108	1126	1700	Nijlgans	22	125082	426701	Geese	Alopochen aegyptiaca	2498252	2270	172.7232
1060906	BR1122	2017	12	19	1020	1037	1610	Grauwe Gans	250	125313	426061	Geese	Anser anser	6178331	3347	223.7538
1060906	BR1122	2017	12	19	1020	1037	1619	Soepgans	5	125313	426061	Geese	Anser anser	9384117	3347	223.7538
1060906	BR1122	2017	12	19	1020	1037	1661	Grote Canadese Gans	35	125313	426061	Geese	Branta canadensis	5232437	4635	277.9926
1060911	BR1130	2017	12	19	1145	1200	1610	Grauwe Gans	200	121962	426420	Geese	Anser anser	6178331	3347	223.7538
1060911	BR1130	2017	12	19	1145	1200	1661	Grote Canadese Gans	40	121962	426420	Geese	Branta canadensis	5232437	4635	277.9926

And then the PTT counts.

ptt_data %>%
  left_join(species_lut, by = c("species" = "lookupname")) %>%
  left_join(dplyr::select(lhc, mean_weight, mean_crs, gbif_key), by = c("gbif_key" = "gbif_key")) %>%
  left_join(dplyr::select(lhc, mean_weight, mean_crs, species), by = c("scientificname" = "species")) %>%
  dplyr::select(-c(mean_weight.x, mean_crs.x)) %>%
  rename(mean_weight = mean_weight.y, mean_crs = mean_crs.y) -> ptt_data_lhc

head(ptt_data_lhc, 10) %>%
  kable(format = "html", col.names = colnames(ptt_data_lhc)) %>%
  kable_styling() %>%
  kableExtra::scroll_box(width = "100%", height = "400px")

count_id	route	count_point	season	year	month	day	euring	species	number	xcoor	ycoor	familyvernacular	scientificname	gbif_key	mean_weight	mean_crs
80917	4	1	2017	2017	12	23	720	Aalscholver	2	246342	520763	Cormorants	Phalacrocorax carbo	2481890	2254.0	171.910581
80917	4	1	2017	2017	12	23	5920	Zilvermeeuw	18	246342	520763	Gulls	Larus argentatus	2481139	1054.0	103.568354
80917	4	1	2017	2017	12	23	6700	Houtduif	1	246342	520763	Pigeons	Columba palumbus	2495455	496.5	62.701727
80917	4	1	2017	2017	12	23	11870	Merel	4	246342	520763	Thrushes	Turdus merula	2490719	97.0	21.111276
80917	4	1	2017	2017	12	23	15630	Roek	24	246342	520763	Crows	Corvus frugilegus	2482513	459.0	59.503401
80917	4	2	2017	2017	12	23	6700	Houtduif	1	246357	522178	Pigeons	Columba palumbus	2495455	496.5	62.701727
80917	4	2	2017	2017	12	23	11870	Merel	1	246357	522178	Thrushes	Turdus merula	2490719	97.0	21.111276
80917	4	2	2017	2017	12	23	15600	Kauw	6	246357	522178	Crows	Corvus monedula	2482473	236.0	38.189268
80917	4	2	2017	2017	12	23	15630	Roek	78	246357	522178	Crows	Corvus frugilegus	2482513	459.0	59.503401
80917	4	3	2017	2017	12	23	14620	Pimpelmees	2	246692	523139	Tits	Cyanistes caeruleus	2487879	11.5	5.094856

Now we can verify if our 1% criteria (see above) for species matching is met. We do this by calculating the proportions of birds belonging to a certain species out of the total numbers of birds counted within a count. This should result in an empty dataframe, which will stop the code chunk below from running if that is not the case.

wb_data_lhc %>%
  as.data.frame() %>%
  group_by(count_id) %>%
  mutate(total_birds_count = sum(number)) %>%
  group_by(count_id, species) %>%
  mutate(proportion_species = sum(number) / total_birds_count) %>%
  ungroup() %>%
  arrange(count_id) %>%
  filter((is.na(mean_weight) & (proportion_species > 0.01))) -> wb_data_lhc_verify

stopifnot(nrow(wb_data_lhc_verify) == 0)
rm(wb_data_lhc_verify)

And once again, we can do the same for the PTT counts.

ptt_data_lhc %>%
  as.data.frame() %>%
  group_by(count_id) %>%
  mutate(total_birds_count = sum(number)) %>%
  group_by(count_id, species) %>%
  mutate(proportion_species = sum(number) / total_birds_count) %>%
  ungroup() %>%
  arrange(count_id) %>%
  filter((is.na(mean_weight) & (proportion_species > 0.01))) -> ptt_data_lhc_verify

stopifnot(nrow(ptt_data_lhc_verify) == 0)
rm(ptt_data_lhc_verify)

With that out of the way we can finally remove the remaining empty rows and save the PTT and waterbird counts in their final form.

ptt_data_lhc %>%
  drop_na() -> ptt_data

wb_data_lhc %>%
  drop_na() -> wb_data

And we save the data for further use.

saveRDS(ptt_data, file = "data/processed/sovon/ptt.RDS")
saveRDS(wb_data, file = "data/processed/sovon/wb.RDS")

5.6 Calculating total bird biomass

While we are at it, we will already calculate the total biomass contained within each of the count areas, so we can then add these values to the PPIs through the IDs of the count areas/routes. We use the waterbird count from January 2018, and the point-transect counts from 2017, as these are the best in terms of coverage.

wb_year <- 2018
ptt_year <- 2017

wb_data %>%
  mutate(area_nr = as.character(area_nr)) %>%
  filter(year == wb_year) %>%
  rowwise() %>%
  mutate(specific_biomass = number * mean_weight,
         specific_crs = number * mean_crs) %>%
  ungroup() %>%
  group_by(area_nr, year) %>%
  summarise(total_biomass = sum(specific_biomass),
            total_crs = sum(specific_crs),
            weighted_mean_weight = weighted.mean(mean_weight, number),
            weighted_mean_crs = weighted.mean(mean_crs, number),
            total_birds = sum(number),
            .groups = "drop_last") %>%
  ungroup() %>%
  group_by(area_nr) %>%  # Below is only required if year-filter is NOT set, otherwise does nothing
  summarise(total_biomass = mean(total_biomass),
            total_crs = mean(total_crs),
            weighted_mean_weight = mean(weighted_mean_weight),
            weighted_mean_crs = mean(weighted_mean_crs),
            total_birds = mean(total_birds),
            .groups = "drop_last") %>%
  identity() -> wb_biomass

ptt_data %>%
  filter(year == ptt_year) %>%
  rowwise() %>%
  mutate(specific_biomass = number * mean_weight,
         specific_crs = number * mean_crs) %>%
  ungroup() %>%
  group_by(route, year) %>%
  summarise(total_biomass = sum(specific_biomass),
            total_crs = sum(specific_crs),
            weighted_mean_weight = weighted.mean(mean_weight, number),
            weighted_mean_crs = weighted.mean(mean_crs, number),
            total_birds = sum(number),
            .groups = "drop_last") %>%
  ungroup() %>%
  group_by(route) %>%  # Below is only required if year-filter is NOT set, otherwise does nothing
  summarise(total_biomass = mean(total_biomass),
            total_crs = mean(total_crs),
            weighted_mean_weight = mean(weighted_mean_weight),
            weighted_mean_crs = mean(weighted_mean_crs),
            total_birds = mean(total_birds),
            .groups = "drop_last") %>%
  identity() -> ptt_biomass

And now we will save these datasets of total_biomass as well.

saveRDS(wb_biomass, file = "data/processed/sovon/wb_biomass.RDS")
saveRDS(ptt_biomass, file = "data/processed/sovon/ptt_biomass.RDS")

5.7 Calculating proportions across bird families

Now that we have identified which families birds belong to, we can calculate the proportion of birds that belong to these families across all the PTT and waterbird counts. This can give an idea of the spatial composition of bird communities to illustrate analysis results with.

wb <- wb_data
ptt <- ptt_data
families <- unique(species_lut$familyvernacular)

wb %>%
  mutate(area_nr = as.character(area_nr)) %>%
  filter(year == wb_year) %>%
  group_by(area_nr, year) %>%
  mutate(total_birds = sum(number)) %>%
  ungroup() %>%
  group_by(area_nr, year, familyvernacular) %>%
  mutate(familyprop = sum(number) / total_birds) %>%
  ungroup() %>%
  dplyr::select(area_nr, familyvernacular, familyprop, total_birds) %>%
  distinct() %>%
  arrange(area_nr) %>%
  pivot_wider(names_from = familyvernacular, values_from = familyprop) %>%
  replace(is.na(.), 0) %>%
  identity() -> wb_props

ptt %>%
  filter(year == ptt_year) %>%
  group_by(route, year) %>%
  mutate(total_birds = sum(number)) %>%
  ungroup() %>%
  group_by(route, year, familyvernacular) %>%
  mutate(familyprop = sum(number) / total_birds) %>%
  ungroup() %>%
  dplyr::select(route, familyvernacular, familyprop, total_birds) %>%
  distinct() %>%
  arrange(route) %>%
  pivot_wider(names_from = familyvernacular, values_from = familyprop) %>%
  replace(is.na(.), 0) %>%
  identity() -> ptt_props

And now we will save these datasets of family proportions as well.

saveRDS(wb_props, file = "data/processed/sovon/wb_props.RDS")
saveRDS(ptt_props, file = "data/processed/sovon/ptt_props.RDS")

5.8 Export lookup table characteristic groups

We store a lookup table of characteristic groups and scientific names as .csv for a WebTable in the paper.

ptt %>%
  dplyr::select(species, scientificname, familyvernacular, mean_weight, mean_rcs = mean_crs) %>%
  distinct(scientificname, .keep_all = TRUE) %>%
  group_by(familyvernacular) %>%
  mutate(family_mean_weight = mean(mean_weight)) %>%
  ungroup() %>%
  arrange(desc(family_mean_weight), desc(mean_weight)) %>%
  write.csv("data/processed/sovon/lut_species_family.csv")

5.9 Summary statistics for WebPanel

Number of PTT counts included

ptt %>%
  distinct(count_id) %>%
  nrow()

## [1] 593

Number of waterbird counts included

wb %>%
  distinct(area_nr) %>%
  nrow()

## [1] 3472

4 Annotating count areas

6 Apply processing to all PPIs