12 Figure 2: Model output

We have now quantified uncertainty in our model using bootstrapping and can finally visualise the outcome.

12.1 Processing environment

Once again we have tweaked some of the mboost default functionality, in this case to facilitate plotting using ggplot2.

modelci <- readRDS("data/models/confints/final_modelci.RDS")
data_cleaned <- readRDS("data/models/data_cleaned.RDS")
baseline <- readRDS("data/processed/disturbance_baseline.RDS")
source("R/mboost_bootstrapped_quantiles.R")
source("R/plot.mboost_adjusted.R")

12.2 Variable importance

We start by calculating summary statistics of bootstrapped variable importance, as we’ll organise the final figure according to descending variable importance.

bootstrapped_varimp <- function(modelci, exclude = c("dist_radar", "acov"), round = 0) {
  ind_vis <- lapply(modelci$varimp, function(x) {
    x %>%
      filter(!variable %in% exclude) %>%
      mutate(vi = reduction / sum(reduction) * 100)
  })
  vis_out <- ind_vis[[1]] %>%
    dplyr::select(variable, vi) %>%
    mutate(variable = as.character(variable))
  
  agg_vis <- lapply(ind_vis[2:length(ind_vis)], function(x) {
    x %>%
      dplyr::select(vi)
  })
  
  vis_out[2:length(ind_vis)] <- bind_cols(agg_vis)
  vis_out
}

bvi_biol <- bootstrapped_varimp(modelci)
bvi_all <- bootstrapped_varimp(modelci, exclude = NULL)

And we can plot the distribution of these variable importance measures for each of the predictors, for a model containing all predictors and one containing just biologically-relevant predictors. For the latter we have rescaled variable importance back to 100% after removal of dist_radar.

bvi_biol %>%
  pivot_longer(!variable, names_to = "bootstrap", values_to = "vi") %>%
  ggplot(aes(x = vi, y = variable)) +
  stat_eye() +
  labs(x = "Variable importance (%)", y = "Predictor", title = "Biologically-relevant predictors")
bvi_all %>%
  pivot_longer(!variable, names_to = "bootstrap", values_to = "vi") %>%
  ggplot(aes(x = vi, y = variable)) +
  stat_eye() +
  labs(x = "Variable importance (%)", y = "Predictor", title = "All model predictors")

We can now also calculate the mean variable importance for each of the predictors to determine the rank in the final figure.

bvi_biol %>%
  pivot_longer(!variable, names_to = "bootstrap", values_to = "vi") %>%
  group_by(variable) %>%
  summarise(mean_vi = mean(vi), q025 = quantile(vi, probs = 0.025), q975 = quantile(vi, probs = 0.975), .groups = "drop_last") %>%
  mutate(mean_round = round(mean_vi, 0)) %>%
  arrange(desc(mean_vi)) -> bvi_biol

bvi_all %>%
  pivot_longer(!variable, names_to = "bootstrap", values_to = "vi") %>%
  group_by(variable) %>%
  summarise(mean_vi = mean(vi), q025 = quantile(vi, probs = 0.025), q975 = quantile(vi, probs = 0.975), .groups = "drop_last") %>%
  mutate(mean_round = round(mean_vi, 0)) %>%
  arrange(desc(mean_vi)) -> bvi_all

bvi_biol
bvi_all
variable mean_vi q025 q975 mean_round
agricultural 57.7548679 53.1343876 62.384789 58
dist_urban 14.7228878 11.7484591 17.788836 15
total_rcs 13.1193902 10.5275230 16.032063 13
semiopen 6.7201888 4.2070031 9.538973 7
forests 5.9381210 4.0156491 8.093302 6
wetlands 1.0527471 0.5537306 1.675548 1
waterbodies 0.6917972 0.2450107 1.231014 1
variable mean_vi q025 q975 mean_round
acov 57.2999093 55.5008294 59.0830247 57
dist_radar 22.4759546 20.9457733 24.0241596 22
agricultural 11.6829366 10.4330351 13.0415245 12
dist_urban 2.9768141 2.3492067 3.6369542 3
total_rcs 2.6524443 2.1348919 3.2529700 3
semiopen 1.3599282 0.8326605 1.9754669 1
forests 1.1994243 0.8164663 1.6287635 1
wetlands 0.2128418 0.1136479 0.3365340 0
waterbodies 0.1397470 0.0481859 0.2495291 0

12.3 Compare groups

There’s quite some overlap in the confidence intervals, so let’s see which variables actually have different variable importance.

bvi_all %>%
  pivot_longer(!variable, names_to = "bootstrap", values_to = "vi") -> all

summary(aov(vi ~ variable, data = all))
pttest <- pairwise.t.test(all$vi, all$variable)
write.matrix(pttest$p.value, file = "data/models/varimp.pttest.csv", sep = ",")
pttest
##             Df Sum Sq Mean Sq F value Pr(>F)    
## variable     8  11300  1412.5    2164 <2e-16 ***
## Residuals   27     18     0.7                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  all$vi and all$variable 
## 
##              acov    agricultural dist_radar dist_urban forests semiopen
## agricultural < 2e-16 -            -          -          -       -       
## dist_radar   < 2e-16 1.4e-15      -          -          -       -       
## dist_urban   < 2e-16 1.1e-13      < 2e-16    -          -       -       
## forests      < 2e-16 1.3e-15      < 2e-16    0.03822    -       -       
## semiopen     < 2e-16 1.5e-15      < 2e-16    0.06126    1.00000 -       
## total_rcs    < 2e-16 6.1e-14      < 2e-16    1.00000    0.08410 0.12804 
## waterbodies  < 2e-16 < 2e-16      < 2e-16    0.00041    0.38295 0.33673 
## wetlands     < 2e-16 < 2e-16      < 2e-16    0.00049    0.38295 0.35385 
##              total_rcs waterbodies
## agricultural -         -          
## dist_radar   -         -          
## dist_urban   -         -          
## forests      -         -          
## semiopen     -         -          
## total_rcs    -         -          
## waterbodies  0.00104   -          
## wetlands     0.00125   1.00000    
## 
## P value adjustment method: holm

Indeed, mean variable importance for dist_urban is not significantly different from total_rcs and the same applies to waterbodies and wetlands.

12.4 Visualise marginal effects

Now that we have calculated how the marginal effects should be ordered, we can generate the final plots.

12.4.1 Biologically-relevant marginal effects

confidence.intervals <- c(0.95, 0.8, 0.5)
colors <- c("urban" = "#94346E", 
            "agricultural" = "#73AF48", 
            "semiopen" = "#EDAD08", 
            "forests" = "#0F8554", 
            "wetlands" = "#38A6A5", 
            "waterbodies" = "#1D6996",
            "dist_urban" = "#94346E", 
            "total_rcs" = "#CC503E", 
            "dist_radar" = "#666666")
x_labels <- c("urban" = "Urban", 
              "agricultural" = "Agricultural", 
              "semiopen" = "Semi-open", 
              "forests" = "Forests", 
              "wetlands" = "Wetlands", 
              "waterbodies" = "Water bodies", 
              "dist_urban" = "Distance to fireworks (m)", 
              "total_rcs" = "Total RCS (g^(2/3))", 
              "dist_radar" = "Distance from radar (m)")
limit_quantiles <- c("dist_urban", "total_rcs")
manual_limits <- list("dist_urban" = c(min(data_cleaned$dist_urban), max(data_cleaned$dist_urban)),
                      "total_rcs" = c(0, quantile(data_cleaned$total_rcs, probs = 0.975)))
# ylims <- c(-1.8, 1.5) + modelci$model$offset
ylims <- c(-1, 1)

plot_mboost_pdp <- function(modelci, data, which, confints, colors = NULL, ylims = ylims, varimp = NULL, offset = FALSE) {
  # Extract bootstrapped quantiles
  quants <- t(mboost_bootstrapped_quantiles(modelci, confidence.intervals, which = which))
  if (offset) quants <- quants + modelci$model$offset
  bootstrapped_quantiles <- as.data.frame(quants)
  # bootstrapped_quantiles <- as.data.frame(t(mboost_bootstrapped_quantiles(modelci, confidence.intervals, which = which)))
  bootstrapped_quantiles$x <- modelci$data[modelci$model$which(which)][[1]][, 1]
  bootstrapped_quantiles$y <- plot.mboost_adjusted(modelci$model, which = which, newdata = modelci$data[[modelci$model$which(which)]])[[2]]
  if (offset) {
    bootstrapped_quantiles$y <- bootstrapped_quantiles$y + modelci$model$offset
    ylims <- ylims + modelci$model$offset
  }
  
  p <- ggplot(bootstrapped_quantiles)
  i <- 1
  sorted_confints <- sort(confints, decreasing = TRUE)
  
  # Add variable importance
  if (is.null(varimp)) {
    variable_importance <- function(model, which, exclude = c("dist_radar"), round = 0) {
      vi <- as.data.frame(varimp(model)) %>%
        filter(!variable %in% exclude)
      vi$vi <- vi$reduction / sum(vi$reduction) * 100
      vi %>%
        filter(variable == which) %>%
        dplyr::select(vi) %>%
        as.numeric() %>%
        round(round)
    }
    vi <- variable_importance(modelci$model, which)
  } else {
    vi <- varimp %>%
      filter(variable == which) %>%
      dplyr::select(mean_round)
  }
  
  offset_right <- 0.95
  offset_top <- 0.35
  if (which == "dist_urban") {
    p <- p +
      annotate("text", x = manual_limits$dist_urban[[2]] * offset_right, y = ylims[2] - offset_top, label = paste0(vi, "%"), hjust = 1, color = colors[which],
               fontface = "bold", size = 12, alpha = 0.5)
  } else if(which == "total_rcs") {
    p <- p +
      annotate("text", x = manual_limits$total_rcs[[2]] * offset_right, y = ylims[2] - offset_top, label = paste0(vi, "%"), hjust = 1, color = colors[which],
               fontface = "bold", size = 12, alpha = 0.5)
  } else {
    p <- p +
      annotate("text", x = 1 * offset_right, y = ylims[2] - offset_top, label = paste0(vi, "%"), hjust = 1, color = colors[which], 
               fontface = "bold", size = 12, alpha = 0.5)
  }
  
  # Add density
  if (which == "dist_urban") {
    dens <- density(data[, which], bw = 225, from = manual_limits$dist_urban[[1]], to = manual_limits$dist_urban[[2]])
  } else if (which == "total_rcs") {
    dens <- density(data[, which], bw = 2000, from = manual_limits$total_rcs[[1]], to = manual_limits$total_rcs[[2]])
  } else {
    dens <- density(data[, which], bw = 0.03556, from = 0, to = 1)
  }
  scale_lims <- c(ylims[1] + 0.05, ylims[2] - 0.05)
  dens$y <- scales::rescale(dens$y, to = scale_lims)
  dens <- as.data.frame(list(as.matrix(dens$x), as.matrix(dens$y)))
  colnames(dens) <- c("x", "y")
  p <- p +
    geom_line(aes(x = x, y = y), data =  dens, color = "grey50", lineend = "round", linetype = 3) +
    scale_y_continuous(sec.axis = sec_axis(~ ., name = "Predictor frequency", breaks = scale_lims, labels = c("min", "max")))
  
  ## Add horizontal line
  if (offset) {
    yintercept <- modelci$model$offset
  } else {
    yintercept <- 0
  }
  p <- p +
    geom_hline(yintercept = yintercept, col = "darkgrey")
  
  for (ci in sorted_confints) {
    alphas <- rev(factor(sorted_confints))
    ymax <- as.name(paste((1 - (1 - ci) / 2) * 100, "%", sep = ""))
    ymax <- enquo(ymax)
    ymin <- as.name(paste(((1 - ci) / 2) * 100, "%", sep = ""))
    ymin <- enquo(ymin)
    
    p <- p +
      geom_ribbon(aes(x = x, ymin = !!ymin, ymax = !!ymax, alpha = !!alphas[i]), fill = colors[which]) +
      geom_line(aes(x = x, y = y), color = colors[which], size = 0.75, lineend = "round")
    
    if (offset) {
      p <- p + scale_y_continuous(labels = function(x) {10^x})
    }

    i <- i + 1
  }
  
  p <- p +
    scale_alpha_discrete(range = c(0.2, 0.5)) +
    coord_cartesian(ylim = ylims, expand = FALSE) +
    guides(alpha = FALSE)
  
  if (which == "dist_urban") {
    xlabel <- x_labels[which]
    p <- p +
      coord_cartesian(xlim = manual_limits$dist_urban, ylim = ylims, expand = FALSE)
      # scale_x_continuous(breaks = c(0, 10000, 20000, 30000), labels = c(0, 10, 20, 30))
  } else if (which == "total_rcs") {
    xlabel <- x_labels[which]
    p <- p +
      coord_cartesian(xlim = manual_limits$total_rcs, ylim = ylims, expand = FALSE)
      # scale_x_continuous(breaks = c(250, 500, 750), labels = c(250*4, 500*4, 750*4))
  } else {
    xlabel <- paste("%", x_labels[which])
    p <- p + 
      scale_x_continuous(breaks = c(0, 0.25, 0.5, 0.75, 1), labels = c("0%", "25%", "50%", "75%", "100%"))
  }
  
  p <- p +
    theme_classic(base_size = 10) +
    labs(x = xlabel, y = expression(paste("Flight response ", log[10], "(VIR)"))) +
    theme(axis.line.y.left = element_line(color = colors[which], size = 0.5),
          axis.line.y.right = element_line(color = "grey"),
          axis.ticks.y.left = element_line(color = colors[which], size = 0.5),
          axis.ticks.y.right = element_line(color = "grey"),
          axis.title.y = element_text())
  
  if (which(varimp$variable == which) %% 2 == 0) {
    p <- p +
      theme(axis.title.y.left = element_blank())
  } else {
    p <- p +
      theme(axis.title.y.right = element_blank())
  }

  if (!which %in% limit_quantiles) {
    p <- p +
      theme(axis.title.x = element_text(vjust = 0.5))
  }

  p
}

predictors <- bvi_biol$variable
plots <- mapply(function(predictors) {plot_mboost_pdp(modelci, data_cleaned, which = predictors, confidence.intervals, colors, ylims, bvi_biol, 
                                                      offset = FALSE)}, 
                predictors = predictors,
                SIMPLIFY = FALSE)

p <- wrap_plots(plots, ncol = 2)

p

And we save it as a PDF so we can tweak the plot manually in Adobe Illustrator.

ggsave(filename = "data/plots/paper/fig2_biol.pdf", plot = p, width = 16, height = 20, dpi = 300, units = "cm")

12.5 All marginal effects

confidence.intervals <- c(0.95, 0.8, 0.5)
colors <- c("urban" = "#94346E", 
            "agricultural" = "#73AF48", 
            "semiopen" = "#EDAD08", 
            "forests" = "#0F8554", 
            "wetlands" = "#38A6A5", 
            "waterbodies" = "#1D6996",
            "dist_urban" = "#94346E", 
            "total_rcs" = "#CC503E", 
            "dist_radar" = "#666666",
            "acov" = "#666666")
x_labels <- c("urban" = "Urban", 
              "agricultural" = "Agricultural", 
              "semiopen" = "Semi-open", 
              "forests" = "Forests", 
              "wetlands" = "Wetlands", 
              "waterbodies" = "Water bodies", 
              "dist_urban" = "Distance to fireworks [m]", 
              "total_rcs" = "Total RCS [cm^2]", 
              "dist_radar" = "Distance from radar [m]",
              "acov" = "Residual autocovariate [cm^2]")
limit_quantiles <- c("dist_urban", "total_rcs")
manual_limits <- list("dist_urban" = c(min(data_cleaned$dist_urban), max(data_cleaned$dist_urban)),
                      "total_rcs" = c(0, max(data_cleaned$total_rcs)),
                      "dist_radar" = c(0, 66000))
# ylims <- c(-1.8, 1.5) + modelci$model$offset
ylims <- c(-1, 1)

plot_mboost_pdp <- function(modelci, data, which, confints, colors = NULL, ylims = ylims, varimp = NULL, offset = FALSE) {
  # Extract bootstrapped quantiles
  quants <- t(mboost_bootstrapped_quantiles(modelci, confidence.intervals, which = which))
  if (offset) quants <- quants + modelci$model$offset
  bootstrapped_quantiles <- as.data.frame(quants)
  # bootstrapped_quantiles <- as.data.frame(t(mboost_bootstrapped_quantiles(modelci, confidence.intervals, which = which)))
  bootstrapped_quantiles$x <- modelci$data[modelci$model$which(which)][[1]][, 1]
  bootstrapped_quantiles$y <- plot.mboost_adjusted(modelci$model, which = which, newdata = modelci$data[[modelci$model$which(which)]])[[2]]
  if (offset) {
    bootstrapped_quantiles$y <- bootstrapped_quantiles$y + modelci$model$offset
    ylims <- ylims + modelci$model$offset
  }
  
  p <- ggplot(bootstrapped_quantiles)
  i <- 1
  sorted_confints <- sort(confints, decreasing = TRUE)
  
  # Add variable importance
  if (is.null(varimp)) {
    variable_importance <- function(model, which, exclude = c("dist_radar"), round = 0) {
      vi <- as.data.frame(varimp(model)) %>%
        filter(!variable %in% exclude)
      vi$vi <- vi$reduction / sum(vi$reduction) * 100
      vi %>%
        filter(variable == which) %>%
        dplyr::select(vi) %>%
        as.numeric() %>%
        round(round)
    }
    vi <- variable_importance(modelci$model, which)
  } else {
    vi <- varimp %>%
      filter(variable == which) %>%
      dplyr::select(mean_round, q025, q975)
  }
  
  offset_right <- 0.95
  offset_top <- 0.35
  vi_string <- paste0(vi$mean_round, "% [", round(vi$q025, digits = 2), ";", round(vi$q975, digits = 2), "]")
  fontsize <- 6
  if (which == "dist_urban") {
    p <- p +
      annotate("text", x = manual_limits$dist_urban[[2]] * offset_right, y = ylims[2] - offset_top, label = vi_string, hjust = 1, color = colors[which],
               fontface = "bold", size = fontsize, alpha = 0.5)
  } else if(which == "total_rcs") {
    p <- p +
      annotate("text", x = manual_limits$total_rcs[[2]] * offset_right, y = 5 * (ylims[2] - offset_top), label = vi_string, hjust = 1, color = colors[which],
               fontface = "bold", size = fontsize, alpha = 0.5)
  } else if (which == "acov") {
    d <- model.frame(modelci$model)
    acov <- as.matrix(d[["bbs(acov)"]])
    p <- p +
      annotate("text", x = max(acov) * offset_right, y = 6.5 *(ylims[2] - offset_top), label = vi_string, hjust = 1, color = colors[which], fontface = "bold", 
               size = fontsize, alpha = 0.5)
  } else if(which == "dist_radar") {
    p <- p +
      annotate("text", x = manual_limits$dist_radar[[2]] * offset_right, y = 2 *(ylims[2] - offset_top), label = vi_string, hjust = 1, color = colors[which],
               fontface = "bold", size = fontsize, alpha = 0.5)
  } else {
    p <- p +
      annotate("text", x = 1 * offset_right, y = ylims[2] - offset_top, label = vi_string, hjust = 1, color = colors[which], 
               fontface = "bold", size = fontsize, alpha = 0.5)
  }
  
  # Add density
  if (which == "dist_urban") {
    dens <- density(data[, which], bw = 225, from = manual_limits$dist_urban[[1]], to = manual_limits$dist_urban[[2]])
  } else if (which == "total_rcs") {
    dens <- density(data[, which], bw = 2000, from = manual_limits$total_rcs[[1]], to = manual_limits$total_rcs[[2]])
  } else if (which == "acov") {
    d <- model.frame(modelci$model)
    acov <- as.matrix(d[["bbs(acov)"]])
    dens <- density(acov, from = min(acov), to = max(acov))
  } else if (which == "dist_radar") {
    dens <- density(data[, which], from = manual_limits$dist_radar[[1]], to = manual_limits$dist_radar[[2]])
  } else {
    dens <- density(data[, which], bw = 0.03556, from = 0, to = 1)
  }
  
  if (which == "dist_urban") { scale_lims <- c(-3 + 0.05, 1.5 - 0.05) }
  else if (which == "total_rcs") { scale_lims <- c(-8 + 0.05, 5 - 0.05) }
  else if (which == "dist_radar") { scale_lims <- c(-2 + 0.05, 2 - 0.05) }
  else if (which == "acov") { scale_lims <- c(-4 + 0.05, 6 - 0.05)}
  else {scale_lims <- c(ylims[1] + 0.05, ylims[2] - 0.05)}
  
  dens$y <- scales::rescale(dens$y, to = scale_lims)
  dens <- as.data.frame(list(as.matrix(dens$x), as.matrix(dens$y)))
  colnames(dens) <- c("x", "y")
  p <- p +
    geom_line(aes(x = x, y = y), data =  dens, color = "grey50", lineend = "round", linetype = 3) +
    scale_y_continuous(sec.axis = sec_axis(~ ., name = "Predictor frequency", breaks = scale_lims, labels = c("min", "max")))
  
  ## Add horizontal line
  if (offset) {
    yintercept <- modelci$model$offset
  } else {
    yintercept <- 0
  }
  p <- p +
    geom_hline(yintercept = yintercept, col = "darkgrey")
  
  for (ci in sorted_confints) {
    alphas <- rev(factor(sorted_confints))
    ymax <- as.name(paste((1 - (1 - ci) / 2) * 100, "%", sep = ""))
    ymax <- enquo(ymax)
    ymin <- as.name(paste(((1 - ci) / 2) * 100, "%", sep = ""))
    ymin <- enquo(ymin)
    
    p <- p +
      geom_ribbon(aes(x = x, ymin = !!ymin, ymax = !!ymax, alpha = !!alphas[i]), fill = colors[which]) +
      geom_line(aes(x = x, y = y), color = colors[which], size = 0.75, lineend = "round")
    
    if (offset) {
      p <- p + scale_y_continuous(labels = function(x) {10^x})
    }

    i <- i + 1
  }
  
  p <- p +
    scale_alpha_discrete(range = c(0.2, 0.5)) +
    coord_cartesian(ylim = ylims, expand = FALSE) +
    guides(alpha = FALSE)
  
  if (which == "dist_urban") {
    xlabel <- x_labels[which]
    p <- p +
      coord_cartesian(xlim = manual_limits$dist_urban, ylim = c(-3, 1.5), expand = FALSE)
      # scale_x_continuous(breaks = c(0, 10000, 20000, 30000), labels = c(0, 10, 20, 30))
  } else if (which == "total_rcs") {
    xlabel <- x_labels[which]
    p <- p +
      coord_cartesian(xlim = manual_limits$total_rcs, ylim = c(-8, 5), expand = FALSE)
      # scale_x_continuous(breaks = c(250, 500, 750), labels = c(250*4, 500*4, 750*4))
  } else if (which == "dist_radar") {
    xlabel <- x_labels[which]
    p <- p +
      coord_cartesian(ylim = c(-2, 2), expand = FALSE)
  } else if (which == "acov") {
    xlabel <- x_labels[which]
    p <- p +
      coord_cartesian(ylim = c(-4, 6), expand = FALSE, xlim = c(min(acov), max(acov)))
  } else {
    xlabel <- paste("%", x_labels[which])
    p <- p + 
      scale_x_continuous(breaks = c(0, 0.25, 0.5, 0.75, 1), labels = c("0%", "25%", "50%", "75%", "100%"))
  }
  
  p <- p +
    theme_classic(base_size = 10) +
    labs(x = xlabel, y = expression(paste("Flight response ", log[10], "(VIR)"))) +
    theme(axis.line.y.left = element_line(color = colors[which], size = 0.5),
          axis.line.y.right = element_line(color = "grey"),
          axis.ticks.y.left = element_line(color = colors[which], size = 0.5),
          axis.ticks.y.right = element_line(color = "grey"),
          axis.title.y = element_text())
  
  if (which(varimp$variable == which) %% 2 == 0) {
    p <- p +
      theme(axis.title.y.left = element_blank())
  } else {
    p <- p +
      theme(axis.title.y.right = element_blank())
  }

  if (!which %in% limit_quantiles) {
    p <- p +
      theme(axis.title.x = element_text(vjust = 0.5))
  }

  p
}

predictors <- bvi_all$variable
plots <- mapply(function(predictors) {plot_mboost_pdp(modelci, data_cleaned, which = predictors, confidence.intervals, colors, ylims, bvi_all, 
                                                      offset = FALSE)}, 
                predictors = predictors,
                SIMPLIFY = FALSE)

p <- wrap_plots(plots, ncol = 2)

p

And once again we save the plot to PDF.

ggsave(filename = "data/plots/paper/fig2_all.pdf", plot = p, width = 16, height = 20, dpi = 300, units = "cm")

12.6 Landscapes side-by-side

confidence.intervals <- c(0.95, 0.8, 0.5)
colors <- c("urban" = "#94346E", 
            "agricultural" = "#73AF48", 
            "semiopen" = "#EDAD08", 
            "forests" = "#0F8554", 
            "wetlands" = "#38A6A5", 
            "waterbodies" = "#1D6996",
            "dist_urban" = "#94346E", 
            "total_rcs" = "#CC503E", 
            "dist_radar" = "#666666")
x_labels <- c("urban" = "Urban", 
              "agricultural" = "Agricultural", 
              "semiopen" = "Semi-open", 
              "forests" = "Forests", 
              "wetlands" = "Wetlands", 
              "waterbodies" = "Water bodies", 
              "dist_urban" = "Distance to fireworks (m)", 
              "total_rcs" = "Total RCS (g^(2/3))", 
              "dist_radar" = "Distance from radar (m)")
limit_quantiles <- c("dist_urban", "total_rcs")
manual_limits <- list("dist_urban" = c(min(data_cleaned$dist_urban), quantile(data_cleaned$dist_urban, probs = 0.975)),
                      "total_rcs" = c(0, quantile(data_cleaned$total_rcs, probs = 0.975)))
# ylims <- c(-1.8, 1.5) + modelci$model$offset
ylims <- c(-1, 1)

plot_mboost_pdp <- function(modelci, data, which, confints, colors = NULL, ylims = ylims, varimp = NULL, offset = FALSE) {
  # Extract bootstrapped quantiles
  quants <- t(mboost_bootstrapped_quantiles(modelci, confidence.intervals, which = which))
  if (offset) quants <- quants + modelci$model$offset
  bootstrapped_quantiles <- as.data.frame(quants)
  # bootstrapped_quantiles <- as.data.frame(t(mboost_bootstrapped_quantiles(modelci, confidence.intervals, which = which)))
  bootstrapped_quantiles$x <- modelci$data[modelci$model$which(which)][[1]][, 1]
  bootstrapped_quantiles$y <- plot.mboost_adjusted(modelci$model, which = which, newdata = modelci$data[[modelci$model$which(which)]])[[2]]
  if (offset) {
    bootstrapped_quantiles$y <- bootstrapped_quantiles$y + modelci$model$offset
    ylims <- ylims + modelci$model$offset
  }
  
  p <- ggplot(bootstrapped_quantiles)
  i <- 1
  sorted_confints <- sort(confints, decreasing = TRUE)
  
  # Add variable importance
  if (is.null(varimp)) {
    variable_importance <- function(model, which, exclude = c("dist_radar"), round = 0) {
      vi <- as.data.frame(varimp(model)) %>%
        filter(!variable %in% exclude)
      vi$vi <- vi$reduction / sum(vi$reduction) * 100
      vi %>%
        filter(variable == which) %>%
        dplyr::select(vi) %>%
        as.numeric() %>%
        round(round)
    }
    vi <- variable_importance(modelci$model, which)
  } else {
    vi <- varimp %>%
      filter(variable == which) %>%
      dplyr::select(mean_round)
  }
  
  offset_right <- 0.95
  offset_top <- 0.35
  if (which == "dist_urban") {
    p <- p +
      annotate("text", x = manual_limits$dist_urban[[2]] * offset_right, y = ylims[2] - offset_top, label = paste0(vi, "%"), hjust = 1, color = colors[which],
               fontface = "bold", size = 12, alpha = 0.5)
  } else if(which == "total_rcs") {
    p <- p +
      annotate("text", x = manual_limits$total_rcs[[2]] * offset_right, y = ylims[2] - offset_top, label = paste0(vi, "%"), hjust = 1, color = colors[which],
               fontface = "bold", size = 12, alpha = 0.5)
  } else {
    p <- p +
      annotate("text", x = 1 * offset_right, y = ylims[2] - offset_top, label = paste0(x_labels[which], ": ", vi, "%"), hjust = 1, color = colors[which], 
               fontface = "bold", size = 12, alpha = 0.5)
  }
  
  # Add density
  if (which == "dist_urban") {
    dens <- density(data[, which], bw = 225, from = manual_limits$dist_urban[[1]], to = manual_limits$dist_urban[[2]])
  } else if (which == "total_rcs") {
    dens <- density(data[, which], bw = 2000, from = manual_limits$total_rcs[[1]], to = manual_limits$total_rcs[[2]])
  } else {
    dens <- density(data[, which], bw = 0.03556, from = 0, to = 1)
  }
  scale_lims <- c(ylims[1] + 0.05, ylims[2] - 0.05)
  dens$y <- scales::rescale(dens$y, to = scale_lims)
  dens <- as.data.frame(list(as.matrix(dens$x), as.matrix(dens$y)))
  colnames(dens) <- c("x", "y")
  p <- p +
    geom_line(aes(x = x, y = y), data =  dens, color = "grey", lineend = "round", linetype = 3) +
    scale_y_continuous(sec.axis = sec_axis(~ ., name = "Predictor frequency", breaks = scale_lims, labels = c("min", "max")))
  
  ## Add horizontal line
  if (offset) {
    yintercept <- modelci$model$offset
  } else {
    yintercept <- 0
  }
  p <- p +
    geom_hline(yintercept = yintercept, col = "darkgrey")
  
  for (ci in sorted_confints) {
    alphas <- rev(factor(sorted_confints))
    ymax <- as.name(paste((1 - (1 - ci) / 2) * 100, "%", sep = ""))
    ymax <- enquo(ymax)
    ymin <- as.name(paste(((1 - ci) / 2) * 100, "%", sep = ""))
    ymin <- enquo(ymin)
    
    p <- p +
      geom_ribbon(aes(x = x, ymin = !!ymin, ymax = !!ymax, alpha = !!alphas[i]), fill = colors[which]) +
      geom_line(aes(x = x, y = y), color = colors[which], size = 0.75, lineend = "round")
    
    if (offset) {
      p <- p + scale_y_continuous(labels = function(x) {10^x})
    }

    i <- i + 1
  }
  
  p <- p +
    scale_alpha_discrete(range = c(0.2, 0.5)) +
    coord_cartesian(ylim = ylims, expand = FALSE) +
    guides(alpha = FALSE)
  
  if (which == "dist_urban") {
    xlabel <- x_labels[which]
    p <- p +
      coord_cartesian(xlim = manual_limits$dist_urban, ylim = ylims, expand = FALSE)
      # scale_x_continuous(breaks = c(0, 10000, 20000, 30000), labels = c(0, 10, 20, 30))
  } else if (which == "total_rcs") {
    xlabel <- x_labels[which]
    p <- p +
      coord_cartesian(xlim = manual_limits$total_rcs, ylim = ylims, expand = FALSE)
      # scale_x_continuous(breaks = c(250, 500, 750), labels = c(250*4, 500*4, 750*4))
  } else {
    xlabel <- paste("% Coverage")
    p <- p + 
      scale_x_continuous(breaks = c(0, 0.25, 0.5, 0.75, 1), labels = c("0%", "25%", "50%", "75%", "100%"))
  }
  
  p <- p +
    theme_classic(base_size = 10) +
    labs(x = xlabel, y = expression(paste("Flight response ", log[10], "(VIR)"))) +
    theme(axis.line.y.left = element_line(color = colors[which], size = 0.5),
          axis.line.y.right = element_line(color = "grey"),
          axis.ticks.y.left = element_line(color = colors[which], size = 0.5),
          axis.ticks.y.right = element_line(color = "grey"),
          axis.title.y = element_text(),
          axis.title.y.right = element_blank())
  
  if (!which == "forests") {
    p <- p +
      theme(axis.title.y.left = element_blank())
  }
  
  if (!which == "waterbodies") {
    p <- p +
      theme(axis.title.x = element_blank())
  }

  p
}

predictors <- c("agricultural", "semiopen", "forests", "wetlands", "waterbodies")
plots <- mapply(function(predictors) {plot_mboost_pdp(modelci, data_cleaned, which = predictors, confidence.intervals, colors, ylims, bvi_biol, 
                                                      offset = FALSE)}, 
                predictors = predictors,
                SIMPLIFY = FALSE)

saveRDS(plots, file = "data/plots/paper/fig_landscapes.RDS")

p <- wrap_plots(plots, ncol = 1)

p

12.7 Pseudo-RCS & Distance to fireworks

confidence.intervals <- c(0.95, 0.8, 0.5)
colors <- c("urban" = "#94346E", 
            "agricultural" = "#73AF48", 
            "semiopen" = "#EDAD08", 
            "forests" = "#0F8554", 
            "wetlands" = "#38A6A5", 
            "waterbodies" = "#1D6996",
            "dist_urban" = "#94346E", 
            "total_rcs" = "#CC503E", 
            "dist_radar" = "#666666")
x_labels <- c("urban" = "Urban", 
              "agricultural" = "Agricultural", 
              "semiopen" = "Semi-open", 
              "forests" = "Forests", 
              "wetlands" = "Wetlands", 
              "waterbodies" = "Water bodies", 
              "dist_urban" = "Distance to fireworks (m)", 
              "total_rcs" = "Total RCS (cm^2)", 
              "dist_radar" = "Distance from radar (m)")
limit_quantiles <- c("dist_urban", "total_rcs")
manual_limits <- list("dist_urban" = c(min(data_cleaned$dist_urban), max(data_cleaned$dist_urban)),
                      "total_rcs" = c(0, quantile(data_cleaned$total_rcs, probs = 0.995)))
# ylims <- c(-1.8, 1.5) + modelci$model$offset
ylims <- c(-3, 1)

plot_mboost_pdp <- function(modelci, data, which, confints, colors = NULL, ylims = ylims, varimp = NULL, offset = FALSE) {
  # Extract bootstrapped quantiles
  quants <- t(mboost_bootstrapped_quantiles(modelci, confidence.intervals, which = which))
  if (offset) quants <- quants + modelci$model$offset
  bootstrapped_quantiles <- as.data.frame(quants)
  # bootstrapped_quantiles <- as.data.frame(t(mboost_bootstrapped_quantiles(modelci, confidence.intervals, which = which)))
  bootstrapped_quantiles$x <- modelci$data[modelci$model$which(which)][[1]][, 1]
  bootstrapped_quantiles$y <- plot.mboost_adjusted(modelci$model, which = which, newdata = modelci$data[[modelci$model$which(which)]])[[2]]
  if (offset) {
    bootstrapped_quantiles$y <- bootstrapped_quantiles$y + modelci$model$offset
    ylims <- ylims + modelci$model$offset
  }
  
  p <- ggplot(bootstrapped_quantiles)
  i <- 1
  sorted_confints <- sort(confints, decreasing = TRUE)
  
  # Add variable importance
  if (is.null(varimp)) {
    variable_importance <- function(model, which, exclude = c("dist_radar"), round = 0) {
      vi <- as.data.frame(varimp(model)) %>%
        filter(!variable %in% exclude)
      vi$vi <- vi$reduction / sum(vi$reduction) * 100
      vi %>%
        filter(variable == which) %>%
        dplyr::select(vi) %>%
        as.numeric() %>%
        round(round)
    }
    vi <- variable_importance(modelci$model, which)
  } else {
    vi <- varimp %>%
      filter(variable == which) %>%
      dplyr::select(mean_round)
  }
  
  offset_right <- 0.95
  offset_top <- 0.35
  if (which == "dist_urban") {
    p <- p +
      annotate("text", x = manual_limits$dist_urban[[2]] * offset_right, y = ylims[2] - offset_top, label = paste0(vi, "%"), hjust = 1, color = colors[which],
               fontface = "bold", size = 12, alpha = 0.5)
  } else if(which == "total_rcs") {
    p <- p +
      annotate("text", x = manual_limits$total_rcs[[2]] * offset_right, y = ylims[2] - offset_top, label = paste0(vi, "%"), hjust = 1, color = colors[which],
               fontface = "bold", size = 12, alpha = 0.5)
  } else {
    p <- p +
      annotate("text", x = 1 * offset_right, y = ylims[2] - offset_top, label = paste0(x_labels[which], ": ", vi, "%"), hjust = 1, color = colors[which], 
               fontface = "bold", size = 12, alpha = 0.5)
  }
  
  # Add density
  if (which == "dist_urban") {
    dens <- density(data[, which], bw = 225, from = manual_limits$dist_urban[[1]], to = manual_limits$dist_urban[[2]])
  } else if (which == "total_rcs") {
    dens <- density(data[, which], bw = 2000, from = manual_limits$total_rcs[[1]], to = manual_limits$total_rcs[[2]])
  } else {
    dens <- density(data[, which], bw = 0.03556, from = 0, to = 1)
  }
  scale_lims <- c(ylims[1] + 0.05, ylims[2] - 0.05)
  dens$y <- scales::rescale(dens$y, to = scale_lims)
  dens <- as.data.frame(list(as.matrix(dens$x), as.matrix(dens$y)))
  colnames(dens) <- c("x", "y")
  p <- p +
    geom_line(aes(x = x, y = y), data =  dens, color = "grey", lineend = "round", linetype = 3) +
    scale_y_continuous(sec.axis = sec_axis(~ ., name = "Predictor frequency", breaks = scale_lims, labels = c("min", "max")))
  
  ## Add horizontal line
  if (offset) {
    yintercept <- modelci$model$offset
  } else {
    yintercept <- 0
  }
  p <- p +
    geom_hline(yintercept = yintercept, col = "darkgrey")
  
  for (ci in sorted_confints) {
    alphas <- rev(factor(sorted_confints))
    ymax <- as.name(paste((1 - (1 - ci) / 2) * 100, "%", sep = ""))
    ymax <- enquo(ymax)
    ymin <- as.name(paste(((1 - ci) / 2) * 100, "%", sep = ""))
    ymin <- enquo(ymin)
    
    p <- p +
      geom_ribbon(aes(x = x, ymin = !!ymin, ymax = !!ymax, alpha = !!alphas[i]), fill = colors[which]) +
      geom_line(aes(x = x, y = y), color = colors[which], size = 0.75, lineend = "round")
    
    if (offset) {
      p <- p + scale_y_continuous(labels = function(x) {10^x})
    }

    i <- i + 1
  }
  
  p <- p +
    scale_alpha_discrete(range = c(0.2, 0.5)) +
    coord_cartesian(ylim = ylims, expand = FALSE) +
    guides(alpha = FALSE)
  
  if (which == "dist_urban") {
    xlabel <- x_labels[which]
    p <- p +
      coord_cartesian(xlim = manual_limits$dist_urban, ylim = ylims, expand = FALSE) +
      scale_x_continuous(breaks = c(0, 5000, 10000, 15000))
  } else if (which == "total_rcs") {
    xlabel <- x_labels[which]
    p <- p +
      coord_cartesian(xlim = manual_limits$total_rcs, ylim = ylims, expand = FALSE)
      # scale_x_continuous(breaks = c(250, 500, 750), labels = c(250*4, 500*4, 750*4))
  } else {
    xlabel <- paste("% Coverage")
    p <- p + 
      scale_x_continuous(breaks = c(0, 0.25, 0.5, 0.75, 1), labels = c("0%", "25%", "50%", "75%", "100%"))
  }
  
  p <- p +
    theme_classic(base_size = 10) +
    labs(x = xlabel, y = expression(paste("Flight response ", log[10], "(VIR)"))) +
    theme(axis.line.y.left = element_line(color = colors[which], size = 0.5),
          axis.line.y.right = element_line(color = "grey"),
          axis.ticks.y.left = element_line(color = colors[which], size = 0.5),
          axis.ticks.y.right = element_line(color = "grey"),
          axis.title.y = element_text(),
          axis.title.y.right = element_blank())
  
  if (!which == "forests") {
    p <- p +
      theme(axis.title.y.left = element_blank())
  }
  
  if (!which == "waterbodies") {
    p <- p +
      theme(axis.title.x = element_blank())
  }

  p
}

predictors <- c("dist_urban", "total_rcs")
plots <- mapply(function(predictors) {plot_mboost_pdp(modelci, data_cleaned, which = predictors, confidence.intervals, colors, ylims, bvi_biol, 
                                                      offset = FALSE)}, 
                predictors = predictors,
                SIMPLIFY = FALSE)

saveRDS(plots, file = "data/plots/paper/fig_rcs_disturban.RDS")

plots[[1]] + plots[[2]] +
plot_layout(widths = c(1, 1), nrow = 1) & 
theme(axis.text.x = element_text(size = 10),
      axis.title.x = element_text(size = 12),
      axis.text.y = element_text(size = 10)) -> plots_rcs_disturban

ggsave(filename = "data/plots/paper/fig_rcs_disturban.pdf", plot = plots_rcs_disturban, width = 14, height = 5, dpi = 300, units = "cm")

plots_rcs_disturban