1 Overview

We compare a stationary Besag model with the non-stationary fbesag model for dengue risk across Brazil’s 558 microregions. The flexible model estimates region-specific spatial precisions while maintaining smoothness across shared borders. A joint penalized-complexity prior ensures contraction to stationarity when heterogeneity is not supported by the data. For details of the method and prior, see Abdul-Fattah et al. (2024).

This document provides: - Partition maps (administrative vs. terrestrial biomes). - Fitted stationary and flexible spatial models using INLA. - A model comparison table: DIC, logML, GCPO, MAE. - An SMR choropleth and difference maps for posterior summaries.

Folder assumption. All required data files (including brazil_558_microregions.RData) are in the same folder as this Rmd.


2 Packages

# Optionally install missing packages (set to TRUE if you need auto-install)
auto_install <- FALSE

need <- c("INLA","fbesag","ggplot2","ggthemes","spdep","hydroGOF","dplyr","tibble","sf","scales","knitr")
to_install <- need[!vapply(need, requireNamespace, FALSE, quietly = TRUE)]

if (length(to_install) > 0 && auto_install) {
  install.packages(setdiff(to_install, c("INLA","fbesag")))
  if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")
  if (!requireNamespace("INLA", quietly = TRUE)) remotes::install_version("INLA", version = "24.08.28")
  if (!requireNamespace("fbesag", quietly = TRUE)) remotes::install_github("esmail-abdulfattah/fbesag")
}

suppressPackageStartupMessages({
  library(INLA)
  library(fbesag)
  library(ggplot2)
  library(ggthemes)
  library(spdep)
  library(hydroGOF)
  library(dplyr)
  library(tibble)
  library(sf)
  library(scales)
  library(knitr)
})

3 Data

# Load the dataset saved in the same folder as this Rmd
load("brazil_558_microregions.RData")

# Basic checks and assumptions:
stopifnot(exists("data"), exists("graph"), exists("map_df"))
stopifnot(nrow(map_df) >= 558)  # expect at least one geometry per microregion

3.1 Partition selection and offset convention

# Partition toggle: administrative (FALSE) vs. terrestrial biomes (TRUE)
Six_terrestrial_biomes <- FALSE

# Expected counts: set TRUE if E is on the log-scale in your object
E_is_log <- FALSE

# Collapse per-observation vectors to per-area (length = 558)
collapse_to_area <- function(area_index, values, n_areas = 558) {
  vapply(seq_len(n_areas), function(i) {
    vi <- values[area_index == i]
    if (length(vi) == 0) return(NA_integer_)
    u <- unique(vi)
    u[1]
  }, integer(1))
}

# Per-area IDs from S3/S4
id_regions_admin_area <- collapse_to_area(data$S1, data$S3, n_areas = 558)
id_regions_biome_area <- collapse_to_area(data$S1, data$S4, n_areas = 558)

# Original recoding for the biome option
id_regions_biome_area[id_regions_biome_area == 3] <- 2
id_regions_biome_area[id_regions_biome_area == 4] <- 3
id_regions_biome_area[id_regions_biome_area == 5] <- 4
id_regions_biome_area[id_regions_biome_area == 6] <- 5

# Active partition for modeling
id_regions <- if (Six_terrestrial_biomes) id_regions_biome_area else id_regions_admin_area

4 Region Maps

# Attach per-area IDs to the map (assumes row order aligns with area index)
map_df$region_admin <- factor(id_regions_admin_area,
                              levels = sort(unique(id_regions_admin_area)))
map_df$region_biome <- factor(id_regions_biome_area,
                              levels = sort(unique(id_regions_biome_area)))

pal_admin <- hue_pal()(length(levels(map_df$region_admin)))
pal_biome <- hue_pal()(length(levels(map_df$region_biome)))

# Subtle borders for legibility
border_col <- "#f7f7f7"

p_admin <- ggplot(map_df) +
  geom_sf(aes(fill = region_admin), linewidth = 0.15, color = border_col) +
  scale_fill_manual(values = pal_admin, name = "Region") +
  ggtitle("Administrative Grouping (S3)") +
  theme_void() + theme(legend.position = "bottom")

p_biome <- ggplot(map_df) +
  geom_sf(aes(fill = region_biome), linewidth = 0.15, color = border_col) +
  scale_fill_manual(values = pal_biome, name = "Biome") +
  ggtitle("Terrestrial Biomes (S4)") +
  theme_void() + theme(legend.position = "bottom")

if (Six_terrestrial_biomes) p_biome else p_admin


5 Model Specification

# Scale the intrinsic graph for a sum-to-zero constraint
constr.inter <- list(A = matrix(1, 1, dim(graph)[1]), e = rep(0, 1))
scaled_graph <- as.matrix(INLA:::inla.scale.model(graph, constr.inter))

# PC prior for the stationary Besag precision
precision.prior <- list(prec = list(prior = "pc.prec", param = c(0.5, 0.01)))

# Flexible model object (region-specific precisions)
fbesag.model <- fbesag::get_fbesag(
  graph,
  id       = id_regions[1:558],
  sd_gamma = 0.15,
  param    = list(p1 = 1, p2 = 1e-5)
)
## [1] "/home/abdulfe/R/x86_64-pc-linux-gnu-library/4.5/fbesag"
# Indices for INLA
data$c_S1 <- data$S1
data$c_T1 <- data$T1

# Expected counts for offset
E_counts <- if (E_is_log) exp(data$E) else data$E

# Stationary Besag (intrinsic via generic0)
baseformula <- Y ~ 1 +
  f(S1, model = "generic0",
    Cmatrix = scaled_graph,
    constr  = TRUE, rankdef = 1,
    hyper  = precision.prior) +
  f(T1, model = "rw1",
    constr = TRUE, scale.model = TRUE,
    hyper  = list(prec = list(prior = "pc.prec", param = c(0.5, 0.01))))

# Flexible Besag
formula <- Y ~ 1 +
  f(S1, model = fbesag.model, constr = TRUE, rankdef = 1) +
  f(T1, model = "rw1",
    constr = TRUE, scale.model = TRUE,
    hyper  = precision.prior)

6 Model Fitting

config_flag <- FALSE

model_naive <- inla(
  formula = baseformula, data = data, family = "poisson", offset = log(E_counts),
  control.inla = list(strategy = "gaussian", int.strategy = "eb"),
  control.compute = list(dic = TRUE, config = config_flag,
                         cpo = TRUE, return.marginals = FALSE,
                         control.gcpo = list(enable = TRUE, num.level.sets = 2)),
  control.fixed = list(correlation.matrix = TRUE, prec.intercept = 1, prec = 1),
  control.predictor = list(link = 1, compute = TRUE),
  verbose = TRUE
)

model_fbesag <- inla(
  formula = formula, data = data, family = "poisson", offset = log(E_counts),
  control.inla = list(strategy = "gaussian", int.strategy = "eb"),
  control.compute = list(dic = TRUE, config = config_flag,
                         cpo = TRUE, return.marginals = FALSE,
                         control.gcpo = list(enable = TRUE, num.level.sets = 2)),
  control.fixed = list(correlation.matrix = TRUE, prec.intercept = 1, prec = 1),
  control.predictor = list(link = 1, compute = TRUE),
  verbose = TRUE
)

7 Model Comparison

mean_neglog <- function(v) {
  v <- v[is.finite(v) & v > 0]
  if (length(v) == 0) return(NA_real_)
  -mean(log(v))
}

gcpo_naive  <- if (!is.null(model_naive$gcpo))  mean_neglog(model_naive$gcpo$gcpo) else mean_neglog(model_naive$cpo$cpo)
gcpo_fbesag <- if (!is.null(model_fbesag$gcpo)) mean_neglog(model_fbesag$gcpo$gcpo) else mean_neglog(model_fbesag$cpo$cpo)

mae_naive  <- hydroGOF::mae(model_naive$summary.fitted.values$`0.5quant`, data$Y, na.rm = TRUE)
mae_fbesag <- hydroGOF::mae(model_fbesag$summary.fitted.values$`0.5quant`, data$Y, na.rm = TRUE)

res <- tibble::tibble(
  Model = c("Stationary Besag", "Flexible Besag"),
  DIC   = c(round(model_naive$dic$dic, 0), round(model_fbesag$dic$dic, 0)),
  logML = c(as.numeric(model_naive$mlik[1]), as.numeric(model_fbesag$mlik[1])),
  GCPO  = c(gcpo_naive, gcpo_fbesag),
  MAE   = c(mae_naive, mae_fbesag)
)

res$`Better than stationary?` <- c(NA,
  (res$DIC[2]   < res$DIC[1]) &
  (res$logML[2] > res$logML[1]) &
  (res$GCPO[2]  < res$GCPO[1]) &
  (res$MAE[2]   < res$MAE[1])
)

knitr::kable(res, digits = 3, align = "lrrrrc",
             caption = "Lower DIC/GCPO/MAE and higher logML indicate improvement.")
Lower DIC/GCPO/MAE and higher logML indicate improvement.
Model DIC logML GCPO MAE Better than stationary?
Stationary Besag -61840 -118863.2 16.551 37.079 NA
Flexible Besag -61912 -118609.0 16.550 37.078 TRUE

8 SMR by Microregion

# Average SMR per microregion
SMR <- vapply(1:558, function(i) {
  yy <- data$Y[data$S1 == i]
  ee <- E_counts[data$S1 == i]
  mean(yy / ee)
}, numeric(1))

map_df$SMR_cat <- cut(
  SMR, breaks = c(-Inf, 0.5, 1, 2, 5, Inf),
  labels = c("<0.5", "0.5–1", "1–2", "2–5", "5+"),
  right = FALSE, ordered_result = TRUE
)

pal_smr <- c("#edf8fb", "#b2e2e2", "#66c2a4", "#2ca25f", "#006d2c")
border_col <- "#f7f7f7"

ggplot(map_df) +
  geom_sf(aes(fill = SMR_cat), linewidth = 0.15, color = border_col) +
  scale_fill_manual(values = pal_smr, name = "SMR") +
  ggtitle("Standardized Morbidity Ratio (SMR) by Microregion") +
  theme_void(base_size = 12) +
  theme(legend.position = "bottom")

9 Non-stationarity Diagnostics (fbesag vs. stationary)

# Differences in spatial field summaries
map_df$diff_mean <- model_fbesag$summary.random$S1$mean -
                    model_naive$summary.random$S1$mean

map_df$diff_q975 <- model_fbesag$summary.random$S1$`0.975quant` -
                    model_naive$summary.random$S1$`0.975quant`

map_df$diff_q025 <- model_fbesag$summary.random$S1$`0.025quant` -
                    model_naive$summary.random$S1$`0.025quant`

ga_blue <- "#49B8F1"
border_col <- "#f7f7f7"

p_mean <- ggplot(map_df) +
  geom_sf(aes(fill = diff_mean), linewidth = 0.15, color = border_col) +
  scale_fill_gradient2(low = ga_blue, mid = "white", high = "red", midpoint = 0, name = "\u0394 mean") +
  ggtitle("Difference in Posterior Mean of Spatial Effect (fbesag − stationary)") +
  theme_void(base_size = 12)

p_q975 <- ggplot(map_df) +
  geom_sf(aes(fill = diff_q975), linewidth = 0.15, color = border_col) +
  scale_fill_gradient2(low = ga_blue, mid = "white", high = "red", midpoint = 0, name = "\u0394 97.5%") +
  ggtitle("Difference in 97.5th Percentile of Spatial Effect") +
  theme_void(base_size = 12)

p_q025 <- ggplot(map_df) +
  geom_sf(aes(fill = diff_q025), linewidth = 0.15, color = border_col) +
  scale_fill_gradient2(low = ga_blue, mid = "white", high = "red", midpoint = 0, name = "\u0394 2.5%") +
  ggtitle("Difference in 2.5th Percentile of Spatial Effect") +
  theme_void(base_size = 12)

p_mean

p_q975

p_q025

10 Regional Summary (Optional)

ch <- id_regions[1:558]
abs_mean_diff_by_region <- vapply(sort(unique(ch)), function(k) {
  mean(abs(map_df$diff_mean[ch == k]))
}, numeric(1))

tibble::tibble(
  Region = sort(unique(ch)),
  `Mean |Δ mean|` = abs_mean_diff_by_region
) %>%
  knitr::kable(digits = 4, caption = "Average absolute change in posterior mean (by sub-region).")
Average absolute change in posterior mean (by sub-region).
Region Mean |Δ mean|
1 0.0069
2 0.0046
3 0.0154
4 0.0389
5 0.0040

11 Interpretation

  • If the flexible model reduces DIC/GCPO/MAE and increases logML, and the difference maps display structured changes, the stationarity assumption is likely violated.
  • Where differences are negligible, the PC prior contracts the flexible model toward the stationary baseline—fbesag can be used as a safe diagnostic.
  • Sensitivity can be assessed via sd_gamma (e.g., 0.05 for stronger contraction, 0.3 for greater heterogeneity).

References

Abdul-Fattah, Esmail, Elias Krainski, Janet van Niekerk, and Håvard Rue. 2024. “Non-Stationary Bayesian Spatial Model for Disease Mapping Based on Sub-Regions.” Statistical Methods in Medical Research 33 (6): 1093–1111. https://doi.org/10.1177/09622802241244613.