Non-stationary Spatial Model for Dengue in Brazil: A Hands-On Guide

1 Overview

We compare a stationary Besag model with the non-stationary fbesag model for dengue risk across Brazil’s 558 microregions. The flexible model estimates region-specific spatial precisions while maintaining smoothness across shared borders. A joint penalized-complexity prior ensures contraction to stationarity when heterogeneity is not supported by the data. For details of the method and prior, see Abdul-Fattah et al. (2024).

This document provides: - Partition maps (administrative vs. terrestrial biomes). - Fitted stationary and flexible spatial models using INLA. - A model comparison table: DIC, logML, GCPO, MAE. - An SMR choropleth and difference maps for posterior summaries.

Folder assumption. All required data files (including brazil_558_microregions.RData) are in the same folder as this Rmd.

2 Packages

# Optionally install missing packages (set to TRUE if you need auto-install)
auto_install <- FALSE

need <- c("INLA","fbesag","ggplot2","ggthemes","spdep","hydroGOF","dplyr","tibble","sf","scales","knitr")
to_install <- need[!vapply(need, requireNamespace, FALSE, quietly = TRUE)]

if (length(to_install) > 0 && auto_install) {
  install.packages(setdiff(to_install, c("INLA","fbesag")))
  if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")
  if (!requireNamespace("INLA", quietly = TRUE)) remotes::install_version("INLA", version = "24.08.28")
  if (!requireNamespace("fbesag", quietly = TRUE)) remotes::install_github("esmail-abdulfattah/fbesag")
}

suppressPackageStartupMessages({
  library(INLA)
  library(fbesag)
  library(ggplot2)
  library(ggthemes)
  library(spdep)
  library(hydroGOF)
  library(dplyr)
  library(tibble)
  library(sf)
  library(scales)
  library(knitr)
})

3 Data

# Load the dataset saved in the same folder as this Rmd
load("brazil_558_microregions.RData")

# Basic checks and assumptions:
stopifnot(exists("data"), exists("graph"), exists("map_df"))
stopifnot(nrow(map_df) >= 558)  # expect at least one geometry per microregion

3.1 Partition selection and offset convention

# Partition toggle: administrative (FALSE) vs. terrestrial biomes (TRUE)
Six_terrestrial_biomes <- FALSE

# Expected counts: set TRUE if E is on the log-scale in your object
E_is_log <- FALSE

# Collapse per-observation vectors to per-area (length = 558)
collapse_to_area <- function(area_index, values, n_areas = 558) {
  vapply(seq_len(n_areas), function(i) {
    vi <- values[area_index == i]
    if (length(vi) == 0) return(NA_integer_)
    u <- unique(vi)
    u[1]
  }, integer(1))
}

# Per-area IDs from S3/S4
id_regions_admin_area <- collapse_to_area(data$S1, data$S3, n_areas = 558)
id_regions_biome_area <- collapse_to_area(data$S1, data$S4, n_areas = 558)

# Original recoding for the biome option
id_regions_biome_area[id_regions_biome_area == 3] <- 2
id_regions_biome_area[id_regions_biome_area == 4] <- 3
id_regions_biome_area[id_regions_biome_area == 5] <- 4
id_regions_biome_area[id_regions_biome_area == 6] <- 5

# Active partition for modeling
id_regions <- if (Six_terrestrial_biomes) id_regions_biome_area else id_regions_admin_area

4 Region Maps

# Attach per-area IDs to the map (assumes row order aligns with area index)
map_df$region_admin <- factor(id_regions_admin_area,
                              levels = sort(unique(id_regions_admin_area)))
map_df$region_biome <- factor(id_regions_biome_area,
                              levels = sort(unique(id_regions_biome_area)))

pal_admin <- hue_pal()(length(levels(map_df$region_admin)))
pal_biome <- hue_pal()(length(levels(map_df$region_biome)))

# Subtle borders for legibility
border_col <- "#f7f7f7"

p_admin <- ggplot(map_df) +
  geom_sf(aes(fill = region_admin), linewidth = 0.15, color = border_col) +
  scale_fill_manual(values = pal_admin, name = "Region") +
  ggtitle("Administrative Grouping (S3)") +
  theme_void() + theme(legend.position = "bottom")

p_biome <- ggplot(map_df) +
  geom_sf(aes(fill = region_biome), linewidth = 0.15, color = border_col) +
  scale_fill_manual(values = pal_biome, name = "Biome") +
  ggtitle("Terrestrial Biomes (S4)") +
  theme_void() + theme(legend.position = "bottom")

if (Six_terrestrial_biomes) p_biome else p_admin

5 Model Specification

# Scale the intrinsic graph for a sum-to-zero constraint
constr.inter <- list(A = matrix(1, 1, dim(graph)[1]), e = rep(0, 1))
scaled_graph <- as.matrix(INLA:::inla.scale.model(graph, constr.inter))

# PC prior for the stationary Besag precision
precision.prior <- list(prec = list(prior = "pc.prec", param = c(0.5, 0.01)))

# Flexible model object (region-specific precisions)
fbesag.model <- fbesag::get_fbesag(
  graph,
  id       = id_regions[1:558],
  sd_gamma = 0.15,
  param    = list(p1 = 1, p2 = 1e-5)
)

## [1] "/home/abdulfe/R/x86_64-pc-linux-gnu-library/4.5/fbesag"

# Indices for INLA
data$c_S1 <- data$S1
data$c_T1 <- data$T1

# Expected counts for offset
E_counts <- if (E_is_log) exp(data$E) else data$E

# Stationary Besag (intrinsic via generic0)
baseformula <- Y ~ 1 +
  f(S1, model = "generic0",
    Cmatrix = scaled_graph,
    constr  = TRUE, rankdef = 1,
    hyper  = precision.prior) +
  f(T1, model = "rw1",
    constr = TRUE, scale.model = TRUE,
    hyper  = list(prec = list(prior = "pc.prec", param = c(0.5, 0.01))))

# Flexible Besag
formula <- Y ~ 1 +
  f(S1, model = fbesag.model, constr = TRUE, rankdef = 1) +
  f(T1, model = "rw1",
    constr = TRUE, scale.model = TRUE,
    hyper  = precision.prior)

6 Model Fitting

config_flag <- FALSE

model_naive <- inla(
  formula = baseformula, data = data, family = "poisson", offset = log(E_counts),
  control.inla = list(strategy = "gaussian", int.strategy = "eb"),
  control.compute = list(dic = TRUE, config = config_flag,
                         cpo = TRUE, return.marginals = FALSE,
                         control.gcpo = list(enable = TRUE, num.level.sets = 2)),
  control.fixed = list(correlation.matrix = TRUE, prec.intercept = 1, prec = 1),
  control.predictor = list(link = 1, compute = TRUE),
  verbose = TRUE
)

model_fbesag <- inla(
  formula = formula, data = data, family = "poisson", offset = log(E_counts),
  control.inla = list(strategy = "gaussian", int.strategy = "eb"),
  control.compute = list(dic = TRUE, config = config_flag,
                         cpo = TRUE, return.marginals = FALSE,
                         control.gcpo = list(enable = TRUE, num.level.sets = 2)),
  control.fixed = list(correlation.matrix = TRUE, prec.intercept = 1, prec = 1),
  control.predictor = list(link = 1, compute = TRUE),
  verbose = TRUE
)

7 Model Comparison

mean_neglog <- function(v) {
  v <- v[is.finite(v) & v > 0]
  if (length(v) == 0) return(NA_real_)
  -mean(log(v))
}

gcpo_naive  <- if (!is.null(model_naive$gcpo))  mean_neglog(model_naive$gcpo$gcpo) else mean_neglog(model_naive$cpo$cpo)
gcpo_fbesag <- if (!is.null(model_fbesag$gcpo)) mean_neglog(model_fbesag$gcpo$gcpo) else mean_neglog(model_fbesag$cpo$cpo)

mae_naive  <- hydroGOF::mae(model_naive$summary.fitted.values$`0.5quant`, data$Y, na.rm = TRUE)
mae_fbesag <- hydroGOF::mae(model_fbesag$summary.fitted.values$`0.5quant`, data$Y, na.rm = TRUE)

res <- tibble::tibble(
  Model = c("Stationary Besag", "Flexible Besag"),
  DIC   = c(round(model_naive$dic$dic, 0), round(model_fbesag$dic$dic, 0)),
  logML = c(as.numeric(model_naive$mlik[1]), as.numeric(model_fbesag$mlik[1])),
  GCPO  = c(gcpo_naive, gcpo_fbesag),
  MAE   = c(mae_naive, mae_fbesag)
)

res$`Better than stationary?` <- c(NA,
  (res$DIC[2]   < res$DIC[1]) &
  (res$logML[2] > res$logML[1]) &
  (res$GCPO[2]  < res$GCPO[1]) &
  (res$MAE[2]   < res$MAE[1])
)

knitr::kable(res, digits = 3, align = "lrrrrc",
             caption = "Lower DIC/GCPO/MAE and higher logML indicate improvement.")

Lower DIC/GCPO/MAE and higher logML indicate improvement.
Model	DIC	logML	GCPO	MAE	Better than stationary?
Stationary Besag	-61840	-118863.2	16.551	37.079	NA
Flexible Besag	-61912	-118609.0	16.550	37.078	TRUE

8 SMR by Microregion

# Average SMR per microregion
SMR <- vapply(1:558, function(i) {
  yy <- data$Y[data$S1 == i]
  ee <- E_counts[data$S1 == i]
  mean(yy / ee)
}, numeric(1))

map_df$SMR_cat <- cut(
  SMR, breaks = c(-Inf, 0.5, 1, 2, 5, Inf),
  labels = c("<0.5", "0.5–1", "1–2", "2–5", "5+"),
  right = FALSE, ordered_result = TRUE
)

pal_smr <- c("#edf8fb", "#b2e2e2", "#66c2a4", "#2ca25f", "#006d2c")
border_col <- "#f7f7f7"

ggplot(map_df) +
  geom_sf(aes(fill = SMR_cat), linewidth = 0.15, color = border_col) +
  scale_fill_manual(values = pal_smr, name = "SMR") +
  ggtitle("Standardized Morbidity Ratio (SMR) by Microregion") +
  theme_void(base_size = 12) +
  theme(legend.position = "bottom")

9 Non-stationarity Diagnostics (fbesag vs. stationary)

# Differences in spatial field summaries
map_df$diff_mean <- model_fbesag$summary.random$S1$mean -
                    model_naive$summary.random$S1$mean

map_df$diff_q975 <- model_fbesag$summary.random$S1$`0.975quant` -
                    model_naive$summary.random$S1$`0.975quant`

map_df$diff_q025 <- model_fbesag$summary.random$S1$`0.025quant` -
                    model_naive$summary.random$S1$`0.025quant`

ga_blue <- "#49B8F1"
border_col <- "#f7f7f7"

p_mean <- ggplot(map_df) +
  geom_sf(aes(fill = diff_mean), linewidth = 0.15, color = border_col) +
  scale_fill_gradient2(low = ga_blue, mid = "white", high = "red", midpoint = 0, name = "\u0394 mean") +
  ggtitle("Difference in Posterior Mean of Spatial Effect (fbesag − stationary)") +
  theme_void(base_size = 12)

p_q975 <- ggplot(map_df) +
  geom_sf(aes(fill = diff_q975), linewidth = 0.15, color = border_col) +
  scale_fill_gradient2(low = ga_blue, mid = "white", high = "red", midpoint = 0, name = "\u0394 97.5%") +
  ggtitle("Difference in 97.5th Percentile of Spatial Effect") +
  theme_void(base_size = 12)

p_q025 <- ggplot(map_df) +
  geom_sf(aes(fill = diff_q025), linewidth = 0.15, color = border_col) +
  scale_fill_gradient2(low = ga_blue, mid = "white", high = "red", midpoint = 0, name = "\u0394 2.5%") +
  ggtitle("Difference in 2.5th Percentile of Spatial Effect") +
  theme_void(base_size = 12)

p_mean

p_q975

p_q025

10 Regional Summary (Optional)

ch <- id_regions[1:558]
abs_mean_diff_by_region <- vapply(sort(unique(ch)), function(k) {
  mean(abs(map_df$diff_mean[ch == k]))
}, numeric(1))

tibble::tibble(
  Region = sort(unique(ch)),
  `Mean |Δ mean|` = abs_mean_diff_by_region
) %>%
  knitr::kable(digits = 4, caption = "Average absolute change in posterior mean (by sub-region).")

Average absolute change in posterior mean (by sub-region).
Region	Mean \|Δ mean\|
1	0.0069
2	0.0046
3	0.0154
4	0.0389
5	0.0040

11 Interpretation

If the flexible model reduces DIC/GCPO/MAE and increases logML, and the difference maps display structured changes, the stationarity assumption is likely violated.
Where differences are negligible, the PC prior contracts the flexible model toward the stationary baseline—fbesag can be used as a safe diagnostic.
Sensitivity can be assessed via sd_gamma (e.g., 0.05 for stronger contraction, 0.3 for greater heterogeneity).

References

Abdul-Fattah, Esmail, Elias Krainski, Janet van Niekerk, and Håvard Rue. 2024. “Non-Stationary Bayesian Spatial Model for Disease Mapping Based on Sub-Regions.” Statistical Methods in Medical Research 33 (6): 1093–1111. https://doi.org/10.1177/09622802241244613.