We compare a stationary Besag model with the
non-stationary fbesag model for dengue
risk across Brazil’s 558 microregions. The flexible
model estimates region-specific spatial precisions
while maintaining smoothness across shared borders. A joint
penalized-complexity prior ensures contraction to
stationarity when heterogeneity is not supported by the data.
For details of the method and prior, see Abdul-Fattah et al. (2024).
This document provides: - Partition maps (administrative vs. terrestrial biomes). - Fitted stationary and flexible spatial models using INLA. - A model comparison table: DIC, logML, GCPO, MAE. - An SMR choropleth and difference maps for posterior summaries.
Folder assumption. All required data files (including
brazil_558_microregions.RData) are in the same folder as this Rmd.
# Optionally install missing packages (set to TRUE if you need auto-install)
auto_install <- FALSE
need <- c("INLA","fbesag","ggplot2","ggthemes","spdep","hydroGOF","dplyr","tibble","sf","scales","knitr")
to_install <- need[!vapply(need, requireNamespace, FALSE, quietly = TRUE)]
if (length(to_install) > 0 && auto_install) {
install.packages(setdiff(to_install, c("INLA","fbesag")))
if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")
if (!requireNamespace("INLA", quietly = TRUE)) remotes::install_version("INLA", version = "24.08.28")
if (!requireNamespace("fbesag", quietly = TRUE)) remotes::install_github("esmail-abdulfattah/fbesag")
}
suppressPackageStartupMessages({
library(INLA)
library(fbesag)
library(ggplot2)
library(ggthemes)
library(spdep)
library(hydroGOF)
library(dplyr)
library(tibble)
library(sf)
library(scales)
library(knitr)
})# Load the dataset saved in the same folder as this Rmd
load("brazil_558_microregions.RData")
# Basic checks and assumptions:
stopifnot(exists("data"), exists("graph"), exists("map_df"))
stopifnot(nrow(map_df) >= 558) # expect at least one geometry per microregion# Partition toggle: administrative (FALSE) vs. terrestrial biomes (TRUE)
Six_terrestrial_biomes <- FALSE
# Expected counts: set TRUE if E is on the log-scale in your object
E_is_log <- FALSE
# Collapse per-observation vectors to per-area (length = 558)
collapse_to_area <- function(area_index, values, n_areas = 558) {
vapply(seq_len(n_areas), function(i) {
vi <- values[area_index == i]
if (length(vi) == 0) return(NA_integer_)
u <- unique(vi)
u[1]
}, integer(1))
}
# Per-area IDs from S3/S4
id_regions_admin_area <- collapse_to_area(data$S1, data$S3, n_areas = 558)
id_regions_biome_area <- collapse_to_area(data$S1, data$S4, n_areas = 558)
# Original recoding for the biome option
id_regions_biome_area[id_regions_biome_area == 3] <- 2
id_regions_biome_area[id_regions_biome_area == 4] <- 3
id_regions_biome_area[id_regions_biome_area == 5] <- 4
id_regions_biome_area[id_regions_biome_area == 6] <- 5
# Active partition for modeling
id_regions <- if (Six_terrestrial_biomes) id_regions_biome_area else id_regions_admin_area# Attach per-area IDs to the map (assumes row order aligns with area index)
map_df$region_admin <- factor(id_regions_admin_area,
levels = sort(unique(id_regions_admin_area)))
map_df$region_biome <- factor(id_regions_biome_area,
levels = sort(unique(id_regions_biome_area)))
pal_admin <- hue_pal()(length(levels(map_df$region_admin)))
pal_biome <- hue_pal()(length(levels(map_df$region_biome)))
# Subtle borders for legibility
border_col <- "#f7f7f7"
p_admin <- ggplot(map_df) +
geom_sf(aes(fill = region_admin), linewidth = 0.15, color = border_col) +
scale_fill_manual(values = pal_admin, name = "Region") +
ggtitle("Administrative Grouping (S3)") +
theme_void() + theme(legend.position = "bottom")
p_biome <- ggplot(map_df) +
geom_sf(aes(fill = region_biome), linewidth = 0.15, color = border_col) +
scale_fill_manual(values = pal_biome, name = "Biome") +
ggtitle("Terrestrial Biomes (S4)") +
theme_void() + theme(legend.position = "bottom")
if (Six_terrestrial_biomes) p_biome else p_admin# Scale the intrinsic graph for a sum-to-zero constraint
constr.inter <- list(A = matrix(1, 1, dim(graph)[1]), e = rep(0, 1))
scaled_graph <- as.matrix(INLA:::inla.scale.model(graph, constr.inter))
# PC prior for the stationary Besag precision
precision.prior <- list(prec = list(prior = "pc.prec", param = c(0.5, 0.01)))
# Flexible model object (region-specific precisions)
fbesag.model <- fbesag::get_fbesag(
graph,
id = id_regions[1:558],
sd_gamma = 0.15,
param = list(p1 = 1, p2 = 1e-5)
)## [1] "/home/abdulfe/R/x86_64-pc-linux-gnu-library/4.5/fbesag"
# Indices for INLA
data$c_S1 <- data$S1
data$c_T1 <- data$T1
# Expected counts for offset
E_counts <- if (E_is_log) exp(data$E) else data$E
# Stationary Besag (intrinsic via generic0)
baseformula <- Y ~ 1 +
f(S1, model = "generic0",
Cmatrix = scaled_graph,
constr = TRUE, rankdef = 1,
hyper = precision.prior) +
f(T1, model = "rw1",
constr = TRUE, scale.model = TRUE,
hyper = list(prec = list(prior = "pc.prec", param = c(0.5, 0.01))))
# Flexible Besag
formula <- Y ~ 1 +
f(S1, model = fbesag.model, constr = TRUE, rankdef = 1) +
f(T1, model = "rw1",
constr = TRUE, scale.model = TRUE,
hyper = precision.prior)config_flag <- FALSE
model_naive <- inla(
formula = baseformula, data = data, family = "poisson", offset = log(E_counts),
control.inla = list(strategy = "gaussian", int.strategy = "eb"),
control.compute = list(dic = TRUE, config = config_flag,
cpo = TRUE, return.marginals = FALSE,
control.gcpo = list(enable = TRUE, num.level.sets = 2)),
control.fixed = list(correlation.matrix = TRUE, prec.intercept = 1, prec = 1),
control.predictor = list(link = 1, compute = TRUE),
verbose = TRUE
)
model_fbesag <- inla(
formula = formula, data = data, family = "poisson", offset = log(E_counts),
control.inla = list(strategy = "gaussian", int.strategy = "eb"),
control.compute = list(dic = TRUE, config = config_flag,
cpo = TRUE, return.marginals = FALSE,
control.gcpo = list(enable = TRUE, num.level.sets = 2)),
control.fixed = list(correlation.matrix = TRUE, prec.intercept = 1, prec = 1),
control.predictor = list(link = 1, compute = TRUE),
verbose = TRUE
)mean_neglog <- function(v) {
v <- v[is.finite(v) & v > 0]
if (length(v) == 0) return(NA_real_)
-mean(log(v))
}
gcpo_naive <- if (!is.null(model_naive$gcpo)) mean_neglog(model_naive$gcpo$gcpo) else mean_neglog(model_naive$cpo$cpo)
gcpo_fbesag <- if (!is.null(model_fbesag$gcpo)) mean_neglog(model_fbesag$gcpo$gcpo) else mean_neglog(model_fbesag$cpo$cpo)
mae_naive <- hydroGOF::mae(model_naive$summary.fitted.values$`0.5quant`, data$Y, na.rm = TRUE)
mae_fbesag <- hydroGOF::mae(model_fbesag$summary.fitted.values$`0.5quant`, data$Y, na.rm = TRUE)
res <- tibble::tibble(
Model = c("Stationary Besag", "Flexible Besag"),
DIC = c(round(model_naive$dic$dic, 0), round(model_fbesag$dic$dic, 0)),
logML = c(as.numeric(model_naive$mlik[1]), as.numeric(model_fbesag$mlik[1])),
GCPO = c(gcpo_naive, gcpo_fbesag),
MAE = c(mae_naive, mae_fbesag)
)
res$`Better than stationary?` <- c(NA,
(res$DIC[2] < res$DIC[1]) &
(res$logML[2] > res$logML[1]) &
(res$GCPO[2] < res$GCPO[1]) &
(res$MAE[2] < res$MAE[1])
)
knitr::kable(res, digits = 3, align = "lrrrrc",
caption = "Lower DIC/GCPO/MAE and higher logML indicate improvement.")| Model | DIC | logML | GCPO | MAE | Better than stationary? |
|---|---|---|---|---|---|
| Stationary Besag | -61840 | -118863.2 | 16.551 | 37.079 | NA |
| Flexible Besag | -61912 | -118609.0 | 16.550 | 37.078 | TRUE |
# Average SMR per microregion
SMR <- vapply(1:558, function(i) {
yy <- data$Y[data$S1 == i]
ee <- E_counts[data$S1 == i]
mean(yy / ee)
}, numeric(1))
map_df$SMR_cat <- cut(
SMR, breaks = c(-Inf, 0.5, 1, 2, 5, Inf),
labels = c("<0.5", "0.5–1", "1–2", "2–5", "5+"),
right = FALSE, ordered_result = TRUE
)
pal_smr <- c("#edf8fb", "#b2e2e2", "#66c2a4", "#2ca25f", "#006d2c")
border_col <- "#f7f7f7"
ggplot(map_df) +
geom_sf(aes(fill = SMR_cat), linewidth = 0.15, color = border_col) +
scale_fill_manual(values = pal_smr, name = "SMR") +
ggtitle("Standardized Morbidity Ratio (SMR) by Microregion") +
theme_void(base_size = 12) +
theme(legend.position = "bottom")# Differences in spatial field summaries
map_df$diff_mean <- model_fbesag$summary.random$S1$mean -
model_naive$summary.random$S1$mean
map_df$diff_q975 <- model_fbesag$summary.random$S1$`0.975quant` -
model_naive$summary.random$S1$`0.975quant`
map_df$diff_q025 <- model_fbesag$summary.random$S1$`0.025quant` -
model_naive$summary.random$S1$`0.025quant`
ga_blue <- "#49B8F1"
border_col <- "#f7f7f7"
p_mean <- ggplot(map_df) +
geom_sf(aes(fill = diff_mean), linewidth = 0.15, color = border_col) +
scale_fill_gradient2(low = ga_blue, mid = "white", high = "red", midpoint = 0, name = "\u0394 mean") +
ggtitle("Difference in Posterior Mean of Spatial Effect (fbesag − stationary)") +
theme_void(base_size = 12)
p_q975 <- ggplot(map_df) +
geom_sf(aes(fill = diff_q975), linewidth = 0.15, color = border_col) +
scale_fill_gradient2(low = ga_blue, mid = "white", high = "red", midpoint = 0, name = "\u0394 97.5%") +
ggtitle("Difference in 97.5th Percentile of Spatial Effect") +
theme_void(base_size = 12)
p_q025 <- ggplot(map_df) +
geom_sf(aes(fill = diff_q025), linewidth = 0.15, color = border_col) +
scale_fill_gradient2(low = ga_blue, mid = "white", high = "red", midpoint = 0, name = "\u0394 2.5%") +
ggtitle("Difference in 2.5th Percentile of Spatial Effect") +
theme_void(base_size = 12)
p_meanch <- id_regions[1:558]
abs_mean_diff_by_region <- vapply(sort(unique(ch)), function(k) {
mean(abs(map_df$diff_mean[ch == k]))
}, numeric(1))
tibble::tibble(
Region = sort(unique(ch)),
`Mean |Δ mean|` = abs_mean_diff_by_region
) %>%
knitr::kable(digits = 4, caption = "Average absolute change in posterior mean (by sub-region).")| Region | Mean |Δ mean| |
|---|---|
| 1 | 0.0069 |
| 2 | 0.0046 |
| 3 | 0.0154 |
| 4 | 0.0389 |
| 5 | 0.0040 |
fbesag can be
used as a safe diagnostic.sd_gamma (e.g., 0.05
for stronger contraction, 0.3 for greater heterogeneity).