Learn-skills.dev r-bayes
Patterns for Bayesian inference in R using brms, including multilevel models, DAG validation, and marginal effects. Use when performing Bayesian analysis.
install
source · Clone the upstream repo
git clone https://github.com/NeverSight/learn-skills.dev
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/NeverSight/learn-skills.dev "$T" && mkdir -p ~/.claude/skills && cp -r "$T/data/skills-md/ab604/claude-code-r-skills/r-bayes" ~/.claude/skills/neversight-learn-skills-dev-r-bayes && rm -rf "$T"
manifest:
data/skills-md/ab604/claude-code-r-skills/r-bayes/SKILL.md — source content
Core Packages
# Core packages for the Bayesian workflow (one library() call per line;
# fusing them on one line is a syntax error in R)
library(brms)             # Bayesian regression models via Stan
library(cmdstanr)         # CmdStan backend for brms
library(dagitty)          # DAG definition and validation
library(ggdag)            # DAG visualization with ggplot2
library(marginaleffects)  # Marginal effects and predictions
library(tidybayes)        # Tidy posterior extraction and plotting
library(bayesplot)        # MCMC diagnostic plots
Directed Acyclic Graphs (DAGs)
Prior to causal inference, create and validate DAGs with dagitty and ggdag.
Define DAG Structure
# Define the causal DAG. Keep the specification multi-line: dagitty's
# '#' comments run to end of line, so collapsing the string onto one
# line would comment out the entire graph body.
dag <- dagitty('
dag {
  # Node positions for visualization
  exposure   [pos="0,1"]
  mediator   [pos="1,1"]
  outcome    [pos="2,1"]
  confounder [pos="1,0"]

  # Edges (arrows)
  confounder -> exposure
  confounder -> outcome
  exposure -> mediator
  mediator -> outcome
  exposure -> outcome
}
')
Identify Adjustment Sets
# Node names must match those declared in the DAG ("exposure", not
# "treatment" -- dagitty silently returns no sets for unknown names).

# For the direct effect
adjustmentSets(dag, exposure = "exposure", outcome = "outcome", effect = "direct")

# For the total effect
adjustmentSets(dag, exposure = "exposure", outcome = "outcome", effect = "total")
Validate DAG Against Data
# Get implied conditional independencies from the DAG
implied_cis <- impliedConditionalIndependencies(dag)

# Test the implied independencies against the observed data
ci_results <- localTests(dag, data = analysis_data, type = "cis")

# Assess validation: what share of implied CIs hold (p > 0.05 means
# the independence is NOT rejected, i.e. consistent with the DAG)?
ci_df <- as.data.frame(ci_results)
ci_df$independent <- ci_df$p.value > 0.05
pct_supported <- 100 * mean(ci_df$independent, na.rm = TRUE)
cat(sprintf("DAG support: %.1f%% of implied CIs hold\n", pct_supported))
Visualize DAG
# Convert the dagitty object to a tidy data frame for plotting
dag_tidy <- tidy_dagitty(dag)

# Draw the DAG with ggdag geoms
ggplot(dag_tidy, aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_dag_edges(edge_colour = "grey50") +
  geom_dag_point(size = 20) +
  geom_dag_text(size = 3.5, color = "black") +
  theme_dag() +
  labs(title = "Causal DAG")
Bayesian Regression with brms
Standard Configuration
options(mc.cores = 4)

# Standard brms model call
model <- brm(
  formula = outcome ~ predictor1 + predictor2 + (1 | group_id),
  data = model_data,
  family = bernoulli(link = "logit"),  # For binary outcomes
  prior = priors,
  sample_prior = "yes",                # For prior-posterior comparison
  chains = 4,
  cores = 4,
  iter = 4000,
  warmup = 1000,
  control = list(
    adapt_delta = 0.95,
    max_treedepth = 15
  ),
  seed = 123,                  # Set seed for reproducibility
  backend = "cmdstanr",
  file = "models/model_name",  # Cache compiled model
  file_refit = "on_change"     # Only refit if formula/data change
)
Priors
Store priors separately and define explicitly:
# Store priors in a named object so they are explicit and reusable
priors <- c(
  prior(normal(0, 2), class = "Intercept"),
  prior(normal(0, 1), class = "b"),     # Fixed effects
  prior(exponential(1), class = "sd"),  # Random effect SD
  prior(lkj(2), class = "cor")          # Correlation priors
)

# Inspect the default priors brms would use for a formula
get_prior(outcome ~ predictor + (1 | id), data = data, family = bernoulli())
Common Families
# Binary outcome
family = bernoulli(link = "logit")

# Count data
family = poisson(link = "log")
family = negbinomial(link = "log")

# Continuous
family = gaussian()
family = student()  # Robust to outliers

# Ordinal
family = cumulative(link = "logit")
Multilevel Models
Random Intercepts
# Random intercept per participant
outcome ~ predictors + (1 | participant_id)
Random Slopes
# Random intercept and slope for time
outcome ~ time + predictors + (1 + time | participant_id)
Crossed Random Effects
# Participants nested in groups, items crossed
response ~ predictors + (1 | participant_id) + (1 | item_id)
Within-Person Centering
For longitudinal data, separate between-person and within-person effects:
# Create person-centered variables
model_data <- data |>
  group_by(participant_id) |>
  mutate(
    # Between-person means (stable trait)
    predictor_mean = mean(predictor, na.rm = TRUE),
    # Within-person deviations (dynamic change)
    predictor_dev = predictor - predictor_mean,
    # Volatility (person-level SD)
    predictor_sd = sd(predictor, na.rm = TRUE)
  ) |>
  ungroup() |>
  # Standardize ([, 1] drops the matrix attributes scale() returns)
  mutate(
    predictor_mean_z = scale(predictor_mean)[, 1],
    predictor_dev_z = scale(predictor_dev)[, 1]
  )

# Model with both components
model <- brm(
  outcome ~ predictor_mean_z + predictor_dev_z + (1 | participant_id),
  data = model_data,
  family = bernoulli()
)
Lagged Predictors for Temporal Precedence
# Create lagged predictors within person
model_data <- data |>
  group_by(participant_id) |>
  arrange(time) |>
  mutate(
    # Lagged values (from previous timepoint)
    predictor_lag = lag(predictor, order_by = time),
    predictor_dev_lag = lag(predictor_dev, order_by = time)
  ) |>
  ungroup() |>
  # Standardize the lagged deviation so the model term below exists
  # (the formula uses predictor_dev_lag_z, not predictor_dev_lag)
  mutate(predictor_dev_lag_z = scale(predictor_dev_lag)[, 1])

# Test if t-1 predicts outcome at t (establishes temporal precedence)
model_lagged <- brm(
  outcome ~ predictor_dev_lag_z + predictor_mean_z + (1 | participant_id),
  ...
)
Extracting and Interpreting Results
Extract Posterior Samples
# Pull all posterior draws into a data frame
posterior <- as_draws_df(model)

# Access a specific parameter's samples
samples <- posterior$b_predictor_z

# Summary statistics: point estimate, 95% and 80% credible intervals,
# and posterior probabilities of direction
tibble(
  estimate = median(samples),
  lower_95 = quantile(samples, 0.025),
  upper_95 = quantile(samples, 0.975),
  lower_80 = quantile(samples, 0.10),
  upper_80 = quantile(samples, 0.90),
  prob_negative = mean(samples < 0),
  prob_positive = mean(samples > 0)
)
Odds Ratios (for logistic models)
# Convert log-odds estimates and interval bounds to odds ratios
effects_df <- effects_df |>
  mutate(
    OR = exp(estimate),
    OR_lower = exp(lower_95),
    OR_upper = exp(upper_95)
  )
Posterior Probability of Direction
# P(effect is protective)
prob_protective <- mean(posterior$b_predictor < 0)

# P(effect is harmful)
prob_harmful <- mean(posterior$b_predictor > 0)

# P(|effect| > some threshold)
prob_meaningful <- mean(abs(posterior$b_predictor) > 0.1)
Compare Effect Magnitudes
# Test if the within-person effect is larger than the between-person
# effect by comparing absolute magnitudes draw-by-draw
diff <- abs(posterior$b_predictor_dev_z) - abs(posterior$b_predictor_mean_z)
prob_within_larger <- mean(diff > 0)
cat(sprintf("P(|within| > |between|) = %.1f%%\n", 100 * prob_within_larger))
Marginal Effects with marginaleffects
Average Marginal Effects (AME)
# Average marginal effect: change in P(outcome) per 1-unit change
# in each predictor, averaged over the observed data
ame <- avg_slopes(
  model,
  variables = c("predictor1_z", "predictor2_z"),
  type = "response"  # Probability scale
)
print(ame)
Predictions at Specific Values
# Predictions at low (-1 SD), mean (0), and high (+1 SD) values of
# the standardized predictor
predictions <- predictions(
  model,
  newdata = datagrid(
    model = model,
    predictor_z = c(-1, 0, 1)
  ),
  type = "response",
  re_formula = NA  # Population-level (ignore random effects)
)

as.data.frame(predictions) |>
  select(predictor_z, estimate, conf.low, conf.high)
Marginal Effect Plots
# Plot predicted probabilities across the predictor's range,
# at the population level (random effects excluded)
plot_predictions(
  model,
  by = "predictor_z",
  type = "response",
  re_formula = NA
) +
  labs(
    title = "Effect of Predictor on Outcome",
    x = "Predictor (standardized)",
    y = "P(Outcome)"
  ) +
  scale_y_continuous(labels = scales::percent) +
  theme_minimal()
Comparing Slopes Across Models
# Extract the AME for the same predictor from multiple models
ame_model1 <- avg_slopes(model1, variables = "predictor_z", type = "response")
ame_model2 <- avg_slopes(model2, variables = "predictor_z", type = "response")

# Stack them with a model label for side-by-side comparison
comparison <- bind_rows(
  as.data.frame(ame_model1) |> mutate(model = "Full"),
  as.data.frame(ame_model2) |> mutate(model = "Simple")
)
Model Diagnostics
Check MCMC Convergence
# Trace plots
mcmc_trace(model, pars = c("b_Intercept", "b_predictor_z"))

# R-hat (should be < 1.01). Note summary(model)$fixed is a matrix,
# so columns are indexed with [, "name"], not $.
summary(model)$fixed[, "Rhat"]

# Effective sample size (should be > 400)
summary(model)$fixed[, "Bulk_ESS"]
summary(model)$fixed[, "Tail_ESS"]
Posterior Predictive Checks
# Graphical posterior predictive checks: overall density overlay,
# then test-statistic checks on the mean and (mean, sd) jointly
pp_check(model)
pp_check(model, type = "stat", stat = "mean")
pp_check(model, type = "stat_2d", stat = c("mean", "sd"))
Prior-Posterior Comparison
# Requires sample_prior = "yes" in brm()
prior_summary(model)

# Plot prior vs posterior
mcmc_areas(model, pars = "b_predictor_z", prob = 0.95)
tidybayes for Posterior Manipulation
# Extract draws in tidy format and transform to odds ratios
draws <- model |>
  spread_draws(b_predictor1_z, b_predictor2_z) |>
  mutate(
    OR_predictor1 = exp(b_predictor1_z),
    OR_predictor2 = exp(b_predictor2_z)
  )

# Summarize with 80% and 95% quantile intervals
draws |>
  median_qi(OR_predictor1, OR_predictor2, .width = c(0.80, 0.95))

# Visualize the posterior with a reference line at OR = 1 (no effect)
draws |>
  ggplot(aes(x = OR_predictor1)) +
  stat_halfeye() +
  geom_vline(xintercept = 1, linetype = "dashed") +
  labs(x = "Odds Ratio", y = NULL)
Workflow Summary
- Define causal DAG with dagitty
- Validate DAG against data with localTests()
- Identify adjustment sets for target effects
- Specify priors based on domain knowledge
- Fit brms model with random effects for nested data
- Check diagnostics (convergence, PPCs)
- Extract posteriors for inference
- Compute marginal effects on interpretable scale
- Visualize effects with uncertainty
Anti-Patterns to Avoid
# WRONG: Using contemporaneous predictors when temporal order matters
outcome_t ~ predictor_t  # Shows co-occurrence, not temporal precedence

# CORRECT: Use lagged predictors to establish temporal precedence
outcome_t ~ predictor_t_minus_1

# WRONG: Ignoring clustering
brm(outcome ~ predictor, data = longitudinal_data)

# CORRECT: Account for repeated measures
brm(outcome ~ predictor + (1 | participant_id), data = longitudinal_data)

# WRONG: Interpreting within-person effects from between-person variation
# (using person aggregates when you have time-varying data)

# CORRECT: Person-mean centering to separate the two effects
outcome ~ predictor_mean_z + predictor_dev_z + (1 | id)