Peter Ralph
Advanced Biological Statistics
stan_model( )
).sampling( )
).lm( )
, Stan-styleSimulate:
xy <- data.frame(x = rnorm(20))
xy$y <- 1.2 + 0.5 * xy$x + rnorm(nrow(xy), sd=0.3)
lm(y ~ x, data=xy) %>% summary()
##
## Call:
## lm(formula = y ~ x, data = xy)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.36631 -0.14092 0.00429 0.14046 0.42923
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.16549 0.05928 19.661 1.29e-13 ***
## x 0.50466 0.06705 7.526 5.78e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2537 on 18 degrees of freedom
## Multiple R-squared: 0.7589, Adjusted R-squared: 0.7455
## F-statistic: 56.65 on 1 and 18 DF, p-value: 5.78e-07
The model: \[\begin{aligned} Y_i &\sim \Normal(\beta_0 + \beta_x \times X_i, \text{sd}=\sigma) \end{aligned}\]
The parameter values: \[\begin{aligned} 1 &\le i \le n \\ \beta_0 &= 1.2 \\ \beta_x &= 0.5 \\ \sigma &= 0.3 \end{aligned}\]
data {
// stuff you input
}
transformed data {
// stuff that's calculated from the data (just once, at the start)
}
parameters {
// stuff you want to learn about
}
transformed parameters {
// stuff that's calculated from the parameters (at every step)
}
model {
// the action!
}
generated quantities {
// stuff you want computed also along the way
}
data {
// stuff you input
}
parameters {
// stuff you want to learn about
}
model {
// the action!
}
The model: \[\begin{aligned} Y_i &\sim \Normal(\beta_0 + \beta_x \times X_i, \text{sd}=\sigma) \end{aligned}\]
data {
int n;
vector[n] x;
vector[n] y;
}
parameters {
// stuff you want to learn about
}
model {
// the action!
}
The model: \[\begin{aligned} Y_i &\sim \Normal(\beta_0 + \beta_x \times X_i, \text{sd}=\sigma) \end{aligned}\]
data {
int n;
vector[n] x;
vector[n] y;
}
parameters {
real beta_0; // intercept
real beta_x; // slope
real<lower=0> sigma; // residual SD
}
model {
// the action!
}
The model: \[\begin{aligned} Y_i &\sim \Normal(\beta_0 + \beta_x \times X_i, \text{sd}=\sigma) \end{aligned}\]
data {
int n;
vector[n] x;
vector[n] y;
}
parameters {
real beta_0; // intercept
real beta_x; // slope
real<lower=0> sigma; // residual SD
}
model {
y ~ normal(beta_0 + beta_x * x, sigma);
}
The model: \[\begin{aligned} Y_i &\sim \Normal(\beta_0 + \beta_x \times X_i, \text{sd}=\sigma) \end{aligned}\]
data {
int n;
vector[n] x;
vector[n] y;
}
parameters {
real beta_0; // intercept
real beta_x; // slope
real<lower=0> sigma; // residual SD
}
model {
y ~ normal(beta_0 + beta_x * x, sigma);
beta_0 ~ normal(0, 5);
beta_1 ~ normal(0, 5);
sigma ~ normal(0, 5);
}
The model, with priors: \[\begin{aligned} Y_i &\sim \Normal(\beta_0 + \beta_x \times X_i, \text{sd}=\sigma) \\ \beta_0 &\sim \Normal(0, 5) \\ \beta_1 &\sim \Normal(0, 5) \\ \sigma &\sim \Normal(0, 5) \end{aligned}\]
stanlm_code <- "
data {
int n;
vector[n] x;
vector[n] y;
}
parameters {
real beta_0; // intercept
real beta_x; // slope
real<lower=0> sigma; // residual SD
}
model {
y ~ normal(beta_0 + beta_x * x, sigma);
beta_0 ~ normal(0, 5);
beta_x ~ normal(0, 5);
sigma ~ normal(0, 5);
}
"
stanlm_mod <- stan_model(model_code=stanlm_code)
##
## SAMPLING FOR MODEL 'cbd4ba6f8bbc5029399a1ce10525d05d' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 1.3e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.13 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 1: Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 1: Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 1: Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 1: Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 1: Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 1: Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 1: Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 1: Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 1: Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 1: Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 1: Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.014818 seconds (Warm-up)
## Chain 1: 0.012961 seconds (Sampling)
## Chain 1: 0.027779 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'cbd4ba6f8bbc5029399a1ce10525d05d' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 1e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 2: Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 2: Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 2: Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 2: Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 2: Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 2: Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 2: Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 2: Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 2: Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 2: Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 2: Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.014052 seconds (Warm-up)
## Chain 2: 0.012467 seconds (Sampling)
## Chain 2: 0.026519 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'cbd4ba6f8bbc5029399a1ce10525d05d' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 7e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.07 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 3: Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 3: Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 3: Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 3: Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 3: Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 3: Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 3: Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 3: Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 3: Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 3: Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 3: Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0.013271 seconds (Warm-up)
## Chain 3: 0.011769 seconds (Sampling)
## Chain 3: 0.02504 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'cbd4ba6f8bbc5029399a1ce10525d05d' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 6e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.06 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 4: Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 4: Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 4: Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 4: Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 4: Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 4: Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 4: Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 4: Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 4: Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 4: Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 4: Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 0.015976 seconds (Warm-up)
## Chain 4: 0.012195 seconds (Sampling)
## Chain 4: 0.028171 seconds (Total)
## Chain 4:
## Inference for Stan model: cbd4ba6f8bbc5029399a1ce10525d05d.
## 4 chains, each with iter=1000; warmup=500; thin=1;
## post-warmup draws per chain=500, total post-warmup draws=2000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## beta_0 1.17 0.00 0.06 1.05 1.13 1.17 1.21 1.29 1440 1
## beta_x 0.50 0.00 0.07 0.36 0.46 0.50 0.55 0.65 1334 1
## sigma 0.27 0.00 0.05 0.19 0.24 0.27 0.30 0.39 1199 1
## lp__ 15.48 0.04 1.29 12.18 14.93 15.81 16.43 16.93 828 1
##
## Samples were drawn using NUTS(diag_e) at Mon Jan 17 05:46:27 2022.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
Metabolic rate (\(Y\)) depends on sunlight (\(X\)), but measurement noise depends on temperature (\(T\)):
\[\begin{aligned} Y_i &\sim \Normal(\beta_0 + \beta_x \times X_i, \text{sd}=\sigma_i) \\ \sigma_i &\sim \Normal_+(0, \text{sd}=\gamma T_i) \end{aligned}\]
“Equivalently”:
## Compiling Stan program...
## Start sampling
##
## SAMPLING FOR MODEL '2d6f0d660e749a9a3fb3521f61fac781' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 1e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.022363 seconds (Warm-up)
## Chain 1: 0.019917 seconds (Sampling)
## Chain 1: 0.04228 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL '2d6f0d660e749a9a3fb3521f61fac781' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 9e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.09 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.026135 seconds (Warm-up)
## Chain 2: 0.03037 seconds (Sampling)
## Chain 2: 0.056505 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL '2d6f0d660e749a9a3fb3521f61fac781' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 9e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.09 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0.024293 seconds (Warm-up)
## Chain 3: 0.018528 seconds (Sampling)
## Chain 3: 0.042821 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL '2d6f0d660e749a9a3fb3521f61fac781' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 8e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.08 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 0.022821 seconds (Warm-up)
## Chain 4: 0.025203 seconds (Sampling)
## Chain 4: 0.048024 seconds (Total)
## Chain 4:
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: y ~ x
## Data: xy (Number of observations: 20)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 1.16 0.06 1.04 1.30 1.00 2998 2509
## x 0.50 0.07 0.35 0.66 1.00 3365 2427
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.27 0.05 0.19 0.39 1.00 2565 2693
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Under the hood…
## // generated with brms 2.16.3
## functions {
## }
## data {
## int<lower=1> N; // total number of observations
## vector[N] Y; // response variable
## int<lower=1> K; // number of population-level effects
## matrix[N, K] X; // population-level design matrix
## int prior_only; // should the likelihood be ignored?
## }
## transformed data {
## int Kc = K - 1;
## matrix[N, Kc] Xc; // centered version of X without an intercept
## vector[Kc] means_X; // column means of X before centering
## for (i in 2:K) {
## means_X[i - 1] = mean(X[, i]);
## Xc[, i - 1] = X[, i] - means_X[i - 1];
## }
## }
## parameters {
## vector[Kc] b; // population-level effects
## real Intercept; // temporary intercept for centered predictors
## real<lower=0> sigma; // dispersion parameter
## }
## transformed parameters {
## }
## model {
## // likelihood including constants
## if (!prior_only) {
## target += normal_id_glm_lpdf(Y | Xc, Intercept, b, sigma);
## }
## // priors including constants
## target += student_t_lpdf(Intercept | 3, 1.2, 2.5);
## target += student_t_lpdf(sigma | 3, 0, 2.5)
## - 1 * student_t_lccdf(0 | 3, 0, 2.5);
## }
## generated quantities {
## // actual population-level intercept
## real b_Intercept = Intercept - dot_product(means_X, b);
## }