\[ %% % Add your macros here; they'll be included in pdf and html output. %% \newcommand{\R}{\mathbb{R}} % reals \newcommand{\E}{\mathbb{E}} % expectation \renewcommand{\P}{\mathbb{P}} % probability \DeclareMathOperator{\logit}{logit} \DeclareMathOperator{\logistic}{logistic} \DeclareMathOperator{\SE}{SE} \DeclareMathOperator{\sd}{sd} \DeclareMathOperator{\var}{var} \DeclareMathOperator{\cov}{cov} \DeclareMathOperator{\cor}{cor} \DeclareMathOperator{\Normal}{Normal} \DeclareMathOperator{\MVN}{MVN} \DeclareMathOperator{\LogNormal}{logNormal} \DeclareMathOperator{\Poisson}{Poisson} \DeclareMathOperator{\Beta}{Beta} \DeclareMathOperator{\Binom}{Binomial} \DeclareMathOperator{\Gam}{Gamma} \DeclareMathOperator{\Exp}{Exponential} \DeclareMathOperator{\Cauchy}{Cauchy} \DeclareMathOperator{\Unif}{Unif} \DeclareMathOperator{\Dirichlet}{Dirichlet} \DeclareMathOperator{\Wishart}{Wishart} \DeclareMathOperator{\StudentsT}{StudentsT} \DeclareMathOperator{\Weibull}{Weibull} \newcommand{\given}{\;\vert\;} \]

Spatial density estimation

Peter Ralph

8 March 2021 – Advanced Biological Statistics

Biketown

# Side-by-side maps: trip start locations (left) and end locations (right).
layout(matrix(1:2, nrow=1))
setup_map(main='starts')
points(bt_start, pch=20, cex=0.5, col='red')
setup_map(main='ends')
points(bt_end, pch=20, cex=0.5, col='blue')

plot of chunk r plot_pdx

Spatial density estimation

Kernel density estimation

::: {.columns} ::::::: {.column width=60%}

Suppose we have a bunch of points

\[ (x_1, y_1), \ldots, (x_n, y_n) \]

with density \(f(x,y)\), i.e.,

\[ f(x,y) \approx \frac{\#(\text{of points within $r$ of $(x,y)$})}{n \pi r^2} \]

Using

\[ \rho_\sigma(x,y) = \frac{1}{2\pi\sigma^2} e^{-\frac{x^2+y^2}{2\sigma^2}} \]

we can estimate \(f(x,y)\) by

\[ \hat f(x,y) = \frac{1}{n} \sum_{i=1}^n \rho_\sigma(x_i-x, y_i-y) . \]

The two choices in kernel density estimation

  1. The kernel, \(\rho()\).

    (we used a Gaussian)

  2. The bandwidth, \(\sigma\).

Your turn

  1. Simulate 200 spatial locations with a density having two “bumps”. Plot these points. (rnorm(n, mean=-3), rnorm(n, mean=+3))

  2. Make a \(20 \times 20\) grid of “reference” locations. (expand.grid( ))

  3. Compute the kernel density estimate for each location, with \(\sigma\) chosen appropriately, and plot it. (dnorm( ))

Bike trip density estimation

bkde2D

# 2D kernel density estimate of Biketown trip start locations.
library(KernSmooth)
# number of grid points along each axis of the estimation grid
grid_n <- 301
# bandwidth is in map units (degrees of longitude/latitude here)
start_smooth <- bkde2D(bt_start@coords, bandwidth=0.004, gridsize=c(grid_n, grid_n))
# bkde2D returns the grid coordinates (x1, x2) and a matrix of density values (fhat)
str(start_smooth)
## List of 3
##  $ x1  : num [1:301] -123 -123 -123 -123 -123 ...
##  $ x2  : num [1:301] 45.5 45.5 45.5 45.5 45.5 ...
##  $ fhat: num [1:301, 1:301] 0.00 0.00 1.08e-14 4.68e-16 1.78e-14 ...

Plotting surfaces, method 1

image( ) and contour( )

# image() draws the density surface; contour() overlays level curves.
# bkde2D already returns $fhat as a grid_n x grid_n matrix (see the str()
# output above), so re-wrapping it in matrix() was redundant.
image(x=start_smooth$x1, y=start_smooth$x2, z=start_smooth$fhat,
      xlab='eastings', ylab='northings')
contour(x=start_smooth$x1, y=start_smooth$x2, z=start_smooth$fhat, add=TRUE)

plot of chunk r plot_surface1x

Plotting surfaces, method 2

library(raster)
# Build an (x, y, z) data frame of the smoothed surface.
# expand.grid() already returns a data.frame, so as.data.frame() was redundant;
# its rows vary x fastest, matching the column-major order of as.vector(fhat).
smooth_df <- expand.grid(x=start_smooth$x1, y=start_smooth$x2)
smooth_df$z <- as.vector(start_smooth$fhat)
raster_smooth <- rasterFromXYZ(smooth_df)

# Plot the raster with the street map and contours overlaid.
plot(raster_smooth)
setup_map(add=TRUE)
contour(raster_smooth, add=TRUE)

plot of chunk r plot_surface2x

Choosing a bandwidth

The bandwidth is important

plot of chunk r kernsmooth

How to pick a bandwidth?

There are various automatic methods.

But… crossvalidation!

For each bandwidth:

  1. Fit using most of the data,
  2. and predict the remaining data.
  3. Do this a bunch and return an estimate of goodness-of-fit.

… but wait, what does this mean, here?

Revised:

For each bandwidth:

  1. Fit the smooth using most of the data,
  2. predict the density at the locations of the remaining data,
  3. and return the mean log density at “true” points.

Why log?:

If \(f\) and \(g\) are probability distributions, and \(x_1, \ldots, x_n\) are drawn from \(f\), then \[ L(g) = \sum_{i=1}^n \log g(x_i) \lessapprox L(f) , \] i.e., is maximized for \(g=f\). (see: cross entropy)

On example data

# Simulated example: two Gaussian "bumps" centered at x = -3 and x = +3.
n <- 400
xy <- cbind(rnorm(n, mean=rep(c(-3,3), times=c(n/2,n/2))), rnorm(n))
# 40 x 40 grid of reference locations covering the point cloud
xygrid <- expand.grid(x=seq(-5,5,length.out=40),
                      y=seq(-5,5,length.out=40))
# preallocate the density estimate, one value per grid location
xy_fhat <- rep(NA, nrow(xygrid))
sigma <- 0.25   # kernel bandwidth
# Gaussian kernel density estimate at each reference location.
# (seq_len() instead of 1:nrow() is the safe idiom for possibly-empty grids.)
for (k in seq_len(nrow(xygrid))) {
    xy_fhat[k] <- sum( exp(-( (xy[,1] - xygrid[k,1])^2 + (xy[,2] - xygrid[k,2])^2 )/(2*sigma^2) ) / (2 * pi * sigma^2 ) )
}
plot(xy, asp=1, pch=20)
# circle area is proportional to the estimated density; /6 is a display scale
points(xygrid[,1], xygrid[,2], cex=xy_fhat/6)

plot of chunk r do_it

Prediction, by linear interpolation

library(fields)
# Cross-validation score for a KDE bandwidth `bw`: fit the smooth on all
# but `ntest` randomly chosen points, then return the mean log predicted
# density at the held-out points (higher is better; see "Why log?" above).
# NOTE(review): the `points` argument is ignored -- the body uses the
# global `xy` (the simulated data above). Confirm whether `points` should
# be used instead; compare density_xval() below, which does use it.
the_xval <- function (bw, points, ntest, grid_n=301) {
    # random split: TRUE = training point, FALSE = held out
    use_these <- sample(rep(c(TRUE, FALSE), times=c(nrow(xy)-ntest, ntest)))
    smooth <- bkde2D(xy[use_these,], bandwidth=bw, gridsize=c(grid_n, grid_n))
    # rename the list components to x/y/z, as interp.surface() expects
    names(smooth) <- c("x", "y", "z")
    # bilinear interpolation of the fitted surface at the held-out locations
    pred <- interp.surface(smooth, loc=xy[!use_these,])
    # na.rm: interp.surface returns NA for points outside the estimation grid
    return(mean(log(pred), na.rm=TRUE))
}

# Evaluate 12 candidate bandwidths with 20 random train/test splits each.
the_scores <- data.frame(bw = rep(seq(0.1, 1.0, length.out=12), each=20),
                          xent = NA)
# seq_len() rather than 1:nrow() guards against a zero-row data frame
for (k in seq_len(nrow(the_scores))) {
    # note: the_xval() as written ignores its `points` argument
    the_scores$xent[k] <- the_xval(bw=the_scores$bw[k], points=bt_start, ntest=100)
}

The results

# Cross-validation score against bandwidth, with a lowess trend line on top.
plot(xent ~ bw, data=the_scores)
with(the_scores, lines(lowess(bw, xent), lwd=2, col='red'))

plot of chunk r plot_xvalA

Prediction, on bicycle data

# One cross-validation round on the bike data, written out step by step.
bw <- 0.004       # bandwidth, in degrees of lon/lat
xy <- bt_start@coords
ntest <- 100      # number of held-out points
# random split: TRUE = training point, FALSE = held out
use_these <- sample(rep(c(TRUE, FALSE), times=c(nrow(xy)-ntest, ntest)))
smooth <- bkde2D(xy[use_these,], bandwidth=bw, gridsize=c(grid_n, grid_n))
# rename the list components to x/y/z, as interp.surface() expects
names(smooth) <- c("x", "y", "z")
# predicted density at the held-out locations (NA if outside the grid)
pred <- fields::interp.surface(smooth, loc=xy[!use_these,])
hist(pred)

plot of chunk r one_xval

# Cross-validation score for a 2D KDE bandwidth: fit the smooth on all but
# `ntest` randomly held-out rows of points@coords, then return the mean log
# predicted density at the held-out locations (higher is better).
density_xval <- function (bw, points, ntest, grid_n=301) {
    coords <- points@coords
    # random split: TRUE = training point, FALSE = held out
    keep <- sample(rep(c(TRUE, FALSE), times=c(nrow(coords)-ntest, ntest)))
    fit <- bkde2D(coords[keep,], bandwidth=bw, gridsize=c(grid_n, grid_n))
    # interp.surface() wants list components named x, y, z
    fit <- setNames(fit, c("x", "y", "z"))
    # NA for held-out points falling outside the estimation grid
    held_out_density <- interp.surface(fit, loc=coords[!keep,])
    mean(log(held_out_density), na.rm=TRUE)
}

# Score the first five candidate bandwidths, 20 random splits each.
xval_scores <- data.frame(bw = rep(bw_list[1:5], each=20),
                          xent = NA)
# seq_len() rather than 1:nrow() guards against a zero-row data frame
for (k in seq_len(nrow(xval_scores))) {
    xval_scores$xent[k] <- density_xval(bw=xval_scores$bw[k], points=bt_start, ntest=100)
}

plot(xent ~ bw, data=xval_scores)

plot of chunk r do_xval

Bicycle migration

plot of chunk r plot_surface2y

Start minus end

plot of chunk r plot_diff

portland map with terrain

Kernel smoothing

What is mean local trip length?

# Parse "H:M:S" duration strings into difftimes, then store as minutes.
# Specifying units= makes the numeric value unambiguous: as.difftime()'s
# default units="auto" could otherwise pick a unit other than minutes,
# contradicting the axis label below.
bt$Duration <- as.numeric(as.difftime(bt$Duration, format="%X"), units="mins")
# Duration is already numeric now, so no further coercion is needed
hist(bt$Duration, breaks=200, xlab='ride durations in minutes')

plot of chunk r duration

Our old friend, loess

# Fit a 2D loess surface of trip duration over start location;
# span=0.1 means each local fit uses 10% of the data.
lo_dur <- loess(Duration ~ Start_Latitude * Start_Longitude, data=bt, span=0.1)
# 101 x 101 prediction grid spanning the observed start locations
pred_lats <- seq(min(bt$Start_Latitude), max(bt$Start_Latitude), length.out=101)
pred_lons <- seq(min(bt$Start_Longitude), max(bt$Start_Longitude), length.out=101)
pred_locs <- expand.grid(Start_Latitude=pred_lats,
                         Start_Longitude=pred_lons)
# predict.loess on an expand.grid() data frame returns an array whose first
# dimension follows the first grid variable, i.e. latitude varies over rows
pred_duration <- predict(lo_dur, newdata=pred_locs)

… loess!?!?!

# pred_duration has latitude along rows and longitude along columns
# (predict.loess on an expand.grid() data frame returns an array in grid
# order), but image()/contour() expect z[i, j] to correspond to x[i], y[j].
# With x = longitudes and y = latitudes, the matrix must be transposed.
image(pred_lons, pred_lats, t(pred_duration))
contour(pred_lons, pred_lats, t(pred_duration), add=TRUE)

plot of chunk r plot_loess

  1. Why?

  2. Suggest another method.

Another method: Kriging

# Thin-plate-spline ("Kriging") fit of trip duration over start location.
krig_dur <- fields::Krig(x=cbind(bt$Start_Latitude, bt$Start_Longitude), Y=bt$Duration)
# predict() returns a vector in the row order of pred_locs (latitude varies
# fastest), so filling with nrow=length(pred_lats) puts latitude along rows.
pred_krig <- matrix(predict(krig_dur, x=pred_locs), nrow=length(pred_lats))
# image()/contour() expect z[i, j] to correspond to x[i], y[j]; with
# x = longitudes and y = latitudes the (lat x lon) matrix must be transposed.
image(pred_lons, pred_lats, t(pred_krig))
contour(pred_lons, pred_lats, t(pred_krig), add=TRUE)

plot of chunk r kriging

// reveal.js plugins