\[ %% % Add your macros here; they'll be included in pdf and html output. %% \newcommand{\R}{\mathbb{R}} % reals \newcommand{\E}{\mathbb{E}} % expectation \renewcommand{\P}{\mathbb{P}} % probability \DeclareMathOperator{\logit}{logit} \DeclareMathOperator{\logistic}{logistic} \DeclareMathOperator{\SE}{SE} \DeclareMathOperator{\sd}{sd} \DeclareMathOperator{\var}{var} \DeclareMathOperator{\cov}{cov} \DeclareMathOperator{\cor}{cor} \DeclareMathOperator{\Normal}{Normal} \DeclareMathOperator{\LogNormal}{logNormal} \DeclareMathOperator{\Poisson}{Poisson} \DeclareMathOperator{\Beta}{Beta} \DeclareMathOperator{\Binom}{Binomial} \DeclareMathOperator{\Gam}{Gamma} \DeclareMathOperator{\Exp}{Exponential} \DeclareMathOperator{\Cauchy}{Cauchy} \DeclareMathOperator{\Unif}{Unif} \DeclareMathOperator{\Dirichlet}{Dirichlet} \DeclareMathOperator{\Wishart}{Wishart} \DeclareMathOperator{\StudentsT}{StudentsT} \DeclareMathOperator{\Weibull}{Weibull} \newcommand{\given}{\;\vert\;} \]

Trends, smoothing, autocorrelation, and cycles

Peter Ralph

14 January 2021 – Advanced Biological Statistics

CO2 concentrations

Atmospheric CO2 concentrations at Mauna Loa

plot of CO2 concentrations on NOAA’s site

Monthly averages

# The "average" column contains the monthly mean CO2 mole fraction determined
# from daily averages.  The mole fraction of CO2, expressed as parts per million
# (ppm) is the number of molecules of CO2 in every one million molecules of dried
# air (water vapor removed).  If there are missing days concentrated either early
# or late in the month, the monthly mean is corrected to the middle of the month
# using the average seasonal cycle.  Missing months are denoted by -99.99.
# The "interpolated" column includes average values from the preceding column
# and interpolated values where data are missing.  Interpolated values are
# computed in two steps.  First, we compute for each month the average seasonal
# cycle in a 7-year window around each monthly value.  In this way the seasonal
# cycle is allowed to change slowly over time.  We then determine the "trend"
# value for each month by removing the seasonal cycle; this result is shown in
# the "trend" column.  Trend values are linearly interpolated for missing months.
# The interpolated monthly mean is then the sum of the average seasonal cycle
# value and the trend value for the missing month.

The data

Datasets/Mauna_Loa_C02

co2 <- read.table("../Datasets/Mauna_Loa_C02/co2_mm_mlo.txt", comment="#")
names(co2) <- c("year", "month", "decimal_date", "average", "interpolated", "trend", "num_days")
co2[co2 == -99.99] <- NA
head(co2)
##   year month decimal_date average interpolated  trend num_days
## 1 1958     3     1958.208  315.71       315.71 314.62       -1
## 2 1958     4     1958.292  317.45       317.45 315.29       -1
## 3 1958     5     1958.375  317.50       317.50 314.71       -1
## 4 1958     6     1958.458      NA       317.10 314.85       -1
## 5 1958     7     1958.542  315.86       315.86 314.98       -1
## 6 1958     8     1958.625  314.93       314.93 315.94       -1

plot of chunk r plot_co2

Trend plus seasonal

plot of chunk r trend_co2

Seasonal

plot(average - trend ~ month, data=co2, pch=20, cex=0.5, col=adjustcolor("black", 0.5), xlab='Month')

plot of chunk r plot_seasonal

Autocorrelation

The autocorrelation function of a time series \((x(t) : 0 \le t \le L)\) is the correlation between points as a function of lag:

\[\begin{aligned} \rho(\ell) &= \cor[x(T), x(T+\ell)], \end{aligned}\]

where \(T\) is uniform between 0 and \(L-\ell\).

\[\begin{aligned} \rho(\ell) &= \frac{1}{T-\ell-1} \sum_{t=1}^{T-\ell} \left(\frac{ x(t) - \bar x }{ \sd[x] } \right) \left(\frac{ x(t + \ell) - \bar x }{ \sd[x] } \right) . \end{aligned}\]

Computing autocorrelation:

The acf( ) function requires a time series object:

co2_ts <- ts(co2$interpolated, deltat=1/(12))

acf(co2_ts, lag.max=10 * 12, xlab='lag (years)')

plot of chunk r co2_ts

Seasonal autocorrelation

co2_seasonal <- ts(co2$interpolated - co2$trend, deltat=1/(12))
acf(co2_seasonal, lag.max=3 * 12, xlab='lag (years)')

plot of chunk r seasonal_acf

Smoothing

loess

loess soil

loess

> help(loess)

Local Polynomial Regression Fitting

Description:

     Fit a polynomial surface determined by one or more numerical
     predictors, using local fitting.

Usage:

     loess(formula, data, weights, subset, na.action, model = FALSE,
           span = 0.75, enp.target, degree = 2,
           parametric = FALSE, drop.square = FALSE, normalize = TRUE,
           family = c("gaussian", "symmetric"),
           method = c("loess", "model.frame"),
           control = loess.control(...), ...)

The problem: put a “smooth” line through some points

xt <- data.frame(t=seq(0, 10*pi, length.out=150))
xt$x <- cos(xt$t) + rnorm(nrow(xt), sd=0.4)
plot(x ~ t, data=xt)

plot of chunk r loess

xtsm <- loess(x ~ t, data=xt, span=0.1)
xt$smoothed <- predict(xtsm, newdata=xt)

plot(x ~ t, data=xt)
lines(smoothed ~ t, data=xt, col='red')

plot of chunk r loess2

A key parameter: span

plot of chunk r loess3

Ocean temperatures

Ocean temperature is available from NOAA buoys at tidesandcurrents.noaa.gov:

tidesandcurrents.noaa.gov

Ten years of data for Newport, OR are in the Datasets/ directory

buoy location

Reading in dates:

library(lubridate) 
ocean <- read.csv("../Datasets/Ocean_Temp_Data/Newport_Sea_Temp_Data_2010-2019.csv")
ocean$date <- with(ocean,
                   ymd_hm(paste(paste(Year, Month, Day, sep='-'), Time)))
ocean$time <- hm(ocean$Time)

plot(WATER_TEMP_F ~ date, data=ocean, type='l')

plot of chunk r load_ocean

plot(WATER_TEMP_F ~ date, data=ocean, type='l', subset=Year==2012, main='2012')

plot of chunk r twentytwelve

plot(WATER_TEMP_F ~ date, data=ocean, type='l', subset=Year==2012 & Month==7, main='July 2012')

plot of chunk r july_twentytwelve

Autocorrelation

water_ts <- ts(ocean$WATER_TEMP_F, deltat=1)
acf(water_ts, lag.max=10 * 12, xlab='lag (hours)', na.action=na.pass)

plot of chunk r ocean_acf0

Removing the trend

ocean$numdate <- as.numeric(ocean$date)
month_smooth <- loess(WATER_TEMP_F ~ numdate, data=ocean,
                      span=sum(ocean$Month < 4 & ocean$Year == 2012)/nrow(ocean))
ocean$month_sm <- predict(month_smooth, newdata=ocean)

plot(WATER_TEMP_F ~ date, data=ocean, pch=20, cex=0.5, ylim=c(35, 65))
lines(month_sm ~ date, data=ocean, col='red', lwd=2)

plot of chunk r smooth1

plot of chunk r smooth2

Remaining autocorrelation

water <- ts(ocean$WATER_TEMP_F - ocean$month_sm, deltat=1)
acf(water, lag.max=10 * 12, xlab='lag (hours)', na.action=na.pass)

plot of chunk r ocean_acf

A 12 hour cycle?

acf(water, lag.max=10 * 12, xlab='lag (hours)', na.action=na.pass)
abline(v=12*1:10, col=c('red', 'blue'), lwd=2)

plot of chunk r ocean_acf2

A 12 * (1 + 1/27.2) hour cycle!

acf(water, lag.max=10 * 12, xlab='lag (hours)', na.action=na.pass)
abline(v=12*(1 + 1/27.2) * 1:10, col=c('red', 'blue'), lwd=2)

plot of chunk r ocean_acf3

… why?

Addendum

Spectra

# spectrum
wspec <- spectrum(water, na.action=na.remove)
plot(wspec)

plot of chunk r thespec

// reveal.js plugins