In this vignette we show how to use WHAM for general simulation studies. It updates and replaces a previous WHAM vignette on operating models and MSE. We assume you already have wham installed and are relatively familiar with the package. If not, read the Introduction and Tutorial. Below we walk through simulating data from a fitted model, fitting estimation models that differ from the operating model, building operating models without an ASAP data file, and running a simple closed-loop (MSE-style) simulation.
# devtools::install_github("timjmiller/wham", dependencies=TRUE)
library(wham)
library(ggplot2)
library(tidyr)
library(dplyr)
Create a directory for this analysis:
# choose a location to save output, otherwise will be saved in working directory
write.dir <- "choose/where/to/save/output"
dir.create(write.dir)
setwd(write.dir)
wham.dir <- find.package("wham")
asap.file.path = file.path(wham.dir,"extdata","ex1_SNEMAYT.dat")
asap3 <- read_asap3_dat(asap.file.path)
Make an input and fit a model in wham. This model was also fit in the first example vignette.
#define selectivity model
selectivity=list(
model=rep("age-specific",3), #define selectivity model
re=rep("none",3), #define selectivity random effects model
#define initial/fixed values for age-specific selectivity
initial_pars=list(c(0.5,0.5,0.5,1,1,0.5),c(0.5,0.5,0.5,1,0.5,0.5),c(0.5,1,1,1,0.5,0.5)),
fix_pars=list(4:5,4,2:4)) #which ages to not estimate age-specific selectivity parameters
NAA_re = list(
sigma="rec", #random effects for recruitment only
cor="iid", #random effects are independent
recruit_model = 2) #mean recruitment is the only fixed effect parameter
input <- prepare_wham_input(
asap3,
selectivity = selectivity,
NAA_re = NAA_re,
model_name="Ex 1: SNEMA Yellowtail Flounder")
mod <- fit_wham(input, do.osa = F, do.retro=F, MakeADFun.silent = T) # don't do retro peels and OSA residuals, don't show output during optimization
names(mod)
#> [1] "par" "fn" "gr" "he"
#> [5] "hessian" "method" "retape" "env"
#> [9] "report" "simulate" "years" "years_full"
#> [13] "ages.lab" "model_name" "input" "wham_version"
#> [17] "opt" "date" "dir" "rep"
#> [21] "TMB_version" "parList" "final_gradient" "is_sdrep"
#> [25] "na_sdrep" "runtime" "sdrep"
Several of the elements in the object returned by fit_wham are those produced by TMB::MakeADFun. mod$par holds the fixed effects: the parameters over which the marginal likelihood is optimized. mod$fn is the function that returns the (Laplace approximation of the) negative marginal log-likelihood, mod$gr is the function that returns the gradient, and mod$he returns the Hessian. mod$report is a function that provides a list of output defined by REPORT() calls in the compiled cpp code; its evaluated result is already provided by mod$rep. When available, mod$sdrep provides the Hessian-based standard errors for estimates from the model, which is generated by the TMB::sdreport function.
mod$sdrep #the standard errors of the fixed effects
#> Estimate Std. Error
#> mean_rec_pars 10.1429738203 0.28530852
#> logit_q -7.6512254853 0.07240598
#> logit_q -8.3169815516 0.04787652
#> log_F1 -0.3010365597 0.10378094
#> F_devs 0.9285229416 0.13406941
#> F_devs -0.0004773479 0.12741677
#> F_devs -0.6333646293 0.14063536
#> F_devs 0.3545992256 0.15672409
#> F_devs 0.0007498385 0.14814836
#> F_devs -0.1186869337 0.13952459
#> F_devs -0.3135424185 0.13573691
#> F_devs -0.3638492439 0.14032728
#> F_devs 0.3747984450 0.15107794
#> F_devs 0.4062432156 0.14131528
#> F_devs 0.3345658415 0.12315402
#> F_devs -0.0926085078 0.12354231
#> F_devs -0.0268561987 0.12975421
#> F_devs -0.1547492535 0.13236249
#> F_devs -0.5781912378 0.14060717
#> F_devs 0.0729322405 0.14723880
#> F_devs 0.5728244469 0.13078407
#> F_devs -0.1306285071 0.11004614
#> F_devs -0.2800617478 0.11369600
#> F_devs -0.5485798871 0.12377094
#> F_devs 0.3737751961 0.13772134
#> F_devs -0.9198984028 0.13376737
#> F_devs 0.2580256177 0.14685907
#> F_devs 0.4837561008 0.14669630
#> F_devs -0.0965007661 0.14584155
#> F_devs 0.1868119330 0.15441950
#> F_devs -0.0813445695 0.15144239
#> F_devs 0.1925950197 0.15297110
#> F_devs -0.4368514619 0.14045843
#> F_devs -0.1981206595 0.14133134
#> F_devs 0.2086433955 0.14571752
#> F_devs -0.5430209898 0.13831609
#> F_devs -0.2223686593 0.14787653
#> F_devs -0.3312150849 0.14530564
#> F_devs -0.1132458326 0.14270517
#> F_devs -0.3259619687 0.13965183
#> F_devs -0.3070878804 0.14247434
#> F_devs 0.4437180306 0.14646439
#> F_devs 0.4592751847 0.14826234
#> F_devs 0.3692754247 0.15160407
#> F_devs 0.2753424755 0.15409611
#> F_devs -0.2900113211 0.14619853
#> F_devs -0.2117818048 0.14619819
#> log_N1_pars 10.1750412226 0.10527751
#> log_N1_pars 9.9577703377 0.12199347
#> log_N1_pars 10.6253148155 0.09076633
#> log_N1_pars 9.9797892153 0.12635104
#> log_N1_pars 9.5175748165 0.18987237
#> log_N1_pars 10.3051733768 0.13849860
#> log_NAA_sigma 0.2984650943 0.10907366
#> logit_selpars -3.2740083810 0.07972564
#> logit_selpars -3.5663837086 0.12201301
#> logit_selpars -0.4793729495 0.08493727
#> logit_selpars -0.6123439738 0.07866043
#> logit_selpars -0.0404455537 0.13774029
#> logit_selpars 1.5214280591 0.25870728
#> logit_selpars 1.3197981151 0.32315199
#> logit_selpars 1.8480537113 0.72656177
#> logit_selpars -0.3155761732 0.27025562
#> logit_selpars -1.7055860028 0.14892970
#> logit_selpars -1.8842728586 0.17675440
#> logit_selpars -2.2054297407 0.20103797
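As a quick illustration of the MakeADFun components described above, we can evaluate the objective function and gradient at the optimized fixed effects. This is a minimal sketch; it assumes the fit converged so that mod$opt holds the optimized parameter vector.
mod$fn(mod$opt$par) #negative marginal log-likelihood at the optimized fixed effects
max(abs(mod$gr(mod$opt$par))) #largest absolute gradient component, should be near zero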
The important item for this vignette is mod$simulate:
mod$simulate
#> function (par = last.par, complete = FALSE)
#> {
#> f(par, order = 0, type = "double", do_simulate = TRUE)
#> sim <- as.list(reportenv)
#> if (complete) {
#> ans <- data
#> ans[names(sim)] <- sim
#> }
#> else {
#> ans <- sim
#> }
#> ans
#> }
#> <bytecode: 0x000002092a0a4a60>
#> <environment: 0x000002091fb898b0>
As we can see, it is a function, but less apparent is that it will return anything provided by REPORT calls and, most importantly, it will simulate the quantities within SIMULATE blocks in the compiled cpp code. More information about simulation using the TMB package can be found in the TMB documentation. The WHAM cpp code is written to be able to simulate all observations and any random effects that are specified to be part of the particular model configuration. mod$simulate is what we use to simulate data for simulation studies. The complete argument tells the function to provide all data inputs whether they are simulated or not. For a wham model this includes important things like weight and maturity at age.
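For example, a single call returns a list containing both the simulated observations and the fixed data inputs. A quick sketch (the element names checked here are the ones used later in this vignette):
set.seed(1)
sim <- mod$simulate(complete = TRUE)
#simulated observations and fixed inputs like weight at age are returned together
c("agg_catch", "agg_indices", "catch_paa", "index_paa", "obsvec", "waa") %in% names(sim)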
Here is a simple function that will simulate data from an operating model. With the argument self.fit = TRUE, it will also fit the same model configuration to those data.
sim_fn <- function(om, self.fit = FALSE){
input <- om$input
input$data = om$simulate(complete=TRUE)
if(self.fit) {
fit <- fit_wham(input, do.osa = F, do.retro = F, MakeADFun.silent = T)
return(fit)
} else return(input)
}
set.seed(123)
self_sim_fit <- sim_fn(mod, self.fit = TRUE)
plot(mod$years, self_sim_fit$input$data$SSB, type = 'l', ylab = "SSB", xlab = "Year")
lines(mod$years, self_sim_fit$rep$SSB, lty = 2, col = 'red')
Note that we just have to overwrite the data component of the input with the values produced by om$simulate(complete=TRUE). This call of sim_fn fits the same model that was assumed to simulate the data. fit_wham saves the input used to fit the model, and om$simulate(complete=TRUE) provides all model output produced by REPORT calls in the cpp file. This includes the true SSB for the simulated data, which can be compared with the values estimated from the simulated data.
Obviously, for a simulation study many simulated data sets and model fits are needed for a given operating model. There are many ways to accomplish this task. It is often desirable to first simulate all the data sets before fitting them, and to update saved results with each fit so that results are not lost if a catastrophic error occurs. Here is a low-tech approach to simulating 100 data inputs and saving information from fits of the same model to each of those data sets.
set.seed(123)
sim_inputs <- replicate(100, sim_fn(mod), simplify=F)
res = list(reps = list(), par.est = list(), par.se = list(), adrep.est = list(), adrep.se =list())
for(i in 1:length(sim_inputs)){
cat(paste0("i: ",i, "\n"))
tfit = fit_wham(sim_inputs[[i]], do.osa = F, do.retro = F, MakeADFun.silent = T)
res$reps[[i]] = tfit$rep
res$par.est[[i]] = as.list(tfit$sdrep, "Estimate")
res$par.se[[i]] = as.list(tfit$sdrep, "Std. Error")
res$adrep.est[[i]] = as.list(tfit$sdrep, "Estimate", report = TRUE)
res$adrep.se[[i]] = as.list(tfit$sdrep, "Std. Error", report = TRUE)
}
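A more defensive variant of this loop (a sketch) wraps each fit in try() and saves the accumulated results to disk after every iteration, so that a failed fit or a crash does not lose the completed work. The file name is arbitrary and the file is written to the working directory set above.
res = list(reps = list(), par.est = list(), par.se = list(), adrep.est = list(), adrep.se = list())
for(i in 1:length(sim_inputs)){
  cat(paste0("i: ", i, "\n"))
  tfit = try(fit_wham(sim_inputs[[i]], do.osa = FALSE, do.retro = FALSE, MakeADFun.silent = TRUE))
  if(!inherits(tfit, "try-error")){ #only store results when the fit succeeded
    res$reps[[i]] = tfit$rep
    res$par.est[[i]] = as.list(tfit$sdrep, "Estimate")
    res$par.se[[i]] = as.list(tfit$sdrep, "Std. Error")
    res$adrep.est[[i]] = as.list(tfit$sdrep, "Estimate", report = TRUE)
    res$adrep.se[[i]] = as.list(tfit$sdrep, "Std. Error", report = TRUE)
  }
  saveRDS(res, "self_test_results.RDS") #overwrite the saved results after each fit
}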
The replicate function is used to generate 100 simulated inputs. Then we loop over the simulated data sets, fit each of them, and save results for each fit. The results saved are the values provided by REPORT calls in the cpp file, the fixed effects MLEs (and empirical Bayes estimates of random effects) and their standard errors, and the estimates and standard errors for values provided by ADREPORT calls in the cpp file. See ?TMB::as.list.sdreport for more information about accessing sdreport information like this. Saving these attributes allows one to make inferences about bias of the maximum likelihood estimators and corresponding standard errors, and confidence interval coverage, for the basic parameters as well as a variety of derived model output (e.g., SSB, reference points).
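For example, here is a minimal sketch of checking 95% confidence interval coverage for the mean recruitment parameter (mean_rec_pars), taking the estimate from the original fit as the true value used by the operating model:
true_mean_rec = as.list(mod$sdrep, "Estimate")$mean_rec_pars #value used to simulate the data
est = sapply(res$par.est, function(x) x$mean_rec_pars)
se = sapply(res$par.se, function(x) x$mean_rec_pars)
covered = abs(est - true_mean_rec) < qnorm(0.975) * se #is the truth inside each 95% CI?
mean(covered) #proportion of simulations with coverage, ideally near 0.95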
Below is a plot of bias and 95% confidence intervals for SSB estimation.
true = sapply(sim_inputs, function(x) x$data$SSB) #true SSB from each simulated data set
est = sapply(res$reps, function(x) x$SSB) #estimated SSB from each fit
SSB_rel_resid = est/true - 1 #relative error, years x simulations
#normal-based 95% CI for the mean relative error: mean +/- 1.96 * sd/sqrt(100 simulations)
resid_cis = apply(SSB_rel_resid,1,mean) + apply(SSB_rel_resid,1,sd)*qnorm(0.975)*t(matrix(c(-1,1),2,length(mod$years)))/10
plot(mod$years, apply(SSB_rel_resid,1,mean), ylim = c(-0.1,0.05), ylab = "Estimated Relative Bias SSB", xlab = "Year")
lines(mod$years, resid_cis[,1])
lines(mod$years, resid_cis[,2])
abline(h = 0, lty = 2)
When the fitted model is different from that used to simulate the data, there is a bit more work involved. Some components of the input used by fit_wham are important.
names(input)
#> [1] "data" "par" "map" "random" "years"
#> [6] "years_full" "ages.lab" "model_name" "asap3"
The first 4 elements of the input (data, par, map, random) are those used by TMB::MakeADFun to set up an objective function with derivative information (i.e., gradient, Hessian) used by the optimizer nlminb in fit_wham. We saw above that we could just swap the entire data element of the input when the estimation model is the same as the operating model. More generally though, components of these 4 input lists will need to be modified for simulation studies where the operating model and estimation model are not the same. If the available data are the same for the operating and fitted models, only a few elements of the input$data list need to be copied.
Below is an example where an alternative likelihood for the age composition observations is assumed for the estimation model.
input_em <- prepare_wham_input(
asap3,
recruit_model=2,
model_name="Ex 1: SNEMA Yellowtail Flounder",
selectivity=selectivity,
NAA_re = NAA_re,
age_comp = "logistic-normal-miss0") #the only difference from original input
#only replace the observations so that the other inputs for configuration are correct.
#IMPORTANT TO PROVIDE obsvec!
sim_fn = function(om, input, do.fit = FALSE){
obs_names = c("agg_indices","agg_catch","catch_paa","index_paa", "Ecov_obs", "obsvec")
om_data = om$simulate(complete = TRUE)
input$data[obs_names] = om_data[obs_names]
if(do.fit) {
fit = fit_wham(input, do.osa = F, do.retro = F, MakeADFun.silent = TRUE)
return(list(fit, om_data))
} else return(list(input,om_data))
}
set.seed(123)
simfit = sim_fn(mod, input_em, do.fit = TRUE)
plot(mod$years, simfit[[2]]$SSB, type = 'l', ylab = "SSB", xlab = "Year")
lines(mod$years, simfit[[1]]$rep$SSB, lty = 2, col = 'red')
It is very important that the obsvec component of the simulated data is put into the input used for fitting because it holds the values in agg_catch, agg_indices, etc. in vector form for evaluating the likelihood. This is necessary to allow one-step-ahead residuals to be calculated using TMB. Note also that because only a few elements are copied into input$data, we need to save all of the true values of the simulated population as a separate component that is returned. For this particular simulation, SSB is estimated to be greater than the truth when the logistic normal is assumed but the observations were generated by a multinomial. The same sort of approach as above can be used for simulating and fitting many data sets.
Suppose we wanted to see how the estimation model behaves when we assume the multinomial age composition is more precise than it really is. Here we want to use all of the same parameter estimates from the original model but change the necessary fixed data inputs. We modify the input with the appropriate data and populate input$par with the estimates from the original fit. Then we create this alternative operating model by calling fit_wham(input, do.fit=F), which builds the model configuration with the dll so that data can be simulated with this particular configuration. Note that do.fit=F means that whatever parameter values are set (normally as initial and fixed values) in input$par will be used to simulate data. For the estimation model we can use the same input as the operating model except for changing the inputs for the effective sample size.
sim_fn = function(om, Neff_om = 50, Neff_em = 200, do.fit = FALSE){
  om_input = om$input #the original input
  om_input$data$index_Neff[] = Neff_om #true effective sample size for the indices
  om_input$data$catch_Neff[] = Neff_om #true effective sample size for the catches
  om_input$par = as.list(om$sdrep, "Estimate") #assume parameter values estimated from original model
  om_alt = fit_wham(om_input, do.fit = F, MakeADFun.silent=T) #make unfitted model for simulating data
  om_data = om_alt$simulate(complete = TRUE)
  em_input = om_input #the same configuration other than Neff
  em_input$data$index_Neff[] = Neff_em #assumed effective sample size for the indices
  em_input$data$catch_Neff[] = Neff_em #assumed effective sample size for the catches
  obs_names = c("agg_indices","agg_catch","catch_paa","index_paa", "Ecov_obs", "obsvec")
  em_input$data[obs_names] = om_data[obs_names]
  if(do.fit) {
    fit = fit_wham(em_input, do.osa = F, do.retro = F, MakeADFun.silent = TRUE)
    return(list(fit, om_data))
  } else return(list(em_input, om_data))
}
set.seed(123)
simfit = sim_fn(mod, do.fit = TRUE)
plot(mod$years, simfit[[2]]$SSB, type = 'l', ylab = "SSB", xlab = "Year")
lines(mod$years, simfit[[1]]$rep$SSB, lty = 2, col = 'red')
Note that we could make the assumed effective sample size incorrect for only a subset of the fleets and indices. In the sim_fn function here we could have also just supplied all of om_data to em_input$data and then changed the Neff appropriately, rather than replacing just the observations (see the sketch below). Also note that changing the Neff will only affect fits to those indices and fleets where the multinomial age composition assumption is made and where age composition observations are available.
The incorrect age composition precision has a large effect here!
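A sketch of the alternative mentioned above: copy all of the simulated data from the (unfitted) operating model and then overwrite the assumed effective sample sizes, rather than copying only the observation elements. Here om_alt and em_input play the same roles as the objects created inside sim_fn.
sim_fn_alt = function(om_alt, em_input, Neff_em = 200){
  em_input$data = om_alt$simulate(complete = TRUE) #copy everything, including the true Neff
  em_input$data$index_Neff[] = Neff_em #then overwrite with the assumed effective sample sizes
  em_input$data$catch_Neff[] = Neff_em
  return(em_input)
}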
Suppose we wanted to evaluate the effect of assuming a model with just random effects on recruitment when a full state-space model was the operating model. We will fit the full state-space model to the example data to get a configuration for the operating model then simulate data from it and fit the recruitment-only model to it.
NAA_re_om = list(
sigma="rec+1", #random effects for recruitment and older age classes
cor="iid", #random effects are independent
recruit_model = 2) #mean recruitment is the only fixed effect parameter
NAA_re_em = list(
sigma="rec", #random effects for recruitment only
cor="iid", #random effects are independent
recruit_model = 2) #mean recruitment is the only fixed effect parameter
input_om <- prepare_wham_input(
asap3,
selectivity = selectivity,
NAA_re = NAA_re_om,
model_name="Ex 1: SNEMA Yellowtail Flounder")
input_em <- prepare_wham_input(
asap3,
selectivity = selectivity,
NAA_re = NAA_re_em,
model_name="Ex 1: SNEMA Yellowtail Flounder")
om_ss <- fit_wham(input_om, do.osa = F, do.retro=F, MakeADFun.silent = T) # don't do retro peels and OSA residuals, don't show output during optimization
sim_fn = function(om, input, do.fit = FALSE){
obs_names = c("agg_indices","agg_catch","catch_paa","index_paa", "Ecov_obs", "obsvec")
om_data = om$simulate(complete = TRUE)
input$data[obs_names] = om_data[obs_names]
if(do.fit) {
fit = fit_wham(input, do.osa = F, do.retro = F, MakeADFun.silent = TRUE)
return(list(fit, om_data))
} else return(list(input,om_data))
}
set.seed(123)
simfit = sim_fn(om_ss, input_em, do.fit = TRUE)
plot(om_ss$years, simfit[[2]]$SSB, type = 'l', ylab = "SSB", xlab = "Year")
lines(om_ss$years, simfit[[1]]$rep$SSB, lty = 2, col = 'red')
The scale of the population estimates from the recruitment-only model is similar to the true value, but it does not have sufficient flexibility to capture the inter-annual variability in the population size.
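We can make the same comparison for recruitment (age-1 abundance), which is available as NAA in both the operating model simulation and the fitted model report (a quick sketch):
plot(om_ss$years, simfit[[2]]$NAA[,1], type = 'l', ylab = "Recruitment", xlab = "Year")
lines(om_ss$years, simfit[[1]]$rep$NAA[,1], lty = 2, col = 'red')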
Suppose the true selectivity is that estimated from the original model, but we want to evaluate the effect of assuming logistic selectivity. Use the same NAA_re defined earlier but change the selectivity:
selectivity_em = list(
model = c(rep("logistic", input$data$n_fleets),rep("logistic", input$data$n_indices)),
initial_pars = rep(list(c(5,1)), input$data$n_fleets + input$data$n_indices)) #fleet, index
input_em <- prepare_wham_input(
asap3,
selectivity = selectivity_em,
NAA_re = NAA_re,
model_name="Ex 1: SNEMA Yellowtail Flounder")
sim_fn = function(om, input, do.fit = FALSE){
obs_names = c("agg_indices","agg_catch","catch_paa","index_paa", "Ecov_obs", "obsvec")
om_data = om$simulate(complete = TRUE)
input$data[obs_names] = om_data[obs_names]
if(do.fit) {
fit = fit_wham(input, do.osa = F, do.retro = F, MakeADFun.silent = TRUE)
return(list(fit, om_data))
} else return(list(input,om_data))
}
set.seed(123)
simfit = sim_fn(mod, input_em, do.fit = TRUE)
plot(mod$years, simfit[[2]]$SSB, type = 'l', ylab = "SSB", xlab = "Year")
lines(mod$years, simfit[[1]]$rep$SSB, lty = 2, col = 'red')
plot(mod$years, simfit[[2]]$SSB/exp(simfit[[2]]$log_SSB_FXSPR), type = 'l', ylab = "SSB/SSB(F40)", xlab = "Year")
lines(mod$years, simfit[[1]]$rep$SSB/exp(simfit[[1]]$rep$log_SSB_FXSPR), lty = 2, col = 'red')
Lower SSB and stock status are estimated if logistic selectivity is assumed.
Note that the simulated data are conditioned on the fixed effects parameter values. If a model is fitted using fit_wham then the optimized values are used by mod$simulate for simulations, but these parameters can also be specified without fitting the model, so simulations can be configured more generally. Here we will use the functionality of prepare_wham_input to create a model object without an ASAP dat file and then set the parameter values we want to assume for the operating model.
First we will read in a function make_basic_info that will make a list that can be provided to the basic_info argument of prepare_wham_input. The function is defined in the vignette_10_helper file and the elements of the output it provides are defined in the help file for prepare_wham_input.
make_basic_info <- function(base_years = 1982:2021, ages = 1:10, Fhist = "updown", n_feedback_years = 0) { #changed years
info <- list()
info$ages <- ages
info$years <- as.integer(base_years[1] - 1 + 1:(length(base_years) + n_feedback_years))
info$n_fleets <- 1
info$n_indices <- 1
na <- length(info$ages)
ny <- length(info$years)
nby <- length(base_years)
mid <- floor(nby/2)
#up then down
if(Fhist == "updown") info$F <- matrix(0.2 + c(seq(0,0.4,length.out = mid),seq(0.4,0,length.out=nby-mid)),nby, info$n_fleets)
#down then up
if(Fhist == "downup") info$F <- matrix(0.2 + c(seq(0.4,0,length.out = mid),seq(0,0.4,length.out=nby-mid)),nby, info$n_fleets)
if(n_feedback_years>0) info$F <- rbind(info$F, info$F[rep(nby, n_feedback_years),, drop = F]) #same F as terminal year for feedback period
info$catch_cv <- matrix(0.1, ny, info$n_fleets)
info$catch_Neff <- matrix(200, ny, info$n_fleets)
info$index_cv <- matrix(0.3, ny, info$n_indices)
info$index_Neff <- matrix(100, ny, info$n_indices)
info$fracyr_indices <- matrix(0.5, ny, info$n_indices)
info$index_units <- rep(1, info$n_indices) #biomass
info$index_paa_units <- rep(2, info$n_indices) #abundance
info$maturity <- t(matrix(1/(1 + exp(-1*(1:na - na/2))), na, ny))
L <- 100*(1-exp(-0.3*(1:na - 0)))
W <- exp(-11)*L^3
nwaa <- info$n_indices + info$n_fleets + 2
info$waa <- array(NA, dim = c(nwaa, ny, na))
for(i in 1:nwaa) info$waa[i,,] <- t(matrix(W, na, ny))
info$fracyr_SSB <- rep(0.25,ny)
info$q <- rep(0.3, info$n_indices)
info$selblock_pointer_fleets <- t(matrix(1:info$n_fleets, info$n_fleets, ny))
info$selblock_pointer_indices <- t(matrix(info$n_fleets + 1:info$n_indices, info$n_indices, ny))
return(info)
}
Various attributes are specified in the make_basic_info function, including the number of fleets and indices, the years the model spans, the number of ages, and the precision of the aggregate catch and index observations. Maturity and weight at age are defined, as well as catchabilities for the indices, when the indices occur during the year, and the selectivity blocks to assign to each fleet and index.
Many of the other necessary attributes and initial parameters can be defined using the NAA_re, selectivity, and M arguments to prepare_wham_input. Below we specify logistic selectivity and associated parameters (a50 and 1/slope), M = 0.2, and initial numbers at age and mean recruitment, all using the lists supplied to these arguments of prepare_wham_input. However, nowhere have we specified a value for the standard deviation of the (log) recruitment random effects, so the initial value (sd = 1) given in prepare_wham_input will be used. Importantly, fit_wham must be called with do.fit=FALSE to set up the model to be simulated with the parameter values specified rather than estimated.
data(vignette_10_helper)
stock_om_info = make_basic_info()
selectivity_om = list(
model = c(rep("logistic", stock_om_info$n_fleets),rep("logistic", stock_om_info$n_indices)),
initial_pars = rep(list(c(5,1)), stock_om_info$n_fleets + stock_om_info$n_indices)) #fleet, index
M_om = list(initial_means = rep(0.2, length(stock_om_info$ages)))
NAA_re_om = list(
N1_pars = exp(10)*exp(-(0:(length(stock_om_info$ages)-1))*M_om$initial_means[1]),
sigma = "rec", #random about mean
cor="iid", #random effects are independent
use_steepness = 0,
recruit_model = 2, #random effects with a constant mean
recruit_pars = exp(10)
)
stock_om_input = prepare_wham_input(basic_info = stock_om_info, selectivity = selectivity_om, NAA_re = NAA_re_om, M = M_om)
stock_om = fit_wham(stock_om_input, do.fit = FALSE, MakeADFun.silent = TRUE)
As stated above, many of the parameter values are specified in the lists supplied to prepare_wham_input, but not all of them. In particular, we have not specified the variance parameter for the recruitment random effects so the default value for fitting a wham model is set.
stock_om_input$par$log_NAA_sigma
#> [1] 0
which will mean a standard deviation = 1 is assumed. In general, the parameters defining the variance and correlation of any random effects and any effects of environmental covariates currently need to be specified by changing the input returned by prepare_wham_input.
There are two parameter elements that we need to consider when using these random effects: log_NAA_sigma, as mentioned above, and trans_NAA_rho, which defines the autocorrelation parameters on a logit scale with parameter bounds of (-1,1). The latter needs to be specified when the cor element of the NAA_re list is not 'iid'. So here we will consider a 2D AR1 model for the numbers-at-age random effects to demonstrate setting the parameters. We will suppose that we want to assume standard deviations of 0.6 for recruitment and 0.2 for older ages, and correlations of 0.4 over time and 0.7 over age.
stock_om_info = make_basic_info()
M_om = list(initial_means = rep(0.2, length(stock_om_info$ages)))
selectivity_om = list(
model = c(rep("logistic", stock_om_info$n_fleets),rep("logistic", stock_om_info$n_indices)),
initial_pars = rep(list(c(5,1)), stock_om_info$n_fleets + stock_om_info$n_indices)) #fleet, index
NAA_re_om = list(
N1_pars = exp(10)*exp(-(0:(length(stock_om_info$ages)-1))*M_om$initial_means[1]),
sigma = "rec+1", #random effects for recruitment and older age classes
cor="2dar1", #random effects correlated among age classes and across time (separably)
use_steepness = 0,
recruit_model = 2, #random effects with a constant mean
recruit_pars = exp(10)
)
stock_om_input = prepare_wham_input(basic_info = stock_om_info, selectivity = selectivity_om, NAA_re = NAA_re_om, M = M_om)
stock_om_input$par$log_NAA_sigma
#> [1] 0
stock_om_input$par$log_NAA_sigma = log(c(0.6,0.2))
stock_om_input$par$log_NAA_sigma
#> [1] -0.5108256 -1.6094379
stock_om_input$par$trans_NAA_rho
#> [1] 0 0
stock_om_input$par$trans_NAA_rho = wham:::gen.logit(c(0.7,0.4), -1, 1,s=2) #age first, year second
stock_om_input$par$trans_NAA_rho
#> [1] 0.8673005 0.4236489
Note we made use of an unexported function from the wham package that does the transformation of parameters estimated on the logit scale. Note also that the correlation parameter for age is first and that for time is second in the trans_NAA_rho vector. If cor = 'ar1_a' then we would only change the first parameter in trans_NAA_rho, and if cor = 'ar1_y' we would only change the second parameter. Leaving the other value at zero makes that correlation parameter zero and the random effects independent in the appropriate dimension. Now when stock_om$simulate() is called, the variability will be as we specified.
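As a quick sanity check, we can invert the transform to confirm the values we set correspond to the intended correlations. This sketch assumes gen.logit(x, -1, 1, s) is log((x + 1)/(1 - x))/s, which is consistent with the values printed above.
inv_gen_logit = function(y, s = 2) -1 + 2/(1 + exp(-s * y)) #inverse of the scaled logit on (-1, 1)
inv_gen_logit(stock_om_input$par$trans_NAA_rho) #should return approximately 0.7 (age) and 0.4 (year)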
M random effects assumptions are similar to the NAA random effects, but the parameter names are different. For M there is a single vector (M_repars) of 3 values that defines the transformed M variance and autoregressive parameters: standard deviation, correlation with age, correlation with year. Here we assume we have random effects on recruitment and on natural mortality. As above, we define the general model to have 2D autocorrelation of the M random effects for the most general case, with the standard deviation of the (log) M random effects = 0.1 and correlations with age and time of 0.7 and 0.4.
stock_om_info = make_basic_info()
M_om = list(
model = "constant", # a single mean M for all ages
initial_means = 0.2,
re = "2dar1")
selectivity_om = list(
model = c(rep("logistic", stock_om_info$n_fleets),rep("logistic", stock_om_info$n_indices)),
initial_pars = rep(list(c(5,1)), stock_om_info$n_fleets + stock_om_info$n_indices)) #fleet, index
NAA_re_om = list(
N1_pars = exp(10)*exp(-(0:(length(stock_om_info$ages)-1))*M_om$initial_means[1]),
sigma = "rec", #random about mean
use_steepness = 0,
recruit_model = 2, #random effects with a constant mean
recruit_pars = exp(10)
)
stock_om_input = prepare_wham_input(basic_info = stock_om_info, selectivity = selectivity_om, NAA_re = NAA_re_om, M = M_om)
stock_om_input$par$log_NAA_sigma
#> [1] 0
stock_om_input$par$log_NAA_sigma = log(0.6)
stock_om_input$par$log_NAA_sigma
#> [1] -0.5108256
stock_om_input$par$M_repars
#> [1] -2.302585 0.000000 0.000000
stock_om_input$par$M_repars = c(log(0.1), wham:::gen.logit(c(0.7,0.4),-1,1, s=2))
stock_om_input$par$M_repars
#> [1] -2.3025851 0.8673005 0.4236489
#stock_om = fit_wham(stock_om_input, do.fit = FALSE, MakeADFun.silent = TRUE)
Note that the standard deviation of the M random effects we are assuming is the same as the default value for estimation. Note also that the mean of the M autoregressive random effects is defined by the initial_means component of the M argument to prepare_wham_input and is not estimated by default. To estimate the mean of the M random effects at age, or to estimate a constant M without random effects, specify the M$est_ages argument to prepare_wham_input. There is a difference from numbers at age in how random effects for M are specified. If M$re = "ar1_a" then there are only n_ages random effects that are constant over time. Similarly, for M$re = "ar1_y" there are n_years_model random effects that are used for each age class. However, setting the correlation parameters when M$re = "ar1_a" or M$re = "ar1_y" is analogous to the description for numbers-at-age random effects. To get ar1_a or ar1_y models analogous to numbers at age, one would have to specify M$re = "2dar1" and change map$M_repars to fix the appropriate correlation parameter at 0 (a sketch follows the code block below). To force random effects for certain age classes to be the same, one would also have to configure map$M_re appropriately.
M_om = list(
model = "constant", # a single mean M for all ages
initial_means = 0.2,
est_ages = 1, #must be 1 when model = "constant"
re = "2dar1")
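A minimal sketch of the map$M_repars change mentioned above, assuming M_repars is ordered as (standard deviation, age correlation, year correlation): fixing the year correlation at its initial value of 0 makes a "2dar1" specification behave like an AR1 over age only.
#NA entries in a TMB map are not estimated and stay at their initial values (0 here for the year correlation)
stock_om_input$map$M_repars = factor(c(1, 2, NA))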
Setting the variance parameters for selectivity is analogous to that for natural mortality random effects. The important difference is that the parameter holding the random effects variance parameters (sel_repars) is a matrix with a row for each selectivity block and the columns defined as for M_repars. Here we have two selectivity blocks: one for the fleet and one for the index. We will assume that the standard deviation parameter for the fleet is 0.4 and that for the index is 0.1, but they both have the same correlation parameters with age and time: 0.7 and 0.4.
stock_om_info = make_basic_info()
M_om = list(
model = "constant", # a single mean M for all ages
initial_means = 0.2)
selectivity_om = list(
model = c(rep("logistic", stock_om_info$n_fleets),rep("logistic", stock_om_info$n_indices)),
initial_pars = rep(list(c(5,1)), stock_om_info$n_fleets + stock_om_info$n_indices),
re = rep("2dar1", stock_om_info$n_fleets + stock_om_info$n_indices))
NAA_re_om = list(
N1_pars = exp(10)*exp(-(0:(length(stock_om_info$ages)-1))*M_om$initial_means[1]),
sigma = "rec", #random about mean
use_steepness = 0,
recruit_model = 2, #random effects with a constant mean
recruit_pars = exp(10)
)
stock_om_input = prepare_wham_input(basic_info = stock_om_info, selectivity = selectivity_om, NAA_re = NAA_re_om, M = M_om)
stock_om_input$par$log_NAA_sigma
#> [1] 0
stock_om_input$par$log_NAA_sigma = log(0.6)
stock_om_input$par$log_NAA_sigma
#> [1] -0.5108256
stock_om_input$par$sel_repars
#> [,1] [,2] [,3]
#> [1,] -2.302585 0 0
#> [2,] -2.302585 0 0
stock_om_input$par$sel_repars[1,] = c(log(0.4), wham:::gen.logit(c(0.7,0.4),-1,1, s=2))
stock_om_input$par$sel_repars[2,] = c(log(0.1), wham:::gen.logit(c(0.7,0.4),-1,1, s=2))
stock_om_input$par$sel_repars
#> [,1] [,2] [,3]
#> [1,] -0.9162907 0.8673005 0.4236489
#> [2,] -2.3025851 0.8673005 0.4236489
#stock_om = fit_wham(stock_om_input, do.fit = FALSE, MakeADFun.silent = TRUE)
Note that in this case, where we assume logistic selectivity and 2D AR1 random effects, the correlation with "age" is actually just the correlation of the two logistic selectivity parameters on their estimated/transformed scale. Also, differing from natural mortality, the mean parameters for the selectivity random effects (logit_selpars) are estimated by default.
Setting the variance parameters for catchability is analogous to that for natural mortality and selectivity random effects. The parameters defining the variance and correlation of the annual catchability are defined in q_repars, which is a matrix with rows corresponding to the different indices and 2 columns for the standard deviation and correlation. The mean of the process is defined and estimated in the logit_q parameter, whose initial value is set by the q element defined in the make_basic_info function. Here we will assume the transformed catchability is an AR1 process with standard deviation 0.1 and autocorrelation 0.5.
stock_om_info = make_basic_info()
M_om = list(
model = "constant", # a single mean M for all ages
initial_means = 0.2)
selectivity_om = list(
model = c(rep("logistic", stock_om_info$n_fleets),rep("logistic", stock_om_info$n_indices)),
initial_pars = rep(list(c(5,1)), stock_om_info$n_fleets + stock_om_info$n_indices),
re = rep("2dar1", stock_om_info$n_fleets + stock_om_info$n_indices))
NAA_re_om = list(
N1_pars = exp(10)*exp(-(0:(length(stock_om_info$ages)-1))*M_om$initial_means[1]),
sigma = "rec", #random about mean
use_steepness = 0,
recruit_model = 2, #random effects with a constant mean
recruit_pars = exp(10)
)
catchability_om = list(re = "ar1") #could also define the mean q parameter here.
stock_om_input = prepare_wham_input(basic_info = stock_om_info, selectivity = selectivity_om, NAA_re = NAA_re_om, M = M_om, catchability = catchability_om)
stock_om_input$par$log_NAA_sigma
#> [1] 0
stock_om_input$par$log_NAA_sigma = log(0.6)
stock_om_input$par$log_NAA_sigma
#> [1] -0.5108256
stock_om_input$par$q_repars
#> [,1] [,2]
#> [1,] 0 0
stock_om_input$par$q_repars[1,] = c(log(0.1), wham:::gen.logit(0.5,-1,1, s=2))
stock_om_input$par$q_repars
#> [,1] [,2]
#> [1,] -2.302585 0.5493061
When environmental effects are considered, there are assumptions to be made about the variance and any autocorrelation parameters of the random effects for the latent environmental process, and about the variance of the observation errors. The latter are specified in the ecov$logsigma argument to prepare_wham_input, but those for the random effects (Ecov_process_pars) must be specified after the call to prepare_wham_input, like the other random effects described above. The value for the effect of the covariate on the population (Ecov_beta) must also be specified this way. The columns of Ecov_process_pars correspond to each environmental covariate being modeled and the number of rows is 3. If ecov$process_model = "rw", only the first two elements are used and represent the value in the first year and the conditional standard deviation of the random walk. When ecov$process_model = "ar1", the three elements correspond to the mean, standard deviation (on log scale), and correlation parameters.
The Ecov_beta parameter is a 4-dimensional array so that it can be used across all possible types of effects in a wham model. The first dimension indicates where the effect will be applied: recruitment, natural mortality, index 1, ..., index n. The second dimension is the order of the polynomial effect of the covariate. The third dimension indicates which environmental covariate is having the effect on the population. The fourth dimension indicates which age class the effect is on (for natural mortality only).
As long as there is only a single environmental effect, we can change the appropriate element of the Ecov_beta array by using the map$Ecov_beta specification returned by prepare_wham_input. map$Ecov_beta will be all NA except for the configured effects on the population, and when there is just one effect there is no ambiguity. Otherwise, the 4 correct indices of the Ecov_beta array must be referenced appropriately for each effect.
We will assume that the errors on the observations of the environmental covariate have a standard deviation of 0.2, and that the latent process is AR1 with mean 0, standard deviation 0.3, and correlation parameter 0.5. We will also assume the effect is on (log) recruitment (no stock-recruit function) and the size of the effect is Ecov_beta = 0.3.
stock_om_info = make_basic_info()
#constant M fixed at 0.2
M_om = list(
model = "constant", # a single mean M for all ages
initial_means = 0.2)
#logistic selectivity for fleet and index
selectivity_om = list(
model = c(rep("logistic", stock_om_info$n_fleets),rep("logistic", stock_om_info$n_indices)),
initial_pars = rep(list(c(5,1)), stock_om_info$n_fleets + stock_om_info$n_indices))
NAA_re_om = list(
N1_pars = exp(10)*exp(-(0:(length(stock_om_info$ages)-1))*M_om$initial_means[1]),
sigma = "rec", #random about mean
recruit_model = 2, #random effects with a constant mean
recruit_pars = exp(10)
)
ecov_om = list(
label = "Climate variable 1",
process_model = "ar1",
mean = matrix(0, length(stock_om_info$years), 1), #observations which will be simulated later
logsigma = log(0.2),
lag = 0,
years = stock_om_info$years,
use_obs = matrix(1,length(stock_om_info$years),1),
where = "recruit",
how = 1)
stock_om_input = prepare_wham_input(basic_info = stock_om_info, selectivity = selectivity_om, NAA_re = NAA_re_om, M = M_om, ecov = ecov_om)
#same assumption as above about standard deviation of recruitment.
stock_om_input$par$log_NAA_sigma
#> [1] 0
stock_om_input$par$log_NAA_sigma = log(0.6)
stock_om_input$par$log_NAA_sigma
#> [1] -0.5108256
#set sd and rho of ecov processes
stock_om_input$par$Ecov_process_pars[] = c(0, log(0.3), wham:::gen.logit(0.5,-1,1)) #mean, log(sd), logit(0.5)
#set size of Ecov_beta
stock_om_input$par$Ecov_beta[!is.na(stock_om_input$map$Ecov_beta)] = 0.3
stock_om_input$par$Ecov_beta[1,1,1,] #all = 0.3, but only the first one is used and all values mapped to be equal.
#> [1] 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3
stock_om = fit_wham(stock_om_input, do.fit = FALSE, MakeADFun.silent = TRUE)
set.seed(123)
sim_Ecov = stock_om$simulate(complete=TRUE)
plot(stock_om$years, sim_Ecov$Ecov_x, type = 'l', ylab = "Covariate", xlab = "Year")
plot(stock_om$years, sim_Ecov$NAA[,1], type = 'l', ylab = "Recruitment", xlab = "Year")
See the catchability vignette for an example of simulating effects of multiple covariates on the population. Also see the CPI and recruitment vignette for configuring effects on recruitment when a stock-recruit function is assumed.
Here we show how to set up an estimation model that assumes the wrong natural mortality rate (M = 0.3), while other aspects of the estimation model are consistent with the operating model. The sim_fn function will return the simulated population and data, and either the input for the estimation model or the fitted estimation model.
stock_om_input = prepare_wham_input(basic_info = stock_om_info, selectivity = selectivity_om, NAA_re = NAA_re_om, M = M_om)
#same assumption as above about standard deviation of recruitment.
stock_om_input$par$log_NAA_sigma = log(0.6)
stock_om = fit_wham(stock_om_input, do.fit = FALSE, MakeADFun.silent = TRUE)
M_em = list(
model = "constant",
initial_means = 0.3)
stock_em_input = prepare_wham_input(basic_info = stock_om_info, selectivity = selectivity_om, NAA_re = NAA_re_om, M = M_em)
sim_fn = function(om, em_input, do.fit = FALSE){
obs_names = c("agg_indices","agg_catch","catch_paa","index_paa", "Ecov_obs", "obsvec")
om_sim = om$simulate(complete = TRUE)
em_input$data[obs_names] = om_sim[obs_names]
if(do.fit) {
em_fit = fit_wham(em_input, do.osa = F, do.retro = F, MakeADFun.silent = TRUE)
return(list(em_fit, om_sim))
} else return(list(em_input,om_sim))
}
set.seed(123)
simfit = sim_fn(stock_om, stock_em_input, do.fit = TRUE)
plot(stock_om$years, simfit[[2]]$SSB, type = 'l', ylab = "SSB", xlab = "Year")
lines(stock_om$years, simfit[[1]]$rep$SSB, lty = 2, col = 'red')
plot(stock_om$years, simfit[[2]]$SSB/exp(simfit[[2]]$log_SSB_FXSPR), type = 'l', ylab = "SSB/SSB(F40)", xlab = "Year")
lines(stock_om$years, simfit[[1]]$rep$SSB/exp(simfit[[1]]$rep$log_SSB_FXSPR), lty = 2, col = 'red')
Both SSB and stock status are biased high when M is specified too large.
Here we will use the same operating and estimating model assumptions as above where the former is based on M = 0.2 and the latter assumes M = 0.3. To be applicable to a wider class of closed-loop simulations, it will be useful to create an operating model that includes a base period and a feedback period and an estimating model that uses data as it becomes available as time progresses through the feedback period. Previously, we had made use of the projection years as the feedback period, but selectivity random effects are not projected and environmental observations are not simulated in the projection period, both of which may be relevant. The operating model is also updated through the feedback period (redefining F and population state).
There are two other functions defined in the vignette_10_helper file for generating F from catch advice: get_F_from_catch and update_om_F. The first is essentially a solver for the F in the operating model that corresponds to a catch that would be provided as management advice using the estimation model. Note that when the catch advice cannot actually be taken from the population, or implies F greater than 10, F is set to 10.
get_F_from_catch <- function(om, year, catch, Finit = 0.1, maxF = 10){
  rep = om$report()
  naa = rep$NAA[year,]
  Maa = rep$MAA[year,]
  sel_tot = rep$FAA_tot[year,]/max(rep$FAA_tot[year,]) #total selectivity-at-age scaled to a maximum of 1
  waa = om$input$data$waa[om$input$data$waa_pointer_totcatch, year,]
  get_catch = function(log_F, naa, sel, waa, Maa){ #Baranov catch equation summed over ages
    Faa = exp(log_F) * sel
    Zaa = Maa + Faa
    Catch = 0
    for(a in 1:length(naa)) Catch = Catch + waa[a] * naa[a] * Faa[a] * (1 - exp(-Zaa[a]))/Zaa[a]
    return(Catch)
  }
  obj = function(log_F) (catch - get_catch(log_F, naa, sel_tot, waa, Maa))^2 #squared difference from the catch advice
  opt = try(nlminb(log(Finit), obj))
  if(!is.character(opt)) Fsolve = exp(opt$par)[1] else Fsolve = maxF
  if(Fsolve > maxF) Fsolve = maxF #cap F when the catch advice cannot be taken from the population
  print(paste0("Fsolve: ", Fsolve))
  return(Fsolve)
}
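A quick usage sketch (the year index and catch value here are arbitrary): solve for the full F in model year 40 that would take a catch of 5000 from the current operating model state.
Fsolve = get_F_from_catch(stock_om, year = 40, catch = 5000)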
The second function uses get_F_from_catch to get the F, replace the F parameters in the operating model, and update the population and observations:
update_om_F = function(om, year, catch){
  rep = om$report() #generate the reported values given the parameters
  year_ind = which(om$years == year) #index corresponding to year
  Fsolve = get_F_from_catch(om, year_ind, catch) #find the F for the catch advice
  #have to be careful if more than one fleet
  sel = rep$FAA[year_ind,,] #n_fleets x n_ages
  sel_all = sel/max(rep$FAA_tot[year_ind,]) #scaled so total F at the fully selected age = 1
  FAA_all = Fsolve * rbind(sel_all)
  F_fleet = apply(FAA_all, 1, max) #full F for each fleet
  if(year_ind > 1) om$input$par$F_devs[year_ind-1,] = log(F_fleet) - log(rep$F[year_ind-1,]) #change the F_dev to produce the right full F
  else om$input$par$log_F1 = log(F_fleet) #if year is the first year of the model, change F in year 1
  om <- fit_wham(om$input, do.fit = FALSE, MakeADFun.silent = TRUE) #rebuild the operating model with the new F
  return(om)
}
First load the helper functions:
data(vignette_10_helper)
Create the stock operating model. We will specify a feedback period of 40 years beyond the 40 year base period.
stock_om_info = make_basic_info(base_years = 1982:2021, n_feedback_years = 40)
stock_om_input = prepare_wham_input(basic_info = stock_om_info, selectivity = selectivity_om, NAA_re = NAA_re_om, M = M_om)
stock_om <- fit_wham(stock_om_input, do.fit = FALSE, MakeADFun.silent = TRUE)
Now stock_om will be the operating model for a generic stock with biological inputs defined in the make_basic_info function. We need to define a function for updating the simulations from the operating model during the feedback period.
update_om_fn = function(om, seed = 123, interval.info = NULL){
obs_names = c("agg_indices","agg_catch","catch_paa","index_paa", "Ecov_obs", "obsvec")
if(!is.null(interval.info)){ #iteratively update F over assessment interval for the given catch advice
for(y in interval.info$years){
om = update_om_F(om, year = y, catch = interval.info$catch) #put in the right F values
set.seed(seed)
om_sim = om$simulate(complete=TRUE) #resimulate the population and observations
om$input$data[obs_names] = om_sim[obs_names] #update any simulated data
om$input$par[om$input$random] = om_sim[om$input$random] #update any simulated random effects
# reset the om
om <- fit_wham(om$input, do.fit = F, MakeADFun.silent = TRUE)
}
} else { #otherwise just (re)generate the population
set.seed(seed)
om_sim = om$simulate(complete=TRUE) #resimulate the population and observations
om$input$data[obs_names] = om_sim[obs_names] #update any simulated data
om$input$par[om$input$random] = om_sim[om$input$random] #update any simulated random effects
# reset the om
om <- fit_wham(om$input, do.fit = F, MakeADFun.silent = TRUE)
}
return(om)
}
We now turn to defining the estimation model in a way such that the terminal year can be updated appropriately through the feedback period.
make_em_input = function(M_em, sel_em, NAA_em, om_data, em_years){
  info = make_basic_info(base_years = em_years) #update the terminal year for the estimation model
  ind_em = 1:length(em_years) #year indices
  #fill in the data from the operating model simulation
  info$agg_catch = om_data$agg_catch[ind_em,, drop = F]
  info$agg_indices = om_data$agg_indices[ind_em,, drop = F]
  info$catch_paa = om_data$catch_paa[,ind_em,, drop = F]
  info$index_paa = om_data$index_paa[,ind_em,, drop = F]
  em_input = prepare_wham_input(basic_info = info, selectivity = sel_em, NAA_re = NAA_em, M = M_em)
  return(em_input)
}
We also need a function that will take the estimated model and generate catch advice from it.
advice_fn = function(em){
#make 5 year projections using F40. Use average SSB/R and YPR inputs over most recent 5 years
proj_opts = list(n.yrs=5, use.FXSPR=TRUE, avg.yrs=tail(em$years,5))
em_proj = project_wham(em, proj.opts = proj_opts, MakeADFun.silent=TRUE) #projected version of the em
advice = mean(em_proj$rep$pred_catch[length(em_proj$years) + 1:5]) #mean of the projected catch over the next 5 years fishing at F40
print(advice)
return(advice)
}
Finally, we make a function that uses all the other functions to step through the feedback period, updating the operating model, refitting the estimation model, and generating the catch advice.
loop_through_fn = function(om, M_em, selectivity_em, NAA_re_em, assess_years, assess_interval = assess.interval, base_years){
catches = numeric() #save the catch advice
for(i in assess_years){
print(i)
#make the input for the estimation model
em_input = make_em_input(M_em, selectivity_em, NAA_re_em, om_data = om$input$data, em_years = base_years[1]:i)
#fit the estimation model
em = fit_wham(em_input, do.retro = FALSE, do.osa=F, MakeADFun.silent = TRUE) #no feedback period yet
#make the catch advice
advice = advice_fn(em)
catches = c(catches, advice)
#set the catch for the next assess_interval years
interval.info = list(catch = advice, years = i + 1:assess_interval)
#update the operating model with the right Fs and resimulate the data given those Fs
om = update_om_fn(om, seed = 123, interval.info = interval.info)
}
return(list(om = om, em = em, catches = catches))
}
stock_om = update_om_fn(stock_om)
assess.interval = 4
base.years = make_basic_info()$years #no feedback period yet
first.year = head(base.years,1)
terminal.year = tail(base.years,1)
assess.years = seq(terminal.year, tail(stock_om$years,1)-assess.interval,by = assess.interval)
Running the loop function will take a little while, so we have saved some results for plotting.
looped_res = loop_through_fn(stock_om, M_em = M_em, selectivity_em = selectivity_om, NAA_re_em = NAA_re, assess_years = assess.years, base_years = base.years)
looped_rep = looped_res$om$rep
plot(stock_om$years, stock_om$rep$SSB, type = "l", ylim = c(0,400000), ylab = "SSB", xlab = "Year")
lines(stock_om$years, looped_rep$SSB, col = "red")
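We can also compare the catch advice from each assessment to the catch taken in the operating model. A sketch, assuming pred_catch reported by the operating model is the realized aggregate catch and that each piece of advice applies to the assessment interval that follows:
realized_catch = apply(as.matrix(looped_rep$pred_catch), 1, sum) #total catch across fleets (one fleet here)
plot(stock_om$years, realized_catch, type = "l", ylab = "Catch", xlab = "Year")
points(assess.years, looped_res$catches, col = "red", pch = 19) #advice generated at each assessment year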