This is the 11th WHAM example. We assume you already have
wham
installed and are relatively familiar with the
package. If not, read the Introduction and Tutorial.
In this vignette we show how to both simulate and estimate:
# devtools::install_github("timjmiller/wham", dependencies=TRUE)
library(wham)
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.2.3
library(tidyr)
#> Warning: package 'tidyr' was built under R version 4.2.3
library(dplyr)
#> Warning: package 'dplyr' was built under R version 4.2.3
Create a directory for this analysis:
# choose a location to save output, otherwise will be saved in working directory
write.dir <- "choose/where/to/save/output"
dir.create(write.dir)
setwd(write.dir)
Make a basic_info
list of input components defining a
simple default stock. We’ll then pass this to
prepare_wham_input
and fit_wham
. This is
similar to example 10, but now there are two indices.
make_digifish <- function(years = 1975:2014) {
digifish = list()
digifish$ages = 1:10
digifish$years = years
na = length(digifish$ages)
ny = length(digifish$years)
digifish$n_fleets = 1
digifish$catch_cv = matrix(0.1, ny, digifish$n_fleets)
digifish$catch_Neff = matrix(200, ny, digifish$n_fleets)
digifish$n_indices = 2
digifish$index_cv = matrix(0.3, ny, digifish$n_indices)
digifish$index_Neff = matrix(100, ny, digifish$n_indices)
digifish$fracyr_indices = matrix(0.5, ny, digifish$n_indices)
digifish$index_units = rep(1, length(digifish$n_indices)) #biomass
digifish$index_paa_units = rep(2, length(digifish$n_indices)) #abundance
digifish$maturity = t(matrix(1/(1 + exp(-1*(1:na - na/2))), na, ny))
L = 100*(1-exp(-0.3*(1:na - 0)))
W = exp(-11)*L^3
nwaa = digifish$n_indices + digifish$n_fleets + 2
digifish$waa = array(NA, dim = c(nwaa, ny, na))
for(i in 1:nwaa) digifish$waa[i,,] = t(matrix(W, na, ny))
digifish$fracyr_SSB = rep(0.25,ny)
digifish$q = rep(0.3, digifish$n_indices)
digifish$F = matrix(0.2,ny, digifish$n_fleets)
digifish$selblock_pointer_fleets = t(matrix(1:digifish$n_fleets, digifish$n_fleets, ny))
digifish$selblock_pointer_indices = t(matrix(digifish$n_fleets + 1:digifish$n_indices, digifish$n_indices, ny))
return(digifish)
}
digifish = make_digifish()
Now define other components needed by prepare_wham_input
(selectivity and M).
selectivity = list(model = c(rep("logistic", digifish$n_fleets),rep("logistic", digifish$n_indices)),
initial_pars = rep(list(c(5,1)), digifish$n_fleets + digifish$n_indices)) #fleet, index
M = list(initial_means = rep(0.2, length(digifish$ages)))
Here we specify that recruitment deviations are independent random effects, with no stock-recruit relationship.
Now we can make the input list with
prepare_wham_input
input = prepare_wham_input(basic_info = digifish, selectivity = selectivity, NAA_re = NAA_re, M = M, catchability = catchability)
We can then define an operating model (OM) by simulating data (and recruitment) from this input:
om = fit_wham(input, do.fit = FALSE)
#simulate data from operating model
set.seed(0101010)
newdata = om$simulate(complete=TRUE)
Now put the simulated data in an input file with all the same configuration as the operating model.
temp = input
temp$data = newdata
Fit an estimation model that is the same as the operating model (self-test).
fit = fit_wham(temp, do.osa = FALSE, MakeADFun.silent = TRUE, retro.silent = TRUE)
fit$mohns_rho = mohns_rho(fit)
plot_wham_output(fit)
This plot of the q prior and approximate posterior is provided by
plot_wham_output
wham:::plot_q_prior_post(fit)
This will add time varying iid random effects on catchability for the first index while still keeping the prior on the second index.
Generate input as above.
input = prepare_wham_input(basic_info = digifish, selectivity = selectivity, NAA_re = NAA_re, M = M, catchability = catchability)
The default initial value for the standard deviation of the random effects is 1 but we will simulate with less variation than that
$par$q_repars[1] = log(0.2)
input
Now create the operating model, simulate data and fit as above
om = fit_wham(input, do.fit = FALSE)
#simulate data from operating model
set.seed(0101010)
newdata = om$simulate(complete=TRUE)
#put the simulated data in an input file with all the same configuration as the operating model
temp = input
temp$data = newdata
#fit estimating model that is the same as the operating model
fit = fit_wham(temp, do.osa = FALSE, MakeADFun.silent = TRUE)
fit$mohns_rho = mohns_rho(fit)
plot_wham_output(fit)
This is similar to what is provided in plot_wham_output
,
but with the true values from the simulation also plotted
pal = viridisLite::viridis(n=2)
plot(fit$years, fit$rep$q[,1], type = 'n', lwd = 2, col = pal[1], ylim = c(0,1), ylab = "q", xlab = "Year")
se = summary(fit$sdrep)
se = matrix(se[rownames(se) == "logit_q_mat",2], length(fit$years))
for( i in 1:input$data$n_indices){
lines(fit$years, fit$rep$q[,i], lwd = 2, col = pal[i])
polyy = c(fit$rep$q[,i]*exp(-1.96*se[,i]),rev(fit$rep$q[,i]*exp(1.96*se[,i])))
polygon(c(fit$years,rev(fit$years)), polyy, col=adjustcolor(pal[i], alpha.f=0.4), border = "transparent")
lines(fit$years, newdata$q[,i], lwd = 2, col = pal[i], lty = 2)
}
legend("topright", legend = paste0("Index ", rep(1:input$data$n_indices, each = 2), c(" Est.", " True")), lwd = 2, col = rep(pal, each = 2), lty = c(1,2))
First simulate and estimate environmental covariate processes, but no effects of population.
ecov = list(
label = c("Climate variable 1", "Climate variable 2"),
process_model = c("ar1","ar1"),
mean = cbind(rnorm(length(digifish$years)),rnorm(length(digifish$years))),
logsigma = log(c(0.01, 0.2)),
lag = c(0,0),
years = digifish$years,
use_obs = matrix(1,length(digifish$years),2),
where = c("none","none"),
indices = list(2, NULL),
how = c(1,0))
Note that how[1] conflicts with where[1], but
prepare_wham_input
will use where[1].
We will not include a prior distribution for the second index, but we will keep the iid random effects on q for the first index.
catchability = list(re = c("iid", "none"))
input = prepare_wham_input(basic_info = digifish, selectivity = selectivity, NAA_re = NAA_re, M = M, catchability = catchability, ecov = ecov)
A warning is thrown about the how[1] and where[1] conflict.
For the simulations, we need to set some values for the standard deviation and correlation parameters of the environmental covariate processes.
input$par$Ecov_process_pars[2,] = log(c(0.1,0.2)) #sd
#cor pars are c(0.4,-0.3)
input$par$Ecov_process_pars[3,] = log((c(0.4,-0.3)-(-1))/(1-c(0.4,-0.3)))
The same value for the standard deviation of q random effects on the first index.
input$par$q_repars[1] = log(0.2)
Now create the operating model and simulate and fit data.
om = fit_wham(input, do.fit = FALSE)
set.seed(0101010)
newdata = om$simulate(complete=TRUE)
#put the simulated data in an input file with all the same configuration as the operating model
temp = input
temp$data = newdata
#fit estimating model that is the same as the operating model
fit = fit_wham(temp, do.osa = FALSE, MakeADFun.silent = TRUE)#, retro.silent = TRUE)
fit$mohns_rho = mohns_rho(fit)
plot_wham_output(fit)
Again, this is similar to what is provided in
plot_wham_output
, but with the true values from the
simulation also plotted
pal = viridisLite::viridis(n=2)
se = summary(fit$sdrep)
se = matrix(se[rownames(se) == "logit_q_mat",2], length(fit$years))
plot(fit$years, fit$rep$q[,1], type = 'n', lwd = 2, col = pal[1], ylim = c(0,1), ylab = "q", xlab = "Year")
for( i in 1:input$data$n_indices){
lines(fit$years, fit$rep$q[,i], lwd = 2, col = pal[i])
polyy = c(fit$rep$q[,i]*exp(-1.96*se[,i]),rev(fit$rep$q[,i]*exp(1.96*se[,i])))
polygon(c(fit$years,rev(fit$years)), polyy, col=adjustcolor(pal[i], alpha.f=0.4), border = "transparent")
lines(fit$years, newdata$q[,i], lwd = 2, col = pal[i], lty = 2)
}
legend("topright", legend = paste0("Index ", rep(1:input$data$n_indices, each = 2), c(" Est.", " True")), lwd = 2, col = rep(pal, each = 2), lty = c(1,2))
Let’s take a look at the true and estimated Environmental covariate process pararameters. Columns are the different covariates. Rows are the mean, sd, and correlation parameters, respectively.
input$par$Ecov_process_pars
fit$parList$Ecov_process_pars
Compare true and estimated (log) standard deviation of time-varying q. First row, first column.
input$par$q_repars
fit$parList$q_repars
The estimated variability in q is lower than truth, but the estimate has large standard error:
as.list(fit$sdrep, "Std")$q_repars
Allow effects of first Environmental covariate on q for second index.
ecov = list(
label = c("Climate variable 1", "Climate variable 2"),
process_model = c("ar1","ar1"),
mean = cbind(rnorm(length(digifish$years)),rnorm(length(digifish$years))),
logsigma = log(c(0.01, 0.2)),
lag = c(0,0),
years = digifish$years,
use_obs = matrix(1,length(digifish$years),2),
where = c("q","none"),
indices = list(2, NULL),
how = c(1,0))
As above get rid of prior on second index, add random effects on q for first index, generate input
catchability = list(re = c("iid", "none"))
input = prepare_wham_input(basic_info = digifish, selectivity = selectivity, NAA_re = NAA_re, M = M, catchability = catchability, ecov = ecov)
how[1] no longer conflicts with where[1].
As above set parameter values for simulation.
#set sd and rho of ecov processes
input$par$Ecov_process_pars[2,] = log(c(0.1,0.2)) #sd
#cor pars are c(0.4,-0.3)
input$par$Ecov_process_pars[3,] = log((c(0.4,-0.3)-(-1))/(1-c(0.4,-0.3)))
#set value to simulate variation in q
input$par$q_repars[1] = log(0.2)
Also, set value for Ecov_beta effect on q (dims are n_effects (2 + n_indices, max_n_poly, n_Ecov, n_ages)
input$par$Ecov_beta[4,1,1,] = 0.5
Verify that the map argument is setting all the ``ages’’ to use the same value, but only the first value is used for q or recruitment.
As above, create the operating model, simulate and fit data and compare true and estimated parameters.
om = fit_wham(input, do.fit = FALSE)
set.seed(0101010)
newdata = om$simulate(complete=TRUE)
#put the simulated data in an input file with all the same configuration as the operating model
temp = input
temp$data = newdata
#fit estimating model that is the same as the operating model
fit = fit_wham(temp, do.osa = FALSE, MakeADFun.silent = TRUE)#, retro.silent = TRUE)
fit$mohns_rho = mohns_rho(fit)
plot_wham_output(fit)
#compare assumed and estimated ecov process pars
input$par$Ecov_beta[4,1,1,1]
fit$parList$Ecov_beta[4,1,1,1]
#compare assumed and estimated ecov process pars
input$par$Ecov_process_pars
fit$parList$Ecov_process_pars
#compare true and estimated time-varying q
input$par$q_repars
fit$parList$q_repars
#estimated variability in q is lower than truth, but estimate has large SE
as.list(fit$sdrep, "Std")$q_repars
pal = viridisLite::viridis(n=2)
se = summary(fit$sdrep)
se = matrix(se[rownames(se) == "logit_q_mat",2], length(fit$years))
plot(fit$years, fit$rep$q[,1], type = 'n', lwd = 2, col = pal[1], ylim = c(0,1), ylab = "q", xlab = "Year")
for( i in 1:input$data$n_indices){
lines(fit$years, fit$rep$q[,i], lwd = 2, col = pal[i])
polyy = c(fit$rep$q[,i]*exp(-1.96*se[,i]),rev(fit$rep$q[,i]*exp(1.96*se[,i])))
polygon(c(fit$years,rev(fit$years)), polyy, col=adjustcolor(pal[i], alpha.f=0.4), border = "transparent")
lines(fit$years, newdata$q[,i], lwd = 2, col = pal[i], lty = 2)
}
legend("topright", legend = paste0("Index ", rep(1:input$data$n_indices, each = 2), c(" Est.", " True")), lwd = 2, col = rep(pal, each = 2), lty = c(1,2))
Add covariates to the model and allow effects of first covariate on q for second index AND recruitment. Then proceed as above.
ecov = list(
label = c("Climate variable 1", "Climate variable 2"),
process_model = c("ar1","ar1"),
mean = cbind(rnorm(length(digifish$years)),rnorm(length(digifish$years))),
logsigma = log(c(0.01, 0.2)),
lag = c(0,0),
years = digifish$years,
use_obs = matrix(1,length(digifish$years),2),
where = list(c("recruit","q"),"none"),
indices = list(2, NULL),
how = c(1,0))
#get rid of prior on second index. add AR1 random effects on q for first index
catchability = list(re = c("iid", "none"))
input = prepare_wham_input(basic_info = digifish, selectivity = selectivity, NAA_re = NAA_re, M = M, catchability = catchability, ecov = ecov)
#how[1] no longer conflicts with where[1].
#set sd and rho of ecov processes
input$par$Ecov_process_pars[2,] = log(c(0.1,0.2)) #sd
#cor pars are c(0.4,-0.3)
input$par$Ecov_process_pars[3,] = log((c(0.4,-0.3)-(-1))/(1-c(0.4,-0.3)))
#set value to simulate variation in q
input$par$q_repars[1] = log(0.2)
x = array(input$map$Ecov_beta, dim = dim(input$par$Ecov_beta))
x[1,1,1,] #all the ages mapped to use the same value. Only the first value is used for q or recruitment.
x[4,1,1,] #all the ages mapped to use the same value. Only the first value is used for q or recruitment.
#set value for Ecov_beta effect on q (dims are n_effects (2 + n_indices, max_n_poly, n_Ecov, n_ages)
input$par$Ecov_beta[4,1,1,] = 0.5
Also, set a value for Ecov_beta effect on recruitment (dims are n_effects (2 + n_indices, max_n_poly, n_Ecov, n_ages)
input$par$Ecov_beta[1,1,1,] = -0.5
Create the operating model, simulate and fit data, and compare true and estimated parameters.
om = fit_wham(input, do.fit = FALSE)
set.seed(0101010)
newdata = om$simulate(complete=TRUE)
#put the simulated data in an input file with all the same configuration as the operating model
temp = input
temp$data = newdata
#fit estimating model that is the same as the operating model
fit = fit_wham(temp, do.osa = FALSE, MakeADFun.silent = TRUE)#, retro.silent = TRUE)
fit$mohns_rho = mohns_rho(fit)
plot_wham_output(fit)
#compare assumed and estimated ecov effect on q for second index
input$par$Ecov_beta[4,1,1,1]
fit$parList$Ecov_beta[4,1,1,1]
#compare assumed and estimated ecov effect on recruitment
input$par$Ecov_beta[1,1,1,1]
fit$parList$Ecov_beta[1,1,1,1]
#SE for beta parameters is large, especially for recruitment effect
as.list(fit$sdrep, "Std")$Ecov_beta[c(4,1),1,1,1]
#compare assumed and estimated ecov process pars
input$par$Ecov_process_pars
fit$parList$Ecov_process_pars
#compare true and estimated time-varying q
input$par$q_repars
fit$parList$q_repars
#estimated variability in q is lower than truth, but estimate has large SE
as.list(fit$sdrep, "Std")$q_repars
pal = viridisLite::viridis(n=2)
se = summary(fit$sdrep)
se = matrix(se[rownames(se) == "logit_q_mat",2], length(fit$years))
plot(fit$years, fit$rep$q[,1], type = 'n', lwd = 2, col = pal[1], ylim = c(0,0.4), ylab = "q", xlab = "Year")
for( i in 1:input$data$n_indices){
lines(fit$years, fit$rep$q[,i], lwd = 2, col = pal[i])
polyy = c(fit$rep$q[,i]*exp(-1.96*se[,i]),rev(fit$rep$q[,i]*exp(1.96*se[,i])))
polygon(c(fit$years,rev(fit$years)), polyy, col=adjustcolor(pal[i], alpha.f=0.4), border = "transparent")
lines(fit$years, newdata$q[,i], lwd = 2, col = pal[i], lty = 2)
}
legend("topright", legend = paste0("Index ", rep(1:input$data$n_indices, each = 2), c(" Est.", " True")), lwd = 2, col = rep(pal, each = 2), lty = c(1,2))