---
title: "Catch-at-Length (gear, region and time specific)"
date: "`r Sys.Date()`"
output:
  html_document:
    toc: true
    toc_depth: 3
    toc_float:
      collapsed: FALSE
    code_folding: show
    number_sections: TRUE
---
This section looks at the length frequencies in the total catch (catch-at-length, CAL). These frequencies depend not only on the population's size structure, but also on gear type (selectivity), region (availability) and time of year (growth). To remove the gear and region effects, this script considers length-frequency data from a single gear within each region; different time scales are considered to address growth.
# SET-UP
```{r markdown, echo=F}
library(knitr)
opts_chunk$set(echo = T, collapse = T, fig.align = "center", fig.width = 9, fig.height = 6)
options(width = 140)
```
## settings
```{r settings, message = F}
source('0.0_settings.R')
library(ggridges)
library(zoo)
library(PerformanceAnalytics)
```
## load all data
```{r data, message = F}
# for lf by trimester
load(paste0(dir.rdat, "lf.caa.Rdata")) # from 3.0_caa
load(paste0(dir.rdat, "bio.Rdata")) # from 1.0_read
# for lf by month
load(paste0(dir.rdat, "lf.Rdata")) # from 1.0_read
group.region <- read.table(paste0(dir.dat,'caa_group_region.txt'),header=T)
group.gear <- read.table(paste0(dir.dat,'caa_group_gear.txt'),header=T)
```
# LEVEL-SPECIFIC LENGTH-FREQUENCIES
## BY TRIMESTER
### Preparation
Which CAL levels (gear, period and region combinations) have sufficient samples?
```{r cal prep, message = F}
# levels considered
gears <- c('Gillnets','Lines','Seines_Nets_Traps_Weirs')
regions <- c('eNL','sGSL','SS','wNL')
period <- 2:4
by <- c('year','gear','region','period')
# subset
this.lf <- lf.caa[lf.caa$gear %in% gears &
lf.caa$region %in% regions &
lf.caa$period %in% period &
lf.caa$year %in% 1976:2019,]
# get cal by simple averaging -> equal weight to all samples! This is the same as if get.samples had been used.
cal <- ddply(this.lf,by,transform,prop=n/sum(n))
cal <- ddply(cal,c(by,'length'),summarise,lf=mean(prop),n=sum(n),N=length(unique(sample.id)))
cal <- ddply(cal,by,transform,N=max(N))
# remove cal levels with only one sample
cal <- cal[cal$N>1,]
```
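The comment above states that the averaging is meant to give every sample equal weight. A minimal toy sketch (made-up numbers, not assessment data) of how sample-averaged proportions differ from pooled ones:
```{r cal weighting demo, message = F}
# toy illustration: two length bins measured by one small and one large sample
toy <- data.frame(sample.id=c(1,1,2,2),
                  length=c(250,300,250,300),
                  n=c(10,90,500,500))
# pooled proportions: the large sample dominates
ddply(toy,'length',summarise,prop.pooled=sum(n)/sum(toy$n))
# sample-averaged proportions: each sample counts equally
toy <- ddply(toy,'sample.id',transform,prop=n/sum(n))
ddply(toy,'length',summarise,prop.avg=mean(prop))
```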
#### CHECK {.tabset}
##### availability
```{r cal check avail, fig.height = 4,fig.width = 8}
ggplot(unique(cal[,c(by,'N')]),aes(x=year,y=interaction(gear,region,period)))+
geom_point(aes(size=N,col=N))+
scale_color_viridis_c()
```
### Calculation
```{r cal calc, message = F}
# get cal for each region
cal <- this.lf[(this.lf$region=='sGSL' & this.lf$gear=='Lines' & this.lf$period==3)|                        # southern Gulf (gillnets: dome-shaped selectivity + earlier fishery)
               (this.lf$region=='SS' & this.lf$gear=='Seines_Nets_Traps_Weirs' & this.lf$period==2)|        # Scotian Shelf
               (this.lf$region=='wNL' & this.lf$gear=='Seines_Nets_Traps_Weirs' & this.lf$period %in% 3:4)| # wNL
               (this.lf$region=='eNL' & this.lf$gear=='Seines_Nets_Traps_Weirs' & this.lf$period %in% 3:4)  # eNL
               ,]
cal[cal$region %in% c('eNL','wNL'),'period'] <- '3-4' # for NL, periods 3 and 4 are combined
cal <- cal[cal$length>210,] # remove the tiny fish that appear in the occasional sample
# get cal by simple averaging -> equal weight to all samples! This is the same as if get.samples had been used.
cal <- ddply(cal,c(by,'length'),summarise,n=sum(n),N=length(unique(sample.id)))
cal <- ddply(cal,by,transform,prop=n/sum(n),N=max(N),ntot=sum(n),med=median(rep.int(length,n)))
# remove cal levels with only one sample
cal <- cal[cal$N>1,]
```
#### PLOTS {.tabset}
##### cal
```{r cal plot , fig.height = 12,fig.width = 10}
ggplot(cal) +
geom_density_ridges(stat = "identity", aes(x = length, y = year, height = prop,group=year,fill=region),alpha=0.6)+
geom_segment(aes(x=med,xend=med,y=year,yend=year+1))+
geom_text(aes(x=200,y=year,label=N),vjust=-0.1,hjust=0)+
facet_grid(.~region)+
scale_y_continuous(expand=c(0,0))+
theme_ridges()+
labs(y='',x='Length (mm)')+
scale_fill_viridis_d()
```
##### correlation
Exploratory.
Overall correlation between regional median sizes (a rough measure).
Correlation over time (rolling window) was also checked but was uninformative because there are too few years.
```{r cal corr overall, fig.height = 6,fig.width = 8}
df <- as.data.frame.matrix(xtabs(med ~ year + region, unique(cal[,c('year','region','med')])))
df[df==0] <- NA
chart.Correlation(df, histogram=TRUE, pch=19)
```
##### lm
Exploratory.
Pairwise robust regressions (through the origin) between regional median sizes: a slope close to 1 means the medians track each other. Stars mark significance.
```{r cal lm, fig.height = 4,fig.width = 6}
df <- unique(cal[,c('year','region','med')])
df <- dcast(df,year~region,value.var='med')
cross <- expand.grid(a=unique(cal$region),b=unique(cal$region))
cross <- cross[cross$a!=cross$b,]
b <- apply(cross,1,function(d){
    x <- df[,d[1]]
    y <- df[,d[2]]
    m <- robustbase::lmrob(formula = y ~ 0+x)
    c(x=unname(coef(m)), p=unname(summary(m)$coefficients[1,4])) # slope and p-value, explicitly named
})
out <- cbind(cross,t(b))
pstar <- function(x,bonf=1){
    lab <- rep('',length(x))
    lab[x<0.1/bonf] <- '*'
    lab[x<0.05/bonf] <- '**'
    lab[x<0.01/bonf] <- '***'
    lab
}
out$star <- pstar(out$p)
out <- out[!(out$b=='sGSL' & out$a!='sGSL') &            # keep each regional pair only once (not great code)
           !(out$b=='eNL' & out$a %in% c('wNL')) &
           !(out$b=='SS' & out$a %in% c('eNL','wNL')) &
           !(out$b=='wNL' & out$a %in% c('wNL')),]
ggplot(out,aes(x=a,y=b))+
geom_tile(aes(fill=x))+
geom_text(data=out[out$a!=out$b,],aes(label=round(x,2)))+
geom_text(data=out[out$a!=out$b,],aes(label=star),hjust=-1,vjust=-0.2)+
scale_fill_gradient2(low = 'darkred', mid = 'white', high = 'darkgreen',midpoint=1)+
theme(legend.position = 'none')+
scale_x_discrete(expand=c(0,0))+
scale_y_discrete(expand=c(0,0))+
labs(x='',y='')
```
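The `pstar()` helper above accepts a Bonferroni divisor that is not used; if one wanted to correct the stars for the number of pairwise tests, a minimal optional sketch (unevaluated):
```{r cal lm bonferroni, eval = F}
# optional: Bonferroni-correct the significance stars for the number of pairwise tests
out$star <- pstar(out$p, bonf=nrow(out))
```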
## BY TRIMESTER (+ correction)
The length frequencies above are still influenced by day of the year: fish caught later in the season have had more time to grow. Here the idea is to back-calculate lengths to a common date to remove that effect. Because regions are kept separate, the only remaining differences between them are zonal differences and gear selectivity.
Removing the effect model-wise was also considered, but it is unclear how that would work (frequencies depend on several non-linear effects, with missing levels).
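For concreteness, a minimal sketch of what such a back-calculation could look like under a single overall von Bertalanffy curve. The `backcalc()` helper and its parameter values are purely illustrative (not fitted), and the Calculation subsection below explains why this route was ultimately not pursued.
```{r cal backcalc sketch}
# minimal sketch: standardise a length to a reference day of the year using ONE
# overall von Bertalanffy curve (Linf, K, t0 are placeholders, not fitted values)
backcalc <- function(length, doy, doy.ref=172, Linf=400, K=0.25, t0=-1){
    age <- t0 - log(1 - pmin(length, Linf-1)/Linf)/K # implied age at capture (capped below Linf)
    age <- age + (doy.ref - doy)/365                 # shift the age to the reference day
    Linf*(1 - exp(-K*(age - t0)))                    # predicted length on that day
}
backcalc(length=300, doy=300) # a 300 mm fish caught on day 300, standardised to day 172
```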
### Preparation
```{r cal back, message = F}
lf.lf <- lf[!is.na(lf$length),c('year','date','nafo','gear.cat','length','n','sample.id')]
## groupings
lf.lf$region <- group.region[match(lf.lf$nafo,group.region$nafo),'region']
lf.lf$nafo <- NULL
lf.lf$gear <- group.gear[match(lf.lf$gear.cat,group.gear$gear.cat),'gear.group']
lf.lf$gear.cat <- NULL
## Append length-frequencies from carbio
lf.bio <- bio[!is.na(bio$length) & bio$random==TRUE,c('year','date','nafo','gear','length','sample.id')]
lf.bio$sample.id <- -lf.bio$sample.id # negative numbers for these samples for easy tracking
lf.bio$region <- group.region[match(lf.bio$nafo,group.region$nafo),'region']
lf.bio$nafo <- NULL
lf.bio$gear <- group.gear[match(lf.bio$gear,group.gear$gear.cat),'gear.group']
lf.bio <- ddply(lf.bio,c('year','date','region','gear','length','sample.id'),summarise,n=length(year)) # get freqs
lf.lf <- rbind(lf.lf,lf.bio)
## make equal length classes (consistency)
lf.lf$length <- roundFish(lf.lf$length,5)
## select one gear per region (varying selectivities cannot be adjusted for)
lf.lf <- lf.lf[(lf.lf$region=='sGSL' & lf.lf$gear=='Lines')|                  # southern Gulf (gillnets: dome-shaped selectivity + earlier fishery)
               (lf.lf$region=='SS' & lf.lf$gear=='Seines_Nets_Traps_Weirs')|  # Scotian Shelf
               (lf.lf$region=='wNL' & lf.lf$gear=='Seines_Nets_Traps_Weirs')| # wNL
               (lf.lf$region=='eNL' & lf.lf$gear=='Seines_Nets_Traps_Weirs')  # eNL
               ,]
lf.lf <- lf.lf[lf.lf$length>210,] # remove the tiny fish that appear in the occasional sample
## correct length for date
# 1) get age-length data
al.mod <- bio[!is.na(bio$year) & !is.na(bio$length) & !is.na(bio$agef),c('year','doy','length','agef')]
al.mod$age <- with(al.mod,ifelse(doy<172,agef-(172-doy)/365,agef+(doy-172)/365)) # 172 = 21st of June (spawning)
al.mod$cohort <- with(al.mod,year-agef)
# 2) clean age-length data
al.mod <- al.mod[al.mod$agef<18,]
al.mod <- ddply(al.mod,c('agef'),transform,outlier=outlier(length,coef=3)) # identify overall extremes (see caa)
table(al.mod$outlier)
al.mod <- al.mod[al.mod$outlier==FALSE,]
al.mod <- al.mod[!(al.mod$agef==0 & al.mod$length>275),] # these records cause convergence problems
p <- ggplot(al.mod,aes(x=age,y=length))+
geom_point()+
facet_wrap(~cohort)
# 3) fit model by cohort (brute-force starting values until a fit converges)
library(FSA)
vb <- length~Linf*(1-exp(-K*(age-t0)))
inits <- ddply(al.mod[al.mod$cohort %in% 1973:2014,],c('cohort'),function(x)unlist(FSA::vbStarts(length~age,data=x)))
mods <- lapply(1973:2014,function(x){
    i <- 1
    f <- NULL
    while(i <= nrow(inits) && !inherits(f,'nls')){ # cycle through all cohorts' starting values until convergence
        ini <- inits[i,-1]
        f <- try(nls(vb,data=al.mod[al.mod$cohort==x,],start=as.list(ini)), silent=TRUE)
        i <- i+1
    }
    return(f)
})
names(mods) <- 1973:2014 # so coefficients can be tied back to their cohort
coefs <- ldply(mods,coef,.id='cohort') # cohorts without a converged fit are dropped
```
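A quick, optional look at the fitted parameters by cohort, illustrating the between-cohort growth differences discussed below (this assumes reshape2 is available, as `dcast` is used earlier in this script):
```{r cal vb coefs, fig.height = 4, fig.width = 8}
# fitted von Bertalanffy parameters by cohort
ggplot(melt(coefs,id='cohort'),aes(x=as.numeric(as.character(cohort)),y=value))+
    geom_point()+
    facet_wrap(~variable,scales='free_y')+
    labs(x='Cohort',y='Estimate')
```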
### Calculation
This step was not carried out because, on reflection, it is not such a good idea:
* the result will depend on the type of growth curve used (von Bertalanffy vs a seasonal one such as Somers; see the sketch after this list). Which one to use?
* growth differs significantly between cohorts, yet the correct cohort-specific model cannot be applied (length alone is insufficient to guess a fish's cohort).
Any back-calculation would thus be overly simplistic and dependent on its assumptions (e.g., a single overall von Bertalanffy curve would have to be used), and a half-baked attempt does not seem worth it. Note that size distributions by region will never be fully comparable anyway: the southern Gulf is dominated by a different gear type (selectivity), and differences in the availability of fish between regions will induce differences as well (NL presumably having proportionally more large fish). Because CAA is available by region, focusing on size here seems redundant.
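For reference, the seasonally oscillating alternative mentioned above: Somers (1988) modifies the standard von Bertalanffy curve with a sine term. A minimal sketch with illustrative parameter values (C = 0 recovers the standard curve):
```{r cal somers sketch, fig.height = 4, fig.width = 6}
# Somers (1988) seasonalised von Bertalanffy growth curve (illustrative parameters)
vb.somers <- function(age, Linf=400, K=0.25, t0=-1, C=0.8, ts=0.3){
    S <- function(t) C*K/(2*pi)*sin(2*pi*(t - ts)) # seasonal oscillation
    Linf*(1 - exp(-(K*(age - t0) + S(age) - S(t0))))
}
df <- data.frame(age=seq(0, 10, 0.01))
ggplot(df, aes(x=age))+
    geom_line(aes(y=vb.somers(age)))+
    geom_line(aes(y=vb.somers(age, C=0)), linetype='dashed')+ # standard von Bertalanffy for comparison
    labs(x='Age', y='Length')
```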
## BY MONTH
### Calculation
```{r cal month calc month, message = F}
# get cal for each region
lf.month <- lf[,c('year','month','nafo','gear.cat','length','n','sample.id')]
lf.month$region <- group.region[match(lf.month$nafo,group.region$nafo),'region']
lf.month$gear <- group.gear[match(lf.month$gear.cat,group.gear$gear.cat),'gear.group']
lf.month$length <- roundFish(lf.month$length,5)
lf.month <- lf.month[(lf.month$region=='sGSL' & lf.month$gear=='Lines')| # sGSL
(lf.month$region=='SS' & lf.month$gear=='Seines_Nets_Traps_Weirs')| # SS
(lf.month$region=='wNL' & lf.month$gear=='Seines_Nets_Traps_Weirs')| # wNL
(lf.month$region=='eNL' & lf.month$gear=='Seines_Nets_Traps_Weirs') # eNL
,]
this.lf <- ddply(lf.month,c('year','month','region','length'),summarise,n=sum(n),N=length(unique(sample.id)))
this.lf <- ddply(this.lf,c('year','month','region'),transform,prop=n/sum(n),N=max(N),ntot=sum(n),med=median(rep.int(length,n)))
```
#### PLOTS {.tabset}
```{r cal month plot prep , fig.height = 10,fig.width = 12}
regions <- unique(this.lf$region)
ps <- lapply(regions,function(x)ggplot(this.lf[this.lf$region==x,]) +
geom_density_ridges(stat = "identity", aes(x = length, y = year, height = prop,group=year,fill=month),alpha=0.6)+
geom_segment(aes(x=med,xend=med,y=year,yend=year+1))+
geom_text(aes(x=200,y=year,label=N),vjust=-0.1,hjust=0)+
facet_grid(.~month)+
scale_y_continuous(expand=c(0,0))+
theme_ridges()+theme(legend.position='none')+
labs(y='',x='Length (mm)')+
scale_fill_viridis_c())
names(ps) <- regions
```
##### SS
```{r cal month SS, fig.height = 10,fig.width = 12}
ps[['SS']]
```
##### sGSL
```{r cal month sGSL, fig.height = 10,fig.width = 12}
ps[['sGSL']]
```
##### wNL
```{r cal month wNL, fig.height = 10,fig.width = 12}
ps[['wNL']]
```
##### eNL
```{r cal month eNL, fig.height = 8,fig.width = 10}
ps[['eNL']]
```