poseidon-dev/SardinesOnlyatlantisom2SStest.Rmd at master · Bai-Li-NOAA/poseidon-dev · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
---
title: "Sardines only end-to-end atlantisom test"
author: "Sarah Gaichas and Christine Stawitz"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r message=FALSE, warning=FALSE}
library(tidyr)
require(dplyr)
library(ggplot2)
library(data.table)
library(here)
library(ggforce)
library(ggthemes)
library(atlantisom)
```

## Introduction

Now we test the `atlantisom` to SS workflow with a new California Current run that includes climate forcing, recruitment variability, and the fishing scenario outlined for our project in Norway: "output_CC_2063_run11".

We will go from atlantis model output to SS input in this page for a single species, "sardine", that doesn't require any age class splitting.

As of 31 May 2019, the `run_truth` function has been modified to do the catch numbers fix for legacy models.

```{r initialize}

initCCA <- TRUE
initNEUS <- FALSE
initNOBA <- FALSE

species_ss <- c("Pacific_sardine")

#function to make a config file? need one for each atlantis run

if(initCCA) source(here("config/CC2Config.R"))

if(initNEUS) source(here("config/NEUSConfig.R"))

if(initNOBA) source(here("config/NOBAConfig.R"))

```

```{r get_names, message=FALSE, warning=FALSE}
#Load functional groups
funct.groups <- load_fgs(dir=d.name,
                         file_fgs = functional.groups.file)
#Get just the names of active functional groups
funct.group.names <- funct.groups %>%
  filter(IsTurnedOn == 1) %>%
  select(Name) %>%
  .$Name

```

This can be run with `species_ss` in select_groups to get results only for that species. Here I'm running with all species; took about 50 minutes to return.

```{r get_truth_ss}

# default run_truth setup will save the file, so check for that first

if(!file.exists(file.path(d.name,
                          paste0("output", scenario.name, "run_truth.RData")))){
  #Store all loaded results into an R object
  truth <- run_truth(scenario = scenario.name,
                     dir = d.name,
                     file_fgs = functional.groups.file,
                     file_bgm = box.file,
                     select_groups = funct.group.names,
                     file_init = initial.conditions.file,
                     file_biolprm = biol.prm.file,
                     file_runprm = run.prm.file,
                     verbose = TRUE
  )
} else {
  truth <- get(load(file.path(d.name,
                              paste0("output", scenario.name, "run_truth.RData"))))
}
```

In the big picture, we will have the true population dynamics from Atlantis which we need for reference to compare with estimated population dynamics from the stock assessment estimation models.

Ideally we will have a function that gets the whole truth (and nothing but the truth) from Atlantis and stores it in a sensible way for later comparison and calculation of performance metrics.

Then we will have a function that implements the "survey" that produces the stock assessment inputs and sends those to the SS file writing functions.

## Truth from OM

The following settings should achieve a survey that samples all Atlantis model output timesteps, all species, and all model polygons, with perfect efficiency and full selectivity for all ages:

```{r census-spec, message=FALSE, warning=FALSE}

# make a function for this
source(here("config/census_spec.R"))

```

Get true total biomass, total numbers. This code gets results for all species and saves the output true B and N.

```{r trueB-N, eval=FALSE}

# this uses result$biomass_ages to sample biomass directly, no need for wt@age est
survey_testBall <- create_survey(dat = truth$biomass_ages,
                                 time = timeall,
                                 species = survspp,
                                 boxes = boxall,
                                 effic = effic1,
                                 selex = selex1)

# make up a constant 0 cv for testing
surv_cv <- data.frame(species=survspp, cv=rep(0.0,length(survspp)))

# call sample_survey_biomass with a bunch of 1000s for weight at age
# in the code it multiplies atoutput by wtatage/1000 so this allows us to use
# biomass directly
wtage <- data.frame(species=rep(survspp, each=10),
                    agecl=rep(c(1:10),length(survspp)),
                    wtAtAge=rep(1000.0,length(survspp)*10))

surveyB_frombio <- sample_survey_biomass(survey_testBall, surv_cv, wtage)

#save for later use, takes a long time to generate
saveRDS(surveyB_frombio, file.path(d.name, paste0(scenario.name, "surveyBcensus.rds")))

# this uses result$nums and a new function to get survey index in numbers (abundance)

survey_testNall <- create_survey(dat = truth$nums,
                                 time = timeall,
                                 species = survspp,
                                 boxes = boxall,
                                 effic = effic1,
                                 selex = selex1)

# as above, make up a constant 0 cv for testing
surv_cv <- data.frame(species=survspp, cv=rep(0.0,length(survspp)))

surveyN <- sample_survey_numbers(survey_testNall, surv_cv)

#save for later use, takes a long time to generate
saveRDS(surveyN, file.path(d.name, paste0(scenario.name, "surveyNcensus.rds")))


```

Plot true biomass and numbers for "sardines."

```{r plot-true-B-N}

trueB <- readRDS(file.path(d.name, paste0(scenario.name,"surveyBcensus.rds")))

trueB_ss <- trueB %>%
  filter(species==species_ss)

# read Atlantis output files
atBtxt2 <- read.table(file.path(d.name, paste0("output", scenario.name, "BiomIndx.txt")), header=T)
groupslookup <- funct.groups %>%
  filter(IsTurnedOn > 0)

atBtxt2tidy <- atBtxt2 %>%
  select(Time:DIN) %>%
  #select(Time, FPL:DIN) %>%
  rename_(.dots=with(groupslookup, setNames(as.list(as.character(Code)), Name))) %>%
  gather(species, biomass, -Time) %>%
  filter(species %in% c("Pacific_sardine"))


plotB <-ggplot() +
  geom_line(data=trueB_ss, aes(x=time/stepperyr,y=atoutput, color="survey census B"),
            alpha = 10/10) +
  geom_point(data=atBtxt2tidy, aes(x=Time/365,y=biomass, color="txt output true B"),
             alpha = 10/10) +
  theme_tufte() +
  theme(legend.position = "top") +
  labs(colour=scenario.name)

plotB +
  facet_wrap(~species, scales="free")


trueN <- readRDS(file.path(d.name, paste0(scenario.name,"surveyNcensus.rds")))

trueN_ss <- trueN %>%
  filter(species==species_ss)

plotN <-ggplot() +
  geom_line(data=trueN_ss, aes(x=time/stepperyr,y=atoutput, color="survey census N"),
            alpha = 10/10) +
  theme_tufte() +
  theme(legend.position = "top") +
  labs(colour=scenario.name)

plotN +
  facet_wrap(~species, scales="free")

```

Get true population age comp, length comp, and weight at age. Now we go to single species to speed things up.

```{r true-comps, warning=FALSE, message=FALSE, eval=FALSE}

# wrap these individual calls into a function
survey_N_ss <- create_survey(dat = truth$nums,
                                 time = timeall,
                                 species = species_ss,
                                 boxes = boxall,
                                 effic = effic1,
                                 selex = selex1)

# apply default sample fish to get numbers, effNhigh samples all, see census_spec
# NOPE! effNhigh is not big enough for census of sardines, bigger breaks function
#numssshigh <- sample_fish(survey_N_ss, effNhigh)

numsss <- aggregate(survey_N_ss$atoutput,list(survey_N_ss$species,survey_N_ss$agecl,survey_N_ss$time),sum)
names(numsss) <- c("species","agecl","time","atoutput")
numssshigh <- data.frame(species = numsss$species,
					  agecl = numsss$agecl,
					  polygon = NA,
					  layer = NA,
					  time = numsss$time,
					  atoutput = numsss$atoutput)

# save age comps
saveRDS(numssshigh, file.path(d.name, paste0(scenario.name, "natage_census_sard.rds")))

# aggregate true resn per survey design
aggresnss <- aggregateDensityData(dat = truth$resn,
                                 time = timeall,
                                 species = species_ss,
                                 boxes = boxall)

# aggregate true structn per survey design
aggstructnss <- aggregateDensityData(dat = truth$structn,
                                 time = timeall,
                                 species = species_ss,
                                 boxes = boxall)

#dont sample these, just aggregate them using median
structnss <- sample_fish(aggstructnss, effNhigh, sample = FALSE)

resnss <-  sample_fish(aggresnss, effNhigh, sample = FALSE)

# this function does the length sampling and also weight at age
length_census_ss <- calc_age2length(structn = structnss,
                                 resn = resnss,
                                 nums = numssshigh,
                                 biolprm = truth$biolprm, fgs = truth$fgs,
                                 CVlenage = 0.1, remove.zeroes=TRUE)

#save for later use, takes a long time to generate
saveRDS(length_census_ss, file.path(d.name, paste0(scenario.name, "lengthwt_census_sard.rds")))

```

Plot some of the age, length comps, and wtatage

```{r plot-true-agecomps}

Natage <- readRDS(file.path(d.name, paste0(scenario.name, "natage_census_sard.rds")))

Natageplot <- ggplot(Natage, aes(x=agecl, y=atoutput)) +
  geom_point() +
  theme_tufte() +
  labs(subtitle = paste(scenario.name,
                        Natage$species))

Natageplot + facet_wrap_paginate(~time, ncol=3, nrow = 3, page = 1, scales="free")
Natageplot + facet_wrap_paginate(~time, ncol=3, nrow = 3, page = 2, scales="free")
Natageplot + facet_wrap_paginate(~time, ncol=3, nrow = 3, page = 3, scales="free")
Natageplot + facet_wrap_paginate(~time, ncol=3, nrow = 3, page = 4, scales="free")

```

```{r lf-plot}

lengthwt_census_ss <- readRDS(file.path(d.name, paste0(scenario.name, "lengthwt_census_sard.rds")))

len <- lengthwt_census_ss$natlength

lfplot <- ggplot(len, aes(upper.bins)) +
  geom_bar(aes(weight = atoutput)) +
  theme_tufte() +
  labs(subtitle = paste(scenario.name,
                        len$species))

lfplot + facet_wrap_paginate(~time, ncol=4, nrow = 4, page = 1, scales="free_y")
lfplot + facet_wrap_paginate(~time, ncol=4, nrow = 4, page = 2, scales="free_y")
lfplot + facet_wrap_paginate(~time, ncol=4, nrow = 4, page = 3, scales="free_y")


```

```{r wtage-plot}

wtage_ann <- lengthwt_census_ss$muweight %>%
  filter(time %in% annualmidyear)

# reverse to show agecl time series of wt
wageplot <- ggplot(wtage_ann, aes(time, atoutput)) +
  geom_line(aes(colour = factor(agecl))) +
  theme_tufte() +
  theme(legend.position = "bottom") +
  xlab("model timestep (5 per year)") +
  ylab("average individual weight (g)") +
  labs(subtitle = paste0(scenario.name, " annual mid year sample"))

wageplot + facet_wrap(c("species"), scales="free_y")

```

Get true population parameters (M in progress, S-R, growth)

```{r true-poppars}

```

Get true catch in tons (may not be different from input for our project)

```{r true-catchtot}

truecatchbio <- load_catch(d.name, file_catch = catch.file, fgs = funct.groups)

truecatchbio_ss <- truecatchbio[truecatchbio$species == species_ss,]

# note: time output of this file is in days
# what model timestep is this output matching?
# rescale catch in biomass time from days to years
truecatchbio_ss$time <- as.integer(truecatchbio_ss$time/365)

plotcatch <- ggplot() +
  geom_line(data=truecatchbio_ss, aes(x=time,y=atoutput, color="true catch bio"),
            alpha = 10/10) +
  theme_tufte() +
  theme(legend.position = "top") +
  labs(colour=scenario.name)

plotcatch +
  facet_wrap(~species, scales="free")

```

Get true catch at age and other comps (may not be different from input for our project). First see if catch output is at same timescale as population by checking toutfinc: `r fstepperyr`


```{r true-catage-lf-wtage, eval=FALSE}

# get survey nums with full (no) selectivity
catchN_ss <- create_fishery_subset(dat = truth$catch,
                                 time = timeall,
                                 species = species_ss,
                                 boxes = boxall)

# try summing this to get true catch in nums by timestep with surv_cv = 0
totcatchN <- sample_survey_numbers(catchN_ss, surv_cv)

# save true total catch in numbers by timestep, need to sum to year
saveRDS(totcatchN, file.path(d.name, paste0(scenario.name, "catchN_census_sard.rds")))

# apply default sample fish as before to get numbers
# no, dont apply sample, effN isnt high enough and can't be made high enough
#catch_numssshigh <- sample_fish(catchN_ss, effNhigh)

# just aggregte true numbers over layer and polygon
catch_numsss <- aggregate(catchN_ss$atoutput,list(catchN_ss$species,catchN_ss$agecl,catchN_ss$time),sum)
names(catch_numsss) <- c("species","agecl","time","atoutput")
catch_numssshigh <- data.frame(species = catch_numsss$species,
					  agecl = catch_numsss$agecl,
					  polygon = NA,
					  layer = NA,
					  time = catch_numsss$time,
					  atoutput = catch_numsss$atoutput)


# save true catch at age
saveRDS(catch_numssshigh, file.path(d.name, paste0(scenario.name, "catchage_census_sard.rds")))


# aggregate true resn per survey or fishery subset design
catch_aggresnss <- aggregateDensityData(dat = truth$resn,
                                 time = timeall,
                                 species = species_ss,
                                 boxes = boxall)

# aggregate true structn per survey or fishery subsetdesign
catch_aggstructnss <- aggregateDensityData(dat = truth$structn,
                                 time = timeall,
                                 species = species_ss,
                                 boxes = boxall)

#dont sample these, just aggregate them using median
catch_structnss <- sample_fish(catch_aggstructnss, effNhigh, sample = FALSE)

catch_resnss <-  sample_fish(catch_aggresnss, effNhigh, sample = FALSE)

# get catch lengths and wtage at every timestep
catch_length_census_ss <- calc_age2length(structn = catch_structnss,
                                 resn = catch_resnss,
                                 nums = catch_numssshigh,
                                 biolprm = truth$biolprm, fgs = truth$fgs,
                                 maxbin = 150,
                                 CVlenage = 0.1, remove.zeroes=TRUE)

#save for later use, takes a long time to generate
saveRDS(catch_length_census_ss, file.path(d.name, paste0(scenario.name, "catchlengthwt_census_sard.rds")))

```

This is true catch at age for sardine:

```{r catage1}

catch_numssshigh <- readRDS(file.path(d.name, paste0(scenario.name, "catchage_census_sard.rds")))

catchage <- catch_numssshigh %>%
  filter(species == "Pacific_sardine")

catageplot <- ggplot(catchage, aes(x=agecl, y=atoutput)) +
  geom_point() +
  theme_tufte() +
  labs(subtitle = paste(scenario.name,
                        catchage$species))

catageplot + facet_wrap_paginate(~time, ncol=3, nrow = 3, page = 1, scales="free")
catageplot + facet_wrap_paginate(~time, ncol=3, nrow = 3, page = 2, scales="free")
catageplot + facet_wrap_paginate(~time, ncol=3, nrow = 3, page = 3, scales="free")
catageplot + facet_wrap_paginate(~time, ncol=3, nrow = 3, page = 4, scales="free")

```

Do true catch numbers times weight at age match catch in biomass? It was about 16 times too high relative to catch.txt output, and shifted one year back (which is due to the floor function, and an easy fix).

June 6: just corrected the catch nums correction in `run_truth` and I hope it matches now.

```{r catchNtoB}

totcatchN <- readRDS(file.path(d.name, paste0(scenario.name, "catchN_census_sard.rds")))

totcatchann <- totcatchN %>%
  mutate(yr = floor(time/fstepperyr)+1) %>%
  group_by(species, yr) %>%
  summarise(anncatchage = sum(atoutput))

plottotC <- ggplot(totcatchann, aes(x=yr, y=anncatchage)) +
  geom_point() +
  theme_tufte() +
  labs(subtitle = paste("Catch in N ", scenario.name,
                        totcatchann$species))

plottotC

catch_numssshigh <- readRDS(file.path(d.name, paste0(scenario.name, "catchage_census_sard.rds")))

catch_lengthwt_census_ss <- readRDS(file.path(d.name, paste0(scenario.name, "catchlengthwt_census_sard.rds")))

CwtAtAge <- catch_lengthwt_census_ss$muweight %>%
  select(species, agecl, time, wtAtAge = atoutput) %>%
  mutate(wtAtAge = wtAtAge/1000)

catchbioN_ss <- merge(catch_numssshigh,CwtAtAge,by=c("species","agecl", "time"),all.x=T) %>%
  mutate(biomass = atoutput*wtAtAge/1000) %>%
  mutate(yr = floor(time/fstepperyr)+1) %>%
  group_by(species, yr) %>%
  summarise(anncatchB = sum(biomass))


plotcatch <- ggplot() +
  geom_line(data=truecatchbio_ss, aes(x=time,y=atoutput, color="true catch bio from txt"),
            alpha = 10/10) +
  geom_point(data=catchbioN_ss, aes(x=yr,y=anncatchB, color="true catch bio from N"),
            alpha = 10/10) +
  theme_tufte() +
  theme(legend.position = "top") +
  labs(colour=scenario.name)

plotcatch +
  facet_wrap(~species, scales="free")

```

Sadly, still a problem with the catch correction if it should match catch.txt when comverted to biomass. We are now off by about a factor of 2 (catch nums * wtAtAge is half true catch bio from the txt file):

```{r catchoff}

 sardcatchB <- truecatchbio_ss #%>%
# now done above
#   mutate(yr = as.integer(time/365))

sardcatchBcomp <- merge(sardcatchB, catchbioN_ss)
sardcatchBcomp <- mutate(sardcatchBcomp, mult=atoutput/anncatchB)

mean(sardcatchBcomp$mult)

summary(sardcatchBcomp$mult)

```


## “Data” from OM for EMs

Specify our survey sampling. This could come from other information such as the overlap of actual survey stations with OM polygons, experiments evaluating survey selectivity and efficiency, actual sample-based survey cv, etc.

Here we are using Christine's specifications, which approximate the acoustic survey index used in the sardine assessment:

```{r sardine-survey-spec}
# from atlantisom/config/sardine_config.R, to be generalized here

#Set species name
species <- species_ss #"Pacific_sardine" #change to species_ss to match

#Directory with SS files
model_dir <- "Sardine_SS_files/"

#Name of SS data file
datfile_name <- "sardEM_3_3.dat"

#Name of atlantis model output - this is an RData object from the runtruth() function
#atlantis_output_name <- "outputCCV3run_truth.RData"

#Efficiency
eff_case <- 0.5

#Age classes of your species
age_classes <- 1:10

#Selectivity function - a vector equivalent to length (age classes)
sel_by_age <- rep(1,length(age_classes))

#Atlantis model timestep corresponding to the true output--now from census_spec.R
timestep <- stepperyr #5

#Which atlantis timestep does the survey run in?--now from census_spec.R
# with 5 output steps per year, 0 is Jan-Feb-midMar, 1 is midMar-Apr-May,
# 2 is June-July-midAug, 3 is midAug-Sept-Oct, 4 is Nov-Dec (ish)

survey_sample_time <- midptyr #3 this is more late summer-fall, changed to 2

#The last timestep to sample
total_sample <- noutsteps-1 #495

#Vector of indices of survey times to pull
survey_sample_full <- seq(survey_sample_time,
                          total_sample, by=timestep)

#Effective sample size for composition data (annual total samples)
surveyEffN <- 1000
fisheryEffN <- 1000

#CVs for length at age, catch, and survey
CVs <- list("lenage"=0.1, "fishery"=0.01, "survey"=0.1)

#Number of years of data to pull
nyears <- 50

#Atlantis initialization period in years
burnin <- 30

#Maximum size of a fish in cm
maxbin <- 150

#Years to fish, assuming fishing output is annual and years index 1,2,3 etc
#fish_years <- burnin:(burnin+nyears-1)
# fishery output timestep (fstepperyr in census_spec.R) for this run is 5 per year

# here we are summing the catch in numbers for input in to SS.
# To use the total catch biomass output from catch.txt the above would work
# because the output is annual but in units of DAYS so needs time/365

#The last timestep to sample
total_sample <- noutsteps-1 #495

#Vector of indices of catch in numbers to pull (by timestep to sum)
fish_sample_full <- c(0:total_sample)
fish_burnin <- burnin*fstepperyr+1
fish_nyears <- nyears*fstepperyr
fish_times <- fish_sample_full[fish_burnin:(fish_burnin+fish_nyears-1)]
fish_timesteps <- seq(fish_times[5], max(fish_times), by=fstepperyr) #last timestep
fish_years <- unique(floor(fish_times/fstepperyr)+1)

# potential problem: while they are from the same model year,
# fish_years dont match survey_years timesteps (survey mid year, fishery end)
# so when matching use the index, not the value itself

#Years to survey, assuming survey is once per year
survey_years <- survey_sample_full[(burnin+1):(burnin+nyears)]

#Month of survey/fishing
survey_month <- 7
fishing_month <- 1

#Efficiency of the survey
effic <- data.frame(species=species, efficiency=eff_case)

# Assume uniform selectivity, same as selex1 here
sel<- data.frame(species=rep(species,length(age_classes)),
               agecl=age_classes,
              selex=sel_by_age)

```

Get input survey biomass for SS (these have q, selectivity, cv)

```{r toSS-surveyts-comps}

#Sample survey - 3rd timestep simulates a summer survey
survey_out <- create_survey(dat=truth$nums,
                            time=survey_sample_full,
                            species=species,
                            boxes=boxall,
                            effic=effic,
                            selex=sel)

#Try a biomass based survey for comparison
survey_outB <- create_survey(dat=truth$biomass_ages,
                            time=survey_sample_full,
                            species=species,
                            boxes=boxall,
                            effic=effic,
                            selex=sel)


#Set effective sample size for age compositions
effN <- surveyEffN
highEffN <- data.frame(species=species, effN=rep(effN, length(species)))

#Sample fish for age composition
age_comp_data <- sample_fish(survey_out, highEffN)

# aggregate true resn per survey design
survey_aggresnstd <- aggregateDensityData(dat = truth$resn,
                                          time = survey_sample_full,
                                          species = species,
                                          boxes = boxall)

# aggregate true structn per survey design
survey_aggstructnstd <- aggregateDensityData(dat =truth$structn,
                                             time = survey_sample_full,
                                             species = species,
                                             boxes = boxall)

ss_structnstd <- sample_fish(survey_aggstructnstd,
                             effN,
                             sample=FALSE)
ss_resnstd <- sample_fish(survey_aggresnstd,
                          effN,
                          sample=FALSE)

#Extract length composition data
ss_length_stdsurv <- calc_age2length(structn = ss_structnstd,
                                     resn = ss_resnstd,
                                     nums = age_comp_data,
                                     biolprm = truth$biolprm, fgs = truth$fgs,
                                     CVlenage = CVs$lenage, remove.zeroes=TRUE)

#Need to replace with interp function
wtAtAge <- ss_length_stdsurv$muweight %>%
  select(species, agecl, time, wtAtAge = atoutput) %>%
  mutate(wtAtAge = wtAtAge/1000)

# CV function
cv <- data.frame(species="Pacific_sardine", cv=CVs$survey)

#Sample survey biomass
survObsBiom <- sample_survey_biomass(dat=survey_out,cv=cv,wtAtAge)

# check against survey with truth$biomass_ages output
wtage1 <- data.frame(species=rep(species_ss, each=10),
                    agecl=rep(c(1:10),length(species_ss)),
                    wtAtAge=rep(1000.0,length(species_ss)*10))

survObsBiomB <- sample_survey_biomass(dat=survey_outB,cv=cv,wtage1)

# survey numbers, not sure which SS needs?
survObsNum <- sample_survey_numbers(dat=survey_out,cv=cv)


```

Do some comparisons of biomass surveys with truth.

```{r plotBseries}

plotB <-ggplot() +
  geom_point(data=atBtxt2tidy, aes(x=Time/365,y=biomass, color="txt output true B"),
             alpha = 10/10) +
  geom_line(data=trueB_ss, aes(x=time/stepperyr,y=atoutput, color="survey census B"),
            alpha = 10/10) +
  geom_point(data=survObsBiomB, aes(x=time/stepperyr,y=atoutput, color="survey sample B from biomass_ages"),
            alpha = 10/10) +
  theme_tufte() +
  theme(legend.position = "top") +
  labs(colour=scenario.name)

plotB +
  facet_wrap(~species, scales="free")

plotB <-ggplot() +
    geom_point(data=atBtxt2tidy, aes(x=Time/365,y=biomass, color="txt output true B"),
             alpha = 10/10) +
    geom_line(data=trueB_ss, aes(x=time/stepperyr,y=atoutput, color="survey census B"),
            alpha = 10/10) +
  geom_point(data=survObsBiom, aes(x=time/stepperyr,y=atoutput, color="survey sample B from nums * wtAtAge"),
            alpha = 10/10) +
  theme_tufte() +
  theme(legend.position = "top") +
  labs(colour=scenario.name)

plotB +
  facet_wrap(~species, scales="free")
```

Get input total catch for SS (here using numbers). SS takes a fishery cv which can be applied here to get a catch time series, or we can input true catch.

PLEASE DON'T USE THIS FOR CCA. We have determined above that catch in numbers from legacy atlantis code, even corrected, is still off by a factor of 2. We leave the info here because this code can be used for atlantis codebases after December 2015.

```{r toSS-fisheryts}

# catch at age by area and timestep
catch_numbers <-  create_fishery_subset(dat = truth$catch,
                                         time = fish_times,
                                         species = species,
                                         boxes = boxall)

if (fstepperyr>1){
  #need to take all output time steps and sum catch over each year
  catch_numbers_area <- catch_numbers %>%
    #assign a temporary year column, aggregate nums over this year
    mutate(yr = floor(time/fstepperyr)+1) %>%
    group_by(species, agecl, polygon, yr) %>%
    summarise(anncatch_area=sum(atoutput))

  # catch in numbers summed over polygon
  # and agecl after time has been aggregated into 1 yr
  catch_total <- catch_numbers_area %>%
    group_by(yr) %>%
    summarise(catch=sum(anncatch_area))
}

# catch in numbers summed over polygon and agecl if output is annual
if (fstepperyr==1){
  catch_total <- catch_numbers %>%
    group_by(time) %>%
    summarise(catch=sum(atoutput))
}

# if we want to apply the CV to this we need to write sample_total_catch

#cv <- data.frame(species="Pacific_sardine", cv=CVs$fishery)

#catchObsNum <- sample_total_catch(dat=catch_numbers, cv=cv)

```

Check this against true total catch numbers above. Should be the same (and it is, but both are incorrect in the absolute scale due to the CATCH.nc output problem in older codebases).

```{r compare-totcatch}

compareC <- ggplot() +
  geom_line(data=totcatchann, aes(x=yr, y=anncatchage, color="census catch N"),
            alpha = 10/10) +
  geom_point(data=catch_total, aes(x=yr,y=catch, color="sample catch N"),
            alpha = 10/10) +
  theme_tufte() +
  theme(legend.position = "top") +
  labs(colour=scenario.name)

compareC +
  facet_wrap(~species, scales="free")

```

Get composition inputs for SS (survey and fishery catch at age, survey and fishery lengths, survey and fishey weight at age).

Because catch composition information goes in to the assessment as a proportion, we can use fishery catch at age from this legacy codebase even with absolute catch numbers likely half what they should be overall.


```{r toSS-fisherycomps}

# Survey length comps and wtage done above to get survey ts using nums*wtage approach

#We end up using CAAL for the survey below, so let's generate fishery age comps instead
effN <- fisheryEffN/fstepperyr
effN <- data.frame(species=species, effN=effN)

#catch at age each timestep summed over polygons
catch_numsss_samp <- sample_fish(catch_numbers, effN)

#Get weights
# aggregate true resn per fishery subset design
catch_aggresnss <- aggregateDensityData(dat = truth$resn,
                                 time = fish_times,
                                 species = species,
                                 boxes = boxall)

# aggregate true structn fishery subsetdesign
catch_aggstructnss <- aggregateDensityData(dat = truth$structn,
                                 time = fish_times,
                                 species = species,
                                 boxes = boxall)

#dont sample these, just aggregate them using median
catch_structnss_samp <- sample_fish(catch_aggstructnss, effN, sample = FALSE)

catch_resnss_samp <-  sample_fish(catch_aggresnss, effN, sample = FALSE)

# these fishery lengths and weight at age are each output timestep
catch_lengthwt_samp <- calc_age2length(structn = catch_structnss_samp,
                                 resn = catch_resnss_samp,
                                 nums = catch_numsss_samp,
                                 biolprm = truth$biolprm, fgs = truth$fgs,
                                 maxbin = maxbin,
                                 CVlenage = CVs$lenage, remove.zeroes=TRUE)


```

<!--Can we match catch in weight from catch.txt? No, numbers sampled are 1e-6 compared with actual catch numbers.

```{r compareC-wt, eval=FALSE}

CswtAtAge <- catch_lengthwt_samp$muweight %>%
  select(species, agecl, time, wtAtAge = atoutput) %>%
  mutate(wtAtAge = wtAtAge/1000)

catchsampbio_ss <- merge(catch_numsss_samp,CswtAtAge,by=c("species","agecl", "time"),all.x=T) %>%
  mutate(biomass = atoutput*wtAtAge/1000) %>%
  mutate(yr = floor(time/fstepperyr)+1) %>%
  group_by(species, yr) %>%
  summarise(anncatchBsamp = sum(biomass))

plotcatch <- ggplot() +
  geom_line(data=truecatchbio_ss, aes(x=time/365,y=atoutput/1000000, color="true catch bio"),
            alpha = 10/10) +
  geom_point(data=catchsampbio_ss, aes(x=yr,y=anncatchBsamp, color="sample catch B"),
            alpha = 10/10) +
  theme_tufte() +
  theme(legend.position = "top") +
  labs(colour=scenario.name)

plotcatch +
  facet_wrap(~species, scales="free")

```
-->

Get population parameters for SS from Atlantis

```{r toSS-poppars}

```

## Write data for Stock Synthesis

To read in the Stock Synthesis files, first ensure you have the latest version of `r4ss`. You will need to ensure the relevant Stock Synthesis files are stored in the inst/extdata/ folder, with each species example in its own folder.

```{r readdata}
#devtools::install_github("r4ss/r4ss", dependencies = FALSE)
require(r4ss)
#source("./config/sardine_config.R") this is above for illustration

stocksynthesis.data <- r4ss::SS_readdat_3.30(paste0("./inst/extdata/",
model_dir,
datfile_name))
```

*Now changing catch units to tons!*

Write time series biomass (tons) and catch (tons) data for SS using `SS_write_ts`:

```{r SS-write-ts}


#Test writing CPUE in biomass
  stocksynthesis.data <- SS_write_ts(ss_data_list = stocksynthesis.data,
                ts_data = list(survObsBiom$atoutput[fish_years],
                  truecatchbio_ss$atoutput[truecatchbio_ss$time %in% fish_years]),
                CVs = c(CVs$survey,
                        CVs$fishery),
                data_years = list((survObsBiom$time[fish_years]-survey_sample_time)/timestep+1,             fish_years),
            sampling_month = list(rep(survey_month,nyears),
                                rep(fishing_month,nyears)),
                units = c("biomass","biomass"),
                data_type=c("CPUE","catch"),
                fleets = c(2,1))

stocksynthesis.data$CPUE

stocksynthesis.data$catch
```

Writing age composition data

```{r SS-write-comps}
#age_comp_data

#Get the age bins
age_bin_names <- names(stocksynthesis.data$agecomp)[10:length(names(stocksynthesis.data$agecomp))]
age_bins <- sub("a","",age_bin_names)

age_comp_flat <- reformat_compositions(age_comp_data,
                     round.places = 4,
                     comp_type = "agecomp")

## Write age composition data for survey
stocksynthesis.data <- SS_write_comps(ss_data_list = stocksynthesis.data,
               comp_matrix = list(age_comp_flat[burnin:(burnin+nyears-1),]),
               data_rows = list(stocksynthesis.data$styr:(stocksynthesis.data$styr+nyears-1)),
               sampling_month = list(rep(survey_month,nyears)),
               data_type = c("agecomp"),
               fleet_number = c(1),
               bins = list(age_bins),
               caal_bool = c(FALSE))
stocksynthesis.data$agecomp


```

Once we have the natlength matrix, we can munge the data into the proper CAAL and length bin format for SS

```{r SS-comps-formatting, eval=FALSE}

#Check length compositions match age compositions
#ss_length_stdsurv$natlength %>%
#  group_by(time,agecl) %>%
#  summarise(sum(atoutput))

len_comp_data <- ss_length_stdsurv$natlength
fish_len_comp_data <- catch_lengthwt_samp$natlength

if(fstepperyr>1){
  fish_len_comp_anndata <- fish_len_comp_data %>%
    mutate(yr = floor(time/fstepperyr)) %>%
    group_by(species, agecl, lower.bins, upper.bins, time=as.integer(yr)) %>%
    summarise(annnatlength=sum(atoutput)) %>%
    rename(atoutput = annnatlength)
} else {
  fish_len_comp_anndata <- fish_len_comp_data
}

caal_comp_flat <- reformat_compositions(len_comp_data,                                round.places=4,
                comp_type="caalcomp")


#remove burnin
caal_comp_final <- filter(caal_comp_flat,
                         time %in% survey_years)


#Add over age classes to get sample size
len_comp_flat <- reformat_compositions(len_comp_data,
                                  round.places = 0,
                           comp_type="lencomp")
#remove burnin
len_comp_final <- filter(len_comp_flat,
                         time %in% survey_years)

length_bins <- as.integer(names(len_comp_final))
length_bins <- length_bins[!is.na(length_bins)]

# fishery length comps are still 5 timesteps per year
# need to aggregate to annual (done above)
# also,  make effN annual goal/fstepsperyr (done above)
fish_len_comp_flat <- reformat_compositions(fish_len_comp_anndata,
                                  round.places = 0,
                           comp_type="lencomp")

#remove burnin works after adjustment above
fish_len_comp_final <- filter(fish_len_comp_flat,
                         time %in% fish_years)

# fishery age comps also 5 timesteps per year
if(fstepperyr>1){
  fish_age_comp_anndata <- catch_numsss_samp %>%
    mutate(yr = floor(time/fstepperyr)) %>%
    group_by(species, agecl, time=as.integer(yr)) %>%
    summarise(annnatage=sum(atoutput)) %>%
    rename(atoutput = annnatage)
} else {
  fish_age_comp_anndata <- catch_numsss_samp
}

fish_age_comp_flat <- reformat_compositions(fish_age_comp_anndata,
                           comp_type="agecomp")

#remove burnin (not necessary?fish comps made with fish_years only)
fish_age_comp_final <- filter(fish_age_comp_flat,
                         time %in% fish_years)

comp_list <- list(caal_comp_final,len_comp_final, fish_age_comp_final, fish_len_comp_final)

apply_month <- list(rep(survey_month, nrow(comp_list[[1]])),
                    rep(survey_month, nrow(comp_list[[2]])),
                    rep(fishing_month,nrow(comp_list[[3]])),
                    rep(fishing_month,nrow(comp_list[[4]])))


# below fails with two errors (only gets to first when running)