-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconstDBcoverage.Rmd
More file actions
130 lines (86 loc) · 4.59 KB
/
constDBcoverage.Rmd
File metadata and controls
130 lines (86 loc) · 4.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
---
title: "Neotoma Constituent Database Coverage"
author: "Nick Hoffman"
date: "`r Sys.Date()`"
output: html_document
---
```{r setup, include=FALSE}
# Set the default chunk option so code is echoed into the rendered document;
# individual chunks can still override this in their own header.
knitr::opts_chunk$set(echo = TRUE)
```
```{r cars,echo=FALSE, message=FALSE,include=TRUE, warning=FALSE}
library(tidyverse)
library(stringr)
library(jsonlite)
# Load the wide recommender table and turn every missing cell into 0 so the
# pivoted cells below can be filtered with a plain `!= 0` test.
wideTable <- read.csv("../../recommender_table_wide1022.csv")
wideTable[is.na(wideTable)] <- 0

# Melt one family of columns (chosen by name prefix) into a single label
# column: pivot longer, keep only the rows whose cell is non-zero, then drop
# the `value` column that the pivot created.
melt_flagged <- function(tbl, prefix, label_col) {
  tbl %>%
    pivot_longer(cols = starts_with(prefix), names_to = label_col) %>%
    dplyr::filter(value != 0) %>%
    dplyr::select(!value)
}

# Melt the "t_", "pl_" and "d_" column families in turn, giving one row per
# surviving Time x RegionName x Dataset combination of each input row.
longTable <- wideTable %>%
  melt_flagged("t_", "Time") %>%
  melt_flagged("pl_", "RegionName") %>%
  melt_flagged("d_", "Dataset")
# Format a vector of names as a phrase: "A", "A or B", "A, B or C".
# A vector of length 0 or 1 is returned unchanged.
join_with_or <- function(dbs) {
  n <- length(dbs)
  if (n <= 1) {
    return(dbs)
  }
  leading <- paste(dbs[-n], collapse = ", ")
  paste(leading, "or", dbs[n])
}

# One row per Time x RegionName x Dataset, with the distinct Database values
# of that group collapsed into a single phrase.
longTable <- longTable %>%
  group_by(Time, RegionName, Dataset) %>%
  summarize(
    Database = join_with_or(unique(Database)),
    .groups = "drop"
  )
# Display labels for the three recognised time-period column names. Indexing
# this vector by any other name yields NA, so unrecognised values become NA.
time_labels <- c(
  "t_pre.Quaternary" = "pre-Quaternary",
  "t_modern..post.1850.AD." = "modern (post 1850 AD)",
  "t_Quaternary" = "Quaternary"
)

longTable <- longTable %>%
  mutate(
    Time = unname(time_labels[Time]),
    # Drop the leading "pl_" / "d_" prefix, then turn the dots in the
    # remaining column name into spaces.
    RegionName = str_replace_all(str_sub(RegionName, start = 4), "\\.", " "),
    Dataset = str_replace_all(str_sub(Dataset, start = 3), "\\.", " "),
    # Restore the punctuation of the two dataset names that contain it.
    Dataset = case_when(
      Dataset == "x ray fluorescence diffraction" ~
        "x-ray fluorescence/diffraction",
      Dataset == "loss on ignition" ~ "loss-on-ignition",
      TRUE ~ Dataset
    )
  )
# Region codes, held as a one-column data frame so they can be bound
# column-wise to the readable names below.
regions <- data.frame(
  Region = c(
    "afr", "arct", "far", "chin", "jap", "rest", "atl", "eurbig",
    "cent", "ala", "us", "can", "oceania", "pacoc", "southam"
  )
)

# Lookup table pairing each readable region name with its code; the two
# vectors are matched purely by position.
regionnames <- cbind(
  data.frame(
    RegionName = c(
      "Africa", "the Arctic", "the far East", "China", "Japan",
      "Asia outside of China Japan and the far East", "the Atlantic",
      "Europe", "Central America", "Alaska",
      "the continental United States", "Canada", "Oceania", "the Pacific",
      "South America"
    )
  ),
  regions
)

# Attach the region code to every row, then switch the one comma-less label
# to its punctuated display form (the join itself needs the comma-less key).
longTable <- longTable %>%
  left_join(regionnames, by = "RegionName") %>%
  mutate(
    RegionName = if_else(
      RegionName == "Asia outside of China Japan and the far East",
      "Asia outside of China, Japan, and the far East",
      RegionName
    )
  )
# Axes of the full coverage grid: region codes, dataset types, time periods.
regions <- c(
  "afr", "arct", "far", "chin", "jap", "rest", "atl", "eurbig",
  "cent", "ala", "us", "can", "oceania", "pacoc", "southam"
)
datasets <- c(
  "biomarker", "charcoal", "diatom", "dinoflagellates", "insect", "sedaDNA",
  "ostracode", "plant macrofossil", "pollen", "specimen stable isotope",
  "testate amoebae", "vertebrate fauna", "water chemistry", "biochemistry",
  "chironomid", "cladocera", "geochemistry", "loss-on-ignition",
  "macroinvertebrate", "organic carbon", "paleomagnetic",
  "physical sedimentology", "phytolith", "stable isotope",
  "x-ray fluorescence/diffraction"
)
times <- c("modern (post 1850 AD)", "Quaternary", "pre-Quaternary")

# Every Region x Time x Dataset combination, one row each, as character
# columns. expand.grid() varies its first argument fastest, so the axes are
# passed innermost-first (Dataset, Time, Region) and the columns are then put
# back in Region/Time/Dataset order: rows come out region-major with Dataset
# cycling fastest. The row count follows from the lengths of the three
# vectors, so adding or removing an entry above needs no other change.
longFull <- expand.grid(
  Dataset = datasets,
  Time = times,
  Region = regions,
  KEEP.OUT.ATTRS = FALSE,
  stringsAsFactors = FALSE
)[, c("Region", "Time", "Dataset")]
# Attach the observed coverage to the full grid. Grid rows with no match in
# longTable are kept and get NA in the columns that longTable contributes.
longFull <- left_join(
  longFull,
  longTable,
  by = c("Region", "Time", "Dataset")
)
library(DT)
```
```{r pressure,echo=FALSE, message=FALSE,include=TRUE, warning=FALSE}
library(stringr)
# Count how many constituent databases cover each Region x Time x Dataset
# combination in longFull, then average that count along several groupings.
#
# Grid combinations that matched nothing in the left join carry NA in both
# Database and RegionName. They are counted as 0 databases and their region
# label is filled in from the regionnames lookup. Without this, mean() returns
# NA for every group that contains a gap, and all gaps pool into one NA region.
region_labels <- setNames(
  as.character(regionnames$RegionName),
  as.character(regionnames$Region)
)
# Use the punctuated display form of this one label (no-op if already set).
region_labels[
  region_labels == "Asia outside of China Japan and the far East"
] <- "Asia outside of China, Japan, and the far East"

counter <- longFull %>%
  dplyr::mutate(
    RegionName = coalesce(
      RegionName,
      unname(region_labels[as.character(Region)])
    ),
    # Database reads "A", "A or B" or "A, B or C", so the number of names is
    # (commas) + (" or " separators) + 1; a missing Database means none.
    num_db = if_else(
      is.na(Database),
      0,
      str_count(Database, ",") + str_count(Database, " or ") + 1
    )
  ) %>%
  dplyr::select(!Region)
datatable(counter, rownames = FALSE)

# Mean number of databases per group, rounded to 2 decimals and sorted from
# best to worst covered. `...` names the grouping columns.
mean_db_by <- function(tbl, ...) {
  tbl %>%
    dplyr::group_by(...) %>%
    summarize(meanNumDB = round(mean(num_db), 2), .groups = "drop") %>%
    arrange(desc(meanNumDB))
}

byReg <- mean_db_by(counter, RegionName)
datatable(byReg, rownames = FALSE)

byTime <- mean_db_by(counter, Time)
datatable(byTime, rownames = FALSE)

byProxy <- mean_db_by(counter, Dataset)
datatable(byProxy, rownames = FALSE)

byRegTime <- mean_db_by(counter, RegionName, Time)
datatable(byRegTime, rownames = FALSE)

byRegProxy <- mean_db_by(counter, RegionName, Dataset)
datatable(byRegProxy, rownames = FALSE)
```