|
1 | | -# ===== Load dependencies ===== |
2 | | -library(dplyr) |
3 | | -library(tidyr) |
4 | | -library(stringr) |
5 | | -library(testthat) |
| 1 | +#Function file: describe_curation.R |
6 | 2 |
|
7 | | -# ===== Mock getSignatures function ===== |
8 | | -getSignatures <- function(dat, tax.id.type = "metaphlan") { |
9 | | - strsplit(dat$`MetaPhlAn taxon names`, "\\|") |
10 | | -} |
11 | | - |
12 | | -# ===== Helper: countTaxon ===== |
13 | | -.countTaxon <- function(dat, x, direction = c("both", "increased", "decreased")) { |
14 | | - direction <- match.arg(direction) |
15 | | - if (direction != "both") { |
16 | | - dat <- dplyr::filter(dat, `Abundance in Group 1` == direction) |
17 | | - } |
18 | | - allnames <- getSignatures(dat) |
19 | | - sum(vapply(allnames, function(sig) x %in% sig, integer(1))) |
20 | | -} |
21 | | - |
22 | | -# ===== Helper: Binomial Test Summary ===== |
23 | | -.createBinomTestSummary <- function(x, n, p = 0.5, wordy = TRUE) { |
24 | | - bt <- binom.test(x, n, p) |
25 | | - if (wordy) { |
26 | | - paste0("p-value=", signif(bt$p.value, 2), |
27 | | - " (increased freq=", x, ", total freq=", n, ")") |
28 | | - } else { |
29 | | - signif(bt$p.value, 2) |
30 | | - } |
31 | | -} |
32 | | - |
33 | | -# ===== Mock getMostFrequentTaxa function ===== |
34 | | -getMostFrequentTaxa <- function(dat, sig.type = "both", n = 10) { |
35 | | - tab <- table(dat$`MetaPhlAn taxon names`) |
36 | | - df <- as.data.frame(head(sort(tab, decreasing = TRUE), n)) |
37 | | - names(df) <- c("Var1", "Freq") |
38 | | - df |
39 | | -} |
40 | | - |
41 | | -# ===== Main Function: createTaxonTable ===== |
42 | | -createTaxonTable <- function(dat, n = 10) { |
43 | | - dmap <- c("kingdom", "phylum", "class", "order", "family", "genus", "species") |
44 | | - names(dmap) <- substring(dmap, 1, 1) |
45 | | - |
46 | | - output <- data.frame(getMostFrequentTaxa(dat, sig.type = "both", n = n), |
47 | | - stringsAsFactors = FALSE) %>% |
48 | | - mutate(metaphlan_name = Var1) %>% |
49 | | - separate(col = Var1, sep = "\\|", into = dmap, fill = "right") %>% |
50 | | - mutate(across(kingdom:species, ~ str_replace(., ".__", ""))) %>% |
51 | | - rename(n_signatures = Freq) |
52 | | - |
53 | | - output <- output %>% |
54 | | - mutate(n_signatures = sapply(metaphlan_name, function(x) { |
55 | | - sum(grepl(x, dat$`MetaPhlAn taxon names`, fixed = TRUE)) |
56 | | - })) %>% |
57 | | - mutate(total_signatures = sapply(metaphlan_name, function(x) |
58 | | - .countTaxon(dat, x, "both"))) %>% |
59 | | - mutate(increased_signatures = sapply(metaphlan_name, function(x) |
60 | | - .countTaxon(dat, x, "increased"))) %>% |
61 | | - mutate(decreased_signatures = sapply(metaphlan_name, function(x) |
62 | | - .countTaxon(dat, x, "decreased"))) %>% |
63 | | - mutate(Taxon = gsub(".+\\|", "", metaphlan_name)) |
64 | | - |
65 | | - output %>% |
66 | | - separate(Taxon, into = c("Taxonomic Level", "Taxon Name"), sep = "__") %>% |
67 | | - mutate(`Taxonomic Level` = unname(dmap[`Taxonomic Level`])) %>% |
68 | | - rowwise() %>% |
69 | | - mutate(`Binomial Test pval` = .createBinomTestSummary(increased_signatures, total_signatures, wordy = FALSE)) %>% |
70 | | - ungroup() %>% |
71 | | - relocate(`Taxon Name`, `Taxonomic Level`, total_signatures, |
72 | | - increased_signatures, decreased_signatures, `Binomial Test pval`) |
73 | | -} |
74 | | - |
75 | | -# ======= TEST ========= |
76 | | - |
77 | | -test_that("createTaxonTable returns correct structure", { |
78 | | - mock_data <- data.frame( |
79 | | - `MetaPhlAn taxon names` = c( |
80 | | - "k__Bacteria|p__Firmicutes|g__Lactobacillus", |
81 | | - "k__Bacteria|p__Firmicutes|g__Lactobacillus", |
82 | | - "k__Bacteria|p__Bacteroidetes|g__Bacteroides" |
83 | | - ), |
84 | | - `Abundance in Group 1` = c("increased", "decreased", "increased"), |
85 | | - stringsAsFactors = FALSE |
86 | | - ) |
87 | | - |
88 | | - result <- createTaxonTable(mock_data, n = 2) |
89 | | - |
90 | | - expect_s3_class(result, "data.frame") |
91 | | - expect_true(all(c("Taxon Name", "Taxonomic Level", "total_signatures", |
92 | | - "increased_signatures", "decreased_signatures", "Binomial Test pval") %in% colnames(result))) |
93 | | - expect_gt(nrow(result), 0) |
| 3 | +bsdb <- bugsigdbr::importBugSigDB() |
| 4 | +test_that("createTaxonTable creates a table of most frequent taxa in a data.frame", { |
| 5 | + expect_s3_class(createTaxonTable(bsdb[1:10,]), "data.frame") |
94 | 6 | }) |
0 commit comments