-
Notifications
You must be signed in to change notification settings - Fork 0
Feature/refactor_DIMS_GenerateBreaks #95
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 5 commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
093a4e9
removed outdir parameter, separated trim and breaks section
mraves2 1c98907
moved code into functions for GenerateBreaks
mraves2 76e4a56
added trim parameter to function get_trim_parameters and fixed typo
mraves2 aa127cd
added unit tests for GenerateBreaks
mraves2 9c2d41c
added fixtures files for test_generate_breaks
mraves2 ef3de4e
clarified trim parameter based on comments code review in DIMS genera…
mraves2 21e7ee4
Merge branch 'develop' into feature/refactor_DIMS_GenerateBreaks
mraves2 1695074
modified file name in DIMS/tests/testthat/test_generate_breaks.R
mraves2 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,61 +1,23 @@ | ||
| ## adapted from 1-generateBreaksFwhm.HPC.R ## | ||
|
|
||
| # load required package | ||
| suppressPackageStartupMessages(library("xcms")) | ||
|
|
||
| # define parameters | ||
| cmd_args <- commandArgs(trailingOnly = TRUE) | ||
|
|
||
| filepath <- cmd_args[1] | ||
| outdir <- cmd_args[2] | ||
| trim <- as.numeric(cmd_args[3]) | ||
| resol <- as.numeric(cmd_args[4]) | ||
|
|
||
| # initialize | ||
| trim_left_pos <- NULL | ||
| trim_right_pos <- NULL | ||
| trim_left_neg <- NULL | ||
| trim_right_neg <- NULL | ||
| breaks_fwhm <- NULL | ||
| breaks_fwhm_avg <- NULL | ||
| bins <- NULL | ||
| trim <- as.numeric(cmd_args[2]) | ||
| resol <- as.numeric(cmd_args[3]) | ||
|
|
||
| # read in mzML file | ||
| raw_data <- suppressMessages(xcms::xcmsRaw(filepath)) | ||
|
|
||
| # Get time values for positive and negative scans | ||
| pos_times <- raw_data@scantime[raw_data@polarity == "positive"] | ||
| neg_times <- raw_data@scantime[raw_data@polarity == "negative"] | ||
| # get trim parameters and save them to file | ||
| get_trim_parameters(raw_data@scantime, raw_data@polarity, trim) | ||
|
|
||
| # trim (remove) scans at the start and end for positive | ||
| trim_left_pos <- round(pos_times[length(pos_times) * (trim * 1.5)]) # 15% aan het begin | ||
| trim_right_pos <- round(pos_times[length(pos_times) * (1 - (trim * 0.5))]) # 5% aan het eind | ||
# create breaks of bins for intensities. Bin size is a function of fwhm which is a function of m/z
# mzrange is a slot of the S4 object raw_data, so it is accessed with @ (as for scantime and polarity), not with $
get_breaks_for_bins(raw_data@mzrange, resol)
|
|
||
| # trim (remove) scans at the start and end for negative | ||
| trim_left_neg <- round(neg_times[length(neg_times) * trim]) | ||
| trim_right_neg <- round(neg_times[length(neg_times) * (1 - trim)]) | ||
|
|
||
| # Mass range m/z | ||
| low_mz <- raw_data@mzrange[1] | ||
| # Determine maximum m/z and save to file | ||
| high_mz <- raw_data@mzrange[2] | ||
|
|
||
| # determine number of segments (bins) | ||
| nr_segments <- 2 * (high_mz - low_mz) | ||
| segment <- seq(from = low_mz, to = high_mz, length.out = nr_segments + 1) | ||
|
|
||
| # determine start and end of each bin. | ||
| for (i in 1:nr_segments) { | ||
| start_segment <- segment[i] | ||
| end_segment <- segment[i+1] | ||
| resol_mz <- resol * (1 / sqrt(2) ^ (log2(start_segment / 200))) | ||
| fwhm_segment <- start_segment / resol_mz | ||
| breaks_fwhm <- c(breaks_fwhm, seq(from = (start_segment + fwhm_segment), to = end_segment, by = 0.2 * fwhm_segment)) | ||
| # average the m/z instead of start value | ||
| range <- seq(from = (start_segment + fwhm_segment), to = end_segment, by = 0.2 * fwhm_segment) | ||
| delta_mz <- range[2] - range[1] | ||
| breaks_fwhm_avg <- c(breaks_fwhm_avg, range + 0.5 * delta_mz) | ||
| } | ||
|
|
||
| # generate output file | ||
| save(breaks_fwhm, breaks_fwhm_avg, trim_left_pos, trim_right_pos, trim_left_neg, trim_right_neg, file = "breaks.fwhm.RData") | ||
| save(high_mz, file = "highest_mz.RData") | ||
|
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,58 @@ | ||
| # GenerateBreaks functions | ||
get_trim_parameters <- function(scantimes, polarities, trim = 0.1) {
  #' determine the scans per scanmode which are trimmed off; save trim parameters to file
  #'
  #' @param scantimes: vector of scan times in seconds
  #' @param polarities: vector of polarities (positive or negative)
  #' @param trim: fraction of the scans used for trimming (default 0.1). For positive scans
  #'   1.5 * trim is taken off at the start and 0.5 * trim at the end; for negative scans
  #'   trim is taken off at both the start and the end.

  # scan time, rounded to a whole number, found at a given fraction of the way through
  # a vector of scan times. A fractional position is truncated towards zero by R's indexing.
  time_at_fraction <- function(times, fraction) {
    round(times[length(times) * fraction])
  }

  # split the scan times by scan mode
  is_positive <- polarities == "positive"
  is_negative <- polarities == "negative"
  pos_times <- scantimes[is_positive]
  neg_times <- scantimes[is_negative]

  # positive scan mode: asymmetric trimming (with trim = 0.1: 15% at the start, 5% at the end)
  trim_left_pos <- time_at_fraction(pos_times, trim * 1.5)
  trim_right_pos <- time_at_fraction(pos_times, 1 - (trim * 0.5))

  # negative scan mode: symmetric trimming (with trim = 0.1: 10% at the start and at the end)
  trim_left_neg <- time_at_fraction(neg_times, trim)
  trim_right_neg <- time_at_fraction(neg_times, 1 - trim)

  # save trim parameters to file in the current working directory
  save(trim_left_pos, trim_right_pos, trim_left_neg, trim_right_neg, file = "trim_params.RData")
}
|
|
||
get_breaks_for_bins <- function(mzrange, resol = 140000) {
  #' create a vector with the breaks in m/z of bins for intensities; save breaks to file
  #'
  #' The m/z range is divided into segments. Within a segment the fwhm (full width at
  #' half maximum, the width of the peaks) is assumed to be constant, and breaks are
  #' placed every 0.2 * fwhm.
  #'
  #' @param mzrange: vector of minimum and maximum m/z values (integers)
  #' @param resol: value for resolution (integer); the formula below yields exactly
  #'   this resolution at m/z 200
  #'
  #' @return result of save(); breaks_fwhm and breaks_fwhm_avg are written to
  #'   "breaks.fwhm.RData" in the current working directory

  # determine number of segments used to create bins (two segments per unit of m/z)
  nr_segments <- 2 * (mzrange[2] - mzrange[1])
  segments <- seq(from = mzrange[1], to = mzrange[2], length.out = nr_segments + 1)

  # determine the breaks within one segment
  get_breaks_for_segment <- function(segment_index) {
    start_segment <- segments[segment_index]
    end_segment <- segments[segment_index + 1]
    # determine resolution at given m/z value: drops by a factor sqrt(2) per doubling of m/z
    resol_mz <- resol * (1 / sqrt(2) ^ (log2(start_segment / 200)))
    # determine fwhm (full width at half maximum) of the peaks in this segment
    fwhm_segment <- start_segment / resol_mz
    seq(from = (start_segment + fwhm_segment), to = end_segment, by = 0.2 * fwhm_segment)
  }

  # compute the breaks of every segment exactly once. seq_len(floor()) visits the same
  # segments as 1:nr_segments, but gives zero iterations when the range holds no full segment
  breaks_per_segment <- lapply(seq_len(floor(nr_segments)), get_breaks_for_segment)

  # combine the breaks of all segments in one go instead of growing a vector in a loop
  breaks_fwhm <- unlist(breaks_per_segment)

  # get a vector of average m/z instead of start value: shift every break by half the
  # distance between breaks in its own segment
  breaks_fwhm_avg <- unlist(lapply(breaks_per_segment, function(breaks_segment) {
    delta_mz <- breaks_segment[2] - breaks_segment[1]
    breaks_segment + 0.5 * delta_mz
  }))

  # save breaks to file
  save(breaks_fwhm, breaks_fwhm_avg, file = "breaks.fwhm.RData")
}
Binary file not shown.
Binary file not shown.
|
Jorisvansteenbrugge marked this conversation as resolved.
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,54 @@ | ||
| # unit tests for GenerateBreaks | ||
| # functions: get_trim_parameters and get_breaks_for_bins | ||
|
|
||
| # source all functions for GenerateBreaks | ||
| source("../../preprocessing/generate_breaks_functions.R") | ||
|
|
||
| # test get_trim_parameters | ||
testthat::test_that("trim parameters are correctly calculated", {
  # create scan times and polarities to test on: 84 positive scans followed by 84 negative scans
  # (length.out is spelled out in full; "length" only worked through partial argument matching)
  test_scantimes <- seq(0, 100, length.out = 168)
  test_polarities <- c(rep("positive", 84), rep("negative", 84))
  test_trim <- 0.1

  # test that the function produces no output except trim_params.RData file
  expect_silent(get_trim_parameters(test_scantimes, test_polarities, test_trim))
  expect_true(file.exists("./trim_params.RData"))

  # load the generated parameters and the previously stored fixture values into separate
  # environments, so that the identically named variables cannot overwrite each other
  generated <- new.env()
  load("./trim_params.RData", envir = generated)
  expected <- new.env()
  load("fixtures/test_trim_params.RData", envir = expected)

  # test the parameters in the output RData file against the fixtures
  expect_equal(generated$trim_left_pos, expected$trim_left_pos)
  expect_equal(generated$trim_left_neg, expected$trim_left_neg)
  expect_equal(generated$trim_right_pos, expected$trim_right_pos)
  expect_equal(generated$trim_right_neg, expected$trim_right_neg)
})
|
|
||
| # test get_breaks_for_bins | ||
testthat::test_that("breaks are correctly calculated", {
  # m/z range and resolution to test on
  mzrange_input <- c(300, 400)
  resol_input <- 140000

  # the call should be silent and leave the file breaks.fwhm.RData behind
  expect_silent(get_breaks_for_bins(mzrange_input, resol_input))
  expect_true(file.exists("./breaks.fwhm.RData"))

  # read the vectors from the output RData file
  generated <- new.env()
  load("./breaks.fwhm.RData", envir = generated)

  # read the previously stored vectors from fixtures
  fixture <- new.env()
  load("fixtures/breaks.fwhm.RData", envir = fixture)

  # compare generated breaks with the fixture breaks
  expect_equal(generated$breaks_fwhm, fixture$breaks_fwhm)
  expect_equal(generated$breaks_fwhm_avg, fixture$breaks_fwhm_avg)
})
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.