Skip to content

Commit 7075a15

Browse files
committed
A couple of changes related to filetypes
1 parent 4a33b46 commit 7075a15

3 files changed

Lines changed: 15 additions & 13 deletions

File tree

neat/bacterial_wrapper/runner.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -125,14 +125,16 @@ def bacterial_wrapper(reference_file, bacteria_name, ref_config_file, output_dir
125125

126126
# Stitching all outputs together - Keshav's script
127127

128-
def concat_fq(input_files: List[Path], dest: BgzfWriter) -> None:
128+
def concat_fq(input_files: List[Path], dest: Path) -> None:
129129

130130
if not input_files:
131+
# Nothing to do, and no error to throw
131132
return
132-
133-
for input_file in input_files:
134-
with bgzf.BgzfReader(input_file) as in_f:
135-
shutil.copyfileobj(in_f, dest)
133+
134+
with gzip.open(dest, 'wt') as out_f:
135+
for input_file in input_files:
136+
with gzip.open(input_file, 'rt') as in_f:
137+
shutil.copyfileobj(in_f, out_f)
136138

137139
def merge_bam(bams: List[Path], dest: Path, threads: int) -> None:
138140

@@ -172,19 +174,19 @@ def stitch_all_outputs(files: List[Path], output_dir) -> None:
172174
fq2_list.append(file)
173175
elif "r1.fastq" in file_name or ".fastq" in suffixes:
174176
fq1_list.append(file)
175-
elif ".vcf" in suffixes:
177+
elif ".vcf" in suffixes and ".tbi" not in suffixes:
176178
vcf_list.append(file)
177-
elif ".bam" in suffixes:
179+
elif ".bam" in suffixes and ".bai" not in suffixes:
178180
bam_list.append(file)
179181

180-
dest_fq1 = bgzf.BgzfWriter(f"{output_dir}/stitched_fq1.bgzf")
182+
dest_fq1 = Path(f"{output_dir}/stitched_fq1.gz")
181183
dest_bam = Path(f"{output_dir}/stitched.bam")
182184
dest_vcf = Path(f"{output_dir}/stitched.vcf")
183185

184186
concat_fq(fq1_list, dest_fq1)
185187

186188
if (fq2_list):
187-
dest_fq2 = bgzf.BgzfWriter(f"{output_dir}/stitched_fq2.bgzf")
189+
dest_fq2 = Path(f"{output_dir}/stitched_fq2.gz")
188190
concat_fq(fq2_list, dest_fq2)
189191

190192
merge_bam(bam_list, dest_bam, 2)

neat/read_simulator/utils/generate_reads.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@ def cover_dataset(
4949
number_reads_per_layer = ceil(span_length / fragment_model.fragment_mean)
5050
if options.paired_ended:
5151
# TODO use gc bias to skew this number. Calculate at the runner level.
52-
number_reads = number_reads_per_layer * (options.coverage//2)
52+
number_reads = ceil(number_reads_per_layer * (options.coverage / 2))
5353
else:
54-
number_reads = number_reads_per_layer * options.coverage
54+
number_reads = ceil(number_reads_per_layer * options.coverage)
5555

5656
# step 1: Divide the span up into segments drawn from the fragment pool. Assign reads based on that.
5757
# step 2: repeat above until number of reads exceeds number_reads

neat/read_simulator/utils/stitch_outputs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
def concat(files_to_join: List[Path], dest_file: gzip.GzipFile) -> None:
2121
if not files_to_join:
2222
# Nothing to do, and no error to throw
23-
_LOG.warn(f"Concat called but there are no files to join: {files_to_join}" )
23+
_LOG.warning(f"Concat called but there are no files to join: {files_to_join}" )
2424
return
2525
for f in files_to_join:
2626
with gzip.open(f, 'rt') as in_f:
@@ -37,7 +37,7 @@ def merge_vcfs(vcfs: List[Path], ofw: OutputFileWriter) -> None:
3737
def merge_bam(bam_files: List[Path], ofw: OutputFileWriter, threads: int):
3838
merged_file = ofw.tmp_dir / "temp_merged.bam"
3939
intermediate_files = []
40-
# Note 1000 is abritrary. May need to be a user parameter/adjustable/a function
40+
# Note 1000 is arbitrary. May need to be a user parameter/adjustable/a function
4141
for i in range(0, len(bam_files), 500):
4242
temp_file = str(ofw.tmp_dir / f"temp_merged_{i}.bam")
4343
pysam.merge("--no-PG", "-f", temp_file, *map(str, bam_files[i:i+500]))

0 commit comments

Comments
 (0)