Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .images/multiply-pipeline.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,16 @@ $1$ New organisms can be made available for download by adding them to the colle
$2$ You can make your own primer3 settings by creating new or altering existing JSON files in the `settings/primer3` folder.


## Extending previously-designed panels

It's also possible to ask `multiply` to find designs that extend an existing multiplex primer panel (for example, one that has already been tested in the lab). To use this functionality:

- specify the path to the `multiply` multiplex output for the previous designs - and a list of regions - in the `[Extend]` section of your design file.
- then run the pipeline as usual.

`multiply` will search for primers in your regions of interest at the `generate` step, but will combine them with the previously-designed primers for all subsequent steps of the pipeline to build new multiplexes extending the original one. (An example of this process can be found in the `designs/pf-extend.ini` file.)


## Resources
`multiply` uses the following external software and databases:
- `primer3`. Individual primer pair design. https://primer3.org/
Expand Down
25 changes: 25 additions & 0 deletions designs/pf-extend.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[Sample]
genome = PlasmodiumFalciparum

[Genes]
target_ids = PF3D7_0629500, PF3D7_0304600
target_names = AAT1, CSP

[Extend]
target_ids = PF3D7_0206800, PF3D7_0810800, PF3D7_1407900, PF3D7_1408100, PF3D7_0709000, PF3D7_1343700, PF3D7_0417200, MDR1part
target_names = MSP2, DHPS, PMI, PMIII, CRT1, K13, DHFR, MDR1part
design_file = results/pf-default/select/table.multiplexes_information.csv

[Primers]
include_tails = False
F_tail = TTTCTGTTGGTGCTGATATTGC
R_tail = ACTTGCCTGTCGCTCTATCTTC

[Amplicons]
min_size_bp = 3000
max_size_bp = 5000
primer3_settings = default, stringent, relaxed, variable

[Output]
name = pf-extend
primer_code = v
2 changes: 1 addition & 1 deletion src/multiply/blast/annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def summarise_by_primer(self, output_path=None):
primer_pair_name=qseqid[:-2],
target_name=qseqid.split("_")[0],
total_alignments=qseqid_df.shape[0],
**qseqid_df[self.annotations].sum().to_dict(),
**qseqid_df[self.annotations.keys()].sum().to_dict(),
)
for qseqid, qseqid_df in self.blast_df.groupby("qseqid")
]
Expand Down
10 changes: 9 additions & 1 deletion src/multiply/generate/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@ def generate(design):
print("Done.\n")

# WRITE
print("Writing output table...")
primer_df = pd.DataFrame(
[
pair.get_primer_as_dict(direction)
Expand All @@ -166,7 +165,16 @@ def generate(design):
"pair_penalty",
]
]

if params['from_extend']:
print( f"Adding {params['extend_primers'].shape[0]} previously-generated primers from [Extend] section..." )
primer_df = pd.concat([
primer_df,
params['extend_primers']
])

output_csv = f"{params['output_dir']}/table.candidate_primers.csv"
print( f"Writing output table to \"{output_csv}\"..." )
primer_df.to_csv(output_csv, index=False)
print(f" to: {output_csv}")
print("Done.\n")
Expand Down
120 changes: 118 additions & 2 deletions src/multiply/util/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def parse_parameters(design_path):
params = {}
params = add_samples(config, params)
params = add_genes(config, params)
params = add_extend(config, params)
params = add_regions(config, params)
params = add_primers(config, params)
params = add_amplicons(config, params)
Expand Down Expand Up @@ -58,7 +59,7 @@ def check_design_exists(design_path):
def check_valid_sections(
config,
must_include=["Sample", "Primers", "Amplicons", "Output"],
one_of=[["Genes", "Regions"]],
one_of=[["Genes", "Regions"]]
):
"""
Check that all expected sections are found within the configuration object
Expand Down Expand Up @@ -91,7 +92,6 @@ def check_valid_sections(
f"Design must include at least one of these sections: {', '.join(section_set)}. Please add."
)


def add_samples(config, params):
"""
Add [Sample] information to a parameter dictionary
Expand Down Expand Up @@ -174,6 +174,122 @@ def add_genes(config, params):

return params

def add_extend(config, params):
    """
    Add [Extend] information from a configparser object to a params dictionary

    This is similar to the [Genes] section but lists previously-defined primers, via
    a well-formed candidate primers table.
    The input design_file can be, e.g. either the table.candidate_primers.csv or the
    output table.multiplexes_information.csv from a previous run - it must have these columns:
        'target_id',
        'target_name',
        'pair_name',
        'primer_name',
        'direction',
        'seq',
        'length',
        'tm',
        'gc',
        'chrom',
        'start',
        'product_bp',
        'pair_penalty'

    If `target_names` is omitted from [Extend], the names are read from the
    design file. If it is given, each name must match the one in the design file.

    params
        config: ConfigParser
            ConfigParser object holding design file information.
        params: dict
            Dictionary of MULTIPLY parameters.
    returns
        params: dict
            Dictionary of MULTIPLY parameters, with [Extend]
            parameters added. When there is no [Extend] section only
            `from_extend = False` is added.
    raises
        DesignFileError
            If the number of ids and names differ, the design file lacks a
            required column, a requested target id is absent from the design
            file, a target has more than one name in the design file, or a
            supplied name disagrees with the design file.
    """
    # Imported locally, as in the original, so this block does not depend on
    # the module-level imports of the file.
    import pandas

    required_columns = [
        "target_id",
        "target_name",
        "pair_name",
        "primer_name",
        "direction",
        "seq",
        "length",
        "tm",
        "gc",
        "chrom",
        "start",
        "product_bp",
        "pair_penalty",
    ]

    # Check if an [Extend] section has been provided
    if not config.has_section("Extend"):
        params["from_extend"] = False
        return params

    # Parse target IDs
    target_ids = [g.strip() for g in config.get("Extend", "target_ids").split(",")]

    # Parse target names
    has_names = config.has_option("Extend", "target_names")
    if has_names:
        target_names = [
            g.strip() for g in config.get("Extend", "target_names").split(",")
        ]
    else:
        # Copy rather than alias: the names are filled in from the design file
        # below, and that must not overwrite the ids.
        target_names = list(target_ids)

    # Sanity checks
    n_ids = len(target_ids)
    n_names = len(target_names)
    if n_ids != n_names:
        raise DesignFileError(
            f"In [Extend], found {n_ids} `target_ids` and {n_names} `target_names`. Ensure equal."
        )

    # Load the previously-defined primers:
    design_file = config.get("Extend", "design_file")
    primer_df = pandas.read_csv(design_file)

    # Report missing columns as a design-file problem instead of a bare KeyError
    missing_columns = [c for c in required_columns if c not in primer_df.columns]
    if missing_columns:
        raise DesignFileError(
            f'In [Extend], the specified design file "{design_file}" is missing required column(s): {", ".join(missing_columns)}.'
        )
    primer_df = primer_df[required_columns]

    # Check all specified targets are represented in the file and have the correct names:
    for index, target_id in enumerate(target_ids):
        this_target = primer_df[primer_df["target_id"] == target_id]
        if this_target.shape[0] == 0:
            raise DesignFileError(
                f'In [Extend], target id "{target_id}" was not among the targets in the specified design file "{design_file}".'
            )
        names_in_file = this_target["target_name"].unique().tolist()
        if len(names_in_file) > 1:
            raise DesignFileError(
                f'In [Extend], in the specified design file "{design_file}", target {target_id} seems to have more than one name.'
            )
        name_in_file = names_in_file[0]
        if has_names:
            # If we're given names, make sure they are correct:
            if name_in_file != target_names[index]:
                raise DesignFileError(
                    f'In [Extend], the name ("{name_in_file}") for target {target_id} in the design file "{design_file}", does not match the one specified ("{target_names[index]}").'
                )
        else:
            # Otherwise get the name from the file:
            target_names[index] = name_in_file

    # Keep only the primers belonging to the requested targets
    primer_df = primer_df[primer_df["target_id"].isin(target_ids)]

    # Mapping between IDs and names
    id_to_name = dict(zip(target_ids, target_names))

    # Add to dictionary
    params["from_extend"] = True
    params["extend_ids"] = target_ids
    # Always True here: names are either supplied or resolved from the file
    params["extend_has_names"] = True
    params["extend_names"] = target_names
    params["extend_id_to_name"] = id_to_name
    params["extend_primers"] = primer_df

    return params

def add_regions(config, params):
"""
Expand Down