Skip to content

Commit 9cd3ac2

Browse files
Major changes: modification data is now shipped as package data, and the default PTMs are now parsed from a bundled list.
1 parent 8820175 commit 9cd3ac2

6 files changed

Lines changed: 627 additions & 31 deletions

File tree

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
modification_cache/*
1+
mumble/package_data/modifications_cache.pkl
22

33
# Byte-compiled / optimized / DLL files
44
__pycache__/

mumble/__main__.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import click
22
import logging
33
import sys
4-
4+
import importlib
55
from rich.logging import RichHandler
66

77
from mumble import PSMHandler, remove_modification_cache
@@ -20,9 +20,11 @@
2020
"type": click.Path(exists=True),
2121
"help": "Path to the input file.",
2222
},
23-
"unimod_modification_file": {
23+
"modification_file": {
2424
"type": click.Path(exists=True),
2525
"help": "Restriction list of modifications to use from Unimod.",
26+
"default": str(importlib.resources.files("mumble.package_data") / "default_ptm_list.tsv"),
27+
"show_default": True,
2628
},
2729
"psm_file_type": {
2830
"type": click.STRING,
@@ -92,6 +94,12 @@
9294
"help": "Set the logging level",
9395
"show_default": True,
9496
},
97+
"all_unimod_modifications": {
98+
"is_flag": True,
99+
"default": False,
100+
"help": "Instead of using a subset of modifications from Unimod, use all available modifications.",
101+
"show_default": True,
102+
},
95103
}
96104

97105

mumble/mumble.py

Lines changed: 32 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from copy import deepcopy
2+
import importlib.resources
23
import logging
34
import itertools
45
import os
@@ -7,6 +8,7 @@
78
from pathlib import Path
89
from functools import lru_cache
910
import hashlib
11+
import importlib
1012

1113
import pandas as pd
1214
import pickle
@@ -17,6 +19,7 @@
1719
from pyteomics.mass import std_aa_mass, unimod
1820
from pyteomics.fasta import IndexedFASTA
1921
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TimeElapsedColumn
22+
from rich.pretty import pretty_repr
2023

2124
# Add a logger
2225
logger = logging.getLogger(__name__)
@@ -43,7 +46,7 @@ def __init__(self, config_file: str = None, **kwargs):
4346
fasta_file=self.params["fasta_file"],
4447
combination_length=self.params["combination_length"],
4548
exclude_mutations=self.params["exclude_mutations"],
46-
unimod_modification_file=self.params["unimod_modification_file"],
49+
modification_file=self.params["modification_file"],
4750
)
4851
self.psm_file_name = None
4952

@@ -69,18 +72,22 @@ def _load_parameters(self, overrides: dict) -> dict:
6972
"include_original_psm": False,
7073
"include_decoy_psm": False,
7174
"psm_file_type": "infer",
72-
"unimod_modification_file": None,
75+
"modification_file": str(
76+
importlib.resources.files("mumble.package_data") / "default_ptm_list.tsv"
77+
),
7378
"modification_mapping": {},
79+
"all_unimod_modifications": False,
7480
}
7581

76-
# Use a single loop to consolidate parameters
7782
params = {
7883
key: overrides.get(
7984
key, self.config_loader.get(key, default) if self.config_loader else default
8085
)
8186
for key, default in keys_with_defaults.items()
8287
}
83-
logger.info(f"Mumble config: {params}")
88+
if params["all_unimod_modifications"]:
89+
params["modification_file"] = False
90+
logger.info(f"Mumble config: {pretty_repr(params)}")
8491

8592
return params
8693

@@ -399,7 +406,7 @@ def __init__(
399406
fasta_file=None,
400407
combination_length=1,
401408
exclude_mutations=False,
402-
unimod_modification_file=None,
409+
modification_file=None,
403410
) -> None:
404411
"""
405412
Constructor of the class.
@@ -415,14 +422,17 @@ def __init__(
415422
self.cache = _ModificationCache(
416423
combination_length=combination_length,
417424
exclude_mutations=exclude_mutations,
418-
modification_file=unimod_modification_file,
425+
modification_file=modification_file,
419426
)
420427
self.cache.load_cache()
421428

422429
self.modification_df = self.cache.modification_df
423430
self.monoisotopic_masses = self.cache.monoisotopic_masses
424431
self.modifications_names = self.cache.modifications_names
425-
432+
if len(self.modification_df["name"].unique()) == 0:
433+
raise ValueError(
434+
"No modifications found in the modification file. Please check fileformat."
435+
)
426436
logger.info(
427437
f'Including {len(self.modification_df["name"].unique())} unique modifications on {len(self.modification_df["name"])} sites'
428438
)
@@ -803,7 +813,7 @@ def load_cache(self, force_reload=False):
803813
Args:
804814
force_reload (bool, optional): If True, regenerate the cache even if it exists. Defaults to False.
805815
"""
806-
self._load_or_generate_data(self.cache_file, force_reload=force_reload)
816+
self._load_or_generate_data(force_reload=force_reload)
807817

808818
@classmethod
809819
def _get_cache_file_path(cls):
@@ -813,15 +823,7 @@ def _get_cache_file_path(cls):
813823
return:
814824
str: path to cache file
815825
"""
816-
current_dir = os.path.dirname(os.path.realpath(__file__))
817-
parent_dir = os.path.dirname(current_dir)
818-
cache_dir = os.path.join(parent_dir, "modification_cache")
819-
820-
# Create the cache directory if it doesn't exist
821-
os.makedirs(cache_dir, exist_ok=True)
822-
823-
cache_file = os.path.join(cache_dir, "modification_cache.pkl")
824-
return cache_file
826+
return str(importlib.resources.files("mumble.package_data") / "modifications_cache.pkl")
825827

826828
@staticmethod
827829
def _calculate_file_hash(file_path: str) -> str:
@@ -840,11 +842,11 @@ def _calculate_file_hash(file_path: str) -> str:
840842
sha256.update(chunk)
841843
return sha256.hexdigest()
842844

843-
def _load_or_generate_data(self, cache_file: str, force_reload: bool = False) -> None:
845+
def _load_or_generate_data(self, force_reload: bool = False) -> None:
844846
"""Load data from cache or generate and save it if cache doesn't exist."""
845-
if os.path.exists(cache_file) and not force_reload:
847+
if os.path.exists(self.cache_file) and not force_reload:
846848
logger.info("Checking cache")
847-
with open(cache_file, "rb") as f:
849+
with open(self.cache_file, "rb") as f:
848850
cache_data = pickle.load(f)
849851

850852
if cache_data["metadata"] == (
@@ -860,11 +862,11 @@ def _load_or_generate_data(self, cache_file: str, force_reload: bool = False) ->
860862
self.modifications_names = cache_data["modifications_names"]
861863
except KeyError:
862864
logger.info("Cached data invalid or incomplete, regenerating cache")
863-
self._regenerate_and_save_cache(cache_file)
865+
self._regenerate_and_save_cache()
864866
else:
865-
self._regenerate_and_save_cache(cache_file)
867+
self._regenerate_and_save_cache()
866868
else:
867-
self._regenerate_and_save_cache(cache_file)
869+
self._regenerate_and_save_cache()
868870

869871
def get_unimod_database(self):
870872
"""
@@ -873,7 +875,11 @@ def get_unimod_database(self):
873875
Args:
874876
exclude_mutations (bool, optional): If True, modifications with the classification 'AA substitution' will be excluded. Defaults to False.
875877
"""
876-
unimod_db = unimod.Unimod()
878+
879+
# Load Unimod database
880+
unimod_db = unimod.Unimod(
881+
"sqlite:///" + str(importlib.resources.files("mumble.package_data") / "unimod.db")
882+
)
877883
position_id_mapper = {
878884
2: "anywhere",
879885
3: "N-term",
@@ -1000,7 +1006,7 @@ def generate_combinations(items, length):
10001006
else:
10011007
return [], []
10021008

1003-
def _regenerate_and_save_cache(self, cache_file: str) -> None:
1009+
def _regenerate_and_save_cache(self) -> None:
10041010
"""Regenerate data and save it to the cache."""
10051011
logger.info("Generating cache data")
10061012
self.get_unimod_database()
@@ -1010,7 +1016,7 @@ def _regenerate_and_save_cache(self, cache_file: str) -> None:
10101016
logger.debug(
10111017
f"New cache metadata: \ncombination length {self.combination_length}, \nexclude_mutations {self.exclude_mutations},\nmodification file hash {self.modification_file_hash}",
10121018
)
1013-
with open(cache_file, "wb") as f:
1019+
with open(self.cache_file, "wb") as f:
10141020
pickle.dump(
10151021
{
10161022
"metadata": (

0 commit comments

Comments
 (0)