11from copy import deepcopy
2+ import importlib .resources
23import logging
34import itertools
45import os
78from pathlib import Path
89from functools import lru_cache
910import hashlib
11+ import importlib
1012
1113import pandas as pd
1214import pickle
1719from pyteomics .mass import std_aa_mass , unimod
1820from pyteomics .fasta import IndexedFASTA
1921from rich .progress import Progress , SpinnerColumn , TextColumn , BarColumn , TimeElapsedColumn
22+ from rich .pretty import pretty_repr
2023
2124# Add a logger
2225logger = logging .getLogger (__name__ )
@@ -43,7 +46,7 @@ def __init__(self, config_file: str = None, **kwargs):
4346 fasta_file = self .params ["fasta_file" ],
4447 combination_length = self .params ["combination_length" ],
4548 exclude_mutations = self .params ["exclude_mutations" ],
46- unimod_modification_file = self .params ["unimod_modification_file " ],
49+ modification_file = self .params ["modification_file " ],
4750 )
4851 self .psm_file_name = None
4952
@@ -69,18 +72,22 @@ def _load_parameters(self, overrides: dict) -> dict:
6972 "include_original_psm" : False ,
7073 "include_decoy_psm" : False ,
7174 "psm_file_type" : "infer" ,
72- "unimod_modification_file" : None ,
75+ "modification_file" : str (
76+ importlib .resources .files ("mumble.package_data" ) / "default_ptm_list.tsv"
77+ ),
7378 "modification_mapping" : {},
79+ "all_unimod_modifications" : False ,
7480 }
7581
76- # Use a single loop to consolidate parameters
7782 params = {
7883 key : overrides .get (
7984 key , self .config_loader .get (key , default ) if self .config_loader else default
8085 )
8186 for key , default in keys_with_defaults .items ()
8287 }
83- logger .info (f"Mumble config: { params } " )
88+ if params ["all_unimod_modifications" ]:
89+ params ["modification_file" ] = False
90+ logger .info (f"Mumble config: { pretty_repr (params )} " )
8491
8592 return params
8693
@@ -399,7 +406,7 @@ def __init__(
399406 fasta_file = None ,
400407 combination_length = 1 ,
401408 exclude_mutations = False ,
402- unimod_modification_file = None ,
409+ modification_file = None ,
403410 ) -> None :
404411 """
405412 Constructor of the class.
@@ -415,14 +422,17 @@ def __init__(
415422 self .cache = _ModificationCache (
416423 combination_length = combination_length ,
417424 exclude_mutations = exclude_mutations ,
418- modification_file = unimod_modification_file ,
425+ modification_file = modification_file ,
419426 )
420427 self .cache .load_cache ()
421428
422429 self .modification_df = self .cache .modification_df
423430 self .monoisotopic_masses = self .cache .monoisotopic_masses
424431 self .modifications_names = self .cache .modifications_names
425-
432+ if len (self .modification_df ["name" ].unique ()) == 0 :
433+ raise ValueError (
434+ "No modifications found in the modification file. Please check fileformat."
435+ )
426436 logger .info (
427437 f'Including { len (self .modification_df ["name" ].unique ())} unique modifications on { len (self .modification_df ["name" ])} sites'
428438 )
@@ -803,7 +813,7 @@ def load_cache(self, force_reload=False):
803813 Args:
804814 force_reload (bool, optional): If True, regenerate the cache even if it exists. Defaults to False.
805815 """
806- self ._load_or_generate_data (self . cache_file , force_reload = force_reload )
816+ self ._load_or_generate_data (force_reload = force_reload )
807817
808818 @classmethod
809819 def _get_cache_file_path (cls ):
@@ -813,15 +823,7 @@ def _get_cache_file_path(cls):
813823 return:
814824 str: path to cache file
815825 """
816- current_dir = os .path .dirname (os .path .realpath (__file__ ))
817- parent_dir = os .path .dirname (current_dir )
818- cache_dir = os .path .join (parent_dir , "modification_cache" )
819-
820- # Create the cache directory if it doesn't exist
821- os .makedirs (cache_dir , exist_ok = True )
822-
823- cache_file = os .path .join (cache_dir , "modification_cache.pkl" )
824- return cache_file
826+ return str (importlib .resources .files ("mumble.package_data" ) / "modifications_cache.pkl" )
825827
826828 @staticmethod
827829 def _calculate_file_hash (file_path : str ) -> str :
@@ -840,11 +842,11 @@ def _calculate_file_hash(file_path: str) -> str:
840842 sha256 .update (chunk )
841843 return sha256 .hexdigest ()
842844
843- def _load_or_generate_data (self , cache_file : str , force_reload : bool = False ) -> None :
845+ def _load_or_generate_data (self , force_reload : bool = False ) -> None :
844846 """Load data from cache or generate and save it if cache doesn't exist."""
845- if os .path .exists (cache_file ) and not force_reload :
847+ if os .path .exists (self . cache_file ) and not force_reload :
846848 logger .info ("Checking cache" )
847- with open (cache_file , "rb" ) as f :
849+ with open (self . cache_file , "rb" ) as f :
848850 cache_data = pickle .load (f )
849851
850852 if cache_data ["metadata" ] == (
@@ -860,11 +862,11 @@ def _load_or_generate_data(self, cache_file: str, force_reload: bool = False) ->
860862 self .modifications_names = cache_data ["modifications_names" ]
861863 except KeyError :
862864 logger .info ("Cached data invalid or incomplete, regenerating cache" )
863- self ._regenerate_and_save_cache (cache_file )
865+ self ._regenerate_and_save_cache ()
864866 else :
865- self ._regenerate_and_save_cache (cache_file )
867+ self ._regenerate_and_save_cache ()
866868 else :
867- self ._regenerate_and_save_cache (cache_file )
869+ self ._regenerate_and_save_cache ()
868870
869871 def get_unimod_database (self ):
870872 """
@@ -873,7 +875,11 @@ def get_unimod_database(self):
873875 Args:
874876 exclude_mutations (bool, optional): If True, modifications with the classification 'AA substitution' will be excluded. Defaults to False.
875877 """
876- unimod_db = unimod .Unimod ()
878+
879+ # Load Unimod database
880+ unimod_db = unimod .Unimod (
881+ "sqlite:///" + str (importlib .resources .files ("mumble.package_data" ) / "unimod.db" )
882+ )
877883 position_id_mapper = {
878884 2 : "anywhere" ,
879885 3 : "N-term" ,
@@ -1000,7 +1006,7 @@ def generate_combinations(items, length):
10001006 else :
10011007 return [], []
10021008
1003- def _regenerate_and_save_cache (self , cache_file : str ) -> None :
1009+ def _regenerate_and_save_cache (self ) -> None :
10041010 """Regenerate data and save it to the cache."""
10051011 logger .info ("Generating cache data" )
10061012 self .get_unimod_database ()
@@ -1010,7 +1016,7 @@ def _regenerate_and_save_cache(self, cache_file: str) -> None:
10101016 logger .debug (
10111017 f"New cache metadata: \n combination length { self .combination_length } , \n exclude_mutations { self .exclude_mutations } ,\n modification file hash { self .modification_file_hash } " ,
10121018 )
1013- with open (cache_file , "wb" ) as f :
1019+ with open (self . cache_file , "wb" ) as f :
10141020 pickle .dump (
10151021 {
10161022 "metadata" : (
0 commit comments