-
Notifications
You must be signed in to change notification settings - Fork 2
Add test trajectories #45
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
f4699e5
0c2b647
9b36385
f058cf0
b7e782f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The pdbs and trajectories should be inside the |
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The pdbs and trajectories should be inside the |
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The pdbs and trajectories should be inside the |
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The pdbs and trajectories should be inside the |
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The pdbs and trajectories should be inside the
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This file is 18 MB – why is this so much bigger? Is there a way to make it smaller??
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have removed subdirectories and put all the pdb and xtc files in tests/data/prestin.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The file pape3_20ns.xtc was larger than the others because its output was saved every 10 ps. I reduced the size for this file. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| """ | ||
| basicrta | ||
| A package to extract binding kinetics from molecular dynamics simulations | ||
| """ | ||
|
|
||
| # Add imports here | ||
| from importlib.metadata import version | ||
|
|
||
| __version__ = version("basicrta") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,226 @@ | ||
| import os | ||
| import gc | ||
| import numpy as np | ||
| from tqdm import tqdm | ||
| from multiprocessing import Pool, Lock | ||
| from glob import glob | ||
| import MDAnalysis as mda | ||
| from MDAnalysis.analysis.base import Results | ||
| from basicrta import istarmap | ||
| from basicrta.gibbs import Gibbs | ||
| gc.enable() | ||
|
|
||
| """This module provides the ProcessProtein class, which collects and processes | ||
| Gibbs sampler data. | ||
| """ | ||
|
|
||
| class ProcessProtein(object): | ||
| r"""ProcessProtein is the class that collects and processes Gibbs sampler | ||
| data. This class collects results for all residues in the | ||
| `basicrta-{cutoff}` directory and can write out a :math:`\tau` vs resid | ||
| numpy array or plot :math:`\tau` vs resid. If a structure is provided, | ||
| :math:`\tau` will be written as b-factors for visualization. | ||
|
|
||
| :param niter: Number of iterations used in the Gibbs samplers | ||
| :type niter: int | ||
| :param prot: Name of protein in `tm_dict.txt`, used to draw TM bars in | ||
| :math:`tau` vs resid plot. | ||
| :type prot: str, optional | ||
| :param cutoff: Cutoff used in contact analysis. | ||
| :type cutoff: float | ||
| """ | ||
|
|
||
| def __init__(self, niter, prot, cutoff, gskip, burnin, taus=None, bars=None): | ||
| self.residues = Results() | ||
| self.niter = niter | ||
| self.prot = prot | ||
| self.cutoff = cutoff | ||
| self.gskip = gskip | ||
| self.burnin = burnin | ||
| self.taus = taus | ||
| self.bars = bars | ||
|
|
||
| def __getitem__(self, item): | ||
| return getattr(self, item) | ||
|
|
||
| def _single_residue(self, adir, process=False): | ||
| if os.path.exists(f'{adir}/gibbs_{self.niter}.pkl'): | ||
| result = f'{adir}/gibbs_{self.niter}.pkl' | ||
| try: | ||
| result = f'{adir}/gibbs_{self.niter}.pkl' | ||
| g = Gibbs().load(result) | ||
| if process: | ||
| g.gskip = self.gskip | ||
| g.burnin = self.burnin | ||
| g.process_gibbs() | ||
| tau = g.estimate_tau() | ||
| except: | ||
| result = None | ||
| tau = [0, 0, 0] | ||
| else: | ||
| result = None | ||
| tau = [0, 0, 0] | ||
|
|
||
| residue = adir.split('/')[-1] | ||
| return residue, tau, result | ||
| #setattr(self.residues, f'{residue}', Results()) | ||
| #setattr(self.residues[f'{residue}'], 'file', result) | ||
| #setattr(self.residues[f'{residue}'], 'tau', tau) | ||
| #return self | ||
|
|
||
| def reprocess(self, nproc=1): | ||
| """Rerun processing and clustering on :class:`Gibbs` data. | ||
|
|
||
| :param nproc: Number of processes to use in clustering results for all | ||
| residues. | ||
| :type nproc: int | ||
| """ | ||
| from basicrta.util import get_bars | ||
|
|
||
| dirs = np.array(glob(f'basicrta-{self.cutoff}/?[0-9]*')) | ||
| sorted_inds = (np.array([int(adir.split('/')[-1][1:]) for adir in dirs]) | ||
| .argsort()) | ||
| dirs = dirs[sorted_inds] | ||
| inarr = np.array([[adir, True] for adir in dirs]) | ||
| with (Pool(nproc, initializer=tqdm.set_lock, | ||
| initargs=(Lock(),)) as p): | ||
| try: | ||
| residues, taus, results = [], [], [] | ||
| for residue, tau, result in tqdm(p.istarmap(self._single_residue, inarr), | ||
| total=len(dirs), position=0, | ||
| desc='overall progress'): | ||
| residues.append(residue) | ||
| taus.append(tau) | ||
| results.append(result) | ||
| gc.collect() | ||
| pass | ||
| except KeyboardInterrupt: | ||
| pass | ||
|
|
||
| taus = np.array(taus) | ||
| bars = get_bars(taus) | ||
| setattr(self, 'taus', taus[:, 1]) | ||
| setattr(self, 'bars', bars) | ||
| setattr(self, 'residues', np.array(residues)) | ||
| setattr(self, 'files', np.array(results)) | ||
|
|
||
| def get_taus(self, nproc=1): | ||
| r"""Get :math:`\tau` and 95\% confidence interval bounds for the slowest | ||
| process for each residue. | ||
|
|
||
| :returns: Returns a tuple of the form (:math:`\tau`, [CI lower bound, | ||
| CI upper bound]) | ||
| :rtype: tuple | ||
|
|
||
| """ | ||
| from basicrta.util import get_bars | ||
|
|
||
| dirs = np.array(glob(f'basicrta-{self.cutoff}/?[0-9]*')) | ||
| sorted_inds = (np.array([int(adir.split('/')[-1][1:]) for adir in dirs]) | ||
| .argsort()) | ||
| dirs = dirs[sorted_inds] | ||
| with (Pool(nproc, initializer=tqdm.set_lock, | ||
| initargs=(Lock(),)) as p): | ||
| try: | ||
| residues, taus, results = [], [], [] | ||
| for residue, tau, result in tqdm(p.imap(self._single_residue, dirs), | ||
| total=len(dirs), position=0, | ||
| desc='overall progress'): | ||
| residues.append(residue) | ||
| taus.append(tau) | ||
| results.append(result) | ||
| except KeyboardInterrupt: | ||
| pass | ||
|
|
||
| #taus = [] | ||
| #for res in tqdm(self.residues, total=len(self.residues)): | ||
| # taus.append(res.tau) | ||
|
|
||
| taus = np.array(taus) | ||
| bars = get_bars(taus) | ||
| setattr(self, 'taus', taus[:, 1]) | ||
| setattr(self, 'bars', bars) | ||
| setattr(self, 'residues', np.array(residues)) | ||
| setattr(self, 'files', np.array(results)) | ||
| #return taus[:, 1], bars | ||
|
|
||
| def write_data(self, fname='tausout'): | ||
| r"""Write :math:`\tau` values with 95\% confidence interval to a numpy | ||
| file with the format [`sel1` resid, :math:`\tau`, CI lower bound, CI | ||
| upper bound]. | ||
|
|
||
| :param fname: Filename to save data to. | ||
| :type fname: str, optional | ||
| """ | ||
| if self.taus is None: | ||
| taus, bars = self.get_taus() | ||
|
|
||
| keys = self.residues.keys() | ||
| residues = np.array([int(res[1:]) for res in keys]) | ||
| data = np.stack((residues, taus, bars[0], bars[1])) | ||
| np.save(fname, data.T) | ||
|
|
||
| def plot_protein(self, **kwargs): | ||
| r"""Plot :math:`\tau` vs resid. kwargs are passed to the | ||
| :meth:`plot_protein` method of `util.py`. These can be used to change | ||
| the labeling cutoff, y-limit of the plot, scale the figure, and set | ||
| major and minor ticks. | ||
| """ | ||
| from basicrta.util import plot_protein | ||
|
|
||
| if self.taus is None: | ||
| self.get_taus() | ||
|
|
||
| residues = self.residues | ||
| residues = [res.split('/')[-1] for res in residues] | ||
|
|
||
| exclude_inds = np.where(self.bars < 0)[1] | ||
| taus = np.delete(self.taus, exclude_inds) | ||
| bars = np.delete(self.bars, exclude_inds, axis=1) | ||
| residues = np.delete(residues, exclude_inds) | ||
|
|
||
| plot_protein(residues, taus, bars, self.prot, **kwargs) | ||
|
|
||
| def b_color_structure(self, structure): | ||
| r"""Add :math:`\tau` to b-factors in the specified structure. Saves | ||
| structure with b-factors to `tau_bcolored.pdb`. | ||
| """ | ||
| if self.taus is None: | ||
| taus, bars = self.get_taus() | ||
|
|
||
| cis = bars[1]+bars[0] | ||
| errs = taus/cis | ||
| errs[errs != errs] = 0 | ||
| residues = list(self.residues.keys()) | ||
| u = mda.Universe(structure) | ||
|
|
||
| u.add_TopologyAttr('tempfactors') | ||
| u.add_TopologyAttr('occupancies') | ||
| for tau, err, residue in tqdm(zip(taus, errs, residues)): | ||
| res = u.select_atoms(f'protein and resid {residue[1:]}') | ||
| res.tempfactors = np.round(tau, 2) | ||
| res.occupancies = np.round(err, 2) | ||
|
|
||
| u.select_atoms('protein').write('tau_bcolored.pdb') | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| import argparse | ||
| parser = argparse.ArgumentParser() | ||
| parser.add_argument('--nproc', type=int, default=1) | ||
| parser.add_argument('--cutoff', type=float) | ||
| parser.add_argument('--niter', type=int, default=110000) | ||
| parser.add_argument('--prot', type=str, default=None, nargs='?') | ||
| parser.add_argument('--label-cutoff', type=float, default=3, | ||
| dest='label_cutoff', | ||
| help='Only label residues with tau > ' | ||
| 'LABEL-CUTOFF * <tau>. ') | ||
|
|
||
| parser.add_argument('--structure', type=str, nargs='?') | ||
| args = parser.parse_args() | ||
|
|
||
| pp = ProcessProtein(args.niter, args.prot, args.cutoff) | ||
| pp.reprocess(nproc=args.nproc) | ||
| pp.collect_results() | ||
| pp.write_data() | ||
| pp.plot_protein(label_cutoff=args.label_cutoff) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| #!/usr/bin/env python | ||
|
|
||
| """ | ||
| Command-line interface for combining contact timeseries from multiple repeat runs. | ||
|
|
||
| This module provides functionality to combine contact files from multiple | ||
| trajectory repeats, enabling pooled analysis of binding kinetics. | ||
| """ | ||
|
|
||
| import os | ||
| import argparse | ||
| from basicrta.contacts import CombineContacts | ||
|
|
||
|
|
||
| def main(): | ||
| """Main function for combining contact files.""" | ||
| parser = argparse.ArgumentParser( | ||
| description="Combine contact timeseries from multiple repeat runs. " | ||
| "This enables pooling data from multiple trajectory repeats " | ||
| "and calculating posteriors from all data together." | ||
| ) | ||
|
|
||
| parser.add_argument( | ||
| '--contacts', | ||
| nargs='+', | ||
| required=True, | ||
| help="List of contact pickle files to combine (e.g., contacts_7.0.pkl from different runs)" | ||
| ) | ||
|
|
||
| parser.add_argument( | ||
| '--output', | ||
| type=str, | ||
| default='combined_contacts.pkl', | ||
| help="Output filename for combined contacts (default: combined_contacts.pkl)" | ||
| ) | ||
|
|
||
| parser.add_argument( | ||
| '--no-validate', | ||
| action='store_true', | ||
| help="Skip compatibility validation (use with caution)" | ||
| ) | ||
|
|
||
| args = parser.parse_args() | ||
|
|
||
| # Validate input files exist | ||
| missing_files = [] | ||
| for filename in args.contacts: | ||
| if not os.path.exists(filename): | ||
| missing_files.append(filename) | ||
|
|
||
| if missing_files: | ||
| print("ERROR: The following contact files were not found:") | ||
| for filename in missing_files: | ||
| print(f" {filename}") | ||
| return 1 | ||
|
|
||
| if len(args.contacts) < 2: | ||
| print("ERROR: At least 2 contact files are required for combining") | ||
| return 1 | ||
|
|
||
| if os.path.exists(args.output): | ||
| print(f"ERROR: Output file {args.output} already exists") | ||
| return 1 | ||
|
|
||
| try: | ||
| combiner = CombineContacts( | ||
| contact_files=args.contacts, | ||
| output_name=args.output, | ||
| validate_compatibility=not args.no_validate | ||
| ) | ||
|
|
||
| output_file = combiner.run() | ||
|
|
||
| print(f"\nCombination successful!") | ||
| print(f"Combined contact file saved as: {output_file}") | ||
| print(f"\nYou can now use this file with the Gibbs sampler:") | ||
| print(f" python -m basicrta.gibbs --contacts {output_file} --nproc <N>") | ||
|
|
||
| return 0 | ||
|
|
||
| except Exception as e: | ||
| print(f"ERROR: {e}") | ||
| return 1 | ||
|
|
||
|
|
||
| if __name__ == '__main__': | ||
| exit(main()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The pdbs and trajectories should be inside the
tests/data/prestindirectory and not in subdirectories (seedatafiles.py).