Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The pdbs and trajectories should be inside the tests/data/prestin directory and not in subdirectories (see datafiles.py).

Binary file not shown.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The pdbs and trajectories should be inside the tests/data/prestin directory and not in subdirectories (see datafiles.py).

Binary file not shown.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The pdbs and trajectories should be inside the tests/data/prestin directory and not in subdirectories (see datafiles.py).

Binary file not shown.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The pdbs and trajectories should be inside the tests/data/prestin directory and not in subdirectories (see datafiles.py).

Binary file not shown.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The pdbs and trajectories should be inside the tests/data/prestin directory and not in subdirectories (see datafiles.py).

Binary file not shown.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The pdbs and trajectories should be inside the tests/data/prestin directory and not in subdirectories (see datafiles.py).

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file is 18 MB – why is this so much bigger? Is there a way to make it smaller?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have removed subdirectories and put all the pdb and xtc files in tests/data/prestin.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The file pape3_20ns.xtc was larger than the others because its output was saved every 10 ps. I reduced the size for this file.

Binary file not shown.
4 changes: 4 additions & 0 deletions basicrta/tests/datafiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@
# Array of frame times used by the tests (stored as times.npy).
times = np.load((_data_ref / 'times.npy').as_posix())
# Topology/trajectory pair for the protein–cholesterol test system.
PDB = (_data_ref / 'prot_chol.pdb').as_posix()
XTC = (_data_ref / 'prot_chol.xtc').as_posix()
# Prestin PAPE test data; files live directly in tests/data/prestin
# (no further subdirectories).
PDB_PAPE_1 = (_data_ref / 'prestin' /'protein_pape1.pdb.bz2').as_posix()
XTC_PAPE_1 = (_data_ref / 'prestin' /'protein_pape1_20ns.xtc').as_posix()


# This should be the last line: clean up namespace
del resources

9 changes: 9 additions & 0 deletions build/lib/basicrta/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"""
basicrta
A package to extract binding kinetics from molecular dynamics simulations
"""

# Add imports here
from importlib.metadata import version

__version__ = version("basicrta")
226 changes: 226 additions & 0 deletions build/lib/basicrta/cluster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
import os
import gc
import numpy as np
from tqdm import tqdm
from multiprocessing import Pool, Lock
from glob import glob
import MDAnalysis as mda
from MDAnalysis.analysis.base import Results
from basicrta import istarmap
from basicrta.gibbs import Gibbs
gc.enable()

"""This module provides the ProcessProtein class, which collects and processes
Gibbs sampler data.
"""

class ProcessProtein(object):
    r"""Collect and process Gibbs sampler data.

    Collects results for all residues in the ``basicrta-{cutoff}``
    directory and can write out a :math:`\tau` vs resid numpy array or
    plot :math:`\tau` vs resid. If a structure is provided, :math:`\tau`
    can be written as b-factors for visualization.

    :param niter: Number of iterations used in the Gibbs samplers
    :type niter: int
    :param prot: Name of protein in `tm_dict.txt`, used to draw TM bars in
        :math:`tau` vs resid plot.
    :type prot: str, optional
    :param cutoff: Cutoff used in contact analysis.
    :type cutoff: float
    :param gskip: Thinning interval applied to Gibbs samples when
        reprocessing.
    :type gskip: int
    :param burnin: Number of initial Gibbs samples discarded as burn-in
        when reprocessing.
    :type burnin: int
    :param taus: Precomputed :math:`\tau` values; when given,
        :meth:`get_taus` is not re-run by the plotting/writing methods.
    :param bars: Precomputed confidence-interval bounds matching `taus`.
    """

    def __init__(self, niter, prot, cutoff, gskip, burnin, taus=None,
                 bars=None):
        self.residues = Results()
        self.niter = niter
        self.prot = prot
        self.cutoff = cutoff
        self.gskip = gskip
        self.burnin = burnin
        self.taus = taus
        self.bars = bars

    def __getitem__(self, item):
        # Allow dict-style attribute access, e.g. ``self['taus']``.
        return getattr(self, item)

    def _single_residue(self, adir, process=False):
        """Load (and optionally reprocess) the Gibbs result for one residue
        directory.

        :returns: ``(residue, tau, result_file)``; on a missing or
            unreadable pickle, ``tau`` is ``[0, 0, 0]`` and the file is
            ``None`` so collection can continue.
        """
        result = f'{adir}/gibbs_{self.niter}.pkl'
        if os.path.exists(result):
            try:
                g = Gibbs().load(result)
                if process:
                    g.gskip = self.gskip
                    g.burnin = self.burnin
                    g.process_gibbs()
                tau = g.estimate_tau()
            except Exception:
                # A corrupt/incomplete result must not abort the whole
                # collection; flag it with a zeroed tau instead.
                result = None
                tau = [0, 0, 0]
        else:
            result = None
            tau = [0, 0, 0]

        residue = adir.split('/')[-1]
        return residue, tau, result

    def _collect(self, nproc, process):
        """Run :meth:`_single_residue` over every residue directory in
        parallel and store ``taus``/``bars``/``residues``/``files`` on
        ``self``. Shared implementation for :meth:`reprocess` and
        :meth:`get_taus`.
        """
        from basicrta.util import get_bars

        # Residue directories are named like 'R301'; sort numerically.
        dirs = np.array(glob(f'basicrta-{self.cutoff}/?[0-9]*'))
        sorted_inds = (np.array([int(adir.split('/')[-1][1:])
                                 for adir in dirs]).argsort())
        dirs = dirs[sorted_inds]

        residues, taus, results = [], [], []
        with (Pool(nproc, initializer=tqdm.set_lock,
                   initargs=(Lock(),)) as p):
            try:
                if process:
                    # istarmap (patched onto Pool by basicrta.istarmap)
                    # forwards the ``process=True`` flag per residue.
                    inarr = np.array([[adir, True] for adir in dirs])
                    it = p.istarmap(self._single_residue, inarr)
                else:
                    it = p.imap(self._single_residue, dirs)
                for residue, tau, result in tqdm(it, total=len(dirs),
                                                 position=0,
                                                 desc='overall progress'):
                    residues.append(residue)
                    taus.append(tau)
                    results.append(result)
                    # Loaded Gibbs objects can be large; collect eagerly.
                    gc.collect()
            except KeyboardInterrupt:
                # Keep whatever was collected before the interrupt.
                pass

        taus = np.array(taus)
        bars = get_bars(taus)
        self.taus = taus[:, 1]
        self.bars = bars
        self.residues = np.array(residues)
        self.files = np.array(results)
        return self.taus, self.bars

    def reprocess(self, nproc=1):
        """Rerun processing and clustering on :class:`Gibbs` data.

        :param nproc: Number of processes to use in clustering results for
            all residues.
        :type nproc: int
        """
        self._collect(nproc, process=True)

    def get_taus(self, nproc=1):
        r"""Get :math:`\tau` and 95\% confidence interval bounds for the
        slowest process for each residue.

        :param nproc: Number of processes to use.
        :type nproc: int
        :returns: Tuple of the form (:math:`\tau`, [CI lower bound,
            CI upper bound])
        :rtype: tuple
        """
        # NOTE: previously this method stored results on ``self`` but
        # returned None despite the documented tuple return.
        return self._collect(nproc, process=False)

    def write_data(self, fname='tausout'):
        r"""Write :math:`\tau` values with 95\% confidence interval to a
        numpy file with the format [`sel1` resid, :math:`\tau`, CI lower
        bound, CI upper bound].

        :param fname: Filename to save data to.
        :type fname: str, optional
        """
        if self.taus is None:
            self.get_taus()

        # self.residues holds names like 'R301'; strip the leading letter.
        resids = np.array([int(res[1:]) for res in self.residues])
        data = np.stack((resids, self.taus, self.bars[0], self.bars[1]))
        np.save(fname, data.T)

    def plot_protein(self, **kwargs):
        r"""Plot :math:`\tau` vs resid. kwargs are passed to the
        :meth:`plot_protein` method of `util.py`. These can be used to
        change the labeling cutoff, y-limit of the plot, scale the figure,
        and set major and minor ticks.
        """
        from basicrta.util import plot_protein

        if self.taus is None:
            self.get_taus()

        residues = [res.split('/')[-1] for res in self.residues]

        # Drop residues whose CI bounds came back negative (failed fits).
        exclude_inds = np.where(self.bars < 0)[1]
        taus = np.delete(self.taus, exclude_inds)
        bars = np.delete(self.bars, exclude_inds, axis=1)
        residues = np.delete(residues, exclude_inds)

        plot_protein(residues, taus, bars, self.prot, **kwargs)

    def b_color_structure(self, structure):
        r"""Add :math:`\tau` to b-factors in the specified structure. Saves
        structure with b-factors to `tau_bcolored.pdb`.
        """
        if self.taus is None:
            self.get_taus()

        taus, bars = self.taus, self.bars
        cis = bars[1] + bars[0]
        errs = taus / cis
        # NaNs from zero-width CIs compare unequal to themselves; zero them.
        errs[errs != errs] = 0
        u = mda.Universe(structure)

        u.add_TopologyAttr('tempfactors')
        u.add_TopologyAttr('occupancies')
        for tau, err, residue in tqdm(zip(taus, errs, self.residues)):
            res = u.select_atoms(f'protein and resid {residue[1:]}')
            res.tempfactors = np.round(tau, 2)
            res.occupancies = np.round(err, 2)

        u.select_atoms('protein').write('tau_bcolored.pdb')


if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--nproc', type=int, default=1)
parser.add_argument('--cutoff', type=float)
parser.add_argument('--niter', type=int, default=110000)
parser.add_argument('--prot', type=str, default=None, nargs='?')
parser.add_argument('--label-cutoff', type=float, default=3,
dest='label_cutoff',
help='Only label residues with tau > '
'LABEL-CUTOFF * <tau>. ')

parser.add_argument('--structure', type=str, nargs='?')
args = parser.parse_args()

pp = ProcessProtein(args.niter, args.prot, args.cutoff)
pp.reprocess(nproc=args.nproc)
pp.collect_results()
pp.write_data()
pp.plot_protein(label_cutoff=args.label_cutoff)
87 changes: 87 additions & 0 deletions build/lib/basicrta/combine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#!/usr/bin/env python

"""
Command-line interface for combining contact timeseries from multiple repeat runs.

This module provides functionality to combine contact files from multiple
trajectory repeats, enabling pooled analysis of binding kinetics.
"""

import os
import argparse
from basicrta.contacts import CombineContacts


def main():
    """Parse CLI arguments, validate inputs, and combine contact files.

    :returns: Process exit code — 0 on success, 1 on any error.
    :rtype: int
    """
    parser = argparse.ArgumentParser(
        description="Combine contact timeseries from multiple repeat runs. "
                    "This enables pooling data from multiple trajectory repeats "
                    "and calculating posteriors from all data together."
    )

    parser.add_argument(
        '--contacts',
        nargs='+',
        required=True,
        help="List of contact pickle files to combine (e.g., contacts_7.0.pkl from different runs)"
    )

    parser.add_argument(
        '--output',
        type=str,
        default='combined_contacts.pkl',
        help="Output filename for combined contacts (default: combined_contacts.pkl)"
    )

    parser.add_argument(
        '--no-validate',
        action='store_true',
        help="Skip compatibility validation (use with caution)"
    )

    args = parser.parse_args()

    # Validate that every input file exists before doing any work.
    missing_files = [f for f in args.contacts if not os.path.exists(f)]

    if missing_files:
        print("ERROR: The following contact files were not found:")
        for filename in missing_files:
            # Previously printed a "(unknown)" placeholder; report the
            # actual missing path so the user can fix it.
            print(f"  {filename}")
        return 1

    if len(args.contacts) < 2:
        print("ERROR: At least 2 contact files are required for combining")
        return 1

    # Refuse to clobber an existing output file.
    if os.path.exists(args.output):
        print(f"ERROR: Output file {args.output} already exists")
        return 1

    try:
        combiner = CombineContacts(
            contact_files=args.contacts,
            output_name=args.output,
            validate_compatibility=not args.no_validate
        )

        output_file = combiner.run()

        print("\nCombination successful!")
        print(f"Combined contact file saved as: {output_file}")
        print("\nYou can now use this file with the Gibbs sampler:")
        print(f"  python -m basicrta.gibbs --contacts {output_file} --nproc <N>")

        return 0

    except Exception as e:
        # CLI boundary: report the failure and exit nonzero rather than
        # dumping a traceback at the user.
        print(f"ERROR: {e}")
        return 1


if __name__ == '__main__':
    # Propagate main()'s return code as the process exit status.
    # SystemExit is preferred over the site-module exit() helper, which is
    # not guaranteed to exist in all execution contexts.
    raise SystemExit(main())
Loading
Loading