CodeEntropy/CodeEntropy/levels.py at ebe7d9afb6cd8dc41584a02d33449aec14ec8639 · CCPBioSim/CodeEntropy · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
import logging

import numpy as np

logger = logging.getLogger(__name__)


class LevelManager:
    """
    Manages the structural and dynamic levels involved in entropy calculations. This
    includes selecting relevant levels, computing axes for translation and rotation,
    and handling bead-based representations of molecular systems. Provides utility
    methods to extract averaged positions, convert coordinates to spherical systems,
    compute weighted forces and torques, and manipulate matrices used in entropy
    analysis.
    """

    def __init__(self):
        """
        Initializes the LevelManager with placeholders for level-related data,
        including translational and rotational axes, number of beads, and a
        general-purpose data container.
        """
        self.data_container = None
        self._levels = None
        self._trans_axes = None
        self._rot_axes = None
        self._number_of_beads = None

    def select_levels(self, data_container):
        """
        Function to read input system and identify the number of molecules and
        the levels (i.e. united atom, residue and/or polymer) that should be used.
        The level refers to the size of the bead (atom or collection of atoms)
        that will be used in the entropy calculations.

        Input
        -----
        arg_DataContainer : MDAnalysis universe object containing the system of interest

        Returns
        -------
        number_molecules : integer
        levels : array of strings for each molecule
        """

        # fragments is MDAnalysis terminology for what chemists would call molecules
        number_molecules = len(data_container.atoms.fragments)
        logger.debug("The number of molecules is {}.".format(number_molecules))

        fragments = data_container.atoms.fragments
        levels = [[] for _ in range(number_molecules)]

        for molecule in range(number_molecules):
            levels[molecule].append(
                "united_atom"
            )  # every molecule has at least one atom

            atoms_in_fragment = fragments[molecule].select_atoms("not name H*")
            number_residues = len(atoms_in_fragment.residues)

            if len(atoms_in_fragment) > 1:
                levels[molecule].append("residue")

                if number_residues > 1:
                    levels[molecule].append("polymer")

        logger.debug(f"levels {levels}")

        return number_molecules, levels

    def get_matrices(
        self, data_container, level, start, end, step, number_frames, highest_level
    ):
        """
        Function to create the force matrix needed for the transvibrational entropy
        calculation and the torque matrix for the rovibrational entropy calculation.

        Input
        -----
            data_container : MDAnalysis universe type with the information on the
            molecule of interest.
            level : string, which of the polymer, residue, or united atom levels
            are the matrices for.
            start : int, starting frame, default 0 (first frame)
            end : int, ending frame, default -1 (last frame)
            step : int, step for going through trajectories, default 1

        Returns
        -------
            force_matrix : force covariance matrix for transvibrational entropy
            torque_matrix : torque convariance matrix for rovibrational entropy
        """

        # Make beads
        list_of_beads = self.get_beads(data_container, level)

        # number of beads and frames in trajectory
        number_beads = len(list_of_beads)

        # initialize force and torque arrays
        weighted_forces = [
            [0 for x in range(number_frames)] for y in range(number_beads)
        ]
        weighted_torques = [
            [0 for x in range(number_frames)] for y in range(number_beads)
        ]

        # Calculate forces/torques for each bead
        for bead_index in range(number_beads):
            for timestep in data_container.trajectory[start:end:step]:
                # Set up axes
                # translation and rotation use different axes
                # how the axes are defined depends on the level
                trans_axes, rot_axes = self.get_axes(data_container, level, bead_index)

                # Sort out coordinates, forces, and torques for each atom in the bead
                timestep_index = timestep.frame - start
                weighted_forces[bead_index][timestep_index] = self.get_weighted_forces(
                    data_container, list_of_beads[bead_index], trans_axes, highest_level
                )
                weighted_torques[bead_index][timestep_index] = (
                    self.get_weighted_torques(
                        data_container, list_of_beads[bead_index], rot_axes
                    )
                )

        # Make covariance matrices - looping over pairs of beads
        # list of pairs of indices
        pair_list = [(i, j) for i in range(number_beads) for j in range(number_beads)]

        force_submatrix = [
            [0 for x in range(number_beads)] for y in range(number_beads)
        ]
        torque_submatrix = [
            [0 for x in range(number_beads)] for y in range(number_beads)
        ]

        for i, j in pair_list:
            # for each pair of beads
            # reducing effort because the matrix for [i][j] is the transpose of the one
            # for [j][i]
            if i <= j:
                # calculate the force covariance segment of the matrix
                force_submatrix[i][j] = self.create_submatrix(
                    weighted_forces[i], weighted_forces[j], number_frames
                )
                force_submatrix[j][i] = np.transpose(force_submatrix[i][j])

                # calculate the torque covariance segment of the matrix
                torque_submatrix[i][j] = self.create_submatrix(
                    weighted_torques[i], weighted_torques[j], number_frames
                )
                torque_submatrix[j][i] = np.transpose(torque_submatrix[i][j])

        # use np.block to make submatrices into one matrix
        force_matrix = np.block(
            [
                [force_submatrix[i][j] for j in range(number_beads)]
                for i in range(number_beads)
            ]
        )

        torque_matrix = np.block(
            [
                [torque_submatrix[i][j] for j in range(number_beads)]
                for i in range(number_beads)
            ]
        )

        # fliter zeros to remove any rows/columns that are all zero
        force_matrix = self.filter_zero_rows_columns(force_matrix)
        torque_matrix = self.filter_zero_rows_columns(torque_matrix)

        logger.debug(f"Force Matrix: {force_matrix}")
        logger.debug(f"Torque Matrix: {torque_matrix}")

        return force_matrix, torque_matrix

    def get_dihedrals(self, data_container, level):
        """
        Define the set of dihedrals for use in the conformational entropy function.
        If residue level, the dihedrals are defined from the atoms
        (4 bonded atoms for 1 dihedral).
        If polymer level, use the bonds between residues to cast dihedrals.
        Note: not using improper dihedrals only ones with 4 atoms/residues
        in a linear arrangement.

        Input
        -----
        data_container : system information
        level : level of the hierarchy (should be residue or polymer)

        Output
        ------
        dihedrals : set of dihedrals
        """
        # Start with empty array
        dihedrals = []

        # if united atom level, read dihedrals from MDAnalysis universe
        if level == "united_atom":
            dihedrals = data_container.dihedrals

        # if residue level, looking for dihedrals involving residues
        if level == "residue":
            num_residues = len(data_container.residues)
            if num_residues < 4:
                logger.debug("no residue level dihedrals")

            else:
                # find bonds between residues N-3:N-2 and N-1:N
                for residue in range(4, num_residues + 1):
                    # Using MDAnalysis selection,
                    # assuming only one covalent bond between neighbouring residues
                    # TODO not written for branched polymers
                    atom_string = (
                        "resindex "
                        + str(residue - 4)
                        + " and bonded resindex "
                        + str(residue - 3)
                    )
                    atom1 = data_container.select_atoms(atom_string)

                    atom_string = (
                        "resindex "
                        + str(residue - 3)
                        + " and bonded resindex "
                        + str(residue - 4)
                    )
                    atom2 = data_container.select_atoms(atom_string)

                    atom_string = (
                        "resindex "
                        + str(residue - 2)
                        + " and bonded resindex "
                        + str(residue - 1)
                    )
                    atom3 = data_container.select_atoms(atom_string)

                    atom_string = (
                        "resindex "
                        + str(residue - 1)
                        + " and bonded resindex "
                        + str(residue - 2)
                    )
                    atom4 = data_container.select_atoms(atom_string)

                    atom_group = atom1 + atom2 + atom3 + atom4
                    dihedrals.append(atom_group.dihedral)

        logger.debug(f"Dihedrals: {dihedrals}")

        return dihedrals

    def get_beads(self, data_container, level):
        """
        Function to define beads depending on the level in the hierarchy.

        Input
        -----
        data_container : the MDAnalysis universe
        level : the heirarchy level (polymer, residue, or united atom)

        Output
        ------
        list_of_beads : the relevent beads
        """

        if level == "polymer":
            list_of_beads = []
            atom_group = "all"
            list_of_beads.append(data_container.select_atoms(atom_group))

        if level == "residue":
            list_of_beads = []
            num_residues = len(data_container.residues)
            for residue in range(num_residues):
                atom_group = "resindex " + str(residue)
                list_of_beads.append(data_container.select_atoms(atom_group))

        if level == "united_atom":
            list_of_beads = []
            heavy_atoms = data_container.select_atoms("not name H*")
            for atom in heavy_atoms:
                atom_group = (
                    "index "
                    + str(atom.index)
                    + " or (name H* and bonded index "
                    + str(atom.index)
                    + ")"
                )
                list_of_beads.append(data_container.select_atoms(atom_group))

        logger.debug(f"List of beads: {list_of_beads}")

        return list_of_beads

    def get_axes(self, data_container, level, index=0):
        """
        Function to set the translational and rotational axes.
        The translational axes are based on the principal axes of the unit
        one level larger than the level we are interested in (except for
        the polymer level where there is no larger unit). The rotational
        axes use the covalent links between residues or atoms where possible
        to define the axes, or if the unit is not bonded to others of the
        same level the prinicpal axes of the unit are used.

        Input
        -----
        data_container : the information about the molecule and trajectory
        level : the level (united atom, residue, or polymer) of interest
        index : residue index (integer)

        Output
        ------
        trans_axes : translational axes
        rot_axes : rotational axes
        """
        index = int(index)

        if level == "polymer":
            # for polymer use principle axis for both translation and rotation
            trans_axes = data_container.atoms.principal_axes()
            rot_axes = data_container.atoms.principal_axes()

        if level == "residue":
            # Translation
            # for residues use principal axes of whole molecule for translation
            trans_axes = data_container.atoms.principal_axes()

            # Rotation
            # find bonds between atoms in residue of interest and other residues
            # we are assuming bonds only exist between adjacent residues
            # (linear chains of residues)
            # TODO refine selection so that it will work for branched polymers
            index_prev = index - 1
            index_next = index + 1
            atom_set = data_container.select_atoms(
                f"(resindex {index_prev} or resindex {index_next}) "
                f"and bonded resid {index}"
            )
            residue = data_container.select_atoms(f"resindex {index}")

            if len(atom_set) == 0:
                # if no bonds to other residues use pricipal axes of residue
                rot_axes = residue.atoms.principal_axes()

            else:
                # set center of rotation to center of mass of the residue
                center = residue.atoms.center_of_mass()

                # get vector for average position of bonded atoms
                vector = self.get_avg_pos(atom_set, center)

                # use spherical coordinates function to get rotational axes
                rot_axes = self.get_sphCoord_axes(vector)

        if level == "united_atom":
            # Translation
            # for united atoms use principal axes of residue for translation
            trans_axes = data_container.residues.principal_axes()

            # Rotation
            # for united atoms use heavy atoms bonded to the heavy atom
            atom_set = data_container.select_atoms(
                f"not name H* and bonded index {index}"
            )

            # center at position of heavy atom
            atom_group = data_container.select_atoms(f"index {index}")
            center = atom_group.positions[0]

            # get vector for average position of hydrogens
            vector = self.get_avg_pos(atom_set, center)

            # use spherical coordinates function to get rotational axes
            rot_axes = self.get_sphCoord_axes(vector)

        logger.debug(f"Translational Axes: {trans_axes}")
        logger.debug(f"Rotational Axes: {rot_axes}")

        return trans_axes, rot_axes

    def get_avg_pos(self, atom_set, center):
        """
        Function to get the average position of a set of atoms.

        Input
        -----
        atoms : MDAnalysis atom group
        center : position for center of rotation

        Output
        ------
        avg_position : three dimensional vector
        """
        # start with an empty vector
        avg_position = np.zeros((3))

        # get number of atoms
        number_atoms = len(atom_set.names)

        if number_atoms != 0:
            # sum positions for all atoms in the given set
            for atom_index in range(number_atoms):
                atom_position = atom_set.atoms[atom_index].position

                avg_position += atom_position

            avg_position /= number_atoms  # divide by number of atoms to get average

        else:
            # if no atoms in set the unit has no bonds to restrict its rotational
            # motion, so we can use a random vector to get the spherical
            # coordinates axes
            avg_position = np.random.random(3)

        # transform the average position to a coordinate system with the origin
        # at center
        avg_position = avg_position - center

        logger.debug(f"Average Position: {avg_position}")

        return avg_position

    def get_sphCoord_axes(self, arg_r):
        """
        For a given vector in space, treat it is a radial vector rooted at
        0,0,0 and derive a curvilinear coordinate system according to the
        rules of polar spherical coordinates
        """

        x2y2 = arg_r[0] ** 2 + arg_r[1] ** 2
        r2 = x2y2 + arg_r[2] ** 2

        # Check for division by zero
        if r2 == 0.0:
            raise ValueError("r2 is zero, cannot compute spherical coordinates.")

        if x2y2 == 0.0:
            raise ValueError("x2y2 is zero, cannot compute sin_phi and cos_phi.")

        # These conditions are mathematically unreachable for real-valued vectors.
        # Marked as no cover to avoid false negatives in coverage reports.

        # Check for non-negative values inside the square root
        if x2y2 / r2 < 0:  # pragma: no cover
            raise ValueError(
                f"Negative value encountered for sin_theta calculation: {x2y2 / r2}. "
                f"Cannot take square root."
            )

        if x2y2 < 0:  # pragma: no cover
            raise ValueError(
                f"Negative value encountered for sin_phi and cos_phi "
                f"calculation: {x2y2}. "
                f"Cannot take square root."
            )

        if x2y2 != 0.0:
            sin_theta = np.sqrt(x2y2 / r2)
            cos_theta = arg_r[2] / np.sqrt(r2)

            sin_phi = arg_r[1] / np.sqrt(x2y2)
            cos_phi = arg_r[0] / np.sqrt(x2y2)

        else:  # pragma: no cover
            sin_theta = 0.0
            cos_theta = 1

            sin_phi = 0.0
            cos_phi = 1

        # if abs(sin_theta) > 1 or abs(sin_phi) > 1:
        #     print('Bad sine : T {} , P {}'.format(sin_theta, sin_phi))

        # cos_theta = np.sqrt(1 - sin_theta*sin_theta)
        # cos_phi = np.sqrt(1 - sin_phi*sin_phi)

        # print('{} {} {}'.format(*arg_r))
        # print('Sin T : {}, cos T : {}'.format(sin_theta, cos_theta))
        # print('Sin P : {}, cos P : {}'.format(sin_phi, cos_phi))

        spherical_basis = np.zeros((3, 3))

        # r^
        spherical_basis[0, :] = np.asarray(
            [sin_theta * cos_phi, sin_theta * sin_phi, cos_theta]
        )

        # Theta^
        spherical_basis[1, :] = np.asarray(
            [cos_theta * cos_phi, cos_theta * sin_phi, -sin_theta]
        )

        # Phi^
        spherical_basis[2, :] = np.asarray([-sin_phi, cos_phi, 0.0])

        logger.debug(f"Spherical Basis: {spherical_basis}")

        return spherical_basis

    def get_weighted_forces(
        self, data_container, bead, trans_axes, highest_level, force_partitioning=0.5
    ):
        """
        Function to calculate the mass weighted forces for a given bead.

        Input
        -----
        bead : the part of the system to be considered
        trans_axes : the axes relative to which the forces are located

        Output
        ------
        weighted_force : the mass weighted sum of the forces in the bead
        """

        forces_trans = np.zeros((3,))

        # Sum forces from all atoms in the bead
        for atom in bead.atoms:
            # update local forces in translational axes
            forces_local = np.matmul(trans_axes, data_container.atoms[atom.index].force)
            forces_trans += forces_local

        if highest_level:
            # multiply by the force_partitioning parameter to avoid double counting
            # of the forces on weakly correlated atoms
            # the default value of force_partitioning is 0.5 (dividing by two)
            forces_trans = force_partitioning * forces_trans

        # divide the sum of forces by the mass of the bead to get the weighted forces
        mass = bead.total_mass()

        # Check that mass is positive to avoid division by 0 or negative values inside
        # sqrt
        if mass <= 0:
            raise ValueError(
                f"Invalid mass value: {mass}. Mass must be positive to compute the "
                f"square root."
            )

        weighted_force = forces_trans / np.sqrt(mass)

        logger.debug(f"Weighted Force: {weighted_force}")

        return weighted_force

    def get_weighted_torques(
        self, data_container, bead, rot_axes, force_partitioning=0.5
    ):
        """
        Function to calculate the moment of inertia weighted torques for a given bead.

        This function computes torques in a rotated frame and then weights them using
        the moment of inertia tensor. To prevent numerical instability, it treats
        extremely small diagonal elements of the moment of inertia tensor as zero
        (since values below machine precision are effectively zero). This avoids
        unnecessary use of extended precision (e.g., float128).

        Additionally, if the computed torque is already zero, the function skips
        the division step, reducing unnecessary computations and potential errors.

        Parameters
        ----------
        data_container : object
            Contains atomic positions and forces.
        bead : object
            The part of the molecule to be considered.
        rot_axes : np.ndarray
            The axes relative to which the forces and coordinates are located.
        force_partitioning : float, optional
            Factor to adjust force contributions, default is 0.5.

        Returns
        -------
        np.ndarray
            The mass-weighted sum of the torques in the bead.
        """

        torques = np.zeros((3,))
        weighted_torque = np.zeros((3,))

        for atom in bead.atoms:

            # update local coordinates in rotational axes
            coords_rot = (
                data_container.atoms[atom.index].position - bead.center_of_mass()
            )
            coords_rot = np.matmul(rot_axes, coords_rot)
            # update local forces in rotational frame
            forces_rot = np.matmul(rot_axes, data_container.atoms[atom.index].force)

            # multiply by the force_partitioning parameter to avoid double counting
            # of the forces on weakly correlated atoms
            # the default value of force_partitioning is 0.5 (dividing by two)
            forces_rot = force_partitioning * forces_rot

            # define torques (cross product of coordinates and forces) in rotational
            # axes
            torques_local = np.cross(coords_rot, forces_rot)
            torques += torques_local

        # divide by moment of inertia to get weighted torques
        # moment of inertia is a 3x3 tensor
        # the weighting is done in each dimension (x,y,z) using the diagonal
        # elements of the moment of inertia tensor
        moment_of_inertia = bead.moment_of_inertia()

        for dimension in range(3):
            # Skip calculation if torque is already zero
            if np.isclose(torques[dimension], 0):
                weighted_torque[dimension] = 0
                continue

            # Check for zero moment of inertia
            if np.isclose(moment_of_inertia[dimension, dimension], 0):
                raise ZeroDivisionError(
                    f"Attempted to divide by zero moment of inertia in dimension "
                    f"{dimension}."
                )

            # Check for negative moment of inertia
            if moment_of_inertia[dimension, dimension] < 0:
                raise ValueError(
                    f"Negative value encountered for moment of inertia: "
                    f"{moment_of_inertia[dimension, dimension]} "
                    f"Cannot compute weighted torque."
                )

            # Compute weighted torque
            weighted_torque[dimension] = torques[dimension] / np.sqrt(
                moment_of_inertia[dimension, dimension]
            )

        logger.debug(f"Weighted Torque: {weighted_torque}")

        return weighted_torque

    def create_submatrix(self, data_i, data_j, number_frames):
        """
        Function for making covariance matrices.

        Input
        -----
        data_i : values for bead i
        data_j : valuees for bead j

        Output
        ------
        submatrix : 3x3 matrix for the covariance between i and j
        """

        # Start with 3 by 3 matrix of zeros
        submatrix = np.zeros((3, 3))

        # For each frame calculate the outer product (cross product) of the data from
        # the two beads and add the result to the submatrix
        for frame in range(number_frames):
            outer_product_matrix = np.outer(data_i[frame], data_j[frame])
            submatrix = np.add(submatrix, outer_product_matrix)

        # Divide by the number of frames to get the average
        submatrix /= number_frames

        logger.debug(f"Submatrix: {submatrix}")

        return submatrix

    def filter_zero_rows_columns(self, arg_matrix):
        """
        function for removing rows and columns that contain only zeros from a matrix

        Input
        -----
        arg_matrix : matrix

        Output
        ------
        arg_matrix : the reduced size matrix
        """

        # record the initial size
        init_shape = np.shape(arg_matrix)

        zero_indices = list(
            filter(
                lambda row: np.all(np.isclose(arg_matrix[row, :], 0.0)),
                np.arange(np.shape(arg_matrix)[0]),
            )
        )
        all_indices = np.ones((np.shape(arg_matrix)[0]), dtype=bool)
        all_indices[zero_indices] = False
        arg_matrix = arg_matrix[all_indices, :]

        all_indices = np.ones((np.shape(arg_matrix)[1]), dtype=bool)
        zero_indices = list(
            filter(
                lambda col: np.all(np.isclose(arg_matrix[:, col], 0.0)),
                np.arange(np.shape(arg_matrix)[1]),
            )
        )
        all_indices[zero_indices] = False
        arg_matrix = arg_matrix[:, all_indices]

        # get the final shape
        final_shape = np.shape(arg_matrix)

        if init_shape != final_shape:
            logger.debug(
                "A shape change has occurred ({},{}) -> ({}, {})".format(
                    *init_shape, *final_shape
                )
            )

        logger.debug(f"arg_matrix: {arg_matrix}")

        return arg_matrix