"""Class for managing atom coordinate data.
Author: Cameron F. Abrams <cfa22@drexel.edu>
"""
import logging
import os
import numpy as np
import pandas as pd
from ..geometry.bondlist import Bondlist
from ..geometry.linkcell import Linkcell
from ..geometry.matrix4 import Matrix4
from ..geometry.ring import Ring, Segment
from ..io import gro as _gro_io
from ..io import mol2 as _mol2_io
from ..io import pdb as _pdb_io
from ..io.gro import GRX_ATTRIBUTES, GRX_GLOBALLY_UNIQUE, GRX_UNSET_DEFAULTS
from ..utils.dataframetools import get_rows_w_attribute, set_row_attribute, set_rows_attributes_from_dict
logger = logging.getLogger(__name__)
[docs]
class Coordinates:
""" Handles atom coordinates.
The primary object is `A`, a pandas DataFrame with one row per atom. Each atom has attributes that may be found in a `gro` file and/or a `mol2` file, along with so-called extended attributes, which are used solely by HTPolyNet.
"""
gro_attributes = _gro_io.GRO_ATTRIBUTES
mol2_atom_attributes = _mol2_io.MOL2_ATOM_ATTRIBUTES
mol2_bond_attributes = _mol2_io.MOL2_BOND_ATTRIBUTES
mol2_bond_types = _mol2_io.MOL2_BOND_TYPES
def __init__(self, name=''):
"""Constructs an empty Coordinates object.
Args:
name (str): a name string, defaults to ''
"""
self.name = name
self.metadat = {}
self.N = 0
self.A = pd.DataFrame()
self.mol2_bonds = pd.DataFrame()
self.mol2_bondlist = Bondlist()
self.linkcell = Linkcell(pbc_wrapper=self.wrap_point)
self.empty = True
self.box = np.zeros((3,3))
self.grx_attributes = GRX_ATTRIBUTES
self.parent = None
[docs]
@classmethod
def read_gro(cls, filename, wrap_coords=True):
"""Reads a Gromacs gro file.
Args:
filename (str): name of gro file
wrap_coords (bool): if True, wrap atom coordinates into the box, defaults to True
Returns:
Coordinates: a new Coordinates instance
"""
inst = cls(filename)
if filename != '':
data = _gro_io.read(filename)
inst.name = data['name']
inst.N = data['N']
inst.metadat = data['metadat']
inst.A = data['A']
inst.box = data['box']
inst.empty = False
if wrap_coords:
inst.wrap_coords()
return inst
[docs]
@classmethod
def read_mol2(cls, filename):
"""Reads in a Sybyl MOL2 file into a Coordinates instance.
Note that this method only reads in MOLECULE, ATOM, and BOND sections.
All lengths are converted from Ångström to nm.
Args:
filename (str): name of input mol2 file
Returns:
Coordinates: a new Coordinates instance
"""
inst = cls(name=filename)
data = _mol2_io.read(filename)
inst.name = data['name']
inst.N = data['N']
inst.metadat = data['metadat']
inst.A = data['A']
inst.mol2_bonds = data['mol2_bonds']
inst.mol2_bondlist = data['mol2_bondlist']
inst.empty = False
return inst
[docs]
@classmethod
def read_pdb(cls, filename):
"""Reads a PDB file into a Coordinates instance.
CONECT records are parsed to build a bond table. Repeated entries for
the same atom pair within one CONECT line are interpreted as higher-order
bonds (e.g. two appearances → double bond, order '2'). Positions are
converted from Ångström to nm.
Args:
filename (str): path to the PDB file
Returns:
Coordinates: a new Coordinates instance with mol2_bonds populated
"""
inst = cls(name=filename)
data = _pdb_io.read(filename)
inst.name = data['name']
inst.N = data['N']
inst.metadat = data['metadat']
inst.A = data['A']
inst.mol2_bonds = data['mol2_bonds']
inst.mol2_bondlist = data['mol2_bondlist']
inst.empty = False
return inst
[docs]
@classmethod
def fcc(cls, a: float, nc=[1,1,1]):
"""Generates a Coordinates object that represents an FCC crystal.
Args:
a (float): lattice parameter
nc (list): number of unit cells in the three lattice vector directions, defaults to [1,1,1]
Returns:
Coordinates: a Coordinates object
"""
from htpolynet.geometry.lattice import fcc as _fcc
posn = _fcc(a,nc)
N = len(posn)
inst = cls()
inst.A['globalIdx'] = list(range(1,N+1))
inst.A['atomName'] = 'AL'
inst.A['resNum'] = list(range(1,N+1))
inst.A['posX'] = posn[:,0]
inst.A['posY'] = posn[:,1]
inst.A['posZ'] = posn[:,2]
inst.A['resName'] = 'MET'
inst.N = N
return inst
[docs]
def claim_parent(self, parent):
self.parent = parent
[docs]
def set_box(self, box: np.ndarray):
"""Sets the box size from box.
Args:
box (numpy.ndarray): 3-by-1 or 3-by-3 box size matrix
"""
if box.shape == (3, 1):
for i in range(3):
self.box[i, i] = box[i]
elif box.shape == (3, 3):
self.box = np.copy(box)
[docs]
def total_volume(self, units='gromacs'):
"""Returns total volume of box.
Args:
units (str): unit system designation; if 'SI' returns m^3, defaults to 'gromacs'
Returns:
float: volume (in nm^3 if units is 'gromacs' or m^3 if units is 'SI')
"""
nm_per_m = 1.e9
vol = np.prod(self.box.diagonal()) # nm^3
return vol if units != 'SI' else vol / (nm_per_m ** 3)
[docs]
def copy_coords(self, other):
"""Copies the posX, posY, and posZ atom attributes, and the box size, from other.A to self.A.
Args:
other (Coordinates): the other Coordinates instance
"""
assert self.A.shape[0] == other.A.shape[0], f'Cannot copy -- atom count mismatch {self.A.shape[0]} vs {other.A.shape[0]}'
for c in ['posX', 'posY', 'posZ']:
otherpos = other.A[c].copy()
self.A[c] = otherpos
self.box = np.copy(other.box)
[docs]
def subcoords(self, sub_adf: pd.DataFrame):
"""Generates a new Coordinates object to hold the atoms dataframe in 'sub_adf' parameter.
Args:
sub_adf (pd.DataFrame): an atom dataframe
Returns:
Coordinates: a new Coordinates object
"""
newC = Coordinates()
newC.set_box(self.box)
newC.A = sub_adf
newC.N = sub_adf.shape[0]
return newC
[docs]
def unwrap(self, P, O, pbc):
"""Shifts point P to its unwrapped closest periodic image to point O.
Args:
P (np.ndarray(3,float)): a point
O (np.ndarray(3,float)): origin
pbc (np.ndarray(3,int)): directions in which pbc are applied
Returns:
np.ndarray(3,float): a point
"""
ROP = self.mic(O - P, pbc)
PCPI = O - ROP
return PCPI
[docs]
def pierces(self, B: pd.DataFrame, C: pd.DataFrame, pbc):
"""Determines whether or not bond represented by two points in B pierces ring represented by N points in C.
Args:
B (pd.DataFrame): two points defining a bond
C (pd.DataFrame): N points defining a ring
pbc (list): periodic boundary condition flags, one per dimension
Returns:
bool: True if ring is pierced
"""
BC = np.array(B[['posX', 'posY', 'posZ']])
CC = np.array(C[['posX', 'posY', 'posZ']])
# get both atoms in bond into CPI
# get all atoms in ring into CPI (but not necessarily wrt bond)
CC[1:] = np.array([self.unwrap(c, CC[0], pbc) for c in CC[1:]])
BC[0] = self.unwrap(BC[0], CC[0], pbc)
BC[1] = self.unwrap(BC[1], BC[0], pbc)
B[['posX', 'posY', 'posZ']] = BC
C[['posX', 'posY', 'posZ']] = CC
S = Segment(BC)
R = Ring(CC)
R.analyze()
do_it, point = R.segint(S)
return do_it
[docs]
def linkcell_initialize(self, cutoff=0.0, populate=True, force_repopulate=False, save=True):
"""Initializes the link-cell structure for ring-pierce testing.
Assigns a ``linkcell_idx`` attribute to each ring atom and reactive
atom (z > 0) directly in ``self.A``; all other atoms get -1.
Results are cached to a ``.grx`` file keyed by cutoff value.
Args:
cutoff (float): cell side length (nm), defaults to 0.0
populate (bool): if True, perform the assignment; defaults to True
force_repopulate (bool): if True, ignore any cached file; defaults to False
save (bool): if True, write the linkcell_idx column to a cache file; defaults to True
"""
logger.debug('Initializing link-cell structure')
self.linkcell.create(cutoff, self.box)
if populate:
lc_file = f'linkcell-{cutoff:.2f}.grx'
if os.path.exists(lc_file) and not force_repopulate:
logger.debug(f'Found {lc_file}; reading cached linkcell_idx.')
self.read_atomset_attributes(lc_file)
else:
mask = (
self.A['globalIdx'].isin(self.parent.Topology.rings.all_atoms())
| (self.A['z'] > 0)
)
self.linkcell.assign(self.A, mask=mask)
if save:
self.write_atomset_attributes(['linkcell_idx'], lc_file)
[docs]
def geometric_center(self):
"""Computes and returns the geometric center of the atoms in self's A dataframe.
Returns:
np.ndarray(3,float): geometric center
"""
a = self.A
return np.array([a.posX.mean(), a.posY.mean(), a.posZ.mean()])
[docs]
def rij(self, i, j, pbc=None):
"""Computes distance between atoms i and j.
Args:
i (int): globalIdx of first atom
j (int): globalIdx of second atom
pbc (array-like, optional): PBC flags per axis, defaults to [1,1,1]
Returns:
float: distance between i and j (nm)
"""
if pbc is None:
pbc = [1, 1, 1]
if np.any(pbc) and np.allclose(self.box, 0):
logger.warning('Interatomic distance calculation using PBC with no boxsize set.')
Rij = self.mic(self.get_R(i) - self.get_R(j), pbc)
return np.sqrt(Rij.dot(Rij))
[docs]
def mic(self, r, pbc=None):
"""Applies minimum image convention to displacement vector r.
Args:
r (np.ndarray(3,float)): displacement vector
pbc (array-like, optional): PBC flags per axis, defaults to [1,1,1]
Returns:
np.ndarray(3,float): minimum-image displacement vector
"""
if pbc is None:
pbc = [1, 1, 1]
b = self.box.diagonal()
mask = np.asarray(pbc, dtype=bool)
r = r.copy()
r[mask] -= np.round(r[mask] / b[mask]) * b[mask]
return r
[docs]
def wrap_point(self, ri):
"""Wraps point ri into the central periodic image.
Args:
ri (np.ndarray(3,float)): a point
Returns:
tuple: the wrapped point and number of box lengths shifted per dimension
"""
b = self.box.diagonal()
R = ri % b
box_lengths = (-np.floor(ri / b)).astype(int)
return R, box_lengths
[docs]
def wrap_coords(self):
"""Wraps all atomic coordinates into box."""
assert np.any(self.box), f'Cannot wrap if boxsize is not set: {self.box}'
b = self.box.diagonal()
pos = self.A[['posX','posY','posZ']].values
box_lengths = (-np.floor(pos / b)).astype(int)
self.A[['posX','posY','posZ']] = pos % b
self.A[['boxLx','boxLy','boxLz']] = box_lengths
[docs]
def merge(self, other):
"""Merges two Coordinates objects.
Args:
other (Coordinates): the other Coordinates object
Returns:
tuple: integer shifts in atom index, bond index, and residue index
"""
idxshift = self.A.shape[0]
bdxshift = self.mol2_bonds.shape[0]
rdxshift = 0 if self.A.empty else int(self.A['resNum'].max())
nOtherBonds = 0
if not other.A.empty:
oa = other.A.copy()
oa['globalIdx'] += idxshift
oa['resNum'] += rdxshift
self.A = pd.concat((self.A, oa), ignore_index=True)
self.N = self.A.shape[0]
if not other.mol2_bonds.empty:
ob = other.mol2_bonds.copy()
nOtherBonds = ob.shape[0]
ob['bondIdx'] += bdxshift
ob[['ai', 'aj']] += idxshift
self.mol2_bonds = pd.concat((self.mol2_bonds, ob), ignore_index=True)
self.mol2_bondlist.update(ob)
self.metadat['N'] = self.N
self.metadat['nBonds'] = self.metadat.get('nBonds', 0) + nOtherBonds
return (idxshift, bdxshift, rdxshift)
[docs]
def write_atomset_attributes(self, attributes, filename, formatters=[]):
"""Writes atom attributes to a GRX-format file.
Args:
attributes (list): list of attribute names to write
filename (str): name of file to write
formatters (list): formatting methods per attribute, defaults to []
Raises:
Exception: if any item in attributes does not exist in the coordinates dataframe
"""
_gro_io.write_grx(filename, self.A, attributes, formatters=formatters)
[docs]
def read_atomset_attributes(self, filename, attributes=[]):
"""Reads atomic attributes from a GRX-format file into self.A.
Args:
filename (str): name of file
attributes (list): list of attributes to take, defaults to [] (take all)
Returns:
list: names of attributes that were read
"""
df, attributes_read = _gro_io.read_grx(filename, attributes)
self.A = self.A.merge(df, how='outer', on='globalIdx')
return attributes_read
[docs]
def set_atomset_attribute(self, attribute, srs):
"""Sets attribute of atoms to srs.
Args:
attribute (str): name of attribute
srs (scalar or list-like): attribute values in same ordering as self.A
"""
self.A[attribute] = srs
[docs]
def atomcount(self):
"""Returns the number of atoms in the Coordinates object.
Returns:
int: number of atoms
"""
return self.N
[docs]
def show_z_report(self):
"""Generates a text-based histogram of atom z-values (0-3), keyed by resname:atomname."""
zhists = {}
for r in self.A.itertuples():
n = r.atomName
nn = r.resName
k = f'{nn}:{n}'
z = r.z
if not k in zhists:
zhists[k] = np.zeros(4).astype(int)
zhists[k][z] += 1
for n in zhists:
if any([zhists[n][i] > 0 for i in range(1, 4)]):
logger.debug(f'Z-hist for {n} atoms:')
for i in range(4):
logger.debug(f'{i:>5d} ({zhists[n][i]:>6d}): ' + '*' * (zhists[n][i] // 10))
[docs]
def return_bond_lengths(self, bdf):
"""Returns an ordered list of bond lengths for bonds in bdf.
Args:
bdf (pd.DataFrame): dataframe with 'ai' and 'aj' columns of atom indices indicating bonds
Returns:
list: atom distances
"""
lengths = []
for b in bdf.itertuples():
lengths.append(self.rij(b.ai, b.aj))
return lengths
[docs]
def minimum_distance(self, other, self_excludes=None, other_excludes=None):
"""Computes and returns distance of closest approach between two sets of atoms.
Args:
other (Coordinates): other Coordinates instance
self_excludes (list, optional): atom globalIdx values in self to exclude, defaults to None
other_excludes (list, optional): atom globalIdx values in other to exclude, defaults to None
Returns:
float: minimum interatomic distance (nm)
"""
sp = self.A.loc[~self.A['globalIdx'].isin(self_excludes or []), ['posX','posY','posZ']].values
op = other.A.loc[~other.A['globalIdx'].isin(other_excludes or []), ['posX','posY','posZ']].values
diff = sp[:, np.newaxis, :] - op[np.newaxis, :, :]
return np.sqrt((diff * diff).sum(axis=2)).min()
[docs]
def homog_trans(self, M: Matrix4, indices=None):
"""Applies homogeneous transformation matrix M to coordinates.
Args:
M (Matrix4): homogeneous transformation matrix
indices (list, optional): atom globalIdx values to transform; if None transforms all
"""
R = M.m[:3, :3]
t = M.m[:3, 3]
if not indices:
pos = self.A[['posX','posY','posZ']].values
self.A[['posX','posY','posZ']] = pos @ R.T + t
else:
mask = self.A['globalIdx'].isin(indices)
pos = self.A.loc[mask, ['posX','posY','posZ']].values
self.A.loc[mask, ['posX','posY','posZ']] = pos @ R.T + t
[docs]
def rotate(self, R):
"""Rotates all coordinate vectors by rotation matrix R.
Args:
R (numpy.ndarray): rotation matrix (3x3)
"""
pos = self.A[['posX','posY','posZ']].values
self.A[['posX','posY','posZ']] = pos @ R.T
[docs]
def translate(self, L):
"""Translates all coordinate vectors by displacement vector L.
Args:
L (numpy.ndarray): displacement vector (nm)
"""
pos = self.A[['posX','posY','posZ']].values
self.A[['posX','posY','posZ']] = pos + L
[docs]
def maxspan(self):
"""Returns dimensions of orthorhombic convex hull enclosing Coordinates.
Returns:
numpy.ndarray: array of x-span, y-span, z-span
"""
sp = self.A[['posX', 'posY', 'posZ']]
return np.array(
[
sp.posX.max() - sp.posX.min(),
sp.posY.max() - sp.posY.min(),
sp.posZ.max() - sp.posZ.min()
]
)
[docs]
def minmax(self):
"""Returns the lower-leftmost and upper-rightmost positions in the atoms dataframe.
Returns:
tuple(np.ndarray, np.ndarray): lower-leftmost and upper-rightmost points, respectively
"""
sp = self.A[['posX', 'posY', 'posZ']]
return np.array([sp.posX.min(), sp.posY.min(), sp.posZ.min()]), np.array([sp.posX.max(), sp.posY.max(), sp.posZ.max()])
[docs]
def checkbox(self):
"""Checks that all atom positions fit within the designated box.
Returns:
tuple(bool, bool): True,True if both lower-leftmost and upper-rightmost points are within the box
"""
mm, MM = self.minmax()
bb = self.box.diagonal()
return mm < bb, MM > bb
[docs]
def get_R(self, idx: int) -> np.ndarray:
"""Returns the cartesian position of atom with global index idx.
Args:
idx (int): global index of atom
Returns:
numpy.ndarray: cartesian position of the atom, shape (3,)
"""
return self.A.loc[self.A['globalIdx'] == idx, ['posX', 'posY', 'posZ']].values[0]
[docs]
def get_atom_attribute(self, name, attributes):
"""Returns values of attributes listed in name from atoms specified by attribute:value pairs.
Args:
name (str or list): attribute(s) whose values are to be returned
attributes (dict): attribute:value pairs that specify the set of atoms to be considered
Returns:
scalar or Series: attribute value(s) of the matching row
"""
mask = pd.Series(True, index=self.A.index)
for k, v in attributes.items():
mask &= (self.A[k] == v)
return self.A.loc[mask, name].iloc[0]
[docs]
def get_atoms_w_attribute(self,name,attributes):
"""Returns rows of atoms dataframe with columns named in name for atoms matching attributes.
Args:
name (list): attribute names for the columns to be returned
attributes (dict): attribute:value pairs that specify the set of atoms to be considered
Returns:
pd.DataFrame: matching dataframe segment
"""
df = self.A
return get_rows_w_attribute(df, name, attributes)
[docs]
def set_atom_attribute(self, name, value, attributes):
"""Sets the attributes named in name to the given values for atoms matching attributes.
Args:
name (list): names of attributes to set
value (list): values of attributes to set (parallel to name)
attributes (dict): attribute:value pairs that specify the set of atoms to be considered
"""
df = self.A
set_row_attribute(df, name, value, attributes)
[docs]
def delete_atoms(self, idx=[], reindex=True):
"""Deletes atoms whose global indices appear in idx.
If reindex is True, global indices are recalculated to be sequential starting at 1
with no gaps, and two new columns are added: 'oldGlobalIdx' (pre-deletion indices)
and 'globalIdxShift' (change from old to new index for each atom).
Args:
idx (list): atom indexes to delete, defaults to []
reindex (bool): reindex remaining atoms, defaults to True
"""
# logger.debug(f'Coordinates:delete_atoms {idx}')
adf = self.A
indexes_to_drop = adf[adf.globalIdx.isin(idx)].index
indexes_to_keep = set(range(adf.shape[0])) - set(indexes_to_drop)
self.A = adf.take(list(indexes_to_keep)).reset_index(drop=True)
if reindex:
adf = self.A
oldGI = adf['globalIdx'].copy()
adf['globalIdx'] = adf.index + 1
mapper = {k: v for k, v in zip(oldGI, adf['globalIdx'])}
self.N -= len(idx)
''' delete appropriate bonds '''
if not self.mol2_bonds.empty:
d = self.mol2_bonds
indexes_to_drop = d[(d.ai.isin(idx)) | (d.aj.isin(idx))].index
indexes_to_keep = set(range(d.shape[0])) - set(indexes_to_drop)
self.mol2_bonds = d.take(list(indexes_to_keep)).reset_index(drop=True)
if reindex:
d = self.mol2_bonds
d.ai = d.ai.map(mapper)
d.aj = d.aj.map(mapper)
d.bondIdx = d.index + 1
if 'nBonds' in self.metadat:
self.metadat['nBonds'] = len(self.mol2_bonds)
self.bondlist = Bondlist.fromDataFrame(self.mol2_bonds)
[docs]
def write_gro(self, filename, grotitle=''):
"""Writes coordinates and, if present, velocities to a GROMACS-format coordinate file.
Args:
filename (str): name of file to write
grotitle (str): title line for the .gro file, defaults to ''
"""
_gro_io.write(filename, self.A, self.box, self.name, grotitle=grotitle)
[docs]
def write_mol2(self, filename, bondsDF=pd.DataFrame(), molname='', other_attributes=pd.DataFrame()):
"""Writes a mol2-format file from coordinates and an optional bonds DataFrame.
Args:
filename (str): name of file to write
bondsDF (pandas.DataFrame): dataframe of bonds ['ai','aj'], defaults to empty DataFrame
molname (str): name of molecule, defaults to ''
other_attributes (pandas.DataFrame): auxiliary dataframe of attributes, defaults to empty DataFrame
"""
_mol2_io.write(
filename, self.A, self.mol2_bonds, self.metadat, self.name,
bondsDF=bondsDF, molname=molname, other_attributes=other_attributes,
)