"""Manages generation of molecular templates.
Author: Cameron F. Abrams <cfa22@drexel.edu>
"""
import logging
import os
import shutil
from copy import deepcopy
from itertools import product
import numpy as np
import pandas as pd
from ..core import projectfilesystem as pfs
from ..core.bondtemplate import BondTemplate, BondTemplateList, ReactionBond, ReactionBondList
from ..core.topocoord import TopoCoord
from ..cure.chain import ChainManager
from ..cure.reaction import Reaction, ReactionList, reaction_stage
from ..external.ambertools import GAFFParameterize
from ..external.command import run
from ..external.gromacs import mdp_modify,gro_from_trr
from ..geometry.matrix4 import Matrix4
from ..io.gro import GRX_ATTRIBUTES
logger=logging.getLogger(__name__)
def _yield_bonds_as_df(R: Reaction, TC: TopoCoord, resid_mapper) -> pd.DataFrame:
"""Returns a DataFrame of reaction bonds by matching template bond records to atom instances.
Args:
R (Reaction): a Reaction
TC (TopoCoord): molecule topology and coordinates
resid_mapper (list[dict]): per-reactant maps from in-reactant resid to in-product resid
Returns:
pd.DataFrame: one row per bond with columns ai, aj, ri, rj, order, reactantName
"""
rows = []
for bondrec in R.bonds:
atom_keys = bondrec['atoms']
assert len(atom_keys) == 2, f'bond record must have exactly 2 atoms, got {len(atom_keys)}'
atomrecs = [R.atoms[k] for k in atom_keys]
atom_names = [rec['atom'] for rec in atomrecs]
in_product_resids = [resid_mapper[atomrecs[x]['reactant'] - 1][atomrecs[x]['resid']] for x in (0, 1)]
ai = TC.get_gro_attribute_by_attributes('globalIdx', {'resNum': in_product_resids[0], 'atomName': atom_names[0]})
aj = TC.get_gro_attribute_by_attributes('globalIdx', {'resNum': in_product_resids[1], 'atomName': atom_names[1]})
rows.append({'ai': ai, 'aj': aj, 'ri': in_product_resids[0], 'rj': in_product_resids[1],
'order': bondrec['order'], 'reactantName': R.product})
return pd.DataFrame(rows)
[docs]
class Molecule:
def __init__(self, name='', generator:Reaction=None, origin:str=None):
self.name = name
self.parentname = name # stereoisomer parent
self.TopoCoord = TopoCoord()
self.generator:Reaction = generator
self.sequence = []
self.origin = origin
self.reaction_bonds:ReactionBondList = []
self.bond_templates:BondTemplateList = []
self.symmetry_relateds = []
self.stereocenters = [] # list of atomnames
self.stereoisomers = {}
self.nconformers = 0
self.conformers_dict = {}
self.conformers = [] # just a list of gro file basenames
self.zrecs = []
self.is_reactant = False
self.chain_manager = ChainManager()
[docs]
@classmethod
def New(cls, mol_name, generator: Reaction, molrec: dict | None = None):
"""Generates a new, partially populated Molecule based on directives in the configuration input.
Args:
mol_name (str): name of molecule
generator (Reaction): reaction that generates this molecule, if applicable
molrec (dict, optional): dictionary of directives for this molecule, defaults to None
Returns:
Molecule: a new Molecule object
"""
M = cls(name=mol_name, generator=generator)
if not molrec: return M
M.symmetry_relateds = molrec.get('symmetry_equivalent_atoms', [])
M.stereocenters = molrec.get('stereocenters', [])
# expand stereocenters: any atom in a symmetry-equivalent set with a declared
# stereocenter pulls the rest of that set in as stereocenters too
extra = set()
for stc in M.stereocenters:
for sc in M.symmetry_relateds:
if stc in sc:
extra.update(sc)
extra -= set(M.stereocenters)
M.stereocenters.extend(extra)
logger.debug(f'{M.name} stereocenters: {M.stereocenters}')
# generate shells for new stereoisomers
M.create_new_stereoisomers()
logger.debug(f'{M.name} stereoisomers: {[s.name for s in M.stereoisomers.values()]}')
M.conformers_dict = molrec.get('conformers', {})
logger.debug(f'{M.name} conformers_dict {M.conformers_dict}')
return M
[docs]
def update_zrecs(self, zrecs, moldict):
"""Updates the "z-records" based on z's declared in the input configuration file.
Args:
zrecs (dict): zrecs extracted from configuration file for this molecule
moldict (dict): dictionary of available molecules
"""
def replace_if_greater(rec, D, matchattr=[], maxattr=[]):
if not matchattr or not maxattr: return
for r in D:
matched = all([r[a] == rec[a] for a in matchattr])
if matched:
replace = all([r[a] < rec[a] for a in maxattr])
if replace:
D.remove(r)
D.append(rec)
return True
return False
logger.debug(f'Update zrecs in {self.name} from {zrecs}')
seq = self.sequence
for zr in zrecs:
resid = zr['resid'] - 1
rname = seq[resid]
target = moldict[rname]
logger.debug(f'{target.name} {target.zrecs} ->')
found = replace_if_greater(zr, target.zrecs, matchattr=['resid', 'atom'], maxattr=['z'])
if not found: target.zrecs.append(zr)
logger.debug(f'-> {target.name} {target.zrecs}')
[docs]
def determine_sequence(self, moldict):
"""Recursively determines the sequence of a molecule using the network of reactions that must be executed to generate it from primitives.
Args:
moldict (dict): dictionary of available molecules
Returns:
list: list of resnames in order of sequence
"""
if not self.generator: return [self.parentname]
R:Reaction = self.generator
thisseq = []
for rid, rname in R.reactants.items():
parentname = moldict[rname].parentname
thisseq.extend(moldict[parentname].determine_sequence(moldict))
# logger.debug(thisseq)
return thisseq
[docs]
def set_sequence_from_moldict(self, moldict):
"""Sets the sequence of this molecule using the recursive determine_sequence method.
Args:
moldict (dict): dictionary of available molecules
Returns:
Molecule: self
"""
self.sequence = self.determine_sequence(moldict)
return self
[docs]
def set_sequence_from_coordinates(self):
"""set_sequence Establish the sequence-list (residue names in order) based on resNum attributes in atom list
"""
adf = self.TopoCoord.gro_DataFrame('atoms')
trial_sequence = []
current_resid = 0
for i, r in adf.iterrows():
ri = r['resNum']
rn = r['resName']
if ri != current_resid:
current_resid = ri
trial_sequence.append(rn)
assert trial_sequence == self.sequence, f'Error: trial {trial_sequence} does not equal self.seq {self.sequence}'
return self
[docs]
def create_new_stereoisomers(self):
"""Generates new molecules to hold stereoisomers of self.
Returns:
None: returns early if no action taken
"""
if self.generator: return # we only consider stereoisomers on monomers
if not self.stereocenters: return
basename = self.name + '-S'
b = [[0, 1] for _ in range(len(self.stereocenters))]
sseq = product(*b)
next(sseq) # skip the unmodified; it's the base molecule
for x in sseq:
s = ''.join([str(_) for _ in x])
mname = f'{basename}{s}'
self.stereoisomers[mname] = Molecule.New(mname, None)
self.stereoisomers[mname].parentname = self.name
[docs]
def initialize_molecule_rings(self):
"""initialize_molecule_rings generates the dictionary of rings
the dictionary of rings is keyed on ring size
"""
TC = self.TopoCoord
TC.Topology.detect_rings()
logger.debug(f'Detected {len(TC.Topology.rings)} unique rings.')
[docs]
def initialize_monomer_grx_attributes(self):
"""initialize_monomer_grx_attributes initializes all GRX attributes of atoms in molecule
"""
logger.debug(f'{self.name}')
TC = self.TopoCoord
TC.set_gro_attribute('z', 0)
TC.set_gro_attribute('nreactions', 0)
TC.set_gro_attribute('molecule', 1)
TC.set_gro_attribute('molecule_name', self.name)
for att in ['sea_idx','bondchain','bondchain_idx']:
TC.set_gro_attribute(att,-1)
# set symmetry class indices
sea_idx=1
logger.debug(f'{self.name}: symmetry_relateds {self.symmetry_relateds}')
for s in self.symmetry_relateds:
# logger.debug(f'sea_idx {sea_idx} set for set {s}')
for atomName in s:
# logger.debug(f'{atomName} {sea_idx}')
TC.set_gro_attribute_by_attributes('sea_idx', sea_idx, {'atomName': atomName})
sea_idx += 1
# set z and nreactions
idx = []
for zr in self.zrecs:
an = zr['atom']
rnum = zr['resid']
if rnum != 1: continue
z = zr['z']
logger.debug(f'{self.name} setting z for {an} {rnum} {z}')
TC.set_gro_attribute_by_attributes('z', z, {'atomName': an, 'resNum': rnum})
idx.append(TC.get_gro_attribute_by_attributes('globalIdx', {'atomName': an, 'resNum': rnum}))
for sr in self.symmetry_relateds:
# logger.debug(f'{self.name}: setting z for {an}, considering sr {sr}')
if an in sr:
for bn in sr:
if bn == an: continue
# logger.debug(f'{self.name}: setting z for {bn}')
idx.append(TC.get_gro_attribute_by_attributes('globalIdx', {'atomName': bn, 'resNum': rnum}))
TC.set_gro_attribute_by_attributes('z', z, {'atomName': bn, 'resNum': rnum})
# TC.idx_lists['bondchain']=[]
self.chain_manager = ChainManager(create_if_missing=True)
pairs = product(idx, idx)
for i, j in pairs:
if i < j:
iname = TC.get_gro_attribute_by_attributes('atomName', {'globalIdx': i})
jname = TC.get_gro_attribute_by_attributes('atomName', {'globalIdx': j})
if TC.Topology.bondlist.are_bonded(i, j) and iname.startswith('C') and jname.startswith('C'):
# this monomer has two carbon atoms capable of reacting
# that are bound to each other -- this means that
# the two originated as a double-bond.
# *If* there is one with three hydrogens
# (remember this is an activated monomer)
# then it is the "tail"; the other is the "head".
i_nH = TC.count_H(i)
j_nH = TC.count_H(j)
if i_nH == 3 and j_nH != 3:
# i is the tail
entry = [j, i]
elif i_nH != 3 and j_nH == 3:
# j is the tail
entry = [i, j]
else:
logger.warning(f'In molecule {self.name}, cannot identify bonded reactive head and tail atoms\nAssuming {j} is head and {i} is tail')
entry = [j, i]
# logger.debug(f'Adding {entry} to chainlist of {self.name}')
self.chain_manager.injest_bond(entry[0], entry[1])
# TC.idx_lists['bondchain'].append(entry)
# TC.reset_grx_attributes_from_idx_list('bondchain')
self.chain_manager.to_dataframe(TC.Coordinates.A)
self.initialize_molecule_rings()
[docs]
def previously_parameterized(self):
"""Returns True if a gro file exists in the project molecule/parameterized directory for this molecule.
Returns:
bool: True if gro file found
"""
rval = True
for ext in ['gro']:
rval = rval and pfs.exists(os.path.join(pfs.Dirs.molecules_parameterized, f'{self.name}.{ext}'))
return rval
[docs]
def parameterize(self, outname='', input_structure_format='mol2', ambertools={}):
"""Manages GAFF parameterization of this molecule.
Args:
outname (str, optional): output file basename, defaults to ''
input_structure_format (str, optional): input structure format, defaults to 'mol2' ('pdb' is other possibility)
ambertools (dict): ambertools configuration directives, defaults to {}
"""
assert os.path.exists(f'{self.name}.{input_structure_format}'),f'Cannot parameterize molecule {self.name} without {self.name}.{input_structure_format} as input'
if outname == '':
outname = f'{self.name}'
if input_structure_format == 'pdb':
# Convert PDB → mol2 before antechamber so that repeated CONECT entries
# (which encode double bonds in PDB format) are written as proper bond
# orders in the mol2 bond table rather than duplicated single bonds.
from .coordinates import Coordinates
coords = Coordinates.read_pdb(f'{self.name}.pdb')
coords.write_mol2(f'{self.name}.mol2', molname=self.name)
logger.info(f'Converted {self.name}.pdb → {self.name}.mol2 with bond orders from CONECT records')
input_structure_format = 'mol2'
GAFFParameterize(self.name, outname, input_structure_format=input_structure_format, ambertools=ambertools)
self.load_top_gro(f'{outname}.top', f'{outname}.gro', mol2filename=f'{outname}.mol2', wrap_coords=False)
self.initialize_molecule_rings()
self.TopoCoord.write_tpx(f'{outname}.tpx')
[docs]
def minimize(self, outname=''):
"""Manages invocation of vacuum minimization.
Args:
outname (str, optional): output file basename, defaults to ''
"""
if outname == '':
outname = f'{self.name}'
self.TopoCoord.vacuum_minimize(outname)
[docs]
def relax(self, relax_dict):
"""Manages invocation of MD relaxations.
Args:
relax_dict (dict): dictionary of simulation directives
"""
deffnm = relax_dict.get('deffnm', f'{self.name}-relax')
nsteps = relax_dict.get('nsteps', 10000)
temperature = relax_dict.get('temperature', 10000)
n = self.name
boxsize = np.array(self.TopoCoord.maxspan()) + 2 * np.ones(3)
self.center_coords(new_boxsize=boxsize)
mdp_prefix = 'single-molecule-nvt'
pfs.checkout(pfs.Dirs.mdp_file(mdp_prefix))
mdp_modify(f'{mdp_prefix}.mdp', {'nsteps': nsteps, 'gen-vel': 'yes', 'ref_t': temperature, 'gen-temp': temperature})
logger.info(f'In vacuo equilibration of {self.name}.gro for {nsteps} steps at {temperature} K')
self.TopoCoord.grompp_and_mdrun(out=deffnm, mdp=mdp_prefix, boxSize=boxsize)
[docs]
def center_coords(self, new_boxsize: np.ndarray = None):
"""Wrapper for the TopoCoord.center_coords method.
Args:
new_boxsize (np.ndarray, optional): new box size, defaults to None
"""
self.TopoCoord.center_coords(new_boxsize)
[docs]
def generate(self, outname='', available_molecules={}, gaff={}, ambertools={}):
"""Manages generating topology and coordinates for self.
Args:
outname (str, optional): output file basename, defaults to ''
available_molecules (dict, optional): dictionary of available molecules, defaults to {}
gaff (dict): GAFF configuration directives, defaults to {}
ambertools (dict): ambertools configuration directives, defaults to {}
"""
logger.debug(f'{self.name}.generate() begins')
if outname == '': outname = f'{self.name}'
do_minimization = gaff.get('minimize_molecules', True)
do_parameterization = False
if self.generator:
R = self.generator
if R.stage in [reaction_stage.cure, reaction_stage.param, reaction_stage.cap, reaction_stage.repair]: do_parameterization = True
self.TopoCoord = TopoCoord()
logger.debug(f'Using reaction {R.name} ({str(R.stage)}) to generate {self.name} parent {self.parentname}')
isf = 'mol2'
resid_mapper = []
for ri in R.reactants.values():
logger.debug(f'Adding {ri}')
new_reactant = deepcopy(available_molecules[ri])
new_reactant.TopoCoord.write_mol2(filename=f'{self.name}-reactant{ri}-prebonding.mol2', molname=self.name)
rnr = len(new_reactant.sequence)
shifts = self.TopoCoord.merge(new_reactant.TopoCoord, self_cm=self.chain_manager, other_cm=new_reactant.chain_manager)
# for ln in self.TopoCoord.Coordinates.A.head().to_string().split('\n'): logger.debug(ln)
resid_mapper.append({k:v for k,v in zip(range(1,rnr+1),range(1+shifts[2],1+rnr+shifts[2]))})
# logger.debug(f'{self.name}: resid_mapper {resid_mapper}')
# logger.debug(f'{self.TopoCoord.idx_lists}')
# logger.debug(f'\n{self.TopoCoord.Coordinates.A.to_string()}')
# logger.debug(f'composite prebonded molecule in box {self.TopoCoord.Coordinates.box}')
self.TopoCoord.write_mol2(filename=f'{self.name}-prebonding.mol2', molname=self.name)
self.set_sequence_from_coordinates()
bdf = _yield_bonds_as_df(R, self.TopoCoord, resid_mapper)
# logger.debug(f'Generation of {self.name}: composite molecule has {len(self.sequence)} resids')
# logger.debug(f'generation of {self.name}: composite molecule:\n{composite_mol.TopoCoord.Coordinates.A.to_string()}')
self.make_bonds(bdf, available_molecules, R.stage)
# self.TopoCoord.set_gro_attribute('reactantName',R.product)
self.TopoCoord.set_gro_attribute('sea_idx', -1) # turn off symmetry-equivalence for multimers
self.TopoCoord.set_gro_attribute('molecule', 1)
self.TopoCoord.set_gro_attribute('molecule_name', self.name)
self.write_gro_attributes(GRX_ATTRIBUTES, f'{R.product}.grx')
# if pfs.exists(f'molecules/inputs/{self.name}.mol2'): # an override structure is present
# logger.debug(f'Using override input molecules/inputs/{self.name}.{isf} as a generator')
# pfs.checkout(f'molecules/inputs/{self.name}.{isf}')
# else:
self.TopoCoord.write_mol2(filename=f'{self.name}.mol2', molname=self.name)
if not do_parameterization:
self.TopoCoord.write_gro(f'{self.name}.gro', grotitle=self.name)
self.TopoCoord.write_top(f'{self.name}.top')
# if pfs.exists(f'molecules/inputs/{self.name}.pdb'): # an override structure is present
# isf='pdb'
# logger.debug(f'Using override input molecules/inputs/{self.name}.{isf} as a generator')
# pfs.checkout(f'molecules/inputs/{self.name}.{isf}')
else:
# this molecule was not assigned a generator: implies it is a monomer and must have a parameterization
input_structure_formats = ['mol2','pdb']
isf = None
for isf in input_structure_formats:
if pfs.exists(f'{pfs.Dirs.molecules_inputs}/{self.name}.{isf}'):
logger.debug(f'Using input {pfs.Dirs.molecules_inputs}/{self.name}.{isf} as a generator')
pfs.checkout(f'{pfs.Dirs.molecules_inputs}/{self.name}.{isf}')
break
assert isf, 'Error: no valid input structure file found'
do_parameterization = True
reactantName = self.name
if do_parameterization:
self.parameterize(outname, input_structure_format=isf, ambertools=ambertools)
else:
if self.name != self.parentname:
logger.info(f'Built {self.name} using topology of {self.parentname}; copying {self.parentname}.top to {self.name}.top')
self.load_top_gro(f'{self.parentname}.top', f'{self.name}.gro', tpxfilename=f'{self.parentname}.tpx', wrap_coords=False)
shutil.copy(f'{self.parentname}.top', f'{self.name}.top')
shutil.copy(f'{self.parentname}.grx', f'{self.name}.grx')
shutil.copy(f'{self.parentname}.tpx', f'{self.name}.tpx')
if do_minimization:
self.minimize(outname)
self.set_sequence_from_coordinates()
if not self.generator:
self.TopoCoord.set_gro_attribute('reactantName', reactantName)
self.initialize_monomer_grx_attributes()
self.write_gro_attributes(GRX_ATTRIBUTES, f'{reactantName}.grx')
else:
if do_parameterization or self.name != self.parentname:
grx=f'{reactantName}.grx'
if (os.path.exists(grx)):
self.TopoCoord.read_gro_attributes(grx)
#self.reset_chains_from_attributes()
# logger.debug(f'{self.name} gro\n{self.TopoCoord.Coordinates.A.to_string()}')
self.prepare_new_bonds(available_molecules=available_molecules)
# for ln in self.TopoCoord.Coordinates.A.head().to_string().split('\n'): logger.debug(ln)
logger.info(f'{self.name}: {self.get_molecular_weight():.2f} g/mol')
logger.debug('Done.')
[docs]
def get_molecular_weight(self):
"""Returns the molecular weight of self.
Returns:
float: molecular weight in g/mol
"""
mass = self.TopoCoord.Topology.total_mass(units='gromacs') # g
return mass
[docs]
def prepare_new_bonds(self, available_molecules={}):
"""Populates the bond templates and reaction bonds for self.
Args:
available_molecules (dict, optional): dictionary of available molecules, defaults to {}
"""
# logger.debug(f'set_reaction_bonds: molecules {list(available_molecules.keys())}')
R = self.generator
if not R:
return
self.reaction_bonds = []
self.bond_templates = []
TC = self.TopoCoord
# logger.debug(f'prepare_new_bonds {self.name}: chainlists {TC.idx_lists["bondchain"]}')
for bondrec in R.bonds:
atom_keys = bondrec['atoms']
order = bondrec['order']
assert len(atom_keys) == 2
atomrecs = [R.atoms[x] for x in atom_keys]
atom_names = [x['atom'] for x in atomrecs]
reactant_keys = [x['reactant'] for x in atomrecs]
in_reactant_resids = [x['resid'] for x in atomrecs]
if reactant_keys[0] == reactant_keys[1]: # this is an intraresidue bond
reactant_names = [R.reactants[reactant_keys[0]]]
else:
reactant_names = [R.reactants[x] for x in reactant_keys]
reactant_sequences = [available_molecules[x].sequence for x in reactant_names]
product_sequence = []
for seq in reactant_sequences:
product_sequence.extend(seq)
# logger.debug(f'product_sequence {product_sequence}')
sequence_residue_idx_origins = [0, 0]
if len(reactant_sequences) == 2:
sequence_residue_idx_origins[1] = len(reactant_sequences[0])
in_product_resids = [in_reactant_resids[x] + sequence_residue_idx_origins[x] for x in [0, 1]]
# logger.debug(f'in_product_resids {in_product_resids}')
in_product_resnames = [product_sequence[in_product_resids[x] - 1] for x in [0, 1]]
atom_idx = [TC.get_gro_attribute_by_attributes('globalIdx', {'resNum': in_product_resids[x], 'atomName': atom_names[x]}) for x in [0, 1]]
# logger.debug(f'{R.name} names {atom_names} in_product_resids {in_product_resids} idx {atom_idx}')
bystander_resids, bystander_resnames, bystander_atomidx, bystander_atomnames = TC.get_bystanders(atom_idx)
oneaway_resids, oneaway_resnames, oneaway_atomidx, oneaway_atomnames = TC.get_oneaways(atom_idx, chain_manager=self.chain_manager)
# logger.debug(f'{self.name} bystanders {bystander_resids} {bystander_resnames} {bystander_atomidx} {bystander_atomnames}')
# logger.debug(f'{self.name} oneaways {oneaway_resids} {oneaway_resnames} {oneaway_atomidx} {oneaway_atomnames}')
self.reaction_bonds.append(ReactionBond(atom_idx, in_product_resids, order, bystander_resids, bystander_atomidx, oneaway_resids, oneaway_atomidx))
intraresidue = in_product_resids[0] == in_product_resids[1]
self.bond_templates.append(BondTemplate(atom_names, in_product_resnames, intraresidue, order, bystander_resnames, bystander_atomnames, oneaway_resnames, oneaway_atomnames))
[docs]
def idx_mappers(self, otherTC: TopoCoord, other_bond, bystanders, oneaways, uniq_atom_idx: set):
"""Computes the mapping dictionary from molecule template index to instance index in the other TopoCoord.
Args:
otherTC (TopoCoord): the other TopoCoord
other_bond: 2 atom indices of the bond in the other TopoCoord
bystanders: bystander lists, one for each reacting atom
oneaways: oneaways, one for each atom in the bond
uniq_atom_idx (set): set of unique atoms in template that must be mapped to instance
Raises:
Exception: if there is a buggy double-counting of one or more indexes
Returns:
tuple: two-way dictionaries of index mappers instance<->template
"""
assert len(other_bond) == 2
assert len(bystanders) == 2
assert len(oneaways) == 2
ut = uniq_atom_idx.copy()
logger.debug(f'Template name {self.name}')
i_idx, j_idx = other_bond
i_resName, i_resNum, i_atomName = otherTC.get_gro_attribute_by_attributes(['resName','resNum','atomName'],{'globalIdx':i_idx})
j_resName, j_resNum, j_atomName = otherTC.get_gro_attribute_by_attributes(['resName','resNum','atomName'],{'globalIdx':j_idx})
logger.debug(f'i_idx {i_idx} i_resName {i_resName} i_resNum {i_resNum} i_atomName {i_atomName}')
logger.debug(f'j_idx {j_idx} j_resName {j_resName} j_resNum {j_resNum} j_atomName {j_atomName}')
# identify the template bond represented by the other_bond parameter
ij = []
for RB, BT in zip(self.reaction_bonds, self.bond_templates):
temp_resids = RB.resids
temp_iname, temp_jname = BT.names
temp_iresname, temp_jresname = BT.resnames
temp_bystander_resids = RB.bystander_resids
temp_oneaway_resids = RB.oneaway_resids
# logger.debug(f'temp_iresname {temp_iresname} temp_iname {temp_iname}')
# logger.debug(f'temp_jresname {temp_jresname} temp_jname {temp_jname}')
if (i_atomName, i_resName) == (temp_iname, temp_iresname):
ij = [0, 1]
break # found it -- stop looking
elif (i_atomName, i_resName) == (temp_jname, temp_jresname):
ij = [1, 0]
break
assert len(ij) == 2, f'Mappers using template {self.name} unable to map from instance bond {i_resName}-{i_resNum}-{i_atomName}---{j_resName}-{j_resNum}-{j_atomName}'
inst_resids = [i_resNum, j_resNum]
inst_resids = [inst_resids[ij[x]] for x in [0, 1]]
inst_bystander_resids = [bystanders[ij[x]] for x in [0, 1]]
inst_oneaway_resids = [oneaways[ij[x]] for x in [0, 1]]
# Bystander counts on each side may legitimately differ when the
# template (a small parameterization fragment) carries fewer
# bystanders than the in-system instance — e.g. cyanate-ester cure,
# where the CY+CY dimer template has no BPA bystander but every
# BCY-embedded instance does (find_template's subset semantics
# let the small-fragment template match in this case). Allow
# the template to be the shorter list on each side; the extra
# instance bystanders are residues outside the template and don't
# need to be mapped here. When both lists carry chain context
# the counts agree as before.
# Pair region-by-region so that a length difference on one side
# doesn't shift alignment of the other side's bystanders or the
# oneaways. Within each region zip naturally truncates the longer
# (always the instance) list.
instdf = otherTC.Coordinates.A
tempdf = self.TopoCoord.Coordinates.A
inst2temp = {}
temp2inst = {}
resid_pairs = list(zip(inst_resids, temp_resids))
resid_pairs.extend(zip(inst_bystander_resids[0], temp_bystander_resids[0]))
resid_pairs.extend(zip(inst_bystander_resids[1], temp_bystander_resids[1]))
resid_pairs.extend(zip(inst_oneaway_resids, temp_oneaway_resids))
for inst, temp in resid_pairs:
if inst and temp: # None's in the bystander lists and oneaways lists should be ignored
logger.debug(f'map inst resid {inst} to template resid {temp}')
idf = instdf[instdf['resNum'] == inst][['globalIdx','atomName']].copy()
# logger.debug(f'idf res {inst}:\n{idf.to_string()}')
tdf = tempdf[tempdf['resNum'] == temp][['globalIdx','atomName']].copy()
# logger.debug(f'tdf res {temp}:\n{tdf.to_string()}')
tdf = tdf.merge(idf, on='atomName', how='inner', suffixes=('_template','_instance'))
# logger.debug(f'merged\n{tdf.to_string()}')
for i, r in tdf.iterrows():
temp_idx = r['globalIdx_template']
inst_idx = r['globalIdx_instance']
# logger.debug(f't {temp_idx} <-> i {inst_idx}')
# only map template atoms that are identified in the passed in set
if temp_idx in ut:
ut.remove(temp_idx)
temp2inst[temp_idx] = inst_idx
inst2temp[inst_idx] = temp_idx
if temp_idx in temp2inst and temp2inst[temp_idx] != inst_idx:
raise Exception(f'Error: temp_idx {temp_idx} already claimed in temp2inst; bug')
assert len(inst2temp) == len(temp2inst), f'Error: could not establish two-way dict of atom globalIdx'
return (inst2temp, temp2inst)
[docs]
def get_angles_dihedrals(self, bond):
"""Returns copies of selections from the Topology interaction-type dataframes that contain the two atoms indicated in the bond.
Args:
bond: 2-element list-like container of ints
Raises:
Exception: if a NaN is found in any selection
Returns:
tuple: copies of the dataframe selections for angles, dihedrals, and 1-4 pairs
"""
ai, aj = bond
d = self.TopoCoord.Topology.D['angles']
ad = d[((d.ai == ai) & (d.aj == aj)) |
((d.ai == aj) & (d.aj == ai)) |
((d.aj == ai) & (d.ak == aj)) |
((d.aj == aj) & (d.ak == ai))].copy()
d = self.TopoCoord.Topology.D['dihedrals']
td = d[((d.ai == ai) & (d.aj == aj)) |
((d.ai == aj) & (d.aj == ai)) |
((d.aj == ai) & (d.ak == aj)) |
((d.aj == aj) & (d.ak == ai)) |
((d.ak == ai) & (d.al == aj)) |
((d.ak == aj) & (d.al == ai))].copy()
check = True
for a in ['ai', 'aj', 'ak', 'al']:
check = check and td[a].isnull().values.any()
if check:
logger.error('NAN in molecule/dihedrals')
raise ValueError('NAN in molecule/dihedrals')
d = self.TopoCoord.Topology.D['pairs']
paird = pd.DataFrame()
for ai, al in zip(td.ai, td.al):
tpair = d[((d.ai == ai) & (d.aj == al)) |
((d.ai == al) & (d.aj == ai))].copy()
paird = pd.concat((paird, tpair), ignore_index=True)
check = True
for a in ['ai', 'aj']:
check = check and paird[a].isnull().values.any()
if check:
logger.error('NAN in molecule/pairs')
raise ValueError('NAN in molecule/pairs')
return ad, td, paird
[docs]
def get_resname(self, internal_resid):
"""Returns the residue name at position internal_resid in the molecule's sequence.
Args:
internal_resid (int): molecule-internal residue index
Returns:
str: residue name
"""
return self.sequence[internal_resid - 1]
# def inherit_attribute_from_reactants(self,attribute,available_molecules,increment=True,no_increment_if_negative=True):
# """inherit_attribute_from_reactants populate certain atom attributes in molecule from its constituent reactants
# :param attribute: _description_
# :type attribute: _type_
# :param available_molecules: _description_
# :type available_molecules: _type_
# :param increment: _description_, defaults to True
# :type increment: bool, optional
# :param no_increment_if_negative: _description_, defaults to True
# :type no_increment_if_negative: bool, optional
# """
# adf=self.TopoCoord.Coordinates.A
# ordered_attribute_idx=[]
# curr_max=0
# # logger.debug(f'{self.name}({adf.shape[0]}) inheriting {attribute} from {self.sequence}')
# # logger.debug(f'available molecules {list(available_molecules.keys())}')
# for i,r in enumerate(self.sequence):
# '''
# for this residue number, read the list of unique atom names
# '''
# namesinres=list(adf[adf['resNum']==(i+1)]['atomName'])
# '''
# access coordinates of standalone residue template with this name 'r' on the list of available molecules
# '''
# rdf=available_molecules[r].TopoCoord.Coordinates.A
# '''
# get the attribute values from residue template
# '''
# x=list(rdf[rdf['atomName'].isin(namesinres)][attribute])
# # logger.debug(f'{r}->{len(x)}')
# '''
# increment these attribute value based on residue number in this molecule
# '''
# if increment:
# i_x=[]
# for y in x:
# if y>0 or (y<0 and not no_increment_if_negative):
# i_x.append(y+curr_max)
# else:
# i_x.append(y)
# curr_max=max(i_x)
# ordered_attribute_idx.extend(i_x)
# assert len(ordered_attribute_idx)==adf.shape[0]
# adf[attribute]=ordered_attribute_idx
[docs]
def merge(self, other):
"""Merges TopoCoord from other into self's TopoCoord.
Args:
other (Molecule): another Molecule
Returns:
tuple: a shift tuple (returned by Coordinates.merge())
"""
shifts = self.TopoCoord.merge(other.TopoCoord, self_cm=self.chain_manager, other_cm=other.chain_manager)
return shifts
[docs]
def load_top_gro(self, topfilename, grofilename, tpxfilename='', mol2filename='', wrap_coords=False, ignore_bonds=False, overwrite_coordinates=False):
"""Generates a new TopoCoord member object for this molecule by reading in a Gromacs topology file and a Gromacs gro file.
Args:
topfilename (str): Gromacs topology file
grofilename (str): Gromacs gro file
tpxfilename (str, optional): extended topology file, defaults to ''
mol2filename (str, optional): alternative coordinate mol2 file, defaults to ''
wrap_coords (bool): if True, wrap coordinates into the box after reading gro, defaults to False
ignore_bonds (bool): if True, skip reading bonds from mol2, defaults to False
overwrite_coordinates (bool): if True, overwrite existing coordinates when reading mol2, defaults to False
"""
self.TopoCoord = TopoCoord(topfilename=topfilename, grofilename=grofilename, tpxfilename=tpxfilename, mol2filename=mol2filename, wrap_coords=wrap_coords, ignore_bonds=ignore_bonds, overwrite_coordinates=overwrite_coordinates)
[docs]
def set_gro_attribute(self, attribute, srs):
"""Sets attribute of atoms to srs (drills through to Coordinates.set_atomset_attributes()).
Args:
attribute (str): name of attribute
srs: scalar or list-like attribute values in same ordering as self.A
"""
self.TopoCoord.set_gro_attribute(attribute, srs)
[docs]
def read_gro_attributes(self, grxfilename, attribute_list=[]):
"""Reads attributes from file into self.TopoCoord.Coordinates.A.
Args:
grxfilename (str): name of input file
attribute_list (list, optional): list of attributes to take, defaults to [] (take all)
"""
self.TopoCoord.read_gro_attributes(grxfilename, attribute_list=attribute_list, chain_manager=self.chain_manager)
[docs]
def write_gro_attributes(self, attribute_list, grxfilename):
"""Writes atomic attributes to a file.
Args:
attribute_list (list): list of attributes to write
grxfilename (str): name of output file
"""
self.TopoCoord.write_gro_attributes(attribute_list, grxfilename)
[docs]
def make_bonds(self, bdf: pd.DataFrame, moldict, stage):
"""Adds new bonds to the molecule's topology and deletes any sacrificial hydrogens.
Args:
bdf (pd.DataFrame): pandas dataframe identifying new bonds
moldict (dict): dictionary of available molecular templates
stage (reaction_stage): enumerated parameter indicating reaction_stage
"""
TC = self.TopoCoord
explicit_sacrificial_Hs = {}
for i, r in bdf.iterrows():
aname, bname = [TC.get_gro_attribute_by_attributes('atomName', {'globalIdx': x}) for x in [r.ai, r.rj]]
logger.debug(f'generating {self.name} bond {r.ri}:{aname}:{r.ai}-{r.rj}:{bname}:{r.aj} order {r.order}')
if r.ri != r.rj:
resid_sets = TC.get_resid_sets([r.ai, r.aj])
hxi, hxj = self.transrot(r.ai, r.ri, r.aj, r.rj, connected_resids=resid_sets[1])
explicit_sacrificial_Hs[i] = [hxi, hxj]
if stage in [reaction_stage.cure, reaction_stage.param, reaction_stage.cap, reaction_stage.repair]:
template_source = 'ambertools'
else:
template_source = 'internal' # signals that a template molecule should be identified to parameterize this bond
TC.update_topology_and_coordinates(bdf, moldict, explicit_sacH=explicit_sacrificial_Hs, template_source=template_source, chain_manager=self.chain_manager)
self.initialize_molecule_rings()
[docs]
def transrot(self, at_idx, at_resid, from_idx, from_resid, connected_resids=[]):
"""Given a composite molecule, translates and rotates the piece downstream of the yet-to-be-created bond to minimize steric overlaps and identify the best two sacrificial hydrogens.
Args:
at_idx (int): global index of left-hand atom in new bond
at_resid (int): resid of left-hand residue
from_idx (int): global index of right-hand atom in new bond
from_resid (int): resid of right-hand residue
connected_resids (list, optional): list of all other resids attached to right-hand residue that should move with it, defaults to []
Returns:
tuple: 2-tuple containing global indices of the sacrificial hydrogens
"""
# Rotate and translate
if at_resid == from_resid:
return # should never happen but JIC
logger.debug(f'{self.name} connected resids {connected_resids}')
TC = self.TopoCoord
ATC = TopoCoord()
BTC = TopoCoord()
C = TC.gro_DataFrame('atoms')
ATC.Coordinates.A = C[C['resNum'] == at_resid].copy()
bresids = connected_resids.copy()
bresids.append(from_resid)
BTC.Coordinates.A = C[C['resNum'].isin(bresids)].copy()
for ln in BTC.Coordinates.A.to_string().split('\n'): logger.debug(ln)
NONROT = C[~C['resNum'].isin(bresids)].shape[0]
logger.debug(f'{self.TopoCoord.Coordinates.A.shape[0]} atoms')
logger.debug(f'holding {at_resid} ({NONROT})')
logger.debug(f'rotating/translating {bresids} ({BTC.Coordinates.A.shape[0]})')
assert self.TopoCoord.Coordinates.A.shape[0] == (NONROT + BTC.Coordinates.A.shape[0])
mypartners = TC.Topology.bondlist.partners_of(at_idx)
otpartners = TC.Topology.bondlist.partners_of(from_idx)
logger.debug(f'Partners of {at_idx} {mypartners}')
logger.debug(f'Partners of {from_idx} {otpartners}')
myHpartners = {k: v for k, v in zip(mypartners, [C[C['globalIdx'] == i]['atomName'].values[0] for i in mypartners]) if v.startswith('H')}
otHpartners = {k: v for k, v in zip(otpartners, [C[C['globalIdx'] == i]['atomName'].values[0] for i in otpartners]) if v.startswith('H')}
myHighestH = {k: v for k, v in myHpartners.items() if v == max([k for k in myHpartners.values()], key=lambda x: int(x.split('H')[1] if x.split('H')[1] != '' else '0'))}
otHighestH = {k: v for k, v in otHpartners.items() if v == max([k for k in otHpartners.values()], key=lambda x: int(x.split('H')[1] if x.split('H')[1] != '' else '0'))}
assert len(myHighestH) == 1
assert len(otHighestH) == 1
logger.debug(f'Highest-named H partner of {at_idx} is {myHighestH}')
logger.debug(f'Highest-named H partner of {from_idx} is {otHighestH}')
assert len(myHpartners) > 0, f'Error: atom {at_idx} does not have a deletable H atom!'
assert len(otHpartners) > 0, f'Error: atom {from_idx} does not have a deletable H atom!'
Ri = TC.get_R(at_idx)
Rj = TC.get_R(from_idx)
logger.debug(f'Ri {at_idx} {Ri} {type(Ri)} {Ri.dtype}')
logger.debug(f'Rj {from_idx} {Rj} {type(Rj)} {Rj.dtype}')
overall_maximum = (-1.e9, -1, -1)
coord_trials = {}
for myH, myHnm in myHpartners.items(): # keys are globalIdx's, values are names
coord_trials[myH] = {}
Rh = TC.get_R(myH)
logger.debug(f' Rh {myH} {Rh} {Rh.dtype}')
Rih = Ri - Rh
Rih *= 1.0 / np.linalg.norm(Rih)
for otH, otHnm in otHpartners.items():
logger.debug(f'{self.name}: Considering {myH} {otH}')
coord_trials[myH][otH] = deepcopy(BTC)
# logger.debug(f'\n{coord_trials[myH][otH].Coordinates.A.to_string()}')
Rk = coord_trials[myH][otH].get_R(otH)
logger.debug(f'{self.name}: otH {otH} Rk {Rk} {Rk.dtype}')
Rkj = Rk - Rj
Rkj *= 1.0 / np.linalg.norm(Rkj)
logger.debug(f'Rkj {Rkj} {Rkj.dtype} Rih {Rih} {Rih.dtype}')
#Rhk=Rh-Rk
#rhk=np.linalg.norm(Rhk)
cp = np.cross(Rkj,Rih)
c = np.dot(Rkj,Rih)
v = np.array([[0,-cp[2],cp[1]],[cp[2],0,-cp[0]],[-cp[1],cp[0],0]])
v2 = np.dot(v,v)
I = np.array([[1.,0.,0.],[0.,1.,0.],[0.,0.,1.]])
# R is the rotation matrix that will rotate donb to align with accb
R = I + v + v2 / (1. + c)
# logger.debug(f'{self.name}: R:\n{R}')
# rotate translate all donor atoms!
coord_trials[myH][otH].rotate(R)
Rk = coord_trials[myH][otH].get_R(otH)
# overlap the two H atoms by translation
Rik = Rh - Rk
coord_trials[myH][otH].translate(Rik)
minD = TC.minimum_distance(coord_trials[myH][otH], self_excludes=[myH], other_excludes=[otH])
logger.debug(f'{self.name}: minD {minD}')
if minD > overall_maximum[0]:
overall_maximum = (minD, myH, otH)
logger.debug(f'{self.name}: overall_maximum {overall_maximum}')
minD, myH, otH = overall_maximum
BTC = coord_trials[myH][otH]
TC.overwrite_coords(BTC)
TC.swap_atom_names(myH, list(myHighestH.keys())[0])
TC.swap_atom_names(otH, list(otHighestH.keys())[0])
return myH, otH
[docs]
def atoms_w_same_attribute_as(self, find_dict={}, same_attribute='', return_attribute=''):
"""Returns a list of atom attributes named in return_attribute from atoms that share an attribute named in same_attribute with the atom identified by find_dict.
Args:
find_dict (dict, optional): dictionary of attribute:value pairs that should uniquely identify an atom, defaults to {}
same_attribute (str, optional): name of attribute used to screen atoms, defaults to ''
return_attribute (str, optional): attribute value to return a list of from the screened atoms, defaults to ''
Returns:
list: list of attribute values
"""
att_val = self.TopoCoord.get_gro_attribute_by_attributes(same_attribute, find_dict)
return self.TopoCoord.get_gro_attributelist_by_attributes(return_attribute, {same_attribute: att_val})
[docs]
def flip_stereocenters(self, idxlist):
"""Flips stereochemistry of atoms in idxlist.
Args:
idxlist (list): global indices of chiral atoms
"""
self.TopoCoord.flip_stereocenters(idxlist)
[docs]
def rotate_bond(self, a, b, deg):
"""Rotates all atoms in molecule on b-side of a-b bond by deg degrees.
Args:
a (int): index of a
b (int): index of b
deg (float): angle of rotation in degrees
"""
TC = self.TopoCoord
A = TC.Coordinates.A
branch = TopoCoord()
bl = deepcopy(self.TopoCoord.Topology.bondlist)
branchidx = bl.half_as_list((a, b), 99)
ra = TC.get_R(a)
rb = TC.get_R(b)
Rab = ra - rb
rab = Rab / np.linalg.norm(Rab)
O = rb
TC.translate(-1 * O)
branch.Coordinates.A = A[A['globalIdx'].isin(branchidx)].copy()
# do stuff
rx, ry, rz = rab
az = np.arctan2(ry, rx) # azimuthal angle of bond axis (radians)
ay = np.arctan2(np.sqrt(rx**2 + ry**2), rz) # polar angle of bond axis (radians)
M = (Matrix4()
.rot(np.degrees(az), 'z')
.rot(np.degrees(ay), 'y')
.rot(deg, 'z')
.rot(np.degrees(-ay), 'y')
.rot(np.degrees(-az), 'z'))
branch.rotate(M.m[:3,:3])
bdf = branch.Coordinates.A
A.loc[A['globalIdx'].isin(branchidx), ['posX', 'posY', 'posZ']] = bdf[['posX', 'posY', 'posZ']]
TC.translate(O)
# def sea_of(self,idx):
# clu=self.atoms_w_same_attribute_as(find_dict={'globalIdx':idx},
# same_attribute='sea_idx',
# return_attribute='globalIdx')
# return list(clu)
[docs]
def generate_stereoisomers(self):
"""Generates list of Molecule shells, one for each stereoisomer.
Returns:
None: returns early if no stereoisomers need to be generated
"""
if self.TopoCoord.Topology.D['atoms'].shape[0] == 0: return # self has not yet acquired topology/coordinates
if len(self.stereoisomers) == 0: return
flip = [[0, 1] for _ in range(len(self.stereocenters))]
st_idx = [self.TopoCoord.get_gro_attribute_by_attributes('globalIdx', {'atomName': n}) for n in self.stereocenters]
P = product(*flip)
next(P) # one with no flips is the original molecule, so skip it
for p in P:
si_name = self.name + '-S' + ''.join([str(_) for _ in p])
if not si_name in self.stereoisomers:
logger.debug(f'{si_name} not found in dict of stereoisomers of {self.name}')
logger.debug(f'Stereocenter sequence {p} generates stereoisomer {si_name}')
M = self.stereoisomers[si_name]
M.origin = self.origin
M.TopoCoord = deepcopy(self.TopoCoord)
fsc = [st_idx[i] for i in range(len(self.stereocenters)) if p[i]]
M.flip_stereocenters(fsc)
M.TopoCoord.write_gro(f'{si_name}.gro')
MoleculeDict = dict[str,Molecule]
MoleculeList = list[Molecule]