Source code for htpolynet.io.gro

"""GROMACS .gro and HTPolyNet .grx file reader/writer.

Author: Cameron F. Abrams <cfa22@drexel.edu>
"""
import logging
import os

import numpy as np
import pandas as pd

logger = logging.getLogger(__name__)

GRO_ATTRIBUTES = [
    'resNum', 'resName', 'atomName', 'globalIdx',
    'posX', 'posY', 'posZ', 'velX', 'velY', 'velZ',
]

GRX_ATTRIBUTES = [
    'z', 'nreactions', 'reactantName', 'sea_idx',
    'bondchain', 'bondchain_idx', 'molecule', 'molecule_name',
]
"""Extended per-atom attributes stored in .grx files.

- z              number of sacrificial H's available on this atom
- nreactions     number of bonds this atom has formed
- reactantName   name of the most recent reactant this atom belonged to
- sea_idx        symmetry-equivalent-atom group index within a residue
- bondchain      index of the C-C bondchain this atom belongs to
- bondchain_idx  position of this atom within its bondchain
- molecule       index of the molecule this atom belongs to
- molecule_name  name of that molecule
"""

GRX_GLOBALLY_UNIQUE = [False, False, False, True, True, False, True, False]
"""Parallel to GRX_ATTRIBUTES: True if the attribute must be globally unique across the system."""

GRX_UNSET_DEFAULTS = [0, 0, 'UNSET', -1, -1, -1, -1, 'UNSET']
"""Parallel to GRX_ATTRIBUTES: sentinel value that means 'not yet assigned'."""


[docs] def read(filename): """Parse a GROMACS .gro file. Args: filename (str): path to the .gro file Returns: dict: parsed data with keys: - 'name' (str): title line - 'N' (int): atom count - 'metadat' (dict): metadata (currently just {'N': N}) - 'A' (pd.DataFrame): atom data frame - 'box' (np.ndarray, shape 3×3): box matrix """ box = np.zeros((3, 3)) metadat = {} with open(filename, 'r') as f: data = f.read().split('\n') while '' in data: data.remove('') name = data[0] N = int(data[1]) metadat['N'] = N series = {k: [] for k in GRO_ATTRIBUTES} lc_globalIdx = 1 for x in data[2:-1]: series['resNum'].append(int(x[0:5].strip())) series['resName'].append(x[5:10].strip()) series['atomName'].append(x[10:15].strip()) # globalIdx is always row index + 1 when the file is correctly formatted series['globalIdx'].append(lc_globalIdx) lc_globalIdx += 1 # Fixed-width fields; split() fails when columns touch # Format: "%5d%-5s%5s%5d%8.3f%8.3f%8.3f%8.4f%8.4f%8.4f" numbers = list(map(float, [x[20 + 8 * i:20 + 8 * (i + 1)] for i in range(3)])) if len(x) > 44: numbers.extend(list(map(float, [x[44 + 8 * i:44 + 8 * (i + 1)] for i in range(3)]))) series['posX'].append(numbers[0]) series['posY'].append(numbers[1]) series['posZ'].append(numbers[2]) if len(numbers) == 6: series['velX'].append(numbers[3]) series['velY'].append(numbers[4]) series['velZ'].append(numbers[5]) if len(series['velX']) == 0: del series['velX'] del series['velY'] del series['velZ'] assert N == len(series['globalIdx']), f'Atom count mismatch inside {filename}' A = pd.DataFrame(series) boxdata = list(map(float, data[-1].split())) box[0][0] = boxdata[0] box[1][1] = boxdata[1] box[2][2] = boxdata[2] if len(boxdata) == 9: box[0][1], box[0][2], box[1][0], box[1][2], box[2][0], box[2][1] = boxdata[3:] return {'name': name, 'N': N, 'metadat': metadat, 'A': A, 'box': box}
[docs] def write(filename, A, box, name, grotitle=''): """Write a GROMACS .gro file. Args: filename (str): path to write A (pd.DataFrame): atom data frame; must contain the gro_attributes columns box (np.ndarray, shape 3×3): box matrix name (str): molecule/system name used as the title when grotitle is empty grotitle (str): explicit title line, defaults to '' """ title = name if not grotitle else grotitle has_vel = 'velX' in A.columns gro_cols = GRO_ATTRIBUTES if has_vel else GRO_ATTRIBUTES[:-3] atomformatters = ( [ lambda x: f'{x:>5d}', lambda x: f'{x:<5s}', lambda x: f'{x:>5s}', lambda x: f'{x % 10000:5d}', ] + [lambda x: f'{x:8.3f}'] * 3 + [lambda x: f'{x:8.4f}'] * 3 ) with open(filename, 'w') as f: f.write(title + '\n') f.write(f'{A.shape[0]:>5d}\n') for _, r in A.iterrows(): f.write(''.join([atomformatters[i](v) for i, v in enumerate(list(r[gro_cols]))]) + '\n') if not np.any(box): logger.debug('Writing Gromacs coordinates file but boxsize is not set.') f.write(f'{box[0][0]:10.5f}{box[1][1]:10.5f}{box[2][2]:10.5f}') x, y = box.nonzero() if not all(x == y): f.write(f'{box[0][1]:10.5f}{box[0][2]:10.5f}') f.write(f'{box[1][0]:10.5f}{box[1][2]:10.5f}') f.write(f'{box[2][0]:10.5f}{box[2][1]:10.5f}') f.write('\n')
[docs] def write_grx(filename, A, attributes, formatters=[]): """Write a subset of atom attributes to an HTPolyNet .grx file. The file is a whitespace-delimited table with 'globalIdx' as the first column followed by the requested attribute columns. Args: filename (str): path to write A (pd.DataFrame): atom data frame attributes (list): attribute names to include formatters (list): optional per-column formatter callables, defaults to [] Raises: Exception: if any attribute in attributes is absent from A """ missing = [a for a in attributes if a not in A.columns] if missing: raise Exception(f'Column(s) {missing} not found in atoms dataframe') with open(filename, 'w') as f: cols = A[['globalIdx'] + attributes] if formatters: f.write(cols.to_string(header=True, index=False, formatters=formatters) + '\n') else: f.write(cols.to_string(header=True, index=False) + '\n')
[docs] def read_grx(filename, attributes=[]): """Read an HTPolyNet .grx extended-attribute file. Args: filename (str): path to read attributes (list): attribute names to read; if empty, all columns are read Returns: tuple: (df, attributes_read) where df has a 'globalIdx' column plus the attribute columns, and attributes_read is the list of attribute names that were actually read. Raises: AssertionError: if the file does not exist or lacks a 'globalIdx' column """ assert os.path.exists(filename), f'Error: {filename} not found' if len(attributes) == 0: df = pd.read_csv(filename, sep=r'\s+', header=0) assert 'globalIdx' in df.columns, \ f"Error: {filename} does not have a 'globalIdx' column" attributes_read = [c for c in df.columns if c != 'globalIdx'] else: df = pd.read_csv(filename, sep=r'\s+', names=['globalIdx'] + attributes, header=0) attributes_read = attributes return df, attributes_read