Source code for htpolynet.io.gro

"""GROMACS .gro and HTPolyNet .grx file reader/writer.

Author: Cameron F. Abrams <cfa22@drexel.edu>
"""
import logging
import os

import numpy as np
import pandas as pd

logger = logging.getLogger(__name__)

GRO_ATTRIBUTES = [
    'resNum', 'resName', 'atomName', 'globalIdx',
    'posX', 'posY', 'posZ', 'velX', 'velY', 'velZ',
]

GRX_ATTRIBUTES = [
    'z', 'nreactions', 'reactantName', 'sea_idx',
    'bondchain', 'bondchain_idx', 'molecule', 'molecule_name',
]
"""Extended per-atom attributes stored in .grx files.

- z              number of sacrificial H's available on this atom
- nreactions     number of bonds this atom has formed
- reactantName   name of the most recent reactant this atom belonged to
- sea_idx        symmetry-equivalent-atom group index within a residue
- bondchain      index of the C-C bondchain this atom belongs to
- bondchain_idx  position of this atom within its bondchain
- molecule       index of the molecule this atom belongs to
- molecule_name  name of that molecule
"""

GRX_GLOBALLY_UNIQUE = [False, False, False, True, True, False, True, False]
"""Parallel to GRX_ATTRIBUTES: True if the attribute must be globally unique across the system."""

GRX_UNSET_DEFAULTS = [0, 0, 'UNSET', -1, -1, -1, -1, 'UNSET']
"""Parallel to GRX_ATTRIBUTES: sentinel value that means 'not yet assigned'."""



[docs]
def read(filename):
    """Parse a GROMACS .gro file.

    Args:
        filename (str): path to the .gro file

    Returns:
        dict: parsed data with keys:
            - 'name'    (str): title line
            - 'N'       (int): atom count
            - 'metadat' (dict): metadata (currently just {'N': N})
            - 'A'       (pd.DataFrame): atom data frame
            - 'box'     (np.ndarray, shape 3×3): box matrix
    """
    box = np.zeros((3, 3))
    metadat = {}
    with open(filename, 'r') as f:
        data = f.read().split('\n')
    while '' in data:
        data.remove('')

    name = data[0]
    N = int(data[1])
    metadat['N'] = N

    series = {k: [] for k in GRO_ATTRIBUTES}
    lc_globalIdx = 1
    for x in data[2:-1]:
        series['resNum'].append(int(x[0:5].strip()))
        series['resName'].append(x[5:10].strip())
        series['atomName'].append(x[10:15].strip())
        # globalIdx is always row index + 1 when the file is correctly formatted
        series['globalIdx'].append(lc_globalIdx)
        lc_globalIdx += 1
        # Fixed-width fields; split() fails when columns touch
        # Format: "%5d%-5s%5s%5d%8.3f%8.3f%8.3f%8.4f%8.4f%8.4f"
        numbers = list(map(float, [x[20 + 8 * i:20 + 8 * (i + 1)] for i in range(3)]))
        if len(x) > 44:
            numbers.extend(list(map(float, [x[44 + 8 * i:44 + 8 * (i + 1)] for i in range(3)])))
        series['posX'].append(numbers[0])
        series['posY'].append(numbers[1])
        series['posZ'].append(numbers[2])
        if len(numbers) == 6:
            series['velX'].append(numbers[3])
            series['velY'].append(numbers[4])
            series['velZ'].append(numbers[5])

    if len(series['velX']) == 0:
        del series['velX']
        del series['velY']
        del series['velZ']

    assert N == len(series['globalIdx']), f'Atom count mismatch inside {filename}'
    A = pd.DataFrame(series)

    boxdata = list(map(float, data[-1].split()))
    box[0][0] = boxdata[0]
    box[1][1] = boxdata[1]
    box[2][2] = boxdata[2]
    if len(boxdata) == 9:
        box[0][1], box[0][2], box[1][0], box[1][2], box[2][0], box[2][1] = boxdata[3:]

    return {'name': name, 'N': N, 'metadat': metadat, 'A': A, 'box': box}




[docs]
def write(filename, A, box, name, grotitle=''):
    """Write a GROMACS .gro file.

    Args:
        filename (str): path to write
        A (pd.DataFrame): atom data frame; must contain the gro_attributes columns
        box (np.ndarray, shape 3×3): box matrix
        name (str): molecule/system name used as the title when grotitle is empty
        grotitle (str): explicit title line, defaults to ''
    """
    title = name if not grotitle else grotitle
    has_vel = 'velX' in A.columns
    gro_cols = GRO_ATTRIBUTES if has_vel else GRO_ATTRIBUTES[:-3]
    atomformatters = (
        [
            lambda x: f'{x:>5d}',
            lambda x: f'{x:<5s}',
            lambda x: f'{x:>5s}',
            lambda x: f'{x % 10000:5d}',
        ]
        + [lambda x: f'{x:8.3f}'] * 3
        + [lambda x: f'{x:8.4f}'] * 3
    )
    with open(filename, 'w') as f:
        f.write(title + '\n')
        f.write(f'{A.shape[0]:>5d}\n')
        for _, r in A.iterrows():
            f.write(''.join([atomformatters[i](v) for i, v in enumerate(list(r[gro_cols]))]) + '\n')
        if not np.any(box):
            logger.debug('Writing Gromacs coordinates file but boxsize is not set.')
        f.write(f'{box[0][0]:10.5f}{box[1][1]:10.5f}{box[2][2]:10.5f}')
        x, y = box.nonzero()
        if not all(x == y):
            f.write(f'{box[0][1]:10.5f}{box[0][2]:10.5f}')
            f.write(f'{box[1][0]:10.5f}{box[1][2]:10.5f}')
            f.write(f'{box[2][0]:10.5f}{box[2][1]:10.5f}')
        f.write('\n')




[docs]
def write_grx(filename, A, attributes, formatters=[]):
    """Write a subset of atom attributes to an HTPolyNet .grx file.

    The file is a whitespace-delimited table with 'globalIdx' as the first
    column followed by the requested attribute columns.

    Args:
        filename (str): path to write
        A (pd.DataFrame): atom data frame
        attributes (list): attribute names to include
        formatters (list): optional per-column formatter callables, defaults to []

    Raises:
        Exception: if any attribute in attributes is absent from A
    """
    missing = [a for a in attributes if a not in A.columns]
    if missing:
        raise Exception(f'Column(s) {missing} not found in atoms dataframe')
    with open(filename, 'w') as f:
        cols = A[['globalIdx'] + attributes]
        if formatters:
            f.write(cols.to_string(header=True, index=False, formatters=formatters) + '\n')
        else:
            f.write(cols.to_string(header=True, index=False) + '\n')




[docs]
def read_grx(filename, attributes=[]):
    """Read an HTPolyNet .grx extended-attribute file.

    Args:
        filename (str): path to read
        attributes (list): attribute names to read; if empty, all columns are read

    Returns:
        tuple: (df, attributes_read) where df has a 'globalIdx' column plus the
               attribute columns, and attributes_read is the list of attribute names
               that were actually read.

    Raises:
        AssertionError: if the file does not exist or lacks a 'globalIdx' column
    """
    assert os.path.exists(filename), f'Error: {filename} not found'
    if len(attributes) == 0:
        df = pd.read_csv(filename, sep=r'\s+', header=0)
        assert 'globalIdx' in df.columns, \
            f"Error: {filename} does not have a 'globalIdx' column"
        attributes_read = [c for c in df.columns if c != 'globalIdx']
    else:
        df = pd.read_csv(filename, sep=r'\s+', names=['globalIdx'] + attributes, header=0)
        attributes_read = attributes
    return df, attributes_read