Source code for htpolynet.cure.chain

"""

.. module:: chain
   :synopsis: Manages data structure that keeps track of polymerized chains
   
.. moduleauthor: Cameron F. Abrams, <cfa22@drexel.edu>

"""
import logging
logger=logging.getLogger(__name__)


[docs]
class Chain:
    def __init__(self,head=-1,tail=-1,idx=-1):
        self.idx_list=[]
        if head!=-1:
            self.idx_list.append(head)
        if tail!=-1:
            self.idx_list.append(tail)
        self.is_cyclic=False
        self.idx=idx

[docs]
    def is_head(self,a):
        return not self.is_cyclic and a==self.idx_list[0]


[docs]
    def is_tail(self,a):
        return not self.is_cyclic and a==self.idx_list[-1]


[docs]
    def merge(self,other):
        # tail of self bonds to head of other
        self.idx_list.extend(other.idx_list)



[docs]
    def remap_idx(self,idx_mapper):
        self.idx_list=[idx_mapper[x] for x in self.idx_list]


[docs]
    def shift(self,shift):
        self.idx_list=[x+shift for x in self.idx_list]




[docs]
class ChainManager:
    def __init__(self,**kwargs):
        self.chains=[]
        self.create_if_missing=kwargs.get('create_if_missing',False)


[docs]
    def chain_of(self,a):
        for c in self.chains:
            if a in c.idx_list:
                return c
        return None

    

[docs]
    def new_chain(self,head,tail):
        self.chains.append(Chain(head=head,tail=tail,idx=len(self.chains)))



[docs]
    def mergechains(self,dst,src):
        dst.merge(src)
        self.chains.remove(src)
        self.reindex()



[docs]
    def injest_other(self,other):
        self.chains.extend(other.chains)
        self.reindex()



[docs]
    def reindex(self):
        for i,c in enumerate(self.chains):
            c.idx=i



[docs]
    def shift(self,shift):
        for c in self.chains:
            c.shift(shift)



[docs]
    def remap(self,idx_mapper):
        for c in self.chains:
            c.remap_idx(idx_mapper)



[docs]
    def injest_bond(self,ai,aj):
        ic=self.chain_of(ai)
        jc=self.chain_of(aj)
        if not ic and not jc:
            if self.create_if_missing:
                logger.debug(f'New chain with head {ai} and tail {aj}')
                self.new_chain(ai,aj)
            else:
                pass
        elif not ic:
            # aj is in a chain, ai is not.  This is unexpected for vinyl
            # polymerization (where every reactive C participates in a
            # former-C=C pair and is therefore in some chain from monomer
            # init) but is legitimate for multistep / non-vinyl chemistry
            # (e.g., HTPB chain assembly from butenes, where reactive
            # methyls are not part of any C=C pair).  Extend jc to include
            # ai by attaching at the closer end.
            logger.debug(f'j-Atom {aj} (chain {jc.idx}) bonds to non-chain atom {ai}; extending chain {jc.idx} with {ai}.')
            if jc.is_head(aj):
                jc.idx_list.insert(0, ai)
            else:
                jc.idx_list.append(ai)
        elif not jc:
            # symmetric counterpart of the above
            logger.debug(f'i-Atom {ai} (chain {ic.idx}) bonds to non-chain atom {aj}; extending chain {ic.idx} with {aj}.')
            if ic.is_tail(ai):
                ic.idx_list.append(aj)
            else:
                ic.idx_list.insert(0, aj)
        elif ic==jc:
            assert (ic.is_head(ai) and ic.is_tail(aj)) or (ic.is_head(aj) and ic.is_tail(ai))
            ic.is_cyclic=True
            logger.debug(f'New bond between {ai} and {aj} creates a cyclic chain of length ({len(ic.idx_list)})')
        elif ic.is_head(ai) and jc.is_tail(aj):
            assert not ic.is_cyclic and not jc.is_cyclic
            self.mergechains(jc,ic)
        elif jc.is_head(aj) and ic.is_tail(ai):
            assert not ic.is_cyclic and not jc.is_cyclic
            self.mergechains(ic,jc)



[docs]
    def injest_bonds(self,bondlist):
        for b in bondlist:
            self.injest_bond(b[0],b[1])



[docs]
    def to_dataframe(self,D,lidx_col='bondchain_idx',gidx_col='bondchain'):
        assert lidx_col in D and gidx_col in D,f'Dataframe missing column {lidx_col} or {gidx_col}'
        # wipe out df
        D[lidx_col]=[-1]*D.shape[0]
        D[gidx_col]=[-1]*D.shape[0]
        for gidx,c in enumerate(self.chains):
            for lidx,atidx in enumerate(c.idx_list):
                df_idx=atidx-1 # should be true always
                D.loc[df_idx,lidx_col]=lidx
                D.loc[df_idx,gidx_col]=gidx



[docs]
    def from_dataframe(self,D,lidx_col='bondchain_idx',gidx_col='bondchain'):
        # overwrites whole thing
        self.chains=[]
        heads=D[D[lidx_col]==0].index+1 # always!
        headchains=D[D[lidx_col]==0][gidx_col]
        for h,hc in zip(heads,headchains):
            self.chains.append(Chain(head=h,idx=hc))
        self.chains.sort(key=lambda x: x.idx)
        old_idx=[x.idx for x in self.chains]
        self.reindex()
        assert all([(x==y) for x,y in zip([x.idx for x in self.chains],list(range(len(self.chains))))])
        next_idx=1
        while next_idx<D.shape[0]:
            if D[D[lidx_col]==next_idx].shape[0]==0:
                break
            next_ats=D[D[lidx_col]==next_idx].index+1 # always
            next_atchains=D[D[lidx_col]==next_idx][gidx_col]
            for a,ac in zip(next_ats,next_atchains):
                acidx=old_idx.index(ac)
                self.chains[acidx].idx_list.append(a)
            next_idx+=1