Source code for htpolynet.utils.dataframetools

"""Some convenient tools for handling pandas dataframes in the context of htpolynet coordinates.

Author: Cameron F. Abrams <cfa22@drexel.edu>
"""
import logging

import pandas as pd

logger=logging.getLogger(__name__)


def get_row_as_string(df:pd.DataFrame,attributes):
    """Return the rows of ``df`` matching ``attributes``, rendered as a string.

    Keys of ``attributes`` that are not columns of ``df`` are ignored. If no
    key names a column, every row matches and the whole dataframe is rendered.

    Args:
        df (pd.DataFrame): a pandas dataframe
        attributes (dict(str,obj)): dictionary of column names (keys) and the
            values a row must hold in those columns to be selected

    Returns:
        str: the selected rows converted with ``DataFrame.to_string()``
    """
    # Only attributes that name an existing column can constrain the selection.
    ga={k:v for k,v in attributes.items() if k in df}
    # Build the mask on df's own index: a mask with a default RangeIndex would
    # be label-aligned by `&` and end up with the wrong length whenever df's
    # index is not 0..n-1 (e.g. after filtering rows).
    mask=pd.Series(True,index=df.index)
    for k,v in ga.items():
        mask=mask&(df[k]==v)
    # Select positionally with a plain boolean array.
    return df[mask.values].to_string()

def get_rows_w_attribute(df:pd.DataFrame,name,attributes:dict):
    """Return the values of column(s) ``name`` from all rows matching ``attributes``.

    Keys of ``attributes`` that are not columns of ``df`` are ignored, but at
    least one key must name a column.

    Args:
        df (pd.DataFrame): a pandas dataframe
        name (str or list): column name, or list of column names, whose values
            are returned
        attributes (dict): dictionary of column names (keys) and the values a
            row must hold in those columns to be selected

    Returns:
        numpy.ndarray: values of ``name`` in the selected rows; one-dimensional
        when ``name`` is a single column, two-dimensional (rows x columns) when
        ``name`` is a list

    Raises:
        AssertionError: if no key of ``attributes`` is a column of ``df``, or
            if ``name`` (or any element of it) is not a column of ``df``
    """
    ga={k:v for k,v in attributes.items() if k in df}
    assert len(ga)>0,f'Cannot find any rows with attributes {attributes}'
    if isinstance(name,list):
        name_in_df=all(n in df for n in name)
    else:
        name_in_df=name in df
    assert name_in_df,f'Attribute(s) {name} not found'
    # Build the mask on df's own index: a mask with a default RangeIndex would
    # be label-aligned by `&` and end up with the wrong length whenever df's
    # index is not 0..n-1 (e.g. after filtering rows).
    mask=pd.Series(True,index=df.index)
    for k,v in ga.items():
        mask=mask&(df[k]==v)
    # Select rows positionally with a plain boolean array, then the column(s).
    return df[mask.values][name].values

def set_row_attribute(df:pd.DataFrame,name,value,attributes):
    """Set column ``name`` to ``value`` in all rows matching ``attributes``; modifies ``df`` in place.

    Keys of ``attributes`` that are not columns of ``df`` are reported with a
    warning and otherwise ignored. Nothing is modified if ``name`` is not a
    column of ``df`` or if no key of ``attributes`` names a column.

    Args:
        df (pd.DataFrame): a pandas dataframe
        name (str): name of attribute whose value is to be set
        value (scalar): value the attribute is to be set to
        attributes (dict): dictionary of attribute:value pairs that specify
            the rows whose attribute is to be set
    """
    ga={k:v for k,v in attributes.items() if k in df}
    exla={k:v for k,v in attributes.items() if k not in df}
    if len(exla)>0:
        logger.warning(f'Caller attempts to use unrecognized attributes to refer to row: {exla}')
    if name in df and len(ga)>0:
        # Build the mask on df's own index: a mask with a default RangeIndex
        # would be label-aligned by `&` and end up with the wrong length
        # whenever df's index is not 0..n-1 (e.g. after filtering rows).
        mask=pd.Series(True,index=df.index)
        for k,v in ga.items():
            mask=mask&(df[k]==v)
        # Boolean selector over the columns: True where the label equals name.
        cidx=[col==name for col in df.columns]
        df.loc[mask.values,cidx]=value

def set_rows_attributes_from_dict(df:pd.DataFrame,valdict,attributes):
    """Set the columns named in ``valdict`` in all rows matching ``attributes``; modifies ``df`` in place.

    Keys of ``attributes`` that are not columns of ``df`` are reported with a
    warning and otherwise ignored. Nothing is modified unless every key of
    ``valdict`` is a column of ``df`` and at least one key of ``attributes``
    names a column.

    Args:
        df (pd.DataFrame): a pandas dataframe
        valdict (dict): dictionary of attribute:value pairs to set
        attributes (dict): dictionary of attribute:value pairs that specify
            the rows whose attributes are to be set
    """
    ga={k:v for k,v in attributes.items() if k in df}
    exla={k:v for k,v in attributes.items() if k not in df}
    if len(exla)>0:
        logger.warning(f'using unknown attributes to refer to atom: {exla}')
    if all(x in df for x in valdict) and len(ga)>0:
        # Build the mask on df's own index: a mask with a default RangeIndex
        # would be label-aligned by `&` and end up with the wrong length
        # whenever df's index is not 0..n-1 (e.g. after filtering rows).
        mask=pd.Series(True,index=df.index)
        for k,v in ga.items():
            mask=mask&(df[k]==v)
        # Freeze the row selection before writing, so that overwriting a column
        # that also appears in `attributes` cannot change which rows are hit.
        rows=mask.values
        for k,v in valdict.items():
            # Boolean selector over the columns: True where the label equals k.
            cidx=[col==k for col in df.columns]
            df.loc[rows,cidx]=v
