"""Handles project filesystems and local caching of molecule data.
Author: Cameron F. Abrams <cfa22@drexel.edu>
"""
import glob
import importlib.resources
import logging
import os
import shutil
from pathlib import Path
logger = logging.getLogger(__name__)
_CACHE_ENV_VAR = 'HTPOLYNET_CACHE'
_CACHE_DEFAULT = Path.home() / '.htpolynet'
def _safe_copyfile(src, dst):
"""Like ``shutil.copyfile`` but a no-op when src and dst resolve to the
same path. This arises on restart, when the search path for a checkout
includes ``projPath`` and the cwd is already inside ``projPath`` (so the
source we find IS the destination)."""
try:
if os.path.abspath(src) == os.path.abspath(dst):
return
except Exception:
pass
shutil.copyfile(src, dst)
[docs]
class SystemLibrary:
"""Read-only access to bundled package resources via importlib.resources."""
def __init__(self):
self._root = importlib.resources.files('htpolynet.resources')
@property
def root(self):
"""Filesystem path of the resource root, for display."""
return str(self._root)
[docs]
def exists(self, filename):
"""Checks if filename exists in the system library.
Args:
filename (str): path relative to resource root
Returns:
bool: True if found
"""
try:
return self._root.joinpath(filename).is_file()
except Exception:
return False
[docs]
def checkout(self, filename):
"""Copies filename from the system library to the current working directory.
Args:
filename (str): path relative to resource root
Returns:
bool: True if successful
"""
try:
src = self._root.joinpath(filename)
dest = Path(os.getcwd()) / os.path.basename(filename)
dest.write_bytes(src.read_bytes())
return True
except Exception:
return False
[docs]
def get_example_names(self):
"""Returns sorted list of example names available in the depot.
Recognises self-contained .yaml configs, legacy .sh scripts, and
legacy .tgz tarballs; returns unique names (without extension) in
numeric-prefix order.
Returns:
list: example names without extension
"""
depot = self._root.joinpath('example_depot')
names = set()
for f in depot.iterdir():
if f.name.endswith(('.yaml', '.tgz', '.sh')):
names.add(f.stem)
return sorted(names)
[docs]
def get_molecule_names(self):
"""Returns sorted list of molecule names available as inputs in the system library.
Returns:
list: molecule names (stems of files in molecules/inputs/)
"""
mol_dir = self._root.joinpath('molecules/inputs')
return sorted(set(f.name.rsplit('.', 1)[0] for f in mol_dir.iterdir() if not f.name.startswith('.')))
[docs]
def get_example_depot_location(self):
"""Returns the filesystem path of the example depot directory.
Returns:
str: path to example_depot
"""
return str(self._root.joinpath('example_depot'))
[docs]
def info(self):
"""Returns a description string.
Returns:
str: description
"""
return f'System library is {self.root}'
[docs]
class UserCache:
"""Writable cache for user-generated files such as parameterized molecules.
The cache location defaults to ~/.htpolynet but can be overridden by
setting the HTPOLYNET_CACHE environment variable.
"""
def __init__(self, path=None):
"""Initializes the user cache, creating the directory if needed.
Args:
path (str or Path): cache root; if None uses HTPOLYNET_CACHE env var or ~/.htpolynet
"""
if path is None:
path = os.environ.get(_CACHE_ENV_VAR, _CACHE_DEFAULT)
self.root = Path(path)
self.root.mkdir(parents=True, exist_ok=True)
[docs]
def exists(self, filename):
"""Checks if filename exists in the cache.
Args:
filename (str): path relative to cache root
Returns:
bool: True if found
"""
return (self.root / filename).exists()
[docs]
def checkout(self, filename):
"""Copies filename from the cache to the current working directory.
Args:
filename (str): path relative to cache root
Returns:
bool: True if successful
"""
src = self.root / filename
if src.exists():
_safe_copyfile(src, os.path.basename(filename))
return True
return False
[docs]
def checkin(self, filename, overwrite=False):
"""Copies a file from the current working directory into the cache.
Args:
filename (str): destination path relative to cache root
overwrite (bool): overwrite if already cached, defaults to False
Returns:
bool: False if source not found in cwd; True otherwise
"""
src = Path(os.path.basename(filename))
if not src.exists():
logger.debug(f'{src} not found in {os.getcwd()}. No check-in performed.')
return False
dest = self.root / filename
dest.parent.mkdir(parents=True, exist_ok=True)
if not dest.exists() or overwrite:
_safe_copyfile(src, dest)
return True
[docs]
def get_molecule_names(self):
"""Returns sorted list of parameterized molecule names in the cache.
Returns:
list: molecule names (stems of files in molecules/parameterized/)
"""
mol_dir = self.root / 'molecules' / 'parameterized'
if not mol_dir.exists():
return []
return sorted(set(f.stem for f in mol_dir.iterdir() if f.is_file()))
[docs]
def info(self):
"""Returns a description string.
Returns:
str: description
"""
return f'User cache is {self.root}'
[docs]
class UserLibrary:
"""User-specified directory of molecule input files."""
def __init__(self, pathname='.'):
"""Initializes a user library from an existing directory.
Args:
pathname (str): path to the user library directory, defaults to '.'
"""
self.root = Path(os.path.abspath(pathname))
assert self.root.exists() and self.root.is_dir(), f'{pathname} is not a directory'
[docs]
def exists(self, filename):
"""Checks if filename exists in the user library.
Args:
filename (str): path relative to library root
Returns:
bool: True if found
"""
return (self.root / filename).exists()
[docs]
def checkout(self, filename, searchpath=[], altpath=[]):
"""Copies filename from the user library to the current working directory.
Args:
filename (str): path relative to library root
searchpath (list): additional directories to search, defaults to []
altpath (list): extra directories to append to the search path, defaults to []
Returns:
bool: True if successful
"""
src = self.root / filename
if src.exists():
_safe_copyfile(src, os.path.basename(filename))
return True
all_paths = list(searchpath)
if altpath:
all_paths.extend(altpath if isinstance(altpath, list) else [altpath])
for p in all_paths:
candidate = Path(p) / filename
if candidate.exists():
_safe_copyfile(candidate, os.path.basename(filename))
return True
return False
[docs]
def info(self):
"""Returns a description string.
Returns:
str: description
"""
return f'User library is {self.root}'
[docs]
class Dirs:
"""Canonical directory names used throughout the project filesystem.
Use these instead of bare strings so that renaming a directory requires
only a change here.
"""
# top-level project subdirectories
molecules = 'molecules'
systems = 'systems'
plots = 'plots'
postsim = 'postsim'
analyze = 'analyze'
mdp = 'mdp'
# molecule subdirectories
molecules_inputs = 'molecules/inputs'
molecules_parameterized = 'molecules/parameterized'
# system stage subdirectories
systems_init = 'systems/init'
systems_densification = 'systems/densification'
systems_precure = 'systems/precure'
systems_postcure = 'systems/postcure'
systems_capping = 'systems/capping'
systems_repair = 'systems/repair'
systems_final = 'systems/final-results'
# standard topdirs lists for pfs_setup
run_topdirs = ['molecules', 'systems', 'plots']
postsim_topdirs = ['molecules', 'systems', 'plots', 'postsim']
analyze_topdirs = ['molecules', 'systems', 'plots', 'postsim', 'analyze']
[docs]
@staticmethod
def systems_iter(n):
"""Returns the path for CURE iteration directory n."""
return f'systems/iter-{n}'
[docs]
@staticmethod
def mdp_file(name):
"""Returns the library path for an mdp file by base name."""
return f'mdp/{name}.mdp'
_SYSTEM_LIBRARY_: SystemLibrary = None
_USER_CACHE_: UserCache = None
[docs]
def lib_setup():
"""Sets up the system library and user cache.
Returns:
SystemLibrary: the system library object
"""
global _SYSTEM_LIBRARY_, _USER_CACHE_
if _SYSTEM_LIBRARY_ is None:
_SYSTEM_LIBRARY_ = SystemLibrary()
if _USER_CACHE_ is None:
_USER_CACHE_ = UserCache()
return _SYSTEM_LIBRARY_
[docs]
def system():
"""Returns the system library object.
Returns:
SystemLibrary: the system library
"""
return _SYSTEM_LIBRARY_
[docs]
class ProjectFileSystem:
"""Handles all aspects of the creation and organization of a project filesystem."""
def __init__(self, root='.', topdirs=['molecules', 'systems', 'plots'], projdir='next',
verbose=False, reProject=False, userlibrary=None, mock=False):
"""Generates a new ProjectFileSystem object.
Args:
root (str): path of root directory, defaults to '.'
topdirs (list): toplevel subdirectory names, defaults to ['molecules','systems','plots']
projdir (str): project directory name or 'next', defaults to 'next'
verbose (bool): verbose output flag, defaults to False
reProject (bool): restart flag, defaults to False
userlibrary (str): path to user library directory, defaults to None
mock (bool): mock call flag, defaults to False
"""
lib_setup()
self.userlibrary = UserLibrary(userlibrary) if userlibrary else None
self.rootPath = os.path.abspath(root)
os.chdir(self.rootPath)
self.cwd = self.rootPath
self.verbose = verbose
if not mock:
self._next_project_dir(projdir=projdir, reProject=reProject)
self._setup_project_dir(topdirs=topdirs)
[docs]
def cdroot(self):
"""Changes the cwd to the root directory."""
os.chdir(self.rootPath)
self.cwd = self.rootPath
[docs]
def cdproj(self):
"""Changes the cwd to the toplevel project directory."""
os.chdir(self.projPath)
self.cwd = self.projPath
[docs]
def go_to(self, subPath, make=False):
"""Changes the cwd to the directory named by subPath.
Args:
subPath (str): directory relative to project directory
make (bool): create directory if missing, defaults to False
"""
self.cdproj()
if os.path.exists(subPath):
os.chdir(subPath)
self.cwd = os.getcwd()
elif make:
os.mkdir(subPath)
os.chdir(subPath)
self.cwd = os.getcwd()
def __str__(self):
return f'root {self.rootPath}: cwd {self.cwd}'
def _next_project_dir(self, projdir='next', reProject=False, prefix='proj-'):
"""Determines and creates the project directory.
Args:
projdir (str): explicit name or 'next', defaults to 'next'
reProject (bool): restart flag, defaults to False
prefix (str): prefix for auto-named directories, defaults to 'proj-'
"""
if projdir != 'next':
self.projPath = os.path.join(self.rootPath, projdir)
if os.path.exists(projdir):
logger.info(f'Working in existing project {self.projPath}')
else:
os.mkdir(projdir)
logger.info(f'Working in new project {self.projPath}')
else:
i = 0
lastprojdir = ''
while os.path.isdir(os.path.join(self.rootPath, f'{prefix}{i}')):
lastprojdir = f'{prefix}{i}'
logger.debug(f'{lastprojdir} exists')
i += 1
assert not os.path.exists(f'{prefix}{i}')
if not reProject or lastprojdir == '':
currentprojdir = f'{prefix}0' if lastprojdir == '' else f'{prefix}{i}'
self.projPath = os.path.join(self.rootPath, currentprojdir)
logger.info(f'New project in {self.projPath}')
os.mkdir(currentprojdir)
else:
self.projPath = os.path.join(self.rootPath, lastprojdir)
logger.info(f'Restarting project in {self.projPath} (latest project)')
def _setup_project_dir(self, topdirs=['molecules', 'systems', 'plots']):
"""Creates toplevel subdirectories within the project directory.
Args:
topdirs (list): subdirectory names to create, defaults to ['molecules','systems','plots']
"""
os.chdir(self.projPath)
self.projSubPaths = {}
for tops in topdirs:
self.projSubPaths[tops] = os.path.join(self.projPath, tops)
if not os.path.isdir(self.projSubPaths[tops]):
os.mkdir(tops)
_PFS_: ProjectFileSystem = None
[docs]
def pfs_setup(root='.', topdirs=['molecules', 'systems', 'plots'], projdir='next',
verbose=False, reProject=False, userlibrary=None, mock=False):
"""Sets up the global ProjectFileSystem.
Args:
root (str): parent directory, defaults to '.'
topdirs (list): toplevel subdirectories, defaults to ['molecules','systems','plots']
projdir (str): project directory name, defaults to 'next'
verbose (bool): verbose flag, defaults to False
reProject (bool): restart flag, defaults to False
userlibrary (str): user library path, defaults to None
mock (bool): mock flag, defaults to False
"""
global _PFS_
_PFS_ = ProjectFileSystem(root=root, topdirs=topdirs, projdir=projdir,
verbose=verbose, reProject=reProject,
userlibrary=userlibrary, mock=mock)
[docs]
def checkout(filename, altpath=[]):
"""Copies a file to cwd; searches user library, then user cache, then system library.
Args:
filename (str): path relative to library root
altpath (list): extra search directories, defaults to []
Returns:
bool: True if checkout was successful
"""
if _PFS_ and _PFS_.userlibrary and _PFS_.userlibrary.checkout(
filename, searchpath=[_PFS_.rootPath, _PFS_.projPath], altpath=altpath):
return True
if _USER_CACHE_.checkout(filename):
return True
return _SYSTEM_LIBRARY_.checkout(filename)
[docs]
def checkin(filename, overwrite=False):
"""Checks a file from cwd into the user cache.
Args:
filename (str): destination path relative to cache root
overwrite (bool): overwrite if already cached, defaults to False
"""
_USER_CACHE_.checkin(filename, overwrite=overwrite)
[docs]
def fetch_molecule_files(mname):
"""Fetches all relevant molecule data files for the named molecule.
Args:
mname (str): molecule name
Returns:
list: file extensions found and fetched
"""
ret_exts = []
dirname = 'molecules/parameterized'
for e in ['mol2', 'pdb', 'gro', 'top', 'tpx', 'itp', 'grx']:
prob_filename = os.path.join(dirname, f'{mname}.{e}')
if exists(prob_filename):
ret_exts.append(e)
checkout(prob_filename)
return ret_exts
[docs]
def exists(filename):
"""Checks for filename in user library, user cache, then system library.
Args:
filename (str): path relative to library root
Returns:
bool: True if found anywhere
"""
if _PFS_ and _PFS_.userlibrary and _PFS_.userlibrary.exists(filename):
return True
if _USER_CACHE_.exists(filename):
return True
return _SYSTEM_LIBRARY_.exists(filename)
[docs]
def subpath(name):
"""Returns the path of the named project subdirectory.
Args:
name (str): subdirectory name
Returns:
str: path of subdirectory
"""
return _PFS_.projSubPaths[name]
[docs]
def go_proj():
"""Changes the current working directory to the project directory."""
_PFS_.cdproj()
[docs]
def go_root():
"""Changes the current working directory to the root directory."""
_PFS_.cdroot()
[docs]
def go_to(pathstr):
"""Changes the current working directory to pathstr relative to the project root.
Args:
pathstr (str): directory path relative to project root
Returns:
bool: True if the directory already existed, False if newly created
"""
_PFS_.cdproj()
dirname = os.path.dirname(pathstr)
if dirname == '':
dirname = pathstr
assert dirname in _PFS_.projSubPaths, f'Error: cannot navigate using pathstring {pathstr}'
reentry = os.path.exists(pathstr)
if not os.path.exists(dirname):
os.mkdir(dirname)
os.chdir(dirname)
basename = os.path.basename(pathstr)
if basename != pathstr:
if not os.path.exists(basename):
logger.debug(f'PFS: making {basename}')
os.mkdir(basename)
os.chdir(basename)
_PFS_.cwd = os.getcwd()
return reentry
[docs]
def root():
"""Returns the root directory path.
Returns:
str: root directory path
"""
return _PFS_.rootPath
[docs]
def cwd():
"""Returns the current working directory relative to the root path.
Returns:
str: relative path of current working directory
"""
return os.path.relpath(os.getcwd(), start=_PFS_.rootPath)
[docs]
def proj():
"""Returns the project directory path.
Returns:
str: project directory path
"""
return _PFS_.projPath
[docs]
def local_data_searchpath():
"""Returns root and project paths for local data searches.
Returns:
list: [rootPath, projPath]
"""
return [_PFS_.rootPath, _PFS_.projPath]
[docs]
def get_molecule_info():
"""Returns molecule names available in the system library and user cache.
Returns:
tuple: (system_molecules, cached_molecules) each a sorted list of names
"""
system_mols = _SYSTEM_LIBRARY_.get_molecule_names() if _SYSTEM_LIBRARY_ else []
cached_mols = _USER_CACHE_.get_molecule_names() if _USER_CACHE_ else []
return system_mols, cached_mols
[docs]
def info():
"""Prints summary information about active libraries to the console."""
if _PFS_ and _PFS_.userlibrary:
print(_PFS_.userlibrary.info())
if _USER_CACHE_:
print(_USER_CACHE_.info())
if _SYSTEM_LIBRARY_:
print(_SYSTEM_LIBRARY_.info())
[docs]
def proj_abspath(filename):
"""Returns the path of filename relative to the project directory.
Args:
filename (str): filename to resolve
Returns:
str: path relative to the project directory
"""
abf = os.path.abspath(filename)
return os.path.relpath(abf, _PFS_.projPath)