#!/usr/bin/env python3
"""Module containing the Ndx2resttop class and the command line interface."""
import fnmatch
from typing import Optional
from typing import Any
from pathlib import Path
from biobb_common.generic.biobb_object import BiobbObject
from biobb_common.tools import file_utils as fu
from biobb_common.tools.file_utils import launchlogger
[docs]
class Ndx2resttop(BiobbObject):
"""
| biobb_gromacs Ndx2resttop
| Generate a restrained topology from an index NDX file.
| This module automatizes the process of restrained topology generation starting from an index NDX file.
Args:
input_ndx_path (str): Path to the input NDX index file. File type: input. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/data/gromacs_extra/ndx2resttop.ndx>`_. Accepted formats: ndx (edam:format_2033).
input_top_zip_path (str): Path the input TOP topology in zip format. File type: input. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/data/gromacs_extra/ndx2resttop.zip>`_. Accepted formats: zip (edam:format_3987).
output_top_zip_path (str): Path the output TOP topology in zip format. File type: output. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/reference/gromacs_extra/ref_ndx2resttop.zip>`_. Accepted formats: zip (edam:format_3987).
properties (dict - Python dictionary object containing the tool parameters, not input/output files):
* **posres_names** (*str*) - ("CUSTOM_POSRES") Space-separated preprocessor macro names, one per triplet (e.g. ``"CHAIN_A_POSRES CHAIN_B_POSRES"``). Activated at grompp time via ``-D <name>``. Must match the length of ref_rest_mol_triplet_list.
* **force_constants** (*str*) - ("500 500 500") Three space-separated force constants Fx Fy Fz in kJ/mol/nm². Written verbatim into each ITP line so the same value is applied to every restrained atom.
* **ref_rest_mol_triplet_list** (*str*) - (None) Comma-separated triplets ``(reference_group, restrain_group, molecule_name)``, one per molecule to restrain.``reference_group``: NDX group containing *all* atoms of the molecule in global (system-wide) GROMACS numbering. Used only as a coordinate frame to convert global indices to molecule-local 1-based indices. Example: ``r_1-9280``. ``restrain_group``: NDX group with the subset of atoms to actually restrain (must be a strict subset of reference_group). Example: ``P_&_r_1-9280``. ``molecule_name``: the ``[ moleculetype ]`` name in the topology used to locate where to splice the ``#ifdef`` block. Example: ``1TSR``.
Examples:
This is a use example of how to use the building block from Python::
from biobb_gromacs.gromacs_extra.ndx2resttop import ndx2resttop
prop = { 'ref_rest_mol_triplet_list': '( Chain_A, Chain_A_noMut, Protein_chain_A ), ( Chain_B, Chain_B_noMut, Protein_chain_B ), ( Chain_C, Chain_C_noMut, Protein_chain_C ), ( Chain_D, Chain_D_noMut, Protein_chain_D )' }
ndx2resttop(input_ndx_path='/path/to/myIndex.ndx',
input_top_zip_path='/path/to/myTopology.zip',
output_top_zip_path='/path/to/newTopology.zip',
properties=prop)
Info:
* wrapped_software:
* name: In house
* license: Apache-2.0
* ontology:
* name: EDAM
* schema: http://edamontology.org/EDAM.owl
"""
def __init__(self, input_ndx_path: str, input_top_zip_path: str, output_top_zip_path: str,
properties: Optional[dict] = None, **kwargs) -> None:
properties = properties or {}
# Call parent class constructor
super().__init__(properties)
self.locals_var_dict = locals().copy()
# Input/Output files
self.io_dict = {
"in": {"input_ndx_path": input_ndx_path, "input_top_zip_path": input_top_zip_path},
"out": {"output_top_zip_path": output_top_zip_path}
}
# Properties specific for BB
self.posres_names = properties.get('posres_names')
self.force_constants = properties.get('force_constants', '500 500 500')
self.ref_rest_mol_triplet_list = properties.get('ref_rest_mol_triplet_list')
# Check the properties
self.check_properties(properties)
self.check_arguments()
# --- Private helpers ---
def _parse_ndx_groups(self, ndx_lines: list) -> dict:
"""Parse a GROMACS NDX file into a lookup table of group line ranges.
NDX format: each named group starts with a ``[ group_name ]`` header line,
followed by one or more rows of whitespace-separated global atom indices.
Returns a dict mapping the raw header string (e.g. ``'[ System ]'``) to a
two-element list ``[start_line, end_line]`` where the range is **half-open**
(matching Python slice semantics): ``ndx_lines[start_line:end_line]`` yields
the data rows for that group (the header line itself at ``start_line - 1`` is
excluded).
Edge cases handled:
- **Last group**: the final group has no successor header to trigger the end
assignment, so it is closed explicitly with ``index + 1`` (not ``index``)
to include the very last data line.
- **Single-content-line groups**: when a header is immediately followed by
another header (no trailing blank line), the default range would be
``[H, H+1]`` giving an empty slice. The post-loop fix increments end by 1
to capture that single data line.
"""
groups_dic: dict[str, Any] = {}
current_group = ''
previous_group = ''
for index, line in enumerate(ndx_lines):
if line.startswith('['):
current_group = line
groups_dic[current_group] = [index, 0]
if previous_group:
groups_dic[previous_group][1] = index
previous_group = current_group
if index == len(ndx_lines) - 1:
groups_dic[current_group][1] = index + 1
# Fix single-content-line groups
for group_name in groups_dic:
if groups_dic[group_name][0] + 1 == groups_dic[group_name][1]:
groups_dic[group_name][1] += 1
return groups_dic
def _read_ndx_atoms(self, ndx_lines: list, groups_dic: dict, group_name: str) -> list:
"""Return the flat list of global atom indices for a named NDX group.
Slices the group's data rows from ndx_lines using the half-open range
stored in groups_dic, then flattens all whitespace-separated tokens into
a single list of integers.
Returned integers are **global** (system-wide) GROMACS atom numbers, 1-based.
"""
key = f'[ {group_name} ]'
start = groups_dic[key][0] + 1
stop = groups_dic[key][1]
fu.log(f'{group_name}: start_closed={start} stop_open={stop}', self.out_log, self.global_log)
atoms = [int(atom) for line in ndx_lines[start:stop] for atom in line.split()]
return atoms
def _write_posre_itp(self, itp_path: str, selected_atoms: list) -> None:
"""Write a GROMACS position restraint ITP file.
``selected_atoms`` must contain **molecule-local** 1-based atom indices
(not global system-wide indices). The atom type column is hardcoded to
``1`` (GROMACS position restraint type). ``self.force_constants`` is a
verbatim string ``"Fx Fy Fz"`` applied uniformly to every restrained atom.
"""
with open(itp_path, 'w') as f:
fu.log(f'Creating {itp_path} with selected atoms and force constants', self.out_log, self.global_log)
f.write('[ position_restraints ]\n')
f.write('; atom type fx fy fz\n')
for atom in selected_atoms:
f.write(f'{atom} 1 {self.force_constants}\n')
def _insert_posres_in_top(self, top_file: str, molecule_name: str, posre_name: str, itp_name: str) -> None:
"""Splice the #ifdef posres block into the .top file for single-chain topologies.
Uses a two-phase scan over the topology lines:
Phase 1 – locate the target moleculetype:
For each ``[ moleculetype ]`` directive, read the first non-comment
content line and compare its first token against ``molecule_name``.
Set ``found_molecule = True`` when matched.
Phase 2 – find the insertion point (only after found_molecule):
Scan forward for the earliest of:
- another top-level section (``[ moleculetype ]``, ``[ system ]``,
``[ molecules ]``)
- an existing ``#include`` or ``#ifdef POSRES`` line
Insert immediately before that line. Fall back to EOF if none found.
Important: the insertion-point check (Phase 2) runs *before* the
``[ moleculetype ]`` detector within the same loop iteration. Without this
ordering, a new ``[ moleculetype ]`` that should trigger insertion would
instead be consumed by the ``continue`` in the detector, causing the block
to be placed in the wrong molecule section.
"""
with open(top_file, 'r') as f:
lines = f.readlines()
in_moleculetype = False
found_molecule = False
index = None
for i, line in enumerate(lines):
stripped = line.strip()
# Find insertion point: just before the next top-level section or existing restraint block
if found_molecule:
is_next_section = stripped.startswith('[') and (
'system' in stripped or 'molecules' in stripped or 'moleculetype' in stripped)
if is_next_section or line.startswith('#include ') or line.startswith('#ifdef POSRES'):
index = i
break
# Detect [ moleculetype ] directive
if stripped.startswith('[') and 'moleculetype' in stripped:
in_moleculetype = True
continue
# Find the molecule name line inside the [ moleculetype ] block
if in_moleculetype:
if stripped and not stripped.startswith(';'):
in_moleculetype = False
if not stripped.startswith('[') and stripped.split()[0] == molecule_name:
found_molecule = True
if not found_molecule:
raise ValueError(f"Molecule type '{molecule_name}' not found in the topology file")
if index is None:
index = len(lines)
lines.insert(index, '\n')
lines.insert(index + 1, '; Include Position restraint file\n')
lines.insert(index + 2, f'#ifdef {posre_name}\n')
lines.insert(index + 3, f'#include "{itp_name}"\n')
lines.insert(index + 4, '#endif\n')
lines.insert(index + 5, '\n')
with open(top_file, 'w') as f:
f.writelines(lines)
# --- Main entry point ---
[docs]
@launchlogger
def launch(self) -> int:
"""Execute the :class:`Ndx2resttop <gromacs_extra.ndx2resttop.Ndx2resttop>` object."""
if self.check_restart():
return 0
top_file = fu.unzip_top(zip_file=self.io_dict['in'].get("input_top_zip_path", ""), out_log=self.out_log, unique_dir=self.stage_io_dict.get("unique_dir", ""))
ndx_lines = open(self.io_dict['in'].get("input_ndx_path", "")).read().splitlines()
groups_dic = self._parse_ndx_groups(ndx_lines)
fu.log(f'Parsed NDX groups: {list(groups_dic.keys())}', self.out_log, self.global_log)
# Parse triplet string: "(ref, restrain, mol), ..." -> list of 3-tuples
raw_triplets = str(self.ref_rest_mol_triplet_list).split('),')
self.ref_rest_mol_triplet_list = [
tuple(elem.strip(' ()').replace(' ', '').split(','))
for elem in raw_triplets
]
fu.log('ref_rest_mol_triplet_list: ' + str(self.ref_rest_mol_triplet_list), self.out_log, self.global_log)
if self.posres_names:
self.posres_names = [elem.strip() for elem in self.posres_names.split()]
else:
self.posres_names = ['CUSTOM_POSRES'] * len(self.ref_rest_mol_triplet_list)
fu.log('posres_names: ' + str(self.posres_names), self.out_log, self.global_log)
if len(self.posres_names) != len(self.ref_rest_mol_triplet_list):
raise ValueError("posres_names length must match ref_rest_mol_triplet_list length")
for triplet, posre_name in zip(self.ref_rest_mol_triplet_list, self.posres_names):
# Per-triplet workflow:
# 1. Unpack (reference_group, restrain_group, molecule_name)
# 2. Read global NDX indices for both groups
# 3. Remap restrain atoms to molecule-local 1-based indices
# 4. Write the _posre.itp
# 5. Inject #ifdef guard — multi-chain (per-chain .itp) or single-chain (.top)
reference_group, restrain_group, molecule_name = triplet
fu.log(f'Reference group: {reference_group}', self.out_log, self.global_log)
fu.log(f'Restrain group: {restrain_group}', self.out_log, self.global_log)
fu.log(f'Molecule name: {molecule_name}', self.out_log, self.global_log)
itp_path = fu.create_name(path=str(Path(top_file).parent), prefix=self.prefix,
step=self.step, name=restrain_group + '_posre.itp')
self.io_dict['out']["output_itp_path"] = itp_path
# Map global NDX indices -> molecule-local 1-based indices.
# reference_atoms.index(atom) gives the 0-based position of each global
# restrain atom within the reference group list. Adding 1 converts to
# GROMACS 1-based local numbering, which matches the atom order in the
# [ atoms ] section of each moleculetype block.
reference_atoms = self._read_ndx_atoms(ndx_lines, groups_dic, reference_group)
restrain_atoms = self._read_ndx_atoms(ndx_lines, groups_dic, restrain_group)
selected_atoms = [reference_atoms.index(atom) + 1 for atom in restrain_atoms]
self._write_posre_itp(itp_path, selected_atoms)
itp_name = Path(itp_path).name
# Multi-chain: append ifdef block to the matching per-chain ITP.
# Detection heuristic: if any .itp file in the topology directory has
# molecule_name in its filename (and is not already a posre/pr file),
# this is a multi-chain topology where each chain has its own .itp
# included by the .top. In that case, append directly to that file.
# Otherwise fall through to single-chain mode (_insert_posres_in_top).
multi_chain = False
for file_dir in Path(top_file).parent.iterdir():
if "posre" not in file_dir.name and not file_dir.name.endswith("_pr.itp"):
match = fnmatch.fnmatch(str(file_dir), "*" + molecule_name + "*.itp")
if match:
multi_chain = True
with open(str(file_dir), 'a') as f:
fu.log(f'Opening {file_dir} and adding ifdef include', self.out_log, self.global_log)
f.write('\n')
f.write('; Include Position restraint file\n')
f.write(f'#ifdef {posre_name}\n')
f.write(f'#include "{itp_name}"\n')
f.write('#endif\n')
f.write('\n')
# Single-chain: splice ifdef block into the .top file
if not multi_chain:
self._insert_posres_in_top(top_file, molecule_name, posre_name, itp_name)
fu.zip_top(zip_file=self.io_dict['out'].get("output_top_zip_path", ""),
top_file=top_file, out_log=self.out_log, remove_original_files=self.remove_tmp)
self.remove_tmp_files()
self.check_arguments(output_files_created=True, raise_exception=False)
return 0
[docs]
def ndx2resttop(input_ndx_path: str, input_top_zip_path: str, output_top_zip_path: str,
properties: Optional[dict] = None, **kwargs) -> int:
"""Create :class:`Ndx2resttop <gromacs_extra.ndx2resttop.Ndx2resttop>` class and
execute the :meth:`launch() <gromacs_extra.ndx2resttop.Ndx2resttop.launch>` method."""
return Ndx2resttop(**dict(locals())).launch()
ndx2resttop.__doc__ = Ndx2resttop.__doc__
main = Ndx2resttop.get_main(ndx2resttop, "Generate a restrained topology from an index NDX file.")
if __name__ == '__main__':
main()