Source code for gromacs.pdb2gmx

#!/usr/bin/env python3

"""Module containing the Pdb2gmx class and the command line interface."""
import os
from typing import Optional
from biobb_common.generic.biobb_object import BiobbObject
from biobb_common.tools import file_utils as fu
from biobb_common.tools.file_utils import launchlogger
from biobb_gromacs.gromacs.common import get_gromacs_version


class Pdb2gmx(BiobbObject):
    """
    | biobb_gromacs Pdb2gmx
    | Wrapper class for the `GROMACS pdb2gmx <http://manual.gromacs.org/current/onlinehelp/gmx-pdb2gmx.html>`_ module.
    | The GROMACS pdb2gmx module, reads a .pdb (or .gro) file, reads some database files, adds hydrogens to the molecules and generates coordinates in GROMACS (GROMOS), or optionally .pdb, format and a topology in GROMACS format. These files can subsequently be processed to generate a run input file.

    Args:
        input_pdb_path (str): Path to the input PDB file. File type: input. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/data/gromacs/egfr.pdb>`_. Accepted formats: pdb (edam:format_1476).
        output_gro_path (str): Path to the output GRO file. File type: output. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/reference/gromacs/ref_pdb2gmx.gro>`_. Accepted formats: gro (edam:format_2033).
        output_top_zip_path (str): Path to the output TOP topology in zip format. File type: output. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/reference/gromacs/ref_pdb2gmx.zip>`_. Accepted formats: zip (edam:format_3987).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **water_type** (*str*) - ("spce") Water molecule type. Values: spc, spce, tip3p, tip4p, tip5p, tips3p.
            * **force_field** (*str*) - ("amber99sb-ildn") Force field to be used during the conversion.  Values: gromos45a3, charmm27, gromos53a6, amber96, amber99, gromos43a2, gromos54a7, gromos43a1, amberGS, gromos53a5, amber99sb, amber03, amber99sb-ildn, oplsaa, amber94, amber99sb-star-ildn-mut.
            * **ignh** (*bool*) - (False) Should pdb2gmx ignore the hydrogens in the original structure.
            * **lys** (*list*) - (None) Lysine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
            * **arg** (*list*) - (None) Arginine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
            * **asp** (*list*) - (None) Aspartic acid protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
            * **glu** (*list*) - (None) Glutamic acid protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
            * **gln** (*list*) - (None) Glutamine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
            * **his** (*list*) - (None) Histidine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain. Make sure residues are named HIS (0: HID, 1: HIE, 2: HIP, 3: HIS1).
            * **merge** (*bool*) - (False) Merge all chains into a single molecule.
            * **gmx_lib** (*str*) - (None) Path set GROMACS GMXLIB environment variable.
            * **binary_path** (*str*) - ("gmx") Path to the GROMACS executable binary.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None)  Path to the binary executable of your container.
            * **container_image** (*str*) - ("gromacs/gromacs:latest") Container Image identifier.
            * **container_volume_path** (*str*) - ("/data") Path to an internal directory in the container.
            * **container_working_dir** (*str*) - (None) Path to the internal CWD in the container.
            * **container_user_id** (*str*) - (None) User number id to be mapped inside the container.
            * **container_shell_path** (*str*) - ("/bin/bash") Path to the binary executable of the container shell.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_gromacs.gromacs.pdb2gmx import pdb2gmx
            prop = { 'his': ['0 0 1 1 0 0 0', '1 1 0 1'] }
            pdb2gmx(input_pdb_path='/path/to/myStructure.pdb',
                    output_gro_path='/path/to/newStructure.gro',
                    output_top_zip_path='/path/to/newTopology.zip',
                    properties=prop)

    Info:
        * wrapped_software:
            * name: GROMACS Pdb2gmx
            * version: 2025.2
            * license: LGPL 2.1
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    # Command-line switches for the per-chain protonation properties, in the
    # order their answers are written to the stdin file (lys, arg, asp, glu,
    # gln, his within each chain).
    _PROTONATION_FLAGS = ("-lys", "-arg", "-asp", "-glu", "-gln", "-his")

    def __init__(self, input_pdb_path: str, output_gro_path: str, output_top_zip_path: str,
                 properties: Optional[dict] = None, **kwargs) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Must be captured right after the parent constructor so that it
        # holds only the constructor arguments.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_pdb_path": input_pdb_path},
            "out": {"output_gro_path": output_gro_path, "output_top_zip_path": output_top_zip_path}
        }

        # Properties specific for BB
        self.internal_top_name = properties.get('internal_top_name', 'p2g.top')  # Excluded from documentation for simplicity
        self.internal_itp_name = properties.get('internal_itp_name', 'posre.itp')  # Excluded from documentation for simplicity
        self.water_type = properties.get('water_type', 'spce')
        self.force_field = properties.get('force_field', 'amber99sb-ildn')
        self.ignh = properties.get('ignh', False)
        # Protonation states: one string per chain. A bare string is accepted
        # as shorthand for a single-chain structure and wrapped in a list.
        self.lys = self._as_chain_list(properties.get('lys', None))
        self.arg = self._as_chain_list(properties.get('arg', None))
        self.asp = self._as_chain_list(properties.get('asp', None))
        self.glu = self._as_chain_list(properties.get('glu', None))
        self.gln = self._as_chain_list(properties.get('gln', None))
        self.his = self._as_chain_list(properties.get('his', None))
        self.merge = properties.get('merge', False)

        # Properties common in all GROMACS BB
        self.gmx_lib = properties.get('gmx_lib', None)
        self.binary_path: str = properties.get('binary_path', 'gmx')
        self.gmx_nobackup = properties.get('gmx_nobackup', True)
        self.gmx_nocopyright = properties.get('gmx_nocopyright', True)
        if self.gmx_nobackup:
            self.binary_path += ' -nobackup'
        if self.gmx_nocopyright:
            self.binary_path += ' -nocopyright'
        # The version is only queried when no container is configured.
        if not self.container_path:
            self.gmx_version = get_gromacs_version(self.binary_path)

        # Make sure all have the same length
        self.check_lengths(self.lys, self.arg, self.asp, self.glu, self.gln, self.his)

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _as_chain_list(value):
        """Wrap a bare string in a one-item list; return any other value unchanged."""
        return [value] if isinstance(value, str) else value

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`Pdb2gmx <gromacs.pdb2gmx.Pdb2gmx>` object.

        Returns:
            int: ``0`` when ``check_restart()`` reports that nothing has to be
            run, otherwise ``self.return_code`` after the command has been run.
        """

        # Setup Biobb
        if self.check_restart():
            return 0

        # Create stdin file if needed: the answers are written chain by chain,
        # and within each chain in lys, arg, asp, glu, gln, his order.
        protonation = (self.lys, self.arg, self.asp, self.glu, self.gln, self.his)
        num_chains = self.find_length(*protonation)
        stdin_content = ''.join(
            f' {states[chain]}'
            for chain in range(num_chains)
            for states in protonation
            if states is not None
        )
        if stdin_content:
            self.io_dict['in']['stdin_file_path'] = fu.create_stdin_file(stdin_content)

        self.stage_files()

        internal_top_name = fu.create_name(prefix=self.prefix, step=self.step, name=self.internal_top_name)
        internal_itp_name = fu.create_name(prefix=self.prefix, step=self.step, name=self.internal_itp_name)

        # Create command line
        self.cmd = [self.binary_path, "pdb2gmx",
                    "-f", self.stage_io_dict["in"]["input_pdb_path"],
                    "-o", self.stage_io_dict["out"]["output_gro_path"],
                    "-p", internal_top_name,
                    "-water", self.water_type,
                    "-ff", self.force_field,
                    "-i", internal_itp_name]

        if self.ignh:
            self.cmd.append("-ignh")
        if self.merge:
            self.cmd.append("-merge")
            self.cmd.append("all")
        # A protonation switch is only passed when its list is non-empty.
        for flag, states in zip(self._PROTONATION_FLAGS, protonation):
            if states:
                self.cmd.append(flag)

        # Feed the interactive answers through a stdin redirection.
        if stdin_content:
            self.cmd.append('<')
            self.cmd.append(self.stage_io_dict["in"]["stdin_file_path"])

        if self.gmx_lib:
            self.env_vars_dict['GMXLIB'] = self.gmx_lib

        # Run Biobb block
        self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        # With a container the topology is looked up inside the staged unique directory.
        if self.container_path:
            internal_top_name = os.path.join(self.stage_io_dict.get("unique_dir", ""), internal_top_name)

        # zip topology
        fu.log('Compressing topology to: %s' % self.io_dict["out"]["output_top_zip_path"], self.out_log, self.global_log)
        fu.zip_top(zip_file=self.io_dict["out"]["output_top_zip_path"], top_file=internal_top_name, out_log=self.out_log, remove_original_files=self.remove_tmp)

        # Remove temporal files. Register the names that were actually handed
        # to the command (built with fu.create_name above) instead of the raw
        # property values, which may differ once prefix/step are applied.
        self.tmp_files.extend([internal_top_name, internal_itp_name])
        # Only register the stdin file when one was created; never an empty path.
        stdin_file_path = self.io_dict['in'].get("stdin_file_path")
        if stdin_file_path:
            self.tmp_files.append(stdin_file_path)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)
        return self.return_code

    def check_lengths(self, *lists):
        """Make sure all lists are the same length.

        Args:
            *lists: Lists to compare; ``None`` entries are ignored.

        Raises:
            ValueError: If the non-``None`` lists do not all have the same length.
        """
        lengths = [len(lst) for lst in lists if lst is not None]
        # Zero or one distinct length means there is nothing to disagree about.
        if len(set(lengths)) > 1:
            raise ValueError(
                "All protonation arrays (lys, arg, asp, glu, gln, his) must have the same length "
                "(one string per chain and empty string if residue is not present in that chain). "
                f"Found lengths: {lengths}"
            )

    def find_length(self, *lists) -> int:
        """Find length of the first list.

        Args:
            *lists: Candidate lists; ``None`` entries are skipped.

        Returns:
            int: Length of the first entry that is not ``None``, or ``0`` when
            every entry is ``None`` (or no entry is given).
        """
        return next((len(lst) for lst in lists if lst is not None), 0)
def pdb2gmx(input_pdb_path: str, output_gro_path: str, output_top_zip_path: str,
            properties: Optional[dict] = None, **kwargs) -> int:
    """Create :class:`Pdb2gmx <gromacs.pdb2gmx.Pdb2gmx>` class and
    execute the :meth:`launch() <gromacs.pdb2gmx.Pdb2gmx.launch>` method."""
    # The extra keyword arguments are forwarded as a single keyword named
    # ``kwargs`` (not unpacked), exactly as forwarding the function's local
    # namespace would do.
    building_block = Pdb2gmx(
        input_pdb_path=input_pdb_path,
        output_gro_path=output_gro_path,
        output_top_zip_path=output_top_zip_path,
        properties=properties,
        kwargs=kwargs,
    )
    return building_block.launch()
# Expose the class documentation on the function wrapper as well.
pdb2gmx.__doc__ = Pdb2gmx.__doc__

# NOTE(review): get_main is not defined in this file (presumably inherited from
# BiobbObject) and appears to build the command-line entry point - confirm.
# It is called after the docstring has been copied onto the wrapper.
main = Pdb2gmx.get_main(pdb2gmx, "Wrapper for the GROMACS pdb2gmx module.")

if __name__ == "__main__":
    main()