diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst index 5091ec218..60dff1d93 100644 --- a/doc/source/changelog.rst +++ b/doc/source/changelog.rst @@ -36,6 +36,8 @@ organisation on `GitHub `__. * Store OpenMM state at start of a dynamics run to use for crash recovery. +* Use ``RDKit::determineBondOrders()`` in Sire-to-RDKit conversion to infer bond orders. + `2025.4.0 `__ - February 2026 --------------------------------------------------------------------------------------------- diff --git a/tests/convert/test_rdkit.py b/tests/convert/test_rdkit.py index e3189f0fa..08a2399c8 100644 --- a/tests/convert/test_rdkit.py +++ b/tests/convert/test_rdkit.py @@ -181,6 +181,31 @@ def test_rdkit_force_infer(): assert bond_infer == "TRIPLE" +@pytest.mark.skipif( + "rdkit" not in sr.convert.supported_formats(), + reason="rdkit support is not available", +) +def test_rdkit_bond_order_inference(): + """ + Formal charges inferred from AMBER topology (no bond orders) should + match those obtained from an SDF file (explicit bond orders). + """ + + mol_prm = sr.load_test_files("bond_order_issue.prm7", "bond_order_issue.rst7")[0] + mol_sdf = sr.load_test_files("bond_order_issue.sdf")[0] + + rdmol_prm = sr.convert.to_rdkit(mol_prm) + rdmol_sdf = sr.convert.to_rdkit(mol_sdf) + + charges_prm = [atom.GetFormalCharge() for atom in rdmol_prm.GetAtoms()] + charges_sdf = [atom.GetFormalCharge() for atom in rdmol_sdf.GetAtoms()] + + assert charges_prm == charges_sdf + + # SMILES should also agree between the two loading paths + assert mol_prm.smiles() == mol_sdf.smiles() + + @pytest.mark.skipif( "rdkit" not in sr.convert.supported_formats(), reason="rdkit support is not available", diff --git a/wrapper/Convert/SireRDKit/sire_rdkit.cpp b/wrapper/Convert/SireRDKit/sire_rdkit.cpp index 3695b2026..27ca5ac99 100644 --- a/wrapper/Convert/SireRDKit/sire_rdkit.cpp +++ b/wrapper/Convert/SireRDKit/sire_rdkit.cpp @@ -1,35 +1,36 @@ #include "sire_rdkit.h" +#include +#include +#include #include +#include #include +#include #include #include -#include -#include -#include #include -#include #include "SireStream/datastream.h" #include "SireStream/shareddatastream.h" -#include "SireMol/core.h" -#include "SireMol/moleditor.h" -#include "SireMol/atomelements.h" #include "SireMol/atomcharges.h" #include "SireMol/atomcoords.h" +#include "SireMol/atomelements.h" #include "SireMol/atommasses.h" #include "SireMol/atommatch.h" #include "SireMol/atomproperty.hpp" -#include "SireMol/connectivity.h" #include "SireMol/bondid.h" #include "SireMol/bondorder.h" -#include "SireMol/stereochemistry.h" #include "SireMol/chirality.h" +#include "SireMol/connectivity.h" +#include "SireMol/core.h" #include "SireMol/hybridization.h" #include "SireMol/iswater.h" +#include "SireMol/moleditor.h" #include "SireMol/mover_metaid.h" +#include "SireMol/stereochemistry.h" #include "SireMM/selectorbond.h" @@ -43,6 +44,7 @@ #include "tostring.h" +#include #include #include @@ -848,8 +850,110 @@ namespace SireRDKit if (atoms.count() > 1 and (not has_bond_info or force_stereo_inference)) { - // we need to infer the bond information - infer_bond_info(molecule); + // we need to infer the bond information. + // + // Determine total molecular charge before any resets. + // For the force_stereo_inference case the formal charges were set from + // the source file (e.g. M CHG records in SDF) so we sum them directly + // from the RDKit atoms. For the no-bond-info case we derive the charge + // from the Sire partial charges (AMBER partial charges sum to the + // integer formal charge of the molecule). + int total_charge = 0; + + if (has_bond_info and force_stereo_inference) + { + for (auto a : molecule.atoms()) + { + total_charge += a->getFormalCharge(); + } + } + else + { + try + { + double charge_sum = 0.0; + for (int i = 0; i < atoms.count(); ++i) + { + charge_sum += atoms(i).property(map["charge"]).to(SireUnits::mod_electron); + } + total_charge = static_cast(std::round(charge_sum)); + } + catch (...) + { + total_charge = 0; + } + } + + // When bond info is present but force_stereo_inference is requested, + // reset all bonds to SINGLE and clear formal charges so that the + // inference algorithm starts from a clean connectivity graph. + if (has_bond_info and force_stereo_inference) + { + for (auto b : molecule.bonds()) + { + if (b->getBondType() != RDKit::Bond::ZERO) + { + b->setBondType(RDKit::Bond::SINGLE); + } + } + for (auto a : molecule.atoms()) + { + a->setFormalCharge(0); + } + molecule.updatePropertyCache(false); + } + + // Prefer RDKit's determineBondOrders, which is based on the xyz2mol + // linear-programming algorithm and is significantly more robust than the + // MDAnalysis heuristic implemented in infer_bond_info(). + // + // determineBondOrders() needs all heavy atoms to have noImplicit set so + // that it does not try to add implicit hydrogens (all H are explicit when + // loaded from formats such as AMBER that carry all hydrogen atoms). + for (auto a : molecule.atoms()) + { + if (a->getAtomicNum() > 1) + { + a->setNoImplicit(true); + } + } + + // Check for dummy atoms (atomic_num == 0): determineBondOrders may not + // handle them correctly, so fall back to the heuristic in that case. + bool has_dummy_atoms = false; + for (auto a : molecule.atoms()) + { + if (a->getAtomicNum() == 0) + { + has_dummy_atoms = true; + break; + } + } + + bool inferred = false; + + if (not has_dummy_atoms) + { + try + { + // embedChiral=false: we call sanitizeMol ourselves below, + // and assignStereochemistryFrom3D is called afterwards. + RDKit::determineBondOrders(molecule, total_charge, + /*allowChargedFragments=*/true, + /*embedChiral=*/false, + /*useAtomMap=*/false); + inferred = true; + } + catch (...) + { + } + } + + if (not inferred) + { + // Fall back to the MDAnalysis-based heuristic. + infer_bond_info(molecule); + } } // sanitze the molecule. diff --git a/wrapper/build/cmake/FindRDKit.cmake b/wrapper/build/cmake/FindRDKit.cmake index e7e62a3c0..5483cc4ca 100644 --- a/wrapper/build/cmake/FindRDKit.cmake +++ b/wrapper/build/cmake/FindRDKit.cmake @@ -65,6 +65,8 @@ else() HINTS ${RDKIT_LIBRARY_DIR}) find_library(SUBSTRUCTMATCH_LIB NAMES SubstructMatch RDKitSubstructMatch HINTS ${RDKIT_LIBRARY_DIR}) + find_library(DETERMINEBONDS_LIB NAMES DetermineBonds RDKitDetermineBonds + HINTS ${RDKIT_LIBRARY_DIR}) set (RDKIT_LIBRARIES ${GRAPHMOL_LIB} # RDKit::ROMol et al ${RDGENERAL_LIB} # Base RDKit objects @@ -74,6 +76,7 @@ else() ${FORCEFIELD_LIB} # Add forcefields to molecules ${FORCEFIELD_HELPERS_LIB} # Add forcefields to molecules ${SUBSTRUCTMATCH_LIB} # Substructure matching + ${DETERMINEBONDS_LIB} # Bond order inference ) endif() if(RDKIT_LIBRARIES)