diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b6c5ae..d5cee1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ Changelog ------------------------------------------------------------------------------------- * Add support for getting and restoring sampling statistics. +* Handle XED force field virtual sites. [2025.2.0](https://github.com/openbiosim/loch/compare/2025.1.0...2025.2.0) - Feb 2026 ------------------------------------------------------------------------------------- diff --git a/src/loch/_sampler.py b/src/loch/_sampler.py index 23698bd..b268716 100644 --- a/src/loch/_sampler.py +++ b/src/loch/_sampler.py @@ -566,6 +566,33 @@ def __init__( except Exception as e: raise ValueError(f"Could not prepare the system for GCMC sampling: {e}") + # Compute per-molecule virtual site information. Virtual sites are + # appended after the real atoms of each molecule in the OpenMM system, + # so all subsequent molecules have their OpenMM particle indices shifted + # by the cumulative number of virtual sites in preceding molecules. + ( + self._total_vsites, + self._vsite_atom_offsets, + self._mol_vsite_charges, + ) = self._get_vsite_offsets(self._system) + + if self._total_vsites > 0: + # Offset water oxygen indices from Sire atom indices to OpenMM + # particle indices. + self._water_indices = ( + self._water_indices + self._vsite_atom_offsets[self._water_indices] + ) + + # Apply the same correction to the reference atom indices. + if self._reference is not None: + self._reference_indices = ( + self._reference_indices + + self._vsite_atom_offsets[self._reference_indices] + ) + + # Update the total atom count to include virtual site particles. + self._num_atoms = self._system.num_atoms() + self._total_vsites + # Validate the platform parameter. valid_platforms = {"auto", "cuda", "opencl"} @@ -1102,6 +1129,13 @@ def reset(self) -> None: self._num_accepted_insertions = 0 self._num_accepted_deletions = 0 + # Clear the forces. + self._nonbonded_force = None + self._custom_nonbonded_force = None + + # Clear the OpenMM context. + self._openmm_context = None + def restore_stats(self, stats: dict) -> None: """ Restore sampler statistics from a dictionary. @@ -1140,17 +1174,11 @@ def get_stats(self) -> dict: "num_accepted_deletions": self._num_accepted_deletions, } - # Clear the forces. - self._nonbonded_force = None - self._custom_nonbonded_force = None - - # Clear the OpenMM context. - self._openmm_context = None - def ghost_residues(self) -> _np.ndarray: """ - Return the current indices of the ghost water residues in the OpenMM - context. + Return the residue indices of the current ghost waters in the input + topology. These are Sire/BioSimSpace residue indices and do not + include any virtual site particles that were added on context creation. Returns ------- @@ -1785,6 +1813,75 @@ def _get_box_information(self, space): return cell_matrix, cell_matrix_inverse, M + @staticmethod + def _get_vsite_offsets(system): + """ + Compute per-atom OpenMM index offsets due to virtual sites. + + In OpenMM, virtual site particles are appended after the real atoms + of each molecule. Molecules that appear after a molecule with virtual + sites therefore have their OpenMM particle indices shifted relative + to their Sire atom indices. + + Parameters + ---------- + + system: sire.system.System + The molecular system. + + Returns + ------- + + total_vsites: int + Total number of virtual site particles in the system. + + atom_offsets: numpy.ndarray + Array of shape (num_sire_atoms,) where entry i is the + cumulative number of virtual sites in all molecules that + precede the molecule containing Sire atom i. Adding this + offset to a Sire atom index yields the corresponding OpenMM + particle index. + + mol_vsite_charges: dict + Mapping from molecule number to a list of virtual site charges in + units of elementary charge. Only molecules that carry virtual sites + appear as keys; all other molecules are absent from the dict. + """ + n_sire_atoms = system.num_atoms() + atom_offsets = _np.zeros(n_sire_atoms, dtype=_np.int32) + mol_vsite_charges = {} + total_vsites = 0 + + try: + vsite_mols = system["property n_virtual_sites"].molecules() + except Exception: + # No molecules carry virtual sites. + return 0, atom_offsets, mol_vsite_charges + + all_atoms = system.atoms() + + for mol in vsite_mols: + n_vs = int(mol.property("n_virtual_sites")) + if n_vs <= 0: + continue + + # Locate where this molecule's atoms sit in the global index space, + # then shift every subsequent atom's offset by n_vs in one operation. + mol_start = int(_np.array(all_atoms.find(mol.atoms()))[0]) + mol_end = mol_start + mol.num_atoms() + atom_offsets[mol_end:] += n_vs + total_vsites += n_vs + + try: + raw_charges = mol.property("vs_charges") + vs_charges = [float(raw_charges[k]) for k in range(n_vs)] + except Exception: + vs_charges = [0.0] * n_vs + + mol_vsite_charges[mol.number()] = vs_charges + + return total_vsites, atom_offsets, mol_vsite_charges + @staticmethod def _get_reference_indices(system, reference): """ @@ -1937,6 +2034,10 @@ def _initialise_gpu_memory(self): for q in mol.property("charge"): charges[i] = q.value() i += 1 + # Append virtual site charges (zero LJ, non-zero charge). + for vc in self._mol_vsite_charges.get(mol.number(), []): + charges[i] = vc + i += 1 # Convert to a GPU array. charges = self._backend.to_gpu(charges.astype(_np.float32)) @@ -1954,6 +2055,12 @@ def _initialise_gpu_memory(self): sigmas[i] = lj.sigma().value() epsilons[i] = lj.epsilon().value() i += 1 + # Virtual sites have zero LJ. Use sigma=1.0 Å as a + # nominal placeholder (epsilon=0 so it has no effect). + for _ in self._mol_vsite_charges.get(mol.number(), []): + sigmas[i] = 1.0 + epsilons[i] = 0.0 + i += 1 # Convert to GPU arrays. sigmas = self._backend.to_gpu(sigmas.astype(_np.float32)) @@ -2063,8 +2170,9 @@ def _initialise_gpu_memory(self): # This is a null LJ parameter. if _np.isclose(lj.epsilon().value(), 0.0): - idx = atoms.find(atom) - is_ghost_fep[idx] = 1 + sire_idx = atoms.find(atom) + omm_idx = sire_idx + int(self._vsite_atom_offsets[sire_idx]) + is_ghost_fep[omm_idx] = 1 # The charge at the perturbed state is zero. elif _np.isclose(charge1, 0.0): @@ -2073,8 +2181,9 @@ def _initialise_gpu_memory(self): # This is a null LJ parameter. if _np.isclose(lj.epsilon().value(), 0.0): - idx = atoms.find(atom) - is_ghost_fep[idx] = 1 + sire_idx = atoms.find(atom) + omm_idx = sire_idx + int(self._vsite_atom_offsets[sire_idx]) + is_ghost_fep[omm_idx] = 1 # Convert to GPU array. is_ghost_fep = self._backend.to_gpu(is_ghost_fep.astype(_np.int32)) diff --git a/tests/test_vsites.py b/tests/test_vsites.py new file mode 100644 index 0000000..b28519d --- /dev/null +++ b/tests/test_vsites.py @@ -0,0 +1,160 @@ +import os + +import pytest +import sire as sr + +from loch import GCMCSampler + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _add_vsites_to_mol(mol, n_vs): + """ + Return a copy of mol with n_vs zero-charge virtual sites all parented to + atom 0. + + Using atom 0 for all parent references works for any molecule regardless + of its size. Virtual sites have zero charge so they do not affect energies, + making it possible to compare sampler results with and without virtual sites. + """ + n_atoms = mol.num_atoms() + + # LocalCoordinatesSite requires three parent atoms; clamp to valid range so + # this helper works even for single-atom molecules (e.g. ions). + p1 = min(1, n_atoms - 1) + p2 = min(2, n_atoms - 1) + + vsite_dict = { + str(k): { + "vs_indices": [0, p1, p2], + "vs_ows": [1, 0, 0], + "vs_xs": [1, -1, 0], + "vs_ys": [0, 1, -1], + # Offset each vsite slightly so their positions are distinct. + "vs_local": [(k + 1) * 0.03, 0, 0], + } + for k in range(n_vs) + } + + # All vsites are children of atom 0. + parents = {str(i): [] for i in range(n_atoms)} + parents["0"] = list(range(n_vs)) + + cursor = mol.cursor() + cursor.set("n_virtual_sites", n_vs) + cursor.set("vs_charges", [0.0] * n_vs) + cursor.set("virtual_sites", vsite_dict) + cursor.set("parents", parents) + return cursor.commit() + + +# --------------------------------------------------------------------------- +# Unit tests for _get_vsite_offsets (no GPU required) +# --------------------------------------------------------------------------- + + +def test_get_vsite_offsets_no_vsites(): + """ + _get_vsite_offsets on a system with no virtual sites returns all-zero + offsets and empty per-molecule charge lists. + """ + mols = sr.load_test_files("bpti.prm7", "bpti.rst7") + + total_vsites, offsets, mol_vsite_charges = GCMCSampler._get_vsite_offsets(mols) + + assert total_vsites == 0 + assert offsets.shape == (mols.num_atoms(),) + assert (offsets == 0).all() + assert mol_vsite_charges == {} + + +def test_get_vsite_offsets_with_vsites(): + """ + Adding N virtual sites to molecule 0 gives a zero offset for every atom + inside that molecule and an offset of N for all atoms in subsequent + molecules. + """ + mols = sr.load_test_files("bpti.prm7", "bpti.rst7") + + n_vs = 2 + n_atoms_mol0 = mols[0].num_atoms() + + mols_with_vs = mols.clone() + mols_with_vs.update(_add_vsites_to_mol(mols_with_vs[0], n_vs)) + + total_vsites, offsets, mol_vsite_charges = GCMCSampler._get_vsite_offsets( + mols_with_vs + ) + + assert total_vsites == n_vs + + # Atoms in molecule 0 precede no vsite-bearing molecule, so offset is 0. + assert (offsets[:n_atoms_mol0] == 0).all() + + # All atoms in molecules 1..N follow molecule 0 and are shifted by n_vs. + assert (offsets[n_atoms_mol0:] == n_vs).all() + + # Only molecule 0 appears in the dict, with n_vs zero charges. + assert len(mol_vsite_charges) == 1 + assert list(mol_vsite_charges.values())[0] == [0.0] * n_vs + + +# --------------------------------------------------------------------------- +# Integration tests (GPU required) +# --------------------------------------------------------------------------- + + +@pytest.mark.skipif( + "CUDA_VISIBLE_DEVICES" not in os.environ, + reason="Requires CUDA enabled GPU.", +) +@pytest.mark.parametrize("platform", ["cuda", "opencl"]) +def test_vsite_index_offsets(bpti, platform): + """ + GCMCSampler correctly offsets _water_indices and _reference_indices when + virtual sites are present on a preceding molecule. + + In BPTI the protein is molecule 0 and all water molecules follow it. + Adding N vsites to the protein shifts every water's OpenMM particle index + by N. The reference atoms also live in the protein, so their OpenMM + indices are not shifted (no vsites precede molecule 0). + """ + mols, reference = bpti + + common_kwargs = dict( + reference=reference, + cutoff_type="rf", + cutoff="10 A", + ghost_file=None, + log_file=None, + test=True, + platform=platform, + seed=42, + ) + + # Baseline: no virtual sites. + baseline = GCMCSampler(mols, **common_kwargs) + + # Add 2 zero-charge virtual sites to molecule 0 (the protein). All + # water molecules follow the protein, so their OpenMM indices shift by 2. + n_vs = 2 + mols_with_vs = mols.clone() + mols_with_vs.update(_add_vsites_to_mol(mols_with_vs[0], n_vs)) + + sampler = GCMCSampler(mols_with_vs, **common_kwargs) + + # Total vsite count must match what we added. + assert sampler._total_vsites == n_vs + + # _num_atoms must include the virtual site particles. + assert sampler._num_atoms == baseline._num_atoms + n_vs + + # Every water oxygen index is shifted by n_vs (waters follow the protein). + assert (sampler._water_indices == baseline._water_indices + n_vs).all() + + # Reference atoms are inside molecule 0 (the protein), which has no + # preceding vsites, so their OpenMM indices are unchanged. + assert (sampler._reference_indices == baseline._reference_indices).all()