Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Changelog
-------------------------------------------------------------------------------------

* Add support for getting and restoring sampling statistics.
* Handle XED force field virtual sites.

[2025.2.0](https://github.com/openbiosim/loch/compare/2025.1.0...2025.2.0) - Feb 2026
-------------------------------------------------------------------------------------
Expand Down
135 changes: 122 additions & 13 deletions src/loch/_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,33 @@ def __init__(
except Exception as e:
raise ValueError(f"Could not prepare the system for GCMC sampling: {e}")

# Compute per-molecule virtual site information. Virtual sites are
# appended after the real atoms of each molecule in the OpenMM system,
# so all subsequent molecules have their OpenMM particle indices shifted
# by the cumulative number of virtual sites in preceding molecules.
(
self._total_vsites,
self._vsite_atom_offsets,
self._mol_vsite_charges,
) = self._get_vsite_offsets(self._system)

if self._total_vsites > 0:
# Offset water oxygen indices from Sire atom indices to OpenMM
# particle indices.
self._water_indices = (
self._water_indices + self._vsite_atom_offsets[self._water_indices]
)

# Apply the same correction to the reference atom indices.
if self._reference is not None:
self._reference_indices = (
self._reference_indices
+ self._vsite_atom_offsets[self._reference_indices]
)

# Update the total atom count to include virtual site particles.
self._num_atoms = self._system.num_atoms() + self._total_vsites

# Validate the platform parameter.
valid_platforms = {"auto", "cuda", "opencl"}

Expand Down Expand Up @@ -1102,6 +1129,13 @@ def reset(self) -> None:
self._num_accepted_insertions = 0
self._num_accepted_deletions = 0

# Clear the forces.
self._nonbonded_force = None
self._custom_nonbonded_force = None

# Clear the OpenMM context.
self._openmm_context = None

def restore_stats(self, stats: dict) -> None:
"""
Restore sampler statistics from a dictionary.
Expand Down Expand Up @@ -1140,17 +1174,11 @@ def get_stats(self) -> dict:
"num_accepted_deletions": self._num_accepted_deletions,
}

# Clear the forces.
self._nonbonded_force = None
self._custom_nonbonded_force = None

# Clear the OpenMM context.
self._openmm_context = None

def ghost_residues(self) -> _np.ndarray:
"""
Return the current indices of the ghost water residues in the OpenMM
context.
Return the residue indices of the current ghost waters in the input
topology. These are Sire/BioSimSpace residue indices and do not
include any virtual site particles that were added on context creation.

Returns
-------
Expand Down Expand Up @@ -1785,6 +1813,75 @@ def _get_box_information(self, space):

return cell_matrix, cell_matrix_inverse, M

@staticmethod
def _get_vsite_offsets(system):
"""
Compute per-atom OpenMM index offsets due to virtual sites.

In OpenMM, virtual site particles are appended after the real atoms
of each molecule. Molecules that appear after a molecule with virtual
sites therefore have their OpenMM particle indices shifted relative
to their Sire atom indices.

Parameters
----------

system: sire.system.System
The molecular system.

Returns
-------

total_vsites: int
Total number of virtual site particles in the system.

atom_offsets: numpy.ndarray
Array of shape (num_sire_atoms,) where entry i is the
cumulative number of virtual sites in all molecules that
precede the molecule containing Sire atom i. Adding this
offset to a Sire atom index yields the corresponding OpenMM
particle index.

mol_vsite_charges: dict
Mapping from molecule number to a list of virtual site charges in
units of elementary charge. Only molecules that carry virtual sites
appear as keys; all other molecules are absent from the dict.
"""
n_sire_atoms = system.num_atoms()
atom_offsets = _np.zeros(n_sire_atoms, dtype=_np.int32)
mol_vsite_charges = {}
total_vsites = 0

try:
vsite_mols = system["property n_virtual_sites"].molecules()
except Exception:
# No molecules carry virtual sites.
return 0, atom_offsets, mol_vsite_charges

all_atoms = system.atoms()

for mol in vsite_mols:
n_vs = int(mol.property("n_virtual_sites"))
if n_vs <= 0:
continue

# Locate where this molecule's atoms sit in the global index space,
# then shift every subsequent atom's offset by n_vs in one operation.
mol_start = int(_np.array(all_atoms.find(mol.atoms()))[0])
mol_end = mol_start + mol.num_atoms()
atom_offsets[mol_end:] += n_vs
total_vsites += n_vs

try:
raw_charges = mol.property("vs_charges")
vs_charges = [float(raw_charges[k]) for k in range(n_vs)]
except Exception:
vs_charges = [0.0] * n_vs

mol_vsite_charges[mol.number()] = vs_charges

return total_vsites, atom_offsets, mol_vsite_charges

@staticmethod
def _get_reference_indices(system, reference):
"""
Expand Down Expand Up @@ -1937,6 +2034,10 @@ def _initialise_gpu_memory(self):
for q in mol.property("charge"):
charges[i] = q.value()
i += 1
# Append virtual site charges (zero LJ, non-zero charge).
for vc in self._mol_vsite_charges.get(mol.number(), []):
charges[i] = vc
i += 1

# Convert to a GPU array.
charges = self._backend.to_gpu(charges.astype(_np.float32))
Expand All @@ -1954,6 +2055,12 @@ def _initialise_gpu_memory(self):
sigmas[i] = lj.sigma().value()
epsilons[i] = lj.epsilon().value()
i += 1
# Virtual sites have zero LJ. Use sigma=1.0 Å as a
# nominal placeholder (epsilon=0 so it has no effect).
for _ in self._mol_vsite_charges.get(mol.number(), []):
sigmas[i] = 1.0
epsilons[i] = 0.0
i += 1

# Convert to GPU arrays.
sigmas = self._backend.to_gpu(sigmas.astype(_np.float32))
Expand Down Expand Up @@ -2063,8 +2170,9 @@ def _initialise_gpu_memory(self):

# This is a null LJ parameter.
if _np.isclose(lj.epsilon().value(), 0.0):
idx = atoms.find(atom)
is_ghost_fep[idx] = 1
sire_idx = atoms.find(atom)
omm_idx = sire_idx + int(self._vsite_atom_offsets[sire_idx])
is_ghost_fep[omm_idx] = 1

# The charge at the perturbed state is zero.
elif _np.isclose(charge1, 0.0):
Expand All @@ -2073,8 +2181,9 @@ def _initialise_gpu_memory(self):

# This is a null LJ parameter.
if _np.isclose(lj.epsilon().value(), 0.0):
idx = atoms.find(atom)
is_ghost_fep[idx] = 1
sire_idx = atoms.find(atom)
omm_idx = sire_idx + int(self._vsite_atom_offsets[sire_idx])
is_ghost_fep[omm_idx] = 1

# Convert to GPU array.
is_ghost_fep = self._backend.to_gpu(is_ghost_fep.astype(_np.int32))
Expand Down
160 changes: 160 additions & 0 deletions tests/test_vsites.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
import os

import pytest
import sire as sr

from loch import GCMCSampler


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _add_vsites_to_mol(mol, n_vs):
"""
Return a copy of mol with n_vs zero-charge virtual sites all parented to
atom 0.

Using atom 0 for all parent references works for any molecule regardless
of its size. Virtual sites have zero charge so they do not affect energies,
making it possible to compare sampler results with and without virtual sites.
"""
n_atoms = mol.num_atoms()

# LocalCoordinatesSite requires three parent atoms; clamp to valid range so
# this helper works even for single-atom molecules (e.g. ions).
p1 = min(1, n_atoms - 1)
p2 = min(2, n_atoms - 1)

vsite_dict = {
str(k): {
"vs_indices": [0, p1, p2],
"vs_ows": [1, 0, 0],
"vs_xs": [1, -1, 0],
"vs_ys": [0, 1, -1],
# Offset each vsite slightly so their positions are distinct.
"vs_local": [(k + 1) * 0.03, 0, 0],
}
for k in range(n_vs)
}

# All vsites are children of atom 0.
parents = {str(i): [] for i in range(n_atoms)}
parents["0"] = list(range(n_vs))

cursor = mol.cursor()
cursor.set("n_virtual_sites", n_vs)
cursor.set("vs_charges", [0.0] * n_vs)
cursor.set("virtual_sites", vsite_dict)
cursor.set("parents", parents)
return cursor.commit()


# ---------------------------------------------------------------------------
# Unit tests for _get_vsite_offsets (no GPU required)
# ---------------------------------------------------------------------------


def test_get_vsite_offsets_no_vsites():
"""
_get_vsite_offsets on a system with no virtual sites returns all-zero
offsets and empty per-molecule charge lists.
"""
mols = sr.load_test_files("bpti.prm7", "bpti.rst7")

total_vsites, offsets, mol_vsite_charges = GCMCSampler._get_vsite_offsets(mols)

assert total_vsites == 0
assert offsets.shape == (mols.num_atoms(),)
assert (offsets == 0).all()
assert mol_vsite_charges == {}


def test_get_vsite_offsets_with_vsites():
"""
Adding N virtual sites to molecule 0 gives a zero offset for every atom
inside that molecule and an offset of N for all atoms in subsequent
molecules.
"""
mols = sr.load_test_files("bpti.prm7", "bpti.rst7")

n_vs = 2
n_atoms_mol0 = mols[0].num_atoms()

mols_with_vs = mols.clone()
mols_with_vs.update(_add_vsites_to_mol(mols_with_vs[0], n_vs))

total_vsites, offsets, mol_vsite_charges = GCMCSampler._get_vsite_offsets(
mols_with_vs
)

assert total_vsites == n_vs

# Atoms in molecule 0 precede no vsite-bearing molecule, so offset is 0.
assert (offsets[:n_atoms_mol0] == 0).all()

# All atoms in molecules 1..N follow molecule 0 and are shifted by n_vs.
assert (offsets[n_atoms_mol0:] == n_vs).all()

# Only molecule 0 appears in the dict, with n_vs zero charges.
assert len(mol_vsite_charges) == 1
assert list(mol_vsite_charges.values())[0] == [0.0] * n_vs


# ---------------------------------------------------------------------------
# Integration tests (GPU required)
# ---------------------------------------------------------------------------


@pytest.mark.skipif(
"CUDA_VISIBLE_DEVICES" not in os.environ,
reason="Requires CUDA enabled GPU.",
)
@pytest.mark.parametrize("platform", ["cuda", "opencl"])
def test_vsite_index_offsets(bpti, platform):
"""
GCMCSampler correctly offsets _water_indices and _reference_indices when
virtual sites are present on a preceding molecule.

In BPTI the protein is molecule 0 and all water molecules follow it.
Adding N vsites to the protein shifts every water's OpenMM particle index
by N. The reference atoms also live in the protein, so their OpenMM
indices are not shifted (no vsites precede molecule 0).
"""
mols, reference = bpti

common_kwargs = dict(
reference=reference,
cutoff_type="rf",
cutoff="10 A",
ghost_file=None,
log_file=None,
test=True,
platform=platform,
seed=42,
)

# Baseline: no virtual sites.
baseline = GCMCSampler(mols, **common_kwargs)

# Add 2 zero-charge virtual sites to molecule 0 (the protein). All
# water molecules follow the protein, so their OpenMM indices shift by 2.
n_vs = 2
mols_with_vs = mols.clone()
mols_with_vs.update(_add_vsites_to_mol(mols_with_vs[0], n_vs))

sampler = GCMCSampler(mols_with_vs, **common_kwargs)

# Total vsite count must match what we added.
assert sampler._total_vsites == n_vs

# _num_atoms must include the virtual site particles.
assert sampler._num_atoms == baseline._num_atoms + n_vs

# Every water oxygen index is shifted by n_vs (waters follow the protein).
assert (sampler._water_indices == baseline._water_indices + n_vs).all()

# Reference atoms are inside molecule 0 (the protein), which has no
# preceding vsites, so their OpenMM indices are unchanged.
assert (sampler._reference_indices == baseline._reference_indices).all()
Loading