Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 175 additions & 0 deletions dpdata/gaussian/fchk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np

from dpdata.utils import open_file

if TYPE_CHECKING:
from dpdata.utils import FileType

from ..periodic_table import ELEMENTS
from ..unit import (
EnergyConversion,
ForceConversion,
HessianConversion,
LengthConversion,
)

length_convert = LengthConversion("bohr", "angstrom").value()
energy_convert = EnergyConversion("hartree", "eV").value()
force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value()
hessian_convert = HessianConversion("hartree/bohr^2", "eV/angstrom^2").value()


def create_full_hessian(hessian_raw: list | np.ndarray, natoms: int) -> np.ndarray:
"""
Reconstructs the full, symmetric Hessian matrix from a 1D array
containing its lower triangular elements.

Args:
hessian_raw (list | np.ndarray): A 1D list or NumPy array containing the
lower triangular elements (including the
diagonal) of the Hessian matrix.
natoms (int): The number of atoms in the system.

Returns
-------
np.ndarray: A full, symmetric (3*natoms, 3*natoms) Hessian matrix.

Raises
------
ValueError: If the number of elements in `hessian_raw` does not match
the expected number for the lower triangle of a
(3*natoms, 3*natoms) matrix.
"""
# Convert input to a NumPy array in case it's a list
hessian_block = np.array(hessian_raw)

# Calculate the dimension of the final matrix
dim = 3 * natoms

# Validate that the input data has the correct length
# A lower triangle of an n x n matrix has n*(n+1)/2 elements
expected_length = dim * (dim + 1) // 2
if hessian_block.size != expected_length:
raise ValueError(
f"Input length {hessian_block.size} != expected {expected_length}"
)

# Create a zero matrix, then fill the lower triangle
hessian_full = np.zeros((dim, dim), dtype=hessian_block.dtype)
lower_triangle_indices = np.tril_indices(dim)
hessian_full[lower_triangle_indices] = hessian_block

# This is done by copying the lower triangle to the upper triangle
# M_full = M_lower + M_lower.T - diag(M_lower)
hessian_full = hessian_full + hessian_full.T - np.diag(np.diag(hessian_full))

return hessian_full


def to_system_data(file_name: FileType, has_forces=True, has_hessian=True):
"""Read Gaussian fchk file.

Parameters
----------
file_name : str
file name
has_forces : bool, default True
whether to read force
Note: Cartesian Gradient in fchk file is converted to forces by taking negative sign
has_hessian : bool, default True
whether to read hessian

Comment on lines +73 to +85
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Relax defaults for has_forces/has_hessian to avoid unexpected assertion failures.

Most fchk files won’t include gradients/Hessian. Defaulting to True is too strict for general use.

-def to_system_data(file_name: FileType, has_forces=True, has_hessian=True):
+def to_system_data(file_name: FileType, has_forces=False, has_hessian=False):
@@
-    has_forces : bool, default True
+    has_forces : bool, default False
@@
-    has_hessian : bool, default True
+    has_hessian : bool, default False
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def to_system_data(file_name: FileType, has_forces=True, has_hessian=True):
"""Read Gaussian fchk file.
Parameters
----------
file_name : str
file name
has_forces : bool, default True
whether to read force
Note: Cartesian Gradient in fchk file is converted to forces by taking negative sign
has_hessian : bool, default True
whether to read hessian
def to_system_data(file_name: FileType, has_forces=False, has_hessian=False):
"""Read Gaussian fchk file.
Parameters
----------
file_name : str
file name
has_forces : bool, default False
whether to read force
Note: Cartesian Gradient in fchk file is converted to forces by taking negative sign
has_hessian : bool, default False
whether to read hessian
🤖 Prompt for AI Agents
In dpdata/gaussian/fchk.py around lines 73 to 85, the function to_system_data
currently defaults has_forces and has_hessian to True which causes unexpected
assertion failures for fchk files lacking gradients/Hessians; change the
parameter defaults to has_forces=False and has_hessian=False, update the
docstring to reflect the new defaults, and replace any unconditional assertions
that assume presence of gradient/Hessian with conditional checks that only
validate or convert those sections when the data actually exists (e.g., check
keys/flags in the parsed fchk content before accessing or asserting).

Returns
-------
data : dict
system data, including hessian if has_hessian is True
"""
data = {}
natoms = 0
atom_numbers = []
coords_t = []
energy_t = []
forces_t = []
hessian_t = []
# Read fchk file
with open_file(file_name) as fp:
for line in fp:
if isinstance(line, bytes):
line = line.decode(errors="ignore")
if "Number of atoms" in line:
natoms = int(line.split()[-1])
elif "Atomic numbers" in line and "I" in line:
n = int(line.split()[-1])
atom_numbers = []
while len(atom_numbers) < n:
next_line = next(fp)
if isinstance(next_line, bytes):
next_line = next_line.decode(errors="ignore")
atom_numbers += [int(x) for x in next_line.split()]
elif "Current cartesian coordinates" in line and "R" in line:
n = int(line.split()[-1])
coords_raw = []
while len(coords_raw) < n:
next_line = next(fp)
if isinstance(next_line, bytes):
next_line = next_line.decode(errors="ignore")
coords_raw += [float(x) for x in next_line.split()]
coords = np.array(coords_raw).reshape(-1, 3) * length_convert
coords_t.append(coords)
elif "Total Energy" in line:
energy = float(line.split()[-1]) * energy_convert
energy_t.append(energy)
elif "Cartesian Gradient" in line:
Comment on lines +123 to +126
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Fix fragile “Total Energy” parsing to handle fchk scalar on next line (N= 1).

Many fchk files put scalar values on the next line when the header includes “N= 1”. Current code can read “1” instead of the energy. Parse robustly as below.

-            elif "Total Energy" in line:
-                energy = float(line.split()[-1]) * energy_convert
-                energy_t.append(energy)
+            elif "Total Energy" in line:
+                # fchk may have scalar on the same line or next line when "N= 1" appears
+                if "N=" in line:
+                    next_line = next(fp)
+                    if isinstance(next_line, bytes):
+                        next_line = next_line.decode(errors="ignore")
+                    energy_val = float(next_line.split()[0])
+                else:
+                    energy_val = float(line.split()[-1])
+                energy_t.append(energy_val * energy_convert)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
elif "Total Energy" in line:
energy = float(line.split()[-1]) * energy_convert
energy_t.append(energy)
elif "Cartesian Gradient" in line:
elif "Total Energy" in line:
# fchk may have scalar on the same line or next line when "N= 1" appears
if "N=" in line:
next_line = next(fp)
if isinstance(next_line, bytes):
next_line = next_line.decode(errors="ignore")
energy_val = float(next_line.split()[0])
else:
energy_val = float(line.split()[-1])
energy_t.append(energy_val * energy_convert)
elif "Cartesian Gradient" in line:

n = int(line.split()[-1])
forces_raw = []
while len(forces_raw) < n:
next_line = next(fp)
if isinstance(next_line, bytes):
next_line = next_line.decode(errors="ignore")
forces_raw += [float(x) for x in next_line.split()]
# Cartesian Gradient is the negative of forces: F = -∇E
forces = -np.array(forces_raw).reshape(-1, 3) * force_convert
forces_t.append(forces)
elif "Cartesian Force Constants" in line and "R" in line:
n = int(line.split()[-1])
hessian_raw = []
while len(hessian_raw) < n:
next_line = next(fp)
if isinstance(next_line, bytes):
next_line = next_line.decode(errors="ignore")
hessian_raw += [float(x) for x in next_line.split()]
hessian_full = (
create_full_hessian(hessian_raw, natoms) * hessian_convert
)
# store as (natoms, 3, natoms, 3) to align with registered shape
hessian_t.append(hessian_full.reshape(natoms, 3, natoms, 3))
# Assert key data
assert coords_t, "cannot find coords"
assert energy_t, "cannot find energy"
if has_forces:
assert forces_t, "cannot find forces"
if has_hessian:
assert hessian_t, "cannot find hessian"
# Assemble data
atom_symbols = [ELEMENTS[z - 1] for z in atom_numbers]
atom_names, atom_types, atom_numbs = np.unique(
atom_symbols, return_inverse=True, return_counts=True
)
data["atom_names"] = list(atom_names)
data["atom_numbs"] = list(atom_numbs)
data["atom_types"] = atom_types
data["coords"] = np.array(coords_t).reshape(-1, natoms, 3)
data["orig"] = np.zeros(3)
data["cells"] = np.array([np.eye(3) * 100])
data["nopbc"] = True
if energy_t:
data["energies"] = np.array(energy_t)
if has_forces and forces_t:
data["forces"] = np.array(forces_t)
if has_hessian and hessian_t:
data["hessian"] = np.array(hessian_t)
return data
31 changes: 31 additions & 0 deletions dpdata/plugins/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,12 @@
import tempfile
from typing import TYPE_CHECKING

import numpy as np

import dpdata.gaussian.fchk
import dpdata.gaussian.gjf
import dpdata.gaussian.log
from dpdata.data_type import Axis, DataType
from dpdata.driver import Driver
from dpdata.format import Format
from dpdata.utils import open_file
Expand All @@ -15,6 +19,18 @@
from dpdata.utils import FileType


def register_hessian_data(data):
if "hessian" in data:
dt = DataType(
"hessian",
np.ndarray,
(Axis.NFRAMES, Axis.NATOMS, 3, Axis.NATOMS, 3),
required=False,
deepmd_name="hessian",
)
dpdata.LabeledSystem.register_data_type(dt)


@Format.register("gaussian/log")
class GaussianLogFormat(Format):
def from_labeled_system(self, file_name: FileType, md=False, **kwargs):
Expand All @@ -24,6 +40,21 @@ def from_labeled_system(self, file_name: FileType, md=False, **kwargs):
return {"energies": [], "forces": [], "nopbc": True}


@Format.register("gaussian/fchk")
class GaussianFChkFormat(Format):
def from_labeled_system(
self, file_name: FileType, has_forces=True, has_hessian=True, **kwargs
):
try:
data = dpdata.gaussian.fchk.to_system_data(
file_name, has_forces=has_forces, has_hessian=has_hessian
)
register_hessian_data(data)
return data
except AssertionError:
return {"energies": [], "forces": [], "hessian": [], "nopbc": True}


@Format.register("gaussian/md")
class GaussianMDFormat(Format):
def from_labeled_system(self, file_name: FileType, **kwargs):
Expand Down
28 changes: 28 additions & 0 deletions dpdata/unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,34 @@ def __init__(self, unitA, unitB):
self.set_value(econv / lconv)


class HessianConversion(Conversion):
def __init__(self, unitA, unitB):
"""Class for Hessian (second derivative) unit conversion.

Parameters
----------
unitA, unitB : str
in format of "energy_unit/length_unit^2"

Examples
--------
>>> conv = HessianConversion("hartree/bohr^2", "eV/angstrom^2")
>>> conv.value()
97.1736242922823
"""
super().__init__(unitA, unitB, check=False)
eunitA, lunitA = self._split_unit(unitA)
eunitB, lunitB = self._split_unit(unitB)
econv = EnergyConversion(eunitA, eunitB).value()
lconv = LengthConversion(lunitA, lunitB).value()
self.set_value(econv / lconv**2)

def _split_unit(self, unit):
eunit = unit.split("/")[0]
lunit = unit.split("/")[1][:-2]
return eunit, lunit


class PressureConversion(Conversion):
def __init__(self, unitA, unitB):
"""Class for pressure conversion.
Expand Down
Loading