deepmodeling · wanghan-iapcm · Sep 17, 2025 · Jul 30, 2025 · Jul 31, 2025 · Sep 16, 2025
diff --git a/dpdata/gaussian/fchk.py b/dpdata/gaussian/fchk.py
@@ -0,0 +1,175 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import numpy as np
+
+from dpdata.utils import open_file
+
+if TYPE_CHECKING:
+    from dpdata.utils import FileType
+
+from ..periodic_table import ELEMENTS
+from ..unit import (
+    EnergyConversion,
+    ForceConversion,
+    HessianConversion,
+    LengthConversion,
+)
+
+length_convert = LengthConversion("bohr", "angstrom").value()
+energy_convert = EnergyConversion("hartree", "eV").value()
+force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value()
+hessian_convert = HessianConversion("hartree/bohr^2", "eV/angstrom^2").value()
+
+
+def create_full_hessian(hessian_raw: list | np.ndarray, natoms: int) -> np.ndarray:
+    """
+    Reconstructs the full, symmetric Hessian matrix from a 1D array
+    containing its lower triangular elements.
+
+    Args:
+        hessian_raw (list | np.ndarray): A 1D list or NumPy array containing the
+                                         lower triangular elements (including the
+                                         diagonal) of the Hessian matrix.
+        natoms (int): The number of atoms in the system.
+
+    Returns
+    -------
+    np.ndarray: A full, symmetric (3*natoms, 3*natoms) Hessian matrix.
+
+    Raises
+    ------
+    ValueError: If the number of elements in `hessian_raw` does not match
+        the expected number for the lower triangle of a
+        (3*natoms, 3*natoms) matrix.
+    """
+    # Convert input to a NumPy array in case it's a list
+    hessian_block = np.array(hessian_raw)
+
+    # Calculate the dimension of the final matrix
+    dim = 3 * natoms
+
+    # Validate that the input data has the correct length
+    # A lower triangle of an n x n matrix has n*(n+1)/2 elements
+    expected_length = dim * (dim + 1) // 2
+    if hessian_block.size != expected_length:
+        raise ValueError(
+            f"Input length {hessian_block.size} != expected {expected_length}"
+        )
+
+    # Create a zero matrix, then fill the lower triangle
+    hessian_full = np.zeros((dim, dim), dtype=hessian_block.dtype)
+    lower_triangle_indices = np.tril_indices(dim)
+    hessian_full[lower_triangle_indices] = hessian_block
+
+    # This is done by copying the lower triangle to the upper triangle
+    # M_full = M_lower + M_lower.T - diag(M_lower)
+    hessian_full = hessian_full + hessian_full.T - np.diag(np.diag(hessian_full))
+
+    return hessian_full
+
+
+def to_system_data(file_name: FileType, has_forces=True, has_hessian=True):
+    """Read Gaussian fchk file.
+
+    Parameters
+    ----------
+    file_name : str
+        file name
+    has_forces : bool, default True
+        whether to read force
+        Note: Cartesian Gradient in fchk file is converted to forces by taking negative sign
+    has_hessian : bool, default True
+        whether to read hessian
+
-def to_system_data(file_name: FileType, has_forces=True, has_hessian=True):
-    """Read Gaussian fchk file.
-
-    Parameters
-    ----------
-    file_name : str
-        file name
-    has_forces : bool, default True
-        whether to read force
-        Note: Cartesian Gradient in fchk file is converted to forces by taking negative sign
-    has_hessian : bool, default True
-        whether to read hessian
+def to_system_data(file_name: FileType, has_forces=False, has_hessian=False):
+    """Read Gaussian fchk file.
+
+    Parameters
+    ----------
+    file_name : str
+        file name
+    has_forces : bool, default False
+        whether to read force
+        Note: Cartesian Gradient in fchk file is converted to forces by taking negative sign
+    has_hessian : bool, default False
+        whether to read hessian
+
-def to_system_data(file_name: FileType, has_forces=True, has_hessian=True):
-    """Read Gaussian fchk file.
-
-    Parameters
-    ----------
-    file_name : str
-        file name
-    has_forces : bool, default True
-        whether to read force
-        Note: Cartesian Gradient in fchk file is converted to forces by taking negative sign
-    has_hessian : bool, default True
-        whether to read hessian
+def to_system_data(file_name: FileType, has_forces=False, has_hessian=False):
+    """Read Gaussian fchk file.
+
+    Parameters
+    ----------
+    file_name : str
+        file name
+    has_forces : bool, default False
+        whether to read force
+        Note: Cartesian Gradient in fchk file is converted to forces by taking negative sign
+    has_hessian : bool, default False
+        whether to read hessian
+
+    Returns
+    -------
+    data : dict
+        system data, including hessian if has_hessian is True
+    """
+    data = {}
+    natoms = 0
+    atom_numbers = []
+    coords_t = []
+    energy_t = []
+    forces_t = []
+    hessian_t = []
+    # Read fchk file
+    with open_file(file_name) as fp:
+        for line in fp:
+            if isinstance(line, bytes):
+                line = line.decode(errors="ignore")
+            if "Number of atoms" in line:
+                natoms = int(line.split()[-1])
+            elif "Atomic numbers" in line and "I" in line:
+                n = int(line.split()[-1])
+                atom_numbers = []
+                while len(atom_numbers) < n:
+                    next_line = next(fp)
+                    if isinstance(next_line, bytes):
+                        next_line = next_line.decode(errors="ignore")
+                    atom_numbers += [int(x) for x in next_line.split()]
+            elif "Current cartesian coordinates" in line and "R" in line:
+                n = int(line.split()[-1])
+                coords_raw = []
+                while len(coords_raw) < n:
+                    next_line = next(fp)
+                    if isinstance(next_line, bytes):
+                        next_line = next_line.decode(errors="ignore")
+                    coords_raw += [float(x) for x in next_line.split()]
+                coords = np.array(coords_raw).reshape(-1, 3) * length_convert
+                coords_t.append(coords)
+            elif "Total Energy" in line:
+                energy = float(line.split()[-1]) * energy_convert
+                energy_t.append(energy)
+            elif "Cartesian Gradient" in line:
-            elif "Total Energy" in line:
-                energy = float(line.split()[-1]) * energy_convert
-                energy_t.append(energy)
-            elif "Cartesian Gradient" in line:
+            elif "Total Energy" in line:
+                # fchk may have scalar on the same line or next line when "N= 1" appears
+                if "N=" in line:
+                    next_line = next(fp)
+                    if isinstance(next_line, bytes):
+                        next_line = next_line.decode(errors="ignore")
+                    energy_val = float(next_line.split()[0])
+                else:
+                    energy_val = float(line.split()[-1])
+                energy_t.append(energy_val * energy_convert)
+            elif "Cartesian Gradient" in line:
-            elif "Total Energy" in line:
-                energy = float(line.split()[-1]) * energy_convert
-                energy_t.append(energy)
-            elif "Cartesian Gradient" in line:
+            elif "Total Energy" in line:
+                # fchk may have scalar on the same line or next line when "N= 1" appears
+                if "N=" in line:
+                    next_line = next(fp)
+                    if isinstance(next_line, bytes):
+                        next_line = next_line.decode(errors="ignore")
+                    energy_val = float(next_line.split()[0])
+                else:
+                    energy_val = float(line.split()[-1])
+                energy_t.append(energy_val * energy_convert)
+            elif "Cartesian Gradient" in line:
+                n = int(line.split()[-1])
+                forces_raw = []
+                while len(forces_raw) < n:
+                    next_line = next(fp)
+                    if isinstance(next_line, bytes):
+                        next_line = next_line.decode(errors="ignore")
+                    forces_raw += [float(x) for x in next_line.split()]
+                # Cartesian Gradient is the negative of forces: F = -∇E
+                forces = -np.array(forces_raw).reshape(-1, 3) * force_convert
+                forces_t.append(forces)
+            elif "Cartesian Force Constants" in line and "R" in line:
+                n = int(line.split()[-1])
+                hessian_raw = []
+                while len(hessian_raw) < n:
+                    next_line = next(fp)
+                    if isinstance(next_line, bytes):
+                        next_line = next_line.decode(errors="ignore")
+                    hessian_raw += [float(x) for x in next_line.split()]
+                hessian_full = (
+                    create_full_hessian(hessian_raw, natoms) * hessian_convert
+                )
+                # store as (natoms, 3, natoms, 3) to align with registered shape
+                hessian_t.append(hessian_full.reshape(natoms, 3, natoms, 3))
+    # Assert key data
+    assert coords_t, "cannot find coords"
+    assert energy_t, "cannot find energy"
+    if has_forces:
+        assert forces_t, "cannot find forces"
+    if has_hessian:
+        assert hessian_t, "cannot find hessian"
+    # Assemble data
+    atom_symbols = [ELEMENTS[z - 1] for z in atom_numbers]
+    atom_names, atom_types, atom_numbs = np.unique(
+        atom_symbols, return_inverse=True, return_counts=True
+    )
+    data["atom_names"] = list(atom_names)
+    data["atom_numbs"] = list(atom_numbs)
+    data["atom_types"] = atom_types
+    data["coords"] = np.array(coords_t).reshape(-1, natoms, 3)
+    data["orig"] = np.zeros(3)
+    data["cells"] = np.array([np.eye(3) * 100])
+    data["nopbc"] = True
+    if energy_t:
+        data["energies"] = np.array(energy_t)
+    if has_forces and forces_t:
+        data["forces"] = np.array(forces_t)
+    if has_hessian and hessian_t:
+        data["hessian"] = np.array(hessian_t)
+    return data
diff --git a/dpdata/plugins/gaussian.py b/dpdata/plugins/gaussian.py
@@ -5,8 +5,12 @@
 import tempfile
 from typing import TYPE_CHECKING
 
+import numpy as np
+
+import dpdata.gaussian.fchk
 import dpdata.gaussian.gjf
 import dpdata.gaussian.log
+from dpdata.data_type import Axis, DataType
 from dpdata.driver import Driver
 from dpdata.format import Format
 from dpdata.utils import open_file
@@ -15,6 +19,18 @@
     from dpdata.utils import FileType
 
 
+def register_hessian_data(data):
+    if "hessian" in data:
+        dt = DataType(
+            "hessian",
+            np.ndarray,
+            (Axis.NFRAMES, Axis.NATOMS, 3, Axis.NATOMS, 3),
+            required=False,
+            deepmd_name="hessian",
+        )
+        dpdata.LabeledSystem.register_data_type(dt)
+
+
 @Format.register("gaussian/log")
 class GaussianLogFormat(Format):
     def from_labeled_system(self, file_name: FileType, md=False, **kwargs):
@@ -24,6 +40,21 @@ def from_labeled_system(self, file_name: FileType, md=False, **kwargs):
             return {"energies": [], "forces": [], "nopbc": True}
 
 
+@Format.register("gaussian/fchk")
+class GaussianFChkFormat(Format):
+    def from_labeled_system(
+        self, file_name: FileType, has_forces=True, has_hessian=True, **kwargs
+    ):
+        try:
+            data = dpdata.gaussian.fchk.to_system_data(
+                file_name, has_forces=has_forces, has_hessian=has_hessian
+            )
+            register_hessian_data(data)
+            return data
+        except AssertionError:
+            return {"energies": [], "forces": [], "hessian": [], "nopbc": True}
+
+
 @Format.register("gaussian/md")
 class GaussianMDFormat(Format):
     def from_labeled_system(self, file_name: FileType, **kwargs):

diff --git a/dpdata/unit.py b/dpdata/unit.py
@@ -152,6 +152,34 @@ def __init__(self, unitA, unitB):
         self.set_value(econv / lconv)
 
 
+class HessianConversion(Conversion):
+    def __init__(self, unitA, unitB):
+        """Class for Hessian (second derivative) unit conversion.
+
+        Parameters
+        ----------
+        unitA, unitB : str
+            in format of "energy_unit/length_unit^2"
+
+        Examples
+        --------
+        >>> conv = HessianConversion("hartree/bohr^2", "eV/angstrom^2")
+        >>> conv.value()
+        97.1736242922823
+        """
+        super().__init__(unitA, unitB, check=False)
+        eunitA, lunitA = self._split_unit(unitA)
+        eunitB, lunitB = self._split_unit(unitB)
+        econv = EnergyConversion(eunitA, eunitB).value()
+        lconv = LengthConversion(lunitA, lunitB).value()
+        self.set_value(econv / lconv**2)
+
+    def _split_unit(self, unit):
+        eunit = unit.split("/")[0]
+        lunit = unit.split("/")[1][:-2]
+        return eunit, lunit
+
+
 class PressureConversion(Conversion):
     def __init__(self, unitA, unitB):
         """Class for pressure conversion.