Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions dpdata/deepmd/comp.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import glob
import os
import shutil
import warnings

import numpy as np

import dpdata

from .raw import load_type


Expand Down Expand Up @@ -60,6 +63,40 @@ def to_system_data(folder, type_map=None, labels=True):
data["forces"] = np.concatenate(all_forces, axis=0)
if len(all_virs) > 0:
data["virials"] = np.concatenate(all_virs, axis=0)
# allow custom dtypes
if labels:
for dtype in dpdata.system.LabeledSystem.DTYPES:
if dtype.name in (
"atom_numbs",
"atom_names",
"atom_types",
"orig",
"cells",
"coords",
"real_atom_types",
"real_atom_names",
"nopbc",
"energies",
"forces",
"virials",
):
# skip as these data contains specific rules
continue
if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES):
warnings.warn(
f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/npy format."
)
continue
shape = [
-1 if xx == dpdata.system.Axis.NATOMS else xx for xx in dtype.shape[1:]
]
all_data = []
for ii in sets:
tmp = _cond_load_data(os.path.join(ii, dtype.name + ".npy"))
if tmp is not None:
all_data.append(np.reshape(tmp, [tmp.shape[0], *shape]))
if len(all_data) > 0:
data[dtype.name] = np.concatenate(all_data, axis=0)
return data


Expand Down Expand Up @@ -131,3 +168,34 @@ def dump(folder, data, set_size=5000, comp_prec=np.float32, remove_sets=True):
if data.get("nopbc", False):
with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc:
pass
# allow custom dtypes
for dtype in dpdata.system.LabeledSystem.DTYPES:
if dtype.name in (
"atom_numbs",
"atom_names",
"atom_types",
"orig",
"cells",
"coords",
"real_atom_types",
"real_atom_names",
"nopbc",
"energies",
"forces",
"virials",
):
# skip as these data contains specific rules
continue
if dtype.name not in data:
continue
if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES):
warnings.warn(
f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted to deepmd/npy format."
)
continue
ddata = np.reshape(data[dtype.name], [nframes, -1]).astype(comp_prec)
for ii in range(nsets):
set_stt = ii * set_size
set_end = (ii + 1) * set_size
set_folder = os.path.join(folder, "set.%03d" % ii)
np.save(os.path.join(set_folder, dtype.name), ddata[set_stt:set_end])
64 changes: 64 additions & 0 deletions dpdata/deepmd/hdf5.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
"""Utils for deepmd/hdf5 format."""
import warnings
from typing import Optional, Union

import h5py
import numpy as np
from wcmatch.glob import globfilter

import dpdata

__all__ = ["to_system_data", "dump"]


Expand Down Expand Up @@ -92,6 +95,36 @@ def to_system_data(
"required": False,
},
}
# allow custom dtypes
for dtype in dpdata.system.LabeledSystem.DTYPES:
if dtype.name in (
"atom_numbs",
"atom_names",
"atom_types",
"orig",
"cells",
"coords",
"real_atom_types",
"real_atom_names",
"nopbc",
"energies",
"forces",
"virials",
):
# skip as these data contains specific rules
continue
if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES):
warnings.warn(
f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/hdf5 format."
)
continue

data_types[dtype.name] = {
"fn": dtype.name,
"labeled": True,
"shape": dtype.shape[1:],
"required": False,
}

for dt, prop in data_types.items():
all_data = []
Expand Down Expand Up @@ -167,6 +200,37 @@ def dump(
"forces": {"fn": "force", "shape": (nframes, -1), "dump": True},
"virials": {"fn": "virial", "shape": (nframes, 9), "dump": True},
}

# allow custom dtypes
for dtype in dpdata.system.LabeledSystem.DTYPES:
if dtype.name in (
"atom_numbs",
"atom_names",
"atom_types",
"orig",
"cells",
"coords",
"real_atom_types",
"real_atom_names",
"nopbc",
"energies",
"forces",
"virials",
):
# skip as these data contains specific rules
continue
if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES):
warnings.warn(
f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted to deepmd/hdf5 format."
)
continue

data_types[dtype.name] = {
"fn": dtype.name,
"shape": (nframes, -1),
"dump": True,
}

for dt, prop in data_types.items():
if dt in data:
if prop["dump"]:
Expand Down
65 changes: 65 additions & 0 deletions dpdata/deepmd/raw.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import os
import warnings

import numpy as np

import dpdata


def load_type(folder, type_map=None):
data = {}
Expand Down Expand Up @@ -57,6 +60,41 @@ def to_system_data(folder, type_map=None, labels=True):
data["virials"] = np.reshape(data["virials"], [nframes, 3, 3])
if os.path.isfile(os.path.join(folder, "nopbc")):
data["nopbc"] = True
# allow custom dtypes
if labels:
for dtype in dpdata.system.LabeledSystem.DTYPES:
if dtype.name in (
"atom_numbs",
"atom_names",
"atom_types",
"orig",
"cells",
"coords",
"real_atom_types",
"real_atom_names",
"nopbc",
"energies",
"forces",
"virials",
):
# skip as these data contains specific rules
continue
if not (
len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES
):
warnings.warn(
f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/raw format."
)
continue
shape = [
-1 if xx == dpdata.system.Axis.NATOMS else xx
for xx in dtype.shape[1:]
]
if os.path.exists(os.path.join(folder, f"{dtype.name}.raw")):
data[dtype.name] = np.reshape(
np.loadtxt(os.path.join(folder, f"{dtype.name}.raw")),
[nframes, *shape],
)
return data
else:
raise RuntimeError("not dir " + folder)
Expand Down Expand Up @@ -102,3 +140,30 @@ def dump(folder, data):
if data.get("nopbc", False):
with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc:
pass
# allow custom dtypes
for dtype in dpdata.system.LabeledSystem.DTYPES:
if dtype.name in (
"atom_numbs",
"atom_names",
"atom_types",
"orig",
"cells",
"coords",
"real_atom_types",
"real_atom_names",
"nopbc",
"energies",
"forces",
"virials",
):
# skip as these data contains specific rules
continue
if dtype.name not in data:
continue
if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES):
warnings.warn(
f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted to deepmd/raw format."
)
continue
ddata = np.reshape(data[dtype.name], [nframes, -1])
np.savetxt(os.path.join(folder, f"{dtype.name}.raw"), ddata)
53 changes: 53 additions & 0 deletions tests/test_custom_data_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import unittest

import h5py
import numpy as np

import dpdata
from dpdata.system import Axis, DataType


class TestDeepmdLoadDumpComp(unittest.TestCase):
    """Round-trip a custom ``foo`` data type through the deepmd formats.

    A ``DataType`` named ``foo`` with shape ``(nframes, 2, 4)`` is temporarily
    appended to ``LabeledSystem.DTYPES``; each test dumps the system to
    deepmd/raw, deepmd/npy or deepmd/hdf5 and checks that ``foo`` is written
    and/or read back unchanged.
    """

    def setUp(self):
        # Remember the original registry and restore it via addCleanup:
        # cleanups also run when a later line of setUp raises (tearDown does
        # not), so the patched DTYPES can never leak into other tests.
        self.backup = dpdata.system.LabeledSystem.DTYPES
        self.addCleanup(setattr, dpdata.system.LabeledSystem, "DTYPES", self.backup)
        dpdata.system.LabeledSystem.DTYPES = self.backup + (
            DataType("foo", np.ndarray, (Axis.NFRAMES, 2, 4), required=False),
        )
        self.system = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar")
        self.foo = np.ones((len(self.system), 2, 4))
        self.system.data["foo"] = self.foo
        self.system.check_data()

    def tearDown(self) -> None:
        # Imported locally so this class does not depend on the module's
        # import block being changed.
        import os
        import shutil

        # Remove artifacts written by the tests; a stale ``foo`` file from a
        # previous run could otherwise let a ``to_*`` test pass even if the
        # dump no longer writes the custom data.
        shutil.rmtree("data_foo", ignore_errors=True)
        try:
            os.remove("data_foo.h5")
        except FileNotFoundError:
            pass

    def test_to_deepmd_raw(self):
        """``foo`` is written to ``foo.raw`` as flattened per-frame rows."""
        self.system.to_deepmd_raw("data_foo")
        foo = np.loadtxt("data_foo/foo.raw")
        np.testing.assert_allclose(foo.reshape(self.foo.shape), self.foo)

    def test_from_deepmd_raw(self):
        """``foo`` survives a deepmd/raw dump followed by a load."""
        self.system.to_deepmd_raw("data_foo")
        x = dpdata.LabeledSystem("data_foo", fmt="deepmd/raw")
        np.testing.assert_allclose(x.data["foo"], self.foo)

    def test_to_deepmd_npy(self):
        """``foo`` is written to ``set.000/foo.npy``."""
        self.system.to_deepmd_npy("data_foo")
        foo = np.load("data_foo/set.000/foo.npy")
        np.testing.assert_allclose(foo.reshape(self.foo.shape), self.foo)

    def test_from_deepmd_npy(self):
        """``foo`` survives a deepmd/npy dump followed by a load."""
        self.system.to_deepmd_npy("data_foo")
        x = dpdata.LabeledSystem("data_foo", fmt="deepmd/npy")
        np.testing.assert_allclose(x.data["foo"], self.foo)

    def test_to_deepmd_hdf5(self):
        """``foo`` is written to the ``set.000/foo.npy`` dataset of the HDF5 file."""
        self.system.to_deepmd_hdf5("data_foo.h5")
        # Open read-only explicitly; the default mode differs between h5py versions.
        with h5py.File("data_foo.h5", "r") as f:
            foo = f["set.000/foo.npy"][:]
        np.testing.assert_allclose(foo.reshape(self.foo.shape), self.foo)

    def test_from_deepmd_hdf5(self):
        """``foo`` survives a deepmd/hdf5 dump followed by a load."""
        self.system.to_deepmd_hdf5("data_foo.h5")
        x = dpdata.LabeledSystem("data_foo.h5", fmt="deepmd/hdf5")
        np.testing.assert_allclose(x.data["foo"], self.foo)