diff --git a/docs/changes/newsfragments/7725.improved b/docs/changes/newsfragments/7725.improved new file mode 100644 index 00000000000..81bdc30caa7 --- /dev/null +++ b/docs/changes/newsfragments/7725.improved @@ -0,0 +1,6 @@ +Parameters using ``has_control_of`` are now correctly handled when exporting to +xarray. Controlled parameters are no longer treated as independent top-level +parameters, preventing duplicate data rows. Additionally, inferred parameters +are now included as data variables in the xarray dataset when exporting via the +pandas-based path, and a warning is logged when the inferred parameter data size +does not match its parent parameter. diff --git a/src/qcodes/dataset/descriptions/dependencies.py b/src/qcodes/dataset/descriptions/dependencies.py index 3ec82477dc0..43c374c5703 100644 --- a/src/qcodes/dataset/descriptions/dependencies.py +++ b/src/qcodes/dataset/descriptions/dependencies.py @@ -287,6 +287,16 @@ def top_level_parameters(self) -> tuple[ParamSpecBase, ...]: for node_id, in_degree in self._dependency_subgraph.in_degree if in_degree == 0 } + # Parameters that are inferred from other parameters (have outgoing + # edges in the inference subgraph) should not be independent top-level + # parameters, since their data is part of the tree of the parameter + # they are inferred from. + parameters_inferred_from_others = { + self._node_to_paramspec(node_id) + for node_id, out_degree in self._inference_subgraph.out_degree + if out_degree > 0 + } + dependency_top_level = dependency_top_level - parameters_inferred_from_others standalone_top_level = { self._node_to_paramspec(node_id) for node_id, degree in self._graph.degree diff --git a/src/qcodes/dataset/exporters/export_to_xarray.py b/src/qcodes/dataset/exporters/export_to_xarray.py index 03a79b51b39..8854b49749c 100644 --- a/src/qcodes/dataset/exporters/export_to_xarray.py +++ b/src/qcodes/dataset/exporters/export_to_xarray.py @@ -6,6 +6,7 @@ from math import prod from typing import TYPE_CHECKING, Literal +import numpy as np from packaging import version as p_version from qcodes.dataset.linked_datasets.links import links_to_str @@ -61,6 +62,82 @@ def _calculate_index_shape(idx: pd.Index | pd.MultiIndex) -> dict[Hashable, int] return expanded_shape +def _add_inferred_data_vars( + dataset: DataSetProtocol, + name: str, + sub_dict: Mapping[str, npt.NDArray], + xr_dataset: xr.Dataset, +) -> xr.Dataset: + """Add inferred parameters as data variables to an xarray dataset. + + Parameters that are inferred from the top-level measurement parameter + and present in sub_dict but not yet in the dataset are added as data + variables along the existing dimensions. + """ + + interdeps = dataset.description.interdeps + meas_paramspec = interdeps.graph.nodes[name]["value"] + _, deps, inferred = interdeps.all_parameters_in_tree_by_group(meas_paramspec) + + dep_names = {dep.name for dep in deps} + dims = tuple(d for d in xr_dataset.dims) + + for inf in inferred: + if inf.name in dep_names: + continue + if inf.name in xr_dataset: + continue + if inf.name not in sub_dict: + continue + + inf_data = sub_dict[inf.name] + if inf_data.dtype == np.dtype("O"): + try: + flat = np.concatenate(inf_data) + except ValueError: + flat = inf_data.ravel() + else: + flat = inf_data.ravel() + + # Only add if the data has the same size as one of the parameters + # it is inferred from. A parameter may be inferred from multiple + # parents so we iterate over all of them. + inferred_from_params = interdeps.inferences.get(inf) + if not inferred_from_params: + continue + + matched_parent = False + for parent in inferred_from_params: + if parent.name not in sub_dict: + continue + expected_size = sub_dict[parent.name].ravel().shape[0] + if flat.shape[0] == expected_size: + xr_dataset[inf.name] = ( + dims, + flat.reshape(tuple(xr_dataset.sizes[d] for d in dims)), + ) + matched_parent = True + break + + if not matched_parent: + available_parents = [ + p.name for p in inferred_from_params if p.name in sub_dict + ] + _LOG.warning( + "Cannot add inferred parameter '%s' to xarray dataset for '%s' " + "(run_id=%s): data size %d does not match any of its parent " + "parameters %s. This is likely a user error in the measurement " + "setup.", + inf.name, + name, + dataset.run_id, + flat.shape[0], + available_parents, + ) + + return xr_dataset + + def _load_to_xarray_dataset_dict_no_metadata( dataset: DataSetProtocol, datadict: Mapping[str, Mapping[str, npt.NDArray]], @@ -100,7 +177,9 @@ def _load_to_xarray_dataset_dict_no_metadata( interdeps=dataset.description.interdeps, dependent_parameter=name, ).to_xarray() - xr_dataset_dict[name] = xr_dataset + xr_dataset_dict[name] = _add_inferred_data_vars( + dataset, name, sub_dict, xr_dataset + ) elif index_is_unique: df = _data_to_dataframe( sub_dict, @@ -108,9 +187,12 @@ def _load_to_xarray_dataset_dict_no_metadata( interdeps=dataset.description.interdeps, dependent_parameter=name, ) - xr_dataset_dict[name] = _xarray_data_set_from_pandas_multi_index( + xr_dataset = _xarray_data_set_from_pandas_multi_index( dataset, use_multi_index, name, df, index ) + xr_dataset_dict[name] = _add_inferred_data_vars( + dataset, name, sub_dict, xr_dataset + ) else: df = _data_to_dataframe( sub_dict, @@ -118,7 +200,10 @@ def _load_to_xarray_dataset_dict_no_metadata( interdeps=dataset.description.interdeps, dependent_parameter=name, ) - xr_dataset_dict[name] = df.reset_index().to_xarray() + xr_dataset = df.reset_index().to_xarray() + xr_dataset_dict[name] = _add_inferred_data_vars( + dataset, name, sub_dict, xr_dataset + ) return xr_dataset_dict diff --git a/tests/dataset/test_parameter_with_setpoints_has_control.py b/tests/dataset/test_parameter_with_setpoints_has_control.py new file mode 100644 index 00000000000..ebb41d15da0 --- /dev/null +++ b/tests/dataset/test_parameter_with_setpoints_has_control.py @@ -0,0 +1,193 @@ +import logging +from typing import TYPE_CHECKING + +import numpy as np +import numpy.testing as npt +import xarray as xr + +from qcodes.dataset import Measurement +from qcodes.dataset.exporters.export_to_xarray import _add_inferred_data_vars +from qcodes.parameters import ManualParameter, ParameterWithSetpoints +from qcodes.validators import Arrays + +if TYPE_CHECKING: + import pytest + + from qcodes.dataset.experiment_container import Experiment + + +def test_parameter_with_setpoints_has_control(experiment: "Experiment"): + class MySp(ParameterWithSetpoints): + def unpack_self(self, value): + res = super().unpack_self(value) + res.append((p1, p1())) + return res + + mp_data = np.arange(10) + p1_data = np.linspace(-1, 1, 10) + + mp = ManualParameter("mp", vals=Arrays(shape=(10,)), initial_value=mp_data) + p1 = ParameterWithSetpoints( + "p1", vals=Arrays(shape=(10,)), setpoints=(mp,), set_cmd=None + ) + p2 = MySp("p2", vals=Arrays(shape=(10,)), setpoints=(mp,), set_cmd=None) + p2.has_control_of.add(p1) + + p1(p1_data) + p2_data = np.random.randn(10) + p2(p2_data) + + meas = Measurement() + meas.register_parameter(p2) + + # Only p2 should be top-level; p1 is inferred from p2 + interdeps = meas._interdeps + top_level_names = [p.name for p in interdeps.top_level_parameters] + assert top_level_names == ["p2"] + + with meas.run() as ds: + ds.add_result((p2, p2())) + + # Verify raw parameter data has exactly one row per parameter + raw_data = ds.dataset.get_parameter_data() + assert list(raw_data.keys()) == ["p2"], "Only p2 should be a top-level result" + for name, arr in raw_data["p2"].items(): + assert arr.shape == (1, 10), ( + f"Expected shape (1, 10) for {name}, got {arr.shape}" + ) + + xds = ds.dataset.to_xarray_dataset() + + # mp should be the only dimension (not a generic 'index') + assert list(xds.sizes.keys()) == ["mp"] + assert xds.sizes["mp"] == 10 + + # mp values used as coordinate axis + npt.assert_array_equal(xds.coords["mp"].values, mp_data) + + # p2 is the primary data variable with correct values + assert "p2" in xds.data_vars + npt.assert_array_almost_equal(xds["p2"].values, p2_data) + + # p1 is included as a data variable (inferred from p2) with correct values + assert "p1" in xds.data_vars + npt.assert_array_almost_equal(xds["p1"].values, p1_data) + + # p1 data is also retrievable from the raw parameter data + npt.assert_array_almost_equal(raw_data["p2"]["p1"].ravel(), p1_data) + + +def test_parameter_with_setpoints_has_control_2d(experiment: "Experiment"): + """Test that an inferred parameter with the same size as its parent + but different from the full dimension product is correctly included.""" + + class MySp(ParameterWithSetpoints): + def unpack_self(self, value): + res = super().unpack_self(value) + res.append((p1, p1())) + return res + + n_x = 3 + n_y = 4 + mp_x_data = np.arange(n_x, dtype=float) + mp_y_data = np.arange(n_y, dtype=float) + + mp_x = ManualParameter("mp_x", initial_value=0.0) + mp_y = ManualParameter("mp_y", vals=Arrays(shape=(n_y,)), initial_value=mp_y_data) + + p1 = ParameterWithSetpoints( + "p1", vals=Arrays(shape=(n_y,)), setpoints=(mp_y,), set_cmd=None + ) + p2 = MySp("p2", vals=Arrays(shape=(n_y,)), setpoints=(mp_y,), set_cmd=None) + p2.has_control_of.add(p1) + + meas = Measurement() + meas.register_parameter(p2, setpoints=(mp_x,)) + + p1_all = [] + p2_all = [] + + with meas.run() as ds: + for x_val in mp_x_data: + mp_x(x_val) + p1_row = np.linspace(-1, 1, n_y) + x_val + p1(p1_row) + p2_row = np.random.randn(n_y) + p2(p2_row) + p1_all.append(p1_row) + p2_all.append(p2_row) + ds.add_result((mp_x, mp_x()), (p2, p2())) + + p1_all_arr = np.array(p1_all) + p2_all_arr = np.array(p2_all) + + xds = ds.dataset.to_xarray_dataset() + + # Should have 2 dimensions: mp_x and mp_y + assert set(xds.sizes.keys()) == {"mp_x", "mp_y"} + assert xds.sizes["mp_x"] == n_x + assert xds.sizes["mp_y"] == n_y + + # p2 is the primary data variable + assert "p2" in xds.data_vars + npt.assert_array_almost_equal(xds["p2"].values, p2_all_arr) + + # p1 is included as a data variable (inferred from p2) + # Its size (n_x * n_y = 12) matches its parent p2's size, + # which differs from either individual dimension. + assert "p1" in xds.data_vars + npt.assert_array_almost_equal(xds["p1"].values, p1_all_arr) + + +def test_parameter_with_setpoints_has_control_size_mismatch_warns( + experiment: "Experiment", caplog: "pytest.LogCaptureFixture" +) -> None: + """Test that a warning is emitted when the inferred parameter has a + different data size than its parent parameter.""" + + class MySp(ParameterWithSetpoints): + def unpack_self(self, value): + res = super().unpack_self(value) + res.append((p1, p1())) + return res + + mp_data = np.arange(10) + + mp = ManualParameter("mp", vals=Arrays(shape=(10,)), initial_value=mp_data) + p1 = ParameterWithSetpoints( + "p1", vals=Arrays(shape=(10,)), setpoints=(mp,), set_cmd=None + ) + p2 = MySp("p2", vals=Arrays(shape=(10,)), setpoints=(mp,), set_cmd=None) + p2.has_control_of.add(p1) + + p1(np.linspace(-1, 1, 10)) + p2(np.random.randn(10)) + + meas = Measurement() + meas.register_parameter(p2) + with meas.run() as ds: + ds.add_result((p2, p2())) + + # Build an xarray dataset and sub_dict with mismatched p1 data to + # exercise the warning path in _add_inferred_data_vars directly. + + raw_data = ds.dataset.get_parameter_data() + sub_dict = dict(raw_data["p2"]) + # Replace p1 with wrong-sized data (5 instead of 10) + sub_dict["p1"] = np.zeros(5) + + xr_dataset = xr.Dataset( + {"p2": (("mp",), sub_dict["p2"].ravel())}, + coords={"mp": sub_dict["mp"].ravel()}, + ) + + with caplog.at_level( + logging.WARNING, logger="qcodes.dataset.exporters.export_to_xarray" + ): + result = _add_inferred_data_vars(ds.dataset, "p2", sub_dict, xr_dataset) + + assert "p1" not in result.data_vars + assert any( + "Cannot add inferred parameter 'p1'" in msg and "'p2'" in msg + for msg in caplog.messages + )