Source code for qcodes.dataset.data_set_protocol

from __future__ import annotations

import logging
import os
import warnings
from collections.abc import Callable, Mapping, Sequence
from enum import Enum
from importlib.metadata import entry_points
from pathlib import Path
from typing import (
    TYPE_CHECKING,
    Any,
    Literal,
    Protocol,
    runtime_checkable,
)

import numpy as np

from qcodes.dataset.descriptions.dependencies import InterDependencies_
from qcodes.dataset.descriptions.param_spec import ParamSpec, ParamSpecBase
from qcodes.dataset.export_config import (
    DataExportType,
    get_data_export_name_elements,
    get_data_export_path,
    get_data_export_prefix,
    get_data_export_type,
)

from .descriptions.versioning.converters import new_to_old
from .exporters.export_to_csv import dataframe_to_csv
from .exporters.export_to_xarray import xarray_to_h5netcdf_with_complex_numbers
from .sqlite.queries import raw_time_to_str_time

if TYPE_CHECKING:
    from typing import TypeAlias

    import pandas as pd
    import xarray as xr

    from qcodes.dataset.descriptions.rundescriber import RunDescriber
    from qcodes.dataset.descriptions.versioning.rundescribertypes import Shapes
    from qcodes.dataset.linked_datasets.links import Link
    from qcodes.parameters import ParameterBase

    from .data_set_cache import DataSetCache
    from .exporters.export_info import ExportInfo

# For an unknown reason, entry points registered in pyproject.toml show up
# twice here; convert to a set to ensure there is no duplication.
_EXPORT_CALLBACKS = set(entry_points(group="qcodes.dataset.on_export"))
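
# Illustrative sketch (not part of this module): how a separate, hypothetical
# plugin package could hook into the "qcodes.dataset.on_export" entry-point
# group used above. The names ``my_plugin`` and ``log_export`` are made up;
# the call signature mirrors how the callbacks are invoked in
# ``BaseDataSet._export_data`` further down, i.e. the export path as the first
# positional argument and ``automatic_export`` as a keyword argument.
#
# In the plugin's pyproject.toml:
#
#     [project.entry-points."qcodes.dataset.on_export"]
#     log_export = "my_plugin:log_export"
#
# In my_plugin/__init__.py:
#
#     from pathlib import Path
#
#     def log_export(export_path: Path | None, automatic_export: bool) -> None:
#         """Minimal example callback: log every export that is performed."""
#         if export_path is not None:
#             print(f"exported to {export_path} (automatic={automatic_export})")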

array_like_types = (tuple, list, np.ndarray)
scalar_res_types: TypeAlias = (
    str | complex | np.integer | np.floating | np.complexfloating
)
values_type: TypeAlias = scalar_res_types | np.ndarray | Sequence[scalar_res_types]
res_type: TypeAlias = "tuple[ParameterBase | str, values_type]"
setpoints_type: TypeAlias = "Sequence[str | ParameterBase]"
SPECS: TypeAlias = list[ParamSpec]
# Transition period type: SpecsOrInterDeps. We will allow both as input to
# the DataSet constructor for a while, then deprecate SPECS and finally remove
# the ParamSpec class
SpecsOrInterDeps: TypeAlias = SPECS | InterDependencies_
ParameterData: TypeAlias = dict[str, dict[str, np.ndarray]]
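
# Illustrative sketch (made-up data): the nested-dict layout that
# ``ParameterData`` describes and that ``get_parameter_data`` (below) returns.
# The parameter names "signal" and "frequency" are invented; typically the
# outer key is the name of a measured (dependent) parameter and the inner
# dict maps that parameter and its setpoints to numpy arrays.
_example_parameter_data: ParameterData = {
    "signal": {
        "signal": np.array([0.10, 0.40, 0.90]),
        "frequency": np.array([1.0e9, 2.0e9, 3.0e9]),
    }
}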

LOG = logging.getLogger(__name__)


class CompletedError(RuntimeError):
    pass


@runtime_checkable
class DataSetProtocol(Protocol):
    # the "persistent traits" are the attributes/properties of the DataSet
    # that are NOT tied to the representation of the DataSet in any particular
    # database
    persistent_traits: tuple[str, ...] = (
        "name",
        "guid",
        "number_of_results",
        "exp_name",
        "sample_name",
        "completed",
        "snapshot",
        "run_timestamp_raw",
        "description",
        "completed_timestamp_raw",
        "metadata",
        "parent_dataset_links",
        "captured_run_id",
        "captured_counter",
    )

    def prepare(
        self,
        *,
        snapshot: Mapping[Any, Any],
        interdeps: InterDependencies_,
        shapes: Shapes | None = None,
        parent_datasets: Sequence[Mapping[Any, Any]] = (),
        write_in_background: bool = False,
    ) -> None: ...

    @property
    def pristine(self) -> bool: ...

    @property
    def running(self) -> bool: ...

    @property
    def completed(self) -> bool: ...

    def mark_completed(self) -> None: ...

    # dataset attributes
    @property
    def run_id(self) -> int: ...

    @property
    def captured_run_id(self) -> int: ...

    @property
    def counter(self) -> int: ...

    @property
    def captured_counter(self) -> int: ...

    @property
    def guid(self) -> str: ...

    @property
    def number_of_results(self) -> int: ...

    @property
    def name(self) -> str: ...

    @property
    def exp_name(self) -> str: ...

    @property
    def exp_id(self) -> int: ...

    @property
    def sample_name(self) -> str: ...

    def run_timestamp(self, fmt: str = "%Y-%m-%d %H:%M:%S") -> str | None: ...

    @property
    def run_timestamp_raw(self) -> float | None: ...

    def completed_timestamp(self, fmt: str = "%Y-%m-%d %H:%M:%S") -> str | None: ...

    @property
    def completed_timestamp_raw(self) -> float | None: ...

    # snapshot and metadata
    @property
    def snapshot(self) -> dict[str, Any] | None: ...

    def add_snapshot(self, snapshot: str, overwrite: bool = False) -> None: ...

    @property
    def _snapshot_raw(self) -> str | None: ...

    def add_metadata(self, tag: str, metadata: Any) -> None: ...

    @property
    def metadata(self) -> dict[str, Any]: ...

    @property
    def path_to_db(self) -> str | None: ...

    # dataset description and links
    @property
    def paramspecs(self) -> dict[str, ParamSpec]: ...

    @property
    def description(self) -> RunDescriber: ...

    @property
    def parent_dataset_links(self) -> list[Link]: ...

    # data related members
    def export(
        self,
        export_type: DataExportType | str | None = None,
        path: Path | str | None = None,
        prefix: str | None = None,
        automatic_export: bool = False,
    ) -> None: ...

    @property
    def export_info(self) -> ExportInfo: ...

    @property
    def cache(self) -> DataSetCache[DataSetProtocol]: ...

    def get_parameter_data(
        self,
        *params: str | ParamSpec | ParameterBase,
        start: int | None = None,
        end: int | None = None,
        callback: Callable[[float], None] | None = None,
    ) -> ParameterData: ...

    def get_parameters(self) -> SPECS:
        # used by plottr
        ...

    @property
    def dependent_parameters(self) -> tuple[ParamSpecBase, ...]: ...

    # exporters to other in memory formats
    def to_xarray_dataarray_dict(
        self,
        *params: str | ParamSpec | ParameterBase,
        start: int | None = None,
        end: int | None = None,
        use_multi_index: Literal["auto", "always", "never"] = "auto",
    ) -> dict[str, xr.DataArray]: ...

    def to_xarray_dataset(
        self,
        *params: str | ParamSpec | ParameterBase,
        start: int | None = None,
        end: int | None = None,
        use_multi_index: Literal["auto", "always", "never"] = "auto",
    ) -> xr.Dataset: ...

    def to_pandas_dataframe_dict(
        self,
        *params: str | ParamSpec | ParameterBase,
        start: int | None = None,
        end: int | None = None,
    ) -> dict[str, pd.DataFrame]: ...
    def to_pandas_dataframe(
        self,
        *params: str | ParamSpec | ParameterBase,
        start: int | None = None,
        end: int | None = None,
    ) -> pd.DataFrame: ...

    # private members called by various other parts of the API
    def _enqueue_results(
        self, result_dict: Mapping[ParamSpecBase, np.ndarray]
    ) -> None: ...

    def _flush_data_to_database(self, block: bool = False) -> None: ...

    @property
    def _parameters(self) -> str | None: ...

    def _set_export_info(self, export_info: ExportInfo) -> None: ...

    def __len__(self) -> int: ...
    def the_same_dataset_as(self, other: DataSetProtocol) -> bool: ...
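
# Example (illustrative only): because DataSetProtocol is decorated with
# ``runtime_checkable``, any object can be checked against it with
# ``isinstance`` at runtime. The check is structural (presence of the
# protocol's members), which is what ``BaseDataSet.the_same_dataset_as``
# below relies on. The helper name ``_example_is_dataset`` is made up.
def _example_is_dataset(maybe_dataset: object) -> bool:
    """Return True if ``maybe_dataset`` structurally satisfies DataSetProtocol."""
    return isinstance(maybe_dataset, DataSetProtocol)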

class BaseDataSet(DataSetProtocol, Protocol):
    # shared methods between all implementations of the dataset

    def the_same_dataset_as(self, other: DataSetProtocol) -> bool:
        """
        Check if two datasets correspond to the same run by comparing
        all their persistent traits. Note that this method
        does not compare the data itself.

        This function raises if the GUIDs match but anything else doesn't.

        Args:
            other: the dataset to compare self to

        """
        if not isinstance(other, DataSetProtocol):
            return False

        guids_match = self.guid == other.guid

        # note that the guid is in itself a persistent trait of the DataSet.
        # We therefore do not need to handle the case of guids not equal
        # but all persistent traits equal, as this is not possible.
        # Thus, if all persistent traits are the same we can safely return True
        for attr in self.persistent_traits:
            if getattr(self, attr) != getattr(other, attr):
                if guids_match:
                    raise RuntimeError(
                        "Critical inconsistency detected! "
                        "The two datasets have the same GUID, "
                        f'but their "{attr}" differ.'
                    )
                return False

        return True

    def get_parameters(self) -> SPECS:
        old_interdeps = new_to_old(self.description.interdeps)
        return list(old_interdeps.paramspecs)

    def export(
        self,
        export_type: DataExportType | str | None = None,
        path: str | Path | None = None,
        prefix: str | None = None,
        automatic_export: bool = False,
    ) -> None:
        """Export data to disk with file name ``{prefix}{name_elements}.{ext}``.

        Name elements are names of dataset object attributes that are taken
        from the dataset and inserted into the name of the export file. For
        example, if the name elements are ``["captured_run_id", "guid"]``, then
        the file name will be ``{prefix}{captured_run_id}_{guid}.{ext}``.

        Values for the export type, path, export_name_elements and prefix can
        also be set in the "dataset" section of the qcodes config.

        Args:
            export_type: Data export type, e.g. "netcdf" or
                ``DataExportType.NETCDF``, defaults to a value set in the
                qcodes config.
            path: Export path, defaults to the value set in the config.
            prefix: File prefix, e.g. ``qcodes_``, defaults to the value set
                in the config.
            automatic_export: Is this export automatic?

        Raises:
            ValueError: If the export data type is not specified or unknown,
                raise an error.

        """
        if isinstance(path, str):
            path = Path(path)
        parsed_export_type = get_data_export_type(export_type)

        if parsed_export_type is None and export_type is None:
            raise ValueError(
                "No data export type specified. Please set the export data type "
                "by using ``qcodes.dataset.export_config.set_data_export_type`` or "
                "give an explicit export_type when calling ``dataset.export`` manually."
            )
        elif parsed_export_type is None:
            raise ValueError(
                f"Export type {export_type} is unknown. Export type "
                f"should be a member of the `DataExportType` enum"
            )

        export_path = self._export_data(
            export_type=parsed_export_type,
            path=path,
            prefix=prefix,
            automatic_export=automatic_export,
        )
        export_info = self.export_info
        if export_path is not None:
            export_info.export_paths[parsed_export_type.value] = os.path.abspath(
                export_path
            )

        self._set_export_info(export_info)

    def _export_data(
        self,
        export_type: DataExportType,
        path: Path | None = None,
        prefix: str | None = None,
        automatic_export: bool = False,
    ) -> Path | None:
        """Export data to disk with file name ``{prefix}{name_elements}.{ext}``.

        Name elements are names of dataset object attributes that are taken
        from the dataset and inserted into the name of the export file. For
        example, if the name elements are ``["captured_run_id", "guid"]``, then
        the file name will be ``{prefix}{captured_run_id}_{guid}.{ext}``.

        Values for the export type, path, export_name_elements and prefix can
        also be set in the "dataset" section of the qcodes config.

        Args:
            export_type: Data export type, e.g. DataExportType.NETCDF
            path: Export path, defaults to the value set in the config.
            prefix: File prefix, e.g. "qcodes_", defaults to the value set in
                the config.
            automatic_export: Is this export automatic?

        Returns:
            Path the file was saved to; returns None if no file was saved.

        """
        # Set defaults to values in config if the value was not set
        # (defaults to None)
        path = path if path is not None else get_data_export_path()
        path.mkdir(exist_ok=True, parents=True)
        prefix = prefix if prefix is not None else get_data_export_prefix()

        if DataExportType.NETCDF == export_type:
            file_name = self._export_file_name(
                prefix=prefix, export_type=DataExportType.NETCDF
            )
            export_path = Path(self._export_as_netcdf(path=path, file_name=file_name))
        elif DataExportType.CSV == export_type:
            file_name = self._export_file_name(
                prefix=prefix, export_type=DataExportType.CSV
            )
            export_path = Path(self._export_as_csv(path=path, file_name=file_name))
        else:
            export_path = None

        for export_callback in _EXPORT_CALLBACKS:
            try:
                export_callback_function = export_callback.load()
                LOG.info("Executing on_export callback %s", export_callback.name)
                export_callback_function(export_path, automatic_export=automatic_export)
            except Exception:
                LOG.exception("Exception during export callback function")

        return export_path

    def _export_file_name(self, prefix: str, export_type: DataExportType) -> str:
        """Get export file name"""
        extension = export_type.value
        name_elements = get_data_export_name_elements()
        post_fix = "_".join([str(getattr(self, name)) for name in name_elements])
        return f"{prefix}{post_fix}.{extension}"

    def _export_as_netcdf(self, path: Path, file_name: str) -> Path:
        """Export data as netcdf to a given path with file prefix"""
        file_path = path / file_name
        xarr_dataset = self.to_xarray_dataset()
        xarray_to_h5netcdf_with_complex_numbers(xarr_dataset, file_path)
        return file_path

    def _export_as_csv(self, path: Path, file_name: str) -> Path:
        """Export data as csv to a given path with file prefix."""
        dfdict = self.to_pandas_dataframe_dict()
        dataframe_to_csv(
            dfdict=dfdict,
            path=path,
            single_file=True,
            single_file_name=file_name,
        )
        return path / file_name

    def _add_metadata_to_netcdf_if_nc_exported(self, tag: str, data: Any) -> None:
        export_paths = self.export_info.export_paths
        nc_file = export_paths.get(DataExportType.NETCDF.value, None)
        if nc_file is not None:
            import h5netcdf  # type: ignore[import-untyped]

            try:
                with h5netcdf.File(
                    nc_file, mode="r+", decode_vlen_strings=False
                ) as h5nc_file:
                    h5nc_file.attrs[tag] = data
            except (
                FileNotFoundError,
                OSError,
            ):  # older versions of h5py may throw an OSError here
                warnings.warn(
                    f"Could not add metadata to the exported NetCDF file, "
                    f"was the file moved? GUID {self.guid}, NetCDF file {nc_file}"
                )

    @staticmethod
    def _validate_parameters(*params: str | ParamSpec | ParameterBase) -> list[str]:
        """
        Validate that the provided parameters have a name and return those
        names as a list. The parameters may be a mix of strings, ParamSpecs or
        ordinary QCoDeS parameters.
        """
        valid_param_names = []
        for maybe_param in params:
            if isinstance(maybe_param, str):
                valid_param_names.append(maybe_param)
            else:
                try:
                    maybe_param_name = maybe_param.name
                except Exception as e:
                    raise ValueError("This parameter does not have a name") from e
                valid_param_names.append(maybe_param_name)
        return valid_param_names

    @staticmethod
    def _reshape_array_for_cache(
        param: ParamSpecBase, param_data: np.ndarray
    ) -> np.ndarray:
        """
        Shape cache data so it matches data read from database.
        This means:

        - add an extra singleton dim to array data
        - flatten non-array data into a linear array.
        """
        param_data = np.atleast_1d(param_data)
        if param.type == "array":
            new_data = np.reshape(param_data, (1, *param_data.shape))
        else:
            new_data = param_data.ravel()
        return new_data

    def run_timestamp(self, fmt: str = "%Y-%m-%d %H:%M:%S") -> str | None:
        """
        Returns the run timestamp in a human-readable format.

        The run timestamp is the moment when the measurement for this run
        started. If the run has not yet been started, this function returns
        None.

        Consult :func:`time.strftime` for information about the format.
        """
        return raw_time_to_str_time(self.run_timestamp_raw, fmt)

    def completed_timestamp(self, fmt: str = "%Y-%m-%d %H:%M:%S") -> str | None:
        """
        Returns the timestamp when the measurement run was completed,
        in a human-readable format.

        If the run (or the dataset) is not completed, this returns None.

        Consult ``time.strftime`` for information about the format.
        """
        return raw_time_to_str_time(self.completed_timestamp_raw, fmt)

    @property
    def dependent_parameters(self) -> tuple[ParamSpecBase, ...]:
        """
        Return all the parameters that explicitly depend on other parameters.
        """
        return tuple(self.description.interdeps.dependencies.keys())

class DataSetType(str, Enum):
    DataSet = "DataSet"
    DataSetInMem = "DataSetInMem"
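
# Example (illustrative only): because DataSetType mixes in ``str``, its
# members compare equal to plain strings and can be constructed from them,
# which makes the enum convenient to round-trip through text-based storage.
assert DataSetType.DataSet == "DataSet"
assert DataSetType("DataSetInMem") is DataSetType.DataSetInMem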