This page was generated from docs/examples/DataSet/Benchmarking.ipynb. Interactive online version: Binder badge.

Dataset Benchmarking

This notebook is a behind-the-scenes benchmarking notebook, mainly for use by developers. The recommended way for users to interact with the dataset is via the Measurement object and its associated context manager. See the corresponding notebook for a comprehensive tutorial on how to use those.

%matplotlib inline
from pathlib import Path

import numpy as np

import qcodes as qc
from qcodes.dataset import (
Logging hadn't been started.
Activating auto-logging. Current session state plus future input saved.
Filename       : /home/runner/.qcodes/logs/command_history.log
Mode           : append
Output logging : True
Raw input log  : False
Timestamping   : True
State          : active
Qcodes Logfile : /home/runner/.qcodes/logs/240508-20414-qcodes.log
# Point QCoDeS at a "benchmarking.db" file in the current working directory.
initialise_or_create_database_at(Path.cwd().joinpath("benchmarking.db"))


# Experiment object for the benchmark runs; its exp_id is what the
# new_data_set calls in this notebook pass along.
exp = load_or_create_experiment("benchmarking", sample_name="the sample is a lie")
benchmarking#the sample is a lie#1@/home/runner/work/Qcodes/Qcodes/docs/examples/DataSet/benchmarking.db

Now we can create a dataset. Note two things:

- if we don't specify an exp_id, but we have an experiment in the experiment container, the dataset will go into that one.
- dataset can be created from the experiment object
# Create a dataset named "benchmark_data", passing the experiment's id explicitly.
dataSet = new_data_set("benchmark_data", exp_id=exp.exp_id)
benchmarking#the sample is a lie#1@/home/runner/work/Qcodes/Qcodes/docs/examples/DataSet/benchmarking.db

In this benchmark we will assume that we are doing a 2D loop and investigate the performance implications of writing to the dataset.

# Number of points along each axis of the 2D loop (a square 100 x 100 grid).
x_shape = y_shape = 100

Baseline: Generate data

# Baseline: draw one random value per (x, y) grid point and discard it.
# Nothing is stored; z is simply overwritten on every iteration.
for x in range(x_shape):
    for y in range(y_shape):
        z = np.random.random_sample(1)
CPU times: user 7.21 ms, sys: 0 ns, total: 7.21 ms
Wall time: 7.12 ms

and store in memory

# Pre-allocate one (x_shape, y_shape) array per quantity.
x_data = np.zeros((x_shape, y_shape))
y_data = np.zeros((x_shape, y_shape))
z_data = np.zeros((x_shape, y_shape))
# Fill the arrays element by element; no dataset is involved in this cell.
for x in range(x_shape):
    for y in range(y_shape):
        x_data[x,y] = x
        y_data[x,y] = y
        z_data[x,y] = np.random.random_sample()
CPU times: user 7.46 ms, sys: 0 ns, total: 7.46 ms
Wall time: 7.39 ms

Add to dataset inside double loop

# Dataset with three parameters x, y and z, each declared with the "numeric" type.
double_dataset = new_data_set(
    "doubledata",
    exp_id=exp.exp_id,
    specs=[ParamSpec(name, "numeric") for name in ("x", "y", "z")],
)

Note that this is so slow that we only do a tenth of the computation.

# Only x_shape // 10 outer iterations are run here.
for x in range(x_shape//10):
    for y in range(y_shape):
        # One add_results call per grid point, each with a single-row list.
        double_dataset.add_results([{"x": x, 'y': y, 'z': np.random.random_sample()}])
CPU times: user 153 ms, sys: 64.6 ms, total: 218 ms
Wall time: 522 ms

Add the data in outer loop and store as np array

# Dataset with three parameters x, y and z, each declared with the "array" type.
single_dataset = new_data_set(
    "singledata",
    exp_id=exp.exp_id,
    specs=[ParamSpec(name, "array") for name in ("x", "y", "z")],
)
# One-row buffers of length y_shape, overwritten on every outer iteration.
x_data = np.zeros(y_shape)
y_data = np.zeros(y_shape)
z_data = np.zeros(y_shape)
for x in range(x_shape):
    for y in range(y_shape):
        x_data[y] = x
        y_data[y] = y
        # random_sample() with no size yields a plain float; a size-1 array
        # here would rely on NumPy's deprecated array-to-scalar conversion.
        z_data[y] = np.random.random_sample()
    # One add_results call per completed row, i.e. once per outer iteration.
    single_dataset.add_results([{"x": x_data, 'y': y_data, 'z': z_data}])
CPU times: user 35.4 ms, sys: 5.88 ms, total: 41.3 ms
Wall time: 53.9 ms

Save once after loop

# Dataset with three parameters x, y and z, each declared with the "array" type.
zero_dataset = new_data_set(
    "zerodata",
    exp_id=exp.exp_id,
    specs=[ParamSpec(name, "array") for name in ("x", "y", "z")],
)
# Full (x_shape, y_shape) buffers; the whole grid is collected in memory first.
x_data = np.zeros((x_shape, y_shape))
y_data = np.zeros((x_shape, y_shape))
z_data = np.zeros((x_shape, y_shape))
for x in range(x_shape):
    for y in range(y_shape):
        x_data[x,y] = x
        y_data[x,y] = y
        # random_sample() with no size yields a plain float; a size-1 array
        # here would rely on NumPy's deprecated array-to-scalar conversion.
        z_data[x,y] = np.random.random_sample()
# A single add_results call after both loops, carrying the three full arrays.
zero_dataset.add_results([{'x':x_data, 'y':y_data, 'z':z_data}])
CPU times: user 19.3 ms, sys: 0 ns, total: 19.3 ms
Wall time: 19.5 ms

Array parameter

# Dataset with three parameters x, y and z, each declared with the "array" type.
array1D_dataset = new_data_set(
    "array1Ddata",
    exp_id=exp.exp_id,
    specs=[ParamSpec(name, "array") for name in ("x", "y", "z")],
)
# y axis values 0 .. y_shape - 1, reused for every row.
y_setpoints = np.arange(y_shape)
for x in range(x_shape):
    # x_data is not created in this cell: it must already exist as a 2-D
    # array, since it is indexed as [x, :] here.
    x_data[x,:] = x
    # One add_results call per x, passing whole rows instead of single points;
    # the z row is drawn in one vectorised random_sample(y_shape) call.
    array1D_dataset.add_results([{'x':x_data[x,:], 'y':y_setpoints, 'z':np.random.random_sample(y_shape)}])
37.5 ms ± 2.05 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
# Three independent (x_shape, y_shape) zero-filled buffers for the whole grid.
x_data, y_data, z_data = (np.zeros((x_shape, y_shape)) for _ in range(3))
# y axis values 0 .. y_shape - 1.
y_setpoints = np.arange(y_shape)
# Dataset with three parameters x, y and z, each declared with the "array" type.
array0D_dataset = new_data_set(
    "array0Ddata",
    exp_id=exp.exp_id,
    specs=[ParamSpec(name, "array") for name in ("x", "y", "z")],
)
# Fill the 2-D buffers one row at a time using whole-row assignments.
for x in range(x_shape):
    x_data[x,:] = x
    y_data[x,:] = y_setpoints
    z_data[x,:] = np.random.random_sample(y_shape)
# A single add_results call after the loop, carrying the three full arrays.
array0D_dataset.add_results([{'x':x_data, 'y':y_data, 'z':z_data}])
2.01 ms ± 36.2 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)

Insert many

# One {'x', 'y', 'z'} row per point of a 100 x 100 grid, with x varying
# slowest and z drawn at random for every point.
data = [
    {'x': i, 'y': j, 'z': np.random.random_sample()}
    for i in range(100)
    for j in range(100)
]
# Dataset with three parameters x, y and z, each declared with the "numeric" type.
many_Data = new_data_set(
    "many_data",
    exp_id=exp.exp_id,
    specs=[ParamSpec(name, "numeric") for name in ("x", "y", "z")],
)
18.4 ms ± 36.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)