{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Dataset Benchmarking\n", "\n", "This notebook is a behind-the-scenes benchmarking notebook, mainly for use by developers. The recommended way for users to interact with the dataset is via the `Measurement` object and its associated context manager. See the corresponding notebook for a comprehensive tutorial on how to use those." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "from pathlib import Path\n", "\n", "import numpy as np\n", "\n", "import qcodes as qc\n", "from qcodes.dataset import (\n", " ParamSpec,\n", " initialise_or_create_database_at,\n", " load_or_create_experiment,\n", " new_data_set,\n", ")" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'~/experiments.db'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "qc.config.core.db_location" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "initialise_or_create_database_at(Path.cwd() / \"benchmarking.db\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "benchmarking#the sample is a lie#3@C:\\Users\\jenielse\\experiments.db\n", "-------------------------------------------------------------------\n", "11-benchmark_data-1--0\n", "12-doubledata-2-x,y,z-1000\n", "13-singledata-3-y,z,x-100\n", "14-zerodata-4-y,z,x-1\n", "15-array1Ddata-5-y,z,x-800\n", "16-array0Ddata-6-y,z,x-81\n", "17-many_data-7-x,y,z-810000\n", "68-benchmark_data-8--0\n", "69-doubledata-9-y,z,x-1000\n", "70-singledata-10-y,z,x-100\n", "71-zerodata-11-y,z,x-1\n", "72-array1Ddata-12-y,z,x-800\n", "73-array0Ddata-13-y,z,x-81\n", "74-many_data-14-y,z,x-810000\n", "127-benchmark_data-15--0\n", "128-doubledata-16-y,z,x-1000\n",
"129-singledata-17-y,z,x-100\n", "130-zerodata-18-y,z,x-1\n", "131-array1Ddata-19-y,z,x-800\n", "132-array0Ddata-20-y,z,x-811\n", "133-many_data-21-y,z,x-810000\n", "170-benchmark_data-22--0\n", "171-doubledata-23-z,y,x-1000\n", "172-singledata-24-z,y,x-100\n", "173-zerodata-25-z,y,x-1\n", "174-array1Ddata-26-z,y,x-800\n", "175-array0Ddata-27-z,y,x-811\n", "176-many_data-28-z,y,x-810000\n", "229-benchmark_data-29--0\n", "230-doubledata-30-x,z,y-1000\n", "231-singledata-31-y,z,x-100\n", "232-zerodata-32-y,z,x-1\n", "233-array1Ddata-33-y,z,x-800\n", "234-array0Ddata-34-y,z,x-811\n", "235-many_data-35-x,z,y-810000\n", "288-benchmark_data-36--0\n", "289-doubledata-37-y,z,x-1000\n", "290-singledata-38-z,y,x-100\n", "291-zerodata-39-z,y,x-1\n", "292-array1Ddata-40-z,y,x-800\n", "293-array0Ddata-41-z,y,x-811\n", "294-many_data-42-y,z,x-810000\n", "347-benchmark_data-43--0\n", "348-doubledata-44-x,y,z-1000\n", "349-singledata-45-y,z,x-100\n", "350-zerodata-46-y,z,x-1\n", "351-array1Ddata-47-y,z,x-800\n", "352-array0Ddata-48-y,z,x-811\n", "353-many_data-49-x,y,z-810000\n", "406-benchmark_data-50--0\n", "407-doubledata-51-y,x,z-1000\n", "408-singledata-52-y,x,z-100\n", "409-zerodata-53-y,x,z-1\n", "410-array1Ddata-54-y,x,z-800\n", "411-array0Ddata-55-y,x,z-811\n", "412-many_data-56-y,x,z-810000\n", "662-benchmark_data-57--0\n", "663-doubledata-58-z,y,x-1000\n", "664-singledata-59-y,x,z-100\n", "665-zerodata-60-y,x,z-1\n", "666-array1Ddata-61-y,x,z-800\n", "667-array0Ddata-62-y,x,z-811\n", "668-many_data-63-z,y,x-810000\n", "680-benchmark_data-64--0\n", "681-doubledata-65-y,z,x-1000\n", "682-singledata-66-y,z,x-100\n", "683-zerodata-67-y,z,x-1\n", "684-array1Ddata-68-y,z,x-800\n", "685-array0Ddata-69-y,z,x-811\n", "686-many_data-70-y,z,x-810000" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "exp = load_or_create_experiment(\"benchmarking\", sample_name=\"the sample is a lie\")\n", "exp" ] }, { "cell_type": "markdown", "metadata": 
{}, "source": [ "Now we can create a dataset. Note two things:\n", "\n", " - if we don't specify an exp_id, but we have an experiment in the experiment container, the dataset will go into that one.\n", " - dataset can be created from the experiment object\n", " " ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "benchmarking#the sample is a lie#3@C:\\Users\\jenielse\\experiments.db\n", "-------------------------------------------------------------------\n", "11-benchmark_data-1--0\n", "12-doubledata-2-x,y,z-1000\n", "13-singledata-3-y,z,x-100\n", "14-zerodata-4-y,z,x-1\n", "15-array1Ddata-5-y,z,x-800\n", "16-array0Ddata-6-y,z,x-81\n", "17-many_data-7-x,y,z-810000\n", "68-benchmark_data-8--0\n", "69-doubledata-9-y,z,x-1000\n", "70-singledata-10-y,z,x-100\n", "71-zerodata-11-y,z,x-1\n", "72-array1Ddata-12-y,z,x-800\n", "73-array0Ddata-13-y,z,x-81\n", "74-many_data-14-y,z,x-810000\n", "127-benchmark_data-15--0\n", "128-doubledata-16-y,z,x-1000\n", "129-singledata-17-y,z,x-100\n", "130-zerodata-18-y,z,x-1\n", "131-array1Ddata-19-y,z,x-800\n", "132-array0Ddata-20-y,z,x-811\n", "133-many_data-21-y,z,x-810000\n", "170-benchmark_data-22--0\n", "171-doubledata-23-z,y,x-1000\n", "172-singledata-24-z,y,x-100\n", "173-zerodata-25-z,y,x-1\n", "174-array1Ddata-26-z,y,x-800\n", "175-array0Ddata-27-z,y,x-811\n", "176-many_data-28-z,y,x-810000\n", "229-benchmark_data-29--0\n", "230-doubledata-30-x,z,y-1000\n", "231-singledata-31-y,z,x-100\n", "232-zerodata-32-y,z,x-1\n", "233-array1Ddata-33-y,z,x-800\n", "234-array0Ddata-34-y,z,x-811\n", "235-many_data-35-x,z,y-810000\n", "288-benchmark_data-36--0\n", "289-doubledata-37-y,z,x-1000\n", "290-singledata-38-z,y,x-100\n", "291-zerodata-39-z,y,x-1\n", "292-array1Ddata-40-z,y,x-800\n", "293-array0Ddata-41-z,y,x-811\n", "294-many_data-42-y,z,x-810000\n", "347-benchmark_data-43--0\n", "348-doubledata-44-x,y,z-1000\n", "349-singledata-45-y,z,x-100\n", "350-zerodata-46-y,z,x-1\n",
"351-array1Ddata-47-y,z,x-800\n", "352-array0Ddata-48-y,z,x-811\n", "353-many_data-49-x,y,z-810000\n", "406-benchmark_data-50--0\n", "407-doubledata-51-y,x,z-1000\n", "408-singledata-52-y,x,z-100\n", "409-zerodata-53-y,x,z-1\n", "410-array1Ddata-54-y,x,z-800\n", "411-array0Ddata-55-y,x,z-811\n", "412-many_data-56-y,x,z-810000\n", "662-benchmark_data-57--0\n", "663-doubledata-58-z,y,x-1000\n", "664-singledata-59-y,x,z-100\n", "665-zerodata-60-y,x,z-1\n", "666-array1Ddata-61-y,x,z-800\n", "667-array0Ddata-62-y,x,z-811\n", "668-many_data-63-z,y,x-810000\n", "680-benchmark_data-64--0\n", "681-doubledata-65-y,z,x-1000\n", "682-singledata-66-y,z,x-100\n", "683-zerodata-67-y,z,x-1\n", "684-array1Ddata-68-y,z,x-800\n", "685-array0Ddata-69-y,z,x-811\n", "686-many_data-70-y,z,x-810000\n", "734-benchmark_data-71--0" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataSet = new_data_set(\"benchmark_data\", exp_id=exp.exp_id)\n", "exp" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In this benchmark we will assume that we are doing a 2D loop and investigate the performance implications of writing to the dataset" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "x_shape = 100\n", "y_shape = 100" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Baseline: Generate data" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Wall time: 29.6 ms\n" ] } ], "source": [ "%%time\n", "for x in range(x_shape):\n", " for y in range(y_shape):\n", " z = np.random.random_sample(1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "and store in memory" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "x_data = np.zeros((x_shape, y_shape))\n", "y_data = np.zeros((x_shape, y_shape))\n", "z_data = np.zeros((x_shape, y_shape))" ] }, { "cell_type":
"code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Wall time: 10 ms\n" ] } ], "source": [ "%%time\n", "for x in range(x_shape):\n", " for y in range(y_shape):\n", " x_data[x,y] = x\n", " y_data[x,y] = y\n", " z_data[x,y] = np.random.random_sample()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Add to dataset inside double loop" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "double_dataset = new_data_set(\"doubledata\", exp_id=exp.exp_id,\n", " specs=[ParamSpec(\"x\", \"numeric\"),\n", " ParamSpec(\"y\", \"numeric\"),\n", " ParamSpec('z', \"numeric\")])\n", "double_dataset.mark_started()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note that this is so slow that we are only doing a 10th of the computation" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Wall time: 3.99 s\n" ] } ], "source": [ "%%time\n", "for x in range(x_shape//10):\n", " for y in range(y_shape):\n", " double_dataset.add_results([{\"x\": x, 'y': y, 'z': np.random.random_sample()}])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Add the data in outer loop and store as np array" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "single_dataset = new_data_set(\"singledata\", exp_id=exp.exp_id,\n", " specs=[ParamSpec(\"x\", \"array\"),\n", " ParamSpec(\"y\", \"array\"),\n", " ParamSpec('z', \"array\")])\n", "single_dataset.mark_started()\n", "x_data = np.zeros(y_shape)\n", "y_data = np.zeros(y_shape)\n", "z_data = np.zeros(y_shape)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Wall time: 521 ms\n" ] } ], "source": [ "%%time\n", "for x in range(x_shape):\n", " for y in range(y_shape):\n", " x_data[y] = x\n", " y_data[y] = 
y\n", " z_data[y] = np.random.random_sample(1)\n", " single_dataset.add_results([{\"x\": x_data, 'y': y_data, 'z': z_data}])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Save once after loop" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "zero_dataset = new_data_set(\"zerodata\", exp_id=exp.exp_id,\n", " specs=[ParamSpec(\"x\", \"array\"),\n", " ParamSpec(\"y\", \"array\"),\n", " ParamSpec('z', \"array\")])\n", "zero_dataset.mark_started()\n", "x_data = np.zeros((x_shape, y_shape))\n", "y_data = np.zeros((x_shape, y_shape))\n", "z_data = np.zeros((x_shape, y_shape))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Wall time: 40.3 ms\n" ] }, { "data": { "text/plain": [ "1" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "for x in range(x_shape):\n", " for y in range(y_shape):\n", " x_data[x,y] = x\n", " y_data[x,y] = y\n", " z_data[x,y] = np.random.random_sample(1)\n", "zero_dataset.add_results([{'x':x_data, 'y':y_data, 'z':z_data}])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Array parameter" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "array1D_dataset = new_data_set(\"array1Ddata\", exp_id=exp.exp_id,\n", " specs=[ParamSpec(\"x\", \"array\"),\n", " ParamSpec(\"y\", \"array\"),\n", " ParamSpec('z', \"array\")])\n", "array1D_dataset.mark_started()\n", "y_setpoints = np.arange(y_shape)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "497 ms ± 61.8 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" ] } ], "source": [ "%%timeit\n", "for x in range(x_shape):\n", " x_data[x,:] = x\n", " array1D_dataset.add_results([{'x':x_data[x,:], 'y':y_setpoints, 'z':np.random.random_sample(y_shape)}])" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "x_data = np.zeros((x_shape, y_shape))\n", "y_data = np.zeros((x_shape, y_shape))\n", "z_data = np.zeros((x_shape, y_shape))\n", "y_setpoints = np.arange(y_shape)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "array0D_dataset = new_data_set(\"array0Ddata\", exp_id=exp.exp_id,\n", " specs=[ParamSpec(\"x\", \"array\"),\n", " ParamSpec(\"y\", \"array\"),\n", " ParamSpec('z', \"array\")])\n", "array0D_dataset.mark_started()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "10.3 ms ± 444 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" ] } ], "source": [ "%%timeit\n", "for x in range(x_shape):\n", " x_data[x,:] = x\n", " y_data[x,:] = y_setpoints\n", " z_data[x,:] = np.random.random_sample(y_shape)\n", "array0D_dataset.add_results([{'x':x_data, 'y':y_data, 'z':z_data}])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Insert many" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "data = []\n", "for i in range(100):\n", " for j in range(100):\n", " data.append({'x': i, 'y':j, 'z':np.random.random_sample()})" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "many_Data = new_data_set(\"many_data\", exp_id=exp.exp_id,\n", " specs=[ParamSpec(\"x\", \"numeric\"),\n", " ParamSpec(\"y\", \"numeric\"),\n", " ParamSpec(\"z\", \"numeric\")])\n", "many_Data.mark_started()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "43.2 ms ± 2.46 ms 
per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" ] } ], "source": [ "%%timeit\n", "many_Data.add_results(data)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.5" }, "nbsphinx": { "timeout": 600 } }, "nbformat": 4, "nbformat_minor": 2 }