{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# DataSet Performance\n", "\n", "This notebook shows the trade-off between inserting data into a database row-by-row and as binary blobs. Inserting the data row-by-row means that we have direct access to all the data and may perform queries directly on the values of the data. On the other hand, as we shall see, this is much slower than inserting the data directly as binary blobs." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "First, we choose a new location for the database to ensure that we don't add a bunch of benchmarking data to the default one." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "cwd = os.getcwd()\n", "import qcodes as qc\n", "\n", "qc.config[\"core\"][\"db_location\"] = os.path.join(cwd, 'testing.db')" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "import time\n", "from pathlib import Path\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", "import qcodes as qc\n", "from qcodes.dataset import (\n", " Measurement,\n", " initialise_or_create_database_at,\n", " load_or_create_experiment,\n", ")\n", "from qcodes.parameters import ManualParameter" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Upgrading database; v0 -> v1: : 0it [00:00, ?it/s]\n", "Upgrading database; v1 -> v2: 100%|█████████████████████████████████████████████████████| 1/1 [00:00<00:00, 111.26it/s]\n", "Upgrading database; v2 -> v3: : 0it [00:00, ?it/s]\n", "Upgrading database; v3 -> v4: : 0it [00:00, ?it/s]\n", "Upgrading database; v4 -> v5: 100%|█████████████████████████████████████████████████████| 1/1 [00:00<00:00, 199.95it/s]\n", "Upgrading database; v5 -> v6: : 0it [00:00, ?it/s]\n", "Upgrading database; v6 -> v7: 
100%|██████████████████████████████████████████████████████| 1/1 [00:00<00:00, 38.46it/s]\n", "Upgrading database; v7 -> v8: 100%|█████████████████████████████████████████████████████| 1/1 [00:00<00:00, 110.72it/s]\n" ] } ], "source": [ "initialise_or_create_database_at(Path.cwd() / \"dataset_performance.db\")\n", "exp = load_or_create_experiment(experiment_name='tutorial_exp', sample_name=\"no sample\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here, we define a simple function to benchmark the time it takes to insert n points with either numeric or array data type.\n", "We will compare both the time used to call ``add_result`` and the time used for the full measurement." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def insert_data(paramtype, npoints, nreps=1):\n", "\n", " meas = Measurement(exp=exp)\n", "\n", " x1 = ManualParameter('x1')\n", " x2 = ManualParameter('x2')\n", " x3 = ManualParameter('x3')\n", " y1 = ManualParameter('y1')\n", " y2 = ManualParameter('y2')\n", "\n", " meas.register_parameter(x1, paramtype=paramtype)\n", " meas.register_parameter(x2, paramtype=paramtype)\n", " meas.register_parameter(x3, paramtype=paramtype)\n", " meas.register_parameter(y1, setpoints=[x1, x2, x3],\n", " paramtype=paramtype)\n", " meas.register_parameter(y2, setpoints=[x1, x2, x3],\n", " paramtype=paramtype)\n", " start = time.perf_counter()\n", " with meas.run() as datasaver:\n", " start_adding = time.perf_counter()\n", " for i in range(nreps):\n", " datasaver.add_result((x1, np.random.rand(npoints)),\n", " (x2, np.random.rand(npoints)),\n", " (x3, np.random.rand(npoints)),\n", " (y1, np.random.rand(npoints)),\n", " (y2, np.random.rand(npoints)))\n", " stop_adding = time.perf_counter()\n", " run_id = datasaver.run_id\n", " stop = time.perf_counter()\n", " tot_time = stop - start\n", " add_time = stop_adding - start_adding\n", " return tot_time, add_time, run_id" ] }, { "cell_type": "markdown", 
"metadata": {}, "source": [ "## Comparison between numeric/array data and binary blob" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Case1: Short experiment time" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Starting experimental run with id: 1\n", "Starting experimental run with id: 2\n", "Starting experimental run with id: 3\n", "Starting experimental run with id: 4\n", "Starting experimental run with id: 5\n", "Starting experimental run with id: 6\n", "Starting experimental run with id: 7\n", "Starting experimental run with id: 8\n", "Starting experimental run with id: 9\n", "Starting experimental run with id: 10\n", "Starting experimental run with id: 11\n", "Starting experimental run with id: 12\n", "Starting experimental run with id: 13\n", "Starting experimental run with id: 14\n" ] } ], "source": [ "sizes = [1,500,1000,2000,3000,4000,5000]\n", "t_numeric = []\n", "t_numeric_add = []\n", "t_array = []\n", "t_array_add = []\n", "for size in sizes:\n", " tn, tna, run_id_n = insert_data('numeric', size)\n", " t_numeric.append(tn)\n", " t_numeric_add.append(tna)\n", "\n", " ta, taa, run_id_a = insert_data('array', size)\n", " t_array.append(ta)\n", " t_array_add.append(taa)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig, ax = plt.subplots(1,1)\n", "ax.plot(sizes, t_numeric, 'o-', label='Inserting row-by-row')\n", "ax.plot(sizes, t_numeric_add, 'o-', label='Inserting row-by-row: add_result only')\n", "ax.plot(sizes, t_array, 'd-', label='Inserting as binary blob')\n", "ax.plot(sizes, t_array_add, 'd-', label='Inserting as binary blob: add_result only')\n", "ax.legend()\n", "ax.set_xlabel('Array length')\n", "ax.set_ylabel('Time (s)')\n", "fig.tight_layout()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "As shown in the figure above, the time to set up and close the experiment is approximately 0.4 sec. In the case of small array sizes, the difference between inserting values of data as arrays and inserting them row-by-row is relatively unimportant. At larger array sizes, i.e. above 10000 points, the cost of writing data as individual datapoints starts to become important.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Case2: Long experiment time " ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Starting experimental run with id: 15\n", "Starting experimental run with id: 16\n", "Starting experimental run with id: 17\n", "Starting experimental run with id: 18\n", "Starting experimental run with id: 19\n", "Starting experimental run with id: 20\n", "Starting experimental run with id: 21\n", "Starting experimental run with id: 22\n", "Starting experimental run with id: 23\n", "Starting experimental run with id: 24\n", "Starting experimental run with id: 25\n", "Starting experimental run with id: 26\n", "Starting experimental run with id: 27\n", "Starting experimental run with id: 28\n" ] } ], "source": [ "sizes = [1,500,1000,2000,3000,4000,5000]\n", "nreps = 100\n", "t_numeric = []\n", "t_numeric_add = []\n", "t_numeric_run_ids = []\n", "t_array = []\n", 
"t_array_add = []\n", "t_array_run_ids = []\n", "for size in sizes:\n", " tn, tna, run_id_n = insert_data('numeric', size, nreps=nreps)\n", " t_numeric.append(tn)\n", " t_numeric_add.append(tna)\n", " t_numeric_run_ids.append(run_id_n)\n", "\n", " ta, taa, run_id_a = insert_data('array', size, nreps=nreps)\n", " t_array.append(ta)\n", " t_array_add.append(taa)\n", " t_array_run_ids.append(run_id_a)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": true }, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig, ax = plt.subplots(1,1)\n", "ax.plot(sizes, t_numeric, 'o-', label='Inserting row-by-row')\n", "ax.plot(sizes, t_numeric_add, 'o-', label='Inserting row-by-row: add_result only')\n", "ax.plot(sizes, t_array, 'd-', label='Inserting as binary blob')\n", "ax.plot(sizes, t_array_add, 'd-', label='Inserting as binary blob: add_result only')\n", "ax.legend()\n", "ax.set_xlabel('Array length')\n", "ax.set_ylabel('Time (s)')\n", "fig.tight_layout()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "However, as we increase the length of the experiment, as seen here by repeating the insertion 100 times, we see a big difference between inserting the values of the data row-by-row and inserting them as a binary blob." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Loading the data " ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "from qcodes.dataset import load_by_id" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "As usual, you can load the data by using the ``load_by_id`` function, but you will notice that the different storage methods\n", "are reflected in the shape of the data as it is retrieved. 
" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "run_id_n = t_numeric_run_ids[0]\n", "run_id_a = t_array_run_ids[0]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "{'x1': {'x1': array([0.08202966, 0.08202966, 0.81736 , 0.81736 , 0.37204527,\n", " 0.37204527, 0.64215591, 0.64215591, 0.42907488, 0.42907488,\n", " 0.91422619, 0.91422619, 0.48711059, 0.48711059, 0.5687964 ,\n", " 0.5687964 , 0.55769169, 0.55769169, 0.68063716, 0.68063716,\n", " 0.39693818, 0.39693818, 0.07115245, 0.07115245, 0.30753015,\n", " 0.30753015, 0.81669016, 0.81669016, 0.71676789, 0.71676789,\n", " 0.39164657, 0.39164657, 0.22781615, 0.22781615, 0.12692645,\n", " 0.12692645, 0.35825218, 0.35825218, 0.55365617, 0.55365617,\n", " 0.86899755, 0.86899755, 0.99711809, 0.99711809, 0.1422199 ,\n", " 0.1422199 , 0.13529255, 0.13529255, 0.11676167, 0.11676167,\n", " 0.52041344, 0.52041344, 0.53266126, 0.53266126, 0.33566174,\n", " 0.33566174, 0.17074707, 0.17074707, 0.26609821, 0.26609821,\n", " 0.64878666, 0.64878666, 0.40680066, 0.40680066, 0.76260801,\n", " 0.76260801, 0.82988643, 0.82988643, 0.42594241, 0.42594241,\n", " 0.25505782, 0.25505782, 0.42692093, 0.42692093, 0.16102798,\n", " 0.16102798, 0.67744127, 0.67744127, 0.71972203, 0.71972203,\n", " 0.78667903, 0.78667903, 0.32963498, 0.32963498, 0.96460611,\n", " 0.96460611, 0.84357755, 0.84357755, 0.6235959 , 0.6235959 ,\n", " 0.08189426, 0.08189426, 0.55784539, 0.55784539, 0.57925642,\n", " 0.57925642, 0.62981643, 0.62981643, 0.66787711, 0.66787711,\n", " 0.47908481, 0.47908481, 0.58068963, 0.58068963, 0.29729037,\n", " 0.29729037, 0.26277094, 0.26277094, 0.61698674, 0.61698674,\n", " 0.43012662, 0.43012662, 0.55085943, 0.55085943, 0.36183054,\n", " 0.36183054, 0.14753217, 0.14753217, 0.87692398, 0.87692398,\n", " 0.05518728, 0.05518728, 0.96710885, 0.96710885, 0.79153248,\n", " 0.79153248, 0.38286317, 0.38286317, 
0.20342752, 0.20342752,\n", " 0.13601945, 0.13601945, 0.26827072, 0.26827072, 0.97185925,\n", " 0.97185925, 0.68675366, 0.68675366, 0.36971102, 0.36971102,\n", " 0.63540961, 0.63540961, 0.98329784, 0.98329784, 0.89429281,\n", " 0.89429281, 0.53237617, 0.53237617, 0.44909935, 0.44909935,\n", " 0.50965946, 0.50965946, 0.89606857, 0.89606857, 0.95121891,\n", " 0.95121891, 0.12129533, 0.12129533, 0.21761944, 0.21761944,\n", " 0.6268567 , 0.6268567 , 0.39633933, 0.39633933, 0.29943907,\n", " 0.29943907, 0.12229129, 0.12229129, 0.26713706, 0.26713706,\n", " 0.8498325 , 0.8498325 , 0.91354633, 0.91354633, 0.86042504,\n", " 0.86042504, 0.70061487, 0.70061487, 0.08552322, 0.08552322,\n", " 0.07936403, 0.07936403, 0.83829902, 0.83829902, 0.05007236,\n", " 0.05007236, 0.34722175, 0.34722175, 0.8549174 , 0.8549174 ,\n", " 0.97052411, 0.97052411, 0.12970903, 0.12970903, 0.12443746,\n", " 0.12443746, 0.44431197, 0.44431197, 0.34393138, 0.34393138])}}" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds = load_by_id(run_id_n)\n", "ds.get_parameter_data('x1')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "And a dataset stored as binary arrays" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "{'x1': {'x1': array([[0.84365556],\n", " [0.84365556],\n", " [0.30271886],\n", " [0.30271886],\n", " [0.03745168],\n", " [0.03745168],\n", " [0.50045271],\n", " [0.50045271],\n", " [0.6992901 ],\n", " [0.6992901 ],\n", " [0.89623464],\n", " [0.89623464],\n", " [0.13055468],\n", " [0.13055468],\n", " [0.42048143],\n", " [0.42048143],\n", " [0.34587721],\n", " [0.34587721],\n", " [0.83386739],\n", " [0.83386739],\n", " [0.84585952],\n", " [0.84585952],\n", " [0.19556866],\n", " [0.19556866],\n", " [0.56415701],\n", " [0.56415701],\n", " [0.68068917],\n", " [0.68068917],\n", " [0.32230348],\n", " [0.32230348],\n", " [0.22598923],\n", " [0.22598923],\n", " 
[0.20068153],\n", " [0.20068153],\n", " [0.96070706],\n", " [0.96070706],\n", " [0.49664629],\n", " [0.49664629],\n", " [0.33654637],\n", " [0.33654637],\n", " [0.33698624],\n", " [0.33698624],\n", " [0.5530871 ],\n", " [0.5530871 ],\n", " [0.66498216],\n", " [0.66498216],\n", " [0.96038174],\n", " [0.96038174],\n", " [0.58180983],\n", " [0.58180983],\n", " [0.88381683],\n", " [0.88381683],\n", " [0.58146039],\n", " [0.58146039],\n", " [0.29067461],\n", " [0.29067461],\n", " [0.73548659],\n", " [0.73548659],\n", " [0.71566389],\n", " [0.71566389],\n", " [0.33592869],\n", " [0.33592869],\n", " [0.82891979],\n", " [0.82891979],\n", " [0.29425937],\n", " [0.29425937],\n", " [0.52277935],\n", " [0.52277935],\n", " [0.99382977],\n", " [0.99382977],\n", " [0.75594392],\n", " [0.75594392],\n", " [0.95034577],\n", " [0.95034577],\n", " [0.25983605],\n", " [0.25983605],\n", " [0.78885714],\n", " [0.78885714],\n", " [0.7285124 ],\n", " [0.7285124 ],\n", " [0.94490025],\n", " [0.94490025],\n", " [0.61913981],\n", " [0.61913981],\n", " [0.0486679 ],\n", " [0.0486679 ],\n", " [0.45029643],\n", " [0.45029643],\n", " [0.34801477],\n", " [0.34801477],\n", " [0.91711183],\n", " [0.91711183],\n", " [0.24487605],\n", " [0.24487605],\n", " [0.32663955],\n", " [0.32663955],\n", " [0.02492608],\n", " [0.02492608],\n", " [0.23145667],\n", " [0.23145667],\n", " [0.34212723],\n", " [0.34212723],\n", " [0.5479936 ],\n", " [0.5479936 ],\n", " [0.29172018],\n", " [0.29172018],\n", " [0.16603586],\n", " [0.16603586],\n", " [0.32246468],\n", " [0.32246468],\n", " [0.91298651],\n", " [0.91298651],\n", " [0.62585394],\n", " [0.62585394],\n", " [0.31829665],\n", " [0.31829665],\n", " [0.69378388],\n", " [0.69378388],\n", " [0.21653283],\n", " [0.21653283],\n", " [0.13432642],\n", " [0.13432642],\n", " [0.63437323],\n", " [0.63437323],\n", " [0.37152074],\n", " [0.37152074],\n", " [0.9777009 ],\n", " [0.9777009 ],\n", " [0.88118159],\n", " [0.88118159],\n", " [0.50597168],\n", " [0.50597168],\n", " 
[0.30776914],\n", " [0.30776914],\n", " [0.81870493],\n", " [0.81870493],\n", " [0.09389452],\n", " [0.09389452],\n", " [0.08229414],\n", " [0.08229414],\n", " [0.2394651 ],\n", " [0.2394651 ],\n", " [0.43591526],\n", " [0.43591526],\n", " [0.87484168],\n", " [0.87484168],\n", " [0.7203893 ],\n", " [0.7203893 ],\n", " [0.3248851 ],\n", " [0.3248851 ],\n", " [0.02897133],\n", " [0.02897133],\n", " [0.26393425],\n", " [0.26393425],\n", " [0.12966987],\n", " [0.12966987],\n", " [0.75482989],\n", " [0.75482989],\n", " [0.31662415],\n", " [0.31662415],\n", " [0.11957325],\n", " [0.11957325],\n", " [0.31395142],\n", " [0.31395142],\n", " [0.5000275 ],\n", " [0.5000275 ],\n", " [0.52358683],\n", " [0.52358683],\n", " [0.95638326],\n", " [0.95638326],\n", " [0.88934205],\n", " [0.88934205],\n", " [0.22408625],\n", " [0.22408625],\n", " [0.65085448],\n", " [0.65085448],\n", " [0.11088959],\n", " [0.11088959],\n", " [0.70819586],\n", " [0.70819586],\n", " [0.74773398],\n", " [0.74773398],\n", " [0.81457819],\n", " [0.81457819],\n", " [0.41284348],\n", " [0.41284348],\n", " [0.1538248 ],\n", " [0.1538248 ],\n", " [0.93380165],\n", " [0.93380165],\n", " [0.68450335],\n", " [0.68450335],\n", " [0.95488727],\n", " [0.95488727],\n", " [0.41080739],\n", " [0.41080739],\n", " [0.90861652],\n", " [0.90861652],\n", " [0.47719486],\n", " [0.47719486]])}}" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds = load_by_id(run_id_a)\n", "ds.get_parameter_data('x1')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" }, "nbsphinx": { "timeout": 600 }, "toc": { 
"base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }