{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dataset Benchmarking\n",
    "\n",
    "This notebook is a behind-the-scenes benchmarking notebook, mainly for use by developers. The recommended way for users to interact with the dataset is via the `Measurement` object and its associated context manager. See the corresponding notebook for a comprehensive tutorial on how to use those."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "import qcodes as qc\n",
    "from qcodes.dataset import (\n",
    "    ParamSpec,\n",
    "    initialise_or_create_database_at,\n",
    "    load_or_create_experiment,\n",
    "    new_data_set,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'~/experiments.db'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "qc.config.core.db_location"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "initialise_or_create_database_at(Path.cwd() / \"benchmarking.db\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "benchmarking#the sample is a lie#3@C:\\Users\\jenielse\\experiments.db\n",
       "-------------------------------------------------------------------\n",
       "11-benchmark_data-1--0\n",
       "12-doubledata-2-x,y,z-1000\n",
       "13-singledata-3-y,z,x-100\n",
       "14-zerodata-4-y,z,x-1\n",
       "15-array1Ddata-5-y,z,x-800\n",
       "16-array0Ddata-6-y,z,x-81\n",
       "17-many_data-7-x,y,z-810000\n",
       "68-benchmark_data-8--0\n",
       "69-doubledata-9-y,z,x-1000\n",
       "70-singledata-10-y,z,x-100\n",
       "71-zerodata-11-y,z,x-1\n",
       "72-array1Ddata-12-y,z,x-800\n",
       "73-array0Ddata-13-y,z,x-81\n",
       "74-many_data-14-y,z,x-810000\n",
       "127-benchmark_data-15--0\n",
       "128-doubledata-16-y,z,x-1000\n",
       "129-singledata-17-y,z,x-100\n",
       "130-zerodata-18-y,z,x-1\n",
       "131-array1Ddata-19-y,z,x-800\n",
       "132-array0Ddata-20-y,z,x-811\n",
       "133-many_data-21-y,z,x-810000\n",
       "170-benchmark_data-22--0\n",
       "171-doubledata-23-z,y,x-1000\n",
       "172-singledata-24-z,y,x-100\n",
       "173-zerodata-25-z,y,x-1\n",
       "174-array1Ddata-26-z,y,x-800\n",
       "175-array0Ddata-27-z,y,x-811\n",
       "176-many_data-28-z,y,x-810000\n",
       "229-benchmark_data-29--0\n",
       "230-doubledata-30-x,z,y-1000\n",
       "231-singledata-31-y,z,x-100\n",
       "232-zerodata-32-y,z,x-1\n",
       "233-array1Ddata-33-y,z,x-800\n",
       "234-array0Ddata-34-y,z,x-811\n",
       "235-many_data-35-x,z,y-810000\n",
       "288-benchmark_data-36--0\n",
       "289-doubledata-37-y,z,x-1000\n",
       "290-singledata-38-z,y,x-100\n",
       "291-zerodata-39-z,y,x-1\n",
       "292-array1Ddata-40-z,y,x-800\n",
       "293-array0Ddata-41-z,y,x-811\n",
       "294-many_data-42-y,z,x-810000\n",
       "347-benchmark_data-43--0\n",
       "348-doubledata-44-x,y,z-1000\n",
       "349-singledata-45-y,z,x-100\n",
       "350-zerodata-46-y,z,x-1\n",
       "351-array1Ddata-47-y,z,x-800\n",
       "352-array0Ddata-48-y,z,x-811\n",
       "353-many_data-49-x,y,z-810000\n",
       "406-benchmark_data-50--0\n",
       "407-doubledata-51-y,x,z-1000\n",
       "408-singledata-52-y,x,z-100\n",
       "409-zerodata-53-y,x,z-1\n",
       "410-array1Ddata-54-y,x,z-800\n",
       "411-array0Ddata-55-y,x,z-811\n",
       "412-many_data-56-y,x,z-810000\n",
       "662-benchmark_data-57--0\n",
       "663-doubledata-58-z,y,x-1000\n",
       "664-singledata-59-y,x,z-100\n",
       "665-zerodata-60-y,x,z-1\n",
       "666-array1Ddata-61-y,x,z-800\n",
       "667-array0Ddata-62-y,x,z-811\n",
       "668-many_data-63-z,y,x-810000\n",
       "680-benchmark_data-64--0\n",
       "681-doubledata-65-y,z,x-1000\n",
       "682-singledata-66-y,z,x-100\n",
       "683-zerodata-67-y,z,x-1\n",
       "684-array1Ddata-68-y,z,x-800\n",
       "685-array0Ddata-69-y,z,x-811\n",
       "686-many_data-70-y,z,x-810000"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "exp = load_or_create_experiment(\"benchmarking\", sample_name=\"the sample is a lie\")\n",
    "exp"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now we can create a dataset. Note two things:\n",
    "\n",
    "- If we don't specify an `exp_id`, but we have an experiment in the experiment container, the dataset will go into that one.\n",
    "- A dataset can be created from the experiment object."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "benchmarking#the sample is a lie#3@C:\\Users\\jenielse\\experiments.db\n",
       "-------------------------------------------------------------------\n",
       "11-benchmark_data-1--0\n",
       "12-doubledata-2-x,y,z-1000\n",
       "13-singledata-3-y,z,x-100\n",
       "14-zerodata-4-y,z,x-1\n",
       "15-array1Ddata-5-y,z,x-800\n",
       "16-array0Ddata-6-y,z,x-81\n",
       "17-many_data-7-x,y,z-810000\n",
       "68-benchmark_data-8--0\n",
       "69-doubledata-9-y,z,x-1000\n",
       "70-singledata-10-y,z,x-100\n",
       "71-zerodata-11-y,z,x-1\n",
       "72-array1Ddata-12-y,z,x-800\n",
       "73-array0Ddata-13-y,z,x-81\n",
       "74-many_data-14-y,z,x-810000\n",
       "127-benchmark_data-15--0\n",
       "128-doubledata-16-y,z,x-1000\n",
       "129-singledata-17-y,z,x-100\n",
       "130-zerodata-18-y,z,x-1\n",
       "131-array1Ddata-19-y,z,x-800\n",
       "132-array0Ddata-20-y,z,x-811\n",
       "133-many_data-21-y,z,x-810000\n",
       "170-benchmark_data-22--0\n",
       "171-doubledata-23-z,y,x-1000\n",
       "172-singledata-24-z,y,x-100\n",
       "173-zerodata-25-z,y,x-1\n",
       "174-array1Ddata-26-z,y,x-800\n",
       "175-array0Ddata-27-z,y,x-811\n",
       "176-many_data-28-z,y,x-810000\n",
       "229-benchmark_data-29--0\n",
       "230-doubledata-30-x,z,y-1000\n",
       "231-singledata-31-y,z,x-100\n",
       "232-zerodata-32-y,z,x-1\n",
       "233-array1Ddata-33-y,z,x-800\n",
       "234-array0Ddata-34-y,z,x-811\n",
       "235-many_data-35-x,z,y-810000\n",
       "288-benchmark_data-36--0\n",
       "289-doubledata-37-y,z,x-1000\n",
       "290-singledata-38-z,y,x-100\n",
       "291-zerodata-39-z,y,x-1\n",
       "292-array1Ddata-40-z,y,x-800\n",
       "293-array0Ddata-41-z,y,x-811\n",
       "294-many_data-42-y,z,x-810000\n",
       "347-benchmark_data-43--0\n",
       "348-doubledata-44-x,y,z-1000\n",
       "349-singledata-45-y,z,x-100\n",
       "350-zerodata-46-y,z,x-1\n",
       "351-array1Ddata-47-y,z,x-800\n",
       "352-array0Ddata-48-y,z,x-811\n",
       "353-many_data-49-x,y,z-810000\n",
       "406-benchmark_data-50--0\n",
       "407-doubledata-51-y,x,z-1000\n",
       "408-singledata-52-y,x,z-100\n",
       "409-zerodata-53-y,x,z-1\n",
       "410-array1Ddata-54-y,x,z-800\n",
       "411-array0Ddata-55-y,x,z-811\n",
       "412-many_data-56-y,x,z-810000\n",
       "662-benchmark_data-57--0\n",
       "663-doubledata-58-z,y,x-1000\n",
       "664-singledata-59-y,x,z-100\n",
       "665-zerodata-60-y,x,z-1\n",
       "666-array1Ddata-61-y,x,z-800\n",
       "667-array0Ddata-62-y,x,z-811\n",
       "668-many_data-63-z,y,x-810000\n",
       "680-benchmark_data-64--0\n",
       "681-doubledata-65-y,z,x-1000\n",
       "682-singledata-66-y,z,x-100\n",
       "683-zerodata-67-y,z,x-1\n",
       "684-array1Ddata-68-y,z,x-800\n",
       "685-array0Ddata-69-y,z,x-811\n",
       "686-many_data-70-y,z,x-810000\n",
       "734-benchmark_data-71--0"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataSet = new_data_set(\"benchmark_data\", exp_id=exp.exp_id)\n",
    "exp"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In this benchmark we will assume that we are doing a 2D loop and investigate the performance implications of writing to the dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "x_shape = 100\n",
    "y_shape = 100"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Baseline: Generate data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wall time: 29.6 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "for x in range(x_shape):\n",
    "    for y in range(y_shape):\n",
    "        z = np.random.random_sample(1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "and store in memory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "x_data = np.zeros((x_shape, y_shape))\n",
    "y_data = np.zeros((x_shape, y_shape))\n",
    "z_data = np.zeros((x_shape, y_shape))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wall time: 10 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "for x in range(x_shape):\n",
    "    for y in range(y_shape):\n",
    "        x_data[x,y] = x\n",
    "        y_data[x,y] = y\n",
    "        z_data[x,y] = np.random.random_sample()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Add to dataset inside double loop"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset for the point-by-point benchmark: x, y and z are all declared\n",
    "# with the \"numeric\" paramtype, and the run is marked as started.\n",
    "double_dataset = new_data_set(\n",
    "    \"doubledata\",\n",
    "    exp_id=exp.exp_id,\n",
    "    specs=[ParamSpec(axis, \"numeric\") for axis in (\"x\", \"y\", \"z\")],\n",
    ")\n",
    "double_dataset.mark_started()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note that this is so slow that we are only doing a 10th of the computation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wall time: 3.99 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "for x in range(x_shape//10):\n",
    "    for y in range(y_shape):\n",
    "        double_dataset.add_results([{\"x\": x, 'y': y, 'z': np.random.random_sample()}])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Add the data in outer loop and store as np array"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset for the row-at-a-time benchmark: x, y and z are all declared\n",
    "# with the \"array\" paramtype, and the run is marked as started.\n",
    "single_dataset = new_data_set(\n",
    "    \"singledata\",\n",
    "    exp_id=exp.exp_id,\n",
    "    specs=[ParamSpec(axis, \"array\") for axis in (\"x\", \"y\", \"z\")],\n",
    ")\n",
    "single_dataset.mark_started()\n",
    "\n",
    "# Three independent 1-D buffers of length y_shape, one per parameter.\n",
    "x_data, y_data, z_data = (np.zeros(y_shape) for _ in range(3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wall time: 521 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Fill the 1-D buffers element by element in the inner loop, then write the\n",
    "# three arrays to the dataset once per outer-loop iteration.\n",
    "for x in range(x_shape):\n",
    "    for y in range(y_shape):\n",
    "        x_data[y] = x\n",
    "        y_data[y] = y\n",
    "        # random_sample() returns a scalar. random_sample(1) would return a\n",
    "        # length-1 array, and assigning that to a single element relies on\n",
    "        # the size-1-array-to-scalar conversion deprecated in NumPy 1.25.\n",
    "        z_data[y] = np.random.random_sample()\n",
    "    single_dataset.add_results([{\"x\": x_data, 'y': y_data, 'z': z_data}])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Save once after loop"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset for the save-once benchmark: x, y and z are all declared with\n",
    "# the \"array\" paramtype, and the run is marked as started.\n",
    "zero_dataset = new_data_set(\n",
    "    \"zerodata\",\n",
    "    exp_id=exp.exp_id,\n",
    "    specs=[ParamSpec(axis, \"array\") for axis in (\"x\", \"y\", \"z\")],\n",
    ")\n",
    "zero_dataset.mark_started()\n",
    "\n",
    "# Three independent (x_shape, y_shape) grids, one per parameter.\n",
    "x_data, y_data, z_data = (np.zeros((x_shape, y_shape)) for _ in range(3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wall time: 40.3 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "# Fill the full 2-D grids in memory first, then write everything to the\n",
    "# dataset with a single add_results call after the loops.\n",
    "for x in range(x_shape):\n",
    "    for y in range(y_shape):\n",
    "        x_data[x,y] = x\n",
    "        y_data[x,y] = y\n",
    "        # random_sample() returns a scalar. random_sample(1) would return a\n",
    "        # length-1 array, and assigning that to a single element relies on\n",
    "        # the size-1-array-to-scalar conversion deprecated in NumPy 1.25.\n",
    "        z_data[x,y] = np.random.random_sample()\n",
    "zero_dataset.add_results([{'x':x_data, 'y':y_data, 'z':z_data}])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Array parameter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset for the 1-D array benchmark: x, y and z are all declared with\n",
    "# the \"array\" paramtype, and the run is marked as started.\n",
    "array1D_dataset = new_data_set(\n",
    "    \"array1Ddata\",\n",
    "    exp_id=exp.exp_id,\n",
    "    specs=[ParamSpec(axis, \"array\") for axis in (\"x\", \"y\", \"z\")],\n",
    ")\n",
    "array1D_dataset.mark_started()\n",
    "\n",
    "# Integer setpoints 0 .. y_shape - 1 along the inner axis.\n",
    "y_setpoints = np.arange(y_shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "497 ms ± 61.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "for x in range(x_shape):\n",
    "    x_data[x,:] = x\n",
    "    array1D_dataset.add_results([{'x':x_data[x,:], 'y':y_setpoints, 'z':np.random.random_sample(y_shape)}])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "x_data = np.zeros((x_shape, y_shape))\n",
    "y_data = np.zeros((x_shape, y_shape))\n",
    "z_data = np.zeros((x_shape, y_shape))\n",
    "y_setpoints = np.arange(y_shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset for the whole-grid array benchmark: x, y and z are all declared\n",
    "# with the \"array\" paramtype, and the run is marked as started.\n",
    "array0D_dataset = new_data_set(\n",
    "    \"array0Ddata\",\n",
    "    exp_id=exp.exp_id,\n",
    "    specs=[ParamSpec(axis, \"array\") for axis in (\"x\", \"y\", \"z\")],\n",
    ")\n",
    "array0D_dataset.mark_started()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10.3 ms ± 444 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "for x in range(x_shape):\n",
    "    x_data[x,:] = x\n",
    "    y_data[x,:] = y_setpoints\n",
    "    z_data[x,:] = np.random.random_sample(y_shape)\n",
    "array0D_dataset.add_results([{'x':x_data, 'y':y_data, 'z':z_data}])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Insert many"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build one result dict per (x, y) grid point up front. The grid size comes\n",
    "# from x_shape / y_shape, like the other benchmarks, instead of a\n",
    "# hard-coded 100.\n",
    "data = [\n",
    "    {'x': i, 'y': j, 'z': np.random.random_sample()}\n",
    "    for i in range(x_shape)\n",
    "    for j in range(y_shape)\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset for the bulk-insert benchmark: x, y and z are all declared with\n",
    "# the \"numeric\" paramtype, and the run is marked as started.\n",
    "many_Data = new_data_set(\n",
    "    \"many_data\",\n",
    "    exp_id=exp.exp_id,\n",
    "    specs=[ParamSpec(axis, \"numeric\") for axis in (\"x\", \"y\", \"z\")],\n",
    ")\n",
    "many_Data.mark_started()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "43.2 ms ± 2.46 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "many_Data.add_results(data)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  },
  "nbsphinx": {
   "timeout": 600
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}