Data export and classification

[11]:
import os
import numpy as np
import qcodes as qc
import nanotune as nt

from nanotune.data.export_data import correct_normalizations, export_data, prep_data
from nanotune.classification.classifier import Classifier, METRIC_NAMES
from nanotune.classification.utils import display_metrics

# Directory two levels above the nanotune package's module file; the data
# folders used in the cells below are addressed relative to it.
nt_root = os.path.abspath(os.path.join(nt.__file__, os.pardir, os.pardir))

Export labelled data to a numpy file

[12]:
# Export the labelled pinch-off data from the listed database to a numpy file.
export_data(
    "pinchoff",
    ["device_characterization.db"],
    db_folder=os.path.join(nt_root, "data", "tuning"),
    filename="test_export_data",
)

Train classifier with the data saved in the numpy file.

Note that this is just a small demo and that the data in the file is not sufficient for real-life purposes.

[13]:
# Set up an SVC-type pinch-off classifier that reads the exported numpy file
# from the tuning data folder.
pinchoff_classifier = Classifier(
    ["test_export_data.npy"],
    "pinchoff",
    data_types=["signal"],
    classifier_type="SVC",
    folder_path=os.path.join(nt_root, "data", "tuning"),
)
[14]:
# Train the classifier on the data it was constructed with.
pinchoff_classifier.train()

Predict the quality of a dataset.

Note that this is again just to show how the code works and that a classifier should not be used to predict data it was trained on.

[15]:
# Classify dataset 1203 from the tuning database; being the last expression,
# the prediction is displayed as the cell output.
pinchoff_classifier.predict(
    1203,
    db_name="device_characterization.db",
    db_folder=os.path.join(nt_root, "data", "tuning"),
)
[15]:
[array([1])]
[16]:
# Rebind `pinchoff_classifier` to a new SVC-type classifier built from
# pinchoff.npy in the training_data folder; the metric cells below use it.
pinchoff_classifier = Classifier(
    ["pinchoff.npy"],
    "pinchoff",
    data_types=["signal"],
    classifier_type="SVC",
    folder_path=os.path.join(nt_root, "data", "training_data"),
)

We can also compute metrics such as accuracy…

[17]:
# Compute the classification metrics over 10 iterations; nothing is saved to file.
metrics_results, metrics_all_iterations = pinchoff_classifier.compute_metrics(
    n_iter=10,
    save_to_file=False,
)

…and display them.

[18]:
# Print a table with the mean and standard deviation of each metric.
display_metrics(metrics_results)
+--------------------------+-----------------+---------------------------+
|          metric          |       mean      |            std            |
+--------------------------+-----------------+---------------------------+
|      accuracy_score      |      0.857      |           0.022           |
|     brier_score_loss     |      0.143      |           0.022           |
|           auc            |      0.899      |           0.026           |
| average_precision_recall |      0.878      |           0.050           |
| average_precision_recall |  [[ 91.1  17.3] |  [[7.84155597 3.79605058] |
|                          |  [ 14.7 100.9]] |  [3.71618084 7.81600921]] |
+--------------------------+-----------------+---------------------------+
[19]:
# Show the full results dictionary: run settings, hyperparameters and metrics.
metrics_results
[19]:
{'n_iter': 10,
 'classifier': 'SVC',
 'category': 'pinchoff',
 'data_files': ['/Users/jana/Documents/code/nanotune/nanotune/data/training_data/pinchoff.npy'],
 'data_types': ['signal'],
 'hyper_parameters': {'C': 1.0,
  'break_ties': False,
  'cache_size': 200,
  'class_weight': None,
  'coef0': 0.0,
  'decision_function_shape': 'ovr',
  'degree': 3,
  'gamma': 'auto',
  'kernel': 'linear',
  'max_iter': -1,
  'probability': True,
  'random_state': None,
  'shrinking': True,
  'tol': 0.001,
  'verbose': False},
 'metric_names': ['accuracy_score',
  'brier_score_loss',
  'auc',
  'average_precision_recall'],
 'elapsed_time [s/iter]': 0.23525290489196776,
 'n_test': 224,
 'n_train': 894,
 'mean_train_time': 0.22599971294403076,
 'std_train_time': 0.04204103765981187,
 'perform_pca': False,
 'scale_pc': False,
 'metadata': {},
 'supp_train_data': None,
 'accuracy_score': {'std': 0.02150284746212909, 'mean': 0.8571428571428571},
 'brier_score_loss': {'std': 0.021502847462129097,
  'mean': 0.14285714285714288},
 'auc': {'std': 0.026156287412055446, 'mean': 0.8992805600465585},
 'average_precision_recall': {'std': 0.049648983017123845,
  'mean': 0.8777369915616033},
 'confusion_matrix': {'std': [[7.841555968046137, 3.7960505792204615],
   [3.7161808352124095, 7.8160092118676525]],
  'mean': [[91.1, 17.3], [14.7, 100.9]]}}
[20]:
# Print the mean/std summary of every metric listed in METRIC_NAMES.
# The summary is looked up by name: iterating over `metrics_results` directly
# yields its keys (e.g. 'n_iter', 'classifier'), so zipping the dictionary
# with METRIC_NAMES would pair each metric with an unrelated key.
for metric_name in METRIC_NAMES:
    print(metric_name)
    print(metrics_results[metric_name])
    print('\n')
accuracy_score
n_iter


brier_score_loss
classifier


auc
category


average_precision_recall
data_files


[ ]: