Discrete Search Spaces#
[1]:
from typing import List, Optional
from overrides import overrides
import numpy as np
import torch
from torch import nn
The ArchaiModel
class#
The ArchaiModel
class is a base class used to wrap all model objects. ArchaiModel
also stores an architecture ID (ArchaiModel.archid
) and optionally a metadata dictionary (ArchaiModel.metadata
).
[2]:
from archai.discrete_search.api import ArchaiModel
Let’s first consider a simple PyTorch model
[3]:
class MyModel(nn.Module):
    """Toy CNN classifier used throughout this tutorial.

    The architecture is fully described by three parameters:

    Args:
        nb_layers: Number of Conv-BN-ReLU blocks.
        kernel_size: Kernel size used by every conv block.
        hidden_dim: Number of channels in the hidden layers.

    Input is expected to be a single-channel image batch of shape
    ``(B, 1, H, W)``; output logits have shape ``(B, 10)``.
    """

    def __init__(self, nb_layers: int = 5, kernel_size: int = 3, hidden_dim: int = 32):
        super().__init__()
        self.nb_layers = nb_layers
        self.kernel_size = kernel_size
        self.hidden_dim = hidden_dim

        layer_list = []
        for i in range(nb_layers):
            # First block consumes the 1-channel input; the rest are hidden_dim -> hidden_dim.
            in_ch = (1 if i == 0 else hidden_dim)
            layer_list += [
                # 'same' padding so spatial size is preserved through the conv stack.
                nn.Conv2d(in_ch, hidden_dim, kernel_size=kernel_size, padding=(kernel_size-1)//2),
                nn.BatchNorm2d(hidden_dim),
                nn.ReLU(),
            ]
        layer_list += [
            # Global average pool + 1x1 conv acts as the 10-way classifier head.
            nn.AdaptiveAvgPool2d(output_size=(1, 1)),
            nn.Conv2d(hidden_dim, 10, kernel_size=1)
        ]
        self.model = nn.Sequential(*layer_list)

    def forward(self, x):
        # Squeeze only the trailing 1x1 spatial dims. A bare .squeeze()
        # would also drop the batch dimension for a batch of size 1,
        # returning shape (10,) instead of (1, 10).
        return self.model(x).squeeze(-1).squeeze(-1)

    def get_archid(self):
        """Return a string id built from the three architecture parameters."""
        return f'({self.nb_layers}, {self.kernel_size}, {self.hidden_dim})'
[4]:
# Instantiate one concrete architecture from the MyModel family.
model_obj = MyModel(nb_layers=2, kernel_size=3, hidden_dim=16)
We can now wrap a MyModel
instance into an ArchaiModel
:
[5]:
# Wrap the raw PyTorch module in an ArchaiModel, tagging it with a
# human-readable archid built from the three architecture parameters
# and an optional metadata payload.
model = ArchaiModel(
    arch=model_obj,
    archid=f'L={model_obj.nb_layers}, K={model_obj.kernel_size}, H={model_obj.hidden_dim}',
    metadata={'optional': {'metadata'}}
)
Architecture ids (archid
) are used to identify a unique model architecture. The contents of archid
can be decided by the search space designer; one good approach is to hash the architecture definition into a string. However, to keep things simple, in this example we’ll just use a simple string representation with the three available architecture parameters (L, K and H).
[6]:
model.archid
[6]:
'L=2, K=3, H=16'
[7]:
model.metadata
[7]:
{'optional': {'metadata'}}
[8]:
model.arch
[8]:
MyModel(
(model): Sequential(
(0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU()
(6): AdaptiveAvgPool2d(output_size=(1, 1))
(7): Conv2d(16, 10, kernel_size=(1, 1), stride=(1, 1))
)
)
Building a Search Space#
Discrete search spaces in Archai are defined using the DiscreteSearchSpace
abstract class:
class DiscreteSearchSpace(EnforceOverrides):
@abstractmethod
def random_sample(self) -> ArchaiModel:
...
@abstractmethod
def save_arch(self, model: ArchaiModel, path: str) -> None:
...
@abstractmethod
def load_arch(self, path: str) -> ArchaiModel:
...
@abstractmethod
def save_model_weights(self, model: ArchaiModel, path: str) -> None:
...
@abstractmethod
def load_model_weights(self, model: ArchaiModel, path: str) -> None:
...
To turn MyModel
into a search space, we need to override the DiscreteSearchSpace
abstract base class:
[9]:
import json
from random import Random
from archai.discrete_search.api import DiscreteSearchSpace
class CNNSearchSpace(DiscreteSearchSpace):
    """Discrete search space over the MyModel architecture family.

    Each architecture is identified by three parameters: number of conv
    blocks, kernel size and hidden channel count.

    Args:
        min_layers: Minimum number of conv blocks sampled (inclusive).
        max_layers: Maximum number of conv blocks sampled (inclusive).
        kernel_list: Candidate kernel sizes.
        hidden_list: Candidate hidden channel counts.
        seed: Seed for the private RNG, making sampling reproducible.
    """

    def __init__(self, min_layers: int = 1, max_layers: int = 12,
                 kernel_list=(1, 3, 5, 7), hidden_list=(16, 32, 64, 128),
                 seed: int = 1):
        self.min_layers = min_layers
        self.max_layers = max_layers
        self.kernel_list = kernel_list
        self.hidden_list = hidden_list

        # Private RNG instance so sampling does not disturb the global random state.
        self.rng = Random(seed)

    def get_archid(self, model: MyModel) -> str:
        """Return a unique, human-readable id for ``model``'s architecture."""
        return f'L={model.nb_layers}, K={model.kernel_size}, H={model.hidden_dim}'

    @overrides
    def random_sample(self) -> ArchaiModel:
        """Sample a random architecture from the search space."""
        # Randomly chooses architecture parameters
        nb_layers = self.rng.randint(self.min_layers, self.max_layers)
        kernel_size = self.rng.choice(self.kernel_list)
        hidden_dim = self.rng.choice(self.hidden_list)

        model = MyModel(nb_layers, kernel_size, hidden_dim)

        # Wraps model into ArchaiModel
        return ArchaiModel(arch=model, archid=self.get_archid(model))

    @overrides
    def save_arch(self, model: ArchaiModel, file: str):
        """Serialize the architecture parameters (not the weights) to a JSON file."""
        with open(file, 'w') as fp:
            json.dump({
                'nb_layers': model.arch.nb_layers,
                'kernel_size': model.arch.kernel_size,
                'hidden_dim': model.arch.hidden_dim
            }, fp)

    @overrides
    def load_arch(self, file: str) -> ArchaiModel:
        """Rebuild an ArchaiModel (with fresh weights) from a file written by save_arch."""
        # Use a context manager so the file handle is closed deterministically
        # (the original `json.load(open(file))` left closing to the GC).
        with open(file) as fp:
            config = json.load(fp)

        model = MyModel(**config)
        return ArchaiModel(arch=model, archid=self.get_archid(model))

    @overrides
    def save_model_weights(self, model: ArchaiModel, file: str):
        """Save the wrapped module's weights with torch.save."""
        # BUGFIX: nn.Module exposes `state_dict()`, not `get_state_dict()` —
        # the original call raised AttributeError at runtime.
        state_dict = model.arch.state_dict()
        torch.save(state_dict, file)

    @overrides
    def load_model_weights(self, model: ArchaiModel, file: str):
        """Load weights previously saved by save_model_weights into model.arch."""
        model.arch.load_state_dict(torch.load(file))
[10]:
# Build a search space restricted to three hidden-dim choices.
ss = CNNSearchSpace(hidden_list=[32, 64, 128])
Let’s try sampling an architecture
[11]:
# Draw one random architecture and display its ArchaiModel wrapper.
m = ss.random_sample()
m
[11]:
ArchaiModel(
archid=L=3, K=1, H=64,
metadata={},
arch=MyModel(
(model): Sequential(
(0): Conv2d(1, 64, kernel_size=(1, 1), stride=(1, 1))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
(4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU()
(6): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
(7): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): ReLU()
(9): AdaptiveAvgPool2d(output_size=(1, 1))
(10): Conv2d(64, 10, kernel_size=(1, 1), stride=(1, 1))
)
)
)
Saving an architecture
[12]:
# Persist only the architecture parameters (no weights) to disk.
ss.save_arch(m, 'arch.json')
[13]:
# Inspect the raw JSON that save_arch produced.
open('arch.json').read()
[13]:
'{"nb_layers": 3, "kernel_size": 1, "hidden_dim": 64}'
Loading an architecture without the weights
[14]:
# Rebuild the model from JSON — weights are freshly initialized, not restored.
ss.load_arch('arch.json')
[14]:
ArchaiModel(
archid=L=3, K=1, H=64,
metadata={},
arch=MyModel(
(model): Sequential(
(0): Conv2d(1, 64, kernel_size=(1, 1), stride=(1, 1))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
(4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU()
(6): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
(7): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): ReLU()
(9): AdaptiveAvgPool2d(output_size=(1, 1))
(10): Conv2d(64, 10, kernel_size=(1, 1), stride=(1, 1))
)
)
)
Making the search space compatible with NAS algorithms#
Search spaces serve as the main interface between NAS algorithms and the application. Different classes of NAS algorithms interact with architectures from the search space using specific abstract classes:
Evolutionary algorithms:#
User must subclass
EvolutionarySearchSpace
and implement EvolutionarySearchSpace.mutate
and EvolutionarySearchSpace.crossover
Bayesian Optimization algorithms:#
User must subclass
BayesOptSearchSpace
and override BayesOptSearchSpace.encode
Encode should take an
ArchaiModel
and produce a fixed-length vector representation of that architecture. This numerical representation will be used to train surrogate models.
Example: Making CNNSearchSpace
compatible with NAS algorithms#
Let’s make our search space compatible with Evolutionary and Bayesian Optimization NAS algorithms. To do that, we need to subclass EvolutionarySearchSpace
and BayesOptSearchSpace
, and implement the mutate
, crossover
and encode
methods.
[15]:
from archai.discrete_search.api.search_space import EvolutionarySearchSpace, BayesOptSearchSpace
class CNNSearchSpaceExt(CNNSearchSpace, EvolutionarySearchSpace, BayesOptSearchSpace):
    '''Extends CNNSearchSpace (reused here to save space) with the
    interfaces required by evolutionary and Bayesian-optimization NAS
    algorithms: mutate, crossover and encode.'''

    @overrides
    def mutate(self, model_1: ArchaiModel) -> ArchaiModel:
        '''Return a new architecture derived from ``model_1`` by
        independently resampling each parameter with probability 0.2.'''
        parent = model_1.arch
        params = {
            'nb_layers': parent.nb_layers,
            'kernel_size': parent.kernel_size,
            'hidden_dim': parent.hidden_dim
        }

        if self.rng.random() < 0.2:
            params['nb_layers'] = self.rng.randint(self.min_layers, self.max_layers)

        if self.rng.random() < 0.2:
            params['kernel_size'] = self.rng.choice(self.kernel_list)

        if self.rng.random() < 0.2:
            params['hidden_dim'] = self.rng.choice(self.hidden_list)

        child = MyModel(**params)
        return ArchaiModel(
            arch=child, archid=self.get_archid(child)
        )

    @overrides
    def crossover(self, model_list: List[ArchaiModel]) -> ArchaiModel:
        '''Combine parents by picking each architecture parameter from a
        randomly chosen member of ``model_list``.'''
        def pick(attr_name):
            return self.rng.choice([m.arch for m in model_list].__class__(
                getattr(m.arch, attr_name) for m in model_list))

        offspring = MyModel(
            nb_layers=self.rng.choice([m.arch.nb_layers for m in model_list]),
            kernel_size=self.rng.choice([m.arch.kernel_size for m in model_list]),
            hidden_dim=self.rng.choice([m.arch.hidden_dim for m in model_list])
        )
        return ArchaiModel(
            arch=offspring, archid=self.get_archid(offspring)
        )

    @overrides
    def encode(self, model: ArchaiModel) -> np.ndarray:
        '''Fixed-length numeric encoding used to train surrogate models.'''
        arch = model.arch
        return np.array([arch.nb_layers, arch.kernel_size, arch.hidden_dim])
[16]:
# Re-create the search space using the extended (EA + BO compatible) class.
ss = CNNSearchSpaceExt(hidden_list=[32, 64, 128])
Now we can generate mutations, crossover and encodings from any architecture of this search space
[17]:
# Sample an architecture and show its id.
m = ss.random_sample()
m.archid
[17]:
'L=3, K=1, H=64'
[18]:
# Mutate the sampled architecture and show the child's id.
ss.mutate(m).archid
[18]:
'L=8, K=1, H=64'
[19]:
# Sample four parents, print their ids, then cross them over.
models = [ss.random_sample() for _ in range(4)]
[print(m.archid) for m in models]
ss.crossover(models).archid
L=4, K=1, H=64
L=1, K=7, H=64
L=10, K=1, H=128
L=8, K=5, H=128
[19]:
'L=1, K=1, H=128'
[20]:
# Fixed-length numeric encoding of the architecture (for surrogate models).
ss.encode(m)
[20]:
array([ 3, 1, 64])
Now we can use CNNSearchSpaceExt
with EA and BO search algorithms
Built-in Search Spaces#
Instead of creating a search space from scratch, Archai has a list of built-in search spaces that can be used for many Machine Learning tasks. A list of built-in search spaces can be found in archai/discrete_search/search_spaces
.
Example: Semantic Segmentation Search Space (SegmentationDagSearchSpace
)
[21]:
from archai.discrete_search.search_spaces.cv import SegmentationDagSearchSpace
# Built-in search space example: sample and mutate a segmentation DAG.
ss = SegmentationDagSearchSpace(nb_classes=1, img_size=(64, 64), max_layers=3)
ss.mutate(ss.random_sample())
[21]:
ArchaiModel(
archid=74f66612a0d01c5b7d4702234756b0ee4ffa5abc_64_64,
metadata={'parent': '32fa5956ab3ce9e05bc42836599a8dc9dd53e847_64_64'},
arch=SegmentationDagModel(
(edge_dict): ModuleDict(
(input-output): Block(
(op): Sequential(
(0): NormalConvBlock(
(conv): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
)
)
)
)
(stem_block): NormalConvBlock(
(conv): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
)
(up): Upsample(size=(64, 64), mode=nearest)
(post_upsample): Sequential(
(0): NormalConvBlock(
(conv): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
)
(1): NormalConvBlock(
(conv): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
)
(2): NormalConvBlock(
(conv): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
)
)
(classifier): Conv2d(40, 1, kernel_size=(1, 1), stride=(1, 1))
)
)