Discrete Search Spaces#

[1]:
from typing import List, Optional

from overrides import overrides
import numpy as np
import torch
from torch import nn

The ArchaiModel class#

The ArchaiModel class is a base class used to wrap all model objects. ArchaiModel also stores an architecture ID (ArchaiModel.archid) and optionally a metadata dictionary (ArchaiModel.metadata).

[2]:
from archai.discrete_search.api import ArchaiModel

Let’s first consider a simple PyTorch model

[3]:
class MyModel(nn.Module):
    """Simple CNN: `nb_layers` conv blocks followed by global average pooling
    and a 1x1 conv producing 10 class logits.

    Args:
        nb_layers: Number of Conv2d+BatchNorm2d+ReLU blocks.
        kernel_size: Kernel size shared by all conv blocks (assumed odd so
            that the padding below preserves spatial dims).
        hidden_dim: Channel width of every hidden layer.
    """

    def __init__(self, nb_layers: int = 5, kernel_size: int = 3, hidden_dim: int = 32):
        super().__init__()

        self.nb_layers = nb_layers
        self.kernel_size = kernel_size
        self.hidden_dim = hidden_dim

        layer_list = []

        for i in range(nb_layers):
            # First block takes the single input channel; the rest keep hidden_dim
            in_ch = (1 if i == 0 else hidden_dim)

            layer_list += [
                # (kernel_size-1)//2 padding keeps spatial dims for odd kernels
                nn.Conv2d(in_ch, hidden_dim, kernel_size=kernel_size, padding=(kernel_size-1)//2),
                nn.BatchNorm2d(hidden_dim),
                nn.ReLU(),
            ]

        layer_list += [
            nn.AdaptiveAvgPool2d(output_size=(1, 1)),
            nn.Conv2d(hidden_dim, 10, kernel_size=1)
        ]

        self.model = nn.Sequential(*layer_list)

    def forward(self, x):
        """Return class logits of shape (batch, 10).

        Only the two trailing singleton spatial dims are removed: a bare
        `.squeeze()` would also drop the batch dimension when batch size is 1,
        silently changing the output shape from (1, 10) to (10,).
        """
        return self.model(x).squeeze(-1).squeeze(-1)

    def get_archid(self):
        """Tuple-style architecture id string, e.g. '(5, 3, 32)'."""
        return f'({self.nb_layers}, {self.kernel_size}, {self.hidden_dim})'
[4]:
model_obj = MyModel(nb_layers=2, kernel_size=3, hidden_dim=16)

We can now wrap a MyModel instance into an ArchaiModel:

[5]:
# Wrap the raw nn.Module in an ArchaiModel, attaching a unique archid string
# and an optional metadata dict.
# NOTE(review): {'metadata'} is a set literal used as a placeholder value —
# presumably intentional for the demo; confirm if a string was meant.
model = ArchaiModel(
    arch=model_obj,
    archid=f'L={model_obj.nb_layers}, K={model_obj.kernel_size}, H={model_obj.hidden_dim}',
    metadata={'optional': {'metadata'}}
)

Architecture ids (archid) are used to identify a unique model architecture. The contents of archid can be decided by the search space designer; one good approach is to hash the architecture definition into a string. However, to keep things simple, in this example we’ll just use a simple string representation built from the three available architecture parameters (L, K and H).

[6]:
model.archid
[6]:
'L=2, K=3, H=16'
[7]:
model.metadata
[7]:
{'optional': {'metadata'}}
[8]:
model.arch
[8]:
MyModel(
  (model): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): AdaptiveAvgPool2d(output_size=(1, 1))
    (7): Conv2d(16, 10, kernel_size=(1, 1), stride=(1, 1))
  )
)

Building a Search Space#

Discrete search spaces in Archai are defined using the DiscreteSearchSpace abstract class:

class DiscreteSearchSpace(EnforceOverrides):

    @abstractmethod
    def random_sample(self) -> ArchaiModel:
        ...

    @abstractmethod
    def save_arch(self, model: ArchaiModel, path: str) -> None:
        ...

    @abstractmethod
    def load_arch(self, path: str) -> ArchaiModel:
        ...

    @abstractmethod
    def save_model_weights(self, model: ArchaiModel, path: str) -> None:
        ...

    @abstractmethod
    def load_model_weights(self, model: ArchaiModel, path: str) -> None:
        ...

To turn MyModel into a search space, we need to subclass the DiscreteSearchSpace abstract base class and implement its abstract methods:

[9]:
import json
from random import Random
from archai.discrete_search.api import DiscreteSearchSpace


class CNNSearchSpace(DiscreteSearchSpace):
    """Discrete search space over `MyModel` architectures.

    Architectures vary in number of layers, kernel size and hidden dimension.
    Sampling is reproducible through the dedicated `seed`-initialized RNG.

    Args:
        min_layers: Minimum number of conv blocks (inclusive).
        max_layers: Maximum number of conv blocks (inclusive).
        kernel_list: Candidate kernel sizes.
        hidden_list: Candidate hidden dimensions.
        seed: Seed for the internal random number generator.
    """

    def __init__(self, min_layers: int = 1, max_layers: int = 12,
                 kernel_list=(1, 3, 5, 7), hidden_list=(16, 32, 64, 128),
                 seed: int = 1):

        self.min_layers = min_layers
        self.max_layers = max_layers
        self.kernel_list = kernel_list
        self.hidden_list = hidden_list

        # Dedicated RNG instance so sampling is reproducible and does not
        # disturb the global `random` state.
        self.rng = Random(seed)

    def get_archid(self, model: MyModel) -> str:
        """Human-readable id uniquely identifying an architecture."""
        return f'L={model.nb_layers}, K={model.kernel_size}, H={model.hidden_dim}'

    @overrides
    def random_sample(self) -> ArchaiModel:
        """Sample a uniformly random architecture from the space."""
        nb_layers = self.rng.randint(self.min_layers, self.max_layers)
        kernel_size = self.rng.choice(self.kernel_list)
        hidden_dim = self.rng.choice(self.hidden_list)

        model = MyModel(nb_layers, kernel_size, hidden_dim)

        # Wraps model into ArchaiModel
        return ArchaiModel(arch=model, archid=self.get_archid(model))

    @overrides
    def save_arch(self, model: ArchaiModel, file: str):
        """Serialize only the architecture parameters (no weights) to JSON."""
        with open(file, 'w') as fp:
            json.dump({
                'nb_layers': model.arch.nb_layers,
                'kernel_size': model.arch.kernel_size,
                'hidden_dim': model.arch.hidden_dim
            }, fp)

    @overrides
    def load_arch(self, file: str):
        """Rebuild an (untrained) ArchaiModel from a JSON file written by `save_arch`."""
        # Use a context manager so the file handle is closed deterministically
        # (the original `json.load(open(file))` relied on GC for cleanup).
        with open(file) as fp:
            config = json.load(fp)

        model = MyModel(**config)

        return ArchaiModel(arch=model, archid=self.get_archid(model))

    @overrides
    def save_model_weights(self, model: ArchaiModel, file: str):
        """Save model weights to `file` using `torch.save`."""
        # Bug fix: nn.Module exposes `state_dict()`, not `get_state_dict()` —
        # the original raised AttributeError at runtime.
        state_dict = model.arch.state_dict()
        torch.save(state_dict, file)

    @overrides
    def load_model_weights(self, model: ArchaiModel, file: str):
        """Load weights saved by `save_model_weights` into `model.arch`."""
        model.arch.load_state_dict(torch.load(file))

[10]:
ss = CNNSearchSpace(hidden_list=[32, 64, 128])

Let’s try sampling an architecture

[11]:
# Sample a random architecture and display the wrapped ArchaiModel
m = ss.random_sample()
m
[11]:
ArchaiModel(
        archid=L=3, K=1, H=64,
        metadata={},
        arch=MyModel(
  (model): Sequential(
    (0): Conv2d(1, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (7): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
    (9): AdaptiveAvgPool2d(output_size=(1, 1))
    (10): Conv2d(64, 10, kernel_size=(1, 1), stride=(1, 1))
  )
)
)

Saving an architecture

[12]:
ss.save_arch(m, 'arch.json')
[13]:
open('arch.json').read()
[13]:
'{"nb_layers": 3, "kernel_size": 1, "hidden_dim": 64}'

Loading an architecture without the weights

[14]:
ss.load_arch('arch.json')
[14]:
ArchaiModel(
        archid=L=3, K=1, H=64,
        metadata={},
        arch=MyModel(
  (model): Sequential(
    (0): Conv2d(1, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (7): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
    (9): AdaptiveAvgPool2d(output_size=(1, 1))
    (10): Conv2d(64, 10, kernel_size=(1, 1), stride=(1, 1))
  )
)
)

Making the search space compatible with NAS algorithms#

Search spaces serve as the main interface between NAS algorithms and the application. Different classes of NAS algorithms interact with architectures from the search space using specific abstract classes:

Evolutionary algorithms:#

  • User must subclass EvolutionarySearchSpace and implement EvolutionarySearchSpace.mutate and EvolutionarySearchSpace.crossover

Bayesian Optimization algorithms:#

  • User must subclass BayesOptSearchSpace and override BayesOptSearchSpace.encode

  • Encode should take an ArchaiModel and produce a fixed-length vector representation of that architecture. This numerical representation will be used to train surrogate models.

Example: Making CNNSearchSpace compatible with NAS algorithms#

Let’s make our search space compatible with Evolutionary and Bayesian Optimization NAS algorithms. To do that, we need to subclass EvolutionarySearchSpace and BayesOptSearchSpace, and implement the mutate, crossover and encode methods.

[15]:
from archai.discrete_search.api.search_space import EvolutionarySearchSpace, BayesOptSearchSpace

class CNNSearchSpaceExt(CNNSearchSpace, EvolutionarySearchSpace, BayesOptSearchSpace):
    """CNNSearchSpace extended with the operations required by evolutionary
    and Bayesian-optimization search algorithms.

    Subclasses CNNSearchSpace only to avoid repeating its implementation.
    """

    @overrides
    def mutate(self, model_1: ArchaiModel) -> ArchaiModel:
        """Return a new model with each parameter resampled with probability 0.2."""
        arch = model_1.arch
        config = {
            'nb_layers': arch.nb_layers,
            'kernel_size': arch.kernel_size,
            'hidden_dim': arch.hidden_dim
        }

        # One (probability check, sampler) pair per architecture parameter.
        # Order matters: it preserves the RNG call sequence for reproducibility.
        samplers = [
            ('nb_layers', lambda: self.rng.randint(self.min_layers, self.max_layers)),
            ('kernel_size', lambda: self.rng.choice(self.kernel_list)),
            ('hidden_dim', lambda: self.rng.choice(self.hidden_list)),
        ]

        for param_name, sample in samplers:
            if self.rng.random() < 0.2:
                config[param_name] = sample()

        mutated = MyModel(**config)

        return ArchaiModel(arch=mutated, archid=self.get_archid(mutated))

    @overrides
    def crossover(self, model_list: List[ArchaiModel]) -> ArchaiModel:
        """Build a child by picking each parameter from a random parent."""
        child_config = {
            'nb_layers': self.rng.choice([m.arch.nb_layers for m in model_list]),
            'kernel_size': self.rng.choice([m.arch.kernel_size for m in model_list]),
            'hidden_dim': self.rng.choice([m.arch.hidden_dim for m in model_list]),
        }

        child = MyModel(**child_config)

        return ArchaiModel(arch=child, archid=self.get_archid(child))

    @overrides
    def encode(self, model: ArchaiModel) -> np.ndarray:
        """Fixed-length numeric encoding consumed by surrogate models."""
        arch = model.arch
        return np.array([arch.nb_layers, arch.kernel_size, arch.hidden_dim])
[16]:
ss = CNNSearchSpaceExt(hidden_list=[32, 64, 128])

Now we can generate mutations, crossovers and encodings from any architecture of this search space

[17]:
# Sample an architecture and show its id
m = ss.random_sample()
m.archid
[17]:
'L=3, K=1, H=64'
[18]:
ss.mutate(m).archid
[18]:
'L=8, K=1, H=64'
[19]:
# Crossover picks each architecture parameter from a random parent.
# NOTE(review): the comprehension on the second line is used only for its
# print side effects; a plain for loop would be more idiomatic.
models = [ss.random_sample() for _ in range(4)]
[print(m.archid) for m in models]
ss.crossover(models).archid
L=4, K=1, H=64
L=1, K=7, H=64
L=10, K=1, H=128
L=8, K=5, H=128
[19]:
'L=1, K=1, H=128'
[20]:
ss.encode(m)
[20]:
array([ 3,  1, 64])

Now we can use CNNSearchSpaceExt with EA and BO search algorithms

Built-in Search Spaces#

Instead of creating a search space from scratch, Archai has a list of built-in search spaces that can be used for many Machine Learning tasks. A list of built-in search spaces can be found in archai/discrete_search/search_spaces.

Example: Semantic Segmentation Search Space (SegmentationDagSearchSpace)

[21]:
from archai.discrete_search.search_spaces.cv import SegmentationDagSearchSpace

# Built-in search space for semantic segmentation: sample a random
# architecture and immediately mutate it.
ss = SegmentationDagSearchSpace(nb_classes=1, img_size=(64, 64), max_layers=3)
ss.mutate(ss.random_sample())
[21]:
ArchaiModel(
        archid=74f66612a0d01c5b7d4702234756b0ee4ffa5abc_64_64,
        metadata={'parent': '32fa5956ab3ce9e05bc42836599a8dc9dd53e847_64_64'},
        arch=SegmentationDagModel(
  (edge_dict): ModuleDict(
    (input-output): Block(
      (op): Sequential(
        (0): NormalConvBlock(
          (conv): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (bn): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU()
        )
      )
    )
  )
  (stem_block): NormalConvBlock(
    (conv): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (bn): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (up): Upsample(size=(64, 64), mode=nearest)
  (post_upsample): Sequential(
    (0): NormalConvBlock(
      (conv): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (1): NormalConvBlock(
      (conv): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (2): NormalConvBlock(
      (conv): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
  )
  (classifier): Conv2d(40, 1, kernel_size=(1, 1), stride=(1, 1))
)
)