Quick Start
Setting up a single-step model
Let's start by creating a test molecule and querying LocalRetro for proposed reactions.
from syntheseus.interface.molecule import Molecule
from syntheseus.reaction_prediction.inference import LocalRetroModel
test_mol = Molecule("Cc1ccc(-c2ccc(C)cc2)cc1")
model = LocalRetroModel()
Note that we didn't provide a path to the model checkpoint, so syntheseus
will download a default checkpoint trained on USPTO-50K and cache it for later use. This behaviour can be overridden by providing a model_dir
argument.
print(model.model_dir)
/home/krmaziar/.cache/torch/syntheseus/LocalRetro_backward
Now let's print the top 5 predictions for our test molecule.
def mols_to_str(mols) -> str:
    """Format a collection of molecules as one human-readable string.

    Args:
        mols: Iterable of objects exposing a `smiles` attribute.

    Returns:
        The SMILES strings of `mols`, in iteration order, joined by ' + '.
        An empty iterable yields an empty string.
    """
    return " + ".join(mol.smiles for mol in mols)
def print_results(results) -> None:
    """Print one numbered line per prediction in `results`.

    Args:
        results: Iterable of predictions; the `output` attribute of each
            prediction is formatted with `mols_to_str`.

    Numbering starts at 1. Nothing is printed for an empty iterable.
    """
    for rank, prediction in enumerate(results, start=1):
        print(f"{rank}: " + mols_to_str(prediction.output))
[results] = model([test_mol], num_results=5)
print_results(results)
1: Cc1ccc(B(O)O)cc1 + Cc1ccc(Br)cc1
2: Cc1ccc(B(O)O)cc1 + Cc1ccc(I)cc1
3: Cc1ccc(B(O)O)cc1 + Cc1ccc(Br)cc1
4: Cc1ccc(B(O)O)cc1 + Cc1ccc(I)cc1
5: Cc1ccc(Br)cc1 + Cc1ccc([Mg+])cc1
The outputs from the underlying model contain duplicates. We can use a higher-level utility function to get unique outputs as well as timing.
from syntheseus.cli.eval_single_step import get_results
# Call the model through `get_results`, passing `measure_time=True` so that
# timing information is available on the returned object.
results_with_timing = get_results(
model, inputs=[test_mol], num_results=5, measure_time=True
)
# A single input molecule was passed, so its predictions are at index 0.
print_results(results_with_timing.results[0])
# Time spent in the model call, formatted below in seconds.
time_taken = results_with_timing.model_timing_results.time_model_call
print(f"\nTime taken by model call: {time_taken:.2f}s")
1: Cc1ccc(B(O)O)cc1 + Cc1ccc(Br)cc1
2: Cc1ccc(B(O)O)cc1 + Cc1ccc(I)cc1
3: Cc1ccc(Br)cc1 + Cc1ccc([Mg+])cc1

Time taken by model call: 0.04s
As syntheseus
sets up all single-step models in a consistent way, it's easy to run several models and compare their outputs.
from syntheseus.reaction_prediction.inference import *
models = [
ChemformerModel(),
Graph2EditsModel(),
LocalRetroModel(),
MEGANModel(),
MHNreactModel(),
RetroKNNModel(),
RootAlignedModel(),
]
for model in models:
    # Even when only very few predictions are of interest (e.g. one), it
    # may be useful to set `num_results > 1`, as this will cause e.g. a
    # larger beam size for models based on beam search.
    [results] = get_results(model, [test_mol], num_results=5).results
    top_prediction = results[0].output
    # Pad the "<name>:" label to 12 characters so the outputs line up.
    print(f"{model.name + ':':12} {mols_to_str(top_prediction)}")
Chemformer:  Cc1ccc(Br)cc1 + Cc1ccc(Br)cc1
Graph2Edits: Cc1ccc(Br)cc1 + Cc1ccc([Sn](C)(C)C)cc1
LocalRetro:  Cc1ccc(B(O)O)cc1 + Cc1ccc(Br)cc1
MEGAN:       Cc1ccc(Br)cc1 + Cc1ccc([Mg+])cc1
MHNreact:    Cc1ccc(Br)cc1 + Cc1ccc([Mg+])cc1
RetroKNN:    Cc1ccc(B(O)O)cc1 + Cc1ccc(Br)cc1
RootAligned: Cc1ccc(Br)cc1 + Cc1ccc([Mg+])cc1
Running search
To run multi-step search we need three things:
- a reaction model
- an inventory of purchasable (building block) molecules
- a search algorithm
We can use any of the single-step models shown above, but they need to be wrapped to make them usable in search.
from syntheseus.reaction_prediction.utils.syntheseus_wrapper import (
SyntheseusBackwardReactionModel,
)
search_model = SyntheseusBackwardReactionModel(
model=LocalRetroModel(), num_results=10
)
from syntheseus.search.mol_inventory import SmilesListInventory
from syntheseus.search.algorithms.breadth_first import (
AndOr_BreadthFirstSearch
)
# Dummy inventory with just two purchasable molecules.
inventory = SmilesListInventory(
smiles_list=["Cc1ccc(B(O)O)cc1", "O=Cc1ccc(I)cc1"]
)
# Configure the search with the wrapped reaction model, the inventory and
# resource limits.
search_algorithm = AndOr_BreadthFirstSearch(
reaction_model=search_model,
mol_inventory=inventory,
limit_iterations=100, # max number of algorithm iterations
limit_reaction_model_calls=100, # max number of model calls
time_limit_s=60.0 # max runtime in seconds
)
# `run_from_mol` returns a pair; only the first element (the search graph)
# is used here, the second is discarded.
output_graph, _ = search_algorithm.run_from_mol(test_mol)
print(f"Explored {len(output_graph)} nodes")
Explored 1256 nodes
The resulting graph contains all the explored molecules and reactions, some of which might have led to complete routes while others remained unsolved. From that we can extract complete routes.
from syntheseus.search.analysis.route_extraction import (
iter_routes_time_order,
)
from syntheseus.search.graph.and_or import AndNode
# Extract the routes simply in the order they were found.
routes = list(iter_routes_time_order(output_graph, max_routes=10))

for idx, route in enumerate(routes):
    # Count the distinct `AndNode`s in the route; these are reported as
    # the route's reactions.
    num_reactions = len({n for n in route if isinstance(n, AndNode)})
    print(f"Route {idx + 1} consists of {num_reactions} reactions")
Route 1 consists of 2 reactions
Route 2 consists of 3 reactions
We can use visualization utilities to get a quick look at the routes found.
from syntheseus.search.visualization import visualize_andor
# Write one PDF per extracted route, named route_1.pdf, route_2.pdf, ...
for idx, route in enumerate(routes):
    visualize_andor(
        output_graph, filename=f"route_{idx + 1}.pdf", nodes=route
    )
The contents of the files route_{1, 2}.pdf
should look roughly like the below.