# Single Agent: Battleship

## Introduction

This notebook will focus on using `trace` to optimize multiple pieces of code automatically in the context of a Battleship game. 

## Setup and Installation

Let's start by installing the `trace-opt` package and importing the necessary libraries.

In [None]:
!pip install trace-opt

In [1]:
import random
import numpy as np
from opto.trace import bundle, node, Module, GRAPH
from opto.trace.errors import ExecutionError
from opto.trace.bundle import ExceptionNode
from opto.optimizers import OptoPrime
from autogen import config_list_from_json

## Environment Setup

Define the Battleship board creation and environment handling functions.

In [2]:
def create_battleship_board(width, height):
    """Build an empty board.

    Returns a list of ``height`` rows, each an independent list of ``width``
    cells, where every cell holds the unknown/empty marker ``'.'``.
    """
    grid = []
    for _ in range(height):
        # A new list per row so that rows never alias each other.
        grid.append(['.'] * width)
    return grid

def can_place_ship(board, row, col, size, is_vertical):
    """Tell whether a ship of ``size`` cells fits on ``board`` at an anchor.

    The ship occupies ``size`` consecutive cells starting at ``(row, col)``,
    extending downwards when ``is_vertical`` is true and to the right
    otherwise.

    Args:
        board: Rectangular list of rows; a free cell is marked ``'.'``.
        row: Row index of the ship's first cell.
        col: Column index of the ship's first cell.
        size: Number of cells the ship occupies.
        is_vertical: Orientation flag (True = down, False = right).

    Returns:
        True when every covered cell lies on the board and is free,
        False otherwise. An anchor outside the board (including negative
        indices, which would otherwise wrap around via Python indexing)
        and an empty board both yield False rather than raising.
    """
    if not board or not board[0]:
        return False
    height, width = len(board), len(board[0])

    # Reject anchors that are off the board before indexing into it.
    if not (0 <= row < height and 0 <= col < width):
        return False

    if is_vertical:
        if row + size > height:
            return False
        return all(board[row + i][col] == '.' for i in range(size))

    if col + size > width:
        return False
    return all(board[row][col + i] == '.' for i in range(size))

def place_ship(board, row, col, size, is_vertical, ship_symbol):
    """Stamp ``ship_symbol`` onto ``size`` consecutive cells of ``board``.

    The first cell is ``(row, col)``; subsequent cells go down the column when
    ``is_vertical`` is true, otherwise along the row. The board is modified in
    place and nothing is returned. No validity check is done here.
    """
    # Per-step offset: (1, 0) walks down a column, (0, 1) walks along a row.
    row_step, col_step = (1, 0) if is_vertical else (0, 1)
    for offset in range(size):
        board[row + offset * row_step][col + offset * col_step] = ship_symbol

def create_and_fill_battleship_board(width, height, ships, num_each_type=2, max_restarts=100):
    """Create a board and randomly place ``num_each_type`` copies of each ship.

    For every ship, all currently legal ``(row, col, orientation)`` placements
    are enumerated and one is drawn uniformly at random. The previous
    draw-and-retry loop never terminated when a ship could not fit; here an
    empty candidate list is detected explicitly.

    Args:
        width: Number of columns.
        height: Number of rows.
        ships: Mapping of ship symbol -> ship length in cells.
        num_each_type: How many copies of each ship to place.
        max_restarts: How many times to start over with an empty board when
            earlier random placements leave no room for a later ship.

    Returns:
        A list of ``height`` rows of ``width`` cells; free cells hold ``'.'``
        and ship cells hold the ship's symbol.

    Raises:
        ValueError: If no complete layout was found within the allowed
            number of restarts (for example, a ship longer than the board).
    """
    orientations = (True, False)

    for _ in range(max_restarts + 1):
        board = [['.' for _ in range(width)] for _ in range(height)]
        dead_end = False

        for ship_symbol, size in ships.items():
            for _ in range(num_each_type):
                candidates = [
                    (row, col, is_vertical)
                    for row in range(height)
                    for col in range(width)
                    for is_vertical in orientations
                    if can_place_ship(board, row, col, size, is_vertical)
                ]
                if not candidates:
                    # Nothing fits any more on this board; start over.
                    dead_end = True
                    break
                row, col, is_vertical = random.choice(candidates)
                place_ship(board, row, col, size, is_vertical, ship_symbol)
            if dead_end:
                break

        if not dead_end:
            return board

    raise ValueError(
        f"Could not place {num_each_type} of each ship {dict(ships)} "
        f"on a {height}x{width} board."
    )

def check_hit(board, row, col):
    """Resolve a shot at ``(row, col)`` against ``board`` and mark the result.

    A cell holding a ship symbol becomes ``'X'`` and the call returns True.
    An untouched empty cell (``'.'``) becomes ``'O'`` and the call returns
    False. Cells already marked ``'O'`` or ``'X'`` and coordinates outside the
    board leave the board unchanged and return False.
    """
    on_board = 0 <= row < len(board) and 0 <= col < len(board[0])
    if not on_board:
        return False

    cell = board[row][col]
    if cell == '.':
        board[row][col] = 'O'
        return False
    if cell in ('O', 'X'):
        # Already fired at: nothing to update.
        return False

    board[row][col] = 'X'
    return True

# Ships to be placed on the board.
# Maps each one-character ship symbol (the value written into board cells by
# place_ship) to that ship's length in cells.
ships = {
    'C': 5,  # Carrier
    'B': 4,  # Battleship
    'R': 3,  # Cruiser
    'S': 3,  # Submarine
    'D': 2  # Destroyer
}

## Wrap the environment into a BattleshipBoard class

In [3]:
# Define BattleshipBoard class
class BattleshipBoard:
    """A single-player Battleship environment.

    Holds two grids of the same size:

    * ``board`` - the hidden layout (ship symbols, ``'.'`` for water, and
      ``'X'`` / ``'O'`` once a cell has been fired at);
    * ``shots`` - what the player is allowed to see (``'X'`` hit, ``'O'``
      miss, ``'.'`` not yet fired at).

    The ship catalogue is read from the module-level ``ships`` mapping.

    Args:
        width: Number of columns.
        height: Number of rows.
        num_each_type: How many copies of each ship type are placed.
        exclude_ships: Iterable of ship symbols to leave off the board
            (``None`` means exclude nothing).
        init_with_one_hit: When true, the first ship cell in row-major order
            is revealed as a hit during construction.
    """

    def __init__(self, width, height, num_each_type=2, exclude_ships=None, init_with_one_hit=False):
        # ``None`` replaces the former mutable ``[]`` default argument.
        excluded = () if exclude_ships is None else exclude_ships

        self.width = width
        self.height = height
        # Kept so the total number of ship cells can be computed correctly.
        self.num_each_type = num_each_type
        self.ships = {s: ships[s] for s in ships if s not in excluded}
        self.board = create_and_fill_battleship_board(width, height, self.ships, num_each_type=num_each_type)
        self.shots = [['.' for _ in range(width)] for _ in range(height)]
        self.hits = 0
        self.misses = 0

        if init_with_one_hit:
            self._reveal_first_ship_cell()

    def _reveal_first_ship_cell(self):
        """Fire at the first non-water cell found scanning rows top to bottom."""
        for row in range(self.height):
            for col in range(self.width):
                if self.board[row][col] != '.':
                    self.check_shot(row, col)
                    return

    def get_life_points(self):
        """Return the total number of ship cells placed on the board.

        Every ship type is placed ``num_each_type`` times, so the total is the
        sum of the ship lengths multiplied by that count.
        """
        return max(self.num_each_type, 0) * sum(self.ships.values())

    def check_shot(self, row, col):
        """Fire at ``(row, col)`` and update the hit/miss counters.

        A shot at a cell that was already fired at counts as a miss.

        Returns:
            True on a hit, False otherwise.

        Raises:
            IndexError: If the coordinate is outside the board. The check
                happens before any state is touched, so an invalid shot
                neither changes the counters nor marks a wrapped-around cell.
        """
        if not (0 <= row < self.height and 0 <= col < self.width):
            raise IndexError(
                f"Shot ({row}, {col}) is outside the {self.height}x{self.width} board."
            )

        is_hit = check_hit(self.board, row, col)
        if is_hit:
            self.hits += 1
            self.shots[row][col] = 'X'
        else:
            self.misses += 1
            if self.shots[row][col] == '.':
                self.shots[row][col] = 'O'
        return is_hit

    def check_terminate(self):
        """Return True when all ship cells are hit or the shot budget is spent.

        The shot budget is one shot per board cell (hits plus misses).
        """
        all_sunk = self.hits >= self.get_life_points()
        out_of_shots = self.misses + self.hits >= self.width * self.height
        return all_sunk or out_of_shots

    def get_board(self):
        """Return the hidden layout grid (not a copy)."""
        return self.board

    def get_shots(self):
        """Return the player-visible shots grid (not a copy)."""
        return self.shots

    def get_shots_overlay_board(self):
        """Return a new grid showing shot marks where present, else the layout."""
        return [
            [
                self.board[row][col] if self.shots[row][col] == '.' else self.shots[row][col]
                for col in range(self.width)
            ]
            for row in range(self.height)
        ]

    def get_hits(self):
        """Return the number of hits so far."""
        return self.hits

    def get_misses(self):
        """Return the number of misses so far."""
        return self.misses

    def get_game_status(self):
        """Return a human-readable status string."""
        if self.hits >= self.get_life_points():
            return 'Game Over: All ships sunk!'
        return 'Game in progress'

    @staticmethod
    def _print_grid(grid):
        """Print ``grid`` one row per line with cells separated by spaces."""
        print(''.join(' '.join(row) + '\n' for row in grid))

    def visualize_board(self):
        """Print the hidden layout grid."""
        self._print_grid(self.board)

    def visualize_own_board(self):
        """Print the layout with shot marks overlaid."""
        self._print_grid(self.get_shots_overlay_board())

    def visualize_shots(self):
        """Print the player-visible shots grid."""
        self._print_grid(self.shots)

## Define a Policy class with multiple trainable functions

In [4]:
# Define Policy class
class Policy(Module):
    """Battleship policy built from two trainable, initially empty functions.

    ``reason`` and ``act`` are declared with ``@bundle(trainable=True)`` and
    their bodies simply ``return``; the notebook later hands
    ``policy.parameters()`` to the optimizer. Their source and docstrings are
    left exactly as written because they are presumably what the optimizer
    reads and rewrites - confirm against the Trace documentation before
    editing them.
    """

    def init(self, width, height):
        # NOTE(review): this is named ``init``, not ``__init__``, so Python does
        # not call it on construction; the notebook builds ``Policy()`` with no
        # arguments. It is a no-op either way.
        pass

    def __call__(self, map):
        """Pick a coordinate for ``map`` and return the ``.data`` of the result.

        ``.data`` presumably unwraps the traced node into a plain Python value.
        """
        return self.select_coordinate(map).data

    def select_coordinate(self, map):
        """Run ``reason`` on the map, then ``act`` on the map and that plan.

        Returns whatever ``act`` returns, without unwrapping it.
        """
        plan = self.reason(map)
        output = self.act(map, plan)
        return output

    @bundle(trainable=True)
    def act(self, map, plan):
        """
        Given a map, select a target coordinate in a game.
        X denotes hits, O denotes misses, and . denotes unknown positions.
        """
        return

    @bundle(trainable=True)
    def reason(self, map):
        """
        Given a map, analyze the board in a game.
        X denotes hits, O denotes misses, and . denotes unknown positions.
        """
        return


## Helper functions to roll out and evaluate the policy


In [5]:
# Function to get user feedback for placing shot
def user_fb_for_placing_shot(board, coords):
    """Fire at ``coords`` on ``board`` and package the result as feedback.

    Returns a 4-tuple ``(shots_grid, reward, terminal, message)``.

    On success, ``reward`` is the value returned by ``board.check_shot`` and
    ``message`` reports it as an integer. If anything raises (for example an
    unusable ``coords`` value), the failure is charged as one miss and the
    tuple is ``(board.get_shots(), 0, False, str(error))``.
    """
    try:
        was_hit = board.check_shot(coords[0], coords[1])
        outcome = (
            board.get_shots(),
            was_hit,
            board.check_terminate(),
            f"Got {int(was_hit)} reward.",
        )
    except Exception as err:
        # Invalid actions are penalised and the error text becomes the feedback.
        board.misses += 1
        outcome = (board.get_shots(), 0, False, str(err))
    return outcome
    
# Function to rollout policy
def rollout(policy, board):
    """Play ``policy`` on ``board`` until the board reports termination.

    Args:
        policy: Callable taking the current shots grid and returning the
            coordinate to fire at.
        board: Environment exposing ``get_shots`` and ``check_terminate`` and
            accepted by ``user_fb_for_placing_shot``.

    Returns:
        A NumPy array with one reward per shot taken, including the shot that
        ended the game. (Previously the loop broke out before recording that
        final reward, so the last shot was never counted.)
    """
    rewards = []
    obs = board.get_shots()
    while not board.check_terminate():
        output = policy(obs)
        obs, reward, terminal, _ = user_fb_for_placing_shot(board, output)
        # Record first, then stop: the terminating shot earns its reward too.
        rewards.append(reward)
        if terminal:
            break
    return np.array(rewards)

# Function to evaluate policy
def eval_policy(policy, board_size, num_each_type, exclude_ships, n_eval_episodes):
    """Evaluate ``policy`` on freshly generated square boards.

    Each episode builds a new ``board_size`` x ``board_size`` board, rolls the
    policy out on it, and scores the episode as the mean per-shot reward.
    An episode in which no shot was recorded scores 0.0 instead of the
    ``nan`` (plus RuntimeWarning) that the mean of an empty array produces.

    Args:
        policy: Policy passed through to ``rollout``.
        board_size: Width and height of each board.
        num_each_type: Copies of each ship type per board.
        exclude_ships: Ship symbols to leave off the boards.
        n_eval_episodes: Number of boards to play.

    Returns:
        NumPy array of per-episode scores. The mean and standard deviation
        are also printed.
    """
    scores = []
    for _ in range(n_eval_episodes):
        board = BattleshipBoard(board_size, board_size, num_each_type=num_each_type, exclude_ships=exclude_ships)
        rewards = rollout(policy, board)
        scores.append(rewards.mean() if len(rewards) else 0.0)
    scores = np.array(scores)
    print(f"Scores: {scores.mean()} ({scores.std()})")
    return scores

## Creating the initial policy

Finally, create a `Policy` object and evaluate the performance of the initial code.

In [6]:
# Set parameters (also reused by the optimization cell further down)
board_size = 5  # boards are board_size x board_size
num_each_type = 1  # copies of each ship type placed on a board
exclude_ships = ['C']  # ship symbols left off the board ('C' is the 5-cell Carrier)
n_eval_episodes = 3  # number of fresh boards played per evaluation

# Create policy and evaluate
# The trainable functions still have empty bodies at this point; the recorded
# output below shows a score of 0.0 for every episode.
policy = Policy()
init_scores = eval_policy(policy, board_size, num_each_type, exclude_ships, n_eval_episodes)
print("Initial scores:", init_scores)

Scores: 0.0 (0.0)
Initial scores: [0. 0. 0.]


## Putting it all together

Create an optimizer and evaluate the policy after each online optimization step of the generated code.

In [7]:
# The optimizer receives the policy's trainable parameters; the LLM endpoint
# configuration is read from the "OAI_CONFIG_LIST" JSON source.
optimizer = OptoPrime(policy.parameters(), memory_size=0, config_list=config_list_from_json("OAI_CONFIG_LIST"))


feedback = ""
# This is an online optimization problem. we have the opportunity to
# keep changing the function with each round of interaction
board = BattleshipBoard(board_size, board_size, num_each_type=num_each_type, exclude_ships=exclude_ships)
obs = node(board.get_shots())  # init observation
i = 0
# Run until 4 iterations have completed without an execution error; iterations
# whose policy call raised are retried and do not advance the counter.
while i < 4:
    GRAPH.clear()

    try:
        output = policy.select_coordinate(obs)
        obs, reward, terminal, feedback = user_fb_for_placing_shot(board, output.data)  # not traced
    except ExecutionError as e:  # this is a retry
        # The policy code itself failed: use the exception node as the output
        # to back-propagate from, and its payload as the feedback.
        output = e.exception_node
        feedback = output.data
        reward, terminal = 0, False

    if terminal:
        # Game finished: continue the online loop on a brand-new board.
        board = BattleshipBoard(board_size, board_size, num_each_type=num_each_type, exclude_ships=exclude_ships)
        obs = node(board.get_shots())  # init observation

    # Update
    optimizer.zero_feedback()
    optimizer.backward(output, feedback)
    optimizer.step(verbose=True)

    # Logging
    if not isinstance(output, ExceptionNode):
        try:
            returns = eval_policy(policy, board_size, num_each_type, exclude_ships, n_eval_episodes)
            print("Iteration", i, "returns:", returns)
        except Exception as e:
            # Evaluation is best-effort (the freshly updated code may be
            # broken), but report the failure instead of hiding it.
            print("Iteration", i, "evaluation failed:", repr(e))

        i += 1
    

Prompt
 
You're tasked to solve a coding/algorithm problem. You will see the instruction, the code, the documentation of each function used in the code, and the feedback about the execution result.

Specifically, a problem will be composed of the following parts:
- #Instruction: the instruction which describes the things you need to do or the question you should answer.
- #Code: the code defined in the problem.
- #Documentation: the documentation of each function used in #Code. The explanation might be incomplete and just contain high-level description. You can use the values in #Others to help infer how those functions work.
- #Variables: the input variables that you can change.
- #Constraints: the constraints or descriptions of the variables in #Variables.
- #Inputs: the values of other inputs to the code, which are not changeable.
- #Others: the intermediate values created through the code execution.
- #Outputs: the result of the code output.
- #Feedback: the feedback about the code

This completes the tutorial on using the Trace package to optimize multiple pieces of code in an episodic setting. Happy optimizing!