Transforms Module

Classification_Inference_Transform

A transformation class to preprocess images for classification inference. This includes resizing, normalization, and conversion to a tensor.

Source code in PytorchWildlife/data/transforms.py
class Classification_Inference_Transform:
    """
    A transformation class to preprocess images for classification inference.
    This includes resizing, normalization, and conversion to a tensor.
    """
    # ImageNet normalization constants (per-channel RGB mean and std)
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    def __init__(self, target_size=224, **kwargs):
        """
        Initializes the transform.

        Args:
            target_size (int): Desired size for the height and width after resizing.
            **kwargs: Additional keyword arguments forwarded to transforms.Resize.
        """
        # Define the sequence of transformations
        self.trans = transforms.Compose([
            transforms.Resize((target_size, target_size), **kwargs),
            transforms.ToTensor(),
            transforms.Normalize(self.mean, self.std)
        ])

    def __call__(self, img) -> torch.Tensor:
        """
        Applies the transformation on the provided image.

        Args:
            img (PIL.Image.Image): Input image in PIL format.

        Returns:
            torch.Tensor: Transformed image.
        """
        img = self.trans(img)
        return img
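
A minimal usage sketch (the image path is illustrative; the import path follows the source location shown above):

```python
from PIL import Image
from PytorchWildlife.data.transforms import Classification_Inference_Transform

# Hypothetical input file; any RGB image works.
img = Image.open("crop.jpg").convert("RGB")

transform = Classification_Inference_Transform(target_size=224)
tensor = transform(img)
print(tensor.shape)  # torch.Size([3, 224, 224])
```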

__call__(img)

Applies the transformation on the provided image.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `img` | `PIL.Image.Image` | Input image in PIL format. | *required* |

Returns:

| Type | Description |
| --- | --- |
| `torch.Tensor` | Transformed image. |

Source code in PytorchWildlife/data/transforms.py
def __call__(self, img) -> torch.Tensor:
    """
    Applies the transformation on the provided image.

    Args:
        img (PIL.Image.Image): Input image in PIL format.

    Returns:
        torch.Tensor: Transformed image.
    """
    img = self.trans(img)
    return img

__init__(target_size=224, **kwargs)

Initializes the transform.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `target_size` | `int` | Desired size for the height and width after resizing. | `224` |
| `**kwargs` | | Additional keyword arguments forwarded to `transforms.Resize`. | `{}` |

Source code in PytorchWildlife/data/transforms.py
def __init__(self, target_size=224, **kwargs):
    """
    Initializes the transform.

    Args:
        target_size (int): Desired size for the height and width after resizing.
        **kwargs: Additional keyword arguments forwarded to transforms.Resize.
    """
    # Define the sequence of transformations
    self.trans = transforms.Compose([
        transforms.Resize((target_size, target_size), **kwargs),
        transforms.ToTensor(),
        transforms.Normalize(self.mean, self.std)
    ])
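
Because extra keyword arguments are forwarded unchanged to `transforms.Resize`, resize behavior can be tuned at construction time. A sketch (the interpolation choice is illustrative):

```python
from torchvision import transforms
from PytorchWildlife.data.transforms import Classification_Inference_Transform

# Any Resize option (interpolation, antialias, ...) can be passed through.
t = Classification_Inference_Transform(
    target_size=224,
    interpolation=transforms.InterpolationMode.BICUBIC,
)
```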

MegaDetector_v5_Transform

A transformation class to preprocess images for the MegaDetector v5 model. This includes resizing, transposing, and normalization operations. This transformation is required by the YOLOv5 model.

Source code in PytorchWildlife/data/transforms.py
class MegaDetector_v5_Transform:
    """
    A transformation class to preprocess images for the MegaDetector v5 model.
    This includes resizing, transposing, and normalization operations.
    This is a required transformation for the YOLOv5 model.
    """

    def __init__(self, target_size=1280, stride=32):
        """
        Initializes the transform.

        Args:
            target_size (int): Desired size for the image's longest side after resizing.
            stride (int): Stride value for resizing.
        """
        self.target_size = target_size
        self.stride = stride

    def __call__(self, np_img) -> torch.Tensor:
        """
        Applies the transformation on the provided image.

        Args:
            np_img (np.ndarray or PIL.Image.Image): Input image as a NumPy array or PIL image.

        Returns:
            torch.Tensor: Transformed image.
        """
        # Convert the image to a PyTorch tensor and normalize it
        if isinstance(np_img, np.ndarray):
            np_img = np_img.transpose((2, 0, 1))
            np_img = np.ascontiguousarray(np_img)
            np_img = torch.from_numpy(np_img).float()
            np_img /= 255.0

        # Resize and pad the image using a customized letterbox function. 
        img = letterbox(np_img, new_shape=self.target_size, stride=self.stride, auto=False)

        return img
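
A short usage sketch, assuming an HWC uint8 image with values in 0-255 (e.g. as loaded by OpenCV or `np.asarray` on a PIL image):

```python
import numpy as np
from PytorchWildlife.data.transforms import MegaDetector_v5_Transform

# Synthetic HWC image standing in for a camera-trap frame.
np_img = np.random.randint(0, 256, (1080, 1920, 3), dtype=np.uint8)

transform = MegaDetector_v5_Transform(target_size=1280, stride=32)
tensor = transform(np_img)
print(tensor.shape)  # torch.Size([3, 1280, 1280]) after letterbox padding
```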

__call__(np_img)

Applies the transformation on the provided image.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `np_img` | `np.ndarray` or `PIL.Image.Image` | Input image as a NumPy array or PIL image. | *required* |

Returns:

| Type | Description |
| --- | --- |
| `torch.Tensor` | Transformed image. |

Source code in PytorchWildlife/data/transforms.py
def __call__(self, np_img) -> torch.Tensor:
    """
    Applies the transformation on the provided image.

    Args:
        np_img (np.ndarray or PIL.Image.Image): Input image as a NumPy array or PIL image.

    Returns:
        torch.Tensor: Transformed image.
    """
    # Convert the image to a PyTorch tensor and normalize it
    if isinstance(np_img, np.ndarray):
        np_img = np_img.transpose((2, 0, 1))
        np_img = np.ascontiguousarray(np_img)
        np_img = torch.from_numpy(np_img).float()
        np_img /= 255.0

    # Resize and pad the image using a customized letterbox function. 
    img = letterbox(np_img, new_shape=self.target_size, stride=self.stride, auto=False)

    return img

__init__(target_size=1280, stride=32)

Initializes the transform.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `target_size` | `int` | Desired size for the image's longest side after resizing. | `1280` |
| `stride` | `int` | Model stride; passed to `letterbox` so padding can be aligned to a multiple of this value. | `32` |

Source code in PytorchWildlife/data/transforms.py
def __init__(self, target_size=1280, stride=32):
    """
    Initializes the transform.

    Args:
        target_size (int): Desired size for the image's longest side after resizing.
        stride (int): Stride value for resizing.
    """
    self.target_size = target_size
    self.stride = stride

letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=False, scaleFill=False, scaleup=True, stride=32)

Resize and pad an image to a desired shape while keeping the aspect ratio unchanged.

This function is commonly used in object detection tasks to prepare images for models like YOLOv5. It resizes the image to fit into the new shape with the correct aspect ratio and then pads the rest.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `im` | `PIL.Image.Image` or `torch.Tensor` | The input image, either a PIL image or a PyTorch tensor. | *required* |
| `new_shape` | `tuple` | Target size of the image, in the form `(height, width)`. | `(640, 640)` |
| `color` | `tuple` | The color used for padding. | `(114, 114, 114)` |
| `auto` | `bool` | Adjust padding so the padded image dimensions are a multiple of the stride. | `False` |
| `scaleFill` | `bool` | If True, scale the image to fill the new shape, ignoring the aspect ratio. | `False` |
| `scaleup` | `bool` | Allow the function to scale up the image. | `True` |
| `stride` | `int` | The stride used in the model; the padding is adjusted to be a multiple of this value. | `32` |

Returns:

| Type | Description |
| --- | --- |
| `torch.Tensor` | The transformed image with padding applied. |

Source code in PytorchWildlife/data/transforms.py
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=False, scaleFill=False, scaleup=True, stride=32) -> torch.Tensor:
    """
    Resize and pad an image to a desired shape while keeping the aspect ratio unchanged.

    This function is commonly used in object detection tasks to prepare images for models like YOLOv5. 
    It resizes the image to fit into the new shape with the correct aspect ratio and then pads the rest.

    Args:
        im (PIL.Image.Image or torch.Tensor): The input image. It can be a PIL image or a PyTorch tensor.
        new_shape (tuple, optional): The target size of the image, in the form (height, width). Defaults to (640, 640).
        color (tuple, optional): The color used for padding. Defaults to (114, 114, 114).
        auto (bool, optional): Adjust padding to ensure the padded image dimensions are a multiple of the stride. Defaults to False.
        scaleFill (bool, optional): If True, scales the image to fill the new shape, ignoring the aspect ratio. Defaults to False.
        scaleup (bool, optional): Allow the function to scale up the image. Defaults to True.
        stride (int, optional): The stride used in the model. The padding is adjusted to be a multiple of this stride. Defaults to 32.

    Returns:
        torch.Tensor: The transformed image with padding applied.
    """

    # Convert a PIL image to a torch tensor (CHW layout, values scaled to [0, 1])
    if isinstance(im, Image.Image):
        im = T.ToTensor()(im)

    # Original shape
    shape = im.shape[1:]  # shape = [height, width]

    # New shape
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old) and compute padding
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:
        r = min(r, 1.0)

    new_unpad = (int(round(shape[1] * r)), int(round(shape[0] * r)))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]

    if auto:
        dw, dh = dw % stride, dh % stride
    elif scaleFill:
        dw, dh = 0, 0
        new_unpad = new_shape
        r = new_shape[1] / shape[1], new_shape[0] / shape[0]

    dw /= 2
    dh /= 2

    # Resize image
    if shape[::-1] != new_unpad:
        resize_transform = T.Resize(new_unpad[::-1], interpolation=T.InterpolationMode.BILINEAR,
                                    antialias=False)
        im = resize_transform(im)

    # Pad image. F.pad takes (left, right, top, bottom); the +/-0.1 offsets
    # split an odd total padding across the two sides. value=114 matches the
    # default gray in `color`.
    padding = (int(round(dw - 0.1)), int(round(dw + 0.1)), int(round(dh + 0.1)), int(round(dh - 0.1)))
    im = F.pad(im * 255.0, padding, value=114) / 255.0

    return im
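
A worked sketch of the resize-and-pad arithmetic (input shape and values are illustrative):

```python
import torch
from PytorchWildlife.data.transforms import letterbox

# A 720x1280 (HxW) image: r = min(640/720, 640/1280) = 0.5, so the image
# is resized to 360x640, then padded 140 px on top and bottom to 640x640.
im = torch.rand(3, 720, 1280)
out = letterbox(im, new_shape=(640, 640), auto=False)
print(out.shape)  # torch.Size([3, 640, 640])
```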