import numpy as np
import pandas as pd
from pe.embedding import Embedding
from pe.constant.data import IMAGE_DATA_COLUMN_NAME
from pe.logging import execution_logger
def to_uint8(x, min, max):
    """Linearly rescale values from the range ``[min, max]`` to ``uint8``.

    ``min`` is mapped to 0 and ``max`` to 255. Values that fall outside the
    range are clipped to 0 or 255, and the result is rounded before the cast.

    :param x: The values to convert; must support arithmetic with ``min`` and
        ``max`` (e.g., a numpy array)
    :param min: The input value that maps to 0
    :param max: The input value that maps to 255; must differ from ``min``
        because the rescale divides by ``max - min``
    :return: The rescaled values with dtype ``np.uint8``
    """
    # NOTE: the parameter names shadow the builtins but are kept as-is because
    # they are part of the function's keyword interface.
    x = (x - min) / (max - min)
    # Clip before the cast so out-of-range inputs saturate instead of wrapping.
    x = np.around(np.clip(x * 255, a_min=0, a_max=255)).astype(np.uint8)
    return x
class RawPixel(Embedding):
    """Use the raw pixels of images as the embedding."""

    def compute_embedding(self, data):
        """Extract the raw pixels of images.

        Only the rows returned by ``filter_uncomputed_rows`` are processed.
        Their images are stacked into one array and flattened to a single
        vector per image, which is stored in the ``self.column_name`` column.
        The result is then combined with ``data`` via ``merge_computed_rows``.

        :param data: The data object containing the images
        :type data: :py:class:`pe.data.Data`
        :return: The data object with the computed embedding
        :rtype: :py:class:`pe.data.Data`
        """
        uncomputed_data = self.filter_uncomputed_rows(data)
        if len(uncomputed_data.data_frame) == 0:
            # Nothing left to compute; return the input unchanged.
            execution_logger.info(f"Embedding: {self.column_name} already computed")
            return data

        execution_logger.info(
            f"Embedding: computing {self.column_name} for {len(uncomputed_data.data_frame)}/{len(data.data_frame)}"
            " samples"
        )
        # Stack the per-row images along a new leading axis, then flatten every
        # remaining axis so each image becomes one 1-D vector.
        x = np.stack(uncomputed_data.data_frame[IMAGE_DATA_COLUMN_NAME].values, axis=0)
        embeddings = np.reshape(x, (x.shape[0], -1))
        # list(...) splits the 2-D array into one vector per row; reusing the
        # data frame's index keeps each vector aligned with its source row.
        uncomputed_data.data_frame[self.column_name] = pd.Series(
            list(embeddings), index=uncomputed_data.data_frame.index
        )
        execution_logger.info(
            f"Embedding: finished computing {self.column_name} for "
            f"{len(uncomputed_data.data_frame)}/{len(data.data_frame)} samples"
        )
        return self.merge_computed_rows(data, uncomputed_data)