Skip to content

Presidio Structured API Reference

presidio_structured

presidio-structured root module.

JsonAnalysisBuilder

Bases: AnalysisBuilder

Concrete configuration generator for JSON data.

METHOD DESCRIPTION
generate_analysis

Generate a configuration from the given JSON data.

Source code in presidio_structured/analysis_builder.py
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
class JsonAnalysisBuilder(AnalysisBuilder):
    """Concrete configuration generator for JSON data."""

    def generate_analysis(
        self,
        data: Dict,
        language: str = "en",
    ) -> StructuredAnalysis:
        """
        Generate a configuration from the given JSON data.

        :param data: The input JSON data.
        :param language: The language to be used by the batch analyzer.
        :return: The generated configuration.
        """
        logger.debug("Starting JSON BatchAnalyzer analysis")
        # Analyze every key/value pair of the input dict in one batch pass.
        analyzer_results = self.batch_analyzer.analyze_dict(
            input_dict=data, language=language
        )

        key_recognizer_result_map = self._generate_analysis_from_results_json(
            analyzer_results
        )

        # Keep only the entity type per key; positional info is not needed
        # for the structured configuration.
        key_entity_map = {
            key: result.entity_type for key, result in key_recognizer_result_map.items()
        }

        return StructuredAnalysis(entity_mapping=key_entity_map)

    def _generate_analysis_from_results_json(
        self, analyzer_results: Iterator[DictAnalyzerResult], prefix: str = ""
    ) -> Dict[str, RecognizerResult]:
        """
        Generate a configuration from the given analyzer results. Always uses the first recognizer result if there are more than one.

        :param analyzer_results: The analyzer results.
        :param prefix: The prefix for the configuration keys (dotted path of
            the enclosing dictionaries, e.g. "person.").
        :return: The generated configuration.
        """  # noqa: E501
        key_recognizer_result_map = {}

        if not isinstance(analyzer_results, Iterable):
            logger.debug(
                "No analyzer results found, returning empty StructuredAnalysis"
            )
            return key_recognizer_result_map

        for result in analyzer_results:
            current_key = prefix + result.key

            if isinstance(result.value, dict) and isinstance(
                result.recognizer_results, Iterator
            ):
                # Nested dict: recurse, extending the dotted key path.
                nested_mappings = self._generate_analysis_from_results_json(
                    result.recognizer_results, prefix=current_key + "."
                )
                key_recognizer_result_map.update(nested_mappings)
            # NOTE(review): when the branch above ran, the recursion may have
            # consumed the `recognizer_results` iterator, so `next()` below
            # would see it exhausted and return None for nested dicts —
            # confirm this is the intended behavior.
            first_recognizer_result = next(iter(result.recognizer_results), None)
            if isinstance(first_recognizer_result, RecognizerResult):
                logger.debug(
                    f"Found result with entity {first_recognizer_result.entity_type} \
                        in {current_key}"
                )
                key_recognizer_result_map[current_key] = first_recognizer_result
        return key_recognizer_result_map

generate_analysis

generate_analysis(data: Dict, language: str = 'en') -> StructuredAnalysis

Generate a configuration from the given JSON data.

PARAMETER DESCRIPTION
data

The input JSON data.

TYPE: Dict

RETURNS DESCRIPTION
StructuredAnalysis

The generated configuration.

Source code in presidio_structured/analysis_builder.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def generate_analysis(
    self,
    data: Dict,
    language: str = "en",
) -> StructuredAnalysis:
    """
    Build a StructuredAnalysis from a JSON-like dictionary.

    Runs the batch analyzer over every key/value pair, then keeps only the
    entity type detected for each (possibly nested, dot-joined) key.

    :param data: The input JSON data.
    :param language: The language to be used by the batch analyzer.
    :return: The generated configuration.
    """
    logger.debug("Starting JSON BatchAnalyzer analysis")
    batch_results = self.batch_analyzer.analyze_dict(
        input_dict=data, language=language
    )

    result_map = self._generate_analysis_from_results_json(batch_results)

    mapping = {}
    for key, recognizer_result in result_map.items():
        mapping[key] = recognizer_result.entity_type

    return StructuredAnalysis(entity_mapping=mapping)

PandasAnalysisBuilder

Bases: TabularAnalysisBuilder

Concrete configuration generator for tabular data.

METHOD DESCRIPTION
generate_analysis

Generate a configuration from the given tabular data.

Source code in presidio_structured/analysis_builder.py
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
class PandasAnalysisBuilder(TabularAnalysisBuilder):
    """Concrete configuration generator for tabular data."""

    # Valid values accepted for the ``selection_strategy`` parameters below.
    entity_selection_strategies = {"highest_confidence", "mixed", "most_common"}

    def generate_analysis(
        self,
        df: DataFrame,
        n: Optional[int] = None,
        language: str = "en",
        selection_strategy: str = "most_common",
        mixed_strategy_threshold: float = 0.5,
    ) -> StructuredAnalysis:
        """
        Generate a configuration from the given tabular data.

        :param df: The input tabular data (dataframe).
        :param n: The number of samples to be taken from the dataframe.
        :param language: The language to be used for analysis.
        :param selection_strategy: A string that specifies the entity selection strategy
        ('highest_confidence', 'mixed', or default to most common).
        :param mixed_strategy_threshold: A float value for the threshold to be used in
        the entity selection mixed strategy.
        :return: A StructuredAnalysis object containing the analysis results.
        """
        # NOTE(review): `not n` also treats n=0 as "not provided", so passing
        # n=0 analyzes the full dataframe — confirm this is intended.
        if not n:
            n = len(df)
        elif n > len(df):
            logger.debug(
                f"Number of samples ({n}) is larger than the number of rows \
                    ({len(df)}), using all rows"
            )
            n = len(df)

        # Fixed random_state keeps sampling deterministic across runs.
        df = df.sample(n, random_state=123)

        key_recognizer_result_map = self._generate_key_rec_results_map(
            df, language, selection_strategy, mixed_strategy_threshold
        )

        # Columns classified as non-PII are dropped from the final mapping.
        key_entity_map = {
            key: result.entity_type
            for key, result in key_recognizer_result_map.items()
            if result.entity_type != NON_PII_ENTITY_TYPE
        }

        return StructuredAnalysis(entity_mapping=key_entity_map)

    def _generate_key_rec_results_map(
        self,
        df: DataFrame,
        language: str,
        selection_strategy: str = "most_common",
        mixed_strategy_threshold: float = 0.5,
    ) -> Dict[str, RecognizerResult]:
        """
        Find the most common entity in a dataframe column.

        If more than one entity is found in a cell, the first one is used.

        :param df: The dataframe where entities will be searched.
        :param language: Language to be used in the analysis engine.
        :param selection_strategy: A string that specifies the entity selection strategy
        ('highest_confidence', 'mixed', or default to most common).
        :param mixed_strategy_threshold: A float value for the threshold to be used in
        the entity selection mixed strategy.
        :return: A dictionary mapping column names to the most common RecognizerResult.
        """
        column_analyzer_results_map = self._batch_analyze_df(df, language)
        key_recognizer_result_map = {}
        # One representative RecognizerResult is selected per column.
        for column, analyzer_result in column_analyzer_results_map.items():
            key_recognizer_result_map[column] = self._find_entity_based_on_strategy(
                analyzer_result, selection_strategy, mixed_strategy_threshold
            )
        return key_recognizer_result_map

    def _batch_analyze_df(
        self, df: DataFrame, language: str
    ) -> Dict[str, List[List[RecognizerResult]]]:
        """
        Analyze each column in the dataframe for entities using the batch analyzer.

        :param df: The dataframe to be analyzed.
        :param language: The language configuration for the analyzer.
        :return: A dictionary mapping each column name to a \
            list of lists of RecognizerResults (one inner list per cell).
        """
        column_analyzer_results_map = {}
        for column in df.columns:
            logger.debug(f"Finding most common PII entity for column {column}")
            analyzer_results = self.batch_analyzer.analyze_iterator(
                [val for val in df[column]], language=language
            )
            column_analyzer_results_map[column] = analyzer_results

        return column_analyzer_results_map

    def _find_entity_based_on_strategy(
        self,
        analyzer_results: List[List[RecognizerResult]],
        selection_strategy: str,
        mixed_strategy_threshold: float,
    ) -> RecognizerResult:
        """
        Determine the most suitable entity based on the specified selection strategy.

        :param analyzer_results: A nested list of RecognizerResult objects from the
        analysis results.
        :param selection_strategy: A string that specifies the entity selection strategy
        ('highest_confidence', 'mixed', or default to most common).
        :param mixed_strategy_threshold: Threshold used by the 'mixed' strategy.
        :return: A RecognizerResult object representing the selected entity based on the
        given strategy.
        :raises ValueError: If the strategy is not one of
        ``entity_selection_strategies``.
        """
        if selection_strategy not in self.entity_selection_strategies:
            raise ValueError(
                f"Unsupported entity selection strategy: {selection_strategy}."
            )

        # No recognizer hit in any cell: report the column as non-PII with a
        # synthetic full-confidence result (start/end are placeholders).
        if not any(analyzer_results):
            return RecognizerResult(
                entity_type=NON_PII_ENTITY_TYPE, start=0, end=1, score=1.0
            )

        flat_results = self._flatten_results(analyzer_results)

        # Select the entity based on the desired strategy
        if selection_strategy == "highest_confidence":
            return self._select_highest_confidence_entity(flat_results)
        elif selection_strategy == "mixed":
            return self._select_mixed_strategy_entity(
                flat_results, mixed_strategy_threshold
            )

        return self._select_most_common_entity(flat_results)

    def _select_most_common_entity(self, flat_results):
        """
        Select the most common entity from the flattened analysis results.

        :param flat_results: A list of tuples containing index and RecognizerResult
        objects from the flattened analysis results.
        :return: A RecognizerResult object for the most commonly found entity type.
        """
        # Count occurrences of each entity type
        type_counter = Counter(res.entity_type for _, res in flat_results)
        most_common_type, most_common_count = type_counter.most_common(1)[0]

        # Calculate the score as the proportion of occurrences
        score = most_common_count / len(flat_results)

        return RecognizerResult(
            entity_type=most_common_type, start=0, end=1, score=score
        )

    def _select_highest_confidence_entity(self, flat_results):
        """
        Select the entity with the highest confidence score.

        :param flat_results: A list of tuples containing index and RecognizerResult
        objects from the flattened analysis results.
        :return: A RecognizerResult object for the entity with the highest confidence
        score.
        """
        score_aggregator = self._aggregate_scores(flat_results)

        # Find the highest score across all entities
        highest_score = max(
            max(scores) for scores in score_aggregator.values() if scores
        )

        # Find the entities with the highest score and count their occurrences
        entities_highest_score = {
            entity: scores.count(highest_score)
            for entity, scores in score_aggregator.items()
            if highest_score in scores
        }

        # Find the entity(ies) with the most number of high scores
        max_occurrences = max(entities_highest_score.values())
        highest_confidence_entities = [
            entity
            for entity, count in entities_highest_score.items()
            if count == max_occurrences
        ]

        # Ties are broken by dict insertion order: the first entity seen with
        # the maximal count wins.
        return RecognizerResult(
            entity_type=highest_confidence_entities[0],
            start=0,
            end=1,
            score=highest_score,
        )

    def _select_mixed_strategy_entity(self, flat_results, mixed_strategy_threshold):
        """
        Select an entity using a mixed strategy.

        Chooses an entity based on the highest confidence score if it is above the
        threshold. Otherwise, it defaults to the most common entity.

        :param flat_results: A list of tuples containing index and RecognizerResult
        objects from the flattened analysis results.
        :param mixed_strategy_threshold: Score threshold that switches between
        the two sub-strategies; must lie in [0, 1].
        :return: A RecognizerResult object selected based on the mixed strategy.
        :raises ValueError: If the threshold is outside [0, 1].
        """
        # Check if mixed strategy threshold is within the valid range
        if not 0 <= mixed_strategy_threshold <= 1:
            raise ValueError(
                f"Invalid mixed strategy threshold: {mixed_strategy_threshold}."
            )

        score_aggregator = self._aggregate_scores(flat_results)

        # Check if the highest score is greater than threshold and select accordingly
        highest_score = max(
            max(scores) for scores in score_aggregator.values() if scores
        )
        if highest_score > mixed_strategy_threshold:
            return self._select_highest_confidence_entity(flat_results)
        else:
            return self._select_most_common_entity(flat_results)

    @staticmethod
    def _aggregate_scores(flat_results):
        """
        Aggregate the scores for each entity type from the flattened analysis results.

        :param flat_results: A list of tuples containing index and RecognizerResult
        objects from the flattened analysis results.
        :return: A dictionary with entity types as keys and lists of scores as values.
        """
        score_aggregator = {}
        for _, res in flat_results:
            if res.entity_type not in score_aggregator:
                score_aggregator[res.entity_type] = []
            score_aggregator[res.entity_type].append(res.score)
        return score_aggregator

    @staticmethod
    def _flatten_results(analyzer_results):
        """
        Flatten nested lists of RecognizerResult objects into a list of tuples.

        :param analyzer_results: A nested list of RecognizerResult objects from
        the analysis results.
        :return: A flattened list of tuples containing index and RecognizerResult
        objects. The index identifies the originating cell.
        """
        return [
            (cell_idx, res)
            for cell_idx, cell_results in enumerate(analyzer_results)
            for res in cell_results
        ]

generate_analysis

generate_analysis(
    df: DataFrame,
    n: Optional[int] = None,
    language: str = "en",
    selection_strategy: str = "most_common",
    mixed_strategy_threshold: float = 0.5,
) -> StructuredAnalysis

Generate a configuration from the given tabular data.

PARAMETER DESCRIPTION
df

The input tabular data (dataframe).

TYPE: DataFrame

n

The number of samples to be taken from the dataframe.

TYPE: Optional[int] DEFAULT: None

language

The language to be used for analysis.

TYPE: str DEFAULT: 'en'

selection_strategy

A string that specifies the entity selection strategy ('highest_confidence', 'mixed', or default to most common).

TYPE: str DEFAULT: 'most_common'

mixed_strategy_threshold

A float value for the threshold to be used in the entity selection mixed strategy.

TYPE: float DEFAULT: 0.5

RETURNS DESCRIPTION
StructuredAnalysis

A StructuredAnalysis object containing the analysis results.

Source code in presidio_structured/analysis_builder.py
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
def generate_analysis(
    self,
    df: DataFrame,
    n: Optional[int] = None,
    language: str = "en",
    selection_strategy: str = "most_common",
    mixed_strategy_threshold: float = 0.5,
) -> StructuredAnalysis:
    """
    Build a StructuredAnalysis for the given dataframe.

    Samples up to ``n`` rows deterministically, analyzes every column, and
    maps each PII-bearing column to its selected entity type.

    :param df: The input tabular data (dataframe).
    :param n: The number of samples to be taken from the dataframe.
    :param language: The language to be used for analysis.
    :param selection_strategy: A string that specifies the entity selection strategy
    ('highest_confidence', 'mixed', or default to most common).
    :param mixed_strategy_threshold: A float value for the threshold to be used in
    the entity selection mixed strategy.
    :return: A StructuredAnalysis object containing the analysis results.
    """
    row_count = len(df)
    if not n:
        n = row_count
    elif n > row_count:
        logger.debug(
            f"Number of samples ({n}) is larger than the number of rows \
                ({len(df)}), using all rows"
        )
        n = row_count

    sampled = df.sample(n, random_state=123)

    result_map = self._generate_key_rec_results_map(
        sampled, language, selection_strategy, mixed_strategy_threshold
    )

    entity_mapping = {}
    for column, recognizer_result in result_map.items():
        if recognizer_result.entity_type == NON_PII_ENTITY_TYPE:
            continue
        entity_mapping[column] = recognizer_result.entity_type

    return StructuredAnalysis(entity_mapping=entity_mapping)

StructuredAnalysis dataclass

Dataclass containing entity analysis from structured data.

Currently, this class only contains entity mapping.

:param entity_mapping: dict. Mapping of column/key names to entity types, e.g., { "person.name": "PERSON", "person.address": "LOCATION" }

Source code in presidio_structured/config/structured_analysis.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
@dataclass
class StructuredAnalysis:
    """
    Dataclass containing entity analysis from structured data.

    Currently, this class only contains entity mapping.

    :param entity_mapping: dict. Mapping column/key names to entity types, e.g., {
        "person.name": "PERSON",
        "person.address": "LOCATION"
        }
    """

    # Maps column names (tabular data) or dotted key paths (JSON data)
    # to Presidio entity type names.
    entity_mapping: Dict[str, str]

CsvReader

Bases: ReaderBase

Reader for reading csv files.

Usage::

reader = CsvReader()
data = reader.read(path="filepath.csv")
METHOD DESCRIPTION
read

Read csv file to pandas dataframe.

Source code in presidio_structured/data/data_reader.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
class CsvReader(ReaderBase):
    """
    Reader for reading csv files.

    Usage::

        reader = CsvReader()
        data = reader.read(path="filepath.csv")

    """

    def read(self, path: Union[str, Path], **kwargs) -> pd.DataFrame:
        """
        Load a csv file into a pandas DataFrame.

        :param path: String defining the location of the csv file to read.
        :param kwargs: Extra keyword arguments forwarded to ``pandas.read_csv``.
        :return: Pandas DataFrame with the data read from the csv file.
        """
        frame = pd.read_csv(path, **kwargs)
        return frame

read

read(path: Union[str, Path], **kwargs) -> pd.DataFrame

Read csv file to pandas dataframe.

PARAMETER DESCRIPTION
path

String defining the location of the csv file to read.

TYPE: Union[str, Path]

RETURNS DESCRIPTION
DataFrame

Pandas DataFrame with the data read from the csv file.

Source code in presidio_structured/data/data_reader.py
40
41
42
43
44
45
46
47
def read(self, path: Union[str, Path], **kwargs) -> pd.DataFrame:
    """
    Load a csv file into a pandas DataFrame.

    :param path: String defining the location of the csv file to read.
    :param kwargs: Extra keyword arguments forwarded to ``pandas.read_csv``.
    :return: Pandas DataFrame with the data read from the csv file.
    """
    frame = pd.read_csv(path, **kwargs)
    return frame

JsonDataProcessor

Bases: DataProcessorBase

JSON Data Processor. Supports arbitrary nesting of dictionaries and lists.

METHOD DESCRIPTION
operate

Perform operations over the text using the operators, as per the structured analysis.

Source code in presidio_structured/data/data_processors.py
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
class JsonDataProcessor(DataProcessorBase):
    """JSON Data Processor. Supports arbitrary nesting of dictionaries and lists."""

    @staticmethod
    def _get_nested_value(data: Union[Dict, List, None], path: List[str]) -> Any:
        """
        Recursively retrieves the value from nested data using a given path.

        :param data: Nested data (list or dictionary).
        :param path: List of keys/indexes representing the path.
        :return: Retrieved value.
        """
        for i, key in enumerate(path):
            if isinstance(data, list):
                # A numeric path component indexes directly into the list.
                if key.isdigit():
                    data = data[int(key)]
                else:
                    # Non-numeric key under a list: apply the remaining path
                    # to every element and collect the results.
                    return [
                        JsonDataProcessor._get_nested_value(item, path[i:])
                        for item in data
                    ]
            elif isinstance(data, dict):
                # Missing keys yield None, which stops traversal on the
                # next iteration via the `else` branch.
                data = data.get(key)
            else:
                # Reached a scalar (or None) before the path was exhausted.
                return data
        return data

    @staticmethod
    def _set_nested_value(data: Union[Dict, List], path: List[str], value: Any) -> None:
        """
        Recursively sets a value in nested data using a given path.

        :param data: Nested data (JSON-like), mutated in place.
        :param path: List of keys/indexes representing the path.
        :param value: Value to be set.
        """
        for i, key in enumerate(path):
            if isinstance(data, list):
                if i + 1 < len(path) and path[i + 1].isdigit():
                    # The next path component is a list index: grow the list
                    # with empty dicts as needed and descend into that slot.
                    # The index component itself is consumed on the next
                    # loop iteration.
                    idx = int(path[i + 1])
                    while len(data) <= idx:
                        data.append({})
                    data = data[idx]
                    continue
                else:
                    # Non-numeric key under a list: fan out and set the value
                    # on every element.
                    for item in data:
                        JsonDataProcessor._set_nested_value(item, path[i:], value)
                    return
            elif isinstance(data, dict):
                if i == len(path) - 1:
                    data[key] = value
                else:
                    # Descend, creating intermediate dicts as needed.
                    data = data.setdefault(key, {})

    def _process(
        self,
        data: Union[Dict, List],
        key_to_operator_mapping: Dict[str, Callable],
    ) -> Union[Dict, List]:
        """
        Operates on the given JSON-like data based on the provided configuration.

        :param data: JSON-like data to be operated on (mutated in place).
        :param key_to_operator_mapping: maps keys to Callable operators.
        :return: JSON-like data after the operation.
        :raises ValueError: If ``data`` is neither a dict nor a list.
        """

        if not isinstance(data, (dict, list)):
            raise ValueError("Data must be a JSON-like object")

        for key, operator_callable in key_to_operator_mapping.items():
            self.logger.debug(f"Operating on key {key}")
            # Dotted keys address nested fields, e.g. "person.name".
            keys = key.split(".")
            if isinstance(data, list):
                # Top-level list: process each element independently.
                for item in data:
                    self._process(item, key_to_operator_mapping)
            else:
                text_to_operate_on = self._get_nested_value(data, keys)
                # Falsy values (None, "", empty list) are left untouched.
                if text_to_operate_on:
                    if isinstance(text_to_operate_on, list):
                        # NOTE(review): each iteration writes its result back
                        # to the same path, and _set_nested_value applies the
                        # value to every matching element — so the last
                        # operated text appears to win for all elements.
                        # Confirm this is the intended behavior.
                        for text in text_to_operate_on:
                            operated_text = self._operate_on_text(
                                text, operator_callable
                            )
                            self._set_nested_value(data, keys, operated_text)
                    else:
                        operated_text = self._operate_on_text(
                            text_to_operate_on, operator_callable
                        )
                        self._set_nested_value(data, keys, operated_text)
        return data

operate

operate(
    data: Any,
    structured_analysis: StructuredAnalysis,
    operators: Dict[str, OperatorConfig],
) -> Any

Perform operations over the text using the operators, as per the structured analysis.

PARAMETER DESCRIPTION
data

Data to be operated on.

TYPE: Any

structured_analysis

Analysis schema as per the structured data.

TYPE: StructuredAnalysis

operators

Dictionary containing operator configuration objects.

TYPE: Dict[str, OperatorConfig]

RETURNS DESCRIPTION
Any

Data after being operated upon.

Source code in presidio_structured/data/data_processors.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def operate(
    self,
    data: Any,
    structured_analysis: StructuredAnalysis,
    operators: Dict[str, OperatorConfig],
) -> Any:
    """
    Perform operations over the text using the operators, as per the structured analysis.

    :param data: Data to be operated on.
    :param structured_analysis: Analysis schema as per the structured data.
    :param operators: Dictionary containing operator configuration objects.
    :return: Data after being operated upon.
    """  # noqa: E501
    mapping = self._generate_operator_mapping(structured_analysis, operators)
    return self._process(data, mapping)

JsonReader

Bases: ReaderBase

Reader for reading json files.

Usage::

reader = JsonReader()
data = reader.read(path="filepath.json")
METHOD DESCRIPTION
read

Read json file to dict.

Source code in presidio_structured/data/data_reader.py
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
class JsonReader(ReaderBase):
    """
    Reader for reading json files.

    Usage::

        reader = JsonReader()
        data = reader.read(path="filepath.json")

    """

    def read(self, path: Union[str, Path], **kwargs) -> Dict[str, Any]:
        """
        Read json file to dict.

        :param path: String defining the location of the json file to read.
        :param kwargs: Extra keyword arguments forwarded to ``json.load``.
        :return: dictionary with the data read from the json file.
        """
        # JSON is UTF-8 by specification (RFC 8259); be explicit instead of
        # relying on the platform's locale-dependent default encoding.
        with open(path, encoding="utf-8") as f:
            data = json.load(f, **kwargs)
        return data

read

read(path: Union[str, Path], **kwargs) -> Dict[str, Any]

Read json file to dict.

PARAMETER DESCRIPTION
path

String defining the location of the json file to read.

TYPE: Union[str, Path]

RETURNS DESCRIPTION
Dict[str, Any]

dictionary with the data read from the json file.

Source code in presidio_structured/data/data_reader.py
61
62
63
64
65
66
67
68
69
70
def read(self, path: Union[str, Path], **kwargs) -> Dict[str, Any]:
    """
    Read json file to dict.

    :param path: String defining the location of the json file to read.
    :param kwargs: Extra keyword arguments forwarded to ``json.load``.
    :return: dictionary with the data read from the json file.
    """
    # JSON is UTF-8 by specification (RFC 8259); be explicit instead of
    # relying on the platform's locale-dependent default encoding.
    with open(path, encoding="utf-8") as f:
        data = json.load(f, **kwargs)
    return data

PandasDataProcessor

Bases: DataProcessorBase

Pandas Data Processor.

METHOD DESCRIPTION
operate

Perform operations over the text using the operators, as per the structured analysis.

Source code in presidio_structured/data/data_processors.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
class PandasDataProcessor(DataProcessorBase):
    """Pandas Data Processor."""

    def _process(
        self, data: DataFrame, key_to_operator_mapping: Dict[str, Callable]
    ) -> DataFrame:
        """
        Operates on the given pandas DataFrame based on the provided operators.

        :param data: DataFrame to be operated on (mutated in place).
        :param key_to_operator_mapping: Mapping of keys to operator callables.
        :return: DataFrame after the operation.
        :raises ValueError: If ``data`` is not a pandas DataFrame.
        """

        if not isinstance(data, DataFrame):
            raise ValueError("Data must be a pandas DataFrame")

        for key, operator_callable in key_to_operator_mapping.items():
            self.logger.debug(f"Operating on column {key}")
            # NOTE(review): getattr(row, key) requires the column name to be a
            # valid Python identifier — itertuples() renames invalid names
            # (spaces, duplicates) which would break this lookup. Confirm
            # column naming constraints upstream.
            for row in data.itertuples(index=True):
                text_to_operate_on = getattr(row, key)
                operated_text = self._operate_on_text(
                    text_to_operate_on, operator_callable
                )
                # Write back cell-by-cell; the input DataFrame is modified
                # in place and also returned.
                data.at[row.Index, key] = operated_text
        return data

operate

operate(
    data: Any,
    structured_analysis: StructuredAnalysis,
    operators: Dict[str, OperatorConfig],
) -> Any

Perform operations over the text using the operators, as per the structured analysis.

PARAMETER DESCRIPTION
data

Data to be operated on.

TYPE: Any

structured_analysis

Analysis schema as per the structured data.

TYPE: StructuredAnalysis

operators

Dictionary containing operator configuration objects.

TYPE: Dict[str, OperatorConfig]

RETURNS DESCRIPTION
Any

Data after being operated upon.

Source code in presidio_structured/data/data_processors.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def operate(
    self,
    data: Any,
    structured_analysis: StructuredAnalysis,
    operators: Dict[str, OperatorConfig],
) -> Any:
    """
    Perform operations over the text using the operators, as per the structured analysis.

    :param data: Data to be operated on.
    :param structured_analysis: Analysis schema as per the structured data.
    :param operators: Dictionary containing operator configuration objects.
    :return: Data after being operated upon.
    """  # noqa: E501
    return self._process(
        data,
        self._generate_operator_mapping(structured_analysis, operators),
    )

StructuredEngine

Class to implement methods for anonymizing tabular data.

METHOD DESCRIPTION
anonymize

Anonymize the given data using the given configuration.

Source code in presidio_structured/structured_engine.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
class StructuredEngine:
    """Class to implement methods for anonymizing tabular data."""

    def __init__(self, data_processor: Optional[DataProcessorBase] = None) -> None:
        """
        Initialize the class with a data processor.

        :param data_processor: Instance of DataProcessorBase; defaults to a
            PandasDataProcessor when not provided.
        """
        if data_processor is None:
            self.data_processor = PandasDataProcessor()
        else:
            self.data_processor = data_processor

        self.logger = logging.getLogger("presidio-structured")

    def anonymize(
        self,
        data: Union[Dict, DataFrame],
        structured_analysis: StructuredAnalysis,
        operators: Union[Dict[str, OperatorConfig], None] = None,
    ) -> Union[Dict, DataFrame]:
        """
        Anonymize the given data using the given configuration.

        :param data: input data as dictionary or pandas DataFrame.
        :param structured_analysis: structured analysis configuration.
        :param operators: a dictionary of operator configurations, optional.
        :return: Anonymized dictionary or DataFrame.
        """
        self.logger.debug("Starting anonymization")
        operators = self.__check_or_add_default_operator(operators)

        return self.data_processor.operate(data, structured_analysis, operators)

    def __check_or_add_default_operator(
        self, operators: Union[Dict[str, OperatorConfig], None]
    ) -> Dict[str, OperatorConfig]:
        """
        Check if the provided operators dictionary has a default operator. If not, add a default operator.

        :param operators: dictionary of operator configurations.
        :return: operators dictionary with the default operator added \
            if it was not initially present.
        """  # noqa: E501
        default_operator = OperatorConfig(DEFAULT)
        if not operators:
            self.logger.debug("No operators provided, using default operator")
            return {"DEFAULT": default_operator}
        if not operators.get("DEFAULT"):
            self.logger.debug("No default operator provided, using default operator")
            # Bug fix: build a copy instead of mutating the caller-supplied
            # dictionary, so callers never observe their input being modified.
            operators = {**operators, "DEFAULT": default_operator}
        return operators

anonymize

anonymize(
    data: Union[Dict, DataFrame],
    structured_analysis: StructuredAnalysis,
    operators: Union[Dict[str, OperatorConfig], None] = None,
) -> Union[Dict, DataFrame]

Anonymize the given data using the given configuration.

PARAMETER DESCRIPTION
data

input data as dictionary or pandas DataFrame.

TYPE: Union[Dict, DataFrame]

structured_analysis

structured analysis configuration.

TYPE: StructuredAnalysis

operators

a dictionary of operator configurations, optional.

TYPE: Union[Dict[str, OperatorConfig], None] DEFAULT: None

RETURNS DESCRIPTION
Union[Dict, DataFrame]

Anonymized dictionary or DataFrame.

Source code in presidio_structured/structured_engine.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
def anonymize(
    self,
    data: Union[Dict, DataFrame],
    structured_analysis: StructuredAnalysis,
    operators: Union[Dict[str, OperatorConfig], None] = None,
) -> Union[Dict, DataFrame]:
    """
    Anonymize the given data using the given configuration.

    :param data: input data as dictionary or pandas DataFrame.
    :param structured_analysis: structured analysis configuration.
    :param operators: a dictionary of operator configurations, optional.
    :return: Anonymized dictionary or DataFrame.
    """
    self.logger.debug("Starting anonymization")
    effective_operators = self.__check_or_add_default_operator(operators)
    return self.data_processor.operate(
        data, structured_analysis, effective_operators
    )

handler: python