Presidio Anonymizer API Reference

presidio_anonymizer

Anonymizer root module.

AnonymizerEngine

Bases: EngineBase

AnonymizerEngine class.

Handles the entire logic of the Presidio-anonymizer. Gets the original text and replaces the PII entities with the desired anonymizers.

METHOD	DESCRIPTION
`anonymize`	Anonymize method to anonymize the given text.
`add_anonymizer`	Add a new anonymizer to the engine.
`remove_anonymizer`	Remove an anonymizer from the engine.
`get_anonymizers`	Return a list of supported anonymizers.

Source code in presidio_anonymizer/anonymizer_engine.py

class AnonymizerEngine(EngineBase):
    """
    AnonymizerEngine class.

    Handles the entire logic of the Presidio-anonymizer. Gets the original text
    and replaces the PII entities with the desired anonymizers.
    """

    def anonymize(
        self,
        text: str,
        analyzer_results: List[RecognizerResult],
        operators: Optional[Dict[str, OperatorConfig]] = None,
        conflict_resolution: ConflictResolutionStrategy = (
            ConflictResolutionStrategy.MERGE_SIMILAR_OR_CONTAINED
        ),
    ) -> EngineResult:
        """Anonymize the PII entities of the given text.

        The analyzer results are first de-conflicted, then adjacent results of
        the same entity type separated only by spaces are merged, and finally
        the operators are applied to the text.

        :param text: the text we are anonymizing
        :param analyzer_results: A list of RecognizerResult class -> The results we
        received from the analyzer
        :param operators: The configuration of the anonymizers we would like
        to use for each entity e.g.: {"PHONE_NUMBER":OperatorConfig("redact", {})}.
        When it is omitted or empty only a "DEFAULT" operator is used; when it
        has no "DEFAULT" entry one is added (the passed dict is left untouched).
        :param conflict_resolution: The configuration designed to handle conflicts
        among entities
        :return: the anonymized text and a list of information about the
        anonymized entities.

        :example:

        >>> from presidio_anonymizer import AnonymizerEngine
        >>> from presidio_anonymizer.entities import RecognizerResult, OperatorConfig

        >>> # Initialize the engine.
        >>> engine = AnonymizerEngine()

        >>> # Invoke the anonymize function with the text, analyzer results and
        >>> # Operators to define the anonymization type.
        >>> result = engine.anonymize(
        ...     text="My name is Bond, James Bond",
        ...     analyzer_results=[RecognizerResult(entity_type="PERSON",
        ...                                        start=11,
        ...                                        end=15,
        ...                                        score=0.8),
        ...                       RecognizerResult(entity_type="PERSON",
        ...                                        start=17,
        ...                                        end=27,
        ...                                        score=0.8)],
        ...     operators={"PERSON": OperatorConfig("replace", {"new_value": "BIP"})}
        ... )

        >>> print(result)
        text: My name is BIP, BIP
        items:
        [
            {'start': 16, 'end': 19, 'entity_type': 'PERSON',
             'text': 'BIP', 'operator': 'replace'},
            {'start': 11, 'end': 14, 'entity_type': 'PERSON',
             'text': 'BIP', 'operator': 'replace'}
        ]


        """
        analyzer_results = self._remove_conflicts_and_get_text_manipulation_data(
            analyzer_results, conflict_resolution
        )

        merged_results = self._merge_entities_with_whitespace_between(
            text, analyzer_results
        )

        operators = self.__check_or_add_default_operator(operators)

        return self._operate(
            text=text,
            pii_entities=merged_results,
            operators_metadata=operators,
            operator_type=OperatorType.Anonymize,
        )

    def add_anonymizer(self, anonymizer_cls: Type[Operator]) -> None:
        """
        Add a new anonymizer to the engine.

        :param anonymizer_cls: The anonymizer class to add to the engine.
        """
        logger.info(f"Added anonymizer {anonymizer_cls.__name__}")
        self.operators_factory.add_anonymize_operator(anonymizer_cls)

    def remove_anonymizer(self, anonymizer_cls: Type[Operator]) -> None:
        """
        Remove an anonymizer from the engine.

        :param anonymizer_cls: The anonymizer class to remove from the engine.
        """
        logger.info(f"Removed anonymizer {anonymizer_cls.__name__}")
        self.operators_factory.remove_anonymize_operator(anonymizer_cls)

    def _remove_conflicts_and_get_text_manipulation_data(
        self,
        analyzer_results: List[RecognizerResult],
        conflict_resolution: ConflictResolutionStrategy,
    ) -> List[RecognizerResult]:
        """
        Iterate the list and create a unique results list from it.

        The work is done in up to three passes:
        1. Results of the same entity type that intersect are merged into one
           result spanning both, carrying the larger score.
        2. Results for which ``has_conflict`` reports a conflict with another
           remaining result are dropped.
        3. Only for ``ConflictResolutionStrategy.REMOVE_INTERSECTIONS``: the
           remaining overlapping results are trimmed so that they no longer
           overlap, and results whose start ends up after their end are removed.

        Note that the ``RecognizerResult`` objects are updated in place (their
        ``start``, ``end`` and ``score`` may change); the list copies made here
        are shallow, so the caller's objects are the ones being modified.

        :param analyzer_results: the results received from the analyzer
        :param conflict_resolution: the strategy used to handle conflicts
        :return: List of the results that survived conflict resolution. It is
        sorted by start index only when REMOVE_INTERSECTIONS is used.
        """
        tmp_analyzer_results = []
        # This list contains all elements which we need to check a single result
        # against. If a result is dropped, it can also be dropped from this list
        # since it is intersecting with another result and we selected the other one.
        other_elements = analyzer_results.copy()
        for result in analyzer_results:
            other_elements.remove(result)

            is_merge_same_entity_type = False
            for other_element in other_elements:
                if other_element.entity_type != result.entity_type:
                    continue
                # NOTE(review): intersects() presumably returns the size of the
                # overlap, 0 meaning "no overlap" - confirm in RecognizerResult.
                if result.intersects(other_element) == 0:
                    continue

                # Grow the other element so that it covers both results and
                # keeps the best score; the current result is then dropped.
                other_element.start = min(result.start, other_element.start)
                other_element.end = max(result.end, other_element.end)
                other_element.score = max(result.score, other_element.score)
                is_merge_same_entity_type = True
                break
            if not is_merge_same_entity_type:
                # Nothing absorbed this result: keep it and make it available
                # again for comparison with the results that follow.
                other_elements.append(result)
                tmp_analyzer_results.append(result)
            else:
                self.logger.debug(
                    f"removing element {result} from " f"results list due to merge"
                )

        unique_text_metadata_elements = []
        # This list contains all elements which we need to check a single result
        # against. If a result is dropped, it can also be dropped from this list
        # since it is intersecting with another result and we selected the other one.
        other_elements = tmp_analyzer_results.copy()
        for result in tmp_analyzer_results:
            other_elements.remove(result)
            result_conflicted = self.__is_result_conflicted_with_other_elements(
                other_elements, result
            )
            if not result_conflicted:
                other_elements.append(result)
                unique_text_metadata_elements.append(result)
            else:
                self.logger.debug(
                    f"removing element {result} from results list due to conflict"
                )

        # This further improves the quality of handling the conflict between the
        # various overlapping entities. It does not drop the results; instead
        # it adjusts the start and end positions of overlapping results and removes
        # all types of conflicts among entities as well as text.
        if conflict_resolution == ConflictResolutionStrategy.REMOVE_INTERSECTIONS:
            unique_text_metadata_elements.sort(key=lambda element: element.start)
            elements_length = len(unique_text_metadata_elements)
            index = 0
            while index < elements_length - 1:
                current_entity = unique_text_metadata_elements[index]
                next_entity = unique_text_metadata_elements[index + 1]
                if current_entity.end <= next_entity.start:
                    # No overlap between this pair, move on to the next one.
                    index += 1
                else:
                    # The entity with the higher score keeps the overlapping
                    # span (ties go to the earlier entity); the other is trimmed.
                    if current_entity.score >= next_entity.score:
                        next_entity.start = current_entity.end
                    else:
                        current_entity.end = next_entity.start
                    # Trimming may have changed the order by start index, so
                    # re-sort and re-check the same position.
                    unique_text_metadata_elements.sort(
                        key=lambda element: element.start
                    )
            # Drop the entities whose span was inverted by the trimming above.
            unique_text_metadata_elements = [
                element
                for element in unique_text_metadata_elements
                if element.start <= element.end
            ]
        return unique_text_metadata_elements

    def _merge_entities_with_whitespace_between(
        self, text: str, analyzer_results: List[RecognizerResult]
    ) -> List[RecognizerResult]:
        """Merge adjacent entities of the same type separated by whitespace.

        Two consecutive results are merged when they share the entity type and
        the text between them consists of one or more space characters only.
        The later result is kept and its ``start`` is moved (in place) to the
        start of the earlier one.

        :param text: the text the results refer to
        :param analyzer_results: the results, in the order they should be
        compared; only consecutive items of this list are considered
        :return: List of results after merging
        """
        merged_results = []
        prev_result = None
        for result in analyzer_results:
            if prev_result is not None:
                if prev_result.entity_type == result.entity_type:
                    if re.search(r"^( )+$", text[prev_result.end : result.start]):
                        merged_results.remove(prev_result)
                        result.start = prev_result.start
            merged_results.append(result)
            prev_result = result
        return merged_results

    def get_anonymizers(self) -> List[str]:
        """Return a list of supported anonymizers."""
        return list(self.operators_factory.get_anonymizers().keys())

    @staticmethod
    def __is_result_conflicted_with_other_elements(other_elements, result):
        """Return True if ``result.has_conflict`` holds for any other element."""
        return any(
            result.has_conflict(other_element) for other_element in other_elements
        )

    @staticmethod
    def __check_or_add_default_operator(
        operators: Optional[Dict[str, OperatorConfig]],
    ) -> Dict[str, OperatorConfig]:
        """Return an operators mapping that is guaranteed to have a "DEFAULT" entry.

        :param operators: the operators requested by the caller, may be None
        or empty
        :return: the given mapping when it already has a truthy "DEFAULT"
        entry, otherwise a new dict containing the given entries plus the
        default operator. The mapping passed in is never modified.
        """
        default_operator = OperatorConfig(DEFAULT)
        if not operators:
            return {"DEFAULT": default_operator}
        if operators.get("DEFAULT"):
            return operators
        # Build a new dict rather than inserting into the caller's mapping, so
        # a dict reused by the caller is not changed behind its back.
        return {**operators, "DEFAULT": default_operator}

anonymize

anonymize(
    text: str,
    analyzer_results: List[RecognizerResult],
    operators: Optional[Dict[str, OperatorConfig]] = None,
    conflict_resolution: ConflictResolutionStrategy = ConflictResolutionStrategy.MERGE_SIMILAR_OR_CONTAINED,
) -> EngineResult

Anonymize method to anonymize the given text.

Example:

    from presidio_anonymizer import AnonymizerEngine
    from presidio_anonymizer.entities import RecognizerResult, OperatorConfig

    # Initialize the engine.
    engine = AnonymizerEngine()

    # Invoke the anonymize function with the text, the analyzer results and
    # the operators that define the anonymization type.
    result = engine.anonymize(
        text="My name is Bond, James Bond",
        analyzer_results=[
            RecognizerResult(entity_type="PERSON", start=11, end=15, score=0.8),
            RecognizerResult(entity_type="PERSON", start=17, end=27, score=0.8),
        ],
        operators={"PERSON": OperatorConfig("replace", {"new_value": "BIP"})},
    )

    print(result)

Output:

    text: My name is BIP, BIP
    items:
    [
        {'start': 16, 'end': 19, 'entity_type': 'PERSON', 'text': 'BIP', 'operator': 'replace'},
        {'start': 11, 'end': 14, 'entity_type': 'PERSON', 'text': 'BIP', 'operator': 'replace'}
    ]

PARAMETER	DESCRIPTION
`text`	the text we are anonymizing TYPE: `str`
`analyzer_results`	A list of RecognizerResult class -> The results we received from the analyzer TYPE: `List[RecognizerResult]`
`operators`	The configuration of the anonymizers we would like to use for each entity, e.g.: {"PHONE_NUMBER":OperatorConfig("redact", {})} TYPE: `Optional[Dict[str, OperatorConfig]]` DEFAULT: `None`
`conflict_resolution`	The configuration designed to handle conflicts among entities TYPE: `ConflictResolutionStrategy` DEFAULT: `MERGE_SIMILAR_OR_CONTAINED`

RETURNS	DESCRIPTION
`EngineResult`	the anonymized text and a list of information about the anonymized entities.

Source code in presidio_anonymizer/anonymizer_engine.py

def anonymize(
    self,
    text: str,
    analyzer_results: List[RecognizerResult],
    operators: Optional[Dict[str, OperatorConfig]] = None,
    conflict_resolution: ConflictResolutionStrategy = (
        ConflictResolutionStrategy.MERGE_SIMILAR_OR_CONTAINED
    ),
) -> EngineResult:
    """Anonymize the PII entities of the given text.

    The analyzer results are de-conflicted, results separated only by
    whitespace are merged, a default operator is ensured, and the operators
    are then applied to the text.

    :param text: the text we are anonymizing
    :param analyzer_results: A list of RecognizerResult class -> The results we
    received from the analyzer
    :param operators: The configuration of the anonymizers we would like
    to use for each entity e.g.: {"PHONE_NUMBER":OperatorConfig("redact", {})}
    :param conflict_resolution: The configuration designed to handle conflicts
    among entities
    :return: the anonymized text and a list of information about the
    anonymized entities.

    :example:

    >>> from presidio_anonymizer import AnonymizerEngine
    >>> from presidio_anonymizer.entities import RecognizerResult, OperatorConfig

    >>> # Initialize the engine.
    >>> engine = AnonymizerEngine()

    >>> # Invoke the anonymize function with the text, analyzer results and
    >>> # Operators to define the anonymization type.
    >>> result = engine.anonymize(
    ...     text="My name is Bond, James Bond",
    ...     analyzer_results=[RecognizerResult(entity_type="PERSON",
    ...                                        start=11,
    ...                                        end=15,
    ...                                        score=0.8),
    ...                       RecognizerResult(entity_type="PERSON",
    ...                                        start=17,
    ...                                        end=27,
    ...                                        score=0.8)],
    ...     operators={"PERSON": OperatorConfig("replace", {"new_value": "BIP"})}
    ... )

    >>> print(result)
    text: My name is BIP, BIP
    items:
    [
        {'start': 16, 'end': 19, 'entity_type': 'PERSON',
         'text': 'BIP', 'operator': 'replace'},
        {'start': 11, 'end': 14, 'entity_type': 'PERSON',
         'text': 'BIP', 'operator': 'replace'}
    ]


    """
    # Step 1: resolve conflicts between the analyzer results.
    deconflicted_results = self._remove_conflicts_and_get_text_manipulation_data(
        analyzer_results, conflict_resolution
    )
    # Step 2: merge same-type entities that are separated by whitespace only.
    pii_entities = self._merge_entities_with_whitespace_between(
        text, deconflicted_results
    )
    # Step 3: make sure a "DEFAULT" operator is available.
    operators_with_default = self.__check_or_add_default_operator(operators)

    return self._operate(
        text=text,
        pii_entities=pii_entities,
        operators_metadata=operators_with_default,
        operator_type=OperatorType.Anonymize,
    )

add_anonymizer

add_anonymizer(anonymizer_cls: Type[Operator]) -> None

Add a new anonymizer to the engine.

anonymizer_cls: The anonymizer class to add to the engine.

Source code in presidio_anonymizer/anonymizer_engine.py

def add_anonymizer(self, anonymizer_cls: Type[Operator]) -> None:
    """
    Register an additional anonymizer with the engine.

    The class is passed to the engine's operators factory as an anonymize
    operator; an info message naming the class is logged first.

    :param anonymizer_cls: The anonymizer class to add to the engine.
    """
    logger.info(f"Added anonymizer {anonymizer_cls.__name__}")
    factory = self.operators_factory
    factory.add_anonymize_operator(anonymizer_cls)

remove_anonymizer

remove_anonymizer(anonymizer_cls: Type[Operator]) -> None

Remove an anonymizer from the engine.

anonymizer_cls: The anonymizer class to remove from the engine.

Source code in presidio_anonymizer/anonymizer_engine.py

def remove_anonymizer(self, anonymizer_cls: Type[Operator]) -> None:
    """
    Unregister an anonymizer from the engine.

    The class is removed from the engine's operators factory; an info
    message naming the class is logged first.

    :param anonymizer_cls: The anonymizer class to remove from the engine.
    """
    logger.info(f"Removed anonymizer {anonymizer_cls.__name__}")
    factory = self.operators_factory
    factory.remove_anonymize_operator(anonymizer_cls)

get_anonymizers

get_anonymizers() -> List[str]

Return a list of supported anonymizers.

Source code in presidio_anonymizer/anonymizer_engine.py

def get_anonymizers(self) -> List[str]:
    """Return the names of the anonymizers known to the operators factory."""
    return list(self.operators_factory.get_anonymizers().keys())

BatchAnonymizerEngine

BatchAnonymizerEngine class.

A class that provides functionality to anonymize in batches.

PARAMETER	DESCRIPTION
`anonymizer_engine`	An instance of the AnonymizerEngine class. TYPE: `Optional[AnonymizerEngine]` DEFAULT: `None`

METHOD	DESCRIPTION
`anonymize_list`	Anonymize a list of strings.
`anonymize_dict`	Anonymize values in a dictionary.

Source code in presidio_anonymizer/batch_anonymizer_engine.py

class BatchAnonymizerEngine:
    """
    BatchAnonymizerEngine class.

    A class that provides functionality to anonymize in batches.
    :param anonymizer_engine: An instance of the AnonymizerEngine class.
    A new AnonymizerEngine is created when none is given.
    """

    def __init__(self, anonymizer_engine: Optional[AnonymizerEngine] = None):
        self.anonymizer_engine = anonymizer_engine or AnonymizerEngine()

    def anonymize_list(
        self,
        texts: List[Optional[Union[str, bool, int, float]]],
        recognizer_results_list: List[List[RecognizerResult]],
        **kwargs,
    ) -> List[Union[str, Any]]:
        """
        Anonymize a list of strings.

        :param texts: List containing the texts to be anonymized (original texts).
            Items with a `type` not in `(str, bool, int, float)` will not be anonymized.
        :param recognizer_results_list: A list of lists of RecognizerResult,
        the output of the AnalyzerEngine on each text in the list. When it is
        empty or None, every text is processed with an empty list of results.
        :param kwargs: Additional kwargs for the `AnonymizerEngine.anonymize` method
        :return: A list holding, for each processed item, either the anonymized
        text or the untouched original item.
        """
        if not recognizer_results_list:
            # No analyzer output at all: pair every text with an empty result list.
            recognizer_results_list = [[] for _ in range(len(texts))]

        anonymizable_types = (str, bool, int, float)
        anonymized_items = []
        for item, item_results in zip(texts, recognizer_results_list):
            # Exact type match on purpose: other types are passed through as is.
            if type(item) not in anonymizable_types:
                anonymized_items.append(item)
                continue
            engine_result = self.anonymizer_engine.anonymize(
                text=str(item), analyzer_results=item_results, **kwargs
            )
            anonymized_items.append(engine_result.text)
        return anonymized_items

    def anonymize_dict(
        self, analyzer_results: Iterable[DictRecognizerResult], **kwargs
    ) -> Dict[str, str]:
        """
        Anonymize values in a dictionary.

        Each result is handled according to the type of its value: nested
        dictionaries are processed recursively, strings are anonymized
        directly, other iterables go through `anonymize_list`, and any other
        value is copied unchanged.

        :param analyzer_results: Iterator of `DictRecognizerResult`
        containing the output of the AnalyzerEngine.analyze_dict on the input text.
        :param kwargs: Additional kwargs for the `AnonymizerEngine.anonymize` method
        :return: A dictionary mapping every result key to its processed value.
        """
        anonymized = {}
        for entry in analyzer_results:
            value = entry.value
            if isinstance(value, dict):
                anonymized[entry.key] = self.anonymize_dict(
                    analyzer_results=entry.recognizer_results, **kwargs
                )
            elif isinstance(value, str):
                # str must be tested before the generic Iterable branch below.
                anonymized[entry.key] = self.anonymizer_engine.anonymize(
                    text=value,
                    analyzer_results=entry.recognizer_results,
                    **kwargs,
                ).text
            elif isinstance(value, collections.abc.Iterable):
                anonymized[entry.key] = self.anonymize_list(
                    texts=value,
                    recognizer_results_list=entry.recognizer_results,
                    **kwargs,
                )
            else:
                anonymized[entry.key] = value
        return anonymized

anonymize_list

anonymize_list(
    texts: List[Optional[Union[str, bool, int, float]]],
    recognizer_results_list: List[List[RecognizerResult]],
    **kwargs
) -> List[Union[str, Any]]

Anonymize a list of strings.

PARAMETER	DESCRIPTION
`texts`	List containing the texts to be anonymized (original texts). Items with a `type` not in `(str, bool, int, float)` will not be anonymized. TYPE: `List[Optional[Union[str, bool, int, float]]]`
`recognizer_results_list`	A list of lists of RecognizerResult, the output of the AnalyzerEngine on each text in the list. TYPE: `List[List[RecognizerResult]]`
`kwargs`	Additional kwargs for the `AnonymizerEngine.anonymize` method DEFAULT: `{}`

Source code in presidio_anonymizer/batch_anonymizer_engine.py

def anonymize_list(
    self,
    texts: List[Optional[Union[str, bool, int, float]]],
    recognizer_results_list: List[List[RecognizerResult]],
    **kwargs,
) -> List[Union[str, Any]]:
    """
    Anonymize a list of strings.

    :param texts: List containing the texts to be anonymized (original texts).
        Items with a `type` not in `(str, bool, int, float)` will not be anonymized.
    :param recognizer_results_list: A list of lists of RecognizerResult,
    the output of the AnalyzerEngine on each text in the list. When it is
    empty or None, every text is processed with an empty list of results.
    :param kwargs: Additional kwargs for the `AnonymizerEngine.anonymize` method
    :return: A list holding, for each processed item, either the anonymized
    text or the untouched original item.
    """
    if not recognizer_results_list:
        # No analyzer output at all: pair every text with an empty result list.
        recognizer_results_list = [[] for _ in range(len(texts))]

    anonymizable_types = (str, bool, int, float)
    anonymized_items = []
    for item, item_results in zip(texts, recognizer_results_list):
        # Exact type match on purpose: other types are passed through as is.
        if type(item) not in anonymizable_types:
            anonymized_items.append(item)
            continue
        engine_result = self.anonymizer_engine.anonymize(
            text=str(item), analyzer_results=item_results, **kwargs
        )
        anonymized_items.append(engine_result.text)
    return anonymized_items

anonymize_dict

anonymize_dict(
    analyzer_results: Iterable[DictRecognizerResult], **kwargs
) -> Dict[str, str]

Anonymize values in a dictionary.

PARAMETER	DESCRIPTION
`analyzer_results`	Iterator of `DictRecognizerResult` containing the output of the AnalyzerEngine.analyze_dict on the input text. TYPE: `Iterable[DictRecognizerResult]`
`kwargs`	Additional kwargs for the `AnonymizerEngine.anonymize` method DEFAULT: `{}`

Source code in presidio_anonymizer/batch_anonymizer_engine.py

def anonymize_dict(
    self, analyzer_results: Iterable[DictRecognizerResult], **kwargs
) -> Dict[str, str]:
    """
    Anonymize values in a dictionary.

    Each result is handled according to the type of its value: nested
    dictionaries are processed recursively, strings are anonymized directly,
    other iterables go through `anonymize_list`, and any other value is
    copied unchanged.

    :param analyzer_results: Iterator of `DictRecognizerResult`
    containing the output of the AnalyzerEngine.analyze_dict on the input text.
    :param kwargs: Additional kwargs for the `AnonymizerEngine.anonymize` method
    :return: A dictionary mapping every result key to its processed value.
    """
    anonymized = {}
    for entry in analyzer_results:
        value = entry.value
        if isinstance(value, dict):
            anonymized[entry.key] = self.anonymize_dict(
                analyzer_results=entry.recognizer_results, **kwargs
            )
        elif isinstance(value, str):
            # str must be tested before the generic Iterable branch below.
            anonymized[entry.key] = self.anonymizer_engine.anonymize(
                text=value,
                analyzer_results=entry.recognizer_results,
                **kwargs,
            ).text
        elif isinstance(value, collections.abc.Iterable):
            anonymized[entry.key] = self.anonymize_list(
                texts=value,
                recognizer_results_list=entry.recognizer_results,
                **kwargs,
            )
        else:
            anonymized[entry.key] = value
    return anonymized

DeanonymizeEngine

Bases: EngineBase

Deanonymize text that was previously anonymized.

METHOD	DESCRIPTION
`deanonymize`	Receive the text, entities and operators to perform deanonymization over.
`get_deanonymizers`	Return a list of supported deanonymizers.
`add_deanonymizer`	Add a new deanonymizer to the engine.
`remove_deanonymizer`	Remove a deanonymizer from the engine.

Source code in presidio_anonymizer/deanonymize_engine.py

class DeanonymizeEngine(EngineBase):
    """Deanonymize text that was previously anonymized."""

    def deanonymize(
        self,
        text: str,
        entities: List[OperatorResult],
        operators: Dict[str, OperatorConfig],
    ) -> EngineResult:
        """
        Receive the text, entities and operators to perform deanonymization over.

        :param text: the full text with the encrypted entities
        :param entities: list of encrypted entities
        :param operators: the operators to apply on the anonymizer result entities
        :return: EngineResult - the new text and data about the deanonymized entities.
        """
        return self._operate(
            text=text,
            pii_entities=entities,
            operators_metadata=operators,
            operator_type=OperatorType.Deanonymize,
        )

    def get_deanonymizers(self) -> List[str]:
        """Return a list of supported deanonymizers."""
        return list(self.operators_factory.get_deanonymizers().keys())

    def add_deanonymizer(self, deanonymizer_cls: Type[Operator]) -> None:
        """
        Add a new deanonymizer to the engine.

        :param deanonymizer_cls: The deanonymizer class to add to the engine.
        """
        logger.info(f"Added deanonymizer {deanonymizer_cls.__name__}")
        factory = self.operators_factory
        factory.add_deanonymize_operator(deanonymizer_cls)

    def remove_deanonymizer(self, deanonymizer_cls: Type[Operator]) -> None:
        """
        Remove a deanonymizer from the engine.

        :param deanonymizer_cls: The deanonymizer class to remove from the engine.
        """
        logger.info(f"Removed deanonymizer {deanonymizer_cls.__name__}")
        factory = self.operators_factory
        factory.remove_deanonymize_operator(deanonymizer_cls)

deanonymize

deanonymize(
    text: str,
    entities: List[OperatorResult],
    operators: Dict[str, OperatorConfig],
) -> EngineResult

Receive the text, entities and operators to perform deanonymization over.

PARAMETER	DESCRIPTION
`operators`	the operators to apply on the anonymizer result entities TYPE: `Dict[str, OperatorConfig]`
`text`	the full text with the encrypted entities TYPE: `str`
`entities`	list of encrypted entities TYPE: `List[OperatorResult]`

RETURNS	DESCRIPTION
`EngineResult`	EngineResult - the new text and data about the deanonymized entities.

Source code in presidio_anonymizer/deanonymize_engine.py

def deanonymize(
    self,
    text: str,
    entities: List[OperatorResult],
    operators: Dict[str, OperatorConfig],
) -> EngineResult:
    """
    Receive the text, entities and operators to perform deanonymization over.

    :param text: the full text with the encrypted entities
    :param entities: list of encrypted entities
    :param operators: the operators to apply on the anonymizer result entities
    :return: EngineResult - the new text and data about the deanonymized entities.
    """
    return self._operate(
        text=text,
        pii_entities=entities,
        operators_metadata=operators,
        operator_type=OperatorType.Deanonymize,
    )

get_deanonymizers

get_deanonymizers() -> List[str]

Return a list of supported deanonymizers.

Source code in presidio_anonymizer/deanonymize_engine.py

def get_deanonymizers(self) -> List[str]:
    """Return the names of the deanonymizers known to the operators factory."""
    return list(self.operators_factory.get_deanonymizers().keys())

add_deanonymizer

add_deanonymizer(deanonymizer_cls: Type[Operator]) -> None

Add a new deanonymizer to the engine.

deanonymizer_cls: The deanonymizer class to add to the engine.

Source code in presidio_anonymizer/deanonymize_engine.py

def add_deanonymizer(self, deanonymizer_cls: Type[Operator]) -> None:
    """
    Register an additional deanonymizer with the engine.

    The class is passed to the engine's operators factory as a deanonymize
    operator; an info message naming the class is logged first.

    :param deanonymizer_cls: The deanonymizer class to add to the engine.
    """
    logger.info(f"Added deanonymizer {deanonymizer_cls.__name__}")
    factory = self.operators_factory
    factory.add_deanonymize_operator(deanonymizer_cls)

remove_deanonymizer

remove_deanonymizer(deanonymizer_cls: Type[Operator]) -> None

Remove a deanonymizer from the engine.

deanonymizer_cls: The deanonymizer class to remove from the engine.

Source code in presidio_anonymizer/deanonymize_engine.py

def remove_deanonymizer(self, deanonymizer_cls: Type[Operator]) -> None:
    """
    Unregister a deanonymizer from the engine.

    The class is removed from the engine's operators factory; an info
    message naming the class is logged first.

    :param deanonymizer_cls: The deanonymizer class to remove from the engine.
    """
    logger.info(f"Removed deanonymizer {deanonymizer_cls.__name__}")
    factory = self.operators_factory
    factory.remove_deanonymize_operator(deanonymizer_cls)

ConflictResolutionStrategy

Bases: Enum

Conflict resolution strategy.

The strategy to use when there is a conflict between two entities.

- MERGE_SIMILAR_OR_CONTAINED: This default strategy resolves conflicts between similar or contained entities.
- REMOVE_INTERSECTIONS: Effectively resolves both intersection conflicts among entities and default strategy conflicts.
- NONE: No conflict resolution will be performed. (Note: the enum source shown below defines only the two members above; it has no `NONE` member.)

Source code in presidio_anonymizer/entities/conflict_resolution_strategy.py

class ConflictResolutionStrategy(Enum):
    """Conflict resolution strategy.

    The strategy to use when there is a conflict between two entities.

    MERGE_SIMILAR_OR_CONTAINED: This default strategy resolves conflicts
    between similar or contained entities.
    REMOVE_INTERSECTIONS: Effectively resolves both intersection conflicts
    among entities and default strategy conflicts.

    NOTE(review): this docstring used to list a third option, ``NONE``
    ("No conflict resolution will be performed"), but no ``NONE`` member is
    defined on this enum - either add the member together with its handling
    or keep it out of the documentation.
    """

    # Each member's value is the lowercase form of its name.
    MERGE_SIMILAR_OR_CONTAINED = "merge_similar_or_contained"
    REMOVE_INTERSECTIONS = "remove_intersections"

DictRecognizerResult `dataclass`

Data class for holding the output of the Presidio Analyzer on dictionaries.

PARAMETER	DESCRIPTION
`key`	key in dictionary TYPE: `str`
`value`	value to run analysis on (a string, a list of strings, or a nested dictionary) TYPE: `Union[str, List[str], dict]`
`recognizer_results`	Analyzer output for one value. Could be either: - A list of recognizer results if the input is one string - A list of lists of recognizer results, if the input is a list of strings. - An iterator of a DictRecognizerResult, if the input is a dictionary. In this case the recognizer_results would be the iterator of the DictRecognizerResult next level in the dictionary. TYPE: `Union[List[RecognizerResult], List[List[RecognizerResult]], Iterator[DictRecognizerResult]]`

Source code in presidio_anonymizer/entities/engine/dict_recognizer_result.py

@dataclass
class DictRecognizerResult:
    """
    Data class for holding the output of the Presidio Analyzer on dictionaries.

    One instance describes a single key of the analyzed dictionary.

    :param key: key in dictionary
    :param value: value to run analysis on (a string, a list of strings,
    or a nested dictionary)
    :param recognizer_results: Analyzer output for one value.
    Could be either:
     - A list of recognizer results if the input is one string
     - A list of lists of recognizer results, if the input is a list of strings.
     - An iterator of a DictRecognizerResult, if the input is a dictionary.
     In this case the recognizer_results would be the iterator
     of the DictRecognizerResult next level in the dictionary.
    """

    key: str
    value: Union[str, List[str], dict]
    # The shape of recognizer_results follows the type of `value`, see the
    # class docstring for the three possible combinations.
    recognizer_results: Union[
        List[RecognizerResult],
        List[List[RecognizerResult]],
        Iterator["DictRecognizerResult"],
    ]

EngineResult

Engine result.

METHOD	DESCRIPTION
`set_text`	Set a text.
`add_item`	Add an item.
`normalize_item_indexes`	Normalize the item indexes so that they are counted from the start of the text.
`to_json`	Return a json string serializing this instance.

Source code in presidio_anonymizer/entities/engine/result/engine_result.py

class EngineResult:
    """Engine result.

    Holds the text produced by an engine run together with the list of
    items describing each operated-on entity.
    """

    def __init__(self, text: str = None, items: List[OperatorResult] = None):
        """Create EngineResult entity.

        :param text: The anonymized text.
        :param items: List of PII entities and the indices
         of their replacements in the anonymized text.
         A fresh empty list is used when omitted.
        """
        if items is None:
            items = []
        self.text = text
        self.items = items

    def set_text(self, text: str):
        """Set a text.

        :param text: the text to store on this result.
        """
        self.text = text

    def add_item(self, item: OperatorResult):
        """Add an item.

        :param item: an item to add to the list.
        """
        self.items.append(item)

    def normalize_item_indexes(self):
        """Normalize the indexes to be index from start.

        Every item gets ``start = len(self.text) - item.end`` and then
        ``end = start + len(item.text)``; the items are updated in place.
        """
        text_len = len(self.text)
        for result_item in self.items:
            result_item.start = text_len - result_item.end
            result_item.end = result_item.start + len(result_item.text)

    def to_json(self) -> str:
        """Return a json string serializing this instance.

        Objects json cannot encode natively (this instance and its items)
        are serialized through their ``__dict__``.
        """
        return json.dumps(self, default=lambda x: x.__dict__)

    def __repr__(self):
        """Return a string representation of the object."""

        items_repr = (
            ",\n    ".join([str(item) for item in self.items]) if self.items else ""
        )
        return f"text: {self.text}\nitems:\n[\n    {items_repr}\n]\n"

    def __eq__(self, other) -> bool:
        """Verify two instances are equal.

        Two results are equal when their texts match and they hold the same
        number of items, each pair of items comparing equal in order.

        :param other: the object to compare with.
        :return: True if the two instances are equal, False otherwise;
         NotImplemented when ``other`` is not an EngineResult.
        """
        if not isinstance(other, EngineResult):
            return NotImplemented
        if self.text != other.text:
            return False
        # Pairwise iteration stops at the shorter sequence, so the lengths
        # must be compared explicitly; otherwise a result whose items are a
        # prefix of the other's would wrongly compare equal.
        if len(self.items) != len(other.items):
            return False
        return all(mine == theirs for mine, theirs in zip(self.items, other.items))

set_text

set_text(text: str)

Set a text.

Source code in presidio_anonymizer/entities/engine/result/engine_result.py

def set_text(self, text: str):
    """Replace the text stored on this result.

    :param text: the new text to keep on the instance.
    """
    self.text = text

add_item

add_item(item: OperatorResult)

Add an item.

PARAMETER	DESCRIPTION
`item`	an item to add to the list. TYPE: `OperatorResult`

Source code in presidio_anonymizer/entities/engine/result/engine_result.py

def add_item(self, item: OperatorResult):
    """Append one more item to the end of this result's item list.

    :param item: the item to append; the list is modified in place.
    """
    self.items.append(item)

normalize_item_indexes

normalize_item_indexes()

Normalize the indexes to be index from start.

Source code in presidio_anonymizer/entities/engine/result/engine_result.py

def normalize_item_indexes(self):
    """Rewrite every item's indexes so they are measured from the text start.

    For each item, the new start is ``len(self.text) - item.end`` and the new
    end is that start plus the length of the item's own text. Items are
    updated in place.
    """
    total_length = len(self.text)
    for entry in self.items:
        shifted_start = total_length - entry.end
        entry.start = shifted_start
        entry.end = shifted_start + len(entry.text)

to_json

to_json() -> str

Return a json string serializing this instance.

Source code in presidio_anonymizer/entities/engine/result/engine_result.py

def to_json(self) -> str:
    """Serialize this instance to a json string.

    Any object json cannot encode natively is written as its ``__dict__``.
    """

    def _attributes_of(obj):
        return obj.__dict__

    return json.dumps(self, default=_attributes_of)

InvalidParamError

Bases: Exception

Throw exception with error when user input is not valid.

param msg: Message to be added to the exception

Source code in presidio_anonymizer/entities/invalid_exception.py

class InvalidParamError(Exception):
    """Exception raised when user input is not valid.

    :param msg: Message to be added to the exception
    """

    def __init__(self, msg: str):
        """Pass the message to ``Exception`` and keep it as ``err_msg``."""
        super().__init__(msg)
        self.err_msg = msg

OperatorConfig

Hold the data of the required operator.

METHOD	DESCRIPTION
`from_json`	Create OperatorConfig from json.

Source code in presidio_anonymizer/entities/engine/operator_config.py

class OperatorConfig:
    """Hold the data of the required operator."""

    def __init__(self, operator_name: str, params: Dict = None):
        """
        Create an operator config instance.

        :param operator_name: the name of the operator we want to work with
        :param params: the parameters the operator needs in order to work;
        an empty dict is used when omitted or empty
        """
        self.operator_name = operator_name
        if not params:
            params = {}
        self.params = params
        self.__validate_fields()

    def __repr__(self):
        """Return a string representation of the object."""
        return f"operator_name: {self.operator_name}, params: {self.params}"

    @classmethod
    def from_json(cls, params: Dict) -> "OperatorConfig":
        """
        Create OperatorConfig from json.

        The given dict is not modified: the operator parameters are taken
        from a shallow copy with the "type" entry removed.

        :param params: json e.g.: {
            "type": "mask",
            "masking_char": "*",
            "chars_to_mask": 4,
            "from_end": true
            }
        :return: OperatorConfig
        """
        # Copy first so the caller's dict keeps its "type" entry; popping from
        # the original would make a second from_json call on it lose the name.
        operator_params = dict(params)
        operator_name = operator_params.get("type")
        if operator_name:
            del operator_params["type"]
        return cls(operator_name, operator_params)

    def __eq__(self, other: "OperatorConfig"):
        """Verify two OperatorConfigs are equal.

        :param other: the object to compare with
        :return: True when both the operator name and the params match;
        NotImplemented when ``other`` is not an OperatorConfig
        """
        if not isinstance(other, OperatorConfig):
            return NotImplemented
        return (
            self.operator_name == other.operator_name
            and self.params == other.params
        )

    def __validate_fields(self):
        # Delegates the operator_name check to the shared validation helper.
        validate_parameter_not_empty(
            self.operator_name, "operator config", "operator_name"
        )

from_json `classmethod`

from_json(params: Dict) -> OperatorConfig

Create OperatorConfig from json.

PARAMETER	DESCRIPTION
`params`	json e.g.: { "type": "mask", "masking_char": "*", "chars_to_mask": 4, "from_end": true } TYPE: `Dict`

RETURNS	DESCRIPTION
`OperatorConfig`	OperatorConfig

Source code in presidio_anonymizer/entities/engine/operator_config.py

@classmethod
def from_json(cls, params: Dict) -> "OperatorConfig":
    """
    Create OperatorConfig from json.

    The given dict is not modified: the operator parameters are taken
    from a shallow copy with the "type" entry removed.

    :param params: json e.g.: {
        "type": "mask",
        "masking_char": "*",
        "chars_to_mask": 4,
        "from_end": true
        }
    :return: OperatorConfig
    """
    # Copy first so the caller's dict keeps its "type" entry; popping from
    # the original would make a second from_json call on it lose the name.
    operator_params = dict(params)
    operator_name = operator_params.get("type")
    if operator_name:
        del operator_params["type"]
    return cls(operator_name, operator_params)

OperatorResult

Bases: PIIEntity

A class to hold the data of an engine result, for either anonymize or deanonymize.

METHOD	DESCRIPTION
`to_dict`	Return object as Dict.
`from_json`	Create OperatorResult from user json.

Source code in presidio_anonymizer/entities/engine/result/operator_result.py

class OperatorResult(PIIEntity):
    """Data holder for a single engine result (anonymize or deanonymize)."""

    def __init__(
        self,
        start: int,
        end: int,
        entity_type: str,
        text: str = None,
        operator: str = None,
    ):
        """Create an operator result.

        :param start: start index, passed on to PIIEntity
        :param end: end index, passed on to PIIEntity
        :param entity_type: entity type, passed on to PIIEntity
        :param text: the text kept on this result
        :param operator: the operator name kept on this result
        """
        PIIEntity.__init__(self, start, end, entity_type)
        self.text = text
        self.operator = operator

    def __repr__(self):
        """Return a string representation of the object."""
        as_dict = self.to_dict()
        return str(as_dict)

    def to_dict(self) -> Dict:
        """Return object as Dict (the instance's own attribute dictionary)."""
        return vars(self)

    def __str__(self):
        """Return a string representation of the object."""
        as_dict = self.to_dict()
        return str(as_dict)

    def __eq__(self, other: "OperatorResult") -> bool:
        """
        Verify two OperatorResults are equal.

        All of start, end, entity_type, operator and text must match.

        :param other: OperatorResult
        :return: bool
        """
        compared_fields = ("start", "end", "entity_type", "operator", "text")
        return all(
            getattr(self, field) == getattr(other, field)
            for field in compared_fields
        )

    @classmethod
    def from_json(cls, json: Dict) -> "OperatorResult":
        """
        Create OperatorResult from user json.

        Only the constructor's fields are read; missing keys become None and
        any other key is ignored.

        :param json: json representation for this operator result. For example:
        {
            "start": 0,
            "end": 10,
            "key": "1111111111111111",
            "entity_type":"PERSON",
            "text":"resulted_text",
            "operator":"encrypt",
        }
        """
        wanted = ("start", "end", "entity_type", "text", "operator")
        return cls(**{field: json.get(field) for field in wanted})

to_dict

to_dict() -> Dict

Return object as Dict.

Source code in presidio_anonymizer/entities/engine/result/operator_result.py

def to_dict(self) -> Dict:
    """Return the object's own attribute dictionary."""
    return vars(self)

from_json `classmethod`

from_json(json: Dict) -> OperatorResult

Create OperatorResult from user json.

PARAMETER	DESCRIPTION
`json`	json representation for this operator result. For example: { "start": 0, "end": 10, "key": "1111111111111111", "entity_type":"PERSON", "text":"resulted_text", "operator":"encrypt", } TYPE: `Dict`

Source code in presidio_anonymizer/entities/engine/result/operator_result.py

@classmethod
def from_json(cls, json: Dict) -> "OperatorResult":
    """
    Build an OperatorResult out of a user-supplied json dict.

    Only the constructor's fields are read; missing keys become None and
    any other key is ignored.

    :param json: json representation for this operator result. For example:
    {
        "start": 0,
        "end": 10,
        "key": "1111111111111111",
        "entity_type":"PERSON",
        "text":"resulted_text",
        "operator":"encrypt",
    }
    """
    wanted = ("start", "end", "entity_type", "text", "operator")
    return cls(**{field: json.get(field) for field in wanted})

PIIEntity

Bases: ABC

Abstract class to hold metadata about the text we are going to operate on.

Source code in presidio_anonymizer/entities/engine/pii_entity.py

class PIIEntity(ABC):
    """Abstract class to hold metadata about the text we operate on.

    Stores the start index, end index and entity type of one entity.
    """

    logger = logging.getLogger("presidio-anonymizer")

    def __init__(self, start: int, end: int, entity_type: str):
        """Create the entity and validate its fields.

        :param start: start index of the entity
        :param end: end index of the entity
        :param entity_type: the type of the entity
        :raises InvalidParamError: when an index is negative or start is
        greater than end
        """
        self.start = start
        self.end = end
        self.entity_type = entity_type
        self.__validate_fields()

    def __repr__(self):
        """Return a string representation of the object."""
        # Each field is separated by ", "; without the separators the pieces
        # ran together (e.g. "start: 0end: 5,entity_type: X").
        return (
            f"start: {self.start}, "
            f"end: {self.end}, "
            f"entity_type: {self.entity_type}"
        )

    def __gt__(self, other):
        """Check one entity is greater than other by the text start index."""
        return self.start > other.start

    def __eq__(self, other):
        """Check two text metadata entities are equal.

        Entities are equal when start, end and entity_type all match.
        """
        return (
            self.start == other.start
            and self.end == other.end
            and self.entity_type == other.entity_type
        )

    def __validate_fields(self):
        # Presence/type checks are delegated to the shared validation helpers.
        validate_parameter_exists(self.start, "result", "start")
        validate_type(self.start, "start", int)
        validate_parameter_exists(self.end, "result", "end")
        validate_type(self.end, "end", int)
        validate_parameter_not_empty(self.entity_type, "result", "entity_type")
        # Zero is accepted; only negative indices are rejected.
        if self.start < 0 or self.end < 0:
            raise InvalidParamError(
                "Invalid input, result start and end must be positive"
            )
        # An empty span (start == end) is accepted; only start > end is rejected.
        if self.start > self.end:
            raise InvalidParamError(
                f"Invalid input, start index '{self.start}' "
                f"must be smaller than end index '{self.end}'"
            )

RecognizerResult

Bases: PIIEntity

Recognizer Result represents the findings of the detected entity.

Result of a recognizer analyzing the text.

PARAMETER	DESCRIPTION
`entity_type`	the type of the entity TYPE: `str`
`start`	the start location of the detected entity TYPE: `int`
`end`	the end location of the detected entity TYPE: `int`
`score`	the score of the detection TYPE: `float`

METHOD	DESCRIPTION
`from_json`	Create RecognizerResult from json.
`has_conflict`	Check if two recognizer results are conflicted or not.
`contains`	Check if one result is contained or equal to another result.
`equal_indices`	Check if the indices are equal between two results.
`intersects`	Check if self intersects with a different RecognizerResult.

Source code in presidio_anonymizer/entities/engine/recognizer_result.py

class RecognizerResult(PIIEntity):
    """
    A single entity detection produced by a recognizer analyzing the text.

    :param entity_type: the type of the entity
    :param start: the start location of the detected entity
    :param end: the end location of the detected entity
    :param score: the score of the detection
    """

    logger = logging.getLogger("presidio-anonymizer")

    def __init__(self, entity_type: str, start: int, end: int, score: float):
        """Create the result; indices and type are handled by PIIEntity."""
        PIIEntity.__init__(self, start, end, entity_type)
        self.score = score
        validate_parameter_exists(score, "analyzer result", "score")

    @classmethod
    def from_json(cls, data: Dict):
        """
        Build a RecognizerResult out of a json dict.

        Missing keys are passed to the constructor as None.

        :param data: e.g. {
            "start": 24,
            "end": 32,
            "score": 0.8,
            "entity_type": "NAME"
        }
        :return: RecognizerResult
        """
        return cls(
            data.get("entity_type"),
            data.get("start"),
            data.get("end"),
            data.get("score"),
        )

    def __gt__(self, other):
        """
        Order results by start index, using the end index to break ties.

        :param other: another RecognizerResult
        :return: bool
        """
        if self.start != other.start:
            return self.start > other.start
        return self.end > other.end

    def __eq__(self, other):
        """
        Compare two results on indices, entity type and score.

        :param other: another RecognizerResult
        :return: bool
        """
        same_type = self.entity_type == other.entity_type
        same_score = self.score == other.score
        return self.equal_indices(other) and same_type and same_score

    def __hash__(self):
        """
        Hash the result from its start, end, score and entity type.

        :return: int
        """
        fingerprint = f"{self.start!s} {self.end!s} {self.score!s} {self.entity_type}"
        return hash(fingerprint)

    def __str__(self) -> str:
        """Return a string representation of the instance."""
        parts = [
            f"type: {self.entity_type}",
            f"start: {self.start}",
            f"end: {self.end}",
            f"score: {self.score}",
        ]
        return ", ".join(parts)

    def has_conflict(self, other):
        """
        Tell whether this result conflicts with another one.

        There is a conflict when:
        1. Both have the same indices and this score is lower or equal.
        2. The indices differ and this result is contained in the other.

        :param other: RecognizerResult
        :return: the outcome of the applicable check above
        """
        if not self.equal_indices(other):
            return other.contains(self)
        return self.score <= other.score

    def contains(self, other):
        """
        Tell whether another result lies within (or equals) this result's span.

        :param other: another RecognizerResult
        :return: bool
        """
        starts_no_later = self.start <= other.start
        ends_no_earlier = self.end >= other.end
        return starts_no_later and ends_no_earlier

    def equal_indices(self, other):
        """
        Tell whether both results have the same start and end.

        :param other: another RecognizerResult
        :return: bool
        """
        return (self.start, self.end) == (other.start, other.end)

    def intersects(self, other) -> int:
        """
        Measure how much this result overlaps a different RecognizerResult.

        :return: If intersecting, returns the number of
        intersecting characters.
        If not, returns 0
        """
        # one span ending before the other begins means there is no overlap
        disjoint = self.end < other.start or other.end < self.start
        if disjoint:
            return 0

        # the overlap runs from the later start to the earlier end
        overlap_start = max(self.start, other.start)
        overlap_end = min(self.end, other.end)
        return overlap_end - overlap_start

from_json `classmethod`

from_json(data: Dict)

Create RecognizerResult from json.

PARAMETER	DESCRIPTION
`data`	e.g. { "start": 24, "end": 32, "score": 0.8, "entity_type": "NAME" } TYPE: `Dict`

RETURNS	DESCRIPTION
	RecognizerResult

Source code in presidio_anonymizer/entities/engine/recognizer_result.py

@classmethod
def from_json(cls, data: Dict):
    """
    Build a RecognizerResult out of a json dict.

    Missing keys are passed to the constructor as None.

    :param data: e.g. {
        "start": 24,
        "end": 32,
        "score": 0.8,
        "entity_type": "NAME"
    }
    :return: RecognizerResult
    """
    return cls(
        data.get("entity_type"),
        data.get("start"),
        data.get("end"),
        data.get("score"),
    )

has_conflict

has_conflict(other)

Check if two recognizer results are conflicted or not.

I have a conflict if: 1. My indices are the same as the other's and my score is lower or equal. 2. My indices are contained in the other's.

PARAMETER	DESCRIPTION
`other`	RecognizerResult

RETURNS	DESCRIPTION

Source code in presidio_anonymizer/entities/engine/recognizer_result.py

def has_conflict(self, other):
    """
    Tell whether this result conflicts with another one.

    There is a conflict when:
    1. Both have the same indices and this score is lower or equal.
    2. The indices differ and this result is contained in the other.

    :param other: RecognizerResult
    :return: the outcome of the applicable check above
    """
    if not self.equal_indices(other):
        return other.contains(self)
    return self.score <= other.score

contains

contains(other)

Check if one result is contained or equal to another result.

PARAMETER	DESCRIPTION
`other`	another RecognizerResult

RETURNS	DESCRIPTION
	bool

Source code in presidio_anonymizer/entities/engine/recognizer_result.py

def contains(self, other):
    """
    Tell whether another result lies within (or equals) this result's span.

    :param other: another RecognizerResult
    :return: bool
    """
    starts_no_later = self.start <= other.start
    ends_no_earlier = self.end >= other.end
    return starts_no_later and ends_no_earlier

equal_indices

equal_indices(other)

Check if the indices are equal between two results.

PARAMETER	DESCRIPTION
`other`	another RecognizerResult

RETURNS	DESCRIPTION

Source code in presidio_anonymizer/entities/engine/recognizer_result.py

def equal_indices(self, other):
    """
    Tell whether both results have the same start and end.

    :param other: another RecognizerResult
    :return: bool
    """
    return (self.start, self.end) == (other.start, other.end)

intersects

intersects(other) -> int

Check if self intersects with a different RecognizerResult.

RETURNS	DESCRIPTION
`int`	If intersecting, returns the number of intersecting characters. If not, returns 0

Source code in presidio_anonymizer/entities/engine/recognizer_result.py

def intersects(self, other) -> int:
    """
    Measure how much this result overlaps a different RecognizerResult.

    :return: If intersecting, returns the number of
    intersecting characters.
    If not, returns 0
    """
    # one span ending before the other begins means there is no overlap
    disjoint = self.end < other.start or other.end < self.start
    if disjoint:
        return 0

    # the overlap runs from the later start to the earlier end
    overlap_start = max(self.start, other.start)
    overlap_end = min(self.end, other.end)
    return overlap_end - overlap_start

Presidio Anonymizer API Reference