Skip to content

Presidio Anonymizer API Reference

Anonymizer root module.

AnonymizerEngine

Bases: EngineBase

AnonymizerEngine class.

Handles the entire logic of the Presidio-anonymizer. Gets the original text and replaces the PII entities with the desired anonymizers.

Source code in /opt/hostedtoolcache/Python/3.10.11/x64/lib/python3.10/site-packages/presidio_anonymizer/anonymizer_engine.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
class AnonymizerEngine(EngineBase):
    """
    AnonymizerEngine class.

    Handles the entire logic of the Presidio-anonymizer. Gets the original text
    and replaces the PII entities with the desired anonymizers.
    """

    logger = logging.getLogger("presidio-anonymizer")

    def __init__(self):
        EngineBase.__init__(self)

    def anonymize(
        self,
        text: str,
        analyzer_results: List[RecognizerResult],
        operators: Optional[Dict[str, OperatorConfig]] = None,
    ) -> EngineResult:
        """Anonymize method to anonymize the given text.

        The analyzer results are first reduced by
        `_remove_conflicts_and_get_text_manipulation_data`, a "DEFAULT"
        operator is added when none was supplied, and everything is then
        handed to `_operate` with `OperatorType.Anonymize`.

        :param text: the text we are anonymizing
        :param analyzer_results: A list of RecognizerResult objects, the
        results we received from the analyzer
        :param operators: The configuration of the anonymizers we would like
        to use for each entity e.g.: {"PHONE_NUMBER":OperatorConfig("redact", {})}
        :return: the anonymized text and a list of information about the
        anonymized entities.

        :example:

        >>> from presidio_anonymizer import AnonymizerEngine
        >>> from presidio_anonymizer.entities import RecognizerResult, OperatorConfig

        >>> # Initialize the engine.
        >>> engine = AnonymizerEngine()

        >>> # Invoke the anonymize function with the text, analyzer results and
        >>> # Operators to define the anonymization type.
        >>> result = engine.anonymize(
        >>>     text="My name is Bond, James Bond",
        >>>     analyzer_results=[RecognizerResult(entity_type="PERSON",
        >>>                                        start=11,
        >>>                                        end=15,
        >>>                                        score=0.8),
        >>>                       RecognizerResult(entity_type="PERSON",
        >>>                                        start=17,
        >>>                                        end=27,
        >>>                                        score=0.8)],
        >>>     operators={"PERSON": OperatorConfig("replace", {"new_value": "BIP"})}
        >>> )

        >>> print(result)
        text: My name is BIP, BIP.
        items:
        [
            {'start': 16, 'end': 19, 'entity_type': 'PERSON',
             'text': 'BIP', 'operator': 'replace'},
            {'start': 11, 'end': 14, 'entity_type': 'PERSON',
             'text': 'BIP', 'operator': 'replace'}
        ]


        """
        analyzer_results = self._remove_conflicts_and_get_text_manipulation_data(
            analyzer_results
        )

        operators = self.__check_or_add_default_operator(operators)

        return self._operate(text, analyzer_results, operators, OperatorType.Anonymize)

    def _remove_conflicts_and_get_text_manipulation_data(
        self, analyzer_results: List[RecognizerResult]
    ) -> List[RecognizerResult]:
        """
        Iterate the list and create a unique results list from it.

        The work is done in two passes:

        1. Merge pass: a result is folded into another result of the same
           entity type when `intersects` reports a non-zero value for the
           pair. The surviving result is widened to cover both spans and
           keeps the larger of the two scores.
        2. Conflict pass: a result is dropped when `has_conflict` reports a
           conflict with any of the other remaining results.

        Note that the merge pass writes the widened start/end/score onto
        RecognizerResult objects that come from the caller's list (only the
        list itself is copied, not its elements).

        :param analyzer_results: the results received from the analyzer
        :return: List of the results that survived both passes, in their
        original relative order
        """
        tmp_analyzer_results = []
        # Pool of results the current one is compared against. The current
        # result is taken out first so it is never compared with itself, and
        # it is only put back if it survives.
        other_elements = analyzer_results.copy()
        for result in analyzer_results:
            other_elements.remove(result)

            is_merge_same_entity_type = False
            for other_element in other_elements:
                if other_element.entity_type != result.entity_type:
                    continue
                if result.intersects(other_element) == 0:
                    continue

                # Fold `result` into `other_element`: widest span, best score.
                other_element.start = min(result.start, other_element.start)
                other_element.end = max(result.end, other_element.end)
                other_element.score = max(result.score, other_element.score)
                is_merge_same_entity_type = True
                break
            if not is_merge_same_entity_type:
                other_elements.append(result)
                tmp_analyzer_results.append(result)
            else:
                self.logger.debug(f"removing element {result} from "
                                  f"results list due to merge")

        unique_text_metadata_elements = []
        # Same bookkeeping as above: a dropped result is not returned to the
        # pool, so later results are no longer checked against it.
        other_elements = tmp_analyzer_results.copy()
        for result in tmp_analyzer_results:
            other_elements.remove(result)
            result_conflicted = self.__is_result_conflicted_with_other_elements(
                other_elements, result
            )
            if not result_conflicted:
                other_elements.append(result)
                unique_text_metadata_elements.append(result)
            else:
                self.logger.debug(
                    f"removing element {result} from results list due to conflict"
                )
        return unique_text_metadata_elements

    def get_anonymizers(self) -> List[str]:
        """Return a list of supported anonymizers."""
        return list(self.operators_factory.get_anonymizers().keys())

    @staticmethod
    def __is_result_conflicted_with_other_elements(other_elements, result):
        """Return True if `result` conflicts with any of `other_elements`."""
        # Generator instead of a list so the scan stops at the first conflict.
        return any(
            result.has_conflict(other_element) for other_element in other_elements
        )

    @staticmethod
    def __check_or_add_default_operator(
        operators: Optional[Dict[str, OperatorConfig]]
    ) -> Dict[str, OperatorConfig]:
        """
        Return an operators mapping that contains a "DEFAULT" entry.

        :param operators: the operators supplied by the caller, may be None
        or empty
        :return: the given mapping when it already holds a truthy "DEFAULT"
        entry, otherwise a new mapping with the default operator added
        """
        default_operator = OperatorConfig(DEFAULT)
        if not operators:
            return {"DEFAULT": default_operator}
        if operators.get("DEFAULT"):
            return operators
        # Build a new dict rather than inserting into the caller's one, so a
        # configuration object reused across calls is never modified here.
        return {**operators, "DEFAULT": default_operator}

anonymize(text, analyzer_results, operators=None)

Anonymize method to anonymize the given text.

:example:

    from presidio_anonymizer import AnonymizerEngine
    from presidio_anonymizer.entities import RecognizerResult, OperatorConfig

    # Initialize the engine.
    engine = AnonymizerEngine()

    # Invoke the anonymize function with the text, analyzer results and
    # operators to define the anonymization type.
    result = engine.anonymize(
        text="My name is Bond, James Bond",
        analyzer_results=[
            RecognizerResult(entity_type="PERSON", start=11, end=15, score=0.8),
            RecognizerResult(entity_type="PERSON", start=17, end=27, score=0.8),
        ],
        operators={"PERSON": OperatorConfig("replace", {"new_value": "BIP"})},
    )

    print(result)

Output:

    text: My name is BIP, BIP.
    items:
    [
        {'start': 16, 'end': 19, 'entity_type': 'PERSON', 'text': 'BIP', 'operator': 'replace'},
        {'start': 11, 'end': 14, 'entity_type': 'PERSON', 'text': 'BIP', 'operator': 'replace'}
    ]

Parameters:

Name Type Description Default
text str

the text we are anonymizing

required
analyzer_results List[RecognizerResult]

A list of RecognizerResult objects: the results we received from the analyzer

required
operators Optional[Dict[str, OperatorConfig]]

The configuration of the anonymizers we would like to use for each entity, e.g.: {"PHONE_NUMBER":OperatorConfig("redact", {})}

None

Returns:

Type Description
EngineResult

the anonymized text and a list of information about the anonymized entities.

Source code in /opt/hostedtoolcache/Python/3.10.11/x64/lib/python3.10/site-packages/presidio_anonymizer/anonymizer_engine.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
def anonymize(
        self,
        text: str,
        analyzer_results: List[RecognizerResult],
        operators: Optional[Dict[str, OperatorConfig]] = None,
) -> EngineResult:
    """Anonymize the given text.

    The analyzer results are first reduced by
    `_remove_conflicts_and_get_text_manipulation_data`, the operators are
    passed through `__check_or_add_default_operator`, and everything is then
    handed to `_operate` with `OperatorType.Anonymize`.

    :param text: the text we are anonymizing
    :param analyzer_results: A list of RecognizerResult objects, the
    results we received from the analyzer
    :param operators: The configuration of the anonymizers we would like
    to use for each entity e.g.: {"PHONE_NUMBER":OperatorConfig("redact", {})}
    :return: the anonymized text and a list of information about the
    anonymized entities.

    :example:

    >>> from presidio_anonymizer import AnonymizerEngine
    >>> from presidio_anonymizer.entities import RecognizerResult, OperatorConfig

    >>> # Initialize the engine.
    >>> engine = AnonymizerEngine()

    >>> # Invoke the anonymize function with the text, analyzer results and
    >>> # Operators to define the anonymization type.
    >>> result = engine.anonymize(
    >>>     text="My name is Bond, James Bond",
    >>>     analyzer_results=[RecognizerResult(entity_type="PERSON",
    >>>                                        start=11,
    >>>                                        end=15,
    >>>                                        score=0.8),
    >>>                       RecognizerResult(entity_type="PERSON",
    >>>                                        start=17,
    >>>                                        end=27,
    >>>                                        score=0.8)],
    >>>     operators={"PERSON": OperatorConfig("replace", {"new_value": "BIP"})}
    >>> )

    >>> print(result)
    text: My name is BIP, BIP.
    items:
    [
        {'start': 16, 'end': 19, 'entity_type': 'PERSON',
         'text': 'BIP', 'operator': 'replace'},
        {'start': 11, 'end': 14, 'entity_type': 'PERSON',
         'text': 'BIP', 'operator': 'replace'}
    ]


    """
    # Step 1: reduce the raw analyzer output to the results to operate on.
    resolved_results = self._remove_conflicts_and_get_text_manipulation_data(
        analyzer_results
    )

    # Step 2: obtain the operators mapping to use for this call.
    resolved_operators = self.__check_or_add_default_operator(operators)

    # Step 3: delegate the actual text manipulation.
    return self._operate(
        text, resolved_results, resolved_operators, OperatorType.Anonymize
    )

get_anonymizers()

Return a list of supported anonymizers.

Source code in /opt/hostedtoolcache/Python/3.10.11/x64/lib/python3.10/site-packages/presidio_anonymizer/anonymizer_engine.py
142
143
144
145
def get_anonymizers(self) -> List[str]:
    """List the names of the anonymizers offered by the operators factory."""
    # The names are the keys of the mapping returned by the factory.
    available = self.operators_factory.get_anonymizers()
    return list(available.keys())

BatchAnonymizerEngine

BatchAnonymizerEngine class.

A class that provides functionality to anonymize in batches.

Parameters:

Name Type Description Default
anonymizer_engine Optional[AnonymizerEngine]

An instance of the AnonymizerEngine class.

None
Source code in /opt/hostedtoolcache/Python/3.10.11/x64/lib/python3.10/site-packages/presidio_anonymizer/batch_anonymizer_engine.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
class BatchAnonymizerEngine:
    """
    BatchAnonymizerEngine class.

    A class that provides functionality to anonymize in batches.
    :param anonymizer_engine: An instance of the AnonymizerEngine class.
    A new AnonymizerEngine is created when none is given.
    """

    def __init__(self, anonymizer_engine: Optional[AnonymizerEngine] = None):
        self.anonymizer_engine = anonymizer_engine or AnonymizerEngine()

    def anonymize_list(
        self,
        texts: List[Union[str, bool, int, float]],
        recognizer_results_list: List[List[RecognizerResult]],
        **kwargs
    ) -> List[object]:
        """
        Anonymize a list of strings.

        Items whose exact type is str, bool, int or float are converted with
        `str()` and anonymized. Any other item is returned unchanged.

        :param texts: List containing the texts to be anonymized (original texts)
        :param recognizer_results_list: A list of lists of RecognizerResult,
        the output of the AnalyzerEngine on each text in the list. When it is
        empty or None, every text is processed with an empty list of results.
        :param kwargs: Additional kwargs for the `AnonymizerEngine.anonymize` method
        :return: A list holding, in input order, the anonymized text for each
        supported item and the original object for every other item. Texts
        and results are paired with `zip`, so the output is as long as the
        shorter of the two lists.
        """
        if not recognizer_results_list:
            recognizer_results_list = [[] for _ in range(len(texts))]

        return_list = []
        for text, recognizer_results in zip(texts, recognizer_results_list):
            # Exact type match: instances of subclasses are passed through.
            if type(text) not in (str, bool, int, float):
                return_list.append(text)
                continue
            res = self.anonymizer_engine.anonymize(
                text=str(text), analyzer_results=recognizer_results, **kwargs
            )
            return_list.append(res.text)

        return return_list

    def anonymize_dict(
        self, analyzer_results: Iterable[DictRecognizerResult], **kwargs
    ) -> Dict[str, object]:
        """
        Anonymize values in a dictionary.

        Each result is handled according to the type of its `value`:
        a dict is processed recursively, a str is anonymized, any other
        iterable is passed to `anonymize_list`, and everything else is
        copied to the output unchanged.

        :param analyzer_results: Iterator of `DictRecognizerResult`
        containing the output of the AnalyzerEngine.analyze_dict on the input text.
        :param kwargs: Additional kwargs for the `AnonymizerEngine.anonymize` method
        :return: A dictionary mapping each result's key to its processed value
        (anonymized text, nested dictionary, list or the untouched value).
        """

        return_dict = {}
        for result in analyzer_results:
            value = result.value
            if isinstance(value, dict):
                # For a nested dict the recognizer results are themselves the
                # per-key results of the nested dictionary.
                return_dict[result.key] = self.anonymize_dict(
                    analyzer_results=result.recognizer_results, **kwargs
                )

            elif isinstance(value, str):
                resp = self.anonymizer_engine.anonymize(
                    text=value,
                    analyzer_results=result.recognizer_results,
                    **kwargs
                )
                return_dict[result.key] = resp.text

            # Checked after str on purpose: a str is iterable as well.
            elif isinstance(value, collections.abc.Iterable):
                return_dict[result.key] = self.anonymize_list(
                    texts=value,
                    recognizer_results_list=result.recognizer_results,
                    **kwargs
                )
            else:
                return_dict[result.key] = value
        return return_dict

anonymize_dict(analyzer_results, **kwargs)

Anonymize values in a dictionary.

Parameters:

Name Type Description Default
analyzer_results Iterable[DictRecognizerResult]

Iterator of DictRecognizerResult containing the output of the AnalyzerEngine.analyze_dict on the input text.

required
kwargs

Additional kwargs for the AnonymizerEngine.anonymize method

{}
Source code in /opt/hostedtoolcache/Python/3.10.11/x64/lib/python3.10/site-packages/presidio_anonymizer/batch_anonymizer_engine.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def anonymize_dict(
    self, analyzer_results: Iterable[DictRecognizerResult], **kwargs
) -> Dict[str, object]:
    """
    Anonymize values in a dictionary.

    Each result is handled according to the type of its `value`:
    a dict is processed recursively, a str is anonymized, any other
    iterable is passed to `anonymize_list`, and everything else is
    copied to the output unchanged.

    :param analyzer_results: Iterator of `DictRecognizerResult`
    containing the output of the AnalyzerEngine.analyze_dict on the input text.
    :param kwargs: Additional kwargs for the `AnonymizerEngine.anonymize` method
    :return: A dictionary mapping each result's key to its processed value
    (anonymized text, nested dictionary, list or the untouched value).
    """

    return_dict = {}
    for result in analyzer_results:
        value = result.value
        if isinstance(value, dict):
            # For a nested dict the recognizer results are themselves the
            # per-key results of the nested dictionary.
            return_dict[result.key] = self.anonymize_dict(
                analyzer_results=result.recognizer_results, **kwargs
            )

        elif isinstance(value, str):
            resp = self.anonymizer_engine.anonymize(
                text=value,
                analyzer_results=result.recognizer_results,
                **kwargs
            )
            return_dict[result.key] = resp.text

        # Checked after str on purpose: a str is iterable as well.
        elif isinstance(value, collections.abc.Iterable):
            return_dict[result.key] = self.anonymize_list(
                texts=value,
                recognizer_results_list=result.recognizer_results,
                **kwargs
            )
        else:
            return_dict[result.key] = value
    return return_dict

anonymize_list(texts, recognizer_results_list, **kwargs)

Anonymize a list of strings.

Parameters:

Name Type Description Default
texts List[Union[str, bool, int, float]]

List containing the texts to be anonymized (original texts)

required
recognizer_results_list List[List[RecognizerResult]]

A list of lists of RecognizerResult, the output of the AnalyzerEngine on each text in the list.

required
kwargs

Additional kwargs for the AnonymizerEngine.anonymize method

{}
Source code in /opt/hostedtoolcache/Python/3.10.11/x64/lib/python3.10/site-packages/presidio_anonymizer/batch_anonymizer_engine.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def anonymize_list(
    self,
    texts: List[Union[str, bool, int, float]],
    recognizer_results_list: List[List[RecognizerResult]],
    **kwargs
) -> List[object]:
    """
    Anonymize a list of strings.

    Items whose exact type is str, bool, int or float are converted with
    `str()` and anonymized. Any other item is returned unchanged.

    :param texts: List containing the texts to be anonymized (original texts)
    :param recognizer_results_list: A list of lists of RecognizerResult,
    the output of the AnalyzerEngine on each text in the list. When it is
    empty or None, every text is processed with an empty list of results.
    :param kwargs: Additional kwargs for the `AnonymizerEngine.anonymize` method
    :return: A list holding, in input order, the anonymized text for each
    supported item and the original object for every other item. Texts
    and results are paired with `zip`, so the output is as long as the
    shorter of the two lists.
    """
    if not recognizer_results_list:
        recognizer_results_list = [[] for _ in range(len(texts))]

    return_list = []
    for text, recognizer_results in zip(texts, recognizer_results_list):
        # Exact type match: instances of subclasses are passed through.
        if type(text) not in (str, bool, int, float):
            return_list.append(text)
            continue
        res = self.anonymizer_engine.anonymize(
            text=str(text), analyzer_results=recognizer_results, **kwargs
        )
        return_list.append(res.text)

    return return_list

DeanonymizeEngine

Bases: EngineBase

Deanonymize text that was previously anonymized.

Source code in /opt/hostedtoolcache/Python/3.10.11/x64/lib/python3.10/site-packages/presidio_anonymizer/deanonymize_engine.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
class DeanonymizeEngine(EngineBase):
    """Engine that reverses a previous anonymization of a text."""

    def __init__(self):
        # The logger is assigned before the base initializer runs, exactly
        # as in the original ordering.
        self.logger = logging.getLogger("presidio-anonymizer")
        EngineBase.__init__(self)

    def deanonymize(
        self,
        text: str,
        entities: List[OperatorResult],
        operators: Dict[str, OperatorConfig],
    ) -> EngineResult:
        """
        Deanonymize the given text using the supplied entities and operators.

        The call is delegated to `_operate` with `OperatorType.Deanonymize`.

        :param text: the full text with the encrypted entities
        :param entities: list of encrypted entities
        :param operators: the operators to apply on the anonymizer result entities
        :return: EngineResult - the new text and data about the deanonymized entities.
        """
        operator_type = OperatorType.Deanonymize
        return self._operate(text, entities, operators, operator_type)

    def get_deanonymizers(self) -> List[str]:
        """List the names of the deanonymizers offered by the operators factory."""
        available = self.operators_factory.get_deanonymizers()
        return list(available.keys())

deanonymize(text, entities, operators)

Receive the text, entities and operators to perform deanonymization over.

Parameters:

Name Type Description Default
operators Dict[str, OperatorConfig]

the operators to apply on the anonymizer result entities

required
text str

the full text with the encrypted entities

required
entities List[OperatorResult]

list of encrypted entities

required

Returns:

Type Description
EngineResult

EngineResult - the new text and data about the deanonymized entities.

Source code in /opt/hostedtoolcache/Python/3.10.11/x64/lib/python3.10/site-packages/presidio_anonymizer/deanonymize_engine.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
def deanonymize(
    self,
    text: str,
    entities: List[OperatorResult],
    operators: Dict[str, OperatorConfig],
) -> EngineResult:
    """
    Deanonymize the given text using the supplied entities and operators.

    The call is delegated to `_operate` with `OperatorType.Deanonymize`.

    :param text: the full text with the encrypted entities
    :param entities: list of encrypted entities
    :param operators: the operators to apply on the anonymizer result entities
    :return: EngineResult - the new text and data about the deanonymized entities.
    """
    operator_type = OperatorType.Deanonymize
    return self._operate(text, entities, operators, operator_type)

get_deanonymizers()

Return a list of supported deanonymizers.

Source code in /opt/hostedtoolcache/Python/3.10.11/x64/lib/python3.10/site-packages/presidio_anonymizer/deanonymize_engine.py
33
34
35
36
def get_deanonymizers(self) -> List[str]:
    """List the names of the deanonymizers offered by the operators factory."""
    # The names are the keys of the mapping returned by the factory.
    available = self.operators_factory.get_deanonymizers()
    return list(available.keys())