Skip to content

Presidio Anonymizer API Reference

Anonymizer root module.

AnonymizerEngine

Bases: EngineBase

AnonymizerEngine class.

Handles the entire logic of the Presidio-anonymizer. Gets the original text and replaces the PII entities with the desired anonymizers.

Source code in presidio_anonymizer/anonymizer_engine.py
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
class AnonymizerEngine(EngineBase):
    """
    AnonymizerEngine class.

    Handles the entire logic of the Presidio-anonymizer. Gets the original text
    and replaces the PII entities with the desired anonymizers.
    """

    def anonymize(
        self,
        text: str,
        analyzer_results: List[RecognizerResult],
        operators: Optional[Dict[str, OperatorConfig]] = None,
        conflict_resolution: ConflictResolutionStrategy = (
            ConflictResolutionStrategy.MERGE_SIMILAR_OR_CONTAINED
        ),
    ) -> EngineResult:
        """Anonymize method to anonymize the given text.

        :param text: the text we are anonymizing
        :param analyzer_results: A list of RecognizerResult class -> The results we
        received from the analyzer
        :param operators: The configuration of the anonymizers we would like
        to use for each entity e.g.: {"PHONE_NUMBER":OperatorConfig("redact", {})}.
        When omitted (or when it has no "DEFAULT" entry) a default operator is
        added under the "DEFAULT" key; the caller's dictionary is not modified.
        :param conflict_resolution: The configuration designed to handle conflicts
        among entities
        :return: the anonymized text and a list of information about the
        anonymized entities.

        :example:

        >>> from presidio_anonymizer import AnonymizerEngine
        >>> from presidio_anonymizer.entities import RecognizerResult, OperatorConfig

        >>> # Initialize the engine with logger.
        >>> engine = AnonymizerEngine()

        >>> # Invoke the anonymize function with the text, analyzer results and
        >>> # Operators to define the anonymization type.
        >>> result = engine.anonymize(
        >>>     text="My name is Bond, James Bond",
        >>>     analyzer_results=[RecognizerResult(entity_type="PERSON",
        >>>                                        start=11,
        >>>                                        end=15,
        >>>                                        score=0.8),
        >>>                       RecognizerResult(entity_type="PERSON",
        >>>                                        start=17,
        >>>                                        end=27,
        >>>                                        score=0.8)],
        >>>     operators={"PERSON": OperatorConfig("replace", {"new_value": "BIP"})}
        >>> )

        >>> print(result)
        text: My name is BIP, BIP
        items:
        [
            {'start': 16, 'end': 19, 'entity_type': 'PERSON',
             'text': 'BIP', 'operator': 'replace'},
            {'start': 11, 'end': 14, 'entity_type': 'PERSON',
             'text': 'BIP', 'operator': 'replace'}
        ]


        """
        # 1. Drop / merge / trim conflicting analyzer results.
        analyzer_results = self._remove_conflicts_and_get_text_manipulation_data(
            analyzer_results, conflict_resolution
        )

        # 2. Join same-type neighbours that are separated only by spaces.
        merged_results = self._merge_entities_with_whitespace_between(
            text, analyzer_results
        )

        # 3. Make sure a "DEFAULT" operator exists for entities without an
        #    explicit configuration.
        operators = self.__check_or_add_default_operator(operators)

        return self._operate(
            text=text,
            pii_entities=merged_results,
            operators_metadata=operators,
            operator_type=OperatorType.Anonymize,
        )

    def add_anonymizer(self, anonymizer_cls: Type[Operator]) -> None:
        """
        Add a new anonymizer to the engine.

        :param anonymizer_cls: The anonymizer class to add to the engine.
        """
        # NOTE: the message is logged before the factory call, so it is emitted
        # even if the registration itself raises.
        logger.info(f"Added anonymizer {anonymizer_cls.__name__}")
        self.operators_factory.add_anonymize_operator(anonymizer_cls)

    def remove_anonymizer(self, anonymizer_cls: Type[Operator]) -> None:
        """
        Remove an anonymizer from the engine.

        :param anonymizer_cls: The anonymizer class to remove from the engine.
        """
        # NOTE: the message is logged before the factory call, so it is emitted
        # even if the removal itself raises.
        logger.info(f"Removed anonymizer {anonymizer_cls.__name__}")
        self.operators_factory.remove_anonymize_operator(anonymizer_cls)

    def _remove_conflicts_and_get_text_manipulation_data(
        self,
        analyzer_results: List[RecognizerResult],
        conflict_resolution: ConflictResolutionStrategy,
    ) -> List[RecognizerResult]:
        """
        Iterate the list and create a sorted unique results list from it.

        Only insert results which are:
        1. Indices are not contained in other result.
        2. Have the same indices as other results but with larger score.

        Note that the RecognizerResult objects passed in are adjusted in place
        (their ``start``/``end``/``score`` may be widened or trimmed).

        :param analyzer_results: the analyzer results to de-conflict
        :param conflict_resolution: when equal to
        ``ConflictResolutionStrategy.REMOVE_INTERSECTIONS`` the remaining
        overlapping results are additionally trimmed so they no longer overlap
        :return: List of the surviving results
        """
        # --- Pass 1: merge intersecting results of the same entity type -----
        tmp_analyzer_results = []
        # This list contains all elements which we need to check a single result
        # against. If a result is dropped, it can also be dropped from this list
        # since it is intersecting with another result and we selected the other one.
        other_elements = analyzer_results.copy()
        for result in analyzer_results:
            other_elements.remove(result)

            is_merge_same_entity_type = False
            for other_element in other_elements:
                if other_element.entity_type != result.entity_type:
                    continue
                # presumably intersects() returns the overlap size, 0 meaning
                # "no overlap" - verify against RecognizerResult
                if result.intersects(other_element) == 0:
                    continue

                # Fold `result` into `other_element`: widest span, best score.
                other_element.start = min(result.start, other_element.start)
                other_element.end = max(result.end, other_element.end)
                other_element.score = max(result.score, other_element.score)
                is_merge_same_entity_type = True
                break
            if not is_merge_same_entity_type:
                other_elements.append(result)
                tmp_analyzer_results.append(result)
            else:
                self.logger.debug(
                    f"removing element {result} from " f"results list due to merge"
                )

        # --- Pass 2: drop results that conflict with another result ---------
        unique_text_metadata_elements = []
        # This list contains all elements which we need to check a single result
        # against. If a result is dropped, it can also be dropped from this list
        # since it is intersecting with another result and we selected the other one.
        other_elements = tmp_analyzer_results.copy()
        for result in tmp_analyzer_results:
            other_elements.remove(result)
            result_conflicted = self.__is_result_conflicted_with_other_elements(
                other_elements, result
            )
            if not result_conflicted:
                other_elements.append(result)
                unique_text_metadata_elements.append(result)
            else:
                self.logger.debug(
                    f"removing element {result} from results list due to conflict"
                )

        # --- Pass 3 (optional): trim the remaining partial overlaps ---------
        # This further improves the quality of handling the conflict between the
        # various entities overlapping. This will not drop the results; instead
        # it adjusts the start and end positions of overlapping results and removes
        # all types of conflicts among entities as well as text.
        if conflict_resolution == ConflictResolutionStrategy.REMOVE_INTERSECTIONS:
            unique_text_metadata_elements.sort(key=lambda element: element.start)
            elements_length = len(unique_text_metadata_elements)
            index = 0
            while index < elements_length - 1:
                current_entity = unique_text_metadata_elements[index]
                next_entity = unique_text_metadata_elements[index + 1]
                if current_entity.end <= next_entity.start:
                    # No overlap between this pair; move on.
                    index += 1
                else:
                    # The higher-scoring entity keeps the overlapping region
                    # (ties favour the earlier entity).
                    if current_entity.score >= next_entity.score:
                        next_entity.start = current_entity.end
                    else:
                        current_entity.end = next_entity.start
                    # Starts may have moved, so restore ordering and re-check
                    # the same index.
                    unique_text_metadata_elements.sort(
                        key=lambda element: element.start
                    )
            # Discard entities whose span became inverted by the trimming.
            unique_text_metadata_elements = [
                element
                for element in unique_text_metadata_elements
                if element.start <= element.end
            ]
        return unique_text_metadata_elements

    def _merge_entities_with_whitespace_between(
        self, text: str, analyzer_results: List[RecognizerResult]
    ) -> List[RecognizerResult]:
        """Merge adjacent entities of the same type separated by whitespace.

        Two consecutive results are merged when they share an entity type and
        the text between them consists solely of one or more space characters
        (the pattern matches ``" "`` only, not tabs or newlines). The later
        result is extended backwards in place to start where the earlier one
        started, and the earlier one is dropped from the output.

        :param text: the original text the results refer to
        :param analyzer_results: results, compared pairwise in the given order
        :return: the list of results after merging
        """
        merged_results = []
        prev_result = None
        for result in analyzer_results:
            if prev_result is not None:
                if prev_result.entity_type == result.entity_type:
                    if re.search(r"^( )+$", text[prev_result.end : result.start]):
                        merged_results.remove(prev_result)
                        result.start = prev_result.start
            merged_results.append(result)
            prev_result = result
        return merged_results

    def get_anonymizers(self) -> List[str]:
        """Return a list of supported anonymizers."""
        return list(self.operators_factory.get_anonymizers().keys())

    @staticmethod
    def __is_result_conflicted_with_other_elements(other_elements, result):
        """Return True if `result.has_conflict(...)` holds for any other element."""
        # Generator form stops at the first conflict instead of evaluating all.
        return any(
            result.has_conflict(other_element) for other_element in other_elements
        )

    @staticmethod
    def __check_or_add_default_operator(
        operators: Optional[Dict[str, OperatorConfig]],
    ) -> Dict[str, OperatorConfig]:
        """Return an operators mapping that is guaranteed to have a "DEFAULT" entry.

        The mapping passed by the caller is never modified: when a default has
        to be added, a shallow copy carrying the extra key is returned instead.

        :param operators: the user supplied operators, may be None or empty
        :return: a mapping containing at least the "DEFAULT" operator
        """
        default_operator = OperatorConfig(DEFAULT)
        if not operators:
            return {"DEFAULT": default_operator}
        if operators.get("DEFAULT"):
            return operators
        # Copy rather than assign so the caller's dict is left untouched.
        return {**operators, "DEFAULT": default_operator}

add_anonymizer(anonymizer_cls)

Add a new anonymizer to the engine.

anonymizer_cls: The anonymizer class to add to the engine.

Source code in presidio_anonymizer/anonymizer_engine.py
103
104
105
106
107
108
109
110
def add_anonymizer(self, anonymizer_cls: Type[Operator]) -> None:
    """
    Add a new anonymizer to the engine.

    The class is handed to the engine's operators factory via
    ``add_anonymize_operator``.

    :param anonymizer_cls: The anonymizer class to add to the engine.
    """
    # The message is logged before the factory call, so it is emitted even if
    # the registration itself raises.
    logger.info(f"Added anonymizer {anonymizer_cls.__name__}")
    self.operators_factory.add_anonymize_operator(anonymizer_cls)

anonymize(text, analyzer_results, operators=None, conflict_resolution=ConflictResolutionStrategy.MERGE_SIMILAR_OR_CONTAINED)

Anonymize method to anonymize the given text.

:example:

from presidio_anonymizer import AnonymizerEngine from presidio_anonymizer.entities import RecognizerResult, OperatorConfig

Initialize the engine with logger.

engine = AnonymizerEngine()

Invoke the anonymize function with the text, analyzer results and

Operators to define the anonymization type.

result = engine.anonymize( text="My name is Bond, James Bond", analyzer_results=[RecognizerResult(entity_type="PERSON", start=11, end=15, score=0.8), RecognizerResult(entity_type="PERSON", start=17, end=27, score=0.8)], operators={"PERSON": OperatorConfig("replace", {"new_value": "BIP"})} )

print(result) text: My name is BIP, BIP. items: [ {'start': 16, 'end': 19, 'entity_type': 'PERSON', 'text': 'BIP', 'operator': 'replace'}, {'start': 11, 'end': 14, 'entity_type': 'PERSON', 'text': 'BIP', 'operator': 'replace'} ]

Parameters:

Name Type Description Default
text str

the text we are anonymizing

required
analyzer_results List[RecognizerResult]

A list of RecognizerResult class -> The results we received from the analyzer

required
operators Optional[Dict[str, OperatorConfig]]

The configuration of the anonymizers we would like to use for each entity, e.g.: {"PHONE_NUMBER": OperatorConfig("redact", {})}

None
conflict_resolution ConflictResolutionStrategy

The configuration designed to handle conflicts among entities

MERGE_SIMILAR_OR_CONTAINED

Returns:

Type Description
EngineResult

the anonymized text and a list of information about the anonymized entities.

Source code in presidio_anonymizer/anonymizer_engine.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
def anonymize(
    self,
    text: str,
    analyzer_results: List[RecognizerResult],
    operators: Optional[Dict[str, OperatorConfig]] = None,
    conflict_resolution: ConflictResolutionStrategy = (
        ConflictResolutionStrategy.MERGE_SIMILAR_OR_CONTAINED
    ),
) -> EngineResult:
    """Anonymize the given text according to the analyzer results.

    The analyzer results are first de-conflicted, then same-type neighbours
    separated only by spaces are merged, and finally the configured operators
    are applied to the text.

    :param text: the text we are anonymizing
    :param analyzer_results: A list of RecognizerResult class -> The results we
    received from the analyzer
    :param operators: The configuration of the anonymizers we would like
    to use for each entity e.g.: {"PHONE_NUMBER":OperatorConfig("redact", {})}
    :param conflict_resolution: The configuration designed to handle conflicts
    among entities
    :return: the anonymized text and a list of information about the
    anonymized entities.

    :example:

    >>> from presidio_anonymizer import AnonymizerEngine
    >>> from presidio_anonymizer.entities import RecognizerResult, OperatorConfig

    >>> # Initialize the engine with logger.
    >>> engine = AnonymizerEngine()

    >>> # Invoke the anonymize function with the text, analyzer results and
    >>> # Operators to define the anonymization type.
    >>> result = engine.anonymize(
    >>>     text="My name is Bond, James Bond",
    >>>     analyzer_results=[RecognizerResult(entity_type="PERSON",
    >>>                                        start=11,
    >>>                                        end=15,
    >>>                                        score=0.8),
    >>>                       RecognizerResult(entity_type="PERSON",
    >>>                                        start=17,
    >>>                                        end=27,
    >>>                                        score=0.8)],
    >>>     operators={"PERSON": OperatorConfig("replace", {"new_value": "BIP"})}
    >>> )

    >>> print(result)
    text: My name is BIP, BIP
    items:
    [
        {'start': 16, 'end': 19, 'entity_type': 'PERSON',
         'text': 'BIP', 'operator': 'replace'},
        {'start': 11, 'end': 14, 'entity_type': 'PERSON',
         'text': 'BIP', 'operator': 'replace'}
    ]


    """
    deconflicted = self._remove_conflicts_and_get_text_manipulation_data(
        analyzer_results, conflict_resolution
    )

    # Keyword arguments are evaluated left to right, so the merge still runs
    # before the default-operator check, exactly as in a step-by-step version.
    return self._operate(
        text=text,
        pii_entities=self._merge_entities_with_whitespace_between(
            text, deconflicted
        ),
        operators_metadata=self.__check_or_add_default_operator(operators),
        operator_type=OperatorType.Anonymize,
    )

get_anonymizers()

Return a list of supported anonymizers.

Source code in presidio_anonymizer/anonymizer_engine.py
224
225
226
227
def get_anonymizers(self) -> List[str]:
    """Return the supported anonymizers as a list.

    The names are the keys of the mapping returned by the operators
    factory's ``get_anonymizers()``.
    """
    return list(self.operators_factory.get_anonymizers().keys())

remove_anonymizer(anonymizer_cls)

Remove an anonymizer from the engine.

anonymizer_cls: The anonymizer class to remove from the engine.

Source code in presidio_anonymizer/anonymizer_engine.py
112
113
114
115
116
117
118
119
def remove_anonymizer(self, anonymizer_cls: Type[Operator]) -> None:
    """
    Remove an anonymizer from the engine.

    The class is handed to the engine's operators factory via
    ``remove_anonymize_operator``.

    :param anonymizer_cls: The anonymizer class to remove from the engine.
    """
    # The message is logged before the factory call, so it is emitted even if
    # the removal itself raises.
    logger.info(f"Removed anonymizer {anonymizer_cls.__name__}")
    self.operators_factory.remove_anonymize_operator(anonymizer_cls)

BatchAnonymizerEngine

BatchAnonymizerEngine class.

A class that provides functionality to anonymize in batches.

Parameters:

Name Type Description Default
anonymizer_engine Optional[AnonymizerEngine]

An instance of the AnonymizerEngine class.

None
Source code in presidio_anonymizer/batch_anonymizer_engine.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
class BatchAnonymizerEngine:
    """
    BatchAnonymizerEngine class.

    A class that provides functionality to anonymize in batches.
    :param anonymizer_engine: An instance of the AnonymizerEngine class.
    """

    def __init__(self, anonymizer_engine: Optional[AnonymizerEngine] = None):
        # Fall back to a freshly constructed engine when none is supplied.
        self.anonymizer_engine = anonymizer_engine or AnonymizerEngine()

    def anonymize_list(
        self,
        texts: List[Optional[Union[str, bool, int, float]]],
        recognizer_results_list: List[List[RecognizerResult]],
        **kwargs,
    ) -> List[Union[str, Any]]:
        """
        Anonymize a list of strings.

        :param texts: List containing the texts to be anonymized (original texts).
            Items with a `type` not in `(str, bool, int, float)` will not be anonymized.
            Any iterable is accepted; it is materialised into a list first.
        :param recognizer_results_list: A list of lists of RecognizerResult,
        the output of the AnalyzerEngine on each text in the list. When empty
        or None, every text is processed with an empty result list.
        :param kwargs: Additional kwargs for the `AnonymizerEngine.anonymize` method
        :return: a list with the anonymized text for each supported item (always
        a `str`, since items are converted with `str()`), and the untouched
        original item otherwise.
        """
        # Materialise once: `anonymize_dict` may forward any iterable (not only
        # sized sequences), and the texts may need to be counted and iterated.
        texts = list(texts)
        if not recognizer_results_list:
            recognizer_results_list = [[] for _ in texts]

        return_list = []
        # zip() pairs items positionally and stops at the shorter input.
        for text, recognizer_results in zip(texts, recognizer_results_list):
            # Exact type match on purpose: subclasses are passed through as-is.
            if type(text) in (str, bool, int, float):
                res = self.anonymizer_engine.anonymize(
                    text=str(text), analyzer_results=recognizer_results, **kwargs
                )
                return_list.append(res.text)
            else:
                return_list.append(text)

        return return_list

    def anonymize_dict(
        self, analyzer_results: Iterable[DictRecognizerResult], **kwargs
    ) -> Dict[str, Any]:
        """
        Anonymize values in a dictionary.

        :param analyzer_results: Iterator of `DictRecognizerResult`
        containing the output of the AnalyzerEngine.analyze_dict on the input text.
        :param kwargs: Additional kwargs for the `AnonymizerEngine.anonymize` method
        :return: a dictionary keyed like the input; string values are replaced
        by their anonymized text, nested dictionaries and iterables are
        processed recursively, and any other value is copied over unchanged.
        """
        return_dict = {}
        for result in analyzer_results:
            if isinstance(result.value, dict):
                # Nested dictionary: recognizer_results holds the next level.
                resp = self.anonymize_dict(
                    analyzer_results=result.recognizer_results, **kwargs
                )
                return_dict[result.key] = resp

            elif isinstance(result.value, str):
                # Must be tested before the Iterable branch: str is iterable.
                resp = self.anonymizer_engine.anonymize(
                    text=result.value,
                    analyzer_results=result.recognizer_results,
                    **kwargs,
                )
                return_dict[result.key] = resp.text

            elif isinstance(result.value, collections.abc.Iterable):
                anonymize_response = self.anonymize_list(
                    texts=result.value,
                    recognizer_results_list=result.recognizer_results,
                    **kwargs,
                )
                return_dict[result.key] = anonymize_response
            else:
                return_dict[result.key] = result.value
        return return_dict

anonymize_dict(analyzer_results, **kwargs)

Anonymize values in a dictionary.

Parameters:

Name Type Description Default
analyzer_results Iterable[DictRecognizerResult]

Iterator of DictRecognizerResult containing the output of the AnalyzerEngine.analyze_dict on the input text.

required
kwargs

Additional kwargs for the AnonymizerEngine.anonymize method

{}
Source code in presidio_anonymizer/batch_anonymizer_engine.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def anonymize_dict(
    self, analyzer_results: Iterable[DictRecognizerResult], **kwargs
) -> Dict[str, Any]:
    """
    Anonymize values in a dictionary.

    :param analyzer_results: Iterator of `DictRecognizerResult`
    containing the output of the AnalyzerEngine.analyze_dict on the input text.
    :param kwargs: Additional kwargs for the `AnonymizerEngine.anonymize` method
    :return: a dictionary keyed like the input; string values are replaced
    by their anonymized text, nested dictionaries and iterables are
    processed recursively, and any other value is copied over unchanged
    (hence the values are not necessarily strings).
    """
    return_dict = {}
    for result in analyzer_results:
        if isinstance(result.value, dict):
            # Nested dictionary: recognizer_results holds the next level.
            resp = self.anonymize_dict(
                analyzer_results=result.recognizer_results, **kwargs
            )
            return_dict[result.key] = resp

        elif isinstance(result.value, str):
            # Must be tested before the Iterable branch: str is iterable.
            resp = self.anonymizer_engine.anonymize(
                text=result.value,
                analyzer_results=result.recognizer_results,
                **kwargs,
            )
            return_dict[result.key] = resp.text

        elif isinstance(result.value, collections.abc.Iterable):
            anonymize_response = self.anonymize_list(
                texts=result.value,
                recognizer_results_list=result.recognizer_results,
                **kwargs,
            )
            return_dict[result.key] = anonymize_response
        else:
            # Non-iterable, non-string values are passed through untouched.
            return_dict[result.key] = result.value
    return return_dict

anonymize_list(texts, recognizer_results_list, **kwargs)

Anonymize a list of strings.

Parameters:

Name Type Description Default
texts List[Optional[Union[str, bool, int, float]]]

List containing the texts to be anonymized (original texts). Items with a type not in (str, bool, int, float) will not be anonymized.

required
recognizer_results_list List[List[RecognizerResult]]

A list of lists of RecognizerResult, the output of the AnalyzerEngine on each text in the list.

required
kwargs

Additional kwargs for the AnonymizerEngine.anonymize method

{}
Source code in presidio_anonymizer/batch_anonymizer_engine.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def anonymize_list(
    self,
    texts: List[Optional[Union[str, bool, int, float]]],
    recognizer_results_list: List[List[RecognizerResult]],
    **kwargs,
) -> List[Union[str, Any]]:
    """
    Anonymize a list of strings.

    :param texts: List containing the texts to be anonymized (original texts).
        Items with a `type` not in `(str, bool, int, float)` will not be anonymized.
        Any iterable is accepted; it is materialised into a list first.
    :param recognizer_results_list: A list of lists of RecognizerResult,
    the output of the AnalyzerEngine on each text in the list. When empty
    or None, every text is processed with an empty result list.
    :param kwargs: Additional kwargs for the `AnonymizerEngine.anonymize` method
    :return: a list with the anonymized text for each supported item (always
    a `str`, since items are converted with `str()`), and the untouched
    original item otherwise.
    """
    # Materialise once so that non-sized iterables (e.g. generators) can be
    # counted for the fallback below and then iterated.
    texts = list(texts)
    if not recognizer_results_list:
        recognizer_results_list = [[] for _ in texts]

    return_list = []
    # zip() pairs items positionally and stops at the shorter input.
    for text, recognizer_results in zip(texts, recognizer_results_list):
        # Exact type match on purpose: subclasses are passed through as-is.
        if type(text) in (str, bool, int, float):
            res = self.anonymizer_engine.anonymize(
                text=str(text), analyzer_results=recognizer_results, **kwargs
            )
            return_list.append(res.text)
        else:
            return_list.append(text)

    return return_list

ConflictResolutionStrategy

Bases: Enum

Conflict resolution strategy.

The strategy to use when there is a conflict between two entities.

MERGE_SIMILAR_OR_CONTAINED: This default strategy resolves conflicts between similar or contained entities. REMOVE_INTERSECTIONS: Effectively resolves both intersection conflicts among entities and default strategy conflicts. NONE: No conflict resolution will be performed. (Note: the source listing below defines only MERGE_SIMILAR_OR_CONTAINED and REMOVE_INTERSECTIONS; it has no NONE member.)

Source code in presidio_anonymizer/entities/conflict_resolution_strategy.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
class ConflictResolutionStrategy(Enum):
    """Conflict resolution strategy.

    The strategy to use when there is a conflict between two entities.

    MERGE_SIMILAR_OR_CONTAINED: This default strategy resolves conflicts
    between similar or contained entities.
    REMOVE_INTERSECTIONS: Effectively resolves both intersection conflicts
    among entities and default strategy conflicts.

    NOTE(review): earlier documentation also listed a ``NONE`` strategy
    ("No conflict resolution will be performed"), but this enum defines no
    such member - confirm whether it should be added or the mention dropped.
    """

    MERGE_SIMILAR_OR_CONTAINED = "merge_similar_or_contained"
    REMOVE_INTERSECTIONS = "remove_intersections"

DeanonymizeEngine

Bases: EngineBase

Deanonymize text that was previously anonymized.

Source code in presidio_anonymizer/deanonymize_engine.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
class DeanonymizeEngine(EngineBase):
    """Deanonymize text that was previously anonymized."""

    def deanonymize(
        self,
        text: str,
        entities: List[OperatorResult],
        operators: Dict[str, OperatorConfig],
    ) -> EngineResult:
        """
        Receive the text, entities and operators to perform deanonymization over.

        :param text: the full text with the encrypted entities
        :param entities: list of encrypted entities
        :param operators: the operators to apply on the anonymizer result entities
        :return: EngineResult - the new text and data about the deanonymized entities.
        """
        return self._operate(text, entities, operators, OperatorType.Deanonymize)

    def get_deanonymizers(self) -> List[str]:
        """Return a list of supported deanonymizers."""
        return list(self.operators_factory.get_deanonymizers().keys())

    def add_deanonymizer(self, deanonymizer_cls: Type[Operator]) -> None:
        """
        Add a new deanonymizer to the engine.

        :param deanonymizer_cls: The deanonymizer class to add to the engine.
        """
        # NOTE: the message is logged before the factory call, so it is emitted
        # even if the registration itself raises.
        logger.info(f"Added deanonymizer {deanonymizer_cls.__name__}")
        self.operators_factory.add_deanonymize_operator(deanonymizer_cls)

    def remove_deanonymizer(self, deanonymizer_cls: Type[Operator]) -> None:
        """
        Remove a deanonymizer from the engine.

        :param deanonymizer_cls: The deanonymizer class to remove from the engine.
        """
        # NOTE: the message is logged before the factory call, so it is emitted
        # even if the removal itself raises.
        logger.info(f"Removed deanonymizer {deanonymizer_cls.__name__}")
        self.operators_factory.remove_deanonymize_operator(deanonymizer_cls)

add_deanonymizer(deanonymizer_cls)

Add a new deanonymizer to the engine.

deanonymizer_cls: The deanonymizer class to add to the engine.

Source code in presidio_anonymizer/deanonymize_engine.py
37
38
39
40
41
42
43
44
def add_deanonymizer(self, deanonymizer_cls: Type[Operator]) -> None:
    """
    Add a new deanonymizer to the engine.

    The class is handed to the engine's operators factory via
    ``add_deanonymize_operator``.

    :param deanonymizer_cls: The deanonymizer class to add to the engine.
    """
    # The message is logged before the factory call, so it is emitted even if
    # the registration itself raises.
    logger.info(f"Added deanonymizer {deanonymizer_cls.__name__}")
    self.operators_factory.add_deanonymize_operator(deanonymizer_cls)

deanonymize(text, entities, operators)

Receive the text, entities and operators to perform deanonymization over.

Parameters:

Name Type Description Default
operators Dict[str, OperatorConfig]

the operators to apply on the anonymizer result entities

required
text str

the full text with the encrypted entities

required
entities List[OperatorResult]

list of encrypted entities

required

Returns:

Type Description
EngineResult

EngineResult - the new text and data about the deanonymized entities.

Source code in presidio_anonymizer/deanonymize_engine.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
def deanonymize(
    self,
    text: str,
    entities: List[OperatorResult],
    operators: Dict[str, OperatorConfig],
) -> EngineResult:
    """
    Receive the text, entities and operators to perform deanonymization over.

    Delegates to ``self._operate`` with ``OperatorType.Deanonymize``.

    :param text: the full text with the encrypted entities
    :param entities: list of encrypted entities
    :param operators: the operators to apply on the anonymizer result entities
    :return: EngineResult - the new text and data about the deanonymized entities.
    """
    return self._operate(text, entities, operators, OperatorType.Deanonymize)

get_deanonymizers()

Return a list of supported deanonymizers.

Source code in presidio_anonymizer/deanonymize_engine.py
32
33
34
35
def get_deanonymizers(self) -> List[str]:
    """Return the supported deanonymizers as a list.

    The names are the keys of the mapping returned by the operators
    factory's ``get_deanonymizers()``.
    """
    return list(self.operators_factory.get_deanonymizers().keys())

remove_deanonymizer(deanonymizer_cls)

Remove a deanonymizer from the engine.

deanonymizer_cls: The deanonymizer class to remove from the engine.

Source code in presidio_anonymizer/deanonymize_engine.py
46
47
48
49
50
51
52
53
def remove_deanonymizer(self, deanonymizer_cls: Type[Operator]) -> None:
    """
    Remove a deanonymizer from the engine.

    The class is handed to the engine's operators factory via
    ``remove_deanonymize_operator``.

    :param deanonymizer_cls: The deanonymizer class to remove from the engine.
    """
    # The message is logged before the factory call, so it is emitted even if
    # the removal itself raises.
    logger.info(f"Removed deanonymizer {deanonymizer_cls.__name__}")
    self.operators_factory.remove_deanonymize_operator(deanonymizer_cls)

DictRecognizerResult dataclass

Data class for holding the output of the Presidio Analyzer on dictionaries.

Parameters:

Name Type Description Default
key str

key in dictionary

required
value Union[str, List[str], dict]

value to run analysis on (a string, a list of strings, or a nested dictionary)

required
recognizer_results Union[List[RecognizerResult], List[List[RecognizerResult]], Iterator[DictRecognizerResult]]

Analyzer output for one value. Could be either: - A list of recognizer results if the input is one string - A list of lists of recognizer results, if the input is a list of strings. - An iterator of a DictRecognizerResult, if the input is a dictionary. In this case the recognizer_results would be the iterator of the DictRecognizerResult next level in the dictionary.

required
Source code in presidio_anonymizer/entities/engine/dict_recognizer_result.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
@dataclass
class DictRecognizerResult:
    """
    Data class for holding the output of the Presidio Analyzer on dictionaries.

    :param key: key in dictionary
    :param value: value to run analysis on (a string, a list of strings,
    or a nested dictionary)
    :param recognizer_results: Analyzer output for one value.
    Could be either:
     - A list of recognizer results if the input is one string
     - A list of lists of recognizer results, if the input is a list of strings.
     - An iterator of a DictRecognizerResult, if the input is a dictionary.
     In this case the recognizer_results would be the iterator
     of the DictRecognizerResult next level in the dictionary.
    """

    # Dictionary key this result belongs to.
    key: str
    # The original value stored under `key`.
    value: Union[str, List[str], dict]
    # Shape depends on the type of `value`, see the class docstring.
    recognizer_results: Union[
        List[RecognizerResult],
        List[List[RecognizerResult]],
        Iterator["DictRecognizerResult"],
    ]

EngineResult

Engine result.

Source code in presidio_anonymizer/entities/engine/result/engine_result.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
class EngineResult:
    """Engine result.

    Holds a text together with the list of operator results (``items``)
    that belong to it.
    """

    def __init__(self, text: str = None, items: List[OperatorResult] = None):
        """Create EngineResult entity.

        :param text: The anonymized text.
        :param items: List of PII entities and the indices
         of their replacements in the anonymized text.
        """
        # A new list per instance, so instances never share a default list.
        if items is None:
            items = []
        self.text = text
        self.items = items

    def set_text(self, text: str):
        """Set a text.

        :param text: the value to store on ``self.text``.
        """
        self.text = text

    def add_item(self, item: OperatorResult):
        """Add an item.

        :param item: an item to add to the list.
        """
        self.items.append(item)

    def normalize_item_indexes(self):
        """Normalize the indexes to be index from start.

        For every item, ``start`` becomes ``len(self.text) - item.end`` and
        ``end`` becomes the new ``start`` plus the length of the item's text.
        """
        text_len = len(self.text)
        for result_item in self.items:
            result_item.start = text_len - result_item.end
            result_item.end = result_item.start + len(result_item.text)

    def to_json(self) -> str:
        """Return a json string serializing this instance.

        Objects the json encoder cannot handle natively (this instance and
        its items) are serialized through their ``__dict__``.
        """
        return json.dumps(self, default=lambda x: x.__dict__)

    def __repr__(self):
        """Return a string representation of the object."""

        items_repr = (
            ",\n    ".join([str(item) for item in self.items]) if self.items else ""
        )
        return f"text: {self.text}\nitems:\n[\n    {items_repr}\n]\n"

    def __eq__(self, other) -> bool:
        """Verify two instances are equal.

        Two results are equal when their texts match and their item lists
        have the same length with pairwise-equal items.

        :param other: the object to compare with; it must expose ``text``
         and ``items``.
        :return: True if the two instances are equal, False otherwise.
        """
        # Pairwise iteration stops at the shorter list, so the lengths are
        # compared explicitly; otherwise a mere prefix would compare equal.
        return (
            self.text == other.text
            and len(self.items) == len(other.items)
            and all(mine == theirs for mine, theirs in zip(self.items, other.items))
        )

__eq__(other)

Verify two instances are equal.

Returns true if the two instances are equal, false otherwise.

Source code in presidio_anonymizer/entities/engine/result/engine_result.py
54
55
56
57
58
59
60
61
def __eq__(self, other) -> bool:
    """Verify two instances are equal.

    Two results are equal when their texts match and their item lists
    have the same length with pairwise-equal items.

    :param other: the object to compare with; it must expose ``text``
     and ``items``.
    :return: True if the two instances are equal, False otherwise.
    """
    # Pairwise iteration stops at the shorter list, so the lengths are
    # compared explicitly; otherwise a mere prefix would compare equal.
    return (
        self.text == other.text
        and len(self.items) == len(other.items)
        and all(mine == theirs for mine, theirs in zip(self.items, other.items))
    )

__init__(text=None, items=None)

Create EngineResult entity.

Parameters:

Name Type Description Default
text str

The anonymized text.

None
items List[OperatorResult]

List of PII entities and the indices of their replacements in the anonymized text.

None
Source code in presidio_anonymizer/entities/engine/result/engine_result.py
12
13
14
15
16
17
18
19
20
21
22
def __init__(self, text: str = None, items: List[OperatorResult] = None):
    """Create EngineResult entity.

    :param text: The anonymized text.
    :param items: List of PII entities and the indices
     of their replacements in the anonymized text. When omitted,
     a new empty list is created for this instance.
    """
    self.text = text
    self.items = [] if items is None else items

__repr__()

Return a string representation of the object.

Source code in presidio_anonymizer/entities/engine/result/engine_result.py
46
47
48
49
50
51
52
def __repr__(self):
    """Return a string representation of the object.

    The text is printed first, followed by the items, one per line,
    inside square brackets.
    """

    if self.items:
        items_repr = ",\n    ".join(map(str, self.items))
    else:
        items_repr = ""
    return f"text: {self.text}\nitems:\n[\n    {items_repr}\n]\n"

add_item(item)

Add an item.

Parameters:

Name Type Description Default
item OperatorResult

an item to add to the list.

required
Source code in presidio_anonymizer/entities/engine/result/engine_result.py
28
29
30
31
32
33
def add_item(self, item: OperatorResult):
    """Append one item to the end of this result's ``items`` list.

    :param item: an item to add to the list.
    """
    collected = self.items
    collected.append(item)

normalize_item_indexes()

Normalize the item indexes so that they are counted from the start of the text.

Source code in presidio_anonymizer/entities/engine/result/engine_result.py
35
36
37
38
39
40
def normalize_item_indexes(self):
    """Normalize the indexes to be index from start.

    For every item, ``start`` becomes ``len(self.text) - item.end`` and
    ``end`` becomes the new ``start`` plus the length of the item's text.
    """
    total_length = len(self.text)
    for entry in self.items:
        new_start = total_length - entry.end
        entry.start = new_start
        entry.end = new_start + len(entry.text)

set_text(text)

Set a text.

Source code in presidio_anonymizer/entities/engine/result/engine_result.py
24
25
26
def set_text(self, text: str):
    """Set a text.

    :param text: the value to store on ``self.text``, replacing the current one.
    """
    self.text = text

to_json()

Return a json string serializing this instance.

Source code in presidio_anonymizer/entities/engine/result/engine_result.py
42
43
44
def to_json(self) -> str:
    """Return a json string serializing this instance.

    Objects the json encoder cannot handle natively (this instance and
    its items) are serialized through their ``__dict__``.
    """

    def attributes_of(obj):
        return obj.__dict__

    return json.dumps(self, default=attributes_of)

InvalidParamError

Bases: Exception

Throw exception with error when user input is not valid.

param msg: Message to be added to the exception

Source code in presidio_anonymizer/entities/invalid_exception.py
 4
 5
 6
 7
 8
 9
10
11
12
class InvalidParamError(Exception):
    """Exception raised when user input is not valid.

    :param msg: Message to be added to the exception; it is also kept
     on the ``err_msg`` attribute.
    """

    def __init__(self, msg: str):
        super().__init__(msg)
        self.err_msg = msg

OperatorConfig

Hold the data of the required operator.

Source code in presidio_anonymizer/entities/engine/operator_config.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
class OperatorConfig:
    """Hold the data of the required operator."""

    def __init__(self, operator_name: str, params: Dict = None):
        """
        Create an operator config instance.

        :param operator_name: the name of the operator we want to work with
        :param params: the parameters the operator needs in order to work
        """
        self.operator_name = operator_name
        # Falsy params (None or an empty dict) are replaced by a new dict.
        if not params:
            params = {}
        self.params = params
        self.__validate_fields()

    def __repr__(self):
        """Return a string representation of the object."""
        return f"operator_name: {self.operator_name}, params: {self.params}"

    @classmethod
    def from_json(cls, params: Dict) -> "OperatorConfig":
        """
        Create OperatorConfig from json.

        The input dictionary is left untouched: the "type" entry is removed
        from a shallow copy, which becomes the params of the new instance.

        :param params: json e.g.: {
            "type": "mask",
            "masking_char": "*",
            "chars_to_mask": 4,
            "from_end": true
            }
        :return: OperatorConfig
        """
        # Work on a copy so the caller's dict can be reused after this call.
        operator_params = dict(params)
        operator_name = operator_params.get("type")
        if operator_name:
            operator_params.pop("type")
        return cls(operator_name, operator_params)

    def __eq__(self, other: "OperatorConfig"):
        """Verify two OperatorConfigs are equal.

        :param other: the OperatorConfig to compare with.
        :return: truthy when both the operator name and the params match.
        """
        operator_name = self.operator_name == other.operator_name
        return self.params == other.params and operator_name

    def __validate_fields(self):
        # The operator name check is delegated to the shared validator.
        validate_parameter_not_empty(
            self.operator_name, "operator config", "operator_name"
        )

__eq__(other)

Verify two OperatorConfigs are equal.

Source code in presidio_anonymizer/entities/engine/operator_config.py
47
48
49
50
def __eq__(self, other: "OperatorConfig"):
    """Verify two OperatorConfigs are equal.

    :param other: the OperatorConfig to compare with.
    :return: truthy when both the operator name and the params match.
    """
    names_match = self.operator_name == other.operator_name
    params_match = self.params == other.params
    return params_match and names_match

__init__(operator_name, params=None)

Create an operator config instance.

Parameters:

Name Type Description Default
operator_name str

the name of the operator we want to work with

required
params Dict

the parameters the operator needs in order to work

None
Source code in presidio_anonymizer/entities/engine/operator_config.py
12
13
14
15
16
17
18
19
20
21
22
23
def __init__(self, operator_name: str, params: Dict = None):
    """
    Create an operator config instance.

    :param operator_name: the name of the operator we want to work with
    :param params: the parameters the operator needs in order to work;
     a falsy value (None or an empty dict) is replaced by a new empty dict
    """
    self.operator_name = operator_name
    self.params = params if params else {}
    # Field validation runs last, once both attributes are set.
    self.__validate_fields()

__repr__()

Return a string representation of the object.

Source code in presidio_anonymizer/entities/engine/operator_config.py
25
26
27
def __repr__(self):
    """Return a string representation of the object.

    The string lists the operator name followed by the params.
    """
    return f"operator_name: {self.operator_name}, params: {self.params}"

from_json(params) classmethod

Create OperatorConfig from json.

Parameters:

Name Type Description Default
params Dict

json e.g.: { "type": "mask", "masking_char": "*", "chars_to_mask": 4, "from_end": true }

required

Returns:

Type Description
OperatorConfig

OperatorConfig

Source code in presidio_anonymizer/entities/engine/operator_config.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
@classmethod
def from_json(cls, params: Dict) -> "OperatorConfig":
    """
    Create OperatorConfig from json.

    The input dictionary is left untouched: the "type" entry is removed
    from a shallow copy, which becomes the params of the new instance.

    :param params: json e.g.: {
        "type": "mask",
        "masking_char": "*",
        "chars_to_mask": 4,
        "from_end": true
        }
    :return: OperatorConfig
    """
    # Work on a copy so the caller's dict can be reused after this call.
    operator_params = dict(params)
    operator_name = operator_params.get("type")
    if operator_name:
        operator_params.pop("type")
    return cls(operator_name, operator_params)

OperatorResult

Bases: PIIEntity

A class to hold data for engines results either anonymize or deanonymize.

Source code in presidio_anonymizer/entities/engine/result/operator_result.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
class OperatorResult(PIIEntity):
    """A class to hold data for engines results either anonymize or deanonymize."""

    def __init__(
        self,
        start: int,
        end: int,
        entity_type: str,
        text: str = None,
        operator: str = None,
    ):
        # The base initializer stores start, end and entity_type.
        PIIEntity.__init__(self, start, end, entity_type)
        self.operator = operator
        self.text = text

    def __repr__(self):
        """Return a string representation of the object."""
        as_dict = self.to_dict()
        return str(as_dict)

    def to_dict(self) -> Dict:
        """Return object as Dict.

        The returned mapping is the instance's own attribute dictionary.
        """
        return vars(self)

    def __str__(self):
        """Return a string representation of the object."""
        as_dict = self.to_dict()
        return str(as_dict)

    def __eq__(self, other: "OperatorResult") -> bool:
        """
        Verify two OperatorResults are equal.

        :param other: OperatorResult
        :return: bool
        """
        compared_fields = ("start", "end", "entity_type", "operator", "text")
        return all(
            getattr(self, field) == getattr(other, field) for field in compared_fields
        )

    @classmethod
    def from_json(cls, json: Dict) -> "OperatorResult":
        """
        Create OperatorResult from user json.

        Only the start, end, entity_type, text and operator entries are read;
        entries missing from the json are passed on as None.

        :param json: json representation for this operator result. For example:
        {
            "start": 0,
            "end": 10,
            "key": "1111111111111111",
            "entity_type":"PERSON",
            "text":"resulted_text",
            "operator":"encrypt",
        }
        """
        field_names = ("start", "end", "entity_type", "text", "operator")
        return cls(**{name: json.get(name) for name in field_names})

__eq__(other)

Verify two OperatorResults are equal.

Parameters:

Name Type Description Default
other OperatorResult

OperatorResult

required

Returns:

Type Description
bool

bool

Source code in presidio_anonymizer/entities/engine/result/operator_result.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def __eq__(self, other: "OperatorResult") -> bool:
    """
    Verify two OperatorResults are equal.

    The start, end, entity_type, operator and text attributes are compared,
    in that order.

    :param other: OperatorResult
    :return: bool
    """
    compared_fields = ("start", "end", "entity_type", "operator", "text")
    return all(
        getattr(self, field) == getattr(other, field) for field in compared_fields
    )

__repr__()

Return a string representation of the object.

Source code in presidio_anonymizer/entities/engine/result/operator_result.py
21
22
23
def __repr__(self):
    """Return a string representation of the object.

    The representation is the string form of ``to_dict()``.
    """
    as_dict = self.to_dict()
    return str(as_dict)

__str__()

Return a string representation of the object.

Source code in presidio_anonymizer/entities/engine/result/operator_result.py
29
30
31
def __str__(self):
    """Return a string representation of the object.

    The representation is the string form of ``to_dict()``.
    """
    as_dict = self.to_dict()
    return str(as_dict)

from_json(json) classmethod

Create OperatorResult from user json.

Parameters:

Name Type Description Default
json Dict

json representation for this operator result. For example: { "start": 0, "end": 10, "key": "1111111111111111", "entity_type":"PERSON", "text":"resulted_text", "operator":"encrypt", }

required
Source code in presidio_anonymizer/entities/engine/result/operator_result.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
@classmethod
def from_json(cls, json: Dict) -> "OperatorResult":
    """
    Create OperatorResult from user json.

    Only the start, end, entity_type, text and operator entries are read;
    entries missing from the json are passed on as None.

    :param json: json representation for this operator result. For example:
    {
        "start": 0,
        "end": 10,
        "key": "1111111111111111",
        "entity_type":"PERSON",
        "text":"resulted_text",
        "operator":"encrypt",
    }
    """
    field_names = ("start", "end", "entity_type", "text", "operator")
    return cls(**{name: json.get(name) for name in field_names})

to_dict()

Return object as Dict.

Source code in presidio_anonymizer/entities/engine/result/operator_result.py
25
26
27
def to_dict(self) -> Dict:
    """Return object as Dict.

    The returned mapping is the instance's own attribute dictionary.
    """
    return vars(self)

PIIEntity

Bases: ABC

Abstract class to hold the metadata of the text we are going to operate on.

Source code in presidio_anonymizer/entities/engine/pii_entity.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
class PIIEntity(ABC):
    """Abstract class to hold the metadata of the text we are going to operate on.

    :param start: start index of the entity in the text
    :param end: end index of the entity in the text
    :param entity_type: the type of the entity
    """

    logger = logging.getLogger("presidio-anonymizer")

    def __init__(self, start: int, end: int, entity_type: str):
        self.start = start
        self.end = end
        self.entity_type = entity_type
        self.__validate_fields()

    def __repr__(self):
        """Return a string representation of the object."""
        # Each field is separated by ", " so the values cannot run together.
        return (
            f"start: {self.start}, "
            f"end: {self.end}, "
            f"entity_type: {self.entity_type}"
        )

    def __gt__(self, other):
        """Check whether this entity starts after the other one.

        Only the ``start`` indices are compared.
        """
        return self.start > other.start

    def __eq__(self, other):
        """Check two text metadata entities are equal.

        Entities are equal when start, end and entity_type all match.
        """
        return (
            self.start == other.start
            and self.end == other.end
            and self.entity_type == other.entity_type
        )

    def __validate_fields(self):
        # Presence/type/emptiness checks are delegated to the shared validators.
        validate_parameter_exists(self.start, "result", "start")
        validate_type(self.start, "start", int)
        validate_parameter_exists(self.end, "result", "end")
        validate_type(self.end, "end", int)
        validate_parameter_not_empty(self.entity_type, "result", "entity_type")
        # Negative indices are rejected; zero is accepted.
        if self.start < 0 or self.end < 0:
            raise InvalidParamError(
                "Invalid input, result start and end must be positive"
            )
        # start == end is accepted; only start beyond end is an error.
        if self.start > self.end:
            raise InvalidParamError(
                f"Invalid input, start index '{self.start}' "
                f"must be smaller than end index '{self.end}'"
            )

__eq__(other)

Check two text metadata entities are equal.

Source code in presidio_anonymizer/entities/engine/pii_entity.py
35
36
37
38
39
40
41
def __eq__(self, other):
    """Check two text metadata entities are equal.

    Entities are equal when start, end and entity_type all match.
    """
    compared_fields = ("start", "end", "entity_type")
    return all(
        getattr(self, field) == getattr(other, field) for field in compared_fields
    )

__gt__(other)

Check whether one entity is greater than the other by the text start index.

Source code in presidio_anonymizer/entities/engine/pii_entity.py
31
32
33
def __gt__(self, other):
    """Check whether this entity starts after the other one.

    Only the ``start`` indices are compared.
    """
    my_start, their_start = self.start, other.start
    return my_start > their_start

__repr__()

Return a string representation of the object.

Source code in presidio_anonymizer/entities/engine/pii_entity.py
23
24
25
26
27
28
29
def __repr__(self):
    """Return a string representation of the object."""
    # Each field is separated by ", " so the values cannot run together.
    return (
        f"start: {self.start}, "
        f"end: {self.end}, "
        f"entity_type: {self.entity_type}"
    )

RecognizerResult

Bases: PIIEntity

Recognizer Result represents the findings of the detected entity.

Result of a recognizer analyzing the text.

Parameters:

Name Type Description Default
entity_type str

the type of the entity

required
start int

the start location of the detected entity

required
end int

the end location of the detected entity

required
score float

the score of the detection

required
Source code in presidio_anonymizer/entities/engine/recognizer_result.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
class RecognizerResult(PIIEntity):
    """
    Recognizer Result represents the findings of the detected entity.

    Result of a recognizer analyzing the text.

    :param entity_type: the type of the entity
    :param start: the start location of the detected entity
    :param end: the end location of the detected entity
    :param score: the score of the detection
    """

    logger = logging.getLogger("presidio-anonymizer")

    def __init__(self, entity_type: str, start: int, end: int, score: float):
        PIIEntity.__init__(self, start, end, entity_type)
        self.score = score
        validate_parameter_exists(score, "analyzer result", "score")

    @classmethod
    def from_json(cls, data: Dict):
        """
        Create RecognizerResult from json.

        :param data: e.g. {
            "start": 24,
            "end": 32,
            "score": 0.8,
            "entity_type": "NAME"
        }
        :return: RecognizerResult
        """
        score = data.get("score")
        entity_type = data.get("entity_type")
        start = data.get("start")
        end = data.get("end")
        return cls(entity_type, start, end, score)

    def __gt__(self, other):
        """
        Check if one result is greater by using the results indices in the text.

        Results are ordered by start index; ties are broken by end index.

        :param other: another RecognizerResult
        :return: bool
        """
        if self.start == other.start:
            return self.end > other.end
        return self.start > other.start

    def __eq__(self, other):
        """
        Check two results are equal by using all class fields.

        :param other: another RecognizerResult
        :return: bool
        """
        equal_type = self.entity_type == other.entity_type
        equal_score = self.score == other.score
        return self.equal_indices(other) and equal_type and equal_score

    def __hash__(self):
        """
        Hash the result data by using all class fields.

        The hash is built from the same fields ``__eq__`` compares, so
        results that compare equal (e.g. score ``1`` and ``1.0``) share a hash.

        :return: int
        """
        # Hash the values, not their string form: str(1) != str(1.0) although
        # 1 == 1.0, which would give equal results different hashes.
        return hash((self.start, self.end, self.score, self.entity_type))

    def __str__(self) -> str:
        """Return a string representation of the instance."""
        return (
            f"type: {self.entity_type}, "
            f"start: {self.start}, "
            f"end: {self.end}, "
            f"score: {self.score}"
        )

    def has_conflict(self, other):
        """
        Check if two recognizer results are conflicted or not.

        I have a conflict if:
        1. My indices are the same as the other and my score is lower or equal.
        2. If my indices are contained in another.

        :param other: RecognizerResult
        :return: bool
        """
        if self.equal_indices(other):
            return self.score <= other.score
        return other.contains(self)

    def contains(self, other):
        """
        Check if one result is contained or equal to another result.

        :param other: another RecognizerResult
        :return: bool, True when ``other`` lies within (or equals) this span
        """
        return self.start <= other.start and self.end >= other.end

    def equal_indices(self, other):
        """
        Check if the indices are equal between two results.

        :param other: another RecognizerResult
        :return: bool
        """
        return self.start == other.start and self.end == other.end

    def intersects(self, other) -> int:
        """
        Check if self intersects with a different RecognizerResult.

        :param other: another RecognizerResult
        :return: If intersecting, returns the number of
        intersecting characters.
        If not, returns 0
        """
        # if they do not overlap the intersection is 0
        if self.end < other.start or other.end < self.start:
            return 0

        # otherwise the intersection is min(end) - max(start)
        return min(self.end, other.end) - max(self.start, other.start)

__eq__(other)

Check two results are equal by using all class fields.

Parameters:

Name Type Description Default
other

another RecognizerResult

required

Returns:

Type Description

bool

Source code in presidio_anonymizer/entities/engine/recognizer_result.py
63
64
65
66
67
68
69
70
71
72
def __eq__(self, other):
    """
    Check two results are equal by using all class fields.

    The entity type, the score and the indices must all match.

    :param other: another RecognizerResult
    :return: bool
    """
    same_type = self.entity_type == other.entity_type
    same_score = self.score == other.score
    same_span = self.equal_indices(other)
    return same_span and same_type and same_score

__gt__(other)

Check if one result is greater by using the results indices in the text.

Parameters:

Name Type Description Default
other

another RecognizerResult

required

Returns:

Type Description

bool

Source code in presidio_anonymizer/entities/engine/recognizer_result.py
52
53
54
55
56
57
58
59
60
61
def __gt__(self, other):
    """
    Check if one result is greater by using the results indices in the text.

    Results are ordered by start index; ties are broken by end index.

    :param other: another RecognizerResult
    :return: bool
    """
    own_span = (self.start, self.end)
    other_span = (other.start, other.end)
    return own_span > other_span

__hash__()

Hash the result data by using all class fields.

Returns:

Type Description

int

Source code in presidio_anonymizer/entities/engine/recognizer_result.py
74
75
76
77
78
79
80
81
82
def __hash__(self):
    """
    Hash the result data by using all class fields.

    The hash is built from the start, end, score and entity_type values,
    so results whose fields compare equal (e.g. score ``1`` and ``1.0``)
    share a hash.

    :return: int
    """
    # Hash the values, not their string form: str(1) != str(1.0) although
    # 1 == 1.0, which would give equal results different hashes.
    return hash((self.start, self.end, self.score, self.entity_type))

__str__()

Return a string representation of the instance.

Source code in presidio_anonymizer/entities/engine/recognizer_result.py
84
85
86
87
88
89
90
91
def __str__(self) -> str:
    """Return a string representation of the instance.

    The type, start, end and score are listed in that order.
    """
    parts = (
        f"type: {self.entity_type}",
        f"start: {self.start}",
        f"end: {self.end}",
        f"score: {self.score}",
    )
    return ", ".join(parts)

contains(other)

Check if one result is contained or equal to another result.

Parameters:

Name Type Description Default
other

another RecognizerResult

required

Returns:

Type Description

bool

Source code in presidio_anonymizer/entities/engine/recognizer_result.py
108
109
110
111
112
113
114
115
def contains(self, other):
    """
    Check if one result is contained or equal to another result.

    :param other: another RecognizerResult
    :return: bool, True when ``other`` lies within (or equals) this span
    """
    if not self.start <= other.start:
        return False
    return self.end >= other.end

equal_indices(other)

Check if the indices are equal between two results.

Parameters:

Name Type Description Default
other

another RecognizerResult

required

Returns:

Type Description
Source code in presidio_anonymizer/entities/engine/recognizer_result.py
117
118
119
120
121
122
123
124
def equal_indices(self, other):
    """
    Check if the indices are equal between two results.

    :param other: another RecognizerResult
    :return: bool, True when both start and end match
    """
    if not self.start == other.start:
        return False
    return self.end == other.end

from_json(data) classmethod

Create RecognizerResult from json.

Parameters:

Name Type Description Default
data Dict

e.g. { "start": 24, "end": 32, "score": 0.8, "entity_type": "NAME" }

required

Returns:

Type Description

RecognizerResult

Source code in presidio_anonymizer/entities/engine/recognizer_result.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
@classmethod
def from_json(cls, data: Dict):
    """
    Create RecognizerResult from json.

    Entries missing from ``data`` are passed on as None.

    :param data: e.g. {
        "start": 24,
        "end": 32,
        "score": 0.8,
        "entity_type": "NAME"
    }
    :return: RecognizerResult
    """
    return cls(
        data.get("entity_type"),
        data.get("start"),
        data.get("end"),
        data.get("score"),
    )

has_conflict(other)

Check if two recognizer results are conflicted or not.

I have a conflict if: 1. My indices are the same as the other's and my score is lower or equal. 2. My indices are contained in the other's.

Parameters:

Name Type Description Default
other

RecognizerResult

required

Returns:

Type Description
Source code in presidio_anonymizer/entities/engine/recognizer_result.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def has_conflict(self, other):
    """
    Check if two recognizer results are conflicted or not.

    I have a conflict if:
    1. My indices are the same as the other and my score is lower or equal.
    2. If my indices are contained in another.

    :param other: RecognizerResult
    :return: bool
    """
    if not self.equal_indices(other):
        # Different spans: conflicted only when the other span covers mine.
        return other.contains(self)
    return self.score <= other.score

intersects(other)

Check if self intersects with a different RecognizerResult.

Returns:

Type Description
int

If intersecting, returns the number of intersecting characters. If not, returns 0

Source code in presidio_anonymizer/entities/engine/recognizer_result.py
126
127
128
129
130
131
132
133
134
135
136
137
138
139
def intersects(self, other) -> int:
    """
    Check if self intersects with a different RecognizerResult.

    :param other: another RecognizerResult
    :return: If intersecting, returns the number of
    intersecting characters.
    If not, returns 0
    """
    overlapping = self.end >= other.start and other.end >= self.start
    if overlapping:
        # the intersection is min(end) - max(start)
        latest_start = max(self.start, other.start)
        earliest_end = min(self.end, other.end)
        return earliest_end - latest_start

    # disjoint spans share no characters
    return 0