Skip to content

Presidio Anonymizer API Reference

Anonymizer root module.

anonymizer_engine

Handles the entire logic of the Presidio-anonymizer and text anonymizing.

AnonymizerEngine (EngineBase)

AnonymizerEngine class.

Handles the entire logic of the Presidio-anonymizer. Gets the original text and replaces the PII entities with the desired anonymizers.

Source code in presidio_anonymizer/anonymizer_engine.py
class AnonymizerEngine(EngineBase):
    """
    AnonymizerEngine class.

    Handles the entire logic of the Presidio-anonymizer. Gets the original text
    and replaces the PII entities with the desired anonymizers.
    """

    logger = logging.getLogger("presidio-anonymizer")

    def __init__(self):
        EngineBase.__init__(self)

    def anonymize(
        self,
        text: str,
        analyzer_results: List[RecognizerResult],
        operators: Optional[Dict[str, OperatorConfig]] = None,
    ) -> EngineResult:
        """Anonymize method to anonymize the given text.

        :param text: the text we are anonymizing
        :param analyzer_results: A list of RecognizerResult class -> The results we
        received from the analyzer
        :param operators: The configuration of the anonymizers we would like
        to use for each entity e.g.: {"PHONE_NUMBER":OperatorConfig("redact", {})}.
        The given mapping is left untouched; when it has no "DEFAULT" entry,
        the default operator is added to a copy of it.
        :return: the anonymized text and a list of information about the
        anonymized entities.

        :example:

        >>> from presidio_anonymizer import AnonymizerEngine
        >>> from presidio_anonymizer.entities import RecognizerResult, OperatorConfig

        >>> # Initialize the engine with logger.
        >>> engine = AnonymizerEngine()

        >>> # Invoke the anonymize function with the text, analyzer results and
        >>> # Operators to define the anonymization type.
        >>> result = engine.anonymize(
        ...     text="My name is Bond, James Bond",
        ...     analyzer_results=[RecognizerResult(entity_type="PERSON",
        ...                                        start=11,
        ...                                        end=15,
        ...                                        score=0.8),
        ...                       RecognizerResult(entity_type="PERSON",
        ...                                        start=17,
        ...                                        end=27,
        ...                                        score=0.8)],
        ...     operators={"PERSON": OperatorConfig("replace", {"new_value": "BIP"})}
        ... )

        >>> print(result)
        text: My name is BIP, BIP
        items:
        [
            {'start': 16, 'end': 19, 'entity_type': 'PERSON',
             'text': 'BIP', 'operator': 'replace'},
            {'start': 11, 'end': 14, 'entity_type': 'PERSON',
             'text': 'BIP', 'operator': 'replace'}
        ]


        """
        analyzer_results = self._remove_conflicts_and_get_text_manipulation_data(
            analyzer_results
        )

        operators = self.__check_or_add_default_operator(operators)

        return self._operate(text, analyzer_results, operators, OperatorType.Anonymize)

    def _remove_conflicts_and_get_text_manipulation_data(
        self, analyzer_results: List[RecognizerResult]
    ) -> List[RecognizerResult]:
        """
        Iterate the list and create a unique results list from it.

        A result is kept only when it has no conflict (see
        ``RecognizerResult.has_conflict``) with any of the results it is
        checked against, i.e.:
        1. Its indices are not contained in another result.
        2. It has the same indices as another result but a larger score.

        :param analyzer_results: the results received from the analyzer
        :return: the kept results, in their original relative order
        """
        unique_text_metadata_elements = []
        # This list contains all elements which we need to check a single result
        # against. If a result is dropped, it can also be dropped from this list
        # since it is intersecting with another result and we selected the other one.
        other_elements = analyzer_results.copy()
        for result in analyzer_results:
            # Take the result out first so it is never compared against itself.
            other_elements.remove(result)
            result_conflicted = self.__is_result_conflicted_with_other_elements(
                other_elements, result
            )
            if not result_conflicted:
                # Kept results stay candidates for conflicts with later results.
                other_elements.append(result)
                unique_text_metadata_elements.append(result)
            else:
                self.logger.debug(
                    f"removing element {result} from results list due to conflict"
                )
        return unique_text_metadata_elements

    def get_anonymizers(self) -> List[str]:
        """Return a list of supported anonymizers."""
        return list(self.operators_factory.get_anonymizers().keys())

    @staticmethod
    def __is_result_conflicted_with_other_elements(other_elements, result):
        """Return True when ``result`` conflicts with any of ``other_elements``."""
        # A generator lets any() stop at the first conflict found.
        return any(
            result.has_conflict(other_element) for other_element in other_elements
        )

    @staticmethod
    def __check_or_add_default_operator(
        operators: Optional[Dict[str, OperatorConfig]]
    ) -> Dict[str, OperatorConfig]:
        """
        Return an operators mapping that is guaranteed to hold a "DEFAULT" entry.

        :param operators: the mapping supplied by the caller, may be None or empty
        :return: the caller's mapping when it already has a truthy "DEFAULT"
        entry, otherwise a new dict that includes the default operator
        """
        default_operator = OperatorConfig(DEFAULT)
        if not operators:
            return {"DEFAULT": default_operator}
        if operators.get("DEFAULT"):
            return operators
        # Build a new dict instead of writing into the caller's mapping, so a
        # configuration object reused by the caller is not silently changed.
        return {**operators, "DEFAULT": default_operator}

anonymize(self, text, analyzer_results, operators=None)

Anonymize method to anonymize the given text.

:param text: the text we are anonymizing :param analyzer_results: A list of RecognizerResult class -> The results we received from the analyzer :param operators: The configuration of the anonymizers we would like to use for each entity e.g.: {"PHONE_NUMBER":OperatorConfig("redact", {})} :return: the anonymized text and a list of information about the anonymized entities.

:example:

from presidio_anonymizer import AnonymizerEngine from presidio_anonymizer.entities import RecognizerResult, OperatorConfig

Initialize the engine with logger.

engine = AnonymizerEngine()

Invoke the anonymize function with the text, analyzer results and
Operators to define the anonymization type.

result = engine.anonymize( text="My name is Bond, James Bond", analyzer_results=[RecognizerResult(entity_type="PERSON", start=11, end=15, score=0.8), RecognizerResult(entity_type="PERSON", start=17, end=27, score=0.8)], operators={"PERSON": OperatorConfig("replace", {"new_value": "BIP"})} )

print(result) text: My name is BIP, BIP. items: [ {'start': 16, 'end': 19, 'entity_type': 'PERSON', 'text': 'BIP', 'operator': 'replace'}, {'start': 11, 'end': 14, 'entity_type': 'PERSON', 'text': 'BIP', 'operator': 'replace'} ]

Source code in presidio_anonymizer/anonymizer_engine.py
def anonymize(
    self,
    text: str,
    analyzer_results: List[RecognizerResult],
    operators: Optional[Dict[str, OperatorConfig]] = None,
) -> EngineResult:
    """Anonymize the given text using the analyzer results and operators.

    Conflicting analyzer results are filtered out first, a "DEFAULT" operator
    is ensured, and the remaining results are then operated on.

    :param text: the text we are anonymizing
    :param analyzer_results: A list of RecognizerResult class -> The results we
    received from the analyzer
    :param operators: The configuration of the anonymizers we would like
    to use for each entity e.g.: {"PHONE_NUMBER":OperatorConfig("redact", {})}
    :return: the anonymized text and a list of information about the
    anonymized entities.

    :example:

    >>> from presidio_anonymizer import AnonymizerEngine
    >>> from presidio_anonymizer.entities import RecognizerResult, OperatorConfig

    >>> engine = AnonymizerEngine()

    >>> result = engine.anonymize(
    ...     text="My name is Bond, James Bond",
    ...     analyzer_results=[RecognizerResult(entity_type="PERSON",
    ...                                        start=11,
    ...                                        end=15,
    ...                                        score=0.8),
    ...                       RecognizerResult(entity_type="PERSON",
    ...                                        start=17,
    ...                                        end=27,
    ...                                        score=0.8)],
    ...     operators={"PERSON": OperatorConfig("replace", {"new_value": "BIP"})}
    ... )
    """
    conflict_free_results = self._remove_conflicts_and_get_text_manipulation_data(
        analyzer_results
    )
    operators_with_default = self.__check_or_add_default_operator(operators)
    return self._operate(
        text,
        conflict_free_results,
        operators_with_default,
        OperatorType.Anonymize,
    )

get_anonymizers(self)

Return a list of supported anonymizers.

Source code in presidio_anonymizer/anonymizer_engine.py
def get_anonymizers(self) -> List[str]:
    """Return the names of the anonymizers known to the operators factory."""
    return list(self.operators_factory.get_anonymizers().keys())

core special

The core text functionality.

engine_base

Handle the entire text operations using the operators.

EngineBase (ABC)

Handle the logic of operations over the text using the operators.

Source code in presidio_anonymizer/core/engine_base.py
class EngineBase(ABC):
    """Handle the logic of operations over the text using the operators."""

    def __init__(self):
        self.logger = logging.getLogger("presidio-anonymizer")
        self.operators_factory = OperatorsFactory()

    def _operate(
        self,
        text: str,
        pii_entities: List[PIIEntity],
        operators_metadata: Dict[str, OperatorConfig],
        operator_type: OperatorType,
    ) -> EngineResult:
        """
        Operate will do the operations required by the user over the text.

        :param text: the text we need to operate on.
        :param pii_entities: data about the text entities we want to operate over.
        :param operators_metadata: dictionary where the key is the entity_type and
        the value is the operator we want to perform over this entity_type.
        :param operator_type: either anonymize or deanonymize
        :return: EngineResult holding the new text and one item per operated entity.
        """
        text_replace_builder = TextReplaceBuilder(original_text=text)
        engine_result = EngineResult()
        # Entities are handled in descending sort order (end of the text first).
        for entity in sorted(pii_entities, reverse=True):
            text_to_operate_on = text_replace_builder.get_text_in_position(
                entity.start, entity.end
            )

            self.logger.debug(f"performing operation {entity}")
            operator_metadata = self.__get_entity_operator_metadata(
                entity.entity_type, operators_metadata
            )
            changed_text = self.__operate_on_text(
                entity, text_to_operate_on, operator_metadata, operator_type
            )
            index_from_end = text_replace_builder.replace_text_get_insertion_index(
                changed_text, entity.start, entity.end
            )

            # The following creates an intermediate list of result entities,
            # ordered from end to start, and the indexes will be normalized
            # from start to end once the loop ends and the text length is deterministic.
            result_item = OperatorResult(
                0,
                index_from_end,
                entity.entity_type,
                changed_text,
                operator_metadata.operator_name,
            )
            engine_result.add_item(result_item)

        engine_result.set_text(text_replace_builder.output_text)
        engine_result.normalize_item_indexes()
        return engine_result

    def __operate_on_text(
        self,
        text_metadata: PIIEntity,
        text_to_operate_on: str,
        operator_metadata: OperatorConfig,
        operator_type: OperatorType,
    ) -> str:
        """
        Create, validate and run the operator configured for one entity.

        :param text_metadata: the entity being operated on
        :param text_to_operate_on: the entity's text
        :param operator_metadata: the operator name and params to use
        :param operator_type: either anonymize or deanonymize
        :return: the text produced by the operator
        """
        entity_type = text_metadata.entity_type
        self.logger.debug(f"getting operator for {entity_type}")
        operator = self.operators_factory.create_operator_class(
            operator_metadata.operator_name, operator_type
        )
        self.logger.debug(f"validating operator {operator} for {entity_type}")
        operator.validate(params=operator_metadata.params)
        # Hand the operator a copy that also carries the entity type, rather
        # than writing "entity_type" into the caller's OperatorConfig params
        # (which are shared by every entity that uses this config).
        params = {**operator_metadata.params, "entity_type": entity_type}
        self.logger.debug(f"operating on {entity_type} with {operator}")
        operated_on_text = operator.operate(params=params, text=text_to_operate_on)
        return operated_on_text

    @staticmethod
    def __get_entity_operator_metadata(
        entity_type: str, operators_metadata: Dict = None
    ) -> OperatorConfig:
        """
        Pick the operator config for ``entity_type``.

        :param entity_type: the entity type to look up
        :param operators_metadata: mapping of entity type to operator config
        :return: the config registered for the entity type, otherwise whatever
        is registered under "DEFAULT" (None when that is missing too)
        """
        # We try to get the operator from the list by entity_type.
        # If it does not exist, we get the default from the list.
        if operators_metadata is None:
            operators_metadata = {}
        return operators_metadata.get(entity_type) or operators_metadata.get(
            "DEFAULT"
        )

text_replace_builder

Handles the original text and creates a new one according to changes requests.

TextReplaceBuilder

Creates new text according to users request.

Source code in presidio_anonymizer/core/text_replace_builder.py
class TextReplaceBuilder:
    """Build a new text by applying replacements onto an original text."""

    def __init__(self, original_text: str):
        self.logger = logging.getLogger("presidio-anonymizer")
        self.__validate_text_not_empty(original_text)
        # The output starts as the original and is rewritten on each replacement.
        self.original_text = self.output_text = original_text
        self.text_len = len(original_text)
        self.last_replacement_index = self.text_len

    def __validate_text_not_empty(self, text: str):
        """Raise InvalidParamException when the given text is empty."""
        if text:
            return
        self.logger.debug("invalid input, json is missing text field")
        raise InvalidParamException("Invalid input, text can not be empty")

    def get_text_in_position(self, start: int, end: int) -> str:
        """
        Return the slice of the original text between two positions.

        :param start: start position of inner text
        :param end: end position of inner text
        :return: str - part of the original text
        """
        self.__validate_position_in_text(start, end)
        inner_text = self.original_text[start:end]
        return inner_text

    def replace_text_get_insertion_index(
        self, replacement_text: str, start: int, end: int
    ) -> int:
        """
        Put the replacement text in place of the span between start and end.

        :param replacement_text: new text to replace the old text according to indices
        :param start: the startpoint to replace the text
        :param end: the endpoint to replace the text
        :return: the length of the replacement plus the text after it, i.e. the
        replacement's start measured from the end of the output text
        """
        # Never cut past the start of the previous replacement.
        cut_off = min(end, self.last_replacement_index)
        head = self.output_text[:start]
        tail = self.output_text[cut_off:]

        self.last_replacement_index = start
        self.output_text = "".join((head, replacement_text, tail))

        # Replacements run from the end of the text towards its start, so the
        # position is reported relative to the end.
        return len(tail) + len(replacement_text)

    def __validate_position_in_text(self, start: int, end: int):
        """Validate the start and end position match the text length."""
        if start > self.text_len or end > self.text_len:
            err_msg = (
                f"Invalid analyzer result, start: {start} and end: "
                f"{end}, while text length is only {self.text_len}."
            )
            raise InvalidParamException(err_msg)
get_text_in_position(self, start, end)

Get part of the text inside the original text.

:param start: start position of inner text :param end: end position of inner text :return: str - part of the original text

Source code in presidio_anonymizer/core/text_replace_builder.py
def get_text_in_position(self, start: int, end: int) -> str:
    """
    Return the slice of the original text between two positions.

    The positions are validated before the text is sliced.

    :param start: start position of inner text
    :param end: end position of inner text
    :return: str - part of the original text
    """
    self.__validate_position_in_text(start, end)
    inner_text = self.original_text[start:end]
    return inner_text
replace_text_get_insertion_index(self, replacement_text, start, end)

Replace text in a specific position with the text.

:param replacement_text: new text to replace the old text according to indices :param start: the startpoint to replace the text :param end: the endpoint to replace the text :return: The index of inserted text

Source code in presidio_anonymizer/core/text_replace_builder.py
def replace_text_get_insertion_index(
    self, replacement_text: str, start: int, end: int
) -> int:
    """
    Put the replacement text in place of the span between start and end.

    :param replacement_text: new text to replace the old text according to indices
    :param start: the startpoint to replace the text
    :param end: the endpoint to replace the text
    :return: the length of the replacement plus the text after it, i.e. the
    replacement's start measured from the end of the output text
    """
    # Never cut past the start of the previous replacement.
    cut_off = min(end, self.last_replacement_index)
    head = self.output_text[:start]
    tail = self.output_text[cut_off:]

    self.last_replacement_index = start
    self.output_text = "".join((head, replacement_text, tail))

    # Replacements run from the end of the text towards its start, so the
    # position is reported relative to the end.
    return len(tail) + len(replacement_text)

deanonymize_engine

Deanonymize anonymized text by using deanonymize operators.

DeanonymizeEngine (EngineBase)

Deanonymize text that was previously anonymized.

Source code in presidio_anonymizer/deanonymize_engine.py
class DeanonymizeEngine(EngineBase):
    """Engine that reverts previously anonymized text using deanonymize operators."""

    def __init__(self):
        self.logger = logging.getLogger("presidio-anonymizer")
        EngineBase.__init__(self)

    def deanonymize(
        self,
        text: str,
        entities: List[OperatorResult],
        operators: Dict[str, OperatorConfig],
    ) -> EngineResult:
        """
        Run the deanonymize operators over the given entities of the text.

        :param operators: the operators to apply on the anonymizer result entities
        :param text: the full text with the encrypted entities
        :param entities: list of encrypted entities
        :return: EngineResult - the new text and data about the deanonymized entities.
        """
        deanonymized = self._operate(
            text, entities, operators, OperatorType.Deanonymize
        )
        return deanonymized

    def get_deanonymizers(self) -> List[str]:
        """Return the names of the deanonymizers known to the operators factory."""
        return list(self.operators_factory.get_deanonymizers().keys())

deanonymize(self, text, entities, operators)

Receive the text, entities and operators to perform deanonymization over.

:param operators: the operators to apply on the anonymizer result entities :param text: the full text with the encrypted entities :param entities: list of encrypted entities :return: EngineResult - the new text and data about the deanonymized entities.

Source code in presidio_anonymizer/deanonymize_engine.py
def deanonymize(
    self,
    text: str,
    entities: List[OperatorResult],
    operators: Dict[str, OperatorConfig],
) -> EngineResult:
    """
    Run the deanonymize operators over the given entities of the text.

    :param operators: the operators to apply on the anonymizer result entities
    :param text: the full text with the encrypted entities
    :param entities: list of encrypted entities
    :return: EngineResult - the new text and data about the deanonymized entities.
    """
    deanonymized = self._operate(
        text, entities, operators, OperatorType.Deanonymize
    )
    return deanonymized

get_deanonymizers(self)

Return a list of supported deanonymizers.

Source code in presidio_anonymizer/deanonymize_engine.py
def get_deanonymizers(self) -> List[str]:
    """Return the names of the deanonymizers known to the operators factory."""
    return list(self.operators_factory.get_deanonymizers().keys())

entities special

Handles all the entities objects (structs) of the anonymizer.

engine special

Engine request entities.

operator_config

OperatorConfig

Hold the data of the required operator.

Source code in presidio_anonymizer/entities/engine/operator_config.py
class OperatorConfig:
    """Hold the data of the required operator."""

    def __init__(self, operator_name: str, params: Dict = None):
        """
        Create an operator config instance.

        :param operator_name: the name of the operator we want to work with
        :param params: the parameters the operator needs in order to work;
        a missing or empty value is stored as an empty dict
        """
        self.logger = logging.getLogger("presidio-anonymizer")
        self.operator_name = operator_name
        if not params:
            params = {}
        self.params = params
        self.__validate_fields()

    def __repr__(self):
        """Return a string representation of the object."""
        return f"operator_name: {self.operator_name}, params: {self.params}"

    @classmethod
    def from_json(cls, params: Dict) -> "OperatorConfig":
        """
        Create OperatorConfig from json.

        The "type" entry becomes the operator name and the remaining entries
        become the operator params. The given dict itself is not modified.

        :param params: json e.g.: {
            "type": "mask",
            "masking_char": "*",
            "chars_to_mask": 4,
            "from_end": true
        }
        :return: OperatorConfig
        """
        # Work on a copy: removing "type" from the caller's dict would make a
        # second from_json() call on the same payload lose its operator name.
        operator_params = dict(params)
        operator_name = operator_params.get("type")
        if operator_name:
            del operator_params["type"]
        return cls(operator_name, operator_params)

    def __eq__(self, other: "OperatorConfig"):
        """Verify two OperatorConfigs are equal."""
        operator_name = self.operator_name == other.operator_name
        return self.params == other.params and operator_name

    def __validate_fields(self):
        """Check that an operator name was supplied."""
        validate_parameter_not_empty(
            self.operator_name, "operator config", "operator_name"
        )
__eq__(self, other) special

Verify two OperatorConfigs are equal.

Source code in presidio_anonymizer/entities/engine/operator_config.py
def __eq__(self, other: "OperatorConfig"):
    """Return True when both configs share the operator name and the params."""
    same_name = self.operator_name == other.operator_name
    same_params = self.params == other.params
    return same_params and same_name
__init__(self, operator_name, params=None) special

Create an operator config instance.

:param operator_name: the name of the operator we want to work with :param params: the parameters the operator needs in order to work

Source code in presidio_anonymizer/entities/engine/operator_config.py
def __init__(self, operator_name: str, params: Dict = None):
    """
    Create an operator config instance.

    :param operator_name: the name of the operator we want to work with
    :param params: the parameters the operator needs in order to work;
    a missing or empty value is stored as an empty dict
    """
    self.logger = logging.getLogger("presidio-anonymizer")
    self.operator_name = operator_name
    self.params = params if params else {}
    self.__validate_fields()
__repr__(self) special

Return a string representation of the object.

Source code in presidio_anonymizer/entities/engine/operator_config.py
def __repr__(self):
    """Return a string showing the operator name and its params."""
    return "operator_name: {}, params: {}".format(self.operator_name, self.params)
from_json(params) classmethod

Create OperatorConfig from json.

:param params: json e.g.: { "type": "mask", "masking_char": "*", "chars_to_mask": 4, "from_end": true } :return: OperatorConfig

Source code in presidio_anonymizer/entities/engine/operator_config.py
@classmethod
def from_json(cls, params: Dict) -> "OperatorConfig":
    """
    Create OperatorConfig from json.

    The "type" entry becomes the operator name and the remaining entries
    become the operator params. The given dict itself is not modified.

    :param params: json e.g.: {
        "type": "mask",
        "masking_char": "*",
        "chars_to_mask": 4,
        "from_end": true
    }
    :return: OperatorConfig
    """
    # Work on a copy: removing "type" from the caller's dict would make a
    # second from_json() call on the same payload lose its operator name.
    operator_params = dict(params)
    operator_name = operator_params.get("type")
    if operator_name:
        del operator_params["type"]
    return cls(operator_name, operator_params)

pii_entity

PIIEntity (ABC)

Abstract class to hold the text we are going to operate on metadata.

Source code in presidio_anonymizer/entities/engine/pii_entity.py
class PIIEntity(ABC):
    """Abstract class to hold the text we are going to operate on metadata."""

    logger = logging.getLogger("presidio-anonymizer")

    def __init__(self, start: int, end: int, entity_type: str):
        """
        Store and validate the entity's position and type.

        :param start: start index of the entity in the text
        :param end: end index of the entity in the text
        :param entity_type: the type of the entity
        """
        self.start = start
        self.end = end
        self.entity_type = entity_type
        self.__validate_fields()

    def __repr__(self):
        """Return a string representation of the object."""
        # Fields are separated by ", " so the values stay readable.
        return (
            f"start: {self.start}, "
            f"end: {self.end}, "
            f"entity_type: {self.entity_type}"
        )

    def __gt__(self, other):
        """Check one entity is greater than the other by the text start index."""
        return self.start > other.start

    def __eq__(self, other):
        """Check two text metadata entities are equal."""
        return (
            self.start == other.start
            and self.end == other.end
            and self.entity_type == other.entity_type
        )

    def __validate_fields(self):
        """Validate start, end and entity_type; raise on invalid indices."""
        validate_parameter_exists(self.start, "result", "start")
        validate_type(self.start, "start", int)
        validate_parameter_exists(self.end, "result", "end")
        validate_type(self.end, "end", int)
        validate_parameter_not_empty(self.entity_type, "result", "entity_type")
        if self.start < 0 or self.end < 0:
            raise InvalidParamException(
                "Invalid input, result start and end must be positive"
            )
        # start == end is accepted; only a start beyond the end is rejected.
        if self.start > self.end:
            raise InvalidParamException(
                f"Invalid input, start index '{self.start}' "
                f"must be smaller than end index '{self.end}'"
            )
__eq__(self, other) special

Check two text metadata entities are equal.

Source code in presidio_anonymizer/entities/engine/pii_entity.py
def __eq__(self, other):
    """Return True when start, end and entity type all match the other entity."""
    if self.start != other.start:
        return False
    if self.end != other.end:
        return False
    return self.entity_type == other.entity_type
__gt__(self, other) special

Check one entity is greater than the other by the text start index.

Source code in presidio_anonymizer/entities/engine/pii_entity.py
def __gt__(self, other):
    """Check one entity is greater than the other by the text start index."""
    starts_later = self.start > other.start
    return starts_later
__repr__(self) special

Return a string representation of the object.

Source code in presidio_anonymizer/entities/engine/pii_entity.py
def __repr__(self):
    """Return a string representation of the object."""
    # Fields are separated by ", " so the values stay readable.
    return (
        f"start: {self.start}, "
        f"end: {self.end}, "
        f"entity_type: {self.entity_type}"
    )

recognizer_result

RecognizerResult is an exact copy of the RecognizerResult object from presidio-analyzer.

Represents the findings of detected entity.

RecognizerResult (PIIEntity)

Recognizer Result represents the findings of the detected entity.

Result of a recognizer analyzing the text.

:param entity_type: the type of the entity :param start: the start location of the detected entity :param end: the end location of the detected entity :param score: the score of the detection

Source code in presidio_anonymizer/entities/engine/recognizer_result.py
class RecognizerResult(PIIEntity):
    """
    Recognizer Result represents the findings of the detected entity.

    Result of a recognizer analyzing the text.

    :param entity_type: the type of the entity
    :param start: the start location of the detected entity
    :param end: the end location of the detected entity
    :param score: the score of the detection
    """

    logger = logging.getLogger("presidio-anonymizer")

    def __init__(self, entity_type: str, start: int, end: int, score: float):
        PIIEntity.__init__(self, start, end, entity_type)
        self.score = score
        validate_parameter_exists(score, "analyzer result", "score")

    @classmethod
    def from_json(cls, data: Dict):
        """
        Create RecognizerResult from json.

        :param data: e.g. {
            "start": 24,
            "end": 32,
            "score": 0.8,
            "entity_type": "NAME"
        }
        :return: RecognizerResult
        """
        score = data.get("score")
        entity_type = data.get("entity_type")
        start = data.get("start")
        end = data.get("end")
        return cls(entity_type, start, end, score)

    def __gt__(self, other):
        """
        Check if one result is greater by using the results indices in the text.

        Results are compared by start index; the end index breaks ties.

        :param other: another RecognizerResult
        :return: bool
        """
        if self.start == other.start:
            return self.end > other.end
        return self.start > other.start

    def __eq__(self, other):
        """
        Check two results are equal by using all class fields.

        :param other: another RecognizerResult
        :return: bool
        """
        equal_type = self.entity_type == other.entity_type
        equal_score = self.score == other.score
        return self.equal_indices(other) and equal_type and equal_score

    def __hash__(self):
        """
        Hash the result data by using all class fields.

        :return: int
        """
        # Hash the values themselves rather than their string form: scores
        # that compare equal (e.g. 1 and 1.0) must produce the same hash so
        # that __hash__ stays consistent with __eq__.
        return hash((self.start, self.end, self.score, self.entity_type))

    def __str__(self) -> str:
        """Return a string representation of the instance."""
        return (
            f"type: {self.entity_type}, "
            f"start: {self.start}, "
            f"end: {self.end}, "
            f"score: {self.score}"
        )

    def has_conflict(self, other):
        """
        Check if two recognizer results are conflicted or not.

        I have a conflict if:
        1. My indices are the same as the other and my score is lower or equal.
        2. If my indices are contained in another.

        :param other: RecognizerResult
        :return: bool
        """
        if self.equal_indices(other):
            return self.score <= other.score
        return other.contains(self)

    def contains(self, other):
        """
        Check if one result is contained or equal to another result.

        :param other: another RecognizerResult
        :return: bool
        """
        return self.start <= other.start and self.end >= other.end

    def equal_indices(self, other):
        """
        Check if the indices are equal between two results.

        :param other: another RecognizerResult
        :return: bool
        """
        return self.start == other.start and self.end == other.end
__eq__(self, other) special

Check two results are equal by using all class fields.

:param other: another RecognizerResult :return: bool

Source code in presidio_anonymizer/entities/engine/recognizer_result.py
def __eq__(self, other):
    """
    Return True when indices, entity type and score all match the other result.

    :param other: another RecognizerResult
    :return: bool
    """
    type_matches = self.entity_type == other.entity_type
    score_matches = self.score == other.score
    indices_match = self.equal_indices(other)
    return indices_match and type_matches and score_matches
__gt__(self, other) special

Check if one result is greater by using the results indices in the text.

:param other: another RecognizerResult :return: bool

Source code in presidio_anonymizer/entities/engine/recognizer_result.py
def __gt__(self, other):
    """
    Order results by their position in the text.

    The ``start`` index decides; ``end`` is only consulted when both results
    start at the same index.

    :param other: another RecognizerResult
    :return: bool
    """
    return (self.start, self.end) > (other.start, other.end)
__hash__(self) special

Hash the result data by using all class fields.

:return: int

Source code in presidio_anonymizer/entities/engine/recognizer_result.py
def __hash__(self):
    """
    Hash the result data by using all class fields.

    The fields are hashed as values (a tuple) rather than as formatted text,
    so numerically equal fields produce the same hash: a score of ``1`` and
    a score of ``1.0`` compare equal with ``==`` and therefore must hash
    identically, which their string forms ``"1"`` and ``"1.0"`` do not.

    :return: int
    """
    return hash((self.start, self.end, self.score, self.entity_type))
__str__(self) special

Return a string representation of the instance.

Source code in presidio_anonymizer/entities/engine/recognizer_result.py
def __str__(self) -> str:
    """Render the result as ``type: ..., start: ..., end: ..., score: ...``."""
    labelled_fields = (
        ("type", self.entity_type),
        ("start", self.start),
        ("end", self.end),
        ("score", self.score),
    )
    return ", ".join(f"{label}: {value}" for label, value in labelled_fields)
contains(self, other)

Check if one result is contained or equal to another result.

:param other: another RecognizerResult :return: bool

Source code in presidio_anonymizer/entities/engine/recognizer_result.py
def contains(self, other):
    """
    Tell whether the span of ``other`` lies within (or equals) this span.

    :param other: another RecognizerResult
    :return: bool
    """
    if self.start > other.start:
        # ``other`` begins before this span does, so it cannot be inside it.
        return False
    return self.end >= other.end
equal_indices(self, other)

Check if the indices are equal between two results.

:param other: another RecognizerResult :return:

Source code in presidio_anonymizer/entities/engine/recognizer_result.py
def equal_indices(self, other):
    """
    Tell whether both results have the same ``start`` and the same ``end``.

    :param other: another RecognizerResult
    :return: bool
    """
    return (self.start, self.end) == (other.start, other.end)
from_json(data) classmethod

Create RecognizerResult from json.

:param data: e.g. { "start": 24, "end": 32, "score": 0.8, "entity_type": "NAME" } :return: RecognizerResult

Source code in presidio_anonymizer/entities/engine/recognizer_result.py
@classmethod
def from_json(cls, data: Dict):
    """
    Build an instance from a json-like dictionary.

    Keys missing from ``data`` are passed to the constructor as ``None``.

    :param data: e.g. {
        "start": 24,
        "end": 32,
        "score": 0.8,
        "entity_type": "NAME"
    }
    :return: RecognizerResult
    """
    # Listed in the positional order in which the constructor is called.
    constructor_order = ("entity_type", "start", "end", "score")
    return cls(*(data.get(field) for field in constructor_order))
has_conflict(self, other)

Check if two recognizer results are conflicted or not.

I have a conflict if: 1. My indices are the same as the other's and my score is lower than or equal to the other's. 2. My indices differ from the other's and are contained in them.

:param other: RecognizerResult :return:

Source code in presidio_anonymizer/entities/engine/recognizer_result.py
def has_conflict(self, other):
    """
    Decide whether this result is in conflict with ``other``.

    This result is conflicted when either:
    1. Both results span the same indices and this score is lower than
       or equal to the other's score.
    2. The indices differ and this span is contained in the other's span.

    :param other: the RecognizerResult to compare against
    :return: the outcome of whichever of the two checks above applies
    """
    if not self.equal_indices(other):
        # Different spans: conflicted only when the other span covers this one.
        return other.contains(self)
    # Identical spans: a tie on score also counts as a conflict (``<=``).
    return self.score <= other.score

result special

Engine result items either for anonymize or decrypt.

engine_result

Handle a serializable anonymizer result.

EngineResult

Engine result.

Source code in presidio_anonymizer/entities/engine/result/engine_result.py
class EngineResult:
    """Engine result: the processed text plus one item per handled entity."""

    def __init__(self, text: str = None, items: List[OperatorResult] = None):
        """Create EngineResult entity.

        :param text: The anonymized text.
        :param items: List of PII entities and the indices
         of their replacements in the anonymized text. A fresh empty list
         is used when omitted.
        """
        if items is None:
            items = []
        self.text = text
        self.items = items

    def set_text(self, text: str):
        """Set a text."""
        self.text = text

    def add_item(self, item: OperatorResult):
        """Add an item.

        :param item: an item to add to the list.
        """
        self.items.append(item)

    def normalize_item_indexes(self):
        """Normalize the indexes to be index from start.

        Each item's ``start`` becomes ``len(self.text) - item.end`` and its
        ``end`` becomes that new start plus ``len(item.text)``.
        """
        text_len = len(self.text)
        for result_item in self.items:
            result_item.start = text_len - result_item.end
            result_item.end = result_item.start + len(result_item.text)

    def to_json(self) -> str:
        """Return a json string serializing this instance."""
        # Objects json cannot encode natively are serialized via their __dict__.
        return json.dumps(self, default=lambda x: x.__dict__)

    def __repr__(self):
        """Return a string representation of the object."""

        items_repr = (
            ",\n    ".join([str(item) for item in self.items]) if self.items else ""
        )
        return f"text: {self.text}\nitems:\n[\n    {items_repr}\n]\n"

    def __eq__(self, other) -> bool:
        """Verify two instances are equal.

        Two results are equal when their texts match, they hold the same
        number of items, and the items are pairwise equal in order.

        :param other: the EngineResult to compare against
        :return: true if the two instances are equal, false otherwise.
        """
        return (
            self.text == other.text
            # Pairwise iteration stops at the shorter sequence, so the
            # lengths must be compared explicitly; otherwise a result whose
            # items are a prefix of the other's would be reported as equal.
            and len(self.items) == len(other.items)
            and all(mine == theirs for mine, theirs in zip(self.items, other.items))
        )
__eq__(self, other) special

Verify two instances are equal.

Returns true if the two instances are equal, false otherwise.

Source code in presidio_anonymizer/entities/engine/result/engine_result.py
def __eq__(self, other) -> bool:
    """Verify two instances are equal.

    Two results are equal when their texts match, they hold the same
    number of items, and the items are pairwise equal in order.

    :param other: the EngineResult to compare against
    :return: true if the two instances are equal, false otherwise.
    """
    return (
        self.text == other.text
        # Pairwise iteration stops at the shorter sequence, so the lengths
        # must be compared explicitly; otherwise a result whose items are a
        # prefix of the other's would be reported as equal.
        and len(self.items) == len(other.items)
        and all(mine == theirs for mine, theirs in zip(self.items, other.items))
    )
__init__(self, text=None, items=None) special

Create EngineResult entity.

:param text: The anonymized text. :param items: List of PII entities and the indices of their replacements in the anonymized text.

Source code in presidio_anonymizer/entities/engine/result/engine_result.py
def __init__(self, text: str = None, items: List[OperatorResult] = None):
    """Set up the result with its text and item list.

    :param text: The anonymized text.
    :param items: List of PII entities and the indices
     of their replacements in the anonymized text. When omitted, a new
     empty list is created for this instance.
    """
    self.text = text
    self.items = [] if items is None else items
__repr__(self) special

Return a string representation of the object.

Source code in presidio_anonymizer/entities/engine/result/engine_result.py
def __repr__(self):
    """Return the text followed by the items, one item per indented line."""
    rendered_items = ""
    if self.items:
        rendered_items = ",\n    ".join(str(entry) for entry in self.items)
    return f"text: {self.text}\nitems:\n[\n    {rendered_items}\n]\n"
add_item(self, item)

Add an item.

:param item: an item to add to the list.

Source code in presidio_anonymizer/entities/engine/result/engine_result.py
def add_item(self, item: OperatorResult):
    """Append an item to the end of this result's ``items`` list.

    :param item: an item to add to the list.
    """
    self.items.append(item)
normalize_item_indexes(self)

Normalize the indexes to be index from start.

Source code in presidio_anonymizer/entities/engine/result/engine_result.py
def normalize_item_indexes(self):
    """Normalize the item indexes so they are counted from the start.

    Each item's ``start`` becomes ``len(self.text) - item.end`` and its
    ``end`` becomes that new start plus ``len(item.text)``. Items are
    updated in place.
    """
    total_length = len(self.text)
    for entry in self.items:
        new_start = total_length - entry.end
        entry.start = new_start
        entry.end = new_start + len(entry.text)
set_text(self, text)

Set a text.

Source code in presidio_anonymizer/entities/engine/result/engine_result.py
def set_text(self, text: str):
    """Replace the text held by this result.

    :param text: the new text to store.
    """
    self.text = text
to_json(self)

Return a json string serializing this instance.

Source code in presidio_anonymizer/entities/engine/result/engine_result.py
def to_json(self) -> str:
    """Serialize this instance to a json string.

    Any object json cannot encode natively (this instance included) is
    serialized through its ``__dict__``.
    """

    def _attributes_of(obj):
        return obj.__dict__

    return json.dumps(self, default=_attributes_of)
operator_result
OperatorResult (PIIEntity)

A class to hold data for engines results either anonymize or deanonymize.

Source code in presidio_anonymizer/entities/engine/result/operator_result.py
class OperatorResult(PIIEntity):
    """Hold the data of a single engine result, for anonymize or deanonymize."""

    def __init__(
        self,
        start: int,
        end: int,
        entity_type: str,
        text: str = None,
        operator: str = None,
    ):
        """Store the span, entity type, resulting text and operator name."""
        PIIEntity.__init__(self, start, end, entity_type)
        self.operator = operator
        self.text = text

    def __repr__(self):
        """Return the string form of the dictionary from ``to_dict``."""
        as_dict = self.to_dict()
        return str(as_dict)

    def to_dict(self) -> Dict:
        """Return the instance's attribute dictionary (``__dict__`` itself)."""
        return self.__dict__

    def __str__(self):
        """Return the string form of the dictionary from ``to_dict``."""
        as_dict = self.to_dict()
        return str(as_dict)

    def __eq__(self, other: "OperatorResult") -> bool:
        """
        Compare start, end, entity type, operator and text of both results.

        :param other: OperatorResult
        :return: bool
        """
        compared_fields = ("start", "end", "entity_type", "operator", "text")
        return all(
            getattr(self, name) == getattr(other, name) for name in compared_fields
        )

    @classmethod
    def from_json(cls, json: Dict) -> "OperatorResult":
        """
        Create OperatorResult from user json.

        Only ``start``, ``end``, ``entity_type``, ``text`` and ``operator``
        are read; missing keys become ``None`` and other keys are ignored.

        :param json: json representation for this operator result. For example:
        {
            "start": 0,
            "end": 10,
            "key": "1111111111111111",
            "entity_type":"PERSON",
            "text":"resulted_text",
            "operator":"encrypt",
        }
        """
        field_names = ("start", "end", "entity_type", "text", "operator")
        return cls(**{name: json.get(name) for name in field_names})
__eq__(self, other) special

Verify two OperatorResults are equal.

:param other: OperatorResult :return: bool

Source code in presidio_anonymizer/entities/engine/result/operator_result.py
def __eq__(self, other: "OperatorResult") -> bool:
    """
    Compare start, end, entity type, operator and text of both results.

    :param other: OperatorResult
    :return: bool
    """
    compared_fields = ("start", "end", "entity_type", "operator", "text")
    return all(
        getattr(self, name) == getattr(other, name) for name in compared_fields
    )
__str__(self) special

Return a string representation of the object.

Source code in presidio_anonymizer/entities/engine/result/operator_result.py
def __str__(self):
    """Return the string form of the dictionary produced by ``to_dict``."""
    return str(self.to_dict())
from_json(json) classmethod

Create OperatorResult from user json.

:param json: json representation for this operator result. For example: { "start": 0, "end": 10, "key": "1111111111111111", "entity_type":"PERSON", "text":"resulted_text", "operator":"encrypt", }

Source code in presidio_anonymizer/entities/engine/result/operator_result.py
@classmethod
def from_json(cls, json: Dict) -> "OperatorResult":
    """
    Create OperatorResult from user json.

    Only ``start``, ``end``, ``entity_type``, ``text`` and ``operator`` are
    read; missing keys become ``None`` and other keys are ignored.

    :param json: json representation for this operator result. For example:
    {
        "start": 0,
        "end": 10,
        "key": "1111111111111111",
        "entity_type":"PERSON",
        "text":"resulted_text",
        "operator":"encrypt",
    }
    """
    field_names = ("start", "end", "entity_type", "text", "operator")
    return cls(**{name: json.get(name) for name in field_names})
to_dict(self)

Return object as Dict.

Source code in presidio_anonymizer/entities/engine/result/operator_result.py
def to_dict(self) -> Dict:
    """Return object as Dict.

    The returned value is the instance's own ``__dict__``, not a copy.
    """
    return self.__dict__

invalid_exception

Exception to indicate the request we received is invalid.

InvalidParamException (Exception)

Throw exception with error when user input is not valid.

param msg: Message to be added to the exception

Source code in presidio_anonymizer/entities/invalid_exception.py
class InvalidParamException(Exception):
    """Raised with an error message when user input is not valid.

    :param msg: Message to be added to the exception; it is also kept on
        the instance as ``err_msg``.
    """

    def __init__(self, msg: str):
        super().__init__(msg)
        self.err_msg = msg

operators special

Initializing all the existing anonymizers.

aes_cipher

AESCipher

Advanced Encryption Standard (aka Rijndael) en/decryption in CBC mode.

Source code in presidio_anonymizer/operators/aes_cipher.py
class AESCipher:
    """AES (aka Rijndael) encryption and decryption helpers in CBC mode."""

    @staticmethod
    def encrypt(key: bytes, text: str) -> str:
        """
        Encrypt a text using the AES cipher in CBC mode.

        The text is UTF-8 encoded and padded to the AES block size. The
        result is the base64 encoding of the IV followed by the ciphertext.

        :param key: AES encryption key in bytes.
        :param text: The text for encryption.
        :returns: The encrypted text.
        """
        padded_payload = pad(text.encode("utf-8"), AES.block_size)
        iv = Random.new().read(AES.block_size)
        cipher = AES.new(key, AES.MODE_CBC, iv)
        ciphertext = cipher.encrypt(padded_payload)
        return base64.b64encode(iv + ciphertext).decode()

    @staticmethod
    def decrypt(key: bytes, text: str) -> str:
        """
        Decrypt a previously AES-CBC encrypted text.

        Expects base64 input whose first block is the IV and whose
        remainder is the padded ciphertext.

        :param key: AES encryption key in bytes.
        :param text: The text for decryption.
        :returns: The decrypted text.
        """
        raw = base64.b64decode(text)
        iv, ciphertext = raw[: AES.block_size], raw[AES.block_size :]
        cipher = AES.new(key, AES.MODE_CBC, iv)
        plaintext = unpad(cipher.decrypt(ciphertext), AES.block_size)
        return plaintext.decode("utf-8")

    @staticmethod
    def is_valid_key_size(key: bytes) -> bool:
        """
        Check that the key length is one of the sizes in ``AES.key_size``.

        :param key: AES encryption key in bytes.
        :returns: True if the key is of valid size, False otherwise.
        """
        allowed_sizes = AES.key_size
        return len(key) in allowed_sizes
decrypt(key, text) staticmethod

Decrypts a previously AES-CBC encrypted text.

:param key: AES encryption key in bytes. :param text: The text for decryption. :returns: The decrypted text.

Source code in presidio_anonymizer/operators/aes_cipher.py
@staticmethod
def decrypt(key: bytes, text: str) -> str:
    """
    Decrypt a previously AES-CBC encrypted text.

    Expects base64 input whose first block is the IV and whose remainder
    is the padded ciphertext.

    :param key: AES encryption key in bytes.
    :param text: The text for decryption.
    :returns: The decrypted text.
    """
    raw = base64.b64decode(text)
    iv, ciphertext = raw[: AES.block_size], raw[AES.block_size :]
    cipher = AES.new(key, AES.MODE_CBC, iv)
    plaintext = unpad(cipher.decrypt(ciphertext), AES.block_size)
    return plaintext.decode("utf-8")
encrypt(key, text) staticmethod

Encrypts a text using AES cypher in CBC mode.

Uses padding and random IV. :param key: AES encryption key in bytes. :param text: The text for encryption. :returns: The encrypted text.

Source code in presidio_anonymizer/operators/aes_cipher.py
@staticmethod
def encrypt(key: bytes, text: str) -> str:
    """
    Encrypt a text using the AES cipher in CBC mode.

    The text is UTF-8 encoded and padded to the AES block size. The result
    is the base64 encoding of the IV followed by the ciphertext.

    :param key: AES encryption key in bytes.
    :param text: The text for encryption.
    :returns: The encrypted text.
    """
    padded_payload = pad(text.encode("utf-8"), AES.block_size)
    iv = Random.new().read(AES.block_size)
    cipher = AES.new(key, AES.MODE_CBC, iv)
    ciphertext = cipher.encrypt(padded_payload)
    return base64.b64encode(iv + ciphertext).decode()
is_valid_key_size(key) staticmethod

Validate key size for AES.

:param key: AES encryption key in bytes. :returns: True if the key is of valid size, False otherwise.

Source code in presidio_anonymizer/operators/aes_cipher.py
@staticmethod
def is_valid_key_size(key: bytes) -> bool:
    """
    Validate key size for AES.

    The key is valid when its length in bytes is one of the values in
    ``AES.key_size``.

    :param key: AES encryption key in bytes.
    :returns: True if the key is of valid size, False otherwise.
    """
    return len(key) in AES.key_size

custom

Replaces the PII text with function result.

Custom (Operator)

Replace PII text entity with the results of a function executed on the PII text.

The function return type must be a string.

Source code in presidio_anonymizer/operators/custom.py
class Custom(Operator):
    """
    Replace PII text entity with the results of a function executed on the PII text.

    The function return type must be a string.
    """

    LAMBDA = "lambda"

    def operate(self, text: str = None, params: Dict = None) -> str:
        """
        Run the user-supplied function on the text.

        :param text: the PII text handed to the function
        :param params: holds the callable under the ``"lambda"`` key
        :return: result of function executed on the text.
        """
        new_val = params.get(self.LAMBDA)
        return new_val(text)

    def validate(self, params: Dict) -> None:
        """
        Validate the provided function is returning a string.

        The function is probed by calling it once with the literal ``"PII"``.

        :param params: holds the callable under the ``"lambda"`` key
        :raises InvalidParamException: if the value is not callable, or if
            the probe call does not return a ``str``.
        """
        new_val = params.get(self.LAMBDA)
        if not callable(new_val):
            raise InvalidParamException("New value must be a callable function")
        # isinstance (not an exact type comparison) so that functions which
        # return a subclass of str are accepted as well.
        if not isinstance(new_val("PII"), str):
            raise InvalidParamException("Function return type must be a str")

    def operator_name(self) -> str:
        """Return operator name."""
        return "custom"

    def operator_type(self) -> OperatorType:
        """Return operator type."""
        return OperatorType.Anonymize
operate(self, text=None, params=None)

:return: result of function executed on the text.

Source code in presidio_anonymizer/operators/custom.py
def operate(self, text: str = None, params: Dict = None) -> str:
    """
    Run the user-supplied function on the text.

    :param text: the PII text handed to the function
    :param params: holds the callable under the key named by ``self.LAMBDA``
    :return: result of function executed on the text.
    """
    transform = params.get(self.LAMBDA)
    replacement = transform(text)
    return replacement
operator_name(self)

Return operator name.

Source code in presidio_anonymizer/operators/custom.py
def operator_name(self) -> str:
    """Return the name of this operator, ``"custom"``."""
    return "custom"
operator_type(self)

Return operator type.

Source code in presidio_anonymizer/operators/custom.py
def operator_type(self) -> OperatorType:
    """Return the type of this operator, ``OperatorType.Anonymize``."""
    return OperatorType.Anonymize
validate(self, params)

Validate the provided function is returning a string.

Source code in presidio_anonymizer/operators/custom.py
def validate(self, params: Dict) -> None:
    """
    Validate the provided function is returning a string.

    The function is probed by calling it once with the literal ``"PII"``.

    :param params: holds the callable under the key named by ``self.LAMBDA``
    :raises InvalidParamException: if the value is not callable, or if the
        probe call does not return a ``str``.
    """
    new_val = params.get(self.LAMBDA)
    if not callable(new_val):
        raise InvalidParamException("New value must be a callable function")
    # isinstance (not an exact type comparison) so that functions which
    # return a subclass of str are accepted as well.
    if not isinstance(new_val("PII"), str):
        raise InvalidParamException("Function return type must be a str")

decrypt

Decrypt (Operator)

Decrypt text from its encrypted form.

Source code in presidio_anonymizer/operators/decrypt.py
class Decrypt(Operator):
    """Decrypt text from its encrypted form."""

    NAME = "decrypt"
    KEY = "key"

    def operate(self, text: str = None, params: Dict = None) -> str:
        """
        Decrypt the text.

        :param text: The text for decryption.
        :param params:
            * *key* The key supplied by the user for the encryption.
        :return: The decrypted text
        """
        key_bytes = params.get(self.KEY).encode("utf8")
        return AESCipher.decrypt(key=key_bytes, text=text)

    def validate(self, params: Dict = None) -> None:
        """
        Validate Decrypt parameters.

        :param params:
            * *key* The key supplied by the user for the encryption.
                    Should be a string of 128, 192 or 256 bits length.
        :raises InvalidParamException in case of an invalid parameter.
        """
        key = params.get(self.KEY)
        validate_parameter(key, self.KEY, str)
        key_bytes = key.encode("utf8")
        if AESCipher.is_valid_key_size(key_bytes):
            return
        raise InvalidParamException(
            f"Invalid input, {self.KEY} must be of length 128, 192 or 256 bits"
        )

    def operator_name(self) -> str:
        """Return the operator name stored in ``NAME``."""
        return self.NAME

    def operator_type(self) -> OperatorType:
        """Return the operator type, ``OperatorType.Deanonymize``."""
        return OperatorType.Deanonymize
operate(self, text=None, params=None)

Decrypt the text.

:param text: The text for decryption. :param params: * key The key supplied by the user for the encryption. :return: The decrypted text

Source code in presidio_anonymizer/operators/decrypt.py
def operate(self, text: str = None, params: Dict = None) -> str:
    """
    Decrypt the text.

    :param text: The text for decryption.
    :param params:
        * *key* The key supplied by the user for the encryption.
    :return: The decrypted text
    """
    key_bytes = params.get(self.KEY).encode("utf8")
    return AESCipher.decrypt(key=key_bytes, text=text)
operator_name(self)

Return operator name.

Source code in presidio_anonymizer/operators/decrypt.py
def operator_name(self) -> str:
    """Return the operator name stored in ``self.NAME``."""
    return self.NAME
operator_type(self)

Return operator type.

Source code in presidio_anonymizer/operators/decrypt.py
def operator_type(self) -> OperatorType:
    """Return the type of this operator, ``OperatorType.Deanonymize``."""
    return OperatorType.Deanonymize
validate(self, params=None)

Validate Decrypt parameters.

:param params: * key The key supplied by the user for the encryption. Should be a string of 128, 192 or 256 bits length. :raises InvalidParamException in case of an invalid parameter.

Source code in presidio_anonymizer/operators/decrypt.py
def validate(self, params: Dict = None) -> None:
    """
    Validate Decrypt parameters.

    :param params:
        * *key* The key supplied by the user for the encryption.
                Should be a string of 128, 192 or 256 bits length.
    :raises InvalidParamException in case of an invalid parameter.
    """
    key = params.get(self.KEY)
    validate_parameter(key, self.KEY, str)
    key_bytes = key.encode("utf8")
    if AESCipher.is_valid_key_size(key_bytes):
        return
    raise InvalidParamException(
        f"Invalid input, {self.KEY} must be of length 128, 192 or 256 bits"
    )

encrypt

Encrypt (Operator)

Anonymizes text into an encrypted form, so that it can later be restored by decryption.

Source code in presidio_anonymizer/operators/encrypt.py
class Encrypt(Operator):
    """Anonymize text into an encrypted form, so it can be restored by decryption."""

    KEY = "key"

    def operate(self, text: str = None, params: Dict = None) -> str:
        """
        Anonymize the text with an encrypted text.

        :param text: The text for encryption.
        :param params:
            * *key* The key supplied by the user for the encryption.
        :return: The encrypted text
        """
        key_bytes = params.get(self.KEY).encode("utf8")
        return AESCipher.encrypt(key_bytes, text)

    def validate(self, params: Dict = None) -> None:
        """
        Validate Encrypt parameters.

        :param params:
            * *key* The key supplied by the user for the encryption.
                    Should be a string of 128, 192 or 256 bits length.
        :raises InvalidParamException in case of an invalid parameter.
        """
        key = params.get(self.KEY)
        validate_parameter(key, self.KEY, str)
        key_bytes = key.encode("utf8")
        if AESCipher.is_valid_key_size(key_bytes):
            return
        raise InvalidParamException(
            f"Invalid input, {self.KEY} must be of length 128, 192 or 256 bits"
        )

    def operator_name(self) -> str:
        """Return the operator name, ``"encrypt"``."""
        return "encrypt"

    def operator_type(self) -> OperatorType:
        """Return the operator type, ``OperatorType.Anonymize``."""
        return OperatorType.Anonymize
operate(self, text=None, params=None)

Anonymize the text with an encrypted text.

:param text: The text for encryption. :param params: * key The key supplied by the user for the encryption. :return: The encrypted text

Source code in presidio_anonymizer/operators/encrypt.py
def operate(self, text: str = None, params: Dict = None) -> str:
    """
    Anonymize the text with an encrypted text.

    :param text: The text for encryption.
    :param params:
        * *key* The key supplied by the user for the encryption.
    :return: The encrypted text
    """
    key_bytes = params.get(self.KEY).encode("utf8")
    return AESCipher.encrypt(key_bytes, text)
operator_name(self)

Return operator name.

Source code in presidio_anonymizer/operators/encrypt.py
def operator_name(self) -> str:
    """Return the name of this operator, ``"encrypt"``."""
    return "encrypt"
operator_type(self)

Return operator type.

Source code in presidio_anonymizer/operators/encrypt.py
def operator_type(self) -> OperatorType:
    """Return the type of this operator, ``OperatorType.Anonymize``."""
    return OperatorType.Anonymize
validate(self, params=None)

Validate Encrypt parameters.

:param params: * key The key supplied by the user for the encryption. Should be a string of 128, 192 or 256 bits length. :raises InvalidParamException in case of an invalid parameter.

Source code in presidio_anonymizer/operators/encrypt.py
def validate(self, params: Dict = None) -> None:
    """
    Validate Encrypt parameters.

    :param params:
        * *key* The key supplied by the user for the encryption.
                Should be a string of 128, 192 or 256 bits length.
    :raises InvalidParamException in case of an invalid parameter.
    """
    key = params.get(self.KEY)
    validate_parameter(key, self.KEY, str)
    key_bytes = key.encode("utf8")
    if AESCipher.is_valid_key_size(key_bytes):
        return
    raise InvalidParamException(
        f"Invalid input, {self.KEY} must be of length 128, 192 or 256 bits"
    )

hash

Hashes the PII text entity.

Hash (Operator)

Hash given text with sha256/sha512/md5 algorithm.

Source code in presidio_anonymizer/operators/hash.py
class Hash(Operator):
    """Hash given text with sha256/sha512/md5 algorithm."""

    HASH_TYPE = "hash_type"
    SHA256 = "sha256"
    SHA512 = "sha512"
    MD5 = "md5"

    def operate(self, text: str = None, params: Dict = None) -> str:
        """
        Hash the given text with the selected algorithm (sha256 by default).

        :param text: the text to hash
        :param params: may hold the algorithm name under ``hash_type``
        :return: hex digest of the hashed original text
        """
        hash_type = self._get_hash_type_or_default(params)
        hashers = {self.SHA256: sha256, self.SHA512: sha512, self.MD5: md5}
        selected = hashers.get(hash_type)
        return selected(text.encode()).hexdigest()

    def validate(self, params: Dict = None) -> None:
        """Validate the hash type is string and in range of allowed hash types."""
        allowed_types = [self.SHA256, self.SHA512, self.MD5]
        requested_type = self._get_hash_type_or_default(params)
        validate_parameter_in_range(
            allowed_types, requested_type, self.HASH_TYPE, str
        )

    def operator_name(self) -> str:
        """Return the operator name, ``"hash"``."""
        return "hash"

    def _get_hash_type_or_default(self, params: Dict = None):
        # Falls back to sha256 when no ``hash_type`` key is present.
        return params.get(self.HASH_TYPE, self.SHA256)

    def operator_type(self) -> OperatorType:
        """Return the operator type, ``OperatorType.Anonymize``."""
        return OperatorType.Anonymize
operate(self, text=None, params=None)

Hash the given value using the selected algorithm (sha256 by default; sha512 and md5 are also supported).

:return: hashed original text

Source code in presidio_anonymizer/operators/hash.py
def operate(self, text: str = None, params: Dict = None) -> str:
    """
    Hash the given text with the selected algorithm.

    The algorithm is whatever ``self._get_hash_type_or_default(params)``
    returns; sha256, sha512 and md5 are supported.

    :param text: the text to hash
    :param params: the operator parameters used to pick the algorithm
    :return: hex digest of the hashed original text
    """
    hash_type = self._get_hash_type_or_default(params)
    hashers = {self.SHA256: sha256, self.SHA512: sha512, self.MD5: md5}
    selected = hashers.get(hash_type)
    return selected(text.encode()).hexdigest()
operator_name(self)

Return operator name.

Source code in presidio_anonymizer/operators/hash.py
def operator_name(self) -> str:
    """Return the name of this operator, ``"hash"``."""
    return "hash"
operator_type(self)

Return operator type.

Source code in presidio_anonymizer/operators/hash.py
def operator_type(self) -> OperatorType:
    """Return the type of this operator, ``OperatorType.Anonymize``."""
    return OperatorType.Anonymize
validate(self, params=None)

Validate the hash type is string and in range of allowed hash types.

Source code in presidio_anonymizer/operators/hash.py
def validate(self, params: Dict = None) -> None:
    """Validate the hash type is string and in range of allowed hash types."""
    allowed_types = [self.SHA256, self.SHA512, self.MD5]
    requested_type = self._get_hash_type_or_default(params)
    validate_parameter_in_range(
        allowed_types, requested_type, self.HASH_TYPE, str
    )

mask

Mask some or all given text entity PII with given character.

Mask (Operator)

Mask the given text with given value.

Source code in presidio_anonymizer/operators/mask.py
class Mask(Operator):
    """Mask the given text with given value."""

    CHARS_TO_MASK = "chars_to_mask"
    FROM_END = "from_end"
    MASKING_CHAR = "masking_char"

    def operate(self, text: str = None, params: Dict = None) -> str:
        """
        Mask a given amount of text with a given character.

        :param text: the text to be masked
        :param params:
            masking_char: The character to be masked with
            chars_to_mask: The amount of characters to mask
            from_end: Whether to mask the text from its end
        :return: the masked text
        """
        requested_amount = params.get(self.CHARS_TO_MASK)
        chars_to_mask = self._get_effective_chars_to_mask(text, requested_amount)
        return self._get_anonymized_text(
            text,
            chars_to_mask,
            params.get(self.FROM_END),
            params.get(self.MASKING_CHAR),
        )

    def validate(self, params: Dict = None) -> None:
        """
        Validate the parameters for mask.

        :param params:
            masking_char: The character to be masked with
            chars_to_mask: The amount of characters to mask
            from_end: Whether to mask the text from its end
        """
        masking_char = params.get(self.MASKING_CHAR)
        validate_parameter(masking_char, self.MASKING_CHAR, str)
        # NOTE(review): only strings longer than one character are rejected
        # here; whether an empty string is rejected depends on
        # validate_parameter - TODO confirm.
        if len(masking_char) > 1:
            raise InvalidParamException(
                f"Invalid input, {self.MASKING_CHAR} must be a character"
            )

        chars_to_mask = params.get(self.CHARS_TO_MASK)
        validate_parameter(chars_to_mask, self.CHARS_TO_MASK, int)
        from_end = params.get(self.FROM_END)
        validate_parameter(from_end, self.FROM_END, bool)

    def operator_name(self) -> str:
        """Return the operator name, ``"mask"``."""
        return "mask"

    def operator_type(self) -> OperatorType:
        """Return the operator type, ``OperatorType.Anonymize``."""
        return OperatorType.Anonymize

    @staticmethod
    def _get_effective_chars_to_mask(text, chars_to_mask):
        # Non-positive requests mask nothing; otherwise cap at the text length.
        if chars_to_mask <= 0:
            return 0
        return min(len(text), chars_to_mask)

    @staticmethod
    def _get_anonymized_text(text, chars_to_mask, from_end, masking_char):
        mask = masking_char * chars_to_mask
        if from_end:
            # Keep the head of the text and replace the tail.
            kept_length = len(text) - chars_to_mask
            return text[:kept_length] + mask
        # Replace the head of the text and keep the tail.
        return mask + text[chars_to_mask:]
operate(self, text=None, params=None)

Mask a given amount of text with a given character.

:param text: the text to be masked :param params: masking_char: The character to be masked with chars_to_mask: The amount of characters to mask from_end: Whether to mask the text from its end :return: the masked text

Source code in presidio_anonymizer/operators/mask.py
def operate(self, text: str = None, params: Dict = None) -> str:
    """
    Mask part of the text with the configured masking character.

    :param text: the text to be masked
    :param params:
        masking_char: The character to be masked with
        chars_to_mask: The amount of characters to mask
        from_end: Whether to mask the text from its end
    :return: the masked text
    """
    requested = params.get(self.CHARS_TO_MASK)
    # The helper decides how many characters are actually masked.
    mask_length = self._get_effective_chars_to_mask(text, requested)
    mask_from_end = params.get(self.FROM_END)
    mask_symbol = params.get(self.MASKING_CHAR)
    return self._get_anonymized_text(text, mask_length, mask_from_end, mask_symbol)
operator_name(self)

Return operator name.

Source code in presidio_anonymizer/operators/mask.py
def operator_name(self) -> str:
    """Return the name identifying this operator: ``mask``."""
    return "mask"
operator_type(self)

Return operator type.

Source code in presidio_anonymizer/operators/mask.py
def operator_type(self) -> OperatorType:
    """Return the operator's type, ``OperatorType.Anonymize``."""
    return OperatorType.Anonymize
validate(self, params=None)

Validate the parameters for mask.

:param params: masking_char: The character to be masked with chars_to_mask: The amount of characters to mask from_end: Whether to mask the text from its end

Source code in presidio_anonymizer/operators/mask.py
def validate(self, params: Dict = None) -> None:
    """
    Check that the mask operator received usable parameters.

    :param params: dictionary expected to hold
        masking_char: a string of at most one character used as the mask
        chars_to_mask: an int, the amount of characters to mask
        from_end: a bool, whether masking starts at the end of the text
    :raises InvalidParamException: when masking_char is longer than one
        character; validate_parameter performs the remaining checks.
    """
    mask_symbol = params.get(self.MASKING_CHAR)
    validate_parameter(mask_symbol, self.MASKING_CHAR, str)
    # Only the upper bound is enforced here: an empty string passes.
    if len(mask_symbol) > 1:
        error_text = f"Invalid input, {self.MASKING_CHAR} must be a character"
        raise InvalidParamException(error_text)

    amount = params.get(self.CHARS_TO_MASK)
    validate_parameter(amount, self.CHARS_TO_MASK, int)
    end_flag = params.get(self.FROM_END)
    validate_parameter(end_flag, self.FROM_END, bool)

operator

Operator abstraction - each operator should implement this class.

Operator (ABC)

Operator abstract class to be implemented by each operator.

Source code in presidio_anonymizer/operators/operator.py
class Operator(ABC):
    """
    Abstract base class describing what every operator must provide.

    A concrete operator implements all four methods below; until it does,
    the class cannot be instantiated.
    """

    @abstractmethod
    def operate(self, text: str, params: Dict = None) -> str:
        """
        Apply the operator to the text.

        :param text: the text the operator works on
        :param params: operator specific parameters
        :return: the resulting text
        """

    @abstractmethod
    def validate(self, params: Dict = None) -> None:
        """
        Check the operator specific parameters.

        :param params: operator specific parameters
        """

    @abstractmethod
    def operator_name(self) -> str:
        """Return the name of the operator."""

    @abstractmethod
    def operator_type(self) -> OperatorType:
        """Return the type (an OperatorType member) of the operator."""
operate(self, text, params=None)

Operate method to be implemented in each operator.

Source code in presidio_anonymizer/operators/operator.py
@abstractmethod
def operate(self, text: str, params: Dict = None) -> str:
    """
    Apply the operator to the text; each operator supplies its own version.

    :param text: the text the operator works on
    :param params: operator specific parameters
    :return: the resulting text
    """
operator_name(self)

Return operator name.

Source code in presidio_anonymizer/operators/operator.py
@abstractmethod
def operator_name(self) -> str:
    """Return the name of the operator; each operator supplies its own."""
operator_type(self)

Return operator type.

Source code in presidio_anonymizer/operators/operator.py
@abstractmethod
def operator_type(self) -> OperatorType:
    """Return the type of the operator; each operator supplies its own."""
validate(self, params=None)

Validate each operator parameters.

Source code in presidio_anonymizer/operators/operator.py
@abstractmethod
def validate(self, params: Dict = None) -> None:
    """
    Check the operator specific parameters; each operator supplies its own.

    :param params: operator specific parameters
    """

OperatorType (Enum)

Operator type, either anonymize or deanonymize, used to separate the operators.

Source code in presidio_anonymizer/operators/operator.py
class OperatorType(Enum):
    """Kind of an operator: one that anonymizes or one that deanonymizes."""

    # Operators that anonymize text.
    Anonymize = 1
    # Operators that reverse an anonymization.
    Deanonymize = 2

operators_factory

OperatorsFactory

Operators factory to get the correct operator class.

Source code in presidio_anonymizer/operators/operators_factory.py
class OperatorsFactory:
    """
    Factory that finds an operator class by type and name and instantiates it.

    The lookup tables are built on first use from the direct subclasses of
    Operator and are stored on the class, so every factory instance shares them.
    """

    # Lazily filled caches; None until the first lookup.
    _anonymizers: Dict = None
    _deanonymizers: Dict = None
    _operator_class: Dict = None

    def __init__(self):
        self.logger = logging.getLogger("presidio-anonymizer")

    def create_operator_class(
        self, operator_name: str, operator_type: OperatorType
    ) -> Operator:
        """
        Create an instance of the operator with the given type and name.

        :param operator_name: the name the operator reports via operator_name().
        :param operator_type: OperatorType member selecting which group of
            operators is searched.
        :return: a new instance of the matching operator class.
        :raises InvalidParamException: when no operators are available for
            operator_type, or none of them is called operator_name.
        """
        candidates = self.__get_operators_classes().get(operator_type)
        if not candidates:
            self.logger.error(f"No such operator type {operator_type}")
            raise InvalidParamException(f"Invalid operator type '{operator_type}'.")

        chosen = candidates.get(operator_name)
        if not chosen:
            self.logger.error(f"No such operator class {operator_name}")
            raise InvalidParamException(f"Invalid operator class '{operator_name}'.")

        self.logger.debug(f"applying class {chosen}")
        return chosen()

    @staticmethod
    def __get_operators_classes():
        # Map every operator type to its {name: class} table, built once.
        factory = OperatorsFactory
        if factory._operator_class:
            return factory._operator_class
        factory._operator_class = {
            OperatorType.Anonymize: factory.get_anonymizers(),
            OperatorType.Deanonymize: factory.get_deanonymizers(),
        }
        return factory._operator_class

    @staticmethod
    def get_anonymizers() -> Dict[str, "Operator"]:
        """Return the {name: class} table of the available anonymizers."""
        table = OperatorsFactory._anonymizers
        if not table:
            table = OperatorsFactory.__get_operators_by_type(OperatorType.Anonymize)
            OperatorsFactory._anonymizers = table
        return table

    @staticmethod
    def get_deanonymizers() -> Dict[str, "Operator"]:
        """Return the {name: class} table of the available deanonymizers."""
        table = OperatorsFactory._deanonymizers
        if not table:
            table = OperatorsFactory.__get_operators_by_type(OperatorType.Deanonymize)
            OperatorsFactory._deanonymizers = table
        return table

    @staticmethod
    def __get_operators_by_type(operator_type: OperatorType):
        # operator_type()/operator_name() are instance methods; the class itself
        # is passed as ``self`` so no operator has to be instantiated.
        matching = {}
        for cls in Operator.__subclasses__():
            if cls.operator_type(cls) == operator_type:
                matching[cls.operator_name(cls)] = cls
        return matching
create_operator_class(self, operator_name, operator_type)

Extract the operator class from the operators list.

:param operator_type: Either Anonymize or Deanonymize, to differentiate between operators. :param operator_name: operator name. :return: operator class entity.

Source code in presidio_anonymizer/operators/operators_factory.py
def create_operator_class(
    self, operator_name: str, operator_type: OperatorType
) -> Operator:
    """
    Create an instance of the operator with the given type and name.

    :param operator_name: key used to look up the operator class.
    :param operator_type: OperatorType member selecting which group of
        operators is searched.
    :return: a new instance of the matching operator class.
    :raises InvalidParamException: when no operators are available for
        operator_type, or none of them is stored under operator_name.
    """
    candidates = self.__get_operators_classes().get(operator_type)
    if not candidates:
        self.logger.error(f"No such operator type {operator_type}")
        raise InvalidParamException(f"Invalid operator type '{operator_type}'.")

    chosen = candidates.get(operator_name)
    if not chosen:
        self.logger.error(f"No such operator class {operator_name}")
        raise InvalidParamException(f"Invalid operator class '{operator_name}'.")

    self.logger.debug(f"applying class {chosen}")
    return chosen()
get_anonymizers() staticmethod

Return all anonymizers classes currently available.

Source code in presidio_anonymizer/operators/operators_factory.py
@staticmethod
def get_anonymizers() -> Dict[str, "Operator"]:
    """Return the available anonymizer classes, computing them on first use."""
    table = OperatorsFactory._anonymizers
    if not table:
        table = OperatorsFactory.__get_operators_by_type(OperatorType.Anonymize)
        OperatorsFactory._anonymizers = table
    return table
get_deanonymizers() staticmethod

Return all deanonymizers classes currently available.

Source code in presidio_anonymizer/operators/operators_factory.py
@staticmethod
def get_deanonymizers() -> Dict[str, "Operator"]:
    """Return the available deanonymizer classes, computing them on first use."""
    table = OperatorsFactory._deanonymizers
    if not table:
        table = OperatorsFactory.__get_operators_by_type(OperatorType.Deanonymize)
        OperatorsFactory._deanonymizers = table
    return table

redact

Replaces the PII text entity with empty string.

Redact (Operator)

Redact the string - empty value.

Source code in presidio_anonymizer/operators/redact.py
class Redact(Operator):
    """Operator that removes the PII text by substituting an empty string."""

    def operate(self, text: str = None, params: Dict = None) -> str:
        """
        Produce the redacted value.

        :param text: unused.
        :param params: unused.
        :return: an empty string.
        """
        return ""

    def validate(self, params: Dict = None) -> None:
        """Accept any params: redact takes no parameters, nothing is checked."""

    def operator_name(self) -> str:
        """Return the name identifying this operator: ``redact``."""
        return "redact"

    def operator_type(self) -> OperatorType:
        """Return the operator's type, ``OperatorType.Anonymize``."""
        return OperatorType.Anonymize
operate(self, text=None, params=None)

:return: an empty value.

Source code in presidio_anonymizer/operators/redact.py
def operate(self, text: str = None, params: Dict = None) -> str:
    """
    Produce the redacted value.

    :param text: unused.
    :param params: unused.
    :return: an empty string.
    """
    return ""
operator_name(self)

Return operator name.

Source code in presidio_anonymizer/operators/redact.py
def operator_name(self) -> str:
    """Return the name identifying this operator: ``redact``."""
    return "redact"
operator_type(self)

Return operator type.

Source code in presidio_anonymizer/operators/redact.py
def operator_type(self) -> OperatorType:
    """Return the operator's type, ``OperatorType.Anonymize``."""
    return OperatorType.Anonymize
validate(self, params=None)

Redact does not require any parameters, so no validation is needed.

Source code in presidio_anonymizer/operators/redact.py
def validate(self, params: Dict = None) -> None:
    """Accept any params: redact takes no parameters, nothing is checked."""

replace

Replaces the PII text entity with new string.

Replace (Operator)

Receives new text to replace old PII text entity with.

Source code in presidio_anonymizer/operators/replace.py
class Replace(Operator):
    """Operator that substitutes the PII text with a caller supplied value."""

    # Key in params holding the replacement text.
    NEW_VALUE = "new_value"

    def operate(self, text: str = None, params: Dict = None) -> str:
        """
        Pick the replacement for the PII text.

        :param text: unused.
        :param params: may hold ``new_value`` (the replacement) and
            ``entity_type`` (used for the fallback).
        :return: ``new_value`` when it is set and non-empty, otherwise the
            entity type wrapped in angle brackets.
        """
        replacement = params.get(self.NEW_VALUE)
        if replacement:
            return replacement
        return f"<{params.get('entity_type')}>"

    def validate(self, params: Dict = None) -> None:
        """Check that ``new_value`` passes the validate_type check for str."""
        validate_type(params.get(self.NEW_VALUE), self.NEW_VALUE, str)

    def operator_name(self) -> str:
        """Return the name identifying this operator: ``replace``."""
        return "replace"

    def operator_type(self) -> OperatorType:
        """Return the operator's type, ``OperatorType.Anonymize``."""
        return OperatorType.Anonymize
operate(self, text=None, params=None)

:return: new_value.

Source code in presidio_anonymizer/operators/replace.py
def operate(self, text: str = None, params: Dict = None) -> str:
    """
    Pick the replacement for the PII text.

    :param text: unused.
    :param params: may hold the replacement under the ``NEW_VALUE`` key and
        ``entity_type`` (used for the fallback).
    :return: the replacement when it is set and non-empty, otherwise the
        entity type wrapped in angle brackets.
    """
    replacement = params.get(self.NEW_VALUE)
    if replacement:
        return replacement
    return f"<{params.get('entity_type')}>"
operator_name(self)

Return operator name.

Source code in presidio_anonymizer/operators/replace.py
def operator_name(self) -> str:
    """Return the name identifying this operator: ``replace``."""
    return "replace"
operator_type(self)

Return operator type.

Source code in presidio_anonymizer/operators/replace.py
def operator_type(self) -> OperatorType:
    """Return the operator's type, ``OperatorType.Anonymize``."""
    return OperatorType.Anonymize
validate(self, params=None)

Validate the new value is string.

Source code in presidio_anonymizer/operators/replace.py
def validate(self, params: Dict = None) -> None:
    """Check that the new value passes the validate_type check for str."""
    validate_type(params.get(self.NEW_VALUE), self.NEW_VALUE, str)

services special

Services init.

app_entities_convertor

AppEntitiesConvertor

Assisting class to convert API json entities to engine entities.

Source code in presidio_anonymizer/services/app_entities_convertor.py
class AppEntitiesConvertor:
    """Helpers turning the JSON payloads of the API into engine entities."""

    @staticmethod
    def analyzer_results_from_json(data: List[Dict]) -> List["RecognizerResult"]:
        """
        Convert the analyzer results JSON into RecognizerResult objects.

        :param data: list of analyzer results, each one a JSON dictionary
        :return: one RecognizerResult per item of data
        :raises InvalidParamException: when data is None
        """
        if data is None:
            raise InvalidParamException(
                "Invalid input, request must contain analyzer results"
            )
        return [RecognizerResult.from_json(item) for item in data]

    @staticmethod
    def operators_config_from_json(data: Dict) -> Dict[str, "OperatorConfig"]:
        """
        Convert the operators JSON into OperatorConfig objects.

        :param data: dictionary whose values are operator configurations in
            JSON form, or None
        :return: dictionary with the same keys and OperatorConfig values;
            empty when data is None
        """
        if data is None:
            return {}
        return {
            key: OperatorConfig.from_json(config_json)
            for key, config_json in data.items()
        }

    @staticmethod
    def deanonymize_entities_from_json(json: Dict) -> List["OperatorResult"]:
        """
        Build the OperatorResult list of a deanonymize request.

        :param json: the request body; the items are read from its
            "anonymizer_results" key, for example
        {
            "text": text,
            "anonymizer_results": [{
                "start": 0,
                "end": 10,
                "key": "1111111111111111",
                "entity_type":"PHONE_NUMBER"
            }],
        }
        :return: List[OperatorResult], empty when the key is missing or empty
        """
        raw_results = json.get("anonymizer_results")
        if not raw_results:
            return []
        return [OperatorResult.from_json(item) for item in raw_results]

    @staticmethod
    def check_custom_operator(operators: Dict[str, OperatorConfig]):
        """Return True when at least one operator config is named "custom"."""
        names = [config.operator_name for config in operators.values()]
        return "custom" in names
analyzer_results_from_json(data) staticmethod

Go over analyzer results, validate them and convert to List[RecognizerResult].

:param data: contains the anonymizers and analyzer_results_json

Source code in presidio_anonymizer/services/app_entities_convertor.py
@staticmethod
def analyzer_results_from_json(data: List[Dict]) -> List["RecognizerResult"]:
    """
    Go over analyzer results, validate them and convert to List[RecognizerResult].

    :param data: contains the anonymizers and analyzer_results_json
    """
    if data is None:
        raise InvalidParamException(
            "Invalid input, " "request must contain analyzer results"
        )
    return [RecognizerResult.from_json(analyzer_result) for analyzer_result in data]
check_custom_operator(operators) staticmethod

Check if an operator is of type custom.

Source code in presidio_anonymizer/services/app_entities_convertor.py
@staticmethod
def check_custom_operator(operators: Dict[str, OperatorConfig]):
    """Return True when at least one operator config is named "custom"."""
    names = [config.operator_name for config in operators.values()]
    return "custom" in names
deanonymize_entities_from_json(json) staticmethod

Create DecryptEntity list.

:param json: { "text": text, "anonymizer_results": [{ "start": 0, "end": 10, "key": "1111111111111111", "entity_type":"PHONE_NUMBER" }], } :return: List[OperatorResult]

Source code in presidio_anonymizer/services/app_entities_convertor.py
@staticmethod
def deanonymize_entities_from_json(json: Dict) -> List["OperatorResult"]:
    """
    Create DecryptEntity list.

    :param json:
    {
        "text": text,
        "encrypt_results": [{
            "start": 0,
            "end": 10,
            "key": "1111111111111111",
            "entity_type":"PHONE_NUMBER"
        }],
    }
    :return: List[OperatorResult]
    """
    decrypt_entity = json.get("anonymizer_results")
    return (
        [OperatorResult.from_json(result) for result in decrypt_entity]
        if decrypt_entity
        else []
    )
operators_config_from_json(data) staticmethod

Go over the operators list and get the relevant create operator config entity.

:param data: contains the list of configuration value - OperatorConfig

Source code in presidio_anonymizer/services/app_entities_convertor.py
@staticmethod
def operators_config_from_json(data: Dict) -> Dict[str, "OperatorConfig"]:
    """
    Go over the operators list and get the relevant create operator config entity.

    :param data: contains the list of configuration
    value - OperatorConfig
    """
    if data is not None:
        return {
            key: OperatorConfig.from_json(operator_json)
            for (key, operator_json) in data.items()
        }
    return {}

validators

Anonymizer validation utility methods.

validate_parameter(parameter_value, parameter_name, parameter_type)

Validate an anonymizer parameter.

Both validate the existence of an anonymizer parameter and that it is an instance of the parameter_type. Otherwise, raise the appropriate InvalidParamException with the parameter_name as content.

Source code in presidio_anonymizer/services/validators.py
def validate_parameter(
    parameter_value, parameter_name: str, parameter_type: type
) -> None:
    """Validate a mandatory anonymizer parameter.

    The parameter must be present (not None); its type is then checked by
    validate_type.

    :param parameter_value: the value to check
    :param parameter_name: the name reported in the error message
    :param parameter_type: the type the value is expected to have
    :raises InvalidParamException: when parameter_value is None
    """
    if parameter_value is not None:
        validate_type(parameter_value, parameter_name, parameter_type)
        return
    raise InvalidParamException(f"Expected parameter {parameter_name}")

validate_parameter_exists(parameter_value, entity, parameter_name)

Validate parameter is not empty.

Source code in presidio_anonymizer/services/validators.py
def validate_parameter_exists(
    parameter_value, entity: str, parameter_name: str
) -> None:
    """Validate that the parameter was supplied, i.e. it is not None.

    :param parameter_value: the value to check
    :param entity: the name of the entity owning the parameter, used in the
        error message
    :param parameter_name: the name reported in the error message
    :raises InvalidParamException: when parameter_value is None
    """
    if parameter_value is not None:
        return
    message = f"Invalid input, {entity} must contain {parameter_name}"
    raise InvalidParamException(message)

validate_parameter_in_range(values_range, parameter_value, parameter_name, parameter_type)

Validate an anonymizer parameter.

Validates the existence of an anonymizer parameter, that it is an instance of the parameter_type, and that it is within the range of provided values. Otherwise, raises the appropriate InvalidParamException with the parameter_name as content.

Source code in presidio_anonymizer/services/validators.py
def validate_parameter_in_range(
    values_range, parameter_value, parameter_name: str, parameter_type: type
) -> None:
    """Validate that a mandatory parameter is one of the allowed values.

    The parameter must be present and contained in values_range.

    :param values_range: the container of allowed values
    :param parameter_value: the value to check
    :param parameter_name: the name reported in the error message
    :param parameter_type: accepted but not used by this function; the
        existence check below is made against ``object``
    :raises InvalidParamException: when parameter_value is None or is not in
        values_range
    """
    validate_parameter(parameter_value, parameter_name, object)
    if parameter_value in values_range:
        return
    raise InvalidParamException(
        f"Parameter {parameter_name} value {parameter_value} is not in "
        f"range of values {values_range}"
    )

validate_parameter_not_empty(parameter_value, entity, parameter_name)

Validate that the parameter exists and is not empty.

Source code in presidio_anonymizer/services/validators.py
def validate_parameter_not_empty(
    parameter_value, entity: str, parameter_name: str
) -> None:
    """Validate that the parameter exists and is not empty.

    Any falsy value (None, an empty string or container, 0, False) is rejected.

    :param parameter_value: the value to check
    :param entity: the name of the entity owning the parameter, used in the
        error message
    :param parameter_name: the name reported in the error message
    :raises InvalidParamException: when parameter_value is falsy
    """
    if parameter_value:
        return
    message = f"Invalid input, {entity} must contain {parameter_name}"
    raise InvalidParamException(message)

validate_type(parameter_value, parameter_name, parameter_type)

Validate an anonymizer parameter.

Validate it exists and if so, that it is the instance of the parameter_type. Otherwise, raise the appropriate InvalidParamException with the parameter_name as content.

Source code in presidio_anonymizer/services/validators.py
def validate_type(parameter_value, parameter_name, parameter_type):
    """
    Validate the type of an optional anonymizer parameter.

    When the value is truthy it must be an instance of parameter_type;
    otherwise an InvalidParamException is raised with a message built from
    the parameter name, the expected type and the actual type.

    :param parameter_value: the value to check
    :param parameter_name: the name passed on to the error message
    :param parameter_type: the type the value is expected to be an instance of
    :raises InvalidParamException: when parameter_value is truthy and is not
        an instance of parameter_type
    """
    # Falsy values (None, "", 0, False, empty containers) skip the type check.
    if parameter_value and not isinstance(parameter_value, parameter_type):
        message = _get_bad_typed_parameter_error_message(
            parameter_name,
            expected_type=parameter_type,
            actual_type=type(parameter_value),
        )
        raise InvalidParamException(message)