Hashing

dataset_hash(dataset, as_int=True)

Show source code in recon/hashing.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
def dataset_hash(dataset: Any, as_int: bool = True) -> Union[str, int]:
    """Hash a Dataset from its name and the hashes of its examples.

    Args:
        dataset (Dataset): Dataset to hash; its ``name`` and ``data``
            attributes are read.
        as_int (bool, optional): Encode hash as int. Forwarded to ``_hash``
            for the final value only; the per-example hashes are always
            requested with ``as_int=False``.

    Returns:
        Union[str, int]: Dataset hash
    """
    name = dataset.name
    # One non-int hash per example, in dataset order.
    per_example = [example_hash(item, as_int=False) for item in dataset.data]
    payload = (name, *per_example)
    return _hash(payload, as_int=as_int)

Hash of Dataset

Parameters

Name Type Description Default
dataset Any Dataset to hash required
as_int bool Encode hash as int True

Returns

Type Description
Union[str, int] Dataset hash

example_hash(example, as_int=True)

Show source code in recon/hashing.py
39
40
41
42
43
44
45
46
47
48
49
50
def example_hash(example: Any, as_int: bool = True) -> Union[str, int]:
    """Hash an Example from its text and the hashes of its spans.

    Token data is not part of this hash.

    Args:
        example (Example): Example to hash; its ``text`` and ``spans``
            attributes are read.
        as_int (bool, optional): Encode hash as int. Forwarded to ``_hash``
            for the final value only; the per-span hashes are always
            requested with ``as_int=False``.

    Returns:
        Union[str, int]: Example hash
    """
    text = example.text
    # One non-int hash per span, in span order.
    per_span = [span_hash(item, as_int=False) for item in example.spans]
    payload = (text, *per_span)
    return _hash(payload, as_int=as_int)

Hash of Example type

Parameters

Name Type Description Default
example Any Example to hash required
as_int bool Encode hash as int True

Returns

Type Description
Union[str, int] Example hash

span_hash(span, as_int=True)

Show source code in recon/hashing.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def span_hash(span: Any, as_int: bool = True) -> Union[str, int]:
    """Hash a Span from its character offsets, label, text and token offsets.

    Args:
        span (Span): Span to hash; its ``start``, ``end``, ``label``,
            ``text``, ``token_start`` and ``token_end`` attributes are read.
        as_int (bool, optional): Encode hash as int (forwarded to ``_hash``).

    Returns:
        Union[str, int]: Span hash
    """
    char_fields = (span.start, span.end, span.label, span.text)
    # Any falsy token offset (e.g. None) is hashed as 0, so a span without
    # token offsets hashes the same as one whose token offsets are 0.
    token_fields = (span.token_start or 0, span.token_end or 0)
    return _hash(char_fields + token_fields, as_int=as_int)

Hash of Span type

Parameters

Name Type Description Default
span Any Span to hash required
as_int bool Encode hash as int True

Returns

Type Description
Union[str, int] Span hash

token_hash(token, as_int=True)

Show source code in recon/hashing.py
 5
 6
 7
 8
 9
10
11
12
13
14
15
def token_hash(token: Any, as_int: bool = True) -> Union[str, int]:
    """Hash a Token from its text, start, end and id.

    Args:
        token (Token): Token to hash; its ``text``, ``start``, ``end`` and
            ``id`` attributes are read.
        as_int (bool, optional): Encode hash as int (forwarded to ``_hash``).

    Returns:
        Union[str, int]: Token hash
    """
    payload = (token.text, token.start, token.end, token.id)
    return _hash(payload, as_int=as_int)

Hash of Token type

Parameters

Name Type Description Default
token Any Token to hash required
as_int bool Encode hash as int True

Returns

Type Description
Union[str, int] Token hash

tokenized_example_hash(example, as_int=True)

Show source code in recon/hashing.py
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def tokenized_example_hash(example: Any, as_int: bool = True) -> Union[str, int]:
    """Hash an Example from its text, span hashes and token hashes.

    Args:
        example (Example): Example to hash; its ``tokens``, ``text`` and
            ``spans`` attributes are read. A falsy ``tokens`` value
            (e.g. None) is treated as having no tokens.
        as_int (bool, optional): Encode hash as int. Forwarded to ``_hash``
            for the final value only; the span and token hashes are always
            requested with ``as_int=False``.

    Returns:
        Union[str, int]: Example hash
    """
    token_items = example.tokens or []
    # Payload order is: text, then every span hash, then every token hash.
    payload = [example.text]
    payload.extend(span_hash(item, as_int=False) for item in example.spans)
    payload.extend(token_hash(item, as_int=False) for item in token_items)
    return _hash(tuple(payload), as_int=as_int)

Hash of Example type including token data

Parameters

Name Type Description Default
example Any Example to hash required
as_int bool Encode hash as int True

Returns

Type Description
Union[str, int] Example hash