Corrections

Make corrections to your data.

fix_annotations(example, corrections, case_sensitive=False)

Show source code in recon/corrections.py
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
@operation("recon.v1.fix_annotations")
def fix_annotations(
    example: Example, corrections: Dict[str, str], case_sensitive: bool = False
) -> Example:
    """Relabel or remove spans of an Example based on their text.

    No new spans are ever created: a span is either left alone, given a
    new label, or deleted. The spans of the given example are modified
    directly and the same example object is returned.

    Args:
        example (Example): Input Example
        corrections (Dict[str, str]): Mapping from span text to the label
            that span should receive. A value of None deletes the matching
            spans. A value of the builtin ``print`` leaves the span
            untouched and instead prints the example text and the span's
            current label, grouped by span text.
        case_sensitive (bool, optional): When False (the default), both the
            correction keys and the span text are lowercased before matching.

    Returns:
        Example: The same Example with corrected spans
    """
    # Normalise the correction keys once so every span lookup is a plain dict hit.
    lookup = {
        (text if case_sensitive else text.lower()): label
        for text, label in corrections.items()
    }

    debug_lines: DefaultDict[str, List[str]] = defaultdict(list)
    removal_indices = []

    for index, span in enumerate(example.spans):
        key = span.text if case_sensitive else span.text.lower()
        if key not in lookup:
            continue

        target = lookup[key]
        if target is print:
            # Inspection mode: collect output, leave the span as it is.
            debug_lines[key].extend(["=" * 100, example.text, span.label])
        elif target is None:
            removal_indices.append(index)
        else:
            span.label = target

    # Delete from the highest index down so earlier indices stay valid.
    for index in reversed(removal_indices):
        del example.spans[index]

    for key in sorted(debug_lines):
        print(f"**{key}**")
        for entry in debug_lines[key]:
            print(entry)

    return example

Fix annotations in a copy of List[Example] data.

This function will NOT add annotations to your data. It will only remove erroneous annotations and fix the labels for specific spans.

Parameters

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| example | Example | Input Example | required |
| corrections | Dict[str, str] | Dictionary of corrections mapping entity text to a new label. If the value is set to None, the annotation will be removed. | required |
| case_sensitive | bool | Consider case of text for each correction | False |

Returns

| Type | Description |
| --- | --- |
| Example | Example with fixed annotations |

rename_labels(example, label_map)

Show source code in recon/corrections.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
@operation("recon.v1.rename_labels")
def rename_labels(example: Example, label_map: Dict[str, str]) -> Example:
    """Apply a label renaming to every span of an Example.

    Each span's label is looked up in ``label_map``; labels without an
    entry are kept unchanged. The spans of the given example are updated
    directly and the same example object is returned.

    Args:
        example (Example): Input Example
        label_map (Dict[str, str]): Mapping from current label name to new label name

    Returns:
        Example: The same Example with renamed labels
    """
    for annotation in example.spans:
        current = annotation.label
        if current in label_map:
            renamed = label_map[current]
        else:
            renamed = current
        annotation.label = renamed
    return example

Rename labels in a copy of List[Example] data

Parameters

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| example | Example | Input Example | required |
| label_map | Dict[str, str] | One-to-one mapping of label names | required |

Returns

| Type | Description |
| --- | --- |
| Example | Copy of Example with renamed labels |

strip_annotations(example, strip_chars=['.', '!', '?', '-', ':', ' '])

Show source code in recon/corrections.py
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
@operation("recon.v1.strip_annotations")
def strip_annotations(
    example: Example, strip_chars: List[str] = [".", "!", "?", "-", ":", " "]
) -> Example:
    """Strip punctuation and spaces from start and end of annotations.
    These characters are almost always a mistake and will confuse a model

    For every span, all leading and all trailing characters found in
    ``strip_chars`` are removed from ``span.text``; ``span.start`` is moved
    forward by the number of leading characters removed and ``span.end`` is
    moved back by the number of trailing characters removed. Both ends are
    always processed, so a span such as ``".foo."`` becomes ``"foo"``.

    A span consisting only of strip characters ends up with empty text and
    ``start == end`` instead of raising ``IndexError``.

    Args:
        example (Example): Input Example
        strip_chars (List[str], optional): Characters to strip. Entries are
            matched against single characters of the span text.

    Returns:
        Example: Example with stripped spans
    """
    # The default list is shared between calls but is only read, never mutated.
    strippable = set(strip_chars)

    for s in example.spans:
        text = s.text

        # Count strippable characters at the front; stop at the end of the text.
        lead = 0
        while lead < len(text) and text[lead] in strippable:
            lead += 1

        # Walk back from the end, never crossing the part already stripped.
        stop = len(text)
        while stop > lead and text[stop - 1] in strippable:
            stop -= 1

        if lead == 0 and stop == len(text):
            # Nothing to strip; leave the span untouched.
            continue

        # Update offsets and text together so they stay consistent.
        s.start += lead
        s.end -= len(text) - stop
        s.text = text[lead:stop]
    return example

Strip punctuation and spaces from start and end of annotations. These characters are almost always a mistake and will confuse a model

Parameters

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| example | Example | Input Example | required |
| strip_chars | List[str] | Characters to strip. | `['.', '!', '?', '-', ':', ' ']` |

Returns

| Type | Description |
| --- | --- |
| Example | Example with stripped spans |