Corrections
Make corrections to your data.
fix_annotations(example, corrections, case_sensitive=False)
Show source code in recon/corrections.py
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76 | @operation("recon.v1.fix_annotations")
def fix_annotations(
    example: Example, corrections: Dict[str, str], case_sensitive: bool = False
) -> Example:
    """Relabel or remove spans of an Example based on their text.

    No new annotations are ever added: a span is either relabeled, removed,
    reported, or left alone. The spans of the `example` passed in are
    modified and that same object is returned.
    NOTE(review): the original docstring says this works on a copy; any
    copying would have to happen in the `operation` decorator, which is not
    visible here -- confirm.

    Args:
        example (Example): Input Example
        corrections (Dict[str, str]): Mapping of span text to the action for
            spans with that text. A string value becomes the span's new label.
            A value of None removes the span. If the value is the builtin
            `print`, the span is kept unchanged and the example text and the
            span's current label are printed once all spans are processed.
        case_sensitive (bool, optional): If False (default), correction keys
            and span texts are lowercased before being compared.

    Returns:
        Example: Example with fixed annotations
    """
    if case_sensitive:
        lookup = dict(corrections)
    else:
        lookup = {text.lower(): label for text, label in corrections.items()}

    report: DefaultDict[str, List[str]] = defaultdict(list)
    doomed = []
    for position, span in enumerate(example.spans):
        key = span.text if case_sensitive else span.text.lower()
        if key not in lookup:
            continue
        action = lookup[key]
        if action is print:
            # Collect for the report printed below; the span is not touched.
            report[key].extend(["=" * 100, example.text, span.label])
        elif action is None:
            doomed.append(position)
        else:
            span.label = action

    # Delete from the highest index down so earlier deletions do not shift
    # the positions that are still pending.
    for position in reversed(doomed):
        del example.spans[position]

    for key in sorted(report):
        print(f"**{key}**")
        for line in report[key]:
            print(line)
    return example
|
Fix annotations in a copy of List[Example] data.
This function will NOT add annotations to your data.
It will only remove erroneous annotations and fix the
labels for specific spans.
Parameters
| Name |
Type |
Description |
Default |
example |
Example |
Input Example |
required |
corrections |
Dict[str, str] |
Dictionary of corrections mapping entity text to a new label. If the value is set to None, the annotation will be removed. |
required |
case_sensitive |
bool |
Consider case of text for each correction |
False |
Returns
| Type |
Description |
Example |
Example: Example with fixed annotations |
rename_labels(example, label_map)
Show source code in recon/corrections.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24 | @operation("recon.v1.rename_labels")
def rename_labels(example: Example, label_map: Dict[str, str]) -> Example:
    """Replace span labels according to a mapping of old name to new name.

    Labels that do not appear as a key in `label_map` are kept as they are.
    The spans of the `example` passed in are updated and that same object is
    returned.
    NOTE(review): the original docstring says this works on a copy; any
    copying would have to happen in the `operation` decorator, which is not
    visible here -- confirm.

    Args:
        example (Example): Input Example
        label_map (Dict[str, str]): One-to-one mapping of label names

    Returns:
        Example: Example with renamed labels
    """
    for span in example.spans:
        current = span.label
        replacement = label_map[current] if current in label_map else current
        span.label = replacement
    return example
|
Rename labels in a copy of List[Example] data
Parameters
| Name |
Type |
Description |
Default |
example |
Example |
Input Example |
required |
label_map |
Dict[str, str] |
One-to-one mapping of label names |
required |
Returns
| Type |
Description |
Example |
Example: Copy of Example with renamed labels |
strip_annotations(example, strip_chars=['.', '!', '?', '-', ':', ' '])
Show source code in recon/corrections.py
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109 | @operation("recon.v1.strip_annotations")
def strip_annotations(
    example: Example, strip_chars: List[str] = [".", "!", "?", "-", ":", " "]
) -> Example:
    """Strip punctuation and spaces from start and end of annotations.

    These characters are almost always a mistake and will confuse a model.
    For every span, the leading and trailing runs of characters found in
    `strip_chars` are removed from `span.text`, and `span.start` / `span.end`
    are moved inward by the number of characters removed on each side.
    The spans of the `example` passed in are updated and that same object is
    returned.

    Both ends are always processed, regardless of the order of `strip_chars`.
    A span whose text consists only of strip characters ends up with empty
    text and `start == end` instead of raising an IndexError.

    Args:
        example (Example): Input Example
        strip_chars (List[str], optional): Characters to strip. Each entry is
            compared against single characters of the span text, so entries
            longer than one character never match.

    Returns:
        Example: Example with stripped spans
    """
    # The default list is only read here, never mutated, so sharing it
    # between calls is harmless.
    unwanted = set(strip_chars)
    for span in example.spans:
        text = span.text
        size = len(text)

        # Length of the leading run of unwanted characters.
        lead = 0
        while lead < size and text[lead] in unwanted:
            lead += 1

        # Index one past the last character to keep; never crosses `lead`,
        # so an all-unwanted text is consumed once, from the left.
        keep_end = size
        while keep_end > lead and text[keep_end - 1] in unwanted:
            keep_end -= 1

        if lead == 0 and keep_end == size:
            continue  # nothing to strip for this span

        span.start += lead
        span.end -= size - keep_end
        span.text = text[lead:keep_end]
    return example
|
Strip punctuation and spaces from start and end of annotations.
These characters are almost always a mistake and will confuse a model.
Parameters
| Name |
Type |
Description |
Default |
example |
Example |
Input Example |
required |
strip_chars |
List[str] |
Characters to strip. |
['.', '!', '?', '-', ':', ' '] |
Returns
| Type |
Description |
Example |
Example: Example with stripped spans |