Validation
filter_overlaps(example)
Show source code in recon/validation.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40 | @operation("recon.v1.filter_overlaps")
def filter_overlaps(example: Example) -> Example:
    """Resolve overlapping entity spans, keeping the longest span of each overlap.

    The spans are ordered by their start offset, passed through
    ``remove_overlapping_entities``, and the result is written back onto the
    same example object.

    Args:
        example (Example): Input Example

    Returns:
        Example: The same Example, with overlapping spans removed
    """
    spans_by_start: List[Span] = list(example.spans)
    spans_by_start.sort(key=lambda span: span.start)
    example.spans = remove_overlapping_entities(spans_by_start)
    return example
|
Filter overlapping entity spans by picking the longest one.
Parameters
Name |
Type |
Description |
Default |
example |
Example |
Input Example |
required |
Returns
Type |
Description |
Example |
Example: Example with fixed overlaps |
remove_overlapping_entities(sorted_spans)
Show source code in recon/validation.py
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
def remove_overlapping_entities(sorted_spans: List[Span]) -> List[Span]:
    """Remove overlapping entities, keeping the longest span(s) of each chain.

    Spans are grouped into "chains": a span joins the current chain when it
    overlaps the range covered by that chain so far. Each finished chain is
    reduced with ``select_subset_of_overlapping_chain``. Spans that merely
    touch (one ends exactly where the next starts) are not treated as
    overlapping.

    Args:
        sorted_spans (List[Span]): Spans exposing ``start`` and ``end``
            attributes, expected to be ordered by ``start``. The input is
            re-sorted defensively with a stable sort, so already sorted input
            is processed exactly as given.

    Returns:
        List[Span]: The surviving spans, ordered by ``start``.
    """
    spans_without_overlap: List[Span] = []
    current_chain: List[Span] = []
    chain_start = 0
    chain_end = 0

    # The chain detection below only works on start-ordered input; the stable
    # sort keeps the relative order of spans that share a start offset.
    for span in sorted(sorted_spans, key=lambda s: s.start):
        if current_chain:
            # Width of the intersection between the span and the chain's range.
            overlap = min(span.end, chain_end) - max(span.start, chain_start)
            if overlap > 0:
                current_chain.append(span)
                chain_start = min(span.start, chain_start)
                chain_end = max(span.end, chain_end)
                continue
            # The span starts a new chain: resolve the finished one first.
            spans_without_overlap.extend(
                select_subset_of_overlapping_chain(current_chain)
            )
        current_chain = [span]
        chain_start = span.start
        chain_end = span.end

    # Resolve the last open chain (empty when the input itself was empty).
    spans_without_overlap.extend(select_subset_of_overlapping_chain(current_chain))
    return sorted(spans_without_overlap, key=lambda s: s.start)
|
Removes overlapping entities from the entity set by greedily taking the longest
entity from each overlapping chain. The input list of entities should be sorted
and follow the spaCy format.
select_subset_of_overlapping_chain(chain)
Show source code in recon/validation.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def select_subset_of_overlapping_chain(chain: List[Span]) -> List[Span]:
    """Pick a non-overlapping subset of a chain, greedily preferring longer spans.

    Spans are visited from longest to shortest (by ``end - start``). A span is
    kept only if it overlaps none of the spans kept so far. Spans of equal
    length are visited in their input order because the sort is stable.

    Args:
        chain (List[Span]): Spans exposing ``start`` and ``end`` attributes.

    Returns:
        List[Span]: The kept spans, ordered from longest to shortest.
    """
    selections_from_chain: List[Span] = []
    for entity in sorted(chain, key=lambda s: s.end - s.start, reverse=True):
        # Two spans overlap when their intersection [max_start, min_end) is
        # non-empty; spans that only touch at a boundary do not overlap.
        overlaps_selection = any(
            min(entity.end, kept.end) > max(entity.start, kept.start)
            for kept in selections_from_chain
        )
        if not overlaps_selection:
            selections_from_chain.append(entity)
    return selections_from_chain
|
Select the subset of entities in an overlapping chain to return by greedily choosing the
longest entity in the chain until there are no entities remaining
upcase_labels(example)
Show source code in recon/validation.py
11
12
13
14
15
16
17
18
19
20
21
22
23 | @operation("recon.v1.upcase_labels")
def upcase_labels(example: Example) -> Example:
    """Normalize span labels by rewriting each one in uppercase.

    Labels are updated in place on the spans of the given example.

    Args:
        example (Example): Input Example

    Returns:
        Example: The same Example, with every span label uppercased
    """
    for span in example.spans:
        uppercased = span.label.upper()
        span.label = uppercased
    return example
|
Convert all span labels to uppercase to normalize them.
Parameters
Name |
Type |
Description |
Default |
example |
Example |
Input Example |
required |
Returns
Type |
Description |
Example |
Example: Example with fixed labels |