Insights

get_ents_by_label(data, use_lower=True)

Source code in recon/insights.py
def get_ents_by_label(data: List[Example], use_lower: bool = True) -> DefaultDict[str, List[str]]:
    """Get a dictionary of unique text spans by label for your data

    Args:
        data (List[Example]): List of examples
        use_lower (bool, optional): Use the lowercase form of the span text.

    Returns:
        DefaultDict[str, List[str]]: DefaultDict mapping label to sorted list of the unique
            spans annotated for that label.
    """
    annotations: DefaultDict[str, Set[str]] = defaultdict(set)
    sorted_annotations: DefaultDict[str, List[str]] = defaultdict(list)

    for e in data:
        for s in e.spans:
            span_text = s.text.lower() if use_lower else s.text
            annotations[s.label].add(span_text)

    for label in annotations.keys():
        sorted_annotations[label] = sorted(annotations[label])

    return sorted_annotations

Get a dictionary of unique text spans by label for your data

Parameters

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| data | List[recon.types.Example] | List of examples | required |
| use_lower | bool | Use the lowercase form of the span text. | True |

Returns

| Type | Description |
| ---- | ----------- |
| DefaultDict[str, List[str]] | DefaultDict mapping label to sorted list of the unique spans annotated for that label. |
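
As a quick illustration, here is a minimal, hedged sketch of calling get_ents_by_label on two hand-built examples. The Example and Span constructor fields used below (text, spans, start, end, label) are assumptions inferred from the attribute access in the source above, not confirmed signatures.

from recon.insights import get_ents_by_label
from recon.types import Example, Span

# Hypothetical data: the Example/Span constructor fields are assumed from
# the attributes the source accesses (e.spans, s.text, s.label).
data = [
    Example(
        text="Apple is hiring in London",
        spans=[
            Span(text="Apple", start=0, end=5, label="ORG"),
            Span(text="London", start=19, end=25, label="GPE"),
        ],
    ),
    Example(
        text="apple pie recipe",
        spans=[Span(text="apple", start=0, end=5, label="FOOD")],
    ),
]

ents = get_ents_by_label(data)
# With use_lower=True (the default), span text is lowercased first:
# ents["ORG"]  -> ["apple"]
# ents["FOOD"] -> ["apple"]
# ents["GPE"]  -> ["london"]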

get_hardest_examples(pred_errors, return_pred_errors=True, remove_pred_error_examples=True)

Source code in recon/insights.py
def get_hardest_examples(
    pred_errors: List[PredictionError],
    return_pred_errors: bool = True,
    remove_pred_error_examples: bool = True,
) -> List[HardestExample]:
    """Get hardest examples from list of PredictionError types

    Args:
        pred_errors (List[PredictionError]): list of PredictionError
        return_pred_errors (bool, optional): Whether to return prediction errors. Defaults to True.
        remove_pred_error_examples (bool, optional): Whether to remove examples from returned PredictionError. Defaults to True.

    Raises:
        ValueError: Each PredictionError must have a List of examples 

    Returns:
        List[HardestExample]: Sorted list of the hardest examples for a model to work on.
    """

    has_examples = any([pe.examples for pe in pred_errors])
    if not has_examples:
        raise ValueError(
            "Each PredictionError in Parameter pred_errors must have examples attached."
        )

    examples_text_map: Dict[str, Example] = {}
    example_pred_errors_map: DefaultDict[str, List[PredictionError]] = defaultdict(list)
    for pe in pred_errors:
        if pe.examples:
            for example in pe.examples:
                examples_text_map[example.original.text] = example.original
                example_pred_errors_map[example.original.text].append(
                    PredictionError(
                        text=pe.text,
                        true_label=pe.true_label,
                        pred_label=pe.pred_label,
                        count=pe.count,
                        examples=[example],
                    )
                )

    hardest_examples = []
    for example_text, example_pred_errors in example_pred_errors_map.items():
        example = examples_text_map[example_text]  # type: ignore

        prediction_errors: List[PredictionError] = []
        if remove_pred_error_examples and example_pred_errors:
            prediction_errors = [
                PredictionError(
                    text=pe.text,
                    true_label=pe.true_label,
                    pred_label=pe.pred_label,
                    count=pe.count,
                    examples=[],
                )
                for pe in example_pred_errors
            ]
        else:
            prediction_errors = example_pred_errors

        prediction_error_hashes: Set[str] = set()
        deduped_prediction_errors: List[PredictionError] = []

        for pe in prediction_errors:
            pe_hash = f"{pe.text}||{pe.true_label}||{pe.pred_label}"
            if pe_hash not in prediction_error_hashes:
                prediction_error_hashes.add(pe_hash)
                deduped_prediction_errors.append(pe)

        record = HardestExample(example=example, count=len(deduped_prediction_errors))
        if return_pred_errors:
            record.prediction_errors = deduped_prediction_errors
        hardest_examples.append(record)

    sorted_hardest_examples = sorted(hardest_examples, key=lambda he: he.count, reverse=True)
    return sorted_hardest_examples

Get hardest examples from list of PredictionError types

Parameters

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| pred_errors | List[recon.types.PredictionError] | List of PredictionError | required |
| return_pred_errors | bool | Whether to return prediction errors. Defaults to True. | True |
| remove_pred_error_examples | bool | Whether to remove examples from returned PredictionError. Defaults to True. | True |

Exceptions

| Type | Description |
| ---- | ----------- |
| ValueError | Each PredictionError must have a List of examples |

Returns

| Type | Description |
| ---- | ----------- |
| List[recon.types.HardestExample] | Sorted list of the hardest examples for a model to work on. |
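
In practice, pred_errors usually comes from top_prediction_errors (documented below), which attaches a PredictionErrorExamplePair to each error. The sketch below builds a single error by hand instead so that it stays self-contained; the constructor fields are assumptions based on how the source above reads them.

from recon.insights import get_hardest_examples
from recon.types import Example, PredictionError, PredictionErrorExamplePair, Span

# Hypothetical original vs. predicted annotations for one text.
orig = Example(
    text="Send the report to ACME Corp",
    spans=[Span(text="ACME Corp", start=19, end=28, label="ORG")],
)
pred = Example(
    text="Send the report to ACME Corp",
    spans=[Span(text="ACME Corp", start=19, end=28, label="PERSON")],
)

pred_errors = [
    PredictionError(
        text="ACME Corp",
        true_label="ORG",
        pred_label="PERSON",
        count=1,
        examples=[PredictionErrorExamplePair(original=orig, predicted=pred)],
    )
]

hardest = get_hardest_examples(pred_errors)
# Each HardestExample pairs an Example with the number of distinct
# prediction errors made on it, sorted hardest-first.
print(hardest[0].example.text, hardest[0].count)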

get_label_disparities(data, label1, label2, use_lower=True)

Source code in recon/insights.py
def get_label_disparities(
    data: List[Example], label1: str, label2: str, use_lower: bool = True
) -> Set[str]:
    """Identify annotated spans that have different labels in different examples

    Args:
        data (List[Example]): Input List of examples
        label1 (str): First label to compare
        label2 (str): Second label to compare
        use_lower (bool, optional): Use the lowercase form of the span text in get_ents_by_label.

    Returns:
        Set[str]: Set of all unique text spans that overlap between label1 and label2
    """
    annotations = get_ents_by_label(data, use_lower=use_lower)
    return set(annotations[label1]).intersection(set(annotations[label2]))

Identify annotated spans that have different labels in different examples

Parameters

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| data | List[recon.types.Example] | Input List of examples | required |
| label1 | str | First label to compare | required |
| label2 | str | Second label to compare | required |
| use_lower | bool | Use the lowercase form of the span text in get_ents_by_label. | True |

Returns

| Type | Description |
| ---- | ----------- |
| Set[str] | Set of all unique text spans that overlap between label1 and label2 |
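
For instance, the hedged sketch below flags a span that was annotated as DRUG in one example and CHEMICAL in another; the Example and Span constructor fields are assumed, as in the earlier sketch.

from recon.insights import get_label_disparities
from recon.types import Example, Span

data = [
    Example(
        text="Take paracetamol twice a day",
        spans=[Span(text="paracetamol", start=5, end=16, label="DRUG")],
    ),
    Example(
        text="Paracetamol is an active ingredient",
        spans=[Span(text="Paracetamol", start=0, end=11, label="CHEMICAL")],
    ),
]

overlap = get_label_disparities(data, "DRUG", "CHEMICAL")
# {"paracetamol"} - the same (lowercased) span text appears under both labels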

top_label_disparities(data, use_lower=True, dedupe=False)

Source code in recon/insights.py
def top_label_disparities(
    data: List[Example], use_lower: bool = True, dedupe: bool = False
) -> List[LabelDisparity]:
    """Identify annotated spans that have different labels
    in different examples for all label pairs in data.

    Args:
        data (List[Example]): Input List of examples
        use_lower (bool, optional): Use the lowercase form of the span text in get_ents_by_label.
        dedupe (bool, optional): Whether to deduplicate for table view vs confusion matrix.
            False by default for easy confusion matrix display.

    Returns:
        List[LabelDisparity]: List of LabelDisparity objects for each label pair combination
            sorted by the number of disparities between them.
    """
    annotations = get_ents_by_label(data, use_lower=use_lower)
    label_disparities = {}
    for label1 in annotations.keys():
        for label2 in annotations.keys():
            if label1 != label2:
                intersection = set(annotations[label1]).intersection(set(annotations[label2]))
                n_disparities = len(intersection)
                if n_disparities > 0:
                    if dedupe:
                        input_hash = "||".join(sorted([label1, label2]))
                    else:
                        input_hash = "||".join([label1, label2])

                    label_disparities[input_hash] = LabelDisparity(
                        label1=label1, label2=label2, count=n_disparities
                    )

    return sorted(label_disparities.values(), key=lambda ld: ld.count, reverse=True)

Identify annotated spans that have different labels in different examples for all label pairs in data.

Parameters

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| data | List[recon.types.Example] | Input List of examples | required |
| use_lower | bool | Use the lowercase form of the span text in get_ents_by_label. | True |
| dedupe | bool | Whether to deduplicate for table view vs confusion matrix. False by default for easy confusion matrix display. | False |

Returns

| Type | Description |
| ---- | ----------- |
| List[recon.types.LabelDisparity] | List of LabelDisparity objects for each label pair combination, sorted by the number of disparities between them. |
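
The sketch below runs the same disparity check across every label pair at once. With the default dedupe=False each pair is reported in both orderings, which suits a confusion-matrix layout; dedupe=True collapses each unordered pair to a single entry. The data construction reuses the same assumed Example/Span fields as the earlier sketches.

from recon.insights import top_label_disparities
from recon.types import Example, Span

data = [
    Example(
        text="Take paracetamol twice a day",
        spans=[Span(text="paracetamol", start=5, end=16, label="DRUG")],
    ),
    Example(
        text="Paracetamol is an active ingredient",
        spans=[Span(text="Paracetamol", start=0, end=11, label="CHEMICAL")],
    ),
]

for ld in top_label_disparities(data):
    # Both (DRUG, CHEMICAL) and (CHEMICAL, DRUG) appear when dedupe=False.
    print(ld.label1, ld.label2, ld.count)

for ld in top_label_disparities(data, dedupe=True):
    # Each unordered pair is reported once, better suited to a table view.
    print(ld.label1, ld.label2, ld.count)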

top_prediction_errors(recognizer, data, labels=None, n=None, k=None, exclude_fp=False, exclude_fn=False, verbose=False)

Source code in recon/insights.py
def top_prediction_errors(
    recognizer: EntityRecognizer,
    data: List[Example],
    labels: List[str] = None,
    n: int = None,
    k: int = None,
    exclude_fp: bool = False,
    exclude_fn: bool = False,
    verbose: bool = False,
) -> List[PredictionError]:
    """Get a sorted list of examples your model is worst at predicting.

    Args:
        recognizer (EntityRecognizer): An instance of EntityRecognizer
        data (List[Example]): List of annotated Examples
        labels (List[str], optional): List of labels to get errors for. 
            Defaults to the labels property of `recognizer`.
        n (int, optional): If set, only use the top n examples from data.
        k (int, optional): If set, return the top k prediction errors, otherwise the whole list.
        exclude_fp (bool, optional): Flag to exclude False Positive errors.
        exclude_fn (bool, optional): Flag to exclude False Negative errors.
        verbose (bool, optional): Show verbose output.

    Returns:
        List[PredictionError]: List of Prediction Errors your model is making, sorted by the
            spans your model has the most trouble with.
    """
    labels_ = labels or recognizer.labels
    if n is not None:
        data = data[:n]

    n_examples = len(data)
    texts = (e.text for e in data)
    anns = (e.spans for e in data)

    errors = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))  # type: ignore
    error_examples: DefaultDict[str, List[PredictionErrorExamplePair]] = defaultdict(list)
    n_errors = 0

    for orig_example, pred_example, ann in zip(data, recognizer.predict(texts), anns):
        if k is not None and n_errors > k:
            break

        pred_error_example_pair = PredictionErrorExamplePair(
            original=orig_example, predicted=pred_example
        )

        cand = set([(s.start, s.end, s.label) for s in pred_example.spans])
        gold = set([(s.start, s.end, s.label) for s in ann])

        fp_diff = cand - gold
        fn_diff = gold - cand

        seen = set()

        if fp_diff and not exclude_fp:
            for fp in fp_diff:
                gold_ent = None
                for ge in gold:
                    if fp[0] == ge[0] and fp[1] == ge[1]:
                        gold_ent = ge
                        break
                if gold_ent:
                    start, end, label = gold_ent
                    text = pred_example.text[start:end]
                    false_label = fp[2]
                    errors[label][text][false_label] += 1
                    error_examples[f"{text}||{label}||{false_label}"].append(
                        pred_error_example_pair
                    )
                else:
                    start, end, false_label = fp
                    text = pred_example.text[start:end]
                    errors[NONE][text][false_label] += 1
                    error_examples[f"{text}||{NONE}||{false_label}"].append(pred_error_example_pair)
                n_errors += 1
                seen.add((start, end))

        if fn_diff and not exclude_fn:
            for fn in fn_diff:
                start, end, label = fn
                if (start, end) not in seen:
                    text = pred_example.text[start:end]
                    errors[label][text][NONE] += 1
                    error_examples[f"{text}||{label}||{NONE}"].append(pred_error_example_pair)
                    n_errors += 1

    ranked_errors_map: Dict[str, PredictionError] = {}

    for label, errors_per_label in errors.items():
        for error_text, error_labels in errors_per_label.items():
            for error_label, count in error_labels.items():
                pe_hash = f"{error_text}||{label}||{error_label}"
                ranked_errors_map[pe_hash] = PredictionError(
                    text=error_text,
                    true_label=label,
                    pred_label=error_label,
                    count=count,
                    examples=error_examples[f"{error_text}||{label}||{error_label}"],
                )

    ranked_errors: List[PredictionError] = sorted(
        list(ranked_errors_map.values()), key=lambda error: error.count, reverse=True  # type: ignore
    )
    error_texts = set()
    for re in ranked_errors:
        if re.examples:
            for e in re.examples:
                error_texts.add(e.original.text)

    error_rate = round(len(error_texts) / len(data), 2)
    if verbose:
        error_summary = {
            "N Examples": len(data),
            "N Errors": len(ranked_errors),
            "N Error Examples": len(error_texts),
            "Error Rate": error_rate,
        }
        msg = Printer()
        msg.divider("Error Analysis")
        msg.table(error_summary)

    return ranked_errors

Get a sorted list of examples your model is worst at predicting.

Parameters

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| recognizer | EntityRecognizer | An instance of EntityRecognizer | required |
| data | List[recon.types.Example] | List of annotated Examples | required |
| labels | List[str] | List of labels to get errors for. Defaults to the labels property of recognizer. | None |
| n | int | If set, only use the top n examples from data. | None |
| k | int | If set, return the top k prediction errors, otherwise the whole list. | None |
| exclude_fp | bool | Flag to exclude False Positive errors. | False |
| exclude_fn | bool | Flag to exclude False Negative errors. | False |
| verbose | bool | Show verbose output. | False |

Returns

| Type | Description |
| ---- | ----------- |
| List[recon.types.PredictionError] | List of Prediction Errors your model is making, sorted by the spans your model has the most trouble with. |
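
A hedged end-to-end sketch: wrap a spaCy pipeline in the library's EntityRecognizer interface and rank its errors on a small annotated set. The SpacyEntityRecognizer import path and the Example/Span constructor fields are assumptions; substitute whatever recognizer implementation and data loading you actually use.

import spacy

from recon.insights import top_prediction_errors
from recon.recognizer import SpacyEntityRecognizer  # import path is an assumption
from recon.types import Example, Span

# Hypothetical annotated data; in practice this would be your corpus.
data = [
    Example(
        text="Apple opened a new office in Paris",
        spans=[
            Span(text="Apple", start=0, end=5, label="ORG"),
            Span(text="Paris", start=29, end=34, label="GPE"),
        ],
    ),
]

# Wrap a spaCy pipeline as the EntityRecognizer the function expects.
nlp = spacy.load("en_core_web_sm")
recognizer = SpacyEntityRecognizer(nlp)

errors = top_prediction_errors(recognizer, data, verbose=True)
for pe in errors:
    # Each PredictionError records the span text, the annotated (true) label,
    # the predicted label, and how often that confusion was made.
    print(pe.text, pe.true_label, pe.pred_label, pe.count)

Because each returned PredictionError carries its examples, the result can be passed directly to get_hardest_examples to rank the examples your model struggles with most.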