Insights

get_ents_by_label(data, use_lower=True)

Source code in recon/insights.py
def get_ents_by_label(data: List[Example], use_lower: bool = True) -> DefaultDict[str, List[str]]:
    """Get a dictionary of unique text spans by label for your data

    Args:
        data (List[Example]): List of examples
        use_lower (bool, optional): Use the lowercase form of the span text.

    Returns:
        DefaultDict[str, List[str]]: DefaultDict mapping label to sorted list of the unique
            spans annotated for that label.
    """
    annotations: DefaultDict[str, Set[str]] = defaultdict(set)
    sorted_annotations: DefaultDict[str, List[str]] = defaultdict(list)

    for e in data:
        for s in e.spans:
            span_text = s.text.lower() if use_lower else s.text
            annotations[s.label].add(span_text)

    for label in annotations.keys():
        sorted_annotations[label] = sorted(annotations[label])

    return sorted_annotations

Get a dictionary of unique text spans by label for your data

Parameters

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| data | List[recon.types.Example] | List of examples | required |
| use_lower | bool | Use the lowercase form of the span text. | True |

Returns

| Type | Description |
| ---- | ----------- |
| DefaultDict[str, List[str]] | DefaultDict mapping label to sorted list of the unique spans annotated for that label. |
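
As a quick illustration, here is a minimal, hedged sketch of calling get_ents_by_label on two hand-built examples. The Example and Span constructor fields used below (text, spans, start, end, label) are assumptions inferred from the attribute access in the source above, not confirmed signatures.

from recon.insights import get_ents_by_label
from recon.types import Example, Span

# Hypothetical data: the Example/Span constructor fields are assumed from
# the attributes the source accesses (e.spans, s.text, s.label).
data = [
    Example(
        text="Apple is hiring in London",
        spans=[
            Span(text="Apple", start=0, end=5, label="ORG"),
            Span(text="London", start=19, end=25, label="GPE"),
        ],
    ),
    Example(
        text="apple pie recipe",
        spans=[Span(text="apple", start=0, end=5, label="FOOD")],
    ),
]

ents = get_ents_by_label(data)
# With use_lower=True (the default), span text is lowercased first:
# ents["ORG"]  -> ["apple"]
# ents["FOOD"] -> ["apple"]
# ents["GPE"]  -> ["london"]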

get_hardest_examples(pred_errors, return_pred_errors=True, remove_pred_error_examples=True)

Source code in recon/insights.py
def get_hardest_examples(
    pred_errors: List[PredictionError],
    return_pred_errors: bool = True,
    remove_pred_error_examples: bool = True,
) -> List[HardestExample]:
    """Get hardest examples from list of PredictionError types

    Args:
        pred_errors (List[PredictionError]): list of PredictionError
        return_pred_errors (bool, optional): Whether to return prediction errors. Defaults to True.
        remove_pred_error_examples (bool, optional): Whether to remove examples from returned PredictionError. Defaults to True.

    Raises:
        ValueError: Each PredictionError must have a List of examples 

    Returns:
        List[HardestExample]: Sorted list of the hardest examples for a model to work on.
    """

    has_examples = any([pe.examples for pe in pred_errors])
    if not has_examples:
        raise ValueError(
            "Each PredictionError in Parameter pred_errors must have examples attached."
        )

    examples_text_map: Dict[str, Example] = {}
    example_pred_errors_map: DefaultDict[str, List[PredictionError]] = defaultdict(list)
    for pe in pred_errors:
        if pe.examples:
            for example in pe.examples:
                examples_text_map[example.original.text] = example.original
                example_pred_errors_map[example.original.text].append(
                    PredictionError(
                        text=pe.text,
                        true_label=pe.true_label,
                        pred_label=pe.pred_label,
                        count=pe.count,
                        examples=[example],
                    )
                )

    hardest_examples = []
    for example_text, example_pred_errors in example_pred_errors_map.items():
        example = examples_text_map[example_text]  # type: ignore

        prediction_errors: List[PredictionError] = []
        if remove_pred_error_examples and example_pred_errors:
            prediction_errors = [
                PredictionError(
                    text=pe.text,
                    true_label=pe.true_label,
                    pred_label=pe.pred_label,
                    count=pe.count,
                    examples=[],
                )
                for pe in example_pred_errors
            ]
        else:
            prediction_errors = example_pred_errors

        prediction_error_hashes: Set[str] = set()
        deduped_prediction_errors: List[PredictionError] = []

        for pe in prediction_errors:
            pe_hash = f"{pe.text}||{pe.true_label}||{pe.pred_label}"
            if pe_hash not in prediction_error_hashes:
                prediction_error_hashes.add(pe_hash)
                deduped_prediction_errors.append(pe)

        record = HardestExample(example=example, count=len(deduped_prediction_errors))
        if return_pred_errors:
            record.prediction_errors = deduped_prediction_errors
        hardest_examples.append(record)

    sorted_hardest_examples = sorted(hardest_examples, key=lambda he: he.count, reverse=True)
    return sorted_hardest_examples

Get hardest examples from list of PredictionError types

Parameters

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| pred_errors | List[recon.types.PredictionError] | List of PredictionError | required |
| return_pred_errors | bool | Whether to return prediction errors. Defaults to True. | True |
| remove_pred_error_examples | bool | Whether to remove examples from returned PredictionError. Defaults to True. | True |

Exceptions

| Type | Description |
| ---- | ----------- |
| ValueError | Each PredictionError must have a List of examples |

Returns

| Type | Description |
| ---- | ----------- |
| List[recon.types.HardestExample] | Sorted list of the hardest examples for a model to work on. |
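
In practice, pred_errors usually comes from top_prediction_errors (documented below), which attaches a PredictionErrorExamplePair to each error. The sketch below builds a single error by hand instead so that it stays self-contained; the constructor fields are assumptions based on how the source above reads them.

from recon.insights import get_hardest_examples
from recon.types import Example, PredictionError, PredictionErrorExamplePair, Span

# Hypothetical original vs. predicted annotations for one text.
orig = Example(
    text="Send the report to ACME Corp",
    spans=[Span(text="ACME Corp", start=19, end=28, label="ORG")],
)
pred = Example(
    text="Send the report to ACME Corp",
    spans=[Span(text="ACME Corp", start=19, end=28, label="PERSON")],
)

pred_errors = [
    PredictionError(
        text="ACME Corp",
        true_label="ORG",
        pred_label="PERSON",
        count=1,
        examples=[PredictionErrorExamplePair(original=orig, predicted=pred)],
    )
]

hardest = get_hardest_examples(pred_errors)
# Each HardestExample pairs an Example with the number of distinct
# prediction errors made on it, sorted hardest-first.
print(hardest[0].example.text, hardest[0].count)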

get_label_disparities(data, label1, label2, use_lower=True)

Source code in recon/insights.py
def get_label_disparities(
    data: List[Example], label1: str, label2: str, use_lower: bool = True
) -> Set[str]:
    """Identify annotated spans that have different labels in different examples

    Args:
        data (List[Example]): Input List of examples
        label1 (str): First label to compare
        label2 (str): Second label to compare
        use_lower (bool, optional): Use the lowercase form of the span text in get_ents_by_label.

    Returns:
        Set[str]: Set of all unique text spans that overlap between label1 and label2
    """
    annotations = get_ents_by_label(data, use_lower=use_lower)
    return set(annotations[label1]).intersection(set(annotations[label2]))

Identify annotated spans that have different labels in different examples

Parameters

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| data | List[recon.types.Example] | Input List of examples | required |
| label1 | str | First label to compare | required |
| label2 | str | Second label to compare | required |
| use_lower | bool | Use the lowercase form of the span text in get_ents_by_label. | True |

Returns

| Type | Description |
| ---- | ----------- |
| Set[str] | Set of all unique text spans that overlap between label1 and label2 |
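
For instance, the hedged sketch below flags a span that was annotated as DRUG in one example and CHEMICAL in another; the Example and Span constructor fields are assumed, as in the earlier sketch.

from recon.insights import get_label_disparities
from recon.types import Example, Span

data = [
    Example(
        text="Take paracetamol twice a day",
        spans=[Span(text="paracetamol", start=5, end=16, label="DRUG")],
    ),
    Example(
        text="Paracetamol is an active ingredient",
        spans=[Span(text="Paracetamol", start=0, end=11, label="CHEMICAL")],
    ),
]

overlap = get_label_disparities(data, "DRUG", "CHEMICAL")
# {"paracetamol"} - the same (lowercased) span text appears under both labels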

top_label_disparities(data, use_lower=True, dedupe=False)

Source code in recon/insights.py
def top_label_disparities(
    data: List[Example], use_lower: bool = True, dedupe: bool = False
) -> List[LabelDisparity]:
    """Identify annotated spans that have different labels
    in different examples for all label pairs in data.

    Args:
        data (List[Example]): Input List of examples
        use_lower (bool, optional): Use the lowercase form of the span text in get_ents_by_label.
        dedupe (bool, optional): Whether to deduplicate for table view vs confusion matrix.
            False by default for easy confusion matrix display.

    Returns:
        List[LabelDisparity]: List of LabelDisparity objects for each label pair combination
            sorted by the number of disparities between them.
    """
    annotations = get_ents_by_label(data, use_lower=use_lower)
    label_disparities = {}
    for label1 in annotations.keys():
        for label2 in annotations.keys():
            if label1 != label2:
                intersection = set(annotations[label1]).intersection(set(annotations[label2]))
                n_disparities = len(intersection)
                if n_disparities > 0:
                    if dedupe:
                        input_hash = "||".join(sorted([label1, label2]))
                    else:
                        input_hash = "||".join([label1, label2])

                    label_disparities[input_hash] = LabelDisparity(
                        label1=label1, label2=label2, count=n_disparities
                    )

    return sorted(label_disparities.values(), key=lambda ld: ld.count, reverse=True)

Identify annotated spans that have different labels in different examples for all label pairs in data.

Parameters

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| data | List[recon.types.Example] | Input List of examples | required |
| use_lower | bool | Use the lowercase form of the span text in get_ents_by_label. | True |
| dedupe | bool | Whether to deduplicate for table view vs confusion matrix. False by default for easy confusion matrix display. | False |

Returns

| Type | Description |
| ---- | ----------- |
| List[recon.types.LabelDisparity] | List of LabelDisparity objects for each label pair combination, sorted by the number of disparities between them. |
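
The sketch below runs the same disparity check across every label pair at once. With the default dedupe=False each pair is reported in both orderings, which suits a confusion-matrix layout; dedupe=True collapses each unordered pair to a single entry. The data construction reuses the same assumed Example/Span fields as the earlier sketches.

from recon.insights import top_label_disparities
from recon.types import Example, Span

data = [
    Example(
        text="Take paracetamol twice a day",
        spans=[Span(text="paracetamol", start=5, end=16, label="DRUG")],
    ),
    Example(
        text="Paracetamol is an active ingredient",
        spans=[Span(text="Paracetamol", start=0, end=11, label="CHEMICAL")],
    ),
]

for ld in top_label_disparities(data):
    # Both (DRUG, CHEMICAL) and (CHEMICAL, DRUG) appear when dedupe=False.
    print(ld.label1, ld.label2, ld.count)

for ld in top_label_disparities(data, dedupe=True):
    # Each unordered pair is reported once, better suited to a table view.
    print(ld.label1, ld.label2, ld.count)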

top_prediction_errors(recognizer, data, labels=None, n=None, k=None, exclude_fp=False, exclude_fn=False, verbose=False)

Source code in recon/insights.py
def top_prediction_errors(
    recognizer: EntityRecognizer,
    data: List[Example],
    labels: List[str] = None,
    n: int = None,
    k: int = None,
    exclude_fp: bool = False,
    exclude_fn: bool = False,
    verbose: bool = False,
) -> List[PredictionError]:
    """Get a sorted list of examples your model is worst at predicting.

    Args:
        recognizer (EntityRecognizer): An instance of EntityRecognizer
        data (List[Example]): List of annotated Examples
        labels (List[str], optional): List of labels to get errors for. 
            Defaults to the labels property of `recognizer`.
        n (int, optional): If set, only use the top n examples from data.
        k (int, optional): If set, return the top k prediction errors, otherwise the whole list.
        exclude_fp (bool, optional): Flag to exclude False Positive errors.
        exclude_fn (bool, optional): Flag to exclude False Negative errors.
        verbose (bool, optional): Show verbose output.

    Returns:
        List[PredictionError]: List of Prediction Errors your model is making, sorted by the
            spans your model has the most trouble with.
    """
    labels_ = labels or recognizer.labels
    if n is not None:
        data = data[:n]

    n_examples = len(data)
    texts = (e.text for e in data)
    anns = (e.spans for e in data)

    errors = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))  # type: ignore
    error_examples: DefaultDict[str, List[PredictionErrorExamplePair]] = defaultdict(list)
    n_errors = 0

    for orig_example, pred_example, ann in zip(data, recognizer.predict(texts), anns):
        if k is not None and n_errors > k:
            break

        pred_error_example_pair = PredictionErrorExamplePair(
            original=orig_example, predicted=pred_example
        )

        cand = set([(s.start, s.end, s.label) for s in pred_example.spans])
        gold = set([(s.start, s.end, s.label) for s in ann])

        fp_diff = cand - gold
        fn_diff = gold - cand

        seen = set()

        if fp_diff and not exclude_fp:
            for fp in fp_diff:
                gold_ent = None
                for ge in gold:
                    if fp[0] == ge[0] and fp[1] == ge[1]:
                        gold_ent = ge
                        break
                if gold_ent:
                    start, end, label = gold_ent
                    text = pred_example.text[start:end]
                    false_label = fp[2]
                    errors[label][text][false_label] += 1
                    error_examples[f"{text}||{label}||{false_label}"].append(
                        pred_error_example_pair
                    )
                else:
                    start, end, false_label = fp
                    text = pred_example.text[start:end]
                    errors[NONE][text][false_label] += 1
                    error_examples[f"{text}||{NONE}||{false_label}"].append(pred_error_example_pair)
                n_errors += 1
                seen.add((start, end))

        if fn_diff and not exclude_fn:
            for fn in fn_diff:
                start, end, label = fn
                if (start, end) not in seen:
                    text = pred_example.text[start:end]
                    errors[label][text][NONE] += 1
                    error_examples[f"{text}||{label}||{NONE}"].append(pred_error_example_pair)
                    n_errors += 1

    ranked_errors_map: Dict[str, PredictionError] = {}

    for label, errors_per_label in errors.items():
        for error_text, error_labels in errors_per_label.items():
            for error_label, count in error_labels.items():
                pe_hash = f"{error_text}||{label}||{error_label}"
                ranked_errors_map[pe_hash] = PredictionError(
                    text=error_text,
                    true_label=label,
                    pred_label=error_label,
                    count=count,
                    examples=error_examples[f"{error_text}||{label}||{error_label}"],
                )

    ranked_errors: List[PredictionError] = sorted(
        list(ranked_errors_map.values()), key=lambda error: error.count, reverse=True  # type: ignore
    )
    error_texts = set()
    for re in ranked_errors:
        if re.examples:
            for e in re.examples:
                error_texts.add(e.original.text)

    error_rate = round(len(error_texts) / len(data), 2)
    if verbose:
        error_summary = {
            "N Examples": len(data),
            "N Errors": len(ranked_errors),
            "N Error Examples": len(error_texts),
            "Error Rate": error_rate,
        }
        msg = Printer()
        msg.divider("Error Analysis")
        msg.table(error_summary)

    return ranked_errors

Get a sorted list of examples your model is worst at predicting.

Parameters

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| recognizer | EntityRecognizer | An instance of EntityRecognizer | required |
| data | List[recon.types.Example] | List of annotated Examples | required |
| labels | List[str] | List of labels to get errors for. Defaults to the labels property of recognizer. | None |
| n | int | If set, only use the top n examples from data. | None |
| k | int | If set, return the top k prediction errors, otherwise the whole list. | None |
| exclude_fp | bool | Flag to exclude False Positive errors. | False |
| exclude_fn | bool | Flag to exclude False Negative errors. | False |
| verbose | bool | Show verbose output. | False |

Returns

| Type | Description |
| ---- | ----------- |
| List[recon.types.PredictionError] | List of Prediction Errors your model is making, sorted by the spans your model has the most trouble with. |
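
A hedged end-to-end sketch: wrap a spaCy pipeline in the library's EntityRecognizer interface and rank its errors on a small annotated set. The SpacyEntityRecognizer import path and the Example/Span constructor fields are assumptions; substitute whatever recognizer implementation and data loading you actually use.

import spacy

from recon.insights import top_prediction_errors
from recon.recognizer import SpacyEntityRecognizer  # import path is an assumption
from recon.types import Example, Span

# Hypothetical annotated data; in practice this would be your corpus.
data = [
    Example(
        text="Apple opened a new office in Paris",
        spans=[
            Span(text="Apple", start=0, end=5, label="ORG"),
            Span(text="Paris", start=29, end=34, label="GPE"),
        ],
    ),
]

# Wrap a spaCy pipeline as the EntityRecognizer the function expects.
nlp = spacy.load("en_core_web_sm")
recognizer = SpacyEntityRecognizer(nlp)

errors = top_prediction_errors(recognizer, data, verbose=True)
for pe in errors:
    # Each PredictionError records the span text, the annotated (true) label,
    # the predicted label, and how often that confusion was made.
    print(pe.text, pe.true_label, pe.pred_label, pe.count)

Because each returned PredictionError carries its examples, the result can be passed directly to get_hardest_examples to rank the examples your model struggles with most.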