Operations

Operations are functions that operate on either a list of examples or a single example. If the function operates on a single example, Recon takes care of applying it to every example in a dataset.

The following operations are built into Recon

Error

... full list of operations to come

Operation

Operation class that takes care of calling and reporting the results of an operation on a Dataset

__call__(self, dataset, *args, **kwargs)

Show source code in recon/operations.py
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
    def __call__(self, dataset: Any, *args: Any, **kwargs: Any) -> OperationResult:
        """Run the op over every example in a dataset and record the results.

        Each example produced by ``op_iter(dataset.data, self.pre)`` is passed
        to ``self.op``. What the op returns decides what is kept and recorded:

        * ``None``: nothing is kept and the example is recorded as removed.
        * a ``list``: every returned example is kept. Those whose hash differs
          from the original's are recorded as added (and added to
          ``dataset.example_store``); the original is recorded as removed when
          no returned example has its hash.
        * anything else: treated as a single example. It is kept, and recorded
          as changed (and added to ``dataset.example_store``) when its hash
          differs from the original's.

        Args:
            dataset (Dataset): Dataset to operate on
            *args: Extra positional arguments forwarded to the op
            **kwargs: Extra keyword arguments forwarded to the op. An
                ``initial_state`` entry, if present, is consumed here and not
                forwarded; a deep copy of it seeds the recorded state.

        Raises:
            AssertionError: if a single (non-list) result does not have a
                ``str`` ``text`` and a ``list`` ``spans``

        Returns:
            OperationResult: Container holding new data and the state of the Operation
        """
        # Work on a deep copy so the caller's initial_state is never mutated.
        initial_state = kwargs.pop("initial_state", None) or OperationState(name=self.name)
        state = initial_state.copy(deep=True)

        if state.status == OperationStatus.NOT_STARTED:
            state.status = OperationStatus.IN_PROGRESS

        def add_example(new_example: Example) -> None:
            state.transformations.append(
                Transformation(example=hash(new_example), type=TransformationType.EXAMPLE_ADDED)
            )
            dataset.example_store.add(new_example)

        def remove_example(orig_example_hash: int) -> None:
            state.transformations.append(
                Transformation(
                    prev_example=orig_example_hash, type=TransformationType.EXAMPLE_REMOVED
                )
            )

        def change_example(orig_example_hash: int, new_example: Example) -> None:
            state.transformations.append(
                Transformation(
                    prev_example=orig_example_hash,
                    example=hash(new_example),
                    type=TransformationType.EXAMPLE_CHANGED,
                )
            )
            dataset.example_store.add(new_example)

        new_data = []
        for orig_example_hash, example, preprocessed_outputs in op_iter(dataset.data, self.pre):
            # Only pass preprocessed_outputs when there is something in it, so
            # ops without preprocessors need not accept the keyword.
            if preprocessed_outputs:
                res = self.op(example, *args, preprocessed_outputs=preprocessed_outputs, **kwargs)
            else:
                res = self.op(example, *args, **kwargs)

            if res is None:
                remove_example(orig_example_hash)
            elif isinstance(res, list):
                old_example_present = False
                for new_example in res:
                    # Keep every returned example, including an unchanged
                    # original; otherwise it would vanish from the output
                    # without a removal ever being recorded.
                    new_data.append(new_example)
                    if hash(new_example) == orig_example_hash:
                        old_example_present = True
                    else:
                        add_example(new_example)
                if not old_example_present:
                    remove_example(orig_example_hash)
            else:
                assert isinstance(res.text, str)
                assert isinstance(res.spans, list)
                new_data.append(res)
                if hash(res) != orig_example_hash:
                    change_example(orig_example_hash, res)

        # The counts cover every transformation in the state, including any
        # carried over from initial_state.
        transformation_counts = Counter(t.type for t in state.transformations)

        state.examples_added = transformation_counts[TransformationType.EXAMPLE_ADDED]
        state.examples_removed = transformation_counts[TransformationType.EXAMPLE_REMOVED]
        state.examples_changed = transformation_counts[TransformationType.EXAMPLE_CHANGED]
        state.status = OperationStatus.COMPLETED

        # `state` is already a private deep copy, so it can be returned as is.
        return OperationResult(data=new_data, state=state)

Runs op on a dataset and records the results

Parameters

Name Type Description Default
dataset Any Dataset to operate on required

Exceptions

Type Description
ValueError if track_example is called in the op with no data

Returns

Type Description
OperationResult Container holding new data and the state of the Operation

__init__(self, name, pre, op)

Show source code in recon/operations.py
88
89
90
91
92
93
94
95
96
97
98
    def __init__(self, name: str, pre: List[PreProcessor], op: Callable):
        """Set up an Operation from its name, preprocessors and function.

        Args:
            name (str): Name of the operation, stored as ``self.name``
            pre (List[PreProcessor]): Preprocessors to run, stored as ``self.pre``
            op (Callable): Function the operation wraps, stored as ``self.op``
        """
        self.name, self.pre, self.op = name, pre, op

Initialize an Operation instance

Parameters

Name Type Description Default
name str Name of operation required
pre List[recon.preprocess.PreProcessor] List of preprocessors to run required
op Callable Decorated function required

operation

__call__(self, *args, **kwargs)

Show source code in recon/operations.py
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
    def __call__(self, *args: Any, **kwargs: Any) -> Callable:
        """Register the decorated function as an Operation.

        The first positional argument is the function being decorated. It is
        wrapped in an ``Operation`` built from this decorator's name and
        preprocessors, and that Operation is registered in
        ``registry.operations`` under ``self.name``.

        e.g. @operation("recon.v1.some_name")

        Keyword arguments are accepted but not used.

        Args:
            args: First arg is function to decorate

        Returns:
            Callable: The original function, unmodified
        """
        decorated: Callable = args[0]
        register = registry.operations.register(self.name)
        register(Operation(self.name, self.pre, decorated))
        return decorated

Decorator for an operation. The first arg is the function being decorated. This function can either operate on a List[Example] and in that case self.batch should be True.

e.g. @operation("recon.v1.some_name", batch=True)

Or it should operate on a single example and recon will take care of applying it to a full Dataset

Parameters

Name Type Description Default
*args Any First arg is function to decorate ()

Returns

Type Description
Callable Original function

__init__(self, name, pre=[])

Show source code in recon/operations.py
51
52
53
54
55
56
57
58
59
    def __init__(self, name: str, pre: List[PreProcessor] = []):
        """Decorate an operation that makes some changes to a dataset.

        Args:
            name (str): Operation name.
            pre (List[PreProcessor]): List of preprocessors to run. The list
                is copied, so later changes to the caller's list do not affect
                this decorator.
        """
        self.name = name
        # The default `[]` is created once, when the function is defined, and
        # would otherwise be shared by every decorator built without `pre`.
        # Copying gives each instance its own list.
        self.pre = list(pre)

Decorate an operation that makes some changes to a dataset.

Parameters

Name Type Description Default
name str Operation name. required
pre List[recon.preprocess.PreProcessor] List of preprocessors to run []

op_iter(data, pre)

Show source code in recon/operations.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def op_iter(
    data: List[Example], pre: List[PreProcessor]
) -> Iterator[Tuple[int, Example, Dict[str, Any]]]:
    """Iterate over a list of examples for an operation.

    Every preprocessor is called once on the full ``data`` list before
    anything is yielded. Its outputs are paired with the examples in order and
    stored per example under the preprocessor's ``name``.

    Args:
        data (List[Example]): List of examples to iterate
        pre (List[PreProcessor]): List of preprocessors to run

    Yields:
        Iterator[Tuple[int, Example, Dict[str, Any]]]: Tuples of
            (hash of the original example, deep copy of the example,
            preprocessor outputs for that example keyed by preprocessor name).
            The outputs dict is empty when no preprocessor produced anything
            for the example.
    """
    # Keyed by the example itself, so examples that compare equal share an entry.
    preprocessed_outputs: Dict[Example, Dict[str, Any]] = defaultdict(dict)
    for processor in pre:
        for example, output in zip(data, processor(data)):
            preprocessed_outputs[example][processor.name] = output

    for example in data:
        # Hash the original but hand out a deep copy, so an op that mutates
        # its argument cannot alter the caller's data.
        yield hash(example), example.copy(deep=True), preprocessed_outputs[example]

Iterate over a list of examples for an operation, yielding tuples of (example hash, deep copy of the example, preprocessor outputs for that example)

Parameters

Name Type Description Default
data List[recon.types.Example] List of examples to iterate required
pre List[recon.preprocess.PreProcessor] List of preprocessors to run required

Yields: Iterator[Tuple[int, Example, Dict[str, Any]]]: Tuples of (example hash, deep copy of the example, preprocessor outputs for that example)