anz2 committed on
Commit
85b9718
·
1 Parent(s): d051333

update evaluation package with evaluation and ocr packages and completed _compute logic.

Browse files
Files changed (5) hide show
  1. cer.py +0 -2
  2. evaluation/iou.py +370 -0
  3. evaluation/metrics.py +589 -0
  4. iliauniiccocrevaluation.py +72 -28
  5. ocr/fiftyone.py +26 -0
cer.py DELETED
@@ -1,2 +0,0 @@
1
- def calculate_cer(*args):
2
- return -1
 
 
 
evaluation/iou.py ADDED
@@ -0,0 +1,370 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ from scipy.sparse import csr_matrix
6
+ from scipy.sparse.csgraph import connected_components
7
+
8
+
9
def bb_intersection_over_union(boxA, boxB):
    """Return the intersection-over-union of two boxes.

    Both boxes are indexable as ``(x1, y1, x2, y2)``. Every side length is
    padded by a small epsilon, so the box areas are never exactly zero and
    boxes that merely touch get a tiny positive score.

    :param boxA: first box, read at indices 0..3.
    :param boxB: second box, read at indices 0..3.
    :return: overlap area divided by union area, as a float.
    """
    EPS = 1e-5

    # Width and height of the overlap rectangle, clamped at zero when the
    # boxes do not overlap along that axis.
    overlap_width = max(0, min(boxA[2], boxB[2]) - max(boxA[0], boxB[0]) + EPS)
    overlap_height = max(0, min(boxA[3], boxB[3]) - max(boxA[1], boxB[1]) + EPS)
    overlap_area = overlap_width * overlap_height

    area_a = (boxA[2] - boxA[0] + EPS) * (boxA[3] - boxA[1] + EPS)
    area_b = (boxB[2] - boxB[0] + EPS) * (boxB[3] - boxB[1] + EPS)

    # Union = sum of both areas minus the part counted twice.
    union_area = float(area_a + area_b - overlap_area)
    return overlap_area / union_area
28
+
29
+
30
def bb_intersection_over_union_vectorized(bboxes1, bboxes2):
    """Return the pairwise IOU matrix between two sets of boxes.

    Boxes are rows of ``(x1, y1, x2, y2)``. The upper corner of every box is
    padded by a small epsilon, matching ``bb_intersection_over_union``.

    The inputs are converted to float arrays first. The previous in-place
    ``+= EPS`` raised a casting error for integer-dtype arrays (e.g. pixel
    coordinates), and the tile/reshape approach failed on empty input.

    :param bboxes1: array-like of shape ``(n, 4)``; not modified.
    :param bboxes2: array-like of shape ``(m, 4)``; not modified.
    :return: float array of shape ``(n, m)`` where entry ``[i, j]`` is the IOU
        of ``bboxes1[i]`` and ``bboxes2[j]``.
    """
    EPS = 1e-5

    boxes_a = np.asarray(bboxes1, dtype=float).reshape(-1, 4)
    boxes_b = np.asarray(bboxes2, dtype=float).reshape(-1, 4)

    # Broadcast (n, 1, 4) against (1, m, 4) instead of materialising n*m rows.
    a = boxes_a[:, np.newaxis, :]
    b = boxes_b[np.newaxis, :, :]

    a_low, a_high = a[..., :2], a[..., 2:] + EPS
    b_low, b_high = b[..., :2], b[..., 2:] + EPS

    # Per-axis overlap extent, clamped at zero, then multiplied into an area.
    intrs = np.maximum(
        0.0,
        np.minimum(a_high, b_high) - np.maximum(a_low, b_low),
    ).prod(-1)

    area_a = (a_high - a_low).prod(-1)
    area_b = (b_high - b_low).prod(-1)

    return intrs / (area_a + area_b - intrs)
61
+
62
+
63
def bb_is_on_same_line_vectorized(bboxes1, bboxes2):
    """Return a boolean matrix telling which box pairs share a line.

    Only columns 1 and 3 of each box are read (the low and high bound along
    one axis). Two boxes count as being on the same line when the midpoint of
    each one lies within the bounds of the other.

    :param bboxes1: array of shape ``(n, 4)``.
    :param bboxes2: array of shape ``(m, 4)``.
    :return: boolean array of shape ``(n, m)``.
    """
    first = bboxes1[:, np.newaxis, :]
    second = bboxes2[np.newaxis, :, :]

    first_low, first_high = first[..., 1], first[..., 3]
    second_low, second_high = second[..., 1], second[..., 3]

    first_mid = (first_low + first_high) / 2
    second_mid = (second_low + second_high) / 2

    second_mid_in_first = (first_low <= second_mid) & (second_mid <= first_high)
    first_mid_in_second = (second_low <= first_mid) & (first_mid <= second_high)

    return second_mid_in_first & first_mid_in_second
77
+
78
+
79
def iou(ocr1, ocr2):
    """Return the IOU of two items exposing ``x1``, ``y1``, ``x2``, ``y2`` keys.

    :param ocr1: mapping-like item (e.g. a dict or a row) with the four keys.
    :param ocr2: mapping-like item with the same four keys.
    :return: result of ``bb_intersection_over_union`` for the two boxes.
    """
    corner_keys = ('x1', 'y1', 'x2', 'y2')
    first_box = tuple(ocr1[key] for key in corner_keys)
    second_box = tuple(ocr2[key] for key in corner_keys)
    return bb_intersection_over_union(first_box, second_box)
84
+
85
+
86
+ def _generate_empty_row(example_row, index):
87
+ """This will generate empty row with empty values but it also generates tiny but valid bounding box
88
+ to avoid exceptions while cropping the image"""
89
+
90
+ example_row_dict = example_row.to_dict()
91
+ example_row_dict['page'] = example_row_dict.get('page', 0)
92
+ example_row_dict['block'] = 0
93
+ example_row_dict['paragraph'] = 0
94
+ example_row_dict['word'] = 0
95
+ example_row_dict['x1'] = 0
96
+ example_row_dict['y1'] = 0
97
+ example_row_dict['x2'] = 1
98
+ example_row_dict['y2'] = 1
99
+ example_row_dict['conf'] = 0.0
100
+ example_row_dict['text'] = ""
101
+
102
+ empty_row = pd.DataFrame([example_row_dict], columns=example_row.index, index=[index])
103
+
104
+ return empty_row
105
+
106
+
107
def _pair_page_items(df1_items, df2_items, pref1, pref2, empty_item_index):
    """Greedily pair the rows of two non-empty frames by decreasing IOU.

    Each row takes part in at most one pair. Rows left without a partner are
    paired with a placeholder row labelled ``empty_item_index`` and get an IOU
    of 0.0.

    :return: frame with the ``pref1``-prefixed columns of the first item, the
        ``pref2``-prefixed columns of the second item and an ``iou`` column,
        ordered by the second item's index label.
    """
    similarity_metrics = calculate_ious_fast(ocr1_df=df1_items, ocr2_df=df2_items)
    labels1 = df1_items.index.tolist()
    labels2 = df2_items.index.tolist()

    # Only overlapping candidates can ever be paired, so drop the rest before
    # sorting. np.nonzero walks row-major, the same order as the nested loops
    # it replaces, and sorted() is stable, so ties resolve as before.
    positions1, positions2 = np.nonzero(similarity_metrics > 0.0)
    candidates = sorted(
        (
            (labels1[pos1], labels2[pos2], similarity_metrics[pos1, pos2])
            for pos1, pos2 in zip(positions1, positions2)
        ),
        key=lambda item: -item[2],
    )

    paired_items_1 = set()
    paired_items_2 = set()
    pairs = []
    for label1, label2, similarity in candidates:
        if label1 not in paired_items_1 and label2 not in paired_items_2:
            paired_items_1.add(label1)
            paired_items_2.add(label2)
            pairs.append((label1, label2, similarity))

    # Unmatched items are paired with the placeholder row.
    pairs.extend((label1, empty_item_index, 0.0) for label1 in labels1 if label1 not in paired_items_1)
    pairs.extend((empty_item_index, label2, 0.0) for label2 in labels2 if label2 not in paired_items_2)

    # Sort pairs according to the second frame's original index labels.
    pairs.sort(key=lambda item: (item[1], item[0]))

    df1_items = pd.concat(
        [df1_items, _generate_empty_row(example_row=df1_items.iloc[0], index=empty_item_index)])
    df2_items = pd.concat(
        [df2_items, _generate_empty_row(example_row=df2_items.iloc[0], index=empty_item_index)])

    return pd.concat(
        [
            df1_items.loc[[item[0] for item in pairs], :].reset_index(drop=True).add_prefix(pref1),
            df2_items.loc[[item[1] for item in pairs], :].reset_index(drop=True).add_prefix(pref2),
            pd.DataFrame(data=[item[2] for item in pairs], columns=["iou"]),
        ],
        axis=1,
    )


def _match_items_per_page(df1, df2, pref1, pref2, group_items):
    """Run the IOU matching page by page and stack the per-page results.

    :param group_items: when true, the items of each page are first merged
        into groups via ``get_connected_components`` and groups are matched.
    """
    text_pairs_dfs_per_page = []
    unique_page_ids = sorted(set(df1['page'].unique().tolist() + df2['page'].unique().tolist()))

    for page_id in unique_page_ids:
        df1_page = df1[df1.page == page_id]
        df2_page = df2[df2.page == page_id]

        # Both sides are required. The original guard tested df1_page twice,
        # so a page missing from df2 crashed further down on `iloc[0]`.
        # NOTE(review): items on a page that exists on one side only are not
        # reported at all - confirm this is the intended scoring.
        if df1_page.empty or df2_page.empty:
            continue

        if group_items:
            df1_page, df2_page = get_connected_components(ocr1_df=df1_page, ocr2_df=df2_page)

        # The placeholder label is derived from the full frames' sizes plus the
        # page id; this presumes index labels are smaller than that value and
        # that page ids are numeric.
        empty_item_index = max(df1.shape[0], df2.shape[0]) + 100 + page_id

        text_pairs_dfs_per_page.append(
            _pair_page_items(df1_page, df2_page, pref1, pref2, empty_item_index)
        )

    if not text_pairs_dfs_per_page:
        # pd.concat([]) raises; return an empty frame with the expected columns.
        columns = (
            [f"{pref1}{column}" for column in df1.columns]
            + [f"{pref2}{column}" for column in df2.columns]
            + ["iou"]
        )
        return pd.DataFrame(columns=columns)

    return pd.concat(text_pairs_dfs_per_page, axis=0)


def word_or_symbol_pair_matching(df1, df2, pref1, pref2):
    """Apply IOU based matching of word or symbol elements using rectangular bounding boxes.

    The matching between the first and second set is unique: one item is never
    part of two different pairs. If no partner is found, an empty placeholder
    element is used instead. Pairs are generated in decreasing order of IOU
    and matching is done separately for every page.

    :param df1: first frame; needs ``page`` and ``bounding_box`` columns.
    :param df2: second frame with the same columns.
    :param pref1: prefix added to the column names coming from ``df1``.
    :param pref2: prefix added to the column names coming from ``df2``.
    :return: frame of pairs with prefixed columns of both items plus ``iou``;
        empty (columns only) when no page has items on both sides.
    """
    return _match_items_per_page(df1, df2, pref1, pref2, group_items=False)


def word_or_symbol_group_pair_matching(df1, df2, pref1, pref2):
    """Apply IOU based matching of groups of word or symbol elements.

    Works like ``word_or_symbol_pair_matching`` (unique pairs, empty
    placeholder for unmatched items, per-page processing), but the items of
    each page are first merged into groups by ``get_connected_components``,
    so a group of words or symbols can be matched against another group.

    Example:
        With words ["abc", "d"] on one side and the single word ["abcd"] on
        the other, the first two words are grouped and matched with the one
        target word. This evaluates the overall text detection accuracy rather
        than the word or symbol boundary detection.

    Note: grouping only joins items on one line, to avoid unpredictable
    results when bounding boxes on neighboring lines intersect.

    :param df1: first frame; needs ``page``, ``bounding_box``, ``confidence``
        and ``text`` columns.
    :param df2: second frame with the same columns.
    :param pref1: prefix added to the column names coming from ``df1``.
    :param pref2: prefix added to the column names coming from ``df2``.
    :return: frame of group pairs with prefixed columns plus ``iou``; empty
        (columns only) when no page has items on both sides.
    """
    return _match_items_per_page(df1, df2, pref1, pref2, group_items=True)
261
+
262
def calculate_ious_fast(ocr1_df, ocr2_df):
    """Return the pairwise IOU matrix between the boxes of two frames.

    :param ocr1_df: frame with a ``bounding_box`` column.
    :param ocr2_df: frame with a ``bounding_box`` column.
    :return: array of shape ``(len(ocr1_df), len(ocr2_df))``, or ``None`` when
        either frame has no boxes.
    """
    if ocr1_df.empty or ocr2_df.empty:
        return None

    first_boxes = np.array(ocr1_df["bounding_box"].tolist())
    second_boxes = np.array(ocr2_df["bounding_box"].tolist())
    if len(first_boxes) == 0 or len(second_boxes) == 0:
        return None

    return bb_intersection_over_union_vectorized(bboxes1=first_boxes, bboxes2=second_boxes)
272
+
273
+
274
def calculate_iosl_fast(ocr1_df, ocr2_df):
    """Return the pairwise "is on same line" matrix between two frames.

    :param ocr1_df: frame with a ``bounding_box`` column.
    :param ocr2_df: frame with a ``bounding_box`` column.
    :return: boolean array of shape ``(len(ocr1_df), len(ocr2_df))``, or
        ``None`` when either frame has no boxes.
    """
    if ocr1_df.empty or ocr2_df.empty:
        return None

    first_boxes = np.array(ocr1_df["bounding_box"].tolist())
    second_boxes = np.array(ocr2_df["bounding_box"].tolist())
    if len(first_boxes) == 0 or len(second_boxes) == 0:
        return None

    return bb_is_on_same_line_vectorized(bboxes1=first_boxes, bboxes2=second_boxes)
284
+
285
+
286
def calculate_adjacency_matrix(ocr1_df, ocr2_df):
    """Build the adjacency matrix over the items of both frames together.

    The two frames are stacked (``ocr1_df`` rows first). Two items are
    adjacent when their boxes overlap (IOU > 0) and they are on the same line
    according to ``calculate_iosl_fast``. Items of the same frame can be
    adjacent to each other too.

    :param ocr1_df: frame with a ``bounding_box`` column.
    :param ocr2_df: frame with a ``bounding_box`` column.
    :return: integer array of 1s and 0s with shape ``(n1 + n2, n1 + n2)``.
    """
    # Only the boxes are read below, so the original labels can be dropped.
    ocr_df = pd.concat([ocr1_df, ocr2_df], axis=0).reset_index(drop=True)

    ious = calculate_ious_fast(ocr1_df=ocr_df, ocr2_df=ocr_df)
    iosls = calculate_iosl_fast(ocr1_df=ocr_df, ocr2_df=ocr_df)

    # `np.int` was removed in NumPy 1.24; the builtin `int` is what it aliased.
    return np.bitwise_and(ious > 0.0, iosls).astype(int)
303
+
304
+
305
def get_connected_components(ocr1_df, ocr2_df):
    """Group the items of both frames by connected component and merge each group.

    Items from both frames are linked through the adjacency matrix from
    ``calculate_adjacency_matrix``. Every connected component is then split
    back into its ``ocr1_df`` members and its ``ocr2_df`` members, and each
    non-empty member list is merged into a single row.

    :param ocr1_df: frame with ``bounding_box``, ``confidence`` and ``text``
        columns.
    :param ocr2_df: frame with the same columns.
    :return: tuple ``(ocr1_df_groups, ocr2_df_groups)`` of frames with one row
        per group and a fresh 0..k-1 index.
    """

    def _aggregate_group_items_into_one(df):
        """Merge the rows of one group into a single-row frame.

        The merged row keeps the first row's remaining columns, spans all
        boxes, averages the confidence and joins the texts with spaces.
        """
        if len(df) == 1:
            return df

        bboxes = np.array(df["bounding_box"].values.tolist())
        merged = df.iloc[0].to_dict()
        merged["bounding_box"] = [
            np.min(bboxes[:, 0]),
            np.min(bboxes[:, 1]),
            np.max(bboxes[:, 2]),
            np.max(bboxes[:, 3]),
        ]
        merged["confidence"] = df["confidence"].mean()
        merged["text"] = " ".join(df["text"].tolist())

        # Always return a one-row frame. The original returned a Series here,
        # which pd.concat(axis=0) cannot stack with the single-row frames.
        return pd.DataFrame([merged], columns=df.columns, index=[df.index[0]])

    def _merge_groups(df, side):
        """Merge every group's members from one frame and stack the rows."""
        return pd.concat(
            [
                # Members are positions, so they must be read with iloc. The
                # per-page slices passed in do not have labels starting at 0.
                _aggregate_group_items_into_one(df.iloc[groups[label][side], :])
                for label in sorted(groups)
                if groups[label][side]
            ],
            axis=0,
        ).reset_index(drop=True)

    # 1. calculate adjacency matrix over the stacked items (ocr1 rows first)
    adjacency_matrix = calculate_adjacency_matrix(ocr1_df=ocr1_df, ocr2_df=ocr2_df)

    # 2. find connected components
    _, labels = connected_components(
        csgraph=csr_matrix(adjacency_matrix), directed=False, return_labels=True)

    # 3. split every component into positions within ocr1_df (key 1) and
    #    positions within ocr2_df (key 2)
    first_count = ocr1_df.shape[0]
    groups = {}
    for position, label in enumerate(labels):
        members = groups.setdefault(label, {1: [], 2: []})
        if position < first_count:
            members[1].append(position)
        else:
            members[2].append(position - first_count)

    # 4. merge group items into one row per group
    return _merge_groups(ocr1_df, 1), _merge_groups(ocr2_df, 2)
evaluation/metrics.py ADDED
@@ -0,0 +1,589 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+ from evaluation.iou import word_or_symbol_pair_matching, word_or_symbol_group_pair_matching
4
+
5
+
6
def text_accuracy(df, pref_1, pref_2):
    """Return the fraction of rows whose two text columns are equal.

    :param df: frame with ``<pref_1>text`` and ``<pref_2>text`` columns.
    :param pref_1: column prefix of the first text.
    :param pref_2: column prefix of the second text.
    :return: number of equal rows divided by the number of rows.
    """
    first_texts = df[f'{pref_1}text']
    second_texts = df[f'{pref_2}text']
    equal_rows = (first_texts == second_texts).sum()
    return equal_rows / df.shape[0]
8
+
9
+
10
def text_precision(df, pref_1, pref_2):
    """Return the share of non-empty first texts that equal the second text.

    :param df: frame with ``<pref_1>text`` and ``<pref_2>text`` columns.
    :param pref_1: column prefix of the first (evaluated) text.
    :param pref_2: column prefix of the second text.
    :return: correct non-empty first texts divided by all non-empty first texts.
    """
    first_texts = df[f'{pref_1}text']
    second_texts = df[f'{pref_2}text']
    has_first_text = first_texts.apply(bool)
    correct = has_first_text & (first_texts == second_texts)
    return correct.sum() / has_first_text.sum()
15
+
16
+
17
def text_recall(df, pref_1, pref_2):
    """Return the share of non-empty second texts that equal the first text.

    :param df: frame with ``<pref_1>text`` and ``<pref_2>text`` columns.
    :param pref_1: column prefix of the first text.
    :param pref_2: column prefix of the second (reference) text.
    :return: correct non-empty second texts divided by all non-empty second texts.
    """
    first_texts = df[f'{pref_1}text']
    second_texts = df[f'{pref_2}text']
    has_second_text = second_texts.apply(bool)
    correct = has_second_text & (first_texts == second_texts)
    return correct.sum() / has_second_text.sum()
22
+
23
+
24
def text_f1(df, pref_1, pref_2):
    """Return the harmonic mean of ``text_precision`` and ``text_recall``.

    :param df: frame with ``<pref_1>text`` and ``<pref_2>text`` columns.
    :param pref_1: column prefix of the first text.
    :param pref_2: column prefix of the second text.
    :return: F1 score, or 0.0 when precision or recall is zero.
    """
    precision = text_precision(df, pref_1, pref_2)
    recall = text_recall(df, pref_1, pref_2)

    # Guard against 0 / 0 when both scores are zero.
    if precision == 0 or recall == 0:
        return 0.0
    return (2 * precision * recall) / (precision + recall)
34
+
35
+
36
def symbol_confusion_matrix(df, pref_1, pref_2):
    """Count how often each first text is paired with each second text.

    :param df: frame with ``<pref_1>text`` and ``<pref_2>text`` columns.
    :param pref_1: column prefix of the first text (matrix rows).
    :param pref_2: column prefix of the second text (matrix columns).
    :return: tuple ``(confusion_matrix, pair_cnts)``. The matrix is a square
        frame over all distinct texts in sorted order. ``pair_cnts`` lists
        every observed pair with its count, sorted by the two text columns.
    """
    first_column = f'{pref_1}text'
    second_column = f'{pref_2}text'

    symbols = sorted(set(df[first_column].tolist() + df[second_column].tolist()))
    pair_value_counts = df[[first_column, second_column]].value_counts()

    # Older pandas names the counts column 0; the rename is a no-op otherwise.
    pair_cnts = (
        pair_value_counts.reset_index()
        .rename({0: "count"}, axis=1)
        .sort_values(by=[first_column, second_column], ascending=True)
    )

    counts_by_pair = pair_value_counts.to_dict()
    matrix_rows = []
    for row_symbol in symbols:
        matrix_rows.append(
            [counts_by_pair.get((row_symbol, column_symbol), 0) for column_symbol in symbols]
        )
    confusion_matrix = pd.DataFrame(matrix_rows, columns=symbols, index=symbols)

    return confusion_matrix, pair_cnts
57
+
58
+
59
def levenstein(text1, text2):
    """Measure edit-operation based metrics between two texts.

    :param text1: first text.
    :param text2: second text.
    :return: tuple ``(levenstein_similarity, levenstein_distance,
        edit_operations)`` where

        - ``levenstein_distance`` is the distance from ``edit_distance``;
        - ``levenstein_similarity`` is 1.0 for distance 0, otherwise the
          number of "match" operations divided by the number of all listed
          operations (0.0 when there is no match);
        - ``edit_operations`` is the operation list from ``edit_distance``.
    """
    levenstein_distance, edit_operations = edit_distance(text1, text2)
    if levenstein_distance == 0:
        return 1.0, levenstein_distance, edit_operations

    matches_cnt = sum(1 for operation in edit_operations if operation[0] == "match")
    if matches_cnt == 0:
        levenstein_similarity = 0.0
    else:
        levenstein_similarity = float(matches_cnt / len(edit_operations))

    return levenstein_similarity, levenstein_distance, edit_operations
80
+
81
+
82
def edit_distance(text1, text2):
    """Return the edit distance between two texts and the operations behind it.

    Three unit-cost operations are allowed, plus a free "match". The labels
    follow this function's own convention:

    - "substitution": replace a character of ``text1`` by one of ``text2``;
    - "insertion": consume a character of ``text1`` only;
    - "deletion": consume a character of ``text2`` only.

    ``cost[i][j]`` is the minimum number of operations to convert
    ``text1[:i]`` into ``text2[:j]``. On ties the diagonal move (match or
    substitution) wins, then "insertion", then "deletion".

    Example for text1 = "horse", text2 = "ros":

           _  r  o  s
        _ [0, 1, 2, 3]
        h [1, 1, 2, 3]
        o [2, 2, 1, 2]
        r [3, 2, 2, 2]
        s [4, 3, 3, 2]
        e [5, 4, 4, 3]

    :param text1: first text.
    :param text2: second text.
    :return: tuple ``(distance, operations)``. Each operation is
        ``(name, text1_char, text2_char, text1_pos, text2_pos)`` in
        left-to-right order. When either text is empty, the distance is the
        other text's length and the operation list is empty.
    """
    if not text1:
        return len(text2), []
    if not text2:
        return len(text1), []

    row_count = len(text1) + 1
    column_count = len(text2) + 1

    cost = [[0] * column_count for _ in range(row_count)]
    step = [[None] * column_count for _ in range(row_count)]

    # Borders: only one of the texts has characters left to consume.
    for i in range(row_count):
        cost[i][0] = i
        step[i][0] = "insertion"
    for j in range(column_count):
        cost[0][j] = j
        step[0][j] = "deletion"

    for i in range(1, row_count):
        for j in range(1, column_count):
            mismatch = int(text1[i - 1] != text2[j - 1])

            # Diagonal move first; the others must be strictly cheaper to win.
            best_cost = cost[i - 1][j - 1] + mismatch
            best_step = "substitution" if mismatch == 1 else "match"

            if cost[i - 1][j] + 1 < best_cost:
                best_cost = cost[i - 1][j] + 1
                best_step = "insertion"

            if cost[i][j - 1] + 1 < best_cost:
                best_cost = cost[i][j - 1] + 1
                best_step = "deletion"

            cost[i][j] = best_cost
            step[i][j] = best_step

    # Walk back from the bottom-right corner to recover the operations.
    operations = []
    i = len(text1)
    j = len(text2)
    while i > 0 or j > 0:
        kind = step[i][j]
        if kind == "substitution" or kind == "match":
            operations.append((kind, text1[i - 1], text2[j - 1], i - 1, j - 1))
            i -= 1
            j -= 1
        elif kind == "insertion":
            operations.append(("insertion", text1[i - 1], "", i - 1, j - 1))
            i -= 1
        elif kind == "deletion":
            operations.append(("deletion", "", text2[j - 1], i - 1, j - 1))
            j -= 1

    operations.reverse()
    return cost[-1][-1], operations
185
+
186
+
187
def levenstein_metrics(df, pref_1="Pred_", pref_2='Tar_'):
    """Apply ``levenstein`` to every row's text pair.

    :param df: frame with ``<pref_1>text`` and ``<pref_2>text`` columns.
    :param pref_1: column prefix of the first text.
    :param pref_2: column prefix of the second text.
    :return: tuple ``(levenstein_similarities, levenstein_distances,
        edit_operations)``, each holding one entry per row.
    """
    first_column = f'{pref_1}text'
    second_column = f'{pref_2}text'

    def _score_row(row):
        return levenstein(text1=row[first_column], text2=row[second_column])

    row_results = df[[first_column, second_column]].apply(_score_row, axis=1)

    # Each result is a (similarity, distance, operations) tuple; split it up.
    similarities = row_results.apply(lambda result: result[0])
    distances = row_results.apply(lambda result: result[1])
    operations = row_results.apply(lambda result: result[2])

    return similarities, distances, operations
197
+
198
+
199
def _describe_with_values(series):
    """Return ``series.describe()`` as a dict plus the raw values under "values"."""
    return {**series.describe().to_dict(), "values": series.tolist()}


def _count_edit_operations(edit_operations, operation_id):
    """Count the ``[text1 char]_[text2 char]`` labels of one operation kind.

    A Counter replaces the earlier list concatenation through ``Series.sum``,
    which copied the growing list for every row.
    """
    from collections import Counter

    counts = Counter()
    for operations in edit_operations:
        counts.update(
            f"[{item[1]}]_[{item[2]}]" for item in operations if item[0] == operation_id
        )
    # Most frequent first, like the value_counts() output it replaces.
    return dict(counts.most_common())


def _build_word_report(text_pairs, pred_pref, target_pref, show_hist):
    """Compute the word-level report for an already matched pairs frame."""
    levenstein_similarities, levenstein_distances, edit_operations = levenstein_metrics(
        df=text_pairs, pref_1=pred_pref, pref_2=target_pref
    )

    edit_operations_stats = {
        operation_id: _count_edit_operations(edit_operations, operation_id)
        for operation_id in ["insertion", "deletion", "substitution"]
    }

    if show_hist is True:
        pd.Series(levenstein_similarities).plot(kind='hist', bins=20, title="Levestein Similarities")
        pd.Series(levenstein_distances).plot(kind='hist', bins=20, title="Levestein Distances")
        for edit_operation_id, edit_operation_data in edit_operations_stats.items():
            pd.Series(edit_operation_data).plot(kind='barh', title=f"{edit_operation_id.capitalize()} Stats")

    return {
        "accuracy": text_accuracy(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "precision": text_precision(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "recall": text_recall(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "f1": text_f1(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "levenstein_distances_stats": _describe_with_values(levenstein_distances),
        "levenstein_similarities_stats": _describe_with_values(levenstein_similarities),
        "iou_stats": _describe_with_values(text_pairs.iou),
        "edit_operations_stats": edit_operations_stats,
    }


def _empty_word_report():
    """Return the report used when there is nothing to evaluate."""
    return {
        "accuracy": None,
        "precision": None,
        "recall": None,
        "f1": None,
        "levenstein_distances_stats": {},
        "levenstein_similarities_stats": {},
        "iou_stats": {},
        "edit_operations_stats": {key: {} for key in ["insertion", "deletion", "substitution"]},
    }


def evaluate_by_words(pred_df, target_df, pred_pref='Pred_', target_pref='Target_', **kwargs):
    """Evaluate predictions against targets by matching single words.

    Items are paired with ``word_or_symbol_pair_matching`` and scored on the
    resulting pairs.

    :param pred_df: frame of predicted items.
    :param target_df: frame of target items.
    :param pred_pref: prefix for the prediction columns in the pairs frame.
    :param target_pref: prefix for the target columns in the pairs frame.
    :param kwargs: ``show_hist=True`` additionally plots the distributions.
    :return: dict with ``accuracy``, ``precision``, ``recall``, ``f1``,
        ``levenstein_distances_stats``, ``levenstein_similarities_stats``,
        ``iou_stats`` and ``edit_operations_stats``. Scores are ``None`` and
        stats are empty when either frame is empty.
    """
    if pred_df.empty or target_df.empty:
        return _empty_word_report()

    text_pairs = word_or_symbol_pair_matching(
        df1=pred_df, df2=target_df, pref1=pred_pref, pref2=target_pref)
    return _build_word_report(text_pairs, pred_pref, target_pref, kwargs.get("show_hist", False))


def evaluate_by_word_groups(pred_df, target_df, pred_pref='Pred_', target_pref='Target_', **kwargs):
    """Evaluate predictions against targets by matching groups of words.

    Same report as ``evaluate_by_words``, but items are paired with
    ``word_or_symbol_group_pair_matching``.

    :param pred_df: frame of predicted items.
    :param target_df: frame of target items.
    :param pred_pref: prefix for the prediction columns in the pairs frame.
    :param target_pref: prefix for the target columns in the pairs frame.
    :param kwargs: ``show_hist=True`` additionally plots the distributions.
    :return: report dict with the same keys as ``evaluate_by_words``.
    """
    if pred_df.empty or target_df.empty:
        return _empty_word_report()

    text_pairs = word_or_symbol_group_pair_matching(
        df1=pred_df, df2=target_df, pref1=pred_pref, pref2=target_pref)
    return _build_word_report(text_pairs, pred_pref, target_pref, kwargs.get("show_hist", False))
317
+
318
+
319
def reduce_word_evaluation_results(eval_results):
    """Aggregate several per-document word evaluation reports into one summary.

    :param eval_results: list of report dicts carrying ``accuracy``,
        ``precision``, ``recall``, ``f1``, the three ``*_stats`` dicts (each
        with an optional ``values`` list) and ``edit_operations_stats``.
    :return: summary dict. Each score gets ``mean``/``std``/``values`` across
        documents. Each stats entry describes the per-document means.
        ``edit_operations_stats`` sums the counts across documents, and
        ``document_count`` is the number of reports. For an empty input the
        summary holds empty dicts and a count of 0.
    """
    if not eval_results:
        return {
            "accuracy": {},
            "precision": {},
            "recall": {},
            "f1": {},
            "document_count": 0,
            "levenstein_distances_stats": {},
            "levenstein_similarities_stats": {},
            "iou_stats": {},
            "edit_operations_stats": {key: {} for key in ["insertion", "deletion", "substitution"]},
        }

    def _score_summary(score_key):
        scores = pd.Series([item[score_key] for item in eval_results])
        return {
            "mean": scores.mean(),
            "std": scores.std(),
            "values": scores.tolist(),
        }

    def _stats_summary(stats_key):
        # One mean per document, then describe the distribution of the means.
        document_means = pd.Series(
            [pd.Series(item[stats_key].get('values', [])).mean() for item in eval_results]
        )
        return {
            **document_means.describe().to_dict(),
            "values": document_means.tolist(),
        }

    # Sum the per-label counts of every operation kind over all documents.
    edit_operations_stats = {}
    for eval_result in eval_results:
        for edit_operation, edit_operation_data in eval_result['edit_operations_stats'].items():
            totals = edit_operations_stats.setdefault(edit_operation, {})
            for key, count in edit_operation_data.items():
                totals[key] = totals.get(key, 0) + count

    return {
        "accuracy": _score_summary('accuracy'),
        "precision": _score_summary('precision'),
        "recall": _score_summary('recall'),
        "f1": _score_summary('f1'),
        "document_count": len(eval_results),
        "levenstein_distances_stats": _stats_summary('levenstein_distances_stats'),
        "levenstein_similarities_stats": _stats_summary('levenstein_similarities_stats'),
        "iou_stats": _stats_summary('iou_stats'),
        "edit_operations_stats": edit_operations_stats,
    }
410
+
411
+
412
def evaluate_by_symbols(pred_df, target_df, pred_pref='Pred_', target_pref='Target_', **kwargs):
    """Evaluate predictions against targets by matching single symbols.

    Items are paired with ``word_or_symbol_pair_matching`` and scored on the
    resulting pairs.

    :param pred_df: frame of predicted items.
    :param target_df: frame of target items.
    :param pred_pref: prefix for the prediction columns in the pairs frame.
    :param target_pref: prefix for the target columns in the pairs frame.
    :param kwargs: ``show_hist=True`` additionally plots the pair counts.
    :return: dict with ``accuracy``, ``precision``, ``recall``, ``f1``,
        ``confusion_matrix``, ``pair_counts`` and ``iou_stats``. Scores are
        ``None`` and the frames/stats are empty when either input is empty.
    """
    if pred_df.empty or target_df.empty:
        return {
            "accuracy": None,
            "precision": None,
            "recall": None,
            "f1": None,
            "confusion_matrix": pd.DataFrame(),
            "pair_counts": pd.DataFrame(),
            "iou_stats": {},
        }

    show_hist = kwargs.get("show_hist", False)
    text_pairs = word_or_symbol_pair_matching(
        df1=pred_df, df2=target_df, pref1=pred_pref, pref2=target_pref)

    confusion_matrix, pair_counts = symbol_confusion_matrix(
        text_pairs, pref_1=pred_pref, pref_2=target_pref)

    iou_values = text_pairs.iou
    iou_stats = {
        **iou_values.describe().to_dict(),
        "values": iou_values.tolist(),
    }

    if show_hist is True:
        pd.Series(pair_counts).plot(kind='barh', title="Symbol Pair Counts")

    return {
        "accuracy": text_accuracy(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "precision": text_precision(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "recall": text_recall(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "f1": text_f1(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "confusion_matrix": confusion_matrix,
        "pair_counts": pair_counts,
        "iou_stats": iou_stats,
    }
449
+
450
+
451
def reduce_pair_counts(pair_counts):
    """Sum several pair-count tables into a single table.

    Every non-empty frame is read as "all columns but the last form the
    key, the last column holds the count". Counts that share a key are
    added together across frames.

    Args:
        pair_counts: Iterable of DataFrames (may be empty or ``None``).
            Empty frames are ignored.

    Returns:
        DataFrame with one row per distinct key and the summed count, using
        the column names of the last non-empty input frame. A plain empty
        DataFrame is returned when there is nothing to sum (no frames, or
        only empty frames).
    """
    totals = {}
    columns = []

    for frame in pair_counts or ():
        if frame.empty:
            continue

        columns = frame.columns.tolist()
        key_columns, count_column = columns[:-1], columns[-1]

        # Map key (scalar, or tuple for multi-column keys) -> count.
        per_key = frame.set_index(key_columns, drop=True)[count_column].to_dict()
        for key, value in per_key.items():
            totals[key] = totals.get(key, 0) + value

    if not totals:
        # Without this guard an all-empty input would yield a zero-row frame
        # carrying the stray ['index', 0] columns produced by reset_index().
        return pd.DataFrame()

    reduced = pd.Series(totals).to_frame().reset_index()
    reduced.columns = columns
    return reduced
472
+
473
+
474
def reduce_confusion_matrices(confusion_matrices):
    """Add up several labelled confusion matrices cell by cell.

    Args:
        confusion_matrices: List of DataFrames whose index and columns are
            labels. Empty frames are ignored.

    Returns:
        Square DataFrame indexed (rows and columns) by the sorted union of
        every label seen in any input, where each cell is the sum of that
        cell over all inputs and 0 where no input had it. An empty
        DataFrame is returned for an empty/falsy input list.
    """
    if not confusion_matrices:
        return pd.DataFrame()

    labels = set()
    totals = {}
    for matrix in confusion_matrices:
        if matrix.empty:
            continue

        # Flatten this matrix into {(row_label, column_label): value}.
        cells = {
            (row_label, col_label): matrix.loc[row_label, col_label]
            for row_label in matrix.index
            for col_label in matrix.columns
        }
        for cell, value in cells.items():
            labels.update(cell)  # registers both the row and the column label
            totals[cell] = totals.get(cell, 0) + value

    ordered = sorted(labels)
    rows = [
        [totals.get((row_label, col_label), 0) for col_label in ordered]
        for row_label in ordered
    ]
    return pd.DataFrame(rows, columns=ordered, index=ordered)
508
+
509
+
510
def reduce_symbol_evaluation_results(eval_results):
    """Combine per-document symbol-level reports into one summary.

    Args:
        eval_results: List of report dicts, each providing the keys
            ``accuracy``, ``precision``, ``recall``, ``f1``,
            ``confusion_matrix``, ``pair_counts`` and ``iou_stats``.

    Returns:
        dict where each of ``accuracy``/``precision``/``recall``/``f1`` maps
        to ``{"mean", "std", "values"}`` over the documents,
        ``document_count`` is the number of reports, ``pair_counts`` and
        ``confusion_matrix`` are the merged tables, and ``iou_stats`` is the
        ``describe()`` summary (plus ``values``) of the per-document mean
        IoU. For an empty input the metric entries and ``iou_stats`` are
        empty dicts, the tables are empty DataFrames and ``document_count``
        is 0.
    """
    if not eval_results:
        return {
            "accuracy": {},
            "precision": {},
            "recall": {},
            "f1": {},
            "document_count": 0,
            "pair_counts": pd.DataFrame(),
            "confusion_matrix": pd.DataFrame(),
            "iou_stats": {},
        }

    def _column(key):
        # One value per document for the requested report key.
        return pd.Series([report[key] for report in eval_results])

    def _spread(series):
        # Mean / standard deviation / raw values of a per-document metric.
        return {
            "mean": series.mean(),
            "std": series.std(),
            "values": series.tolist(),
        }

    accuracies = _column('accuracy')
    precisions = _column('precision')
    recalls = _column('recall')
    f1s = _column('f1')
    matrices = [report['confusion_matrix'] for report in eval_results]
    counts = [report['pair_counts'] for report in eval_results]

    # Each document contributes the mean of its own IoU values.
    mean_ious = pd.Series(
        [
            pd.Series(report['iou_stats'].get('values', [])).mean()
            for report in eval_results
        ]
    )
    iou_stats = dict(mean_ious.describe().to_dict())
    iou_stats["values"] = mean_ious.tolist()

    return {
        "accuracy": _spread(accuracies),
        "precision": _spread(precisions),
        "recall": _spread(recalls),
        "f1": _spread(f1s),
        "document_count": len(eval_results),
        "pair_counts": reduce_pair_counts(counts),
        "confusion_matrix": reduce_confusion_matrices(matrices),
        "iou_stats": iou_stats,
    }
iliauniiccocrevaluation.py CHANGED
@@ -13,51 +13,48 @@
13
  # limitations under the License.
14
  """TODO: Add a description here."""
15
 
16
- import evaluate
17
  import datasets
18
-
19
 
20
  # TODO: Add BibTeX citation
21
- from cer import calculate_cer
 
22
 
23
  _CITATION = """\
24
  @InProceedings{huggingface:module,
25
- title = {A great new module},
26
- authors={huggingface, Inc.},
27
- year={2020}
28
  }
29
  """
30
 
31
  # TODO: Add description of the module here
32
  _DESCRIPTION = """\
33
- This new module is designed to solve this great ML task and is crafted with a lot of care.
 
 
34
  """
35
 
36
-
37
  # TODO: Add description of the arguments of the module here
38
  _KWARGS_DESCRIPTION = """
39
  Calculates how good are predictions given some references, using certain scores
40
  Args:
41
- predictions: list of predictions to score. Each predictions
42
- should be a string with tokens separated by spaces.
43
- references: list of reference for each prediction. Each
44
- reference should be a string with tokens separated by spaces.
45
  Returns:
46
- accuracy: description of the first score,
47
- another_score: description of the second score,
48
  Examples:
49
  Examples should be written in doctest format, and should illustrate how
50
  to use the function.
51
 
52
- >>> my_new_module = evaluate.load("my_new_module")
53
- >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1])
 
 
54
  >>> print(results)
55
  {'accuracy': 1.0}
56
  """
57
 
58
- # TODO: Define external resources urls if needed
59
- BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
60
-
61
 
62
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
63
  class IliauniIccOCREvaluation(evaluate.Metric):
@@ -72,10 +69,52 @@ class IliauniIccOCREvaluation(evaluate.Metric):
72
  citation=_CITATION,
73
  inputs_description=_KWARGS_DESCRIPTION,
74
  # This defines the format of each prediction and reference
75
- features=datasets.Features({
76
- 'predictions': datasets.Value('int64'),
77
- 'references': datasets.Value('int64'),
78
- }),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  # Homepage of the module for documentation
80
  homepage="http://module.homepage",
81
  # Additional links to the codebase or references
@@ -90,9 +129,14 @@ class IliauniIccOCREvaluation(evaluate.Metric):
90
 
91
  def _compute(self, predictions, references):
92
  """Returns the scores"""
93
- # TODO: Compute the different scores of the module
94
- cer = calculate_cer(predictions, references)
95
 
96
- return {
97
- "CER": cer,
98
- }
 
 
 
 
 
 
 
 
13
  # limitations under the License.
14
  """TODO: Add a description here."""
15
 
 
16
  import datasets
17
+ import evaluate
18
 
19
  # TODO: Add BibTeX citation
20
+ from evaluation.metrics import evaluate_by_words
21
+ from ocr.fiftyone import FiftyOneOcr
22
 
23
  _CITATION = """\
24
  @InProceedings{huggingface:module,
25
+ title = {Iliauni ICC OCR Evaluation},
26
+ authors={},
27
+ year={2022}
28
  }
29
  """
30
 
31
  # TODO: Add description of the module here
32
  _DESCRIPTION = """\
33
+ Better OCR evaluation metric that enables to evaluate OCR results in various ways. It is robust in a way that
34
+ it matches the words using their bounding boxes instead of using plain edit distance matching between two texts.
35
+ Elaborate more on this later.
36
  """
37
 
 
38
  # TODO: Add description of the arguments of the module here
39
  _KWARGS_DESCRIPTION = """
40
  Calculates how good are predictions given some references, using certain scores
41
  Args:
42
+ predictions: list of OCR detections in FiftyOne dataset format.
43
+ references: list of OCR detections in FiftyOne dataset format.
 
 
44
  Returns:
45
+ evaluation_results: dictionary containing multiple metrics
 
46
  Examples:
47
  Examples should be written in doctest format, and should illustrate how
48
  to use the function.
49
 
50
+ >>> dataset = load_dataset("anz2/iliauni_icc_georgian_ocr", use_auth_token="<auth token here>")
51
+ >>> sample = dataset['test'][0]
52
+ >>> ocr_evaluator = evaluate.load("iliauniiccocrevaluation")
53
+ >>> results = ocr_evaluator.compute(references=[sample], predictions=[0, 1])
54
  >>> print(results)
55
  {'accuracy': 1.0}
56
  """
57
 
 
 
 
58
 
59
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
60
  class IliauniIccOCREvaluation(evaluate.Metric):
 
69
  citation=_CITATION,
70
  inputs_description=_KWARGS_DESCRIPTION,
71
  # This defines the format of each prediction and reference
72
+ features=datasets.Features(
73
+ {
74
+ "id": datasets.Value("string"),
75
+ "filepath": datasets.Value("string"),
76
+ "tags": datasets.Sequence(datasets.Value("string")),
77
+ "metadata": datasets.Features(
78
+ {
79
+ "size_bytes": datasets.Value("int32"),
80
+ "mime_type": datasets.Value("string"),
81
+ "width": datasets.Value("int32"),
82
+ "height": datasets.Value("int32"),
83
+ "num_channels": datasets.Value("int32"),
84
+ "author": datasets.Value("string"),
85
+ "category": datasets.Value("string"),
86
+ "document_name": datasets.Value("string"),
87
+ "source": datasets.Value("string"),
88
+ "year": datasets.Value("int32")
89
+ }
90
+ ),
91
+ "_media_type": datasets.Value("string"),
92
+ "_rand": datasets.Value("string"),
93
+ "detections": datasets.Features(
94
+ {
95
+ "detections": datasets.Sequence(
96
+ datasets.Features(
97
+ {
98
+ "id": datasets.Value("string"),
99
+ "attributes": datasets.Sequence(datasets.Value("string")),
100
+ "tags": datasets.Value("string"),
101
+ "label": datasets.Value("string"),
102
+ "bounding_box": datasets.Sequence(datasets.Value("float32")),
103
+ "confidence": datasets.Value("float32"),
104
+ "index": datasets.Value("int32"),
105
+ "page": datasets.Value("int32"),
106
+ "block": datasets.Value("int32"),
107
+ "paragraph": datasets.Value("int32"),
108
+ "word": datasets.Value("int32"),
109
+ "text": datasets.Value("string"),
110
+ }
111
+ )
112
+ )
113
+ }
114
+ ),
115
+ "image": datasets.Image(),
116
+ }
117
+ ),
118
  # Homepage of the module for documentation
119
  homepage="http://module.homepage",
120
  # Additional links to the codebase or references
 
129
 
130
  def _compute(self, predictions, references):
131
  """Returns the scores"""
 
 
132
 
133
+ assert len(predictions) == len(references)
134
+ eval_results = []
135
+ for prediction, reference in zip(predictions, references):
136
+ prediction_df = FiftyOneOcr(data=prediction).get_word_annotations(convert_bbox=True)
137
+ reference_df = FiftyOneOcr(data=reference).get_word_annotations(convert_bbox=True)
138
+
139
+ eval_result = evaluate_by_words(prediction_df, reference_df, pref1="Pred_", pref2="Tar_")
140
+ eval_results.append(eval_result)
141
+
142
+ return eval_results
ocr/fiftyone.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+
4
+
5
class FiftyOneOcr:
    """Accessor for one OCR sample given as a nested mapping.

    The word annotations are read from ``data["detections"]["detections"]``.
    """

    def __init__(self, data):
        # Raw sample mapping; stored as-is and only read by the accessors.
        self.data = data

    def get_word_annotations(self, convert_bbox: bool = True) -> pd.DataFrame:
        """Return the sample's detections as a DataFrame, one row per word.

        Args:
            convert_bbox: When true, rewrite each ``bounding_box`` from
                ``(x, y, width, height)`` to the two-point form
                ``(x1, y1, x2, y2)`` by adding the first two values onto
                the last two.

        Returns:
            DataFrame built from the detections. It is empty when the
            sample has no detections (missing key, ``None`` or an empty
            container); no bounding-box conversion is attempted then.

        Raises:
            KeyError: If detections are present but carry no
                ``bounding_box`` field while ``convert_bbox`` is true.
        """
        # ``or {}`` also covers keys that are present but hold None.
        container = self.data.get("detections") or {}
        annotations = container.get("detections") or {}

        annotations_df = pd.DataFrame(annotations)

        # An empty frame has no boxes to convert; indexing the column or
        # slicing a 1-D empty array would raise, so skip the conversion.
        if convert_bbox and not annotations_df.empty:
            bbox = np.array(annotations_df['bounding_box'].tolist(), dtype=float)
            bbox[:, 2:] += bbox[:, :2]  # (w, h) -> (x2, y2)
            annotations_df['bounding_box'] = bbox.tolist()

        return annotations_df