Spaces:
Runtime error
Runtime error
Install the evaluation package from git separately. Remove packages that were moved to a separate repository.
Browse files- evaluation/iou.py +0 -370
- evaluation/metrics.py +0 -589
- iliauniiccocrevaluation.py +2 -2
- ocr/fiftyone.py +0 -26
- requirements.txt +2 -1
evaluation/iou.py
DELETED
@@ -1,370 +0,0 @@
|
|
1 |
-
# https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/
|
2 |
-
|
3 |
-
import numpy as np
|
4 |
-
import pandas as pd
|
5 |
-
from scipy.sparse import csr_matrix
|
6 |
-
from scipy.sparse.csgraph import connected_components
|
7 |
-
|
8 |
-
|
9 |
-
def bb_intersection_over_union(boxA, boxB):
    """Return the intersection-over-union of two ``(x1, y1, x2, y2)`` boxes.

    A small epsilon is added to every width and height so that degenerate
    (zero-area) boxes do not produce a division by zero.
    """
    EPS = 1e-5

    def _area(box):
        # Width times height, each padded by EPS.
        return (box[2] - box[0] + EPS) * (box[3] - box[1] + EPS)

    # Corners of the overlap rectangle.
    left = max(boxA[0], boxB[0])
    top = max(boxA[1], boxB[1])
    right = min(boxA[2], boxB[2])
    bottom = min(boxA[3], boxB[3])

    # Negative extents mean the boxes do not overlap on that axis.
    overlap_width = max(0, right - left + EPS)
    overlap_height = max(0, bottom - top + EPS)
    overlap = overlap_width * overlap_height

    # Union = sum of both areas minus the part counted twice.
    union = float(_area(boxA) + _area(boxB) - overlap)
    return overlap / union
|
28 |
-
|
29 |
-
|
30 |
-
def bb_intersection_over_union_vectorized(bboxes1, bboxes2):
    """Compute the pairwise IOU matrix between two sets of boxes.

    Args:
        bboxes1: array-like of shape ``(N, 4)`` with ``(x1, y1, x2, y2)`` rows.
        bboxes2: array-like of shape ``(M, 4)`` with ``(x1, y1, x2, y2)`` rows.

    Returns:
        ``numpy.ndarray`` of shape ``(N, M)`` where element ``[i, j]`` is the
        IOU of ``bboxes1[i]`` and ``bboxes2[j]``.

    The inputs are never modified. They are converted to float first because
    adding the epsilon in place to an integer array raises a casting error.
    """
    EPS = 1e-5

    # (N, 1, 4) against (1, M, 4): broadcasting yields every pair without
    # materialising N * M tiled copies, and also works when N or M is zero.
    boxes_a = np.array(bboxes1, dtype=float).reshape(-1, 4)[:, None, :]
    boxes_b = np.array(bboxes2, dtype=float).reshape(-1, 4)[None, :, :]

    # Pad the high corner so zero-area boxes keep a strictly positive area.
    a_low, a_high = boxes_a[..., :2], boxes_a[..., 2:] + EPS
    b_low, b_high = boxes_b[..., :2], boxes_b[..., 2:] + EPS

    # Overlap extent per axis, clipped at zero for disjoint boxes.
    overlap_extent = np.maximum(0.0, np.minimum(a_high, b_high) - np.maximum(a_low, b_low))
    intrs = overlap_extent.prod(-1)

    area_a = (a_high - a_low).prod(-1)
    area_b = (b_high - b_low).prod(-1)

    return intrs / (area_a + area_b - intrs)
|
61 |
-
|
62 |
-
|
63 |
-
def bb_is_on_same_line_vectorized(bboxes1, bboxes2):
    """Tell, for every pair of boxes, whether they lie on the same text line.

    Two boxes are considered to be on the same line when the vertical centre
    of each one falls inside the vertical range ``[y1, y2]`` of the other.

    Args:
        bboxes1: array-like of shape ``(N, 4)`` with ``(x1, y1, x2, y2)`` rows.
        bboxes2: array-like of shape ``(M, 4)`` with ``(x1, y1, x2, y2)`` rows.

    Returns:
        Boolean ``numpy.ndarray`` of shape ``(N, M)``.
    """
    # Broadcasting (N, 1) against (1, M) covers every pair and, unlike the
    # tile/reshape approach, also works when one of the inputs is empty.
    boxes_a = np.asarray(bboxes1).reshape(-1, 4)
    boxes_b = np.asarray(bboxes2).reshape(-1, 4)

    a_top, a_bottom = boxes_a[:, None, 1], boxes_a[:, None, 3]
    b_top, b_bottom = boxes_b[None, :, 1], boxes_b[None, :, 3]

    a_center = (a_top + a_bottom) / 2
    b_center = (b_top + b_bottom) / 2

    b_center_inside_a = (a_top <= b_center) & (b_center <= a_bottom)
    a_center_inside_b = (b_top <= a_center) & (a_center <= b_bottom)

    return b_center_inside_a & a_center_inside_b
|
77 |
-
|
78 |
-
|
79 |
-
def iou(ocr1, ocr2):
    """Return the IOU of two OCR items addressed by 'x1', 'y1', 'x2', 'y2' keys."""
    corner_keys = ('x1', 'y1', 'x2', 'y2')
    first_box = tuple(ocr1[key] for key in corner_keys)
    second_box = tuple(ocr2[key] for key in corner_keys)
    return bb_intersection_over_union(first_box, second_box)
|
84 |
-
|
85 |
-
|
86 |
-
def _generate_empty_row(example_row, index):
|
87 |
-
"""This will generate empty row with empty values but it also generates tiny but valid bounding box
|
88 |
-
to avoid exceptions while cropping the image"""
|
89 |
-
|
90 |
-
example_row_dict = example_row.to_dict()
|
91 |
-
example_row_dict['page'] = example_row_dict.get('page', 0)
|
92 |
-
example_row_dict['block'] = 0
|
93 |
-
example_row_dict['paragraph'] = 0
|
94 |
-
example_row_dict['word'] = 0
|
95 |
-
example_row_dict['x1'] = 0
|
96 |
-
example_row_dict['y1'] = 0
|
97 |
-
example_row_dict['x2'] = 1
|
98 |
-
example_row_dict['y2'] = 1
|
99 |
-
example_row_dict['conf'] = 0.0
|
100 |
-
example_row_dict['text'] = ""
|
101 |
-
|
102 |
-
empty_row = pd.DataFrame([example_row_dict], columns=example_row.index, index=[index])
|
103 |
-
|
104 |
-
return empty_row
|
105 |
-
|
106 |
-
|
107 |
-
def word_or_symbol_pair_matching(df1, df2, pref1, pref2):
    """Match words or symbols of two frames by bounding-box IOU, page by page.

    Pairs are formed greedily in decreasing order of IOU and every item takes
    part in at most one pair. Items left without a partner are paired with a
    generated empty row, so every item of both frames appears in the result.
    Pages that have items in only one of the two frames are skipped.

    Args:
        df1: first frame; needs a 'page' column plus whatever
            ``calculate_ious_fast`` reads.
        df2: second frame with the same layout.
        pref1: prefix added to the column names coming from ``df1``.
        pref2: prefix added to the column names coming from ``df2``.

    Returns:
        A frame with the prefixed columns of both sides and an "iou" column,
        one row per pair, concatenated over all pages.

    Raises:
        ValueError: from ``pd.concat`` when no page has items in both frames.
    """
    text_pairs_dfs_per_page = []
    unique_page_ids = sorted(set(df1['page'].unique().tolist() + df2['page'].unique().tolist()))

    for page_id in unique_page_ids:
        # extract words for given page only
        df1_page = df1[df1.page == page_id]
        df2_page = df2[df2.page == page_id]

        # Both sides are required: the empty rows below are cloned from the
        # first row of each side. (The original tested df1_page twice, so an
        # empty df2 page crashed on ``df2_page.iloc[0]``.)
        if df1_page.empty or df2_page.empty:
            continue

        # calculate similarities for every (df1 item, df2 item) combination
        similarity_metrics = calculate_ious_fast(ocr1_df=df1_page, ocr2_df=df2_page)
        similarities = [
            (index1, index2, similarity_metrics[idx1, idx2])
            for idx1, index1 in enumerate(df1_page.index)
            for idx2, index2 in enumerate(df2_page.index)
        ]

        # process pair similarities in decreasing order of similarity values
        sorted_similarities = sorted(similarities, key=lambda x: -x[2])
        paired_items_1 = set()
        paired_items_2 = set()
        pairs = []
        for idx1, idx2, similarity in sorted_similarities:
            if similarity > 0.0 and idx1 not in paired_items_1 and idx2 not in paired_items_2:
                paired_items_1.add(idx1)
                paired_items_2.add(idx2)
                pairs.append((idx1, idx2, similarity))

        # Unmatched items are paired with the empty row. The index is derived
        # from the sizes of the whole frames to avoid collisions across pages.
        # NOTE(review): assumes existing index labels are smaller than this value.
        EMPTY_ITEM_INDEX = max(df1.shape[0], df2.shape[0]) + 100 + page_id
        pairs.extend((idx1, EMPTY_ITEM_INDEX, 0.0) for idx1 in df1_page.index if idx1 not in paired_items_1)
        pairs.extend((EMPTY_ITEM_INDEX, idx2, 0.0) for idx2 in df2_page.index if idx2 not in paired_items_2)

        # sort pairs according to df2 items original indices
        sorted_pairs = sorted(pairs, key=lambda x: (x[1], x[0]))

        # create row for empty items in each dataframe
        df1_page = pd.concat([df1_page, _generate_empty_row(example_row=df1_page.iloc[0], index=EMPTY_ITEM_INDEX)])
        df2_page = pd.concat([df2_page, _generate_empty_row(example_row=df2_page.iloc[0], index=EMPTY_ITEM_INDEX)])

        # generate pairs dataset
        text_pairs_df = pd.concat(
            [
                df1_page.loc[[item[0] for item in sorted_pairs], :].reset_index(drop=True).add_prefix(pref1),
                df2_page.loc[[item[1] for item in sorted_pairs], :].reset_index(drop=True).add_prefix(pref2),
                pd.DataFrame(
                    data=[item[2] for item in sorted_pairs],
                    columns=["iou"]
                ),
            ],
            axis=1
        )

        text_pairs_dfs_per_page.append(text_pairs_df)

    all_text_pairs_df = pd.concat(text_pairs_dfs_per_page, axis=0)

    return all_text_pairs_df
|
177 |
-
|
178 |
-
|
179 |
-
def word_or_symbol_group_pair_matching(df1, df2, pref1, pref2):
    """Match groups of words or symbols of two frames by bounding-box IOU.

    Works like the non-group matching (greedy, unique pairs in decreasing
    order of IOU, unmatched items paired with a generated empty row), but the
    items of each page are first merged into groups by
    ``get_connected_components``. This evaluates the overall detected text
    rather than the exact word or symbol boundaries.

    Example:
        With the words ["abc", "d"] on one side and the single word ["abcd"]
        on the other, the first two words are grouped and matched against the
        one target word.

    Note: grouping happens within one line only, to avoid unpredictable
    results when boxes on neighbouring lines intersect.

    Pages that have items in only one of the two frames are skipped.

    Args:
        df1: first frame; needs a 'page' column plus whatever the grouping
            and IOU helpers read.
        df2: second frame with the same layout.
        pref1: prefix added to the column names coming from ``df1``.
        pref2: prefix added to the column names coming from ``df2``.

    Returns:
        A frame with the prefixed columns of both sides and an "iou" column,
        one row per pair, concatenated over all pages.

    Raises:
        ValueError: from ``pd.concat`` when no page has items in both frames.
    """
    text_pairs_dfs_per_page = []
    unique_page_ids = sorted(set(df1['page'].unique().tolist() + df2['page'].unique().tolist()))

    for page_id in unique_page_ids:
        # extract words for given page only
        df1_page = df1[df1.page == page_id]
        df2_page = df2[df2.page == page_id]

        # Both sides are required: the empty rows below are cloned from the
        # first group of each side. (The original tested df1_page twice, so
        # an empty df2 page was not filtered out.)
        if df1_page.empty or df2_page.empty:
            continue

        df1_page_groups, df2_page_groups = get_connected_components(ocr1_df=df1_page, ocr2_df=df2_page)

        # calculate similarities for every (df1 group, df2 group) combination
        similarity_metrics = calculate_ious_fast(ocr1_df=df1_page_groups, ocr2_df=df2_page_groups)
        similarities = [
            (index1, index2, similarity_metrics[idx1, idx2])
            for idx1, index1 in enumerate(df1_page_groups.index)
            for idx2, index2 in enumerate(df2_page_groups.index)
        ]

        # process pair similarities in decreasing order of similarity values
        sorted_similarities = sorted(similarities, key=lambda x: -x[2])
        paired_items_1 = set()
        paired_items_2 = set()
        pairs = []
        for idx1, idx2, similarity in sorted_similarities:
            if similarity > 0.0 and idx1 not in paired_items_1 and idx2 not in paired_items_2:
                paired_items_1.add(idx1)
                paired_items_2.add(idx2)
                pairs.append((idx1, idx2, similarity))

        # Unmatched groups are paired with the empty row. The index is derived
        # from the sizes of the whole frames to avoid collisions across pages.
        EMPTY_ITEM_INDEX = max(df1.shape[0], df2.shape[0]) + 100 + page_id
        pairs.extend(
            (idx1, EMPTY_ITEM_INDEX, 0.0) for idx1 in df1_page_groups.index if idx1 not in paired_items_1
        )
        pairs.extend(
            (EMPTY_ITEM_INDEX, idx2, 0.0) for idx2 in df2_page_groups.index if idx2 not in paired_items_2
        )

        # sort pairs according to df2 items original indices
        sorted_pairs = sorted(pairs, key=lambda x: (x[1], x[0]))

        # create row for empty items in each dataframe
        df1_page_groups = pd.concat(
            [df1_page_groups, _generate_empty_row(example_row=df1_page_groups.iloc[0], index=EMPTY_ITEM_INDEX)])
        df2_page_groups = pd.concat(
            [df2_page_groups, _generate_empty_row(example_row=df2_page_groups.iloc[0], index=EMPTY_ITEM_INDEX)])

        # generate pairs dataset
        text_pairs_df = pd.concat(
            [
                df1_page_groups.loc[[item[0] for item in sorted_pairs], :].reset_index(drop=True).add_prefix(pref1),
                df2_page_groups.loc[[item[1] for item in sorted_pairs], :].reset_index(drop=True).add_prefix(pref2),
                pd.DataFrame(
                    data=[item[2] for item in sorted_pairs],
                    columns=["iou"]
                ),
            ],
            axis=1
        )

        text_pairs_dfs_per_page.append(text_pairs_df)

    all_text_pairs_df = pd.concat(text_pairs_dfs_per_page, axis=0)

    return all_text_pairs_df
|
261 |
-
|
262 |
-
def calculate_ious_fast(ocr1_df, ocr2_df):
    """Return the pairwise IOU matrix of the "bounding_box" columns of two frames.

    Returns ``None`` when either frame is empty or holds no boxes.
    """
    if ocr1_df.empty or ocr2_df.empty:
        return None

    first_boxes = np.array(ocr1_df["bounding_box"].values.tolist())
    second_boxes = np.array(ocr2_df["bounding_box"].values.tolist())

    if len(first_boxes) == 0 or len(second_boxes) == 0:
        return None

    return bb_intersection_over_union_vectorized(bboxes1=first_boxes, bboxes2=second_boxes)
|
272 |
-
|
273 |
-
|
274 |
-
def calculate_iosl_fast(ocr1_df, ocr2_df):
    """Return the pairwise "is on same line" matrix of the "bounding_box" columns.

    Returns ``None`` when either frame is empty or holds no boxes.
    """
    if ocr1_df.empty or ocr2_df.empty:
        return None

    first_boxes = np.array(ocr1_df["bounding_box"].values.tolist())
    second_boxes = np.array(ocr2_df["bounding_box"].values.tolist())

    if len(first_boxes) == 0 or len(second_boxes) == 0:
        return None

    return bb_is_on_same_line_vectorized(bboxes1=first_boxes, bboxes2=second_boxes)
|
284 |
-
|
285 |
-
|
286 |
-
def calculate_adjacency_matrix(ocr1_df, ocr2_df):
    """Build a 0/1 adjacency matrix over the items of both frames.

    Both frames are stacked (``ocr1_df`` rows first, then ``ocr2_df`` rows)
    and two items are adjacent when their boxes overlap (IOU > 0) and they
    are on the same line.

    Args:
        ocr1_df: first frame with a "bounding_box" column.
        ocr2_df: second frame with a "bounding_box" column.

    Returns:
        Integer ``numpy.ndarray`` of shape ``(K, K)`` with
        ``K = len(ocr1_df) + len(ocr2_df)``; an empty ``(0, 0)`` matrix when
        both frames are empty.
    """
    # concat both dataframes
    ocr_df = pd.concat([ocr1_df, ocr2_df], axis=0).reset_index()

    # Nothing to compare: the helpers below would return None here.
    if ocr_df.empty:
        return np.zeros((0, 0), dtype=int)

    # calculate ious
    ious = calculate_ious_fast(ocr1_df=ocr_df, ocr2_df=ocr_df)

    # calculate `is on same line` property
    iosls = calculate_iosl_fast(ocr1_df=ocr_df, ocr2_df=ocr_df)

    # build adjacency matrix (1s and 0s); the ``np.int`` alias no longer
    # exists in current NumPy, the builtin ``int`` is the equivalent dtype
    adjacency_matrix = np.bitwise_and(ious > 0.0, iosls).astype(int)

    return adjacency_matrix
|
303 |
-
|
304 |
-
|
305 |
-
def get_connected_components(ocr1_df, ocr2_df):
    """Group the items of both frames by connected-component analysis.

    The adjacency matrix over the stacked items of both frames is split into
    connected components. Within each component the items of ``ocr1_df`` are
    merged into one row and the items of ``ocr2_df`` into another.

    Args:
        ocr1_df: first frame with "bounding_box", "confidence" and "text" columns.
        ocr2_df: second frame with the same columns.

    Returns:
        Tuple ``(ocr1_df_groups, ocr2_df_groups)`` of frames with a fresh
        0-based index, one row per component that has items on that side.
    """

    def _aggregate_group_items_into_one(df):
        """Merge the rows of ``df`` into a single-row frame."""
        if len(df) == 1:
            return df

        # Keep a one-row *frame* (not a Series) so every group has the same
        # type when concatenated; copy so the caller's frame is untouched.
        merged = df.iloc[[0], :].copy()
        bboxes = np.array(df["bounding_box"].values.tolist())

        # Union box: smallest low corner and largest high corner.
        merged["bounding_box"] = [
            [
                np.min(bboxes[:, 0]),
                np.min(bboxes[:, 1]),
                np.max(bboxes[:, 2]),
                np.max(bboxes[:, 3]),
            ]
        ]
        merged["confidence"] = df["confidence"].mean()
        merged["text"] = " ".join(df["text"].tolist())

        return merged

    # 1. calculate adjacency matrix
    adjacency_matrix = calculate_adjacency_matrix(ocr1_df=ocr1_df, ocr2_df=ocr2_df)

    # 2. find connected components
    n_components, labels = connected_components(csgraph=csr_matrix(adjacency_matrix), directed=False,
                                                return_labels=True)

    # 3. separate df1 and df2 items for each connected component; positions
    #    below len(ocr1_df) belong to ocr1_df, the rest to ocr2_df
    first_size = len(ocr1_df)
    connected_component_groups = {}
    for position, label in enumerate(labels):
        members = connected_component_groups.setdefault(label, {1: [], 2: []})
        if position < first_size:
            members[1].append(position)
        else:
            members[2].append(position - first_size)

    # 4. merge group items into one; the stored members are positions in the
    #    stacked frame, so rows are selected with ``iloc`` rather than ``loc``
    ocr1_df_groups = pd.concat(
        [
            _aggregate_group_items_into_one(ocr1_df.iloc[connected_component_groups[label][1], :])
            for label in sorted(connected_component_groups)
            if connected_component_groups[label][1]
        ],
        axis=0
    ).reset_index(drop=True)

    ocr2_df_groups = pd.concat(
        [
            _aggregate_group_items_into_one(ocr2_df.iloc[connected_component_groups[label][2], :])
            for label in sorted(connected_component_groups)
            if connected_component_groups[label][2]
        ],
        axis=0
    ).reset_index(drop=True)

    return ocr1_df_groups, ocr2_df_groups
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
evaluation/metrics.py
DELETED
@@ -1,589 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
|
3 |
-
from evaluation.iou import word_or_symbol_pair_matching, word_or_symbol_group_pair_matching
|
4 |
-
|
5 |
-
|
6 |
-
def text_accuracy(df, pref_1, pref_2):
    """Return the fraction of rows whose two prefixed 'text' columns are equal."""
    first_texts = df[f'{pref_1}text']
    second_texts = df[f'{pref_2}text']
    equal_rows = (first_texts == second_texts).sum()
    return equal_rows / df.shape[0]
|
8 |
-
|
9 |
-
|
10 |
-
def text_precision(df, pref_1, pref_2):
    """Return the share of non-empty ``pref_1`` texts that equal the ``pref_2`` text."""
    first_texts = df[f'{pref_1}text']
    second_texts = df[f'{pref_2}text']

    # Rows where the first side actually contains some text.
    has_text = first_texts.apply(bool)
    correct = has_text & (first_texts == second_texts)

    return correct.sum() / has_text.sum()
|
15 |
-
|
16 |
-
|
17 |
-
def text_recall(df, pref_1, pref_2):
    """Return the share of non-empty ``pref_2`` texts that equal the ``pref_1`` text."""
    first_texts = df[f'{pref_1}text']
    second_texts = df[f'{pref_2}text']

    # Rows where the second side actually contains some text.
    has_text = second_texts.apply(bool)
    correct = has_text & (first_texts == second_texts)

    return correct.sum() / has_text.sum()
|
22 |
-
|
23 |
-
|
24 |
-
def text_f1(df, pref_1, pref_2):
    """Return the harmonic mean of text precision and recall (0.0 if either is 0)."""
    precision = text_precision(df, pref_1, pref_2)
    recall = text_recall(df, pref_1, pref_2)

    # Guard against dividing by zero when there is no correct match at all.
    if precision == 0 or recall == 0:
        return 0.0

    return (2 * precision * recall) / (precision + recall)
|
34 |
-
|
35 |
-
|
36 |
-
def symbol_confusion_matrix(df, pref_1, pref_2):
    """Count how often each ``pref_1`` text is paired with each ``pref_2`` text.

    Returns:
        Tuple ``(confusion_matrix, pair_cnts)``: a square frame indexed by the
        sorted union of all texts (rows: ``pref_1`` text, columns: ``pref_2``
        text), and the per-pair counts as a frame sorted by both text columns.
    """
    first_col = f'{pref_1}text'
    second_col = f'{pref_2}text'

    symbols = list(sorted(set(df[first_col].tolist() + df[second_col].tolist())))
    counts_per_pair = df[[first_col, second_col]].value_counts()

    pair_cnts = counts_per_pair.reset_index()
    pair_cnts = pair_cnts.rename({0: "count"}, axis=1)
    pair_cnts = pair_cnts.sort_values(by=[first_col, second_col], ascending=True)

    lookup = counts_per_pair.to_dict()

    # Pairs that never occur get a count of 0.
    rows = []
    for row_symbol in symbols:
        rows.append([lookup.get((row_symbol, col_symbol), 0) for col_symbol in symbols])

    confusion_matrix = pd.DataFrame(rows, columns=symbols, index=symbols)

    return confusion_matrix, pair_cnts
|
57 |
-
|
58 |
-
|
59 |
-
def levenstein(text1, text2):
    """Compute edit-operation based metrics for two texts.

    Returns:
        Tuple ``(levenstein_similarity, levenstein_distance, edit_operations)``:
        - levenstein_similarity: 1.0 when the distance is 0, otherwise the
          number of "match" operations divided by the number of all
          operations (0.0 when there is no match).
        - levenstein_distance: the distance reported by ``edit_distance``.
        - edit_operations: the operation list reported by ``edit_distance``.
    """
    levenstein_distance, edit_operations = edit_distance(text1, text2)

    if levenstein_distance == 0:
        return 1.0, levenstein_distance, edit_operations

    matched = sum(1 for operation in edit_operations if operation[0] == "match")
    if matched:
        levenstein_similarity = float(matched / len(edit_operations))
    else:
        levenstein_similarity = 0.0

    return levenstein_similarity, levenstein_distance, edit_operations
|
80 |
-
|
81 |
-
|
82 |
-
def edit_distance(text1, text2):
    """Return the edit distance of two texts and the operations behind it.

    Three unit-cost operations are allowed: insert, delete and substitute a
    character. The problem is solved with a (N+1) x (M+1) table where
    ``cost[i][j]`` is the minimum number of operations between ``text1[:i]``
    and ``text2[:j]``::

        cost[i][j] = min(cost[i-1][j-1] + (0 if chars equal else 1),
                         cost[i-1][j] + 1,
                         cost[i][j-1] + 1)

    On ties the diagonal step wins, then the step from ``(i-1, j)``.

    Returns:
        Tuple ``(distance, operations)``. Each operation is a tuple
        ``(name, text1 char, text2 char, i - 1, j - 1)`` listed from the start
        of the texts to the end. The step from ``(i-1, j)`` is recorded as
        "insertion" with the ``text1`` character, the step from ``(i, j-1)``
        as "deletion" with the ``text2`` character, and the diagonal step as
        "match" or "substitution". When either text is empty the distance is
        the length of the other text and the operation list is empty.
    """
    if not text1:
        return len(text2), []
    if not text2:
        return len(text1), []

    rows = len(text1)
    cols = len(text2)

    cost = [[0] * (cols + 1) for _ in range(rows + 1)]
    choice = [[None] * (cols + 1) for _ in range(rows + 1)]

    # Borders: only one kind of step is possible along each edge.
    for r in range(rows + 1):
        cost[r][0] = r
        choice[r][0] = "insertion"
    for c in range(cols + 1):
        cost[0][c] = c
        choice[0][c] = "deletion"

    for r in range(1, rows + 1):
        char1 = text1[r - 1]
        for c in range(1, cols + 1):
            differs = int(char1 != text2[c - 1])

            # Diagonal step first; the others only win when strictly cheaper.
            best_cost = cost[r - 1][c - 1] + differs
            best_choice = "substitution" if differs == 1 else "match"

            if cost[r - 1][c] + 1 < best_cost:
                best_cost = cost[r - 1][c] + 1
                best_choice = "insertion"

            if cost[r][c - 1] + 1 < best_cost:
                best_cost = cost[r][c - 1] + 1
                best_choice = "deletion"

            cost[r][c] = best_cost
            choice[r][c] = best_choice

    # Walk back from the bottom-right corner to the origin.
    backwards = []
    r, c = rows, cols
    while r > 0 or c > 0:
        step = choice[r][c]
        if step == "substitution" or step == "match":
            backwards.append((step, text1[r - 1], text2[c - 1], r - 1, c - 1))
            r -= 1
            c -= 1
        elif step == "insertion":
            backwards.append(("insertion", text1[r - 1], "", r - 1, c - 1))
            r -= 1
        elif step == "deletion":
            backwards.append(("deletion", "", text2[c - 1], r - 1, c - 1))
            c -= 1

    return cost[rows][cols], backwards[::-1]
|
185 |
-
|
186 |
-
|
187 |
-
def levenstein_metrics(df, pref_1="Pred_", pref_2='Tar_'):
    """Apply ``levenstein`` to every row of ``df`` and split the results.

    Returns:
        Tuple of three Series aligned with ``df``: the similarities, the
        distances and the edit-operation lists.
    """
    first_col = f'{pref_1}text'
    second_col = f'{pref_2}text'

    def _score_row(row):
        return levenstein(text1=row[first_col], text2=row[second_col])

    per_row_results = df[[first_col, second_col]].apply(_score_row, axis=1)

    # Each result is a (similarity, distance, operations) triple.
    similarities = per_row_results.apply(lambda result: result[0])
    distances = per_row_results.apply(lambda result: result[1])
    operations = per_row_results.apply(lambda result: result[2])

    return similarities, distances, operations
|
197 |
-
|
198 |
-
|
199 |
-
def evaluate_by_words(pred_df, target_df, pred_pref='Pred_', target_pref='Target_', **kwargs):
    """Evaluate predicted words against target words after IOU pair matching.

    Args:
        pred_df: frame with the predicted items.
        target_df: frame with the target items.
        pred_pref: column prefix used for the predicted side of the pairs.
        target_pref: column prefix used for the target side of the pairs.
        **kwargs: ``show_hist=True`` additionally plots histograms of the
            similarities and distances and bar charts of the edit operations.

    Returns:
        Report dict with "accuracy", "precision", "recall", "f1" and the
        levenstein / IOU / edit-operation statistics. When either frame is
        empty the scores are ``None`` and the statistics are empty dicts.
    """
    tracked_operations = ["insertion", "deletion", "substitution"]

    if pred_df.empty or target_df.empty:
        return {
            "accuracy": None,
            "precision": None,
            "recall": None,
            "f1": None,
            "levenstein_distances_stats": {},
            "levenstein_similarities_stats": {},
            "iou_stats": {},
            "edit_operations_stats": {key: {} for key in tracked_operations},
        }

    show_hist = kwargs.get("show_hist", False)
    text_pairs = word_or_symbol_pair_matching(df1=pred_df, df2=target_df, pref1=pred_pref, pref2=target_pref)
    levenstein_similarities, levenstein_distances, edit_operations = levenstein_metrics(
        df=text_pairs, pref_1=pred_pref, pref_2=target_pref
    )

    # Summary statistics plus the raw values for each metric.
    levenstein_similarities_stats = dict(levenstein_similarities.describe().to_dict())
    levenstein_similarities_stats["values"] = levenstein_similarities.tolist()

    levenstein_distances_stats = dict(levenstein_distances.describe().to_dict())
    levenstein_distances_stats["values"] = levenstein_distances.tolist()

    iou_stats = dict(text_pairs.iou.describe().to_dict())
    iou_stats["values"] = text_pairs.iou.tolist()

    # Per operation kind: how often each "[text1 char]_[text2 char]" occurred.
    edit_operations_stats = {}
    for operation_id in tracked_operations:
        labels_per_row = edit_operations.apply(
            lambda x: [f"[{item[1]}]_[{item[2]}]" for item in x if item[0] == operation_id]
        )
        edit_operations_stats[operation_id] = pd.Series(labels_per_row.sum(axis=0)).value_counts().to_dict()

    if show_hist is True:
        pd.Series(levenstein_similarities).plot(kind='hist', bins=20, title="Levestein Similarities")
        pd.Series(levenstein_distances).plot(kind='hist', bins=20, title="Levestein Distances")
        for edit_operation_id, edit_operation_data in edit_operations_stats.items():
            pd.Series(edit_operation_data).plot(kind='barh', title=f"{edit_operation_id.capitalize()} Stats")

    return {
        "accuracy": text_accuracy(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "precision": text_precision(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "recall": text_recall(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "f1": text_f1(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "levenstein_distances_stats": levenstein_distances_stats,
        "levenstein_similarities_stats": levenstein_similarities_stats,
        "iou_stats": iou_stats,
        "edit_operations_stats": edit_operations_stats,
    }
|
257 |
-
|
258 |
-
|
259 |
-
def evaluate_by_word_groups(pred_df, target_df, pred_pref='Pred_', target_pref='Target_', **kwargs):
    """Evaluate predicted word groups against target word groups.

    Same report as the word-level evaluation, but the pairs come from the
    group-based IOU matching.

    Args:
        pred_df: frame with the predicted items.
        target_df: frame with the target items.
        pred_pref: column prefix used for the predicted side of the pairs.
        target_pref: column prefix used for the target side of the pairs.
        **kwargs: ``show_hist=True`` additionally plots histograms of the
            similarities and distances and bar charts of the edit operations.

    Returns:
        Report dict with "accuracy", "precision", "recall", "f1" and the
        levenstein / IOU / edit-operation statistics. When either frame is
        empty the scores are ``None`` and the statistics are empty dicts.
    """
    tracked_operations = ["insertion", "deletion", "substitution"]

    if pred_df.empty or target_df.empty:
        return {
            "accuracy": None,
            "precision": None,
            "recall": None,
            "f1": None,
            "levenstein_distances_stats": {},
            "levenstein_similarities_stats": {},
            "iou_stats": {},
            "edit_operations_stats": {key: {} for key in tracked_operations},
        }

    show_hist = kwargs.get("show_hist", False)
    text_pairs = word_or_symbol_group_pair_matching(df1=pred_df, df2=target_df, pref1=pred_pref, pref2=target_pref)
    levenstein_similarities, levenstein_distances, edit_operations = levenstein_metrics(
        df=text_pairs, pref_1=pred_pref, pref_2=target_pref
    )

    # Summary statistics plus the raw values for each metric.
    levenstein_similarities_stats = dict(levenstein_similarities.describe().to_dict())
    levenstein_similarities_stats["values"] = levenstein_similarities.tolist()

    levenstein_distances_stats = dict(levenstein_distances.describe().to_dict())
    levenstein_distances_stats["values"] = levenstein_distances.tolist()

    iou_stats = dict(text_pairs.iou.describe().to_dict())
    iou_stats["values"] = text_pairs.iou.tolist()

    # Per operation kind: how often each "[text1 char]_[text2 char]" occurred.
    edit_operations_stats = {}
    for operation_id in tracked_operations:
        labels_per_row = edit_operations.apply(
            lambda x: [f"[{item[1]}]_[{item[2]}]" for item in x if item[0] == operation_id]
        )
        edit_operations_stats[operation_id] = pd.Series(labels_per_row.sum(axis=0)).value_counts().to_dict()

    if show_hist is True:
        pd.Series(levenstein_similarities).plot(kind='hist', bins=20, title="Levestein Similarities")
        pd.Series(levenstein_distances).plot(kind='hist', bins=20, title="Levestein Distances")
        for edit_operation_id, edit_operation_data in edit_operations_stats.items():
            pd.Series(edit_operation_data).plot(kind='barh', title=f"{edit_operation_id.capitalize()} Stats")

    return {
        "accuracy": text_accuracy(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "precision": text_precision(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "recall": text_recall(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "f1": text_f1(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "levenstein_distances_stats": levenstein_distances_stats,
        "levenstein_similarities_stats": levenstein_similarities_stats,
        "iou_stats": iou_stats,
        "edit_operations_stats": edit_operations_stats,
    }
|
317 |
-
|
318 |
-
|
319 |
-
def reduce_word_evaluation_results(eval_results):
    """Aggregate per-document word-level evaluation reports into one summary.

    Args:
        eval_results: list of report dicts. Each one is read for the keys
            'accuracy', 'precision', 'recall', 'f1',
            'levenstein_similarities_stats', 'levenstein_distances_stats',
            'iou_stats' (dicts with an optional 'values' list) and
            'edit_operations_stats' (operation -> {key: count}).

    Returns:
        dict with mean/std/values for each scalar metric, the number of
        documents, ``describe()`` statistics over the per-document means of
        the Levenshtein and IoU values, and the edit-operation counts summed
        over all documents. For an empty ``eval_results`` every entry is an
        empty placeholder and ``document_count`` is 0.
    """
    if not eval_results:
        return {
            "accuracy": {},
            "precision": {},
            "recall": {},
            "f1": {},
            "document_count": 0,
            "levenstein_distances_stats": {},
            "levenstein_similarities_stats": {},
            "iou_stats": {},
            "edit_operations_stats": {key: {} for key in ["insertion", "deletion", "substitution"]},
        }

    # Sum the edit-operation counters key by key across documents.
    edit_operations_stats = {}
    for eval_result in eval_results:
        for edit_operation, edit_operation_data in eval_result['edit_operations_stats'].items():
            merged = edit_operations_stats.setdefault(edit_operation, {})
            for key, count in edit_operation_data.items():
                merged[key] = merged.get(key, 0) + count

    return {
        "accuracy": _word_metric_summary([item['accuracy'] for item in eval_results]),
        "precision": _word_metric_summary([item['precision'] for item in eval_results]),
        "recall": _word_metric_summary([item['recall'] for item in eval_results]),
        "f1": _word_metric_summary([item['f1'] for item in eval_results]),
        "document_count": len(eval_results),
        "levenstein_distances_stats": _word_per_document_stats(eval_results, 'levenstein_distances_stats'),
        "levenstein_similarities_stats": _word_per_document_stats(eval_results, 'levenstein_similarities_stats'),
        "iou_stats": _word_per_document_stats(eval_results, 'iou_stats'),
        "edit_operations_stats": edit_operations_stats,
    }


def _word_metric_summary(values):
    """Return mean, std and the raw values of one per-document metric."""
    # Explicit float dtype: a report may carry ``None`` for a metric, and an
    # all-None list would otherwise become an object-dtype Series.
    series = pd.Series(values, dtype=float)
    return {
        "mean": series.mean(),
        "std": series.std(),
        "values": series.tolist(),
    }


def _word_per_document_stats(eval_results, stats_key):
    """Describe the per-document means of ``item[stats_key]['values']``."""
    # A missing or empty 'values' list yields NaN for that document; the
    # float dtype keeps that independent of pandas' empty-Series inference.
    means = pd.Series(
        [
            pd.Series(item[stats_key].get('values', []), dtype=float).mean()
            for item in eval_results
        ],
        dtype=float,
    )
    return {**means.describe().to_dict(), "values": means.tolist()}
|
410 |
-
|
411 |
-
|
412 |
-
def evaluate_by_symbols(pred_df, target_df, pred_pref='Pred_', target_pref='Target_', **kwargs):
    """Build a symbol-level evaluation report for one prediction/target pair.

    Args:
        pred_df: DataFrame of predicted symbols.
        target_df: DataFrame of target symbols.
        pred_pref: prefix passed on for the prediction columns.
        target_pref: prefix passed on for the target columns.
        **kwargs: only ``show_hist`` is read; when it is exactly ``True`` a
            horizontal bar plot of the pair counts is drawn.

    Returns:
        dict with 'accuracy', 'precision', 'recall', 'f1', 'confusion_matrix',
        'pair_counts' and 'iou_stats'. If either input frame is empty the
        metrics are ``None``, both tables are empty DataFrames and
        'iou_stats' is an empty dict.
    """
    # Guard clause: nothing can be matched when either side is empty.
    if pred_df.empty or target_df.empty:
        return {
            "accuracy": None,
            "precision": None,
            "recall": None,
            "f1": None,
            "confusion_matrix": pd.DataFrame(),
            "pair_counts": pd.DataFrame(),
            "iou_stats": {},
        }

    plot_requested = kwargs.get("show_hist", False)

    # NOTE(review): the matching and metric helpers are defined outside this
    # block; the result is used here as a DataFrame with an ``iou`` column.
    matched = word_or_symbol_pair_matching(df1=pred_df, df2=target_df, pref1=pred_pref, pref2=target_pref)
    confusion_matrix, pair_counts = symbol_confusion_matrix(matched, pref_1=pred_pref, pref_2=target_pref)

    iou_stats = dict(matched.iou.describe().to_dict())
    iou_stats["values"] = matched.iou.tolist()

    if plot_requested is True:
        pd.Series(pair_counts).plot(kind='barh', title="Symbol Pair Counts")

    prefixes = {"pref_1": pred_pref, "pref_2": target_pref}
    return {
        "accuracy": text_accuracy(df=matched, **prefixes),
        "precision": text_precision(df=matched, **prefixes),
        "recall": text_recall(df=matched, **prefixes),
        "f1": text_f1(df=matched, **prefixes),
        "confusion_matrix": confusion_matrix,
        "pair_counts": pair_counts,
        "iou_stats": iou_stats,
    }
|
449 |
-
|
450 |
-
|
451 |
-
def reduce_pair_counts(pair_counts):
    """Sum per-document pair-count tables into a single table.

    Args:
        pair_counts: list of DataFrames. In each one the last column holds a
            count and all preceding columns together identify the pair.
            Empty frames are ignored.

    Returns:
        DataFrame with one row per distinct pair (in first-seen order) and
        the summed count, using the column names of the last non-empty
        input frame. An empty DataFrame is returned when the list is empty
        or contains only empty frames.
    """
    if not pair_counts:
        return pd.DataFrame()

    totals = {}
    columns = []
    for pair_count in pair_counts:
        if pair_count.empty:
            continue
        columns = pair_count.columns.tolist()
        # Index by every column but the last, then map pair -> count.
        per_document = pair_count.set_index(columns[:-1], drop=True).to_dict()[columns[-1]]
        for key, value in per_document.items():
            totals[key] = totals.get(key, 0) + value

    # Only empty frames were supplied: return the same plain empty frame as
    # for an empty list rather than a zero-row frame with placeholder
    # 'index'/0 columns.
    if not totals:
        return pd.DataFrame()

    reduced_pair_counts_df = pd.Series(totals).to_frame().reset_index()
    reduced_pair_counts_df.columns = columns
    return reduced_pair_counts_df
|
472 |
-
|
473 |
-
|
474 |
-
def reduce_confusion_matrices(confusion_matrices):
    """Add up several confusion matrices into one square matrix.

    Args:
        confusion_matrices: list of DataFrames whose index and column labels
            name the symbols. Empty frames contribute nothing.

    Returns:
        DataFrame indexed and labelled by the sorted union of every row and
        column label seen, holding the cell-wise sums (0 where no input had
        that cell). An empty DataFrame is returned for an empty list.
    """
    if not confusion_matrices:
        return pd.DataFrame()

    labels = set()
    totals = {}
    for matrix in confusion_matrices:
        if matrix.empty:
            continue
        for row_label in matrix.index:
            for column_label in matrix.columns:
                cell = (row_label, column_label)
                cell_value = matrix.loc[row_label, column_label]
                # Both axes feed the label set so the result is square.
                labels.add(row_label)
                labels.add(column_label)
                totals[cell] = totals.get(cell, 0) + cell_value

    ordered_labels = sorted(labels)
    rows = []
    for row_label in ordered_labels:
        rows.append([totals.get((row_label, column_label), 0) for column_label in ordered_labels])

    return pd.DataFrame(rows, columns=ordered_labels, index=ordered_labels)
|
508 |
-
|
509 |
-
|
510 |
-
def reduce_symbol_evaluation_results(eval_results):
    """Aggregate per-document symbol-level evaluation reports into one summary.

    Args:
        eval_results: list of report dicts. Each one is read for the keys
            'accuracy', 'precision', 'recall', 'f1', 'confusion_matrix',
            'pair_counts' and 'iou_stats' (a dict with an optional 'values'
            list).

    Returns:
        dict with mean/std/values for each scalar metric, the number of
        documents, the merged pair counts and confusion matrix (as produced
        by ``reduce_pair_counts`` and ``reduce_confusion_matrices``), and
        ``describe()`` statistics over the per-document mean IoU. For an
        empty ``eval_results`` every entry is an empty placeholder and
        ``document_count`` is 0.
    """
    if not eval_results:
        return {
            "accuracy": {},
            "precision": {},
            "recall": {},
            "f1": {},
            "document_count": 0,
            "pair_counts": pd.DataFrame(),
            "confusion_matrix": pd.DataFrame(),
            "iou_stats": {},
        }

    confusion_matrices = [item['confusion_matrix'] for item in eval_results]
    pair_counts = [item['pair_counts'] for item in eval_results]

    # One mean IoU per document; a missing or empty 'values' list gives NaN.
    # The float dtype keeps that independent of pandas' dtype inference.
    ious = pd.Series(
        [
            pd.Series(item['iou_stats'].get('values', []), dtype=float).mean()
            for item in eval_results
        ],
        dtype=float,
    )
    iou_stats = {
        **ious.describe().to_dict(),
        "values": ious.tolist()
    }

    return {
        "accuracy": _symbol_metric_summary([item['accuracy'] for item in eval_results]),
        "precision": _symbol_metric_summary([item['precision'] for item in eval_results]),
        "recall": _symbol_metric_summary([item['recall'] for item in eval_results]),
        "f1": _symbol_metric_summary([item['f1'] for item in eval_results]),
        "document_count": len(eval_results),
        "pair_counts": reduce_pair_counts(pair_counts),
        "confusion_matrix": reduce_confusion_matrices(confusion_matrices),
        "iou_stats": iou_stats,
    }


def _symbol_metric_summary(values):
    """Return mean, std and the raw values of one per-document metric."""
    # Explicit float dtype: a report may carry ``None`` for a metric, and an
    # all-None list would otherwise become an object-dtype Series.
    series = pd.Series(values, dtype=float)
    return {
        "mean": series.mean(),
        "std": series.std(),
        "values": series.tolist(),
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
iliauniiccocrevaluation.py
CHANGED
@@ -17,8 +17,8 @@ import datasets
|
|
17 |
import evaluate
|
18 |
|
19 |
# TODO: Add BibTeX citation
|
20 |
-
from evaluation.metrics import evaluate_by_words
|
21 |
-
from ocr.fiftyone import FiftyOneOcr
|
22 |
|
23 |
_CITATION = """\
|
24 |
@InProceedings{huggingface:module,
|
|
|
17 |
import evaluate
|
18 |
|
19 |
# TODO: Add BibTeX citation
|
20 |
+
from ocr_evaluation.evaliate.metrics import evaluate_by_words
|
21 |
+
from ocr_evaluation.ocr.fiftyone import FiftyOneOcr
|
22 |
|
23 |
_CITATION = """\
|
24 |
@InProceedings{huggingface:module,
|
ocr/fiftyone.py
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
import numpy as np
|
3 |
-
|
4 |
-
|
5 |
-
class FiftyOneOcr:
    """Thin wrapper around one FiftyOne-style OCR sample dict."""

    def __init__(self, data):
        # ``data`` is read as ``data["detections"]["detections"]``: a list
        # of per-word annotation dicts.
        self.data = data

    def get_word_annotations(self, convert_bbox: bool = True) -> pd.DataFrame:
        """Return a dataframe where each row is one independent word annotation.

        Args:
            convert_bbox: convert each 'bounding_box' from the
                (x1, y1, dx, dy) form to the two-point (x1, y1, x2, y2)
                form by adding the first two values to the last two.

        Returns:
            DataFrame built from the annotation dicts; empty when the sample
            has no detections.
        """
        # ``or`` fallbacks cover both a missing key and an explicit None, so
        # a sample without detections yields an empty frame instead of an
        # AttributeError.
        detections = self.data.get("detections") or {}
        annotations = detections.get("detections") or []

        annotations_df = pd.DataFrame(annotations)

        # Convert bounding boxes into the two-point format. Skipped when
        # there is no 'bounding_box' column (e.g. no annotations), which
        # previously raised KeyError.
        if convert_bbox and 'bounding_box' in annotations_df.columns:
            bbox = np.array(annotations_df['bounding_box'].values.tolist())
            bbox[:, 2:] += bbox[:, :2]
            annotations_df['bounding_box'] = bbox.tolist()

        return annotations_df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -1 +1,2 @@
|
|
1 |
-
git+https://github.com/huggingface/evaluate@main
|
|
|
|
1 |
+
git+https://github.com/huggingface/evaluate@main
|
2 |
+
git+https://github.com/IliaUni-ICC/ocr_evaluation@main
|