Spaces:

anz2
/

iliauniiccocrevaluation

Runtime error

App Files Files Community

anz2 committed on Nov 3, 2022

Commit

48fef9e

1 Parent(s): 85b9718

Install the evaluation package from git separately. Remove the packages that were moved to a separate repository.

Browse files

Files changed (5) hide show

evaluation/iou.py +0 -370
evaluation/metrics.py +0 -589
iliauniiccocrevaluation.py +2 -2
ocr/fiftyone.py +0 -26
requirements.txt +2 -1

evaluation/iou.py DELETED Viewed

@@ -1,370 +0,0 @@
-# https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/
-import numpy as np
-import pandas as pd
-from scipy.sparse import csr_matrix
-from scipy.sparse.csgraph import connected_components
-def bb_intersection_over_union(boxA, boxB):
-    """Return the intersection-over-union of two boxes given as (x1, y1, x2, y2) sequences.
-    A small epsilon is added to every side length, so a zero-width or zero-height box still has a non-zero area.
-    """
-    EPS = 1e-5
-    # corners of the overlap rectangle
-    left, top = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
-    right, bottom = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])
-    # a side is clamped to zero when the boxes do not overlap along that axis
-    overlap_width = max(0, right - left + EPS)
-    overlap_height = max(0, bottom - top + EPS)
-    overlap_area = overlap_width * overlap_height
-    # areas of the two input rectangles
-    area_a = (boxA[2] - boxA[0] + EPS) * (boxA[3] - boxA[1] + EPS)
-    area_b = (boxB[2] - boxB[0] + EPS) * (boxB[3] - boxB[1] + EPS)
-    # union = sum of both areas minus the part counted twice
-    union_area = float(area_a + area_b - overlap_area)
-    return overlap_area / union_area
-def bb_intersection_over_union_vectorized(bboxes1, bboxes2):
-    """Compute the pairwise IOU matrix between two sets of boxes.
-    Args:
-        bboxes1: 2-D array-like with one (x1, y1, x2, y2) row per box.
-        bboxes2: 2-D array-like in the same layout.
-    Returns:
-        Float array of shape (len(bboxes1), len(bboxes2)); entry [i, j] is the IOU of bboxes1[i] and bboxes2[j].
-    The inputs are copied into float arrays first: the epsilon padding cannot be added in place to integer
-    (pixel) coordinates, and the caller's arrays must stay untouched.
-    """
-    EPS = 1e-5
-    # (n1, 1, 4) against (1, n2, 4): broadcasting forms every pair without building tiled copies
-    A = np.array(bboxes1, dtype=float)[:, np.newaxis, :]
-    B = np.array(bboxes2, dtype=float)[np.newaxis, :, :]
-    a_low, a_high = A[..., :2], A[..., 2:] + EPS
-    b_low, b_high = B[..., :2], B[..., 2:] + EPS
-    # side lengths of the overlap rectangle, clamped to zero for disjoint boxes, multiplied into an area
-    intrs = np.maximum(0.0, np.minimum(a_high, b_high) - np.maximum(a_low, b_low)).prod(-1)
-    area_a = (a_high - a_low).prod(-1)
-    area_b = (b_high - b_low).prod(-1)
-    return intrs / (area_a + area_b - intrs)
-def bb_is_on_same_line_vectorized(bboxes1, bboxes2):
-    """Tell, for every pair of boxes, whether the two boxes sit on the same line.
-    A pair qualifies when the midpoint between columns 1 and 3 of each box lies within the [column 1, column 3]
-    range of the other box. Returns a boolean matrix of shape (len(bboxes1), len(bboxes2)).
-    """
-    n1, n2 = len(bboxes1), len(bboxes2)
-    # repeat each row of bboxes1 n2 times and cycle bboxes2 n1 times, so flat row k is the pair (k // n2, k % n2)
-    first = np.tile(bboxes1.copy(), (1, n2)).reshape(n1 * n2, -1)
-    second = np.tile(bboxes2.copy(), (n1, 1))
-    first_y1, first_y2 = first[..., 1], first[..., 3]
-    second_y1, second_y2 = second[..., 1], second[..., 3]
-    first_mid = (first_y1 + first_y2) / 2
-    second_mid = (second_y1 + second_y2) / 2
-    second_mid_in_first = np.bitwise_and(first_y1 <= second_mid, second_mid <= first_y2)
-    first_mid_in_second = np.bitwise_and(second_y1 <= first_mid, first_mid <= second_y2)
-    same_line = np.bitwise_and(second_mid_in_first, first_mid_in_second)
-    return same_line.reshape(n1, n2)
-def iou(ocr1, ocr2):
-    """Return the IOU of two items whose box corners are stored under the keys 'x1', 'y1', 'x2' and 'y2'."""
-    corner_keys = ('x1', 'y1', 'x2', 'y2')
-    first_box = tuple(ocr1[key] for key in corner_keys)
-    second_box = tuple(ocr2[key] for key in corner_keys)
-    return bb_intersection_over_union(first_box, second_box)
-def _generate_empty_row(example_row, index):
-    """Build a one-row placeholder frame with the same columns as ``example_row``.
-    The row carries empty text, zero confidence and a tiny but valid (0, 0, 1, 1) bounding box, so that cropping
-    an image with it does not raise. Only the columns present in ``example_row`` are kept; the page value is
-    taken from ``example_row`` (0 when it has none). The row is labelled ``index``.
-    """
-    placeholder = example_row.to_dict()
-    placeholder.update({
-        'page': placeholder.get('page', 0),
-        'block': 0,
-        'paragraph': 0,
-        'word': 0,
-        'x1': 0,
-        'y1': 0,
-        'x2': 1,
-        'y2': 1,
-        'conf': 0.0,
-        'text': "",
-    })
-    return pd.DataFrame([placeholder], columns=example_row.index, index=[index])
-def _match_items_on_page(items1, items2, empty_index, pref1, pref2):
-    """Greedily pair the rows of two non-empty frames by IOU and return the frame of pairs.
-    Each row is used in at most one pair; rows left without a partner are paired with a placeholder row that is
-    labelled ``empty_index``. Columns of the result are prefixed with ``pref1`` / ``pref2`` plus an "iou" column.
-    """
-    similarity_matrix = calculate_ious_fast(ocr1_df=items1, ocr2_df=items2)
-    # only overlapping items may be paired, so candidates without a positive IOU are dropped up front
-    candidates = [
-        (label1, label2, similarity_matrix[pos1, pos2])
-        for pos1, label1 in enumerate(items1.index)
-        for pos2, label2 in enumerate(items2.index)
-        if similarity_matrix[pos1, pos2] > 0.0
-    ]
-    # best overlaps first; the sort is stable, so equal IOUs keep their row-major order
-    candidates.sort(key=lambda candidate: -candidate[2])
-    used1, used2, pairs = set(), set(), []
-    for label1, label2, similarity in candidates:
-        if label1 in used1 or label2 in used2:
-            continue
-        used1.add(label1)
-        used2.add(label2)
-        pairs.append((label1, label2, similarity))
-    # every item that found no partner is paired with the placeholder
-    pairs.extend((label1, empty_index, 0.0) for label1 in items1.index if label1 not in used1)
-    pairs.extend((empty_index, label2, 0.0) for label2 in items2.index if label2 not in used2)
-    # order the pairs by the labels of the second frame's items
-    pairs.sort(key=lambda pair: (pair[1], pair[0]))
-    # append the placeholder row to both frames so that it can be looked up like a regular item
-    items1 = pd.concat([items1, _generate_empty_row(example_row=items1.iloc[0], index=empty_index)])
-    items2 = pd.concat([items2, _generate_empty_row(example_row=items2.iloc[0], index=empty_index)])
-    return pd.concat(
-        [
-            items1.loc[[pair[0] for pair in pairs], :].reset_index(drop=True).add_prefix(pref1),
-            items2.loc[[pair[1] for pair in pairs], :].reset_index(drop=True).add_prefix(pref2),
-            pd.DataFrame(data=[pair[2] for pair in pairs], columns=["iou"]),
-        ],
-        axis=1,
-    )
-def _match_pages(df1, df2, pref1, pref2, group_items):
-    """Match the items of both frames page by page; ``group_items`` switches on same-line grouping first."""
-    per_page_pairs = []
-    page_ids = sorted(set(df1['page'].unique().tolist() + df2['page'].unique().tolist()))
-    for page_id in page_ids:
-        items1 = df1[df1.page == page_id]
-        items2 = df2[df2.page == page_id]
-        # a page that exists in only one of the inputs has nothing to be matched against
-        if items1.empty or items2.empty:
-            continue
-        if group_items:
-            items1, items2 = get_connected_components(ocr1_df=items1, ocr2_df=items2)
-        # placeholder label: past the row counts of both inputs and shifted per page to avoid collisions
-        # (this presumes the frames carry default 0..n-1 row labels)
-        empty_index = max(df1.shape[0], df2.shape[0]) + 100 + page_id
-        per_page_pairs.append(_match_items_on_page(items1, items2, empty_index, pref1, pref2))
-    if not per_page_pairs:
-        # no page is shared by both inputs: return an empty frame with the usual columns instead of
-        # letting pd.concat fail on an empty list
-        columns = [pref1 + str(name) for name in df1.columns] + [pref2 + str(name) for name in df2.columns]
-        return pd.DataFrame(columns=columns + ["iou"])
-    return pd.concat(per_page_pairs, axis=0)
-def word_or_symbol_pair_matching(df1, df2, pref1, pref2):
-    """Applies IOU based matching of word or symbol elements using rectangular bounding boxes (x1,y1,x2,y2).
-    The matching between the first and the second set is unique: no item may appear in two different pairs.
-    Pairs are formed in decreasing order of IOU, page by page. An item without a partner is paired with an empty
-    placeholder element, so every item of a processed page appears in the result exactly once.
-    Pages that are present in only one of the two frames are skipped; when no page is shared at all an empty
-    frame with the prefixed columns and an "iou" column is returned.
-    """
-    return _match_pages(df1, df2, pref1, pref2, group_items=False)
-def word_or_symbol_group_pair_matching(df1, df2, pref1, pref2):
-    """Applies IOU based matching of groups of word or symbol elements using bounding boxes (x1,y1,x2,y2).
-    Works like ``word_or_symbol_pair_matching`` (unique pairs, decreasing IOU, empty placeholder for unmatched
-    items), but the items of each page are first merged into groups, so a group of words or symbols can be
-    matched against another group. This evaluates the detected text rather than the word or symbol boundaries.
-    Example:
-        Let's say we have 2 words: ["abc", "d"] and the target has only one word: ["abcd"]; then it is better to
-        group the first two words and match them with the one target word.
-    Note: grouping only happens within one line, to avoid unpredictable results when bounding boxes on
-    neighboring lines intersect.
-    """
-    return _match_pages(df1, df2, pref1, pref2, group_items=True)
-def calculate_ious_fast(ocr1_df, ocr2_df):
-    """Return the pairwise IOU matrix of the "bounding_box" columns of two frames, or None if either is empty."""
-    if ocr1_df.empty or ocr2_df.empty:
-        return None
-    first_boxes = np.array(ocr1_df["bounding_box"].values.tolist())
-    second_boxes = np.array(ocr2_df["bounding_box"].values.tolist())
-    if len(first_boxes) == 0 or len(second_boxes) == 0:
-        return None
-    return bb_intersection_over_union_vectorized(bboxes1=first_boxes, bboxes2=second_boxes)
-def calculate_iosl_fast(ocr1_df, ocr2_df):
-    """Return the pairwise "is on same line" matrix of the "bounding_box" columns, or None if a frame is empty."""
-    if ocr1_df.empty or ocr2_df.empty:
-        return None
-    first_boxes = np.array(ocr1_df["bounding_box"].values.tolist())
-    second_boxes = np.array(ocr2_df["bounding_box"].values.tolist())
-    if len(first_boxes) == 0 or len(second_boxes) == 0:
-        return None
-    return bb_is_on_same_line_vectorized(bboxes1=first_boxes, bboxes2=second_boxes)
-def calculate_adjacency_matrix(ocr1_df, ocr2_df):
-    """Calculates the adjacency matrix over the items of both frames taken together.
-    The rows of ``ocr1_df`` come first, followed by the rows of ``ocr2_df``. Two items are adjacent when their
-    bounding boxes overlap (IOU > 0) and they are on the same line.
-    Returns:
-        Square integer matrix of 1s and 0s with one row and one column per item of the stacked frame.
-    """
-    # stack both frames so that one matrix covers the pairs within each frame and across the frames
-    ocr_df = pd.concat([ocr1_df, ocr2_df], axis=0).reset_index()
-    # calculate ious
-    ious = calculate_ious_fast(ocr1_df=ocr_df, ocr2_df=ocr_df)
-    # calculate `is on same line` property
-    iosls = calculate_iosl_fast(ocr1_df=ocr_df, ocr2_df=ocr_df)
-    # build adjacency matrix (1s and 0s); the builtin int replaces the np.int alias that NumPy has removed
-    adjacency_matrix = np.bitwise_and(ious > 0.0, iosls).astype(int)
-    return adjacency_matrix
-def get_connected_components(ocr1_df, ocr2_df):
-    """Apply connected component analysis over the items of both frames and merge each group into one row.
-    Items are connected according to ``calculate_adjacency_matrix``. For every connected component, its members
-    from ``ocr1_df`` are merged into one row and its members from ``ocr2_df`` into another.
-    Returns:
-        Tuple (ocr1_df_groups, ocr2_df_groups) of frames with one row per group and a fresh 0..n-1 index.
-    """
-    def _aggregate_group_items_into_one(df):
-        # collapse a group into a single row: enclosing box, mean confidence, space-joined text
-        if len(df) == 1:
-            return df
-        # keep a one-row frame (not a Series), so that all groups can be concatenated row-wise below
-        merged = df.iloc[[0], :].copy()
-        boxes = np.array(df["bounding_box"].values.tolist())
-        merged["bounding_box"] = [
-            [
-                np.min(boxes[:, 0]),
-                np.min(boxes[:, 1]),
-                np.max(boxes[:, 2]),
-                np.max(boxes[:, 3]),
-            ]
-        ]
-        merged["confidence"] = df["confidence"].mean()
-        merged["text"] = " ".join(df["text"].tolist())
-        return merged
-    # 1. calculate adjacency matrix
-    adjacency_matrix = calculate_adjacency_matrix(ocr1_df=ocr1_df, ocr2_df=ocr2_df)
-    # 2. find connected components
-    _, labels = connected_components(csgraph=csr_matrix(adjacency_matrix), directed=False, return_labels=True)
-    # 3. split the members of every component by frame: graph node k is row k of ocr1_df stacked on ocr2_df,
-    #    so positions below len(ocr1_df) belong to the first frame and the remaining ones to the second
-    first_count = ocr1_df.shape[0]
-    groups1, groups2 = {}, {}
-    for position, label in enumerate(labels):
-        if position < first_count:
-            groups1.setdefault(label, []).append(position)
-        else:
-            groups2.setdefault(label, []).append(position - first_count)
-    # 4. merge group items into one; members are positions, hence iloc (a per-page slice of a larger frame
-    #    keeps the row labels of the full frame, which do not start at 0)
-    ocr1_df_groups = pd.concat(
-        [_aggregate_group_items_into_one(ocr1_df.iloc[groups1[label], :]) for label in sorted(groups1)],
-        axis=0
-    ).reset_index(drop=True)
-    ocr2_df_groups = pd.concat(
-        [_aggregate_group_items_into_one(ocr2_df.iloc[groups2[label], :]) for label in sorted(groups2)],
-        axis=0
-    ).reset_index(drop=True)
-    return ocr1_df_groups, ocr2_df_groups

evaluation/metrics.py DELETED Viewed

@@ -1,589 +0,0 @@
-import pandas as pd
-from evaluation.iou import word_or_symbol_pair_matching, word_or_symbol_group_pair_matching
-def text_accuracy(df, pref_1, pref_2):
-    """Return the fraction of rows whose ``<pref_1>text`` and ``<pref_2>text`` values are equal."""
-    texts_equal = df[f'{pref_1}text'] == df[f'{pref_2}text']
-    row_count = df.shape[0]
-    return texts_equal.sum() / row_count
-def text_precision(df, pref_1, pref_2):
-    """Return the share of non-empty ``<pref_1>text`` values that equal the ``<pref_2>text`` of their row."""
-    first_text = df[f'{pref_1}text']
-    second_text = df[f'{pref_2}text']
-    # rows in which the first side holds any text at all
-    has_text = first_text.apply(bool)
-    exact_hits = has_text & (first_text == second_text)
-    return exact_hits.sum() / has_text.sum()
-def text_recall(df, pref_1, pref_2):
-    """Return the share of non-empty ``<pref_2>text`` values that equal the ``<pref_1>text`` of their row."""
-    first_text = df[f'{pref_1}text']
-    second_text = df[f'{pref_2}text']
-    # rows in which the second side holds any text at all
-    has_text = second_text.apply(bool)
-    exact_hits = has_text & (first_text == second_text)
-    return exact_hits.sum() / has_text.sum()
-def text_f1(df, pref_1, pref_2):
-    """Return the harmonic mean of ``text_precision`` and ``text_recall``; 0.0 when either of them is zero."""
-    precision = text_precision(df, pref_1, pref_2)
-    recall = text_recall(df, pref_1, pref_2)
-    if precision == 0 or recall == 0:
-        return 0.0
-    return (2 * precision * recall) / (precision + recall)
-def symbol_confusion_matrix(df, pref_1, pref_2):
-    """Count how often each ``<pref_1>text`` value is paired with each ``<pref_2>text`` value.
-    Returns:
-        Tuple (confusion_matrix, pair_cnts). ``confusion_matrix`` is a square frame indexed by the sorted set
-        of all values found in either column (rows: first column, columns: second column). ``pair_cnts`` lists
-        the observed value pairs with their counts, sorted by both text columns.
-    """
-    first_col, second_col = f'{pref_1}text', f'{pref_2}text'
-    symbols = list(sorted(set(df[first_col].tolist() + df[second_col].tolist())))
-    counts = df[[first_col, second_col]].value_counts()
-    pair_cnts = counts.reset_index()
-    # value_counts may leave the count column named 0; give it a readable name in that case
-    pair_cnts = pair_cnts.rename({0: "count"}, axis=1)
-    pair_cnts = pair_cnts.sort_values(by=[first_col, second_col], ascending=True)
-    count_lookup = counts.to_dict()
-    rows = []
-    for row_symbol in symbols:
-        rows.append([count_lookup.get((row_symbol, column_symbol), 0) for column_symbol in symbols])
-    confusion_matrix = pd.DataFrame(rows, columns=symbols, index=symbols)
-    return confusion_matrix, pair_cnts
-def levenstein(text1, text2):
-    """Measure how far ``text2`` is from ``text1`` in terms of character edit operations.
-    Returns a tuple of:
-    - levenstein_similarity: 1.0 for a distance of zero, otherwise the number of "match" operations divided by
-        the number of all operations (0.0 when nothing matches)
-    - levenstein_distance: the distance reported by ``edit_distance``
-    - edit_operations: the operation list reported by ``edit_distance``
-    """
-    levenstein_distance, edit_operations = edit_distance(text1, text2)
-    if levenstein_distance == 0:
-        return 1.0, levenstein_distance, edit_operations
-    match_count = sum(1 for operation in edit_operations if operation[0] == "match")
-    # without any match the ratio is fixed to 0.0, which also covers an empty operation list
-    if match_count:
-        levenstein_similarity = float(match_count / len(edit_operations))
-    else:
-        levenstein_similarity = 0.0
-    return levenstein_similarity, levenstein_distance, edit_operations
-def edit_distance(text1, text2):
-    """Return the edit distance between two texts together with the operations that realise it.
-    Three edit operations are allowed, each with a cost of 1, plus a free "match":
-    - "insertion": a character of text1 that has no counterpart in text2
-    - "deletion": a character of text2 that has no counterpart in text1
-    - "substitution": a character of text1 paired with a different character of text2
-    The minimal total cost is found by dynamic programming over a (N+1)x(M+1) table, where N = len(text1),
-    M = len(text2) and DP[i][j] is the minimal cost of converting text1[:i] into text2[:j]:
-    DP[i][j] = min of the following
-    case 1: DP[i-1][j-1] # match
-    case 2: DP[i-1][j] + 1 # insertion
-    case 3: DP[i][j-1] + 1 # deletion
-    case 4: DP[i-1][j-1] + 1 # substitution
-    Example:
-    text1 = "horse"
-    text2 = "ros"
-    DP _  r  o  s
-    _ [0, 1, 2, 3]
-    h [1, 1, 2, 3]
-    o [2, 2, 1, 2]
-    r [3, 2, 2, 2]
-    s [4, 3, 3, 2]
-    e [5, 4, 4, 3]
-    Returns:
-        Tuple (distance, operations). Every operation is a tuple (name, text1 character, text2 character,
-        text1 position, text2 position); the side that takes no part holds "" as its character. Operations are
-        listed from the start of the texts to their end, and the number of non-"match" operations equals the
-        distance. Empty texts go through the same procedure, so the operations for them are reported as well.
-    """
-    # a missing text (None) is treated like an empty one
-    text1 = text1 if text1 else ""
-    text2 = text2 if text2 else ""
-    N = len(text1)
-    M = len(text2)
-    DP = [[0] * (M + 1) for _ in range(N + 1)]
-    P = [[None] * (M + 1) for _ in range(N + 1)]
-    # borders: an empty prefix on one side leaves only one possible operation on the other side
-    for i in range(1, N + 1):
-        DP[i][0] = i
-        P[i][0] = "insertion"
-    for j in range(1, M + 1):
-        DP[0][j] = j
-        P[0][j] = "deletion"
-    for i in range(1, N + 1):
-        for j in range(1, M + 1):
-            pair_mismatch = int(text1[i - 1] != text2[j - 1])
-            # candidates are tried in a fixed order and replaced only by a strictly cheaper one, so on ties
-            # match/substitution is preferred over insertion, and insertion over deletion
-            best_cost = DP[i - 1][j - 1] + pair_mismatch
-            best_case = "substitution" if pair_mismatch else "match"
-            if DP[i - 1][j] + 1 < best_cost:
-                best_cost = DP[i - 1][j] + 1
-                best_case = "insertion"
-            if DP[i][j - 1] + 1 < best_cost:
-                best_cost = DP[i][j - 1] + 1
-                best_case = "deletion"
-            DP[i][j] = best_cost
-            P[i][j] = best_case
-    # walk back from the bottom-right cell to the origin, collecting the chosen operations
-    operations = []
-    i, j = N, M
-    while i > 0 or j > 0:
-        step = P[i][j]
-        if step == "match" or step == "substitution":
-            operations.append((step, text1[i - 1], text2[j - 1], i - 1, j - 1))
-            i -= 1
-            j -= 1
-        elif step == "insertion":
-            operations.append(("insertion", text1[i - 1], "", i - 1, j - 1))
-            i -= 1
-        else:
-            operations.append(("deletion", "", text2[j - 1], i - 1, j - 1))
-            j -= 1
-    operations.reverse()
-    return DP[N][M], operations
-def levenstein_metrics(df, pref_1="Pred_", pref_2='Tar_'):
-    """Apply ``levenstein`` to the ``<pref_1>text`` / ``<pref_2>text`` pair of every row of ``df``.
-    Returns three row-aligned series: the similarities, the distances and the edit operation lists.
-    """
-    first_col, second_col = f'{pref_1}text', f'{pref_2}text'
-    per_row_results = df[[first_col, second_col]].apply(
-        lambda row: levenstein(text1=row[first_col], text2=row[second_col]),
-        axis=1
-    )
-    # each row result is a (similarity, distance, operations) tuple; split it into one series per component
-    levenstein_similarities, levenstein_distances, edit_operations = (
-        per_row_results.apply(lambda result, position=position: result[position])
-        for position in range(3)
-    )
-    return levenstein_similarities, levenstein_distances, edit_operations
-def _empty_word_report():
-    """Return the report used when there is nothing to evaluate."""
-    return {
-        "accuracy": None,
-        "precision": None,
-        "recall": None,
-        "f1": None,
-        "levenstein_distances_stats": {},
-        "levenstein_similarities_stats": {},
-        "iou_stats": {},
-        "edit_operations_stats": {key: {} for key in ["insertion", "deletion", "substitution"]},
-    }
-def _describe_with_values(series):
-    """Return the ``describe()`` statistics of a series together with its raw values."""
-    return {**series.describe().to_dict(), "values": series.tolist()}
-def _evaluate_text_pairs(text_pairs, pred_pref, target_pref, show_hist):
-    """Build the evaluation report for a frame of matched prediction/target pairs."""
-    if text_pairs.empty:
-        # nothing was matched, so there is nothing to measure
-        return _empty_word_report()
-    levenstein_similarities, levenstein_distances, edit_operations = levenstein_metrics(
-        df=text_pairs, pref_1=pred_pref, pref_2=target_pref
-    )
-    # per operation kind: how often every "[text1 character]_[text2 character]" combination occurred
-    edit_operations_stats = {
-        operation_id: pd.Series(
-            edit_operations.apply(
-                lambda x: [f"[{item[1]}]_[{item[2]}]" for item in x if item[0] == operation_id]
-            ).sum(axis=0)).value_counts().to_dict()
-        for operation_id in ["insertion", "deletion", "substitution"]
-    }
-    if show_hist is True:
-        pd.Series(levenstein_similarities).plot(kind='hist', bins=20, title="Levestein Similarities")
-        pd.Series(levenstein_distances).plot(kind='hist', bins=20, title="Levestein Distances")
-        for edit_operation_id, edit_operation_data in edit_operations_stats.items():
-            pd.Series(edit_operation_data).plot(kind='barh', title=f"{edit_operation_id.capitalize()} Stats")
-    return {
-        "accuracy": text_accuracy(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
-        "precision": text_precision(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
-        "recall": text_recall(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
-        "f1": text_f1(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
-        "levenstein_distances_stats": _describe_with_values(levenstein_distances),
-        "levenstein_similarities_stats": _describe_with_values(levenstein_similarities),
-        "iou_stats": _describe_with_values(text_pairs.iou),
-        "edit_operations_stats": edit_operations_stats,
-    }
-def evaluate_by_words(pred_df, target_df, pred_pref='Pred_', target_pref='Target_', **kwargs):
-    """Evaluate predicted words against target words after pairing them one to one by IOU.
-    Args:
-        pred_df: frame of predicted items.
-        target_df: frame of target items.
-        pred_pref: prefix given to the prediction columns of the pairs frame.
-        target_pref: prefix given to the target columns of the pairs frame.
-        **kwargs: ``show_hist=True`` additionally plots histograms of the results.
-    Returns:
-        Dict with "accuracy", "precision", "recall", "f1", "levenstein_distances_stats",
-        "levenstein_similarities_stats", "iou_stats" and "edit_operations_stats". When either frame is empty the
-        scores are None and the statistics are empty.
-    """
-    if pred_df.empty or target_df.empty:
-        return _empty_word_report()
-    text_pairs = word_or_symbol_pair_matching(df1=pred_df, df2=target_df, pref1=pred_pref, pref2=target_pref)
-    return _evaluate_text_pairs(text_pairs, pred_pref, target_pref, show_hist=kwargs.get("show_hist", False))
-def evaluate_by_word_groups(pred_df, target_df, pred_pref='Pred_', target_pref='Target_', **kwargs):
-    """Evaluate predicted words against target words after pairing groups of words by IOU.
-    Takes the same arguments and returns the same report as ``evaluate_by_words``; the only difference is that
-    the pairs come from ``word_or_symbol_group_pair_matching``.
-    """
-    if pred_df.empty or target_df.empty:
-        return _empty_word_report()
-    text_pairs = word_or_symbol_group_pair_matching(df1=pred_df, df2=target_df, pref1=pred_pref, pref2=target_pref)
-    return _evaluate_text_pairs(text_pairs, pred_pref, target_pref, show_hist=kwargs.get("show_hist", False))
-def reduce_word_evaluation_results(eval_results):
-    """Combine a list of per-document word evaluation reports into one summary.
-    Scores are summarised by mean, standard deviation and raw values. The Levenshtein and IOU statistics are
-    computed over the per-document means of their "values". Edit operation counts are summed over all documents.
-    An empty input gives a summary with empty statistics and a "document_count" of 0.
-    """
-    if not eval_results:
-        return {
-            "accuracy": {},
-            "precision": {},
-            "recall": {},
-            "f1": {},
-            "document_count": 0,
-            "levenstein_distances_stats": {},
-            "levenstein_similarities_stats": {},
-            "iou_stats": {},
-            "edit_operations_stats": {key: {} for key in ["insertion", "deletion", "substitution"]},
-        }
-    def _score_summary(metric):
-        # mean / std / raw values of one score across all documents
-        scores = pd.Series([item[metric] for item in eval_results])
-        return {"mean": scores.mean(), "std": scores.std(), "values": scores.tolist()}
-    def _stats_of_document_means(stats_key):
-        # one mean per document, then the describe() statistics over these means
-        document_means = pd.Series(
-            [pd.Series(item[stats_key].get('values', [])).mean() for item in eval_results]
-        )
-        return {**document_means.describe().to_dict(), "values": document_means.tolist()}
-    # sum the counts of every edit operation key over all documents
-    edit_operations_stats = {}
-    for eval_result in eval_results:
-        for edit_operation, edit_operation_data in eval_result['edit_operations_stats'].items():
-            totals = edit_operations_stats.setdefault(edit_operation, {})
-            for key, count in edit_operation_data.items():
-                totals[key] = totals.get(key, 0) + count
-    return {
-        "accuracy": _score_summary('accuracy'),
-        "precision": _score_summary('precision'),
-        "recall": _score_summary('recall'),
-        "f1": _score_summary('f1'),
-        "document_count": len(eval_results),
-        "levenstein_distances_stats": _stats_of_document_means('levenstein_distances_stats'),
-        "levenstein_similarities_stats": _stats_of_document_means('levenstein_similarities_stats'),
-        "iou_stats": _stats_of_document_means('iou_stats'),
-        "edit_operations_stats": edit_operations_stats,
-    }
-def evaluate_by_symbols(pred_df, target_df, pred_pref='Pred_', target_pref='Target_', **kwargs):
-    """Evaluate predicted symbols against target symbols after pairing them one to one by IOU.
-    Returns a dict with "accuracy", "precision", "recall", "f1", "confusion_matrix", "pair_counts" and
-    "iou_stats". When either frame is empty the scores are None and the frames and statistics are empty.
-    Passing ``show_hist=True`` additionally plots the symbol pair counts.
-    """
-    if pred_df.empty or target_df.empty:
-        return {
-            "accuracy": None,
-            "precision": None,
-            "recall": None,
-            "f1": None,
-            "confusion_matrix": pd.DataFrame(),
-            "pair_counts": pd.DataFrame(),
-            "iou_stats": {},
-        }
-    text_pairs = word_or_symbol_pair_matching(df1=pred_df, df2=target_df, pref1=pred_pref, pref2=target_pref)
-    confusion_matrix, pair_counts = symbol_confusion_matrix(text_pairs, pref_1=pred_pref, pref_2=target_pref)
-    if kwargs.get("show_hist", False) is True:
-        pd.Series(pair_counts).plot(kind='barh', title="Symbol Pair Counts")
-    # the four scores are all computed from the same pairs frame and prefixes
-    score_arguments = {"df": text_pairs, "pref_1": pred_pref, "pref_2": target_pref}
-    return {
-        "accuracy": text_accuracy(**score_arguments),
-        "precision": text_precision(**score_arguments),
-        "recall": text_recall(**score_arguments),
-        "f1": text_f1(**score_arguments),
-        "confusion_matrix": confusion_matrix,
-        "pair_counts": pair_counts,
-        "iou_stats": {**text_pairs.iou.describe().to_dict(), "values": text_pairs.iou.tolist()},
-    }
-def reduce_pair_counts(pair_counts):
-    """Sum several pair-count tables into a single table.
-    Args:
-        pair_counts: List of DataFrames whose last column holds a count and whose
-            preceding columns identify the pair. Empty frames are skipped.
-    Returns:
-        DataFrame with one row per distinct pair (in first-seen order) and its summed
-        count, labelled with the columns of the last non-empty input frame. An empty
-        DataFrame when ``pair_counts`` is empty/None or contains only empty frames.
-    """
-    totals = {}
-    columns = []
-    for frame in pair_counts or []:
-        if frame.empty:
-            continue
-        columns = frame.columns.tolist()
-        # Index by every column but the last so that each key identifies one pair.
-        frame_counts = frame.set_index(columns[:-1], drop=True).to_dict()[columns[-1]]
-        for key, value in frame_counts.items():
-            totals[key] = totals.get(key, 0) + value
-    if not totals:
-        # No counts at all: return a truly empty frame rather than one that
-        # carries placeholder columns from an empty Series.
-        return pd.DataFrame()
-    reduced = pd.Series(totals).to_frame().reset_index()
-    reduced.columns = columns
-    return reduced
-def reduce_confusion_matrices(confusion_matrices):
-    """Add several confusion matrices cell by cell into one square matrix.
-    Args:
-        confusion_matrices: List of DataFrames; empty frames are ignored.
-    Returns:
-        DataFrame whose index and columns are both the sorted union of every row and
-        column label seen, holding the summed cell values (0 where no input had the
-        cell). An empty DataFrame when the list itself is empty.
-    """
-    if not confusion_matrices:
-        return pd.DataFrame()
-    labels = set()
-    totals = {}
-    for matrix in confusion_matrices:
-        if matrix.empty:
-            continue
-        for row_label in matrix.index:
-            for col_label in matrix.columns:
-                cell = (row_label, col_label)
-                labels.update(cell)
-                totals[cell] = totals.get(cell, 0) + matrix.loc[row_label, col_label]
-    ordered = sorted(labels)
-    rows = [[totals.get((row_label, col_label), 0) for col_label in ordered] for row_label in ordered]
-    return pd.DataFrame(rows, columns=ordered, index=ordered)
-def reduce_symbol_evaluation_results(eval_results):
-    """Aggregate a list of per-item symbol evaluation reports into one summary.
-    Args:
-        eval_results: List of dicts, each with the keys "accuracy", "precision",
-            "recall", "f1", "confusion_matrix", "pair_counts" and "iou_stats".
-    Returns:
-        dict with, for each of the four scores, its "mean", "std" and the list of
-        "values"; "document_count"; the reduced "pair_counts" and "confusion_matrix"
-        tables; and "iou_stats" describing the per-item mean IoU. For an empty input
-        the score entries and "iou_stats" are empty dicts, the tables are empty
-        DataFrames and "document_count" is 0.
-    """
-    if not eval_results:
-        return {
-            "accuracy": {},
-            "precision": {},
-            "recall": {},
-            "f1": {},
-            "document_count": 0,
-            "pair_counts": pd.DataFrame(),
-            "confusion_matrix": pd.DataFrame(),
-            "iou_stats": {},
-        }
-    metric_names = ("accuracy", "precision", "recall", "f1")
-    scores = {name: pd.Series([result[name] for result in eval_results]) for name in metric_names}
-    matrices = [result['confusion_matrix'] for result in eval_results]
-    counts = [result['pair_counts'] for result in eval_results]
-    # One value per item: the mean of that item's raw IoU values.
-    mean_ious = pd.Series(
-        [pd.Series(result['iou_stats'].get('values', [])).mean() for result in eval_results]
-    )
-    iou_stats = mean_ious.describe().to_dict()
-    iou_stats["values"] = mean_ious.tolist()
-    summary = {
-        name: {"mean": series.mean(), "std": series.std(), "values": series.tolist()}
-        for name, series in scores.items()
-    }
-    summary["document_count"] = len(eval_results)
-    summary["pair_counts"] = reduce_pair_counts(counts)
-    summary["confusion_matrix"] = reduce_confusion_matrices(matrices)
-    summary["iou_stats"] = iou_stats
-    return summary

iliauniiccocrevaluation.py CHANGED Viewed

@@ -17,8 +17,8 @@ import datasets
 import evaluate
 # TODO: Add BibTeX citation
-from evaluation.metrics import evaluate_by_words
-from ocr.fiftyone import FiftyOneOcr
 _CITATION = """\
 @InProceedings{huggingface:module,

 import evaluate
 # TODO: Add BibTeX citation
+from ocr_evaluation.evaliate.metrics import evaluate_by_words
+from ocr_evaluation.ocr.fiftyone import FiftyOneOcr
 _CITATION = """\
 @InProceedings{huggingface:module,

ocr/fiftyone.py DELETED Viewed

@@ -1,26 +0,0 @@
-import pandas as pd
-import numpy as np
-class FiftyOneOcr:
-    """Wrapper around a sample dict that stores OCR detections under data["detections"]["detections"]."""
-    def __init__(self, data):
-        # Kept as given; get_word_annotations reads data["detections"]["detections"] from it.
-        self.data = data
-    def get_word_annotations(self, convert_bbox: bool = True) -> pd.DataFrame:
-        """Return the detections as a DataFrame where each row is one independent word annotation.
-        Args:
-            convert_bbox: When True, convert each ``bounding_box`` from offset form
-                (x, y, dx, dy) to two-point form (x1, y1, x2, y2) by adding the first
-                two values onto the last two.
-        Returns:
-            DataFrame built from ``data["detections"]["detections"]``; an empty
-            DataFrame when that entry is missing, None or has no items.
-        """
-        detections = (self.data.get("detections") or {}).get("detections") or []
-        annotations_df = pd.DataFrame(detections)
-        # An empty frame has no 'bounding_box' column, so there is nothing to convert.
-        if convert_bbox and not annotations_df.empty:
-            bbox = np.array(annotations_df['bounding_box'].values.tolist())
-            bbox[:, 2:] += bbox[:, :2]
-            annotations_df['bounding_box'] = bbox.tolist()
-        return annotations_df

requirements.txt CHANGED Viewed

	@@ -1 +1,2 @@
1	- git+https://github.com/huggingface/evaluate@main


1	+ git+https://github.com/huggingface/evaluate@main
2	+ git+https://github.com/IliaUni-ICC/ocr_evaluation@main