average_precision / average_precision.py
chanelcolgate's picture
modified: average_precision.py
ce530c1
raw
history blame
6.73 kB
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Average Precision"""
import evaluate
import datasets
from sklearn.metrics import average_precision_score
# BibTeX entry for this module; the AveragePrecision class passes it to
# `evaluate.MetricInfo` as `citation`.
# TODO: Replace the template placeholder below with the real BibTeX citation.
# NOTE(review): standard BibTeX uses the field name `author`, not `authors` --
# confirm and fix together with the placeholder title.
_CITATION = """\
@InProceedings{huggingface:module,
title = {A great new module},
authors={chanelcolgate, Inc.},
year={2023}
}
"""
# One-line summary of the module; used as the `description` of the
# `evaluate.MetricInfo` and passed to the `add_start_docstrings` decorator
# on the AveragePrecision class.
# TODO: Expand this into a real description of the module.
_DESCRIPTION = """\
Average Precision
"""
# Argument documentation for the module. It is used as the
# `inputs_description` of the `evaluate.MetricInfo` and passed to the
# `add_start_docstrings` decorator on the AveragePrecision class.
# The text follows the scikit-learn docstring of `average_precision_score`;
# only the two input names differ (see the note at the top of the string).
_KWARGS_DESCRIPTION = """
Note: To be consistent with the `evaluate` input conventions the scikit-learn inputs are renamed:
- `y_true`: `references`
- `y_score`: `prediction_scores`

Scikit-learn docstring:

Average precision score.

Compute average precision (AP) from prediction scores.

AP summarizes a precision-recall curve as the weighted mean of precisions
achieved at each threshold, with the increase in recall from the previous
threshold used as the weight:

.. math::
    \\text{AP} = \\sum_n (R_n - R_{n-1}) P_n

where :math:`P_n` and :math:`R_n` are the precision and recall at the nth
threshold [1]_. This implementation is not interpolated and is different
from computing the area under the precision-recall curve with the
trapezoidal rule, which uses linear interpolation and can be too optimistic.

Note: this implementation is restricted to the binary classification task or
multilabel classification task.

Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.

Args:
    y_true: ndarray of shape (n_samples,) or (n_samples, n_classes)
        True binary labels or binary label indicators.
    y_score: ndarray of shape (n_samples,) or (n_samples, n_classes)
        Target scores, can either be probability estimates of the positive
        class, confidence values, or non-thresholded measure of decisions
        (as returned by :term:`decision_function` on some classifiers).
    average: {'micro', 'samples', 'weighted', 'macro'} or None, default='macro'
        If ``None``, the scores for each class are returned. Otherwise,
        this determines the type of averaging performed on the data:

        ``'micro'``:
            Calculate metrics globally by considering each element of the label
            indicator matrix as a label.
        ``'macro'``:
            Calculate metrics for each label, and find their unweighted
            mean. This does not take label imbalance into account.
        ``'weighted'``:
            Calculate metrics for each label, and find their average, weighted
            by support (the number of true instances for each label).
        ``'samples'``:
            Calculate metrics for each instance, and find their average.

        Will be ignored when ``y_true`` is binary.
    pos_label: int or str, default=1
        The label of the positive class. Only applied to binary ``y_true``.
        For multilabel-indicator ``y_true``, ``pos_label`` is fixed to 1.
    sample_weight: array_like of shape (n_samples,), default=None
        Sample weights.

Returns:
    average_precision_score: float
        Average precision score. If ``average`` is ``None``, the scores
        for each class are returned instead of a single value.

See Also
    roc_auc_score: Compute the area under the ROC curve.
    precision_recall_curve: Compute precision-recall pairs for different
        probability thresholds.

Examples:
    The example below calls the wrapped scikit-learn function directly.

    >>> import numpy as np
    >>> from sklearn.metrics import average_precision_score
    >>> y_true = np.array([0, 0, 1, 1])
    >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
    >>> average_precision_score(y_true, y_scores)
    0.8333333333333333
"""
@evaluate.utils.file_utils.add_start_docstrings(
    _DESCRIPTION, _KWARGS_DESCRIPTION
)
class AveragePrecision(evaluate.Metric):
    """Average precision metric backed by scikit-learn.

    The module takes `references` and `prediction_scores` and forwards them
    to `sklearn.metrics.average_precision_score`.
    """

    def _info(self):
        """Return the `evaluate.MetricInfo` describing this module.

        Two input layouts are declared: one with a single label/score per
        example and one with a sequence of labels/scores per example.
        """
        # One integer label and one float score per example.
        scalar_features = datasets.Features(
            {
                "references": datasets.Value("int64"),
                "prediction_scores": datasets.Value("float"),
            }
        )
        # A sequence of integer labels and a sequence of float scores
        # per example.
        sequence_features = datasets.Features(
            {
                "references": datasets.Sequence(datasets.Value("int64")),
                "prediction_scores": datasets.Sequence(
                    datasets.Value("float")
                ),
            }
        )
        return evaluate.MetricInfo(
            module_type="metric",
            # Text shown as the module description.
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # Accepted formats of each prediction and reference.
            features=[scalar_features, sequence_features],
            # Documentation page of the wrapped function.
            homepage="https://scikit-learn.org/stable/modules/generated/sklearn.metrics.average_precision_score.html",
            # Additional links to the codebase and references.
            codebase_urls=["https://github.com/scikit-learn/scikit-learn"],
            reference_urls=["https://scikit-learn.org/stable/index.html"],
        )

    def _download_and_prepare(self, dl_manager):
        """Do nothing: this module downloads no external resources."""
        return None

    def _compute(
        self,
        references,
        prediction_scores,
        average="macro",
        pos_label=1,
        sample_weight=None,
    ):
        """Compute the average precision with scikit-learn.

        Args:
            references: Ground-truth labels, passed as `y_true`.
            prediction_scores: Predicted scores, passed as `y_score`.
            average: Forwarded unchanged to `average_precision_score`.
            pos_label: Forwarded unchanged to `average_precision_score`.
            sample_weight: Forwarded unchanged to `average_precision_score`.

        Returns:
            A dict with the single key ``"average_precision_score"`` holding
            the value returned by `average_precision_score`.
        """
        score = average_precision_score(
            references,
            prediction_scores,
            average=average,
            pos_label=pos_label,
            sample_weight=sample_weight,
        )
        return {"average_precision_score": score}