Spaces:
Running
Running
Yeshwant123
committed on
Commit
·
835911f
1
Parent(s):
758b953
Updating the mcc.py file with computation of mcc
Browse files
mcc.py
CHANGED
@@ -11,16 +11,18 @@
|
|
11 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
# See the License for the specific language governing permissions and
|
13 |
# limitations under the License.
|
14 |
-
"""TODO:
|
15 |
|
16 |
import evaluate
|
17 |
import datasets
|
|
|
|
|
18 |
|
19 |
|
20 |
# TODO: Add BibTeX citation
|
21 |
_CITATION = """\
|
22 |
@InProceedings{huggingface:module,
|
23 |
-
title = {
|
24 |
authors={huggingface, Inc.},
|
25 |
year={2020}
|
26 |
}
|
@@ -28,7 +30,9 @@ year={2020}
|
|
28 |
|
29 |
# TODO: Add description of the module here
|
30 |
_DESCRIPTION = """\
|
31 |
-
|
|
|
|
|
32 |
"""
|
33 |
|
34 |
|
@@ -36,60 +40,50 @@ This new module is designed to solve this great ML task and is crafted with a lo
|
|
36 |
_KWARGS_DESCRIPTION = """
|
37 |
Calculates how good are predictions given some references, using certain scores
|
38 |
Args:
|
39 |
-
predictions
|
40 |
-
|
41 |
-
references: list of reference for each prediction. Each
|
42 |
-
reference should be a string with tokens separated by spaces.
|
43 |
Returns:
|
44 |
-
|
45 |
-
another_score: description of the second score,
|
46 |
Examples:
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
54 |
"""
|
55 |
|
56 |
# TODO: Define external resources urls if needed
|
57 |
-
BAD_WORDS_URL = "
|
58 |
|
59 |
|
60 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
61 |
class MCC(evaluate.Metric):
|
62 |
-
"""
|
63 |
|
64 |
def _info(self):
|
65 |
-
# TODO: Specifies the evaluate.EvaluationModuleInfo object
|
66 |
return evaluate.MetricInfo(
|
67 |
-
# This is the description that will appear on the modules page.
|
68 |
module_type="metric",
|
69 |
description=_DESCRIPTION,
|
70 |
citation=_CITATION,
|
71 |
inputs_description=_KWARGS_DESCRIPTION,
|
72 |
-
# This defines the format of each prediction and reference
|
73 |
features=datasets.Features({
|
74 |
'predictions': datasets.Value('int64'),
|
75 |
'references': datasets.Value('int64'),
|
76 |
}),
|
77 |
# Homepage of the module for documentation
|
78 |
-
homepage="
|
79 |
# Additional links to the codebase or references
|
80 |
-
codebase_urls=[
|
81 |
-
reference_urls=[
|
82 |
)
|
83 |
|
84 |
-
def _download_and_prepare(self, dl_manager):
|
85 |
-
"""Optional: download external resources useful to compute the scores"""
|
86 |
-
# TODO: Download external resources if needed
|
87 |
-
pass
|
88 |
-
|
89 |
def _compute(self, predictions, references):
|
90 |
-
"""Returns the scores"""
|
91 |
-
#
|
92 |
-
|
93 |
-
return {
|
94 |
-
"accuracy": accuracy,
|
95 |
-
}
|
|
|
11 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
# See the License for the specific language governing permissions and
|
13 |
# limitations under the License.
|
14 |
+
"""MCC is a correlation coefficient between the observed and predicted binary classifications, and takes into account true and false positives and negatives."""
|
15 |
|
16 |
import evaluate
|
17 |
import datasets
|
18 |
+
from sklearn.metrics import matthews_corrcoef
|
19 |
+
|
20 |
|
21 |
|
22 |
# TODO: Add BibTeX citation
|
23 |
_CITATION = """\
|
24 |
@InProceedings{huggingface:module,
|
25 |
+
title = {MCC Metric},
|
26 |
authors={huggingface, Inc.},
|
27 |
year={2020}
|
28 |
}
|
|
|
30 |
|
31 |
# TODO: Add description of the module here
|
32 |
_DESCRIPTION = """\
|
33 |
+
MCC (Matthews Correlation Coefficient) is a correlation coefficient between the observed and predicted binary classifications, and takes into account true and false positives and negatives. It can be computed with the equation:
|
34 |
+
MCC = (TP * TN - FP * FN) / sqrt((TP+FP) * (TP+FN) * (TN+FP) * (TN+FN))
|
35 |
+
Where TP is the true positives, TN is the true negatives, FP is the false positives, and FN is the false negatives.
|
36 |
"""
|
37 |
|
38 |
|
|
|
40 |
_KWARGS_DESCRIPTION = """
|
41 |
Calculates how good are predictions given some references, using certain scores
|
42 |
Args:
|
43 |
+
- **predictions** (`list` of `int`): The predicted labels.
|
44 |
+
- **references** (`list` of `int`): The ground truth labels.
|
|
|
|
|
45 |
Returns:
|
46 |
+
- **mcc** (`float`): The MCC score. Minimum possible value is -1. Maximum possible value is 1. A higher MCC means that the predicted and observed classifications agree better; a value of 0 means they agree no better than chance, while a negative MCC means that they agree worse than chance.
|
|
|
47 |
Examples:
|
48 |
+
Example 1-A simple example with some errors
|
49 |
+
>>> mcc_metric = evaluate.load('mcc')
|
50 |
+
>>> results = mcc_metric.compute(references=[0, 0, 1, 1, 1], predictions=[0, 1, 0, 1, 1])
|
51 |
+
>>> print(results)
|
52 |
+
{'mcc': 0.16666666666666666}
|
53 |
+
Example 2 - A multiclass example with three labels and some errors
|
54 |
+
>>> mcc_metric = evaluate.load('mcc')
|
55 |
+
>>> results = mcc_metric.compute(references=[0, 1, 2, 2, 2], predictions=[0, 2, 2, 1, 2])
|
56 |
+
>>> print(results)
|
57 |
+
{'mcc': 0.2857142857142857}
|
58 |
"""
|
59 |
|
60 |
# TODO: Define external resources urls if needed
|
61 |
+
BAD_WORDS_URL = "https://scikit-learn.org/stable/modules/generated/sklearn.metrics.matthews_corrcoef.html"
|
62 |
|
63 |
|
64 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
65 |
class MCC(evaluate.Metric):
|
66 |
+
"""Compute MCC Scores"""
|
67 |
|
68 |
def _info(self):
|
|
|
69 |
return evaluate.MetricInfo(
|
|
|
70 |
module_type="metric",
|
71 |
description=_DESCRIPTION,
|
72 |
citation=_CITATION,
|
73 |
inputs_description=_KWARGS_DESCRIPTION,
|
|
|
74 |
features=datasets.Features({
|
75 |
'predictions': datasets.Value('int64'),
|
76 |
'references': datasets.Value('int64'),
|
77 |
}),
|
78 |
# Homepage of the module for documentation
|
79 |
+
homepage="https://huggingface.co/evaluate-metric?message=Request%20sent",
|
80 |
# Additional links to the codebase or references
|
81 |
+
codebase_urls=[],
|
82 |
+
reference_urls=[]
|
83 |
)
|
84 |
|
|
|
|
|
|
|
|
|
|
|
85 |
def _compute(self, predictions, references):
|
86 |
+
"""Returns the mcc scores"""
|
87 |
+
# Computes the MCC score using matthews_corrcoef from sklearn
|
88 |
+
|
89 |
+
return {"mcc": matthews_corrcoef(references, predictions)}
|
|
|
|