Yeshwant123 committed on
Commit
835911f
·
1 Parent(s): 758b953

Updating the mcc.py file with computation of mcc

Browse files
Files changed (1) hide show
  1. mcc.py +29 -35
mcc.py CHANGED
@@ -11,16 +11,18 @@
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
- """TODO: Add a description here."""
15
 
16
  import evaluate
17
  import datasets
 
 
18
 
19
 
20
  # TODO: Add BibTeX citation
21
  _CITATION = """\
22
  @InProceedings{huggingface:module,
23
- title = {A great new module},
24
  authors={huggingface, Inc.},
25
  year={2020}
26
  }
@@ -28,7 +30,9 @@ year={2020}
28
 
29
  # TODO: Add description of the module here
30
  _DESCRIPTION = """\
31
- This new module is designed to solve this great ML task and is crafted with a lot of care.
 
 
32
  """
33
 
34
 
@@ -36,60 +40,50 @@ This new module is designed to solve this great ML task and is crafted with a lo
36
  _KWARGS_DESCRIPTION = """
37
  Calculates how good are predictions given some references, using certain scores
38
  Args:
39
- predictions: list of predictions to score. Each predictions
40
- should be a string with tokens separated by spaces.
41
- references: list of reference for each prediction. Each
42
- reference should be a string with tokens separated by spaces.
43
  Returns:
44
- accuracy: description of the first score,
45
- another_score: description of the second score,
46
  Examples:
47
- Examples should be written in doctest format, and should illustrate how
48
- to use the function.
49
-
50
- >>> my_new_module = evaluate.load("my_new_module")
51
- >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1])
52
- >>> print(results)
53
- {'accuracy': 1.0}
 
 
 
54
  """
55
 
56
  # TODO: Define external resources urls if needed
57
- BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
58
 
59
 
60
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
61
  class MCC(evaluate.Metric):
62
- """TODO: Short description of my evaluation module."""
63
 
64
  def _info(self):
65
- # TODO: Specifies the evaluate.EvaluationModuleInfo object
66
  return evaluate.MetricInfo(
67
- # This is the description that will appear on the modules page.
68
  module_type="metric",
69
  description=_DESCRIPTION,
70
  citation=_CITATION,
71
  inputs_description=_KWARGS_DESCRIPTION,
72
- # This defines the format of each prediction and reference
73
  features=datasets.Features({
74
  'predictions': datasets.Value('int64'),
75
  'references': datasets.Value('int64'),
76
  }),
77
  # Homepage of the module for documentation
78
- homepage="http://module.homepage",
79
  # Additional links to the codebase or references
80
- codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
81
- reference_urls=["http://path.to.reference.url/new_module"]
82
  )
83
 
84
- def _download_and_prepare(self, dl_manager):
85
- """Optional: download external resources useful to compute the scores"""
86
- # TODO: Download external resources if needed
87
- pass
88
-
89
  def _compute(self, predictions, references):
90
- """Returns the scores"""
91
- # TODO: Compute the different scores of the module
92
- accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
93
- return {
94
- "accuracy": accuracy,
95
- }
 
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
+ """MCC is a correlation coefficient between the observed and predicted binary classifications, and takes into account true and false positives and negatives."""
15
 
16
  import evaluate
17
  import datasets
18
+ from sklearn.metrics import matthews_corrcoef
19
+
20
 
21
 
22
  # TODO: Add BibTeX citation
23
  _CITATION = """\
24
  @InProceedings{huggingface:module,
25
+ title = {MCC Metric},
26
  authors={huggingface, Inc.},
27
  year={2020}
28
  }
 
30
 
31
  # TODO: Add description of the module here
32
  _DESCRIPTION = """\
33
+ MCC (Matthews Correlation Coefficient) is a correlation coefficient between the observed and predicted binary classifications, and takes into account true and false positives and negatives. It can be computed with the equation:
34
+ MCC = (TP * TN - FP * FN) / sqrt((TP+FP) * (TP+FN) * (TN+FP) * (TN+FN))
35
+ Where TP is the true positives, TN is the true negatives, FP is the false positives, and FN is the false negatives.
36
  """
37
 
38
 
 
40
  _KWARGS_DESCRIPTION = """
41
  Calculates how good are predictions given some references, using certain scores
42
  Args:
43
+ - **predictions** (`list` of `int`): The predicted labels.
44
+ - **references** (`list` of `int`): The ground truth labels.
 
 
45
  Returns:
46
+ - **mcc** (`float`): The MCC score. Minimum possible value is -1. Maximum possible value is 1. A higher MCC means that the predicted and observed binary classifications agree better, while a negative MCC means that they agree worse than chance.
 
47
  Examples:
48
+ Example 1-A simple example with some errors
49
+ >>> mcc_metric = evaluate.load('mcc')
50
+ >>> results = mcc_metric.compute(references=[0, 0, 1, 1, 1], predictions=[0, 1, 0, 1, 1])
51
+ >>> print(results)
52
+ {'mcc': 0.16666666666666666}
53
+ Example 2-The same example as Example 1, but with some different labels
54
+ >>> mcc_metric = evaluate.load('mcc')
55
+ >>> results = mcc_metric.compute(references=[0, 1, 2, 2, 2], predictions=[0, 2, 2, 1, 2])
56
+ >>> print(results)
57
+ {'mcc': 0.2041241452319315}
58
  """
59
 
60
  # TODO: Define external resources urls if needed
61
+ BAD_WORDS_URL = "https://scikit-learn.org/stable/modules/generated/sklearn.metrics.matthews_corrcoef.html"
62
 
63
 
64
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
65
  class MCC(evaluate.Metric):
66
+ """Compute MCC Scores"""
67
 
68
  def _info(self):
 
69
  return evaluate.MetricInfo(
 
70
  module_type="metric",
71
  description=_DESCRIPTION,
72
  citation=_CITATION,
73
  inputs_description=_KWARGS_DESCRIPTION,
 
74
  features=datasets.Features({
75
  'predictions': datasets.Value('int64'),
76
  'references': datasets.Value('int64'),
77
  }),
78
  # Homepage of the module for documentation
79
+ homepage="https://huggingface.co/evaluate-metric?message=Request%20sent",
80
  # Additional links to the codebase or references
81
+ codebase_urls=[],
82
+ reference_urls=[]
83
  )
84
 
 
 
 
 
 
85
  def _compute(self, predictions, references):
86
+ """Returns the mcc scores"""
87
+ # Computes the MCC score using matthews_corrcoef from sklearn
88
+
89
+ return {"mcc": matthews_corrcoef(references, predictions)}