Spaces:
Sleeping
Sleeping
Update README.md
Browse files
README.md
CHANGED
@@ -43,7 +43,7 @@ The Code Eval metric calculates how good are predictions given a set of referenc
|
|
43 |
|
44 |
`predictions`: a list of candidates to evaluate. Each candidate should be a list of strings with several code candidates to solve the problem.
|
45 |
|
46 |
-
`references`: a list of
|
47 |
|
48 |
`k`: number of code candidates to consider in the evaluation. The default value is `[1, 10, 100]`.
|
49 |
|
@@ -54,7 +54,7 @@ The Code Eval metric calculates how good are predictions given a set of referenc
|
|
54 |
```python
|
55 |
from evaluate import load
|
56 |
code_eval_stdio = load("hage2000/code_eval_stdio")
|
57 |
-
references = [
|
58 |
candidates = [[ "nums = list(map(int, input().split()))\nprint(sum(nums))"]]
|
59 |
pass_at_k, results = code_eval_stdio.compute(references=references, predictions=candidates, k=[1, 2])
|
60 |
```
|
@@ -86,7 +86,7 @@ Full match at `k=1`:
|
|
86 |
```python
|
87 |
from evaluate import load
|
88 |
code_eval_stdio = load("hage2000/code_eval_stdio")
|
89 |
-
references = [
|
90 |
candidates = [[ "nums = list(map(int, input().split()))\nprint(sum(nums))"]]
|
91 |
pass_at_k, results = code_eval_stdio.compute(references=references, predictions=candidates, k=[1, 2])
|
92 |
print(pass_at_k)
|
@@ -98,7 +98,7 @@ No match for k = 1:
|
|
98 |
```python
|
99 |
from evaluate import load
|
100 |
code_eval_stdio = load("hage2000/code_eval_stdio")
|
101 |
-
references = [
|
102 |
candidates = [[ "nums = list(map(int, input().split()))\nprint(nums[0]*nums[1])"]]
|
103 |
pass_at_k, results = code_eval_stdio.compute(references=references, predictions=candidates, k=[1, 2])
|
104 |
print(pass_at_k)
|
@@ -110,7 +110,7 @@ Partial match at k=1, full match at k=2:
|
|
110 |
```python
|
111 |
from evaluate import load
|
112 |
code_eval_stdio = load("hage2000/code_eval_stdio")
|
113 |
-
references = [
|
114 |
candidates = [[ "nums = list(map(int, input().split()))\nprint(sum(nums))", "nums = list(map(int, input().split()))\nprint(nums[0]*nums[1])"]]
|
115 |
pass_at_k, results = code_eval_stdio.compute(references=references, predictions=candidates, k=[1, 2])
|
116 |
print(pass_at_k)
|
|
|
43 |
|
44 |
`predictions`: a list of candidates to evaluate. Each candidate should be a list of strings with several code candidates to solve the problem.
|
45 |
|
46 |
+
`references`: a list of dicts (`Dict[str, str]`), one per problem. Each dict has two keys, `"input"` and `"reference_output"`, e.g. `[{"input": "1 2", "reference_output": "3"}]`.
|
47 |
|
48 |
`k`: number of code candidates to consider in the evaluation. The default value is `[1, 10, 100]`.
|
49 |
|
|
|
54 |
```python
|
55 |
from evaluate import load
|
56 |
code_eval_stdio = load("hage2000/code_eval_stdio")
|
57 |
+
references = [{"input":"2 3", "reference_output":"5"}]
|
58 |
candidates = [[ "nums = list(map(int, input().split()))\nprint(sum(nums))"]]
|
59 |
pass_at_k, results = code_eval_stdio.compute(references=references, predictions=candidates, k=[1, 2])
|
60 |
```
|
|
|
86 |
```python
|
87 |
from evaluate import load
|
88 |
code_eval_stdio = load("hage2000/code_eval_stdio")
|
89 |
+
references = [{"input":"2 3", "reference_output":"5"}]
|
90 |
candidates = [[ "nums = list(map(int, input().split()))\nprint(sum(nums))"]]
|
91 |
pass_at_k, results = code_eval_stdio.compute(references=references, predictions=candidates, k=[1, 2])
|
92 |
print(pass_at_k)
|
|
|
98 |
```python
|
99 |
from evaluate import load
|
100 |
code_eval_stdio = load("hage2000/code_eval_stdio")
|
101 |
+
references = [{"input":"2 3", "reference_output":"5"}]
|
102 |
candidates = [[ "nums = list(map(int, input().split()))\nprint(nums[0]*nums[1])"]]
|
103 |
pass_at_k, results = code_eval_stdio.compute(references=references, predictions=candidates, k=[1, 2])
|
104 |
print(pass_at_k)
|
|
|
110 |
```python
|
111 |
from evaluate import load
|
112 |
code_eval_stdio = load("hage2000/code_eval_stdio")
|
113 |
+
references = [{"input":"2 3", "reference_output":"5"}]
|
114 |
candidates = [[ "nums = list(map(int, input().split()))\nprint(sum(nums))", "nums = list(map(int, input().split()))\nprint(nums[0]*nums[1])"]]
|
115 |
pass_at_k, results = code_eval_stdio.compute(references=references, predictions=candidates, k=[1, 2])
|
116 |
print(pass_at_k)
|