alea-institute
committed on
Upload tokenizer
Browse files
- tokenizer.json +2 -2
- tokenizer_config.json +4 -3
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2fd86e9e674651b16da995094a1ca1223efa763d222d3dc86f802ae1523c2414
|
3 |
+
size 11977933
|
tokenizer_config.json
CHANGED
@@ -34815,13 +34815,14 @@
|
|
34815 |
"cls_token": "<|cls|>",
|
34816 |
"do_lower_case": true,
|
34817 |
"eos_token": "<|end|>",
|
34818 |
-
"errors": "replace",
|
34819 |
"mask_token": "<|mask|>",
|
|
|
34820 |
"model_max_length": 1000000000000000019884624838656,
|
|
|
34821 |
"pad_token": "<|pad|>",
|
|
|
34822 |
"padding_side": "right",
|
34823 |
"sep_token": "<|sep|>",
|
34824 |
-
"tokenizer_class": "
|
34825 |
-
"trim_offsets": true,
|
34826 |
"unk_token": "<|unk|>"
|
34827 |
}
|
|
|
34815 |
"cls_token": "<|cls|>",
|
34816 |
"do_lower_case": true,
|
34817 |
"eos_token": "<|end|>",
|
|
|
34818 |
"mask_token": "<|mask|>",
|
34819 |
+
"max_length": null,
|
34820 |
"model_max_length": 1000000000000000019884624838656,
|
34821 |
+
"pad_to_multiple_of": null,
|
34822 |
"pad_token": "<|pad|>",
|
34823 |
+
"pad_token_type_id": 0,
|
34824 |
"padding_side": "right",
|
34825 |
"sep_token": "<|sep|>",
|
34826 |
+
"tokenizer_class": "PreTrainedTokenizerFast",
|
|
|
34827 |
"unk_token": "<|unk|>"
|
34828 |
}
|