{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 29, "content": "[PAD]", "single_word": false, "lstrip": true, "rstrip": true, "normalized": false, "special": true }, { "id": 28, "content": "[UNK]", "single_word": false, "lstrip": true, "rstrip": true, "normalized": false, "special": true } ], "normalizer": { "type": "Replace", "pattern": { "String": " " }, "content": "|" }, "pre_tokenizer": { "type": "Split", "pattern": { "Regex": "" }, "behavior": "Isolated", "invert": false }, "post_processor": null, "decoder": { "type": "CTC", "pad_token": "[PAD]", "word_delimiter_token": "|", "cleanup": true }, "model": { "vocab": { "'": 1, "[PAD]": 29, "[UNK]": 28, "a": 2, "b": 3, "c": 4, "d": 5, "e": 6, "f": 7, "g": 8, "h": 9, "i": 10, "j": 11, "k": 12, "l": 13, "m": 14, "n": 15, "o": 16, "p": 17, "q": 18, "r": 19, "s": 20, "t": 21, "u": 22, "v": 23, "w": 24, "x": 25, "y": 26, "z": 27, "|": 0 } } }