{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[BOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[EOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 5, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 6, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "NFD" }, { "type": "StripAccents" } ] }, "pre_tokenizer": { "type": "WhitespaceSplit" }, "post_processor": null, "decoder": null, "model": { "type": "WordLevel", "vocab": { "[UNK]": 0, "[PAD]": 1, "[BOS]": 2, "[EOS]": 3, "[CLS]": 4, "[SEP]": 5, "[MASK]": 6, "AAA": 7, "AAT": 8, "AAG": 9, "AAC": 10, "ATA": 11, "ATT": 12, "ATG": 13, "ATC": 14, "AGA": 15, "AGT": 16, "AGG": 17, "AGC": 18, "ACA": 19, "ACT": 20, "ACG": 21, "ACC": 22, "TAA": 23, "TAT": 24, "TAG": 25, "TAC": 26, "TTA": 27, "TTT": 28, "TTG": 29, "TTC": 30, "TGA": 31, "TGT": 32, "TGG": 33, "TGC": 34, "TCA": 35, "TCT": 36, "TCG": 37, "TCC": 38, "GAA": 39, "GAT": 40, "GAG": 41, "GAC": 42, "GTA": 43, "GTT": 44, "GTG": 45, "GTC": 46, "GGA": 47, "GGT": 48, "GGG": 49, "GGC": 50, "GCA": 51, "GCT": 52, "GCG": 53, "GCC": 54, "CAA": 55, "CAT": 56, "CAG": 57, "CAC": 58, "CTA": 59, "CTT": 60, "CTG": 61, "CTC": 62, "CGA": 63, "CGT": 64, "CGG": 65, "CGC": 66, "CCA": 67, "CCT": 68, "CCG": 69, "CCC": 70 }, "unk_token": "[UNK]" } }