ai-voice-cloning-mrq/models/tokenizers/spanish_bpe_tokenizer.json
2024-03-29 00:39:29 -07:00

583 lines
No EOL
13 KiB
JSON

{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "[STOP]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "[UNK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "[SPACE]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "0",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "1",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 5,
"content": "2",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 6,
"content": "3",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 7,
"content": "4",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 8,
"content": "5",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 9,
"content": "6",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 10,
"content": "7",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 11,
"content": "8",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 12,
"content": "9",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "Whitespace"
},
"post_processor": null,
"decoder": null,
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "[UNK]",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"vocab": {
"[STOP]": 0,
"[UNK]": 1,
"[SPACE]": 2,
"0": 3,
"1": 4,
"2": 5,
"3": 6,
"4": 7,
"5": 8,
"6": 9,
"7": 10,
"8": 11,
"9": 12,
"!": 13,
"\"": 14,
"%": 15,
"'": 16,
",": 17,
"-": 18,
".": 19,
"?": 20,
"a": 21,
"b": 22,
"c": 23,
"d": 24,
"e": 25,
"f": 26,
"g": 27,
"h": 28,
"i": 29,
"j": 30,
"k": 31,
"l": 32,
"m": 33,
"n": 34,
"o": 35,
"p": 36,
"q": 37,
"r": 38,
"s": 39,
"t": 40,
"u": 41,
"v": 42,
"w": 43,
"x": 44,
"y": 45,
"z": 46,
"¡": 47,
"ª": 48,
"°": 49,
"º": 50,
"»": 51,
"¿": 52,
"à": 53,
"á": 54,
"â": 55,
"å": 56,
"ç": 57,
"è": 58,
"é": 59,
"ê": 60,
"ë": 61,
"í": 62,
"ñ": 63,
"ó": 64,
"ô": 65,
"ö": 66,
"ú": 67,
"û": 68,
"ü": 69,
"œ": 70,
"—": 71,
"…": 72,
"de": 73,
"en": 74,
"os": 75,
"es": 76,
"la": 77,
"er": 78,
"qu": 79,
"an": 80,
"ar": 81,
"on": 82,
"as": 83,
"or": 84,
"que": 85,
"el": 86,
"do": 87,
"al": 88,
"ci": 89,
"re": 90,
"in": 91,
"un": 92,
"ab": 93,
"to": 94,
"te": 95,
"se": 96,
"di": 97,
"tr": 98,
"con": 99,
"ad": 100,
"su": 101,
"los": 102,
"le": 103,
"ta": 104,
"co": 105,
"ti": 106,
"mi": 107,
"no": 108,
"lo": 109,
"cu": 110,
"ía": 111,
"me": 112,
"ri": 113,
"por": 114,
"vi": 115,
"si": 116,
"ch": 117,
"ca": 118,
"mo": 119,
"ra": 120,
"par": 121,
"las": 122,
"ro": 123,
"th": 124,
"da": 125,
"pu": 126,
"per": 127,
"ón": 128,
"ent": 129,
"des": 130,
"li": 131,
"so": 132,
"ma": 133,
"est": 134,
"del": 135,
"aba": 136,
"gu": 137,
"men": 138,
"mp": 139,
"is": 140,
"mb": 141,
"ha": 142,
"mu": 143,
"tu": 144,
"una": 145,
"era": 146,
"hab": 147,
"go": 148,
"res": 149,
"dos": 150,
"man": 151,
"ás": 152,
"para": 153,
"po": 154,
"ve": 155,
"tra": 156,
"pa": 157,
"vo": 158,
"mos": 159,
"the": 160,
"ten": 161,
"ando": 162,
"pi": 163,
"fu": 164,
"ada": 165,
"jo": 166,
"ce": 167,
"ver": 168,
"bi": 169,
"qui": 170,
"como": 171,
"tan": 172,
"us": 173,
"más": 174,
"pe": 175,
"dad": 176,
"ción": 177,
"ho": 178,
"hi": 179,
"car": 180,
"ter": 181,
"án": 182,
"cer": 183,
"cia": 184,
"sus": 185,
"cas": 186,
"he": 187,
"ado": 188,
"fi": 189,
"yo": 190,
"gr": 191,
"mente": 192,
"ba": 193,
"sa": 194,
"ni": 195,
"ser": 196,
"tro": 197,
"cor": 198,
"mar": 199,
"pro": 200,
"tar": 201,
"lla": 202,
"lu": 203,
"za": 204,
"ces": 205,
"les": 206,
"sin": 207,
"ex": 208,
"pero": 209,
"and": 210,
"mis": 211,
"ven": 212,
"cho": 213,
"tes": 214,
"ya": 215,
"of": 216,
"cos": 217,
"ia": 218,
"du": 219,
"lan": 220,
"señ": 221,
"esta": 222,
"ga": 223,
"va": 224,
"ir": 225,
"ing": 226,
"endo": 227,
"bre": 228,
"min": 229,
"aque": 230,
"na": 231,
"lle": 232,
"bu": 233,
"pas": 234,
"tos": 235,
"io": 236,
"mas": 237,
"for": 238,
"emp": 239,
"ente": 240,
"és": 241,
"había": 242,
"ora": 243,
"ían": 244,
"ones": 245,
"je": 246,
"tor": 247,
"cab": 248,
"pre": 249,
"all": 250,
"nu": 251,
"én": 252,
"eron": 253,
"él": 254,
"ero": 255
},
"merges": [
"d e",
"e n",
"o s",
"e s",
"l a",
"e r",
"q u",
"a n",
"a r",
"o n",
"a s",
"o r",
"qu e",
"e l",
"d o",
"a l",
"c i",
"r e",
"i n",
"u n",
"a b",
"t o",
"t e",
"s e",
"d i",
"t r",
"c on",
"a d",
"s u",
"l os",
"l e",
"t a",
"c o",
"t i",
"m i",
"n o",
"l o",
"c u",
"í a",
"m e",
"r i",
"p or",
"v i",
"s i",
"c h",
"c a",
"m o",
"r a",
"p ar",
"la s",
"r o",
"t h",
"d a",
"p u",
"p er",
"ó n",
"en t",
"de s",
"l i",
"s o",
"m a",
"es t",
"de l",
"ab a",
"g u",
"m en",
"m p",
"i s",
"m b",
"h a",
"m u",
"t u",
"un a",
"er a",
"h ab",
"g o",
"r es",
"d os",
"m an",
"á s",
"par a",
"p o",
"v e",
"tr a",
"p a",
"v o",
"m os",
"th e",
"t en",
"an do",
"p i",
"f u",
"ad a",
"j o",
"c e",
"v er",
"b i",
"qu i",
"co mo",
"t an",
"u s",
"m ás",
"p e",
"d ad",
"ci ón",
"h o",
"h i",
"c ar",
"t er",
"á n",
"c er",
"ci a",
"su s",
"c as",
"h e",
"a do",
"f i",
"y o",
"g r",
"men te",
"b a",
"s a",
"n i",
"s er",
"tr o",
"c or",
"m ar",
"p ro",
"t ar",
"l la",
"l u",
"z a",
"c es",
"l es",
"s in",
"e x",
"per o",
"an d",
"mi s",
"v en",
"ch o",
"t es",
"y a",
"o f",
"c os",
"i a",
"d u",
"la n",
"se ñ",
"es ta",
"g a",
"v a",
"i r",
"in g",
"en do",
"b re",
"m in",
"a que",
"n a",
"l le",
"b u",
"p as",
"t os",
"i o",
"m as",
"f or",
"e mp",
"en te",
"é s",
"hab ía",
"or a",
"í an",
"on es",
"j e",
"t or",
"c ab",
"p re",
"al l",
"n u",
"é n",
"er on",
"é l",
"er o"
],
"language": "es"
}
}