Spaces:
Running
Running
p
committed on
Commit
·
c9574d9
0
Parent(s):
Text to speech for 1000+ languages
Browse files- .gitattributes +34 -0
- .vscode/settings.json +6 -0
- README.md +21 -0
- app.py +202 -0
- lang_code.json +1114 -0
- mm_num2word.py +142 -0
- requirements.txt +5 -0
.gitattributes
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
.vscode/settings.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"[python]": {
|
3 |
+
"editor.defaultFormatter": "ms-python.black-formatter"
|
4 |
+
},
|
5 |
+
"python.formatting.provider": "none"
|
6 |
+
}
|
README.md
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Massively Multilingual Speech (MMS) - Text To Speech
|
3 |
+
emoji: ๐
|
4 |
+
colorFrom: yellow
|
5 |
+
colorTo: gray
|
6 |
+
sdk: gradio
|
7 |
+
app_file: app.py
|
8 |
+
pinned: true
|
9 |
+
---
|
10 |
+
|
11 |
+
## Info
|
12 |
+
Text to speech for 1000+ languages - Using [fairseq](https://github.com/facebookresearch/fairseq/blob/main/examples/mms/README.md) MMS TTS and [ttsmms](https://github.com/wannaphong/ttsmms) wrapper.
|
13 |
+
|
14 |
+
+ Language ISO code list (`lang_code.json`) is adapted from
|
15 |
+
https://dl.fbaipublicfiles.com/mms/tts/all-tts-languages.html
|
16 |
+
|
17 |
+
The dropdown list is quite long, so I have placed some of my friends' frequently used languages at the top. The other 1000+ languages are sorted alphabetically.
|
18 |
+
|
19 |
+
+ `mm_num2word.py` is adapted from https://github.com/hpbyte/Myanmar_Number_to_Words
|
20 |
+
|
21 |
+
+ For other dependencies, please refer to the `requirements.txt` file.
|
app.py
ADDED
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Based on example code of https://huggingface.co/facebook/m2m100_1.2B
|
2 |
+
# and https://github.com/wannaphong/ttsmms
|
3 |
+
# See also https://github.com/facebookresearch/fairseq/blob/main/examples/mms/README.md
|
4 |
+
|
5 |
+
import gradio as gr
|
6 |
+
import os
|
7 |
+
import re
|
8 |
+
import soundfile as sf
|
9 |
+
|
10 |
+
import json
|
11 |
+
import nltk
|
12 |
+
from underthesea import sent_tokenize as vie_sent_tokenize # Vietnamese NLP toolkit
|
13 |
+
from underthesea import text_normalize as vie_text_normalize
|
14 |
+
from nltk import sent_tokenize as nltk_sent_tokenize
|
15 |
+
from ttsmms import download
|
16 |
+
from ttsmms import TTS
|
17 |
+
|
18 |
+
from collections import OrderedDict
|
19 |
+
import uuid
|
20 |
+
import datetime
|
21 |
+
import shutil
|
22 |
+
from num2words import num2words
|
23 |
+
|
24 |
+
|
25 |
+
this_description = """Text To Speech for [1000+ languages](https://dl.fbaipublicfiles.com/mms/tts/all-tts-languages.html) - using [fairseq MMS TTS](https://github.com/facebookresearch/fairseq/blob/main/examples/mms/README.md) and [ttsmms](https://github.com/wannaphong/ttsmms) wrapper.
|
26 |
+
Please note that for some languages, it may not pronounce all words correctly (yet).
|
27 |
+
"""
|
28 |
+
|
29 |
+
# The "punkt" data is fetched at import time; nltk's sent_tokenize is used
# further down for sentence splitting.
nltk.download("punkt")

# Fetch the models for a few languages at start-up and remember the value
# returned by download() for each language code.
tts_models = {
    code: download(code, "./data") for code in ("eng", "vie", "mya")
}
eng_path = tts_models["eng"]
vie_path = tts_models["vie"]
mya_path = tts_models["mya"]
|
39 |
+
|
40 |
+
# Do some work in the user directory...
|
41 |
+
|
42 |
+
# Load language codes from lang_code.json with ordered keys
|
43 |
+
# JSON text is UTF-8; pass the encoding explicitly so the read does not
# depend on the platform's default locale encoding.
with open("lang_code.json", encoding="utf-8") as f:
    lang_codes = json.load(f, object_pairs_hook=OrderedDict)

# Re-key the table as "Name (code)" -> code, keeping the file's order.
lang_codes = {
    f"{name} ({code})": code for name, code in lang_codes.items()
}
# Display labels, in the same order as the table above.
language_names = list(lang_codes)
|
50 |
+
|
51 |
+
|
52 |
+
def convert_eng_numbers_to_words(text):
    """Spell out every run of decimal digits in *text* as words.

    Each maximal digit run is parsed with ``int`` and handed to
    ``num2words``; all other characters are returned unchanged.

    Args:
        text: The input string.

    Returns:
        The string with every digit run replaced by its word form.
    """
    # One substitution pass handles every occurrence in place. This avoids
    # collecting the numbers, sorting them by length and rescanning the whole
    # text once per number, and it drops the leftover debug print.
    return re.sub(
        r"\d+", lambda match: num2words(int(match.group())), text
    )
|
65 |
+
|
66 |
+
|
67 |
+
def convert_mya_numbers_to_words(text):
    """Replace the numbers found by ``extract_num`` with ``mm_num2word`` output.

    Args:
        text: The input string.

    Returns:
        The string after every extracted number has been substituted.
    """
    # Imported here rather than at module level, as in the original code.
    from mm_num2word import extract_num, mm_num2word

    # Longest tokens first, so a short number that is a substring of a longer
    # one cannot be substituted inside it.
    tokens = sorted(extract_num(text), key=len, reverse=True)
    print(tokens)

    converted = text
    for token in tokens:
        converted = converted.replace(token, mm_num2word(token))
    return converted
|
77 |
+
|
78 |
+
|
79 |
+
def prepare_sentences(text, lang="mya"):
    """Pre-process *text* for the given language and split it into sentences.

    Args:
        text: Raw input text; newlines separate paragraphs.
        lang: Language code, compared case-insensitively. "mya" and "eng"
            get number-to-word conversion, "vie" uses the underthesea
            tokenizer and normalizer, everything else uses nltk.

    Returns:
        A list of non-blank sentences in their original order.
    """
    code = lang.lower()

    if code == "mya":
        # Map the two Myanmar punctuation marks (U+104A, U+104B) to "," and
        # "." before the numbers are converted.
        text = convert_mya_numbers_to_words(
            text.replace("\u104A", ",").replace("\u104B", ".")
        )

    if code == "eng":
        text = convert_eng_numbers_to_words(text)

    print("Processed text", text)

    # Blank or whitespace-only lines are not paragraphs.
    paragraphs = [line for line in text.split("\n") if line.strip()]

    if code == "vie":
        return [
            vie_text_normalize(piece)
            for block in paragraphs
            for piece in vie_sent_tokenize(block)
            if piece.strip()
        ]

    return [
        piece
        for block in paragraphs
        for piece in nltk_sent_tokenize(block)
        if piece.strip()
    ]
|
112 |
+
|
113 |
+
|
114 |
+
def list_dir():
    """Print the current working directory and the WAV files directly in it."""
    here = os.getcwd()
    print(here)

    # Only names ending in ".wav" are reported; sub-directories are not
    # descended into.
    for name in os.listdir(here):
        if name.endswith(".wav"):
            print(name)
|
128 |
+
|
129 |
+
|
130 |
+
def combine_wav(source_dir, stamp):
    """Concatenate the WAV files in *source_dir* into ``"<stamp>.wav"``.

    Files are joined in alphabetical order of their names. *source_dir* is
    removed afterwards, whether or not the merge succeeded.

    Args:
        source_dir: Directory containing the per-sentence ``.wav`` files.
        stamp: Base name (without extension) of the combined output file,
            which is written relative to the current working directory.

    Returns:
        The path of the combined WAV file.

    Raises:
        ValueError: If *source_dir* contains no ``.wav`` files. The original
            code failed here with an unbound ``sr`` variable.
    """
    # Local import: numpy is not among the module-level imports of this file.
    import numpy as np

    try:
        wav_files = sorted(
            name for name in os.listdir(source_dir) if name.endswith(".wav")
        )
        if not wav_files:
            raise ValueError(f"No WAV files to combine in {source_dir!r}")

        chunks = []
        sample_rate = None
        for name in wav_files:
            data, sample_rate = sf.read(os.path.join(source_dir, name))
            chunks.append(data)

        # Join whole arrays at once instead of extending a Python list one
        # sample at a time.
        # NOTE(review): the sample rate of the last file is used for the
        # output; this assumes all inputs share one rate -- confirm.
        combined_file_path = f"{stamp}.wav"
        sf.write(combined_file_path, np.concatenate(chunks), sample_rate)
    finally:
        # Always clean up the temporary directory; ignore_errors keeps a
        # cleanup failure from masking the real exception.
        shutil.rmtree(source_dir, ignore_errors=True)

    list_dir()

    return combined_file_path
|
154 |
+
|
155 |
+
|
156 |
+
def mms_tts(Input_Text, lang_name="Burmese (mya)"):
    """Synthesize *Input_Text* in the selected language and return a WAV path.

    Args:
        Input_Text: Text to speak.
        lang_name: A key of ``lang_codes`` (a "Name (code)" label).

    Returns:
        Path of the combined WAV file produced by ``combine_wav``.

    Raises:
        KeyError: If *lang_name* is not a key of ``lang_codes``.
        gr.Error: If the text contains no sentences to synthesize.
    """
    lang_code = lang_codes[lang_name]

    # Split first so blank input fails fast, before any model work, instead
    # of crashing later while combining zero WAV files.
    sentences = prepare_sentences(Input_Text, lang_code)
    if not sentences:
        raise gr.Error("Please enter some text to convert to speech.")

    # Reuse a path already stored in tts_models (filled at start-up for the
    # pre-downloaded languages) instead of calling download() every request.
    # NOTE(review): assumes the path returned by download() stays valid for
    # the life of the process -- confirm against ttsmms.
    if lang_code not in tts_models:
        tts_models[lang_code] = download(lang_code, "./data")
    tts = TTS(tts_models[lang_code])

    # A random suffix makes both the working directory and the output file
    # unique per request. The old exists()/makedirs(exist_ok=True) check was
    # racy, and the output file was named from the bare timestamp.
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")
    stamp = f"{timestamp}_{uuid.uuid4().hex}"
    user_dir = f"u_{stamp}"
    os.makedirs(user_dir)
    print("New user directory", user_dir)

    # Zero-padded indices keep alphabetical order equal to sentence order,
    # which combine_wav relies on.
    for i, sentence in enumerate(sentences):
        tts.synthesis(sentence, wav_path=f"{user_dir}/s_{i:010d}.wav")

    return combine_wav(user_dir, stamp)
|
180 |
+
|
181 |
+
|
182 |
+
# common_languages = ["eng", "mya", "vie"] # List of common language codes
|
183 |
+
# Build the two input widgets first so the Interface call stays short.
text_input = gr.Textbox(
    lines=5,
    placeholder="Enter text to speech",
    label="Input text",
)
language_input = gr.Dropdown(
    choices=language_names,
    label="Select language 1,000+",
    value="Burmese (mya)",
)

iface = gr.Interface(
    fn=mms_tts,
    title="Massively Multilingual Speech (MMS) - Text To Speech",
    description=this_description,
    inputs=[text_input, language_input],
    outputs="audio",
)
# Alternative output layout, kept disabled as in the original:
# outputs=[
#     "audio",
#     gr.File(label="Download", type="file", download_to="done.wav")
# ])

iface.launch()
|
lang_code.json
ADDED
@@ -0,0 +1,1114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"Burmese": "mya",
|
3 |
+
|
4 |
+
"Mon": "mnw",
|
5 |
+
"Shan": "shn",
|
6 |
+
|
7 |
+
"English": "eng",
|
8 |
+
"Vietnamese": "vie",
|
9 |
+
"Thai": "tha",
|
10 |
+
"Thai, Northern": "nod",
|
11 |
+
"Indonesian": "ind",
|
12 |
+
|
13 |
+
"Khmer": "khm",
|
14 |
+
"Khmer, Northern": "kxm",
|
15 |
+
|
16 |
+
"Abidji": "abi",
|
17 |
+
"Aceh": "ace",
|
18 |
+
"Achagua": "aca",
|
19 |
+
"Achang": "acn",
|
20 |
+
"Achi": "acr",
|
21 |
+
"Acholi": "ach",
|
22 |
+
"Achuar-Shiwiar": "acu",
|
23 |
+
"Ach\u00e9": "guq",
|
24 |
+
"Adele": "ade",
|
25 |
+
"Adioukrou": "adj",
|
26 |
+
"Agarabi": "agd",
|
27 |
+
"Aghul": "agx",
|
28 |
+
"Agutaynen": "agn",
|
29 |
+
"Ahanta": "aha",
|
30 |
+
"Akan": "aka",
|
31 |
+
"Akateko": "knj",
|
32 |
+
"Akawaio": "ake",
|
33 |
+
"Akeu": "aeu",
|
34 |
+
"Akha": "ahk",
|
35 |
+
"Akoose": "bss",
|
36 |
+
"Alangan": "alj",
|
37 |
+
"Albanian": "sqi",
|
38 |
+
"Altai, Southern": "alt",
|
39 |
+
"Alune": "alp",
|
40 |
+
"Alur": "alz",
|
41 |
+
"Amazigh": "kab",
|
42 |
+
"Ambai": "amk",
|
43 |
+
"Ambrym, North": "mmg",
|
44 |
+
"Amharic": "amh",
|
45 |
+
"Amis": "ami",
|
46 |
+
"Amuzgo, San Pedro Amuzgos": "azg",
|
47 |
+
"Angor": "agg",
|
48 |
+
"Anjam": "boj",
|
49 |
+
"Anufo": "cko",
|
50 |
+
"Anyin": "any",
|
51 |
+
"Arabela": "arl",
|
52 |
+
"Arabic": "ara",
|
53 |
+
"Aralle-Tabulahan": "atq",
|
54 |
+
"Aringa": "luc",
|
55 |
+
"Armenian, Western": "hyw",
|
56 |
+
"Arop-Lokep": "apr",
|
57 |
+
"Arosi": "aia",
|
58 |
+
"Aruamu": "msy",
|
59 |
+
"Ash\u00e1ninka": "cni",
|
60 |
+
"Ash\u00e9ninka, Pajonal": "cjo",
|
61 |
+
"Ash\u00e9ninka, Pichis": "cpu",
|
62 |
+
"Ash\u00e9ninka, Ucayali-Yur\u00faa": "cpb",
|
63 |
+
"Assamese": "asm",
|
64 |
+
"Asu": "asa",
|
65 |
+
"Ateso": "teo",
|
66 |
+
"Atti\u00e9": "ati",
|
67 |
+
"Aukan": "djk",
|
68 |
+
"Avar": "ava",
|
69 |
+
"Avatime": "avn",
|
70 |
+
"Avokaya": "avu",
|
71 |
+
"Awa": "awb",
|
72 |
+
"Awa-Cuaiquer": "kwi",
|
73 |
+
"Awadhi": "awa",
|
74 |
+
"Awaj\u00fan": "agr",
|
75 |
+
"Awakateko": "agu",
|
76 |
+
"Aymara, Central": "ayr",
|
77 |
+
"Ayoreo": "ayo",
|
78 |
+
"Ayta, Abellen": "abp",
|
79 |
+
"Ayta, Mag-Indi": "blx",
|
80 |
+
"Ayta, Mag-antsi": "sgb",
|
81 |
+
"Azerbaijani, North": "azj-script_latin",
|
82 |
+
"Azerbaijani, South": "azb",
|
83 |
+
"Baatonum": "bba",
|
84 |
+
"Bada": "bhz",
|
85 |
+
"Baelelea": "bvc",
|
86 |
+
"Bagheli": "bfy",
|
87 |
+
"Bagri": "bgq",
|
88 |
+
"Bahnar": "bdq",
|
89 |
+
"Baka": "bdh",
|
90 |
+
"Bakhti\u00e2ri": "bqi",
|
91 |
+
"Bakw\u00e9": "bjw",
|
92 |
+
"Balantak": "blz",
|
93 |
+
"Bali": "ban",
|
94 |
+
"Balochi, Southern": "bcc-script_arabic",
|
95 |
+
"Bamanankan": "bam",
|
96 |
+
"Bambam": "ptu",
|
97 |
+
"Bana": "bcw",
|
98 |
+
"Bandial": "bqj",
|
99 |
+
"Bantoanon": "bno",
|
100 |
+
"Barai": "bbb",
|
101 |
+
"Bari": "bfa",
|
102 |
+
"Baruga": "bjz",
|
103 |
+
"Bashkort": "bak",
|
104 |
+
"Basque": "eus",
|
105 |
+
"Bassa": "bsq",
|
106 |
+
"Batak Angkola": "akb",
|
107 |
+
"Batak Dairi": "btd",
|
108 |
+
"Batak Karo": "btx",
|
109 |
+
"Batak Simalungun": "bts",
|
110 |
+
"Batak Toba": "bbc",
|
111 |
+
"Bauzi": "bvz",
|
112 |
+
"Bedjond": "bjv",
|
113 |
+
"Behoa": "bep",
|
114 |
+
"Bekwarra": "bkv",
|
115 |
+
"Belize English Creole": "bzj",
|
116 |
+
"Bemba": "bem",
|
117 |
+
"Benga": "bng",
|
118 |
+
"Bengali": "ben",
|
119 |
+
"Berom": "bom",
|
120 |
+
"Bete-Bendi": "btt",
|
121 |
+
"Bharia": "bha",
|
122 |
+
"Bhatri": "bgw",
|
123 |
+
"Bhattiyali": "bht",
|
124 |
+
"Biali": "beh",
|
125 |
+
"Bidayuh, Bau": "sne",
|
126 |
+
"Bikol, Buhi\u2019non": "ubl",
|
127 |
+
"Bikol, Central": "bcl",
|
128 |
+
"Bimoba": "bim",
|
129 |
+
"Binukid": "bkd",
|
130 |
+
"Binumarien": "bjr",
|
131 |
+
"Birifor, Malba": "bfo",
|
132 |
+
"Birifor, Southern": "biv",
|
133 |
+
"Bisa": "bib",
|
134 |
+
"Bislama": "bis",
|
135 |
+
"Bisu": "bzi",
|
136 |
+
"Bis\u00e3": "bqp",
|
137 |
+
"Blaan, Koronadal": "bpr",
|
138 |
+
"Blaan, Sarangani": "bps",
|
139 |
+
"Bobo Madar\u00e9, Southern": "bwq",
|
140 |
+
"Bodo Parja": "bdv",
|
141 |
+
"Boko": "bqc",
|
142 |
+
"Bokobaru": "bus",
|
143 |
+
"Bola": "bnp",
|
144 |
+
"Bomu": "bmq",
|
145 |
+
"Bonggi": "bdg",
|
146 |
+
"Bora": "boa",
|
147 |
+
"Borong": "ksr",
|
148 |
+
"Bor\u00f4ro": "bor",
|
149 |
+
"Bru, Eastern": "bru",
|
150 |
+
"Buamu": "box",
|
151 |
+
"Buang, Mapos": "bzh",
|
152 |
+
"Bughotu": "bgt",
|
153 |
+
"Buglere": "sab",
|
154 |
+
"Bulgarian": "bul",
|
155 |
+
"Buli": "bwu",
|
156 |
+
"Bum": "bmv",
|
157 |
+
"Bwanabwana": "tte",
|
158 |
+
"Cab\u00e9car": "cjp",
|
159 |
+
"Cacua": "cbv",
|
160 |
+
"Capanahua": "kaq",
|
161 |
+
"Caquinte": "cot",
|
162 |
+
"Carapana": "cbc",
|
163 |
+
"Carib": "car",
|
164 |
+
"Catalan": "cat",
|
165 |
+
"Cebuano": "ceb",
|
166 |
+
"Cerma": "cme",
|
167 |
+
"Chachi": "cbi",
|
168 |
+
"Chamacoco": "ceg",
|
169 |
+
"Chatino, Eastern Highland": "cly",
|
170 |
+
"Chatino, Nopala": "cya",
|
171 |
+
"Chechen": "che",
|
172 |
+
"Chhattisgarhi": "hne",
|
173 |
+
"Chichewa": "nya",
|
174 |
+
"Chidigo": "dig",
|
175 |
+
"Chiduruma": "dug",
|
176 |
+
"Chin, Bawm": "bgr",
|
177 |
+
"Chin, Eastern Khumi": "cek",
|
178 |
+
"Chin, Falam": "cfm",
|
179 |
+
"Chin, Hakha": "cnh",
|
180 |
+
"Chin, Matu": "hlt",
|
181 |
+
"Chin, M\u00fc\u00fcn": "mwq",
|
182 |
+
"Chin, Tedim": "ctd",
|
183 |
+
"Chin, Thado": "tcz",
|
184 |
+
"Chin, Zyphe": "zyp",
|
185 |
+
"Chinantec, Comaltepec": "cco",
|
186 |
+
"Chinantec, Lalana": "cnl",
|
187 |
+
"Chinantec, Lealao": "cle",
|
188 |
+
"Chinantec, Ozumac\u00edn": "chz",
|
189 |
+
"Chinantec, Palantla": "cpa",
|
190 |
+
"Chinantec, Sochiapam": "cso",
|
191 |
+
"Chinantec, Tepetotutla": "cnt",
|
192 |
+
"Chinantec, Usila": "cuc",
|
193 |
+
"Chinese, Hakka": "hak",
|
194 |
+
"Chinese, Min Nan": "nan",
|
195 |
+
"Chingoni": "xnj",
|
196 |
+
"Chipaya": "cap",
|
197 |
+
"Chiquitano": "cax",
|
198 |
+
"Chittagonian": "ctg",
|
199 |
+
"Chol": "ctu",
|
200 |
+
"Chontal, Tabasco": "chf",
|
201 |
+
"Chopi": "cce",
|
202 |
+
"Chorote, Iyojwa\u2019ja": "crt",
|
203 |
+
"Chorote, Iyo\u2019wujwa": "crq",
|
204 |
+
"Chuj": "cac-dialect_sanmateoixtat\u00e1n",
|
205 |
+
"Chukchi": "ckt",
|
206 |
+
"Chumburung": "ncu",
|
207 |
+
"Churahi": "cdj",
|
208 |
+
"Chuvash": "chv",
|
209 |
+
"Ch\u2019orti\u2019": "caa",
|
210 |
+
"Cishingini": "asg",
|
211 |
+
"Cof\u00e1n": "con",
|
212 |
+
"Cora, El Nayar": "crn",
|
213 |
+
"Cora, Santa Teresa": "cok",
|
214 |
+
"Cree, Plains": "crk-script_syllabics",
|
215 |
+
"Crimean Tatar": "crh",
|
216 |
+
"Cuiba": "cui",
|
217 |
+
"Daasanach": "dsh",
|
218 |
+
"Daba": "dbq",
|
219 |
+
"Dagaare, Southern": "dga",
|
220 |
+
"Dagara, Northern": "dgi",
|
221 |
+
"Dagba": "dgk",
|
222 |
+
"Dan": "dnj-dialect_blowowest",
|
223 |
+
"Dangal\u00e9at": "daa",
|
224 |
+
"Dani, Mid Grand Valley": "dnt",
|
225 |
+
"Dani, Western": "dnw",
|
226 |
+
"Dargwa": "dar",
|
227 |
+
"Datooga": "tcc",
|
228 |
+
"Dawro": "dwr",
|
229 |
+
"Dedua": "ded",
|
230 |
+
"Deg": "mzw",
|
231 |
+
"Delo": "ntr",
|
232 |
+
"Dendi": "ddn",
|
233 |
+
"Desano": "des",
|
234 |
+
"Desiya": "dso",
|
235 |
+
"Dhao": "nfa",
|
236 |
+
"Dhimal": "dhi",
|
237 |
+
"Dida, Yocobou\u00e9": "gud",
|
238 |
+
"Didinga": "did",
|
239 |
+
"Digaro-Mishmi": "mhu",
|
240 |
+
"Dinka, Northeastern": "dip",
|
241 |
+
"Dinka, Southwestern": "dik",
|
242 |
+
"Ditammari": "tbz",
|
243 |
+
"Dogon, Toro So": "dts",
|
244 |
+
"Dogos\u00e9": "dos",
|
245 |
+
"Dogri": "dgo",
|
246 |
+
"Duri": "mvp",
|
247 |
+
"Dutch": "nld",
|
248 |
+
"Dza": "jen",
|
249 |
+
"Dzongkha": "dzo",
|
250 |
+
"Ede Idaca": "idd",
|
251 |
+
"Ekajuk": "eka",
|
252 |
+
"Embera Cat\u00edo": "cto",
|
253 |
+
"Ember\u00e1, Northern": "emp",
|
254 |
+
"Enxet": "enx",
|
255 |
+
"Epena": "sja",
|
256 |
+
"Erzya": "myv",
|
257 |
+
"Ese": "mcq",
|
258 |
+
"Ese Ejja": "ese",
|
259 |
+
"Evenki": "evn",
|
260 |
+
"Ezaa": "eza",
|
261 |
+
"Fali, South": "fal",
|
262 |
+
"Faroese": "fao",
|
263 |
+
"Fataleka": "far",
|
264 |
+
"Fijian": "fij",
|
265 |
+
"Finnish": "fin",
|
266 |
+
"Fon": "fon",
|
267 |
+
"Fordata": "frd",
|
268 |
+
"French": "fra",
|
269 |
+
"Fulah": "ful",
|
270 |
+
"Fuliiru": "flr",
|
271 |
+
"Gadaba, Mudhili": "gau",
|
272 |
+
"Gaddi": "gbk",
|
273 |
+
"Gagauz": "gag-script_latin",
|
274 |
+
"Galela": "gbi",
|
275 |
+
"Gamo": "gmv",
|
276 |
+
"Ganda": "lug",
|
277 |
+
"Gapapaiwa": "pwg",
|
278 |
+
"Garhwali": "gbm",
|
279 |
+
"Garifuna": "cab",
|
280 |
+
"Garo": "grt",
|
281 |
+
"Gbaya": "krs",
|
282 |
+
"Gbaya, Southwest": "gso",
|
283 |
+
"Gela": "nlg",
|
284 |
+
"Gen": "gej",
|
285 |
+
"German, Standard": "deu",
|
286 |
+
"Ghari": "gri",
|
287 |
+
"Gikuyu": "kik",
|
288 |
+
"Gikyode": "acd",
|
289 |
+
"Gilaki": "glk",
|
290 |
+
"Gofa": "gof-script_latin",
|
291 |
+
"Gogo": "gog",
|
292 |
+
"Gokana": "gkn",
|
293 |
+
"Gondi, Adilabad": "wsg",
|
294 |
+
"Gonja": "gjn",
|
295 |
+
"Gor": "gqr",
|
296 |
+
"Gorontalo": "gor",
|
297 |
+
"Gourmanch\u00e9ma": "gux",
|
298 |
+
"Grebo, Northern": "gbo",
|
299 |
+
"Greek": "ell",
|
300 |
+
"Greek, Ancient": "grc",
|
301 |
+
"Guahibo": "guh",
|
302 |
+
"Guajaj\u00e1ra": "gub",
|
303 |
+
"Guarani": "grn",
|
304 |
+
"Guarayu": "gyr",
|
305 |
+
"Guayabero": "guo",
|
306 |
+
"Gude": "gde",
|
307 |
+
"Gujarati": "guj",
|
308 |
+
"Gulay": "gvl",
|
309 |
+
"Gumuz": "guk",
|
310 |
+
"Gungu": "rub",
|
311 |
+
"Gwahatike": "dah",
|
312 |
+
"Gwere": "gwr",
|
313 |
+
"Gwich\u2019in": "gwi",
|
314 |
+
"Haitian Creole": "hat",
|
315 |
+
"Halbi": "hlb",
|
316 |
+
"Hamer-Banna": "amf",
|
317 |
+
"Hanga": "hag",
|
318 |
+
"Hanunoo": "hnn",
|
319 |
+
"Haryanvi": "bgc",
|
320 |
+
"Hatam": "had",
|
321 |
+
"Hausa": "hau",
|
322 |
+
"Hawaii Pidgin": "hwc",
|
323 |
+
"Hawu": "hvn",
|
324 |
+
"Haya": "hay",
|
325 |
+
"Hdi": "xed",
|
326 |
+
"Hebrew": "heb",
|
327 |
+
"Hehe": "heh",
|
328 |
+
"Hiligaynon": "hil",
|
329 |
+
"Hindi": "hin",
|
330 |
+
"Hindi, Fiji": "hif",
|
331 |
+
"Hindustani, Sarnami": "hns",
|
332 |
+
"Ho": "hoc",
|
333 |
+
"Holiya": "hoy",
|
334 |
+
"Huastec": "hus-dialect_centralveracruz",
|
335 |
+
"Huave, San Mateo del Mar": "huv",
|
336 |
+
"Huli": "hui",
|
337 |
+
"Hungarian": "hun",
|
338 |
+
"Hupla": "hap",
|
339 |
+
"Iban": "iba",
|
340 |
+
"Icelandic": "isl",
|
341 |
+
"Ida\u2019an": "dbj",
|
342 |
+
"Ifugao, Amganad": "ifa",
|
343 |
+
"Ifugao, Batad": "ifb",
|
344 |
+
"Ifugao, Mayoyao": "ifu",
|
345 |
+
"Ifugao, Tuwali": "ifk",
|
346 |
+
"If\u00e8": "ife",
|
347 |
+
"Ignaciano": "ign",
|
348 |
+
"Ika": "ikk",
|
349 |
+
"Ikwo": "iqw",
|
350 |
+
"Ila": "ilb",
|
351 |
+
"Ilocano": "ilo",
|
352 |
+
"Imbongu": "imo",
|
353 |
+
|
354 |
+
"Inga": "inb",
|
355 |
+
"Ipili": "ipi",
|
356 |
+
"Iraqw": "irk",
|
357 |
+
"Islander English Creole": "icr",
|
358 |
+
"Itawit": "itv",
|
359 |
+
"Itelmen": "itl",
|
360 |
+
"Ivbie North-Okpela-Arhe": "atg",
|
361 |
+
"Ixil": "ixl-dialect_santamarianebaj",
|
362 |
+
"Iyo": "nca",
|
363 |
+
"Izere": "izr",
|
364 |
+
"Izii": "izz",
|
365 |
+
"Jakalteko": "jac",
|
366 |
+
"Jamaican English Creole": "jam",
|
367 |
+
"Javanese": "jav",
|
368 |
+
"Javanese, Suriname": "jvn",
|
369 |
+
"Jingpho": "kac",
|
370 |
+
"Jola-Fonyi": "dyo",
|
371 |
+
"Jola-Kasa": "csk",
|
372 |
+
"Jopadhola": "adh",
|
373 |
+
"Juang": "jun",
|
374 |
+
"Jukun Takum": "jbu",
|
375 |
+
"Jula": "dyu",
|
376 |
+
"Jur Modo": "bex",
|
377 |
+
"Juray": "juy",
|
378 |
+
"Kaansa": "gna",
|
379 |
+
"Kaapor": "urb",
|
380 |
+
"Kabiy\u00e8": "kbp",
|
381 |
+
"Kabwa": "cwa",
|
382 |
+
"Kadazan Dusun": "dtp",
|
383 |
+
"Kafa": "kbr",
|
384 |
+
"Kagayanen": "cgc",
|
385 |
+
"Kagulu": "kki",
|
386 |
+
"Kaili, Da\u2019a": "kzf",
|
387 |
+
"Kaili, Ledo": "lew",
|
388 |
+
"Kakataibo-Kashibo": "cbr",
|
389 |
+
"Kako": "kkj",
|
390 |
+
"Kakwa": "keo",
|
391 |
+
"Kalagan": "kqe",
|
392 |
+
"Kalanguya": "kak",
|
393 |
+
"Kalinga, Butbut": "kyb",
|
394 |
+
"Kalinga, Lubuagan": "knb",
|
395 |
+
"Kalinga, Majukayang": "kmd",
|
396 |
+
"Kalinga, Tanudan": "kml",
|
397 |
+
"Kallahan, Keley-i": "ify",
|
398 |
+
"Kalmyk-Oirat": "xal",
|
399 |
+
"Kamano": "kbq",
|
400 |
+
"Kamayur\u00e1": "kay",
|
401 |
+
"Kambaata": "ktb",
|
402 |
+
"Kamwe": "hig",
|
403 |
+
"Kandawo": "gam",
|
404 |
+
"Kandozi-Chapra": "cbu",
|
405 |
+
"Kangri": "xnr",
|
406 |
+
"Kanite": "kmu",
|
407 |
+
"Kankanaey": "kne",
|
408 |
+
"Kannada": "kan",
|
409 |
+
"Kanuri, Manga": "kby",
|
410 |
+
"Kapampangan": "pam",
|
411 |
+
"Kaqchikel": "cak-dialect_central",
|
412 |
+
"Karaboro, Eastern": "xrb",
|
413 |
+
"Karachay-Balkar": "krc",
|
414 |
+
"Karakalpak": "kaa",
|
415 |
+
"Karelian": "krl",
|
416 |
+
"Karen, Pwo Northern": "pww",
|
417 |
+
"Kasem": "xsm",
|
418 |
+
"Kashinawa": "cbs",
|
419 |
+
"Kaulong": "pss",
|
420 |
+
"Kawyaw": "kxf",
|
421 |
+
"Kayab\u00ed": "kyz",
|
422 |
+
"Kayah, Western": "kyu",
|
423 |
+
"Kayap\u00f3": "txu",
|
424 |
+
"Kazakh": "kaz",
|
425 |
+
"Kebu": "ndp",
|
426 |
+
"Keliko": "kbo",
|
427 |
+
"Kenga": "kyq",
|
428 |
+
"Kenyang": "ken",
|
429 |
+
"Kera": "ker",
|
430 |
+
"Ketengban": "xte",
|
431 |
+
"Keyagana": "kyg",
|
432 |
+
"Khakas": "kjh",
|
433 |
+
"Khanty": "kca",
|
434 |
+
"Khmu": "kjg",
|
435 |
+
"Kigiryama": "nyf",
|
436 |
+
"Kilivila": "kij",
|
437 |
+
"Kim": "kia",
|
438 |
+
"Kimaragang": "kqr",
|
439 |
+
"Kimr\u00e9": "kqp",
|
440 |
+
"Kinaray-a": "krj",
|
441 |
+
"Kinga": "zga",
|
442 |
+
"Kinyarwanda": "kin",
|
443 |
+
"Kipfokomo": "pkb",
|
444 |
+
"Kire": "geb",
|
445 |
+
"Kiribati": "gil",
|
446 |
+
"Kisar": "kje",
|
447 |
+
"Kisi, Southern": "kss",
|
448 |
+
"Kitharaka": "thk",
|
449 |
+
"Klao": "klu",
|
450 |
+
"Klon": "kyo",
|
451 |
+
"Kogi": "kog",
|
452 |
+
"Kolami, Northwestern": "kfb",
|
453 |
+
"Komi-Zyrian": "kpv",
|
454 |
+
"Konab\u00e9r\u00e9": "bbo",
|
455 |
+
"Konkomba": "xon",
|
456 |
+
"Konni": "kma",
|
457 |
+
"Kono": "kno",
|
458 |
+
"Konso": "kxc",
|
459 |
+
"Koonzime": "ozm",
|
460 |
+
"Koorete": "kqy",
|
461 |
+
"Korean": "kor",
|
462 |
+
"Koreguaje": "coe",
|
463 |
+
"Korupun-Sela": "kpq",
|
464 |
+
"Koryak": "kpy",
|
465 |
+
"Kouya": "kyf",
|
466 |
+
"Koya": "kff-script_telugu",
|
467 |
+
"Krio": "kri",
|
468 |
+
"Kriol": "rop",
|
469 |
+
"Krumen, Plapo": "ktj",
|
470 |
+
"Krumen, Tepo": "ted",
|
471 |
+
"Krung": "krr",
|
472 |
+
"Kuay": "kdt",
|
473 |
+
"Kukele": "kez",
|
474 |
+
"Kulina": "cul",
|
475 |
+
"Kulung": "kle",
|
476 |
+
"Kumam": "kdi",
|
477 |
+
"Kuman": "kue",
|
478 |
+
"Kumyk": "kum",
|
479 |
+
"Kuna, Border": "kvn",
|
480 |
+
"Kuna, San Blas": "cuk",
|
481 |
+
"Kunda": "kdn",
|
482 |
+
"Kuo": "xuo",
|
483 |
+
"Kupia": "key",
|
484 |
+
"Kupsapiiny": "kpz",
|
485 |
+
"Kuranko": "knk",
|
486 |
+
"Kurdish, Northern": "kmr-script_cyrillic",
|
487 |
+
"Kurumba, Alu": "xua",
|
488 |
+
"Kurux": "kru",
|
489 |
+
"Kusaal": "kus",
|
490 |
+
"Kutep": "kub",
|
491 |
+
"Kutu": "kdc",
|
492 |
+
"Kuvi": "kxv",
|
493 |
+
"Kuwaa": "blh",
|
494 |
+
"Kuwaataay": "cwt",
|
495 |
+
"Kwaio": "kwd",
|
496 |
+
"Kwamera": "tnk",
|
497 |
+
"Kwara\u2019ae": "kwf",
|
498 |
+
"Kwere": "cwe",
|
499 |
+
"Kyaka": "kyc",
|
500 |
+
"Kyanga": "tye",
|
501 |
+
"Kyrgyz": "kir",
|
502 |
+
"K\u2019iche\u2019": "quc-dialect_central",
|
503 |
+
"Lacandon": "lac",
|
504 |
+
"Lacid": "lsi",
|
505 |
+
"Ladakhi": "lbj",
|
506 |
+
"Lahu": "lhu",
|
507 |
+
"Lama": "las",
|
508 |
+
"Lamba": "lam",
|
509 |
+
"Lamnso\u2019": "lns",
|
510 |
+
"Lampung Api": "ljp",
|
511 |
+
"Lango": "laj",
|
512 |
+
"Lao": "lao",
|
513 |
+
"Latin": "lat",
|
514 |
+
"Latvian": "lav",
|
515 |
+
"Lauje": "law",
|
516 |
+
"Lawa, Western": "lcp",
|
517 |
+
"Laz": "lzz",
|
518 |
+
"Lele": "lln",
|
519 |
+
"Lelemi": "lef",
|
520 |
+
"Lesser Antillean French Creole": "acf",
|
521 |
+
"Lewo": "lww",
|
522 |
+
"Lhao Vo": "mhx",
|
523 |
+
"Lik": "eip",
|
524 |
+
"Limba, West-Central": "lia",
|
525 |
+
"Limbu": "lif",
|
526 |
+
"Lingao": "onb",
|
527 |
+
"Lisu": "lis",
|
528 |
+
"Lobala": "loq",
|
529 |
+
"Lobi": "lob",
|
530 |
+
"Lokaa": "yaz",
|
531 |
+
"Loko": "lok",
|
532 |
+
"Lole": "llg",
|
533 |
+
"Lolopo": "ycl",
|
534 |
+
"Loma": "lom",
|
535 |
+
"Lomwe": "ngl",
|
536 |
+
"Lomwe, Malawi": "lon",
|
537 |
+
"Luang": "lex",
|
538 |
+
"Lugbara": "lgg",
|
539 |
+
"Luguru": "ruf",
|
540 |
+
"Lukpa": "dop",
|
541 |
+
"Lundayeh": "lnd",
|
542 |
+
"Lutos": "ndy",
|
543 |
+
"Luwo": "lwo",
|
544 |
+
"Ly\u00e9l\u00e9": "lee",
|
545 |
+
"Maan": "mev",
|
546 |
+
"Mabaan": "mfz",
|
547 |
+
"Machame": "jmc",
|
548 |
+
"Macuna": "myy",
|
549 |
+
"Macushi": "mbc",
|
550 |
+
"Mada": "mda",
|
551 |
+
"Madura": "mad",
|
552 |
+
"Magahi": "mag",
|
553 |
+
"Mai Brat": "ayz",
|
554 |
+
"Maithili": "mai",
|
555 |
+
"Maka": "mca",
|
556 |
+
"Makaa": "mcp",
|
557 |
+
"Makasar": "mak",
|
558 |
+
"Makhuwa": "vmw",
|
559 |
+
"Makhuwa-Meetto": "mgh",
|
560 |
+
"Makonde": "kde",
|
561 |
+
"Malagasy": "mlg",
|
562 |
+
"Malay": "zlm",
|
563 |
+
"Malay, Central": "pse",
|
564 |
+
"Malay, Kupang": "mkn",
|
565 |
+
"Malay, Manado": "xmm",
|
566 |
+
"Malayalam": "mal",
|
567 |
+
"Malayic Dayak": "xdy",
|
568 |
+
"Maldivian": "div",
|
569 |
+
"Male": "mdy",
|
570 |
+
"Malvi": "mup",
|
571 |
+
"Mam": "mam-dialect_western",
|
572 |
+
"Mamasa": "mqj",
|
573 |
+
"Mambila, Cameroon": "mcu",
|
574 |
+
"Mambila, Nigeria": "mzk",
|
575 |
+
"Mampruli": "maw",
|
576 |
+
"Mandeali": "mjl",
|
577 |
+
"Mandinka": "mnk",
|
578 |
+
"Mango": "mge",
|
579 |
+
"Mangseng": "mbh",
|
580 |
+
"Mankanya": "knf",
|
581 |
+
"Mannan": "mjv",
|
582 |
+
"Manobo, Matigsalug": "mbt",
|
583 |
+
"Manobo, Obo": "obo",
|
584 |
+
"Manobo, Western Bukidnon": "mbb",
|
585 |
+
"Manya": "mzj",
|
586 |
+
"Mapun": "sjm",
|
587 |
+
"Maranao": "mrw",
|
588 |
+
"Marathi": "mar",
|
589 |
+
"Marba": "mpg",
|
590 |
+
"Mari, Meadow": "mhr",
|
591 |
+
"Markweeta": "enb",
|
592 |
+
"Marshallese": "mah",
|
593 |
+
"Masaaba": "myx",
|
594 |
+
"Maskelynes": "klv",
|
595 |
+
"Matal": "mfh",
|
596 |
+
"Mato": "met",
|
597 |
+
"Matsigenka": "mcb",
|
598 |
+
"Maya, Mop\u00e1n": "mop",
|
599 |
+
"Maya, Yucatec": "yua",
|
600 |
+
"Mayo": "mfy",
|
601 |
+
"Mazahua, Central": "maz",
|
602 |
+
"Mazatec, Ayautla": "vmy",
|
603 |
+
"Mazatec, Chiquihuitl\u00e1n": "maq",
|
604 |
+
"Mazatec, Ixcatl\u00e1n": "mzi",
|
605 |
+
"Mazatec, Jalapa de D\u00edaz": "maj",
|
606 |
+
"Mazatec, San Jer\u00f3nimo Tec\u00f3atl": "maa-dialect_sanjer\u00f3nimo",
|
607 |
+
"Ma\u2019anyan": "mhy",
|
608 |
+
"Ma\u2019di": "mhi",
|
609 |
+
"Mbandja": "zmz",
|
610 |
+
"Mbay": "myb",
|
611 |
+
"Mbore": "gai",
|
612 |
+
"Mbuko": "mqb",
|
613 |
+
"Mbula-Bwazza": "mbu",
|
614 |
+
"Melpa": "med",
|
615 |
+
"Mende": "men",
|
616 |
+
"Mengen": "mee",
|
617 |
+
"Mentawai": "mwv",
|
618 |
+
"Merey": "meq",
|
619 |
+
"Mesme": "zim",
|
620 |
+
"Meta\u2019": "mgo",
|
621 |
+
"Meyah": "mej",
|
622 |
+
"Migabac": "mpp",
|
623 |
+
"Minangkabau": "min",
|
624 |
+
"Misak": "gum",
|
625 |
+
"Misima-Panaeati": "mpx",
|
626 |
+
"Mixe, Coatl\u00e1n": "mco",
|
627 |
+
"Mixe, Juquila": "mxq",
|
628 |
+
"Mixe, Quetzaltepec": "pxm",
|
629 |
+
"Mixe, Totontepec": "mto",
|
630 |
+
"Mixtec, Alacatlatzala": "mim",
|
631 |
+
"Mixtec, Alcozauca": "xta",
|
632 |
+
"Mixtec, Amoltepec": "mbz",
|
633 |
+
"Mixtec, Apasco-Apoala": "mip",
|
634 |
+
"Mixtec, Atatlahuca": "mib",
|
635 |
+
"Mixtec, Ayutla": "miy",
|
636 |
+
"Mixtec, Chayuco": "mih",
|
637 |
+
"Mixtec, Coatzospan": "miz",
|
638 |
+
"Mixtec, Diuxi-Tilantongo": "xtd",
|
639 |
+
"Mixtec, Jamiltepec": "mxt",
|
640 |
+
"Mixtec, Magdalena Pe\u00f1asco": "xtm",
|
641 |
+
"Mixtec, Metlat\u00f3noc": "mxv",
|
642 |
+
"Mixtec, Northern Tlaxiaco": "xtn",
|
643 |
+
"Mixtec, Ocotepec": "mie",
|
644 |
+
"Mixtec, Pe\u00f1oles": "mil",
|
645 |
+
"Mixtec, Pinotepa Nacional": "mio",
|
646 |
+
"Mixtec, Santa Luc\u00eda Monteverde": "mdv",
|
647 |
+
"Mixtec, Santa Mar\u00eda Zacatepec": "mza",
|
648 |
+
"Mixtec, Southern Puebla": "mit",
|
649 |
+
"Mixtec, Tezoatl\u00e1n": "mxb",
|
650 |
+
"Mixtec, Yosond\u00faa": "mpm",
|
651 |
+
"Miyobe": "soy",
|
652 |
+
"Mnong, Central": "cmo-script_khmer",
|
653 |
+
"Moba": "mfq",
|
654 |
+
"Mochi": "old",
|
655 |
+
"Mofu, North": "mfk",
|
656 |
+
"Mofu-Gudur": "mif",
|
657 |
+
"Mokole": "mkl",
|
658 |
+
"Molima": "mox",
|
659 |
+
"Moma": "myl",
|
660 |
+
"Momuna": "mqf",
|
661 |
+
"Mongolian": "mon",
|
662 |
+
"Mongondow": "mog",
|
663 |
+
"Morisyen": "mfe",
|
664 |
+
"Moro": "mor",
|
665 |
+
"Moronene": "mqn",
|
666 |
+
"Moru": "mgd",
|
667 |
+
"Moskona": "mtj",
|
668 |
+
"Mro-Khimi": "cmr",
|
669 |
+
"Mualang": "mtd",
|
670 |
+
"Muinane": "bmr",
|
671 |
+
"Mukulu": "moz",
|
672 |
+
"Mumuye": "mzm",
|
673 |
+
"Muna": "mnb",
|
674 |
+
"Mundani": "mnf",
|
675 |
+
"Mundari": "unr",
|
676 |
+
"Muria, Far Western": "fmu",
|
677 |
+
"Murle": "mur",
|
678 |
+
"Murut, Timugon": "tih",
|
679 |
+
"Muthuvan": "muv",
|
680 |
+
"Muyang": "muy",
|
681 |
+
"Mwaghavul": "sur",
|
682 |
+
"Mwan": "moa",
|
683 |
+
"Mwani": "wmw",
|
684 |
+
"M\u00e9nik": "tnr",
|
685 |
+
"M\u00edskito": "miq",
|
686 |
+
"M\u00f2or\u00e9": "mos",
|
687 |
+
"M\u00fcnd\u00fc": "muh",
|
688 |
+
"Naasioi": "nas",
|
689 |
+
"Nad\u00ebb": "mbj",
|
690 |
+
"Nafaanra": "nfr",
|
691 |
+
"Naga, Kharam": "kfw",
|
692 |
+
"Naga, Tangshang": "nst",
|
693 |
+
"Nagamese": "nag",
|
694 |
+
"Nahuatl, Central Huasteca": "nch",
|
695 |
+
"Nahuatl, Eastern Huasteca": "nhe",
|
696 |
+
"Nahuatl, Guerrero": "ngu",
|
697 |
+
"Nahuatl, Highland Puebla": "azz",
|
698 |
+
"Nahuatl, Isthmus-Mecayapan": "nhx",
|
699 |
+
"Nahuatl, Michoac\u00e1n": "ncl",
|
700 |
+
"Nahuatl, Northern Oaxaca": "nhy",
|
701 |
+
"Nahuatl, Northern Puebla": "ncj",
|
702 |
+
"Nahuatl, Sierra Negra": "nsu",
|
703 |
+
"Nahuatl, Southeastern Puebla": "npl",
|
704 |
+
"Nahuatl, Tlamacazapa": "nuz",
|
705 |
+
"Nahuatl, Western Huasteca": "nhw",
|
706 |
+
"Nahuatl, Zacatl\u00e1n-Ahuacatl\u00e1n-Tepetzintla": "nhi",
|
707 |
+
"Nalca": "nlc",
|
708 |
+
"Nambiku\u00e1ra, Southern": "nab",
|
709 |
+
"Nanai": "gld",
|
710 |
+
"Nande": "nnb",
|
711 |
+
"Napu": "npy",
|
712 |
+
"Nasa": "pbb",
|
713 |
+
"Nateni": "ntm",
|
714 |
+
"Nawdm": "nmz",
|
715 |
+
"Nawuri": "naw",
|
716 |
+
"Naxi": "nxq",
|
717 |
+
"Ndamba": "ndj",
|
718 |
+
"Ndogo": "ndz",
|
719 |
+
"Ndut": "ndv",
|
720 |
+
"Newar": "new",
|
721 |
+
"Ngaju": "nij",
|
722 |
+
"Ngambay": "sba",
|
723 |
+
"Ngangam": "gng",
|
724 |
+
"Ngbaka": "nga",
|
725 |
+
"Ngindo": "nnq",
|
726 |
+
"Ngulu": "ngp",
|
727 |
+
"Ng\u00e4bere": "gym",
|
728 |
+
"Ng\u2019akarimojong": "kdj",
|
729 |
+
"Nias": "nia",
|
730 |
+
"Nilamba": "nim",
|
731 |
+
"Ninzo": "nin",
|
732 |
+
"Nkonya": "nko",
|
733 |
+
"Nogai": "nog",
|
734 |
+
"Nomaande": "lem",
|
735 |
+
"Nomatsigenga": "not",
|
736 |
+
"Noone": "nhu",
|
737 |
+
"Ntcham": "bud",
|
738 |
+
"Nuer": "nus",
|
739 |
+
"Nugunu": "yas",
|
740 |
+
"Nuni, Southern": "nnw",
|
741 |
+
"Nyabwa": "nwb",
|
742 |
+
"Nyakyusa-Ngonde": "nyy",
|
743 |
+
"Nyankore": "nyn",
|
744 |
+
"Nyaturu": "rim",
|
745 |
+
"Nyindrou": "lid",
|
746 |
+
"Nyole": "nuj",
|
747 |
+
"Nyoro": "nyo",
|
748 |
+
"Nzema": "nzi",
|
749 |
+
"Obolo": "ann",
|
750 |
+
"Odia": "ory",
|
751 |
+
"Ojibwa, Northwestern": "ojb-script_syllabics",
|
752 |
+
"Oku": "oku",
|
753 |
+
"Oniyan": "bsc",
|
754 |
+
"Oroko": "bdu",
|
755 |
+
"Oromo": "orm",
|
756 |
+
"Orya": "ury",
|
757 |
+
"Ossetic": "oss",
|
758 |
+
"Otomi, Mezquital": "ote",
|
759 |
+
"Otomi, Quer\u00e9taro": "otq",
|
760 |
+
"Owa": "stn",
|
761 |
+
"Paasaal": "sig",
|
762 |
+
"Pahari, Kullu": "kfx",
|
763 |
+
"Pahari, Mahasu": "bfz",
|
764 |
+
"Paicoca": "sey",
|
765 |
+
"Paiute, Northern": "pao",
|
766 |
+
"Palauan": "pau",
|
767 |
+
"Palaung, Ruching": "pce",
|
768 |
+
"Palawano, Brooke\u2019s Point": "plw",
|
769 |
+
"Pamona": "pmf",
|
770 |
+
"Pangasinan": "pag",
|
771 |
+
"Papiamentu": "pap",
|
772 |
+
"Paranan": "prf",
|
773 |
+
"Parec\u00eds": "pab",
|
774 |
+
"Parkwa": "pbi",
|
775 |
+
"Patamona": "pbc",
|
776 |
+
"Paumar\u00ed": "pad",
|
777 |
+
"Pele-Ata": "ata",
|
778 |
+
"Penan, Eastern": "pez",
|
779 |
+
"Pengo": "peg",
|
780 |
+
"Persian": "fas",
|
781 |
+
"Pidgin, Nigerian": "pcm",
|
782 |
+
"Pijin": "pis",
|
783 |
+
"Pinyin": "pny",
|
784 |
+
"Piratapuyo": "pir",
|
785 |
+
"Pitjantjatjara": "pjt",
|
786 |
+
"Pogolo": "poy",
|
787 |
+
"Polish": "pol",
|
788 |
+
"Popoloca, San Lu\u00eds Temalacayuca": "pps",
|
789 |
+
"Popoloca, San Marcos Tlacoyalco": "pls",
|
790 |
+
"Popoluca, Highland": "poi",
|
791 |
+
"Poqomchi\u2019": "poh-dialect_western",
|
792 |
+
"Portuguese": "por",
|
793 |
+
"Prai": "prt",
|
794 |
+
"Puinave": "pui",
|
795 |
+
"Punjabi, Eastern": "pan",
|
796 |
+
"Purepecha": "tsz",
|
797 |
+
"Puroik": "suv",
|
798 |
+
"P\u00e9v\u00e9": "lme",
|
799 |
+
"Quechua, Ayacucho": "quy",
|
800 |
+
"Quechua, Cajamarca": "qvc",
|
801 |
+
"Quechua, Cusco": "quz",
|
802 |
+
"Quechua, Eastern Apur\u00edmac": "qve",
|
803 |
+
"Quechua, Huallaga": "qub",
|
804 |
+
"Quechua, Huamal\u00edes-Dos de Mayo Hu\u00e1nuco": "qvh",
|
805 |
+
"Quechua, Huaylas Ancash": "qwh",
|
806 |
+
"Quechua, Huaylla Wanca": "qvw",
|
807 |
+
"Quechua, Lambayeque": "quf",
|
808 |
+
"Quechua, Margos-Yarowilca-Lauricocha": "qvm",
|
809 |
+
"Quechua, North Bolivian": "qul",
|
810 |
+
"Quechua, North Jun\u00edn": "qvn",
|
811 |
+
"Quechua, Northern Conchucos Ancash": "qxn",
|
812 |
+
"Quechua, Panao": "qxh",
|
813 |
+
"Quechua, San Mart\u00edn": "qvs",
|
814 |
+
"Quechua, South Bolivian": "quh",
|
815 |
+
"Quechua, Southern Conchucos": "qxo",
|
816 |
+
"Quichua, Ca\u00f1ar Highland": "qxr",
|
817 |
+
"Quichua, Napo": "qvo",
|
818 |
+
"Quichua, Northern Pastaza": "qvz",
|
819 |
+
"Quichua, Salasaca Highland": "qxl",
|
820 |
+
"Quichua, Tena Lowland": "quw",
|
821 |
+
"Q\u2019anjob\u2019al": "kjb",
|
822 |
+
"Q\u2019eqchi\u2019": "kek",
|
823 |
+
"Rabha": "rah",
|
824 |
+
"Rajbanshi": "rjs",
|
825 |
+
"Ramoaaina": "rai",
|
826 |
+
"Rampi": "lje",
|
827 |
+
"Ranglong": "rnl",
|
828 |
+
"Rangpuri": "rkt",
|
829 |
+
"Rapa Nui": "rap",
|
830 |
+
"Ravula": "yea",
|
831 |
+
"Rawang": "raw",
|
832 |
+
"Rejang": "rej",
|
833 |
+
"Rendille": "rel",
|
834 |
+
"Riang Lang": "ril",
|
835 |
+
"Rigwe": "iri",
|
836 |
+
"Rikou": "rgu",
|
837 |
+
"Rohingya": "rhg",
|
838 |
+
"Romani, Carpathian": "rmc-script_cyrillic",
|
839 |
+
"Romani, Sinte": "rmo",
|
840 |
+
"Romani, Vlax": "rmy-script_cyrillic",
|
841 |
+
"Romanian": "ron",
|
842 |
+
"Romblomanon": "rol",
|
843 |
+
"Ron": "cla",
|
844 |
+
"Ronga": "rng",
|
845 |
+
"Roviana": "rug",
|
846 |
+
"Rundi": "run",
|
847 |
+
"Russian": "rus",
|
848 |
+
"Saamya-Gwe": "lsm",
|
849 |
+
"Sabaot": "spy",
|
850 |
+
"Sadri": "sck",
|
851 |
+
"Sahu": "saj",
|
852 |
+
"Sakachep": "sch",
|
853 |
+
"Sama, Central": "sml",
|
854 |
+
"Sambal": "xsb",
|
855 |
+
"Sambal, Botolan": "sbl",
|
856 |
+
"Samburu": "saq",
|
857 |
+
"Samo, Southern": "sbd",
|
858 |
+
"Samoan": "smo",
|
859 |
+
"Sampang": "rav",
|
860 |
+
"Sangir": "sxn",
|
861 |
+
"Sango": "sag",
|
862 |
+
"Sangu": "sbp",
|
863 |
+
"Sanum\u00e1": "xsu",
|
864 |
+
"Saramaccan": "srm",
|
865 |
+
"Sasak": "sas",
|
866 |
+
"Sa\u2019a": "apb",
|
867 |
+
"Sebat Bet Gurage": "sgw",
|
868 |
+
"Sedoa": "tvw",
|
869 |
+
"Sekpele": "lip",
|
870 |
+
"Selaru": "slu",
|
871 |
+
"Selee": "snw",
|
872 |
+
"Semai": "sea",
|
873 |
+
"Semelai": "sza",
|
874 |
+
"Sena": "seh",
|
875 |
+
"Seychelles French Creole": "crs",
|
876 |
+
"Shambala": "ksb",
|
877 |
+
"Shanga": "sho",
|
878 |
+
"Sharanahua": "mcd",
|
879 |
+
"Shawi": "cbt",
|
880 |
+
"Sherpa": "xsr",
|
881 |
+
"Shilluk": "shk",
|
882 |
+
"Shipibo-Conibo": "shp",
|
883 |
+
"Shona": "sna",
|
884 |
+
"Shor": "cjs",
|
885 |
+
"Shuar": "jiv",
|
886 |
+
"Siane": "snp",
|
887 |
+
"Siang": "sya",
|
888 |
+
"Sidamo": "sid",
|
889 |
+
"Siona": "snn",
|
890 |
+
"Siriano": "sri",
|
891 |
+
"Sirmauri": "srx",
|
892 |
+
"Sisaala, Tumulung": "sil",
|
893 |
+
"Sissala": "sld",
|
894 |
+
"Siwu": "akp",
|
895 |
+
"Soga": "xog",
|
896 |
+
"Somali": "som",
|
897 |
+
"Somba-Siawari": "bmu",
|
898 |
+
"Songhay, Koyra Chiini": "khq",
|
899 |
+
"Songhay, Koyraboro Senni": "ses",
|
900 |
+
"Sougb": "mnx",
|
901 |
+
"Spanish": "spa",
|
902 |
+
"Sranan Tongo": "srn",
|
903 |
+
"Suba": "sxb",
|
904 |
+
"Subanon, Western": "suc",
|
905 |
+
"Sudest": "tgo",
|
906 |
+
"Sukuma": "suk",
|
907 |
+
"Sunda": "sun",
|
908 |
+
"Sunwar": "suz",
|
909 |
+
"Surgujia": "sgj",
|
910 |
+
"Susu": "sus",
|
911 |
+
"Swahili": "swh",
|
912 |
+
"Swedish": "swe",
|
913 |
+
"Sylheti": "syl",
|
914 |
+
"S\u00e9noufo, Djimini": "dyi",
|
915 |
+
"S\u00e9noufo, Mamara": "myk",
|
916 |
+
"S\u00e9noufo, Supyire": "spp",
|
917 |
+
"Taabwa": "tap",
|
918 |
+
"Tabaru": "tby",
|
919 |
+
"Tacana": "tna",
|
920 |
+
"Tachelhit": "shi",
|
921 |
+
"Tado": "klw",
|
922 |
+
"Tagalog": "tgl",
|
923 |
+
"Tagbanwa, Calamian": "tbk",
|
924 |
+
"Tagin": "tgj",
|
925 |
+
"Tai Dam": "blt",
|
926 |
+
"Tairora, North": "tbg",
|
927 |
+
"Tairora, South": "omw",
|
928 |
+
"Tajik": "tgk",
|
929 |
+
"Tajio": "tdj",
|
930 |
+
"Takia": "tbc",
|
931 |
+
"Talinga-Bwisi": "tlj",
|
932 |
+
"Talysh": "tly",
|
933 |
+
"Tamajaq, Tawallammat": "ttq-script_tifinagh",
|
934 |
+
"Tamang, Eastern": "taj",
|
935 |
+
"Tamasheq": "taq",
|
936 |
+
"Tamil": "tam",
|
937 |
+
"Tampulma": "tpm",
|
938 |
+
"Tangoa": "tgp",
|
939 |
+
"Tanna, North": "tnn",
|
940 |
+
"Tarahumara, Western": "tac",
|
941 |
+
"Tarifit": "rif-script_arabic",
|
942 |
+
"Tatar": "tat",
|
943 |
+
"Tatuyo": "tav",
|
944 |
+
"Tawbuid": "twb",
|
945 |
+
"Tboli": "tbl",
|
946 |
+
"Tehit": "kps",
|
947 |
+
"Teiwa": "twe",
|
948 |
+
"Tektiteko": "ttc",
|
949 |
+
"Telugu": "tel",
|
950 |
+
"Tem": "kdh",
|
951 |
+
"Tengger": "tes",
|
952 |
+
"Tennet": "tex",
|
953 |
+
"Tepehua, Huehuetla": "tee",
|
954 |
+
"Tepehua, Pisaflores": "tpp",
|
955 |
+
"Tepehua, Tlachichilco": "tpt",
|
956 |
+
"Tepehuan, Southeastern": "stp",
|
957 |
+
"Teribe": "tfr",
|
958 |
+
"Termanu": "twu",
|
959 |
+
"Ter\u00eana": "ter",
|
960 |
+
"Tewa": "tew",
|
961 |
+
"Tharu, Dangaura": "thl",
|
962 |
+
"Themne": "tem",
|
963 |
+
"Tibetan, Amdo": "adx",
|
964 |
+
"Tibetan, Central": "bod",
|
965 |
+
"Tibetan, Khams": "khg",
|
966 |
+
"Ticuna": "tca",
|
967 |
+
"Tigrigna": "tir",
|
968 |
+
"Tii": "txq",
|
969 |
+
"Tikar": "tik",
|
970 |
+
"Tlicho": "dgr",
|
971 |
+
"Toba": "tob",
|
972 |
+
"Toba-Maskoy": "tmf",
|
973 |
+
"Tobanga": "tng",
|
974 |
+
"Tobelo": "tlb",
|
975 |
+
"Tohono O\u2019odham": "ood",
|
976 |
+
"Tok Pisin": "tpi",
|
977 |
+
"Tol": "jic",
|
978 |
+
"Tolaki": "lbw",
|
979 |
+
"Tombonuo": "txa",
|
980 |
+
"Tombulu": "tom",
|
981 |
+
"Tonga": "toh",
|
982 |
+
"Tontemboan": "tnt",
|
983 |
+
"Toraja-Sa\u2019dan": "sda",
|
984 |
+
"Torres Strait Creole": "tcs",
|
985 |
+
"Totonac, Coyutla": "toc",
|
986 |
+
"Totonac, Highland": "tos",
|
987 |
+
"Toura": "neb",
|
988 |
+
"Trinitario": "trn",
|
989 |
+
"Triqui, Chicahuaxtla": "trs",
|
990 |
+
"Triqui, Copala": "trc",
|
991 |
+
"Tri\u00f3": "tri",
|
992 |
+
"Tsafiki": "cof",
|
993 |
+
"Tsakhur": "tkr",
|
994 |
+
"Tsikimba": "kdl",
|
995 |
+
"Tsiman\u00e9": "cas",
|
996 |
+
"Tsonga": "tso",
|
997 |
+
"Tucano": "tuo",
|
998 |
+
"Tuma-Irumu": "iou",
|
999 |
+
"Tumak": "tmc",
|
1000 |
+
"Tunebo, Central": "tuf",
|
1001 |
+
"Turkish": "tur",
|
1002 |
+
"Turkmen": "tuk-script_arabic",
|
1003 |
+
"Tuwuli": "bov",
|
1004 |
+
"Tuyuca": "tue",
|
1005 |
+
"Tyap": "kcg",
|
1006 |
+
"Tzeltal": "tzh-dialect_tenejapa",
|
1007 |
+
"Tzotzil": "tzo-dialect_chamula",
|
1008 |
+
"Tz\u2019utujil": "tzj-dialect_eastern",
|
1009 |
+
"Uab Meto": "aoz",
|
1010 |
+
"Udmurt": "udm",
|
1011 |
+
"Uduk": "udu",
|
1012 |
+
"Ukrainian": "ukr",
|
1013 |
+
"Uma": "ppk",
|
1014 |
+
"Umbu-Ungu": "ubu",
|
1015 |
+
"Urak Lawoi\u2019": "urk",
|
1016 |
+
"Urarina": "ura",
|
1017 |
+
"Urat": "urt",
|
1018 |
+
"Urdu": "urd-script_latin",
|
1019 |
+
"Uripiv-Wala-Rano-Atchin": "upv",
|
1020 |
+
"Uspanteko": "usp",
|
1021 |
+
"Uyghur": "uig-script_cyrillic",
|
1022 |
+
"Uzbek": "uzb-script_cyrillic",
|
1023 |
+
"Vagla": "vag",
|
1024 |
+
"Vengo": "bav",
|
1025 |
+
"Vidunda": "vid",
|
1026 |
+
"Vili": "vif",
|
1027 |
+
"Vunjo": "vun",
|
1028 |
+
"Vute": "vut",
|
1029 |
+
"Wa, Parauk": "prk",
|
1030 |
+
"Waama": "wwa",
|
1031 |
+
"Waima": "rro",
|
1032 |
+
"Waimaha": "bao",
|
1033 |
+
"Waiwai": "waw",
|
1034 |
+
"Wala": "lgl",
|
1035 |
+
"Wali": "wlx",
|
1036 |
+
"Wamey": "cou",
|
1037 |
+
"Wamp\u00eds": "hub",
|
1038 |
+
"Wanano": "gvc",
|
1039 |
+
"Wandala": "mfi",
|
1040 |
+
"Wapishana": "wap",
|
1041 |
+
"Warao": "wba",
|
1042 |
+
"Waray-Waray": "war",
|
1043 |
+
"Wayana": "way",
|
1044 |
+
"Wayuu": "guc",
|
1045 |
+
"Welsh": "cym",
|
1046 |
+
"Wersing": "kvw",
|
1047 |
+
"Whitesands": "tnp",
|
1048 |
+
"Witoto, Minika": "hto",
|
1049 |
+
"Witoto, Murui": "huu",
|
1050 |
+
"Wolaytta": "wal-script_ethiopic",
|
1051 |
+
"Wolio": "wlo",
|
1052 |
+
"Woun Meu": "noa",
|
1053 |
+
"W\u00e8 Northern": "wob",
|
1054 |
+
"Xaasongaxango": "kao",
|
1055 |
+
"Xer\u00e9nte": "xer",
|
1056 |
+
"Yagua": "yad",
|
1057 |
+
"Yakan": "yka",
|
1058 |
+
"Yakut": "sah",
|
1059 |
+
"Yala": "yba",
|
1060 |
+
"Yali, Angguruk": "yli",
|
1061 |
+
"Yali, Ninia": "nlk",
|
1062 |
+
"Yalunka": "yal",
|
1063 |
+
"Yamba": "yam",
|
1064 |
+
"Yambeta": "yat",
|
1065 |
+
"Yamdena": "jmd",
|
1066 |
+
"Yami": "tao",
|
1067 |
+
"Yaminahua": "yaa",
|
1068 |
+
"Yanesha\u2019": "ame",
|
1069 |
+
"Yanomam\u00f6": "guu",
|
1070 |
+
"Yao": "yao",
|
1071 |
+
"Yaour\u00e9": "yre",
|
1072 |
+
"Yawa": "yva",
|
1073 |
+
"Yemba": "ybb",
|
1074 |
+
"Yine": "pib",
|
1075 |
+
"Yipma": "byr",
|
1076 |
+
"Yom": "pil",
|
1077 |
+
"Yoruba": "yor",
|
1078 |
+
"Yucuna": "ycn",
|
1079 |
+
"Yupik, Saint Lawrence Island": "ess",
|
1080 |
+
"Yuracare": "yuz",
|
1081 |
+
"Zaiwa": "atb",
|
1082 |
+
"Zande": "zne",
|
1083 |
+
"Zapotec, Alo\u00e1pam": "zaq",
|
1084 |
+
"Zapotec, Amatl\u00e1n": "zpo",
|
1085 |
+
"Zapotec, Cajonos": "zad",
|
1086 |
+
"Zapotec, Choapan": "zpc",
|
1087 |
+
"Zapotec, Coatecas Altas": "zca",
|
1088 |
+
"Zapotec, Guevea de Humboldt": "zpg",
|
1089 |
+
"Zapotec, Isthmus": "zai",
|
1090 |
+
"Zapotec, Lachix\u00edo": "zpl",
|
1091 |
+
"Zapotec, Miahuatl\u00e1n": "zam",
|
1092 |
+
"Zapotec, Mitla": "zaw",
|
1093 |
+
"Zapotec, Mixtepec": "zpm",
|
1094 |
+
"Zapotec, Ocotl\u00e1n": "zac",
|
1095 |
+
"Zapotec, Ozolotepec": "zao",
|
1096 |
+
"Zapotec, Quioquitani-Quier\u00ed": "ztq",
|
1097 |
+
"Zapotec, Rinc\u00f3n": "zar",
|
1098 |
+
"Zapotec, San Vicente Coatl\u00e1n": "zpt",
|
1099 |
+
"Zapotec, Santa Mar\u00eda Quiegolani": "zpi",
|
1100 |
+
"Zapotec, Santo Domingo Albarradas": "zas",
|
1101 |
+
"Zapotec, Sierra de Ju\u00e1rez": "zaa",
|
1102 |
+
"Zapotec, Texmelucan": "zpz",
|
1103 |
+
"Zapotec, Western Tlacolula Valley": "zab",
|
1104 |
+
"Zapotec, Yal\u00e1lag": "zpu",
|
1105 |
+
"Zapotec, Yareni": "zae",
|
1106 |
+
"Zapotec, Yatee": "zty",
|
1107 |
+
"Zapotec, Yatzachi": "zav",
|
1108 |
+
"Zaza": "zza",
|
1109 |
+
"Zhuang, Yongbei": "zyb",
|
1110 |
+
"Zigula": "ziw",
|
1111 |
+
"Zoque, Francisco Le\u00f3n": "zos",
|
1112 |
+
"Zulgo-Gemzek": "gnd",
|
1113 |
+
"\u00c9w\u00e9": "ewe"
|
1114 |
+
}
|
mm_num2word.py
ADDED
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
This file is adapted from https://github.com/hpbyte/Myanmar_Number_to_Words
|
3 |
+
"""
|
4 |
+
import re
|
5 |
+
|
6 |
+
mm_digit = {
|
7 |
+
"แ": "แแฏแ",
|
8 |
+
"แ": "แแ
แบ",
|
9 |
+
"แ": "แแพแ
แบ",
|
10 |
+
"แ": "แแฏแถ:",
|
11 |
+
"แ": "แแฑ:",
|
12 |
+
"แ
": "แแซ:",
|
13 |
+
"แ": "แแผแฑแฌแแบ",
|
14 |
+
"แ": "แแฏแแพแ
แบ",
|
15 |
+
"แ": "แแพแ
แบ",
|
16 |
+
"แ": "แแญแฏ:",
|
17 |
+
}
|
18 |
+
|
19 |
+
# regular expressions
|
20 |
+
rgxPh = "^(แแ|แแ)"
|
21 |
+
rgxDate = "[แ-แ]{1,2}-[แ-แ]{1,2}-[แ-แ]{4}|[แ-แ]{1,2}\/[แ-แ]{1,2}\/[แ-แ]{4}"
|
22 |
+
rgxTime = "[แ-แ]{1,2}:[แ-แ]{1,2}"
|
23 |
+
rgxDec = "[แ-แ]*\.[แ-แ]*"
|
24 |
+
rgxAmt = "[,แ-แ]+"
|
25 |
+
|
26 |
+
|
27 |
+
def convert_digit(num):
|
28 |
+
"""
|
29 |
+
@type num str
|
30 |
+
@param num Myanmar number
|
31 |
+
@rtype str
|
32 |
+
@return converted Myanmar spoken words
|
33 |
+
"""
|
34 |
+
|
35 |
+
converted = ""
|
36 |
+
nb_digits = len(num)
|
37 |
+
|
38 |
+
def check_if_zero(pos):
|
39 |
+
return not num[-pos] == "แ"
|
40 |
+
|
41 |
+
def hundred_thousandth_val():
|
42 |
+
n = num[:-5]
|
43 |
+
return (
|
44 |
+
("แแญแแบ: " + mm_num2word(n))
|
45 |
+
if (n[-2:] == "แแ")
|
46 |
+
else (mm_num2word(n) + "แแญแแบ: ")
|
47 |
+
)
|
48 |
+
|
49 |
+
def thousandth_val():
|
50 |
+
return mm_digit[num[-4]] + ("แแฑแฌแแบ " if (num[-3:] == "แแแ") else "แแฑแฌแแบแท ")
|
51 |
+
|
52 |
+
def hundredth_val():
|
53 |
+
return mm_digit[num[-3]] + (
|
54 |
+
"แแฌแท "
|
55 |
+
if (
|
56 |
+
(num[-2] == "แ" and re.match(r"[แ-แ]", num[-1]))
|
57 |
+
or (re.match(r"[แ-แ]", num[-2]) and num[-1] == "แ")
|
58 |
+
)
|
59 |
+
else "แแฌ "
|
60 |
+
)
|
61 |
+
|
62 |
+
def tenth_val():
|
63 |
+
return ("" if (num[-2] == "แ") else mm_digit[num[-2]]) + (
|
64 |
+
"แแแบ " if (num[-1] == "แ") else "แแแบแท "
|
65 |
+
)
|
66 |
+
|
67 |
+
if nb_digits > 5:
|
68 |
+
converted += hundred_thousandth_val()
|
69 |
+
if (nb_digits > 4) and check_if_zero(5):
|
70 |
+
converted += mm_digit[num[-5]] + "แแฑแฌแแบ: "
|
71 |
+
if (nb_digits > 3) and check_if_zero(4):
|
72 |
+
converted += thousandth_val()
|
73 |
+
if (nb_digits > 2) and check_if_zero(3):
|
74 |
+
converted += hundredth_val()
|
75 |
+
if (nb_digits > 1) and check_if_zero(2):
|
76 |
+
converted += tenth_val()
|
77 |
+
if (nb_digits > 0) and check_if_zero(1):
|
78 |
+
converted += mm_digit[num[-1]]
|
79 |
+
|
80 |
+
return converted
|
81 |
+
|
82 |
+
|
83 |
+
def mm_num2word(num):
|
84 |
+
"""
|
85 |
+
Detect type of number and convert accordingly
|
86 |
+
|
87 |
+
@type num str
|
88 |
+
@param num Myanmar number
|
89 |
+
@rtype str
|
90 |
+
@return converted Myanmar spoken words
|
91 |
+
"""
|
92 |
+
|
93 |
+
word = ""
|
94 |
+
|
95 |
+
# phone number
|
96 |
+
if re.match(r"" + rgxPh, num[:2]):
|
97 |
+
word = " ".join([(mm_digit[d] if not d == "แ" else "แแฝแแบ") for d in num])
|
98 |
+
# date
|
99 |
+
elif re.match(r"" + rgxDate, num):
|
100 |
+
n = re.split(r"-|/", num)
|
101 |
+
word = (
|
102 |
+
convert_digit(n[-1])
|
103 |
+
+ " แแฏแแพแ
แบ "
|
104 |
+
+ convert_digit(n[1])
|
105 |
+
+ " แแแญแฏแแบ: "
|
106 |
+
+ convert_digit(n[0])
|
107 |
+
+ " แแแบ"
|
108 |
+
)
|
109 |
+
# time
|
110 |
+
elif re.match(r"" + rgxTime, num):
|
111 |
+
n = re.split(r":", num)
|
112 |
+
word = (convert_digit(n[0]) + " แแฌแแฎ ") + (
|
113 |
+
"แแฝแฒ" if (n[1] == "แแ") else (convert_digit(n[1]) + " แแญแแ
แบ")
|
114 |
+
)
|
115 |
+
# decimal
|
116 |
+
elif re.match(r"" + rgxDec, num):
|
117 |
+
n = re.split(r"\.", num)
|
118 |
+
word = convert_digit(n[0]) + " แแฟแ " + " ".join([mm_digit[d] for d in n[1]])
|
119 |
+
# amount
|
120 |
+
elif re.match(r"" + rgxAmt, num):
|
121 |
+
word = convert_digit(num.replace(",", ""))
|
122 |
+
# default
|
123 |
+
else:
|
124 |
+
raise Exception("Cannot convert the provided number format!")
|
125 |
+
|
126 |
+
return word
|
127 |
+
|
128 |
+
|
129 |
+
def extract_num(S):
|
130 |
+
"""
|
131 |
+
Extract numbers from the input string
|
132 |
+
|
133 |
+
@type S str
|
134 |
+
@param S Myanmar sentence
|
135 |
+
@rtype list
|
136 |
+
@return a list of Myanmar numbers
|
137 |
+
"""
|
138 |
+
matchedNums = re.compile(
|
139 |
+
"%s|%s|%s|%s" % (rgxDate, rgxTime, rgxDec, rgxAmt)
|
140 |
+
).findall(S)
|
141 |
+
|
142 |
+
return matchedNums
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ttsmms
|
2 |
+
underthesea
|
3 |
+
nltk
|
4 |
+
soundfile
|
5 |
+
num2words
|