edyrkaj committed on
Commit
70d54fa
·
verified ·
1 Parent(s): 91796e5

Create iso639_wrapper.py

Browse files
Files changed (1) hide show
  1. iso639_wrapper.py +60 -0
iso639_wrapper.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from iso639 import Lang, iter_langs
2
+ from regex import R
3
+
4
+
5
# Materialise every language exposed by the iso639 package once; all lookup
# tables below are derived from this list.
langs = list(iter_langs())

# NOTE(review): the code -> name tables skip languages whose code for that
# part is empty (the iso639 package appears to report a missing code as an
# empty string -- confirm). Without the filter every such language collapses
# onto one shared "" key, so an empty code would resolve to an arbitrary name.
# The name -> code tables are intentionally left unfiltered so existing
# lookups keep returning whatever the package reports for each language.

# iso 1
iso1_code_to_name = {lg.pt1: lg.name for lg in langs if lg.pt1}
iso1_name_to_code = {lg.name: lg.pt1 for lg in langs}

# iso 2 (bibliographic "b" and terminologic "t" variants)
iso2b_code_to_name = {lg.pt2b: lg.name for lg in langs if lg.pt2b}
iso2b_name_to_code = {lg.name: lg.pt2b for lg in langs}
iso2t_code_to_name = {lg.pt2t: lg.name for lg in langs if lg.pt2t}
iso2t_name_to_code = {lg.name: lg.pt2t for lg in langs}

# iso 3
iso3_code_to_name = {lg.pt3: lg.name for lg in langs if lg.pt3}
iso3_name_to_code = {lg.name: lg.pt3 for lg in langs}

# iso 5
iso5_code_to_name = {lg.pt5: lg.name for lg in langs if lg.pt5}
iso5_name_to_code = {lg.name: lg.pt5 for lg in langs}

# Order in which the code tables are consulted when resolving a code, see
# https://github.com/Helsinki-NLP/Tatoeba-Challenge/blob/master/README.md#in-more-detail
# The misspelt identifier is kept as-is because other code refers to it by
# this exact name.
helsinki_precendence = ["iso3", "iso5", "iso1", "iso2t", "iso2b"]

# Language names that should be replaced by a preferred spelling.
rename_dict = {"Panjabi": "Punjabi"}
29
+
30
def rename_languages(language):
    """Return the preferred spelling for *language*.

    The value is looked up in the module-level ``rename_dict``; anything
    without an entry there is handed back unchanged.
    """
    return rename_dict.get(language, language)
34
+
35
def rename_return_value(func):
    """Decorate *func* so its result is passed through ``rename_languages``.

    The wrapper calls *func* with the given arguments and post-processes the
    result depending on its type:

    * ``str``  -- the string itself is renamed;
    * ``list`` -- every item is renamed, a new list is returned;
    * ``dict`` -- every value is renamed (keys are kept), a new dict is
      returned;
    * anything else (including ``None``) is returned untouched.

    The wrapper keeps the decorated function's ``__name__``, ``__doc__`` and
    related metadata.
    """
    # Local import so this block does not rely on a file-level import.
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        if isinstance(result, str):
            return rename_languages(result)
        if isinstance(result, list):
            return [rename_languages(item) for item in result]
        if isinstance(result, dict):
            return {key: rename_languages(value) for key, value in result.items()}
        return result

    return wrapper
47
+
48
@rename_return_value
def get_name_from_iso_code(iso_code, precedence=helsinki_precendence):
    """Return the language name registered for *iso_code*.

    Args:
        iso_code: The code to resolve.
        precedence: Iterable of table identifiers (``"iso1"``, ``"iso2b"``,
            ``"iso2t"``, ``"iso3"``, ``"iso5"``) giving the order in which the
            code tables are searched. Unrecognised identifiers are skipped.
            The default list is shared and is never modified here.

    Returns:
        The name from the first table in *precedence* that contains
        *iso_code*, or ``None`` when no table contains it. A string result is
        additionally post-processed by the ``rename_return_value`` decorator.
    """
    tables = {
        "iso1": iso1_code_to_name,
        "iso2b": iso2b_code_to_name,
        "iso2t": iso2t_code_to_name,
        "iso3": iso3_code_to_name,
        "iso5": iso5_code_to_name,
    }
    for code_type in precedence:
        table = tables.get(code_type)
        if table is not None and iso_code in table:
            return table[iso_code]
    # No table in the requested precedence knows this code.
    return None