"""
This file is adapted from https://github.com/hpbyte/Myanmar_Number_to_Words
"""
import re
# Myanmar digit -> spoken word.
# NOTE(review): the words ending in an ASCII ":" are spelled exactly as the
# original table spells them (":" where Burmese orthography has a visarga);
# confirm what the downstream consumer expects before normalizing them.
mm_digit = {
    "၀": "သုည",
    "၁": "တစ်",
    "၂": "နှစ်",
    "၃": "သုံ:",
    "၄": "လေ:",
    "၅": "ငါ:",
    "၆": "ခြောက်",
    "၇": "ခုနှစ်",
    "၈": "ရှစ်",
    "၉": "ကို:",
}

# regular expressions (pattern fragments; extract_num joins them with "|")
rgxPh = r"^(၀၁|၀၉)"
rgxDate = (
    r"[၀-၉]{1,2}-[၀-၉]{1,2}-[၀-၉]{4}"
    r"|[၀-၉]{1,2}/[၀-၉]{1,2}/[၀-၉]{4}"
)
rgxTime = r"[၀-၉]{1,2}:[၀-၉]{1,2}"
# A digit is required after the point, so a bare "." in running text is
# not reported as a number.
rgxDec = r"[၀-၉]*\.[၀-၉]+"
# Digit groups separated by commas; a bare "," is not an amount.
rgxAmt = r"[၀-၉]+(?:,[၀-၉]+)*"

_ZERO = "၀"
_DIGITS_ONLY = re.compile(r"[၀-၉]+")
# Alternatives are tried left to right, so the more specific formats
# (date, time, decimal) win over the plain amount.
_ANY_NUMBER = re.compile("|".join((rgxDate, rgxTime, rgxDec, rgxAmt)))


def convert_digit(num):
    """
    Convert a plain run of Myanmar digits into spoken words.

    Leading zeros are ignored. A string consisting only of zeros is read as
    the word for zero; an empty string yields an empty string.

    @type   num str
    @param  num Myanmar number (digits ၀-၉ only, no separators)
    @rtype  str
    @return converted Myanmar spoken words
    @raise  KeyError if num contains a character that is not a Myanmar digit
    """
    digits = num.lstrip(_ZERO)
    if not digits:
        return mm_digit[_ZERO] if num else ""

    size = len(digits)
    words = []

    # Everything above the five lowest digits is counted in lakhs.
    if size > 5:
        lakhs = digits[:-5]
        lakh_words = convert_digit(lakhs)
        if lakhs[-2:] == "၀၀":
            # Round hundreds of lakhs put the unit word first.
            words.append("သိန်: " + lakh_words)
        else:
            words.append(lakh_words + "သိန်: ")

    # Ten-thousands.
    if size > 4 and digits[-5] != _ZERO:
        words.append(mm_digit[digits[-5]] + "သောင်: ")

    # Thousands: plain form only when nothing follows.
    if size > 3 and digits[-4] != _ZERO:
        unit = "ထောင် " if digits[-3:] == "၀၀၀" else "ထောင့် "
        words.append(mm_digit[digits[-4]] + unit)

    # Hundreds: the "ရာ့" form is used when exactly one of the tens/units
    # digits is zero, the plain "ရာ" form otherwise.
    if size > 2 and digits[-3] != _ZERO:
        one_of_two_is_zero = (digits[-2] == _ZERO) != (digits[-1] == _ZERO)
        unit = "ရာ့ " if one_of_two_is_zero else "ရာ "
        words.append(mm_digit[digits[-3]] + unit)

    # Tens: a leading "one" is not spoken (10-19 start with the unit word).
    if size > 1 and digits[-2] != _ZERO:
        tens_word = "" if digits[-2] == "၁" else mm_digit[digits[-2]]
        unit = "ဆယ် " if digits[-1] == _ZERO else "ဆယ့် "
        words.append(tens_word + unit)

    # Units.
    if digits[-1] != _ZERO:
        words.append(mm_digit[digits[-1]])

    return "".join(words)


def mm_num2word(num):
    """
    Detect type of number and convert accordingly.

    Formats are tried in this order: phone number (all digits, starting with
    ၀၁ or ၀၉), date (d-m-yyyy or d/m/yyyy), time (h:m), decimal, amount
    (digits with optional comma separators).

    @type   num str
    @param  num Myanmar number
    @rtype  str
    @return converted Myanmar spoken words
    @raise  ValueError if num matches none of the supported formats
    """
    # phone number: read digit by digit. Only for pure digit strings, so a
    # date or time that starts with ၀၁/၀၉ is not mistaken for one.
    if _DIGITS_ONLY.fullmatch(num) and re.match(rgxPh, num):
        return " ".join("ခွန်" if d == "၇" else mm_digit[d] for d in num)

    # date: spoken as year, month, day
    if re.fullmatch(rgxDate, num):
        day, month, year = re.split(r"[-/]", num)
        return (
            convert_digit(year)
            + " ခုနှစ် "
            + convert_digit(month)
            + " လပိုင်: "
            + convert_digit(day)
            + " ရက်"
        )

    # time: thirty minutes past is spoken with the dedicated word "ခွဲ"
    if re.fullmatch(rgxTime, num):
        hour, minute = num.split(":")
        if minute == "၃၀":
            minute_words = "ခွဲ"
        else:
            minute_words = convert_digit(minute) + " မိနစ်"
        return convert_digit(hour) + " နာရီ " + minute_words

    # decimal: integer part as a number, fractional part digit by digit
    if re.fullmatch(rgxDec, num):
        whole, fraction = num.split(".")
        return (
            convert_digit(whole)
            + " ဒဿမ "
            + " ".join(mm_digit[d] for d in fraction)
        )

    # amount
    if re.fullmatch(rgxAmt, num):
        return convert_digit(num.replace(",", ""))

    # default
    raise ValueError("Cannot convert the provided number format!")


def extract_num(S):
    """
    Extract numbers from the input string.

    @type   S str
    @param  S Myanmar sentence
    @rtype  list
    @return a list of Myanmar numbers (dates, times, decimals and amounts),
            in the order they appear in S
    """
    return _ANY_NUMBER.findall(S)