Spaces: Running on Zero
Upload app.py
Browse files
app.py
CHANGED
@@ -103,7 +103,7 @@ def normalize(text):
|
|
103 |
text = re.sub(r'\b(?:Ms\.|MS\.(?= [A-Z]))', 'Miss', text)
|
104 |
text = re.sub(r'\b(?:Mrs\.|MRS\.(?= [A-Z]))', 'Mrs', text)
|
105 |
text = re.sub(r'\betc\.(?! [A-Z])', 'etc', text)
|
106 |
-
text = re.sub(r'\b(
|
107 |
text = text.replace(chr(8216), "'").replace(chr(8217), "'")
|
108 |
text = text.replace(chr(8220), '"').replace(chr(8221), '"')
|
109 |
text = re.sub(r'[^\S \n]', ' ', text)
|
@@ -111,7 +111,7 @@ def normalize(text):
|
|
111 |
text = re.sub(r'(?<=\n) +(?=\n)', '', text)
|
112 |
text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)', split_num, text)
|
113 |
text = re.sub(r'(?<=\d),(?=\d)', '', text)
|
114 |
-
text = re.sub(r'[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b', flip_money, text)
|
115 |
text = re.sub(r'\d*\.\d+', point_num, text)
|
116 |
text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text) # TODO: could be minus
|
117 |
text = re.sub(r'(?<=\d)S', ' S', text)
|
|
|
103 |
text = re.sub(r'\b(?:Ms\.|MS\.(?= [A-Z]))', 'Miss', text)
|
104 |
text = re.sub(r'\b(?:Mrs\.|MRS\.(?= [A-Z]))', 'Mrs', text)
|
105 |
text = re.sub(r'\betc\.(?! [A-Z])', 'etc', text)
|
106 |
+
text = re.sub(r'(?i)\b(y)eah?\b', r"\1e'a", text)
|
107 |
text = text.replace(chr(8216), "'").replace(chr(8217), "'")
|
108 |
text = text.replace(chr(8220), '"').replace(chr(8221), '"')
|
109 |
text = re.sub(r'[^\S \n]', ' ', text)
|
|
|
111 |
text = re.sub(r'(?<=\n) +(?=\n)', '', text)
|
112 |
text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)', split_num, text)
|
113 |
text = re.sub(r'(?<=\d),(?=\d)', '', text)
|
114 |
+
text = re.sub(r'(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b', flip_money, text)
|
115 |
text = re.sub(r'\d*\.\d+', point_num, text)
|
116 |
text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text) # TODO: could be minus
|
117 |
text = re.sub(r'(?<=\d)S', ' S', text)
|