hexgrad committed on
Commit
9386491
·
verified ·
1 Parent(s): 44aaf39

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -103,7 +103,7 @@ def normalize(text):
103
  text = re.sub(r'\b(?:Ms\.|MS\.(?= [A-Z]))', 'Miss', text)
104
  text = re.sub(r'\b(?:Mrs\.|MRS\.(?= [A-Z]))', 'Mrs', text)
105
  text = re.sub(r'\betc\.(?! [A-Z])', 'etc', text)
106
- text = re.sub(r'\b([Yy])eah\b', r"\1e'a", text)
107
  text = text.replace(chr(8216), "'").replace(chr(8217), "'")
108
  text = text.replace(chr(8220), '"').replace(chr(8221), '"')
109
  text = re.sub(r'[^\S \n]', ' ', text)
@@ -111,7 +111,7 @@ def normalize(text):
111
  text = re.sub(r'(?<=\n) +(?=\n)', '', text)
112
  text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)', split_num, text)
113
  text = re.sub(r'(?<=\d),(?=\d)', '', text)
114
- text = re.sub(r'[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b', flip_money, text)
115
  text = re.sub(r'\d*\.\d+', point_num, text)
116
  text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text) # TODO: could be minus
117
  text = re.sub(r'(?<=\d)S', ' S', text)
 
103
  text = re.sub(r'\b(?:Ms\.|MS\.(?= [A-Z]))', 'Miss', text)
104
  text = re.sub(r'\b(?:Mrs\.|MRS\.(?= [A-Z]))', 'Mrs', text)
105
  text = re.sub(r'\betc\.(?! [A-Z])', 'etc', text)
106
+ text = re.sub(r'(?i)\b(y)eah?\b', r"\1e'a", text)
107
  text = text.replace(chr(8216), "'").replace(chr(8217), "'")
108
  text = text.replace(chr(8220), '"').replace(chr(8221), '"')
109
  text = re.sub(r'[^\S \n]', ' ', text)
 
111
  text = re.sub(r'(?<=\n) +(?=\n)', '', text)
112
  text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)', split_num, text)
113
  text = re.sub(r'(?<=\d),(?=\d)', '', text)
114
+ text = re.sub(r'(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b', flip_money, text)
115
  text = re.sub(r'\d*\.\d+', point_num, text)
116
  text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text) # TODO: could be minus
117
  text = re.sub(r'(?<=\d)S', ' S', text)