miesnerjacob committed on
Commit
2919f24
·
1 Parent(s): a00f9ba

added code comments in keyword extraction file

Browse files
Files changed (1) hide show
  1. keyword_extraction.py +13 -6
keyword_extraction.py CHANGED
@@ -90,7 +90,10 @@ class KeywordExtractor:
90
 
91
  len_indices = 0
92
  while True:
 
93
  merged = self.merge_overlapping_indices(keyword_indices)
 
 
94
  if len_indices == len(merged):
95
  out_indices = sorted(merged, key=itemgetter(0))
96
  return out_indices
@@ -108,18 +111,22 @@ class KeywordExtractor:
108
  annotation (list): list of tuples for generating html
109
  """
110
 
 
111
  arr = list(text)
 
 
112
  for idx in sorted(keyword_indices, reverse=True):
113
  arr.insert(idx[0], "<kw>")
114
- arr.insert(idx[1]+1, "XXXxxxXXXxxxXXX <kw>")
 
 
115
  joined_annotation = ''.join(arr)
 
 
116
  split = joined_annotation.split('<kw>')
117
- annotation = [(x.replace('XXXxxxXXXxxxXXX ', ''), "KEY", "#26aaef") if "XXXxxxXXXxxxXXX" in x else x for x in split]
118
 
119
- kws_check = []
120
- for i in annotation:
121
- if type(i) is tuple:
122
- kws_check.append(i[0])
123
 
124
  return annotation
125
 
 
90
 
91
  len_indices = 0
92
  while True:
93
+ # Merge overlapping indices
94
  merged = self.merge_overlapping_indices(keyword_indices)
95
+ # Check to see if merging reduced number of annotation indices
96
+ # If merging did not reduce the list, return the final indices
97
  if len_indices == len(merged):
98
  out_indices = sorted(merged, key=itemgetter(0))
99
  return out_indices
 
111
  annotation (list): list of tuples for generating html
112
  """
113
 
114
+ # Turn text into a list of characters
115
  arr = list(text)
116
+
117
+ # Loop through indices in list and insert delimiters
118
  for idx in sorted(keyword_indices, reverse=True):
119
  arr.insert(idx[0], "<kw>")
120
+ arr.insert(idx[1]+1, "<!kw> <kw>")
121
+
122
+ # join array
123
  joined_annotation = ''.join(arr)
124
+
125
+ # split array on delimiter
126
  split = joined_annotation.split('<kw>')
 
127
 
128
+ # Create annotation for keywords in text
129
+ annotation = [(x.replace('<!kw> ', ''), "KEY", "#26aaef") if "<!kw>" in x else x for x in split]
 
 
130
 
131
  return annotation
132