pierreguillou committed on
Commit
cf141c0
·
1 Parent(s): afb4867

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -8
app.py CHANGED
@@ -23,13 +23,9 @@ kw_model = {
23
 
24
  ## KeyphraseVectorizers
25
  # source: https://github.com/TimSchopf/KeyphraseVectorizers#keyphrasevectorizers
26
-
27
- download spacy pipeline (https://spacy.io/models/pt)
28
- source: https://melaniewalsh.github.io/Intro-Cultural-Analytics/05-Text-Analysis/Multilingual/Portuguese/03-POS-Keywords-Portuguese.html
29
- os.system("python -m spacy download pt_core_news_lg")
30
-
31
- # Part-of-Speech Tagging for Portuguese (https://melaniewalsh.github.io/Intro-Cultural-Analytics/05-Text-Analysis/Multilingual/Portuguese/03-POS-Keywords-Portuguese.html)
32
- pos_pattern='<CONJ.*>*<ADP.*>*<ADV.*>*<NUM.*>*<ADJ.*>*<N.*>+'
33
 
34
  # download stop words in Portuguese
35
  #import nltk
@@ -37,8 +33,11 @@ pos_pattern='<CONJ.*>*<ADP.*>*<ADV.*>*<NUM.*>*<ADJ.*>*<N.*>+'
37
  #from nltk.corpus import stopwords
38
  #stop_words = set(stopwords.words('portuguese'))
39
 
 
 
 
40
  # define o vectorizer
41
- vectorizer = KeyphraseCountVectorizer(spacy_pipeline='pt_core_news_lg', pos_pattern=pos_pattern, stop_words=None, lowercase=False)
42
 
43
  # function principal (keywords)
44
  def get_kw_html(model_id, doc, top_n, diversity):
 
23
 
24
  ## KeyphraseVectorizers
25
  # source: https://github.com/TimSchopf/KeyphraseVectorizers#keyphrasevectorizers
26
+ # download spacy pipeline (https://spacy.io/models/pt)
27
+ # source: https://melaniewalsh.github.io/Intro-Cultural-Analytics/05-Text-Analysis/Multilingual/Portuguese/03-POS-Keywords-Portuguese.html
28
+ # os.system("python -m spacy download pt_core_news_lg")
 
 
 
 
29
 
30
  # download stop words in Portuguese
31
  #import nltk
 
33
  #from nltk.corpus import stopwords
34
  #stop_words = set(stopwords.words('portuguese'))
35
 
36
+ # Part-of-Speech Tagging for Portuguese (https://melaniewalsh.github.io/Intro-Cultural-Analytics/05-Text-Analysis/Multilingual/Portuguese/03-POS-Keywords-Portuguese.html)
37
+ pos_pattern='<CONJ.*>*<ADP.*>*<ADV.*>*<NUM.*>*<ADJ.*>*<N.*>+'
38
+
39
  # define o vectorizer
40
+ vectorizer = KeyphraseCountVectorizer(spacy_pipeline='pt_core_news_lg', stop_words=None, pos_pattern=pos_pattern, lowercase=False)
41
 
42
  # function principal (keywords)
43
  def get_kw_html(model_id, doc, top_n, diversity):