pszemraj committed on
Commit
f860236
·
verified ·
1 Parent(s): 2ccef2f

improve inference

Browse files
Files changed (1) hide show
  1. app.py +6 -2
app.py CHANGED
@@ -11,7 +11,11 @@ global df
11
 
12
  # Load the static embeddings model from HuggingFace hub
13
  model_name = "sentence-transformers/static-retrieval-mrl-en-v1"
14
- model = SentenceTransformer(model_name, device="cpu")
 
 
 
 
15
 
16
 
17
  def get_iframe(hub_repo_id):
@@ -58,7 +62,7 @@ def vectorize_dataset(hub_repo_id: str, split: str, column: str):
58
  gr.Info("Vectorizing dataset...")
59
  ds = load_dataset(hub_repo_id)
60
  df = ds[split].to_polars()
61
- embeddings = model.encode(df[column].cast(str).to_list(), show_progress_bar=True)
62
  return embeddings
63
 
64
 
 
11
 
12
  # Load the static embeddings model from HuggingFace hub
13
  model_name = "sentence-transformers/static-retrieval-mrl-en-v1"
14
+ model = SentenceTransformer(
15
+ model_name,
16
+ device="cpu",
17
+ tokenizer_kwargs={"model_max_length": 512},
18
+ )
19
 
20
 
21
  def get_iframe(hub_repo_id):
 
62
  gr.Info("Vectorizing dataset...")
63
  ds = load_dataset(hub_repo_id)
64
  df = ds[split].to_polars()
65
+ embeddings = model.encode(df[column].cast(str), show_progress_bar=True, batch_size=128)
66
  return embeddings
67
 
68