improve inference
app.py CHANGED
@@ -11,7 +11,11 @@ global df
 
 # Load the static embeddings model from HuggingFace hub
 model_name = "sentence-transformers/static-retrieval-mrl-en-v1"
-model = SentenceTransformer(model_name)
+model = SentenceTransformer(
+    model_name,
+    device="cpu",
+    tokenizer_kwargs={"model_max_length": 512},
+)
 
 
 def get_iframe(hub_repo_id):
@@ -58,7 +62,7 @@ def vectorize_dataset(hub_repo_id: str, split: str, column: str):
     gr.Info("Vectorizing dataset...")
     ds = load_dataset(hub_repo_id)
     df = ds[split].to_polars()
-    embeddings = model.encode(df[column].cast(str))
+    embeddings = model.encode(df[column].cast(str), show_progress_bar=True, batch_size=128)
     return embeddings
 
 
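For context, below is a minimal, self-contained sketch of how the loading and encoding path looks after this commit. The imports and the vectorize_dataset signature are taken from the hunk headers above; the gr.Info progress message is omitted so the sketch runs outside the Gradio app, and the repo id, split, and column in the usage comment are placeholders, not part of the change.

from datasets import load_dataset
from sentence_transformers import SentenceTransformer

# Static embedding model from the Hub; static models do a lookup + pooling
# rather than a full transformer forward pass, so CPU inference stays fast.
model_name = "sentence-transformers/static-retrieval-mrl-en-v1"
model = SentenceTransformer(
    model_name,
    device="cpu",
    tokenizer_kwargs={"model_max_length": 512},  # cap inputs at 512 tokens
)

def vectorize_dataset(hub_repo_id: str, split: str, column: str):
    # Load the dataset from the Hub and convert the chosen split to Polars.
    ds = load_dataset(hub_repo_id)
    df = ds[split].to_polars()
    # Cast the column to str before encoding; the explicit batch_size and the
    # progress bar are what this commit adds.
    embeddings = model.encode(df[column].cast(str), show_progress_bar=True, batch_size=128)
    return embeddings

# Hypothetical usage (placeholder arguments):
# vectors = vectorize_dataset("user/some-dataset", "train", "text")

Pinning the model to device="cpu" is reasonable here because static embedding models are lightweight, and the larger encode batch size mainly cuts per-batch overhead during vectorization.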