cheesyFishes committed
Commit d9f893a · 1 Parent(s): 7a47d4d

move device around more often

Files changed (1)
  1. app.py +16 -3
app.py CHANGED
@@ -22,7 +22,7 @@ elif torch.backends.mps.is_available():
 
 image_embed_model = HuggingFaceEmbedding(
     model_name="llamaindex/vdr-2b-multi-v1",
-    device=device,
+    device="cpu",
     trust_remote_code=True,
     token=os.getenv("HUGGINGFACE_TOKEN"),
     model_kwargs={"torch_dtype": torch.float16},
@@ -31,7 +31,7 @@ image_embed_model = HuggingFaceEmbedding(
 
 text_embed_model = HuggingFaceEmbedding(
     model_name="BAAI/bge-small-en",
-    device=device,
+    device="cpu",
     trust_remote_code=True,
     token=os.getenv("HUGGINGFACE_TOKEN"),
     embed_batch_size=2,
@@ -80,6 +80,9 @@ def create_index(file, llama_parse_key, progress=gr.Progress()):
             image_docs.append(ImageDocument(text=image_dict["name"], image_path=image_dict["path"]))
 
         # Create index
+        # move models to the device for embedding
+        image_embed_model._model.to(device)
+        text_embed_model._model.to(device)
         progress(0.9, desc="Creating final index...")
         index = MultiModalVectorStoreIndex.from_documents(
             text_docs + image_docs,
@@ -92,11 +95,17 @@ def create_index(file, llama_parse_key, progress=gr.Progress()):
 
     except Exception as e:
         return None, f"Error creating index: {str(e)}"
+    finally:
+        # move models back to CPU
+        image_embed_model._model.to("cpu")
+        text_embed_model._model.to("cpu")
 
 def run_search(index, query, text_top_k, image_top_k):
     if not index:
         return "Please create or select an index first.", [], []
-
+    # move models to the device for retrieval
+    index._image_embed_model._model.to(device)
+    index._embed_model._model.to(device)
     retriever = index.as_retriever(
         similarity_top_k=text_top_k,
         image_similarity_top_k=image_top_k,
@@ -105,6 +114,10 @@ def run_search(index, query, text_top_k, image_top_k):
     image_nodes = retriever.text_to_image_retrieve(query)
     text_nodes = retriever.text_retrieve(query)
 
+    # move models back to CPU
+    index._image_embed_model._model.to("cpu")
+    index._embed_model._model.to("cpu")
+
     # Extract text and scores from nodes
     text_results = [{"text": node.text, "score": f"{node.score:.3f}"} for node in text_nodes]
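The pattern this commit applies by hand (keep both embedding models resident on CPU and move them to the accelerator only for the duration of the heavy work) can be factored into a small helper so the paired .to(device) / .to("cpu") calls cannot drift out of sync. A minimal sketch, not part of the commit: it mirrors the device selection app.py already does, assumes the wrapped torch.nn.Module is reachable via the embedding wrapper's _model attribute, and the on_device helper name is hypothetical.

from contextlib import contextmanager

import torch

# mirror app.py's device selection: prefer CUDA, then Apple MPS, else CPU
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

@contextmanager
def on_device(model: torch.nn.Module, target: str):
    """Temporarily move `model` to `target`, always returning it to CPU."""
    model.to(target)
    try:
        yield model
    finally:
        # runs even if embedding/retrieval raises, so accelerator
        # memory is released before the next model needs it
        model.to("cpu")
        if target == "cuda":
            torch.cuda.empty_cache()

# hypothetical usage inside run_search:
#     with on_device(index._embed_model._model, device):
#         text_nodes = retriever.text_retrieve(query)

With a helper like this, the before/after moves in run_search and the finally: block in create_index collapse into with blocks, which also sidesteps referencing index before it has been assigned when an exception fires early.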