orionweller committed on
Commit
52b042a
·
1 Parent(s): 321d11b
Files changed (2) hide show
  1. app.py +68 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pyserini.search.lucene import LuceneSearcher
3
+ import os
4
+
5
# Make sure the prebuilt index has been fetched before the UI starts.
# NOTE(review): this checks for a local 'msmarco-passage' path, but Pyserini is
# documented to keep prebuilt indexes in its own cache directory, so the
# condition is presumably true on every start -- confirm the intended location.
if not os.path.exists('msmarco-passage'):
    try:
        # Fetch in-process instead of shelling out to `python -c ...`: a bare
        # `python` on PATH is not guaranteed to be the interpreter running this
        # app, and os.system() discarded the exit status, hiding failures.
        LuceneSearcher.from_prebuilt_index('msmarco-passage')
    except Exception as exc:
        # Best-effort, as before: a failed fetch must not stop the app from
        # starting, but it is now reported instead of being ignored.
        print(f"Warning: could not fetch prebuilt index 'msmarco-passage': {exc}")
8
+
9
_searcher = None  # shared LuceneSearcher, created lazily by _get_searcher()


def _get_searcher():
    """Return the shared BM25 searcher, opening the index on first use."""
    global _searcher
    if _searcher is None:
        if os.path.isdir('msmarco-passage'):
            # A local index directory takes precedence (the original behavior).
            searcher = LuceneSearcher('msmarco-passage')
        else:
            # Otherwise use the prebuilt index of the same name.
            searcher = LuceneSearcher.from_prebuilt_index('msmarco-passage')
        if searcher is None:
            raise RuntimeError("could not open index 'msmarco-passage'")
        searcher.set_bm25(k1=0.9, b=0.4)
        _searcher = searcher
    return _searcher


def search_pyserini(query):
    """Run a BM25 search for *query* and return the top 10 hits as text.

    Args:
        query: The search string typed by the user.

    Returns:
        One entry per hit (rank, doc ID, score, and the raw document content
        truncated to 200 characters), joined by blank lines. A blank or
        missing query returns a short prompt instead. Any exception raised
        while searching is returned as an "An error occurred: ..." message
        rather than propagated.
    """
    try:
        # Nothing to search for: answer directly instead of querying the index.
        if not query or not query.strip():
            return "Please enter a search query."

        # Reuse one searcher across requests instead of reopening the index
        # for every query.
        searcher = _get_searcher()
        hits = searcher.search(query, k=10)

        results = []
        for rank, hit in enumerate(hits, start=1):
            raw = searcher.doc(hit.docid).raw()  # fetch the raw text once
            content = raw[:200] + "..." if len(raw) > 200 else raw
            results.append(
                f"Rank: {rank}\nDoc ID: {hit.docid}\nScore: {hit.score:.4f}\nContent: {content}\n"
            )

        return "\n".join(results)
    except Exception as e:
        # UI boundary: report the failure in the results box.
        return f"An error occurred: {str(e)}"
24
+
25
# Stylesheet passed to gr.Blocks(css=...). The `.output-text` rules apply to
# the results box, which is tagged with that class via `elem_classes`.
css = "\n".join([
    "",
    ".gradio-container {",
    "font-family: 'Arial', sans-serif;",
    "}",
    ".output-text {",
    "white-space: pre-wrap;",
    "font-family: 'Courier New', monospace;",
    "font-size: 14px;",
    "line-height: 1.5;",
    "padding: 10px;",
    "border: 1px solid #ccc;",
    "border-radius: 5px;",
    "background-color: #f9f9f9;",
    "}",
    "",
])
40
+
41
# Page layout: heading, query box, search button, and a results box.
with gr.Blocks(css=css) as iface:
    gr.Markdown("# Pyserini Search Interface")
    gr.Markdown("Enter a query to search using Pyserini with BM25 scoring (k1=0.9, b=0.4).")

    with gr.Row():
        query_input = gr.Textbox(
            lines=1,
            placeholder="Enter your search query here...",
            label="Search Query",
        )

    with gr.Row():
        search_button = gr.Button("Search", variant="primary")

    with gr.Row():
        output = gr.Textbox(
            lines=20,
            label="Search Results",
            elem_classes=["output-text"],  # styled by the `.output-text` CSS rules
        )

    # Clicking the button runs the search and writes the text to the results box.
    search_button.click(
        fn=search_pyserini,
        inputs=query_input,
        outputs=output,
    )
    # Pressing Enter in the query box runs the same search.
    query_input.submit(
        fn=search_pyserini,
        inputs=query_input,
        outputs=output,
    )

# Start the server only when run as a script, so importing this module
# (e.g. from a test) does not launch it.
if __name__ == "__main__":
    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==3.50.2
2
+ pyserini==0.23.0
3
+ faiss-cpu==1.7.4