TheJimmy committed on
Commit
aad580c
·
verified ·
1 Parent(s): 97870ca
Files changed (1) hide show
  1. app.py +68 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from FlagEmbedding import BGEM3FlagModel
3
+ from FlagEmbedding import FlagReranker
4
+ import pandas as pd
5
+ import numpy as np
6
+
7
@st.cache_resource
def load_model():
    """Build the BGE-M3 embedding model used to encode book descriptions.

    Wrapped in ``st.cache_resource`` so the model object is handed to
    Streamlit's resource cache instead of being rebuilt by every caller.

    Returns:
        A ``BGEM3FlagModel`` for the ``'BAAI/bge-m3'`` checkpoint, created
        with ``use_fp16=True``.
    """
    embedder = BGEM3FlagModel('BAAI/bge-m3', use_fp16=True)
    return embedder
11
@st.cache_resource
def load_reranker():
    """Build the reranker used to re-score candidate (query, summary) pairs.

    Wrapped in ``st.cache_resource`` so the reranker object is handed to
    Streamlit's resource cache instead of being rebuilt by every caller.

    Returns:
        A ``FlagReranker`` for the ``'BAAI/bge-reranker-v2-m3'`` checkpoint,
        created with ``use_fp16=True``.
    """
    cross_encoder = FlagReranker('BAAI/bge-reranker-v2-m3', use_fp16=True)
    return cross_encoder
14
+
15
@st.cache_data
def load_embed(path):
    """Load a saved NumPy array of embeddings from disk.

    Wrapped in ``st.cache_data``, so the result is cached by Streamlit
    keyed on ``path``.

    Args:
        path: Location of the ``.npy`` file, passed straight to ``np.load``.

    Returns:
        Whatever ``np.load(path)`` yields for that file.
    """
    return np.load(path)
19
+
20
# Obtain the embedding model and the reranker through the cached loaders.
model = load_model()
reranker = load_reranker()

# Precomputed embeddings the query embedding is compared against.
# NOTE(review): presumably row i here corresponds to row i of `df` built
# below -- confirm against the script that produced the .npy file.
# NOTE(review): every path in this section is an absolute Windows path and
# will only resolve on the original author's machine.
embeddings_2 = load_embed('D:/AI_Builder/BGE_embeddings_2.npy')

# pd.read_csv already returns a DataFrame, so it is not wrapped in
# pd.DataFrame(...) again.
data = pd.read_csv('D:/AI_Builder/ActualProject/DataCollection/TESTUNCLEANbookquestions.csv')
data2 = pd.read_csv('D:/AI_Builder/ActualProject/DataCollection/TRAINbookquestions.csv')

# The summaries file is tab-separated and has no header row, so the column
# names are supplied explicitly.
data3 = pd.read_csv(
    "D:/AI_Builder/ActualProject/DataCollection/booksummaries.txt",
    header=None,
    sep="\t",
    names=["ID", "Freebase ID", "Book Name", "Book Author", "Pub date", "Genres", "Summary"],
)

# Stack both question sets, then attach the summary columns by book ID.
df = pd.concat([data, data2])
df = df.merge(data3, on='ID', how='left')
# The merge suffixes the left-hand 'Book Name' as 'Book Name_x'; restore it.
df = df.rename(columns={'Book Name_x': 'Book Name'})
df = df[['ID', 'Book Name', 'Book Author', 'Questions', 'Summary']]
34
+
35
st.header(":books: Book Identifier")

# Number of nearest candidates taken from the dense search and handed to
# the reranker.
k = 10
with st.form(key='my_form'):
    sen1 = st.text_area("Book description:")
    submit_button = st.form_submit_button(label='Submit')

if submit_button:
    if not sen1.strip():
        # An empty description would still be encoded and ranked, producing
        # meaningless predictions; ask for input instead.
        st.warning("Please enter a book description.")
    else:
        # Dense embedding of the user's description.
        embeddings_1 = model.encode(
            sen1,
            batch_size=12,
            max_length=8192,
        )['dense_vecs']
        # Score of the query against every precomputed embedding.
        similarity = embeddings_1 @ embeddings_2.T

        # Indices of the k highest-scoring rows (ascending by score).
        topk = np.argsort(similarity)[-k:]

        # Pair the query with each candidate's summary for the reranker.
        # Positional .iloc is used for every lookup so the result does not
        # depend on the DataFrame's index labels.
        top_k_qs = [[sen1, df['Summary'].iloc[t]] for t in topk]
        rrscore = reranker.compute_score(top_k_qs, normalize=True)

        # Candidate positions ordered from best to worst reranker score.
        rrscore_index = np.argsort(rrscore)[::-1]

        pred_book = [
            f"{df['Book Name'].iloc[topk[rr]]} by {df['Book Author'].iloc[topk[rr]]}"
            for rr in rrscore_index
        ]

        st.write("Here is your prediction")
        for n, pred in enumerate(pred_book, start=1):
            st.write(f"{n}: {pred}")