SauleBis committed on
Commit
53b90ec
·
1 Parent(s): 6c9c7f8
Files changed (2) hide show
  1. app.py +135 -0
  2. requirements.txt +0 -0
app.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import torch
4
+ import numpy as np
5
+ from transformers import AutoTokenizer, AutoModel
6
+ import faiss
7
+ from streamlit.errors import StreamlitAPIException
8
+ import urllib.parse
9
+
10
+
11
+
12
+ import os
13
+ os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
14
+
15
# Streamlit re-executes this script from top to bottom on every widget
# interaction, so the expensive loads are wrapped in cached loaders and are
# performed once instead of on every rerun.
model_name = "sentence-transformers/msmarco-distilbert-base-v3"


@st.cache_resource
def _load_encoder(name):
    """Return the ``(tokenizer, model)`` pair for the Hugging Face checkpoint *name*."""
    return AutoTokenizer.from_pretrained(name), AutoModel.from_pretrained(name)


@st.cache_data
def _load_books(path):
    """Read the book catalogue CSV at *path* into a DataFrame."""
    return pd.read_csv(path)


# Load model and tokenizer
tokenizer, model = _load_encoder(model_name)

# Load data
books = _load_books('data/data_final_version.csv')

# Maximum number of tokens passed to the tokenizer; longer inputs are truncated.
MAX_LEN = 300
24
+
25
def embed_bert_cls(text, model=model, tokenizer=tokenizer):
    """Encode *text* into a normalised embedding taken from the first token.

    The text is tokenized (padded, truncated to ``MAX_LEN`` tokens), run
    through ``model`` with gradient tracking disabled, and the last hidden
    state at sequence position 0 is normalised.

    Args:
        text: Input handed to the tokenizer.
        model: Encoder to run; defaults to the module-level model.
        tokenizer: Tokenizer to use; defaults to the module-level tokenizer.

    Returns:
        The embedding of the first batch item, moved to the CPU.
    """
    encoded = tokenizer(
        text,
        padding=True,
        truncation=True,
        return_tensors='pt',
        max_length=MAX_LEN,
    )
    with torch.no_grad():
        # Move every tokenizer output onto the device the model lives on.
        inputs = {}
        for key, tensor in encoded.items():
            inputs[key] = tensor.to(model.device)
        outputs = model(**inputs)
    cls_vectors = outputs.last_hidden_state[:, 0, :]
    normalised = torch.nn.functional.normalize(cls_vectors)
    first_item = normalised[0]
    return first_item.cpu().squeeze()
36
+
37
# Load the precomputed embeddings from the text file.
embeddings = np.loadtxt('models/embeddings.txt')
# Per-row tensors; the recommendation loop indexes this list by the row
# numbers returned from the Faiss search.
embeddings_tensor = [torch.tensor(embedding) for embedding in embeddings]

# Create Faiss index (exact inner-product search).
# np.loadtxt produces float64, while Faiss operates on C-contiguous float32
# matrices, so convert explicitly rather than relying on the Python wrapper.
embeddings_matrix = np.ascontiguousarray(embeddings, dtype='float32')
index = faiss.IndexFlatIP(embeddings_matrix.shape[1])
index.add(embeddings_matrix)
45
+
46
+
47
# CSS styles for the page background
background_image = """
<style>
.stApp {
background-image: url("https://img.freepik.com/premium-photo/blur-image-book_9563-1100.jpg");
background-size: cover;
background-position: center;
background-repeat: no-repeat;
}
</style>
"""

# Inject the CSS styles into the Streamlit app
st.markdown(background_image, unsafe_allow_html=True)


# CSS styles for boxes with a semi-transparent background
transparent_title = """
<style>
.transparent-title {
background-color: rgba(255, 255, 255, 0.7);
padding: 10px;
border-radius: 5px;
box-shadow: 0px 0px 10px rgba(0, 0, 0, 0.1);
}
</style>
"""

transparent_box = """
<style>
.transparent-box {
background-color: rgba(255, 255, 255, 0.7);
padding: 10px;
border-radius: 5px;
box-shadow: 0px 0px 10px rgba(0, 0, 0, 0.1);
}
</style>
"""

# Inject the CSS styles into the Streamlit app
st.markdown(transparent_title, unsafe_allow_html=True)
st.markdown(transparent_box, unsafe_allow_html=True)
89
+
90
# Streamlit interface
st.markdown('<h1 class="transparent-title">🎓📚Приложение для рекомендаций книг📚🎓</h1>', unsafe_allow_html=True)

# Query controls: free-text request, number of results, and the trigger button.
text = st.text_input('Введите ваш запрос для поиска книг:')
num_results = st.number_input('Количество результатов:', min_value=1, max_value=20, value=3)
recommend_button = st.button('Получить рекомендации')


if text and recommend_button:  # Check if the user entered text and clicked the button

    # Embed the query and search for nearest vectors using Faiss
    query_embedding = embed_bert_cls(text)
    query_embedding = query_embedding.numpy().astype('float32')
    _, indices = index.search(np.expand_dims(query_embedding, axis=0), num_results)

    # The query norm does not change between results, so compute it once.
    query_norm = np.linalg.norm(query_embedding)

    st.subheader('Рекомендации по вашему запросу:')
    for i in indices[0]:
        # Faiss pads the result with -1 when it finds fewer than the requested
        # number of neighbours; a negative index would silently pick a row
        # from the end of the catalogue, so skip it.
        if i < 0:
            continue

        recommended_embedding = embeddings_tensor[i].numpy()  # Vector of the recommended book
        # Cosine similarity between the query and the recommended book
        similarity = np.dot(query_embedding, recommended_embedding) / (
            query_norm * np.linalg.norm(recommended_embedding)
        )
        similarity_percent = similarity * 100

        col1, col2 = st.columns([1, 3])
        with col1:
            image_url = books['image_url'][i]
            if pd.isna(image_url) or not image_url or image_url.strip() == '':
                st.write("Обложка не найдена")
            else:
                # Best effort: a failed cover must not abort the whole result
                # list, so report the error in place and continue.
                try:
                    st.image(image_url, use_column_width=True)
                except Exception as e:
                    st.write("Обложка не найдена")
                    st.write(e)

        with col2:
            # Show the book details on the semi-transparent card. The HTML is
            # assembled without leading indentation so Markdown cannot treat
            # it as an indented code block. The description line previously
            # ended with a stray '")' and never closed its <p> tag.
            card_html = "\n".join([
                '<div class="transparent-box">',
                f"<p><b>Название книги:</b> {books['title'][i]}</p>",
                f"<p><b>Автор:</b> {books['author'][i]}</p>",
                f"<p><b>Описание:</b> {books['annotation'][i]}</p>",
                f"<p><b>Оценка сходства:</b> {similarity_percent:.2f}%</p>",
                '</div>',
            ])
            st.markdown(card_html, unsafe_allow_html=True)

        st.write("---")
requirements.txt ADDED
File without changes