from flask import Flask, request, jsonify
from sentence_transformers import SentenceTransformer, util
import pandas as pd

app = Flask(__name__)

class EnhancedSemanticSearchEvaluator:
    def __init__(self, relevance_threshold=3, top_k=300, similarity_threshold=0.5):
        self.models = {
            "Model_1": SentenceTransformer('sentence-transformers/msmarco-distilbert-base-v3'),
            "Model_2": SentenceTransformer('sentence-transformers/all-mpnet-base-v2'),
            "Model_3": SentenceTransformer('sentence-transformers/paraphrase-MiniLM-L6-v2')
        }
        self.relevance_threshold = relevance_threshold
        self.top_k = top_k
        self.similarity_threshold = similarity_threshold

    def compute_similarity(self, model, query, matches):
        # Embed the query and each match's metadata text, then score every
        # pair with cosine similarity.
        query_embedding = model.encode(query, convert_to_tensor=True)
        match_embeddings = model.encode(
            [match['metadata'] for match in matches], convert_to_tensor=True
        )
        scores = util.pytorch_cos_sim(query_embedding, match_embeddings).squeeze(0).tolist()
        return scores

    def rank_results(self, model, query, matches):
        # Attach a similarity score to every match and sort best-first.
        similarity_scores = self.compute_similarity(model, query, matches)
        for match, score in zip(matches, similarity_scores):
            match['similarity_score'] = score
        ranked_matches = sorted(matches, key=lambda x: x['similarity_score'], reverse=True)
        return ranked_matches

    def evaluate_results(self, query, results):
        all_metrics = {}
        results_status = {}
        for model_name, model in self.models.items():
            ranked_matches = self.rank_results(model, query, results['matches'])
            results_with_scores = []
            for rank, match in enumerate(ranked_matches[:self.top_k], start=1):
                doc_id = match['id']
                similarity_score = match['similarity_score']
                # Map the cosine similarity onto a 1-5 relevance score.
                if similarity_score >= 0.7:
                    llm_score = 5
                elif similarity_score >= 0.5:
                    llm_score = 4
                elif similarity_score >= 0.3:
                    llm_score = 3
                elif similarity_score >= 0.1:
                    llm_score = 2
                else:
                    llm_score = 1
                results_with_scores.append({
                    "Rank": rank,
                    "Document ID": doc_id,
                    "Similarity Score": similarity_score,
                    "LLM Score": llm_score
                })
            results_df = pd.DataFrame(results_with_scores)
            # A result "passes" when its relevance score meets the threshold.
            results_df['Pass'] = results_df['LLM Score'] >= self.relevance_threshold
            pass_rate = results_df['Pass'].mean()
            top_k_df = results_df.head(self.top_k)
            precision_at_k = top_k_df['Pass'].mean()
            recall_at_k = top_k_df['Pass'].sum() / max(results_df['Pass'].sum(), 1)
            f1_at_k = (
                2 * precision_at_k * recall_at_k / (precision_at_k + recall_at_k)
                if (precision_at_k + recall_at_k) > 0 else 0
            )
            metrics = {
                "Pass Rate": pass_rate,
                "Precision@K": precision_at_k,
                "Recall@K": recall_at_k,
                "F1@K": f1_at_k
            }
            all_metrics[model_name] = metrics
            results_status[model_name] = "Test Passed" if pass_rate > 0.5 else "Test Failed"
        # Report both the detailed metrics and the per-model pass/fail verdict,
        # so the computed metrics are not silently discarded.
        return {"metrics": all_metrics, "status": results_status}
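
# Hedged usage sketch (illustration only, not part of the original app): the toy
# query, document IDs, and metadata strings below are made up, and the match
# schema is only inferred from the code above, which reads match['id'] and
# match['metadata'].
#
# sample_query = "how do transformers encode word order"
# sample_results = {
#     "matches": [
#         {"id": "doc-1", "metadata": "Positional encodings let transformers model word order."},
#         {"id": "doc-2", "metadata": "A recipe for sourdough bread with a long, cold proof."}
#     ]
# }
# local_evaluator = EnhancedSemanticSearchEvaluator(top_k=2)
# print(local_evaluator.evaluate_results(sample_query, sample_results))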

# Instantiate once at startup so the embedding models are only loaded a single time.
evaluator = EnhancedSemanticSearchEvaluator()


# The route path and method are assumptions; the original snippet defined the
# handler without a decorator, so it was never registered with Flask.
@app.route('/evaluate', methods=['POST'])
def evaluate():
    content = request.json
    query = content['query']
    results = content['results']
    evaluation_result = evaluator.evaluate_results(query, results)
    return jsonify(evaluation_result)

# if __name__ == '__main__':
#     app.run(debug=True, host='0.0.0.0', port=8000)
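
# Hedged client sketch (assumption, not taken from the original code): once the
# server is running, the endpoint above can be exercised with a POST request.
# The payload shape mirrors the fields the handler reads ('query', plus
# 'results' -> 'matches' -> 'id'/'metadata'); the values are invented.
#
# import requests
# payload = {
#     "query": "how do transformers encode word order",
#     "results": {
#         "matches": [
#             {"id": "doc-1", "metadata": "Positional encodings let transformers model word order."},
#             {"id": "doc-2", "metadata": "A recipe for sourdough bread with a long, cold proof."}
#         ]
#     }
# }
# response = requests.post("http://localhost:8000/evaluate", json=payload)
# print(response.json())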