# Hugging Face Space (status: Running)
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET

import gradio as gr
from huggingface_hub import InferenceClient
from transformers import pipeline
# Hugging Face Inference client used for the chat completions.
# Alternative model: "meta-llama/Llama-3.3-70B-Instruct"
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# Hugging Face pipeline for named entity recognition (NER).
# aggregation_strategy="simple" groups word pieces into whole entities and
# reports the label under the 'entity_group' key, which generate_query() reads;
# without it the pipeline emits token-level dicts keyed 'entity' instead.
nlp = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")
# Extract keywords (named entities only, no filler words) from the input text
def generate_query(input_text):
    """Build a search query from the named entities found in *input_text*.

    Runs the module-level ``nlp`` NER pipeline and keeps the words labelled
    MISC, ORG, LOC or PER.

    Args:
        input_text: The raw user message.

    Returns:
        The kept entity words joined by single spaces; an empty string when
        no matching entity is found.
    """
    wanted_labels = {"MISC", "ORG", "LOC", "PER"}
    keywords = []
    for entity in nlp(input_text):
        # Aggregated pipelines report the label as 'entity_group'; token-level
        # output reports it as 'entity' with a B-/I- prefix (e.g. "B-PER").
        label = entity.get("entity_group") or entity.get("entity") or ""
        label = label.split("-", 1)[-1]
        if label not in wanted_labels:
            continue
        word = entity["word"]
        if word.startswith("##"):
            # WordPiece continuation: glue it onto the previous keyword
            # instead of emitting a stray "##xyz" search term.
            piece = word[2:]
            if keywords:
                keywords[-1] += piece
            elif piece:
                keywords.append(piece)
        else:
            keywords.append(word)
    return " ".join(keywords).strip()
# Search arXiv for relevant studies
def fetch_arxiv_summary(query, sort_by="relevance", sort_order="descending", max_results=20):
    """Query the arXiv export API and return formatted study summaries.

    Args:
        query: Search terms, matched against all arXiv fields (``all:``).
        sort_by: Value for the API's ``sortBy`` parameter.
        sort_order: Value for the API's ``sortOrder`` parameter.
        max_results: Maximum number of entries to request; coerced to ``int``
            so a float such as ``20.0`` does not end up in the URL.

    Returns:
        A list of strings, one per entry, each containing title, link and
        abstract. If nothing usable is found the list holds a single
        "not found" message; if the request or parsing fails it holds a
        single error message. The function does not raise.
    """
    not_found_message = "Keine relevanten Studien gefunden."
    search_terms = (query or "").strip()
    if not search_terms:
        # Nothing to search for: skip the network round trip entirely.
        return [not_found_message]

    atom = "{http://www.w3.org/2005/Atom}"
    try:
        url = (f'http://export.arxiv.org/api/query?search_query=all:{urllib.parse.quote(search_terms)}'
               f'&start=0&max_results={int(max_results)}&sortBy={sort_by}&sortOrder={sort_order}')
        # The context manager closes the connection; the timeout keeps a
        # stalled server from blocking the chat handler indefinitely.
        with urllib.request.urlopen(url, timeout=30) as response:
            xml_data = response.read().decode("utf-8")
        root = ET.fromstring(xml_data)
        summaries = []
        for entry in root.findall(f".//{atom}entry"):
            title = entry.find(f"{atom}title")
            summary = entry.find(f"{atom}summary")
            if title is None or summary is None:
                continue
            link_element = entry.find(f"{atom}link[@rel='alternate']")
            if link_element is not None:
                link = link_element.attrib.get("href", "Kein Link verfügbar")
            else:
                link = "Kein Link verfügbar"
            # Element.text is None for empty elements; treat that as "".
            title_text = (title.text or "").strip()
            summary_text = (summary.text or "").strip()
            summaries.append(f"Titel: {title_text}\nLink: {link}\nZusammenfassung: {summary_text}")
        return summaries if summaries else [not_found_message]
    except Exception as e:
        # Best-effort boundary: the chatbot should answer even when arXiv is
        # unreachable, so the failure is reported as text instead of raised.
        return [f"Fehler beim Abrufen der Studie: {str(e)}"]
# Chatbot logic with arXiv integration
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    sort_by,
    sort_order,
    max_results,
):
    """Answer *message* using arXiv search results as additional context.

    Args:
        message: The current user message.
        history: Earlier turns; element 0 of each turn is read as the user
            text and element 1 as the assistant text.
        system_message: Text prepended to the built-in system prompt.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.
        sort_by: Forwarded to ``fetch_arxiv_summary``.
        sort_order: Forwarded to ``fetch_arxiv_summary``.
        max_results: Forwarded to ``fetch_arxiv_summary``.

    Yields:
        The response text accumulated so far, once per streamed chunk that
        carries new content.
    """
    # Generate the search query and fetch the studies
    query = generate_query(message)
    study_summaries = fetch_arxiv_summary(query, sort_by, sort_order, max_results)
    study_info = "\n".join(study_summaries)

    # Prepare the conversation for the model
    messages = [{"role": "system", "content": f"{system_message} You are a highly capable assistant specializing in parsing and summarizing study abstracts. Your task is to analyze the provided study data, extract relevant information, and offer concise summaries. Always include the study's title and a direct link, ensuring clarity and accessibility.\n"}]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": f"{message}\nUse this Kontext:\n{study_info}"})

    # Stream the model's answer. The loop variable is named `chunk` so it
    # does not shadow the `message` parameter.
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # Some stream chunks carry no text (content is None); appending
        # None to a str would raise TypeError.
        if token:
            response += token
            yield response
# Gradio interface with additional inputs
def create_intro_text():
    """Return the German introduction text shown as the chat description."""
    sentences = (
        "Willkommen beim Chatbot!",
        "Dieser Chatbot verwendet KI, um Ihre Fragen zu beantworten und relevante Studien "
        "aus der arXiv-Datenbank abzurufen.",
        "Geben Sie eine Frage ein, und der Bot liefert Ihnen basierend auf Ihrem "
        "Suchbegriff Studien mit Titel, Link und Zusammenfassung.",
        "Zusätzlich können Sie die Sortierung und maximale "
        "Anzahl der Ergebnisse anpassen.",
    )
    # Sentences are separated by exactly one space in the final text.
    return " ".join(sentences)
# Chat UI. The additional inputs are listed in the same order as the extra
# parameters of respond(): system_message, max_tokens, temperature, top_p,
# sort_by, sort_order, max_results.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(label="System message", value="You are helpful."),
        gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
        gr.Slider(label="Top-p (nucleus sampling)", minimum=0.1, maximum=1.0, step=0.05, value=0.95),
        gr.Dropdown(choices=["relevance", "lastUpdatedDate", "submittedDate"], value="relevance", label="Sortieren nach"),
        gr.Dropdown(choices=["ascending", "descending"], value="descending", label="Sortierreihenfolge"),
        gr.Slider(minimum=1, maximum=50, step=1, value=20, label="Maximale Ergebnisse"),
    ],
    description=create_intro_text(),
)

# Start the web app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()