File size: 4,456 Bytes
1e803da
 
09e28bb
 
1e803da
09e28bb
 
29633eb
 
f0f75ab
09e28bb
 
 
 
 
 
 
 
 
 
 
 
 
 
2663d44
09e28bb
2663d44
 
09e28bb
2384651
 
 
 
 
 
09e28bb
 
1e803da
 
2384651
 
 
1e803da
 
 
 
2384651
 
 
 
1e803da
 
 
 
5e6e8f5
2384651
 
 
 
e0af09a
09e28bb
 
2384651
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
09e28bb
1e803da
2384651
1e803da
2384651
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET

import gradio as gr
from huggingface_hub import InferenceClient

# HuggingFace Inference Client
# Module-level client shared by every chat request in this app; the model id
# is hard-coded here (a previously used alternative is kept below for reference).
#client = InferenceClient("meta-llama/Llama-3.3-70B-Instruct")
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")



# Fetch relevant studies from the arXiv API.
def fetch_arxiv_summary(query, sort_by="relevance", sort_order="descending", max_results=20):
    """Query the arXiv Atom API and return formatted study summaries.

    Parameters:
        query: free-text search term, matched against all fields (``all:``).
        sort_by: "relevance", "lastUpdatedDate", or "submittedDate".
        sort_order: "ascending" or "descending".
        max_results: number of entries to request.

    Returns:
        A list of strings, one per study, formatted as
        "Title: ...\\n Link: ...\\n Abstract: ...". Returns
        ["No Studies found"] for an empty result set and
        ["Error: ..."] if the request or parsing fails.
    """
    atom = "{http://www.w3.org/2005/Atom}"  # Atom XML namespace used by arXiv
    url = (f'http://export.arxiv.org/api/query?search_query=all:{urllib.parse.quote(query)}'
           f'&start=0&max_results={max_results}&sortBy={sort_by}&sortOrder={sort_order}')
    try:
        # Context manager closes the HTTP connection deterministically; the
        # timeout keeps the UI from hanging forever on a stalled request.
        with urllib.request.urlopen(url, timeout=30) as resp:
            xml_data = resp.read().decode("utf-8")
        root = ET.fromstring(xml_data)
        summaries = []
        for entry in root.findall(f".//{atom}entry"):
            title = entry.find(f"{atom}title")
            link_element = entry.find(f"{atom}link[@rel='alternate']")
            summary = entry.find(f"{atom}summary")
            link = link_element.attrib.get("href") if link_element is not None else "No Link found"
            # Entries missing a title or abstract are skipped rather than
            # producing a half-formed summary line.
            if summary is not None and title is not None:
                summaries.append(f"Title: {title.text.strip()}\n Link: {link}\n Abstract: {summary.text.strip()}")
        return summaries if summaries else ["No Studies found"]
    except Exception as e:
        # Deliberate best-effort: the caller renders this list directly in the
        # UI, so errors are surfaced as text instead of raising.
        return [f"Error: {str(e)}"]


# Generate the final result for the UI.
def search_and_generate(query, sort_by, sort_order, max_results, system_message, max_tokens, temperature, top_p):
    """Fetch arXiv studies for *query* and stream a model-generated summary.

    Parameters:
        query, sort_by, sort_order, max_results: forwarded to
            :func:`fetch_arxiv_summary`.
        system_message: system prompt instructing the model how to summarize.
        max_tokens, temperature, top_p: sampling parameters for the model.

    Returns:
        The full generated text, accumulated from the streamed response.
    """
    # Retrieve study information from arXiv.
    study_summaries = fetch_arxiv_summary(query, sort_by, sort_order, max_results)
    study_info = "\n".join(study_summaries)

    # Build the chat request for the model.
    messages = [{"role": "system", "content": system_message},
                {"role": "user", "content": f"Studies:\n{study_info}"}]

    response = ""
    for message in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = message.choices[0].delta.content
        # Streamed chunks can carry delta.content == None (e.g. the final
        # chunk); guard so concatenation does not raise TypeError.
        if token:
            response += token

    return response


# Intro text shown above the Gradio input controls.
def create_intro_text():
    """Return the introductory blurb displayed at the top of the page."""
    sentences = [
        "This chatbot uses AI to answer your questions and retrieve relevant studies from the arXiv database.",
        "Enter your specific query in the field below, and the bot will provide you with studies including the title, link, and summary.",
    ]
    return " ".join(sentences)

# Build the Gradio UI. NOTE: component creation order inside the Blocks
# context determines the on-page layout, so statement order matters here.
with gr.Blocks() as demo:
    gr.Markdown(create_intro_text())
    # arXiv search controls — forwarded verbatim to fetch_arxiv_summary.
    query_input = gr.Textbox(value="", label="Query (arxiv API)", placeholder="Geben Sie Ihren spezifischen Suchbegriff ein.")
    sort_by = gr.Dropdown(label="Sortby", choices=["relevance", "lastUpdatedDate", "submittedDate"], value="relevance")
    sort_order = gr.Dropdown(label="Sort", choices=["ascending", "descending"], value="descending")
    max_results = gr.Slider(label="Max Entries", minimum=1, maximum=50, value=20, step=1)
    # Model controls — system prompt plus sampling parameters for chat_completion.
    system_message = gr.Textbox(value="You are a highly capable assistant specializing in parsing and summarizing study abstracts. Your task is to analyze the provided study data, extract relevant information, and offer concise summaries. Always include the study's title and a direct link, ensuring clarity and accessibility. The data will be provided as a list of strings, where each string contains details about a study in the following format: 'Title: [Study Title]\\nLink: [URL]\\nSummary: [Study Abstract]'. Process each entry separately, ensuring accuracy and readability in your summaries.", label="Systemnachricht")
    max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Token")
    temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")

    # Output area and trigger: clicking Search runs the full fetch+generate pipeline.
    output_box = gr.Textbox(label="Results", placeholder="Result...", lines=10)
    search_button = gr.Button("Search")

    search_button.click(
        fn=search_and_generate,
        inputs=[query_input, sort_by, sort_order, max_results, system_message, max_tokens, temperature, top_p],
        outputs=output_box
    )


# Launch the app only when executed as a script (not when imported).
if __name__ == "__main__":
    demo.launch()