k-mktr commited on
Commit
bbe8901
·
verified ·
1 Parent(s): b9df178

Rename arena_config.py to config.py

Browse files
Files changed (1) hide show
  1. arena_config.py → config.py +68 -64
arena_config.py → config.py RENAMED
@@ -1,15 +1,18 @@
1
  import os
 
 
 
 
 
2
 
3
  ARENA_NAME = "# 🏆 The GPU-Poor LLM Gladiator Arena 🏆"
4
  ARENA_DESCRIPTION = """
5
  Step right up to the arena where frugal meets fabulous in the world of AI!
6
  Watch as our compact contenders (maxing out at 14B parameters) duke it out in a battle of wits and words.
7
-
8
  1. Type your prompt into the text box. Alternatively, click the "🎲" button to receive a random prompt.
9
  2. Click the "Generate Responses" button to view the models' responses.
10
  3. Cast your vote for the model that provided the better response. In the event of a Tie, enter a new prompt before continuing the battle.
11
  4. Check out the Leaderboard to see how models rank against each other.
12
-
13
  More info: [README.md](https://huggingface.co/spaces/k-mktr/gpu-poor-llm-arena/blob/main/README.md)
14
  """
15
 
@@ -27,71 +30,58 @@ NEXTCLOUD_PASSWORD = os.environ.get("NEXTCLOUD_PASSWORD")
27
  NEXTCLOUD_LEADERBOARD_PATH = os.environ.get("NEXTCLOUD_LEADERBOARD_PATH")
28
  NEXTCLOUD_BACKUP_FOLDER = os.environ.get("NEXTCLOUD_BACKUP_FOLDER", "/gpu_poor_leaderboard_backups")
29
  NEXTCLOUD_SUGGESTIONS_PATH = os.environ.get("NEXTCLOUD_SUGGESTIONS_PATH", "/gpu_poor_model_suggestions.json")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
- # Predefined list of approved models with human-readable names
32
- APPROVED_MODELS = [
33
- ("llama3.2:1b-instruct-q8_0", "LLaMA 3.2 (1B, 8-bit)"),
34
- ("llama3.2:3b-instruct-q4_K_M", "LLaMA 3.2 (3B, 4-bit)"),
35
- ("llama3.1:8b-instruct-q4_0", "LLaMA 3.1 (8B, 4-bit)"),
36
  ("gemma2:2b-instruct-q4_0", "Gemma 2 (2B, 4-bit)"),
37
- ("gemma2:9b-instruct-q4_0", "Gemma 2 (9B, 4-bit)"),
38
  ("qwen2.5:0.5b-instruct-q8_0", "Qwen 2.5 (0.5B, 8-bit)"),
39
- ("qwen2.5:1.5b-instruct-q8_0", "Qwen 2.5 (1.5B, 8-bit)"),
40
- ("qwen2.5:3b-instruct-q4_K_M", "Qwen 2.5 (3B, 4-bit)"),
41
- ("qwen2.5:7b-instruct-q4_K_M", "Qwen 2.5 (7B, 4-bit)"),
42
- ("phi3.5:3.8b-mini-instruct-q4_0", "Phi 3.5 (3.8B, 4-bit)"),
43
- ("mistral:7b-instruct-v0.3-q4_0", "Mistral 0.3 (7B, 4-bit)"),
44
- ("hermes3:8b-llama3.1-q4_0", "Hermes 3 (8B, 4-bit)"),
45
- ("aya:8b-23-q4_0", "Aya 23 (8B, 4-bit)"),
46
- ("granite3-dense:2b-instruct-q8_0", "Granite 3 Dense (2B, 8-bit)"),
47
- ("granite3-dense:8b-instruct-q4_K_M", "Granite 3 Dense (8B, 4-bit)"),
48
- ("granite3-moe:1b-instruct-q5_K_M", "Granite 3 MoE (1B, 5-bit)"),
49
- ("granite3-moe:3b-instruct-q4_K_M", "Granite 3 MoE (3B, 4-bit)"),
50
- ("ministral-8b-instruct-2410-q4_k_m", "Ministral (8B, 4-bit)"),
51
- ("dolphin-2.9.4-llama3.1-8b-q4_k_m", "Dolphin 2.9.4 (8B, 4-bit)"),
52
- ("yi:6b-chat-v1.5-q4_0", "Yi v1.5 (6B, 4-bit)"),
53
- ("yi:9b-chat-v1.5-q4_0", "Yi v1.5 (9B, 4-bit)"),
54
- ("mistral-nemo:12b-instruct-2407-q4_K_M", "Mistral Nemo (12B, 4-bit)"),
55
- ("glm4:9b-chat-q4_K_M", "GLM4 (9B, 4-bit)"),
56
- ("internlm2:7b-chat-v2.5-q4_K_M", "InternLM2 v2.5 (7B, 4-bit)"),
57
- ("falcon2:11b-q4_K_M", "Falcon2 (11B, 4-bit)"),
58
- ("stablelm2:1.6b-chat-q8_0", "StableLM2 (1.6B, 8-bit)"),
59
- ("stablelm2:12b-chat-q4_K_M", "StableLM2 (12B, 4-bit)"),
60
- ("solar:10.7b-instruct-v1-q4_K_M", "Solar (10.7B, 4-bit)"),
61
- ("hf.co/bartowski/Replete-LLM-V2.5-Qwen-7b-GGUF:Q4_K_M", "Rombos Qwen (7B, 4-bit)"),
62
- ("hf.co/bartowski/Replete-LLM-V2.5-Qwen-1.5b-GGUF:Q8_0", "Rombos Qwen (1.5B, 8-bit)"),
63
- ("hf.co/bartowski/aya-expanse-8b-GGUF:Q4_K_M", "Aya Expanse (8B, 4-bit)"),
64
- ("smollm2:1.7b-instruct-q8_0", "SmolLM2 (1.7B, 8-bit)"),
65
- ("tinyllama:1.1b-chat-v1-q8_0", "TinyLLama (1.1B, 8-bit)"),
66
- ("hf.co/Felladrin/gguf-1.5-Pints-16K-v0.1:Q8_0", "Pints (1.57B, 8-bit)"),
67
- ("hf.co/bartowski/OLMoE-1B-7B-0924-Instruct-GGUF:Q4_K_M", "OLMoE (7B, 4-bit)"),
68
- ("hf.co/bartowski/Llama-3.2-3B-Instruct-uncensored-GGUF:Q4_K_M", "Llama 3.2 Uncensored (3B, 4-bit)"),
69
- ("hf.co/bartowski/Llama-3.1-Hawkish-8B-GGUF:Q4_K_M", "Llama 3.1 Hawkish (8B, 4-bit)"),
70
- ("hf.co/bartowski/Humanish-LLama3-8B-Instruct-GGUF:Q4_K_M", "Humanish Llama 3 (8B, 4-bit)"),
71
- ("hf.co/bartowski/Nemotron-Mini-4B-Instruct-GGUF:Q4_K_M", "Nemotron Mini (4B, 4-bit)"),
72
- ("hf.co/bartowski/Llama-3.1-SauerkrautLM-8b-Instruct-GGUF:Q4_K_M", "Llama 3.1 Sauerkraut (8B, 4-bit)"),
73
- ("hf.co/bartowski/Llama-3.1-SuperNova-Lite-GGUF:Q4_K_M", "Llama 3.1 SuperNova Lite (8B, 4-bit)"),
74
- ("hf.co/bartowski/EuroLLM-9B-Instruct-GGUF:Q4_K_M", "EuroLLM (9B, 4-bit)"),
75
- ("hf.co/bartowski/INTELLECT-1-Instruct-GGUF:Q4_K_M", "Intellect-1 (10B, 4-bit)"),
76
- ("marco-o1:7b-q4_K_M", "Marco-o1 (7B, 4-bit)"),
77
- ("tulu3:8b-q4_K_M", "Tulu 3 (8B, 4-bit)"),
78
- ("exaone3.5:7.8b-instruct-q4_K_M", "EXAONE 3.5 (7.8B, 4-bit)"),
79
- ("exaone3.5:2.4b-instruct-q8_0", "EXAONE 3.5 (2.4B, 8-bit)"),
80
- ("falcon3:10b-instruct-q4_K_M", "Falcon 3 (10B, 4-bit)"),
81
- ("falcon3:7b-instruct-q4_K_M", "Falcon 3 (7B, 4-bit)"),
82
- ("falcon3:3b-instruct-q8_0", "Falcon 3 (3B, 8-bit)"),
83
- ("falcon3:1b-instruct-q8_0", "Falcon 3 (1B, 8-bit)"),
84
- ("granite3.1-moe:3b-instruct-q8_0", "Granite 3.1 MoE (3B, 8-bit)"),
85
- ("granite3.1-moe:1b-instruct-q8_0", "Granite 3.1 MoE (1B, 8-bit)"),
86
- ("granite3.1-dense:8b-instruct-q4_K_M", "Granite 3.1 Dense (8B, 4-bit)"),
87
- ("granite3.1-dense:2b-instruct-q8_0", "Granite 3.1 Dense (3B, 8-bit)"),
88
- ("hf.co/arcee-ai/Virtuoso-Small-GGUF:Q4_K_M", "Virtuoso Small (14B, 4-bit)"),
89
- ("hf.co/bartowski/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base-GGUF:Q4_K_M", "Llama 3.1 SuperNova Lite TIES with Base (8B, 4-bit)"),
90
- ("hf.co/akjindal53244/Llama-3.1-Storm-8B-GGUF:Q4_K_M", "Llama 3.1 Storm (8B, 4-bit)"),
91
- ("dolphin3:8b-llama3.1-q4_K_M", "Dolphin 3 (8B, 4-bit)"),
92
- ("smallthinker:3b-preview-q4_K_M", "SmallThinker (3B, 4-bit)"),
93
- ("hf.co/bartowski/OLMo-2-1124-7B-Instruct-GGUF:Q4_K_M", "OLMo-2 (7B, 4-bit)"),
94
-
95
  ]
96
 
97
  # Example prompts
@@ -197,3 +187,17 @@ model_nicknames = [
197
  "🦜 Parrot Paradoxer", "🌮 Taco Theorist", "🧨 Firecracker Philosopher",
198
  "🎳 Bowling Bard", "🧀 Cheese Chatterer", "🦚 Peacock Ponderer"
199
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ from nc_py_api import Nextcloud
3
+ import json
4
+ from datetime import datetime
5
+ import time
6
+ import threading
7
 
8
  ARENA_NAME = "# 🏆 The GPU-Poor LLM Gladiator Arena 🏆"
9
  ARENA_DESCRIPTION = """
10
  Step right up to the arena where frugal meets fabulous in the world of AI!
11
  Watch as our compact contenders (maxing out at 14B parameters) duke it out in a battle of wits and words.
 
12
  1. Type your prompt into the text box. Alternatively, click the "🎲" button to receive a random prompt.
13
  2. Click the "Generate Responses" button to view the models' responses.
14
  3. Cast your vote for the model that provided the better response. In the event of a Tie, enter a new prompt before continuing the battle.
15
  4. Check out the Leaderboard to see how models rank against each other.
 
16
  More info: [README.md](https://huggingface.co/spaces/k-mktr/gpu-poor-llm-arena/blob/main/README.md)
17
  """
18
 
 
30
  NEXTCLOUD_LEADERBOARD_PATH = os.environ.get("NEXTCLOUD_LEADERBOARD_PATH")
31
  NEXTCLOUD_BACKUP_FOLDER = os.environ.get("NEXTCLOUD_BACKUP_FOLDER", "/gpu_poor_leaderboard_backups")
32
  NEXTCLOUD_SUGGESTIONS_PATH = os.environ.get("NEXTCLOUD_SUGGESTIONS_PATH", "/gpu_poor_model_suggestions.json")
33
+ NEXTCLOUD_MODELS_PATH = os.environ.get("NEXTCLOUD_MODELS_PATH", "/gpu_poor_approved_models.json")
34
+
35
+ # Initialize Nextcloud client
36
+ nc = Nextcloud(
37
+ nextcloud_url=NEXTCLOUD_URL,
38
+ nc_auth_user=NEXTCLOUD_USERNAME,
39
+ nc_auth_pass=NEXTCLOUD_PASSWORD
40
+ )
41
+
42
+ def load_approved_models():
43
+ """Load approved models from Nextcloud, fallback to local list if needed."""
44
+ try:
45
+ # Try to load from Nextcloud
46
+ remote_data = nc.files.download(NEXTCLOUD_MODELS_PATH)
47
+ if remote_data:
48
+ models_data = json.loads(remote_data.decode('utf-8'))
49
+ return models_data['approved_models']
50
+ except Exception as e:
51
+ print(f"Could not load models from Nextcloud: {e}")
52
+
53
+ # Fallback to default models if Nextcloud fails
54
+ return FALLBACK_MODELS
55
+
56
+ # Add these constants
57
+ MODEL_REFRESH_INTERVAL = 3600 # Check every 1 hour
58
+ _last_model_check = 0 # Track last check time
59
+
60
+ def get_approved_models():
61
+ """Get the current list of approved models with periodic refresh."""
62
+ global _last_model_check
63
+
64
+ current_time = time.time()
65
+ # Check if we need to refresh (if it's been more than MODEL_REFRESH_INTERVAL seconds)
66
+ if not hasattr(get_approved_models, '_models') or \
67
+ (current_time - _last_model_check) > MODEL_REFRESH_INTERVAL:
68
+ get_approved_models._models = load_approved_models()
69
+ _last_model_check = current_time
70
+
71
+ return get_approved_models._models
72
+
73
+ def refresh_approved_models():
74
+ """Force refresh of the approved models list."""
75
+ if hasattr(get_approved_models, '_models'):
76
+ delattr(get_approved_models, '_models')
77
+ return get_approved_models()
78
 
79
+ # Keep FALLBACK_MODELS as a safety net
80
+ FALLBACK_MODELS = [
81
+ ("dolphin3:8b-llama3.1-q4_K_M", "Dolphin 3 (8B, 4-bit)"),
82
+ ("granite3.1-dense:8b-instruct-q4_K_M", "Granite 3.1 Dense (8B, 4-bit)"),
 
83
  ("gemma2:2b-instruct-q4_0", "Gemma 2 (2B, 4-bit)"),
 
84
  ("qwen2.5:0.5b-instruct-q8_0", "Qwen 2.5 (0.5B, 8-bit)"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  ]
86
 
87
  # Example prompts
 
187
  "🦜 Parrot Paradoxer", "🌮 Taco Theorist", "🧨 Firecracker Philosopher",
188
  "🎳 Bowling Bard", "🧀 Cheese Chatterer", "🦚 Peacock Ponderer"
189
  ]
190
+
191
+ def start_model_refresh_thread():
192
+ """Start a background thread to periodically refresh the models list."""
193
+ def refresh_models_periodically():
194
+ while True:
195
+ time.sleep(MODEL_REFRESH_INTERVAL)
196
+ try:
197
+ refresh_approved_models()
198
+ print(f"Models list refreshed at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
199
+ except Exception as e:
200
+ print(f"Error refreshing models list: {e}")
201
+
202
+ refresh_thread = threading.Thread(target=refresh_models_periodically, daemon=True)
203
+ refresh_thread.start()