k-mktr commited on
Commit
bbe8901
·
verified ·
1 Parent(s): b9df178

Rename arena_config.py to config.py

Browse files
Files changed (1) hide show
  1. arena_config.py → config.py +68 -64
arena_config.py → config.py RENAMED
@@ -1,15 +1,18 @@
1
  import os
 
 
 
 
 
2
 
3
  ARENA_NAME = "# 🏆 The GPU-Poor LLM Gladiator Arena 🏆"
4
  ARENA_DESCRIPTION = """
5
  Step right up to the arena where frugal meets fabulous in the world of AI!
6
  Watch as our compact contenders (maxing out at 14B parameters) duke it out in a battle of wits and words.
7
-
8
  1. Type your prompt into the text box. Alternatively, click the "🎲" button to receive a random prompt.
9
  2. Click the "Generate Responses" button to view the models' responses.
10
  3. Cast your vote for the model that provided the better response. In the event of a Tie, enter a new prompt before continuing the battle.
11
  4. Check out the Leaderboard to see how models rank against each other.
12
-
13
  More info: [README.md](https://huggingface.co/spaces/k-mktr/gpu-poor-llm-arena/blob/main/README.md)
14
  """
15
 
@@ -27,71 +30,58 @@ NEXTCLOUD_PASSWORD = os.environ.get("NEXTCLOUD_PASSWORD")
27
  NEXTCLOUD_LEADERBOARD_PATH = os.environ.get("NEXTCLOUD_LEADERBOARD_PATH")
28
  NEXTCLOUD_BACKUP_FOLDER = os.environ.get("NEXTCLOUD_BACKUP_FOLDER", "/gpu_poor_leaderboard_backups")
29
  NEXTCLOUD_SUGGESTIONS_PATH = os.environ.get("NEXTCLOUD_SUGGESTIONS_PATH", "/gpu_poor_model_suggestions.json")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
- # Predefined list of approved models with human-readable names
32
- APPROVED_MODELS = [
33
- ("llama3.2:1b-instruct-q8_0", "LLaMA 3.2 (1B, 8-bit)"),
34
- ("llama3.2:3b-instruct-q4_K_M", "LLaMA 3.2 (3B, 4-bit)"),
35
- ("llama3.1:8b-instruct-q4_0", "LLaMA 3.1 (8B, 4-bit)"),
36
  ("gemma2:2b-instruct-q4_0", "Gemma 2 (2B, 4-bit)"),
37
- ("gemma2:9b-instruct-q4_0", "Gemma 2 (9B, 4-bit)"),
38
  ("qwen2.5:0.5b-instruct-q8_0", "Qwen 2.5 (0.5B, 8-bit)"),
39
- ("qwen2.5:1.5b-instruct-q8_0", "Qwen 2.5 (1.5B, 8-bit)"),
40
- ("qwen2.5:3b-instruct-q4_K_M", "Qwen 2.5 (3B, 4-bit)"),
41
- ("qwen2.5:7b-instruct-q4_K_M", "Qwen 2.5 (7B, 4-bit)"),
42
- ("phi3.5:3.8b-mini-instruct-q4_0", "Phi 3.5 (3.8B, 4-bit)"),
43
- ("mistral:7b-instruct-v0.3-q4_0", "Mistral 0.3 (7B, 4-bit)"),
44
- ("hermes3:8b-llama3.1-q4_0", "Hermes 3 (8B, 4-bit)"),
45
- ("aya:8b-23-q4_0", "Aya 23 (8B, 4-bit)"),
46
- ("granite3-dense:2b-instruct-q8_0", "Granite 3 Dense (2B, 8-bit)"),
47
- ("granite3-dense:8b-instruct-q4_K_M", "Granite 3 Dense (8B, 4-bit)"),
48
- ("granite3-moe:1b-instruct-q5_K_M", "Granite 3 MoE (1B, 5-bit)"),
49
- ("granite3-moe:3b-instruct-q4_K_M", "Granite 3 MoE (3B, 4-bit)"),
50
- ("ministral-8b-instruct-2410-q4_k_m", "Ministral (8B, 4-bit)"),
51
- ("dolphin-2.9.4-llama3.1-8b-q4_k_m", "Dolphin 2.9.4 (8B, 4-bit)"),
52
- ("yi:6b-chat-v1.5-q4_0", "Yi v1.5 (6B, 4-bit)"),
53
- ("yi:9b-chat-v1.5-q4_0", "Yi v1.5 (9B, 4-bit)"),
54
- ("mistral-nemo:12b-instruct-2407-q4_K_M", "Mistral Nemo (12B, 4-bit)"),
55
- ("glm4:9b-chat-q4_K_M", "GLM4 (9B, 4-bit)"),
56
- ("internlm2:7b-chat-v2.5-q4_K_M", "InternLM2 v2.5 (7B, 4-bit)"),
57
- ("falcon2:11b-q4_K_M", "Falcon2 (11B, 4-bit)"),
58
- ("stablelm2:1.6b-chat-q8_0", "StableLM2 (1.6B, 8-bit)"),
59
- ("stablelm2:12b-chat-q4_K_M", "StableLM2 (12B, 4-bit)"),
60
- ("solar:10.7b-instruct-v1-q4_K_M", "Solar (10.7B, 4-bit)"),
61
- ("hf.co/bartowski/Replete-LLM-V2.5-Qwen-7b-GGUF:Q4_K_M", "Rombos Qwen (7B, 4-bit)"),
62
- ("hf.co/bartowski/Replete-LLM-V2.5-Qwen-1.5b-GGUF:Q8_0", "Rombos Qwen (1.5B, 8-bit)"),
63
- ("hf.co/bartowski/aya-expanse-8b-GGUF:Q4_K_M", "Aya Expanse (8B, 4-bit)"),
64
- ("smollm2:1.7b-instruct-q8_0", "SmolLM2 (1.7B, 8-bit)"),
65
- ("tinyllama:1.1b-chat-v1-q8_0", "TinyLLama (1.1B, 8-bit)"),
66
- ("hf.co/Felladrin/gguf-1.5-Pints-16K-v0.1:Q8_0", "Pints (1.57B, 8-bit)"),
67
- ("hf.co/bartowski/OLMoE-1B-7B-0924-Instruct-GGUF:Q4_K_M", "OLMoE (7B, 4-bit)"),
68
- ("hf.co/bartowski/Llama-3.2-3B-Instruct-uncensored-GGUF:Q4_K_M", "Llama 3.2 Uncensored (3B, 4-bit)"),
69
- ("hf.co/bartowski/Llama-3.1-Hawkish-8B-GGUF:Q4_K_M", "Llama 3.1 Hawkish (8B, 4-bit)"),
70
- ("hf.co/bartowski/Humanish-LLama3-8B-Instruct-GGUF:Q4_K_M", "Humanish Llama 3 (8B, 4-bit)"),
71
- ("hf.co/bartowski/Nemotron-Mini-4B-Instruct-GGUF:Q4_K_M", "Nemotron Mini (4B, 4-bit)"),
72
- ("hf.co/bartowski/Llama-3.1-SauerkrautLM-8b-Instruct-GGUF:Q4_K_M", "Llama 3.1 Sauerkraut (8B, 4-bit)"),
73
- ("hf.co/bartowski/Llama-3.1-SuperNova-Lite-GGUF:Q4_K_M", "Llama 3.1 SuperNova Lite (8B, 4-bit)"),
74
- ("hf.co/bartowski/EuroLLM-9B-Instruct-GGUF:Q4_K_M", "EuroLLM (9B, 4-bit)"),
75
- ("hf.co/bartowski/INTELLECT-1-Instruct-GGUF:Q4_K_M", "Intellect-1 (10B, 4-bit)"),
76
- ("marco-o1:7b-q4_K_M", "Marco-o1 (7B, 4-bit)"),
77
- ("tulu3:8b-q4_K_M", "Tulu 3 (8B, 4-bit)"),
78
- ("exaone3.5:7.8b-instruct-q4_K_M", "EXAONE 3.5 (7.8B, 4-bit)"),
79
- ("exaone3.5:2.4b-instruct-q8_0", "EXAONE 3.5 (2.4B, 8-bit)"),
80
- ("falcon3:10b-instruct-q4_K_M", "Falcon 3 (10B, 4-bit)"),
81
- ("falcon3:7b-instruct-q4_K_M", "Falcon 3 (7B, 4-bit)"),
82
- ("falcon3:3b-instruct-q8_0", "Falcon 3 (3B, 8-bit)"),
83
- ("falcon3:1b-instruct-q8_0", "Falcon 3 (1B, 8-bit)"),
84
- ("granite3.1-moe:3b-instruct-q8_0", "Granite 3.1 MoE (3B, 8-bit)"),
85
- ("granite3.1-moe:1b-instruct-q8_0", "Granite 3.1 MoE (1B, 8-bit)"),
86
- ("granite3.1-dense:8b-instruct-q4_K_M", "Granite 3.1 Dense (8B, 4-bit)"),
87
- ("granite3.1-dense:2b-instruct-q8_0", "Granite 3.1 Dense (3B, 8-bit)"),
88
- ("hf.co/arcee-ai/Virtuoso-Small-GGUF:Q4_K_M", "Virtuoso Small (14B, 4-bit)"),
89
- ("hf.co/bartowski/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base-GGUF:Q4_K_M", "Llama 3.1 SuperNova Lite TIES with Base (8B, 4-bit)"),
90
- ("hf.co/akjindal53244/Llama-3.1-Storm-8B-GGUF:Q4_K_M", "Llama 3.1 Storm (8B, 4-bit)"),
91
- ("dolphin3:8b-llama3.1-q4_K_M", "Dolphin 3 (8B, 4-bit)"),
92
- ("smallthinker:3b-preview-q4_K_M", "SmallThinker (3B, 4-bit)"),
93
- ("hf.co/bartowski/OLMo-2-1124-7B-Instruct-GGUF:Q4_K_M", "OLMo-2 (7B, 4-bit)"),
94
-
95
  ]
96
 
97
  # Example prompts
@@ -197,3 +187,17 @@ model_nicknames = [
197
  "🦜 Parrot Paradoxer", "🌮 Taco Theorist", "🧨 Firecracker Philosopher",
198
  "🎳 Bowling Bard", "🧀 Cheese Chatterer", "🦚 Peacock Ponderer"
199
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ from nc_py_api import Nextcloud
3
+ import json
4
+ from datetime import datetime
5
+ import time
6
+ import threading
7
 
8
  ARENA_NAME = "# 🏆 The GPU-Poor LLM Gladiator Arena 🏆"
9
  ARENA_DESCRIPTION = """
10
  Step right up to the arena where frugal meets fabulous in the world of AI!
11
  Watch as our compact contenders (maxing out at 14B parameters) duke it out in a battle of wits and words.
 
12
  1. Type your prompt into the text box. Alternatively, click the "🎲" button to receive a random prompt.
13
  2. Click the "Generate Responses" button to view the models' responses.
14
  3. Cast your vote for the model that provided the better response. In the event of a Tie, enter a new prompt before continuing the battle.
15
  4. Check out the Leaderboard to see how models rank against each other.
 
16
  More info: [README.md](https://huggingface.co/spaces/k-mktr/gpu-poor-llm-arena/blob/main/README.md)
17
  """
18
 
 
30
  NEXTCLOUD_LEADERBOARD_PATH = os.environ.get("NEXTCLOUD_LEADERBOARD_PATH")
31
  NEXTCLOUD_BACKUP_FOLDER = os.environ.get("NEXTCLOUD_BACKUP_FOLDER", "/gpu_poor_leaderboard_backups")
32
  NEXTCLOUD_SUGGESTIONS_PATH = os.environ.get("NEXTCLOUD_SUGGESTIONS_PATH", "/gpu_poor_model_suggestions.json")
33
+ NEXTCLOUD_MODELS_PATH = os.environ.get("NEXTCLOUD_MODELS_PATH", "/gpu_poor_approved_models.json")
34
+
35
+ # Initialize Nextcloud client
36
+ nc = Nextcloud(
37
+ nextcloud_url=NEXTCLOUD_URL,
38
+ nc_auth_user=NEXTCLOUD_USERNAME,
39
+ nc_auth_pass=NEXTCLOUD_PASSWORD
40
+ )
41
+
42
+ def load_approved_models():
43
+ """Load approved models from Nextcloud, fallback to local list if needed."""
44
+ try:
45
+ # Try to load from Nextcloud
46
+ remote_data = nc.files.download(NEXTCLOUD_MODELS_PATH)
47
+ if remote_data:
48
+ models_data = json.loads(remote_data.decode('utf-8'))
49
+ return models_data['approved_models']
50
+ except Exception as e:
51
+ print(f"Could not load models from Nextcloud: {e}")
52
+
53
+ # Fallback to default models if Nextcloud fails
54
+ return FALLBACK_MODELS
55
+
56
+ # Add these constants
57
+ MODEL_REFRESH_INTERVAL = 3600 # Check every 1 hour
58
+ _last_model_check = 0 # Track last check time
59
+
60
+ def get_approved_models():
61
+ """Get the current list of approved models with periodic refresh."""
62
+ global _last_model_check
63
+
64
+ current_time = time.time()
65
+ # Check if we need to refresh (if it's been more than MODEL_REFRESH_INTERVAL seconds)
66
+ if not hasattr(get_approved_models, '_models') or \
67
+ (current_time - _last_model_check) > MODEL_REFRESH_INTERVAL:
68
+ get_approved_models._models = load_approved_models()
69
+ _last_model_check = current_time
70
+
71
+ return get_approved_models._models
72
+
73
+ def refresh_approved_models():
74
+ """Force refresh of the approved models list."""
75
+ if hasattr(get_approved_models, '_models'):
76
+ delattr(get_approved_models, '_models')
77
+ return get_approved_models()
78
 
79
+ # Keep FALLBACK_MODELS as a safety net
80
+ FALLBACK_MODELS = [
81
+ ("dolphin3:8b-llama3.1-q4_K_M", "Dolphin 3 (8B, 4-bit)"),
82
+ ("granite3.1-dense:8b-instruct-q4_K_M", "Granite 3.1 Dense (8B, 4-bit)"),
 
83
  ("gemma2:2b-instruct-q4_0", "Gemma 2 (2B, 4-bit)"),
 
84
  ("qwen2.5:0.5b-instruct-q8_0", "Qwen 2.5 (0.5B, 8-bit)"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  ]
86
 
87
  # Example prompts
 
187
  "🦜 Parrot Paradoxer", "🌮 Taco Theorist", "🧨 Firecracker Philosopher",
188
  "🎳 Bowling Bard", "🧀 Cheese Chatterer", "🦚 Peacock Ponderer"
189
  ]
190
+
191
+ def start_model_refresh_thread():
192
+ """Start a background thread to periodically refresh the models list."""
193
+ def refresh_models_periodically():
194
+ while True:
195
+ time.sleep(MODEL_REFRESH_INTERVAL)
196
+ try:
197
+ refresh_approved_models()
198
+ print(f"Models list refreshed at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
199
+ except Exception as e:
200
+ print(f"Error refreshing models list: {e}")
201
+
202
+ refresh_thread = threading.Thread(target=refresh_models_periodically, daemon=True)
203
+ refresh_thread.start()