Luke Stanley committed
Commit 434144a · 1 Parent(s): e30b729

Make GPU detection and llama-cpp-python re-installation conditional

Files changed (1):
1. app.py +29 -20
app.py CHANGED
@@ -1,27 +1,36 @@
+from os import environ as env
 from os import system as run
 from subprocess import check_output
 
 import gradio as gr
 
-# Without a GPU, we need to re-install llama-cpp-python to avoid an error.
-# We use a shell command to detect if we have an NVIDIA GPU available:
-use_gpu = True
-try:
-    command = "nvidia-debugdump --list|grep Device"
-    output = str(check_output(command, shell=True).decode())
-    if "NVIDIA" in output and "ID" in output:
-        print("NVIDIA GPU detected.")
-except Exception as e:
-    print("No NVIDIA GPU detected, using CPU. GPU check result:", e)
-    use_gpu = False
-
-if use_gpu:
-    print("GPU detected, existing GPU focused llama-cpp-python should work.")
-else:
-    print("Avoiding error by re-installing non-GPU llama-cpp-python build because no GPU was detected.")
-    run('pip uninstall llama-cpp-python -y')
-    run('pip install git+https://github.com/lukestanley/llama-cpp-python.git@expose_json_grammar_convert_function --upgrade --no-cache-dir --force-reinstall')
-    print("llama-cpp-python re-installed, will now attempt to load.")
+
+def inference_binary_check():
+    # Without a GPU, we need to re-install llama-cpp-python to avoid an error.
+    # We use a shell command to detect if we have an NVIDIA GPU available:
+    use_gpu = True
+    try:
+        command = "nvidia-debugdump --list|grep Device"
+        output = str(check_output(command, shell=True).decode())
+        if "NVIDIA" in output and "ID" in output:
+            print("NVIDIA GPU detected.")
+    except Exception as e:
+        print("No NVIDIA GPU detected, using CPU. GPU check result:", e)
+        use_gpu = False
+
+    if use_gpu:
+        print("GPU detected, existing GPU focused llama-cpp-python should work.")
+    else:
+        print("Avoiding error by re-installing non-GPU llama-cpp-python build because no GPU was detected.")
+        run('pip uninstall llama-cpp-python -y')
+        run('pip install git+https://github.com/lukestanley/llama-cpp-python.git@expose_json_grammar_convert_function --upgrade --no-cache-dir --force-reinstall')
+        print("llama-cpp-python re-installed, will now attempt to load.")
+
+
+LLM_WORKER = env.get("LLM_WORKER", "runpod")
+
+if LLM_WORKER == "http" or LLM_WORKER == "in_memory":
+    inference_binary_check()
 
 # Now chill can import llama-cpp-python without an error:
 from chill import improvement_loop
@@ -31,4 +40,4 @@ def greet(text):
     return str(improvement_loop(text))
 
 demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch(max_threads=1)
+demo.launch(max_threads=1, share=True)
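For context on the new gate: the default LLM_WORKER value of "runpod" skips the binary check entirely, since inference runs remotely; only the "http" and "in_memory" workers load llama-cpp-python on the machine itself. As an aside, the same detection could be built around nvidia-smi, which ships with the NVIDIA driver package, so it avoids the grep dependency of the committed nvidia-debugdump pipeline. A minimal standalone sketch of that alternative (illustrative only, not what this commit ships; has_nvidia_gpu is a hypothetical helper name):

from subprocess import CalledProcessError, check_output

def has_nvidia_gpu() -> bool:
    # Best-effort probe: True if the driver reports at least one GPU.
    try:
        # `nvidia-smi -L` prints one line per device, e.g. "GPU 0: ...".
        return bool(check_output(["nvidia-smi", "-L"]).decode().strip())
    except (OSError, CalledProcessError):
        # Binary missing or driver not loaded: assume CPU-only.
        return False

Passing the command as a list also avoids shell=True, which keeps the probe working on hosts where no shell utilities beyond the driver tools are available.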