from llama_cpp import Llama
from huggingface_hub import hf_hub_download
from flask import Flask, request, jsonify

app = Flask(__name__)

# Download the quantized Phi-2 weights and load them with llama-cpp-python.
# hf_hub_download returns the local file path; n_gpu_layers=999 offloads all
# layers when a GPU build of llama.cpp is available.
model_path = hf_hub_download("TheBloke/phi-2-GGUF", "phi-2.Q8_0.gguf", local_dir="./")
phi = Llama(model_path=model_path, n_ctx=2048, n_gpu_layers=999)


@app.route("/spaces/MrOvkill/phastfi/", methods=["GET"])
def index():
    return "<html><body><h1>Use API</h1><p>Use /spaces/MrOvkill/phastfi/generate as POST with a prompt in the JSON body.</p></body></html>"


@app.route("/spaces/MrOvkill/phastfi/generate", methods=["POST"])
def completion():
    prompt = request.json["prompt"]
    # llama-cpp-python returns a single completion per call, so sample twice
    # to produce the two responses this endpoint returns.
    responses = []
    for _ in range(2):
        res = phi(
            prompt,
            temperature=0.33,
            top_p=0.95,
            top_k=42,
            max_tokens=1024,
        )
        responses.append(res["choices"][0]["text"])
    return jsonify({"responses": responses})
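

# Example client call, a minimal sketch: it assumes the server is reachable at
# http://localhost:7860 and that the route prefix above matches how the Space
# proxies requests.
#
#   import requests
#   r = requests.post(
#       "http://localhost:7860/spaces/MrOvkill/phastfi/generate",
#       json={"prompt": "Explain quantization in one paragraph."},
#   )
#   print(r.json()["responses"])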


if __name__ == "__main__":
    # 7860 is the default application port exposed by Hugging Face Spaces.
    app.run(host="0.0.0.0", port=7860)