import csv
import os
from datetime import datetime
from typing import Optional, Union
import gradio as gr
from huggingface_hub import HfApi, Repository
from onnx_export import convert
from apscheduler.schedulers.background import BackgroundScheduler
# Hub dataset repository in which successful exports are recorded.
DATASET_REPO_URL = "https://huggingface.co/datasets/optimum/exporters"

# CSV log file and its path relative to the root of the local dataset clone.
DATA_FILENAME = "data.csv"
DATA_FILE = os.path.join("data", DATA_FILENAME)

# Write token taken from the environment; None when HF_WRITE_TOKEN is unset.
HF_TOKEN = os.getenv("HF_WRITE_TOKEN")

# Directory intended to hold the local clone of the dataset repository.
DATADIR = "exporters_data"

# Handle on the local dataset clone. It stays None (export logging disabled)
# for as long as the clone step below remains commented out.
repo: Optional[Repository] = None
# if HF_TOKEN:
#     repo = Repository(local_dir=DATADIR, clone_from=DATASET_REPO_URL, token=HF_TOKEN)
def _parse_opset(opset: Union[int, str, None]) -> Optional[int]:
    """Normalize the user-supplied opset; blank or None means "no explicit opset".

    Raises:
        ValueError, TypeError: if the value cannot be converted to an integer.
    """
    if opset is None:
        return None
    if isinstance(opset, str):
        opset = opset.strip()
        if opset == "":
            return None
    return int(opset)


def _log_export(model_id: str, pr_url: str) -> None:
    """Append one row describing a successful export to the dataset CSV and push it."""
    repo.git_pull(rebase=True)
    # newline="" is what the csv module requires for files handed to a writer;
    # without it, platforms that translate newlines produce blank rows.
    with open(os.path.join(DATADIR, DATA_FILE), "a", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=["model_id", "pr_url", "time"])
        writer.writerow(
            {
                "model_id": model_id,
                "pr_url": pr_url,
                "time": str(datetime.now()),
            }
        )
    commit_url = repo.push_to_hub()
    print("[dataset]", commit_url)


def onnx_export(token: str, model_id: str, task: str, opset: Union[int, str]) -> str:
    """Export a Hub model to ONNX and return a Markdown status message.

    Args:
        token: Hugging Face token used to build the ``HfApi`` client.
        model_id: Identifier of the model to export.
        task: Task forwarded unchanged to ``convert``.
        opset: ONNX opset as an int or a numeric string; blank means that no
            explicit opset is passed to ``convert``.

    Returns:
        A Markdown string: an "Invalid input" notice, an error message, or the
        success message containing the URL of the opened PR. Exceptions are
        never propagated to the caller; they are rendered as an error message.
    """
    # Surrounding whitespace is a common copy/paste artefact; treat it as absent.
    token = (token or "").strip()
    model_id = (model_id or "").strip()
    if not token or not model_id:
        return "\n### Invalid input 🐞\nPlease fill a token and model name.\n"

    # Validate the opset up front so a typo yields an actionable message
    # instead of the raw int() conversion error.
    try:
        opset = _parse_opset(opset)
    except (TypeError, ValueError):
        return (
            f"#### Error: invalid ONNX opset {opset!r}. "
            "Please provide an integer (for example 14) or leave it blank."
        )

    try:
        api = HfApi(token=token)
        error, commit_info = convert(api=api, model_id=model_id, task=task, opset=opset)
        # convert() reports success with the string "0"; anything else is a message.
        if error != "0":
            return error
        print("[commit_info]", commit_info)

        # save in a private dataset
        if repo is not None:
            try:
                _log_export(model_id, commit_info.pr_url)
            except Exception as log_error:
                # The PR already exists at this point: a bookkeeping failure
                # must not be reported to the user as a failed export.
                print("[dataset] failed to record export:", log_error)

        # The revision contains "/" and has to be URL-encoded inside the link.
        pr_revision = commit_info.pr_revision.replace("/", "%2F")
        return f"#### Success 🔥 Yay! This model was successfully exported and a PR was open using your token, here: [{commit_info.pr_url}]({commit_info.pr_url}). If you would like to use the exported model without waiting for the PR to be approved, head to https://huggingface.co/{model_id}/tree/{pr_revision}"
    except Exception as e:
        # UI boundary: show any failure to the user rather than crashing the handler.
        return f"#### Error: {e}"
# Markup passed to gr.HTML above the title; the literal holds only a newline here.
# NOTE(review): the name looks like a typo of TITLE_IMAGE; renaming it requires
# updating the gr.HTML call that references it.
TTILE_IMAGE = """
"""
# Page title, rendered through gr.HTML.
TITLE = """
Export transformers model to ONNX with 🤗 Optimum exporters 🏎️
"""
# for some reason https://huggingface.co/settings/tokens is not showing as a link by default?
# Markdown shown through gr.Markdown: what the Space does and the steps to follow.
DESCRIPTION = """
This Space allows you to automatically export 🤗 transformers PyTorch models hosted on the Hugging Face Hub to [ONNX](https://onnx.ai/). It opens a PR on the target model, and it is up to the owner of the original model
to merge the PR to allow people to leverage the ONNX standard to share and use the model on a wide range of devices!
Once exported, the model can, for example, be used in the [🤗 Optimum](https://huggingface.co/docs/optimum/) library closely following the transformers API.
Check out [this guide](https://huggingface.co/docs/optimum/main/en/onnxruntime/usage_guides/models) to see how!
The steps are as following:
- Paste a read-access token from [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens). Read access is enough given that we will open a PR against the source repo.
- Input a model id from the Hub (for example: [textattack/distilbert-base-cased-CoLA](https://huggingface.co/textattack/distilbert-base-cased-CoLA))
- Click "Export to ONNX"
- That's it! You'll get feedback on if the export was successful or not, and if it was, you'll get the URL of the opened PR!
Note: in case the model to export is larger than 2 GB, it will be saved in a subfolder called `onnx/`. To load it from Optimum, the argument `subfolder="onnx"` should be provided.
"""
# Build the page: banner and title on top, then a row with the description in
# one column and the export form in the other.
# NOTE(review): the nesting was reconstructed from a flattened source; confirm
# that the button, the output and the click handler belong in the form column.
with gr.Blocks() as demo:
    gr.HTML(TTILE_IMAGE)
    gr.HTML(TITLE)

    with gr.Row():
        with gr.Column(scale=50):
            gr.Markdown(DESCRIPTION)

        with gr.Column(scale=50):
            input_token = gr.Textbox(
                max_lines=1,
                label="Hugging Face token",
                # The token is a secret: mask it so it is not displayed in
                # clear text on screen (screenshots, screen sharing).
                type="password",
            )
            input_model = gr.Textbox(
                max_lines=1,
                label="Model name",
                placeholder="textattack/distilbert-base-cased-CoLA",
            )
            input_task = gr.Textbox(
                value="auto",
                max_lines=1,
                label='Task (can be left to "auto", will be automatically inferred)',
            )
            onnx_opset = gr.Textbox(
                placeholder="for example 14, can be left blank",
                max_lines=1,
                label="ONNX opset (optional, can be left blank)",
            )

            btn = gr.Button("Export to ONNX")
            output = gr.Markdown(label="Output")

            # The inputs are passed positionally, so their order must match the
            # (token, model_id, task, opset) signature of onnx_export.
            btn.click(
                fn=onnx_export,
                inputs=[input_token, input_model, input_task, onnx_opset],
                outputs=output,
            )
def restart_space():
    """Request a factory reboot of the "onnx/export" Space using HF_TOKEN."""
    hub_client = HfApi()
    hub_client.restart_space(
        repo_id="onnx/export",
        token=HF_TOKEN,
        factory_reboot=True,
    )
# Restart the Space every six hours from a background thread, then start
# serving the UI.
scheduler = BackgroundScheduler()
scheduler.add_job(
    restart_space,
    trigger="interval",
    seconds=6 * 60 * 60,  # 21600 seconds
)
scheduler.start()

demo.launch()