# Hugging Face Space application file (Space status: running on a T4 GPU).
# File size: 2,735 bytes.
import torch
import numpy as np
import gradio as gr
import soundfile as sf
import tempfile
from transformers import pipeline
from huggingface_hub import InferenceClient
def _grab_best_device(use_gpu=True):
if torch.cuda.device_count() > 0 and use_gpu:
device = "cuda"
else:
device = "cpu"
return device
# Pick the compute device once at import time; the pipeline below runs on it.
device = _grab_best_device()

# Markdown header rendered at the top of the demo page.
title = """# MusicGen Prompt Upsampling 🎶
MusicGen, a simple and controllable model for music generation.
**Model**: https://huggingface.co/facebook/musicgen-stereo-medium
"""

# Text-to-audio pipeline. It follows the detected device rather than a
# hard-coded "cuda", so the app can still start on a CPU-only host. Half
# precision is only requested on GPU; CPU falls back to float32.
vibes = pipeline(
    "text-to-audio",
    "facebook/musicgen-stereo-medium",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device=device,
)

# Hosted instruction-tuned LLM used to enrich the user's prompt before it is
# passed to the music pipeline.
client = InferenceClient(model="mistralai/Mixtral-8x7B-Instruct-v0.1")
# Inference
def generate_audio(text,):
    """Enrich a prompt with the LLM client, then synthesise it as audio.

    Args:
        text: The user's short description of the desired music.

    Returns:
        A ``(wav_path, upsampled_prompt)`` tuple: the path of a temporary
        ``.wav`` file holding the generated audio, and the enriched prompt
        text that was fed to the audio pipeline.
    """
    prompt = f"Take the next sentence and enrich it with details. Keep it compact. {text}"
    output = client.text_generation(prompt, max_new_tokens=250)
    out = vibes(output)

    # Only reserve a unique path here. The handle is closed before soundfile
    # reopens the file by name, which is required on platforms that forbid
    # opening a file twice (e.g. Windows). delete=False keeps the file around
    # so the caller can serve it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        wav_path = f.name

    # soundfile only accepts float64/float32/int32/int16 data, and the pipeline
    # may run in half precision, so normalise to float32 before writing.
    # NOTE(review): the transpose assumes out["audio"][0] is laid out as
    # (channels, samples), as the original code did — confirm.
    audio = np.asarray(out["audio"][0], dtype=np.float32).T
    sf.write(wav_path, audio, out["sampling_rate"])

    return wav_path, output
# Custom stylesheet passed to gr.Blocks(css=...). It targets the elements
# tagged elem_id="container" and elem_id="intro" in the UI definition:
# #container is capped at 80rem wide and centred; #intro is full width with
# centred text.
css = """
#container{
margin: 0 auto;
max-width: 80rem;
}
#intro{
max-width: 100%;
text-align: center;
margin: 0 auto;
}
"""
# Gradio blocks demo
# NOTE(review): the source had lost its indentation; the nesting below (two
# columns inside one row, accordion and click handler directly under Blocks)
# is the reconstructed layout — confirm against the deployed app.
with gr.Blocks(css=css) as demo_blocks:
    gr.Markdown(title, elem_id="intro")

    with gr.Row(elem_id="container"):
        # Left column: prompt entry and trigger button.
        with gr.Column():
            inp_text = gr.Textbox(label="Input Prompt", info="What would you like MusicGen to synthesise?")
            btn = gr.Button("Generate Music! 🎶")
        # Right column: generated audio plus the enriched prompt that produced it.
        with gr.Column():
            out = gr.Audio(autoplay=False, label="Generated Music", show_label=True)
            prompt_text = gr.Textbox(label="Upsampled Prompt")

    # Collapsed-by-default usage snippet shown to visitors.
    with gr.Accordion("Use MusicGen with Transformers 🤗", open=False):
        gr.Markdown(
            """
```python
import torch
import soundfile as sf
from transformers import pipeline
synthesiser = pipeline("text-to-audio",
"facebook/musicgen-stereo-medium",
device="cuda:0",
torch_dtype=torch.float16)
music = synthesiser("lo-fi music with a soothing melody",
forward_params={"max_new_tokens": 256})
sf.write("musicgen_out.wav", music["audio"][0].T, music["sampling_rate"])
```
"""
        )

    # generate_audio returns (wav_path, upsampled_prompt), matching the two
    # output components in order.
    btn.click(generate_audio, inp_text, [out, prompt_text])

# Enable request queuing, then start the web server.
demo_blocks.queue().launch()