fantos committed on
Commit
a638b1c
·
verified ·
1 Parent(s): 0473607

Create app-backup.py

Browse files
Files changed (1) hide show
  1. app-backup.py +117 -0
app-backup.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import outetts
4
+ from outetts.version.v2.interface import _DEFAULT_SPEAKERS
5
+ import torch
6
+ import spaces
7
+
8
def get_available_speakers():
    """Return the names of the bundled default speakers as a list."""
    return list(_DEFAULT_SPEAKERS.keys())
11
+
12
@spaces.GPU
def generate_tts(text, temperature, repetition_penalty, speaker_selection, reference_audio):
    """Synthesize speech for *text* and return ``(wav_path, error_message)``.

    One element of the returned pair is always ``None``: on success the
    first element is the path of the written WAV file and the second is
    ``None``; on any failure the first is ``None`` and the second is the
    error text.

    Args:
        text: Text to synthesize. Blank input is rejected up front.
        temperature: Forwarded to ``outetts.GenerationConfig``.
        repetition_penalty: Forwarded to ``outetts.GenerationConfig``.
        speaker_selection: Name of a default speaker. Ignored when
            ``reference_audio`` is given; a falsy value or the string
            ``"None"`` means no speaker is used.
        reference_audio: Optional path to an audio clip from which a custom
            speaker is created; takes precedence over ``speaker_selection``.

    Returns:
        A ``(output_path, None)`` tuple on success, ``(None, message)`` on
        failure.
    """
    # Local import so this block brings its own dependency into scope.
    import tempfile

    # Reject blank input before spending time on model setup.
    if not text or not text.strip():
        return None, "Please enter some text to synthesize."

    try:
        # Model setup lives inside the try so that load failures are reported
        # through the same (None, message) channel as generation failures.
        model_config = outetts.HFModelConfig_v2(
            model_path="OuteAI/OuteTTS-0.3-1B",
            tokenizer_path="OuteAI/OuteTTS-0.3-1B",
            dtype=torch.bfloat16,
            device="cuda",
        )
        interface = outetts.InterfaceHF(model_version="0.3", cfg=model_config)

        if reference_audio:
            speaker = interface.create_speaker(reference_audio)
        elif speaker_selection and speaker_selection != "None":
            speaker = interface.load_default_speaker(speaker_selection)
        else:
            speaker = None

        gen_cfg = outetts.GenerationConfig(
            text=text,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            max_length=4096,
            speaker=speaker,
        )
        output = interface.generate(config=gen_cfg)

        if output.audio is None:
            raise ValueError("Model failed to generate audio. This may be due to input length constraints or early EOS token.")

        # A unique file per request: a fixed "output.wav" would be overwritten
        # by any other request running at the same time.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
            output_path = handle.name
        output.save(output_path)
        return output_path, None
    except Exception as e:
        # UI boundary: surface the failure as text instead of raising.
        return None, str(e)
47
+
48
# Gradio front end: text input and trigger on the left, generated audio and
# voice settings on the right.
# NOTE(review): the container nesting below was reconstructed from a diff
# whose indentation had been stripped — confirm against the deployed layout.
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange") as demo:
    gr.Markdown("# Voice Clone Multilingual TTS")

    # Created hidden; the chained callback at the bottom toggles visibility.
    error_box = gr.Textbox(label="Error Messages", visible=False)

    with gr.Row():
        with gr.Column(scale=1):
            text_input = gr.Textbox(
                label="Text to Synthesize",
                placeholder="Enter text here...",
                lines=8,
            )

            submit_button = gr.Button("Generate Speech")

        with gr.Column(scale=1):
            audio_output = gr.Audio(label="Generated Audio", type="filepath")

            with gr.Group():
                speaker_dropdown = gr.Dropdown(
                    choices=get_available_speakers(),
                    value="en_male_1",
                    label="Speaker Selection",
                )

                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.1,
                    label="Temperature (lower = more stable tone, higher = more expressive)",
                )

                repetition_penalty = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=1.1,
                    label="Repetition Penalty",
                )

                reference_audio = gr.Audio(
                    label="Reference Audio (for voice cloning)",
                    type="filepath",
                )

                gr.Markdown("""
                ### Voice Cloning Guidelines:
                - Use around 7-10 seconds of clear, noise-free audio
                - For transcription interface will use Whisper turbo to transcribe the audio file
                - Longer audio clips will reduce maximum output length
                - Custom speaker overrides speaker selection
                """)

    # Run synthesis first, then show the error box only if it received text.
    generation_event = submit_button.click(
        fn=generate_tts,
        inputs=[
            text_input,
            temperature,
            repetition_penalty,
            speaker_dropdown,
            reference_audio,
        ],
        outputs=[audio_output, error_box],
    )
    generation_event.then(
        fn=lambda message: gr.update(visible=bool(message)),
        inputs=[error_box],
        outputs=[error_box],
    )

demo.launch()