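# Gradio demo: Turkish gender classification from speech using a fine-tuned wav2vec2 model.
# The model expects 16 kHz mono audio, so recorded/uploaded clips are resampled before inference.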
import gradio as gr
import torchaudio
from transformers import pipeline

# Load your model
classifier = pipeline("audio-classification", model="candenizkocak/wav2vec2-base_turkish_gender_classification")

# Resample (and downmix) audio to 16 kHz mono, the rate the wav2vec2 model expects
def resample_audio(audio_file, target_sampling_rate=16000):
    waveform, original_sample_rate = torchaudio.load(audio_file)
    # Downmix multi-channel recordings to mono so the pipeline receives a 1-D array
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    if original_sample_rate != target_sampling_rate:
        resampler = torchaudio.transforms.Resample(orig_freq=original_sample_rate, new_freq=target_sampling_rate)
        waveform = resampler(waveform)
    return waveform.squeeze().numpy(), target_sampling_rate

# Prediction function: returns {label: score} for the gr.Label output
def classify_audio(audio_file):
    # Guard against clicking "Classify" before any audio is provided
    if audio_file is None:
        raise gr.Error("Please record or upload an audio clip first.")

    # Resample the audio to 16 kHz mono
    resampled_audio, _ = resample_audio(audio_file)

    # Run the audio-classification pipeline
    prediction = classifier(resampled_audio)

    # Return predictions as a {label: score} dictionary
    return {entry['label']: entry['score'] for entry in prediction}

# Define Gradio interface
def demo():
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("## Turkish Gender Audio Classification")
        
        # Input Audio
        with gr.Row():
            audio_input = gr.Audio(type="filepath", label="Input Audio")
        
        # Output Labels
        with gr.Row():
            label_output = gr.Label(label="Prediction")
        
        # Predict Button
        classify_btn = gr.Button("Classify")

        # Define the interaction
        classify_btn.click(fn=classify_audio, inputs=audio_input, outputs=label_output)
        
    return demo

# Launch the demo
demo().launch()