import gradio as gr
import numpy as np
import json
import os
from os import walk
from models import build_model
from kokoro import generate
import time
from datetime import datetime
import torch
import requests


# Load the model and voicepack
device = 'cuda' if torch.cuda.is_available() else 'cpu'
MODEL = build_model('kokoro-v0_19-half.pth', device)

# Module-level voice registry. VOICE_NAME[i] is the file name of voices[i];
# the handlers below look voices up by name through this pairing.
# (No `global` statements: at module scope they have no effect.)
voices = []
VOICE_NAME = []
model_list = []
os.makedirs("voices", exist_ok=True)

# Only the top level of voices/ is scanned, so take the first walk() entry.
_, _, _voice_files = next(walk("voices/"), ("voices/", [], []))
VOICE_NAME.extend(_voice_files)

# map_location places each tensor on `device` while it is deserialized, so a
# voicepack that was saved from a CUDA tensor still loads on a CPU-only host.
voices = [
    torch.load(f'voices/{vn}', map_location=device, weights_only=True).to(device)
    for vn in VOICE_NAME
]
    
# Get top-k indices
def get_topk_indices(vector_index, k):
    """Return the k row indices with the largest deviation score for one voice.

    Parameters:
    - vector_index: integer position of the voice in the module-level `diffs` list
    - k: how many indices to return; coerced with int()

    Returns:
    - list of int row indices, largest score first (torch.topk default order)
    """
    # One score per row: sum over the last axis, then take column 0 of the
    # remaining middle axis.
    per_row_score = diffs[vector_index].sum(dim=-1)[:, 0]
    # torch.topk requires an integer k. The other handlers in this file coerce
    # the slider value with int(); do the same here for consistency.
    top = torch.topk(per_row_score, k=int(k))
    return top.indices.tolist()
# Visualize top-k differences
def visualize_topk(vector_index, k):
    """Render the top-k indices of the named voice as an indented JSON list.

    `vector_index` is a voice file name; it is resolved to its position in
    VOICE_NAME (ValueError if absent) before get_topk_indices is called.
    """
    position = VOICE_NAME.index(vector_index)
    return json.dumps(get_topk_indices(position, k), indent=2)
    
#FFT interpolation
import torch

def interpolate_vectors(vec1, vec2, t,importances=None):
    """
    Interpolate between two vectors in the frequency domain using PyTorch.

    Each input is flattened to 2-D (first axis kept, remaining axes merged),
    an FFT is taken along the first axis, magnitude and phase are blended
    separately, and the real part of the inverse FFT is returned.

    Parameters:
    - vec1: torch.Tensor, e.g. of shape (511, 1, 256)
    - vec2: torch.Tensor with the same first-axis length and element count
    - t: interpolation parameter between 0 and 1 (0 gives vec1, 1 gives vec2)
    - importances: optional list/tensor of indices along the FFT axis. When
      given, only those frequency bins are blended and every other bin keeps
      vec1's magnitude and phase. None blends all bins.

    Returns:
    - Real tensor with the shape of vec1 (float32 for float16/float32 inputs)

    Raises:
    - ValueError if the two inputs do not flatten to the same 2-D shape
    """
    out_shape = vec1.shape
    n_rows = out_shape[0]

    # Flatten to (rows, features). reshape, unlike view, also accepts
    # non-contiguous inputs, and no size is hard-coded.
    flat1 = vec1.reshape(n_rows, -1)
    flat2 = vec2.reshape(n_rows, -1)
    if flat1.shape != flat2.shape:
        raise ValueError(
            f"shape mismatch: {tuple(vec1.shape)} vs {tuple(vec2.shape)}"
        )

    # Voicepacks written by save_custom_voice are float16. Half-precision FFT
    # is restricted (CUDA only, power-of-two lengths), so compute in float32.
    if flat1.dtype not in (torch.float32, torch.float64):
        flat1 = flat1.float()
    if flat2.dtype not in (torch.float32, torch.float64):
        flat2 = flat2.float()

    # Apply FFT along the first dimension
    fft1 = torch.fft.fft(flat1, dim=0)
    fft2 = torch.fft.fft(flat2, dim=0)

    mag1 = torch.abs(fft1)
    mag2 = torch.abs(fft2)
    phase1 = torch.angle(fft1)
    phase2 = torch.angle(fft2)

    if importances is not None:
        bins = torch.as_tensor(importances, dtype=torch.long, device=flat1.device)
        # Use the first voice as the base and blend only the selected bins.
        # clone() keeps mag1 intact so the right-hand side below always reads
        # the unmodified magnitudes.
        mag_interpolated = mag1.clone()
        phase_diff = torch.zeros_like(phase1)
        mag_interpolated[bins] = (1 - t) * mag1[bins] + t * mag2[bins]
        phase_diff[bins] = phase2[bins] - phase1[bins]
    else:
        mag_interpolated = (1 - t) * mag1 + t * mag2
        phase_diff = phase2 - phase1

    # Wrap the phase difference into (-pi, pi] so the phase moves along the
    # shorter arc between the two angles.
    phase_diff_wrapped = torch.angle(torch.exp(1j * phase_diff))
    phase_interpolated = phase1 + t * phase_diff_wrapped

    # Combine magnitude and phase
    interpolated_fft = mag_interpolated * torch.exp(1j * phase_interpolated)

    # Inverse FFT; only the real part is kept
    interpolated_seq = torch.fft.ifft(interpolated_fft, dim=0).real

    # Restore the layout of the first input, e.g. (511, 1, 256)
    return interpolated_seq.reshape(out_shape)

# Per-voice absolute deviation from the average voice; read by the top-k code.
# torch.stack raises on an empty list, so an empty voices/ folder (for example
# a fresh checkout) skips the statistics instead of crashing at import time.
if voices:
    voices_mean = torch.stack(voices, dim=0).mean(dim=0)
    print("computing diferences of each voice")
    diffs = [torch.abs(voice - voices_mean) for voice in voices]
else:
    voices_mean = None
    diffs = []


print(f'Loaded voices: {len(voices)}')

# Function for generating audio from text
def generate_audio_from_text(text, vector_index_1, vector_index_2, slice_value,topk=0):
    """Synthesize `text` with a voice interpolated between two loaded voices.

    Parameters:
    - text: text to synthesize
    - vector_index_1, vector_index_2: voice file names as listed in VOICE_NAME
    - slice_value: interpolation parameter passed to interpolate_vectors as t
    - topk: when greater than 0, only this many selected indices are blended;
      0 (the default) blends everything

    Returns:
    - (24000, audio): the constant 24000 followed by the flattened audio
      returned by generate(); presumably consumed by gr.Audio as
      (sample_rate, data) -- confirm against the Gradio version in use

    Raises:
    - ValueError if either name is not present in VOICE_NAME
    """
    pos_a = VOICE_NAME.index(vector_index_1)
    pos_b = VOICE_NAME.index(vector_index_2)

    # Clone so the cached voice tensors are never modified downstream.
    voice_a = voices[pos_a].clone()
    voice_b = voices[pos_b].clone()

    tk_idx = None
    k = int(topk) if topk else 0
    if k > 0:
        score_a = diffs[pos_a].sum(dim=-1)[:, 0]
        score_b = diffs[pos_b].sum(dim=-1)[:, 0]
        # Rows where voice 2 deviates from the mean voice more than voice 1.
        # The selection must be handed to interpolate_vectors; previously it
        # was computed and then dropped, so the slider had no effect.
        # NOTE(review): rows are ranked in the voicepack's own axis, while
        # interpolate_vectors applies the indices to FFT bins -- confirm that
        # this pairing is intended.
        tk_idx = torch.topk(score_b - score_a, k=k).indices.tolist()

    interpolated_vector = interpolate_vectors(
        voice_a, voice_b, slice_value, importances=tk_idx
    )

    # Generate audio; the second return value of generate() is not used here.
    audio, _ = generate(MODEL, text, interpolated_vector)
    return 24000, audio.flatten()
    
def reload_voices():
    """Rescan voices/ and refresh the module-level voice registry.

    Updates VOICE_NAME, voices, diffs, voices_mean and model_list. The three
    lists are updated in place so every existing reference sees the new
    content. Earlier versions only assigned local variables, which left the
    registry untouched.

    Everything is loaded and computed before shared state is modified, so an
    exception while loading leaves the previous registry intact.

    This function does not touch any Gradio component; dropdown choices built
    elsewhere are not refreshed by it.
    """
    global voices_mean, model_list

    # Top level of voices/ only, mirroring the initial load.
    _, _, filenames = next(walk("voices/"), ("voices/", [], []))
    # map_location lets voicepacks saved from CUDA tensors load on CPU hosts.
    loaded = [
        torch.load(f'voices/{vn}', map_location=device, weights_only=True).to(device)
        for vn in filenames
    ]

    if loaded:
        new_mean = torch.stack(loaded, dim=0).mean(dim=0)
        print("computing diferences of each voice")
        new_diffs = [torch.abs(voice - new_mean) for voice in loaded]
    else:
        # torch.stack rejects an empty list; an empty folder is a valid state.
        new_mean = None
        new_diffs = []

    VOICE_NAME[:] = filenames
    voices[:] = loaded
    diffs[:] = new_diffs
    voices_mean = new_mean
    model_list = list_models()

# Function to save a custom voice
def save_custom_voice(name, vector_index_1, vector_index_2, slice_value,topk=0):
    """Interpolate two loaded voices and save the result under voices/.

    Parameters:
    - name: user-chosen prefix for the file name
    - vector_index_1, vector_index_2: voice file names as listed in VOICE_NAME
    - slice_value: interpolation parameter passed to interpolate_vectors as t
    - topk: when greater than 0, only this many selected indices are blended;
      0 (the default) blends everything

    Returns:
    - (status message, save path)

    Raises:
    - ValueError if either voice name is not present in VOICE_NAME
    """
    # Describe the mix in the file name, using the source names without their
    # extension.
    stem_a = os.path.splitext(vector_index_1)[0]
    stem_b = os.path.splitext(vector_index_2)[0]
    fmt = f"_mixed_{stem_a}_{stem_b}_ratio_{slice_value}"

    pos_a = VOICE_NAME.index(vector_index_1)
    pos_b = VOICE_NAME.index(vector_index_2)
    voice_a = voices[pos_a].clone()
    voice_b = voices[pos_b].clone()

    tk_idx = None
    k = int(topk) if topk else 0
    if k > 0:
        score_a = diffs[pos_a].sum(dim=-1)[:, 0]
        score_b = diffs[pos_b].sum(dim=-1)[:, 0]
        # The selection must reach interpolate_vectors; previously it was
        # computed and then dropped, so top-k never influenced the saved voice.
        tk_idx = torch.topk(score_b - score_a, k=k).indices.tolist()

    custom_voice = interpolate_vectors(
        voice_a, voice_b, slice_value, importances=tk_idx
    )

    # `name` is free text from the UI: keep only its last path component so
    # the file cannot be written outside voices/.
    safe_name = os.path.basename(str(name))
    save_path = f"voices/{safe_name}{fmt}.pt"
    os.makedirs("voices", exist_ok=True)
    # Stored as float16.
    torch.save(custom_voice.half(), save_path)

    # reload all voices again
    reload_voices()
    return f"Voice saved as {save_path}",save_path

# Function to load saved voices
def load_voices():
    """Return the names of the .pt files in voices/, without the extension.

    Only the trailing ".pt" is removed (a name such as "x.pt.y.pt" becomes
    "x.pt.y"), and the result is sorted so the order is deterministic.
    Returns an empty list when voices/ does not exist.
    """
    if not os.path.isdir("voices"):
        return []
    suffix = ".pt"
    return sorted(
        entry[:-len(suffix)]
        for entry in os.listdir("voices")
        if entry.endswith(suffix)
    )


# Interpolation tab: text plus two voices, a blend ratio and a top-k count in,
# synthesized audio out.
_interp_inputs = [
    gr.Textbox(label="Text Input"),
    gr.Dropdown(choices=VOICE_NAME, label="Select Vector 1"),
    gr.Dropdown(choices=VOICE_NAME, label="Select Vector 2"),
    gr.Slider(minimum=0, maximum=1, step=0.01, label="Slice Value"),
    gr.Slider(minimum=0, maximum=511, step=1, label="top-K diferent indexes to use"),
]
_interp_outputs = [
    gr.Audio(interactive=False, format="wav", label="Synthesized Audio"),
]
iface = gr.Interface(
    fn=generate_audio_from_text,
    inputs=_interp_inputs,
    outputs=_interp_outputs,
    title="Kokoro TTS voices Interpolation gui ",
    description="Select 2 voices indexes, then a interpolation alpha value, and top-k diferent indexes to merge,if 0 are selected,the voice will merge all its indexes. It uses linear interpolation (1 - alpha) *  voice1 + alpha * voice2",
)

# Save tab: same voice/ratio/top-k controls plus a name; reports the save
# status and offers the written file for download.
_save_inputs = [
    gr.Textbox(label="Voice Name"),
    gr.Dropdown(choices=VOICE_NAME, label="Select Vector 1"),
    gr.Dropdown(choices=VOICE_NAME, label="Select Vector 2"),
    gr.Slider(minimum=0, maximum=1, step=0.01, label="Slice Value"),
    gr.Slider(minimum=0, maximum=511, step=1, label="top-K diferent indexes to use"),
]
_save_outputs = [
    gr.Textbox(label="Save Status"),
    gr.File(label="Download File"),
]
save_iface = gr.Interface(
    fn=save_custom_voice,
    inputs=_save_inputs,
    outputs=_save_outputs,
    title="Save Custom Voice",
    description="Save your custom voice with a name.",
)


# Top-k tab: pick one voice and a count, show the selected indices as text.
_topk_voice_picker = gr.Dropdown(choices=VOICE_NAME, label="Select Vector")
_topk_count_slider = gr.Slider(minimum=1, maximum=100, step=1, label="Top-k Indices")
_topk_result_box = gr.Textbox(label="Top-k Differences")
topk_iface = gr.Interface(
    fn=visualize_topk,
    inputs=[_topk_voice_picker, _topk_count_slider],
    outputs=_topk_result_box,
    title="Visualize Top-k Differences",
    description="View the indices with the largest differences for a selected vector.",
)

def list_models():
    """Return the file names found directly inside voices/.

    Subfolders are not descended into: d_model() maps a name to
    voices/<name>, so a bare file name collected from a nested folder would
    resolve to a path that does not exist. Returns an empty list when voices/
    is missing.
    """
    # The first walk() entry describes the top-level folder itself.
    _, _, filenames = next(walk("voices/"), ("voices/", [], []))
    return list(filenames)
    

def list_models_update_gr():
    """Return a gr.update that sets dropdown choices to the files in voices/.

    Only files directly inside voices/ are offered: d_model() maps a name to
    voices/<name>, so a bare file name from a nested folder would not resolve
    to an existing path.
    """
    # The first walk() entry describes the top-level folder itself.
    _, _, filenames = next(walk("voices/"), ("voices/", [], []))
    return gr.update(choices=list(filenames))

# Function to return the full path of the selected model
def d_model(name):
    """Return the path of the selected voice file inside voices/.

    Returns None when nothing is selected (None or an empty string) instead
    of building a path such as "voices/None" that points at no file.
    """
    if not name:
        return None
    return f"voices/{name}"

# Download tab: choose a voice file, refresh the list, fetch the file.
with gr.Blocks() as download_models_tab:
    # Choices are read from disk once, when the tab is built.
    model_dropdown = gr.Dropdown(choices=list_models(), label="Select Model")

    # Both action buttons share one row.
    with gr.Row():
        refresh_button = gr.Button("Refresh")
        download_button = gr.Button("Download")

    # Refresh re-reads the folder and replaces the dropdown choices.
    refresh_button.click(fn=list_models_update_gr, inputs=[], outputs=model_dropdown)

    # The file component is created here, after the button row, and receives
    # the path produced by d_model.
    download_file = gr.File(label="Download File")
    download_button.click(fn=d_model, inputs=[model_dropdown], outputs=[download_file])

# Assemble the application: each entry pairs a tab's interface with its label,
# in display order; the app is launched immediately.
_tabs = [
    (iface, "Interpolation"),
    (save_iface, "Save Custom Voice"),
    (topk_iface, "Visualize Top-k"),
    (download_models_tab, "Download Models"),
]
gr.TabbedInterface(
    [interface for interface, _ in _tabs],
    [label for _, label in _tabs],
).launch()