{
  "decoder": {
    "type": "istftnet",
    "upsample_kernel_sizes": [20, 12],
    "upsample_rates": [10, 6],
    "gen_istft_hop_size": 5,
    "gen_istft_n_fft": 20,
    "resblock_dilation_sizes": [
      [1, 3, 5],
      [1, 3, 5],
      [1, 3, 5]
    ],
    "resblock_kernel_sizes": [3, 7, 11],
    "upsample_initial_channel": 512
  },
  "dim_in": 64,
  "dropout": 0.2,
  "hidden_dim": 512,
  "max_conv_dim": 512,
  "max_dur": 50,
  "multispeaker": true,
  "n_layer": 3,
  "n_mels": 80,
  "n_token": 178,
  "style_dim": 128
}