|
67 | 67 | name?: string; // Some APIs might have a name field for models
|
68 | 68 | };
|
69 | 69 |
|
/**
 * One voice parsed out of a KokoroTTS voice-combination string
 * (e.g. `af_bella(2)+af_sky(1)`).
 */
type KokoroVoiceCombination = {
	/** Voice identifier as written in the combination string. */
	name: string;
	/** Relative weight of the voice; 1 when the string gives no explicit weight. */
	weight: number;
	/** Share of the total weight, on a 0-100 scale. */
	percentage: number;
};
| 76 | +
|
// Voices available for selection; the first entry seeds the custom Kokoro combination input.
let voices: DisplayVoice[] = [];
// Models fetched from the TTS backend (for KokoroTTS, those owned by 'kokoro').
let models: FetchedModel[] = [];
// Parsed entries of the custom KokoroTTS voice combination, rendered as the "Voice Distribution" list.
let kokoroVoiceCombinations: KokoroVoiceCombination[] = [];
72 | 80 |
|
/**
 * Normalizes a KokoroTTS base URL so that paths such as `/v1/models` can be
 * appended without producing a double slash.
 *
 * Surrounding whitespace is removed before the trailing slashes are stripped;
 * otherwise a pasted value like `"http://host/ "` would keep its slash.
 *
 * @param url Base URL as entered by the user.
 * @returns The URL without surrounding whitespace or trailing slashes, or `''`
 *   when `url` is empty.
 */
const getCleanKokoroUrl = (url: string): string => {
	if (!url) return '';
	return url.trim().replace(/\/+$/, '');
};
|
78 | 86 |
|
/**
 * Parses a KokoroTTS voice-combination string and stores the resulting mix in
 * `kokoroVoiceCombinations`.
 *
 * Each voice is written as `name` or `name(weight)`, for example
 * `af_alloy+af_heart` or `af_bella(2)+af_sky(1)`. A voice without an explicit
 * weight counts as 1. A voice that appears more than once is merged into a
 * single entry whose weight is the sum of its occurrences, so every stored
 * entry has a unique `name` (the list is rendered with `name` as its key).
 *
 * @param combinationString Raw combination text; `null`, `undefined` or an
 *   empty string clears the stored combinations.
 */
const parseKokoroVoiceCombinations = (combinationString: string | null | undefined): void => {
	if (!combinationString) {
		kokoroVoiceCombinations = [];
		return;
	}

	// Captures the voice name and, when present, the numeric weight in parentheses.
	const voiceRegex = /([\w-]+)(?:\((\d+(?:\.\d+)?)\))?/g;

	// A Map keeps first-seen order while letting repeated names accumulate weight.
	const weightByName = new Map<string, number>();
	let totalWeight = 0;

	for (const match of combinationString.matchAll(voiceRegex)) {
		const voiceName = match[1];
		if (!voiceName) continue;

		const weight = match[2] ? parseFloat(match[2]) : 1; // Default weight is 1
		weightByName.set(voiceName, (weightByName.get(voiceName) ?? 0) + weight);
		totalWeight += weight;
	}

	// Percentages stay at 0 when every weight is 0, avoiding a division by zero.
	kokoroVoiceCombinations = Array.from(weightByName, ([name, weight]) => ({
		name,
		weight,
		percentage: totalWeight > 0 ? (weight / totalWeight) * 100 : 0
	}));
};
| 121 | +
|
// Keep the parsed voice distribution in sync with the inputs it is derived from:
// re-parse whenever the engine, the selected voice, or the custom combination
// string changes, and clear it when the custom combination is not active so no
// stale entries are left behind.
$: if (TTS_ENGINE === 'kokoro' && TTS_VOICE === '_custom_kokoro_combination_') {
	parseKokoroVoiceCombinations(TTS_KOKORO_CUSTOM_COMBINATION_STRING);
} else {
	kokoroVoiceCombinations = [];
}
| 126 | +
|
79 | 127 | const getModels = async () => {
|
80 | 128 | if (TTS_ENGINE === 'kokoro') {
|
81 | 129 | const cleanUrl = getCleanKokoroUrl(TTS_KOKORO_API_BASE_URL);
|
|
87 | 135 | const response = await fetch(`${cleanUrl}/v1/models`);
|
88 | 136 | if (!response.ok) throw new Error(`HTTP error! status: ${response.status}`);
|
89 | 137 | const data = await response.json();
|
90 |
| - // Filter for "kokoro" owned models based on your example response |
91 | 138 | models = data.data.filter((m: FetchedModel) => m.owned_by === 'kokoro');
|
92 | 139 | console.log('KokoroTTS models:', models);
|
93 | 140 | } catch (e: any) {
|
|
152 | 199 |
|
153 | 200 | if (res) {
|
154 | 201 | console.log(res);
|
155 |
| - // Assuming res.voices directly contains objects with id and name or can be mapped to it |
156 | 202 | fetchedVoices = res.voices.map((v: any) => ({
|
157 |
| - id: v.id || v.name, // Fallback to name if id not present (e.g., some ElevenLabs voices) |
158 |
| - name: v.name || v.id // Fallback to id if name not present |
| 203 | + id: v.id || v.name, |
| 204 | + name: v.name || v.id |
159 | 205 | }));
|
160 | 206 | fetchedVoices.sort((a, b) => a.name.localeCompare(b.name, $i18n.resolvedLanguage));
|
161 | 207 | }
|
|
181 | 227 | AZURE_SPEECH_REGION: TTS_AZURE_SPEECH_REGION,
|
182 | 228 | AZURE_SPEECH_BASE_URL: TTS_AZURE_SPEECH_BASE_URL,
|
183 | 229 | AZURE_SPEECH_OUTPUT_FORMAT: TTS_AZURE_SPEECH_OUTPUT_FORMAT,
|
184 |
| - KOKORO_API_BASE_URL: getCleanKokoroUrl(TTS_KOKORO_API_BASE_URL), // Store clean URL |
| 230 | + KOKORO_API_BASE_URL: getCleanKokoroUrl(TTS_KOKORO_API_BASE_URL), |
185 | 231 | ...(TTS_ENGINE === 'kokoro' && {
|
186 | 232 | KOKORO_NORMALIZATION_OPTIONS: { normalize: TTS_KOKORO_ENABLE_NORMALIZATION }
|
187 | 233 | })
|
|
226 | 272 | TTS_ENGINE = res.tts.ENGINE;
|
227 | 273 | TTS_MODEL = res.tts.MODEL;
|
228 | 274 |
|
229 |
| - // Handle initial loading of KokoroTTS voice |
230 | 275 | if (res.tts.ENGINE === 'kokoro' && res.tts.VOICE) {
|
231 |
| - // Check if the loaded voice string indicates a custom combination (contains '+' or '(') |
232 | 276 | if (res.tts.VOICE.includes('+') || res.tts.VOICE.includes('(')) {
|
233 | 277 | TTS_KOKORO_CUSTOM_COMBINATION_STRING = res.tts.VOICE;
|
234 |
| - TTS_VOICE = '_custom_kokoro_combination_'; // Set special value to show custom input |
| 278 | + TTS_VOICE = '_custom_kokoro_combination_'; |
| 279 | + parseKokoroVoiceCombinations(res.tts.VOICE); // Parse initial value |
235 | 280 | } else {
|
236 | 281 | TTS_VOICE = res.tts.VOICE;
|
237 | 282 | }
|
238 | 283 | } else {
|
239 | 284 | TTS_VOICE = res.tts.VOICE;
|
240 |
| - TTS_KOKORO_CUSTOM_COMBINATION_STRING = ''; // Ensure it's clear for other engines |
| 285 | + TTS_KOKORO_CUSTOM_COMBINATION_STRING = ''; |
241 | 286 | }
|
242 | 287 |
|
243 | 288 | TTS_SPLIT_ON = res.tts.SPLIT_ON || TTS_RESPONSE_SPLIT.PUNCTUATION;
|
244 | 289 |
|
245 | 290 | TTS_AZURE_SPEECH_REGION = res.tts.AZURE_SPEECH_REGION;
|
246 | 291 | TTS_AZURE_SPEECH_BASE_URL = res.tts.AZURE_SPEECH_BASE_URL;
|
247 | 292 | TTS_AZURE_SPEECH_OUTPUT_FORMAT = res.tts.AZURE_SPEECH_OUTPUT_FORMAT;
|
248 |
| - TTS_KOKORO_API_BASE_URL = res.tts.KOKORO_API_BASE_URL || ''; // Load the stored URL |
| 293 | + TTS_KOKORO_API_BASE_URL = res.tts.KOKORO_API_BASE_URL || ''; |
249 | 294 | TTS_KOKORO_ENABLE_NORMALIZATION = res.tts.KOKORO_NORMALIZATION_OPTIONS?.normalize ?? true;
|
250 | 295 |
|
251 | 296 | STT_OPENAI_API_BASE_URL = res.stt.OPENAI_API_BASE_URL;
|
|
271 | 316 | <form
|
272 | 317 | class="flex flex-col h-full justify-between space-y-3 text-sm"
|
273 | 318 | on:submit|preventDefault={async () => {
|
274 |
| - // Basic client-side validation for custom Kokoro voice string |
275 | 319 | if (
|
276 | 320 | TTS_ENGINE === 'kokoro' &&
|
277 | 321 | TTS_VOICE === '_custom_kokoro_combination_' &&
|
278 | 322 | !TTS_KOKORO_CUSTOM_COMBINATION_STRING
|
279 | 323 | ) {
|
280 | 324 | toast.error($i18n.t('Please enter a custom voice combination for KokoroTTS.'));
|
281 |
| - return; // Prevent saving |
| 325 | + return; |
282 | 326 | }
|
283 | 327 |
|
284 | 328 | await updateConfigHandler();
|
|
528 | 572 | bind:value={TTS_ENGINE}
|
529 | 573 | placeholder="Select a mode"
|
530 | 574 | on:change={async (e) => {
|
531 |
| - TTS_VOICE = ''; // Clear voice selection on engine change |
532 |
| - TTS_MODEL = ''; // Clear model selection on engine change |
533 |
| - TTS_KOKORO_CUSTOM_COMBINATION_STRING = ''; // Clear custom combination string |
534 |
| - TTS_KOKORO_ENABLE_NORMALIZATION = true; // Reset normalization toggle on engine change |
535 |
| - await updateConfigHandler(); // Save current config (including new engine) |
536 |
| - await getVoices(); // Fetch voices for the new engine |
537 |
| - await getModels(); // Fetch models for the new engine |
| 575 | + TTS_VOICE = ''; |
| 576 | + TTS_MODEL = ''; |
| 577 | + TTS_KOKORO_CUSTOM_COMBINATION_STRING = ''; |
| 578 | + TTS_KOKORO_ENABLE_NORMALIZATION = true; |
| 579 | + kokoroVoiceCombinations = []; // Clear combinations on engine change |
| 580 | + await updateConfigHandler(); |
| 581 | + await getVoices(); |
| 582 | + await getModels(); |
538 | 583 |
|
539 | 584 | if (e.target?.value === 'openai') {
|
540 | 585 | TTS_VOICE = 'alloy';
|
|
793 | 838 | <select
|
794 | 839 | class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
|
795 | 840 | bind:value={TTS_VOICE}
|
796 |
| - on:change={() => { |
797 |
| - if ( |
798 |
| - TTS_VOICE === '_custom_kokoro_combination_' && |
799 |
| - !TTS_KOKORO_CUSTOM_COMBINATION_STRING && |
800 |
| - voices.length > 0 |
801 |
| - ) { |
802 |
| - TTS_KOKORO_CUSTOM_COMBINATION_STRING = voices[0].id; |
| 841 | + on:change={(e) => { |
| 842 | + const selectedValue = e.target.value; |
| 843 | + if (selectedValue === '_custom_kokoro_combination_') { |
| 844 | + // If custom is selected, ensure we have a default or prompt for input |
| 845 | + if (!TTS_KOKORO_CUSTOM_COMBINATION_STRING && voices.length > 0) { |
| 846 | + TTS_KOKORO_CUSTOM_COMBINATION_STRING = voices[0].id; |
| 847 | + } |
| 848 | + parseKokoroVoiceCombinations(TTS_KOKORO_CUSTOM_COMBINATION_STRING); // Ensure parsing on selection |
| 849 | + } else { |
| 850 | + kokoroVoiceCombinations = []; // Clear combinations if a specific voice is selected |
803 | 851 | }
|
804 | 852 | }}
|
805 | 853 | required
|
|
830 | 878 | 'Enter voice combinations (e.g., af_alloy+af_heart) or weighted combinations (e.g., af_bella(2)+af_sky(1)).'
|
831 | 879 | )}
|
832 | 880 | </div>
|
| 881 | + |
| 882 | + <!-- Displaying percentages --> |
| 883 | + {#if kokoroVoiceCombinations.length > 0} |
| 884 | + <div class="mt-3 p-2 bg-gray-100 dark:bg-gray-800 rounded-lg"> |
| 885 | + <div class="text-xs font-medium mb-1.5">{$i18n.t('Voice Distribution')}</div> |
| 886 | + {#each kokoroVoiceCombinations as combo (combo.name)} |
| 887 | + <div class="flex justify-between text-xs"> |
| 888 | + <span>{combo.name}:</span> |
| 889 | + <span>{combo.percentage.toFixed(1)}%</span> |
| 890 | + </div> |
| 891 | + {/each} |
| 892 | + </div> |
| 893 | + {/if} |
833 | 894 | </div>
|
834 | 895 | {/if}
|
835 | 896 |
|
836 |
| - <!-- Normalization Toggle (always visible for KokoroTTS) --> |
837 | 897 | <div class="mt-2 mb-2 flex w-full justify-between items-center">
|
838 | 898 | <div class="text-xs font-medium">{$i18n.t('Enable Text Normalization')}</div>
|
839 | 899 | <label class="relative inline-flex items-center cursor-pointer">
|
|
0 commit comments