Skip to content

Commit 4bb995c

Browse files
committed
feat: display voice distribution percentages for KokoroTTS voice combinations input
1 parent d007819 commit 4bb995c

File tree

1 file changed

+87
-27
lines changed

1 file changed

+87
-27
lines changed

src/lib/components/admin/Settings/Audio.svelte

Lines changed: 87 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -67,15 +67,63 @@
6767
name?: string; // Some APIs might have a name field for models
6868
};
6969
70+
/**
 * One voice entry parsed out of a KokoroTTS voice-combination string.
 */
interface KokoroVoiceCombination {
	/** Voice identifier as written in the combination string. */
	name: string;
	/** Relative weight given in parentheses after the name; the parser uses 1 when none is given. */
	weight: number;
	/** This voice's share of the total weight, on a 0-100 scale. */
	percentage: number;
}
76+
7077
let voices: DisplayVoice[] = [];
7178
let models: FetchedModel[] = [];
79+
let kokoroVoiceCombinations: KokoroVoiceCombination[] = [];
7280
7381
/**
 * Normalizes a KokoroTTS base URL so that API paths can be appended to it.
 *
 * Surrounding whitespace is removed first; otherwise a pasted value such as
 * "http://host/ " would keep its trailing slash (the slash is no longer at the
 * end of the string) and produce a malformed request URL.
 *
 * @param url Base URL as entered by the user; may be empty.
 * @returns The URL without surrounding whitespace or trailing slashes, or ''
 *          when the input is empty.
 */
const getCleanKokoroUrl = (url: string) => {
	if (!url) return '';
	return url.trim().replace(/\/+$/, ''); // Remove trailing slashes
};
7886
87+
/**
 * Parses a KokoroTTS voice-combination string (e.g. "af_bella(2)+af_sky(1)")
 * and stores the per-voice weights and percentage shares in
 * `kokoroVoiceCombinations`.
 *
 * - Voices are separated by "+"; each may carry an optional "(weight)" suffix.
 *   A voice without a weight counts as weight 1.
 * - Segments that are not a well-formed `name` or `name(number)` are ignored,
 *   so a malformed weight such as "af_bella(abc)" does not show up as an
 *   extra voice called "abc".
 * - A voice that appears more than once is merged into a single entry whose
 *   weight is the sum of its occurrences. The list is rendered with the voice
 *   name as the each-block key, and keys must be unique.
 *
 * @param combinationString Raw combination string; null, undefined or ''
 *                          clears the parsed list.
 */
const parseKokoroVoiceCombinations = (combinationString: string | null | undefined) => {
	if (!combinationString) {
		kokoroVoiceCombinations = [];
		return;
	}

	// A whole segment: voice name, then an optional parenthesised numeric weight.
	const segmentPattern = /^([\w-]+)\s*(?:\(\s*(\d+(?:\.\d+)?)\s*\))?$/;

	// Map keeps first-seen order, so the display follows the order typed by the user.
	const weightsByName = new Map<string, number>();
	for (const rawSegment of combinationString.split('+')) {
		const match = segmentPattern.exec(rawSegment.trim());
		if (!match) continue;

		const voiceName = match[1];
		const weight = match[2] !== undefined ? parseFloat(match[2]) : 1; // Default weight is 1
		weightsByName.set(voiceName, (weightsByName.get(voiceName) ?? 0) + weight);
	}

	let totalWeight = 0;
	for (const weight of weightsByName.values()) {
		totalWeight += weight;
	}

	// When every weight is 0 the shares stay at 0 instead of dividing by zero.
	kokoroVoiceCombinations = Array.from(weightsByName, ([name, weight]) => ({
		name,
		weight,
		percentage: totalWeight > 0 ? (weight / totalWeight) * 100 : 0
	}));
};
121+
122+
// Reactive statement: while the custom KokoroTTS combination option is the
// selected voice, re-parse the combination string so the displayed
// percentages follow TTS_KOKORO_CUSTOM_COMBINATION_STRING.
$: if (TTS_VOICE === '_custom_kokoro_combination_') {
	if (TTS_ENGINE === 'kokoro') {
		parseKokoroVoiceCombinations(TTS_KOKORO_CUSTOM_COMBINATION_STRING);
	}
}
126+
79127
const getModels = async () => {
80128
if (TTS_ENGINE === 'kokoro') {
81129
const cleanUrl = getCleanKokoroUrl(TTS_KOKORO_API_BASE_URL);
@@ -87,7 +135,6 @@
87135
const response = await fetch(`${cleanUrl}/v1/models`);
88136
if (!response.ok) throw new Error(`HTTP error! status: ${response.status}`);
89137
const data = await response.json();
90-
// Filter for "kokoro" owned models based on your example response
91138
models = data.data.filter((m: FetchedModel) => m.owned_by === 'kokoro');
92139
console.log('KokoroTTS models:', models);
93140
} catch (e: any) {
@@ -152,10 +199,9 @@
152199
153200
if (res) {
154201
console.log(res);
155-
// Assuming res.voices directly contains objects with id and name or can be mapped to it
156202
fetchedVoices = res.voices.map((v: any) => ({
157-
id: v.id || v.name, // Fallback to name if id not present (e.g., some ElevenLabs voices)
158-
name: v.name || v.id // Fallback to id if name not present
203+
id: v.id || v.name,
204+
name: v.name || v.id
159205
}));
160206
fetchedVoices.sort((a, b) => a.name.localeCompare(b.name, $i18n.resolvedLanguage));
161207
}
@@ -181,7 +227,7 @@
181227
AZURE_SPEECH_REGION: TTS_AZURE_SPEECH_REGION,
182228
AZURE_SPEECH_BASE_URL: TTS_AZURE_SPEECH_BASE_URL,
183229
AZURE_SPEECH_OUTPUT_FORMAT: TTS_AZURE_SPEECH_OUTPUT_FORMAT,
184-
KOKORO_API_BASE_URL: getCleanKokoroUrl(TTS_KOKORO_API_BASE_URL), // Store clean URL
230+
KOKORO_API_BASE_URL: getCleanKokoroUrl(TTS_KOKORO_API_BASE_URL),
185231
...(TTS_ENGINE === 'kokoro' && {
186232
KOKORO_NORMALIZATION_OPTIONS: { normalize: TTS_KOKORO_ENABLE_NORMALIZATION }
187233
})
@@ -226,26 +272,25 @@
226272
TTS_ENGINE = res.tts.ENGINE;
227273
TTS_MODEL = res.tts.MODEL;
228274
229-
// Handle initial loading of KokoroTTS voice
230275
if (res.tts.ENGINE === 'kokoro' && res.tts.VOICE) {
231-
// Check if the loaded voice string indicates a custom combination (contains '+' or '(')
232276
if (res.tts.VOICE.includes('+') || res.tts.VOICE.includes('(')) {
233277
TTS_KOKORO_CUSTOM_COMBINATION_STRING = res.tts.VOICE;
234-
TTS_VOICE = '_custom_kokoro_combination_'; // Set special value to show custom input
278+
TTS_VOICE = '_custom_kokoro_combination_';
279+
parseKokoroVoiceCombinations(res.tts.VOICE); // Parse initial value
235280
} else {
236281
TTS_VOICE = res.tts.VOICE;
237282
}
238283
} else {
239284
TTS_VOICE = res.tts.VOICE;
240-
TTS_KOKORO_CUSTOM_COMBINATION_STRING = ''; // Ensure it's clear for other engines
285+
TTS_KOKORO_CUSTOM_COMBINATION_STRING = '';
241286
}
242287
243288
TTS_SPLIT_ON = res.tts.SPLIT_ON || TTS_RESPONSE_SPLIT.PUNCTUATION;
244289
245290
TTS_AZURE_SPEECH_REGION = res.tts.AZURE_SPEECH_REGION;
246291
TTS_AZURE_SPEECH_BASE_URL = res.tts.AZURE_SPEECH_BASE_URL;
247292
TTS_AZURE_SPEECH_OUTPUT_FORMAT = res.tts.AZURE_SPEECH_OUTPUT_FORMAT;
248-
TTS_KOKORO_API_BASE_URL = res.tts.KOKORO_API_BASE_URL || ''; // Load the stored URL
293+
TTS_KOKORO_API_BASE_URL = res.tts.KOKORO_API_BASE_URL || '';
249294
TTS_KOKORO_ENABLE_NORMALIZATION = res.tts.KOKORO_NORMALIZATION_OPTIONS?.normalize ?? true;
250295
251296
STT_OPENAI_API_BASE_URL = res.stt.OPENAI_API_BASE_URL;
@@ -271,14 +316,13 @@
271316
<form
272317
class="flex flex-col h-full justify-between space-y-3 text-sm"
273318
on:submit|preventDefault={async () => {
274-
// Basic client-side validation for custom Kokoro voice string
275319
if (
276320
TTS_ENGINE === 'kokoro' &&
277321
TTS_VOICE === '_custom_kokoro_combination_' &&
278322
!TTS_KOKORO_CUSTOM_COMBINATION_STRING
279323
) {
280324
toast.error($i18n.t('Please enter a custom voice combination for KokoroTTS.'));
281-
return; // Prevent saving
325+
return;
282326
}
283327

284328
await updateConfigHandler();
@@ -528,13 +572,14 @@
528572
bind:value={TTS_ENGINE}
529573
placeholder="Select a mode"
530574
on:change={async (e) => {
531-
TTS_VOICE = ''; // Clear voice selection on engine change
532-
TTS_MODEL = ''; // Clear model selection on engine change
533-
TTS_KOKORO_CUSTOM_COMBINATION_STRING = ''; // Clear custom combination string
534-
TTS_KOKORO_ENABLE_NORMALIZATION = true; // Reset normalization toggle on engine change
535-
await updateConfigHandler(); // Save current config (including new engine)
536-
await getVoices(); // Fetch voices for the new engine
537-
await getModels(); // Fetch models for the new engine
575+
TTS_VOICE = '';
576+
TTS_MODEL = '';
577+
TTS_KOKORO_CUSTOM_COMBINATION_STRING = '';
578+
TTS_KOKORO_ENABLE_NORMALIZATION = true;
579+
kokoroVoiceCombinations = []; // Clear combinations on engine change
580+
await updateConfigHandler();
581+
await getVoices();
582+
await getModels();
538583

539584
if (e.target?.value === 'openai') {
540585
TTS_VOICE = 'alloy';
@@ -793,13 +838,16 @@
793838
<select
794839
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
795840
bind:value={TTS_VOICE}
796-
on:change={() => {
797-
if (
798-
TTS_VOICE === '_custom_kokoro_combination_' &&
799-
!TTS_KOKORO_CUSTOM_COMBINATION_STRING &&
800-
voices.length > 0
801-
) {
802-
TTS_KOKORO_CUSTOM_COMBINATION_STRING = voices[0].id;
841+
on:change={(e) => {
842+
const selectedValue = e.target.value;
843+
if (selectedValue === '_custom_kokoro_combination_') {
844+
// If custom is selected, ensure we have a default or prompt for input
845+
if (!TTS_KOKORO_CUSTOM_COMBINATION_STRING && voices.length > 0) {
846+
TTS_KOKORO_CUSTOM_COMBINATION_STRING = voices[0].id;
847+
}
848+
parseKokoroVoiceCombinations(TTS_KOKORO_CUSTOM_COMBINATION_STRING); // Ensure parsing on selection
849+
} else {
850+
kokoroVoiceCombinations = []; // Clear combinations if a specific voice is selected
803851
}
804852
}}
805853
required
@@ -830,10 +878,22 @@
830878
'Enter voice combinations (e.g., af_alloy+af_heart) or weighted combinations (e.g., af_bella(2)+af_sky(1)).'
831879
)}
832880
</div>
881+
882+
<!-- Displaying percentages -->
883+
{#if kokoroVoiceCombinations.length > 0}
884+
<div class="mt-3 p-2 bg-gray-100 dark:bg-gray-800 rounded-lg">
885+
<div class="text-xs font-medium mb-1.5">{$i18n.t('Voice Distribution')}</div>
886+
{#each kokoroVoiceCombinations as combo (combo.name)}
887+
<div class="flex justify-between text-xs">
888+
<span>{combo.name}:</span>
889+
<span>{combo.percentage.toFixed(1)}%</span>
890+
</div>
891+
{/each}
892+
</div>
893+
{/if}
833894
</div>
834895
{/if}
835896

836-
<!-- Normalization Toggle (always visible for KokoroTTS) -->
837897
<div class="mt-2 mb-2 flex w-full justify-between items-center">
838898
<div class="text-xs font-medium">{$i18n.t('Enable Text Normalization')}</div>
839899
<label class="relative inline-flex items-center cursor-pointer">

0 commit comments

Comments
 (0)