Files
fish-audio-plugin/tts.ts
Clawdbot 4af0789ebe sync: match upstream PR #56891 at rebase 2026-03-30
Brings Gitea mirror up to date with the current state of the
openclaw/openclaw PR branch, including all fixes from Codex review:

- Namespaced directive keys (fishaudio_*/fish_* prefixes only)
- Strict latency directive validation with warnings
- Code quality cleanup, s2 model removal
- Contract and directive parsing tests
- README updated with prefixed directive docs

Source: Conan-Scott/openclaw@9787ef6e (feat/fish-audio-speech-provider)
2026-03-30 18:14:52 +11:00

147 lines
3.8 KiB
TypeScript

/** Base URL used when the caller does not supply a usable override. */
export const DEFAULT_FISH_AUDIO_BASE_URL = "https://api.fish.audio";

/**
 * Produces a base URL that can be safely joined with a `/path` suffix.
 *
 * Surrounding whitespace is trimmed and any run of trailing slashes is
 * removed. If nothing usable remains (the input was missing, blank, or made
 * up solely of slashes), the default base URL is returned so callers never
 * end up building a relative URL such as `/v1/tts`.
 *
 * @param baseUrl - Optional user-configured base URL.
 * @returns The cleaned base URL without trailing slashes, or
 *   {@link DEFAULT_FISH_AUDIO_BASE_URL} when the input is effectively empty.
 */
export function normalizeFishAudioBaseUrl(baseUrl?: string): string {
  const withoutTrailingSlashes = baseUrl?.trim().replace(/\/+$/, "");
  // `||` (not `??`) is deliberate: an empty string must also fall back.
  return withoutTrailingSlashes || DEFAULT_FISH_AUDIO_BASE_URL;
}
/**
 * Synthesizes speech via the Fish Audio `POST {baseUrl}/v1/tts` endpoint and
 * returns the raw audio bytes.
 *
 * @param params.text - Text to speak; must contain non-whitespace characters.
 * @param params.apiKey - Sent as a bearer token in the `Authorization` header.
 * @param params.baseUrl - Optional API base URL; passed through
 *   `normalizeFishAudioBaseUrl` before `/v1/tts` is appended.
 * @param params.referenceId - Voice identifier. Surrounding whitespace is
 *   removed before it is sent as `reference_id`.
 * @param params.model - TTS model name, sent in the `model` HTTP header.
 * @param params.format - Output format, sent as `format` in the JSON body.
 * @param params.latency - Sent as `latency` only when set to something other
 *   than `"normal"`.
 * @param params.speed - When provided, sent as `prosody.speed`.
 * @param params.temperature - When provided, sent as `temperature`.
 * @param params.topP - When provided, sent as `top_p`.
 * @param params.timeoutMs - Milliseconds after which the request is aborted.
 *   The timer stays armed until the response body has been read.
 * @returns A non-empty `Buffer` holding the response body.
 * @throws Error if `text` or `referenceId` is blank, if the API answers with a
 *   non-OK status (message includes up to 500 characters of the response
 *   body), or if the response body is empty. When the timeout fires, the
 *   rejection produced by the aborted `fetch` propagates unchanged.
 */
export async function fishAudioTTS(params: {
  text: string;
  apiKey: string;
  baseUrl?: string;
  referenceId: string;
  model: string;
  format: "mp3" | "opus" | "wav" | "pcm";
  latency?: "normal" | "balanced" | "low";
  speed?: number;
  temperature?: number;
  topP?: number;
  timeoutMs: number;
}): Promise<Buffer> {
  const {
    text,
    apiKey,
    baseUrl,
    referenceId,
    model,
    format,
    latency,
    speed,
    temperature,
    topP,
    timeoutMs,
  } = params;

  if (!text.trim()) {
    throw new Error("Fish Audio TTS: empty text");
  }
  // Validate and send the same trimmed value, so a padded voice ID cannot
  // pass validation and then reach the API with stray whitespace.
  const voiceId = referenceId.trim();
  if (!voiceId) {
    throw new Error("Fish Audio TTS: missing reference_id (voice)");
  }

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const url = `${normalizeFishAudioBaseUrl(baseUrl)}/v1/tts`;

    const body: Record<string, unknown> = {
      text,
      reference_id: voiceId,
      format,
    };
    // "normal" is omitted from the payload rather than sent explicitly.
    if (latency && latency !== "normal") {
      body.latency = latency;
    }
    // Prosody settings
    if (speed != null) {
      body.prosody = { speed };
    }
    if (temperature != null) {
      body.temperature = temperature;
    }
    if (topP != null) {
      body.top_p = topP;
    }

    // Fish Audio uses the `model` HTTP header (not a body field) to select
    // the TTS model. This is intentional per their API spec — don't move it
    // into the JSON body.
    const response = await fetch(url, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
        model,
      },
      body: JSON.stringify(body),
      signal: controller.signal,
    });

    if (!response.ok) {
      let errorDetail = "";
      try {
        // Cap at 500 chars to avoid log pollution from large error responses;
        // `slice` leaves shorter bodies untouched.
        const snippet = (await response.text()).slice(0, 500);
        errorDetail = snippet ? `: ${snippet}` : "";
      } catch {
        // Best effort: the status code alone is still reported below.
      }
      throw new Error(`Fish Audio API error (${response.status})${errorDetail}`);
    }

    const buffer = Buffer.from(await response.arrayBuffer());
    if (buffer.length === 0) {
      throw new Error("Fish Audio TTS produced empty audio");
    }
    return buffer;
  } finally {
    // Always disarm the timer, on success and on every failure path.
    clearTimeout(timeout);
  }
}
/**
 * Lists the TTS voices owned by the authenticated Fish Audio account.
 *
 * Issues `GET {baseUrl}/model?type=tts&self=true&page_size=100` with the API
 * key as a bearer token and maps each returned item to `{ id, name }`.
 * Only the single page returned by that request is read.
 *
 * The response is treated as untrusted JSON: a payload that is not an object,
 * has no `items` array, or contains entries that are not objects with a
 * non-blank string `_id` is tolerated, and such entries are skipped instead
 * of causing a `TypeError`.
 *
 * @param params.apiKey - Sent as a bearer token in the `Authorization` header.
 * @param params.baseUrl - Optional API base URL; passed through
 *   `normalizeFishAudioBaseUrl`.
 * @returns Voices with a trimmed `id` and a `name` taken from the trimmed
 *   `title`, falling back to the `id` when the title is missing or blank.
 * @throws Error when the API answers with a non-OK status. Failures from
 *   `fetch` itself or from parsing the JSON body propagate unchanged.
 */
export async function listFishAudioVoices(params: {
  apiKey: string;
  baseUrl?: string;
}): Promise<Array<{ id: string; name: string }>> {
  const base = normalizeFishAudioBaseUrl(params.baseUrl);

  // List the authenticated user's own voices (cloned/trained).
  // Fish Audio has no stable API for fetching a curated "official" voice
  // catalogue — the public model listing returns the entire community corpus
  // (1M+ entries) and filtering by undocumented author IDs would be fragile.
  // Users can browse and select voices at https://fish.audio and configure
  // their chosen voiceId directly.
  const res = await fetch(`${base}/model?type=tts&self=true&page_size=100`, {
    headers: { Authorization: `Bearer ${params.apiKey}` },
  });
  if (!res.ok) {
    throw new Error(`Fish Audio voices API error (${res.status})`);
  }

  // Narrow from `unknown` rather than trusting a cast: the body is external.
  const json: unknown = await res.json();
  const items =
    typeof json === "object" && json !== null
      ? (json as { items?: unknown }).items
      : undefined;
  if (!Array.isArray(items)) {
    return [];
  }

  const voices: Array<{ id: string; name: string }> = [];
  for (const item of items as unknown[]) {
    if (typeof item !== "object" || item === null) {
      continue;
    }
    const { _id, title } = item as { _id?: unknown; title?: unknown };
    const id = typeof _id === "string" ? _id.trim() : "";
    if (!id) {
      // Entries without a usable identifier cannot be selected as a voice.
      continue;
    }
    const label = typeof title === "string" ? title.trim() : "";
    voices.push({ id, name: label || id });
  }
  return voices;
}