Fix: use fish-audio-sdk (not fishaudio), correct import

2026-03-24 01:26:27 +00:00
parent f18791e807
commit 269190746b
1 changed file with 6 additions and 6 deletions
--- a/ingest.py
+++ b/ingest.py
@@ -109,18 +109,18 @@ def transcribe_chunk_fish(chunk_path, api_key, start_offset_sec):
    Returns a list of segments with timestamps adjusted by start_offset_sec.
    Note: Fish Audio returns timestamps in milliseconds.
    """
-    from fishaudio import FishAudio
+    from fish_audio_sdk import Session, ASRRequest

    print(f"  → Fish Audio ASR: {chunk_path.name} (offset +{start_offset_sec}s)", flush=True)

-    client = FishAudio(api_key=api_key)
-    with open(chunk_path, "rb") as f:
-        result = client.asr.transcribe(audio=f.read(), language="en")
+    with Session(api_key) as session:
+        with open(chunk_path, "rb") as f:
+            result = session.asr(ASRRequest(audio=f.read(), language="en"))

    # Fish Audio returns timestamps in milliseconds — convert to seconds
    # and adjust by the chunk's start offset in the full audio
    segments = []
-    for seg in result.segments:
+    for seg in (result.segments or []):
        segments.append({
            "start": seg.start / 1000.0 + start_offset_sec,
            "end": seg.end / 1000.0 + start_offset_sec,
@@ -128,7 +128,7 @@ def transcribe_chunk_fish(chunk_path, api_key, start_offset_sec):
        })

    # Fallback: if no segments, use full text as one block
-    if not segments and result.text:
+    if not segments and getattr(result, "text", None):
        segments.append({
            "start": start_offset_sec,
            "end": start_offset_sec + 60,