From ecf37b8798fa7a3962656289963f39f9da0d40fc Mon Sep 17 00:00:00 2001
From: Conan Scott <conanscott@gmail.com>
Date: Tue, 24 Mar 2026 01:29:22 +0000
Subject: [PATCH] =?UTF-8?q?Use=20httpx=20directly=20for=20ASR=20=E2=80=94?=
 =?UTF-8?q?=20bypass=20broken=20SDK=20error=20handler?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ingest.py | 45 ++++++++++++++++++++++++---------------------
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/ingest.py b/ingest.py
index baad9c2..19937a1 100644
--- a/ingest.py
+++ b/ingest.py
@@ -109,41 +109,44 @@ def transcribe_chunk_fish(chunk_path, api_key, start_offset_sec):
     Returns a list of segments with timestamps adjusted by start_offset_sec.
     Note: Fish Audio returns timestamps in milliseconds.
     """
-    from fish_audio_sdk import Session, ASRRequest
+    import httpx
 
     print(f"  → Fish Audio ASR: {chunk_path.name} (offset +{start_offset_sec}s)", flush=True)
 
-    with Session(api_key) as session:
-        with open(chunk_path, "rb") as f:
-            audio_bytes = f.read()
-        print(f"    Sending {len(audio_bytes)/1024/1024:.1f}MB to Fish Audio...", flush=True)
-        try:
-            result = session.asr(ASRRequest(audio=audio_bytes, language="en"))
-        except Exception as e:
-            # Try to get more detail on the error
-            resp = getattr(e, 'response', None) or getattr(e, '__cause__', None)
-            if resp is not None:
-                status = getattr(resp, 'status_code', '?')
-                body = getattr(resp, 'text', getattr(resp, 'content', b''))
-                raise RuntimeError(f"Fish Audio ASR failed: HTTP {status} — {body}") from e
-            raise
+    with open(chunk_path, "rb") as f:
+        audio_bytes = f.read()
+    print(f"    Sending {len(audio_bytes)/1024/1024:.1f}MB to Fish Audio...", flush=True)
+
+    with httpx.Client(timeout=300) as client:
+        resp = client.post(
+            "https://api.fish.audio/v1/asr",
+            headers={"Authorization": f"Bearer {api_key}"},
+            files={"audio": (chunk_path.name, audio_bytes, "audio/mpeg")},
+            data={"language": "en", "ignore_timestamps": "false"},
+        )
+
+    print(f"    HTTP {resp.status_code}", flush=True)
+    if resp.status_code != 200:
+        raise RuntimeError(f"Fish Audio ASR error {resp.status_code}: {resp.text!r}")
+
+    data = resp.json()
 
     # Fish Audio returns timestamps in milliseconds — convert to seconds
     # and adjust by the chunk's start offset in the full audio
     segments = []
-    for seg in (result.segments or []):
+    for seg in data.get("segments", []):
         segments.append({
-            "start": seg.start / 1000.0 + start_offset_sec,
-            "end": seg.end / 1000.0 + start_offset_sec,
-            "text": seg.text,
+            "start": seg["start"] / 1000.0 + start_offset_sec,
+            "end": seg["end"] / 1000.0 + start_offset_sec,
+            "text": seg["text"],
         })
 
     # Fallback: if no segments, use full text as one block
-    if not segments and getattr(result, "text", None):
+    if not segments and data.get("text"):
         segments.append({
             "start": start_offset_sec,
             "end": start_offset_sec + 60,
-            "text": result.text,
+            "text": data["text"],
         })
 
     return segments