From ecf37b8798fa7a3962656289963f39f9da0d40fc Mon Sep 17 00:00:00 2001 From: Conan Scott Date: Tue, 24 Mar 2026 01:29:22 +0000 Subject: [PATCH] =?UTF-8?q?Use=20httpx=20directly=20for=20ASR=20=E2=80=94?= =?UTF-8?q?=20bypass=20broken=20SDK=20error=20handler?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ingest.py | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/ingest.py b/ingest.py index baad9c2..19937a1 100644 --- a/ingest.py +++ b/ingest.py @@ -109,41 +109,44 @@ def transcribe_chunk_fish(chunk_path, api_key, start_offset_sec): Returns a list of segments with timestamps adjusted by start_offset_sec. Note: Fish Audio returns timestamps in milliseconds. """ - from fish_audio_sdk import Session, ASRRequest + import httpx print(f" → Fish Audio ASR: {chunk_path.name} (offset +{start_offset_sec}s)", flush=True) - with Session(api_key) as session: - with open(chunk_path, "rb") as f: - audio_bytes = f.read() - print(f" Sending {len(audio_bytes)/1024/1024:.1f}MB to Fish Audio...", flush=True) - try: - result = session.asr(ASRRequest(audio=audio_bytes, language="en")) - except Exception as e: - # Try to get more detail on the error - resp = getattr(e, 'response', None) or getattr(e, '__cause__', None) - if resp is not None: - status = getattr(resp, 'status_code', '?') - body = getattr(resp, 'text', getattr(resp, 'content', b'')) - raise RuntimeError(f"Fish Audio ASR failed: HTTP {status} — {body}") from e - raise + with open(chunk_path, "rb") as f: + audio_bytes = f.read() + print(f" Sending {len(audio_bytes)/1024/1024:.1f}MB to Fish Audio...", flush=True) + + with httpx.Client(timeout=300) as client: + resp = client.post( + "https://api.fish.audio/v1/asr", + headers={"Authorization": f"Bearer {api_key}"}, + files={"audio": (chunk_path.name, audio_bytes, "audio/mpeg")}, + data={"language": "en", "ignore_timestamps": "false"}, + ) + + print(f" HTTP {resp.status_code}", flush=True) + if resp.status_code != 200: + raise RuntimeError(f"Fish Audio ASR error {resp.status_code}: {resp.text!r}") + + data = resp.json() # Fish Audio returns timestamps in milliseconds — convert to seconds # and adjust by the chunk's start offset in the full audio segments = [] - for seg in (result.segments or []): + for seg in data.get("segments", []): segments.append({ - "start": seg.start / 1000.0 + start_offset_sec, - "end": seg.end / 1000.0 + start_offset_sec, - "text": seg.text, + "start": seg["start"] / 1000.0 + start_offset_sec, + "end": seg["end"] / 1000.0 + start_offset_sec, + "text": seg["text"], }) # Fallback: if no segments, use full text as one block - if not segments and getattr(result, "text", None): + if not segments and data.get("text"): segments.append({ "start": start_offset_sec, "end": start_offset_sec + 60, - "text": result.text, + "text": data["text"], }) return segments