Use httpx directly for ASR — bypass broken SDK error handler
This commit is contained in:
45
ingest.py
45
ingest.py
@@ -109,41 +109,44 @@ def transcribe_chunk_fish(chunk_path, api_key, start_offset_sec):
|
|||||||
Returns a list of segments with timestamps adjusted by start_offset_sec.
|
Returns a list of segments with timestamps adjusted by start_offset_sec.
|
||||||
Note: Fish Audio returns timestamps in milliseconds.
|
Note: Fish Audio returns timestamps in milliseconds.
|
||||||
"""
|
"""
|
||||||
from fish_audio_sdk import Session, ASRRequest
|
import httpx
|
||||||
|
|
||||||
print(f" → Fish Audio ASR: {chunk_path.name} (offset +{start_offset_sec}s)", flush=True)
|
print(f" → Fish Audio ASR: {chunk_path.name} (offset +{start_offset_sec}s)", flush=True)
|
||||||
|
|
||||||
with Session(api_key) as session:
|
with open(chunk_path, "rb") as f:
|
||||||
with open(chunk_path, "rb") as f:
|
audio_bytes = f.read()
|
||||||
audio_bytes = f.read()
|
print(f" Sending {len(audio_bytes)/1024/1024:.1f}MB to Fish Audio...", flush=True)
|
||||||
print(f" Sending {len(audio_bytes)/1024/1024:.1f}MB to Fish Audio...", flush=True)
|
|
||||||
try:
|
with httpx.Client(timeout=300) as client:
|
||||||
result = session.asr(ASRRequest(audio=audio_bytes, language="en"))
|
resp = client.post(
|
||||||
except Exception as e:
|
"https://api.fish.audio/v1/asr",
|
||||||
# Try to get more detail on the error
|
headers={"Authorization": f"Bearer {api_key}"},
|
||||||
resp = getattr(e, 'response', None) or getattr(e, '__cause__', None)
|
files={"audio": (chunk_path.name, audio_bytes, "audio/mpeg")},
|
||||||
if resp is not None:
|
data={"language": "en", "ignore_timestamps": "false"},
|
||||||
status = getattr(resp, 'status_code', '?')
|
)
|
||||||
body = getattr(resp, 'text', getattr(resp, 'content', b''))
|
|
||||||
raise RuntimeError(f"Fish Audio ASR failed: HTTP {status} — {body}") from e
|
print(f" HTTP {resp.status_code}", flush=True)
|
||||||
raise
|
if resp.status_code != 200:
|
||||||
|
raise RuntimeError(f"Fish Audio ASR error {resp.status_code}: {resp.text!r}")
|
||||||
|
|
||||||
|
data = resp.json()
|
||||||
|
|
||||||
# Fish Audio returns timestamps in milliseconds — convert to seconds
|
# Fish Audio returns timestamps in milliseconds — convert to seconds
|
||||||
# and adjust by the chunk's start offset in the full audio
|
# and adjust by the chunk's start offset in the full audio
|
||||||
segments = []
|
segments = []
|
||||||
for seg in (result.segments or []):
|
for seg in data.get("segments", []):
|
||||||
segments.append({
|
segments.append({
|
||||||
"start": seg.start / 1000.0 + start_offset_sec,
|
"start": seg["start"] / 1000.0 + start_offset_sec,
|
||||||
"end": seg.end / 1000.0 + start_offset_sec,
|
"end": seg["end"] / 1000.0 + start_offset_sec,
|
||||||
"text": seg.text,
|
"text": seg["text"],
|
||||||
})
|
})
|
||||||
|
|
||||||
# Fallback: if no segments, use full text as one block
|
# Fallback: if no segments, use full text as one block
|
||||||
if not segments and getattr(result, "text", None):
|
if not segments and data.get("text"):
|
||||||
segments.append({
|
segments.append({
|
||||||
"start": start_offset_sec,
|
"start": start_offset_sec,
|
||||||
"end": start_offset_sec + 60,
|
"end": start_offset_sec + 60,
|
||||||
"text": result.text,
|
"text": data["text"],
|
||||||
})
|
})
|
||||||
|
|
||||||
return segments
|
return segments
|
||||||
|
|||||||
Reference in New Issue
Block a user