Fix HTTP client, multipart, ms->s timestamp conversion
This commit is contained in:
58
ingest.py
58
ingest.py
@@ -20,13 +20,14 @@ Dependencies (must be on PATH):
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import http.client
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import ssl
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import urllib.request
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
@@ -111,50 +112,53 @@ def transcribe_chunk_fish(chunk_path, api_key, start_offset_sec):
|
||||
"""
|
||||
Send a chunk to Fish Audio ASR. Returns a list of segments with
|
||||
timestamps adjusted by start_offset_sec.
|
||||
Uses stdlib http.client + manual multipart encoding (no requests dep).
|
||||
Note: Fish Audio returns timestamps in milliseconds.
|
||||
"""
|
||||
print(f" → Fish Audio ASR: {chunk_path.name} (offset +{start_offset_sec}s)", flush=True)
|
||||
|
||||
# Build multipart/form-data manually
|
||||
boundary = uuid.uuid4().hex
|
||||
with open(chunk_path, "rb") as f:
|
||||
audio_data = f.read()
|
||||
|
||||
def field(name, value):
|
||||
return (
|
||||
f"--{boundary}\r\n"
|
||||
f'Content-Disposition: form-data; name="{name}"\r\n\r\n'
|
||||
f"{value}\r\n"
|
||||
).encode()
|
||||
|
||||
body = (
|
||||
field("language", "en") +
|
||||
field("ignore_timestamps", "false") +
|
||||
f"--{boundary}\r\n".encode() +
|
||||
f'Content-Disposition: form-data; name="audio"; filename="{chunk_path.name}"\r\n'.encode() +
|
||||
b"Content-Type: audio/mpeg\r\n\r\n" +
|
||||
audio_data +
|
||||
f"\r\n--{boundary}--\r\n".encode()
|
||||
)
|
||||
f"--{boundary}\r\n"
|
||||
f'Content-Disposition: form-data; name="audio"; filename="{chunk_path.name}"\r\n'
|
||||
f"Content-Type: audio/mpeg\r\n\r\n"
|
||||
).encode() + audio_data + (
|
||||
f"\r\n--{boundary}\r\n"
|
||||
f'Content-Disposition: form-data; name="language"\r\n\r\n'
|
||||
f"en"
|
||||
f"\r\n--{boundary}--\r\n"
|
||||
).encode()
|
||||
|
||||
req = urllib.request.Request(
|
||||
FISH_AUDIO_ASR_URL,
|
||||
data=body,
|
||||
ctx = ssl.create_default_context()
|
||||
conn = http.client.HTTPSConnection("api.fish.audio", timeout=300, context=ctx)
|
||||
conn.request(
|
||||
"POST",
|
||||
"/v1/asr",
|
||||
body=body,
|
||||
headers={
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"Content-Type": f"multipart/form-data; boundary={boundary}",
|
||||
"Content-Length": str(len(body)),
|
||||
},
|
||||
method="POST",
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=300) as resp:
|
||||
data = json.loads(resp.read().decode())
|
||||
resp = conn.getresponse()
|
||||
resp_body = resp.read().decode()
|
||||
conn.close()
|
||||
|
||||
# Adjust timestamps by the chunk's start offset
|
||||
if resp.status != 200:
|
||||
raise RuntimeError(f"Fish Audio ASR error {resp.status}: {resp_body}")
|
||||
|
||||
data = json.loads(resp_body)
|
||||
|
||||
# Fish Audio returns timestamps in milliseconds — convert to seconds
|
||||
# and adjust by the chunk's start offset in the full audio
|
||||
segments = []
|
||||
for seg in data.get("segments", []):
|
||||
segments.append({
|
||||
"start": seg["start"] + start_offset_sec,
|
||||
"end": seg["end"] + start_offset_sec,
|
||||
"start": seg["start"] / 1000.0 + start_offset_sec,
|
||||
"end": seg["end"] / 1000.0 + start_offset_sec,
|
||||
"text": seg["text"],
|
||||
})
|
||||
|
||||
|
||||
Reference in New Issue
Block a user