From e51bcf11455c98453bd522fedcb3e1cebefe48f1 Mon Sep 17 00:00:00 2001 From: Conan Scott Date: Tue, 24 Mar 2026 01:07:53 +0000 Subject: [PATCH] Replace requests with stdlib urllib (no pip needed) --- ingest.py | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/ingest.py b/ingest.py index e6df956..ae508af 100644 --- a/ingest.py +++ b/ingest.py @@ -26,7 +26,8 @@ import os import subprocess import sys import time -import requests +import urllib.request +import uuid from pathlib import Path @@ -110,18 +111,43 @@ def transcribe_chunk_fish(chunk_path, api_key, start_offset_sec): """ Send a chunk to Fish Audio ASR. Returns a list of segments with timestamps adjusted by start_offset_sec. + Uses stdlib urllib + manual multipart encoding (no requests dep). """ print(f" → Fish Audio ASR: {chunk_path.name} (offset +{start_offset_sec}s)", flush=True) + + # Build multipart/form-data manually + boundary = uuid.uuid4().hex with open(chunk_path, "rb") as f: - resp = requests.post( - FISH_AUDIO_ASR_URL, - headers={"Authorization": f"Bearer {api_key}"}, - files={"audio": (chunk_path.name, f, "audio/mpeg")}, - data={"language": "en", "ignore_timestamps": "false"}, - timeout=120, - ) - resp.raise_for_status() - data = resp.json() + audio_data = f.read() + + def field(name, value): + return ( + f"--{boundary}\r\n" + f'Content-Disposition: form-data; name="{name}"\r\n\r\n' + f"{value}\r\n" + ).encode() + + body = ( + field("language", "en") + + field("ignore_timestamps", "false") + + f"--{boundary}\r\n".encode() + + f'Content-Disposition: form-data; name="audio"; filename="{chunk_path.name}"\r\n'.encode() + + b"Content-Type: audio/mpeg\r\n\r\n" + + audio_data + + f"\r\n--{boundary}--\r\n".encode() + ) + + req = urllib.request.Request( + FISH_AUDIO_ASR_URL, + data=body, + headers={ + "Authorization": f"Bearer {api_key}", + "Content-Type": f"multipart/form-data; boundary={boundary}", + }, + method="POST", + ) + with urllib.request.urlopen(req, timeout=120) as resp: + data = json.loads(resp.read().decode()) # Adjust timestamps by the chunk's start offset segments = []