Replace requests with stdlib urllib (no pip needed)

2026-03-24 01:07:53 +00:00
parent ce3ef32778
commit e51bcf1145
1 changed file with 36 additions and 10 deletions
--- a/ingest.py
+++ b/ingest.py
@@ -26,7 +26,8 @@ import os
 import subprocess
 import sys
 import time
-import requests
+import urllib.request
+import uuid
 from pathlib import Path


@@ -110,18 +111,43 @@ def transcribe_chunk_fish(chunk_path, api_key, start_offset_sec):
    """
    Send a chunk to Fish Audio ASR. Returns a list of segments with
    timestamps adjusted by start_offset_sec.
+    Uses stdlib urllib + manual multipart encoding (no requests dep).
    """
    print(f"  → Fish Audio ASR: {chunk_path.name} (offset +{start_offset_sec}s)", flush=True)
+
+    # Build multipart/form-data manually
+    boundary = uuid.uuid4().hex
    with open(chunk_path, "rb") as f:
-        resp = requests.post(
-            FISH_AUDIO_ASR_URL,
-            headers={"Authorization": f"Bearer {api_key}"},
-            files={"audio": (chunk_path.name, f, "audio/mpeg")},
-            data={"language": "en", "ignore_timestamps": "false"},
-            timeout=120,
+        audio_data = f.read()
+
+    def field(name, value):
+        return (
+            f"--{boundary}\r\n"
+            f'Content-Disposition: form-data; name="{name}"\r\n\r\n'
+            f"{value}\r\n"
+        ).encode()
+
+    body = (
+        field("language", "en") +
+        field("ignore_timestamps", "false") +
+        f"--{boundary}\r\n".encode() +
+        f'Content-Disposition: form-data; name="audio"; filename="{chunk_path.name}"\r\n'.encode() +
+        b"Content-Type: audio/mpeg\r\n\r\n" +
+        audio_data +
+        f"\r\n--{boundary}--\r\n".encode()
    )
-    resp.raise_for_status()
-    data = resp.json()
+
+    req = urllib.request.Request(
+        FISH_AUDIO_ASR_URL,
+        data=body,
+        headers={
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": f"multipart/form-data; boundary={boundary}",
+        },
+        method="POST",
+    )
+    with urllib.request.urlopen(req, timeout=120) as resp:
+        data = json.loads(resp.read().decode())

    # Adjust timestamps by the chunk's start offset
    segments = []