Replace requests with stdlib urllib (no pip install needed)
This commit is contained in:
46
ingest.py
46
ingest.py
@@ -26,7 +26,8 @@ import os
|
|||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import requests
|
import urllib.request
|
||||||
|
import uuid
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
@@ -110,18 +111,43 @@ def transcribe_chunk_fish(chunk_path, api_key, start_offset_sec):
|
|||||||
"""
|
"""
|
||||||
Send a chunk to Fish Audio ASR. Returns a list of segments with
|
Send a chunk to Fish Audio ASR. Returns a list of segments with
|
||||||
timestamps adjusted by start_offset_sec.
|
timestamps adjusted by start_offset_sec.
|
||||||
|
Uses stdlib urllib + manual multipart encoding (no requests dep).
|
||||||
"""
|
"""
|
||||||
print(f" → Fish Audio ASR: {chunk_path.name} (offset +{start_offset_sec}s)", flush=True)
|
print(f" → Fish Audio ASR: {chunk_path.name} (offset +{start_offset_sec}s)", flush=True)
|
||||||
|
|
||||||
|
# Build multipart/form-data manually
|
||||||
|
boundary = uuid.uuid4().hex
|
||||||
with open(chunk_path, "rb") as f:
|
with open(chunk_path, "rb") as f:
|
||||||
resp = requests.post(
|
audio_data = f.read()
|
||||||
FISH_AUDIO_ASR_URL,
|
|
||||||
headers={"Authorization": f"Bearer {api_key}"},
|
def field(name, value):
|
||||||
files={"audio": (chunk_path.name, f, "audio/mpeg")},
|
return (
|
||||||
data={"language": "en", "ignore_timestamps": "false"},
|
f"--{boundary}\r\n"
|
||||||
timeout=120,
|
f'Content-Disposition: form-data; name="{name}"\r\n\r\n'
|
||||||
)
|
f"{value}\r\n"
|
||||||
resp.raise_for_status()
|
).encode()
|
||||||
data = resp.json()
|
|
||||||
|
body = (
|
||||||
|
field("language", "en") +
|
||||||
|
field("ignore_timestamps", "false") +
|
||||||
|
f"--{boundary}\r\n".encode() +
|
||||||
|
f'Content-Disposition: form-data; name="audio"; filename="{chunk_path.name}"\r\n'.encode() +
|
||||||
|
b"Content-Type: audio/mpeg\r\n\r\n" +
|
||||||
|
audio_data +
|
||||||
|
f"\r\n--{boundary}--\r\n".encode()
|
||||||
|
)
|
||||||
|
|
||||||
|
req = urllib.request.Request(
|
||||||
|
FISH_AUDIO_ASR_URL,
|
||||||
|
data=body,
|
||||||
|
headers={
|
||||||
|
"Authorization": f"Bearer {api_key}",
|
||||||
|
"Content-Type": f"multipart/form-data; boundary={boundary}",
|
||||||
|
},
|
||||||
|
method="POST",
|
||||||
|
)
|
||||||
|
with urllib.request.urlopen(req, timeout=120) as resp:
|
||||||
|
data = json.loads(resp.read().decode())
|
||||||
|
|
||||||
# Adjust timestamps by the chunk's start offset
|
# Adjust timestamps by the chunk's start offset
|
||||||
segments = []
|
segments = []
|
||||||
|
|||||||
Reference in New Issue
Block a user