#!/usr/bin/env python3
"""
ask-annie/transcribe.py

Step 1 of the Ask Annie pipeline: download audio and transcribe with Deepgram.
Saves transcript.json ready for chapter generation.

Usage:
    python3 transcribe.py --url <vimeo-url> --out out/ [--deepgram-api-key KEY]

Output:
    out/<video_id>/audio.mp3        — downloaded audio (cached)
    out/<video_id>/transcript.json  — full Deepgram transcript with timestamps
    out/<video_id>/transcript.txt   — plain text version (for human review)

After running this, share transcript.txt for chapter generation, then run:
    python3 ingest.py --url <vimeo-url> --chapters chapters/<chapters-file>.json --out out/

Dependencies: yt-dlp, ffmpeg (on PATH)
"""

import argparse
import http.client
import json
import os
import ssl
import subprocess
import sys
import time
import urllib.parse
from pathlib import Path

DEEPGRAM_HOST = "api.deepgram.com"
DEEPGRAM_PARAMS = "?model=nova-3&language=en&punctuate=true&utterances=true&smart_format=true"

# Suffixes yt-dlp uses for in-progress downloads / download state; a file
# carrying one of these is never a finished audio file.
_PARTIAL_SUFFIXES = frozenset({".part", ".ytdl"})


def run(cmd, **kwargs):
    """Echo *cmd* and execute it with ``subprocess.run``.

    Args:
        cmd: Sequence of command-line arguments (converted with ``str`` for
            the echo only).
        **kwargs: Forwarded to ``subprocess.run``. ``check`` defaults to
            ``True`` but may be overridden by the caller.

    Returns:
        The ``subprocess.CompletedProcess`` returned by ``subprocess.run``.

    Raises:
        subprocess.CalledProcessError: If ``check`` is true and the command
            exits with a non-zero status.
    """
    print(f" $ {' '.join(str(c) for c in cmd)}", flush=True)
    # setdefault (rather than a hard-coded check=True) so a caller passing
    # check=... does not trigger a duplicate-keyword TypeError.
    kwargs.setdefault("check", True)
    return subprocess.run(cmd, **kwargs)


def _find_audio(out_dir):
    """Return the finished audio file in *out_dir*, or ``None`` if absent.

    ``audio.mp3`` is preferred because that is the format requested from
    yt-dlp; otherwise the first ``audio.*`` file in sorted order that is not
    a partial-download artifact is returned, so the choice is deterministic.
    """
    preferred = out_dir / "audio.mp3"
    if preferred.is_file():
        return preferred
    candidates = sorted(
        f
        for f in out_dir.glob("audio.*")
        if f.is_file() and f.suffix not in _PARTIAL_SUFFIXES
    )
    return candidates[0] if candidates else None


def download_audio(url, out_dir):
    """Download the audio track of *url* into *out_dir* as mp3 via yt-dlp.

    Args:
        url: Media URL handed to yt-dlp.
        out_dir: ``pathlib.Path`` of an existing directory to download into.

    Returns:
        ``pathlib.Path`` of the downloaded audio file.

    Raises:
        subprocess.CalledProcessError: If yt-dlp exits with an error.
        FileNotFoundError: If yt-dlp succeeded but no ``audio.*`` file exists.
    """
    # yt-dlp substitutes %(ext)s itself; this is an output template, not a path.
    audio_template = out_dir / "audio.%(ext)s"
    run([
        "yt-dlp",
        "--extract-audio",
        "--audio-format", "mp3",
        "--audio-quality", "3",
        "-o", str(audio_template),
        url,
    ])
    audio_path = _find_audio(out_dir)
    if audio_path is None:
        raise FileNotFoundError("Audio download failed — no audio.* file found")
    return audio_path


def transcribe_deepgram(audio_path, api_key):
    """Send full audio to Deepgram, return raw response JSON.

    Args:
        audio_path: ``pathlib.Path`` of the audio file to upload.
        api_key: Deepgram API key, sent as a ``Token`` authorization header.

    Returns:
        The decoded JSON response body.

    Raises:
        RuntimeError: If Deepgram answers with a status other than 200; the
            message carries the first 200 characters of the response body.
    """
    size_mb = audio_path.stat().st_size / 1024 / 1024
    print(f" Sending {size_mb:.1f}MB to Deepgram...", flush=True)
    audio_bytes = audio_path.read_bytes()

    conn = http.client.HTTPSConnection(
        DEEPGRAM_HOST, timeout=600, context=ssl.create_default_context()
    )
    try:
        conn.request(
            "POST",
            f"/v1/listen{DEEPGRAM_PARAMS}",
            body=audio_bytes,
            headers={
                "Authorization": f"Token {api_key}",
                "Content-Type": "audio/mp3",
            },
        )
        resp = conn.getresponse()
        status = resp.status
        # errors="replace" keeps a malformed error body from masking the
        # HTTP status with a UnicodeDecodeError.
        resp_body = resp.read().decode("utf-8", errors="replace")
    finally:
        # Close even when the request or read fails, so the socket never leaks.
        conn.close()

    print(f" HTTP {status}", flush=True)
    if status != 200:
        raise RuntimeError(f"Deepgram error {status}: {resp_body[:200]}")
    return json.loads(resp_body)


def _video_id_from_url(url):
    """Return the last non-empty path segment of *url* ("" if there is none).

    The query string and fragment are ignored, so a URL such as
    ``https://vimeo.com/123/?h=abc`` yields ``123``.
    """
    path = urllib.parse.urlsplit(url).path
    segments = [segment for segment in path.split("/") if segment]
    return segments[-1] if segments else ""


def _format_timestamp(seconds):
    """Format an offset in seconds as ``[m:ss]`` (minutes are not wrapped at 60)."""
    minutes, secs = divmod(int(seconds), 60)
    return f"[{minutes}:{secs:02d}]"


def main(argv=None):
    """Download (or reuse) the audio, transcribe it, and write the transcripts.

    Args:
        argv: Optional list of command-line arguments; ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Exits with status 1 when no Deepgram API key is available or when no
    video ID can be derived from the URL.
    """
    parser = argparse.ArgumentParser(description="Download and transcribe Ask Annie Vimeo session")
    parser.add_argument("--url", required=True, help="Vimeo URL")
    parser.add_argument("--out", default="out", help="Output directory")
    parser.add_argument("--deepgram-api-key", default=os.environ.get("DEEPGRAM_API_KEY", ""),
                        help="Deepgram API key (or set DEEPGRAM_API_KEY env var)")
    parser.add_argument("--video-id", default=None, help="Override video ID")
    args = parser.parse_args(argv)

    if not args.deepgram_api_key:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit("ERROR: Deepgram API key required. Pass --deepgram-api-key or set DEEPGRAM_API_KEY.")

    video_id = args.video_id or _video_id_from_url(args.url)
    if not video_id:
        # An empty ID would make the output land directly in --out.
        sys.exit("ERROR: Could not derive a video ID from --url. Pass --video-id.")
    print(f"\n=== Transcribe: {video_id} ===\n")

    out_dir = Path(args.out) / video_id
    out_dir.mkdir(parents=True, exist_ok=True)

    transcript_path = out_dir / "transcript.json"
    txt_path = out_dir / "transcript.txt"

    # Step 1: Download audio
    print("=== Step 1: Download audio ===")
    audio_path = _find_audio(out_dir)
    if audio_path is not None:
        print(f" [cache] {audio_path}")
    else:
        audio_path = download_audio(args.url, out_dir)
    print(f" Audio: {audio_path}\n")

    # Step 2: Transcribe
    if transcript_path.exists():
        print(f"=== Step 2: Transcript cached at {transcript_path} ===\n")
        with open(transcript_path, encoding="utf-8") as f:
            raw = json.load(f)
    else:
        print("=== Step 2: Transcribe with Deepgram ===")
        raw = transcribe_deepgram(audio_path, args.deepgram_api_key)
        with open(transcript_path, "w", encoding="utf-8") as f:
            json.dump(raw, f, indent=2)
        print(f" Saved: {transcript_path}\n")

    # Step 3: Write plain text with timestamps
    utterances = raw.get("results", {}).get("utterances", [])
    duration = raw.get("metadata", {}).get("duration", 0)

    lines = [
        f"# Transcript: {video_id}\n",
        f"# URL: {args.url}\n",
        f"# Duration: {duration:.0f}s ({duration/60:.1f} min)\n",
        f"# Utterances: {len(utterances)}\n\n",
    ]
    lines.extend(
        f"{_format_timestamp(u['start'])} {u['transcript']}\n" for u in utterances
    )
    # Explicit UTF-8: transcripts may contain non-ASCII text that the
    # platform's default encoding cannot represent.
    with open(txt_path, "w", encoding="utf-8") as f:
        f.writelines(lines)

    print("=== Done ===")
    print(f" Transcript JSON: {transcript_path}")
    print(f" Transcript TXT: {txt_path}")
    print(f" Duration: {duration:.0f}s ({duration/60:.1f} min)")
    print(f" Utterances: {len(utterances)}")
    print("\nNext: share transcript.txt for chapter generation, then run ingest.py")


if __name__ == "__main__":
    main()