From ad3c5616b2c0bf7e865f02e7669641f669912b72 Mon Sep 17 00:00:00 2001 From: Conan Scott Date: Mon, 30 Mar 2026 10:21:56 +0000 Subject: [PATCH] Update README.md --- README.md | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 94751dc..2a3b28b 100644 --- a/README.md +++ b/README.md @@ -1 +1,49 @@ -IyBBc2sgQW5uaWUg4oCUIFNUIEJlc3QgUHJhY3RpY2VzIFNlc3Npb24gSW5nZXN0aW9uCgpJbmdlc3Rpb24gcGlwZWxpbmUgZm9yIEF4d2F5IE1GVCBVc2VyIEdyb3VwICJBc2sgQW5uaWUiIFEmQSBzZXNzaW9ucyBvbiBWaW1lby4KCiMjIFdoYXQgaXQgZG9lcwoKMS4gRG93bmxvYWRzIGF1ZGlvIGZyb20gYSBWaW1lbyBVUkwgdmlhIHl0LWRscAoyLiBUcmFuc2NyaWJlcyB3aXRoIFdoaXNwZXIgKHRpbWVzdGFtcGVkIHNlZ21lbnRzKQozLiBTbGljZXMgdHJhbnNjcmlwdCBpbnRvIHBlci1jaGFwdGVyIGNodW5rcyB1c2luZyBhIGNoYXB0ZXJzIEpTT04gZmlsZQo0LiBPcHRpb25hbGx5IGV4dHJhY3RzIGZyYW1lcyBmcm9tIGRlbW8taGVhdnkgY2hhcHRlcnMgZm9yIHZpc2lvbiBhbm5vdGF0aW9uCjUuIE91dHB1dHMgYGNodW5rcy5qc29uYCByZWFkeSBmb3IgaW5nZXN0aW9uIGludG8ga25vd2xlZGdlLW1jcAoKIyMgVXNhZ2UKCmBgYGJhc2gKcHl0aG9uMyBpbmdlc3QucHkgXAogIC0tdXJsICdodHRwczovL3ZpbWVvLmNvbS8xMDIwMTAyNjI2JyBcCiAgLS1jaGFwdGVycyBjaGFwdGVycy8xMDIwMTAyNjI2Lmpzb24gXAogIC0tb3V0IC4vb3V0IFwKICAtLXdoaXNwZXItbW9kZWwgbWVkaXVtCmBgYAoKQWRkIGAtLWZyYW1lc2AgdG8gYWxzbyBleHRyYWN0IHZpZGVvIGZyYW1lcyBmb3IgZGVtbyBjaGFwdGVycyAocmVxdWlyZXMgdmlkZW8gZG93bmxvYWQpLgoKIyMgRGVwZW5kZW5jaWVzCgpgYGBiYXNoCmJyZXcgaW5zdGFsbCB5dC1kbHAgZmZtcGVnCnBpcCBpbnN0YWxsIG9wZW5haS13aGlzcGVyCmBgYAoKIyMgUmVwbyBzdHJ1Y3R1cmUKCmBgYAppbmdlc3QucHkgICAgICAgICAgICAgICAgICAgICMgTWFpbiBwaXBlbGluZSBzY3JpcHQKY2hhcHRlcnMvPHZpZGVvX2lkPi5qc29uICAgICAjIENoYXB0ZXIgbGlzdCBwZXIgc2Vzc2lvbgpvdXQvPHZpZGVvX2lkPi8gICAgICAgICAgICAgICMgT3V0cHV0IChnaXRpZ25vcmVkKQogIGF1ZGlvLm1wMwogIHRyYW5zY3JpcHQuanNvbgogIGNodW5rcy5qc29uCiAgZnJhbWVzLwpgYGAKCiMjIEFkZGluZyBhIG5ldyBzZXNzaW9uCgoxLiBDcmVhdGUgYGNoYXB0ZXJzLzx2aWRlb19pZD4uanNvbmAgd2l0aCB0aW1lc3RhbXAgKyB0aXRsZSArIHN1bW1hcnkgcGVyIGNoYXB0ZXIKMi4gUnVuIGBpbmdlc3QucHkgLS11cmwgPHZpbWVvX3VybD4gLS1jaGFwdGVycyBjaGFwdGVycy88
dmlkZW9faWQ+Lmpzb25gCjMuIFJldmlldyBgb3V0Lzx2aWRlb19pZD4vY2h1bmtzLmpzb25gCjQuIEluZ2VzdCBjaHVua3MgaW50byBrbm93bGVkZ2UtbWNwIG5vdGVib29rIGBzZWN1cmV0cmFuc3BvcnQtbWRgCg== \ No newline at end of file +# Ask Annie — ST Best Practices Session Ingestion + +Ingestion pipeline for Axway MFT User Group "Ask Annie" Q&A sessions on Vimeo. + +## What it does + +1. Downloads audio from a Vimeo URL via yt-dlp +2. Transcribes with Whisper (timestamped segments) +3. Slices transcript into per-chapter chunks using a chapters JSON file +4. Optionally extracts frames from demo-heavy chapters for vision annotation +5. Outputs `chunks.json` ready for ingestion into knowledge-mcp + +## Usage + +```bash +python3 ingest.py \ + --url 'https://vimeo.com/1020102626' \ + --chapters chapters/1020102626.json \ + --out ./out \ + --whisper-model medium +``` + +Add `--frames` to also extract video frames for demo chapters (requires video download). + +## Dependencies + +```bash +brew install yt-dlp ffmpeg +pip install openai-whisper +``` + +## Repo structure + +``` +ingest.py # Main pipeline script +chapters/<video_id>.json # Chapter list per session +out/<video_id>/ # Output (gitignored) + audio.mp3 + transcript.json + chunks.json + frames/ +``` + +## Adding a new session + +1. Create `chapters/<video_id>.json` with timestamp + title + summary per chapter +2. Run `ingest.py --url <vimeo_url> --chapters chapters/<video_id>.json` +3. Review `out/<video_id>/chunks.json` +4. Ingest chunks into knowledge-mcp notebook `securetransport-md` \ No newline at end of file