diff --git a/CLAUDE.md b/CLAUDE.md index bfc4625d..5fafb696 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -10,6 +10,7 @@ This repo ships skills that are installed globally via `npx hyperframes skills` | ------------------------ | ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **hyperframes-compose** | `/hyperframes-compose` | Creating ANY HTML composition — videos, animations, title cards, overlays. Contains required HTML structure, `class="clip"` rules, GSAP timeline patterns, and rendering constraints. | | **hyperframes-captions** | `/hyperframes-captions` | Any task involving text synced to audio: captions, subtitles, lyrics, lyric videos, karaoke. Also covers transcription strategy (whisper model selection, transcript format). | +| **hyperframes-tts** | `/hyperframes-tts` | Generating speech from text: narration, voiceovers, text-to-speech. Voice selection, speed control, and combining TTS output with compositions. | | **marker-highlight** | `/marker-highlight` | Animated text highlighting — marker sweeps, hand-drawn circles, burst lines, scribble, sketchout. Use with captions for dynamic emphasis. 
| ### GSAP Skills (from [greensock/gsap-skills](https://github.com/greensock/gsap-skills)) @@ -32,6 +33,7 @@ The skills encode HyperFrames-specific patterns (e.g., required `class="clip"` o - When creating or modifying HTML compositions → invoke `/hyperframes-compose` BEFORE writing any code - When adding captions, subtitles, lyrics, or any text synced to audio → invoke `/hyperframes-captions` BEFORE writing any code - When transcribing audio or choosing a whisper model → invoke `/hyperframes-captions` BEFORE running any transcription tool +- When generating speech from text (narration, voiceover, TTS) → invoke `/hyperframes-tts` BEFORE running any TTS command - When creating a video from audio (music video, lyric video, audio visualizer with text) → invoke BOTH `/hyperframes-compose` AND `/hyperframes-captions` - When writing GSAP animations → invoke `/gsap-core` and `/gsap-timeline` BEFORE writing any code - When optimizing animation performance → invoke `/gsap-performance` BEFORE making changes @@ -80,6 +82,15 @@ bunx oxfmt --check # Format (check only, used by pre-commit hook) Always run both on changed files before committing. The lefthook pre-commit hook runs `bunx oxlint` and `bunx oxfmt --check` automatically. +### Adding CLI Commands + +When adding a new CLI command: + +1. Define the command in `packages/cli/src/commands/.ts` using `defineCommand` from citty +2. Register it in `packages/cli/src/cli.ts` under `subCommands` (lazy-loaded) +3. **Add examples to `packages/cli/src/help.ts`** in the `COMMAND_EXAMPLES` record — every command must have `--help` examples +4. Validate by running `npx tsx packages/cli/src/cli.ts --help` and verifying the examples section appears + ## Key Concepts - **Compositions** are HTML files with `data-*` attributes defining timeline, tracks, and media @@ -131,3 +142,46 @@ If captions are inaccurate (wrong words, bad timing): 3. 
**Use an external API**: Transcribe via OpenAI or Groq Whisper API, then import the JSON with `hyperframes transcribe response.json` See the `/hyperframes-captions` skill for full details on model selection and API usage. + +## Text-to-Speech + +Generate speech audio locally using Kokoro-82M (no API key, runs on CPU). Useful for adding voiceovers to compositions. + +### Quick reference + +```bash +# Generate speech from text +npx hyperframes tts "Welcome to HyperFrames" + +# Choose a voice and output path +npx hyperframes tts "Hello world" --voice am_adam --output narration.wav + +# Read text from a file +npx hyperframes tts script.txt --voice bf_emma + +# Adjust speech speed +npx hyperframes tts "Fast narration" --speed 1.2 + +# List available voices +npx hyperframes tts --list +``` + +### Voices + +Default voice is `af_heart`. The model ships with 54 voices across 8 languages: + +| Voice ID | Name | Language | Gender | +| ------------ | ------- | -------- | ------ | +| `af_heart` | Heart | en-US | Female | +| `af_nova` | Nova | en-US | Female | +| `am_adam` | Adam | en-US | Male | +| `am_michael` | Michael | en-US | Male | +| `bf_emma` | Emma | en-GB | Female | +| `bm_george` | George | en-GB | Male | + +Use `npx hyperframes tts --list` for the full set, or pass any valid Kokoro voice ID. 
+ +### Requirements + +- Python 3.8+ with the `kokoro-onnx` and `soundfile` packages installed (`pip install kokoro-onnx soundfile` — the CLI prints this command if either is missing) +- Model downloads automatically on first use (~311 MB model + ~27 MB voices, cached in `~/.cache/hyperframes/tts/`) diff --git a/packages/cli/package.json b/packages/cli/package.json index be1bc4be..1f6b21a8 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -21,7 +21,7 @@ "build:fonts": "cd ../producer && tsx scripts/generate-font-data.ts", "build:studio": "cd ../studio && bun run build", "build:runtime": "tsx scripts/build-runtime.ts", - "build:copy": "mkdir -p dist/studio dist/docs dist/templates dist/skills && cp -r ../studio/dist/* dist/studio/ && cp -r src/templates/blank src/templates/_shared dist/templates/ && cp -r ../../skills/hyperframes-compose ../../skills/hyperframes-captions dist/skills/ && (cp src/docs/*.md dist/docs/ 2>/dev/null || true)", + "build:copy": "mkdir -p dist/studio dist/docs dist/templates dist/skills && cp -r ../studio/dist/* dist/studio/ && cp -r src/templates/blank src/templates/_shared dist/templates/ && cp -r ../../skills/hyperframes-compose ../../skills/hyperframes-captions ../../skills/hyperframes-tts dist/skills/ && (cp src/docs/*.md dist/docs/ 2>/dev/null || true)", "typecheck": "tsc --noEmit" }, "dependencies": { diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 3ea75e88..798ef2b7 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -33,6 +33,7 @@ const subCommands = { benchmark: () => import("./commands/benchmark.js").then((m) => m.default), browser: () => import("./commands/browser.js").then((m) => m.default), transcribe: () => import("./commands/transcribe.js").then((m) => m.default), + tts: () => import("./commands/tts.js").then((m) => m.default), docs: () => import("./commands/docs.js").then((m) => m.default), doctor: () => import("./commands/doctor.js").then((m) => m.default), upgrade: () => import("./commands/upgrade.js").then((m) => m.default), diff --git 
a/packages/cli/src/commands/tts.ts b/packages/cli/src/commands/tts.ts new file mode 100644 index 00000000..8c55395d --- /dev/null +++ b/packages/cli/src/commands/tts.ts @@ -0,0 +1,152 @@ +import { defineCommand } from "citty"; +import { existsSync, readFileSync } from "node:fs"; +import { resolve, extname } from "node:path"; +import * as clack from "@clack/prompts"; +import { c } from "../ui/colors.js"; +import { DEFAULT_VOICE, BUNDLED_VOICES } from "../tts/manager.js"; + +const voiceList = BUNDLED_VOICES.map((v) => `${v.id} (${v.label})`).join(", "); + +export default defineCommand({ + meta: { + name: "tts", + description: "Generate speech audio from text using a local AI model (Kokoro-82M)", + }, + args: { + input: { + type: "positional", + description: "Text to speak, or path to a .txt file", + required: false, + }, + output: { + type: "string", + description: "Output file path (default: speech.wav in current directory)", + alias: "o", + }, + voice: { + type: "string", + description: `Voice ID (default: ${DEFAULT_VOICE}). 
Options: ${voiceList}`, + alias: "v", + }, + speed: { + type: "string", + description: "Speech speed multiplier (default: 1.0)", + alias: "s", + }, + list: { + type: "boolean", + description: "List available voices and exit", + default: false, + }, + json: { + type: "boolean", + description: "Output result as JSON", + default: false, + }, + }, + async run({ args }) { + // ── List voices mode ────────────────────────────────────────────── + if (args.list) { + return listVoices(args.json); + } + + // ── Resolve input text ──────────────────────────────────────────── + if (!args.input) { + console.error(c.error("Provide text to speak, or use --list to see available voices.")); + process.exit(1); + } + + let text: string; + const maybeFile = resolve(args.input); + + if (existsSync(maybeFile) && extname(maybeFile).toLowerCase() === ".txt") { + text = readFileSync(maybeFile, "utf-8").trim(); + if (!text) { + console.error(c.error("File is empty.")); + process.exit(1); + } + } else { + text = args.input; + } + + if (!text.trim()) { + console.error(c.error("No text provided.")); + process.exit(1); + } + + // ── Resolve output path ─────────────────────────────────────────── + const output = resolve(args.output ?? "speech.wav"); + const voice = args.voice ?? DEFAULT_VOICE; + const speed = args.speed ? parseFloat(args.speed) : 1.0; + + if (isNaN(speed) || speed <= 0 || speed > 3) { + console.error(c.error("Speed must be a number between 0.1 and 3.0")); + process.exit(1); + } + + // ── Synthesize ──────────────────────────────────────────────────── + const { synthesize } = await import("../tts/synthesize.js"); + const spin = args.json ? null : clack.spinner(); + spin?.start(`Generating speech with ${c.accent(voice)}...`); + + try { + const result = await synthesize(text, output, { + voice, + speed, + onProgress: spin ? 
(msg) => spin.message(msg) : undefined, + }); + + if (args.json) { + console.log( + JSON.stringify({ + ok: true, + voice, + speed, + durationSeconds: result.durationSeconds, + outputPath: result.outputPath, + }), + ); + } else { + spin?.stop( + c.success( + `Generated ${c.accent(result.durationSeconds.toFixed(1) + "s")} of speech → ${c.accent(result.outputPath)}`, + ), + ); + } + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + if (args.json) { + console.log(JSON.stringify({ ok: false, error: message })); + } else { + spin?.stop(c.error(`Speech synthesis failed: ${message}`)); + } + process.exit(1); + } + }, +}); + +// --------------------------------------------------------------------------- +// List voices +// --------------------------------------------------------------------------- + +function listVoices(json: boolean): void { + if (json) { + console.log(JSON.stringify(BUNDLED_VOICES)); + return; + } + + console.log(`\n${c.bold("Available voices")} (Kokoro-82M)\n`); + console.log( + ` ${c.dim("ID")} ${c.dim("Name")} ${c.dim("Language")} ${c.dim("Gender")}`, + ); + console.log(` ${c.dim("─".repeat(60))}`); + for (const v of BUNDLED_VOICES) { + const id = v.id.padEnd(18); + const label = v.label.padEnd(13); + const lang = v.language.padEnd(10); + console.log(` ${c.accent(id)} ${label} ${lang} ${v.gender}`); + } + console.log( + `\n ${c.dim("Use any Kokoro voice ID — see https://github.com/thewh1teagle/kokoro-onnx for all 54 voices")}\n`, + ); +} diff --git a/packages/cli/src/help.ts b/packages/cli/src/help.ts index a2440e84..af83f000 100644 --- a/packages/cli/src/help.ts +++ b/packages/cli/src/help.ts @@ -124,6 +124,14 @@ const COMMAND_EXAMPLES: Record = { ["Install to Cursor (project-level)", "hyperframes skills --cursor"], ["Install to specific tools", "hyperframes skills --claude --gemini"], ], + tts: [ + ["Generate speech from text", 'hyperframes tts "Welcome to HyperFrames"'], + ["Choose a voice", 'hyperframes tts 
"Hello world" --voice am_adam'], + ["Save to a specific file", 'hyperframes tts "Intro" --voice bf_emma --output narration.wav'], + ["Adjust speech speed", 'hyperframes tts "Slow and clear" --speed 0.8'], + ["Read text from a file", "hyperframes tts script.txt"], + ["List available voices", "hyperframes tts --list"], + ], transcribe: [ ["Transcribe an audio file", "hyperframes transcribe audio.mp3"], ["Transcribe a video file", "hyperframes transcribe video.mp4"], diff --git a/packages/cli/src/tts/manager.ts b/packages/cli/src/tts/manager.ts new file mode 100644 index 00000000..cc945bc2 --- /dev/null +++ b/packages/cli/src/tts/manager.ts @@ -0,0 +1,99 @@ +import { existsSync, mkdirSync } from "node:fs"; +import { homedir } from "node:os"; +import { join } from "node:path"; +import { downloadFile } from "../utils/download.js"; + +const CACHE_DIR = join(homedir(), ".cache", "hyperframes", "tts"); +const MODELS_DIR = join(CACHE_DIR, "models"); +const VOICES_DIR = join(CACHE_DIR, "voices"); + +const DEFAULT_MODEL = "kokoro-v1.0"; + +const MODEL_URLS: Record = { + "kokoro-v1.0": + "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx", +}; + +const VOICES_URL = + "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin"; + +// --------------------------------------------------------------------------- +// Voices — Kokoro ships 54 voices across 8 languages. We expose a curated +// default set and allow users to specify any valid Kokoro voice ID. 
+// --------------------------------------------------------------------------- + +export interface VoiceInfo { + id: string; + label: string; + language: string; + gender: "female" | "male"; +} + +export const BUNDLED_VOICES: VoiceInfo[] = [ + { id: "af_heart", label: "Heart", language: "en-US", gender: "female" }, + { id: "af_nova", label: "Nova", language: "en-US", gender: "female" }, + { id: "af_sky", label: "Sky", language: "en-US", gender: "female" }, + { id: "am_adam", label: "Adam", language: "en-US", gender: "male" }, + { id: "am_michael", label: "Michael", language: "en-US", gender: "male" }, + { id: "bf_emma", label: "Emma", language: "en-GB", gender: "female" }, + { id: "bf_isabella", label: "Isabella", language: "en-GB", gender: "female" }, + { id: "bm_george", label: "George", language: "en-GB", gender: "male" }, +]; + +export const DEFAULT_VOICE = "af_heart"; + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +/** + * Ensure the Kokoro ONNX model is downloaded and cached. + * Returns the path to the .onnx model file. + */ +export async function ensureModel( + model: string = DEFAULT_MODEL, + options?: { onProgress?: (message: string) => void }, +): Promise { + const modelPath = join(MODELS_DIR, `${model}.onnx`); + if (existsSync(modelPath)) return modelPath; + + const url = MODEL_URLS[model]; + if (!url) { + throw new Error( + `Unknown TTS model: ${model}. Available: ${Object.keys(MODEL_URLS).join(", ")}`, + ); + } + + mkdirSync(MODELS_DIR, { recursive: true }); + options?.onProgress?.(`Downloading TTS model ${model} (~311 MB)...`); + await downloadFile(url, modelPath); + + if (!existsSync(modelPath)) { + throw new Error(`Model download failed: ${model}`); + } + + return modelPath; +} + +/** + * Ensure the Kokoro voices bundle is downloaded and cached. + * Returns the path to the voices .bin file. 
+ */ +export async function ensureVoices(options?: { + onProgress?: (message: string) => void; +}): Promise { + const voicesPath = join(VOICES_DIR, "voices-v1.0.bin"); + if (existsSync(voicesPath)) return voicesPath; + + mkdirSync(VOICES_DIR, { recursive: true }); + options?.onProgress?.("Downloading voice data (~27 MB)..."); + await downloadFile(VOICES_URL, voicesPath); + + if (!existsSync(voicesPath)) { + throw new Error("Voice data download failed"); + } + + return voicesPath; +} + +export { MODELS_DIR, VOICES_DIR, DEFAULT_MODEL }; diff --git a/packages/cli/src/tts/synthesize.ts b/packages/cli/src/tts/synthesize.ts new file mode 100644 index 00000000..8f75cb1a --- /dev/null +++ b/packages/cli/src/tts/synthesize.ts @@ -0,0 +1,191 @@ +import { execFileSync } from "node:child_process"; +import { existsSync, writeFileSync, mkdirSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { homedir } from "node:os"; +import { ensureModel, ensureVoices, DEFAULT_VOICE } from "./manager.js"; + +// --------------------------------------------------------------------------- +// Python runtime detection +// --------------------------------------------------------------------------- + +function findPython(): string | undefined { + for (const name of ["python3", "python"]) { + try { + const result = execFileSync("which", [name], { + encoding: "utf-8", + stdio: ["pipe", "pipe", "pipe"], + timeout: 5000, + }).trim(); + + // Verify it's Python 3 + const version = execFileSync(result, ["--version"], { + encoding: "utf-8", + stdio: ["pipe", "pipe", "pipe"], + timeout: 5000, + }).trim(); + + if (version.includes("Python 3")) return result; + } catch { + // not found or not Python 3 + } + } + return undefined; +} + +function hasPythonPackage(python: string, pkg: string): boolean { + try { + execFileSync(python, ["-c", `import ${pkg}`], { + stdio: ["pipe", "pipe", "pipe"], + timeout: 10_000, + }); + return true; + } catch { + return false; + } +} + +// 
--------------------------------------------------------------------------- +// Inline Python script for Kokoro synthesis +// --------------------------------------------------------------------------- + +const SYNTH_SCRIPT = ` +import sys, json + +model_path = sys.argv[1] +voices_path = sys.argv[2] +text = sys.argv[3] +voice = sys.argv[4] +speed = float(sys.argv[5]) +output_path = sys.argv[6] + +import kokoro_onnx +import soundfile as sf + +model = kokoro_onnx.Kokoro(model_path, voices_path) +samples, sample_rate = model.create(text, voice=voice, speed=speed) +sf.write(output_path, samples, sample_rate) + +duration = len(samples) / sample_rate +print(json.dumps({ + "outputPath": output_path, + "sampleRate": sample_rate, + "durationSeconds": round(duration, 3), +})) +`; + +// Cache the script to avoid rewriting it on every invocation +const SCRIPT_DIR = join(homedir(), ".cache", "hyperframes", "tts"); +const SCRIPT_PATH = join(SCRIPT_DIR, "synth.py"); + +function ensureSynthScript(): string { + if (!existsSync(SCRIPT_PATH)) { + mkdirSync(SCRIPT_DIR, { recursive: true }); + writeFileSync(SCRIPT_PATH, SYNTH_SCRIPT); + } + return SCRIPT_PATH; +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +export interface SynthesizeOptions { + model?: string; + voice?: string; + speed?: number; + onProgress?: (message: string) => void; +} + +export interface SynthesizeResult { + outputPath: string; + sampleRate: number; + durationSeconds: number; +} + +/** + * Synthesize text to speech using Kokoro-82M via kokoro-onnx. + */ +export async function synthesize( + text: string, + outputPath: string, + options?: SynthesizeOptions, +): Promise { + const voice = options?.voice ?? DEFAULT_VOICE; + const speed = options?.speed ?? 1.0; + + // 1. 
Ensure Python 3 is available with kokoro-onnx + options?.onProgress?.("Checking Python runtime..."); + const python = findPython(); + if (!python) { + throw new Error( + "Python 3 is required for text-to-speech. Install Python 3.8+ and run: pip install kokoro-onnx soundfile", + ); + } + + if (!hasPythonPackage(python, "kokoro_onnx")) { + throw new Error( + "The kokoro-onnx package is not installed. Run: pip install kokoro-onnx soundfile", + ); + } + + if (!hasPythonPackage(python, "soundfile")) { + throw new Error("The soundfile package is not installed. Run: pip install soundfile"); + } + + // 2. Ensure model and voices are downloaded (parallel on first run) + const [modelPath, voicesPath] = await Promise.all([ + ensureModel(options?.model, { onProgress: options?.onProgress }), + ensureVoices({ onProgress: options?.onProgress }), + ]); + + // 3. Ensure synthesis script is cached + const scriptPath = ensureSynthScript(); + + // 4. Ensure output directory exists + mkdirSync(dirname(outputPath), { recursive: true }); + + // 5. Run synthesis + options?.onProgress?.(`Generating speech with voice ${voice}...`); + try { + const stdout = execFileSync( + python, + [scriptPath, modelPath, voicesPath, text, voice, String(speed), outputPath], + { + encoding: "utf-8", + timeout: 300_000, + stdio: ["pipe", "pipe", "pipe"], + }, + ); + + if (!existsSync(outputPath)) { + throw new Error("Synthesis completed but no output file was created"); + } + + // Parse the last line of stdout as JSON (in case Python printed warnings before it) + const lines = stdout.trim().split("\n"); + const jsonLine = lines[lines.length - 1] ?? 
""; + const result: { outputPath: string; sampleRate: number; durationSeconds: number } = + JSON.parse(jsonLine); + + return { + outputPath: result.outputPath, + sampleRate: result.sampleRate, + durationSeconds: result.durationSeconds, + }; + } catch (err: unknown) { + // If the error is our own JSON parse failure but the file was created, + // re-throw with a clearer message rather than returning fabricated data + if (err instanceof SyntaxError && existsSync(outputPath)) { + throw new Error( + "Speech was generated but metadata could not be read. Check the output file manually.", + ); + } + + let detail = ""; + if (err && typeof err === "object" && "stderr" in err) { + const stderr = String(err.stderr).trim(); + if (stderr) detail = `\n${stderr.slice(-500)}`; + } + throw new Error(`Speech synthesis failed${detail}`); + } +} diff --git a/packages/cli/src/utils/download.ts b/packages/cli/src/utils/download.ts new file mode 100644 index 00000000..5b882562 --- /dev/null +++ b/packages/cli/src/utils/download.ts @@ -0,0 +1,51 @@ +import { createWriteStream, renameSync, unlinkSync } from "node:fs"; +import { get as httpsGet } from "node:https"; +import { pipeline } from "node:stream/promises"; + +/** + * Download a file from a URL, following redirects. + * Uses atomic write (download to .tmp, rename on success) to prevent + * corrupt partial files from persisting in the cache on interruption. 
+ */ +export function downloadFile(url: string, dest: string): Promise { + const tmp = `${dest}.tmp`; + return new Promise((resolve, reject) => { + const follow = (u: string) => { + httpsGet(u, (res) => { + if (res.statusCode === 301 || res.statusCode === 302) { + const location = res.headers.location; + if (location) { + follow(location); + return; + } + } + if (res.statusCode !== 200) { + reject(new Error(`Download failed: HTTP ${res.statusCode}`)); + return; + } + const file = createWriteStream(tmp); + pipeline(res, file) + .then(() => { + renameSync(tmp, dest); + resolve(); + }) + .catch((err) => { + try { + unlinkSync(tmp); + } catch { + // ignore cleanup failure + } + reject(err); + }); + }).on("error", (err) => { + try { + unlinkSync(tmp); + } catch { + // ignore cleanup failure + } + reject(err); + }); + }; + follow(url); + }); +} diff --git a/packages/cli/src/whisper/manager.ts b/packages/cli/src/whisper/manager.ts index 41803b31..e2dffff6 100644 --- a/packages/cli/src/whisper/manager.ts +++ b/packages/cli/src/whisper/manager.ts @@ -1,9 +1,8 @@ import { execFileSync } from "node:child_process"; -import { existsSync, mkdirSync, createWriteStream, rmSync } from "node:fs"; +import { existsSync, mkdirSync, rmSync } from "node:fs"; import { homedir, platform } from "node:os"; import { join } from "node:path"; -import { get as httpsGet } from "node:https"; -import { pipeline } from "node:stream/promises"; +import { downloadFile } from "../utils/download.js"; const MODELS_DIR = join(homedir(), ".cache", "hyperframes", "whisper", "models"); const DEFAULT_MODEL = "small.en"; @@ -15,31 +14,6 @@ export interface WhisperResult { source: WhisperSource; } -// --- Download helper -------------------------------------------------------- - -function downloadFile(url: string, dest: string): Promise { - return new Promise((resolve, reject) => { - const follow = (u: string) => { - httpsGet(u, (res) => { - if (res.statusCode === 301 || res.statusCode === 302) { - const 
location = res.headers.location; - if (location) { - follow(location); - return; - } - } - if (res.statusCode !== 200) { - reject(new Error(`Download failed: HTTP ${res.statusCode}`)); - return; - } - const file = createWriteStream(dest); - pipeline(res, file).then(resolve).catch(reject); - }).on("error", reject); - }; - follow(url); - }); -} - function getModelUrl(model: string): string { return `https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-${model}.bin`; } diff --git a/skills/hyperframes-tts/SKILL.md b/skills/hyperframes-tts/SKILL.md new file mode 100644 index 00000000..e8a8cda0 --- /dev/null +++ b/skills/hyperframes-tts/SKILL.md @@ -0,0 +1,79 @@ +--- +name: hyperframes-tts +description: Generate speech audio locally using Kokoro-82M (no API key). Use when asked to create narration, voiceover, or text-to-speech audio for compositions, or when a user needs spoken audio from text. Covers voice selection, speed tuning, and integrating TTS output with compositions and captions. +--- + +# Text-to-Speech + +## Voice Selection + +Match voice to content. Default is `af_heart`. + +| Content type | Voice | Why | +| ----------------- | --------------------- | ----------------------------- | +| Product demo | `af_heart`/`af_nova` | Warm, professional | +| Tutorial / how-to | `am_adam`/`bf_emma` | Neutral, easy to follow | +| Marketing / promo | `af_sky`/`am_michael` | Energetic or authoritative | +| Documentation | `bf_emma`/`bm_george` | Clear British English, formal | +| Casual / social | `af_heart`/`af_sky` | Approachable, natural | + +Run `npx hyperframes tts --list` for all 54 voices (8 languages: EN, JP, ZH, ES, FR, HI, IT, PT). 
+ +## Speed Tuning + +- **0.7-0.8** — Tutorial, complex content, accessibility +- **1.0** — Natural pace (default) +- **1.1-1.2** — Intros, transitions, upbeat content +- **1.5+** — Rarely appropriate; test carefully + +## Composing with TTS Audio + +Generate a voiceover and use it as the audio track: + +```bash +npx hyperframes tts "Your script here" --voice af_nova --output narration.wav +``` + +Then reference it in the composition as a standard `