voiceKeyterms.ts
services/voiceKeyterms.ts
107
Lines
3462
Bytes
2
Exports
3
Imports
10
Keywords
What this is
This page documents one file from the repository and includes its full source so you can read it without leaving the docs site.
Beginner explanation
This file is one piece of the larger system. Its name, directory, imports, and exports show where it fits. Start by reading the exports and related files first.
How it is used
Start from the exports list and related files. Those are the easiest clues for where this file fits into the system.
Expert explanation
Architecturally, this file intersects with integrations. It contains 107 lines, 3 detected imports, and 2 detected exports.
Important relationships
Detected exports
splitIdentifier, getVoiceKeyterms
Keywords
terms, name, branch, keyterms, basename, length, filepath, word, project, names
Detected imports
path, ../bootstrap/state.js, ../utils/git.js
Source notes
This page embeds the full file contents. Small or leaf files are still indexed honestly instead of being over-explained.
Full source
// Voice keyterms for improving STT accuracy in the voice_stream endpoint.
//
// Provides domain-specific vocabulary hints (Deepgram "keywords") so the STT
// engine correctly recognises coding terminology, project names, and branch
// names that would otherwise be misheard.
import { basename } from 'path'
import { getProjectRoot } from '../bootstrap/state.js'
import { getBranch } from '../utils/git.js'
// ─── Global keyterms ────────────────────────────────────────────────
/**
 * Coding vocabulary that is always offered as keyterm hints, independent of
 * the current project. getVoiceKeyterms seeds its result set with this list,
 * so these entries come first in the returned array.
 */
const GLOBAL_KEYTERMS: readonly string[] = [
// Terms Deepgram consistently mangles without keyword hints.
// Note: "Claude" and "Anthropic" are already server-side base keyterms,
// so they are not repeated here.
// Avoid terms nobody speaks aloud as-spelled (stdout → "standard out"):
// a hint only helps when the spoken form matches the written form.
'MCP',
'symlink',
'grep',
'regex',
'localhost',
'codebase',
'TypeScript',
'JSON',
'OAuth',
'webhook',
'gRPC',
'dotfiles',
'subagent',
'worktree',
]
// ─── Helpers ────────────────────────────────────────────────────────
/**
 * Break an identifier or path-like string into its component words.
 *
 * Word boundaries are:
 *   - a lowercase letter immediately followed by an uppercase letter
 *     (camelCase / PascalCase humps; a run of capitals is not split), and
 *   - any run of `-`, `_`, `.`, `/` or whitespace (kebab-case, snake_case,
 *     dotted names, path segments).
 *
 * Only words of 3 to 20 characters are kept: shorter fragments are noise and
 * longer ones are dropped as well.
 *
 * @param name Identifier, branch name, or path to split.
 * @returns The surviving words, in their original order and casing.
 */
export function splitIdentifier(name: string): string[] {
  // Insert a space at every lower→upper transition so humps separate below.
  const spaced = name.replace(/([a-z])([A-Z])/g, '$1 $2')

  const words: string[] = []
  for (const piece of spaced.split(/[-_./\s]+/)) {
    const word = piece.trim()
    if (word.length <= 2 || word.length > 20) continue
    words.push(word)
  }
  return words
}
/**
 * Extract keyterm candidates from a file path: take the final path segment,
 * drop its trailing extension, and split what remains with splitIdentifier.
 *
 * A leading dot is treated as part of the name rather than as the start of
 * an extension, so dotfiles such as ".gitignore" or ".env" contribute
 * "gitignore" / "env" instead of producing no words at all.
 *
 * @param filePath Path (or bare file name) to derive words from.
 * @returns Words from the file's stem, filtered by splitIdentifier's rules.
 */
function fileNameWords(filePath: string): string[] {
  // `(?!^)` forbids a match starting at index 0; without it the pattern
  // swallows an entire single-dot dotfile name and leaves an empty stem.
  const stem = basename(filePath).replace(/(?!^)\.[^.]+$/, '')
  return splitIdentifier(stem)
}
// ─── Public API ─────────────────────────────────────────────────────
/** Upper bound on the number of keyterms returned by getVoiceKeyterms. */
const MAX_KEYTERMS = 50

/**
 * Assemble the keyterm list for the voice_stream STT endpoint.
 *
 * Sources, in insertion order (no model calls are involved):
 *   1. the hardcoded GLOBAL_KEYTERMS,
 *   2. the project root's basename, kept whole,
 *   3. the words of the current git branch name,
 *   4. the words of recently touched file names.
 *
 * Duplicates are collapsed (exact string match). Failures while reading the
 * project root or the branch are ignored, so the result degrades to fewer
 * hints rather than rejecting.
 *
 * @param recentFiles Optional set of recently used file paths.
 * @returns At most MAX_KEYTERMS terms, earliest-inserted first.
 */
export async function getVoiceKeyterms(
  recentFiles?: ReadonlySet<string>,
): Promise<string[]> {
  const collected = new Set<string>(GLOBAL_KEYTERMS)

  // Project root basename as a single term — users say "claude CLI internal"
  // as a phrase, not isolated words. Keeping the whole basename lets the
  // STT's keyterm boosting match the phrase regardless of separator.
  try {
    const root = getProjectRoot()
    const projectName = root ? basename(root) : ''
    if (projectName.length > 2 && projectName.length <= 50) {
      collected.add(projectName)
    }
  } catch {
    // getProjectRoot() may throw if not initialised yet — ignore
  }

  // Git branch words (e.g. "feat/voice-keyterms" → "feat", "voice", "keyterms")
  try {
    const branchName = await getBranch()
    if (branchName) {
      splitIdentifier(branchName).forEach(part => {
        collected.add(part)
      })
    }
  } catch {
    // getBranch() may fail if not in a git repo — ignore
  }

  // Recent file names — stop visiting files once the cap is reached. One
  // file may still overshoot the cap; the final slice trims the excess.
  for (const recentPath of recentFiles ?? []) {
    if (collected.size >= MAX_KEYTERMS) break
    fileNameWords(recentPath).forEach(part => {
      collected.add(part)
    })
  }

  return Array.from(collected).slice(0, MAX_KEYTERMS)
}