live_audio.py¶
This example captures live audio from the host's default microphone with fromAudioDevice and runs a wake-word spotter for a fixed duration.
It needs a real default capture device. The acceptance test only smoke-tests --help unless SNSR_RUN_LIVE_AUDIO=1 is set.
Instructions¶
-
Set up the sample project environment:
cd ~/Sensory/TrulyNaturalSDK/7.8.0-pre.2/sample/python
uv venv
uv sync
-
Run the sample and say "voice genie" during the capture window:
uv run src/live_audio.py --duration 10
Increase --duration if you need more time before speaking.
Code¶
Available in this TrulyNatural SDK installation at ~/Sensory/TrulyNaturalSDK/7.8.0-pre.2/sample/python/src/live_audio.py
live_audio.py
"""Live microphone capture for the TrulyNatural SDK Python binding.
Opens the host's default audio input device via
``Stream.from_audio_device``, wires it to a phrase-spotter
session, and prints results as they arrive in real time. Capture
runs for ``--duration`` seconds (default 10).
This sample exercises the SDK's host-audio backend (ALSA on
Linux, Audio Queue Services on macOS, the Windows Multimedia
Extensions wave API on Windows). It needs a real default capture
device and is **not** part of the default acceptance test sweep
because CI runners typically have no microphone; the acceptance
test only verifies that the script imports and parses arguments
cleanly. To run it for real, plug in a microphone and execute::
uv run src/live_audio.py [--sdk-root PATH] [--duration SECS]
then say "voice genie" within the capture window. The opt-in env
toggle ``SNSR_RUN_LIVE_AUDIO=1`` flips the acceptance test from
help-only to a full live-audio run.
"""
from __future__ import annotations
import argparse
import sys
from pathlib import Path
import snsr
# File name of the wake-word spotter model; main() looks it up under
# <sdk-root>/model/.
MODEL = "spot-voicegenie-enUS-6.5.1-m.snsr"
# Capture window, in seconds, used when --duration is not given.
DEFAULT_DURATION_S = 10.0
# Audio format used to convert between seconds and byte counts.
SAMPLES_PER_SECOND = 16_000 # snsr default capture format
BYTES_PER_SAMPLE = 2 # 16-bit LPCM
# Bytes requested per microphone read: 240 samples of 2 bytes each,
# i.e. 15 ms of audio at the format above.
CHUNK_BYTES = 480
def default_sdk_root() -> Path:
    """Return the SDK install root inferred from this script's location.

    The script ships as ``<root>/sample/python/src/live_audio.py``, so the
    root is the fourth ancestor of the resolved file path.
    """
    script_path = Path(__file__).resolve()
    ancestors = script_path.parents
    # ancestors[0] is src/, [1] python/, [2] sample/, [3] the install root.
    return ancestors[3]
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the sample's command-line options.

    Args:
        argv: Arguments to parse. ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        Namespace with ``sdk_root`` (a ``Path``) and ``duration`` (a
        ``float``, in seconds). No range check is applied to ``duration``
        here.
    """
    # The module docstring is None when docstrings are stripped
    # (``python -OO``) and may be empty; fall back to no description instead
    # of failing before any argument is parsed.
    doc_lines = (__doc__ or "").splitlines()
    summary = doc_lines[0] if doc_lines else None

    parser = argparse.ArgumentParser(description=summary)
    parser.add_argument(
        "--sdk-root",
        type=Path,
        default=default_sdk_root(),
        help="TrulyNatural SDK install root (default: auto-detect)",
    )
    parser.add_argument(
        "--duration",
        type=float,
        default=DEFAULT_DURATION_S,
        help=f"capture duration in seconds (default: {DEFAULT_DURATION_S})",
    )
    return parser.parse_args(argv)
def run_live_audio(model_path: Path, duration_s: float) -> int:
    """Spot the wake word on the default capture device for ``duration_s``.

    Opens a session on the model at ``model_path``, reads ``CHUNK_BYTES``
    at a time from the default audio input device and pushes each chunk
    into the session. Capture ends once ``duration_s`` seconds' worth of
    bytes has been read, or earlier if the device returns an empty read.
    Each result event is printed as it arrives.

    Returns:
        The number of result events received.
    """
    hits = 0

    def report(session: snsr.Session, _key: bytes) -> None:
        # Result-event handler: tally the event and print text and score.
        nonlocal hits
        hits += 1
        phrase = session.get_string(snsr.RES_TEXT)
        confidence = session.get_double(snsr.RES_SCORE)
        print(f" spotted {phrase!r} (score {confidence:.4f})")

    print(f"snsr {snsr.VERSION}")
    print(f" model: {model_path}")
    print(f" capturing for {duration_s:.1f}s; say 'voice genie'...")
    print()

    bytes_per_second = SAMPLES_PER_SECOND * BYTES_PER_SAMPLE
    # Whole samples only: truncate to a sample count before scaling to bytes.
    target_bytes = int(duration_s * SAMPLES_PER_SECOND) * BYTES_PER_SAMPLE
    received = 0

    with snsr.Session(str(model_path)) as spotter:
        spotter.require(snsr.TASK_TYPE, snsr.PHRASESPOT)
        spotter.set_handler(snsr.RESULT_EVENT, report)
        with snsr.Stream.from_audio_device() as microphone:
            while received < target_bytes:
                pcm = microphone.read(CHUNK_BYTES)
                if not pcm:
                    # An empty read ends the capture early.
                    break
                spotter.push(snsr.SOURCE_AUDIO_PCM, pcm)
                received += len(pcm)
        spotter.stop()

    seconds_heard = received / bytes_per_second
    print()
    print(f"done: {hits} result event(s) over {seconds_heard:.1f}s of audio")
    return hits
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: validate the arguments, then run the capture.

    Args:
        argv: Arguments to parse. ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        ``0`` after a capture run; ``2`` when the model file is missing or
        ``--duration`` is not a positive, finite number.
    """
    import math  # local import: only the finiteness check below needs it

    args = parse_args(argv)
    sdk_root: Path = args.sdk_root.resolve()
    model_path = sdk_root / "model" / MODEL
    if not model_path.is_file():
        print(f"error: model not found: {model_path}", file=sys.stderr)
        print(
            "hint: pass --sdk-root pointing at a TrulyNatural SDK install",
            file=sys.stderr,
        )
        return 2
    # float() accepts "nan" and "inf", and neither is caught by the `<= 0`
    # test below; both would make the int() conversion in run_live_audio
    # raise instead of producing a clean usage error.
    if not math.isfinite(args.duration):
        print(
            f"error: --duration must be a finite number, got {args.duration}",
            file=sys.stderr,
        )
        return 2
    if args.duration <= 0:
        print(
            f"error: --duration must be positive, got {args.duration}",
            file=sys.stderr,
        )
        return 2
    run_live_audio(model_path, args.duration)
    return 0
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    sys.exit(main())