Skip to content

stt_push.py stt

This example shows push-mode Speech-to-Text. The application owns the audio source, reads a WAV file in 480-byte chunks, and feeds each chunk to the recognizer with Session.push.

STT support is a TrulyNatural-only feature. On builds that do not include STT, the sample prints "STT not supported" and exits successfully.

Instructions

  1. Set up the sample project environment:

    cd ~/Sensory/TrulyNaturalSDK/7.8.0-pre.2/sample/python
    uv venv
    uv sync
    
  2. Run the sample:

    uv run src/stt_push.py
    

    On STT-capable builds, the sample prints the final recognition result.

Code

Available in this TrulyNatural SDK installation at ~/Sensory/TrulyNaturalSDK/7.8.0-pre.2/sample/python/src/stt_push.py

stt_push.py

"""Push-mode Speech-To-Text for the TrulyNatural SDK Python binding.

Loads the automotive STT model and feeds a WAV file into the
session in 480-byte chunks via ``Session.push``, printing each
``RESULT_EVENT`` as it arrives. This is the streaming counterpart
to ``hello_world.py``'s pull-mode ``set_stream`` + ``run`` loop:
the application owns the audio source and decides when to feed
samples in, which is the right shape for live audio, network
streams, or any other producer that does not look like a file.

Modelled on ``tests/test_snsr.py::test_Session_push_spotter`` and
``::test_Session_run_stt_reset`` from the ``snsr`` binding test
suite.

STT support is a TrulyNatural-only feature. On builds that do not
include it (notably TrulyHandsfree), the script prints a clear
"STT not supported" line and exits 0 so it composes cleanly with
the SDK's acceptance test.

Usage::

    uv run src/stt_push.py [--sdk-root PATH]
"""

from __future__ import annotations

import argparse
import sys
from pathlib import Path

import snsr


# File name of the STT model; main() looks it up under ``<sdk-root>/model``.
MODEL = "stt-enUS-automotive-medium-2.3.15-pnc.snsr"
# File name of the sample recording; main() looks it up under
# ``<sdk-root>/data/audio``.
AUDIO = "voice-genie-set-cruise-control.wav"
# Number of bytes requested from the audio stream per read, and therefore
# the largest chunk handed to a single ``Session.push`` call.
CHUNK_BYTES = 480
# Value assigned to the session's ``snsr.CUSTOM_VOCAB`` string setting
# before any audio is pushed.
CUSTOM_VOCAB = "voice genie, set cruise control"


def default_sdk_root() -> Path:
    """Return the SDK install root inferred from this script's location.

    The sample ships as ``<sdk-root>/sample/python/src/stt_push.py``, so
    the root is the fourth ancestor of the resolved script path.

    If the script has been copied somewhere with fewer than four
    ancestors, there is no SDK root to infer. In that case the script's
    own directory is returned instead of raising ``IndexError``, so that
    ``main`` can report the missing model with its ``--sdk-root`` hint
    rather than dying with a traceback during argument parsing.
    """
    script = Path(__file__).resolve()
    try:
        return script.parents[3]
    except IndexError:
        # Too shallow to sit inside an SDK tree; defer the failure to the
        # caller's file-existence checks.
        return script.parent


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    parser = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    parser.add_argument(
        "--sdk-root",
        type=Path,
        default=default_sdk_root(),
        help="TrulyNatural SDK install root (default: auto-detect)",
    )
    return parser.parse_args(argv)


def stt_supported() -> bool:
    """Report whether this ``snsr`` build can do Speech-to-Text.

    A throwaway session, created without a model, is opened just long
    enough to read the integer ``STT_SUPPORT`` setting; the session is
    closed again before the result is returned. Any non-zero value
    counts as "supported".
    """
    with snsr.Session() as probe:
        support_flag = probe.get_int(snsr.STT_SUPPORT)
    return bool(support_flag)


def run_stt_push(model_path: Path, audio_path: Path) -> int:
    """Stream ``audio_path`` through a new STT session in push mode.

    Prints a short banner, loads ``model_path`` into a session, applies
    the custom vocabulary, and pushes the audio ``CHUNK_BYTES`` at a
    time. Every ``RESULT_EVENT`` prints its recognized text. Returns
    the number of result events that fired.
    """
    print(f"snsr {snsr.VERSION}")
    print(f"  model: {model_path}")
    print(f"  audio: {audio_path}")
    print(f"  vocab: {CUSTOM_VOCAB!r}")
    print()

    results_seen = 0

    def on_result(session: snsr.Session, _key: bytes) -> None:
        # Invoked by the session for each RESULT_EVENT.
        nonlocal results_seen
        results_seen += 1
        print(f"  result: {session.get_string(snsr.RES_TEXT)!r}")

    with snsr.Session(str(model_path)) as recognizer:
        recognizer.set_string(snsr.CUSTOM_VOCAB, CUSTOM_VOCAB)
        recognizer.set_handler(snsr.RESULT_EVENT, on_result)
        with snsr.Stream.from_audio_file(str(audio_path)) as audio:
            while True:
                chunk = audio.read(CHUNK_BYTES)
                # An empty read marks the end of the audio stream.
                if chunk == b"":
                    break
                recognizer.push(snsr.SOURCE_AUDIO_PCM, chunk)
        # Called once all audio has been pushed and the stream is closed,
        # while the session is still open.
        recognizer.stop()

    print()
    print(f"done: {results_seen} result event(s)")
    return results_seen


def main(argv: list[str] | None = None) -> int:
    """Run the push-mode STT sample and return a process exit status.

    ``argv`` is forwarded to ``parse_args``; ``None`` means "use the
    real command line".

    Returns ``0`` when recognition ran, and also when the ``snsr`` build
    has no STT support (so the sample still passes on such builds).
    Returns ``2`` when the model or the audio file cannot be found under
    the SDK root.
    """
    args = parse_args(argv)
    sdk_root: Path = args.sdk_root.resolve()

    if not stt_supported():
        print(
            "STT not supported in this TrulyNatural build "
            "(snsr.STT_SUPPORT == 0); skipping."
        )
        return 0

    model_path = sdk_root / "model" / MODEL
    audio_path = sdk_root / "data" / "audio" / AUDIO

    # Check both inputs up front so a wrong --sdk-root produces a clear
    # message instead of a failure from inside the recognizer.
    for label, path in (("model", model_path), ("audio", audio_path)):
        if not path.is_file():
            print(f"error: {label} not found: {path}", file=sys.stderr)
            print(
                "hint: pass --sdk-root pointing at a TrulyNatural SDK install",
                file=sys.stderr,
            )
            return 2

    run_stt_push(model_path, audio_path)
    return 0


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())