Skip to content

enrollUDT.java

This example shows how to enroll a user-defined wake word (also called a user-defined trigger, UDT, or keyword spotter).

Instructions

To run this example, choose a wake word phrase, open a terminal window and enter the commands after the % prompt below (on Windows, replace ./gradlew with gradlew.bat). Speak when prompted.

% cd ~/Sensory/TrulyNaturalSDK/7.6.1/sample/java/enroll-udt/
% ./gradlew -q --console=plain enroll

Please say your enrollment phrase (1/4)
Recording:   3.80 s
Recording passes preliminary tests.

Please say your enrollment phrase (2/4)
Recording:   3.60 s
Recording passes preliminary tests.

Please say your enrollment phrase (3/4) with context,
 for example: "<phrase> will it rain tomorrow?"
Recording:   5.01 s
Recording passes preliminary tests.

Please say your enrollment phrase (4/4) with context,
 for example: "<phrase> will it rain tomorrow?"
Recording:   4.31 s
Recording passes preliminary tests.
Adapting: 100% done.
Enrollment context saved to ~/Sensory/TrulyNaturalSDK/7.6.1/sample/java/enroll-udt/build/model/enrollment-context.snsr
Enrolled model saved to ~/Sensory/TrulyNaturalSDK/7.6.1/sample/java/enroll-udt/build/model/enrolled-sv.snsr
Done!

You can make additional enrollments by specifying a unique phrase tag on the command line. New enrollments replace previous ones that used the same tag.

% ./gradlew -q --console=plain enroll -Ptag=second-phrase

Use the eval target to test the wake word enrollment(s) (evalUDT.java). Stop the process with ^C when you're done.

% ./gradlew -q --console=plain eval
Say your enrolled phrase.
#00 "custom-phrase", score = 0.817
  [12795 ms, 13875 ms] custom-phrase

Recording:  16.91
^C

To start over, remove the existing enrollments:

% ./gradlew -q --console=plain clean

Code

Available in this TrulyNatural SDK installation at ~/Sensory/TrulyNaturalSDK/7.6.1/sample/java/enroll-udt/src/main/java/com/sensory/speech/snsr/demo/enroll-udt/enrollUDT.java

enrollUDT.java

/* Sensory Confidential
 * Copyright (C)2016-2025 Sensory, Inc. https://sensory.com/
 *
 * Command-line User-Defined Trigger enrollment.
 *------------------------------------------------------------------------------
 */

import java.io.Console;
import java.io.IOException;
import com.sensory.speech.snsr.Snsr;
import com.sensory.speech.snsr.SnsrDataFormat;
import com.sensory.speech.snsr.SnsrRC;
import com.sensory.speech.snsr.SnsrSession;
import com.sensory.speech.snsr.SnsrStream;

import enroll.BuildConfig;

/**
 * Command-line enrollment of a user-defined trigger (wake word) from live audio.
 *
 * <p>Loads the enrollment task model named by {@code BuildConfig.UDT_MODEL}, merges in
 * an enrollment context previously saved under {@code BuildConfig.MODEL_DIR} when one
 * can be loaded, prompts for recordings, and writes the enrolled model to
 * {@code BuildConfig.ENROLLED_MODEL}.
 */
public class enrollUDT {
  /**
   * Runs one enrollment session.
   *
   * @param argv optionally a single user or phrase tag (default {@code "custom-phrase"}).
   *     Passing more than one argument prints a usage message and exits with status 7;
   *     failing to load the enrollment model exits with status 3.
   */
  public static void main(String[] argv) {
    final int SAMPLE_RATE = 16000;
    final String speakNow = "\nPlease say your enrollment phrase";
    final String enrollmentContext = BuildConfig.MODEL_DIR + "/enrollment-context.snsr";
    String userTag = "custom-phrase";

    if (argv.length == 1) {
      userTag = argv[0];
    } else if (argv.length != 0) {
      System.out.println("usage: ./gradlew enroll [-Ptag=user-or-phrase-tag]");
      System.exit(7);
    }

    // Live audio stream handle.
    final SnsrStream audio = SnsrStream.fromAudioDevice();

    // Primary SDK session handle.
    final SnsrSession s = new SnsrSession();
    try {
      s.load(BuildConfig.UDT_MODEL).require(Snsr.TASK_TYPE, Snsr.ENROLL);
    } catch (IOException e) {
      e.printStackTrace();
      System.exit(3);
    }

    try {
      s.load(enrollmentContext);
      try {
        // Remove any earlier enrollment stored under this tag so the new one replaces it.
        s.setString(Snsr.DELETE_USER, userTag);
      } catch (Exception ignored) {
        // Best effort: presumably the tag has no earlier enrollment to delete.
      }
      System.out.println("Loaded enrollments from " + enrollmentContext);
      s.forEach(Snsr.USER_LIST, (ses, key) -> {
        System.out.println("User " + ses.getString(Snsr.USER)
            + " has " + ses.getInt(Snsr.RES_ENROLLMENT_COUNT)
            + " enrollments.");
        return SnsrRC.OK;
      });
    } catch (IOException ignored) {
      // Best effort: presumably no enrollment context has been saved yet, so
      // enrollment simply starts from the base model.
    }

    s.setStream(Snsr.SOURCE_AUDIO_PCM, audio)
        .setString(Snsr.USER, userTag)
        // Report why a recording was rejected and how to fix it.
        .setHandler(Snsr.FAIL_EVENT, (ses, key) -> {
          System.out.println("This enrollment recording is not usable.");
          System.out.println(" Reason: " + ses.getString(Snsr.RES_REASON));
          System.out.println("    Fix: " + ses.getString(Snsr.RES_GUIDANCE));
          return SnsrRC.OK;
        })
        .setHandler(Snsr.PASS_EVENT, (ses, key) -> {
          System.out.println("Recording passes preliminary tests.");
          return SnsrRC.OK;
        })
        // Single-line progress display; the carriage return overwrites the previous value.
        .setHandler(Snsr.PROG_EVENT, (ses, key) -> {
          double p = ses.getDouble(Snsr.RES_PERCENT_DONE);
          System.out.printf("\rAdapting: %3.0f%% done.    ", p);
          if (p >= 100) {
            System.out.println();
          }
          return SnsrRC.OK;
        })
        .setHandler(Snsr.PAUSE_EVENT, (ses, key) -> {
          // Pause recording while processing.
          System.out.println();
          audio.close();
          return SnsrRC.OK;
        })
        .setHandler(Snsr.RESUME_EVENT, (ses, key) -> {
          try {
            // Restart recording.
            audio.open();
          } catch (Exception e) {
            e.printStackTrace();
          }
          // Prompt shows "(next recording number / target count)".
          String prompt = speakNow + " ("
              + (ses.getInt(Snsr.RES_ENROLLMENT_COUNT) + 1) + "/"
              + ses.getInt(Snsr.ENROLLMENT_TARGET) + ")";
          if (ses.getInt(Snsr.ADD_CONTEXT) != 0) {
            prompt += " with context,\n for example: "
                + "\"<phrase> will it rain tomorrow?\"";
          }
          System.out.println(prompt);
          return SnsrRC.OK;
        })
        .setHandler(Snsr.DONE_EVENT, (ses, key) -> {
          SnsrStream out = SnsrStream.fromFileName(BuildConfig.ENROLLED_MODEL, "w");
          try {
            out.copy(ses.getStream(Snsr.MODEL_STREAM));
            System.out.println("Enrolled model saved to "
                + BuildConfig.ENROLLED_MODEL);
          } catch (Exception e) {
            e.printStackTrace();
          } finally {
            // Close the output file whether or not the copy succeeded.
            out.close();
          }
          System.out.println("Done!");
          return SnsrRC.STOP;
        })
        // Optional: save enrollment context
        // Use Snsr.ENROLLED_EVENT to save the
        // unadapted enrollment context instead.
        .setHandler(Snsr.ADAPTED_EVENT, (ses, key) -> {
          ses.save(SnsrDataFormat.RUNTIME, enrollmentContext);
          System.out.println("Enrollment context saved to " + enrollmentContext);
          return SnsrRC.OK;
        })
        // Show audio recording duration
        .setHandler(Snsr.SAMPLES_EVENT, (ses, key) -> {
          double count = ses.getDouble(Snsr.RES_SAMPLES);
          System.out.printf("\rRecording: %6.2f s          ", count / SAMPLE_RATE);
          return SnsrRC.OK;
        });

    try {
      try {
        s.run();
      } finally {
        // Release both handles even when run() fails; the nested finally keeps a
        // failure while releasing the session from skipping the audio stream.
        try {
          s.release();
        } finally {
          audio.release();
        }
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}