PocketSphinx
5.0.0
A small speech recognizer
|
Simple voice activity detection based endpointing. More...
#include <pocketsphinx/endpointer.h>
Public Member Functions | |
POCKETSPHINX_EXPORT ps_endpointer_t * | ps_endpointer_init (double window, double ratio, ps_vad_mode_t mode, int sample_rate, double frame_length) |
POCKETSPHINX_EXPORT ps_endpointer_t * | ps_endpointer_retain (ps_endpointer_t *ep) |
POCKETSPHINX_EXPORT int | ps_endpointer_free (ps_endpointer_t *ep) |
POCKETSPHINX_EXPORT ps_vad_t * | ps_endpointer_vad (ps_endpointer_t *ep) |
const POCKETSPHINX_EXPORT int16 * | ps_endpointer_process (ps_endpointer_t *ep, const int16 *frame) |
const POCKETSPHINX_EXPORT int16 * | ps_endpointer_end_stream (ps_endpointer_t *ep, const int16 *frame, size_t nsamp, size_t *out_nsamp) |
POCKETSPHINX_EXPORT int | ps_endpointer_in_speech (ps_endpointer_t *ep) |
POCKETSPHINX_EXPORT double | ps_endpointer_speech_start (ps_endpointer_t *ep) |
POCKETSPHINX_EXPORT double | ps_endpointer_speech_end (ps_endpointer_t *ep) |
Simple voice activity detection based endpointing.
POCKETSPHINX_EXPORT ps_endpointer_t * ps_endpointer_init | ( | double | window, |
double | ratio, | ||
ps_vad_mode_t | mode, | ||
int | sample_rate, | ||
double | frame_length | ||
) |
Initialize endpointing.
window | Seconds of audio to use in speech start/end decision, or 0 to use the default (PS_ENDPOINTER_DEFAULT_WINDOW). |
ratio | Ratio of frames needed to trigger start/end decision, or 0 for the default (PS_ENDPOINTER_DEFAULT_RATIO). |
mode | "Aggressiveness" of voice activity detection. Stricter values (see ps_vad_mode_t) are less likely to misclassify non-speech as speech. |
sample_rate | Sampling rate of input, or 0 for the default (which can be obtained with ps_vad_sample_rate()). Only 8000, 16000, 32000, and 48000 are directly supported; other rates will use the closest supported rate (within reason). Note that this means that the actual frame length may not be exactly the one requested, so you must always use the one returned by ps_endpointer_frame_size() (in samples) or ps_endpointer_frame_length() (in seconds). |
frame_length | Requested frame length in seconds, or 0.0 for the default. Only 0.01, 0.02, and 0.03 are currently supported. The actual frame length may be different; you must always use ps_endpointer_frame_length() to obtain it. |
POCKETSPHINX_EXPORT ps_endpointer_t * ps_endpointer_retain | ( | ps_endpointer_t * | ep | ) |
Retain a pointer to the endpointer.
ep | Endpointer. |
POCKETSPHINX_EXPORT int ps_endpointer_free | ( | ps_endpointer_t * | ep | ) |
Release a pointer to the endpointer.
ep | Endpointer. |
POCKETSPHINX_EXPORT ps_vad_t * ps_endpointer_vad | ( | ps_endpointer_t * | ep | ) |
Get the voice activity detector used by the endpointer.
const POCKETSPHINX_EXPORT int16 * ps_endpointer_process | ( | ps_endpointer_t * | ep, |
const int16 * | frame | ||
) |
Process a frame of audio, returning a frame if in a speech region.
Note that the endpointer is not thread-safe. You must call all endpointer functions from the same thread.
ep | Endpointer. |
frame | Frame of data, must contain ps_endpointer_frame_size() samples. |
const POCKETSPHINX_EXPORT int16 * ps_endpointer_end_stream | ( | ps_endpointer_t * | ep, |
const int16 * | frame, | ||
size_t | nsamp, | ||
size_t * | out_nsamp | ||
) |
Process remaining samples at end of stream.
Note that the endpointer is not thread-safe. You must call all endpointer functions from the same thread.
ep | Endpointer. |
frame | Frame of data, must contain ps_endpointer_frame_size() or fewer samples. |
nsamp | Number of samples in frame. |
out_nsamp | Output, number of samples available. |
POCKETSPHINX_EXPORT int ps_endpointer_in_speech | ( | ps_endpointer_t * | ep | ) |
Get the current state (speech/not-speech) of the endpointer.
This function can be used to detect speech/non-speech transitions. If it returns 0, and a subsequent call to ps_endpointer_process() returns non-NULL, this indicates a transition to speech. Conversely, if ps_endpointer_process() returns non-NULL and a subsequent call to this function returns 0, this indicates a transition to non-speech.
ep | Endpointer. |
POCKETSPHINX_EXPORT double ps_endpointer_speech_start | ( | ps_endpointer_t * | ep | ) |
Get the start time of the last speech segment.
POCKETSPHINX_EXPORT double ps_endpointer_speech_end | ( | ps_endpointer_t * | ep | ) |
Get the end time of the last speech segment.