PocketSphinx  5.0.0rc5
A small speech recognizer
Macros | Typedefs | Functions
endpointer.h File Reference

VAD-based endpointer for PocketSphinx. More...

#include <pocketsphinx/prim_type.h>
#include <pocketsphinx/export.h>
#include <pocketsphinx/vad.h>

Go to the source code of this file.

Macros

#define PS_ENDPOINTER_DEFAULT_WINDOW   0.3
 
#define PS_ENDPOINTER_DEFAULT_RATIO   0.9
 
#define ps_endpointer_frame_size(ep)   ps_vad_frame_size(ps_endpointer_vad(ep))
 
#define ps_endpointer_sample_rate(ep)   ps_vad_sample_rate(ps_endpointer_vad(ep))
 

Typedefs

typedef struct ps_endpointer_s ps_endpointer_t
 

Functions

POCKETSPHINX_EXPORT ps_endpointer_t * ps_endpointer_init (double window, double ratio, ps_vad_mode_t mode, int sample_rate, double frame_length)
 
POCKETSPHINX_EXPORT ps_endpointer_t * ps_endpointer_retain (ps_endpointer_t *ep)
 
POCKETSPHINX_EXPORT int ps_endpointer_free (ps_endpointer_t *ep)
 
POCKETSPHINX_EXPORT ps_vad_t * ps_endpointer_vad (ps_endpointer_t *ep)
 
const POCKETSPHINX_EXPORT int16 * ps_endpointer_process (ps_endpointer_t *ep, const int16 *frame)
 
const POCKETSPHINX_EXPORT int16 * ps_endpointer_end_stream (ps_endpointer_t *ep, const int16 *frame, size_t nsamp, size_t *out_nsamp)
 
POCKETSPHINX_EXPORT int ps_endpointer_in_speech (ps_endpointer_t *ep)
 
POCKETSPHINX_EXPORT double ps_endpointer_speech_start (ps_endpointer_t *ep)
 
POCKETSPHINX_EXPORT double ps_endpointer_speech_end (ps_endpointer_t *ep)
 

Detailed Description

VAD-based endpointer for PocketSphinx.

Macro Definition Documentation

◆ PS_ENDPOINTER_DEFAULT_RATIO

#define PS_ENDPOINTER_DEFAULT_RATIO   0.9

Default ratio of frames in window to trigger start/end decision.

◆ PS_ENDPOINTER_DEFAULT_WINDOW

#define PS_ENDPOINTER_DEFAULT_WINDOW   0.3

Default window in seconds of audio to use for speech start/end decision.

◆ ps_endpointer_frame_size

#define ps_endpointer_frame_size (   ep)    ps_vad_frame_size(ps_endpointer_vad(ep))

Get the frame size required by the endpointer.

◆ ps_endpointer_sample_rate

#define ps_endpointer_sample_rate (   ep)    ps_vad_sample_rate(ps_endpointer_vad(ep))

Get the sample rate required by the endpointer.

Function Documentation

◆ ps_endpointer_end_stream()

const POCKETSPHINX_EXPORT int16* ps_endpointer_end_stream ( ps_endpointer_t *  ep,
const int16 *  frame,
size_t  nsamp,
size_t *  out_nsamp 
)

Process remaining samples at end of stream.

Note that the endpointer is not thread-safe. You must call all endpointer functions from the same thread.

Parameters
ep: Endpointer.
frame: Frame of data, must contain ps_endpointer_frame_size() samples or fewer.
nsamp: Number of samples in frame.
out_nsamp: Output, number of samples available.
Returns
Pointer to available samples, or NULL if none available.

◆ ps_endpointer_free()

POCKETSPHINX_EXPORT int ps_endpointer_free ( ps_endpointer_t *  ep)

Release a pointer to endpointer.

Parameters
ep: Endpointer.
Returns
New reference count (0 if freed).

◆ ps_endpointer_in_speech()

POCKETSPHINX_EXPORT int ps_endpointer_in_speech ( ps_endpointer_t *  ep)

Get the current state (speech/not-speech) of the endpointer.

This function can be used to detect speech/non-speech transitions. If it returns 0, and a subsequent call to ps_endpointer_process() returns non-NULL, this indicates a transition to speech. Conversely, if ps_endpointer_process() returns non-NULL and a subsequent call to this function returns 0, this indicates a transition to non-speech.

Parameters
ep: Endpointer.
Returns
non-zero if in a speech segment after processing the last frame of data.

◆ ps_endpointer_init()

POCKETSPHINX_EXPORT ps_endpointer_t* ps_endpointer_init ( double  window,
double  ratio,
ps_vad_mode_t  mode,
int  sample_rate,
double  frame_length 
)

Initialize endpointing.

Parameters
window: Seconds of audio to use in speech start/end decision, or 0 to use the default (PS_ENDPOINTER_DEFAULT_WINDOW).
ratio: Ratio of frames needed to trigger start/end decision, or 0 for the default (PS_ENDPOINTER_DEFAULT_RATIO).
mode: "Aggressiveness" of voice activity detection. Stricter values (see ps_vad_mode_t) are less likely to misclassify non-speech as speech.
sample_rate: Sampling rate of input, or 0 for default (which can be obtained with ps_vad_sample_rate()). Only 8000, 16000, 32000, 48000 are directly supported, others will use the closest supported rate (within reason). Note that this means that the actual frame length may not be exactly the one requested, so you must always use the one returned by ps_endpointer_frame_size() (in samples) or ps_endpointer_frame_length() (in seconds).
frame_length: Requested frame length in seconds, or 0.0 for the default. Only 0.01, 0.02, 0.03 currently supported. Actual frame length may be different; you must always use ps_endpointer_frame_length() to obtain it.
Returns
Endpointer object or NULL on failure (invalid parameter for instance).

◆ ps_endpointer_process()

const POCKETSPHINX_EXPORT int16* ps_endpointer_process ( ps_endpointer_t *  ep,
const int16 *  frame 
)

Process a frame of audio, returning a frame if in a speech region.

Note that the endpointer is not thread-safe. You must call all endpointer functions from the same thread.

Parameters
ep: Endpointer.
frame: Frame of data, must contain ps_endpointer_frame_size() samples.
Returns
NULL if no speech available, or pointer to a frame of ps_endpointer_frame_size() samples (no more and no less).

◆ ps_endpointer_retain()

POCKETSPHINX_EXPORT ps_endpointer_t* ps_endpointer_retain ( ps_endpointer_t *  ep)

Retain a pointer to endpointer.

Parameters
ep: Endpointer.
Returns
Endpointer with incremented reference count.

◆ ps_endpointer_speech_end()

POCKETSPHINX_EXPORT double ps_endpointer_speech_end ( ps_endpointer_t *  ep)

Get the end time of the last speech segment.

◆ ps_endpointer_speech_start()

POCKETSPHINX_EXPORT double ps_endpointer_speech_start ( ps_endpointer_t *  ep)

Get the start time of the last speech segment.

◆ ps_endpointer_vad()

POCKETSPHINX_EXPORT ps_vad_t* ps_endpointer_vad ( ps_endpointer_t *  ep)

Get the voice activity detector used by the endpointer.

Returns
VAD object. The endpointer retains ownership of this object, so you must use ps_vad_retain() if you wish to use it outside of the lifetime of the endpointer.