PocketSphinx  5prealpha
acmod.c File Reference

Acoustic model structures for PocketSphinx. More...

#include <assert.h>
#include <string.h>
#include <math.h>
#include <sphinxbase/prim_type.h>
#include <sphinxbase/err.h>
#include <sphinxbase/cmd_ln.h>
#include <sphinxbase/strfuncs.h>
#include <sphinxbase/byteorder.h>
#include <sphinxbase/feat.h>
#include <sphinxbase/bio.h>
#include "cmdln_macro.h"
#include "acmod.h"
#include "s2_semi_mgau.h"
#include "ptm_mgau.h"
#include "ms_mgau.h"

Go to the source code of this file.

Macros

#define MPX_BITVEC_SET(a, h, i)
 
#define NONMPX_BITVEC_SET(a, h, i)
 

Functions

int acmod_fe_mismatch (acmod_t *acmod, fe_t *fe)
 
int acmod_feat_mismatch (acmod_t *acmod, feat_t *fcb)
 
acmod_t * acmod_init (cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
 Initialize an acoustic model. More...
 
void acmod_free (acmod_t *acmod)
 Finalize an acoustic model.
 
ps_mllr_t * acmod_update_mllr (acmod_t *acmod, ps_mllr_t *mllr)
 Adapt acoustic model using a linear transform. More...
 
int acmod_write_senfh_header (acmod_t *acmod, FILE *logfh)
 Write senone dump file header.
 
int acmod_set_senfh (acmod_t *acmod, FILE *logfh)
 Start logging senone scores to a filehandle. More...
 
int acmod_set_mfcfh (acmod_t *acmod, FILE *logfh)
 Start logging MFCCs to a filehandle. More...
 
int acmod_set_rawfh (acmod_t *acmod, FILE *logfh)
 Start logging raw audio to a filehandle. More...
 
void acmod_grow_feat_buf (acmod_t *acmod, int nfr)
 
int acmod_set_grow (acmod_t *acmod, int grow_feat)
 Set memory allocation policy for utterance processing. More...
 
int acmod_start_utt (acmod_t *acmod)
 Mark the start of an utterance.
 
int acmod_end_utt (acmod_t *acmod)
 Mark the end of an utterance.
 
int acmod_process_raw (acmod_t *acmod, int16 const **inout_raw, size_t *inout_n_samps, int full_utt)
 TODO: Set queue length for utterance processing. More...
 
int acmod_process_cep (acmod_t *acmod, mfcc_t ***inout_cep, int *inout_n_frames, int full_utt)
 Feed acoustic feature data into the acoustic model for scoring. More...
 
int acmod_process_feat (acmod_t *acmod, mfcc_t **feat)
 Feed dynamic feature data into the acoustic model for scoring. More...
 
int acmod_set_insenfh (acmod_t *acmod, FILE *senfh)
 Set up a senone score dump file for input. More...
 
int acmod_rewind (acmod_t *acmod)
 Rewind the current utterance, allowing it to be rescored. More...
 
int acmod_advance (acmod_t *acmod)
 Advance the frame index. More...
 
int acmod_write_scores (acmod_t *acmod, int n_active, uint8 const *active, int16 const *senscr, FILE *senfh)
 Write a frame of senone scores to a dump file.
 
int acmod_read_scores (acmod_t *acmod)
 Read one frame of scores from senone score dump file. More...
 
mfcc_t ** acmod_get_frame (acmod_t *acmod, int *inout_frame_idx)
 Get a frame of dynamic feature data. More...
 
int16 const * acmod_score (acmod_t *acmod, int *inout_frame_idx)
 Score one frame of data. More...
 
int acmod_best_score (acmod_t *acmod, int *out_best_senid)
 Get best score and senone index for current frame.
 
void acmod_clear_active (acmod_t *acmod)
 Clear set of active senones.
 
void acmod_activate_hmm (acmod_t *acmod, hmm_t *hmm)
 Activate senones associated with an HMM.
 
int32 acmod_flags2list (acmod_t *acmod)
 Build the active senone list from the active senone flags.
 
int32 acmod_stream_offset (acmod_t *acmod)
 Get the offset of the utterance start of the current stream, helpful for stream-wide timing.
 
void acmod_start_stream (acmod_t *acmod)
 Reset the current stream.
 
void acmod_set_rawdata_size (acmod_t *acmod, int32 size)
 Sets the limit of the raw audio data to store.
 
void acmod_get_rawdata (acmod_t *acmod, int16 **buffer, int32 *size)
 Retrieves the raw data collected during utterance decoding.
 

Detailed Description

Acoustic model structures for PocketSphinx.

Author
David Huggins-Daines dhugg.nosp@m.ins@.nosp@m.cs.cm.nosp@m.u.ed.nosp@m.u

Definition in file acmod.c.

Macro Definition Documentation

#define MPX_BITVEC_SET (   a,
  h,
  i 
)
Value:
if (hmm_mpx_ssid(h,i) != BAD_SSID) \
bitvec_set((a)->senone_active_vec, hmm_mpx_senid(h,i))
#define BAD_SSID
Invalid senone sequence ID (limited to 16 bits for PocketSphinx).
Definition: bin_mdef.h:94

Definition at line 1205 of file acmod.c.

#define NONMPX_BITVEC_SET (   a,
  h,
  i 
)
Value:
bitvec_set((a)->senone_active_vec, \
hmm_nonmpx_senid(h,i))

Definition at line 1208 of file acmod.c.

Function Documentation

int acmod_advance ( acmod_t *  acmod)

Advance the frame index.

This function moves to the next frame of input data. Subsequent calls to acmod_score() will return scores for that frame, until the next call to acmod_advance().

Returns
New frame index.

Definition at line 899 of file acmod.c.

References acmod_s::feat_outidx, ps_mgau_s::frame_idx, acmod_s::mgau, acmod_s::n_feat_alloc, acmod_s::n_feat_frame, and acmod_s::output_frame.

mfcc_t** acmod_get_frame ( acmod_t *  acmod,
int *  inout_frame_idx 
)

Get a frame of dynamic feature data.

Parameters
inout_frame_idx — Input: frame index to get, or NULL to obtain features for the most recent frame. Output: frame index corresponding to this set of features.
Returns
Feature array, or NULL if requested frame is not available.

Definition at line 1088 of file acmod.c.

References acmod_s::feat_buf.

acmod_t* acmod_init ( cmd_ln_t *  config,
logmath_t *  lmath,
fe_t *  fe,
feat_t *  fcb 
)

Initialize an acoustic model.

Parameters
config — a command-line object containing parameters. This pointer is not retained by this object.
lmath — global log-math parameters.
fe — a previously-initialized acoustic feature module to use, or NULL to create one automatically. If this is supplied and its parameters do not match those in the acoustic model, this function will fail. This pointer is not retained.
fcb — a previously-initialized dynamic feature module to use, or NULL to create one automatically. If this is supplied and its parameters do not match those in the acoustic model, this function will fail. This pointer is not retained.
Returns
a newly initialized acmod_t, or NULL on failure.

Definition at line 228 of file acmod.c.

References acmod_free(), ACMOD_IDLE, acmod_s::compallsen, acmod_s::config, acmod_s::fcb, acmod_s::fe, acmod_s::feat_buf, acmod_s::framepos, acmod_s::lmath, acmod_s::log_zero, acmod_s::mdef, acmod_s::mfc_buf, acmod_s::n_feat_alloc, acmod_s::n_mfc_alloc, acmod_s::senone_active, acmod_s::senone_active_vec, acmod_s::senone_scores, acmod_s::state, and acmod_s::utt_start_frame.

Referenced by ps_reinit().

int acmod_process_cep ( acmod_t *  acmod,
mfcc_t ***  inout_cep,
int *  inout_n_frames,
int  full_utt 
)

Feed acoustic feature data into the acoustic model for scoring.

Parameters
inout_cep — In: Pointer to buffer of features. Out: Pointer to next frame to be read.
inout_n_frames — In: Number of frames available. Out: Number of frames remaining.
full_utt — If non-zero, this block represents a full utterance and should be processed as such.
Returns
Number of frames of data processed.

Definition at line 699 of file acmod.c.

References ACMOD_ENDED, ACMOD_PROCESSING, ACMOD_STARTED, acmod_s::fcb, acmod_s::feat_buf, acmod_s::feat_outidx, acmod_s::grow_feat, acmod_s::mfcfh, acmod_s::n_feat_alloc, acmod_s::n_feat_frame, and acmod_s::state.

int acmod_process_feat ( acmod_t *  acmod,
mfcc_t **  feat 
)

Feed dynamic feature data into the acoustic model for scoring.

Unlike acmod_process_raw() and acmod_process_cep(), this function accepts a single frame at a time. This is because there is no need to do buffering when using dynamic features as input. However, if the dynamic feature buffer is full, this function will fail, so you should either always check the return value, or always pair a call to it with a call to acmod_score().

Parameters
feat — Pointer to one frame of dynamic features.
Returns
Number of frames processed (either 0 or 1).

Definition at line 797 of file acmod.c.

References acmod_s::fcb, acmod_s::feat_buf, acmod_s::feat_outidx, acmod_s::grow_feat, acmod_s::n_feat_alloc, and acmod_s::n_feat_frame.

int acmod_process_raw ( acmod_t *  acmod,
int16 const **  inout_raw,
size_t *  inout_n_samps,
int  full_utt 
)

TODO: Set queue length for utterance processing.

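The TODO above concerns a planned queue-length setting that would allow multiple concurrent passes of search to operate on different parts of the utterance; it appears to have been merged into this entry by mistake.

This function feeds raw audio data to the acoustic model for scoring.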

Parameters
inout_raw — In: Pointer to buffer of raw samples. Out: Pointer to next sample to be read.
inout_n_samps — In: Number of samples available. Out: Number of samples remaining.
full_utt — If non-zero, this block represents a full utterance and should be processed as such.
Returns
Number of frames of data processed.

Definition at line 607 of file acmod.c.

References acmod_s::fe, acmod_s::mfc_buf, acmod_s::mfc_outidx, acmod_s::n_mfc_alloc, acmod_s::n_mfc_frame, acmod_s::rawfh, and acmod_s::utt_start_frame.

Referenced by ps_process_raw().

int acmod_read_scores ( acmod_t *  acmod)

Read one frame of scores from senone score dump file.

Returns
Number of frames read or <0 on error.

Definition at line 1012 of file acmod.c.

References acmod_s::feat_outidx, acmod_s::framepos, acmod_s::grow_feat, acmod_s::insenfh, acmod_s::n_feat_alloc, acmod_s::n_feat_frame, acmod_s::n_senone_active, acmod_s::output_frame, and acmod_s::senscr_frame.

Referenced by ps_decode_senscr().

int acmod_rewind ( acmod_t *  acmod)

Rewind the current utterance, allowing it to be rescored.

After calling this function, the internal frame index is reset, and acmod_score() will return scores starting at the first frame of the current utterance. Currently, acmod_set_grow() must have been called to enable growing the feature buffer in order for this to work. In the future, senone scores may be cached instead.

Returns
0 for success, <0 for failure (if the utterance can't be rewound due to no feature or score data available)

Definition at line 877 of file acmod.c.

References acmod_s::feat_outidx, ps_mgau_s::frame_idx, acmod_s::mgau, acmod_s::n_feat_alloc, acmod_s::n_feat_frame, acmod_s::output_frame, and acmod_s::senscr_frame.

int16 const* acmod_score ( acmod_t *  acmod,
int *  inout_frame_idx 
)

Score one frame of data.

Parameters
inout_frame_idx — Input: frame index to score, or NULL to obtain scores for the most recent frame. Output: frame index corresponding to this set of scores.
Returns
Array of senone scores for this frame, or NULL if no frame is available for scoring (such as if a frame index is requested that is not yet or no longer available). The data pointed to persists only until the next call to acmod_score() or acmod_advance().

Definition at line 1106 of file acmod.c.

References acmod_flags2list(), acmod_write_scores(), acmod_s::compallsen, acmod_s::feat_buf, acmod_s::framepos, acmod_s::insenfh, acmod_s::mgau, acmod_s::n_senone_active, acmod_s::senfh, acmod_s::senone_active, acmod_s::senone_scores, and acmod_s::senscr_frame.

Referenced by ngram_fwdflat_search(), and ngram_fwdtree_search().

int acmod_set_grow ( acmod_t *  acmod,
int  grow_feat 
)

Set memory allocation policy for utterance processing.

Parameters
grow_feat — If non-zero, the internal dynamic feature buffer will expand as necessary to encompass any amount of data fed to the model.
Returns
previous allocation policy.

Definition at line 410 of file acmod.c.

References acmod_s::grow_feat, and acmod_s::n_feat_alloc.

Referenced by ngram_search_init(), and ps_process_raw().

int acmod_set_insenfh ( acmod_t *  acmod,
FILE *  insenfh 
)

Set up a senone score dump file for input.

Parameters
insenfh — File handle of dump file.
Returns
0 for success, <0 for failure

Definition at line 864 of file acmod.c.

References acmod_s::compallsen, acmod_s::config, acmod_s::insenfh, and acmod_s::n_feat_frame.

Referenced by ps_decode_senscr().

int acmod_set_mfcfh ( acmod_t *  acmod,
FILE *  logfh 
)

Start logging MFCCs to a filehandle.

Parameters
acmod — Acoustic model object.
logfh — Filehandle to log to.
Returns
0 for success, <0 on error.

Definition at line 375 of file acmod.c.

References acmod_s::mfcfh.

Referenced by ps_start_utt().

int acmod_set_rawfh ( acmod_t *  acmod,
FILE *  logfh 
)

Start logging raw audio to a filehandle.

Parameters
acmod — Acoustic model object.
logfh — Filehandle to log to.
Returns
0 for success, <0 on error.

Definition at line 387 of file acmod.c.

References acmod_s::rawfh.

Referenced by ps_start_utt().

int acmod_set_senfh ( acmod_t *  acmod,
FILE *  logfh 
)

Start logging senone scores to a filehandle.

Parameters
acmod — Acoustic model object.
logfh — Filehandle to log to.
Returns
0 for success, <0 on error.

Definition at line 364 of file acmod.c.

References acmod_write_senfh_header(), and acmod_s::senfh.

Referenced by ps_start_utt().

ps_mllr_t* acmod_update_mllr ( acmod_t *  acmod,
ps_mllr_t *  mllr 
)

Adapt acoustic model using a linear transform.

Parameters
mllr — The new transform to use, or NULL to update the existing transform. The decoder retains ownership of this pointer, so you should not attempt to free it manually. Use ps_mllr_retain() if you wish to reuse it elsewhere.
Returns
The updated transform object for this decoder, or NULL on failure.

Definition at line 339 of file acmod.c.

References acmod_s::mgau, acmod_s::mllr, and ps_mllr_free().

Referenced by ps_update_mllr().