50 #include <sphinxbase/cmd_ln.h>
51 #include <sphinxbase/logmath.h>
52 #include <sphinxbase/fe.h>
53 #include <sphinxbase/feat.h>
54 #include <sphinxbase/bitvec.h>
55 #include <sphinxbase/err.h>
56 #include <sphinxbase/prim_type.h>
77 #define SENSCR_DUMMY 0x7fff
103 uint8 *senone_active,
104 int32 n_senone_active,
118 #define ps_mgau_base(mg) ((ps_mgau_t *)(mg))
119 #define ps_mgau_frame_eval(mg,senscr,senone_active,n_senone_active,feat,frame,compallsen) \
120 (*ps_mgau_base(mg)->vt->frame_eval) \
121 (mg, senscr, senone_active, n_senone_active, feat, frame, compallsen)
122 #define ps_mgau_transform(mg, mllr) \
123 (*ps_mgau_base(mg)->vt->transform)(mg, mllr)
124 #define ps_mgau_free(mg) \
125 (*ps_mgau_base(mg)->vt->free)(mg)
331 int16
const **inout_raw,
332 size_t *inout_n_samps,
407 int *inout_frame_idx);
418 int16
const *senscr, FILE *
senfh);
439 #define acmod_activate_sen(acmod, sen) bitvec_set((acmod)->senone_active_vec, sen)
FILE * insenfh
Input senone score file.
void acmod_get_rawdata(acmod_t *acmod, int16 **buffer, int32 *size)
Retrieves the raw data collected during utterance decoding.
uint8 grow_feat
Whether to grow feat_buf.
ps_mgau_t * mgau
Model parameters.
int acmod_read_scores(acmod_t *acmod)
Read one frame of scores from senone score dump file.
uint8 * senone_active
Array of deltas to active GMMs.
long * framepos
File positions of recent frames in senone file.
acmod_state_e
States in utterance processing.
Utterance started, no data yet.
int acmod_set_insenfh(acmod_t *acmod, FILE *senfh)
Set up a senone score dump file for input.
int32 acmod_stream_offset(acmod_t *acmod)
Get the offset of the utterance start of the current stream, helpful for stream-wide timing...
int acmod_rewind(acmod_t *acmod)
Rewind the current utterance, allowing it to be rescored.
int16 * senone_scores
GMM scores for current frame.
ps_mllr_t * acmod_update_mllr(acmod_t *acmod, ps_mllr_t *mllr)
Adapt acoustic model using a linear transform.
int acmod_process_cep(acmod_t *acmod, mfcc_t ***inout_cep, int *inout_n_frames, int full_utt)
Feed acoustic feature data into the acoustic model for scoring.
int n_senone_active
Number of active GMMs.
An individual HMM within the HMM search space.
void acmod_set_rawdata_size(acmod_t *acmod, int32 size)
Sets the limit of the raw audio data to store.
void acmod_start_stream(acmod_t *acmod)
Reset the current stream.
logmath_t * lmath
Log-math computation.
fe_t * fe
Acoustic feature computation.
frame_idx_t n_mfc_frame
Number of frames active in mfc_buf.
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
float32 *** h
Diagonal transformation of variances.
Utterance ended, still buffering.
int32 * cb2mllr
Mapping from codebooks to transformations.
int * veclen
Length of input vectors for each stream.
Binary format model definition files, with support for heterogeneous topologies and variable-size N-p...
float32 *** b
Bias part of mean transformations.
FILE * rawfh
File for writing raw audio data.
mfcc_t ** mfc_buf
Temporary buffer of acoustic features.
frame_idx_t utt_start_frame
Index of the utterance start in the stream, all timings are relative to that.
int n_class
Number of MLLR classes.
Implementation of HMM base structure.
frame_idx_t n_feat_alloc
Number of frames allocated in feat_buf.
mfcc_t *** feat_buf
Temporary buffer of dynamic features.
int acmod_set_senfh(acmod_t *acmod, FILE *logfh)
Start logging senone scores to a filehandle.
int acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
Start logging raw audio to a filehandle.
ps_mllr_t * mllr
Speaker transformation.
uint8 compallsen
Compute all senones?
int acmod_process_feat(acmod_t *acmod, mfcc_t **feat)
Feed dynamic feature data into the acoustic model for scoring.
int acmod_write_senfh_header(acmod_t *acmod, FILE *logfh)
Write senone dump file header.
int n_feat
Number of feature streams.
cmd_ln_t * config
Configuration.
frame_idx_t output_frame
Index of next frame of dynamic features.
int acmod_write_scores(acmod_t *acmod, int n_active, uint8 const *active, int16 const *senscr, FILE *senfh)
Write a frame of senone scores to a dump file.
tmat_t * tmat
Transition matrices.
int32 acmod_flags2list(acmod_t *acmod)
Build the active senone list from the active senone flags.
int acmod_end_utt(acmod_t *acmod)
Mark the end of an utterance.
int acmod_advance(acmod_t *acmod)
Advance the frame index.
enum acmod_state_e acmod_state_t
States in utterance processing.
Model-space linear transforms for speaker adaptation.
uint8 state
State of utterance processing.
int acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
Start logging MFCCs to a filehandle.
void acmod_free(acmod_t *acmod)
Finalize an acoustic model.
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
Feature space linear transform structure.
frame_idx_t feat_outidx
Start of active frames in feat_buf.
float32 **** A
Rotation part of mean transformations.
mfcc_t ** acmod_get_frame(acmod_t *acmod, int *inout_frame_idx)
Get a frame of dynamic feature data.
Transition matrix data structure.
feat_t * fcb
Dynamic feature computation.
int log_zero
Zero log-probability value.
FILE * senfh
File for writing senone score data.
frame_idx_t mfc_outidx
Start of active frames in mfc_buf.
Transition matrix data structure.
frame_idx_t n_mfc_alloc
Number of frames allocated in mfc_buf.
int32 frame_idx_t
Type for frame index values.
int acmod_process_raw(acmod_t *acmod, int16 const **inout_raw, size_t *inout_n_samps, int full_utt)
Feed raw audio data into the acoustic model for scoring. (TODO: Set queue length for utterance processing.)
uint8 insen_swap
Whether to swap input senone score.
int acmod_start_utt(acmod_t *acmod)
Mark the start of an utterance.
int senscr_frame
Frame index for senone_scores.
ps_mgaufuncs_t * vt
vtable of mgau functions.
bin_mdef_t * mdef
Model definition.
int acmod_best_score(acmod_t *acmod, int *out_best_senid)
Get best score and senone index for current frame.
int refcnt
Reference count.
acmod_t * acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
Initialize an acoustic model.
frame_idx_t n_feat_frame
Number of frames active in feat_buf.
FILE * mfcfh
File for writing acoustic feature data.
Acoustic model structure.
int acmod_set_grow(acmod_t *acmod, int grow_feat)
Set memory allocation policy for utterance processing.
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
int frame_idx
Frame counter.
glist_t strings
Temporary acoustic model filenames.
bitvec_t * senone_active_vec
Active GMMs in current frame.