PocketSphinx  5prealpha
acmod.h
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
43 #ifndef __ACMOD_H__
44 #define __ACMOD_H__
45 
46 /* System headers. */
47 #include <stdio.h>
48 
49 /* SphinxBase headers. */
50 #include <sphinxbase/cmd_ln.h>
51 #include <sphinxbase/logmath.h>
52 #include <sphinxbase/fe.h>
53 #include <sphinxbase/feat.h>
54 #include <sphinxbase/bitvec.h>
55 #include <sphinxbase/err.h>
56 #include <sphinxbase/prim_type.h>
57 
58 /* Local headers. */
59 #include "ps_mllr.h"
60 #include "bin_mdef.h"
61 #include "tmat.h"
62 #include "hmm.h"
63 
67 typedef enum acmod_state_e {
73 
77 #define SENSCR_DUMMY 0x7fff /* 0x7fff == 32767. NOTE(review): presumably a placeholder senone score (scores are held as int16 in acmod_s) -- confirm against acmod.c. */
78 
82 struct ps_mllr_s { /* Feature space linear transform structure. */
83  int refcnt; /**< Reference count. */
84  int n_class; /**< Number of MLLR classes. */
85  int n_feat; /**< Number of feature streams. */
86  int *veclen; /**< Length of input vectors for each stream. */
87  float32 ****A; /**< Rotation part of mean transformations. */
88  float32 ***b; /**< Bias part of mean transformations. */
89  float32 ***h; /**< Diagonal transformation of variances. */
90  int32 *cb2mllr; /**< Mapping from codebooks to transformations. */
91 };
92 
96 typedef struct ps_mgau_s ps_mgau_t; /* Forward typedef so the function-pointer table that follows can take ps_mgau_t pointers. */
97 
98 typedef struct ps_mgaufuncs_s { /* Table of function pointers ("vtable of mgau functions") reached through ps_mgau_t's vt member. */
99  char const *name; /* NOTE(review): presumably a name identifying the implementation -- confirm. */
100 
101  int (*frame_eval)(ps_mgau_t *mgau, /* Invoked by the ps_mgau_frame_eval() macro. */
102  int16 *senscr,
103  uint8 *senone_active,
104  int32 n_senone_active,
105  mfcc_t ** feat,
106  int32 frame,
107  int32 compallsen);
108  int (*transform)(ps_mgau_t *mgau, /* Invoked by the ps_mgau_transform() macro. */
109  ps_mllr_t *mllr);
110  void (*free)(ps_mgau_t *mgau); /* Invoked by the ps_mgau_free() macro. */
111 } ps_mgaufuncs_t;
112 
113 struct ps_mgau_s { /* NOTE(review): member `ps_mgaufuncs_t *vt` ("vtable of mgau functions", acmod.h:114 per the generated index) is missing from this listing. */
115  int frame_idx; /**< frame counter. */
116 };
117 
118 #define ps_mgau_base(mg) ((ps_mgau_t *)(mg)) /* Cast any pointer argument to ps_mgau_t *. */
119 #define ps_mgau_frame_eval(mg,senscr,senone_active,n_senone_active,feat,frame,compallsen) \
120  (*ps_mgau_base(mg)->vt->frame_eval) \
121  (mg, senscr, senone_active, n_senone_active, feat, frame, compallsen) /* Calls vt->frame_eval with the arguments forwarded unchanged; note mg is expanded twice. */
122 #define ps_mgau_transform(mg, mllr) \
123  (*ps_mgau_base(mg)->vt->transform)(mg, mllr) /* Calls vt->transform(mg, mllr). */
124 #define ps_mgau_free(mg) \
125  (*ps_mgau_base(mg)->vt->free)(mg) /* Calls vt->free(mg). */
126 
148 struct acmod_s { /* Acoustic model structure. */
149  /* Global objects, not retained. */
150  cmd_ln_t *config; /**< Configuration. */
151  logmath_t *lmath; /**< Log-math computation. */
152  glist_t strings; /**< Temporary acoustic model filenames. */
154  /* Feature computation: */
155  fe_t *fe; /**< Acoustic feature computation. */
156  feat_t *fcb; /**< Dynamic feature computation. */
158  /* Model parameters: NOTE(review): members mdef, tmat, mgau and mllr (acmod.h:159-162 per the generated index) are missing from this listing. */
164  /* Senone scoring: NOTE(review): senscr_frame and n_senone_active (acmod.h:168-169 per the generated index) are missing from this listing. */
165  int16 *senone_scores; /**< GMM scores for current frame. */
166  bitvec_t *senone_active_vec; /**< Active GMMs in current frame. */
167  uint8 *senone_active; /**< Array of deltas to active GMMs. */
170  int log_zero; /**< Zero log-probability value. */
172  /* Utterance processing: */
173  mfcc_t **mfc_buf; /**< Temporary buffer of acoustic features. */
174  mfcc_t ***feat_buf; /**< Temporary buffer of dynamic features. */
175  FILE *rawfh; /**< File for writing raw audio data. */
176  FILE *mfcfh; /**< File for writing acoustic feature data. */
177  FILE *senfh; /**< File for writing senone score data. */
178  FILE *insenfh; /**< Input senone score file. */
179  long *framepos; /**< File positions of recent frames in senone file. */
181  /* Rawdata collected during decoding */
182  int16 *rawdata;
183  int32 rawdata_size;
184  int32 rawdata_pos;
185 
186  /* A whole bunch of flags and counters: */
187  uint8 state; /**< State of utterance processing. */
188  uint8 compallsen; /**< Compute all senones? */
189  uint8 grow_feat; /**< Whether to grow feat_buf. */
190  uint8 insen_swap; /**< Whether to swap input senone score. */
201 }; /* NOTE(review): the frame_idx_t counters listed in the generated index for acmod.h:192-200 (utt_start_frame, output_frame, n_mfc_alloc, n_mfc_frame, mfc_outidx, n_feat_alloc, n_feat_frame, feat_outidx) are missing from this listing. */
202 typedef struct acmod_s acmod_t;
203 
220 acmod_t *acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb); /* Initialize an acoustic model. */
221 
234 
242 int acmod_set_senfh(acmod_t *acmod, FILE *senfh); /* Start logging senone scores to a filehandle. */
243 
251 int acmod_set_mfcfh(acmod_t *acmod, FILE *logfh); /* Start logging MFCCs to a filehandle. */
252 
260 int acmod_set_rawfh(acmod_t *acmod, FILE *logfh); /* Start logging raw audio to a filehandle. */
261 
265 void acmod_free(acmod_t *acmod); /* Finalize an acoustic model. */
266 
270 int acmod_start_utt(acmod_t *acmod); /* Mark the start of an utterance. */
271 
275 int acmod_end_utt(acmod_t *acmod); /* Mark the end of an utterance. */
276 
289 int acmod_rewind(acmod_t *acmod); /* Rewind the current utterance, allowing it to be rescored. */
290 
300 int acmod_advance(acmod_t *acmod); /* Advance the frame index. */
301 
310 int acmod_set_grow(acmod_t *acmod, int grow_feat); /* Set memory allocation policy for utterance processing. */
311 
330 int acmod_process_raw(acmod_t *acmod,
331  int16 const **inout_raw,
332  size_t *inout_n_samps,
333  int full_utt); /* NOTE(review): the generated index carries only a TODO as this function's brief; by analogy with acmod_process_cep/acmod_process_feat it presumably feeds raw int16 audio samples into the acoustic model for scoring -- confirm. */
334 
346 int acmod_process_cep(acmod_t *acmod,
347  mfcc_t ***inout_cep,
348  int *inout_n_frames,
349  int full_utt); /* Feed acoustic feature data into the acoustic model for scoring. */
350 
364 int acmod_process_feat(acmod_t *acmod,
365  mfcc_t **feat); /* Feed dynamic feature data into the acoustic model for scoring. */
366 
373 int acmod_set_insenfh(acmod_t *acmod, FILE *insenfh); /* Set up a senone score dump file for input. */
374 
380 int acmod_read_scores(acmod_t *acmod); /* Read one frame of scores from senone score dump file. */
381 
391 mfcc_t **acmod_get_frame(acmod_t *acmod, int *inout_frame_idx); /* Get a frame of dynamic feature data. */
392 
406 int16 const *acmod_score(acmod_t *acmod,
407  int *inout_frame_idx); /* Score one frame of data. */
408 
412 int acmod_write_senfh_header(acmod_t *acmod, FILE *logfh); /* Write senone dump file header. */
413 
417 int acmod_write_scores(acmod_t *acmod, int n_active, uint8 const *active,
418  int16 const *senscr, FILE *senfh); /* Write a frame of senone scores to a dump file. */
419 
420 
424 int acmod_best_score(acmod_t *acmod, int *out_best_senid); /* Get best score and senone index for current frame. */
425 
429 void acmod_clear_active(acmod_t *acmod); /* Clear set of active senones. */
430 
434 void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm); /* Activate senones associated with an HMM. */
435 
439 #define acmod_activate_sen(acmod, sen) bitvec_set((acmod)->senone_active_vec, sen) /* Sets bit `sen` in acmod->senone_active_vec via bitvec_set(). */
440 
444 int32 acmod_flags2list(acmod_t *acmod); /* Build active list. NOTE(review): the brief in the generated index is truncated ("Build active list from."); presumably built from the senone_active_vec flags -- confirm. */
445 
449 int32 acmod_stream_offset(acmod_t *acmod); /* Get the offset of the utterance start of the current stream (for stream-wide timing). */
450 
454 void acmod_start_stream(acmod_t *acmod); /* Reset the current stream. */
455 
459 void acmod_set_rawdata_size(acmod_t *acmod, int32 size); /* Sets the limit of the raw audio data to store. */
460 
464 void acmod_get_rawdata(acmod_t *acmod, int16 **buffer, int32 *size); /* Retrieves the raw data collected during utterance decoding. */
465 
466 #endif /* __ACMOD_H__ */
FILE * insenfh
Input senone score file.
Definition: acmod.h:178
void acmod_get_rawdata(acmod_t *acmod, int16 **buffer, int32 *size)
Retrieves the raw data collected during utterance decoding.
Definition: acmod.c:1332
uint8 grow_feat
Whether to grow feat_buf.
Definition: acmod.h:189
ps_mgau_t * mgau
Model parameters.
Definition: acmod.h:161
int acmod_read_scores(acmod_t *acmod)
Read one frame of scores from senone score dump file.
Definition: acmod.c:1012
Not in an utterance.
Definition: acmod.h:68
uint8 * senone_active
Array of deltas to active GMMs.
Definition: acmod.h:167
long * framepos
File positions of recent frames in senone file.
Definition: acmod.h:179
acmod_state_e
States in utterance processing.
Definition: acmod.h:67
Utterance started, no data yet.
Definition: acmod.h:69
int acmod_set_insenfh(acmod_t *acmod, FILE *senfh)
Set up a senone score dump file for input.
Definition: acmod.c:864
int32 acmod_stream_offset(acmod_t *acmod)
Get the offset of the utterance start of the current stream, helpful for stream-wide timing...
Definition: acmod.c:1308
int acmod_rewind(acmod_t *acmod)
Rewind the current utterance, allowing it to be rescored.
Definition: acmod.c:877
int16 * senone_scores
GMM scores for current frame.
Definition: acmod.h:165
ps_mllr_t * acmod_update_mllr(acmod_t *acmod, ps_mllr_t *mllr)
Adapt acoustic model using a linear transform.
Definition: acmod.c:339
int acmod_process_cep(acmod_t *acmod, mfcc_t ***inout_cep, int *inout_n_frames, int full_utt)
Feed acoustic feature data into the acoustic model for scoring.
Definition: acmod.c:699
Utterance in progress.
Definition: acmod.h:70
int n_senone_active
Number of active GMMs.
Definition: acmod.h:169
An individual HMM among the HMM search space.
void acmod_set_rawdata_size(acmod_t *acmod, int32 size)
Sets the limit of the raw audio data to store.
Definition: acmod.c:1321
void acmod_start_stream(acmod_t *acmod)
Reset the current stream.
Definition: acmod.c:1314
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
fe_t * fe
Acoustic feature computation.
Definition: acmod.h:155
frame_idx_t n_mfc_frame
Number of frames active in mfc_buf.
Definition: acmod.h:196
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
Definition: acmod.c:1213
float32 *** h
Diagonal transformation of variances.
Definition: acmod.h:89
Utterance ended, still buffering.
Definition: acmod.h:71
int32 * cb2mllr
Mapping from codebooks to transformations.
Definition: acmod.h:90
int * veclen
Length of input vectors for each stream.
Definition: acmod.h:86
Binary format model definition files, with support for heterogeneous topologies and variable-size N-p...
float32 *** b
Bias part of mean transformations.
Definition: acmod.h:88
FILE * rawfh
File for writing raw audio data.
Definition: acmod.h:175
mfcc_t ** mfc_buf
Temporary buffer of acoustic features.
Definition: acmod.h:173
frame_idx_t utt_start_frame
Index of the utterance start in the stream, all timings are relative to that.
Definition: acmod.h:192
int n_class
Number of MLLR classes.
Definition: acmod.h:84
Implementation of HMM base structure.
frame_idx_t n_feat_alloc
Number of frames allocated in feat_buf.
Definition: acmod.h:198
mfcc_t *** feat_buf
Temporary buffer of dynamic features.
Definition: acmod.h:174
int acmod_set_senfh(acmod_t *acmod, FILE *logfh)
Start logging senone scores to a filehandle.
Definition: acmod.c:364
int acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
Start logging raw audio to a filehandle.
Definition: acmod.c:387
ps_mllr_t * mllr
Speaker transformation.
Definition: acmod.h:162
uint8 compallsen
Compute all senones?
Definition: acmod.h:188
int acmod_process_feat(acmod_t *acmod, mfcc_t **feat)
Feed dynamic feature data into the acoustic model for scoring.
Definition: acmod.c:797
int acmod_write_senfh_header(acmod_t *acmod, FILE *logfh)
Write senone dump file header.
Definition: acmod.c:350
int n_feat
Number of feature streams.
Definition: acmod.h:85
cmd_ln_t * config
Configuration.
Definition: acmod.h:150
frame_idx_t output_frame
Index of next frame of dynamic features.
Definition: acmod.h:194
int acmod_write_scores(acmod_t *acmod, int n_active, uint8 const *active, int16 const *senscr, FILE *senfh)
Write a frame of senone scores to a dump file.
Definition: acmod.c:911
tmat_t * tmat
Transition matrices.
Definition: acmod.h:160
int32 acmod_flags2list(acmod_t *acmod)
Build active list from flags.
Definition: acmod.c:1254
int acmod_end_utt(acmod_t *acmod)
Mark the end of an utterance.
Definition: acmod.c:441
int acmod_advance(acmod_t *acmod)
Advance the frame index.
Definition: acmod.c:899
enum acmod_state_e acmod_state_t
States in utterance processing.
Model-space linear transforms for speaker adaptation.
uint8 state
State of utterance processing.
Definition: acmod.h:187
int acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
Start logging MFCCs to a filehandle.
Definition: acmod.c:375
void acmod_free(acmod_t *acmod)
Finalize an acoustic model.
Definition: acmod.c:299
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
Definition: acmod.c:1197
Feature space linear transform structure.
Definition: acmod.h:82
frame_idx_t feat_outidx
Start of active frames in feat_buf.
Definition: acmod.h:200
float32 **** A
Rotation part of mean transformations.
Definition: acmod.h:87
mfcc_t ** acmod_get_frame(acmod_t *acmod, int *inout_frame_idx)
Get a frame of dynamic feature data.
Definition: acmod.c:1088
Transition matrix data structure.
feat_t * fcb
Dynamic feature computation.
Definition: acmod.h:156
int log_zero
Zero log-probability value.
Definition: acmod.h:170
FILE * senfh
File for writing senone score data.
Definition: acmod.h:177
frame_idx_t mfc_outidx
Start of active frames in mfc_buf.
Definition: acmod.h:197
Transition matrix data structure.
Definition: tmat.h:55
frame_idx_t n_mfc_alloc
Number of frames allocated in mfc_buf.
Definition: acmod.h:195
int32 frame_idx_t
Type for frame index values.
Definition: hmm.h:64
int acmod_process_raw(acmod_t *acmod, int16 const **inout_raw, size_t *inout_n_samps, int full_utt)
Feed raw audio data into the acoustic model for scoring. (TODO: Set queue length for utterance processing.)
Definition: acmod.c:607
uint8 insen_swap
Whether to swap input senone score.
Definition: acmod.h:190
int acmod_start_utt(acmod_t *acmod)
Mark the start of an utterance.
Definition: acmod.c:423
int senscr_frame
Frame index for senone_scores.
Definition: acmod.h:168
ps_mgaufuncs_t * vt
vtable of mgau functions.
Definition: acmod.h:114
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
int acmod_best_score(acmod_t *acmod, int *out_best_senid)
Get best score and senone index for current frame.
Definition: acmod.c:1168
int refcnt
Reference count.
Definition: acmod.h:83
acmod_t * acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
Initialize an acoustic model.
Definition: acmod.c:228
frame_idx_t n_feat_frame
Number of frames active in feat_buf.
Definition: acmod.h:199
FILE * mfcfh
File for writing acoustic feature data.
Definition: acmod.h:176
Acoustic model structure.
Definition: acmod.h:148
int acmod_set_grow(acmod_t *acmod, int grow_feat)
Set memory allocation policy for utterance processing.
Definition: acmod.c:410
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
Definition: acmod.c:1106
int frame_idx
frame counter.
Definition: acmod.h:115
glist_t strings
Temporary acoustic model filenames.
Definition: acmod.h:152
bitvec_t * senone_active_vec
Active GMMs in current frame.
Definition: acmod.h:166