SphinxBase  5prealpha
feat.h
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 /*
38  * feat.h -- Cepstral features computation.
39  */
40 
41 #ifndef _S3_FEAT_H_
42 #define _S3_FEAT_H_
43 
44 #include <stdio.h>
45 
46 /* Win32/WinCE DLL gunk */
47 #include <sphinxbase/sphinxbase_export.h>
48 #include <sphinxbase/prim_type.h>
49 #include <sphinxbase/fe.h>
50 #include <sphinxbase/cmn.h>
51 #include <sphinxbase/agc.h>
52 
53 #ifdef __cplusplus
54 extern "C" {
55 #endif
56 #if 0
57 /* Fool Emacs. */
58 }
59 #endif
60 
/**
 * Block size constant for live-mode buffering.
 * NOTE(review): presumably the number of frames allocated per block by the
 * live feature buffer -- confirm against feat.c.
 */
#define LIVEBUFBLOCKSIZE 256

/**
 * Frame-count limit.
 * NOTE(review): presumably the maximum number of frames handled per
 * utterance -- confirm against the callers.
 */
#define S3_MAX_FRAMES 15000 /* RAH, I believe this is still too large, but better than before */
67 
/**
 * Command-line argument definitions for cepstra-to-feature conversion.
 *
 * Expands to a comma-separated list of ten brace initializers, each of the
 * form { option name, ARG_* type tag, default value string (or NULL),
 * help text }.  The last entry has no trailing comma, so the caller supplies
 * the separator/terminator that follows.
 * NOTE(review): presumably spliced into an argument-definition table of the
 * command-line parser; the ARG_* tags are not defined in this header.
 */
#define cepstral_to_feature_command_line_macro()                                \
{ "-feat",                                                                      \
  ARG_STRING,                                                                   \
  "1s_c_d_dd",                                                                  \
  "Feature stream type, depends on the acoustic model" },                       \
{ "-ceplen",                                                                    \
  ARG_INT32,                                                                    \
  "13",                                                                         \
  "Number of components in the input feature vector" },                         \
{ "-cmn",                                                                       \
  ARG_STRING,                                                                   \
  "live",                                                                       \
  "Cepstral mean normalization scheme ('live', 'batch', or 'none')" },          \
{ "-cmninit",                                                                   \
  ARG_STRING,                                                                   \
  "40,3,-1",                                                                    \
  "Initial values (comma-separated) for cepstral mean when 'live' is used" },   \
{ "-varnorm",                                                                   \
  ARG_BOOLEAN,                                                                  \
  "no",                                                                         \
  "Variance normalize each utterance (only if CMN == current)" },               \
{ "-agc",                                                                       \
  ARG_STRING,                                                                   \
  "none",                                                                       \
  "Automatic gain control for c0 ('max', 'emax', 'noise', or 'none')" },        \
{ "-agcthresh",                                                                 \
  ARG_FLOAT32,                                                                  \
  "2.0",                                                                        \
  "Initial threshold for automatic gain control" },                             \
{ "-lda",                                                                       \
  ARG_STRING,                                                                   \
  NULL,                                                                         \
  "File containing transformation matrix to be applied to features (single-stream features only)" }, \
{ "-ldadim",                                                                    \
  ARG_INT32,                                                                    \
  "0",                                                                          \
  "Dimensionality of output of feature transformation (0 to use entire matrix)" }, \
{ "-svspec",                                                                    \
  ARG_STRING,                                                                   \
  NULL,                                                                         \
  "Subvector specification (e.g., 24,0-11/25,12-23/26-38 or 0-12/13-25/26-38)" }
109 
117 typedef struct feat_s {
118  int refcount;
119  char *name;
120  int32 cepsize;
121  int32 n_stream;
122  uint32 *stream_len;
123  int32 window_size;
125  int32 n_sv;
126  uint32 *sv_len;
127  int32 **subvecs;
128  mfcc_t *sv_buf;
129  int32 sv_dim;
132  int32 varnorm;
148  void (*compute_feat)(struct feat_s *fcb, mfcc_t **input, mfcc_t **feat);
154  mfcc_t **cepbuf;
155  mfcc_t **tmpcepbuf;
156  int32 bufpos;
157  int32 curpos;
159  mfcc_t ***lda;
160  uint32 n_lda;
161  uint32 out_dim;
162 } feat_t;
163 
/*
 * Accessors for feat_t.  All of these are macros: the argument `f` is
 * expanded (and therefore evaluated) more than once in some of them, so it
 * must be free of side effects.  Every use of `f` is parenthesized so that
 * pointer expressions such as `base + 1` expand correctly.
 */

/** Printable name of the feature type. */
#define feat_name(f) ((f)->name)

/** Size of the input speech (cepstrum) vector. */
#define feat_cepsize(f) ((f)->cepsize)

/** Value of the window_size field (extra frames needed around an input frame). */
#define feat_window_size(f) ((f)->window_size)

/** Number of feature streams. */
#define feat_n_stream(f) ((f)->n_stream)

/** Vector length of feature stream `i`. */
#define feat_stream_len(f,i) ((f)->stream_len[i])

/** Number of subvectors if any are defined (n_sv != 0), otherwise the number of streams. */
#define feat_dimension1(f) ((f)->n_sv ? (f)->n_sv : (f)->n_stream)

/**
 * Length of output vector `i`: out_dim when an LDA transform is present,
 * otherwise the subvector length when subvectors are defined, otherwise the
 * stream length.
 */
#define feat_dimension2(f,i) ((f)->lda ? (f)->out_dim : ((f)->sv_len ? (f)->sv_len[i] : (f)->stream_len[i]))

/** Output dimensionality (out_dim). */
#define feat_dimension(f) ((f)->out_dim)

/**
 * Pointer to the array of output vector lengths: &out_dim when an LDA
 * transform is present, otherwise sv_len if set, otherwise stream_len.
 */
#define feat_stream_lengths(f) ((f)->lda ? (&(f)->out_dim) : (f)->sv_len ? (f)->sv_len : (f)->stream_len)
204 
227 SPHINXBASE_EXPORT
228 int32 **parse_subvecs(char const *str);
229 
233 SPHINXBASE_EXPORT
234 void subvecs_free(int32 **subvecs);
235 
236 
249 SPHINXBASE_EXPORT
250 mfcc_t ***feat_array_alloc(feat_t *fcb,
252  int32 nfr
253  );
254 
258 SPHINXBASE_EXPORT
259 mfcc_t ***feat_array_realloc(feat_t *fcb,
261  mfcc_t ***old_feat,
262  int32 ofr,
263  int32 nfr
264  );
265 
269 SPHINXBASE_EXPORT
270 void feat_array_free(mfcc_t ***feat);
271 
272 
288 SPHINXBASE_EXPORT
289 feat_t *feat_init(char const *type,
290  cmn_type_t cmn,
293  int32 varnorm,
296  agc_type_t agc,
298  int32 breport,
299  int32 cepsize
302  );
303 
308 SPHINXBASE_EXPORT
309 int32 feat_read_lda(feat_t *feat,
310  const char *ldafile,
311  int32 dim
312  );
313 
317 SPHINXBASE_EXPORT
318 void feat_lda_transform(feat_t *fcb,
319  mfcc_t ***inout_feat,
320  uint32 nfr
321  );
322 
341 SPHINXBASE_EXPORT
342 int feat_set_subvecs(feat_t *fcb, int32 **subvecs);
343 
347 SPHINXBASE_EXPORT
348 void feat_print(feat_t *fcb,
349  mfcc_t ***feat,
350  int32 nfr,
351  FILE *fp
352  );
353 
354 
371 SPHINXBASE_EXPORT
372 int32 feat_s2mfc2feat(feat_t *fcb,
373  const char *file,
374  const char *dir,
376  const char *cepext,
379  int32 sf, int32 ef, /* Start/End frames
380  within file to be read. Use
381  0,-1 to process entire
382  file */
383  mfcc_t ***feat,
385  int32 maxfr
389  );
390 
391 
420 SPHINXBASE_EXPORT
421 int32 feat_s2mfc2feat_live(feat_t *fcb,
422  mfcc_t **uttcep,
423  int32 *inout_ncep,
425  int32 beginutt,
426  int32 endutt,
427  mfcc_t ***ofeat
430  );
431 
432 
437 SPHINXBASE_EXPORT
438 void feat_update_stats(feat_t *fcb);
439 
440 
446 SPHINXBASE_EXPORT
448 
454 SPHINXBASE_EXPORT
455 int feat_free(feat_t *f
456  );
457 
461 SPHINXBASE_EXPORT
462 void feat_report(feat_t *f
463  );
464 #ifdef __cplusplus
465 }
466 #endif
467 
468 
469 #endif
SPHINXBASE_EXPORT void feat_print(feat_t *fcb, mfcc_t ***feat, int32 nfr, FILE *fp)
Print the given block of feature vectors to the given FILE.
Definition: feat.c:896
SPHINXBASE_EXPORT void feat_report(feat_t *f)
Report the feat_t data structure.
Definition: feat.c:1473
int32 n_stream
Number of feature streams; e.g., 4 in Sphinx-II.
Definition: feat.h:121
char * name
Printable name for this feature type.
Definition: feat.h:119
routine that implements automatic gain control
SPHINXBASE_EXPORT int32 feat_read_lda(feat_t *feat, const char *ldafile, int32 dim)
Add an LDA transformation to the feature module from a file.
Definition: lda.c:61
SPHINXBASE_EXPORT mfcc_t *** feat_array_alloc(feat_t *fcb, int32 nfr)
Allocate an array to hold several frames worth of feature vectors.
Definition: feat.c:356
agc_t * agc_struct
Structure that stores the temporary variables for acoustic gain control.
Definition: feat.h:151
int32 ** subvecs
Subvector specification (or NULL for none)
Definition: feat.h:127
Apply Cepstral Mean Normalization (CMN) to the set of input mfc frames.
agc_type_t agc
Type of AGC to be performed on each utterance.
Definition: feat.h:134
SPHINXBASE_EXPORT mfcc_t *** feat_array_realloc(feat_t *fcb, mfcc_t ***old_feat, int32 ofr, int32 nfr)
Reallocate the array of features.
Definition: feat.c:389
int32 bufpos
Write index in cepbuf.
Definition: feat.h:156
uint32 * sv_len
Vector length of each subvector.
Definition: feat.h:126
int refcount
Reference count.
Definition: feat.h:118
mfcc_t * sv_buf
Temporary copy buffer for subvector projection.
Definition: feat.h:128
uint32 * stream_len
Vector length of each feature stream.
Definition: feat.h:122
Basic type definitions used in Sphinx.
void(* compute_feat)(struct feat_s *fcb, mfcc_t **input, mfcc_t **feat)
Feature computation function.
Definition: feat.h:148
Structure for describing a speech feature type (no...
SPHINXBASE_EXPORT void cmn(cmn_t *cmn, mfcc_t **mfc, int32 varnorm, int32 n_frame)
CMN for the whole sentence.
Definition: cmn.c:100
int32 cepsize
Size of input speech vector (typically, a cepstrum vector)
Definition: feat.h:120
mfcc_t *** lda
Array of linear transformations (for LDA, MLLT, or whatever)
Definition: feat.h:159
uint32 n_lda
Number of linear transformations in lda.
Definition: feat.h:160
uint32 out_dim
Output dimensionality.
Definition: feat.h:161
SPHINXBASE_EXPORT int32 ** parse_subvecs(char const *str)
Parse subvector specification string.
Definition: feat.c:169
enum agc_type_e agc_type_t
Types of acoustic gain control to apply to the features.
SPHINXBASE_EXPORT feat_t * feat_retain(feat_t *f)
Retain ownership of feat_t.
Definition: feat.c:1435
mfcc_t ** cepbuf
Circular buffer of MFCC frames for live feature computation.
Definition: feat.h:154
cmn_t * cmn_struct
Structure that stores the temporary variables for cepstral means normalization.
Definition: feat.h:149
int32 varnorm
Whether variance normalization is to be performed on each utt; Irrelevant if no CMN is performed...
Definition: feat.h:132
mfcc_t ** tmpcepbuf
Array of pointers into cepbuf to handle border cases.
Definition: feat.h:155
SPHINXBASE_EXPORT int32 feat_s2mfc2feat(feat_t *fcb, const char *file, const char *dir, const char *cepext, int32 sf, int32 ef, mfcc_t ***feat, int32 maxfr)
Read a specified MFC file (or given segment within it), perform CMN/AGC as indicated by fcb...
Definition: feat.c:1170
SPHINXBASE_EXPORT void subvecs_free(int32 **subvecs)
Free array of subvector specs.
Definition: feat.c:267
Definition: feat.h:117
int32 sv_dim
Total dimensionality of subvector (length of sv_buf)
Definition: feat.h:129
int32 curpos
Read index in cepbuf.
Definition: feat.h:157
Structure holding data for doing AGC.
Definition: agc.h:113
SPHINXBASE_EXPORT void feat_array_free(mfcc_t ***feat)
Free a buffer allocated with feat_array_alloc()
Definition: feat.c:418
int32 n_sv
Number of subvectors.
Definition: feat.h:125
cmn_type_t cmn
Type of CMN to be performed on each utterance.
Definition: feat.h:131
wrapper of operation of the cepstral mean normalization.
Definition: cmn.h:128
SPHINXBASE_EXPORT void feat_lda_transform(feat_t *fcb, mfcc_t ***inout_feat, uint32 nfr)
Transform a block of features using the feature module's LDA transform.
Definition: lda.c:139
enum cmn_type_e cmn_type_t
Types of cepstral mean normalization to apply to the features.
SPHINXBASE_EXPORT feat_t * feat_init(char const *type, cmn_type_t cmn, int32 varnorm, agc_type_t agc, int32 breport, int32 cepsize)
Initialize feature module to use the selected type of feature stream.
Definition: feat.c:705
SPHINXBASE_EXPORT void feat_update_stats(feat_t *fcb)
Update the normalization stats, possibly in the end of utterance.
Definition: feat.c:1424
SPHINXBASE_EXPORT int feat_free(feat_t *f)
Release resource associated with feat_t.
Definition: feat.c:1442
SPHINXBASE_EXPORT int feat_set_subvecs(feat_t *fcb, int32 **subvecs)
Add a subvector specification to the feature module.
Definition: feat.c:277
SPHINXBASE_EXPORT int32 feat_s2mfc2feat_live(feat_t *fcb, mfcc_t **uttcep, int32 *inout_ncep, int32 beginutt, int32 endutt, mfcc_t ***ofeat)
Feature computation routine for live mode decoder.
Definition: feat.c:1308
int32 window_size
Number of extra frames around given input frame needed to compute corresponding output feature (so to...
Definition: feat.h:123