SphinxBase  5prealpha
fe_internal.h
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1996-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 #ifndef __FE_INTERNAL_H__
39 #define __FE_INTERNAL_H__
40 
41 #ifdef HAVE_CONFIG_H
42 #include <config.h>
43 #endif
44 
45 #include "sphinxbase/fe.h"
46 #include "sphinxbase/fixpoint.h"
47 
48 #include "fe_noise.h"
49 #include "fe_prespch_buf.h"
50 #include "fe_type.h"
51 
52 #ifdef __cplusplus
53 extern "C" {
54 #endif
55 #if 0
56 /* Fool Emacs. */
57 }
58 #endif
59 
60 /* Values for the 'log_spec' field of struct fe_s (this comment previously spelled it 'logspec'). No constant is defined here for 0. */
61 enum {
62  RAW_LOG_SPEC = 1, /* NOTE(review): presumably selects unsmoothed log-spectrum output -- confirm in the implementation. */
63  SMOOTH_LOG_SPEC = 2 /* NOTE(review): presumably selects smoothed log-spectrum output -- confirm in the implementation. */
64 };
65 
66 /* Values for the 'transform' field of struct fe_s. */
67 enum {
68  LEGACY_DCT = 0, /* The zero value; presumably the default transform -- confirm. */
69  DCT_II = 1, /* NOTE(review): presumably the DCT-II computed by fe_dct2() -- confirm. */
70  DCT_HTK = 2 /* NOTE(review): presumably the HTK variant (compare the 'htk' argument of fe_dct2()) -- confirm. */
71 };
72 
73 typedef struct melfb_s melfb_t; /* Typedef for the struct defined immediately below. */
75 struct melfb_s { /* Base structure holding the mel filterbank, DCT, warping and liftering data for MFCC computation. */
76  float32 sampling_rate; /* Sampling rate; units presumably Hz -- TODO confirm. */
77  int32 num_cepstra; /* Number of cepstral coefficients (struct fe_s has a uint8 field of the same name). */
78  int32 num_filters; /* Number of filters in the filterbank. */
79  int32 fft_size; /* FFT size; note this is int32 here but int16 in struct fe_s. */
80  float32 lower_filt_freq; /* Lower filter frequency; units presumably Hz -- TODO confirm. */
81  float32 upper_filt_freq; /* Upper filter frequency; units presumably Hz -- TODO confirm. */
82  /* DCT coefficients. */
83  mfcc_t **mel_cosine;
84  /* Filter coefficients. */
85  mfcc_t *filt_coeffs;
86  int16 *spec_start; /* NOTE(review): spec_start, filt_start and filt_width look like per-filter index arrays; lengths and ownership are not visible here. */
87  int16 *filt_start;
88  int16 *filt_width;
89  /* Luxury mobile home. (A joke on the field name; the meaning of 'doublewide' is not visible in this header -- confirm in the implementation.) */
90  int32 doublewide;
91  char const *warp_type; /* Frequency-warping type as a read-only string; valid values are not visible here. */
92  char const *warp_params; /* Frequency-warping parameters as a read-only string; format is not visible here. */
93  uint32 warp_id; /* NOTE(review): presumably a numeric identifier for warp_type -- confirm. */
94  /* Precomputed normalization constants for unitary DCT-II/DCT-III */
95  mfcc_t sqrt_inv_n, sqrt_inv_2n;
96  /* Value and coefficients for HTK-style liftering */
97  int32 lifter_val;
98  mfcc_t *lifter;
99  /* Normalize filters to unit area */
100  int32 unit_area;
101  /* Round filter frequencies to DFT points (hurts accuracy, but is
102  useful for legacy purposes) */
103  int32 round_filters;
104 };
105 
106 /* sqrt(1/2) (0.7071...), passed through FLOAT2MFCC; also used for unitary DCT-II/DCT-III. FLOAT2MFCC is defined outside this header. */
107 #define SQRT_HALF FLOAT2MFCC(0.707106781186548)
108 
109 typedef struct vad_data_s { /* Per-stream state for voice activity detection (referenced from struct fe_s as 'vad_data'). */
110  uint8 in_speech; /* Flag-sized field; presumably non-zero while speech is being detected -- confirm. */
111  int16 pre_speech_frames; /* NOTE(review): presumably a running frame counter paired with fe_s.pre_speech -- confirm. */
112  int16 post_speech_frames; /* NOTE(review): presumably a running frame counter paired with fe_s.post_speech -- confirm. */
113  prespch_buf_t* prespch_buf; /* Pre-speech buffer; prespch_buf_t comes from fe_prespch_buf.h. Ownership is not visible here. */
114 } vad_data_t;
115 
117 struct fe_s { /* Structure for the front-end computation: configuration, derived sizes, lookup tables, and work buffers. */
118  cmd_ln_t *config; /* Command-line parsing results (cmd_ln_t is an opaque structure). */
119  int refcount; /* Reference count; the retain/release logic is not visible in this header. */
120 
121  float32 sampling_rate; /* Sampling rate; units presumably Hz -- TODO confirm. */
122  int16 frame_rate; /* Frame rate; presumably frames per second -- TODO confirm. */
123  int16 frame_shift; /* Frame shift; presumably in samples -- TODO confirm. */
124 
125  float32 window_length; /* Analysis window length; presumably in seconds -- TODO confirm. */
126  int16 frame_size; /* Frame size; presumably in samples -- TODO confirm. */
127  int16 fft_size; /* FFT size; note this is int16 here but int32 in struct melfb_s. */
128 
129  uint8 fft_order; /* NOTE(review): presumably log2 of fft_size -- confirm. */
130  uint8 feature_dimension; /* Dimension of the output feature vector. */
131  uint8 num_cepstra; /* Number of cepstral coefficients (struct melfb_s has an int32 field of the same name). */
132  uint8 remove_dc; /* Option flag; presumably enables DC offset removal -- confirm. */
133  uint8 log_spec; /* Takes RAW_LOG_SPEC or SMOOTH_LOG_SPEC (see the enum earlier in this header). */
134  uint8 swap; /* Option flag; presumably requests byte-swapping of input -- confirm. */
135  uint8 dither; /* Option flag; presumably enables fe_dither() -- confirm. */
136  uint8 transform; /* Takes LEGACY_DCT, DCT_II or DCT_HTK (see the enum earlier in this header). */
137  uint8 remove_noise; /* Option flag; presumably enables the noise removal that uses noise_stats -- confirm. */
138  uint8 remove_silence; /* Option flag; presumably enables VAD-based silence removal -- confirm. */
139 
140  float32 pre_emphasis_alpha; /* Pre-emphasis coefficient. */
141  int16 pre_emphasis_prior; /* NOTE(review): presumably the last sample of the previous frame, carried over for pre-emphasis -- confirm. */
142  int32 dither_seed; /* Seed value; same type as the argument of fe_init_dither(). */
143 
144  int16 num_overflow_samps; /* Count associated with the overflow_samps buffer below. */
145  size_t num_processed_samps; /* Running count of processed samples. */
146 
147  /* Twiddle factors for FFT. */
148  frame_t *ccc, *sss;
149  /* Mel filter parameters. */
150  melfb_t *mel_fb;
151  /* Half of a Hamming Window. */
152  window_t *hamming_window;
153 
154  /* Noise removal */
155  noise_stats_t *noise_stats;
156 
157  /* VAD variables */
158  int16 pre_speech;
159  int16 post_speech;
160  int16 start_speech;
161  float32 vad_threshold;
162  vad_data_t *vad_data;
163 
164  /* Temporary buffers for processing. */
165  /* FIXME: too many of these. */
166  int16 *spch;
167  frame_t *frame;
168  powspec_t *spec, *mfspec;
169  int16 *overflow_samps;
170 };
171 
172 void fe_init_dither(int32 seed); /* Takes a 32-bit seed; presumably seeds the noise source used by fe_dither() -- confirm in the implementation. */
173 
174 /* Apply 1/2 bit noise to a buffer of audio. 'buffer' is non-const, so presumably modified in place; 'nsamps' is presumably its length in samples. The meaning of the int32 return value is not visible in this header. */
175 int32 fe_dither(int16 *buffer, int32 nsamps);
176 
177 /* Load a frame of data into the fe. 'in' is read-only 16-bit input; 'len' is presumably its length in samples. The meaning of the int return value is not visible in this header. */
178 int fe_read_frame(fe_t *fe, int16 const *in, int32 len);
179 
180 /* Shift the input buffer back and read more data. Same parameter types as fe_read_frame(); the meaning of the int return value is not visible in this header. */
181 int fe_shift_frame(fe_t *fe, int16 const *in, int32 len);
182 
183 /* Process a frame of data into features. 'feat' is non-const, so presumably receives the output. NOTE(review): 'store_pcm' is not described here -- confirm its meaning in the implementation. */
184 void fe_write_frame(fe_t *fe, mfcc_t *feat, int32 store_pcm);
185 
186 /* Initialization functions. The return-value convention of the int32 functions is not visible in this header. */
187 int32 fe_build_melfilters(melfb_t *MEL_FB); /* Presumably builds the filter arrays inside *MEL_FB -- confirm. */
188 int32 fe_compute_melcosine(melfb_t *MEL_FB); /* Presumably fills MEL_FB->mel_cosine (the DCT coefficients) -- confirm. */
189 void fe_create_hamming(window_t *in, int32 in_len); /* 'in' is non-const, so presumably receives the window values; 'in_len' accompanies it as a length. */
190 void fe_create_twiddle(fe_t *fe); /* Presumably fills fe->ccc and fe->sss, which struct fe_s describes as twiddle factors for the FFT -- confirm. */
191 
192 fixed32 fe_log_add(fixed32 x, fixed32 y); /* NOTE(review): presumably adds two values given in a fixed-point log domain -- confirm scaling and base in the implementation. */
193 fixed32 fe_log_sub(fixed32 x, fixed32 y); /* NOTE(review): presumably the matching log-domain subtraction; behavior for y > x is not visible here. */
194 
195 /* Miscellaneous processing functions. In each prototype the const pointer is the input and the non-const array pointer presumably receives the output. */
196 void fe_spec2cep(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep); /* Log mel spectrum in, cepstrum out (per the parameter names). */
197 void fe_dct2(fe_t *fe, const powspec_t *mflogspec, mfcc_t *mfcep, int htk); /* Same direction as fe_spec2cep(); 'htk' presumably selects the HTK variant (compare DCT_HTK) -- confirm. */
198 void fe_dct3(fe_t *fe, const mfcc_t *mfcep, powspec_t *mflogspec); /* Opposite direction: cepstrum in, log mel spectrum out (per the parameter names). */
199 
200 #ifdef __cplusplus
201 }
202 #endif
203 
204 #endif /* __FE_INTERNAL_H__ */
Base struct that holds all the structures for MFCC computation.
Definition: fe_internal.h:75
Opaque structure used to hold the results of command-line parsing.
Structure for the front-end computation.
Definition: fe_internal.h:117