SphinxBase  5prealpha
agc.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 /*
38  * agc.c -- Various forms of automatic gain control (AGC)
39  *
40  * **********************************************
41  * CMU ARPA Speech Project
42  *
43  * Copyright (c) 1996 Carnegie Mellon University.
44  * ALL RIGHTS RESERVED.
45  * **********************************************
46  *
47  * HISTORY
48  * $Log$
49  * Revision 1.5 2005/06/21 19:25:41 arthchan2003
50  * 1, Fixed doxygen documentation. 2, Added $ keyword.
51  *
52  * Revision 1.3 2005/03/30 01:22:46 archan
53  * Fixed mistakes in last updates. Add
54  *
55  *
56  * 04-Nov-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
57  * Created.
58  */
59 
60 #include <string.h>
61 #ifdef HAVE_CONFIG_H
62 #include <config.h>
63 #endif
64 
65 #include "sphinxbase/err.h"
66 #include "sphinxbase/ckd_alloc.h"
67 #include "sphinxbase/agc.h"
68 
/*
 * String names of the AGC types, indexed by agc_type_t value.
 * NOTE! The order here must match the enum in agc.h.
 */
const char *agc_type_str[] = {
    "none",
    "max",
    "emax",
    "noise"
};

/* Number of entries in agc_type_str. */
static const int n_agc_type_str =
    sizeof agc_type_str / sizeof agc_type_str[0];
77 
79 agc_type_from_str(const char *str)
80 {
81  int i;
82 
83  for (i = 0; i < n_agc_type_str; ++i) {
84  if (0 == strcmp(str, agc_type_str[i]))
85  return (agc_type_t)i;
86  }
87  E_FATAL("Unknown AGC type '%s'\n", str);
88  return AGC_NONE;
89 }
90 
92 {
93  agc_t *agc;
94  agc = ckd_calloc(1, sizeof(*agc));
95  agc->noise_thresh = FLOAT2MFCC(2.0);
96 
97  return agc;
98 }
99 
100 void agc_free(agc_t *agc)
101 {
102  ckd_free(agc);
103 }
104 
108 void
109 agc_max(agc_t *agc, mfcc_t **mfc, int32 n_frame)
110 {
111  int32 i;
112 
113  if (n_frame <= 0)
114  return;
115  agc->obs_max = mfc[0][0];
116  for (i = 1; i < n_frame; i++) {
117  if (mfc[i][0] > agc->obs_max) {
118  agc->obs_max = mfc[i][0];
119  agc->obs_frame = 1;
120  }
121  }
122 
123  E_INFO("AGCMax: obs=max= %.2f\n", agc->obs_max);
124  for (i = 0; i < n_frame; i++)
125  mfc[i][0] -= agc->obs_max;
126 }
127 
128 void
129 agc_emax_set(agc_t *agc, float32 m)
130 {
131  agc->max = FLOAT2MFCC(m);
132  E_INFO("AGCEMax: max= %.2f\n", m);
133 }
134 
135 float32
137 {
138  return MFCC2FLOAT(agc->max);
139 }
140 
141 void
142 agc_emax(agc_t *agc, mfcc_t **mfc, int32 n_frame)
143 {
144  int i;
145 
146  if (n_frame <= 0)
147  return;
148  for (i = 0; i < n_frame; ++i) {
149  if (mfc[i][0] > agc->obs_max) {
150  agc->obs_max = mfc[i][0];
151  agc->obs_frame = 1;
152  }
153  mfc[i][0] -= agc->max;
154  }
155 }
156 
157 /* Update estimated max for next utterance */
158 void
160 {
161  if (agc->obs_frame) { /* Update only if some data observed */
162  agc->obs_max_sum += agc->obs_max;
163  agc->obs_utt++;
164 
165  /* Re-estimate max over past history; decay the history */
166  agc->max = agc->obs_max_sum / agc->obs_utt;
167  if (agc->obs_utt == 16) {
168  agc->obs_max_sum /= 2;
169  agc->obs_utt = 8;
170  }
171  }
172  E_INFO("AGCEMax: obs= %.2f, new= %.2f\n", agc->obs_max, agc->max);
173 
174  /* Reset the accumulators for the next utterance. */
175  agc->obs_frame = 0;
176  agc->obs_max = FLOAT2MFCC(-1000.0); /* Less than any real C0 value (hopefully!!) */
177 }
178 
179 void
181  mfcc_t **cep,
182  int32 nfr)
183 {
184  mfcc_t min_energy; /* Minimum log-energy */
185  mfcc_t noise_level; /* Average noise_level */
186  int32 i; /* frame index */
187  int32 noise_frames; /* Number of noise frames */
188 
189  /* Determine minimum log-energy in utterance */
190  min_energy = cep[0][0];
191  for (i = 0; i < nfr; ++i) {
192  if (cep[i][0] < min_energy)
193  min_energy = cep[i][0];
194  }
195 
196  /* Average all frames between min_energy and min_energy + agc->noise_thresh */
197  noise_frames = 0;
198  noise_level = 0;
199  min_energy += agc->noise_thresh;
200  for (i = 0; i < nfr; ++i) {
201  if (cep[i][0] < min_energy) {
202  noise_level += cep[i][0];
203  noise_frames++;
204  }
205  }
206 
207  if (noise_frames > 0) {
208  noise_level /= noise_frames;
209  E_INFO("AGC NOISE: max= %6.3f\n", MFCC2FLOAT(noise_level));
210  /* Subtract noise_level from all log_energy values */
211  for (i = 0; i < nfr; i++) {
212  cep[i][0] -= noise_level;
213  }
214  }
215 }
216 
217 void
218 agc_set_threshold(agc_t *agc, float32 threshold)
219 {
220  agc->noise_thresh = FLOAT2MFCC(threshold);
221 }
222 
223 float32
225 {
226  return FLOAT2MFCC(agc->noise_thresh);
227 }
SPHINXBASE_EXPORT float32 agc_get_threshold(agc_t *agc)
Get the current AGC noise threshold.
Definition: agc.c:224
#define E_INFO(...)
Print logging information to standard error stream.
Definition: err.h:114
SPHINXBASE_EXPORT void agc_noise(agc_t *agc, mfcc_t **mfc, int32 n_frame)
Apply AGC using noise threshold to the given block of MFC vectors.
Definition: agc.c:180
mfcc_t max
Estimated max for current utterance (for AGC_EMAX)
Definition: agc.h:114
SPHINXBASE_EXPORT void agc_max(agc_t *agc, mfcc_t **mfc, int32 n_frame)
Apply AGC to the given mfc vectors (normalize all C0 mfc coefficients in the given input such that the maximum C0 value becomes 0, by subtracting the observed maximum C0 from every frame).
Definition: agc.c:109
routine that implements automatic gain control
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition: ckd_alloc.h:248
Sphinx's memory allocation/deallocation routines.
SPHINXBASE_EXPORT const char * agc_type_str[]
String representations of agc_type_t values.
Definition: agc.c:70
mfcc_t obs_max
Observed max in current utterance.
Definition: agc.h:115
int32 obs_frame
Whether any data was observed after prev update.
Definition: agc.h:116
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition: ckd_alloc.c:244
SPHINXBASE_EXPORT agc_t * agc_init(void)
Initialize AGC structure with default values.
Definition: agc.c:91
SPHINXBASE_EXPORT void agc_free(agc_t *agc)
Free AGC structure.
Definition: agc.c:100
enum agc_type_e agc_type_t
Types of acoustic gain control to apply to the features.
int32 obs_utt
Whether any utterances have been observed.
Definition: agc.h:117
SPHINXBASE_EXPORT void agc_set_threshold(agc_t *agc, float32 threshold)
Set the current AGC noise threshold.
Definition: agc.c:218
mfcc_t noise_thresh
Noise threshold (for AGC_NOISE only)
Definition: agc.h:119
SPHINXBASE_EXPORT void agc_emax_set(agc_t *agc, float32 m)
Set the current AGC maximum estimate.
Definition: agc.c:129
Implementation of logging routines.
Structure holding data for doing AGC.
Definition: agc.h:113
SPHINXBASE_EXPORT void agc_emax_update(agc_t *agc)
Update AGC parameters for next utterance.
Definition: agc.c:159
SPHINXBASE_EXPORT void agc_emax(agc_t *agc, mfcc_t **mfc, int32 n_frame)
Apply AGC to the given block of MFC vectors.
Definition: agc.c:142
#define E_FATAL(...)
Exit with non-zero status after error message.
Definition: err.h:81
SPHINXBASE_EXPORT float32 agc_emax_get(agc_t *agc)
Get the current AGC maximum estimate.
Definition: agc.c:136
SPHINXBASE_EXPORT agc_type_t agc_type_from_str(const char *str)
Convert string representation (from command-line) to agc_type_t.
Definition: agc.c:79