PocketSphinx  5prealpha
ms_mgau.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 /*
38  * ms_mgau.c -- Essentially a wrapper that wraps up gauden and
39  * senone. It supports multiple feature streams.
40  *
41  *
42  * **********************************************
43  * CMU ARPA Speech Project
44  *
45  * Copyright (c) 1997 Carnegie Mellon University.
46  * ALL RIGHTS RESERVED.
47  * **********************************************
48  * HISTORY
49  * $Log$
50  * Revision 1.2 2006/02/22 16:56:01 arthchan2003
51  * Merged from SPHINX3_5_2_RCI_IRII_BRANCH: Added ms_mgau.[ch] into the trunk. It is a wrapper of ms_gauden and ms_senone
52  *
53  * Revision 1.1.2.4 2005/09/25 18:55:19 arthchan2003
54  * Added a flag to turn on and off precomputation.
55  *
56  * Revision 1.1.2.3 2005/08/03 18:53:44 dhdfu
57  * Add memory deallocation functions. Also move all the initialization
58  * of ms_mgau_model_t into ms_mgau_init (duh!), which entails removing it
59  * from decode_anytopo and friends.
60  *
61  * Revision 1.1.2.2 2005/08/02 21:05:38 arthchan2003
62  * 1, Added dist and mgau_active as intermediate variable for computation. 2, Added ms_cont_mgau_frame_eval, which is a multi stream version of GMM computation mainly s3.0 family of tools. 3, Fixed dox-doc.
63  *
64  * Revision 1.1.2.1 2005/07/20 19:37:09 arthchan2003
65  * Added a multi-stream cont_mgau (ms_mgau) which is a wrapper of both gauden and senone. Add ms_mgau_init and model_set_mllr. This allow eliminating 600 lines of code in decode_anytopo/align/allphone.
66  *
67  *
68  *
69  */
70 
71 /* Local headers. */
72 #include "ms_mgau.h"
73 
74 static ps_mgaufuncs_t ms_mgau_funcs = {
75  "ms",
76  ms_cont_mgau_frame_eval, /* frame_eval */
77  ms_mgau_mllr_transform, /* transform */
78  ms_mgau_free /* free */
79 };
80 
81 ps_mgau_t *
82 ms_mgau_init(acmod_t *acmod, logmath_t *lmath, bin_mdef_t *mdef)
83 {
84  /* Codebooks */
85  ms_mgau_model_t *msg;
86  ps_mgau_t *mg;
87  gauden_t *g;
88  senone_t *s;
89  cmd_ln_t *config;
90  int i;
91 
92  config = acmod->config;
93 
94  msg = (ms_mgau_model_t *) ckd_calloc(1, sizeof(ms_mgau_model_t));
95  msg->config = config;
96  msg->g = NULL;
97  msg->s = NULL;
98 
99  if ((g = msg->g = gauden_init(cmd_ln_str_r(config, "_mean"),
100  cmd_ln_str_r(config, "_var"),
101  cmd_ln_float32_r(config, "-varfloor"),
102  lmath)) == NULL) {
103  E_ERROR("Failed to read means and variances\n");
104  goto error_out;
105  }
106 
107  /* Verify n_feat and veclen, against acmod. */
108  if (g->n_feat != feat_dimension1(acmod->fcb)) {
109  E_ERROR("Number of streams does not match: %d != %d\n",
110  g->n_feat, feat_dimension1(acmod->fcb));
111  goto error_out;
112  }
113  for (i = 0; i < g->n_feat; ++i) {
114  if (g->featlen[i] != feat_dimension2(acmod->fcb, i)) {
115  E_ERROR("Dimension of stream %d does not match: %d != %d\n", i,
116  g->featlen[i], feat_dimension2(acmod->fcb, i));
117  goto error_out;
118  }
119  }
120 
121  s = msg->s = senone_init(msg->g,
122  cmd_ln_str_r(config, "_mixw"),
123  cmd_ln_str_r(config, "_senmgau"),
124  cmd_ln_float32_r(config, "-mixwfloor"),
125  lmath, mdef);
126 
127  s->aw = cmd_ln_int32_r(config, "-aw");
128 
129  /* Verify senone parameters against gauden parameters */
130  if (s->n_feat != g->n_feat)
131  E_FATAL("#Feature mismatch: gauden= %d, senone= %d\n", g->n_feat,
132  s->n_feat);
133  if (s->n_cw != g->n_density)
134  E_FATAL("#Densities mismatch: gauden= %d, senone= %d\n",
135  g->n_density, s->n_cw);
136  if (s->n_gauden > g->n_mgau)
137  E_FATAL("Senones need more codebooks (%d) than present (%d)\n",
138  s->n_gauden, g->n_mgau);
139  if (s->n_gauden < g->n_mgau)
140  E_ERROR("Senones use fewer codebooks (%d) than present (%d)\n",
141  s->n_gauden, g->n_mgau);
142 
143  msg->topn = cmd_ln_int32_r(config, "-topn");
144  E_INFO("The value of topn: %d\n", msg->topn);
145  if (msg->topn == 0 || msg->topn > msg->g->n_density) {
146  E_WARN
147  ("-topn argument (%d) invalid or > #density codewords (%d); set to latter\n",
148  msg->topn, msg->g->n_density);
149  msg->topn = msg->g->n_density;
150  }
151 
152  msg->dist = (gauden_dist_t ***)
153  ckd_calloc_3d(g->n_mgau, g->n_feat, msg->topn,
154  sizeof(gauden_dist_t));
155  msg->mgau_active = ckd_calloc(g->n_mgau, sizeof(int8));
156 
157  mg = (ps_mgau_t *)msg;
158  mg->vt = &ms_mgau_funcs;
159  return mg;
160 error_out:
161  ms_mgau_free(ps_mgau_base(msg));
162  return NULL;
163 }
164 
165 void
166 ms_mgau_free(ps_mgau_t * mg)
167 {
168  ms_mgau_model_t *msg = (ms_mgau_model_t *)mg;
169  if (msg == NULL)
170  return;
171 
172  if (msg->g)
173  gauden_free(msg->g);
174  if (msg->s)
175  senone_free(msg->s);
176  if (msg->dist)
177  ckd_free_3d((void *) msg->dist);
178  if (msg->mgau_active)
179  ckd_free(msg->mgau_active);
180 
181  ckd_free(msg);
182 }
183 
184 int
185 ms_mgau_mllr_transform(ps_mgau_t *s,
186  ps_mllr_t *mllr)
187 {
188  ms_mgau_model_t *msg = (ms_mgau_model_t *)s;
189  return gauden_mllr_transform(msg->g, mllr, msg->config);
190 }
191 
192 int32
193 ms_cont_mgau_frame_eval(ps_mgau_t * mg,
194  int16 *senscr,
195  uint8 *senone_active,
196  int32 n_senone_active,
197  mfcc_t ** feat,
198  int32 frame,
199  int32 compallsen)
200 {
201  ms_mgau_model_t *msg = (ms_mgau_model_t *)mg;
202  int32 gid;
203  int32 topn;
204  int32 best;
205  gauden_t *g;
206  senone_t *sen;
207 
208  topn = ms_mgau_topn(msg);
209  g = ms_mgau_gauden(msg);
210  sen = ms_mgau_senone(msg);
211 
212  if (compallsen) {
213  int32 s;
214 
215  for (gid = 0; gid < g->n_mgau; gid++)
216  gauden_dist(g, gid, topn, feat, msg->dist[gid]);
217 
218  best = (int32) 0x7fffffff;
219  for (s = 0; s < sen->n_sen; s++) {
220  senscr[s] = senone_eval(sen, s, msg->dist[sen->mgau[s]], topn);
221  if (best > senscr[s]) {
222  best = senscr[s];
223  }
224  }
225 
226  /* Normalize senone scores */
227  for (s = 0; s < sen->n_sen; s++) {
228  int32 bs = senscr[s] - best;
229  if (bs > 32767)
230  bs = 32767;
231  if (bs < -32768)
232  bs = -32768;
233  senscr[s] = bs;
234  }
235  }
236  else {
237  int32 i, n;
238  /* Flag all active mixture-gaussian codebooks */
239  for (gid = 0; gid < g->n_mgau; gid++)
240  msg->mgau_active[gid] = 0;
241 
242  n = 0;
243  for (i = 0; i < n_senone_active; i++) {
244  /* senone_active consists of deltas. */
245  int32 s = senone_active[i] + n;
246  msg->mgau_active[sen->mgau[s]] = 1;
247  n = s;
248  }
249 
250  /* Compute topn gaussian density values (for active codebooks) */
251  for (gid = 0; gid < g->n_mgau; gid++) {
252  if (msg->mgau_active[gid])
253  gauden_dist(g, gid, topn, feat, msg->dist[gid]);
254  }
255 
256  best = (int32) 0x7fffffff;
257  n = 0;
258  for (i = 0; i < n_senone_active; i++) {
259  int32 s = senone_active[i] + n;
260  senscr[s] = senone_eval(sen, s, msg->dist[sen->mgau[s]], topn);
261  if (best > senscr[s]) {
262  best = senscr[s];
263  }
264  n = s;
265  }
266 
267  /* Normalize senone scores */
268  n = 0;
269  for (i = 0; i < n_senone_active; i++) {
270  int32 s = senone_active[i] + n;
271  int32 bs = senscr[s] - best;
272  if (bs > 32767)
273  bs = 32767;
274  if (bs < -32768)
275  bs = -32768;
276  senscr[s] = bs;
277  n = s;
278  }
279  }
280 
281  return 0;
282 }
(Sphinx 3.0 specific) A module that wraps up the code of gauden and senone because they are closely r...
int32 n_density
Number gaussian densities in each codebook-feature stream.
Definition: ms_gauden.h:90
void gauden_free(gauden_t *g)
Release memory allocated by gauden_init.
Definition: ms_gauden.c:358
int32 aw
Inverse acoustic weight.
Definition: ms_senone.h:92
Structure to store distance (density) values for a given input observation wrt density values in some...
Definition: ms_gauden.h:71
int32 gauden_mllr_transform(gauden_t *s, ps_mllr_t *mllr, cmd_ln_t *config)
Transform Gaussians according to an MLLR matrix (or, eventually, more).
Definition: ms_gauden.c:509
gauden_t * gauden_init(char const *meanfile, char const *varfile, float32 varfloor, logmath_t *lmath)
Read mixture gaussian codebooks from the given files.
Definition: ms_gauden.c:311
uint32 n_sen
Number senones in this set.
Definition: ms_senone.h:85
gauden_t * g
The codebook.
Definition: ms_mgau.h:116
uint32 * mgau
senone-id -> mgau-id mapping for senones in this set
Definition: ms_senone.h:90
8-bit senone PDF structure.
Definition: ms_senone.h:76
uint32 n_feat
Number feature streams.
Definition: ms_senone.h:86
cmd_ln_t * config
Configuration.
Definition: acmod.h:150
int32 * featlen
feature length for each feature
Definition: ms_gauden.h:91
int topn
Top-n gaussian will be computed.
Definition: ms_mgau.h:118
int32 senone_eval(senone_t *s, int id, gauden_dist_t **dist, int n_top)
Evaluate the score for the given senone wrt to the given top N gaussian codewords.
int32 n_mgau
Number codebooks.
Definition: ms_gauden.h:88
Feature space linear transform structure.
Definition: acmod.h:82
feat_t * fcb
Dynamic feature computation.
Definition: acmod.h:156
void senone_free(senone_t *s)
Release memory allocated by senone_init.
Definition: ms_senone.c:341
int32 n_feat
Number feature streams in each codebook.
Definition: ms_gauden.h:89
uint32 n_cw
Number codewords per codebook,stream.
Definition: ms_senone.h:87
uint32 n_gauden
Number gaussian density codebooks referred to by senones.
Definition: ms_senone.h:88
ps_mgaufuncs_t * vt
vtable of mgau functions.
Definition: acmod.h:114
Multivariate gaussian mixture density parameters.
Definition: ms_gauden.h:82
int32 gauden_dist(gauden_t *g, int mgau, int n_top, mfcc_t **obs, gauden_dist_t **out_dist)
Compute gaussian density values for the given input observation vector wrt the specified mixture gaus...
senone_t * senone_init(gauden_t *g, char const *mixwfile, char const *mgau_mapfile, float32 mixwfloor, logmath_t *lmath, bin_mdef_t *mdef)
Load a set of senones (mixing weights and mixture gaussian codebook mappings) from the given files...
Definition: ms_senone.c:275
Acoustic model structure.
Definition: acmod.h:148
senone_t * s
The senone.
Definition: ms_mgau.h:117