PocketSphinx  5prealpha
s2_semi_mgau.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 /* System headers */
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <assert.h>
43 #include <limits.h>
44 #include <math.h>
45 #if defined(__ADSPBLACKFIN__)
46 #elif !defined(_WIN32_WCE)
47 #include <sys/types.h>
48 #endif
49 
50 /* SphinxBase headers */
51 #include <sphinx_config.h>
52 #include <sphinxbase/cmd_ln.h>
53 #include <sphinxbase/fixpoint.h>
54 #include <sphinxbase/ckd_alloc.h>
55 #include <sphinxbase/bio.h>
56 #include <sphinxbase/err.h>
57 #include <sphinxbase/prim_type.h>
58 
59 /* Local headers */
60 #include "s2_semi_mgau.h"
61 #include "tied_mgau_common.h"
62 
63 static ps_mgaufuncs_t s2_semi_mgau_funcs = {
64  "s2_semi",
65  s2_semi_mgau_frame_eval, /* frame_eval */
66  s2_semi_mgau_mllr_transform, /* transform */
67  s2_semi_mgau_free /* free */
68 };
69 
70 struct vqFeature_s {
71  int32 score; /* score or distance */
72  int32 codeword; /* codeword (vector index) */
73 };
74 
75 static void
76 eval_topn(s2_semi_mgau_t *s, int32 feat, mfcc_t *z)
77 {
78  int i, ceplen;
79  vqFeature_t *topn;
80 
81  topn = s->f[feat];
82  ceplen = s->g->featlen[feat];
83 
84  for (i = 0; i < s->max_topn; i++) {
85  mfcc_t *mean, diff, sqdiff, compl; /* diff, diff^2, component likelihood */
86  vqFeature_t vtmp;
87  mfcc_t *var, d;
88  mfcc_t *obs;
89  int32 cw, j;
90 
91  cw = topn[i].codeword;
92  mean = s->g->mean[0][feat][0] + cw * ceplen;
93  var = s->g->var[0][feat][0] + cw * ceplen;
94  d = s->g->det[0][feat][cw];
95  obs = z;
96  for (j = 0; j < ceplen; j++) {
97  diff = *obs++ - *mean++;
98  sqdiff = MFCCMUL(diff, diff);
99  compl = MFCCMUL(sqdiff, *var);
100  d = GMMSUB(d, compl);
101  ++var;
102  }
103  topn[i].score = (int32)d;
104  if (i == 0)
105  continue;
106  vtmp = topn[i];
107  for (j = i - 1; j >= 0 && (int32)d > topn[j].score; j--) {
108  topn[j + 1] = topn[j];
109  }
110  topn[j + 1] = vtmp;
111  }
112 }
113 
114 static void
115 eval_cb(s2_semi_mgau_t *s, int32 feat, mfcc_t *z)
116 {
117  vqFeature_t *worst, *best, *topn;
118  mfcc_t *mean;
119  mfcc_t *var, *det, *detP, *detE;
120  int32 i, ceplen;
121 
122  best = topn = s->f[feat];
123  worst = topn + (s->max_topn - 1);
124  mean = s->g->mean[0][feat][0];
125  var = s->g->var[0][feat][0];
126  det = s->g->det[0][feat];
127  detE = det + s->g->n_density;
128  ceplen = s->g->featlen[feat];
129 
130  for (detP = det; detP < detE; ++detP) {
131  mfcc_t diff, sqdiff, compl; /* diff, diff^2, component likelihood */
132  mfcc_t d;
133  mfcc_t *obs;
134  vqFeature_t *cur;
135  int32 cw, j;
136 
137  d = *detP;
138  obs = z;
139  cw = (int)(detP - det);
140  for (j = 0; (j < ceplen) && (d >= worst->score); ++j) {
141  diff = *obs++ - *mean++;
142  sqdiff = MFCCMUL(diff, diff);
143  compl = MFCCMUL(sqdiff, *var);
144  d = GMMSUB(d, compl);
145  ++var;
146  }
147  if (j < ceplen) {
148  /* terminated early, so not in topn */
149  mean += (ceplen - j);
150  var += (ceplen - j);
151  continue;
152  }
153  if ((int32)d < worst->score)
154  continue;
155  for (i = 0; i < s->max_topn; i++) {
156  /* already there, so don't need to insert */
157  if (topn[i].codeword == cw)
158  break;
159  }
160  if (i < s->max_topn)
161  continue; /* already there. Don't insert */
162  /* remaining code inserts codeword and dist in correct spot */
163  for (cur = worst - 1; cur >= best && (int32)d >= cur->score; --cur)
164  memcpy(cur + 1, cur, sizeof(vqFeature_t));
165  ++cur;
166  cur->codeword = cw;
167  cur->score = (int32)d;
168  }
169 }
170 
171 static void
172 mgau_dist(s2_semi_mgau_t * s, int32 frame, int32 feat, mfcc_t * z)
173 {
174  eval_topn(s, feat, z);
175 
176  /* If this frame is skipped, do nothing else. */
177  if (frame % s->ds_ratio)
178  return;
179 
180  /* Evaluate the rest of the codebook (or subset thereof). */
181  eval_cb(s, feat, z);
182 }
183 
184 static int
185 mgau_norm(s2_semi_mgau_t *s, int feat)
186 {
187  int32 norm;
188  int j;
189 
190  /* Compute quantized normalizing constant. */
191  norm = s->f[feat][0].score >> SENSCR_SHIFT;
192 
193  /* Normalize the scores, negate them, and clamp their dynamic range. */
194  for (j = 0; j < s->max_topn; ++j) {
195  s->f[feat][j].score = -((s->f[feat][j].score >> SENSCR_SHIFT) - norm);
196  if (s->f[feat][j].score > MAX_NEG_ASCR)
197  s->f[feat][j].score = MAX_NEG_ASCR;
198  if (s->topn_beam[feat] && s->f[feat][j].score > s->topn_beam[feat])
199  break;
200  }
201  return j;
202 }
203 
204 static int32
205 get_scores_8b_feat_6(s2_semi_mgau_t * s, int i,
206  int16 *senone_scores, uint8 *senone_active,
207  int32 n_senone_active)
208 {
209  int32 j, l;
210  uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4, *pid_cw5;
211 
212  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
213  pid_cw1 = s->mixw[i][s->f[i][1].codeword];
214  pid_cw2 = s->mixw[i][s->f[i][2].codeword];
215  pid_cw3 = s->mixw[i][s->f[i][3].codeword];
216  pid_cw4 = s->mixw[i][s->f[i][4].codeword];
217  pid_cw5 = s->mixw[i][s->f[i][5].codeword];
218 
219  for (l = j = 0; j < n_senone_active; j++) {
220  int sen = senone_active[j] + l;
221  int32 tmp = pid_cw0[sen] + s->f[i][0].score;
222 
223  tmp = fast_logmath_add(s->lmath_8b, tmp,
224  pid_cw1[sen] + s->f[i][1].score);
225  tmp = fast_logmath_add(s->lmath_8b, tmp,
226  pid_cw2[sen] + s->f[i][2].score);
227  tmp = fast_logmath_add(s->lmath_8b, tmp,
228  pid_cw3[sen] + s->f[i][3].score);
229  tmp = fast_logmath_add(s->lmath_8b, tmp,
230  pid_cw4[sen] + s->f[i][4].score);
231  tmp = fast_logmath_add(s->lmath_8b, tmp,
232  pid_cw5[sen] + s->f[i][5].score);
233 
234  senone_scores[sen] += tmp;
235  l = sen;
236  }
237  return 0;
238 }
239 
240 static int32
241 get_scores_8b_feat_5(s2_semi_mgau_t * s, int i,
242  int16 *senone_scores, uint8 *senone_active,
243  int32 n_senone_active)
244 {
245  int32 j, l;
246  uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4;
247 
248  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
249  pid_cw1 = s->mixw[i][s->f[i][1].codeword];
250  pid_cw2 = s->mixw[i][s->f[i][2].codeword];
251  pid_cw3 = s->mixw[i][s->f[i][3].codeword];
252  pid_cw4 = s->mixw[i][s->f[i][4].codeword];
253 
254  for (l = j = 0; j < n_senone_active; j++) {
255  int sen = senone_active[j] + l;
256  int32 tmp = pid_cw0[sen] + s->f[i][0].score;
257 
258  tmp = fast_logmath_add(s->lmath_8b, tmp,
259  pid_cw1[sen] + s->f[i][1].score);
260  tmp = fast_logmath_add(s->lmath_8b, tmp,
261  pid_cw2[sen] + s->f[i][2].score);
262  tmp = fast_logmath_add(s->lmath_8b, tmp,
263  pid_cw3[sen] + s->f[i][3].score);
264  tmp = fast_logmath_add(s->lmath_8b, tmp,
265  pid_cw4[sen] + s->f[i][4].score);
266 
267  senone_scores[sen] += tmp;
268  l = sen;
269  }
270  return 0;
271 }
272 
273 static int32
274 get_scores_8b_feat_4(s2_semi_mgau_t * s, int i,
275  int16 *senone_scores, uint8 *senone_active,
276  int32 n_senone_active)
277 {
278  int32 j, l;
279  uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3;
280 
281  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
282  pid_cw1 = s->mixw[i][s->f[i][1].codeword];
283  pid_cw2 = s->mixw[i][s->f[i][2].codeword];
284  pid_cw3 = s->mixw[i][s->f[i][3].codeword];
285 
286  for (l = j = 0; j < n_senone_active; j++) {
287  int sen = senone_active[j] + l;
288  int32 tmp = pid_cw0[sen] + s->f[i][0].score;
289 
290  tmp = fast_logmath_add(s->lmath_8b, tmp,
291  pid_cw1[sen] + s->f[i][1].score);
292  tmp = fast_logmath_add(s->lmath_8b, tmp,
293  pid_cw2[sen] + s->f[i][2].score);
294  tmp = fast_logmath_add(s->lmath_8b, tmp,
295  pid_cw3[sen] + s->f[i][3].score);
296 
297  senone_scores[sen] += tmp;
298  l = sen;
299  }
300  return 0;
301 }
302 
303 static int32
304 get_scores_8b_feat_3(s2_semi_mgau_t * s, int i,
305  int16 *senone_scores, uint8 *senone_active,
306  int32 n_senone_active)
307 {
308  int32 j, l;
309  uint8 *pid_cw0, *pid_cw1, *pid_cw2;
310 
311  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
312  pid_cw1 = s->mixw[i][s->f[i][1].codeword];
313  pid_cw2 = s->mixw[i][s->f[i][2].codeword];
314 
315  for (l = j = 0; j < n_senone_active; j++) {
316  int sen = senone_active[j] + l;
317  int32 tmp = pid_cw0[sen] + s->f[i][0].score;
318 
319  tmp = fast_logmath_add(s->lmath_8b, tmp,
320  pid_cw1[sen] + s->f[i][1].score);
321  tmp = fast_logmath_add(s->lmath_8b, tmp,
322  pid_cw2[sen] + s->f[i][2].score);
323 
324  senone_scores[sen] += tmp;
325  l = sen;
326  }
327  return 0;
328 }
329 
330 static int32
331 get_scores_8b_feat_2(s2_semi_mgau_t * s, int i,
332  int16 *senone_scores, uint8 *senone_active,
333  int32 n_senone_active)
334 {
335  int32 j, l;
336  uint8 *pid_cw0, *pid_cw1;
337 
338  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
339  pid_cw1 = s->mixw[i][s->f[i][1].codeword];
340 
341  for (l = j = 0; j < n_senone_active; j++) {
342  int sen = senone_active[j] + l;
343  int32 tmp = pid_cw0[sen] + s->f[i][0].score;
344 
345  tmp = fast_logmath_add(s->lmath_8b, tmp,
346  pid_cw1[sen] + s->f[i][1].score);
347 
348  senone_scores[sen] += tmp;
349  l = sen;
350  }
351  return 0;
352 }
353 
354 static int32
355 get_scores_8b_feat_1(s2_semi_mgau_t * s, int i,
356  int16 *senone_scores, uint8 *senone_active,
357  int32 n_senone_active)
358 {
359  int32 j, l;
360  uint8 *pid_cw0;
361 
362  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
363  for (l = j = 0; j < n_senone_active; j++) {
364  int sen = senone_active[j] + l;
365  int32 tmp = pid_cw0[sen] + s->f[i][0].score;
366  senone_scores[sen] += tmp;
367  l = sen;
368  }
369  return 0;
370 }
371 
372 static int32
373 get_scores_8b_feat_any(s2_semi_mgau_t * s, int i, int topn,
374  int16 *senone_scores, uint8 *senone_active,
375  int32 n_senone_active)
376 {
377  int32 j, k, l;
378 
379  for (l = j = 0; j < n_senone_active; j++) {
380  int sen = senone_active[j] + l;
381  uint8 *pid_cw;
382  int32 tmp;
383  pid_cw = s->mixw[i][s->f[i][0].codeword];
384  tmp = pid_cw[sen] + s->f[i][0].score;
385  for (k = 1; k < topn; ++k) {
386  pid_cw = s->mixw[i][s->f[i][k].codeword];
387  tmp = fast_logmath_add(s->lmath_8b, tmp,
388  pid_cw[sen] + s->f[i][k].score);
389  }
390  senone_scores[sen] += tmp;
391  l = sen;
392  }
393  return 0;
394 }
395 
396 static int32
397 get_scores_8b_feat(s2_semi_mgau_t * s, int i, int topn,
398  int16 *senone_scores, uint8 *senone_active, int32 n_senone_active)
399 {
400  switch (topn) {
401  case 6:
402  return get_scores_8b_feat_6(s, i, senone_scores,
403  senone_active, n_senone_active);
404  case 5:
405  return get_scores_8b_feat_5(s, i, senone_scores,
406  senone_active, n_senone_active);
407  case 4:
408  return get_scores_8b_feat_4(s, i, senone_scores,
409  senone_active, n_senone_active);
410  case 3:
411  return get_scores_8b_feat_3(s, i, senone_scores,
412  senone_active, n_senone_active);
413  case 2:
414  return get_scores_8b_feat_2(s, i, senone_scores,
415  senone_active, n_senone_active);
416  case 1:
417  return get_scores_8b_feat_1(s, i, senone_scores,
418  senone_active, n_senone_active);
419  default:
420  return get_scores_8b_feat_any(s, i, topn, senone_scores,
421  senone_active, n_senone_active);
422  }
423 }
424 
425 static int32
426 get_scores_8b_feat_all(s2_semi_mgau_t * s, int i, int topn, int16 *senone_scores)
427 {
428  int32 j, k;
429 
430  for (j = 0; j < s->n_sen; j++) {
431  uint8 *pid_cw;
432  int32 tmp;
433  pid_cw = s->mixw[i][s->f[i][0].codeword];
434  tmp = pid_cw[j] + s->f[i][0].score;
435  for (k = 1; k < topn; ++k) {
436  pid_cw = s->mixw[i][s->f[i][k].codeword];
437  tmp = fast_logmath_add(s->lmath_8b, tmp,
438  pid_cw[j] + s->f[i][k].score);
439  }
440  senone_scores[j] += tmp;
441  }
442  return 0;
443 }
444 
445 static int32
446 get_scores_4b_feat_6(s2_semi_mgau_t * s, int i,
447  int16 *senone_scores, uint8 *senone_active,
448  int32 n_senone_active)
449 {
450  int32 j, l;
451  uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4, *pid_cw5;
452  uint8 w_den[6][16];
453 
454  /* Precompute scaled densities. */
455  for (j = 0; j < 16; ++j) {
456  w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
457  w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
458  w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
459  w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score;
460  w_den[4][j] = s->mixw_cb[j] + s->f[i][4].score;
461  w_den[5][j] = s->mixw_cb[j] + s->f[i][5].score;
462  }
463 
464  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
465  pid_cw1 = s->mixw[i][s->f[i][1].codeword];
466  pid_cw2 = s->mixw[i][s->f[i][2].codeword];
467  pid_cw3 = s->mixw[i][s->f[i][3].codeword];
468  pid_cw4 = s->mixw[i][s->f[i][4].codeword];
469  pid_cw5 = s->mixw[i][s->f[i][5].codeword];
470 
471  for (l = j = 0; j < n_senone_active; j++) {
472  int n = senone_active[j] + l;
473  int tmp, cw;
474 
475  if (n & 1) {
476  cw = pid_cw0[n/2] >> 4;
477  tmp = w_den[0][cw];
478  cw = pid_cw1[n/2] >> 4;
479  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
480  cw = pid_cw2[n/2] >> 4;
481  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
482  cw = pid_cw3[n/2] >> 4;
483  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
484  cw = pid_cw4[n/2] >> 4;
485  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
486  cw = pid_cw5[n/2] >> 4;
487  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[5][cw]);
488  }
489  else {
490  cw = pid_cw0[n/2] & 0x0f;
491  tmp = w_den[0][cw];
492  cw = pid_cw1[n/2] & 0x0f;
493  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
494  cw = pid_cw2[n/2] & 0x0f;
495  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
496  cw = pid_cw3[n/2] & 0x0f;
497  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
498  cw = pid_cw4[n/2] & 0x0f;
499  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
500  cw = pid_cw5[n/2] & 0x0f;
501  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[5][cw]);
502  }
503  senone_scores[n] += tmp;
504  l = n;
505  }
506  return 0;
507 }
508 
509 static int32
510 get_scores_4b_feat_5(s2_semi_mgau_t * s, int i,
511  int16 *senone_scores, uint8 *senone_active,
512  int32 n_senone_active)
513 {
514  int32 j, l;
515  uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4;
516  uint8 w_den[5][16];
517 
518  /* Precompute scaled densities. */
519  for (j = 0; j < 16; ++j) {
520  w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
521  w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
522  w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
523  w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score;
524  w_den[4][j] = s->mixw_cb[j] + s->f[i][4].score;
525  }
526 
527  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
528  pid_cw1 = s->mixw[i][s->f[i][1].codeword];
529  pid_cw2 = s->mixw[i][s->f[i][2].codeword];
530  pid_cw3 = s->mixw[i][s->f[i][3].codeword];
531  pid_cw4 = s->mixw[i][s->f[i][4].codeword];
532 
533  for (l = j = 0; j < n_senone_active; j++) {
534  int n = senone_active[j] + l;
535  int tmp, cw;
536 
537  if (n & 1) {
538  cw = pid_cw0[n/2] >> 4;
539  tmp = w_den[0][cw];
540  cw = pid_cw1[n/2] >> 4;
541  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
542  cw = pid_cw2[n/2] >> 4;
543  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
544  cw = pid_cw3[n/2] >> 4;
545  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
546  cw = pid_cw4[n/2] >> 4;
547  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
548  }
549  else {
550  cw = pid_cw0[n/2] & 0x0f;
551  tmp = w_den[0][cw];
552  cw = pid_cw1[n/2] & 0x0f;
553  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
554  cw = pid_cw2[n/2] & 0x0f;
555  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
556  cw = pid_cw3[n/2] & 0x0f;
557  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
558  cw = pid_cw4[n/2] & 0x0f;
559  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
560  }
561  senone_scores[n] += tmp;
562  l = n;
563  }
564  return 0;
565 }
566 
567 static int32
568 get_scores_4b_feat_4(s2_semi_mgau_t * s, int i,
569  int16 *senone_scores, uint8 *senone_active,
570  int32 n_senone_active)
571 {
572  int32 j, l;
573  uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3;
574  uint8 w_den[4][16];
575 
576  /* Precompute scaled densities. */
577  for (j = 0; j < 16; ++j) {
578  w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
579  w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
580  w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
581  w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score;
582  }
583 
584  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
585  pid_cw1 = s->mixw[i][s->f[i][1].codeword];
586  pid_cw2 = s->mixw[i][s->f[i][2].codeword];
587  pid_cw3 = s->mixw[i][s->f[i][3].codeword];
588 
589  for (l = j = 0; j < n_senone_active; j++) {
590  int n = senone_active[j] + l;
591  int tmp, cw;
592 
593  if (n & 1) {
594  cw = pid_cw0[n/2] >> 4;
595  tmp = w_den[0][cw];
596  cw = pid_cw1[n/2] >> 4;
597  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
598  cw = pid_cw2[n/2] >> 4;
599  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
600  cw = pid_cw3[n/2] >> 4;
601  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
602  }
603  else {
604  cw = pid_cw0[n/2] & 0x0f;
605  tmp = w_den[0][cw];
606  cw = pid_cw1[n/2] & 0x0f;
607  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
608  cw = pid_cw2[n/2] & 0x0f;
609  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
610  cw = pid_cw3[n/2] & 0x0f;
611  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
612  }
613  senone_scores[n] += tmp;
614  l = n;
615  }
616  return 0;
617 }
618 
619 static int32
620 get_scores_4b_feat_3(s2_semi_mgau_t * s, int i,
621  int16 *senone_scores, uint8 *senone_active,
622  int32 n_senone_active)
623 {
624  int32 j, l;
625  uint8 *pid_cw0, *pid_cw1, *pid_cw2;
626  uint8 w_den[3][16];
627 
628  /* Precompute scaled densities. */
629  for (j = 0; j < 16; ++j) {
630  w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
631  w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
632  w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
633  }
634 
635  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
636  pid_cw1 = s->mixw[i][s->f[i][1].codeword];
637  pid_cw2 = s->mixw[i][s->f[i][2].codeword];
638 
639  for (l = j = 0; j < n_senone_active; j++) {
640  int n = senone_active[j] + l;
641  int tmp, cw;
642 
643  if (n & 1) {
644  cw = pid_cw0[n/2] >> 4;
645  tmp = w_den[0][cw];
646  cw = pid_cw1[n/2] >> 4;
647  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
648  cw = pid_cw2[n/2] >> 4;
649  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
650  }
651  else {
652  cw = pid_cw0[n/2] & 0x0f;
653  tmp = w_den[0][cw];
654  cw = pid_cw1[n/2] & 0x0f;
655  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
656  cw = pid_cw2[n/2] & 0x0f;
657  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
658  }
659  senone_scores[n] += tmp;
660  l = n;
661  }
662  return 0;
663 }
664 
665 static int32
666 get_scores_4b_feat_2(s2_semi_mgau_t * s, int i,
667  int16 *senone_scores, uint8 *senone_active,
668  int32 n_senone_active)
669 {
670  int32 j, l;
671  uint8 *pid_cw0, *pid_cw1;
672  uint8 w_den[2][16];
673 
674  /* Precompute scaled densities. */
675  for (j = 0; j < 16; ++j) {
676  w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
677  w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
678  }
679 
680  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
681  pid_cw1 = s->mixw[i][s->f[i][1].codeword];
682 
683  for (l = j = 0; j < n_senone_active; j++) {
684  int n = senone_active[j] + l;
685  int tmp, cw;
686 
687  if (n & 1) {
688  cw = pid_cw0[n/2] >> 4;
689  tmp = w_den[0][cw];
690  cw = pid_cw1[n/2] >> 4;
691  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
692  }
693  else {
694  cw = pid_cw0[n/2] & 0x0f;
695  tmp = w_den[0][cw];
696  cw = pid_cw1[n/2] & 0x0f;
697  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
698  }
699  senone_scores[n] += tmp;
700  l = n;
701  }
702  return 0;
703 }
704 
705 static int32
706 get_scores_4b_feat_1(s2_semi_mgau_t * s, int i,
707  int16 *senone_scores, uint8 *senone_active,
708  int32 n_senone_active)
709 {
710  int32 j, l;
711  uint8 *pid_cw0;
712  uint8 w_den[16];
713 
714  /* Precompute scaled densities. */
715  for (j = 0; j < 16; ++j) {
716  w_den[j] = s->mixw_cb[j] + s->f[i][0].score;
717  }
718 
719  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
720 
721  for (l = j = 0; j < n_senone_active; j++) {
722  int n = senone_active[j] + l;
723  int tmp, cw;
724 
725  if (n & 1) {
726  cw = pid_cw0[n/2] >> 4;
727  tmp = w_den[cw];
728  }
729  else {
730  cw = pid_cw0[n/2] & 0x0f;
731  tmp = w_den[cw];
732  }
733  senone_scores[n] += tmp;
734  l = n;
735  }
736  return 0;
737 }
738 
739 static int32
740 get_scores_4b_feat_any(s2_semi_mgau_t * s, int i, int topn,
741  int16 *senone_scores, uint8 *senone_active,
742  int32 n_senone_active)
743 {
744  int32 j, k, l;
745 
746  for (l = j = 0; j < n_senone_active; j++) {
747  int n = senone_active[j] + l;
748  int tmp, cw;
749  uint8 *pid_cw;
750 
751  pid_cw = s->mixw[i][s->f[i][0].codeword];
752  if (n & 1)
753  cw = pid_cw[n/2] >> 4;
754  else
755  cw = pid_cw[n/2] & 0x0f;
756  tmp = s->mixw_cb[cw] + s->f[i][0].score;
757  for (k = 1; k < topn; ++k) {
758  pid_cw = s->mixw[i][s->f[i][k].codeword];
759  if (n & 1)
760  cw = pid_cw[n/2] >> 4;
761  else
762  cw = pid_cw[n/2] & 0x0f;
763  tmp = fast_logmath_add(s->lmath_8b, tmp,
764  s->mixw_cb[cw] + s->f[i][k].score);
765  }
766  senone_scores[n] += tmp;
767  l = n;
768  }
769  return 0;
770 }
771 
772 static int32
773 get_scores_4b_feat(s2_semi_mgau_t * s, int i, int topn,
774  int16 *senone_scores, uint8 *senone_active, int32 n_senone_active)
775 {
776  switch (topn) {
777  case 6:
778  return get_scores_4b_feat_6(s, i, senone_scores,
779  senone_active, n_senone_active);
780  case 5:
781  return get_scores_4b_feat_5(s, i, senone_scores,
782  senone_active, n_senone_active);
783  case 4:
784  return get_scores_4b_feat_4(s, i, senone_scores,
785  senone_active, n_senone_active);
786  case 3:
787  return get_scores_4b_feat_3(s, i, senone_scores,
788  senone_active, n_senone_active);
789  case 2:
790  return get_scores_4b_feat_2(s, i, senone_scores,
791  senone_active, n_senone_active);
792  case 1:
793  return get_scores_4b_feat_1(s, i, senone_scores,
794  senone_active, n_senone_active);
795  default:
796  return get_scores_4b_feat_any(s, i, topn, senone_scores,
797  senone_active, n_senone_active);
798  }
799 }
800 
801 static int32
802 get_scores_4b_feat_all(s2_semi_mgau_t * s, int i, int topn, int16 *senone_scores)
803 {
804  int j, last_sen;
805 
806  j = 0;
807  /* Number of senones is always even, but don't overrun if it isn't. */
808  last_sen = s->n_sen & ~1;
809  while (j < last_sen) {
810  uint8 *pid_cw;
811  int32 tmp0, tmp1;
812  int k;
813 
814  pid_cw = s->mixw[i][s->f[i][0].codeword];
815  tmp0 = s->mixw_cb[pid_cw[j/2] & 0x0f] + s->f[i][0].score;
816  tmp1 = s->mixw_cb[pid_cw[j/2] >> 4] + s->f[i][0].score;
817  for (k = 1; k < topn; ++k) {
818  int32 w_den0, w_den1;
819 
820  pid_cw = s->mixw[i][s->f[i][k].codeword];
821  w_den0 = s->mixw_cb[pid_cw[j/2] & 0x0f] + s->f[i][k].score;
822  w_den1 = s->mixw_cb[pid_cw[j/2] >> 4] + s->f[i][k].score;
823  tmp0 = fast_logmath_add(s->lmath_8b, tmp0, w_den0);
824  tmp1 = fast_logmath_add(s->lmath_8b, tmp1, w_den1);
825  }
826  senone_scores[j++] += tmp0;
827  senone_scores[j++] += tmp1;
828  }
829  return 0;
830 }
831 
832 /*
833  * Compute senone scores for the active senones.
834  */
835 int32
836 s2_semi_mgau_frame_eval(ps_mgau_t *ps,
837  int16 *senone_scores,
838  uint8 *senone_active,
839  int32 n_senone_active,
840  mfcc_t ** featbuf, int32 frame,
841  int32 compallsen)
842 {
843  s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps;
844  int i, topn_idx;
845  int n_feat = s->g->n_feat;
846 
847  memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores));
848  /* No bounds checking is done here, which just means you'll get
849  * semi-random crap if you request a frame in the future or one
850  * that's too far in the past. */
851  topn_idx = frame % s->n_topn_hist;
852  s->f = s->topn_hist[topn_idx];
853  for (i = 0; i < n_feat; ++i) {
854  /* For past frames this will already be computed. */
855  if (frame >= ps_mgau_base(ps)->frame_idx) {
856  vqFeature_t **lastf;
857  if (topn_idx == 0)
858  lastf = s->topn_hist[s->n_topn_hist-1];
859  else
860  lastf = s->topn_hist[topn_idx-1];
861  memcpy(s->f[i], lastf[i], sizeof(vqFeature_t) * s->max_topn);
862  mgau_dist(s, frame, i, featbuf[i]);
863  s->topn_hist_n[topn_idx][i] = mgau_norm(s, i);
864  }
865  if (s->mixw_cb) {
866  if (compallsen)
867  get_scores_4b_feat_all(s, i, s->topn_hist_n[topn_idx][i], senone_scores);
868  else
869  get_scores_4b_feat(s, i, s->topn_hist_n[topn_idx][i], senone_scores,
870  senone_active, n_senone_active);
871  }
872  else {
873  if (compallsen)
874  get_scores_8b_feat_all(s, i, s->topn_hist_n[topn_idx][i], senone_scores);
875  else
876  get_scores_8b_feat(s, i, s->topn_hist_n[topn_idx][i], senone_scores,
877  senone_active, n_senone_active);
878  }
879  }
880 
881  return 0;
882 }
883 
884 static int32
885 read_sendump(s2_semi_mgau_t *s, bin_mdef_t *mdef, char const *file)
886 {
887  FILE *fp;
888  char line[1000];
889  int32 i, n, r, c;
890  int32 do_swap, do_mmap;
891  size_t offset;
892  int n_clust = 0;
893  int n_feat = s->g->n_feat;
894  int n_density = s->g->n_density;
895  int n_sen = bin_mdef_n_sen(mdef);
896  int n_bits = 8;
897 
898  s->n_sen = n_sen; /* FIXME: Should have been done earlier */
899  do_mmap = cmd_ln_boolean_r(s->config, "-mmap");
900 
901  if ((fp = fopen(file, "rb")) == NULL)
902  return -1;
903 
904  E_INFO("Loading senones from dump file %s\n", file);
905  /* Read title size, title */
906  if (fread(&n, sizeof(int32), 1, fp) != 1) {
907  E_ERROR_SYSTEM("Failed to read title size from %s", file);
908  goto error_out;
909  }
910  /* This is extremely bogus */
911  do_swap = 0;
912  if (n < 1 || n > 999) {
913  SWAP_INT32(&n);
914  if (n < 1 || n > 999) {
915  E_ERROR("Title length %x in dump file %s out of range\n", n, file);
916  goto error_out;
917  }
918  do_swap = 1;
919  }
920  if (fread(line, sizeof(char), n, fp) != n) {
921  E_ERROR_SYSTEM("Cannot read title");
922  goto error_out;
923  }
924  if (line[n - 1] != '\0') {
925  E_ERROR("Bad title in dump file\n");
926  goto error_out;
927  }
928  E_INFO("%s\n", line);
929 
930  /* Read header size, header */
931  if (fread(&n, sizeof(n), 1, fp) != 1) {
932  E_ERROR_SYSTEM("Failed to read header size from %s", file);
933  goto error_out;
934  }
935  if (do_swap) SWAP_INT32(&n);
936  if (fread(line, sizeof(char), n, fp) != n) {
937  E_ERROR_SYSTEM("Cannot read header");
938  goto error_out;
939  }
940  if (line[n - 1] != '\0') {
941  E_ERROR("Bad header in dump file\n");
942  goto error_out;
943  }
944 
945  /* Read other header strings until string length = 0 */
946  for (;;) {
947  if (fread(&n, sizeof(n), 1, fp) != 1) {
948  E_ERROR_SYSTEM("Failed to read header string size from %s", file);
949  goto error_out;
950  }
951  if (do_swap) SWAP_INT32(&n);
952  if (n == 0)
953  break;
954  if (fread(line, sizeof(char), n, fp) != n) {
955  E_ERROR_SYSTEM("Cannot read header");
956  goto error_out;
957  }
958  /* Look for a cluster count, if present */
959  if (!strncmp(line, "feature_count ", strlen("feature_count "))) {
960  n_feat = atoi(line + strlen("feature_count "));
961  }
962  if (!strncmp(line, "mixture_count ", strlen("mixture_count "))) {
963  n_density = atoi(line + strlen("mixture_count "));
964  }
965  if (!strncmp(line, "model_count ", strlen("model_count "))) {
966  n_sen = atoi(line + strlen("model_count "));
967  }
968  if (!strncmp(line, "cluster_count ", strlen("cluster_count "))) {
969  n_clust = atoi(line + strlen("cluster_count "));
970  }
971  if (!strncmp(line, "cluster_bits ", strlen("cluster_bits "))) {
972  n_bits = atoi(line + strlen("cluster_bits "));
973  }
974  }
975 
976  /* Defaults for #rows, #columns in mixw array. */
977  c = n_sen;
978  r = n_density;
979  if (n_clust == 0) {
980  /* Older mixw files have them here, and they might be padded. */
981  if (fread(&r, sizeof(r), 1, fp) != 1) {
982  E_ERROR_SYSTEM("Cannot read #rows");
983  goto error_out;
984  }
985  if (do_swap) SWAP_INT32(&r);
986  if (fread(&c, sizeof(c), 1, fp) != 1) {
987  E_ERROR_SYSTEM("Cannot read #columns");
988  goto error_out;
989  }
990  if (do_swap) SWAP_INT32(&c);
991  E_INFO("Rows: %d, Columns: %d\n", r, c);
992  }
993 
994  if (n_feat != s->g->n_feat) {
995  E_ERROR("Number of feature streams mismatch: %d != %d\n",
996  n_feat, s->g->n_feat);
997  goto error_out;
998  }
999  if (n_density != s->g->n_density) {
1000  E_ERROR("Number of densities mismatch: %d != %d\n",
1001  n_density, s->g->n_density);
1002  goto error_out;
1003  }
1004  if (n_sen != s->n_sen) {
1005  E_ERROR("Number of senones mismatch: %d != %d\n",
1006  n_sen, s->n_sen);
1007  goto error_out;
1008  }
1009 
1010  if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) {
1011  E_ERROR("Cluster count must be 0, 15, or 16\n");
1012  goto error_out;
1013  }
1014  if (n_clust == 15)
1015  ++n_clust;
1016 
1017  if (!((n_bits == 8) || (n_bits == 4))) {
1018  E_ERROR("Cluster count must be 4 or 8\n");
1019  goto error_out;
1020  }
1021 
1022  if (do_mmap) {
1023  E_INFO("Using memory-mapped I/O for senones\n");
1024  }
1025  offset = ftell(fp);
1026 
1027  /* Allocate memory for pdfs (or memory map them) */
1028  if (do_mmap) {
1029  s->sendump_mmap = mmio_file_read(file);
1030  /* Get cluster codebook if any. */
1031  if (n_clust) {
1032  s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
1033  offset += n_clust;
1034  }
1035  }
1036  else {
1037  /* Get cluster codebook if any. */
1038  if (n_clust) {
1039  s->mixw_cb = ckd_calloc(1, n_clust);
1040  if (fread(s->mixw_cb, 1, n_clust, fp) != (size_t) n_clust) {
1041  E_ERROR("Failed to read %d bytes from sendump\n", n_clust);
1042  goto error_out;
1043  }
1044  }
1045  }
1046 
1047  /* Set up pointers, or read, or whatever */
1048  if (s->sendump_mmap) {
1049  s->mixw = ckd_calloc_2d(n_feat, n_density, sizeof(*s->mixw));
1050  for (n = 0; n < n_feat; n++) {
1051  int step = c;
1052  if (n_bits == 4)
1053  step = (step + 1) / 2;
1054  for (i = 0; i < r; i++) {
1055  s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
1056  offset += step;
1057  }
1058  }
1059  }
1060  else {
1061  s->mixw = ckd_calloc_3d(n_feat, n_density, n_sen, sizeof(***s->mixw));
1062  /* Read pdf values and ids */
1063  for (n = 0; n < n_feat; n++) {
1064  int step = c;
1065  if (n_bits == 4)
1066  step = (step + 1) / 2;
1067  for (i = 0; i < r; i++) {
1068  if (fread(s->mixw[n][i], sizeof(***s->mixw), step, fp)
1069  != (size_t) step) {
1070  E_ERROR("Failed to read %d bytes from sendump\n", step);
1071  goto error_out;
1072  }
1073  }
1074  }
1075  }
1076 
1077  fclose(fp);
1078  return 0;
1079 error_out:
1080  fclose(fp);
1081  return -1;
1082 }
1083 
1084 static int32
1085 read_mixw(s2_semi_mgau_t * s, char const *file_name, double SmoothMin)
1086 {
1087  char **argname, **argval;
1088  char eofchk;
1089  FILE *fp;
1090  int32 byteswap, chksum_present;
1091  uint32 chksum;
1092  float32 *pdf;
1093  int32 i, f, c, n;
1094  int32 n_sen;
1095  int32 n_feat;
1096  int32 n_comp;
1097  int32 n_err;
1098 
1099  E_INFO("Reading mixture weights file '%s'\n", file_name);
1100 
1101  if ((fp = fopen(file_name, "rb")) == NULL)
1102  E_FATAL_SYSTEM("Failed to open mixture weights file '%s' for reading", file_name);
1103 
1104  /* Read header, including argument-value info and 32-bit byteorder magic */
1105  if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
1106  E_FATAL("Failed to read header from file '%s'\n", file_name);
1107 
1108  /* Parse argument-value list */
1109  chksum_present = 0;
1110  for (i = 0; argname[i]; i++) {
1111  if (strcmp(argname[i], "version") == 0) {
1112  if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0)
1113  E_WARN("Version mismatch(%s): %s, expecting %s\n",
1114  file_name, argval[i], MGAU_MIXW_VERSION);
1115  }
1116  else if (strcmp(argname[i], "chksum0") == 0) {
1117  chksum_present = 1; /* Ignore the associated value */
1118  }
1119  }
1120  bio_hdrarg_free(argname, argval);
1121  argname = argval = NULL;
1122 
1123  chksum = 0;
1124 
1125  /* Read #senones, #features, #codewords, arraysize */
1126  if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
1127  || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) !=
1128  1)
1129  || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap, &chksum) !=
1130  1)
1131  || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
1132  E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
1133  }
1134  if (n_feat != s->g->n_feat)
1135  E_FATAL("#Features streams(%d) != %d\n", n_feat, s->g->n_feat);
1136  if (n != n_sen * n_feat * n_comp) {
1137  E_FATAL
1138  ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n",
1139  file_name, i, n_sen, n_feat, n_comp);
1140  }
1141 
1142  /* n_sen = number of mixture weights per codeword, which is
1143  * fixed at the number of senones since we have only one codebook.
1144  */
1145  s->n_sen = n_sen;
1146 
1147  /* Quantized mixture weight arrays. */
1148  s->mixw = ckd_calloc_3d(n_feat, s->g->n_density, n_sen, sizeof(***s->mixw));
1149 
1150  /* Temporary structure to read in floats before conversion to (int32) logs3 */
1151  pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32));
1152 
1153  /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
1154  n_err = 0;
1155  for (i = 0; i < n_sen; i++) {
1156  for (f = 0; f < n_feat; f++) {
1157  if (bio_fread((void *) pdf, sizeof(float32),
1158  n_comp, fp, byteswap, &chksum) != n_comp) {
1159  E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
1160  }
1161 
1162  /* Normalize and floor */
1163  if (vector_sum_norm(pdf, n_comp) <= 0.0)
1164  n_err++;
1165  vector_floor(pdf, n_comp, SmoothMin);
1166  vector_sum_norm(pdf, n_comp);
1167 
1168  /* Convert to LOG, quantize, and transpose */
1169  for (c = 0; c < n_comp; c++) {
1170  int32 qscr;
1171 
1172  qscr = -logmath_log(s->lmath_8b, pdf[c]);
1173  if ((qscr > MAX_NEG_MIXW) || (qscr < 0))
1174  qscr = MAX_NEG_MIXW;
1175  s->mixw[f][c][i] = qscr;
1176  }
1177  }
1178  }
1179  if (n_err > 0)
1180  E_WARN("Weight normalization failed for %d mixture weights components\n", n_err);
1181 
1182  ckd_free(pdf);
1183 
1184  if (chksum_present)
1185  bio_verify_chksum(fp, byteswap, chksum);
1186 
1187  if (fread(&eofchk, 1, 1, fp) == 1)
1188  E_FATAL("More data than expected in %s\n", file_name);
1189 
1190  fclose(fp);
1191 
1192  E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp);
1193  return n_sen;
1194 }
1195 
1196 
1197 static int
1198 split_topn(char const *str, uint8 *out, int nfeat)
1199 {
1200  char *topn_list = ckd_salloc(str);
1201  char *c, *cc;
1202  int i, maxn;
1203 
1204  c = topn_list;
1205  i = 0;
1206  maxn = 0;
1207  while (i < nfeat && (cc = strchr(c, ',')) != NULL) {
1208  *cc = '\0';
1209  out[i] = atoi(c);
1210  if (out[i] > maxn) maxn = out[i];
1211  c = cc + 1;
1212  ++i;
1213  }
1214  if (i < nfeat && *c != '\0') {
1215  out[i] = atoi(c);
1216  if (out[i] > maxn) maxn = out[i];
1217  ++i;
1218  }
1219  while (i < nfeat)
1220  out[i++] = maxn;
1221 
1222  ckd_free(topn_list);
1223  return maxn;
1224 }
1225 
1226 
1227 ps_mgau_t *
1228 s2_semi_mgau_init(acmod_t *acmod)
1229 {
1230  s2_semi_mgau_t *s;
1231  ps_mgau_t *ps;
1232  char const *sendump_path;
1233  int i;
1234  int n_feat;
1235 
1236  s = ckd_calloc(1, sizeof(*s));
1237  s->config = acmod->config;
1238 
1239  s->lmath = logmath_retain(acmod->lmath);
1240  /* Log-add table. */
1241  s->lmath_8b = logmath_init(logmath_get_base(acmod->lmath), SENSCR_SHIFT, TRUE);
1242  if (s->lmath_8b == NULL)
1243  goto error_out;
1244  /* Ensure that it is only 8 bits wide so that fast_logmath_add() works. */
1245  if (logmath_get_width(s->lmath_8b) != 1) {
1246  E_ERROR("Log base %f is too small to represent add table in 8 bits\n",
1247  logmath_get_base(s->lmath_8b));
1248  goto error_out;
1249  }
1250 
1251  /* Read means and variances. */
1252  if ((s->g = gauden_init(cmd_ln_str_r(s->config, "_mean"),
1253  cmd_ln_str_r(s->config, "_var"),
1254  cmd_ln_float32_r(s->config, "-varfloor"),
1255  s->lmath)) == NULL) {
1256  E_ERROR("Failed to read means and variances\n");
1257  goto error_out;
1258  }
1259 
1260  /* Currently only a single codebook is supported. */
1261  if (s->g->n_mgau != 1)
1262  goto error_out;
1263 
1264  n_feat = s->g->n_feat;
1265 
1266  /* Verify n_feat and veclen, against acmod. */
1267  if (n_feat != feat_dimension1(acmod->fcb)) {
1268  E_ERROR("Number of streams does not match: %d != %d\n",
1269  n_feat, feat_dimension1(acmod->fcb));
1270  goto error_out;
1271  }
1272  for (i = 0; i < n_feat; ++i) {
1273  if (s->g->featlen[i] != feat_dimension2(acmod->fcb, i)) {
1274  E_ERROR("Dimension of stream %d does not match: %d != %d\n",
1275  i, s->g->featlen[i], feat_dimension2(acmod->fcb, i));
1276  goto error_out;
1277  }
1278  }
1279  /* Read mixture weights */
1280  if ((sendump_path = cmd_ln_str_r(s->config, "_sendump"))) {
1281  if (read_sendump(s, acmod->mdef, sendump_path) < 0) {
1282  goto error_out;
1283  }
1284  }
1285  else {
1286  if (read_mixw(s, cmd_ln_str_r(s->config, "_mixw"),
1287  cmd_ln_float32_r(s->config, "-mixwfloor")) < 0) {
1288  goto error_out;
1289  }
1290  }
1291  s->ds_ratio = cmd_ln_int32_r(s->config, "-ds");
1292 
1293  /* Determine top-N for each feature */
1294  s->topn_beam = ckd_calloc(n_feat, sizeof(*s->topn_beam));
1295  s->max_topn = cmd_ln_int32_r(s->config, "-topn");
1296  split_topn(cmd_ln_str_r(s->config, "-topn_beam"), s->topn_beam, n_feat);
1297  E_INFO("Maximum top-N: %d ", s->max_topn);
1298  E_INFOCONT("Top-N beams:");
1299  for (i = 0; i < n_feat; ++i) {
1300  E_INFOCONT(" %d", s->topn_beam[i]);
1301  }
1302  E_INFOCONT("\n");
1303 
1304  /* Top-N scores from recent frames */
1305  s->n_topn_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2;
1306  s->topn_hist = (vqFeature_t ***)
1307  ckd_calloc_3d(s->n_topn_hist, n_feat, s->max_topn,
1308  sizeof(***s->topn_hist));
1309  s->topn_hist_n = ckd_calloc_2d(s->n_topn_hist, n_feat,
1310  sizeof(**s->topn_hist_n));
1311  for (i = 0; i < s->n_topn_hist; ++i) {
1312  int j;
1313  for (j = 0; j < n_feat; ++j) {
1314  int k;
1315  for (k = 0; k < s->max_topn; ++k) {
1316  s->topn_hist[i][j][k].score = WORST_DIST;
1317  s->topn_hist[i][j][k].codeword = k;
1318  }
1319  }
1320  }
1321 
1322  ps = (ps_mgau_t *)s;
1323  ps->vt = &s2_semi_mgau_funcs;
1324  return ps;
1325 error_out:
1326  s2_semi_mgau_free(ps_mgau_base(s));
1327  return NULL;
1328 }
1329 
1330 int
1331 s2_semi_mgau_mllr_transform(ps_mgau_t *ps,
1332  ps_mllr_t *mllr)
1333 {
1334  s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps;
1335  return gauden_mllr_transform(s->g, mllr, s->config);
1336 }
1337 
1338 void
1339 s2_semi_mgau_free(ps_mgau_t *ps)
1340 {
1341  s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps;
1342 
1343  logmath_free(s->lmath);
1344  logmath_free(s->lmath_8b);
1345  if (s->sendump_mmap) {
1346  ckd_free_2d(s->mixw);
1347  mmio_file_unmap(s->sendump_mmap);
1348  }
1349  else {
1350  ckd_free_3d(s->mixw);
1351  if (s->mixw_cb)
1352  ckd_free(s->mixw_cb);
1353  }
1354  gauden_free(s->g);
1355  ckd_free(s->topn_beam);
1356  ckd_free_2d(s->topn_hist_n);
1357  ckd_free_3d((void **)s->topn_hist);
1358  ckd_free(s);
1359 }
int32 n_density
Number of Gaussian densities in each codebook-feature stream.
Definition: ms_gauden.h:90
void gauden_free(gauden_t *g)
Release memory allocated by gauden_init.
Definition: ms_gauden.c:358
mfcc_t *** det
log(determinant) for each variance vector; actually, log(sqrt(2*pi*det))
Definition: ms_gauden.h:85
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
int n_topn_hist
Number of past frames tracked.
Definition: s2_semi_mgau.h:77
vqFeature_t *** topn_hist
Top-N scores and codewords for past frames.
Definition: s2_semi_mgau.h:74
int32 gauden_mllr_transform(gauden_t *s, ps_mllr_t *mllr, cmd_ln_t *config)
Transform Gaussians according to an MLLR matrix (or, eventually, more).
Definition: ms_gauden.c:509
gauden_t * gauden_init(char const *meanfile, char const *varfile, float32 varfloor, logmath_t *lmath)
Read mixture gaussian codebooks from the given files.
Definition: ms_gauden.c:311
cmd_ln_t * config
Configuration.
Definition: acmod.h:150
int32 * featlen
feature length for each feature
Definition: ms_gauden.h:91
#define GMMSUB(a, b)
Subtract GMM component b (assumed to be positive) and saturate.
int32 n_mgau
Number of codebooks.
Definition: ms_gauden.h:88
Feature space linear transform structure.
Definition: acmod.h:82
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
mfcc_t **** mean
mean[codebook][feature][codeword] vector
Definition: ms_gauden.h:83
feat_t * fcb
Dynamic feature computation.
Definition: acmod.h:156
int32 n_feat
Number of feature streams in each codebook.
Definition: ms_gauden.h:89
uint8 ** topn_hist_n
Variable top-N for past frames.
Definition: s2_semi_mgau.h:75
ps_mgaufuncs_t * vt
vtable of mgau functions.
Definition: acmod.h:114
LOGMATH_INLINE int fast_logmath_add(logmath_t *lmath, int mlx, int mly)
Quickly log-add two negated log probabilities.
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
#define MAX_NEG_ASCR
Maximum negated acoustic score value.
vqFeature_t ** f
Top-N for the frame currently being scored.
Definition: s2_semi_mgau.h:76
#define MAX_NEG_MIXW
Maximum negated mixture weight value.
Acoustic model structure.
Definition: acmod.h:148
mfcc_t **** var
like mean; diagonal covariance vector only
Definition: ms_gauden.h:84
Common code shared between SC and PTM (tied-state) models.