PocketSphinx  5prealpha
ms_gauden.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 #include <assert.h>
39 #include <string.h>
40 #include <math.h>
41 #include <float.h>
42 
43 #include <sphinxbase/bio.h>
44 #include <sphinxbase/err.h>
45 #include <sphinxbase/ckd_alloc.h>
46 
47 #include "ms_gauden.h"
48 
49 #define GAUDEN_PARAM_VERSION "1.0"
50 
51 #ifndef M_PI
52 #define M_PI 3.1415926535897932385e0
53 #endif
54 
55 #define WORST_DIST (int32)(0x80000000)
56 
57 void
59 {
60  int32 c;
61 
62  for (c = 0; c < g->n_mgau; c++)
63  gauden_dump_ind(g, c);
64 }
65 
66 
67 void
68 gauden_dump_ind(const gauden_t * g, int senidx)
69 {
70  int32 f, d, i;
71 
72  for (f = 0; f < g->n_feat; f++) {
73  E_INFO("Codebook %d, Feature %d (%dx%d):\n",
74  senidx, f, g->n_density, g->featlen[f]);
75 
76  for (d = 0; d < g->n_density; d++) {
77  printf("m[%3d]", d);
78  for (i = 0; i < g->featlen[f]; i++)
79  printf(" %7.4f", MFCC2FLOAT(g->mean[senidx][f][d][i]));
80  printf("\n");
81  }
82  printf("\n");
83 
84  for (d = 0; d < g->n_density; d++) {
85  printf("v[%3d]", d);
86  for (i = 0; i < g->featlen[f]; i++)
87  printf(" %d", (int)g->var[senidx][f][d][i]);
88  printf("\n");
89  }
90  printf("\n");
91 
92  for (d = 0; d < g->n_density; d++)
93  printf("d[%3d] %d\n", d, (int)g->det[senidx][f][d]);
94  }
95  fflush(stderr);
96 }
97 
107 static float ****
108 gauden_param_read(const char *file_name,
109  int32 * out_n_mgau,
110  int32 * out_n_feat,
111  int32 * out_n_density,
112  int32 ** out_veclen)
113 {
114  char tmp;
115  FILE *fp;
116  int32 i, j, k, l, n, blk;
117  int32 n_mgau;
118  int32 n_feat;
119  int32 n_density;
120  int32 *veclen;
121  int32 byteswap, chksum_present;
122  float32 ****out;
123  float32 *buf;
124  char **argname, **argval;
125  uint32 chksum;
126 
127  E_INFO("Reading mixture gaussian parameter: %s\n", file_name);
128 
129  if ((fp = fopen(file_name, "rb")) == NULL) {
130  E_ERROR_SYSTEM("Failed to open file '%s' for reading", file_name);
131  return NULL;
132  }
133 
134  /* Read header, including argument-value info and 32-bit byteorder magic */
135  if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) {
136  E_ERROR("Failed to read header from file '%s'\n", file_name);
137  fclose(fp);
138  return NULL;
139  }
140 
141  /* Parse argument-value list */
142  chksum_present = 0;
143  for (i = 0; argname[i]; i++) {
144  if (strcmp(argname[i], "version") == 0) {
145  if (strcmp(argval[i], GAUDEN_PARAM_VERSION) != 0)
146  E_WARN("Version mismatch(%s): %s, expecting %s\n",
147  file_name, argval[i], GAUDEN_PARAM_VERSION);
148  }
149  else if (strcmp(argname[i], "chksum0") == 0) {
150  chksum_present = 1; /* Ignore the associated value */
151  }
152  }
153  bio_hdrarg_free(argname, argval);
154  argname = argval = NULL;
155 
156  chksum = 0;
157 
158  /* #Codebooks */
159  if (bio_fread(&n_mgau, sizeof(int32), 1, fp, byteswap, &chksum) != 1) {
160  E_ERROR("Failed to read number fo codebooks from %s\n", file_name);
161  fclose(fp);
162  return NULL;
163  }
164  *out_n_mgau = n_mgau;
165 
166  /* #Features/codebook */
167  if (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) != 1) {
168  E_ERROR("Failed to read number of features from %s\n", file_name);
169  fclose(fp);
170  return NULL;
171  }
172  *out_n_feat = n_feat;
173 
174  /* #Gaussian densities/feature in each codebook */
175  if (bio_fread(&n_density, sizeof(int32), 1, fp, byteswap, &chksum) != 1) {
176  E_ERROR("fread(%s) (#density/codebook) failed\n", file_name);
177  }
178  *out_n_density = n_density;
179 
180  /* #Dimensions in each feature stream */
181  veclen = ckd_calloc(n_feat, sizeof(uint32));
182  *out_veclen = veclen;
183  if (bio_fread(veclen, sizeof(int32), n_feat, fp, byteswap, &chksum) !=
184  n_feat) {
185  E_ERROR("fread(%s) (feature-lengths) failed\n", file_name);
186  fclose(fp);
187  return NULL;
188  }
189 
190  /* blk = total vector length of all feature streams */
191  for (i = 0, blk = 0; i < n_feat; i++)
192  blk += veclen[i];
193 
194  /* #Floats to follow; for the ENTIRE SET of CODEBOOKS */
195  if (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1) {
196  E_ERROR("Failed to read number of parameters from %s\n", file_name);
197  fclose(fp);
198  return NULL;
199  }
200 
201  if (n != n_mgau * n_density * blk) {
202  E_ERROR
203  ("Number of parameters in %s(%d) doesn't match dimensions: %d x %d x %d\n",
204  file_name, n, n_mgau, n_density, blk);
205  fclose(fp);
206  return NULL;
207  }
208 
209  /* Allocate memory for mixture gaussian densities if not already allocated */
210  out = (float32 ****) ckd_calloc_3d(n_mgau, n_feat, n_density,
211  sizeof(float32 *));
212  buf = (float32 *) ckd_calloc(n, sizeof(float32));
213  for (i = 0, l = 0; i < n_mgau; i++) {
214  for (j = 0; j < n_feat; j++) {
215  for (k = 0; k < n_density; k++) {
216  out[i][j][k] = &buf[l];
217  l += veclen[j];
218  }
219  }
220  }
221 
222  /* Read mixture gaussian densities data */
223  if (bio_fread(buf, sizeof(float32), n, fp, byteswap, &chksum) != n) {
224  E_ERROR("Failed to read density data from file '%s'\n", file_name);
225  fclose(fp);
226  ckd_free_3d(out);
227  return NULL;
228  }
229 
230  if (chksum_present)
231  bio_verify_chksum(fp, byteswap, chksum);
232 
233  if (fread(&tmp, 1, 1, fp) == 1) {
234  E_ERROR("More data than expected in %s\n", file_name);
235  fclose(fp);
236  ckd_free_3d(out);
237  return NULL;
238  }
239 
240  fclose(fp);
241 
242  E_INFO("%d codebook, %d feature, size: \n", n_mgau, n_feat);
243  for (i = 0; i < n_feat; i++)
244  E_INFO(" %dx%d\n", n_density, veclen[i]);
245 
246  return out;
247 }
248 
249 static void
250 gauden_param_free(mfcc_t **** p)
251 {
252  ckd_free(p[0][0][0]);
253  ckd_free_3d(p);
254 }
255 
256 /*
257  * Some of the gaussian density computation can be carried out in advance:
258  * log(determinant) calculation,
259  * 1/(2*var) in the exponent,
260  * NOTE; The density computation is performed in log domain.
261  */
262 static int32
263 gauden_dist_precompute(gauden_t * g, logmath_t *lmath, float32 varfloor)
264 {
265  int32 i, m, f, d, flen;
266  mfcc_t *meanp;
267  mfcc_t *varp;
268  mfcc_t *detp;
269  int32 floored;
270 
271  floored = 0;
272  /* Allocate space for determinants */
273  g->det = ckd_calloc_3d(g->n_mgau, g->n_feat, g->n_density, sizeof(***g->det));
274 
275  for (m = 0; m < g->n_mgau; m++) {
276  for (f = 0; f < g->n_feat; f++) {
277  flen = g->featlen[f];
278 
279  /* Determinants for all variance vectors in g->[m][f] */
280  for (d = 0, detp = g->det[m][f]; d < g->n_density; d++, detp++) {
281  *detp = 0;
282  for (i = 0, varp = g->var[m][f][d], meanp = g->mean[m][f][d];
283  i < flen; i++, varp++, meanp++) {
284  float32 *fvarp = (float32 *)varp;
285 
286 #ifdef FIXED_POINT
287  float32 *fmp = (float32 *)meanp;
288  *meanp = FLOAT2MFCC(*fmp);
289 #endif
290  if (*fvarp < varfloor) {
291  *fvarp = varfloor;
292  ++floored;
293  }
294  *detp += (mfcc_t)logmath_log(lmath,
295  1.0 / sqrt(*fvarp * 2.0 * M_PI));
296  /* Precompute this part of the exponential */
297  *varp = (mfcc_t)logmath_ln_to_log(lmath,
298  (1.0 / (*fvarp * 2.0)));
299  }
300  }
301  }
302  }
303 
304  E_INFO("%d variance values floored\n", floored);
305 
306  return 0;
307 }
308 
309 
310 gauden_t *
311 gauden_init(char const *meanfile, char const *varfile, float32 varfloor, logmath_t *lmath)
312 {
313  int32 i, m, f, d, *flen;
314  gauden_t *g;
315 
316  assert(meanfile != NULL);
317  assert(varfile != NULL);
318  assert(varfloor > 0.0);
319 
320  g = (gauden_t *) ckd_calloc(1, sizeof(gauden_t));
321  g->lmath = lmath;
322 
323  g->mean = (mfcc_t ****)gauden_param_read(meanfile, &g->n_mgau, &g->n_feat, &g->n_density,
324  &g->featlen);
325  if (g->mean == NULL) {
326  return NULL;
327  }
328  g->var = (mfcc_t ****)gauden_param_read(varfile, &m, &f, &d, &flen);
329  if (g->var == NULL) {
330  return NULL;
331  }
332 
333  /* Verify mean and variance parameter dimensions */
334  if ((m != g->n_mgau) || (f != g->n_feat) || (d != g->n_density)) {
335  E_ERROR
336  ("Mixture-gaussians dimensions for means and variances differ\n");
337  ckd_free(flen);
338  gauden_free(g);
339  return NULL;
340  }
341  for (i = 0; i < g->n_feat; i++) {
342  if (g->featlen[i] != flen[i]) {
343  E_FATAL("Feature lengths for means and variances differ\n");
344  ckd_free(flen);
345  gauden_free(g);
346  return NULL;
347  }
348  }
349 
350  ckd_free(flen);
351 
352  gauden_dist_precompute(g, lmath, varfloor);
353 
354  return g;
355 }
356 
357 void
359 {
360  if (g == NULL)
361  return;
362  if (g->mean)
363  gauden_param_free(g->mean);
364  if (g->var)
365  gauden_param_free(g->var);
366  if (g->det)
367  ckd_free_3d(g->det);
368  if (g->featlen)
369  ckd_free(g->featlen);
370  ckd_free(g);
371 }
372 
373 /* See compute_dist below */
374 static int32
375 compute_dist_all(gauden_dist_t * out_dist, mfcc_t* obs, int32 featlen,
376  mfcc_t ** mean, mfcc_t ** var, mfcc_t * det,
377  int32 n_density)
378 {
379  int32 i, d;
380 
381  for (d = 0; d < n_density; ++d) {
382  mfcc_t *m;
383  mfcc_t *v;
384  mfcc_t dval;
385 
386  m = mean[d];
387  v = var[d];
388  dval = det[d];
389 
390  for (i = 0; i < featlen; i++) {
391  mfcc_t diff;
392 #ifdef FIXED_POINT
393  /* Have to check for underflows here. */
394  mfcc_t pdval = dval;
395  diff = obs[i] - m[i];
396  dval -= MFCCMUL(MFCCMUL(diff, diff), v[i]);
397  if (dval > pdval) {
398  dval = WORST_SCORE;
399  break;
400  }
401 #else
402  diff = obs[i] - m[i];
403  /* The compiler really likes this to be a single
404  * expression, for whatever reason. */
405  dval -= diff * diff * v[i];
406 #endif
407  }
408 
409  out_dist[d].dist = dval;
410  out_dist[d].id = d;
411  }
412 
413  return 0;
414 }
415 
416 
417 /*
418  * Compute the top-N closest gaussians from the chosen set (mgau,feat)
419  * for the given input observation vector.
420  */
421 static int32
422 compute_dist(gauden_dist_t * out_dist, int32 n_top,
423  mfcc_t * obs, int32 featlen,
424  mfcc_t ** mean, mfcc_t ** var, mfcc_t * det,
425  int32 n_density)
426 {
427  int32 i, j, d;
428  gauden_dist_t *worst;
429 
430  /* Special case optimization when n_density <= n_top */
431  if (n_top >= n_density)
432  return (compute_dist_all
433  (out_dist, obs, featlen, mean, var, det, n_density));
434 
435  for (i = 0; i < n_top; i++)
436  out_dist[i].dist = WORST_DIST;
437  worst = &(out_dist[n_top - 1]);
438 
439  for (d = 0; d < n_density; d++) {
440  mfcc_t *m;
441  mfcc_t *v;
442  mfcc_t dval;
443 
444  m = mean[d];
445  v = var[d];
446  dval = det[d];
447 
448  for (i = 0; (i < featlen) && (dval >= worst->dist); i++) {
449  mfcc_t diff;
450 #ifdef FIXED_POINT
451  /* Have to check for underflows here. */
452  mfcc_t pdval = dval;
453  diff = obs[i] - m[i];
454  dval -= MFCCMUL(MFCCMUL(diff, diff), v[i]);
455  if (dval > pdval) {
456  dval = WORST_SCORE;
457  break;
458  }
459 #else
460  diff = obs[i] - m[i];
461  /* The compiler really likes this to be a single
462  * expression, for whatever reason. */
463  dval -= diff * diff * v[i];
464 #endif
465  }
466 
467  if ((i < featlen) || (dval < worst->dist)) /* Codeword d worse than worst */
468  continue;
469 
470  /* Codeword d at least as good as worst so far; insert in the ordered list */
471  for (i = 0; (i < n_top) && (dval < out_dist[i].dist); i++);
472  assert(i < n_top);
473  for (j = n_top - 1; j > i; --j)
474  out_dist[j] = out_dist[j - 1];
475  out_dist[i].dist = dval;
476  out_dist[i].id = d;
477  }
478 
479  return 0;
480 }
481 
482 
483 /*
484  * Compute distances of the input observation from the top N codewords in the given
485  * codebook (g->{mean,var}[mgau]). The input observation, obs, includes vectors for
486  * all features in the codebook.
487  */
488 int32
490  int mgau, int32 n_top, mfcc_t** obs, gauden_dist_t ** out_dist)
491 {
492  int32 f;
493 
494  assert((n_top > 0) && (n_top <= g->n_density));
495 
496  for (f = 0; f < g->n_feat; f++) {
497  compute_dist(out_dist[f], n_top,
498  obs[f], g->featlen[f],
499  g->mean[mgau][f], g->var[mgau][f], g->det[mgau][f],
500  g->n_density);
501  E_DEBUG(3, ("Top CW(%d,%d) = %d %d\n", mgau, f, out_dist[f][0].id,
502  (int)out_dist[f][0].dist >> SENSCR_SHIFT));
503  }
504 
505  return 0;
506 }
507 
508 int32
509 gauden_mllr_transform(gauden_t *g, ps_mllr_t *mllr, cmd_ln_t *config)
510 {
511  int32 i, m, f, d, *flen;
512 
513  /* Free data if already here */
514  if (g->mean)
515  gauden_param_free(g->mean);
516  if (g->var)
517  gauden_param_free(g->var);
518  if (g->det)
519  ckd_free_3d(g->det);
520  if (g->featlen)
521  ckd_free(g->featlen);
522  g->det = NULL;
523  g->featlen = NULL;
524 
525  /* Reload means and variances (un-precomputed). */
526  g->mean = (mfcc_t ****)gauden_param_read(cmd_ln_str_r(config, "_mean"), &g->n_mgau, &g->n_feat, &g->n_density,
527  &g->featlen);
528  g->var = (mfcc_t ****)gauden_param_read(cmd_ln_str_r(config, "_var"), &m, &f, &d, &flen);
529 
530  /* Verify mean and variance parameter dimensions */
531  if ((m != g->n_mgau) || (f != g->n_feat) || (d != g->n_density))
532  E_FATAL
533  ("Mixture-gaussians dimensions for means and variances differ\n");
534  for (i = 0; i < g->n_feat; i++)
535  if (g->featlen[i] != flen[i])
536  E_FATAL("Feature lengths for means and variances differ\n");
537  ckd_free(flen);
538 
539  /* Transform codebook for each stream s */
540  for (i = 0; i < g->n_mgau; ++i) {
541  for (f = 0; f < g->n_feat; ++f) {
542  float64 *temp;
543  temp = (float64 *) ckd_calloc(g->featlen[f], sizeof(float64));
544  /* Transform each density d in selected codebook */
545  for (d = 0; d < g->n_density; d++) {
546  int l;
547  for (l = 0; l < g->featlen[f]; l++) {
548  temp[l] = 0.0;
549  for (m = 0; m < g->featlen[f]; m++) {
550  /* FIXME: For now, only one class, hence the zeros below. */
551  temp[l] += mllr->A[f][0][l][m] * g->mean[i][f][d][m];
552  }
553  temp[l] += mllr->b[f][0][l];
554  }
555 
556  for (l = 0; l < g->featlen[f]; l++) {
557  g->mean[i][f][d][l] = (float32) temp[l];
558  g->var[i][f][d][l] *= mllr->h[f][0][l];
559  }
560  }
561  ckd_free(temp);
562  }
563  }
564 
565  /* Re-precompute (if we aren't adapting variances this isn't
566  * actually necessary...) */
567  gauden_dist_precompute(g, g->lmath, cmd_ln_float32_r(config, "-varfloor"));
568  return 0;
569 }
void gauden_dump(const gauden_t *g)
Dump the definition of Gaussian distribution.
Definition: ms_gauden.c:58
logmath_t * lmath
log math computation
Definition: ms_gauden.h:87
int32 n_density
Number gaussian densities in each codebook-feature stream.
Definition: ms_gauden.h:90
void gauden_free(gauden_t *g)
Release memory allocated by gauden_init.
Definition: ms_gauden.c:358
int32 id
Index of codeword (gaussian density)
Definition: ms_gauden.h:72
mfcc_t *** det
log(determinant) for each variance vector; actually, log(sqrt(2*pi*det))
Definition: ms_gauden.h:85
Structure to store distance (density) values for a given input observation wrt density values in some...
Definition: ms_gauden.h:71
float32 *** h
Diagonal transformation of variances.
Definition: acmod.h:89
int32 gauden_mllr_transform(gauden_t *s, ps_mllr_t *mllr, cmd_ln_t *config)
Transform Gaussians according to an MLLR matrix (or, eventually, more).
Definition: ms_gauden.c:509
float32 *** b
Bias part of mean transformations.
Definition: acmod.h:88
gauden_t * gauden_init(char const *meanfile, char const *varfile, float32 varfloor, logmath_t *lmath)
Read mixture gaussian codebooks from the given files.
Definition: ms_gauden.c:311
#define WORST_SCORE
Large "bad" score.
Definition: hmm.h:84
int32 * featlen
feature length for each feature
Definition: ms_gauden.h:91
int32 n_mgau
Number of codebooks.
Definition: ms_gauden.h:88
(Sphinx 3.0 specific) Gaussian density module.
Feature space linear transform structure.
Definition: acmod.h:82
float32 **** A
Rotation part of mean transformations.
Definition: acmod.h:87
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
mfcc_t **** mean
mean[codebook][feature][codeword] vector
Definition: ms_gauden.h:83
int32 n_feat
Number feature streams in each codebook.
Definition: ms_gauden.h:89
mfcc_t dist
Density value for input observation wrt above codeword; NOTE: result in logs3 domain...
Definition: ms_gauden.h:73
void gauden_dump_ind(const gauden_t *g, int senidx)
Dump the definition of Gaussian distribution of a particular index to the standard output stream...
Definition: ms_gauden.c:68
Multivariate gaussian mixture density parameters.
Definition: ms_gauden.h:82
int32 gauden_dist(gauden_t *g, int mgau, int n_top, mfcc_t **obs, gauden_dist_t **out_dist)
Compute gaussian density values for the given input observation vector wrt the specified mixture gaus...
mfcc_t **** var
like mean; diagonal covariance vector only
Definition: ms_gauden.h:84