PocketSphinx  5prealpha
ms_senone.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 /* System headers. */
39 #include <string.h>
40 #include <stdio.h>
41 #include <assert.h>
42 
43 /* SphinxBase headers. */
44 #include <sphinxbase/bio.h>
45 
46 /* Local headers. */
47 #include "ms_senone.h"
48 
49 #define MIXW_PARAM_VERSION "1.0"
50 #define SPDEF_PARAM_VERSION "1.2"
51 
52 static int32
53 senone_mgau_map_read(senone_t * s, char const *file_name)
54 {
55  FILE *fp;
56  int32 byteswap, chksum_present, n_gauden_present;
57  uint32 chksum;
58  int32 i;
59  char eofchk;
60  char **argname, **argval;
61  void *ptr;
62  float32 v;
63 
64  E_INFO("Reading senone gauden-codebook map file: %s\n", file_name);
65 
66  if ((fp = fopen(file_name, "rb")) == NULL)
67  E_FATAL_SYSTEM("Failed to open map file '%s' for reading", file_name);
68 
69  /* Read header, including argument-value info and 32-bit byteorder magic */
70  if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
71  E_FATAL("Failed to read header from file '%s'\n", file_name);
72 
73  /* Parse argument-value list */
74  chksum_present = 0;
75  n_gauden_present = 0;
76  for (i = 0; argname[i]; i++) {
77  if (strcmp(argname[i], "version") == 0) {
78  if (strcmp(argval[i], SPDEF_PARAM_VERSION) != 0) {
79  E_WARN("Version mismatch(%s): %s, expecting %s\n",
80  file_name, argval[i], SPDEF_PARAM_VERSION);
81  }
82 
83  /* HACK!! Convert version# to float32 and take appropriate action */
84  if (sscanf(argval[i], "%f", &v) != 1)
85  E_FATAL("%s: Bad version no. string: %s\n", file_name,
86  argval[i]);
87 
88  n_gauden_present = (v > 1.1) ? 1 : 0;
89  }
90  else if (strcmp(argname[i], "chksum0") == 0) {
91  chksum_present = 1; /* Ignore the associated value */
92  }
93  }
94  bio_hdrarg_free(argname, argval);
95  argname = argval = NULL;
96 
97  chksum = 0;
98 
99  /* Read #gauden (if version matches) */
100  if (n_gauden_present) {
101  E_INFO("Reading number of codebooks from %s\n", file_name);
102  if (bio_fread
103  (&(s->n_gauden), sizeof(int32), 1, fp, byteswap, &chksum) != 1)
104  E_FATAL("fread(%s) (#gauden) failed\n", file_name);
105  }
106 
107  /* Read 1d array data */
108  if (bio_fread_1d(&ptr, sizeof(uint32), &(s->n_sen), fp,
109  byteswap, &chksum) < 0) {
110  E_FATAL("bio_fread_1d(%s) failed\n", file_name);
111  }
112  s->mgau = ptr;
113  E_INFO("Mapping %d senones to %d codebooks\n", s->n_sen, s->n_gauden);
114 
115  /* Infer n_gauden if not present in this version */
116  if (!n_gauden_present) {
117  s->n_gauden = 1;
118  for (i = 0; i < s->n_sen; i++)
119  if (s->mgau[i] >= s->n_gauden)
120  s->n_gauden = s->mgau[i] + 1;
121  }
122 
123  if (chksum_present)
124  bio_verify_chksum(fp, byteswap, chksum);
125 
126  if (fread(&eofchk, 1, 1, fp) == 1)
127  E_FATAL("More data than expected in %s: %d\n", file_name, eofchk);
128 
129  fclose(fp);
130 
131  E_INFO("Read %d->%d senone-codebook mappings\n", s->n_sen,
132  s->n_gauden);
133 
134  return 1;
135 }
136 
137 
138 static int32
139 senone_mixw_read(senone_t * s, char const *file_name, logmath_t *lmath)
140 {
141  char eofchk;
142  FILE *fp;
143  int32 byteswap, chksum_present;
144  uint32 chksum;
145  float32 *pdf;
146  int32 i, f, c, p, n_err;
147  char **argname, **argval;
148 
149  E_INFO("Reading senone mixture weights: %s\n", file_name);
150 
151  if ((fp = fopen(file_name, "rb")) == NULL)
152  E_FATAL_SYSTEM("Failed to open mixture weights file '%s' for reading", file_name);
153 
154  /* Read header, including argument-value info and 32-bit byteorder magic */
155  if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
156  E_FATAL("Failed to read header from file '%s'\n", file_name);
157 
158  /* Parse argument-value list */
159  chksum_present = 0;
160  for (i = 0; argname[i]; i++) {
161  if (strcmp(argname[i], "version") == 0) {
162  if (strcmp(argval[i], MIXW_PARAM_VERSION) != 0)
163  E_WARN("Version mismatch(%s): %s, expecting %s\n",
164  file_name, argval[i], MIXW_PARAM_VERSION);
165  }
166  else if (strcmp(argname[i], "chksum0") == 0) {
167  chksum_present = 1; /* Ignore the associated value */
168  }
169  }
170  bio_hdrarg_free(argname, argval);
171  argname = argval = NULL;
172 
173  chksum = 0;
174 
175  /* Read #senones, #features, #codewords, arraysize */
176  if ((bio_fread(&(s->n_sen), sizeof(int32), 1, fp, byteswap, &chksum) !=
177  1)
178  ||
179  (bio_fread(&(s->n_feat), sizeof(int32), 1, fp, byteswap, &chksum)
180  != 1)
181  || (bio_fread(&(s->n_cw), sizeof(int32), 1, fp, byteswap, &chksum)
182  != 1)
183  || (bio_fread(&i, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
184  E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
185  }
186  if (i != s->n_sen * s->n_feat * s->n_cw) {
187  E_FATAL
188  ("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n",
189  file_name, i, s->n_sen, s->n_feat, s->n_cw);
190  }
191 
192  /*
193  * Compute #LSB bits to be dropped to represent mixwfloor with 8 bits.
194  * All PDF values will be truncated (in the LSB positions) by these many bits.
195  */
196  if ((s->mixwfloor <= 0.0) || (s->mixwfloor >= 1.0))
197  E_FATAL("mixwfloor (%e) not in range (0, 1)\n", s->mixwfloor);
198 
199  /* Use a fixed shift for compatibility with everything else. */
200  E_INFO("Truncating senone logs3(pdf) values by %d bits\n", SENSCR_SHIFT);
201 
202  /*
203  * Allocate memory for senone PDF data. Organize normally or transposed depending on
204  * s->n_gauden.
205  */
206  if (s->n_gauden > 1) {
207  E_INFO("Not transposing mixture weights in memory\n");
208  s->pdf =
209  (senprob_t ***) ckd_calloc_3d(s->n_sen, s->n_feat, s->n_cw,
210  sizeof(senprob_t));
211  }
212  else {
213  E_INFO("Transposing mixture weights in memory\n");
214  s->pdf =
215  (senprob_t ***) ckd_calloc_3d(s->n_feat, s->n_cw, s->n_sen,
216  sizeof(senprob_t));
217  }
218 
219  /* Temporary structure to read in floats */
220  pdf = (float32 *) ckd_calloc(s->n_cw, sizeof(float32));
221 
222  /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
223  n_err = 0;
224  for (i = 0; i < s->n_sen; i++) {
225  for (f = 0; f < s->n_feat; f++) {
226  if (bio_fread
227  ((void *) pdf, sizeof(float32), s->n_cw, fp, byteswap,
228  &chksum)
229  != s->n_cw) {
230  E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
231  }
232 
233  /* Normalize and floor */
234  if (vector_sum_norm(pdf, s->n_cw) <= 0.0)
235  n_err++;
236  vector_floor(pdf, s->n_cw, s->mixwfloor);
237  vector_sum_norm(pdf, s->n_cw);
238 
239  /* Convert to logs3, truncate to 8 bits, and store in s->pdf */
240  for (c = 0; c < s->n_cw; c++) {
241  p = -(logmath_log(lmath, pdf[c]));
242  p += (1 << (SENSCR_SHIFT - 1)) - 1; /* Rounding before truncation */
243 
244  if (s->n_gauden > 1)
245  s->pdf[i][f][c] =
246  (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255;
247  else
248  s->pdf[f][c][i] =
249  (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255;
250  }
251  }
252  }
253  if (n_err > 0)
254  E_WARN("Weight normalization failed for %d mixture weights components\n", n_err);
255 
256  ckd_free(pdf);
257 
258  if (chksum_present)
259  bio_verify_chksum(fp, byteswap, chksum);
260 
261  if (fread(&eofchk, 1, 1, fp) == 1)
262  E_FATAL("More data than expected in %s\n", file_name);
263 
264  fclose(fp);
265 
266  E_INFO
267  ("Read mixture weights for %d senones: %d features x %d codewords\n",
268  s->n_sen, s->n_feat, s->n_cw);
269 
270  return 1;
271 }
272 
273 
274 senone_t *
275 senone_init(gauden_t *g, char const *mixwfile, char const *sen2mgau_map_file,
276  float32 mixwfloor, logmath_t *lmath, bin_mdef_t *mdef)
277 {
278  senone_t *s;
279  int32 n = 0, i;
280 
281  s = (senone_t *) ckd_calloc(1, sizeof(senone_t));
282  s->lmath = logmath_init(logmath_get_base(lmath), SENSCR_SHIFT, TRUE);
283  s->mixwfloor = mixwfloor;
284 
285  s->n_gauden = g->n_mgau;
286  if (sen2mgau_map_file) {
287  if (!(strcmp(sen2mgau_map_file, ".semi.") == 0
288  || strcmp(sen2mgau_map_file, ".ptm.") == 0
289  || strcmp(sen2mgau_map_file, ".cont.") == 0)) {
290  senone_mgau_map_read(s, sen2mgau_map_file);
291  n = s->n_sen;
292  }
293  }
294  else {
295  if (s->n_gauden == 1)
296  sen2mgau_map_file = ".semi.";
297  else if (s->n_gauden == bin_mdef_n_ciphone(mdef))
298  sen2mgau_map_file = ".ptm.";
299  else
300  sen2mgau_map_file = ".cont.";
301  }
302 
303  senone_mixw_read(s, mixwfile, lmath);
304 
305  if (strcmp(sen2mgau_map_file, ".semi.") == 0) {
306  /* All-to-1 senones-codebook mapping */
307  E_INFO("Mapping all senones to one codebook\n");
308  s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau));
309  }
310  else if (strcmp(sen2mgau_map_file, ".ptm.") == 0) {
311  /* All-to-ciphone-id senones-codebook mapping */
312  E_INFO("Mapping senones to context-independent phone codebooks\n");
313  s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau));
314  for (i = 0; i < s->n_sen; i++)
315  s->mgau[i] = bin_mdef_sen2cimap(mdef, i);
316  }
317  else if (strcmp(sen2mgau_map_file, ".cont.") == 0
318  || strcmp(sen2mgau_map_file, ".s3cont.") == 0) {
319  /* 1-to-1 senone-codebook mapping */
320  E_INFO("Mapping senones to individual codebooks\n");
321  if (s->n_sen <= 1)
322  E_FATAL("#senone=%d; must be >1\n", s->n_sen);
323 
324  s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau));
325  for (i = 0; i < s->n_sen; i++)
326  s->mgau[i] = i;
327  /* Not sure why this is here, it probably does nothing. */
328  s->n_gauden = s->n_sen;
329  }
330  else {
331  if (s->n_sen != n)
332  E_FATAL("#senones inconsistent: %d in %s; %d in %s\n",
333  n, sen2mgau_map_file, s->n_sen, mixwfile);
334  }
335 
336  s->featscr = NULL;
337  return s;
338 }
339 
340 void
342 {
343  if (s == NULL)
344  return;
345  if (s->pdf)
346  ckd_free_3d((void *) s->pdf);
347  if (s->mgau)
348  ckd_free(s->mgau);
349  if (s->featscr)
350  ckd_free(s->featscr);
351  logmath_free(s->lmath);
352  ckd_free(s);
353 }
354 
355 
356 /*
357  * Compute senone score for one senone.
358  * NOTE: Remember that senone PDF tables contain SCALED, NEGATED logs3 values.
359  * NOTE: Remember also that PDF data may be transposed or not depending on s->n_gauden.
360  */
361 int32
362 senone_eval(senone_t * s, int id, gauden_dist_t ** dist, int32 n_top)
363 {
364  int32 scr; /* total senone score */
365  int32 fden; /* Gaussian density */
366  int32 fscr; /* senone score for one feature */
367  int32 fwscr; /* senone score for one feature, one codeword */
368  int32 f, t;
369  gauden_dist_t *fdist;
370 
371  assert((id >= 0) && (id < s->n_sen));
372  assert((n_top > 0) && (n_top <= s->n_cw));
373 
374  scr = 0;
375 
376  for (f = 0; f < s->n_feat; f++) {
377 #ifdef SPHINX_DEBUG
378  int top;
379 #endif
380  fdist = dist[f];
381 
382  /* Top codeword for feature f */
383 #ifdef SPHINX_DEBUG
384  top =
385 #endif
386  fden = ((int32)fdist[0].dist + ((1<<SENSCR_SHIFT) - 1)) >> SENSCR_SHIFT;
387  fscr = (s->n_gauden > 1)
388  ? (fden + -s->pdf[id][f][fdist[0].id]) /* untransposed */
389  : (fden + -s->pdf[f][fdist[0].id][id]); /* transposed */
390  E_DEBUG(1, ("fden[%d][%d] l+= %d + %d = %d\n",
391  id, f, -(fscr - fden), -(fden-top), -(fscr-top)));
392  /* Remaining of n_top codewords for feature f */
393  for (t = 1; t < n_top; t++) {
394  fden = ((int32)fdist[t].dist + ((1<<SENSCR_SHIFT) - 1)) >> SENSCR_SHIFT;
395  fwscr = (s->n_gauden > 1) ?
396  (fden + -s->pdf[id][f][fdist[t].id]) :
397  (fden + -s->pdf[f][fdist[t].id][id]);
398  fscr = logmath_add(s->lmath, fscr, fwscr);
399  E_DEBUG(1, ("fden[%d][%d] l+= %d + %d = %d\n",
400  id, f, -(fwscr - fden), -(fden-top), -(fscr-top)));
401  }
402  /* Senone scores are also scaled, negated logs3 values. Hence
403  * we have to negate the stuff we calculated above. */
404  scr -= fscr;
405  }
406  /* Downscale scores. */
407  scr /= s->aw;
408 
409  /* Avoid overflowing int16 */
410  if (scr > 32767)
411  scr = 32767;
412  if (scr < -32768)
413  scr = -32768;
414  return scr;
415 }
senprob_t *** pdf
gaussian density mixture weights, organized two possible ways depending on n_gauden: if (n_gauden > 1...
Definition: ms_senone.h:77
int32 id
Index of codeword (gaussian density)
Definition: ms_gauden.h:72
(Sphinx 3.0 specific) multiple streams senones.
int32 aw
Inverse acoustic weight.
Definition: ms_senone.h:92
Structure to store distance (density) values for a given input observation wrt density values in some...
Definition: ms_gauden.h:71
float32 mixwfloor
floor applied to each PDF entry
Definition: ms_senone.h:89
uint32 n_sen
Number senones in this set.
Definition: ms_senone.h:85
uint32 * mgau
senone-id -> mgau-id mapping for senones in this set
Definition: ms_senone.h:90
8-bit senone PDF structure.
Definition: ms_senone.h:76
uint32 n_feat
Number feature streams.
Definition: ms_senone.h:86
int32 * featscr
The feature score for every senone, will be initialized inside senone_eval_all.
Definition: ms_senone.h:91
int32 senone_eval(senone_t *s, int id, gauden_dist_t **dist, int n_top)
Evaluate the score for the given senone with respect to the given top N gaussian codewords.
int32 n_mgau
Number codebooks.
Definition: ms_gauden.h:88
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
void senone_free(senone_t *s)
Release memory allocated by senone_init.
Definition: ms_senone.c:341
logmath_t * lmath
log math computation
Definition: ms_senone.h:84
uint32 n_cw
Number codewords per codebook,stream.
Definition: ms_senone.h:87
uint32 n_gauden
Number gaussian density codebooks referred to by senones.
Definition: ms_senone.h:88
Multivariate gaussian mixture density parameters.
Definition: ms_gauden.h:82
senone_t * senone_init(gauden_t *g, char const *mixwfile, char const *mgau_mapfile, float32 mixwfloor, logmath_t *lmath, bin_mdef_t *mdef)
Load a set of senones (mixing weights and mixture gaussian codebook mappings) from the given files...
Definition: ms_senone.c:275
uint8 senprob_t
Senone logs3-probs, truncated to 8 bits.
Definition: ms_senone.h:67