SphinxBase  5prealpha
feat.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 /*
38  * feat.c -- Feature vector description and cepstra->feature computation.
39  *
40  * **********************************************
41  * CMU ARPA Speech Project
42  *
43  * Copyright (c) 1996 Carnegie Mellon University.
44  * ALL RIGHTS RESERVED.
45  * **********************************************
46  *
47  * HISTORY
48  * $Log$
49  * Revision 1.22 2006/02/23 03:59:40 arthchan2003
50  * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: a, Free buffers correctly. b, Fixed dox-doc.
51  *
52  * Revision 1.21.4.3 2005/10/17 04:45:57 arthchan2003
53  * Free stuff in cmn and feat correctly.
54  *
55  * Revision 1.21.4.2 2005/09/26 02:19:57 arthchan2003
56  * Add message to show the directory which the feature is searched for.
57  *
58  * Revision 1.21.4.1 2005/07/03 22:55:50 arthchan2003
59  * More correct deallocation in feat.c. The cmn deallocation is still not correct at this point.
60  *
61  * Revision 1.21 2005/06/22 03:29:35 arthchan2003
62  * Makefile.am s for all subdirectory of libs3decoder/
63  *
64  * Revision 1.4 2005/04/21 23:50:26 archan
65  * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in. At this moment, everything in search mode 5 is already done. It is time to test the idea whether the search can really be used.
66  *
67  * Revision 1.3 2005/03/30 01:22:46 archan
68  * Fixed mistakes in last updates. Add
69  *
70  *
71  * 20.Apr.2001 RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu)
72  * Adding feat_free() to free allocated memory
73  *
74  * 02-Jan-2001 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University
75  * Modified feat_s2mfc2feat_block() to handle empty buffers at
76  * the end of an utterance
77  *
78  * 30-Dec-2000 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University
79  * Added feat_s2mfc2feat_block() to allow feature computation
80  * from sequences of blocks of cepstral vectors
81  *
82  * 12-Jun-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
83  * Major changes to accommodate arbitrary feature input types. Added
84  * feat_read(), moved various cep2feat functions from other files into
85  * this one. Also, made this module object-oriented with the feat_t type.
86  * Changed definition of s2mfc_read to let the caller manage MFC buffers.
87  *
88  * 03-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
89  * Added unistd.h include.
90  *
91  * 02-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
92  * Added check for sf argument to s2mfc_read being within file size.
93  *
94  * 18-Sep-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
95  * Added sf, ef parameters to s2mfc_read().
96  *
97  * 10-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
98  * Added feat_cepsize().
99  * Added different feature-handling (s2_4x, s3_1x39 at this point).
100  * Moved feature-dependent functions to feature-dependent files.
101  *
102  * 09-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
103  * Moved constant declarations from feat.h into here.
104  *
105  * 04-Nov-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
106  * Created.
107  */
108 
109 
110 /*
111  * This module encapsulates different feature streams used by the Sphinx group. New
112  * stream types can be added by augmenting feat_init() and providing an accompanying
113  * compute_feat function. It also provides a "generic" feature vector definition for
114  * handling "arbitrary" speech input feature types (see the last section in feat_init()).
115  * In this case the speech input data should already be feature vectors; no computation,
116  * such as MFC->feature conversion, is available or needed.
117  */
118 
119 #include <assert.h>
120 #include <string.h>
121 #ifdef HAVE_CONFIG_H
122 #include <config.h>
123 #endif
124 
125 #ifdef _MSC_VER
126 #pragma warning (disable: 4244 4996)
127 #endif
128 
129 #include "sphinxbase/fe.h"
130 #include "sphinxbase/feat.h"
131 #include "sphinxbase/bio.h"
132 #include "sphinxbase/pio.h"
133 #include "sphinxbase/cmn.h"
134 #include "sphinxbase/agc.h"
135 #include "sphinxbase/err.h"
136 #include "sphinxbase/ckd_alloc.h"
137 #include "sphinxbase/prim_type.h"
138 #include "sphinxbase/glist.h"
139 
140 #define FEAT_VERSION "1.0"
141 #define FEAT_DCEP_WIN 2
142 
#ifdef DUMP_FEATURES
/*
 * Debug helper: log `text`, then write the first fcb->cepsize values of
 * each of the `nfr` frames in `mfc` to stderr, one frame per line.
 */
static void
cep_dump_dbg(feat_t *fcb, mfcc_t **mfc, int32 nfr, const char *text)
{
    int32 fr;

    E_INFO("%s\n", text);
    for (fr = 0; fr < nfr; fr++) {
        int32 c;

        for (c = 0; c < fcb->cepsize; c++)
            fprintf(stderr, "%f ", MFCC2FLOAT(mfc[fr][c]));
        fprintf(stderr, "\n");
    }
}

/*
 * Debug helper: log `text`, then print `nfr` frames of `feat` to stderr
 * through feat_print().
 */
static void
feat_print_dbg(feat_t *fcb, mfcc_t ***feat, int32 nfr, const char *text)
{
    E_INFO("%s\n", text);
    feat_print(fcb, feat, nfr, stderr);
}
#else /* !DUMP_FEATURES */
/* Without DUMP_FEATURES both debug helpers expand to nothing. */
#define cep_dump_dbg(fcb,mfc,nfr,text)
#define feat_print_dbg(fcb,mfc,nfr,text)
#endif
167 
168 int32 **
169 parse_subvecs(char const *str)
170 {
171  char const *strp;
172  int32 n, n2, l;
173  glist_t dimlist; /* List of dimensions in one subvector */
174  glist_t veclist; /* List of dimlists (subvectors) */
175  int32 **subvec;
176  gnode_t *gn, *gn2;
177 
178  veclist = NULL;
179 
180  strp = str;
181  for (;;) {
182  dimlist = NULL;
183 
184  for (;;) {
185  if (sscanf(strp, "%d%n", &n, &l) != 1)
186  E_FATAL("'%s': Couldn't read int32 @pos %d\n", str,
187  strp - str);
188  strp += l;
189 
190  if (*strp == '-') {
191  strp++;
192 
193  if (sscanf(strp, "%d%n", &n2, &l) != 1)
194  E_FATAL("'%s': Couldn't read int32 @pos %d\n", str,
195  strp - str);
196  strp += l;
197  }
198  else
199  n2 = n;
200 
201  if ((n < 0) || (n > n2))
202  E_FATAL("'%s': Bad subrange spec ending @pos %d\n", str,
203  strp - str);
204 
205  for (; n <= n2; n++) {
206  gnode_t *gn;
207  for (gn = dimlist; gn; gn = gnode_next(gn))
208  if (gnode_int32(gn) == n)
209  break;
210  if (gn != NULL)
211  E_FATAL("'%s': Duplicate dimension ending @pos %d\n",
212  str, strp - str);
213 
214  dimlist = glist_add_int32(dimlist, n);
215  }
216 
217  if ((*strp == '\0') || (*strp == '/'))
218  break;
219 
220  if (*strp != ',')
221  E_FATAL("'%s': Bad delimiter @pos %d\n", str, strp - str);
222 
223  strp++;
224  }
225 
226  veclist = glist_add_ptr(veclist, (void *) dimlist);
227 
228  if (*strp == '\0')
229  break;
230 
231  assert(*strp == '/');
232  strp++;
233  }
234 
235  /* Convert the glists to arrays; remember the glists are in reverse order of the input! */
236  n = glist_count(veclist); /* #Subvectors */
237  subvec = (int32 **) ckd_calloc(n + 1, sizeof(int32 *)); /* +1 for sentinel */
238  subvec[n] = NULL; /* sentinel */
239 
240  for (--n, gn = veclist; (n >= 0) && gn; gn = gnode_next(gn), --n) {
241  gn2 = (glist_t) gnode_ptr(gn);
242 
243  n2 = glist_count(gn2); /* Length of this subvector */
244  if (n2 <= 0)
245  E_FATAL("'%s': 0-length subvector\n", str);
246 
247  subvec[n] = (int32 *) ckd_calloc(n2 + 1, sizeof(int32)); /* +1 for sentinel */
248  subvec[n][n2] = -1; /* sentinel */
249 
250  for (--n2; (n2 >= 0) && gn2; gn2 = gnode_next(gn2), --n2)
251  subvec[n][n2] = gnode_int32(gn2);
252  assert((n2 < 0) && (!gn2));
253  }
254  assert((n < 0) && (!gn));
255 
256  /* Free the glists */
257  for (gn = veclist; gn; gn = gnode_next(gn)) {
258  gn2 = (glist_t) gnode_ptr(gn);
259  glist_free(gn2);
260  }
261  glist_free(veclist);
262 
263  return subvec;
264 }
265 
266 void
267 subvecs_free(int32 **subvecs)
268 {
269  int32 **sv;
270 
271  for (sv = subvecs; sv && *sv; ++sv)
272  ckd_free(*sv);
273  ckd_free(subvecs);
274 }
275 
276 int
277 feat_set_subvecs(feat_t *fcb, int32 **subvecs)
278 {
279  int32 **sv;
280  uint32 n_sv, n_dim, i;
281 
282  if (subvecs == NULL) {
283  subvecs_free(fcb->subvecs);
284  ckd_free(fcb->sv_buf);
285  ckd_free(fcb->sv_len);
286  fcb->n_sv = 0;
287  fcb->subvecs = NULL;
288  fcb->sv_len = NULL;
289  fcb->sv_buf = NULL;
290  fcb->sv_dim = 0;
291  return 0;
292  }
293 
294  if (fcb->n_stream != 1) {
295  E_ERROR("Subvector specifications require single-stream features!");
296  return -1;
297  }
298 
299  n_sv = 0;
300  n_dim = 0;
301  for (sv = subvecs; sv && *sv; ++sv) {
302  int32 *d;
303 
304  for (d = *sv; d && *d != -1; ++d) {
305  ++n_dim;
306  }
307  ++n_sv;
308  }
309  if (n_dim > feat_dimension(fcb)) {
310  E_ERROR("Total dimensionality of subvector specification %d "
311  "> feature dimensionality %d\n", n_dim, feat_dimension(fcb));
312  return -1;
313  }
314 
315  fcb->n_sv = n_sv;
316  fcb->subvecs = subvecs;
317  fcb->sv_len = (uint32 *)ckd_calloc(n_sv, sizeof(*fcb->sv_len));
318  fcb->sv_buf = (mfcc_t *)ckd_calloc(n_dim, sizeof(*fcb->sv_buf));
319  fcb->sv_dim = n_dim;
320  for (i = 0; i < n_sv; ++i) {
321  int32 *d;
322  for (d = subvecs[i]; d && *d != -1; ++d) {
323  ++fcb->sv_len[i];
324  }
325  }
326 
327  return 0;
328 }
329 
333 static void
334 feat_subvec_project(feat_t *fcb, mfcc_t ***inout_feat, uint32 nfr)
335 {
336  uint32 i;
337 
338  if (fcb->subvecs == NULL)
339  return;
340  for (i = 0; i < nfr; ++i) {
341  mfcc_t *out;
342  int32 j;
343 
344  out = fcb->sv_buf;
345  for (j = 0; j < fcb->n_sv; ++j) {
346  int32 *d;
347  for (d = fcb->subvecs[j]; d && *d != -1; ++d) {
348  *out++ = inout_feat[i][0][*d];
349  }
350  }
351  memcpy(inout_feat[i][0], fcb->sv_buf, fcb->sv_dim * sizeof(*fcb->sv_buf));
352  }
353 }
354 
355 mfcc_t ***
356 feat_array_alloc(feat_t * fcb, int32 nfr)
357 {
358  int32 i, j, k;
359  mfcc_t *data, *d, ***feat;
360 
361  assert(fcb);
362  assert(nfr > 0);
363  assert(feat_dimension(fcb) > 0);
364 
365  /* Make sure to use the dimensionality of the features *before*
366  LDA and subvector projection. */
367  k = 0;
368  for (i = 0; i < fcb->n_stream; ++i)
369  k += fcb->stream_len[i];
370  assert(k >= feat_dimension(fcb));
371  assert(k >= fcb->sv_dim);
372 
373  feat =
374  (mfcc_t ***) ckd_calloc_2d(nfr, feat_dimension1(fcb), sizeof(mfcc_t *));
375  data = (mfcc_t *) ckd_calloc(nfr * k, sizeof(mfcc_t));
376 
377  for (i = 0; i < nfr; i++) {
378  d = data + i * k;
379  for (j = 0; j < feat_dimension1(fcb); j++) {
380  feat[i][j] = d;
381  d += feat_dimension2(fcb, j);
382  }
383  }
384 
385  return feat;
386 }
387 
388 mfcc_t ***
389 feat_array_realloc(feat_t *fcb, mfcc_t ***old_feat, int32 ofr, int32 nfr)
390 {
391  int32 i, k, cf;
392  mfcc_t*** new_feat;
393 
394  assert(fcb);
395  assert(nfr > 0);
396  assert(ofr > 0);
397  assert(feat_dimension(fcb) > 0);
398 
399  /* Make sure to use the dimensionality of the features *before*
400  LDA and subvector projection. */
401  k = 0;
402  for (i = 0; i < fcb->n_stream; ++i)
403  k += fcb->stream_len[i];
404  assert(k >= feat_dimension(fcb));
405  assert(k >= fcb->sv_dim);
406 
407  new_feat = feat_array_alloc(fcb, nfr);
408 
409  cf = (nfr < ofr) ? nfr : ofr;
410  memcpy(new_feat[0][0], old_feat[0][0], cf * k * sizeof(mfcc_t));
411 
412  feat_array_free(old_feat);
413 
414  return new_feat;
415 }
416 
417 void
418 feat_array_free(mfcc_t ***feat)
419 {
420  ckd_free(feat[0][0]);
421  ckd_free_2d((void **)feat);
422 }
423 
424 static void
425 feat_s2_4x_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
426 {
427  mfcc_t *f;
428  mfcc_t *w, *_w;
429  mfcc_t *w1, *w_1, *_w1, *_w_1;
430  mfcc_t d1, d2;
431  int32 i, j;
432 
433  assert(fcb);
434  assert(feat_cepsize(fcb) == 13);
435  assert(feat_n_stream(fcb) == 4);
436  assert(feat_stream_len(fcb, 0) == 12);
437  assert(feat_stream_len(fcb, 1) == 24);
438  assert(feat_stream_len(fcb, 2) == 3);
439  assert(feat_stream_len(fcb, 3) == 12);
440  assert(feat_window_size(fcb) == 4);
441 
442  /* CEP; skip C0 */
443  memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t));
444 
445  /*
446  * DCEP(SHORT): mfc[2] - mfc[-2]
447  * DCEP(LONG): mfc[4] - mfc[-4]
448  */
449  w = mfc[2] + 1; /* +1 to skip C0 */
450  _w = mfc[-2] + 1;
451 
452  f = feat[1];
453  for (i = 0; i < feat_cepsize(fcb) - 1; i++) /* Short-term */
454  f[i] = w[i] - _w[i];
455 
456  w = mfc[4] + 1; /* +1 to skip C0 */
457  _w = mfc[-4] + 1;
458 
459  for (j = 0; j < feat_cepsize(fcb) - 1; i++, j++) /* Long-term */
460  f[i] = w[j] - _w[j];
461 
462  /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */
463  w1 = mfc[3] + 1; /* Final +1 to skip C0 */
464  _w1 = mfc[-1] + 1;
465  w_1 = mfc[1] + 1;
466  _w_1 = mfc[-3] + 1;
467 
468  f = feat[3];
469  for (i = 0; i < feat_cepsize(fcb) - 1; i++) {
470  d1 = w1[i] - _w1[i];
471  d2 = w_1[i] - _w_1[i];
472 
473  f[i] = d1 - d2;
474  }
475 
476  /* POW: C0, DC0, D2C0; differences computed as above for rest of cep */
477  f = feat[2];
478  f[0] = mfc[0][0];
479  f[1] = mfc[2][0] - mfc[-2][0];
480 
481  d1 = mfc[3][0] - mfc[-1][0];
482  d2 = mfc[1][0] - mfc[-3][0];
483  f[2] = d1 - d2;
484 }
485 
486 
487 static void
488 feat_s3_1x39_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
489 {
490  mfcc_t *f;
491  mfcc_t *w, *_w;
492  mfcc_t *w1, *w_1, *_w1, *_w_1;
493  mfcc_t d1, d2;
494  int32 i;
495 
496  assert(fcb);
497  assert(feat_cepsize(fcb) == 13);
498  assert(feat_n_stream(fcb) == 1);
499  assert(feat_stream_len(fcb, 0) == 39);
500  assert(feat_window_size(fcb) == 3);
501 
502  /* CEP; skip C0 */
503  memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t));
504  /*
505  * DCEP: mfc[2] - mfc[-2];
506  */
507  f = feat[0] + feat_cepsize(fcb) - 1;
508  w = mfc[2] + 1; /* +1 to skip C0 */
509  _w = mfc[-2] + 1;
510 
511  for (i = 0; i < feat_cepsize(fcb) - 1; i++)
512  f[i] = w[i] - _w[i];
513 
514  /* POW: C0, DC0, D2C0 */
515  f += feat_cepsize(fcb) - 1;
516 
517  f[0] = mfc[0][0];
518  f[1] = mfc[2][0] - mfc[-2][0];
519 
520  d1 = mfc[3][0] - mfc[-1][0];
521  d2 = mfc[1][0] - mfc[-3][0];
522  f[2] = d1 - d2;
523 
524  /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */
525  f += 3;
526 
527  w1 = mfc[3] + 1; /* Final +1 to skip C0 */
528  _w1 = mfc[-1] + 1;
529  w_1 = mfc[1] + 1;
530  _w_1 = mfc[-3] + 1;
531 
532  for (i = 0; i < feat_cepsize(fcb) - 1; i++) {
533  d1 = w1[i] - _w1[i];
534  d2 = w_1[i] - _w_1[i];
535 
536  f[i] = d1 - d2;
537  }
538 }
539 
540 
541 static void
542 feat_s3_cep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
543 {
544  assert(fcb);
545  assert(feat_n_stream(fcb) == 1);
546  assert(feat_window_size(fcb) == 0);
547 
548  /* CEP */
549  memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
550 }
551 
552 static void
553 feat_s3_cep_dcep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
554 {
555  mfcc_t *f;
556  mfcc_t *w, *_w;
557  int32 i;
558 
559  assert(fcb);
560  assert(feat_n_stream(fcb) == 1);
561  assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 2);
562  assert(feat_window_size(fcb) == 2);
563 
564  /* CEP */
565  memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
566 
567  /*
568  * DCEP: mfc[2] - mfc[-2];
569  */
570  f = feat[0] + feat_cepsize(fcb);
571  w = mfc[2];
572  _w = mfc[-2];
573 
574  for (i = 0; i < feat_cepsize(fcb); i++)
575  f[i] = w[i] - _w[i];
576 }
577 
578 static void
579 feat_1s_c_d_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
580 {
581  mfcc_t *f;
582  mfcc_t *w, *_w;
583  mfcc_t *w1, *w_1, *_w1, *_w_1;
584  mfcc_t d1, d2;
585  int32 i;
586 
587  assert(fcb);
588  assert(feat_n_stream(fcb) == 1);
589  assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 3);
590  assert(feat_window_size(fcb) == FEAT_DCEP_WIN + 1);
591 
592  /* CEP */
593  memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
594 
595  /*
596  * DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN;
597  */
598  f = feat[0] + feat_cepsize(fcb);
599  w = mfc[FEAT_DCEP_WIN];
600  _w = mfc[-FEAT_DCEP_WIN];
601 
602  for (i = 0; i < feat_cepsize(fcb); i++)
603  f[i] = w[i] - _w[i];
604 
605  /*
606  * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]),
607  * where w = FEAT_DCEP_WIN
608  */
609  f += feat_cepsize(fcb);
610 
611  w1 = mfc[FEAT_DCEP_WIN + 1];
612  _w1 = mfc[-FEAT_DCEP_WIN + 1];
613  w_1 = mfc[FEAT_DCEP_WIN - 1];
614  _w_1 = mfc[-FEAT_DCEP_WIN - 1];
615 
616  for (i = 0; i < feat_cepsize(fcb); i++) {
617  d1 = w1[i] - _w1[i];
618  d2 = w_1[i] - _w_1[i];
619 
620  f[i] = d1 - d2;
621  }
622 }
623 
624 static void
625 feat_1s_c_d_ld_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
626 {
627  mfcc_t *f;
628  mfcc_t *w, *_w;
629  mfcc_t *w1, *w_1, *_w1, *_w_1;
630  mfcc_t d1, d2;
631  int32 i;
632 
633  assert(fcb);
634  assert(feat_n_stream(fcb) == 1);
635  assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 4);
636  assert(feat_window_size(fcb) == FEAT_DCEP_WIN * 2);
637 
638  /* CEP */
639  memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
640 
641  /*
642  * DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN;
643  */
644  f = feat[0] + feat_cepsize(fcb);
645  w = mfc[FEAT_DCEP_WIN];
646  _w = mfc[-FEAT_DCEP_WIN];
647 
648  for (i = 0; i < feat_cepsize(fcb); i++)
649  f[i] = w[i] - _w[i];
650 
651  /*
652  * LDCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN * 2;
653  */
654  f += feat_cepsize(fcb);
655  w = mfc[FEAT_DCEP_WIN * 2];
656  _w = mfc[-FEAT_DCEP_WIN * 2];
657 
658  for (i = 0; i < feat_cepsize(fcb); i++)
659  f[i] = w[i] - _w[i];
660 
661  /*
662  * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]),
663  * where w = FEAT_DCEP_WIN
664  */
665  f += feat_cepsize(fcb);
666 
667  w1 = mfc[FEAT_DCEP_WIN + 1];
668  _w1 = mfc[-FEAT_DCEP_WIN + 1];
669  w_1 = mfc[FEAT_DCEP_WIN - 1];
670  _w_1 = mfc[-FEAT_DCEP_WIN - 1];
671 
672  for (i = 0; i < feat_cepsize(fcb); i++) {
673  d1 = w1[i] - _w1[i];
674  d2 = w_1[i] - _w_1[i];
675 
676  f[i] = d1 - d2;
677  }
678 }
679 
680 static void
681 feat_copy(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
682 {
683  int32 win, i, j;
684 
685  win = feat_window_size(fcb);
686 
687  /* Concatenate input features */
688  for (i = -win; i <= win; ++i) {
689  uint32 spos = 0;
690 
691  for (j = 0; j < feat_n_stream(fcb); ++j) {
692  uint32 stream_len;
693 
694  /* Unscale the stream length by the window. */
695  stream_len = feat_stream_len(fcb, j) / (2 * win + 1);
696  memcpy(feat[j] + ((i + win) * stream_len),
697  mfc[i] + spos,
698  stream_len * sizeof(mfcc_t));
699  spos += stream_len;
700  }
701  }
702 }
703 
704 feat_t *
705 feat_init(char const *type, cmn_type_t cmn, int32 varnorm,
706  agc_type_t agc, int32 breport, int32 cepsize)
707 {
708  feat_t *fcb;
709 
710  if (cepsize == 0)
711  cepsize = 13;
712  if (breport)
713  E_INFO
714  ("Initializing feature stream to type: '%s', ceplen=%d, CMN='%s', VARNORM='%s', AGC='%s'\n",
715  type, cepsize, cmn_type_str[cmn], varnorm ? "yes" : "no", agc_type_str[agc]);
716 
717  fcb = (feat_t *) ckd_calloc(1, sizeof(feat_t));
718  fcb->refcount = 1;
719  fcb->name = (char *) ckd_salloc(type);
720  if (strcmp(type, "s2_4x") == 0) {
721  /* Sphinx-II format 4-stream feature (Hack!! hardwired constants below) */
722  if (cepsize != 13) {
723  E_ERROR("s2_4x features require cepsize == 13\n");
724  ckd_free(fcb);
725  return NULL;
726  }
727  fcb->cepsize = 13;
728  fcb->n_stream = 4;
729  fcb->stream_len = (uint32 *) ckd_calloc(4, sizeof(uint32));
730  fcb->stream_len[0] = 12;
731  fcb->stream_len[1] = 24;
732  fcb->stream_len[2] = 3;
733  fcb->stream_len[3] = 12;
734  fcb->out_dim = 51;
735  fcb->window_size = 4;
736  fcb->compute_feat = feat_s2_4x_cep2feat;
737  }
738  else if ((strcmp(type, "s3_1x39") == 0) || (strcmp(type, "1s_12c_12d_3p_12dd") == 0)) {
739  /* 1-stream cep/dcep/pow/ddcep (Hack!! hardwired constants below) */
740  if (cepsize != 13) {
741  E_ERROR("s2_4x features require cepsize == 13\n");
742  ckd_free(fcb);
743  return NULL;
744  }
745  fcb->cepsize = 13;
746  fcb->n_stream = 1;
747  fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
748  fcb->stream_len[0] = 39;
749  fcb->out_dim = 39;
750  fcb->window_size = 3;
751  fcb->compute_feat = feat_s3_1x39_cep2feat;
752  }
753  else if (strncmp(type, "1s_c_d_dd", 9) == 0) {
754  fcb->cepsize = cepsize;
755  fcb->n_stream = 1;
756  fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
757  fcb->stream_len[0] = cepsize * 3;
758  fcb->out_dim = cepsize * 3;
759  fcb->window_size = FEAT_DCEP_WIN + 1; /* ddcep needs the extra 1 */
760  fcb->compute_feat = feat_1s_c_d_dd_cep2feat;
761  }
762  else if (strncmp(type, "1s_c_d_ld_dd", 12) == 0) {
763  fcb->cepsize = cepsize;
764  fcb->n_stream = 1;
765  fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
766  fcb->stream_len[0] = cepsize * 4;
767  fcb->out_dim = cepsize * 4;
768  fcb->window_size = FEAT_DCEP_WIN * 2;
769  fcb->compute_feat = feat_1s_c_d_ld_dd_cep2feat;
770  }
771  else if (strncmp(type, "cep_dcep", 8) == 0 || strncmp(type, "1s_c_d", 6) == 0) {
772  /* 1-stream cep/dcep */
773  fcb->cepsize = cepsize;
774  fcb->n_stream = 1;
775  fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
776  fcb->stream_len[0] = feat_cepsize(fcb) * 2;
777  fcb->out_dim = fcb->stream_len[0];
778  fcb->window_size = 2;
779  fcb->compute_feat = feat_s3_cep_dcep;
780  }
781  else if (strncmp(type, "cep", 3) == 0 || strncmp(type, "1s_c", 4) == 0) {
782  /* 1-stream cep */
783  fcb->cepsize = cepsize;
784  fcb->n_stream = 1;
785  fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
786  fcb->stream_len[0] = feat_cepsize(fcb);
787  fcb->out_dim = fcb->stream_len[0];
788  fcb->window_size = 0;
789  fcb->compute_feat = feat_s3_cep;
790  }
791  else if (strncmp(type, "1s_3c", 5) == 0 || strncmp(type, "1s_4c", 5) == 0) {
792  /* 1-stream cep with frames concatenated, so called cepwin features */
793  if (strncmp(type, "1s_3c", 5) == 0)
794  fcb->window_size = 3;
795  else
796  fcb->window_size = 4;
797 
798  fcb->cepsize = cepsize;
799  fcb->n_stream = 1;
800  fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
801  fcb->stream_len[0] = feat_cepsize(fcb) * (2 * fcb->window_size + 1);
802  fcb->out_dim = fcb->stream_len[0];
803  fcb->compute_feat = feat_copy;
804  }
805  else {
806  int32 i, k, l;
807  size_t len;
808  char *strp;
809  char *mtype = ckd_salloc(type);
810  char *wd = ckd_salloc(type);
811  /*
812  * Generic definition: Format should be %d,%d,%d,...,%d (i.e.,
813  * comma separated list of feature stream widths; #items =
814  * #streams). An optional window size (frames will be
815  * concatenated) is also allowed, which can be specified with
816  * a colon after the list of feature streams.
817  */
818  len = strlen(mtype);
819  k = 0;
820  for (i = 1; i < len - 1; i++) {
821  if (mtype[i] == ',') {
822  mtype[i] = ' ';
823  k++;
824  }
825  else if (mtype[i] == ':') {
826  mtype[i] = '\0';
827  fcb->window_size = atoi(mtype + i + 1);
828  break;
829  }
830  }
831  k++; /* Presumably there are (#commas+1) streams */
832  fcb->n_stream = k;
833  fcb->stream_len = (uint32 *) ckd_calloc(k, sizeof(uint32));
834 
835  /* Scan individual feature stream lengths */
836  strp = mtype;
837  i = 0;
838  fcb->out_dim = 0;
839  fcb->cepsize = 0;
840  while (sscanf(strp, "%s%n", wd, &l) == 1) {
841  strp += l;
842  if ((i >= fcb->n_stream)
843  || (sscanf(wd, "%u", &(fcb->stream_len[i])) != 1)
844  || (fcb->stream_len[i] <= 0))
845  E_FATAL("Bad feature type argument\n");
846  /* Input size before windowing */
847  fcb->cepsize += fcb->stream_len[i];
848  if (fcb->window_size > 0)
849  fcb->stream_len[i] *= (fcb->window_size * 2 + 1);
850  /* Output size after windowing */
851  fcb->out_dim += fcb->stream_len[i];
852  i++;
853  }
854  if (i != fcb->n_stream)
855  E_FATAL("Bad feature type argument\n");
856  if (fcb->cepsize != cepsize)
857  E_FATAL("Bad feature type argument\n");
858 
859  /* Input is already the feature stream */
860  fcb->compute_feat = feat_copy;
861  ckd_free(mtype);
862  ckd_free(wd);
863  }
864 
865  if (cmn != CMN_NONE)
866  fcb->cmn_struct = cmn_init(feat_cepsize(fcb));
867  fcb->cmn = cmn;
868  fcb->varnorm = varnorm;
869  if (agc != AGC_NONE) {
870  fcb->agc_struct = agc_init();
871  /*
872  * No need to check if agc is set to EMAX; agc_emax_set() changes only emax related things
873  * Moreover, if agc is not NONE and block mode is used, feat_agc() SILENTLY
874  * switches to EMAX
875  */
876  /* HACK: hardwired initial estimates based on use of CMN (from Sphinx2) */
877  agc_emax_set(fcb->agc_struct, (cmn != CMN_NONE) ? 5.0 : 10.0);
878  }
879  fcb->agc = agc;
880  /*
881  * Make sure this buffer is large enough to be used in feat_s2mfc2feat_block_utt()
882  */
883  fcb->cepbuf = (mfcc_t **) ckd_calloc_2d((LIVEBUFBLOCKSIZE < feat_window_size(fcb) * 2) ? feat_window_size(fcb) * 2 : LIVEBUFBLOCKSIZE,
884  feat_cepsize(fcb),
885  sizeof(mfcc_t));
886  /* This one is actually just an array of pointers to "flatten out"
887  * wraparounds. */
888  fcb->tmpcepbuf = (mfcc_t** )ckd_calloc(2 * feat_window_size(fcb) + 1,
889  sizeof(*fcb->tmpcepbuf));
890 
891  return fcb;
892 }
893 
894 
895 void
896 feat_print(feat_t * fcb, mfcc_t *** feat, int32 nfr, FILE * fp)
897 {
898  uint32 i, j, k;
899 
900  for (i = 0; i < nfr; i++) {
901  fprintf(fp, "%8d:\n", i);
902 
903  for (j = 0; j < feat_dimension1(fcb); j++) {
904  fprintf(fp, "\t%2d:", j);
905 
906  for (k = 0; k < feat_dimension2(fcb, j); k++)
907  fprintf(fp, " %8.4f", MFCC2FLOAT(feat[i][j][k]));
908  fprintf(fp, "\n");
909  }
910  }
911 
912  fflush(fp);
913 }
914 
915 static void
916 feat_cmn(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt)
917 {
918  cmn_type_t cmn_type = fcb->cmn;
919 
920  if (!(beginutt && endutt)
921  && cmn_type != CMN_NONE) /* Only cmn_prior in block computation mode. */
922  fcb->cmn = cmn_type = CMN_LIVE;
923 
924  switch (cmn_type) {
925  case CMN_BATCH:
926  cmn(fcb->cmn_struct, mfc, fcb->varnorm, nfr);
927  break;
928  case CMN_LIVE:
929  cmn_live(fcb->cmn_struct, mfc, fcb->varnorm, nfr);
930  if (endutt)
931  cmn_live_update(fcb->cmn_struct);
932  break;
933  default:
934  ;
935  }
936  cep_dump_dbg(fcb, mfc, nfr, "After CMN");
937 }
938 
939 static void
940 feat_agc(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt)
941 {
942  agc_type_t agc_type = fcb->agc;
943 
944  if (!(beginutt && endutt)
945  && agc_type != AGC_NONE) /* Only agc_emax in block computation mode. */
946  agc_type = AGC_EMAX;
947 
948  switch (agc_type) {
949  case AGC_MAX:
950  agc_max(fcb->agc_struct, mfc, nfr);
951  break;
952  case AGC_EMAX:
953  agc_emax(fcb->agc_struct, mfc, nfr);
954  if (endutt)
955  agc_emax_update(fcb->agc_struct);
956  break;
957  case AGC_NOISE:
958  agc_noise(fcb->agc_struct, mfc, nfr);
959  break;
960  default:
961  ;
962  }
963  cep_dump_dbg(fcb, mfc, nfr, "After AGC");
964 }
965 
966 static void
967 feat_compute_utt(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 win, mfcc_t ***feat)
968 {
969  int32 i;
970 
971  cep_dump_dbg(fcb, mfc, nfr, "Incoming features (after padding)");
972 
973  /* Create feature vectors */
974  for (i = win; i < nfr - win; i++) {
975  fcb->compute_feat(fcb, mfc + i, feat[i - win]);
976  }
977 
978  feat_print_dbg(fcb, feat, nfr - win * 2, "After dynamic feature computation");
979 
980  if (fcb->lda) {
981  feat_lda_transform(fcb, feat, nfr - win * 2);
982  feat_print_dbg(fcb, feat, nfr - win * 2, "After LDA");
983  }
984 
985  if (fcb->subvecs) {
986  feat_subvec_project(fcb, feat, nfr - win * 2);
987  feat_print_dbg(fcb, feat, nfr - win * 2, "After subvector projection");
988  }
989 }
990 
991 
1004 static int32
1005 feat_s2mfc_read_norm_pad(feat_t *fcb, char *file, int32 win,
1006  int32 sf, int32 ef,
1007  mfcc_t ***out_mfc,
1008  int32 maxfr,
1009  int32 cepsize)
1010 {
1011  FILE *fp;
1012  int32 n_float32;
1013  float32 *float_feat;
1014  struct stat statbuf;
1015  int32 i, n, byterev;
1016  int32 start_pad, end_pad;
1017  mfcc_t **mfc;
1018 
1019  /* Initialize the output pointer to NULL, so that any attempts to
1020  free() it if we fail before allocating it will not segfault! */
1021  if (out_mfc)
1022  *out_mfc = NULL;
1023  E_INFO("Reading mfc file: '%s'[%d..%d]\n", file, sf, ef);
1024  if (ef >= 0 && ef <= sf) {
1025  E_ERROR("%s: End frame (%d) <= Start frame (%d)\n", file, ef, sf);
1026  return -1;
1027  }
1028 
1029  /* Find filesize; HACK!! To get around intermittent NFS failures, use stat_retry */
1030  if ((stat_retry(file, &statbuf) < 0)
1031  || ((fp = fopen(file, "rb")) == NULL)) {
1032  E_ERROR_SYSTEM("Failed to open file '%s' for reading", file);
1033  return -1;
1034  }
1035 
1036  /* Read #floats in header */
1037  if (fread_retry(&n_float32, sizeof(int32), 1, fp) != 1) {
1038  E_ERROR("%s: fread(#floats) failed\n", file);
1039  fclose(fp);
1040  return -1;
1041  }
1042 
1043  /* Check if n_float32 matches file size */
1044  byterev = 0;
1045  if ((int32) (n_float32 * sizeof(float32) + 4) != (int32) statbuf.st_size) { /* RAH, typecast both sides to remove compile warning */
1046  n = n_float32;
1047  SWAP_INT32(&n);
1048 
1049  if ((int32) (n * sizeof(float32) + 4) != (int32) (statbuf.st_size)) { /* RAH, typecast both sides to remove compile warning */
1050  E_ERROR
1051  ("%s: Header size field: %d(%08x); filesize: %d(%08x)\n",
1052  file, n_float32, n_float32, statbuf.st_size,
1053  statbuf.st_size);
1054  fclose(fp);
1055  return -1;
1056  }
1057 
1058  n_float32 = n;
1059  byterev = 1;
1060  }
1061  if (n_float32 <= 0) {
1062  E_ERROR("%s: Header size field (#floats) = %d\n", file, n_float32);
1063  fclose(fp);
1064  return -1;
1065  }
1066 
1067  /* Convert n to #frames of input */
1068  n = n_float32 / cepsize;
1069  if (n * cepsize != n_float32) {
1070  E_ERROR("Header size field: %d; not multiple of %d\n", n_float32,
1071  cepsize);
1072  fclose(fp);
1073  return -1;
1074  }
1075 
1076  /* Check start and end frames */
1077  if (sf > 0) {
1078  if (sf >= n) {
1079  E_ERROR("%s: Start frame (%d) beyond file size (%d)\n", file,
1080  sf, n);
1081  fclose(fp);
1082  return -1;
1083  }
1084  }
1085  if (ef < 0)
1086  ef = n-1;
1087  else if (ef >= n) {
1088  E_WARN("%s: End frame (%d) beyond file size (%d), will truncate\n",
1089  file, ef, n);
1090  ef = n-1;
1091  }
1092 
1093  /* Add window to start and end frames */
1094  sf -= win;
1095  ef += win;
1096  if (sf < 0) {
1097  start_pad = -sf;
1098  sf = 0;
1099  }
1100  else
1101  start_pad = 0;
1102  if (ef >= n) {
1103  end_pad = ef - n + 1;
1104  ef = n - 1;
1105  }
1106  else
1107  end_pad = 0;
1108 
1109  /* Limit n if indicated by [sf..ef] */
1110  if ((ef - sf + 1) < n)
1111  n = (ef - sf + 1);
1112  if (maxfr > 0 && n + start_pad + end_pad > maxfr) {
1113  E_ERROR("%s: Maximum output size(%d frames) < actual #frames(%d)\n",
1114  file, maxfr, n + start_pad + end_pad);
1115  fclose(fp);
1116  return -1;
1117  }
1118 
1119  /* If no output buffer was supplied, then skip the actual data reading. */
1120  if (out_mfc != NULL) {
1121  /* Position at desired start frame and read actual MFC data */
1122  mfc = (mfcc_t **)ckd_calloc_2d(n + start_pad + end_pad, cepsize, sizeof(mfcc_t));
1123  if (sf > 0)
1124  fseek(fp, sf * cepsize * sizeof(float32), SEEK_CUR);
1125  n_float32 = n * cepsize;
1126 #ifdef FIXED_POINT
1127  float_feat = ckd_calloc(n_float32, sizeof(float32));
1128 #else
1129  float_feat = mfc[start_pad];
1130 #endif
1131  if (fread_retry(float_feat, sizeof(float32), n_float32, fp) != n_float32) {
1132  E_ERROR("%s: fread(%dx%d) (MFC data) failed\n", file, n, cepsize);
1133  ckd_free_2d(mfc);
1134  fclose(fp);
1135  return -1;
1136  }
1137  if (byterev) {
1138  for (i = 0; i < n_float32; i++) {
1139  SWAP_FLOAT32(&float_feat[i]);
1140  }
1141  }
1142 #ifdef FIXED_POINT
1143  for (i = 0; i < n_float32; ++i) {
1144  mfc[start_pad][i] = FLOAT2MFCC(float_feat[i]);
1145  }
1146  ckd_free(float_feat);
1147 #endif
1148 
1149  /* Normalize */
1150  feat_cmn(fcb, mfc + start_pad, n, 1, 1);
1151  feat_agc(fcb, mfc + start_pad, n, 1, 1);
1152 
1153  /* Replicate start and end frames if necessary. */
1154  for (i = 0; i < start_pad; ++i)
1155  memcpy(mfc[i], mfc[start_pad], cepsize * sizeof(mfcc_t));
1156  for (i = 0; i < end_pad; ++i)
1157  memcpy(mfc[start_pad + n + i], mfc[start_pad + n - 1],
1158  cepsize * sizeof(mfcc_t));
1159 
1160  *out_mfc = mfc;
1161  }
1162 
1163  fclose(fp);
1164  return n + start_pad + end_pad;
1165 }
1166 
1167 
1168 
1169 int32
1170 feat_s2mfc2feat(feat_t * fcb, const char *file, const char *dir, const char *cepext,
1171  int32 sf, int32 ef, mfcc_t *** feat, int32 maxfr)
1172 {
1173  char *path;
1174  char *ps = "/";
1175  int32 win, nfr;
1176  size_t file_length, cepext_length, path_length = 0;
1177  mfcc_t **mfc;
1178 
1179  if (fcb->cepsize <= 0) {
1180  E_ERROR("Bad cepsize: %d\n", fcb->cepsize);
1181  return -1;
1182  }
1183 
1184  if (cepext == NULL)
1185  cepext = "";
1186 
1187  /*
1188  * Create mfc filename, combining file, dir and extension if
1189  * necessary
1190  */
1191 
1192  /*
1193  * First we decide about the path. If dir is defined, then use
1194  * it. Otherwise assume the filename already contains the path.
1195  */
1196  if (dir == NULL) {
1197  dir = "";
1198  ps = "";
1199  /*
1200  * This is not true but some 3rd party apps
1201  * may parse the output explicitly checking for this line
1202  */
1203  E_INFO("At directory . (current directory)\n");
1204  }
1205  else {
1206  E_INFO("At directory %s\n", dir);
1207  /*
1208  * Do not forget the path separator!
1209  */
1210  path_length += strlen(dir) + 1;
1211  }
1212 
1213  /*
1214  * Include cepext, if it's not already part of the filename.
1215  */
1216  file_length = strlen(file);
1217  cepext_length = strlen(cepext);
1218  if ((file_length > cepext_length)
1219  && (strcmp(file + file_length - cepext_length, cepext) == 0)) {
1220  cepext = "";
1221  cepext_length = 0;
1222  }
1223 
1224  /*
1225  * Do not forget the '\0'
1226  */
1227  path_length += file_length + cepext_length + 1;
1228  path = (char*) ckd_calloc(path_length, sizeof(char));
1229 
1230 #ifdef HAVE_SNPRINTF
1231  /*
1232  * Paranoia is our best friend...
1233  */
1234  while ((file_length = snprintf(path, path_length, "%s%s%s%s", dir, ps, file, cepext)) > path_length) {
1235  path_length = file_length;
1236  path = (char*) ckd_realloc(path, path_length * sizeof(char));
1237  }
1238 #else
1239  sprintf(path, "%s%s%s%s", dir, ps, file, cepext);
1240 #endif
1241 
1242  win = feat_window_size(fcb);
1243  /* Pad maxfr with win, so we read enough raw feature data to
1244  * calculate the requisite number of dynamic features. */
1245  if (maxfr >= 0)
1246  maxfr += win * 2;
1247 
1248  if (feat != NULL) {
1249  /* Read mfc file including window or padding if necessary. */
1250  nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, &mfc, maxfr, fcb->cepsize);
1251  ckd_free(path);
1252  if (nfr < 0) {
1253  ckd_free_2d((void **) mfc);
1254  return -1;
1255  }
1256 
1257  /* Actually compute the features */
1258  feat_compute_utt(fcb, mfc, nfr, win, feat);
1259 
1260  ckd_free_2d((void **) mfc);
1261  }
1262  else {
1263  /* Just calculate the number of frames we would need. */
1264  nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, NULL, maxfr, fcb->cepsize);
1265  ckd_free(path);
1266  if (nfr < 0)
1267  return nfr;
1268  }
1269 
1270 
1271  return (nfr - win * 2);
1272 }
1273 
1274 static int32
1275 feat_s2mfc2feat_block_utt(feat_t * fcb, mfcc_t ** uttcep,
1276  int32 nfr, mfcc_t *** ofeat)
1277 {
1278  mfcc_t **cepbuf;
1279  int32 i, win, cepsize;
1280 
1281  win = feat_window_size(fcb);
1282  cepsize = feat_cepsize(fcb);
1283 
1284  /* Copy and pad out the utterance (this requires that the
1285  * feature computation functions always access the buffer via
1286  * the frame pointers, which they do) */
1287  cepbuf = (mfcc_t **)ckd_calloc(nfr + win * 2, sizeof(mfcc_t *));
1288  memcpy(cepbuf + win, uttcep, nfr * sizeof(mfcc_t *));
1289 
1290  /* Do normalization before we interpolate on the boundary */
1291  feat_cmn(fcb, cepbuf + win, nfr, 1, 1);
1292  feat_agc(fcb, cepbuf + win, nfr, 1, 1);
1293 
1294  /* Now interpolate */
1295  for (i = 0; i < win; ++i) {
1296  cepbuf[i] = fcb->cepbuf[i];
1297  memcpy(cepbuf[i], uttcep[0], cepsize * sizeof(mfcc_t));
1298  cepbuf[nfr + win + i] = fcb->cepbuf[win + i];
1299  memcpy(cepbuf[nfr + win + i], uttcep[nfr - 1], cepsize * sizeof(mfcc_t));
1300  }
1301  /* Compute as usual. */
1302  feat_compute_utt(fcb, cepbuf, nfr + win * 2, win, ofeat);
1303  ckd_free(cepbuf);
1304  return nfr;
1305 }
1306 
1307 int32
1308 feat_s2mfc2feat_live(feat_t * fcb, mfcc_t ** uttcep, int32 *inout_ncep,
1309  int32 beginutt, int32 endutt, mfcc_t *** ofeat)
1310 {
1311  int32 win, cepsize, nbufcep;
1312  int32 i, j, nfeatvec;
1313  int32 zero = 0;
1314 
1315  /* Avoid having to check this everywhere. */
1316  if (inout_ncep == NULL) inout_ncep = &zero;
1317 
1318  /* Special case for entire utterances. */
1319  if (beginutt && endutt && *inout_ncep > 0)
1320  return feat_s2mfc2feat_block_utt(fcb, uttcep, *inout_ncep, ofeat);
1321 
1322  win = feat_window_size(fcb);
1323  cepsize = feat_cepsize(fcb);
1324 
1325  /* Empty the input buffer on start of utterance. */
1326  if (beginutt)
1327  fcb->bufpos = fcb->curpos;
1328 
1329  /* Calculate how much data is in the buffer already. */
1330  nbufcep = fcb->bufpos - fcb->curpos;
1331  if (nbufcep < 0)
1332  nbufcep = fcb->bufpos + LIVEBUFBLOCKSIZE - fcb->curpos;
1333  /* Add any data that we have to replicate. */
1334  if (beginutt && *inout_ncep > 0)
1335  nbufcep += win;
1336  if (endutt)
1337  nbufcep += win;
1338 
1339  /* Only consume as much input as will fit in the buffer. */
1340  if (nbufcep + *inout_ncep > LIVEBUFBLOCKSIZE) {
1341  /* We also can't overwrite the trailing window, hence the
1342  * reason why win is subtracted here. */
1343  *inout_ncep = LIVEBUFBLOCKSIZE - nbufcep - win;
1344  /* Cancel end of utterance processing. */
1345  endutt = FALSE;
1346  }
1347 
1348  /* FIXME: Don't modify the input! */
1349  feat_cmn(fcb, uttcep, *inout_ncep, beginutt, endutt);
1350  feat_agc(fcb, uttcep, *inout_ncep, beginutt, endutt);
1351 
1352  /* Replicate first frame into the first win frames if we're at the
1353  * beginning of the utterance and there was some actual input to
1354  * deal with. (FIXME: Not entirely sure why that condition) */
1355  if (beginutt && *inout_ncep > 0) {
1356  for (i = 0; i < win; i++) {
1357  memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[0],
1358  cepsize * sizeof(mfcc_t));
1359  fcb->bufpos %= LIVEBUFBLOCKSIZE;
1360  }
1361  /* Move the current pointer past this data. */
1362  fcb->curpos = fcb->bufpos;
1363  nbufcep -= win;
1364  }
1365 
1366  /* Copy in frame data to the circular buffer. */
1367  for (i = 0; i < *inout_ncep; ++i) {
1368  memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[i],
1369  cepsize * sizeof(mfcc_t));
1370  fcb->bufpos %= LIVEBUFBLOCKSIZE;
1371  ++nbufcep;
1372  }
1373 
1374  /* Replicate last frame into the last win frames if we're at the
1375  * end of the utterance (even if there was no input, so we can
1376  * flush the output). */
1377  if (endutt) {
1378  int32 tpos; /* Index of last input frame. */
1379  if (fcb->bufpos == 0)
1380  tpos = LIVEBUFBLOCKSIZE - 1;
1381  else
1382  tpos = fcb->bufpos - 1;
1383  for (i = 0; i < win; ++i) {
1384  memcpy(fcb->cepbuf[fcb->bufpos++], fcb->cepbuf[tpos],
1385  cepsize * sizeof(mfcc_t));
1386  fcb->bufpos %= LIVEBUFBLOCKSIZE;
1387  }
1388  }
1389 
1390  /* We have to leave the trailing window of frames. */
1391  nfeatvec = nbufcep - win;
1392  if (nfeatvec <= 0)
1393  return 0; /* Do nothing. */
1394 
1395  for (i = 0; i < nfeatvec; ++i) {
1396  /* Handle wraparound cases. */
1397  if (fcb->curpos - win < 0 || fcb->curpos + win >= LIVEBUFBLOCKSIZE) {
1398  /* Use tmpcepbuf for this case. Actually, we just need the pointers. */
1399  for (j = -win; j <= win; ++j) {
1400  int32 tmppos =
1401  (fcb->curpos + j + LIVEBUFBLOCKSIZE) % LIVEBUFBLOCKSIZE;
1402  fcb->tmpcepbuf[win + j] = fcb->cepbuf[tmppos];
1403  }
1404  fcb->compute_feat(fcb, fcb->tmpcepbuf + win, ofeat[i]);
1405  }
1406  else {
1407  fcb->compute_feat(fcb, fcb->cepbuf + fcb->curpos, ofeat[i]);
1408  }
1409  /* Move the read pointer forward. */
1410  ++fcb->curpos;
1411  fcb->curpos %= LIVEBUFBLOCKSIZE;
1412  }
1413 
1414  if (fcb->lda)
1415  feat_lda_transform(fcb, ofeat, nfeatvec);
1416 
1417  if (fcb->subvecs)
1418  feat_subvec_project(fcb, ofeat, nfeatvec);
1419 
1420  return nfeatvec;
1421 }
1422 
1423 void
1425 {
1426  if (fcb->cmn == CMN_LIVE) {
1427  cmn_live_update(fcb->cmn_struct);
1428  }
1429  if (fcb->agc == AGC_EMAX || fcb->agc == AGC_MAX) {
1430  agc_emax_update(fcb->agc_struct);
1431  }
1432 }
1433 
1434 feat_t *
1436 {
1437  ++f->refcount;
1438  return f;
1439 }
1440 
1441 int
1443 {
1444  if (f == NULL)
1445  return 0;
1446  if (--f->refcount > 0)
1447  return f->refcount;
1448 
1449  if (f->cepbuf)
1450  ckd_free_2d((void **) f->cepbuf);
1451  ckd_free(f->tmpcepbuf);
1452 
1453  if (f->name) {
1454  ckd_free((void *) f->name);
1455  }
1456  if (f->lda)
1457  ckd_free_3d((void ***) f->lda);
1458 
1459  ckd_free(f->stream_len);
1460  ckd_free(f->sv_len);
1461  ckd_free(f->sv_buf);
1462  subvecs_free(f->subvecs);
1463 
1464  cmn_free(f->cmn_struct);
1465  agc_free(f->agc_struct);
1466 
1467  ckd_free(f);
1468  return 0;
1469 }
1470 
1471 
1472 void
1474 {
1475  int i;
1476  E_INFO_NOFN("Initialization of feat_t, report:\n");
1477  E_INFO_NOFN("Feature type = %s\n", f->name);
1478  E_INFO_NOFN("Cepstral size = %d\n", f->cepsize);
1479  E_INFO_NOFN("Number of streams = %d\n", f->n_stream);
1480  for (i = 0; i < f->n_stream; i++) {
1481  E_INFO_NOFN("Vector size of stream[%d]: %d\n", i,
1482  f->stream_len[i]);
1483  }
1484  E_INFO_NOFN("Number of subvectors = %d\n", f->n_sv);
1485  for (i = 0; i < f->n_sv; i++) {
1486  int32 *sv;
1487 
1488  E_INFO_NOFN("Components of subvector[%d]:", i);
1489  for (sv = f->subvecs[i]; sv && *sv != -1; ++sv)
1490  E_INFOCONT(" %d", *sv);
1491  E_INFOCONT("\n");
1492  }
1493  E_INFO_NOFN("Whether CMN is used = %d\n", f->cmn);
1494  E_INFO_NOFN("Whether AGC is used = %d\n", f->agc);
1495  E_INFO_NOFN("Whether variance is normalized = %d\n", f->varnorm);
1496  E_INFO_NOFN("\n");
1497 }
#define E_ERROR_SYSTEM(...)
Print error text; Call perror("");.
Definition: err.h:99
SPHINXBASE_EXPORT void feat_print(feat_t *fcb, mfcc_t ***feat, int32 nfr, FILE *fp)
Print the given block of feature vectors to the given FILE.
Definition: feat.c:896
SPHINXBASE_EXPORT void feat_report(feat_t *f)
Report the feat_t data structure.
Definition: feat.c:1473
#define E_INFO(...)
Print logging information to standard error stream.
Definition: err.h:114
SPHINXBASE_EXPORT void agc_noise(agc_t *agc, mfcc_t **mfc, int32 n_frame)
Apply AGC using noise threshold to the given block of MFC vectors.
Definition: agc.c:180
SPHINXBASE_EXPORT void agc_max(agc_t *agc, mfcc_t **mfc, int32 n_frame)
Apply AGC to the given mfc vectors (normalize all C0 mfc coefficients in the given input such that th...
Definition: agc.c:109
#define ckd_calloc_2d(d1, d2, sz)
Macro for ckd_calloc_2d
Definition: ckd_alloc.h:270
routine that implements automatic gain control
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition: ckd_alloc.h:248
SPHINXBASE_EXPORT glist_t glist_add_int32(glist_t g, int32 val)
Create and prepend a new list node containing an integer.
Definition: glist.c:86
#define E_ERROR(...)
Print error message to error log.
Definition: err.h:104
SPHINXBASE_EXPORT mfcc_t *** feat_array_alloc(feat_t *fcb, int32 nfr)
Allocate an array to hold several frames worth of feature vectors.
Definition: feat.c:356
#define feat_dimension(f)
Total dimensionality of feature output.
Definition: feat.h:199
Sphinx's memory allocation/deallocation routines.
SPHINXBASE_EXPORT const char * agc_type_str[]
String representations of agc_type_t values.
Definition: agc.c:70
Apply Cepstral Mean Normalization (CMN) to the set of input mfc frames.
SPHINXBASE_EXPORT mfcc_t *** feat_array_realloc(feat_t *fcb, mfcc_t ***old_feat, int32 ofr, int32 nfr)
Reallocate the array of features.
Definition: feat.c:389
SPHINXBASE_EXPORT int32 stat_retry(const char *file, struct stat *statbuf)
There is no bitstream decoder, because a stream abstraction is too slow.
Definition: pio.c:489
Cross platform binary IO to process files in sphinx3 format.
#define E_INFOCONT(...)
Continue printing the information to standard error stream.
Definition: err.h:119
#define feat_cepsize(f)
Input dimensionality of feature.
Definition: feat.h:171
A node in a generic list.
Definition: glist.h:100
#define ckd_salloc(ptr)
Macro for ckd_salloc
Definition: ckd_alloc.h:264
Basic type definitions used in Sphinx.
Structure for describing a speech feature type Structure for describing a speech feature type (no...
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition: ckd_alloc.c:244
SPHINXBASE_EXPORT glist_t glist_add_ptr(glist_t g, void *ptr)
Create and prepend a new list node, with the given user-defined data, at the HEAD of the given generi...
Definition: glist.c:74
SPHINXBASE_EXPORT void cmn(cmn_t *cmn, mfcc_t **mfc, int32 varnorm, int32 n_frame)
CMN for the whole sentence.
Definition: cmn.c:100
SPHINXBASE_EXPORT agc_t * agc_init(void)
Initialize AGC structure with default values.
Definition: agc.c:91
SPHINXBASE_EXPORT void agc_free(agc_t *agc)
Free AGC structure.
Definition: agc.c:100
SPHINXBASE_EXPORT int32 ** parse_subvecs(char const *str)
Parse subvector specification string.
Definition: feat.c:169
enum agc_type_e agc_type_t
Types of acoustic gain control to apply to the features.
SPHINXBASE_EXPORT feat_t * feat_retain(feat_t *f)
Retain ownership of feat_t.
Definition: feat.c:1435
SPHINXBASE_EXPORT void glist_free(glist_t g)
Free the given generic list; user-defined data contained within is not automatically freed...
Definition: glist.c:133
SPHINXBASE_EXPORT int32 feat_s2mfc2feat(feat_t *fcb, const char *file, const char *dir, const char *cepext, int32 sf, int32 ef, mfcc_t ***feat, int32 maxfr)
Read a specified MFC file (or given segment within it), perform CMN/AGC as indicated by fcb...
Definition: feat.c:1170
#define gnode_ptr(g)
Head of a list of gnodes.
Definition: glist.h:109
SPHINXBASE_EXPORT void subvecs_free(int32 **subvecs)
Free array of subvector specs.
Definition: feat.c:267
compute the dynamic coefficients from the cepstral vector.
SPHINXBASE_EXPORT void agc_emax_set(agc_t *agc, float32 m)
Set the current AGC maximum estimate.
Definition: agc.c:129
Implementation of logging routines.
#define feat_window_size(f)
Size of dynamic feature window.
Definition: feat.h:175
#define feat_stream_len(f, i)
Length of feature stream i.
Definition: feat.h:187
Generic linked-lists maintenance.
#define E_WARN(...)
Print warning message to error log.
Definition: err.h:109
SPHINXBASE_EXPORT void feat_array_free(mfcc_t ***feat)
Free a buffer allocated with feat_array_alloc()
Definition: feat.c:418
#define feat_n_stream(f)
Number of feature streams.
Definition: feat.h:181
SPHINXBASE_EXPORT void agc_emax_update(agc_t *agc)
Update AGC parameters for next utterance.
Definition: agc.c:159
SPHINXBASE_EXPORT void ckd_free_3d(void *ptr)
Free a 3-D array (ptr) previously allocated by ckd_calloc_3d.
Definition: ckd_alloc.c:297
SPHINXBASE_EXPORT int32 fread_retry(void *pointer, int32 size, int32 num_items, FILE *stream)
NFS file reads seem to fail now and then.
Definition: pio.c:408
SPHINXBASE_EXPORT void feat_lda_transform(feat_t *fcb, mfcc_t ***inout_feat, uint32 nfr)
Transform a block of features using the feature module's LDA transform.
Definition: lda.c:139
enum cmn_type_e cmn_type_t
Types of cepstral mean normalization to apply to the features.
SPHINXBASE_EXPORT feat_t * feat_init(char const *type, cmn_type_t cmn, int32 varnorm, agc_type_t agc, int32 breport, int32 cepsize)
Initialize feature module to use the selected type of feature stream.
Definition: feat.c:705
SPHINXBASE_EXPORT const char * cmn_type_str[]
String representations of cmn_type_t values.
Definition: cmn.c:59
SPHINXBASE_EXPORT void ckd_free_2d(void *ptr)
Free a 2-D array (ptr) previously allocated by ckd_calloc_2d.
Definition: ckd_alloc.c:255
SPHINXBASE_EXPORT void feat_update_stats(feat_t *fcb)
Update the normalization stats, possibly in the end of utterance.
Definition: feat.c:1424
SPHINXBASE_EXPORT void agc_emax(agc_t *agc, mfcc_t **mfc, int32 n_frame)
Apply AGC to the given block of MFC vectors.
Definition: agc.c:142
SPHINXBASE_EXPORT void cmn_live(cmn_t *cmn, mfcc_t **incep, int32 varnorm, int32 nfr)
CMN for one block of data, using live mean.
Definition: cmn_live.c:145
#define feat_dimension1(f)
Number of streams or subvectors in feature output.
Definition: feat.h:191
#define E_INFO_NOFN(...)
Print logging information without filename.
Definition: err.h:124
#define E_FATAL(...)
Exit with non-zero status after error message.
Definition: err.h:81
SPHINXBASE_EXPORT int feat_free(feat_t *f)
Release resource associated with feat_t.
Definition: feat.c:1442
SPHINXBASE_EXPORT void cmn_live_update(cmn_t *cmn)
Update live mean based on observed data.
Definition: cmn_live.c:112
SPHINXBASE_EXPORT int feat_set_subvecs(feat_t *fcb, int32 **subvecs)
Add a subvector specification to the feature module.
Definition: feat.c:277
#define feat_dimension2(f, i)
Dimensionality of stream/subvector i in feature output.
Definition: feat.h:195
#define ckd_realloc(ptr, sz)
Macro for ckd_realloc
Definition: ckd_alloc.h:258
SPHINXBASE_EXPORT int32 glist_count(glist_t g)
Count the number of elements in a given linked list.
Definition: glist.c:145
file IO related operations.
SPHINXBASE_EXPORT int32 feat_s2mfc2feat_live(feat_t *fcb, mfcc_t **uttcep, int32 *inout_ncep, int32 beginutt, int32 endutt, mfcc_t ***ofeat)
Feature computation routine for live mode decoder.
Definition: feat.c:1308