PocketSphinx  5prealpha
acmod.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 
44 /* System headers. */
45 #include <assert.h>
46 #include <string.h>
47 #include <math.h>
48 
49 /* SphinxBase headers. */
50 #include <sphinxbase/prim_type.h>
51 #include <sphinxbase/err.h>
52 #include <sphinxbase/cmd_ln.h>
53 #include <sphinxbase/strfuncs.h>
54 #include <sphinxbase/byteorder.h>
55 #include <sphinxbase/feat.h>
56 #include <sphinxbase/bio.h>
57 
58 /* Local headers. */
59 #include "cmdln_macro.h"
60 #include "acmod.h"
61 #include "s2_semi_mgau.h"
62 #include "ptm_mgau.h"
63 #include "ms_mgau.h"
64 
65 static int32 acmod_process_mfcbuf(acmod_t *acmod);
66 
67 static int
68 acmod_init_am(acmod_t *acmod)
69 {
70  char const *mdeffn, *tmatfn, *mllrfn, *hmmdir;
71 
72  /* Read model definition. */
73  if ((mdeffn = cmd_ln_str_r(acmod->config, "_mdef")) == NULL) {
74  if ((hmmdir = cmd_ln_str_r(acmod->config, "-hmm")) == NULL)
75  E_ERROR("Acoustic model definition is not specified either "
76  "with -mdef option or with -hmm\n");
77  else
78  E_ERROR("Folder '%s' does not contain acoustic model "
79  "definition 'mdef'\n", hmmdir);
80 
81  return -1;
82  }
83 
84  if ((acmod->mdef = bin_mdef_read(acmod->config, mdeffn)) == NULL) {
85  E_ERROR("Failed to read acoustic model definition from %s\n", mdeffn);
86  return -1;
87  }
88 
89  /* Read transition matrices. */
90  if ((tmatfn = cmd_ln_str_r(acmod->config, "_tmat")) == NULL) {
91  E_ERROR("No tmat file specified\n");
92  return -1;
93  }
94  acmod->tmat = tmat_init(tmatfn, acmod->lmath,
95  cmd_ln_float32_r(acmod->config, "-tmatfloor"),
96  TRUE);
97 
98  /* Read the acoustic models. */
99  if ((cmd_ln_str_r(acmod->config, "_mean") == NULL)
100  || (cmd_ln_str_r(acmod->config, "_var") == NULL)
101  || (cmd_ln_str_r(acmod->config, "_tmat") == NULL)) {
102  E_ERROR("No mean/var/tmat files specified\n");
103  return -1;
104  }
105 
106  if (cmd_ln_str_r(acmod->config, "_senmgau")) {
107  E_INFO("Using general multi-stream GMM computation\n");
108  acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef);
109  if (acmod->mgau == NULL)
110  return -1;
111  }
112  else {
113  E_INFO("Attempting to use PTM computation module\n");
114  if ((acmod->mgau = ptm_mgau_init(acmod, acmod->mdef)) == NULL) {
115  E_INFO("Attempting to use semi-continuous computation module\n");
116  if ((acmod->mgau = s2_semi_mgau_init(acmod)) == NULL) {
117  E_INFO("Falling back to general multi-stream GMM computation\n");
118  acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef);
119  if (acmod->mgau == NULL) {
120  E_ERROR("Failed to read acoustic model\n");
121  return -1;
122  }
123  }
124  }
125  }
126 
127  /* If there is an MLLR transform, apply it. */
128  if ((mllrfn = cmd_ln_str_r(acmod->config, "-mllr"))) {
129  ps_mllr_t *mllr = ps_mllr_read(mllrfn);
130  if (mllr == NULL)
131  return -1;
132  acmod_update_mllr(acmod, mllr);
133  }
134 
135  return 0;
136 }
137 
138 static int
139 acmod_init_feat(acmod_t *acmod)
140 {
141  acmod->fcb =
142  feat_init(cmd_ln_str_r(acmod->config, "-feat"),
143  cmn_type_from_str(cmd_ln_str_r(acmod->config,"-cmn")),
144  cmd_ln_boolean_r(acmod->config, "-varnorm"),
145  agc_type_from_str(cmd_ln_str_r(acmod->config, "-agc")),
146  1, cmd_ln_int32_r(acmod->config, "-ceplen"));
147  if (acmod->fcb == NULL)
148  return -1;
149 
150  if (cmd_ln_str_r(acmod->config, "_lda")) {
151  E_INFO("Reading linear feature transformation from %s\n",
152  cmd_ln_str_r(acmod->config, "_lda"));
153  if (feat_read_lda(acmod->fcb,
154  cmd_ln_str_r(acmod->config, "_lda"),
155  cmd_ln_int32_r(acmod->config, "-ldadim")) < 0)
156  return -1;
157  }
158 
159  if (cmd_ln_str_r(acmod->config, "-svspec")) {
160  int32 **subvecs;
161  E_INFO("Using subvector specification %s\n",
162  cmd_ln_str_r(acmod->config, "-svspec"));
163  if ((subvecs = parse_subvecs(cmd_ln_str_r(acmod->config, "-svspec"))) == NULL)
164  return -1;
165  if ((feat_set_subvecs(acmod->fcb, subvecs)) < 0)
166  return -1;
167  }
168 
169  if (cmd_ln_exists_r(acmod->config, "-agcthresh")
170  && 0 != strcmp(cmd_ln_str_r(acmod->config, "-agc"), "none")) {
171  agc_set_threshold(acmod->fcb->agc_struct,
172  cmd_ln_float32_r(acmod->config, "-agcthresh"));
173  }
174 
175  if (acmod->fcb->cmn_struct
176  && cmd_ln_exists_r(acmod->config, "-cmninit")) {
177  char *c, *cc, *vallist;
178  int32 nvals;
179 
180  vallist = ckd_salloc(cmd_ln_str_r(acmod->config, "-cmninit"));
181  c = vallist;
182  nvals = 0;
183  while (nvals < acmod->fcb->cmn_struct->veclen
184  && (cc = strchr(c, ',')) != NULL) {
185  *cc = '\0';
186  acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c));
187  c = cc + 1;
188  ++nvals;
189  }
190  if (nvals < acmod->fcb->cmn_struct->veclen && *c != '\0') {
191  acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c));
192  }
193  ckd_free(vallist);
194  }
195  return 0;
196 }
197 
198 int
199 acmod_fe_mismatch(acmod_t *acmod, fe_t *fe)
200 {
201  /* Output vector dimension needs to be the same. */
202  if (cmd_ln_int32_r(acmod->config, "-ceplen") != fe_get_output_size(fe)) {
203  E_ERROR("Configured feature length %d doesn't match feature "
204  "extraction output size %d\n",
205  cmd_ln_int32_r(acmod->config, "-ceplen"),
206  fe_get_output_size(fe));
207  return TRUE;
208  }
209  /* Feature parameters need to be the same. */
210  /* ... */
211  return FALSE;
212 }
213 
214 int
215 acmod_feat_mismatch(acmod_t *acmod, feat_t *fcb)
216 {
217  /* Feature type needs to be the same. */
218  if (0 != strcmp(cmd_ln_str_r(acmod->config, "-feat"), feat_name(fcb)))
219  return TRUE;
220  /* Input vector dimension needs to be the same. */
221  if (cmd_ln_int32_r(acmod->config, "-ceplen") != feat_cepsize(fcb))
222  return TRUE;
223  /* FIXME: Need to check LDA and stuff too. */
224  return FALSE;
225 }
226 
227 acmod_t *
228 acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
229 {
230  acmod_t *acmod;
231 
232  acmod = ckd_calloc(1, sizeof(*acmod));
233  acmod->config = cmd_ln_retain(config);
234  acmod->lmath = lmath;
235  acmod->state = ACMOD_IDLE;
236 
237  /* Initialize feature computation. */
238  if (fe) {
239  if (acmod_fe_mismatch(acmod, fe))
240  goto error_out;
241  fe_retain(fe);
242  acmod->fe = fe;
243  }
244  else {
245  /* Initialize a new front end. */
246  acmod->fe = fe_init_auto_r(config);
247  if (acmod->fe == NULL)
248  goto error_out;
249  if (acmod_fe_mismatch(acmod, acmod->fe))
250  goto error_out;
251  }
252  if (fcb) {
253  if (acmod_feat_mismatch(acmod, fcb))
254  goto error_out;
255  feat_retain(fcb);
256  acmod->fcb = fcb;
257  }
258  else {
259  /* Initialize a new fcb. */
260  if (acmod_init_feat(acmod) < 0)
261  goto error_out;
262  }
263 
264  /* Load acoustic model parameters. */
265  if (acmod_init_am(acmod) < 0)
266  goto error_out;
267 
268 
269  /* The MFCC buffer needs to be at least as large as the dynamic
270  * feature window. */
271  acmod->n_mfc_alloc = acmod->fcb->window_size * 2 + 1;
272  acmod->mfc_buf = (mfcc_t **)
273  ckd_calloc_2d(acmod->n_mfc_alloc, acmod->fcb->cepsize,
274  sizeof(**acmod->mfc_buf));
275 
276  /* Feature buffer has to be at least as large as MFCC buffer. */
277  acmod->n_feat_alloc = acmod->n_mfc_alloc + cmd_ln_int32_r(config, "-pl_window");
278  acmod->feat_buf = feat_array_alloc(acmod->fcb, acmod->n_feat_alloc);
279  acmod->framepos = ckd_calloc(acmod->n_feat_alloc, sizeof(*acmod->framepos));
280 
281  acmod->utt_start_frame = 0;
282 
283  /* Senone computation stuff. */
284  acmod->senone_scores = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
285  sizeof(*acmod->senone_scores));
286  acmod->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(acmod->mdef));
287  acmod->senone_active = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
288  sizeof(*acmod->senone_active));
289  acmod->log_zero = logmath_get_zero(acmod->lmath);
290  acmod->compallsen = cmd_ln_boolean_r(config, "-compallsen");
291  return acmod;
292 
293 error_out:
294  acmod_free(acmod);
295  return NULL;
296 }
297 
298 void
300 {
301  if (acmod == NULL)
302  return;
303 
304  feat_free(acmod->fcb);
305  fe_free(acmod->fe);
306  cmd_ln_free_r(acmod->config);
307 
308  if (acmod->mfc_buf)
309  ckd_free_2d((void **)acmod->mfc_buf);
310  if (acmod->feat_buf)
311  feat_array_free(acmod->feat_buf);
312 
313  if (acmod->mfcfh)
314  fclose(acmod->mfcfh);
315  if (acmod->rawfh)
316  fclose(acmod->rawfh);
317  if (acmod->senfh)
318  fclose(acmod->senfh);
319 
320  ckd_free(acmod->framepos);
321  ckd_free(acmod->senone_scores);
322  ckd_free(acmod->senone_active_vec);
323  ckd_free(acmod->senone_active);
324  ckd_free(acmod->rawdata);
325 
326  if (acmod->mdef)
327  bin_mdef_free(acmod->mdef);
328  if (acmod->tmat)
329  tmat_free(acmod->tmat);
330  if (acmod->mgau)
331  ps_mgau_free(acmod->mgau);
332  if (acmod->mllr)
333  ps_mllr_free(acmod->mllr);
334 
335  ckd_free(acmod);
336 }
337 
338 ps_mllr_t *
340 {
341  if (acmod->mllr)
342  ps_mllr_free(acmod->mllr);
343  acmod->mllr = mllr;
344  ps_mgau_transform(acmod->mgau, mllr);
345 
346  return mllr;
347 }
348 
349 int
350 acmod_write_senfh_header(acmod_t *acmod, FILE *logfh)
351 {
352  char nsenstr[64], logbasestr[64];
353 
354  sprintf(nsenstr, "%d", bin_mdef_n_sen(acmod->mdef));
355  sprintf(logbasestr, "%f", logmath_get_base(acmod->lmath));
356  return bio_writehdr(logfh,
357  "version", "0.1",
358  "mdef_file", cmd_ln_str_r(acmod->config, "_mdef"),
359  "n_sen", nsenstr,
360  "logbase", logbasestr, NULL);
361 }
362 
363 int
364 acmod_set_senfh(acmod_t *acmod, FILE *logfh)
365 {
366  if (acmod->senfh)
367  fclose(acmod->senfh);
368  acmod->senfh = logfh;
369  if (logfh == NULL)
370  return 0;
371  return acmod_write_senfh_header(acmod, logfh);
372 }
373 
374 int
375 acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
376 {
377  int rv = 0;
378 
379  if (acmod->mfcfh)
380  fclose(acmod->mfcfh);
381  acmod->mfcfh = logfh;
382  fwrite(&rv, 4, 1, acmod->mfcfh);
383  return rv;
384 }
385 
386 int
387 acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
388 {
389  if (acmod->rawfh)
390  fclose(acmod->rawfh);
391  acmod->rawfh = logfh;
392  return 0;
393 }
394 
395 void
396 acmod_grow_feat_buf(acmod_t *acmod, int nfr)
397 {
398  if (nfr > MAX_N_FRAMES)
399  E_FATAL("Decoder can not process more than %d frames at once, "
400  "requested %d\n", MAX_N_FRAMES, nfr);
401 
402  acmod->feat_buf = feat_array_realloc(acmod->fcb, acmod->feat_buf,
403  acmod->n_feat_alloc, nfr);
404  acmod->framepos = ckd_realloc(acmod->framepos,
405  nfr * sizeof(*acmod->framepos));
406  acmod->n_feat_alloc = nfr;
407 }
408 
409 int
410 acmod_set_grow(acmod_t *acmod, int grow_feat)
411 {
412  int tmp = acmod->grow_feat;
413  acmod->grow_feat = grow_feat;
414 
415  /* Expand feat_buf to a reasonable size to start with. */
416  if (grow_feat && acmod->n_feat_alloc < 128)
417  acmod_grow_feat_buf(acmod, 128);
418 
419  return tmp;
420 }
421 
422 int
424 {
425  fe_start_utt(acmod->fe);
426  acmod->state = ACMOD_STARTED;
427  acmod->n_mfc_frame = 0;
428  acmod->n_feat_frame = 0;
429  acmod->mfc_outidx = 0;
430  acmod->feat_outidx = 0;
431  acmod->output_frame = 0;
432  acmod->senscr_frame = -1;
433  acmod->n_senone_active = 0;
434  acmod->mgau->frame_idx = 0;
435  acmod->rawdata_pos = 0;
436 
437  return 0;
438 }
439 
440 int
442 {
443  int32 nfr = 0;
444 
445  acmod->state = ACMOD_ENDED;
446  if (acmod->n_mfc_frame < acmod->n_mfc_alloc) {
447  int inptr;
448  /* Where to start writing them (circular buffer) */
449  inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc;
450  /* nfr is always either zero or one. */
451  fe_end_utt(acmod->fe, acmod->mfc_buf[inptr], &nfr);
452  acmod->n_mfc_frame += nfr;
453 
454  /* Process whatever's left, and any leadout or update stats if needed. */
455  if (nfr)
456  nfr = acmod_process_mfcbuf(acmod);
457  else
458  feat_update_stats(acmod->fcb);
459  }
460  if (acmod->mfcfh) {
461  long outlen;
462  int32 rv;
463  outlen = (ftell(acmod->mfcfh) - 4) / 4;
464  /* Try to seek and write */
465  if ((rv = fseek(acmod->mfcfh, 0, SEEK_SET)) == 0) {
466  fwrite(&outlen, 4, 1, acmod->mfcfh);
467  }
468  fclose(acmod->mfcfh);
469  acmod->mfcfh = NULL;
470  }
471  if (acmod->rawfh) {
472  fclose(acmod->rawfh);
473  acmod->rawfh = NULL;
474  }
475 
476  if (acmod->senfh) {
477  fclose(acmod->senfh);
478  acmod->senfh = NULL;
479  }
480 
481  return nfr;
482 }
483 
484 static int
485 acmod_log_mfc(acmod_t *acmod,
486  mfcc_t **cep, int n_frames)
487 {
488  int n = n_frames * feat_cepsize(acmod->fcb);
489  /* Write features. */
490  if (fwrite(cep[0], sizeof(mfcc_t), n, acmod->mfcfh) != n) {
491  E_ERROR_SYSTEM("Failed to write %d values to file", n);
492  }
493  return 0;
494 }
495 
496 static int
497 acmod_process_full_cep(acmod_t *acmod,
498  mfcc_t ***inout_cep,
499  int *inout_n_frames)
500 {
501  int32 nfr;
502 
503  /* Write to file. */
504  if (acmod->mfcfh)
505  acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);
506 
507  /* Resize feat_buf to fit. */
508  if (acmod->n_feat_alloc < *inout_n_frames) {
509 
510  if (*inout_n_frames > MAX_N_FRAMES)
511  E_FATAL("Batch processing can not process more than %d frames "
512  "at once, requested %d\n", MAX_N_FRAMES, *inout_n_frames);
513 
514  feat_array_free(acmod->feat_buf);
515  acmod->feat_buf = feat_array_alloc(acmod->fcb, *inout_n_frames);
516  acmod->n_feat_alloc = *inout_n_frames;
517  acmod->n_feat_frame = 0;
518  acmod->feat_outidx = 0;
519  }
520  /* Make dynamic features. */
521  nfr = feat_s2mfc2feat_live(acmod->fcb, *inout_cep, inout_n_frames,
522  TRUE, TRUE, acmod->feat_buf);
523  acmod->n_feat_frame = nfr;
524  assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
525  *inout_cep += *inout_n_frames;
526  *inout_n_frames = 0;
527 
528  return nfr;
529 }
530 
531 static int
532 acmod_process_full_raw(acmod_t *acmod,
533  int16 const **inout_raw,
534  size_t *inout_n_samps)
535 {
536  int32 nfr, ntail;
537  mfcc_t **cepptr;
538 
539  /* Write to logging file if any. */
540  if (*inout_n_samps + acmod->rawdata_pos < acmod->rawdata_size) {
541  memcpy(acmod->rawdata + acmod->rawdata_pos, *inout_raw, *inout_n_samps * sizeof(int16));
542  acmod->rawdata_pos += *inout_n_samps;
543  }
544  if (acmod->rawfh)
545  fwrite(*inout_raw, sizeof(int16), *inout_n_samps, acmod->rawfh);
546  /* Resize mfc_buf to fit. */
547  if (fe_process_frames(acmod->fe, NULL, inout_n_samps, NULL, &nfr, NULL) < 0)
548  return -1;
549  if (acmod->n_mfc_alloc < nfr + 1) {
550  ckd_free_2d(acmod->mfc_buf);
551  acmod->mfc_buf = ckd_calloc_2d(nfr + 1, fe_get_output_size(acmod->fe),
552  sizeof(**acmod->mfc_buf));
553  acmod->n_mfc_alloc = nfr + 1;
554  }
555  acmod->n_mfc_frame = 0;
556  acmod->mfc_outidx = 0;
557  fe_start_utt(acmod->fe);
558  if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
559  acmod->mfc_buf, &nfr, NULL) < 0)
560  return -1;
561  fe_end_utt(acmod->fe, acmod->mfc_buf[nfr], &ntail);
562  nfr += ntail;
563 
564  cepptr = acmod->mfc_buf;
565  nfr = acmod_process_full_cep(acmod, &cepptr, &nfr);
566  acmod->n_mfc_frame = 0;
567  return nfr;
568 }
569 
573 static int32
574 acmod_process_mfcbuf(acmod_t *acmod)
575 {
576  mfcc_t **mfcptr;
577  int32 ncep;
578 
579  ncep = acmod->n_mfc_frame;
580  /* Also do this in two parts because of the circular mfc_buf. */
581  if (acmod->mfc_outidx + ncep > acmod->n_mfc_alloc) {
582  int32 ncep1 = acmod->n_mfc_alloc - acmod->mfc_outidx;
583  int saved_state = acmod->state;
584 
585  /* Make sure we don't end the utterance here. */
586  if (acmod->state == ACMOD_ENDED)
587  acmod->state = ACMOD_PROCESSING;
588  mfcptr = acmod->mfc_buf + acmod->mfc_outidx;
589  ncep1 = acmod_process_cep(acmod, &mfcptr, &ncep1, FALSE);
590  /* It's possible that not all available frames were filled. */
591  ncep -= ncep1;
592  acmod->n_mfc_frame -= ncep1;
593  acmod->mfc_outidx += ncep1;
594  acmod->mfc_outidx %= acmod->n_mfc_alloc;
595  /* Restore original state (could this really be the end) */
596  acmod->state = saved_state;
597  }
598  mfcptr = acmod->mfc_buf + acmod->mfc_outidx;
599  ncep = acmod_process_cep(acmod, &mfcptr, &ncep, FALSE);
600  acmod->n_mfc_frame -= ncep;
601  acmod->mfc_outidx += ncep;
602  acmod->mfc_outidx %= acmod->n_mfc_alloc;
603  return ncep;
604 }
605 
606 int
608  int16 const **inout_raw,
609  size_t *inout_n_samps,
610  int full_utt)
611 {
612  int32 ncep;
613  int32 out_frameidx;
614  int16 const *prev_audio_inptr;
615 
616  /* If this is a full utterance, process it all at once. */
617  if (full_utt)
618  return acmod_process_full_raw(acmod, inout_raw, inout_n_samps);
619 
620  /* Append MFCCs to the end of any that are previously in there
621  * (in practice, there will probably be none) */
622  if (inout_n_samps && *inout_n_samps) {
623  int inptr;
624  int32 processed_samples;
625 
626  prev_audio_inptr = *inout_raw;
627  /* Total number of frames available. */
628  ncep = acmod->n_mfc_alloc - acmod->n_mfc_frame;
629  /* Where to start writing them (circular buffer) */
630  inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc;
631 
632  /* Write them in two (or more) parts if there is wraparound. */
633  while (inptr + ncep > acmod->n_mfc_alloc) {
634  int32 ncep1 = acmod->n_mfc_alloc - inptr;
635  if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
636  acmod->mfc_buf + inptr, &ncep1, &out_frameidx) < 0)
637  return -1;
638 
639  if (out_frameidx > 0)
640  acmod->utt_start_frame = out_frameidx;
641 
642  processed_samples = *inout_raw - prev_audio_inptr;
643  if (processed_samples + acmod->rawdata_pos < acmod->rawdata_size) {
644  memcpy(acmod->rawdata + acmod->rawdata_pos, prev_audio_inptr, processed_samples * sizeof(int16));
645  acmod->rawdata_pos += processed_samples;
646  }
647  /* Write to logging file if any. */
648  if (acmod->rawfh) {
649  fwrite(prev_audio_inptr, sizeof(int16),
650  processed_samples,
651  acmod->rawfh);
652  }
653  prev_audio_inptr = *inout_raw;
654 
655  /* ncep1 now contains the number of frames actually
656  * processed. This is a good thing, but it means we
657  * actually still might have some room left at the end of
658  * the buffer, hence the while loop. Unfortunately it
659  * also means that in the case where we are really
660  * actually done, we need to get out totally, hence the
661  * goto. */
662  acmod->n_mfc_frame += ncep1;
663  ncep -= ncep1;
664  inptr += ncep1;
665  inptr %= acmod->n_mfc_alloc;
666  if (ncep1 == 0)
667  goto alldone;
668  }
669 
670  assert(inptr + ncep <= acmod->n_mfc_alloc);
671  if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
672  acmod->mfc_buf + inptr, &ncep, &out_frameidx) < 0)
673  return -1;
674 
675  if (out_frameidx > 0)
676  acmod->utt_start_frame = out_frameidx;
677 
678 
679  processed_samples = *inout_raw - prev_audio_inptr;
680  if (processed_samples + acmod->rawdata_pos < acmod->rawdata_size) {
681  memcpy(acmod->rawdata + acmod->rawdata_pos, prev_audio_inptr, processed_samples * sizeof(int16));
682  acmod->rawdata_pos += processed_samples;
683  }
684  if (acmod->rawfh) {
685  fwrite(prev_audio_inptr, sizeof(int16),
686  processed_samples, acmod->rawfh);
687  }
688  prev_audio_inptr = *inout_raw;
689  acmod->n_mfc_frame += ncep;
690  alldone:
691  ;
692  }
693 
694  /* Hand things off to acmod_process_cep. */
695  return acmod_process_mfcbuf(acmod);
696 }
697 
698 int
700  mfcc_t ***inout_cep,
701  int *inout_n_frames,
702  int full_utt)
703 {
704  int32 nfeat, ncep, inptr;
705  int orig_n_frames;
706 
707  /* If this is a full utterance, process it all at once. */
708  if (full_utt)
709  return acmod_process_full_cep(acmod, inout_cep, inout_n_frames);
710 
711  /* Write to file. */
712  if (acmod->mfcfh)
713  acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);
714 
715  /* Maximum number of frames we're going to generate. */
716  orig_n_frames = ncep = nfeat = *inout_n_frames;
717 
718  /* FIXME: This behaviour isn't guaranteed... */
719  if (acmod->state == ACMOD_ENDED)
720  nfeat += feat_window_size(acmod->fcb);
721  else if (acmod->state == ACMOD_STARTED)
722  nfeat -= feat_window_size(acmod->fcb);
723 
724  /* Clamp number of features to fit available space. */
725  if (nfeat > acmod->n_feat_alloc - acmod->n_feat_frame) {
726  /* Grow it as needed - we have to grow it at the end of an
727  * utterance because we can't return a short read there. */
728  if (acmod->grow_feat || acmod->state == ACMOD_ENDED)
729  acmod_grow_feat_buf(acmod, acmod->n_feat_alloc + nfeat);
730  else
731  ncep -= (nfeat - (acmod->n_feat_alloc - acmod->n_feat_frame));
732  }
733 
734  /* Where to start writing in the feature buffer. */
735  if (acmod->grow_feat) {
736  /* Grow to avoid wraparound if grow_feat == TRUE. */
737  inptr = acmod->feat_outidx + acmod->n_feat_frame;
738  while (inptr + nfeat >= acmod->n_feat_alloc)
739  acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
740  }
741  else {
742  inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
743  }
744 
745 
746  /* FIXME: we can't split the last frame drop properly to be on the bounary,
747  * so just return
748  */
749  if (inptr + nfeat > acmod->n_feat_alloc && acmod->state == ACMOD_ENDED) {
750  *inout_n_frames -= ncep;
751  *inout_cep += ncep;
752  return 0;
753  }
754 
755  /* Write them in two parts if there is wraparound. */
756  if (inptr + nfeat > acmod->n_feat_alloc) {
757  int32 ncep1 = acmod->n_feat_alloc - inptr;
758 
759  /* Make sure we don't end the utterance here. */
760  nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep,
761  &ncep1,
762  (acmod->state == ACMOD_STARTED),
763  FALSE,
764  acmod->feat_buf + inptr);
765  if (nfeat < 0)
766  return -1;
767  /* Move the output feature pointer forward. */
768  acmod->n_feat_frame += nfeat;
769  assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
770  inptr += nfeat;
771  inptr %= acmod->n_feat_alloc;
772  /* Move the input feature pointers forward. */
773  *inout_n_frames -= ncep1;
774  *inout_cep += ncep1;
775  ncep -= ncep1;
776  }
777 
778  nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep,
779  &ncep,
780  (acmod->state == ACMOD_STARTED),
781  (acmod->state == ACMOD_ENDED),
782  acmod->feat_buf + inptr);
783  if (nfeat < 0)
784  return -1;
785  acmod->n_feat_frame += nfeat;
786  assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
787  /* Move the input feature pointers forward. */
788  *inout_n_frames -= ncep;
789  *inout_cep += ncep;
790  if (acmod->state == ACMOD_STARTED)
791  acmod->state = ACMOD_PROCESSING;
792 
793  return orig_n_frames - *inout_n_frames;
794 }
795 
796 int
798  mfcc_t **feat)
799 {
800  int i, inptr;
801 
802  if (acmod->n_feat_frame == acmod->n_feat_alloc) {
803  if (acmod->grow_feat)
804  acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
805  else
806  return 0;
807  }
808 
809  if (acmod->grow_feat) {
810  /* Grow to avoid wraparound if grow_feat == TRUE. */
811  inptr = acmod->feat_outidx + acmod->n_feat_frame;
812  while (inptr + 1 >= acmod->n_feat_alloc)
813  acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
814  }
815  else {
816  inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
817  }
818  for (i = 0; i < feat_dimension1(acmod->fcb); ++i)
819  memcpy(acmod->feat_buf[inptr][i],
820  feat[i], feat_dimension2(acmod->fcb, i) * sizeof(**feat));
821  ++acmod->n_feat_frame;
822  assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
823 
824  return 1;
825 }
826 
827 static int
828 acmod_read_senfh_header(acmod_t *acmod)
829 {
830  char **name, **val;
831  int32 swap;
832  int i;
833 
834  if (bio_readhdr(acmod->insenfh, &name, &val, &swap) < 0)
835  goto error_out;
836  for (i = 0; name[i] != NULL; ++i) {
837  if (!strcmp(name[i], "n_sen")) {
838  if (atoi(val[i]) != bin_mdef_n_sen(acmod->mdef)) {
839  E_ERROR("Number of senones in senone file (%d) does not "
840  "match mdef (%d)\n", atoi(val[i]),
841  bin_mdef_n_sen(acmod->mdef));
842  goto error_out;
843  }
844  }
845 
846  if (!strcmp(name[i], "logbase")) {
847  if (fabs(atof_c(val[i]) - logmath_get_base(acmod->lmath)) > 0.001) {
848  E_ERROR("Logbase in senone file (%f) does not match acmod "
849  "(%f)\n", atof_c(val[i]),
850  logmath_get_base(acmod->lmath));
851  goto error_out;
852  }
853  }
854  }
855  acmod->insen_swap = swap;
856  bio_hdrarg_free(name, val);
857  return 0;
858 error_out:
859  bio_hdrarg_free(name, val);
860  return -1;
861 }
862 
863 int
864 acmod_set_insenfh(acmod_t *acmod, FILE *senfh)
865 {
866  acmod->insenfh = senfh;
867  if (senfh == NULL) {
868  acmod->n_feat_frame = 0;
869  acmod->compallsen = cmd_ln_boolean_r(acmod->config, "-compallsen");
870  return 0;
871  }
872  acmod->compallsen = TRUE;
873  return acmod_read_senfh_header(acmod);
874 }
875 
876 int
878 {
879  /* If the feature buffer is circular, this is not possible. */
880  if (acmod->output_frame > acmod->n_feat_alloc) {
881  E_ERROR("Circular feature buffer cannot be rewound (output frame %d, "
882  "alloc %d)\n", acmod->output_frame, acmod->n_feat_alloc);
883  return -1;
884  }
885 
886  /* Frames consumed + frames available */
887  acmod->n_feat_frame = acmod->output_frame + acmod->n_feat_frame;
888 
889  /* Reset output pointers. */
890  acmod->feat_outidx = 0;
891  acmod->output_frame = 0;
892  acmod->senscr_frame = -1;
893  acmod->mgau->frame_idx = 0;
894 
895  return 0;
896 }
897 
898 int
900 {
901  /* Advance the output pointers. */
902  if (++acmod->feat_outidx == acmod->n_feat_alloc)
903  acmod->feat_outidx = 0;
904  --acmod->n_feat_frame;
905  ++acmod->mgau->frame_idx;
906 
907  return ++acmod->output_frame;
908 }
909 
910 int
911 acmod_write_scores(acmod_t *acmod, int n_active, uint8 const *active,
912  int16 const *senscr, FILE *senfh)
913 {
914  int16 n_active2;
915 
916  /* Uncompressed frame format:
917  *
918  * (2 bytes) n_active: Number of active senones
919  * If all senones active:
920  * (n_active * 2 bytes) scores of active senones
921  *
922  * Otherwise:
923  * (2 bytes) n_active: Number of active senones
924  * (n_active bytes) deltas to active senones
925  * (n_active * 2 bytes) scores of active senones
926  */
927  n_active2 = n_active;
928  if (fwrite(&n_active2, 2, 1, senfh) != 1)
929  goto error_out;
930  if (n_active == bin_mdef_n_sen(acmod->mdef)) {
931  if (fwrite(senscr, 2, n_active, senfh) != n_active)
932  goto error_out;
933  }
934  else {
935  int i, n;
936  if (fwrite(active, 1, n_active, senfh) != n_active)
937  goto error_out;
938  for (i = n = 0; i < n_active; ++i) {
939  n += active[i];
940  if (fwrite(senscr + n, 2, 1, senfh) != 1)
941  goto error_out;
942  }
943  }
944  return 0;
945 error_out:
946  E_ERROR_SYSTEM("Failed to write frame to senone file");
947  return -1;
948 }
949 
953 static int
954 acmod_read_scores_internal(acmod_t *acmod)
955 {
956  FILE *senfh = acmod->insenfh;
957  int16 n_active;
958  size_t rv;
959 
960  if (acmod->n_feat_frame == acmod->n_feat_alloc) {
961  if (acmod->grow_feat)
962  acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
963  else
964  return 0;
965  }
966 
967  if (senfh == NULL)
968  return -1;
969 
970  if ((rv = fread(&n_active, 2, 1, senfh)) != 1)
971  goto error_out;
972 
973  acmod->n_senone_active = n_active;
974  if (acmod->n_senone_active == bin_mdef_n_sen(acmod->mdef)) {
975  if ((rv = fread(acmod->senone_scores, 2,
976  acmod->n_senone_active, senfh)) != acmod->n_senone_active)
977  goto error_out;
978  }
979  else {
980  int i, n;
981 
982  if ((rv = fread(acmod->senone_active, 1,
983  acmod->n_senone_active, senfh)) != acmod->n_senone_active)
984  goto error_out;
985 
986  for (i = 0, n = 0; i < acmod->n_senone_active; ++i) {
987  int j, sen = n + acmod->senone_active[i];
988  for (j = n + 1; j < sen; ++j)
989  acmod->senone_scores[j] = SENSCR_DUMMY;
990 
991  if ((rv = fread(acmod->senone_scores + sen, 2, 1, senfh)) != 1)
992  goto error_out;
993 
994  n = sen;
995  }
996 
997  n++;
998  while (n < bin_mdef_n_sen(acmod->mdef))
999  acmod->senone_scores[n++] = SENSCR_DUMMY;
1000  }
1001  return 1;
1002 
1003 error_out:
1004  if (ferror(senfh)) {
1005  E_ERROR_SYSTEM("Failed to read frame from senone file");
1006  return -1;
1007  }
1008  return 0;
1009 }
1010 
1011 int
1013 {
1014  int inptr, rv;
1015 
1016  if (acmod->grow_feat) {
1017  /* Grow to avoid wraparound if grow_feat == TRUE. */
1018  inptr = acmod->feat_outidx + acmod->n_feat_frame;
1019  /* Has to be +1, otherwise, next time acmod_advance() is
1020  * called, this will wrap around. */
1021  while (inptr + 1 >= acmod->n_feat_alloc)
1022  acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
1023  }
1024  else {
1025  inptr = (acmod->feat_outidx + acmod->n_feat_frame) %
1026  acmod->n_feat_alloc;
1027  }
1028 
1029  if ((rv = acmod_read_scores_internal(acmod)) != 1)
1030  return rv;
1031 
1032  /* Set acmod->senscr_frame appropriately so that these scores
1033  get reused below in acmod_score(). */
1034  acmod->senscr_frame = acmod->output_frame + acmod->n_feat_frame;
1035 
1036  E_DEBUG(1,("Frame %d has %d active states\n",
1037  acmod->senscr_frame, acmod->n_senone_active));
1038 
1039  /* Increment the "feature frame counter" and record the file
1040  * position for the relevant frame in the (possibly circular)
1041  * buffer. */
1042  ++acmod->n_feat_frame;
1043  acmod->framepos[inptr] = ftell(acmod->insenfh);
1044 
1045  return 1;
1046 }
1047 
1048 static int
1049 calc_frame_idx(acmod_t *acmod, int *inout_frame_idx)
1050 {
1051  int frame_idx;
1052 
1053  /* Calculate the absolute frame index to be scored. */
1054  if (inout_frame_idx == NULL)
1055  frame_idx = acmod->output_frame;
1056  else if (*inout_frame_idx < 0)
1057  frame_idx = acmod->output_frame + 1 + *inout_frame_idx;
1058  else
1059  frame_idx = *inout_frame_idx;
1060 
1061  return frame_idx;
1062 }
1063 
1064 static int
1065 calc_feat_idx(acmod_t *acmod, int frame_idx)
1066 {
1067  int n_backfr, feat_idx;
1068 
1069  n_backfr = acmod->n_feat_alloc - acmod->n_feat_frame;
1070  if (frame_idx < 0 || acmod->output_frame - frame_idx > n_backfr) {
1071  E_ERROR("Frame %d outside queue of %d frames, %d alloc (%d > %d), "
1072  "cannot score\n", frame_idx, acmod->n_feat_frame,
1073  acmod->n_feat_alloc, acmod->output_frame - frame_idx,
1074  n_backfr);
1075  return -1;
1076  }
1077 
1078  /* Get the index in feat_buf/framepos of the frame to be scored. */
1079  feat_idx = (acmod->feat_outidx + frame_idx - acmod->output_frame) %
1080  acmod->n_feat_alloc;
1081  if (feat_idx < 0)
1082  feat_idx += acmod->n_feat_alloc;
1083 
1084  return feat_idx;
1085 }
1086 
1087 mfcc_t **
1088 acmod_get_frame(acmod_t *acmod, int *inout_frame_idx)
1089 {
1090  int frame_idx, feat_idx;
1091 
1092  /* Calculate the absolute frame index requested. */
1093  frame_idx = calc_frame_idx(acmod, inout_frame_idx);
1094 
1095  /* Calculate position of requested frame in circular buffer. */
1096  if ((feat_idx = calc_feat_idx(acmod, frame_idx)) < 0)
1097  return NULL;
1098 
1099  if (inout_frame_idx)
1100  *inout_frame_idx = frame_idx;
1101 
1102  return acmod->feat_buf[feat_idx];
1103 }
1104 
1105 int16 const *
1106 acmod_score(acmod_t *acmod, int *inout_frame_idx)
1107 {
1108  int frame_idx, feat_idx;
1109 
1110  /* Calculate the absolute frame index to be scored. */
1111  frame_idx = calc_frame_idx(acmod, inout_frame_idx);
1112 
1113  /* If all senones are being computed, or we are using a senone file,
1114  then we can reuse existing scores. */
1115  if ((acmod->compallsen || acmod->insenfh)
1116  && frame_idx == acmod->senscr_frame) {
1117  if (inout_frame_idx)
1118  *inout_frame_idx = frame_idx;
1119  return acmod->senone_scores;
1120  }
1121 
1122  /* Calculate position of requested frame in circular buffer. */
1123  if ((feat_idx = calc_feat_idx(acmod, frame_idx)) < 0)
1124  return NULL;
1125 
1126  /*
1127  * If there is an input senone file locate the appropriate frame and read
1128  * it.
1129  */
1130  if (acmod->insenfh) {
1131  fseek(acmod->insenfh, acmod->framepos[feat_idx], SEEK_SET);
1132  if (acmod_read_scores_internal(acmod) < 0)
1133  return NULL;
1134  }
1135  else {
1136  /* Build active senone list. */
1137  acmod_flags2list(acmod);
1138 
1139  /* Generate scores for the next available frame */
1140  ps_mgau_frame_eval(acmod->mgau,
1141  acmod->senone_scores,
1142  acmod->senone_active,
1143  acmod->n_senone_active,
1144  acmod->feat_buf[feat_idx],
1145  frame_idx,
1146  acmod->compallsen);
1147  }
1148 
1149  if (inout_frame_idx)
1150  *inout_frame_idx = frame_idx;
1151  acmod->senscr_frame = frame_idx;
1152 
1153  /* Dump scores to the senone dump file if one exists. */
1154  if (acmod->senfh) {
1155  if (acmod_write_scores(acmod, acmod->n_senone_active,
1156  acmod->senone_active,
1157  acmod->senone_scores,
1158  acmod->senfh) < 0)
1159  return NULL;
1160  E_DEBUG(1,("Frame %d has %d active states\n", frame_idx,
1161  acmod->n_senone_active));
1162  }
1163 
1164  return acmod->senone_scores;
1165 }
1166 
1167 int
1168 acmod_best_score(acmod_t *acmod, int *out_best_senid)
1169 {
1170  int i, best;
1171 
1172  best = SENSCR_DUMMY;
1173  if (acmod->compallsen) {
1174  for (i = 0; i < bin_mdef_n_sen(acmod->mdef); ++i) {
1175  if (acmod->senone_scores[i] < best) {
1176  best = acmod->senone_scores[i];
1177  *out_best_senid = i;
1178  }
1179  }
1180  }
1181  else {
1182  int16 *senscr;
1183  senscr = acmod->senone_scores;
1184  for (i = 0; i < acmod->n_senone_active; ++i) {
1185  senscr += acmod->senone_active[i];
1186  if (*senscr < best) {
1187  best = *senscr;
1188  *out_best_senid = i;
1189  }
1190  }
1191  }
1192  return best;
1193 }
1194 
1195 
1196 void
1198 {
1199  if (acmod->compallsen)
1200  return;
1201  bitvec_clear_all(acmod->senone_active_vec, bin_mdef_n_sen(acmod->mdef));
1202  acmod->n_senone_active = 0;
1203 }
1204 
/* Flag the senone for state i of multiplex HMM h as active in a's
 * senone_active_vec, unless that state's senone sequence ID is
 * BAD_SSID.  Wrapped in do/while(0) so it expands to a single statement
 * and cannot capture a following `else'. */
#define MPX_BITVEC_SET(a,h,i)                                   \
    do {                                                        \
        if (hmm_mpx_ssid(h,i) != BAD_SSID)                      \
            bitvec_set((a)->senone_active_vec,                  \
                       hmm_mpx_senid(h,i));                     \
    } while (0)
/* Flag the senone for state i of non-multiplex HMM h as active in a's
 * senone_active_vec. */
#define NONMPX_BITVEC_SET(a,h,i)                                \
    bitvec_set((a)->senone_active_vec,                          \
               hmm_nonmpx_senid(h,i))
1211 
1212 void
1214 {
1215  int i;
1216 
1217  if (acmod->compallsen)
1218  return;
1219  if (hmm_is_mpx(hmm)) {
1220  switch (hmm_n_emit_state(hmm)) {
1221  case 5:
1222  MPX_BITVEC_SET(acmod, hmm, 4);
1223  MPX_BITVEC_SET(acmod, hmm, 3);
1224  case 3:
1225  MPX_BITVEC_SET(acmod, hmm, 2);
1226  MPX_BITVEC_SET(acmod, hmm, 1);
1227  MPX_BITVEC_SET(acmod, hmm, 0);
1228  break;
1229  default:
1230  for (i = 0; i < hmm_n_emit_state(hmm); ++i) {
1231  MPX_BITVEC_SET(acmod, hmm, i);
1232  }
1233  }
1234  }
1235  else {
1236  switch (hmm_n_emit_state(hmm)) {
1237  case 5:
1238  NONMPX_BITVEC_SET(acmod, hmm, 4);
1239  NONMPX_BITVEC_SET(acmod, hmm, 3);
1240  case 3:
1241  NONMPX_BITVEC_SET(acmod, hmm, 2);
1242  NONMPX_BITVEC_SET(acmod, hmm, 1);
1243  NONMPX_BITVEC_SET(acmod, hmm, 0);
1244  break;
1245  default:
1246  for (i = 0; i < hmm_n_emit_state(hmm); ++i) {
1247  NONMPX_BITVEC_SET(acmod, hmm, i);
1248  }
1249  }
1250  }
1251 }
1252 
1253 int32
1255 {
1256  int32 w, l, n, b, total_dists, total_words, extra_bits;
1257  bitvec_t *flagptr;
1258 
1259  total_dists = bin_mdef_n_sen(acmod->mdef);
1260  if (acmod->compallsen) {
1261  acmod->n_senone_active = total_dists;
1262  return total_dists;
1263  }
1264  total_words = total_dists / BITVEC_BITS;
1265  extra_bits = total_dists % BITVEC_BITS;
1266  w = n = l = 0;
1267  for (flagptr = acmod->senone_active_vec; w < total_words; ++w, ++flagptr) {
1268  if (*flagptr == 0)
1269  continue;
1270  for (b = 0; b < BITVEC_BITS; ++b) {
1271  if (*flagptr & (1UL << b)) {
1272  int32 sen = w * BITVEC_BITS + b;
1273  int32 delta = sen - l;
1274  /* Handle excessive deltas "lossily" by adding a few
1275  extra senones to bridge the gap. */
1276  while (delta > 255) {
1277  acmod->senone_active[n++] = 255;
1278  delta -= 255;
1279  }
1280  acmod->senone_active[n++] = delta;
1281  l = sen;
1282  }
1283  }
1284  }
1285 
1286  for (b = 0; b < extra_bits; ++b) {
1287  if (*flagptr & (1UL << b)) {
1288  int32 sen = w * BITVEC_BITS + b;
1289  int32 delta = sen - l;
1290  /* Handle excessive deltas "lossily" by adding a few
1291  extra senones to bridge the gap. */
1292  while (delta > 255) {
1293  acmod->senone_active[n++] = 255;
1294  delta -= 255;
1295  }
1296  acmod->senone_active[n++] = delta;
1297  l = sen;
1298  }
1299  }
1300 
1301  acmod->n_senone_active = n;
1302  E_DEBUG(1, ("acmod_flags2list: %d active in frame %d\n",
1303  acmod->n_senone_active, acmod->output_frame));
1304  return n;
1305 }
1306 
1307 int32
1309 {
1310  return acmod->utt_start_frame;
1311 }
1312 
1313 void
1315 {
1316  fe_start_stream(acmod->fe);
1317  acmod->utt_start_frame = 0;
1318 }
1319 
1320 void
1321 acmod_set_rawdata_size(acmod_t *acmod, int32 size)
1322 {
1323  assert(size >= 0);
1324  acmod->rawdata_size = size;
1325  if (acmod->rawdata_size > 0) {
1326  ckd_free(acmod->rawdata);
1327  acmod->rawdata = ckd_calloc(size, sizeof(int16));
1328  }
1329 }
1330 
1331 void
1332 acmod_get_rawdata(acmod_t *acmod, int16 **buffer, int32 *size)
1333 {
1334  if (buffer) {
1335  *buffer = acmod->rawdata;
1336  }
1337  if (size) {
1338  *size = acmod->rawdata_pos;
1339  }
1340 }
1341 
(Sphinx 3.0 specific) A module that wraps up the code of gauden and senone because they are closely r...
FILE * insenfh
Input senone score file.
Definition: acmod.h:178
void acmod_get_rawdata(acmod_t *acmod, int16 **buffer, int32 *size)
Retrieves the raw data collected during utterance decoding.
Definition: acmod.c:1332
uint8 grow_feat
Whether to grow feat_buf.
Definition: acmod.h:189
ps_mgau_t * mgau
Model parameters.
Definition: acmod.h:161
int acmod_read_scores(acmod_t *acmod)
Read one frame of scores from senone score dump file.
Definition: acmod.c:1012
Not in an utterance.
Definition: acmod.h:68
uint8 * senone_active
Array of deltas to active GMMs.
Definition: acmod.h:167
long * framepos
File positions of recent frames in senone file.
Definition: acmod.h:179
Utterance started, no data yet.
Definition: acmod.h:69
int acmod_set_insenfh(acmod_t *acmod, FILE *senfh)
Set up a senone score dump file for input.
Definition: acmod.c:864
int32 acmod_stream_offset(acmod_t *acmod)
Get the offset of the utterance start of the current stream, helpful for stream-wide timing...
Definition: acmod.c:1308
int acmod_rewind(acmod_t *acmod)
Rewind the current utterance, allowing it to be rescored.
Definition: acmod.c:877
int16 * senone_scores
GMM scores for current frame.
Definition: acmod.h:165
ps_mllr_t * acmod_update_mllr(acmod_t *acmod, ps_mllr_t *mllr)
Adapt acoustic model using a linear transform.
Definition: acmod.c:339
int acmod_process_cep(acmod_t *acmod, mfcc_t ***inout_cep, int *inout_n_frames, int full_utt)
Feed acoustic feature data into the acoustic model for scoring.
Definition: acmod.c:699
Utterance in progress.
Definition: acmod.h:70
int n_senone_active
Number of active GMMs.
Definition: acmod.h:169
An individual HMM among the HMM search space.
void acmod_set_rawdata_size(acmod_t *acmod, int32 size)
Sets the limit of the raw audio data to store.
Definition: acmod.c:1321
void acmod_start_stream(acmod_t *acmod)
Reset the current stream.
Definition: acmod.c:1314
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
fe_t * fe
Acoustic feature computation.
Definition: acmod.h:155
frame_idx_t n_mfc_frame
Number of frames active in mfc_buf.
Definition: acmod.h:196
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
Definition: acmod.c:1213
Utterance ended, still buffering.
Definition: acmod.h:71
FILE * rawfh
File for writing raw audio data.
Definition: acmod.h:175
mfcc_t ** mfc_buf
Temporary buffer of acoustic features.
Definition: acmod.h:173
frame_idx_t utt_start_frame
Index of the utterance start in the stream, all timings are relative to that.
Definition: acmod.h:192
void tmat_free(tmat_t *t)
RAH, add code to remove memory allocated by tmat_init.
Definition: tmat.c:275
frame_idx_t n_feat_alloc
Number of frames allocated in feat_buf.
Definition: acmod.h:198
mfcc_t *** feat_buf
Temporary buffer of dynamic features.
Definition: acmod.h:174
tmat_t * tmat_init(char const *tmatfile, logmath_t *lmath, float64 tpfloor, int32 breport)
Initialize transition matrix.
Definition: tmat.c:134
int acmod_set_senfh(acmod_t *acmod, FILE *logfh)
Start logging senone scores to a filehandle.
Definition: acmod.c:364
#define MAX_N_FRAMES
Maximum number of frames in index, should be in sync with above.
Definition: hmm.h:69
int acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
Start logging raw audio to a filehandle.
Definition: acmod.c:387
ps_mllr_t * mllr
Speaker transformation.
Definition: acmod.h:162
Fast phonetically-tied mixture evaluation.
POCKETSPHINX_EXPORT ps_mllr_t * ps_mllr_read(char const *file)
Read a speaker-adaptive linear transform from a file.
Definition: ps_mllr.c:52
uint8 compallsen
Compute all senones?
Definition: acmod.h:188
POCKETSPHINX_EXPORT bin_mdef_t * bin_mdef_read(cmd_ln_t *config, const char *filename)
Read a binary mdef from a file.
Definition: bin_mdef.c:323
int acmod_process_feat(acmod_t *acmod, mfcc_t **feat)
Feed dynamic feature data into the acoustic model for scoring.
Definition: acmod.c:797
int acmod_write_senfh_header(acmod_t *acmod, FILE *logfh)
Write senone dump file header.
Definition: acmod.c:350
cmd_ln_t * config
Configuration.
Definition: acmod.h:150
frame_idx_t output_frame
Index of next frame of dynamic features.
Definition: acmod.h:194
int acmod_write_scores(acmod_t *acmod, int n_active, uint8 const *active, int16 const *senscr, FILE *senfh)
Write a frame of senone scores to a dump file.
Definition: acmod.c:911
tmat_t * tmat
Transition matrices.
Definition: acmod.h:160
int32 acmod_flags2list(acmod_t *acmod)
Build the active senone list from the active senone flag vector.
Definition: acmod.c:1254
POCKETSPHINX_EXPORT int ps_mllr_free(ps_mllr_t *mllr)
Release a pointer to a linear transform.
Definition: ps_mllr.c:145
int acmod_end_utt(acmod_t *acmod)
Mark the end of an utterance.
Definition: acmod.c:441
int acmod_advance(acmod_t *acmod)
Advance the frame index.
Definition: acmod.c:899
uint8 state
State of utterance processing.
Definition: acmod.h:187
int acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
Start logging MFCCs to a filehandle.
Definition: acmod.c:375
void acmod_free(acmod_t *acmod)
Finalize an acoustic model.
Definition: acmod.c:299
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
Definition: acmod.c:1197
#define SENSCR_DUMMY
Dummy senone score value for unintentionally active states.
Definition: acmod.h:77
Feature space linear transform structure.
Definition: acmod.h:82
frame_idx_t feat_outidx
Start of active frames in feat_buf.
Definition: acmod.h:200
mfcc_t ** acmod_get_frame(acmod_t *acmod, int *inout_frame_idx)
Get a frame of dynamic feature data.
Definition: acmod.c:1088
feat_t * fcb
Dynamic feature computation.
Definition: acmod.h:156
int log_zero
Zero log-probability value.
Definition: acmod.h:170
FILE * senfh
File for writing senone score data.
Definition: acmod.h:177
frame_idx_t mfc_outidx
Start of active frames in mfc_buf.
Definition: acmod.h:197
frame_idx_t n_mfc_alloc
Number of frames allocated in mfc_buf.
Definition: acmod.h:195
int acmod_process_raw(acmod_t *acmod, int16 const **inout_raw, size_t *inout_n_samps, int full_utt)
TODO: Set queue length for utterance processing.
Definition: acmod.c:607
int bin_mdef_free(bin_mdef_t *m)
Release a pointer to a binary mdef.
Definition: bin_mdef.c:272
uint8 insen_swap
Whether to swap input senone score.
Definition: acmod.h:190
int acmod_start_utt(acmod_t *acmod)
Mark the start of an utterance.
Definition: acmod.c:423
int senscr_frame
Frame index for senone_scores.
Definition: acmod.h:168
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
int acmod_best_score(acmod_t *acmod, int *out_best_senid)
Get best score and senone index for current frame.
Definition: acmod.c:1168
acmod_t * acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
Initialize an acoustic model.
Definition: acmod.c:228
frame_idx_t n_feat_frame
Number of frames active in feat_buf.
Definition: acmod.h:199
Acoustic model structures for PocketSphinx.
FILE * mfcfh
File for writing acoustic feature data.
Definition: acmod.h:176
Acoustic model structure.
Definition: acmod.h:148
int acmod_set_grow(acmod_t *acmod, int grow_feat)
Set memory allocation policy for utterance processing.
Definition: acmod.c:410
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
Definition: acmod.c:1106
int frame_idx
frame counter.
Definition: acmod.h:115
bitvec_t * senone_active_vec
Active GMMs in current frame.
Definition: acmod.h:166