PocketSphinx  5prealpha
pocketsphinx.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 /* System headers. */
39 #include <stdio.h>
40 #include <assert.h>
41 
42 #ifdef HAVE_UNISTD_H
43 #include <unistd.h>
44 #endif
45 
46 /* SphinxBase headers. */
47 #include <sphinxbase/err.h>
48 #include <sphinxbase/strfuncs.h>
49 #include <sphinxbase/filename.h>
50 #include <sphinxbase/pio.h>
51 #include <sphinxbase/jsgf.h>
52 #include <sphinxbase/hash_table.h>
53 
54 /* Local headers. */
55 #include "cmdln_macro.h"
56 #include "pocketsphinx.h"
57 #include "pocketsphinx_internal.h"
58 #include "ps_lattice_internal.h"
59 #include "phone_loop_search.h"
60 #include "kws_search.h"
61 #include "fsg_search_internal.h"
62 #include "ngram_search.h"
63 #include "ngram_search_fwdtree.h"
64 #include "ngram_search_fwdflat.h"
65 #include "allphone_search.h"
66 
67 static const arg_t ps_args_def[] = {
68  POCKETSPHINX_OPTIONS,
69  CMDLN_EMPTY_OPTION
70 };
71 
/* Report whether `path` can be opened for binary reading.
 *
 * The probe is a plain fopen()/fclose() pair, so it relies on nothing
 * beyond the standard C library.
 *
 * Returns 1 if the open succeeded, 0 otherwise. */
static int
file_exists(const char *path)
{
    FILE *probe = fopen(path, "rb");

    if (probe == NULL)
        return 0;
    fclose(probe);
    return 1;
}
82 
83 #ifdef MODELDIR
/* Report whether `path` looks like an acoustic model directory, judged
 * solely by whether "<path>/mdef" can be opened for reading.
 *
 * Returns 1 if the mdef file could be opened, 0 otherwise. */
static int
hmmdir_exists(const char *path)
{
    char *mdef_path = string_join(path, "/mdef", NULL);
    int found = file_exists(mdef_path);

    ckd_free(mdef_path);
    return found;
}
95 #endif
96 
97 static void
98 ps_expand_file_config(ps_decoder_t *ps, const char *arg, const char *extra_arg,
99  const char *hmmdir, const char *file)
100 {
101  const char *val;
102  if ((val = cmd_ln_str_r(ps->config, arg)) != NULL) {
103  cmd_ln_set_str_extra_r(ps->config, extra_arg, val);
104  } else if (hmmdir == NULL) {
105  cmd_ln_set_str_extra_r(ps->config, extra_arg, NULL);
106  } else {
107  char *tmp = string_join(hmmdir, "/", file, NULL);
108  if (file_exists(tmp))
109  cmd_ln_set_str_extra_r(ps->config, extra_arg, tmp);
110  else
111  cmd_ln_set_str_extra_r(ps->config, extra_arg, NULL);
112  ckd_free(tmp);
113  }
114 }
115 
116 /* Feature and front-end parameters that may be in feat.params */
117 static const arg_t feat_defn[] = {
118  waveform_to_cepstral_command_line_macro(),
119  cepstral_to_feature_command_line_macro(),
120  CMDLN_EMPTY_OPTION
121 };
122 
123 static void
124 ps_expand_model_config(ps_decoder_t *ps)
125 {
126  char const *hmmdir, *featparams;
127 
128  /* Disable memory mapping on Blackfin (FIXME: should be uClinux in general). */
129 #ifdef __ADSPBLACKFIN__
130  E_INFO("Will not use mmap() on uClinux/Blackfin.");
131  cmd_ln_set_boolean_r(ps->config, "-mmap", FALSE);
132 #endif
133 
134  /* Get acoustic model filenames and add them to the command-line */
135  hmmdir = cmd_ln_str_r(ps->config, "-hmm");
136  ps_expand_file_config(ps, "-mdef", "_mdef", hmmdir, "mdef");
137  ps_expand_file_config(ps, "-mean", "_mean", hmmdir, "means");
138  ps_expand_file_config(ps, "-var", "_var", hmmdir, "variances");
139  ps_expand_file_config(ps, "-tmat", "_tmat", hmmdir, "transition_matrices");
140  ps_expand_file_config(ps, "-mixw", "_mixw", hmmdir, "mixture_weights");
141  ps_expand_file_config(ps, "-sendump", "_sendump", hmmdir, "sendump");
142  ps_expand_file_config(ps, "-fdict", "_fdict", hmmdir, "noisedict");
143  ps_expand_file_config(ps, "-lda", "_lda", hmmdir, "feature_transform");
144  ps_expand_file_config(ps, "-featparams", "_featparams", hmmdir, "feat.params");
145  ps_expand_file_config(ps, "-senmgau", "_senmgau", hmmdir, "senmgau");
146 
147  /* Look for feat.params in acoustic model dir. */
148  if ((featparams = cmd_ln_str_r(ps->config, "_featparams"))) {
149  if (NULL !=
150  cmd_ln_parse_file_r(ps->config, feat_defn, featparams, FALSE))
151  E_INFO("Parsed model-specific feature parameters from %s\n",
152  featparams);
153  }
154 
155  /* Print here because acmod_init might load feat.params file */
156  if (err_get_logfp() != NULL) {
157  cmd_ln_print_values_r(ps->config, err_get_logfp(), ps_args());
158  }
159 }
160 
161 static void
162 ps_free_searches(ps_decoder_t *ps)
163 {
164  if (ps->searches) {
165  hash_iter_t *search_it;
166  for (search_it = hash_table_iter(ps->searches); search_it;
167  search_it = hash_table_iter_next(search_it)) {
168  ps_search_free(hash_entry_val(search_it->ent));
169  }
170  hash_table_free(ps->searches);
171  }
172 
173  ps->searches = NULL;
174  ps->search = NULL;
175 }
176 
177 static ps_search_t *
178 ps_find_search(ps_decoder_t *ps, char const *name)
179 {
180  void *search = NULL;
181  hash_table_lookup(ps->searches, name, &search);
182 
183  return (ps_search_t *) search;
184 }
185 
186 /* Set default acoustic and language models if they are not defined in configuration. */
187 void
188 ps_default_search_args(cmd_ln_t *config)
189 {
190 #ifdef MODELDIR
191  const char *hmmdir = cmd_ln_str_r(config, "-hmm");
192  const char *lmfile = cmd_ln_str_r(config, "-lm");
193  const char *dictfile = cmd_ln_str_r(config, "-dict");
194 
195  if (hmmdir == NULL && hmmdir_exists(MODELDIR "/en-us/en-us")) {
196  hmmdir = MODELDIR "/en-us/en-us";
197  cmd_ln_set_str_r(config, "-hmm", hmmdir);
198  }
199 
200  if (lmfile == NULL && !cmd_ln_str_r(config, "-fsg")
201  && !cmd_ln_str_r(config, "-jsgf")
202  && !cmd_ln_str_r(config, "-lmctl")
203  && !cmd_ln_str_r(config, "-kws")
204  && !cmd_ln_str_r(config, "-keyphrase")
205  && file_exists(MODELDIR "/en-us/en-us.lm.bin")) {
206  lmfile = MODELDIR "/en-us/en-us.lm.bin";
207  cmd_ln_set_str_r(config, "-lm", lmfile);
208  }
209 
210  if (dictfile == NULL && file_exists(MODELDIR "/en-us/cmudict-en-us.dict")) {
211  dictfile = MODELDIR "/en-us/cmudict-en-us.dict";
212  cmd_ln_set_str_r(config, "-dict", dictfile);
213  }
214 #endif
215 }
216 
217 int
218 ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
219 {
220  const char *path;
221  const char *keyphrase;
222  int32 lw;
223 
224  if (config && config != ps->config) {
225  cmd_ln_free_r(ps->config);
226  ps->config = cmd_ln_retain(config);
227  }
228 
229  err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));
230  /* Set up logging. We need to do this earlier because we want to dump
231  * the information to the configured log, not to the stderr. */
232  if (config && cmd_ln_str_r(ps->config, "-logfn")) {
233  if (err_set_logfile(cmd_ln_str_r(ps->config, "-logfn")) < 0) {
234  E_ERROR("Cannot redirect log output\n");
235  return -1;
236  }
237  }
238 
239  ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
240  ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");
241  ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir");
242 
243  /* Fill in some default arguments. */
244  ps_expand_model_config(ps);
245 
246  /* Free old searches (do this before other reinit) */
247  ps_free_searches(ps);
248  ps->searches = hash_table_new(3, HASH_CASE_YES);
249 
250  /* Free old acmod. */
251  acmod_free(ps->acmod);
252  ps->acmod = NULL;
253 
254  /* Free old dictionary (must be done after the two things above) */
255  dict_free(ps->dict);
256  ps->dict = NULL;
257 
258  /* Free d2p */
259  dict2pid_free(ps->d2p);
260  ps->d2p = NULL;
261 
262  /* Logmath computation (used in acmod and search) */
263  if (ps->lmath == NULL
264  || (logmath_get_base(ps->lmath) !=
265  (float64)cmd_ln_float32_r(ps->config, "-logbase"))) {
266  if (ps->lmath)
267  logmath_free(ps->lmath);
268  ps->lmath = logmath_init
269  ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0,
270  cmd_ln_boolean_r(ps->config, "-bestpath"));
271  }
272 
273  /* Acoustic model (this is basically everything that
274  * uttproc.c, senscr.c, and others used to do) */
275  if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL)
276  return -1;
277 
278 
279 
280  if (cmd_ln_int32_r(ps->config, "-pl_window") > 0) {
281  /* Initialize an auxiliary phone loop search, which will run in
282  * "parallel" with FSG or N-Gram search. */
283  if ((ps->phone_loop =
284  phone_loop_search_init(ps->config, ps->acmod, ps->dict)) == NULL)
285  return -1;
286  hash_table_enter(ps->searches,
287  ps_search_name(ps->phone_loop),
288  ps->phone_loop);
289  }
290 
291  /* Dictionary and triphone mappings (depends on acmod). */
292  /* FIXME: pass config, change arguments, implement LTS, etc. */
293  if ((ps->dict = dict_init(ps->config, ps->acmod->mdef)) == NULL)
294  return -1;
295  if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
296  return -1;
297 
298  lw = cmd_ln_float32_r(ps->config, "-lw");
299 
300  /* Determine whether we are starting out in FSG or N-Gram search mode.
301  * If neither is used skip search initialization. */
302 
303  /* Load KWS if one was specified in config */
304  if ((keyphrase = cmd_ln_str_r(ps->config, "-keyphrase"))) {
305  if (ps_set_keyphrase(ps, PS_DEFAULT_SEARCH, keyphrase))
306  return -1;
307  ps_set_search(ps, PS_DEFAULT_SEARCH);
308  }
309 
310  if ((path = cmd_ln_str_r(ps->config, "-kws"))) {
311  if (ps_set_kws(ps, PS_DEFAULT_SEARCH, path))
312  return -1;
313  ps_set_search(ps, PS_DEFAULT_SEARCH);
314  }
315 
316  /* Load an FSG if one was specified in config */
317  if ((path = cmd_ln_str_r(ps->config, "-fsg"))) {
318  fsg_model_t *fsg = fsg_model_readfile(path, ps->lmath, lw);
319  if (!fsg)
320  return -1;
321  if (ps_set_fsg(ps, PS_DEFAULT_SEARCH, fsg)) {
322  fsg_model_free(fsg);
323  return -1;
324  }
325  fsg_model_free(fsg);
326  ps_set_search(ps, PS_DEFAULT_SEARCH);
327  }
328 
329  /* Or load a JSGF grammar */
330  if ((path = cmd_ln_str_r(ps->config, "-jsgf"))) {
331  if (ps_set_jsgf_file(ps, PS_DEFAULT_SEARCH, path)
332  || ps_set_search(ps, PS_DEFAULT_SEARCH))
333  return -1;
334  }
335 
336  if ((path = cmd_ln_str_r(ps->config, "-allphone"))) {
337  if (ps_set_allphone_file(ps, PS_DEFAULT_SEARCH, path)
338  || ps_set_search(ps, PS_DEFAULT_SEARCH))
339  return -1;
340  }
341 
342  if ((path = cmd_ln_str_r(ps->config, "-lm")) &&
343  !cmd_ln_boolean_r(ps->config, "-allphone")) {
344  if (ps_set_lm_file(ps, PS_DEFAULT_SEARCH, path)
345  || ps_set_search(ps, PS_DEFAULT_SEARCH))
346  return -1;
347  }
348 
349  if ((path = cmd_ln_str_r(ps->config, "-lmctl"))) {
350  const char *name;
351  ngram_model_t *lmset;
352  ngram_model_set_iter_t *lmset_it;
353 
354  if (!(lmset = ngram_model_set_read(ps->config, path, ps->lmath))) {
355  E_ERROR("Failed to read language model control file: %s\n", path);
356  return -1;
357  }
358 
359  for(lmset_it = ngram_model_set_iter(lmset);
360  lmset_it; lmset_it = ngram_model_set_iter_next(lmset_it)) {
361  ngram_model_t *lm = ngram_model_set_iter_model(lmset_it, &name);
362  E_INFO("adding search %s\n", name);
363  if (ps_set_lm(ps, name, lm)) {
364  ngram_model_set_iter_free(lmset_it);
365  ngram_model_free(lmset);
366  return -1;
367  }
368  }
369  ngram_model_free(lmset);
370 
371  name = cmd_ln_str_r(ps->config, "-lmname");
372  if (name)
373  ps_set_search(ps, name);
374  else {
375  E_ERROR("No default LM name (-lmname) for `-lmctl'\n");
376  return -1;
377  }
378  }
379 
380  /* Initialize performance timer. */
381  ps->perf.name = "decode";
382  ptmr_init(&ps->perf);
383 
384  return 0;
385 }
386 
387 ps_decoder_t *
388 ps_init(cmd_ln_t *config)
389 {
390  ps_decoder_t *ps;
391 
392  if (!config) {
393  E_ERROR("No configuration specified");
394  return NULL;
395  }
396 
397  ps = ckd_calloc(1, sizeof(*ps));
398  ps->refcount = 1;
399  if (ps_reinit(ps, config) < 0) {
400  ps_free(ps);
401  return NULL;
402  }
403  return ps;
404 }
405 
406 arg_t const *
407 ps_args(void)
408 {
409  return ps_args_def;
410 }
411 
412 ps_decoder_t *
414 {
415  ++ps->refcount;
416  return ps;
417 }
418 
419 int
421 {
422  if (ps == NULL)
423  return 0;
424  if (--ps->refcount > 0)
425  return ps->refcount;
426  ps_free_searches(ps);
427  dict_free(ps->dict);
428  dict2pid_free(ps->d2p);
429  acmod_free(ps->acmod);
430  logmath_free(ps->lmath);
431  cmd_ln_free_r(ps->config);
432  ckd_free(ps);
433  return 0;
434 }
435 
436 cmd_ln_t *
438 {
439  return ps->config;
440 }
441 
442 logmath_t *
444 {
445  return ps->lmath;
446 }
447 
448 fe_t *
450 {
451  return ps->acmod->fe;
452 }
453 
454 feat_t *
456 {
457  return ps->acmod->fcb;
458 }
459 
460 ps_mllr_t *
462 {
463  return acmod_update_mllr(ps->acmod, mllr);
464 }
465 
466 int
467 ps_set_search(ps_decoder_t *ps, const char *name)
468 {
469  ps_search_t *search;
470 
471  if (ps->acmod->state != ACMOD_ENDED && ps->acmod->state != ACMOD_IDLE) {
472  E_ERROR("Cannot change search while decoding, end utterance first\n");
473  return -1;
474  }
475 
476  if (!(search = ps_find_search(ps, name))) {
477  return -1;
478  }
479 
480  ps->search = search;
481  /* Set pl window depending on the search */
482  if (!strcmp(PS_SEARCH_TYPE_NGRAM, ps_search_type(search))) {
483  ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window");
484  } else {
485  ps->pl_window = 0;
486  }
487 
488  return 0;
489 }
490 
491 const char*
493 {
494  hash_iter_t *search_it;
495  const char* name = NULL;
496  for (search_it = hash_table_iter(ps->searches); search_it;
497  search_it = hash_table_iter_next(search_it)) {
498  if (hash_entry_val(search_it->ent) == ps->search) {
499  name = hash_entry_key(search_it->ent);
500  break;
501  }
502  }
503  return name;
504 }
505 
506 int
507 ps_unset_search(ps_decoder_t *ps, const char *name)
508 {
509  ps_search_t *search = hash_table_delete(ps->searches, name);
510  if (!search)
511  return -1;
512  if (ps->search == search)
513  ps->search = NULL;
514  ps_search_free(search);
515  return 0;
516 }
517 
520 {
521  return (ps_search_iter_t *)hash_table_iter(ps->searches);
522 }
523 
526 {
527  return (ps_search_iter_t *)hash_table_iter_next((hash_iter_t *)itor);
528 }
529 
530 const char*
532 {
533  return (const char*)(((hash_iter_t *)itor)->ent->key);
534 }
535 
536 void
538 {
539  hash_table_iter_free((hash_iter_t *)itor);
540 }
541 
542 ngram_model_t *
543 ps_get_lm(ps_decoder_t *ps, const char *name)
544 {
545  ps_search_t *search = ps_find_search(ps, name);
546  if (search && strcmp(PS_SEARCH_TYPE_NGRAM, ps_search_type(search)))
547  return NULL;
548  return search ? ((ngram_search_t *) search)->lmset : NULL;
549 }
550 
551 fsg_model_t *
552 ps_get_fsg(ps_decoder_t *ps, const char *name)
553 {
554  ps_search_t *search = ps_find_search(ps, name);
555  if (search && strcmp(PS_SEARCH_TYPE_FSG, ps_search_type(search)))
556  return NULL;
557  return search ? ((fsg_search_t *) search)->fsg : NULL;
558 }
559 
560 const char*
561 ps_get_kws(ps_decoder_t *ps, const char* name)
562 {
563  ps_search_t *search = ps_find_search(ps, name);
564  if (search && strcmp(PS_SEARCH_TYPE_KWS, ps_search_type(search)))
565  return NULL;
566  return search ? kws_search_get_keyphrases(search) : NULL;
567 }
568 
569 static int
570 set_search_internal(ps_decoder_t *ps, ps_search_t *search)
571 {
572  ps_search_t *old_search;
573 
574  if (!search)
575  return -1;
576 
577  search->pls = ps->phone_loop;
578  old_search = (ps_search_t *) hash_table_replace(ps->searches, ps_search_name(search), search);
579  if (old_search != search)
580  ps_search_free(old_search);
581 
582  return 0;
583 }
584 
585 int
586 ps_set_lm(ps_decoder_t *ps, const char *name, ngram_model_t *lm)
587 {
588  ps_search_t *search;
589  search = ngram_search_init(name, lm, ps->config, ps->acmod, ps->dict, ps->d2p);
590  return set_search_internal(ps, search);
591 }
592 
593 int
594 ps_set_lm_file(ps_decoder_t *ps, const char *name, const char *path)
595 {
596  ngram_model_t *lm;
597  int result;
598 
599  lm = ngram_model_read(ps->config, path, NGRAM_AUTO, ps->lmath);
600  if (!lm)
601  return -1;
602 
603  result = ps_set_lm(ps, name, lm);
604  ngram_model_free(lm);
605  return result;
606 }
607 
608 int
609 ps_set_allphone(ps_decoder_t *ps, const char *name, ngram_model_t *lm)
610 {
611  ps_search_t *search;
612  search = allphone_search_init(name, lm, ps->config, ps->acmod, ps->dict, ps->d2p);
613  return set_search_internal(ps, search);
614 }
615 
616 int
617 ps_set_allphone_file(ps_decoder_t *ps, const char *name, const char *path)
618 {
619  ngram_model_t *lm;
620  int result;
621 
622  lm = NULL;
623  if (path)
624  lm = ngram_model_read(ps->config, path, NGRAM_AUTO, ps->lmath);
625  result = ps_set_allphone(ps, name, lm);
626  if (lm)
627  ngram_model_free(lm);
628  return result;
629 }
630 
631 int
632 ps_set_kws(ps_decoder_t *ps, const char *name, const char *keyfile)
633 {
634  ps_search_t *search;
635  search = kws_search_init(name, NULL, keyfile, ps->config, ps->acmod, ps->dict, ps->d2p);
636  return set_search_internal(ps, search);
637 }
638 
639 int
640 ps_set_keyphrase(ps_decoder_t *ps, const char *name, const char *keyphrase)
641 {
642  ps_search_t *search;
643  search = kws_search_init(name, keyphrase, NULL, ps->config, ps->acmod, ps->dict, ps->d2p);
644  return set_search_internal(ps, search);
645 }
646 
647 int
648 ps_set_fsg(ps_decoder_t *ps, const char *name, fsg_model_t *fsg)
649 {
650  ps_search_t *search;
651  search = fsg_search_init(name, fsg, ps->config, ps->acmod, ps->dict, ps->d2p);
652  return set_search_internal(ps, search);
653 }
654 
655 int
656 ps_set_jsgf_file(ps_decoder_t *ps, const char *name, const char *path)
657 {
658  fsg_model_t *fsg;
659  jsgf_rule_t *rule;
660  char const *toprule;
661  jsgf_t *jsgf = jsgf_parse_file(path, NULL);
662  float lw;
663  int result;
664 
665  if (!jsgf)
666  return -1;
667 
668  rule = NULL;
669  /* Take the -toprule if specified. */
670  if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) {
671  rule = jsgf_get_rule(jsgf, toprule);
672  if (rule == NULL) {
673  E_ERROR("Start rule %s not found\n", toprule);
674  jsgf_grammar_free(jsgf);
675  return -1;
676  }
677  } else {
678  rule = jsgf_get_public_rule(jsgf);
679  if (rule == NULL) {
680  E_ERROR("No public rules found in %s\n", path);
681  jsgf_grammar_free(jsgf);
682  return -1;
683  }
684  }
685 
686  lw = cmd_ln_float32_r(ps->config, "-lw");
687  fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw);
688  result = ps_set_fsg(ps, name, fsg);
689  fsg_model_free(fsg);
690  jsgf_grammar_free(jsgf);
691  return result;
692 }
693 
694 int
695 ps_set_jsgf_string(ps_decoder_t *ps, const char *name, const char *jsgf_string)
696 {
697  fsg_model_t *fsg;
698  jsgf_rule_t *rule;
699  char const *toprule;
700  jsgf_t *jsgf = jsgf_parse_string(jsgf_string, NULL);
701  float lw;
702  int result;
703 
704  if (!jsgf)
705  return -1;
706 
707  rule = NULL;
708  /* Take the -toprule if specified. */
709  if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) {
710  rule = jsgf_get_rule(jsgf, toprule);
711  if (rule == NULL) {
712  E_ERROR("Start rule %s not found\n", toprule);
713  return -1;
714  }
715  } else {
716  rule = jsgf_get_public_rule(jsgf);
717  if (rule == NULL) {
718  E_ERROR("No public rules found in input string\n");
719  return -1;
720  }
721  }
722 
723  lw = cmd_ln_float32_r(ps->config, "-lw");
724  fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw);
725  result = ps_set_fsg(ps, name, fsg);
726  fsg_model_free(fsg);
727  return result;
728 }
729 
730 
731 int
732 ps_load_dict(ps_decoder_t *ps, char const *dictfile,
733  char const *fdictfile, char const *format)
734 {
735  dict2pid_t *d2p;
736  dict_t *dict;
737  hash_iter_t *search_it;
738  cmd_ln_t *newconfig;
739 
740  /* Create a new scratch config to load this dict (so existing one
741  * won't be affected if it fails) */
742  newconfig = cmd_ln_init(NULL, ps_args(), TRUE, NULL);
743  cmd_ln_set_boolean_r(newconfig, "-dictcase",
744  cmd_ln_boolean_r(ps->config, "-dictcase"));
745  cmd_ln_set_str_r(newconfig, "-dict", dictfile);
746  if (fdictfile)
747  cmd_ln_set_str_extra_r(newconfig, "_fdict", fdictfile);
748  else
749  cmd_ln_set_str_extra_r(newconfig, "_fdict",
750  cmd_ln_str_r(ps->config, "_fdict"));
751 
752  /* Try to load it. */
753  if ((dict = dict_init(newconfig, ps->acmod->mdef)) == NULL) {
754  cmd_ln_free_r(newconfig);
755  return -1;
756  }
757 
758  /* Reinit the dict2pid. */
759  if ((d2p = dict2pid_build(ps->acmod->mdef, dict)) == NULL) {
760  cmd_ln_free_r(newconfig);
761  return -1;
762  }
763 
764  /* Success! Update the existing config to reflect new dicts and
765  * drop everything into place. */
766  cmd_ln_free_r(newconfig);
767  dict_free(ps->dict);
768  ps->dict = dict;
769  dict2pid_free(ps->d2p);
770  ps->d2p = d2p;
771 
772  /* And tell all searches to reconfigure themselves. */
773  for (search_it = hash_table_iter(ps->searches); search_it;
774  search_it = hash_table_iter_next(search_it)) {
775  if (ps_search_reinit(hash_entry_val(search_it->ent), dict, d2p) < 0) {
776  hash_table_iter_free(search_it);
777  return -1;
778  }
779  }
780 
781  return 0;
782 }
783 
784 int
785 ps_save_dict(ps_decoder_t *ps, char const *dictfile,
786  char const *format)
787 {
788  return dict_write(ps->dict, dictfile, format);
789 }
790 
791 int
793  char const *word,
794  char const *phones,
795  int update)
796 {
797  int32 wid;
798  s3cipid_t *pron;
799  hash_iter_t *search_it;
800  char **phonestr, *tmp;
801  int np, i, rv;
802 
803  /* Parse phones into an array of phone IDs. */
804  tmp = ckd_salloc(phones);
805  np = str2words(tmp, NULL, 0);
806  phonestr = ckd_calloc(np, sizeof(*phonestr));
807  str2words(tmp, phonestr, np);
808  pron = ckd_calloc(np, sizeof(*pron));
809  for (i = 0; i < np; ++i) {
810  pron[i] = bin_mdef_ciphone_id(ps->acmod->mdef, phonestr[i]);
811  if (pron[i] == -1) {
812  E_ERROR("Unknown phone %s in phone string %s\n",
813  phonestr[i], tmp);
814  ckd_free(phonestr);
815  ckd_free(tmp);
816  ckd_free(pron);
817  return -1;
818  }
819  }
820  /* No longer needed. */
821  ckd_free(phonestr);
822  ckd_free(tmp);
823 
824  /* Add it to the dictionary. */
825  if ((wid = dict_add_word(ps->dict, word, pron, np)) == -1) {
826  ckd_free(pron);
827  return -1;
828  }
829  /* No longer needed. */
830  ckd_free(pron);
831 
832  /* Now we also have to add it to dict2pid. */
833  dict2pid_add_word(ps->d2p, wid);
834 
835  /* TODO: we definitely need to refactor this */
836  for (search_it = hash_table_iter(ps->searches); search_it;
837  search_it = hash_table_iter_next(search_it)) {
838  ps_search_t *search = hash_entry_val(search_it->ent);
839  if (!strcmp(PS_SEARCH_TYPE_NGRAM, ps_search_type(search))) {
840  ngram_model_t *lmset = ((ngram_search_t *) search)->lmset;
841  if (ngram_model_add_word(lmset, word, 1.0) == NGRAM_INVALID_WID) {
842  hash_table_iter_free(search_it);
843  return -1;
844  }
845  }
846 
847  if (update) {
848  if ((rv = ps_search_reinit(search, ps->dict, ps->d2p) < 0)) {
849  hash_table_iter_free(search_it);
850  return rv;
851  }
852  }
853  }
854 
855  /* Rebuild the widmap and search tree if requested. */
856  return wid;
857 }
858 
859 char *
860 ps_lookup_word(ps_decoder_t *ps, const char *word)
861 {
862  s3wid_t wid;
863  int32 phlen, j;
864  char *phones;
865  dict_t *dict = ps->dict;
866 
867  wid = dict_wordid(dict, word);
868  if (wid == BAD_S3WID)
869  return NULL;
870 
871  for (phlen = j = 0; j < dict_pronlen(dict, wid); ++j)
872  phlen += strlen(dict_ciphone_str(dict, wid, j)) + 1;
873  phones = ckd_calloc(1, phlen);
874  for (j = 0; j < dict_pronlen(dict, wid); ++j) {
875  strcat(phones, dict_ciphone_str(dict, wid, j));
876  if (j != dict_pronlen(dict, wid) - 1)
877  strcat(phones, " ");
878  }
879  return phones;
880 }
881 
882 long
883 ps_decode_raw(ps_decoder_t *ps, FILE *rawfh,
884  long maxsamps)
885 {
886  int16 *data;
887  long total, pos, endpos;
888 
889  ps_start_stream(ps);
890  ps_start_utt(ps);
891 
892  /* If this file is seekable or maxsamps is specified, then decode
893  * the whole thing at once. */
894  if (maxsamps != -1) {
895  data = ckd_calloc(maxsamps, sizeof(*data));
896  total = fread(data, sizeof(*data), maxsamps, rawfh);
897  ps_process_raw(ps, data, total, FALSE, TRUE);
898  ckd_free(data);
899  } else if ((pos = ftell(rawfh)) >= 0) {
900  fseek(rawfh, 0, SEEK_END);
901  endpos = ftell(rawfh);
902  fseek(rawfh, pos, SEEK_SET);
903  maxsamps = endpos - pos;
904 
905  data = ckd_calloc(maxsamps, sizeof(*data));
906  total = fread(data, sizeof(*data), maxsamps, rawfh);
907  ps_process_raw(ps, data, total, FALSE, TRUE);
908  ckd_free(data);
909  } else {
910  /* Otherwise decode it in a stream. */
911  total = 0;
912  while (!feof(rawfh)) {
913  int16 data[256];
914  size_t nread;
915 
916  nread = fread(data, sizeof(*data), sizeof(data)/sizeof(*data), rawfh);
917  ps_process_raw(ps, data, nread, FALSE, FALSE);
918  total += nread;
919  }
920  }
921  ps_end_utt(ps);
922  return total;
923 }
924 
925 int
927 {
929  return 0;
930 }
931 
932 int
934 {
935  int rv;
936  char uttid[16];
937 
938  if (ps->acmod->state == ACMOD_STARTED || ps->acmod->state == ACMOD_PROCESSING) {
939  E_ERROR("Utterance already started\n");
940  return -1;
941  }
942 
943  if (ps->search == NULL) {
944  E_ERROR("No search module is selected, did you forget to "
945  "specify a language model or grammar?\n");
946  return -1;
947  }
948 
949  ptmr_reset(&ps->perf);
950  ptmr_start(&ps->perf);
951 
952  sprintf(uttid, "%09u", ps->uttno);
953  ++ps->uttno;
954 
955  /* Remove any residual word lattice and hypothesis. */
956  ps_lattice_free(ps->search->dag);
957  ps->search->dag = NULL;
958  ps->search->last_link = NULL;
959  ps->search->post = 0;
960  ckd_free(ps->search->hyp_str);
961  ps->search->hyp_str = NULL;
962  if ((rv = acmod_start_utt(ps->acmod)) < 0)
963  return rv;
964 
965  /* Start logging features and audio if requested. */
966  if (ps->mfclogdir) {
967  char *logfn = string_join(ps->mfclogdir, "/",
968  uttid, ".mfc", NULL);
969  FILE *mfcfh;
970  E_INFO("Writing MFCC file: %s\n", logfn);
971  if ((mfcfh = fopen(logfn, "wb")) == NULL) {
972  E_ERROR_SYSTEM("Failed to open MFCC file %s", logfn);
973  ckd_free(logfn);
974  return -1;
975  }
976  ckd_free(logfn);
977  acmod_set_mfcfh(ps->acmod, mfcfh);
978  }
979  if (ps->rawlogdir) {
980  char *logfn = string_join(ps->rawlogdir, "/",
981  uttid, ".raw", NULL);
982  FILE *rawfh;
983  E_INFO("Writing raw audio file: %s\n", logfn);
984  if ((rawfh = fopen(logfn, "wb")) == NULL) {
985  E_ERROR_SYSTEM("Failed to open raw audio file %s", logfn);
986  ckd_free(logfn);
987  return -1;
988  }
989  ckd_free(logfn);
990  acmod_set_rawfh(ps->acmod, rawfh);
991  }
992  if (ps->senlogdir) {
993  char *logfn = string_join(ps->senlogdir, "/",
994  uttid, ".sen", NULL);
995  FILE *senfh;
996  E_INFO("Writing senone score file: %s\n", logfn);
997  if ((senfh = fopen(logfn, "wb")) == NULL) {
998  E_ERROR_SYSTEM("Failed to open senone score file %s", logfn);
999  ckd_free(logfn);
1000  return -1;
1001  }
1002  ckd_free(logfn);
1003  acmod_set_senfh(ps->acmod, senfh);
1004  }
1005 
1006  /* Start auxiliary phone loop search. */
1007  if (ps->phone_loop)
1008  ps_search_start(ps->phone_loop);
1009 
1010  return ps_search_start(ps->search);
1011 }
1012 
1013 static int
1014 ps_search_forward(ps_decoder_t *ps)
1015 {
1016  int nfr;
1017 
1018  nfr = 0;
1019  while (ps->acmod->n_feat_frame > 0) {
1020  int k;
1021  if (ps->pl_window > 0)
1022  if ((k = ps_search_step(ps->phone_loop, ps->acmod->output_frame)) < 0)
1023  return k;
1024  if (ps->acmod->output_frame >= ps->pl_window)
1025  if ((k = ps_search_step(ps->search,
1026  ps->acmod->output_frame - ps->pl_window)) < 0)
1027  return k;
1028  acmod_advance(ps->acmod);
1029  ++ps->n_frame;
1030  ++nfr;
1031  }
1032  return nfr;
1033 }
1034 
1035 int
1037 {
1038  int nfr, n_searchfr;
1039 
1040  ps_start_utt(ps);
1041  n_searchfr = 0;
1042  acmod_set_insenfh(ps->acmod, senfh);
1043  while ((nfr = acmod_read_scores(ps->acmod)) > 0) {
1044  if ((nfr = ps_search_forward(ps)) < 0) {
1045  ps_end_utt(ps);
1046  return nfr;
1047  }
1048  n_searchfr += nfr;
1049  }
1050  ps_end_utt(ps);
1051  acmod_set_insenfh(ps->acmod, NULL);
1052 
1053  return n_searchfr;
1054 }
1055 
1056 int
1058  int16 const *data,
1059  size_t n_samples,
1060  int no_search,
1061  int full_utt)
1062 {
1063  int n_searchfr = 0;
1064 
1065  if (ps->acmod->state == ACMOD_IDLE) {
1066  E_ERROR("Failed to process data, utterance is not started. Use start_utt to start it\n");
1067  return 0;
1068  }
1069 
1070  if (no_search)
1071  acmod_set_grow(ps->acmod, TRUE);
1072 
1073  while (n_samples) {
1074  int nfr;
1075 
1076  /* Process some data into features. */
1077  if ((nfr = acmod_process_raw(ps->acmod, &data,
1078  &n_samples, full_utt)) < 0)
1079  return nfr;
1080 
1081  /* Score and search as much data as possible */
1082  if (no_search)
1083  continue;
1084  if ((nfr = ps_search_forward(ps)) < 0)
1085  return nfr;
1086  n_searchfr += nfr;
1087  }
1088 
1089  return n_searchfr;
1090 }
1091 
1092 int
1094  mfcc_t **data,
1095  int32 n_frames,
1096  int no_search,
1097  int full_utt)
1098 {
1099  int n_searchfr = 0;
1100 
1101  if (no_search)
1102  acmod_set_grow(ps->acmod, TRUE);
1103 
1104  while (n_frames) {
1105  int nfr;
1106 
1107  /* Process some data into features. */
1108  if ((nfr = acmod_process_cep(ps->acmod, &data,
1109  &n_frames, full_utt)) < 0)
1110  return nfr;
1111 
1112  /* Score and search as much data as possible */
1113  if (no_search)
1114  continue;
1115  if ((nfr = ps_search_forward(ps)) < 0)
1116  return nfr;
1117  n_searchfr += nfr;
1118  }
1119 
1120  return n_searchfr;
1121 }
1122 
1123 int
1125 {
1126  int rv, i;
1127 
1128  if (ps->acmod->state == ACMOD_ENDED || ps->acmod->state == ACMOD_IDLE) {
1129  E_ERROR("Utterance is not started\n");
1130  return -1;
1131  }
1132  acmod_end_utt(ps->acmod);
1133 
1134  /* Search any remaining frames. */
1135  if ((rv = ps_search_forward(ps)) < 0) {
1136  ptmr_stop(&ps->perf);
1137  return rv;
1138  }
1139  /* Finish phone loop search. */
1140  if (ps->phone_loop) {
1141  if ((rv = ps_search_finish(ps->phone_loop)) < 0) {
1142  ptmr_stop(&ps->perf);
1143  return rv;
1144  }
1145  }
1146  /* Search any frames remaining in the lookahead window. */
1147  if (ps->acmod->output_frame >= ps->pl_window) {
1148  for (i = ps->acmod->output_frame - ps->pl_window;
1149  i < ps->acmod->output_frame; ++i)
1150  ps_search_step(ps->search, i);
1151  }
1152  /* Finish main search. */
1153  if ((rv = ps_search_finish(ps->search)) < 0) {
1154  ptmr_stop(&ps->perf);
1155  return rv;
1156  }
1157  ptmr_stop(&ps->perf);
1158 
1159  /* Log a backtrace if requested. */
1160  if (cmd_ln_boolean_r(ps->config, "-backtrace")) {
1161  const char* hyp;
1162  ps_seg_t *seg;
1163  int32 score;
1164 
1165  hyp = ps_get_hyp(ps, &score);
1166 
1167  if (hyp != NULL) {
1168  E_INFO("%s (%d)\n", hyp, score);
1169  E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
1170  "word", "start", "end", "pprob", "ascr", "lscr", "lback");
1171  for (seg = ps_seg_iter(ps); seg;
1172  seg = ps_seg_next(seg)) {
1173  char const *word;
1174  int sf, ef;
1175  int32 post, lscr, ascr, lback;
1176 
1177  word = ps_seg_word(seg);
1178  ps_seg_frames(seg, &sf, &ef);
1179  post = ps_seg_prob(seg, &ascr, &lscr, &lback);
1180  E_INFO_NOFN("%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
1181  word, sf, ef, logmath_exp(ps_get_logmath(ps), post),
1182  ascr, lscr, lback);
1183  }
1184  }
1185  }
1186  return rv;
1187 }
1188 
1189 char const *
1190 ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score)
1191 {
1192  char const *hyp;
1193 
1194  ptmr_start(&ps->perf);
1195  hyp = ps_search_hyp(ps->search, out_best_score);
1196  ptmr_stop(&ps->perf);
1197  return hyp;
1198 }
1199 
1200 int32
1202 {
1203  int32 prob;
1204 
1205  ptmr_start(&ps->perf);
1206  prob = ps_search_prob(ps->search);
1207  ptmr_stop(&ps->perf);
1208  return prob;
1209 }
1210 
1211 ps_seg_t *
1213 {
1214  ps_seg_t *itor;
1215 
1216  ptmr_start(&ps->perf);
1217  itor = ps_search_seg_iter(ps->search);
1218  ptmr_stop(&ps->perf);
1219  return itor;
1220 }
1221 
1222 ps_seg_t *
1224 {
1225  return ps_search_seg_next(seg);
1226 }
1227 
1228 char const *
1230 {
1231  return seg->word;
1232 }
1233 
1234 void
1235 ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef)
1236 {
1237  int uf;
1238  uf = acmod_stream_offset(seg->search->acmod);
1239  if (out_sf) *out_sf = seg->sf + uf;
1240  if (out_ef) *out_ef = seg->ef + uf;
1241 }
1242 
1243 int32
1244 ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback)
1245 {
1246  if (out_ascr) *out_ascr = seg->ascr;
1247  if (out_lscr) *out_lscr = seg->lscr;
1248  if (out_lback) *out_lback = seg->lback;
1249  return seg->prob;
1250 }
1251 
1252 void
1254 {
1255  ps_search_seg_free(seg);
1256 }
1257 
1258 ps_lattice_t *
1260 {
1261  return ps_search_lattice(ps->search);
1262 }
1263 
1264 ps_nbest_t *
1266 {
1267  ps_lattice_t *dag;
1268  ngram_model_t *lmset;
1269  ps_astar_t *nbest;
1270  float32 lwf;
1271 
1272  if (ps->search == NULL)
1273  return NULL;
1274  if ((dag = ps_get_lattice(ps)) == NULL)
1275  return NULL;
1276 
1277  /* FIXME: This is all quite specific to N-Gram search. Either we
1278  * should make N-best a method for each search module or it needs
1279  * to be abstracted to work for N-Gram and FSG. */
1280  if (0 != strcmp(ps_search_type(ps->search), PS_SEARCH_TYPE_NGRAM)) {
1281  lmset = NULL;
1282  lwf = 1.0f;
1283  } else {
1284  lmset = ((ngram_search_t *)ps->search)->lmset;
1285  lwf = ((ngram_search_t *)ps->search)->bestpath_fwdtree_lw_ratio;
1286  }
1287 
1288  nbest = ps_astar_start(dag, lmset, lwf, 0, -1, -1, -1);
1289 
1290  nbest = ps_nbest_next(nbest);
1291 
1292  return (ps_nbest_t *)nbest;
1293 }
1294 
1295 void
1297 {
1298  ps_astar_finish(nbest);
1299 }
1300 
1301 ps_nbest_t *
1303 {
1304  ps_latpath_t *next;
1305 
1306  next = ps_astar_next(nbest);
1307  if (next == NULL) {
1308  ps_nbest_free(nbest);
1309  return NULL;
1310  }
1311  return nbest;
1312 }
1313 
1314 char const *
1315 ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score)
1316 {
1317  assert(nbest != NULL);
1318 
1319  if (nbest->top == NULL)
1320  return NULL;
1321  if (out_score) *out_score = nbest->top->score;
1322  return ps_astar_hyp(nbest, nbest->top);
1323 }
1324 
1325 ps_seg_t *
1327 {
1328  if (nbest->top == NULL)
1329  return NULL;
1330 
1331  return ps_astar_seg_iter(nbest, nbest->top, 1.0);
1332 }
1333 
1334 int
1336 {
1337  return ps->acmod->output_frame + 1;
1338 }
1339 
1340 void
1341 ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech,
1342  double *out_ncpu, double *out_nwall)
1343 {
1344  int32 frate;
1345 
1346  frate = cmd_ln_int32_r(ps->config, "-frate");
1347  *out_nspeech = (double)ps->acmod->output_frame / frate;
1348  *out_ncpu = ps->perf.t_cpu;
1349  *out_nwall = ps->perf.t_elapsed;
1350 }
1351 
1352 void
1353 ps_get_all_time(ps_decoder_t *ps, double *out_nspeech,
1354  double *out_ncpu, double *out_nwall)
1355 {
1356  int32 frate;
1357 
1358  frate = cmd_ln_int32_r(ps->config, "-frate");
1359  *out_nspeech = (double)ps->n_frame / frate;
1360  *out_ncpu = ps->perf.t_tot_cpu;
1361  *out_nwall = ps->perf.t_tot_elapsed;
1362 }
1363 
1364 uint8
1366 {
1367  return fe_get_vad_state(ps->acmod->fe);
1368 }
1369 
1370 void
1372  const char *type,
1373  const char *name,
1374  cmd_ln_t *config, acmod_t *acmod, dict_t *dict,
1375  dict2pid_t *d2p)
1376 {
1377  search->vt = vt;
1378  search->name = ckd_salloc(name);
1379  search->type = ckd_salloc(type);
1380 
1381  search->config = config;
1382  search->acmod = acmod;
1383  if (d2p)
1384  search->d2p = dict2pid_retain(d2p);
1385  else
1386  search->d2p = NULL;
1387  if (dict) {
1388  search->dict = dict_retain(dict);
1389  search->start_wid = dict_startwid(dict);
1390  search->finish_wid = dict_finishwid(dict);
1391  search->silence_wid = dict_silwid(dict);
1392  search->n_words = dict_size(dict);
1393  }
1394  else {
1395  search->dict = NULL;
1396  search->start_wid = search->finish_wid = search->silence_wid = -1;
1397  search->n_words = 0;
1398  }
1399 }
1400 
1401 void
1403 {
1404  /* FIXME: We will have refcounting on acmod, config, etc, at which
1405  * point we will free them here too. */
1406  ckd_free(search->name);
1407  ckd_free(search->type);
1408  dict_free(search->dict);
1409  dict2pid_free(search->d2p);
1410  ckd_free(search->hyp_str);
1411  ps_lattice_free(search->dag);
1412 }
1413 
1414 void
1416  dict2pid_t *d2p)
1417 {
1418  dict_free(search->dict);
1419  dict2pid_free(search->d2p);
1420  /* FIXME: _retain() should just return NULL if passed NULL. */
1421  if (dict) {
1422  search->dict = dict_retain(dict);
1423  search->start_wid = dict_startwid(dict);
1424  search->finish_wid = dict_finishwid(dict);
1425  search->silence_wid = dict_silwid(dict);
1426  search->n_words = dict_size(dict);
1427  }
1428  else {
1429  search->dict = NULL;
1430  search->start_wid = search->finish_wid = search->silence_wid = -1;
1431  search->n_words = 0;
1432  }
1433  if (d2p)
1434  search->d2p = dict2pid_retain(d2p);
1435  else
1436  search->d2p = NULL;
1437 }
1438 
1439 void
1441 {
1442  acmod_set_rawdata_size(ps->acmod, size);
1443 }
1444 
1445 void
1446 ps_get_rawdata(ps_decoder_t *ps, int16 **buffer, int32 *size)
1447 {
1448  acmod_get_rawdata(ps->acmod, buffer, size);
1449 }
void acmod_get_rawdata(acmod_t *acmod, int16 **buffer, int32 *size)
Retrieves the raw data collected during utterance decoding.
Definition: acmod.c:1332
Implementation of FSG search (and "FSG set") structure.
ptmr_t perf
Performance counter for all of decoding.
dict_t * dict_init(cmd_ln_t *config, bin_mdef_t *mdef)
Initialize a new dictionary.
Definition: dict.c:252
Internal implementation of PocketSphinx decoder.
POCKETSPHINX_EXPORT void ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech, double *out_ncpu, double *out_nwall)
Get performance information for the current utterance.
POCKETSPHINX_EXPORT feat_t * ps_get_feat(ps_decoder_t *ps)
Get the dynamic feature computation object for this decoder.
Definition: pocketsphinx.c:455
int acmod_read_scores(acmod_t *acmod)
Read one frame of scores from senone score dump file.
Definition: acmod.c:1012
Not in an utterance.
Definition: acmod.h:68
void ps_astar_finish(ps_astar_t *nbest)
Finish N-best search, releasing resources associated with it.
Definition: ps_lattice.c:1925
POCKETSPHINX_EXPORT void ps_set_rawdata_size(ps_decoder_t *ps, int32 size)
Sets the limit on the raw audio data stored in the decoder so that it can be retrieved later with ps_get_rawdata.
POCKETSPHINX_EXPORT ngram_model_t * ps_get_lm(ps_decoder_t *ps, const char *name)
Get the language model set object for this decoder.
Definition: pocketsphinx.c:543
ps_latpath_t * ps_astar_next(ps_astar_t *nbest)
Find next best hypothesis of A* on a word graph.
Definition: ps_lattice.c:1771
char const * ps_astar_hyp(ps_astar_t *nbest, ps_latpath_t *path)
Get hypothesis string from A* search.
Definition: ps_lattice.c:1804
POCKETSPHINX_EXPORT int ps_process_cep(ps_decoder_t *ps, mfcc_t **data, int n_frames, int no_search, int full_utt)
Decode acoustic feature data.
Base structure for search module.
POCKETSPHINX_EXPORT arg_t const * ps_args(void)
Returns the argument definitions used in ps_init().
Definition: pocketsphinx.c:407
Utterance started, no data yet.
Definition: acmod.h:69
dict_t * dict
Pronunciation dictionary.
int acmod_set_insenfh(acmod_t *acmod, FILE *senfh)
Set up a senone score dump file for input.
Definition: acmod.c:864
POCKETSPHINX_EXPORT const char * ps_get_kws(ps_decoder_t *ps, const char *name)
Get the current Key phrase to spot.
Definition: pocketsphinx.c:561
int32 acmod_stream_offset(acmod_t *acmod)
Get the offset of the utterance start of the current stream, helpful for stream-wide timing...
Definition: acmod.c:1308
POCKETSPHINX_EXPORT int ps_set_fsg(ps_decoder_t *ps, const char *name, fsg_model_t *fsg)
Adds new search based on finite state grammar.
Definition: pocketsphinx.c:648
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
Definition: dict.c:399
ps_mllr_t * acmod_update_mllr(acmod_t *acmod, ps_mllr_t *mllr)
Adapt acoustic model using a linear transform.
Definition: acmod.c:339
int acmod_process_cep(acmod_t *acmod, mfcc_t ***inout_cep, int *inout_n_frames, int full_utt)
Feed acoustic feature data into the acoustic model for scoring.
Definition: acmod.c:699
POCKETSPHINX_EXPORT int ps_load_dict(ps_decoder_t *ps, char const *dictfile, char const *fdictfile, char const *format)
Reload the pronunciation dictionary from a file.
Definition: pocketsphinx.c:732
POCKETSPHINX_EXPORT void ps_nbest_free(ps_nbest_t *nbest)
Finish N-best search early, releasing resources.
int32 silence_wid
Silence word ID.
Utterance in progress.
Definition: acmod.h:70
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
acmod_t * acmod
Acoustic model.
int dict_free(dict_t *d)
Release a pointer to a dictionary.
Definition: dict.c:468
Main header file for the PocketSphinx decoder.
void acmod_set_rawdata_size(acmod_t *acmod, int32 size)
Sets the limit of the raw audio data to store.
Definition: acmod.c:1321
POCKETSPHINX_EXPORT void ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef)
Get inclusive start and end frames from a segmentation iterator.
void acmod_start_stream(acmod_t *acmod)
Reset the current stream.
Definition: acmod.c:1314
POCKETSPHINX_EXPORT ps_mllr_t * ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr)
Adapt current acoustic model using a linear transform.
Definition: pocketsphinx.c:461
int32 finish_wid
Finish word ID.
fe_t * fe
Acoustic feature computation.
Definition: acmod.h:155
int bin_mdef_ciphone_id(bin_mdef_t *m, const char *ciphone)
Context-independent phone lookup.
Definition: bin_mdef.c:691
int32 lscr
Language model score.
int32 n_words
Number of words known to search (may be less than in the dictionary)
POCKETSPHINX_EXPORT int ps_set_kws(ps_decoder_t *ps, const char *name, const char *keyfile)
Adds keyphrases from a file to spotting.
Definition: pocketsphinx.c:632
POCKETSPHINX_EXPORT uint8 ps_get_in_speech(ps_decoder_t *ps)
Checks if the last fed audio buffer contained speech.
Utterance ended, still buffering.
Definition: acmod.h:71
POCKETSPHINX_EXPORT int ps_save_dict(ps_decoder_t *ps, char const *dictfile, char const *format)
Dump the current pronunciation dictionary to a file.
Definition: pocketsphinx.c:785
#define BAD_S3WID
Dictionary word id.
Definition: s3types.h:90
POCKETSPHINX_EXPORT ps_search_iter_t * ps_search_iter(ps_decoder_t *ps)
Returns iterator over current searches.
Definition: pocketsphinx.c:519
char const * mfclogdir
Log directory for MFCC files.
POCKETSPHINX_EXPORT void ps_default_search_args(cmd_ln_t *)
Sets default grammar and language model if they are not set explicitly and are present in the default...
Definition: pocketsphinx.c:188
Word graph search implementation.
POCKETSPHINX_EXPORT char const * ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score)
Get the hypothesis string from an N-best list iterator.
POCKETSPHINX_EXPORT char * ps_lookup_word(ps_decoder_t *ps, const char *word)
Look up the word in the dictionary and return its phone transcription.
Definition: pocketsphinx.c:860
int refcount
Reference count.
int dict2pid_free(dict2pid_t *d2p)
Free the memory dict2pid structure.
Definition: dict2pid.c:507
const char * dict_ciphone_str(dict_t *d, s3wid_t wid, int32 pos)
Return value: CI phone string for the given word, phone position.
Definition: dict.c:69
A* search structure.
POCKETSPHINX_EXPORT ps_lattice_t * ps_get_lattice(ps_decoder_t *ps)
Get word lattice.
POCKETSPHINX_EXPORT ps_nbest_t * ps_nbest_next(ps_nbest_t *nbest)
Move an N-best list iterator forward.
int32 prob
Log posterior probability.
dict_t * dict_retain(dict_t *d)
Retain a pointer to a dict_t.
Definition: dict.c:461
POCKETSPHINX_EXPORT long ps_decode_raw(ps_decoder_t *ps, FILE *rawfh, long maxsamps)
Decode a raw audio stream.
Definition: pocketsphinx.c:883
char const * word
Word string (pointer into dictionary hash)
int acmod_set_senfh(acmod_t *acmod, FILE *logfh)
Start logging senone scores to a filehandle.
Definition: acmod.c:364
acmod_t * acmod
Acoustic model.
ps_search_t * search
Search object from whence this came.
ps_search_t * search
Currently active search module.
Lexicon tree based Viterbi search.
hash_table_t * searches
Set of search modules.
int acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
Start logging raw audio to a filehandle.
Definition: acmod.c:387
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
int dict_write(dict_t *dict, char const *filename, char const *format)
Write dictionary to a file.
Definition: dict.c:221
POCKETSPHINX_EXPORT int32 ps_get_prob(ps_decoder_t *ps)
Get posterior probability.
logmath_t * lmath
Log math computation.
int32 start_wid
Start word ID.
POCKETSPHINX_EXPORT ps_seg_t * ps_seg_next(ps_seg_t *seg)
Get the next segment in a word segmentation.
N-Gram search module structure.
Definition: ngram_search.h:197
POCKETSPHINX_EXPORT int ps_set_lm(ps_decoder_t *ps, const char *name, ngram_model_t *lm)
Adds new search based on N-gram language model.
Definition: pocketsphinx.c:586
dict2pid_t * d2p
Dictionary to senone mappings.
POCKETSPHINX_EXPORT char const * ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score)
Get hypothesis string and path score.
Decoder object.
char const * rawlogdir
Log directory for audio files.
void ps_search_base_free(ps_search_t *search)
Free search.
POCKETSPHINX_EXPORT int ps_set_jsgf_string(ps_decoder_t *ps, const char *name, const char *jsgf_string)
Adds new search using JSGF model.
Definition: pocketsphinx.c:695
frame_idx_t output_frame
Index of next frame of dynamic features.
Definition: acmod.h:194
POCKETSPHINX_EXPORT int ps_free(ps_decoder_t *ps)
Finalize the decoder.
Definition: pocketsphinx.c:420
int16 s3cipid_t
Size definitions for more semantically meaningful units.
Definition: s3types.h:63
N-Gram based multi-pass search ("FBS")
POCKETSPHINX_EXPORT void ps_seg_free(ps_seg_t *seg)
Finish iterating over a word segmentation early, freeing resources.
frame_idx_t ef
End frame.
int32 ascr
Acoustic score.
cmd_ln_t * config
Configuration.
int acmod_end_utt(acmod_t *acmod)
Mark the end of an utterance.
Definition: acmod.c:441
POCKETSPHINX_EXPORT ps_decoder_t * ps_retain(ps_decoder_t *ps)
Retain a pointer to the decoder.
Definition: pocketsphinx.c:413
int dict2pid_add_word(dict2pid_t *d2p, int32 wid)
Add a word to the dict2pid structure (after adding it to dict).
Definition: dict2pid.c:298
int acmod_advance(acmod_t *acmod)
Advance the frame index.
Definition: acmod.c:899
POCKETSPHINX_EXPORT ps_seg_t * ps_nbest_seg(ps_nbest_t *nbest)
Get the word segmentation from an N-best list iterator.
uint8 state
State of utterance processing.
Definition: acmod.h:187
POCKETSPHINX_EXPORT ps_seg_t * ps_seg_iter(ps_decoder_t *ps)
Get an iterator over the word segmentation for the best hypothesis.
int acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
Start logging MFCCs to a filehandle.
Definition: acmod.c:375
void acmod_free(acmod_t *acmod)
Finalize an acoustic model.
Definition: acmod.c:299
POCKETSPHINX_EXPORT cmd_ln_t * ps_get_config(ps_decoder_t *ps)
Get the configuration object for this decoder.
Definition: pocketsphinx.c:437
POCKETSPHINX_EXPORT int ps_set_lm_file(ps_decoder_t *ps, const char *name, const char *path)
Adds new search based on N-gram language model.
Definition: pocketsphinx.c:594
ps_search_t * phone_loop
Phone loop search for lookahead.
Feature space linear transform structure.
Definition: acmod.h:82
POCKETSPHINX_EXPORT int ps_process_raw(ps_decoder_t *ps, int16 const *data, size_t n_samples, int no_search, int full_utt)
Decode raw audio data.
POCKETSPHINX_EXPORT int ps_set_allphone_file(ps_decoder_t *ps, const char *name, const char *path)
Adds new search based on phone N-gram language model.
Definition: pocketsphinx.c:617
a structure for a dictionary.
Definition: dict.h:76
feat_t * fcb
Dynamic feature computation.
Definition: acmod.h:156
Word graph structure used in bestpath/nbest search.
char const * senlogdir
Log directory for senone score files.
ps_searchfuncs_t * vt
V-table of search methods.
POCKETSPHINX_EXPORT int ps_unset_search(ps_decoder_t *ps, const char *name)
Unsets the search and releases related resources.
Definition: pocketsphinx.c:507
ps_astar_t * ps_astar_start(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, int sf, int ef, int w1, int w2)
Begin N-Gram based A* search on a word graph.
Definition: ps_lattice.c:1712
uint32 n_frame
Total number of frames processed.
POCKETSPHINX_EXPORT ps_search_iter_t * ps_search_iter_next(ps_search_iter_t *itor)
Updates search iterator to point to the next position.
Definition: pocketsphinx.c:525
POCKETSPHINX_EXPORT int ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
Reinitialize the decoder with updated configuration.
Definition: pocketsphinx.c:218
POCKETSPHINX_EXPORT int ps_set_jsgf_file(ps_decoder_t *ps, const char *name, const char *path)
Adds new search using JSGF model.
Definition: pocketsphinx.c:656
Fast and rough context-independent phoneme loop search.
dict2pid_t * d2p
Dictionary to senone mapping.
POCKETSPHINX_EXPORT fsg_model_t * ps_get_fsg(ps_decoder_t *ps, const char *name)
Get the finite-state grammar set object for this decoder.
Definition: pocketsphinx.c:552
dict2pid_t * dict2pid_retain(dict2pid_t *d2p)
Retain a pointer to dict2pid.
Definition: dict2pid.c:500
POCKETSPHINX_EXPORT int ps_end_utt(ps_decoder_t *ps)
End utterance processing.
int32 post
Utterance posterior probability.
char * hyp_str
Current hypothesis string.
Partial path structure used in N-best (A*) search.
dict_t * dict
Pronunciation dictionary.
int acmod_process_raw(acmod_t *acmod, int16 const **inout_raw, size_t *inout_n_samps, int full_utt)
TODO: Set queue length for utterance processing.
Definition: acmod.c:607
Flat lexicon based Viterbi search.
POCKETSPHINX_EXPORT int ps_add_word(ps_decoder_t *ps, char const *word, char const *phones, int update)
Add a word to the pronunciation dictionary.
Definition: pocketsphinx.c:792
int32 lback
Language model backoff.
POCKETSPHINX_EXPORT void ps_get_all_time(ps_decoder_t *ps, double *out_nspeech, double *out_ncpu, double *out_nwall)
Get overall performance information.
POCKETSPHINX_EXPORT logmath_t * ps_get_logmath(ps_decoder_t *ps)
Get the log-math computation object for this decoder.
Definition: pocketsphinx.c:443
int acmod_start_utt(acmod_t *acmod)
Mark the start of an utterance.
Definition: acmod.c:423
POCKETSPHINX_EXPORT int ps_set_keyphrase(ps_decoder_t *ps, const char *name, const char *keyphrase)
Adds new keyphrase to spot.
Definition: pocketsphinx.c:640
POCKETSPHINX_EXPORT ps_nbest_t * ps_nbest(ps_decoder_t *ps)
Get an iterator over the best hypotheses.
POCKETSPHINX_EXPORT int32 ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback)
Get language, acoustic, and posterior probabilities from a segmentation iterator. ...
s3wid_t dict_add_word(dict_t *d, char const *word, s3cipid_t const *p, int32 np)
Add a word with the given ciphone pronunciation list to the dictionary.
Definition: dict.c:80
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
POCKETSPHINX_EXPORT void ps_search_iter_free(ps_search_iter_t *itor)
Delete an unfinished search iterator.
Definition: pocketsphinx.c:537
ps_latlink_t * last_link
Final link in best path.
dict2pid_t * dict2pid_build(bin_mdef_t *mdef, dict_t *dict)
Build the dict2pid structure for the given model/dictionary.
Definition: dict2pid.c:388
POCKETSPHINX_EXPORT int ps_start_stream(ps_decoder_t *ps)
Start processing of the stream of speech.
Definition: pocketsphinx.c:926
POCKETSPHINX_EXPORT int ps_set_allphone(ps_decoder_t *ps, const char *name, ngram_model_t *lm)
Adds new search based on phone N-gram language model.
Definition: pocketsphinx.c:609
acmod_t * acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
Initialize an acoustic model.
Definition: acmod.c:228
frame_idx_t n_feat_frame
Number of frames active in feat_buf.
Definition: acmod.h:199
POCKETSPHINX_EXPORT fe_t * ps_get_fe(ps_decoder_t *ps)
Get the feature extraction object for this decoder.
Definition: pocketsphinx.c:449
V-table for search algorithm.
POCKETSPHINX_EXPORT int ps_start_utt(ps_decoder_t *ps)
Start utterance processing.
Definition: pocketsphinx.c:933
ps_search_t * pls
Phoneme loop for lookahead.
ps_seg_t * ps_astar_seg_iter(ps_astar_t *astar, ps_latpath_t *path, float32 lwf)
Get hypothesis segmentation from A* search.
Definition: ps_lattice.c:1898
ps_lattice_t * dag
Current hypothesis word graph.
POCKETSPHINX_EXPORT ps_decoder_t * ps_init(cmd_ln_t *config)
Initialize the decoder from a configuration object.
Definition: pocketsphinx.c:388
Base structure for hypothesis segmentation iterator.
cmd_ln_t * config
Configuration.
#define dict_size(d)
Packaged macro access to dictionary members.
Definition: dict.h:151
int32 score
Exact score from start node up to node->sf.
POCKETSPHINX_EXPORT int ps_lattice_free(ps_lattice_t *dag)
Free a lattice.
Definition: ps_lattice.c:665
POCKETSPHINX_EXPORT const char * ps_search_iter_val(ps_search_iter_t *itor)
Retrieves the name of the search the iterator points to.
Definition: pocketsphinx.c:531
POCKETSPHINX_EXPORT int ps_get_n_frames(ps_decoder_t *ps)
Get the number of frames of data searched.
ps_search_t * ngram_search_init(const char *name, ngram_model_t *lm, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize the N-Gram search module.
Definition: ngram_search.c:140
Acoustic model structure.
Definition: acmod.h:148
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
int acmod_set_grow(acmod_t *acmod, int grow_feat)
Set memory allocation policy for utterance processing.
Definition: acmod.c:410
POCKETSPHINX_EXPORT int ps_decode_senscr(ps_decoder_t *ps, FILE *senfh)
Decode a senone score dump file.
POCKETSPHINX_EXPORT char const * ps_seg_word(ps_seg_t *seg)
Get word string from a segmentation iterator.
uint32 uttno
Utterance counter.
frame_idx_t sf
Start frame.
POCKETSPHINX_EXPORT const char * ps_get_search(ps_decoder_t *ps)
Returns the name of the current search in the decoder.
Definition: pocketsphinx.c:492
int pl_window
Window size for phoneme lookahead.
POCKETSPHINX_EXPORT void ps_get_rawdata(ps_decoder_t *ps, int16 **buffer, int32 *size)
Retrieves the raw data collected during utterance decoding.
POCKETSPHINX_EXPORT int ps_set_search(ps_decoder_t *ps, const char *name)
Activates the search with the provided name.
Definition: pocketsphinx.c:467