PocketSphinx  5prealpha
fsg_search.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  *
19  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * ====================================================================
32  *
33  */
34 
35 /*
36  * fsg_search.c -- Search structures for FSM decoding.
37  *
38  * **********************************************
39  * CMU ARPA Speech Project
40  *
41  * Copyright (c) 2004 Carnegie Mellon University.
42  * ALL RIGHTS RESERVED.
43  * **********************************************
44  *
45  * HISTORY
46  *
47  * 18-Feb-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
48  * Started.
49  */
50 
51 /* System headers. */
52 #include <stdio.h>
53 #include <string.h>
54 #include <assert.h>
55 
56 /* SphinxBase headers. */
57 #include <sphinxbase/err.h>
58 #include <sphinxbase/ckd_alloc.h>
59 #include <sphinxbase/strfuncs.h>
60 #include <sphinxbase/cmd_ln.h>
61 
62 /* Local headers. */
63 #include "pocketsphinx_internal.h"
64 #include "ps_lattice_internal.h"
65 #include "fsg_search_internal.h"
66 #include "fsg_history.h"
67 #include "fsg_lextree.h"
68 
69 /* Turn this on for detailed debugging dump */
70 #define __FSG_DBG__ 0
71 #define __FSG_DBG_CHAN__ 0
72 
73 static ps_seg_t *fsg_search_seg_iter(ps_search_t *search);
74 static ps_lattice_t *fsg_search_lattice(ps_search_t *search);
75 static int fsg_search_prob(ps_search_t *search);
76 
77 static ps_searchfuncs_t fsg_funcs = {
78  /* start: */ fsg_search_start,
79  /* step: */ fsg_search_step,
80  /* finish: */ fsg_search_finish,
81  /* reinit: */ fsg_search_reinit,
82  /* free: */ fsg_search_free,
83  /* lattice: */ fsg_search_lattice,
84  /* hyp: */ fsg_search_hyp,
85  /* prob: */ fsg_search_prob,
86  /* seg_iter: */ fsg_search_seg_iter,
87 };
88 
89 static int
90 fsg_search_add_silences(fsg_search_t *fsgs, fsg_model_t *fsg)
91 {
92  dict_t *dict;
93  int32 wid;
94  int n_sil;
95 
96  dict = ps_search_dict(fsgs);
97  /*
98  * NOTE: Unlike N-Gram search, we do not use explicit start and
99  * end symbols. This is because the start and end nodes are
100  * defined in the grammar. We do add silence/filler self-loops to
101  * all states in order to allow for silence between words and at
102  * the beginning and end of utterances.
103  *
104  * This has some implications for word graph generation, namely,
105  * that there can (and usually will) be multiple start and end
106  * states in the word graph. We therefore do add explicit start
107  * and end nodes to the graph.
108  */
109  /* Add silence self-loops to all states. */
110  fsg_model_add_silence(fsg, "<sil>", -1,
111  cmd_ln_float32_r(ps_search_config(fsgs), "-silprob"));
112  n_sil = 0;
113  /* Add self-loops for all other fillers. */
114  for (wid = dict_filler_start(dict); wid < dict_filler_end(dict); ++wid) {
115  char const *word = dict_wordstr(dict, wid);
116  if (wid == dict_startwid(dict) || wid == dict_finishwid(dict))
117  continue;
118  fsg_model_add_silence(fsg, word, -1,
119  cmd_ln_float32_r(ps_search_config(fsgs), "-fillprob"));
120  ++n_sil;
121  }
122 
123  return n_sil;
124 }
125 
126 /* Scans the dictionary and check if all words are present. */
127 static int
128 fsg_search_check_dict(fsg_search_t *fsgs, fsg_model_t *fsg)
129 {
130  dict_t *dict;
131  int i;
132 
133  dict = ps_search_dict(fsgs);
134  for (i = 0; i < fsg_model_n_word(fsg); ++i) {
135  char const *word;
136  int32 wid;
137 
138  word = fsg_model_word_str(fsg, i);
139  wid = dict_wordid(dict, word);
140  if (wid == BAD_S3WID) {
141  E_ERROR("The word '%s' is missing in the dictionary\n", word);
142  return FALSE;
143  }
144  }
145 
146  return TRUE;
147 }
148 
149 static int
150 fsg_search_add_altpron(fsg_search_t *fsgs, fsg_model_t *fsg)
151 {
152  dict_t *dict;
153  int n_alt, n_word;
154  int i;
155 
156  dict = ps_search_dict(fsgs);
157  /* Scan FSG's vocabulary for words that have alternate pronunciations. */
158  n_alt = 0;
159  n_word = fsg_model_n_word(fsg);
160  for (i = 0; i < n_word; ++i) {
161  char const *word;
162  int32 wid;
163 
164  word = fsg_model_word_str(fsg, i);
165  wid = dict_wordid(dict, word);
166  if (wid != BAD_S3WID) {
167  while ((wid = dict_nextalt(dict, wid)) != BAD_S3WID) {
168  n_alt += fsg_model_add_alt(fsg, word, dict_wordstr(dict, wid));
169  }
170  }
171  }
172 
173  E_INFO("Added %d alternate word transitions\n", n_alt);
174  return n_alt;
175 }
176 
177 ps_search_t *
178 fsg_search_init(const char *name,
179  fsg_model_t *fsg,
180  cmd_ln_t *config,
181  acmod_t *acmod,
182  dict_t *dict,
183  dict2pid_t *d2p)
184 {
185  fsg_search_t *fsgs = ckd_calloc(1, sizeof(*fsgs));
186  ps_search_init(ps_search_base(fsgs), &fsg_funcs, PS_SEARCH_TYPE_FSG, name, config, acmod, dict, d2p);
187 
188  fsgs->fsg = fsg_model_retain(fsg);
189  /* Initialize HMM context. */
190  fsgs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
191  acmod->tmat->tp, NULL, acmod->mdef->sseq);
192  if (fsgs->hmmctx == NULL) {
193  ps_search_free(ps_search_base(fsgs));
194  return NULL;
195  }
196 
197  /* Intialize the search history object */
198  fsgs->history = fsg_history_init(NULL, dict);
199  fsgs->frame = -1;
200 
201  /* Get search pruning parameters */
202  fsgs->beam_factor = 1.0f;
203  fsgs->beam = fsgs->beam_orig
204  = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-beam"))
205  >> SENSCR_SHIFT;
206  fsgs->pbeam = fsgs->pbeam_orig
207  = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pbeam"))
208  >> SENSCR_SHIFT;
209  fsgs->wbeam = fsgs->wbeam_orig
210  = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-wbeam"))
211  >> SENSCR_SHIFT;
212 
213  /* LM related weights/penalties */
214  fsgs->lw = cmd_ln_float32_r(config, "-lw");
215  fsgs->pip = (int32) (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pip"))
216  * fsgs->lw)
217  >> SENSCR_SHIFT;
218  fsgs->wip = (int32) (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-wip"))
219  * fsgs->lw)
220  >> SENSCR_SHIFT;
221 
222  /* Acoustic score scale for posterior probabilities. */
223  fsgs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale");
224 
225  E_INFO("FSG(beam: %d, pbeam: %d, wbeam: %d; wip: %d, pip: %d)\n",
226  fsgs->beam_orig, fsgs->pbeam_orig, fsgs->wbeam_orig,
227  fsgs->wip, fsgs->pip);
228 
229  if (!fsg_search_check_dict(fsgs, fsg)) {
230  fsg_search_free(ps_search_base(fsgs));
231  return NULL;
232  }
233 
234  if (cmd_ln_boolean_r(config, "-fsgusefiller") &&
235  !fsg_model_has_sil(fsg))
236  fsg_search_add_silences(fsgs, fsg);
237 
238  if (cmd_ln_boolean_r(config, "-fsgusealtpron") &&
239  !fsg_model_has_alt(fsg))
240  fsg_search_add_altpron(fsgs, fsg);
241 
242  if (fsg_search_reinit(ps_search_base(fsgs),
243  ps_search_dict(fsgs),
244  ps_search_dict2pid(fsgs)) < 0)
245  {
246  ps_search_free(ps_search_base(fsgs));
247  return NULL;
248 
249  }
250  ptmr_init(&fsgs->perf);
251 
252  return ps_search_base(fsgs);
253 }
254 
255 void
256 fsg_search_free(ps_search_t *search)
257 {
258  fsg_search_t *fsgs = (fsg_search_t *)search;
259 
260  double n_speech = (double)fsgs->n_tot_frame
261  / cmd_ln_int32_r(ps_search_config(fsgs), "-frate");
262 
263  E_INFO("TOTAL fsg %.2f CPU %.3f xRT\n",
264  fsgs->perf.t_tot_cpu,
265  fsgs->perf.t_tot_cpu / n_speech);
266  E_INFO("TOTAL fsg %.2f wall %.3f xRT\n",
267  fsgs->perf.t_tot_elapsed,
268  fsgs->perf.t_tot_elapsed / n_speech);
269 
270  ps_search_base_free(search);
271  fsg_lextree_free(fsgs->lextree);
272  if (fsgs->history) {
273  fsg_history_reset(fsgs->history);
274  fsg_history_set_fsg(fsgs->history, NULL, NULL);
275  fsg_history_free(fsgs->history);
276  }
277  hmm_context_free(fsgs->hmmctx);
278  fsg_model_free(fsgs->fsg);
279  ckd_free(fsgs);
280 }
281 
282 int
283 fsg_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
284 {
285  fsg_search_t *fsgs = (fsg_search_t *)search;
286 
287  /* Free the old lextree */
288  if (fsgs->lextree)
289  fsg_lextree_free(fsgs->lextree);
290 
291  /* Free old dict2pid, dict */
292  ps_search_base_reinit(search, dict, d2p);
293 
294  /* Update the number of words (not used by this module though). */
295  search->n_words = dict_size(dict);
296 
297  /* Allocate new lextree for the given FSG */
298  fsgs->lextree = fsg_lextree_init(fsgs->fsg, dict, d2p,
299  ps_search_acmod(fsgs)->mdef,
300  fsgs->hmmctx, fsgs->wip, fsgs->pip);
301 
302  /* Inform the history module of the new fsg */
303  fsg_history_set_fsg(fsgs->history, fsgs->fsg, dict);
304 
305  return 0;
306 }
307 
308 
309 static void
310 fsg_search_sen_active(fsg_search_t *fsgs)
311 {
312  gnode_t *gn;
313  fsg_pnode_t *pnode;
314  hmm_t *hmm;
315 
316  acmod_clear_active(ps_search_acmod(fsgs));
317 
318  for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
319  pnode = (fsg_pnode_t *) gnode_ptr(gn);
320  hmm = fsg_pnode_hmmptr(pnode);
321  assert(hmm_frame(hmm) == fsgs->frame);
322  acmod_activate_hmm(ps_search_acmod(fsgs), hmm);
323  }
324 }
325 
326 
327 /*
328  * Evaluate all the active HMMs.
329  * (Executed once per frame.)
330  */
331 static void
332 fsg_search_hmm_eval(fsg_search_t *fsgs)
333 {
334  gnode_t *gn;
335  fsg_pnode_t *pnode;
336  hmm_t *hmm;
337  int32 bestscore;
338  int32 n, maxhmmpf;
339 
340  bestscore = WORST_SCORE;
341 
342  if (!fsgs->pnode_active) {
343  E_ERROR("Frame %d: No active HMM!!\n", fsgs->frame);
344  return;
345  }
346 
347  for (n = 0, gn = fsgs->pnode_active; gn; gn = gnode_next(gn), n++) {
348  int32 score;
349 
350  pnode = (fsg_pnode_t *) gnode_ptr(gn);
351  hmm = fsg_pnode_hmmptr(pnode);
352  assert(hmm_frame(hmm) == fsgs->frame);
353 
354 #if __FSG_DBG__
355  E_INFO("pnode(%08x) active @frm %5d\n", (int32) pnode,
356  fsgs->frame);
357  hmm_dump(hmm, stdout);
358 #endif
359  score = hmm_vit_eval(hmm);
360 #if __FSG_DBG_CHAN__
361  E_INFO("pnode(%08x) after eval @frm %5d\n",
362  (int32) pnode, fsgs->frame);
363  hmm_dump(hmm, stdout);
364 #endif
365 
366  if (score BETTER_THAN bestscore)
367  bestscore = score;
368  }
369 
370 #if __FSG_DBG__
371  E_INFO("[%5d] %6d HMM; bestscr: %11d\n", fsgs->frame, n, bestscore);
372 #endif
373  fsgs->n_hmm_eval += n;
374 
375  /* Adjust beams if #active HMMs larger than absolute threshold */
376  maxhmmpf = cmd_ln_int32_r(ps_search_config(fsgs), "-maxhmmpf");
377  if (maxhmmpf != -1 && n > maxhmmpf) {
378  /*
379  * Too many HMMs active; reduce the beam factor applied to the default
380  * beams, but not if the factor is already at a floor (0.1).
381  */
382  if (fsgs->beam_factor > 0.1) { /* Hack!! Hardwired constant 0.1 */
383  fsgs->beam_factor *= 0.9f; /* Hack!! Hardwired constant 0.9 */
384  fsgs->beam =
385  (int32) (fsgs->beam_orig * fsgs->beam_factor);
386  fsgs->pbeam =
387  (int32) (fsgs->pbeam_orig * fsgs->beam_factor);
388  fsgs->wbeam =
389  (int32) (fsgs->wbeam_orig * fsgs->beam_factor);
390  }
391  }
392  else {
393  fsgs->beam_factor = 1.0f;
394  fsgs->beam = fsgs->beam_orig;
395  fsgs->pbeam = fsgs->pbeam_orig;
396  fsgs->wbeam = fsgs->wbeam_orig;
397  }
398 
399  if (n > fsg_lextree_n_pnode(fsgs->lextree))
400  E_FATAL("PANIC! Frame %d: #HMM evaluated(%d) > #PNodes(%d)\n",
401  fsgs->frame, n, fsg_lextree_n_pnode(fsgs->lextree));
402 
403  fsgs->bestscore = bestscore;
404 }
405 
406 
407 static void
408 fsg_search_pnode_trans(fsg_search_t *fsgs, fsg_pnode_t * pnode)
409 {
410  fsg_pnode_t *child;
411  hmm_t *hmm;
412  int32 newscore, thresh, nf;
413 
414  assert(pnode);
415  assert(!fsg_pnode_leaf(pnode));
416 
417  nf = fsgs->frame + 1;
418  thresh = fsgs->bestscore + fsgs->beam;
419 
420  hmm = fsg_pnode_hmmptr(pnode);
421 
422  for (child = fsg_pnode_succ(pnode);
423  child; child = fsg_pnode_sibling(child)) {
424  newscore = hmm_out_score(hmm) + child->logs2prob;
425 
426  if ((newscore BETTER_THAN thresh)
427  && (newscore BETTER_THAN hmm_in_score(&child->hmm))) {
428  /* Incoming score > pruning threshold and > target's existing score */
429  if (hmm_frame(&child->hmm) < nf) {
430  /* Child node not yet activated; do so */
431  fsgs->pnode_active_next =
432  glist_add_ptr(fsgs->pnode_active_next,
433  (void *) child);
434  }
435 
436  hmm_enter(&child->hmm, newscore, hmm_out_history(hmm), nf);
437  }
438  }
439 }
440 
441 
442 static void
443 fsg_search_pnode_exit(fsg_search_t *fsgs, fsg_pnode_t * pnode)
444 {
445  hmm_t *hmm;
446  fsg_link_t *fl;
447  int32 wid;
448  fsg_pnode_ctxt_t ctxt;
449 
450  assert(pnode);
451  assert(fsg_pnode_leaf(pnode));
452 
453  hmm = fsg_pnode_hmmptr(pnode);
454  fl = fsg_pnode_fsglink(pnode);
455  assert(fl);
456 
457  wid = fsg_link_wid(fl);
458  assert(wid >= 0);
459 
460 #if __FSG_DBG__
461  E_INFO("[%5d] Exit(%08x) %10d(score) %5d(pred)\n",
462  fsgs->frame, (int32) pnode,
463  hmm_out_score(hmm), hmm_out_history(hmm));
464 #endif
465 
466  /*
467  * Check if this is filler or single phone word; these do not model right
468  * context (i.e., the exit score applies to all right contexts).
469  */
470  if (fsg_model_is_filler(fsgs->fsg, wid)
471  /* FIXME: This might be slow due to repeated calls to dict_to_id(). */
472  || (dict_is_single_phone(ps_search_dict(fsgs),
473  dict_wordid(ps_search_dict(fsgs),
474  fsg_model_word_str(fsgs->fsg, wid))))) {
475  /* Create a dummy context structure that applies to all right contexts */
476  fsg_pnode_add_all_ctxt(&ctxt);
477 
478  /* Create history table entry for this word exit */
479  fsg_history_entry_add(fsgs->history,
480  fl,
481  fsgs->frame,
482  hmm_out_score(hmm),
483  hmm_out_history(hmm),
484  pnode->ci_ext, ctxt);
485 
486  }
487  else {
488  /* Create history table entry for this word exit */
489  fsg_history_entry_add(fsgs->history,
490  fl,
491  fsgs->frame,
492  hmm_out_score(hmm),
493  hmm_out_history(hmm),
494  pnode->ci_ext, pnode->ctxt);
495  }
496 }
497 
498 
499 /*
500  * (Beam) prune the just evaluated HMMs, determine which ones remain
501  * active, which ones transition to successors, which ones exit and
502  * terminate in their respective destination FSM states.
503  * (Executed once per frame.)
504  */
505 static void
506 fsg_search_hmm_prune_prop(fsg_search_t *fsgs)
507 {
508  gnode_t *gn;
509  fsg_pnode_t *pnode;
510  hmm_t *hmm;
511  int32 thresh, word_thresh, phone_thresh;
512 
513  assert(fsgs->pnode_active_next == NULL);
514 
515  thresh = fsgs->bestscore + fsgs->beam;
516  phone_thresh = fsgs->bestscore + fsgs->pbeam;
517  word_thresh = fsgs->bestscore + fsgs->wbeam;
518 
519  for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
520  pnode = (fsg_pnode_t *) gnode_ptr(gn);
521  hmm = fsg_pnode_hmmptr(pnode);
522 
523  if (hmm_bestscore(hmm) >= thresh) {
524  /* Keep this HMM active in the next frame */
525  if (hmm_frame(hmm) == fsgs->frame) {
526  hmm_frame(hmm) = fsgs->frame + 1;
527  fsgs->pnode_active_next =
528  glist_add_ptr(fsgs->pnode_active_next,
529  (void *) pnode);
530  }
531  else {
532  assert(hmm_frame(hmm) == fsgs->frame + 1);
533  }
534 
535  if (!fsg_pnode_leaf(pnode)) {
536  if (hmm_out_score(hmm) >= phone_thresh) {
537  /* Transition out of this phone into its children */
538  fsg_search_pnode_trans(fsgs, pnode);
539  }
540  }
541  else {
542  if (hmm_out_score(hmm) >= word_thresh) {
543  /* Transition out of leaf node into destination FSG state */
544  fsg_search_pnode_exit(fsgs, pnode);
545  }
546  }
547  }
548  }
549 }
550 
551 
552 /*
553  * Propagate newly created history entries through null transitions.
554  */
555 static void
556 fsg_search_null_prop(fsg_search_t *fsgs)
557 {
558  int32 bpidx, n_entries, thresh, newscore;
559  fsg_hist_entry_t *hist_entry;
560  fsg_link_t *l;
561  int32 s;
562  fsg_model_t *fsg;
563 
564  fsg = fsgs->fsg;
565  thresh = fsgs->bestscore + fsgs->wbeam; /* Which beam really?? */
566 
567  n_entries = fsg_history_n_entries(fsgs->history);
568 
569  for (bpidx = fsgs->bpidx_start; bpidx < n_entries; bpidx++) {
570  fsg_arciter_t *itor;
571  hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
572 
573  l = fsg_hist_entry_fsglink(hist_entry);
574 
575  /* Destination FSG state for history entry */
576  s = l ? fsg_link_to_state(l) : fsg_model_start_state(fsg);
577 
578  /*
579  * Check null transitions from d to all other states. (Only need to
580  * propagate one step, since FSG contains transitive closure of null
581  * transitions.)
582  */
583  /* Add all links from from_state to dst */
584  for (itor = fsg_model_arcs(fsg, s); itor;
585  itor = fsg_arciter_next(itor)) {
586  fsg_link_t *l = fsg_arciter_get(itor);
587 
588  /* FIXME: Need to deal with tag transitions somehow. */
589  if (fsg_link_wid(l) != -1)
590  continue;
591  newscore =
592  fsg_hist_entry_score(hist_entry) +
593  (fsg_link_logs2prob(l) >> SENSCR_SHIFT);
594 
595  if (newscore >= thresh) {
596  fsg_history_entry_add(fsgs->history, l,
597  fsg_hist_entry_frame(hist_entry),
598  newscore,
599  bpidx,
600  fsg_hist_entry_lc(hist_entry),
601  fsg_hist_entry_rc(hist_entry));
602  }
603  }
604  }
605 }
606 
607 
608 /*
609  * Perform cross-word transitions; propagate each history entry created in this
610  * frame to lextree roots attached to the target FSG state for that entry.
611  */
612 static void
613 fsg_search_word_trans(fsg_search_t *fsgs)
614 {
615  int32 bpidx, n_entries;
616  fsg_hist_entry_t *hist_entry;
617  fsg_link_t *l;
618  int32 score, newscore, thresh, nf, d;
619  fsg_pnode_t *root;
620  int32 lc, rc;
621 
622  n_entries = fsg_history_n_entries(fsgs->history);
623 
624  thresh = fsgs->bestscore + fsgs->beam;
625  nf = fsgs->frame + 1;
626 
627  for (bpidx = fsgs->bpidx_start; bpidx < n_entries; bpidx++) {
628  hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
629  assert(hist_entry);
630  score = fsg_hist_entry_score(hist_entry);
631  assert(fsgs->frame == fsg_hist_entry_frame(hist_entry));
632 
633  l = fsg_hist_entry_fsglink(hist_entry);
634 
635  /* Destination state for hist_entry */
636  d = l ? fsg_link_to_state(l) : fsg_model_start_state(fsgs->
637  fsg);
638 
639  lc = fsg_hist_entry_lc(hist_entry);
640 
641  /* Transition to all root nodes attached to state d */
642  for (root = fsg_lextree_root(fsgs->lextree, d);
643  root; root = root->sibling) {
644  rc = root->ci_ext;
645 
646  if ((root->ctxt.bv[lc >> 5] & (1 << (lc & 0x001f))) &&
647  (hist_entry->rc.bv[rc >> 5] & (1 << (rc & 0x001f)))) {
648  /*
649  * Last CIphone of history entry is in left-context list supported by
650  * target root node, and
651  * first CIphone of target root node is in right context list supported
652  * by history entry;
653  * So the transition can go ahead (if new score is good enough).
654  */
655  newscore = score + root->logs2prob;
656 
657  if ((newscore BETTER_THAN thresh)
658  && (newscore BETTER_THAN hmm_in_score(&root->hmm))) {
659  if (hmm_frame(&root->hmm) < nf) {
660  /* Newly activated node; add to active list */
661  fsgs->pnode_active_next =
662  glist_add_ptr(fsgs->pnode_active_next,
663  (void *) root);
664 #if __FSG_DBG__
665  E_INFO
666  ("[%5d] WordTrans bpidx[%d] -> pnode[%08x] (activated)\n",
667  fsgs->frame, bpidx, (int32) root);
668 #endif
669  }
670  else {
671 #if __FSG_DBG__
672  E_INFO
673  ("[%5d] WordTrans bpidx[%d] -> pnode[%08x]\n",
674  fsgs->frame, bpidx, (int32) root);
675 #endif
676  }
677 
678  hmm_enter(&root->hmm, newscore, bpidx, nf);
679  }
680  }
681  }
682  }
683 }
684 
685 
686 int
687 fsg_search_step(ps_search_t *search, int frame_idx)
688 {
689  fsg_search_t *fsgs = (fsg_search_t *)search;
690  int16 const *senscr;
691  acmod_t *acmod = search->acmod;
692  gnode_t *gn;
693  fsg_pnode_t *pnode;
694  hmm_t *hmm;
695 
696  /* Activate our HMMs for the current frame if need be. */
697  if (!acmod->compallsen)
698  fsg_search_sen_active(fsgs);
699  /* Compute GMM scores for the current frame. */
700  senscr = acmod_score(acmod, &frame_idx);
701  fsgs->n_sen_eval += acmod->n_senone_active;
702  hmm_context_set_senscore(fsgs->hmmctx, senscr);
703 
704  /* Mark backpointer table for current frame. */
705  fsgs->bpidx_start = fsg_history_n_entries(fsgs->history);
706 
707  /* Evaluate all active pnodes (HMMs) */
708  fsg_search_hmm_eval(fsgs);
709 
710  /*
711  * Prune and propagate the HMMs evaluated; create history entries for
712  * word exits. The words exits are tentative, and may be pruned; make
713  * the survivors permanent via fsg_history_end_frame().
714  */
715  fsg_search_hmm_prune_prop(fsgs);
716  fsg_history_end_frame(fsgs->history);
717 
718  /*
719  * Propagate new history entries through any null transitions, creating
720  * new history entries, and then make the survivors permanent.
721  */
722  fsg_search_null_prop(fsgs);
723  fsg_history_end_frame(fsgs->history);
724 
725  /*
726  * Perform cross-word transitions; propagate each history entry across its
727  * terminating state to the root nodes of the lextree attached to the state.
728  */
729  fsg_search_word_trans(fsgs);
730 
731  /*
732  * We've now come full circle, HMM and FSG states have been updated for
733  * the next frame.
734  * Update the active lists, deactivate any currently active HMMs that
735  * did not survive into the next frame
736  */
737  for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
738  pnode = (fsg_pnode_t *) gnode_ptr(gn);
739  hmm = fsg_pnode_hmmptr(pnode);
740 
741  if (hmm_frame(hmm) == fsgs->frame) {
742  /* This HMM NOT activated for the next frame; reset it */
744  }
745  else {
746  assert(hmm_frame(hmm) == (fsgs->frame + 1));
747  }
748  }
749 
750  /* Free the currently active list */
751  glist_free(fsgs->pnode_active);
752 
753  /* Make the next-frame active list the current one */
754  fsgs->pnode_active = fsgs->pnode_active_next;
755  fsgs->pnode_active_next = NULL;
756 
757  /* End of this frame; ready for the next */
758  ++fsgs->frame;
759 
760  return 1;
761 }
762 
763 
764 /*
765  * Set all HMMs to inactive, clear active lists, initialize FSM start
766  * state to be the only active node.
767  * (Executed at the start of each utterance.)
768  */
769 int
770 fsg_search_start(ps_search_t *search)
771 {
772  fsg_search_t *fsgs = (fsg_search_t *)search;
773  int32 silcipid;
774  fsg_pnode_ctxt_t ctxt;
775 
776  /* Reset dynamic adjustment factor for beams */
777  fsgs->beam_factor = 1.0f;
778  fsgs->beam = fsgs->beam_orig;
779  fsgs->pbeam = fsgs->pbeam_orig;
780  fsgs->wbeam = fsgs->wbeam_orig;
781 
782  silcipid = bin_mdef_ciphone_id(ps_search_acmod(fsgs)->mdef, "SIL");
783 
784  /* Initialize EVERYTHING to be inactive */
785  assert(fsgs->pnode_active == NULL);
786  assert(fsgs->pnode_active_next == NULL);
787 
788  fsg_history_reset(fsgs->history);
789  fsg_history_utt_start(fsgs->history);
790  fsgs->final = FALSE;
791 
792  /* Dummy context structure that allows all right contexts to use this entry */
793  fsg_pnode_add_all_ctxt(&ctxt);
794 
795  /* Create dummy history entry leading to start state */
796  fsgs->frame = -1;
797  fsgs->bestscore = 0;
798  fsg_history_entry_add(fsgs->history,
799  NULL, -1, 0, -1, silcipid, ctxt);
800  fsgs->bpidx_start = 0;
801 
802  /* Propagate dummy history entry through NULL transitions from start state */
803  fsg_search_null_prop(fsgs);
804 
805  /* Perform word transitions from this dummy history entry */
806  fsg_search_word_trans(fsgs);
807 
808  /* Make the next-frame active list the current one */
809  fsgs->pnode_active = fsgs->pnode_active_next;
810  fsgs->pnode_active_next = NULL;
811 
812  ++fsgs->frame;
813 
814  fsgs->n_hmm_eval = 0;
815  fsgs->n_sen_eval = 0;
816 
817  ptmr_reset(&fsgs->perf);
818  ptmr_start(&fsgs->perf);
819 
820  return 0;
821 }
822 
823 /*
824  * Cleanup at the end of each utterance.
825  */
826 int
827 fsg_search_finish(ps_search_t *search)
828 {
829  fsg_search_t *fsgs = (fsg_search_t *)search;
830  gnode_t *gn;
831  fsg_pnode_t *pnode;
832  int32 n_hist, cf;
833 
834  /* Deactivate all nodes in the current and next-frame active lists */
835  for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
836  pnode = (fsg_pnode_t *) gnode_ptr(gn);
838  }
839  for (gn = fsgs->pnode_active_next; gn; gn = gnode_next(gn)) {
840  pnode = (fsg_pnode_t *) gnode_ptr(gn);
842  }
843 
844  glist_free(fsgs->pnode_active);
845  fsgs->pnode_active = NULL;
846  glist_free(fsgs->pnode_active_next);
847  fsgs->pnode_active_next = NULL;
848 
849  fsgs->final = TRUE;
850 
851  n_hist = fsg_history_n_entries(fsgs->history);
852  fsgs->n_tot_frame += fsgs->frame;
853  E_INFO
854  ("%d frames, %d HMMs (%d/fr), %d senones (%d/fr), %d history entries (%d/fr)\n\n",
855  fsgs->frame, fsgs->n_hmm_eval,
856  (fsgs->frame > 0) ? fsgs->n_hmm_eval / fsgs->frame : 0,
857  fsgs->n_sen_eval,
858  (fsgs->frame > 0) ? fsgs->n_sen_eval / fsgs->frame : 0,
859  n_hist, (fsgs->frame > 0) ? n_hist / fsgs->frame : 0);
860 
861  /* Print out some statistics. */
862  ptmr_stop(&fsgs->perf);
863  /* This is the number of frames processed. */
864  cf = ps_search_acmod(fsgs)->output_frame;
865  if (cf > 0) {
866  double n_speech = (double) (cf + 1)
867  / cmd_ln_int32_r(ps_search_config(fsgs), "-frate");
868  E_INFO("fsg %.2f CPU %.3f xRT\n",
869  fsgs->perf.t_cpu, fsgs->perf.t_cpu / n_speech);
870  E_INFO("fsg %.2f wall %.3f xRT\n",
871  fsgs->perf.t_elapsed, fsgs->perf.t_elapsed / n_speech);
872  }
873 
874 
875  return 0;
876 }
877 
878 static int
879 fsg_search_find_exit(fsg_search_t *fsgs, int frame_idx, int final, int32 *out_score)
880 {
881  fsg_hist_entry_t *hist_entry = NULL;
882  fsg_model_t *fsg;
883  int bpidx, frm, last_frm, besthist;
884  int32 bestscore;
885 
886  if (frame_idx == -1)
887  frame_idx = fsgs->frame - 1;
888  last_frm = frm = frame_idx;
889 
890  /* Scan backwards to find a word exit in frame_idx. */
891  bpidx = fsg_history_n_entries(fsgs->history) - 1;
892  while (bpidx > 0) {
893  hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
894  if (fsg_hist_entry_frame(hist_entry) <= frame_idx) {
895  frm = last_frm = fsg_hist_entry_frame(hist_entry);
896  break;
897  }
898  bpidx--;
899  }
900 
901  /* No hypothesis (yet). */
902  if (bpidx <= 0)
903  return bpidx;
904 
905  /* Now find best word exit in this frame. */
906  bestscore = INT_MIN;
907  besthist = -1;
908  fsg = fsgs->fsg;
909  while (frm == last_frm) {
910  fsg_link_t *fl;
911  int32 score;
912 
913  fl = fsg_hist_entry_fsglink(hist_entry);
914  score = fsg_hist_entry_score(hist_entry);
915 
916  if (fl == NULL)
917  break;
918 
919  /* Prefer final hypothesis */
920  if (score == bestscore && fsg_link_to_state(fl) == fsg_model_final_state(fsg)) {
921  besthist = bpidx;
922  } else if (score BETTER_THAN bestscore) {
923  /* Only enforce the final state constraint if this is a final hypothesis. */
924  if ((!final)
925  || fsg_link_to_state(fl) == fsg_model_final_state(fsg)) {
926  bestscore = score;
927  besthist = bpidx;
928  }
929  }
930 
931  --bpidx;
932  if (bpidx < 0)
933  break;
934  hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
935  frm = fsg_hist_entry_frame(hist_entry);
936  }
937 
938  /* Final state not reached. */
939  if (besthist == -1) {
940  E_ERROR("Final result does not match the grammar in frame %d\n", frame_idx);
941  return -1;
942  }
943 
944  /* This here's the one we want. */
945  if (out_score)
946  *out_score = bestscore;
947 
948  return besthist;
949 }
950 
951 /* FIXME: Mostly duplicated with ngram_search_bestpath(). */
952 static ps_latlink_t *
953 fsg_search_bestpath(ps_search_t *search, int32 *out_score, int backward)
954 {
955  fsg_search_t *fsgs = (fsg_search_t *)search;
956 
957  if (search->last_link == NULL) {
958  search->last_link = ps_lattice_bestpath(search->dag, NULL,
959  1.0, fsgs->ascale);
960  if (search->last_link == NULL)
961  return NULL;
962  /* Also calculate betas so we can fill in the posterior
963  * probability field in the segmentation. */
964  if (search->post == 0)
965  search->post = ps_lattice_posterior(search->dag, NULL, fsgs->ascale);
966  }
967  if (out_score)
968  *out_score = search->last_link->path_scr + search->dag->final_node_ascr;
969  return search->last_link;
970 }
971 
972 char const *
973 fsg_search_hyp(ps_search_t *search, int32 *out_score)
974 {
975  fsg_search_t *fsgs = (fsg_search_t *)search;
976  dict_t *dict = ps_search_dict(search);
977  char *c;
978  size_t len;
979  int bp, bpidx;
980 
981  /* Get last backpointer table index. */
982  bpidx = fsg_search_find_exit(fsgs, fsgs->frame, fsgs->final, out_score);
983  /* No hypothesis (yet). */
984  if (bpidx <= 0) {
985  return NULL;
986  }
987 
988  /* If bestpath is enabled and the utterance is complete, then run it. */
989  if (fsgs->bestpath && fsgs->final) {
990  ps_lattice_t *dag;
991  ps_latlink_t *link;
992 
993  if ((dag = fsg_search_lattice(search)) == NULL) {
994  E_WARN("Failed to obtain the lattice while bestpath enabled\n");
995  return NULL;
996  }
997  if ((link = fsg_search_bestpath(search, out_score, FALSE)) == NULL) {
998  E_WARN("Failed to find the bestpath in a lattice\n");
999  return NULL;
1000  }
1001  return ps_lattice_hyp(dag, link);
1002  }
1003 
1004  bp = bpidx;
1005  len = 0;
1006  while (bp > 0) {
1007  fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
1008  fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry);
1009  char const *baseword;
1010  int32 wid;
1011 
1012  bp = fsg_hist_entry_pred(hist_entry);
1013  wid = fsg_link_wid(fl);
1014  if (wid < 0 || fsg_model_is_filler(fsgs->fsg, wid))
1015  continue;
1016  baseword = dict_basestr(dict,
1017  dict_wordid(dict,
1018  fsg_model_word_str(fsgs->fsg, wid)));
1019  len += strlen(baseword) + 1;
1020  }
1021 
1022  ckd_free(search->hyp_str);
1023  if (len == 0) {
1024  search->hyp_str = NULL;
1025  return search->hyp_str;
1026  }
1027  search->hyp_str = ckd_calloc(1, len);
1028 
1029  bp = bpidx;
1030  c = search->hyp_str + len - 1;
1031  while (bp > 0) {
1032  fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
1033  fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry);
1034  char const *baseword;
1035  int32 wid;
1036 
1037  bp = fsg_hist_entry_pred(hist_entry);
1038  wid = fsg_link_wid(fl);
1039  if (wid < 0 || fsg_model_is_filler(fsgs->fsg, wid))
1040  continue;
1041  baseword = dict_basestr(dict,
1042  dict_wordid(dict,
1043  fsg_model_word_str(fsgs->fsg, wid)));
1044  len = strlen(baseword);
1045  c -= len;
1046  memcpy(c, baseword, len);
1047  if (c > search->hyp_str) {
1048  --c;
1049  *c = ' ';
1050  }
1051  }
1052 
1053  return search->hyp_str;
1054 }
1055 
1056 static void
1057 fsg_seg_bp2itor(ps_seg_t *seg, fsg_hist_entry_t *hist_entry)
1058 {
1059  fsg_search_t *fsgs = (fsg_search_t *)seg->search;
1060  fsg_hist_entry_t *ph = NULL;
1061  int32 bp;
1062 
1063  if ((bp = fsg_hist_entry_pred(hist_entry)) >= 0)
1064  ph = fsg_history_entry_get(fsgs->history, bp);
1065  seg->word = fsg_model_word_str(fsgs->fsg, hist_entry->fsglink->wid);
1066  seg->ef = fsg_hist_entry_frame(hist_entry);
1067  seg->sf = ph ? fsg_hist_entry_frame(ph) + 1 : 0;
1068  /* This is kind of silly but it happens for null transitions. */
1069  if (seg->sf > seg->ef) seg->sf = seg->ef;
1070  seg->prob = 0; /* Bogus value... */
1071  /* "Language model" score = transition probability. */
1072  seg->lback = 1;
1073  seg->lscr = fsg_link_logs2prob(hist_entry->fsglink) >> SENSCR_SHIFT;
1074  if (ph) {
1075  /* FIXME: Not sure exactly how cross-word triphones are handled. */
1076  seg->ascr = hist_entry->score - ph->score - seg->lscr;
1077  }
1078  else
1079  seg->ascr = hist_entry->score - seg->lscr;
1080 }
1081 
1082 
1083 static void
1084 fsg_seg_free(ps_seg_t *seg)
1085 {
1086  fsg_seg_t *itor = (fsg_seg_t *)seg;
1087  ckd_free(itor->hist);
1088  ckd_free(itor);
1089 }
1090 
1091 static ps_seg_t *
1092 fsg_seg_next(ps_seg_t *seg)
1093 {
1094  fsg_seg_t *itor = (fsg_seg_t *)seg;
1095 
1096  if (++itor->cur == itor->n_hist) {
1097  fsg_seg_free(seg);
1098  return NULL;
1099  }
1100 
1101  fsg_seg_bp2itor(seg, itor->hist[itor->cur]);
1102  return seg;
1103 }
1104 
1105 static ps_segfuncs_t fsg_segfuncs = {
1106  /* seg_next */ fsg_seg_next,
1107  /* seg_free */ fsg_seg_free
1108 };
1109 
1110 static ps_seg_t *
1111 fsg_search_seg_iter(ps_search_t *search)
1112 {
1113  fsg_search_t *fsgs = (fsg_search_t *)search;
1114  fsg_seg_t *itor;
1115  int32 out_score;
1116  int bp, bpidx, cur;
1117 
1118  bpidx = fsg_search_find_exit(fsgs, fsgs->frame, fsgs->final, &out_score);
1119  /* No hypothesis (yet). */
1120  if (bpidx <= 0)
1121  return NULL;
1122 
1123  /* If bestpath is enabled and the utterance is complete, then run it. */
1124  if (fsgs->bestpath && fsgs->final) {
1125  ps_lattice_t *dag;
1126  ps_latlink_t *link;
1127 
1128  if ((dag = fsg_search_lattice(search)) == NULL)
1129  return NULL;
1130  if ((link = fsg_search_bestpath(search, &out_score, TRUE)) == NULL)
1131  return NULL;
1132  return ps_lattice_seg_iter(dag, link, 1.0);
1133  }
1134 
1135  /* Calling this an "iterator" is a bit of a misnomer since we have
1136  * to get the entire backtrace in order to produce it. On the
1137  * other hand, all we actually need is the bptbl IDs, and we can
1138  * allocate a fixed-size array of them. */
1139  itor = ckd_calloc(1, sizeof(*itor));
1140  itor->base.vt = &fsg_segfuncs;
1141  itor->base.search = search;
1142  itor->base.lwf = 1.0;
1143  itor->n_hist = 0;
1144  bp = bpidx;
1145  while (bp > 0) {
1146  fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
1147  bp = fsg_hist_entry_pred(hist_entry);
1148  ++itor->n_hist;
1149  }
1150  if (itor->n_hist == 0) {
1151  ckd_free(itor);
1152  return NULL;
1153  }
1154  itor->hist = ckd_calloc(itor->n_hist, sizeof(*itor->hist));
1155  cur = itor->n_hist - 1;
1156  bp = bpidx;
1157  while (bp > 0) {
1158  fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
1159  itor->hist[cur] = hist_entry;
1160  bp = fsg_hist_entry_pred(hist_entry);
1161  --cur;
1162  }
1163 
1164  /* Fill in relevant fields for first element. */
1165  fsg_seg_bp2itor((ps_seg_t *)itor, itor->hist[0]);
1166 
1167  return (ps_seg_t *)itor;
1168 }
1169 
1170 static int
1171 fsg_search_prob(ps_search_t *search)
1172 {
1173  fsg_search_t *fsgs = (fsg_search_t *)search;
1174 
1175  /* If bestpath is enabled and the utterance is complete, then run it. */
1176  if (fsgs->bestpath && fsgs->final) {
1177  ps_lattice_t *dag;
1178  ps_latlink_t *link;
1179 
1180  if ((dag = fsg_search_lattice(search)) == NULL)
1181  return 0;
1182  if ((link = fsg_search_bestpath(search, NULL, TRUE)) == NULL)
1183  return 0;
1184  return search->post;
1185  }
1186  else {
1187  /* FIXME: Give some kind of good estimate here, eventually. */
1188  return 0;
1189  }
1190 }
1191 
1192 static ps_latnode_t *
1193 find_node(ps_lattice_t *dag, fsg_model_t *fsg, int sf, int32 wid, int32 node_id)
1194 {
1195  ps_latnode_t *node;
1196 
1197  for (node = dag->nodes; node; node = node->next)
1198  if ((node->sf == sf) && (node->wid == wid) && (node->node_id == node_id))
1199  break;
1200  return node;
1201 }
1202 
1203 static ps_latnode_t *
1204 new_node(ps_lattice_t *dag, fsg_model_t *fsg, int sf, int ef, int32 wid, int32 node_id, int32 ascr)
1205 {
1206  ps_latnode_t *node;
1207 
1208  node = find_node(dag, fsg, sf, wid, node_id);
1209 
1210  if (node) {
1211  /* Update end frames. */
1212  if (node->lef == -1 || node->lef < ef)
1213  node->lef = ef;
1214  if (node->fef == -1 || node->fef > ef)
1215  node->fef = ef;
1216  /* Update best link score. */
1217  if (ascr BETTER_THAN node->info.best_exit)
1218  node->info.best_exit = ascr;
1219  }
1220  else {
1221  /* New node; link to head of list */
1222  node = listelem_malloc(dag->latnode_alloc);
1223  node->wid = wid;
1224  node->sf = sf;
1225  node->fef = node->lef = ef;
1226  node->reachable = FALSE;
1227  node->entries = NULL;
1228  node->exits = NULL;
1229  node->info.best_exit = ascr;
1230  node->node_id = node_id;
1231 
1232  node->next = dag->nodes;
1233  dag->nodes = node;
1234  ++dag->n_nodes;
1235  }
1236 
1237  return node;
1238 }
1239 
1240 static ps_latnode_t *
1241 find_start_node(fsg_search_t *fsgs, ps_lattice_t *dag)
1242 {
1243  ps_latnode_t *node;
1244  glist_t start = NULL;
1245  int nstart = 0;
1246 
1247  /* Look for all nodes starting in frame zero with some exits. */
1248  for (node = dag->nodes; node; node = node->next) {
1249  if (node->sf == 0 && node->exits) {
1250  E_INFO("Start node %s.%d:%d:%d\n",
1251  fsg_model_word_str(fsgs->fsg, node->wid),
1252  node->sf, node->fef, node->lef);
1253  start = glist_add_ptr(start, node);
1254  ++nstart;
1255  }
1256  }
1257 
1258  /* If there was more than one start node candidate, then we need
1259  * to create an artificial start node with epsilon transitions to
1260  * all of them. */
1261  if (nstart == 1) {
1262  node = gnode_ptr(start);
1263  }
1264  else {
1265  gnode_t *st;
1266  int wid;
1267 
1268  wid = fsg_model_word_add(fsgs->fsg, "<s>");
1269  if (fsgs->fsg->silwords)
1270  bitvec_set(fsgs->fsg->silwords, wid);
1271  node = new_node(dag, fsgs->fsg, 0, 0, wid, -1, 0);
1272  for (st = start; st; st = gnode_next(st))
1273  ps_lattice_link(dag, node, gnode_ptr(st), 0, 0);
1274  }
1275  glist_free(start);
1276  return node;
1277 }
1278 
1279 static ps_latnode_t *
1280 find_end_node(fsg_search_t *fsgs, ps_lattice_t *dag)
1281 {
1282  ps_latnode_t *node;
1283  glist_t end = NULL;
1284  int nend = 0;
1285 
1286  /* Look for all nodes ending in last frame with some entries. */
1287  for (node = dag->nodes; node; node = node->next) {
1288  if (node->lef == dag->n_frames - 1 && node->entries) {
1289  E_INFO("End node %s.%d:%d:%d (%d)\n",
1290  fsg_model_word_str(fsgs->fsg, node->wid),
1291  node->sf, node->fef, node->lef, node->info.best_exit);
1292  end = glist_add_ptr(end, node);
1293  ++nend;
1294  }
1295  }
1296 
1297  if (nend == 1) {
1298  node = gnode_ptr(end);
1299  }
1300  else if (nend == 0) {
1301  ps_latnode_t *last = NULL;
1302  int ef = 0;
1303 
1304  /* If there were no end node candidates, then just use the
1305  * node with the last exit frame. */
1306  for (node = dag->nodes; node; node = node->next) {
1307  if (node->lef > ef && node->entries) {
1308  last = node;
1309  ef = node->lef;
1310  }
1311  }
1312  node = last;
1313  if (node)
1314  E_INFO("End node %s.%d:%d:%d (%d)\n",
1315  fsg_model_word_str(fsgs->fsg, node->wid),
1316  node->sf, node->fef, node->lef, node->info.best_exit);
1317  }
1318  else {
1319  /* If there was more than one end node candidate, then we need
1320  * to create an artificial end node with epsilon transitions
1321  * out of all of them. */
1322  gnode_t *st;
1323  int wid;
1324  wid = fsg_model_word_add(fsgs->fsg, "</s>");
1325  if (fsgs->fsg->silwords)
1326  bitvec_set(fsgs->fsg->silwords, wid);
1327  node = new_node(dag, fsgs->fsg, fsgs->frame, fsgs->frame, wid, -1, 0);
1328  /* Use the "best" (in reality it will be the only) exit link
1329  * score from this final node as the link score. */
1330  for (st = end; st; st = gnode_next(st)) {
1331  ps_latnode_t *src = gnode_ptr(st);
1332  ps_lattice_link(dag, src, node, src->info.best_exit, fsgs->frame);
1333  }
1334  }
1335  glist_free(end);
1336  return node;
1337 }
1338 
1339 static void
1340 mark_reachable(ps_lattice_t *dag, ps_latnode_t *end)
1341 {
1342  glist_t q;
1343 
1344  /* It doesn't matter which order we do this in. */
1345  end->reachable = TRUE;
1346  q = glist_add_ptr(NULL, end);
1347  while (q) {
1348  ps_latnode_t *node = gnode_ptr(q);
1349  latlink_list_t *x;
1350 
1351  /* Pop the front of the list. */
1352  q = gnode_free(q, NULL);
1353  /* Expand all its predecessors that haven't been seen yet. */
1354  for (x = node->entries; x; x = x->next) {
1355  ps_latnode_t *next = x->link->from;
1356  if (!next->reachable) {
1357  next->reachable = TRUE;
1358  q = glist_add_ptr(q, next);
1359  }
1360  }
1361  }
1362 }
1363 
1372 static ps_lattice_t *
1373 fsg_search_lattice(ps_search_t *search)
1374 {
1375  fsg_search_t *fsgs;
1376  fsg_model_t *fsg;
1377  ps_latnode_t *node;
1378  ps_lattice_t *dag;
1379  int32 i, n;
1380 
1381  fsgs = (fsg_search_t *)search;
1382 
1383  /* Check to see if a lattice has previously been created over the
1384  * same number of frames, and reuse it if so. */
1385  if (search->dag && search->dag->n_frames == fsgs->frame)
1386  return search->dag;
1387 
1388  /* Nope, create a new one. */
1389  ps_lattice_free(search->dag);
1390  search->dag = NULL;
1391  dag = ps_lattice_init_search(search, fsgs->frame);
1392  fsg = fsgs->fsg;
1393 
1394  /*
1395  * Each history table entry represents a link in the word graph.
1396  * The set of nodes is determined by the number of unique
1397  * (word,start-frame) pairs in the history table. So we will
1398  * first find all those nodes.
1399  */
1400  n = fsg_history_n_entries(fsgs->history);
1401  for (i = 0; i < n; ++i) {
1402  fsg_hist_entry_t *fh = fsg_history_entry_get(fsgs->history, i);
1403  int32 ascr;
1404  int sf;
1405 
1406  /* Skip null transitions. */
1407  if (fh->fsglink == NULL || fh->fsglink->wid == -1)
1408  continue;
1409 
1410  /* Find the start node of this link. */
1411  if (fh->pred) {
1412  fsg_hist_entry_t *pfh = fsg_history_entry_get(fsgs->history, fh->pred);
1413  /* FIXME: We include the transition score in the lattice
1414  * link score. This is because of the practical
1415  * difficulty of obtaining it separately in bestpath or
1416  * forward-backward search, and because it is essentially
1417  * a unigram probability, so there is no need to treat it
1418  * separately from the acoustic score. However, it's not
1419  * clear that this will actually yield correct results.*/
1420  ascr = fh->score - pfh->score;
1421  sf = pfh->frame + 1;
1422  }
1423  else {
1424  ascr = fh->score;
1425  sf = 0;
1426  }
1427 
1428  /*
1429  * Note that although scores are tied to links rather than
1430  * nodes, it's possible that there are no links out of the
1431  * destination node, and thus we need to preserve its score in
1432  * case it turns out to be utterance-final.
1433  */
1434  new_node(dag, fsg, sf, fh->frame, fh->fsglink->wid, fsg_link_to_state(fh->fsglink), ascr);
1435  }
1436 
1437  /*
1438  * Now, we will create links only to nodes that actually exist.
1439  */
1440  n = fsg_history_n_entries(fsgs->history);
1441  for (i = 0; i < n; ++i) {
1442  fsg_hist_entry_t *fh = fsg_history_entry_get(fsgs->history, i);
1443  fsg_arciter_t *itor;
1444  ps_latnode_t *src, *dest;
1445  int32 ascr;
1446  int sf;
1447 
1448  /* Skip null transitions. */
1449  if (fh->fsglink == NULL || fh->fsglink->wid == -1)
1450  continue;
1451 
1452  /* Find the start node of this link and calculate its link score. */
1453  if (fh->pred) {
1454  fsg_hist_entry_t *pfh = fsg_history_entry_get(fsgs->history, fh->pred);
1455  sf = pfh->frame + 1;
1456  ascr = fh->score - pfh->score;
1457  }
1458  else {
1459  ascr = fh->score;
1460  sf = 0;
1461  }
1462  src = find_node(dag, fsg, sf, fh->fsglink->wid, fsg_link_to_state(fh->fsglink));
1463  sf = fh->frame + 1;
1464 
1465  for (itor = fsg_model_arcs(fsg, fsg_link_to_state(fh->fsglink));
1466  itor; itor = fsg_arciter_next(itor)) {
1467  fsg_link_t *link = fsg_arciter_get(itor);
1468 
1469  /* FIXME: Need to figure out what to do about tag transitions. */
1470  if (link->wid >= 0) {
1471  /*
1472  * For each non-epsilon link following this one, look for a
1473  * matching node in the lattice and link to it.
1474  */
1475  if ((dest = find_node(dag, fsg, sf, link->wid, fsg_link_to_state(link))) != NULL)
1476  ps_lattice_link(dag, src, dest, ascr, fh->frame);
1477  }
1478  else {
1479  /*
1480  * Transitive closure on nulls has already been done, so we
1481  * just need to look one link forward from them.
1482  */
1483  fsg_arciter_t *itor2;
1484 
1485  /* Add all non-null links out of j. */
1486  for (itor2 = fsg_model_arcs(fsg, fsg_link_to_state(link));
1487  itor2; itor2 = fsg_arciter_next(itor2)) {
1488  fsg_link_t *link = fsg_arciter_get(itor2);
1489 
1490  if (link->wid == -1)
1491  continue;
1492 
1493  if ((dest = find_node(dag, fsg, sf, link->wid, fsg_link_to_state(link))) != NULL) {
1494  ps_lattice_link(dag, src, dest, ascr, fh->frame);
1495  }
1496  }
1497  }
1498  }
1499  }
1500 
1501 
1502  /* Figure out which nodes are the start and end nodes. */
1503  if ((dag->start = find_start_node(fsgs, dag)) == NULL) {
1504  E_WARN("Failed to find the start node\n");
1505  goto error_out;
1506  }
1507  if ((dag->end = find_end_node(fsgs, dag)) == NULL) {
1508  E_WARN("Failed to find the end node\n");
1509  goto error_out;
1510  }
1511 
1512 
1513  E_INFO("lattice start node %s.%d end node %s.%d\n",
1514  fsg_model_word_str(fsg, dag->start->wid), dag->start->sf,
1515  fsg_model_word_str(fsg, dag->end->wid), dag->end->sf);
1516  /* FIXME: Need to calculate final_node_ascr here. */
1517 
1518  /*
1519  * Convert word IDs from FSG to dictionary.
1520  */
1521  for (node = dag->nodes; node; node = node->next) {
1522  node->wid = dict_wordid(dag->search->dict,
1523  fsg_model_word_str(fsg, node->wid));
1524  node->basewid = dict_basewid(dag->search->dict, node->wid);
1525  }
1526 
1527  /*
1528  * Now we are done, because the links in the graph are uniquely
1529  * defined by the history table. However we should remove any
1530  * nodes which are not reachable from the end node of the FSG.
1531  * Everything is reachable from the start node by definition.
1532  */
1533  mark_reachable(dag, dag->end);
1534 
1536  {
1537  int32 silpen, fillpen;
1538 
1539  silpen = (int32)(logmath_log(fsg->lmath,
1540  cmd_ln_float32_r(ps_search_config(fsgs), "-silprob"))
1541  * fsg->lw)
1542  >> SENSCR_SHIFT;
1543  fillpen = (int32)(logmath_log(fsg->lmath,
1544  cmd_ln_float32_r(ps_search_config(fsgs), "-fillprob"))
1545  * fsg->lw)
1546  >> SENSCR_SHIFT;
1547 
1548  ps_lattice_penalize_fillers(dag, silpen, fillpen);
1549  }
1550  search->dag = dag;
1551 
1552  return dag;
1553 
1554 
1555 error_out:
1556  ps_lattice_free(dag);
1557  return NULL;
1558 
1559 }
1560 
void fsg_lextree_free(fsg_lextree_t *lextree)
Free lextrees for an FSG.
Definition: fsg_lextree.c:286
glist_t pnode_active_next
Those activated for the next frame.
Implementation of FSG search (and &quot;FSG set&quot;) structure.
Internal implementation of PocketSphinx decoder.
int16 reachable
From.
Base structure for search module.
int32 n_nodes
Number of nodes in this lattice.
ps_seg_t * ps_lattice_seg_iter(ps_lattice_t *dag, ps_latlink_t *link, float32 lwf)
Get hypothesis segmentation iterator after bestpath search.
Definition: ps_lattice.c:1006
int16 cur
Current position in hist.
Definition: fsg_history.h:95
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
Definition: dict.c:399
int32 lef
Last end frame.
fsg_model_t * fsg
FSG model.
int32 wbeam_orig
Pruning threshold for word exit.
void fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t *ctxt)
Set all flags on in the given context bitvector.
Definition: fsg_lextree.c:328
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
int n_senone_active
Number of active GMMs.
Definition: acmod.h:169
acmod_t * acmod
Acoustic model.
Segmentation &quot;iterator&quot; for FSG history.
void ps_lattice_penalize_fillers(ps_lattice_t *dag, int32 silpen, int32 fillpen)
Insert penalty for fillers.
Definition: ps_lattice.c:106
ps_seg_t base
Base structure.
An individual HMM among the HMM search space.
float32 beam_factor
Dynamic/adaptive factor (&lt;=1) applied to above beams to determine actual effective beams...
ps_latnode_t * start
Starting node.
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state]...
Definition: tmat.h:56
ps_segfuncs_t * vt
V-table of seg methods.
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
int bin_mdef_ciphone_id(bin_mdef_t *m, const char *ciphone)
Context-independent phone lookup.
Definition: bin_mdef.c:691
struct fsg_history_s * history
For storing the Viterbi search history.
uint16 ** sseq
Unique senone sequences (2D array built at load time)
Definition: bin_mdef.h:134
int32 lscr
Language model score.
int32 n_words
Number of words known to search (may be less than in the dictionary)
int32 wbeam
Effective beams after applying beam_factor.
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
Definition: acmod.c:1213
float32 ascale
Acoustic score scale for posterior probabilities.
ps_search_t * search
Search (if generated by search).
#define BAD_S3WID
Dictionary word id.
Definition: s3types.h:90
frame_idx_t n_frames
Number of frames for this utterance.
Word graph search implementation.
int32 n_hmm_eval
Total HMMs evaluated this utt.
ps_latnode_t * nodes
List of all nodes.
listelem_alloc_t * latnode_alloc
Node allocator for this DAG.
int32 wip
Language weights.
int32 best_exit
Best exit score (used for final nodes only)
int32 prob
Log posterior probability.
latlink_list_t * entries
Links into this node.
POCKETSPHINX_EXPORT int32 ps_lattice_posterior(ps_lattice_t *dag, ngram_model_t *lmset, float32 ascale)
Calculate link posterior probabilities on a word graph.
Definition: ps_lattice.c:1446
hmm_context_t * hmmctx
HMM context.
char const * word
Word string (pointer into dictionary hash)
ps_search_t * search
Search object from whence this came.
int32 final_node_ascr
Acoustic score of implicit link exiting final node.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
fsg_hist_entry_t ** hist
Sequence of history entries.
int32 hmm_vit_eval(hmm_t *hmm)
Viterbi evaluation of given HMM.
Definition: hmm.c:789
void fsg_psubtree_pnode_deactivate(fsg_pnode_t *pnode)
Mark the given pnode as inactive (for search).
Definition: fsg_lextree.c:832
uint8 compallsen
Compute all senones?
Definition: acmod.h:188
hmm_context_t * hmm_context_init(int32 n_emit_state, uint8 **const *tp, int16 const *senscore, uint16 *const *sseq)
Create an HMM context.
Definition: hmm.c:56
void ps_search_base_free(ps_search_t *search)
Free search.
void ps_lattice_delete_unreachable(ps_lattice_t *dag)
Remove nodes marked as unreachable.
Definition: ps_lattice.c:174
ps_latnode_t * end
Ending node.
frame_idx_t sf
Start frame.
POCKETSPHINX_EXPORT ps_latlink_t * ps_lattice_bestpath(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, float32 ascale)
Do N-Gram based best-path search on a word graph.
Definition: ps_lattice.c:1215
int32 beam_orig
Global pruning threshold.
latlink_list_t * exits
Links out of this node.
#define WORST_SCORE
Large &quot;bad&quot; score.
Definition: hmm.h:84
tmat_t * tmat
Transition matrices.
Definition: acmod.h:160
frame_idx_t ef
End frame.
int32 ascr
Acoustic score.
void hmm_enter(hmm_t *h, int32 score, int32 histid, int frame)
Enter an HMM with the given path score and history ID.
Definition: hmm.c:201
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
Definition: acmod.c:1197
int32 wid
Dictionary word id.
int32 node_id
Node from fsg model, used to map lattice back to model.
int32 pbeam_orig
Pruning threshold for phone transition.
#define hmm_context_set_senscore(ctx, senscr)
Change the senone score array for a context.
Definition: hmm.h:227
void hmm_dump(hmm_t *h, FILE *fp)
For debugging, dump the whole HMM out.
Definition: hmm.c:116
uint8 final
Decoding is finished for this utterance.
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
a structure for a dictionary.
Definition: dict.h:76
char const * ps_lattice_hyp(ps_lattice_t *dag, ps_latlink_t *link)
Get hypothesis string after bestpath search.
Definition: ps_lattice.c:830
Word graph structure used in bestpath/nbest search.
int32 bestscore
For beam pruning.
int32 bpidx_start
First history entry index this frame.
int32 post
Utterance posterior probability.
char * hyp_str
Current hypothesis string.
fsg_lextree_t * fsg_lextree_init(fsg_model_t *fsg, dict_t *dict, dict2pid_t *d2p, bin_mdef_t *mdef, hmm_context_t *ctx, int32 wip, int32 pip)
Create, initialize, and return a new phonetic lextree for the given FSG.
Definition: fsg_lextree.c:215
#define BETTER_THAN
Is one score better than another?
Definition: hmm.h:95
dict_t * dict
Pronunciation dictionary.
int32 fef
First end frame.
uint8 bestpath
Whether to run bestpath search and confidence annotation at end.
int32 lback
Language model backoff.
int32 n_sen_eval
Total senones evaluated this utt.
int32 basewid
Dictionary base word id.
glist_t pnode_active
Those active in this frame.
ps_lattice_t * ps_lattice_init_search(ps_search_t *search, int n_frame)
Construct an empty word graph with reference to a search structure.
Definition: ps_lattice.c:639
void hmm_context_free(hmm_context_t *ctx)
Free an HMM context.
Definition: hmm.c:80
int16 n_hist
Number of history entries.
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
ps_latlink_t * last_link
Final link in best path.
struct ps_latnode_s * next
Next node in DAG (no ordering implied)
V-table for search algorithm.
frame_idx_t frame
Current frame.
ps_lattice_t * dag
Current hypothesis word graph.
Base structure for hypothesis segmentation iterator.
struct fsg_lextree_s * lextree
Lextree structure for the currently active FSG.
#define dict_size(d)
Packaged macro access to dictionary members.
Definition: dict.h:151
POCKETSPHINX_EXPORT int ps_lattice_free(ps_lattice_t *dag)
Free a lattice.
Definition: ps_lattice.c:665
Acoustic model structure.
Definition: acmod.h:148
float32 lwf
Language weight factor (for second-pass searches)
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
POCKETSPHINX_EXPORT void ps_lattice_link(ps_lattice_t *dag, ps_latnode_t *from, ps_latnode_t *to, int32 score, int32 ef)
Create a directed link between &quot;from&quot; and &quot;to&quot; nodes, but if a link already exists, choose one with the best link_scr.
Definition: ps_lattice.c:65
frame_idx_t sf
Start frame.
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
Definition: acmod.c:1106
ptmr_t perf
Performance counter.