PocketSphinx  5prealpha
ngram_search.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
42 /* System headers. */
43 #include <string.h>
44 #include <assert.h>
45 
46 /* SphinxBase headers. */
47 #include <sphinxbase/ckd_alloc.h>
48 #include <sphinxbase/listelem_alloc.h>
49 #include <sphinxbase/err.h>
50 
51 /* Local headers. */
52 #include "pocketsphinx_internal.h"
53 #include "ps_lattice_internal.h"
54 #include "ngram_search.h"
55 #include "ngram_search_fwdtree.h"
56 #include "ngram_search_fwdflat.h"
57 
58 static int ngram_search_start(ps_search_t *search);
59 static int ngram_search_step(ps_search_t *search, int frame_idx);
60 static int ngram_search_finish(ps_search_t *search);
61 static int ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p);
62 static char const *ngram_search_hyp(ps_search_t *search, int32 *out_score);
63 static int32 ngram_search_prob(ps_search_t *search);
64 static ps_seg_t *ngram_search_seg_iter(ps_search_t *search);
65 
66 static ps_searchfuncs_t ngram_funcs = {
67  /* start: */ ngram_search_start,
68  /* step: */ ngram_search_step,
69  /* finish: */ ngram_search_finish,
70  /* reinit: */ ngram_search_reinit,
71  /* free: */ ngram_search_free,
72  /* lattice: */ ngram_search_lattice,
73  /* hyp: */ ngram_search_hyp,
74  /* prob: */ ngram_search_prob,
75  /* seg_iter: */ ngram_search_seg_iter,
76 };
77 
78 static ngram_model_t *default_lm;
79 
80 static void
81 ngram_search_update_widmap(ngram_search_t *ngs)
82 {
83  char const **words;
84  int32 i, n_words;
85 
86  /* It's okay to include fillers since they won't be in the LM */
87  n_words = ps_search_n_words(ngs);
88  words = (char const**)ckd_calloc(n_words, sizeof(*words));
89  /* This will include alternates, again, that's okay since they aren't in the LM */
90  for (i = 0; i < n_words; ++i)
91  words[i] = dict_wordstr(ps_search_dict(ngs), i);
92  ngram_model_set_map_words(ngs->lmset, words, n_words);
93  ckd_free(words);
94 }
95 
96 static void
97 ngram_search_calc_beams(ngram_search_t *ngs)
98 {
99  cmd_ln_t *config;
100  acmod_t *acmod;
101 
102  config = ps_search_config(ngs);
103  acmod = ps_search_acmod(ngs);
104 
105  /* Log beam widths. */
106  ngs->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-beam"))>>SENSCR_SHIFT;
107  ngs->wbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-wbeam"))>>SENSCR_SHIFT;
108  ngs->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pbeam"))>>SENSCR_SHIFT;
109  ngs->lpbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lpbeam"))>>SENSCR_SHIFT;
110  ngs->lponlybeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lponlybeam"))>>SENSCR_SHIFT;
111  ngs->fwdflatbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatbeam"))>>SENSCR_SHIFT;
112  ngs->fwdflatwbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatwbeam"))>>SENSCR_SHIFT;
113 
114  /* Absolute pruning parameters. */
115  ngs->maxwpf = cmd_ln_int32_r(config, "-maxwpf");
116  ngs->maxhmmpf = cmd_ln_int32_r(config, "-maxhmmpf");
117 
118  /* Various penalties which may or may not be useful. */
119  ngs->wip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-wip")) >>SENSCR_SHIFT;
120  ngs->nwpen = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-nwpen")) >>SENSCR_SHIFT;
121  ngs->pip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pip")) >>SENSCR_SHIFT;
122  ngs->silpen = ngs->pip
123  + (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-silprob"))>>SENSCR_SHIFT);
124  ngs->fillpen = ngs->pip
125  + (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-fillprob"))>>SENSCR_SHIFT);
126 
127  /* Language weight ratios for fwdflat and bestpath search. */
128  ngs->fwdflat_fwdtree_lw_ratio =
129  cmd_ln_float32_r(config, "-fwdflatlw")
130  / cmd_ln_float32_r(config, "-lw");
131  ngs->bestpath_fwdtree_lw_ratio =
132  cmd_ln_float32_r(config, "-bestpathlw")
133  / cmd_ln_float32_r(config, "-lw");
134 
135  /* Acoustic score scale for posterior probabilities. */
136  ngs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale");
137 }
138 
139 ps_search_t *
140 ngram_search_init(const char *name,
141  ngram_model_t *lm,
142  cmd_ln_t *config,
143  acmod_t *acmod,
144  dict_t *dict,
145  dict2pid_t *d2p)
146 {
147  ngram_search_t *ngs;
148  static char *lmname = "default";
149 
150  /* Make the acmod's feature buffer growable if we are doing two-pass
151  * search. */
152  acmod_set_grow(acmod, cmd_ln_boolean_r(config, "-fwdflat") &&
153  cmd_ln_boolean_r(config, "-fwdtree"));
154 
155  ngs = ckd_calloc(1, sizeof(*ngs));
156  ps_search_init(&ngs->base, &ngram_funcs, PS_SEARCH_TYPE_NGRAM, name, config, acmod, dict, d2p);
157 
158  ngs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
159  acmod->tmat->tp, NULL, acmod->mdef->sseq);
160  if (ngs->hmmctx == NULL) {
161  ps_search_free(ps_search_base(ngs));
162  return NULL;
163  }
164  ngs->chan_alloc = listelem_alloc_init(sizeof(chan_t));
165  ngs->root_chan_alloc = listelem_alloc_init(sizeof(root_chan_t));
166  ngs->latnode_alloc = listelem_alloc_init(sizeof(ps_latnode_t));
167 
168  /* Calculate various beam widths and such. */
169  ngram_search_calc_beams(ngs);
170 
171  /* Allocate a billion different tables for stuff. */
172  ngs->word_chan = ckd_calloc(dict_size(dict),
173  sizeof(*ngs->word_chan));
174  ngs->word_lat_idx = ckd_calloc(dict_size(dict),
175  sizeof(*ngs->word_lat_idx));
176  ngs->word_active = bitvec_alloc(dict_size(dict));
177  ngs->last_ltrans = ckd_calloc(dict_size(dict),
178  sizeof(*ngs->last_ltrans));
179 
180  /* FIXME: All these structures need to be made dynamic with
181  * garbage collection. */
182  ngs->bp_table_size = cmd_ln_int32_r(config, "-latsize");
183  ngs->bp_table = ckd_calloc(ngs->bp_table_size,
184  sizeof(*ngs->bp_table));
185  /* FIXME: This thing is frickin' huge. */
186  ngs->bscore_stack_size = ngs->bp_table_size * 20;
187  ngs->bscore_stack = ckd_calloc(ngs->bscore_stack_size,
188  sizeof(*ngs->bscore_stack));
189  ngs->n_frame_alloc = 256;
190  ngs->bp_table_idx = ckd_calloc(ngs->n_frame_alloc + 1,
191  sizeof(*ngs->bp_table_idx));
192  ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */
193 
194  /* Allocate active word list array */
195  ngs->active_word_list = ckd_calloc_2d(2, dict_size(dict),
196  sizeof(**ngs->active_word_list));
197 
198  ngs->lmset = ngram_model_set_init(config, &lm, &lmname, NULL, 1);
199  if (!ngs->lmset)
200  goto error_out;
201 
202  if (ngram_wid(ngs->lmset, S3_FINISH_WORD) ==
203  ngram_unknown_wid(ngs->lmset))
204  {
205  E_ERROR("Language model/set does not contain </s>, "
206  "recognition will fail\n");
207  goto error_out;
208  }
209 
210  /* Create word mappings. */
211  ngram_search_update_widmap(ngs);
212 
213  /* Initialize fwdtree, fwdflat, bestpath modules if necessary. */
214  if (cmd_ln_boolean_r(config, "-fwdtree")) {
215  ngram_fwdtree_init(ngs);
216  ngs->fwdtree = TRUE;
217  ngs->fwdtree_perf.name = "fwdtree";
218  ptmr_init(&ngs->fwdtree_perf);
219  }
220  if (cmd_ln_boolean_r(config, "-fwdflat")) {
221  ngram_fwdflat_init(ngs);
222  ngs->fwdflat = TRUE;
223  ngs->fwdflat_perf.name = "fwdflat";
224  ptmr_init(&ngs->fwdflat_perf);
225  }
226  if (cmd_ln_boolean_r(config, "-bestpath")) {
227  ngs->bestpath = TRUE;
228  ngs->bestpath_perf.name = "bestpath";
229  ptmr_init(&ngs->bestpath_perf);
230  }
231 
232  return (ps_search_t *)ngs;
233 
234 error_out:
236  return NULL;
237 }
238 
239 static int
240 ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
241 {
242  ngram_search_t *ngs = (ngram_search_t *)search;
243  int old_n_words;
244  int rv = 0;
245 
246  /* Update the number of words. */
247  old_n_words = search->n_words;
248  if (old_n_words != dict_size(dict)) {
249  search->n_words = dict_size(dict);
250  /* Reallocate these temporary arrays. */
251  ckd_free(ngs->word_lat_idx);
252  ckd_free(ngs->word_active);
253  ckd_free(ngs->last_ltrans);
254  ckd_free_2d(ngs->active_word_list);
255  ngs->word_lat_idx = ckd_calloc(search->n_words, sizeof(*ngs->word_lat_idx));
256  ngs->word_active = bitvec_alloc(search->n_words);
257  ngs->last_ltrans = ckd_calloc(search->n_words, sizeof(*ngs->last_ltrans));
258  ngs->active_word_list
259  = ckd_calloc_2d(2, search->n_words,
260  sizeof(**ngs->active_word_list));
261  }
262 
263  /* Free old dict2pid, dict */
264  ps_search_base_reinit(search, dict, d2p);
265 
266  if (ngs->lmset == NULL)
267  return 0;
268 
269  /* Update beam widths. */
270  ngram_search_calc_beams(ngs);
271 
272  /* Update word mappings. */
273  ngram_search_update_widmap(ngs);
274 
275  /* Now rebuild lextrees. */
276  if (ngs->fwdtree) {
277  if ((rv = ngram_fwdtree_reinit(ngs)) < 0)
278  return rv;
279  }
280  if (ngs->fwdflat) {
281  if ((rv = ngram_fwdflat_reinit(ngs)) < 0)
282  return rv;
283  }
284 
285  return rv;
286 }
287 
288 void
290 {
291  ngram_search_t *ngs = (ngram_search_t *)search;
292 
293  if (ngs->fwdtree)
295  if (ngs->fwdflat)
297  if (ngs->bestpath) {
298  double n_speech = (double)ngs->n_tot_frame
299  / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
300 
301  E_INFO("TOTAL bestpath %.2f CPU %.3f xRT\n",
302  ngs->bestpath_perf.t_tot_cpu,
303  ngs->bestpath_perf.t_tot_cpu / n_speech);
304  E_INFO("TOTAL bestpath %.2f wall %.3f xRT\n",
305  ngs->bestpath_perf.t_tot_elapsed,
306  ngs->bestpath_perf.t_tot_elapsed / n_speech);
307  }
308 
309  ps_search_base_free(search);
310  hmm_context_free(ngs->hmmctx);
311  listelem_alloc_free(ngs->chan_alloc);
312  listelem_alloc_free(ngs->root_chan_alloc);
313  listelem_alloc_free(ngs->latnode_alloc);
314  ngram_model_free(ngs->lmset);
315 
316  ckd_free(ngs->word_chan);
317  ckd_free(ngs->word_lat_idx);
318  bitvec_free(ngs->word_active);
319  ckd_free(ngs->bp_table);
320  ckd_free(ngs->bscore_stack);
321  if (ngs->bp_table_idx != NULL)
322  ckd_free(ngs->bp_table_idx - 1);
323  ckd_free_2d(ngs->active_word_list);
324  ckd_free(ngs->last_ltrans);
325  ckd_free(ngs);
326 }
327 
328 int
330 {
331  if (frame_idx >= ngs->n_frame_alloc) {
332  ngs->n_frame_alloc *= 2;
333  ngs->bp_table_idx = ckd_realloc(ngs->bp_table_idx - 1,
334  (ngs->n_frame_alloc + 1)
335  * sizeof(*ngs->bp_table_idx));
336  if (ngs->frm_wordlist) {
337  ngs->frm_wordlist = ckd_realloc(ngs->frm_wordlist,
338  ngs->n_frame_alloc
339  * sizeof(*ngs->frm_wordlist));
340  }
341  ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */
342  }
343  ngs->bp_table_idx[frame_idx] = ngs->bpidx;
344  return ngs->bpidx;
345 }
346 
347 static void
348 set_real_wid(ngram_search_t *ngs, int32 bp)
349 {
350  bptbl_t *ent, *prev;
351 
352  assert(bp != NO_BP);
353  ent = ngs->bp_table + bp;
354  if (ent->bp == NO_BP)
355  prev = NULL;
356  else
357  prev = ngs->bp_table + ent->bp;
358 
359  /* Propagate lm state for fillers, rotate it for words. */
360  if (dict_filler_word(ps_search_dict(ngs), ent->wid)) {
361  if (prev != NULL) {
362  ent->real_wid = prev->real_wid;
363  ent->prev_real_wid = prev->prev_real_wid;
364  }
365  else {
366  ent->real_wid = dict_basewid(ps_search_dict(ngs),
367  ent->wid);
368  ent->prev_real_wid = BAD_S3WID;
369  }
370  }
371  else {
372  ent->real_wid = dict_basewid(ps_search_dict(ngs), ent->wid);
373  if (prev != NULL)
374  ent->prev_real_wid = prev->real_wid;
375  else
376  ent->prev_real_wid = BAD_S3WID;
377  }
378 }
379 
380 #define NGRAM_HISTORY_LONG_WORD 2000 /* 20s */
381 
382 void
384  int32 w, int32 score, int32 path, int32 rc)
385 {
386  int32 bp;
387 
388  /* Look for an existing exit for this word in this frame. The
389  * only reason one would exist is from a different right context
390  * triphone, but of course that happens quite frequently. */
391  bp = ngs->word_lat_idx[w];
392  if (bp != NO_BP) {
393 
394  if (frame_idx - ngs->bp_table[path].frame > NGRAM_HISTORY_LONG_WORD) {
395  E_WARN("Word '%s' survived for %d frames, potential overpruning\n", dict_wordstr(ps_search_dict(ngs), w),
396  frame_idx - ngs->bp_table[path].frame);
397  }
398 
399  /* Keep only the best scoring one, we will reconstruct the
400  * others from the right context scores - usually the history
401  * is not lost. */
402  if (ngs->bp_table[bp].score WORSE_THAN score) {
403  assert(path != bp); /* Pathological. */
404  if (ngs->bp_table[bp].bp != path) {
405  int32 bplh[2], newlh[2];
406  /* But, sometimes, the history *is* lost. If we wanted to
407  * do exact language model scoring we'd have to preserve
408  * these alternate histories. */
409  E_DEBUG(2,("Updating path history %d => %d frame %d\n",
410  ngs->bp_table[bp].bp, path, frame_idx));
411  bplh[0] = ngs->bp_table[bp].bp == -1
412  ? -1 : ngs->bp_table[ngs->bp_table[bp].bp].prev_real_wid;
413  bplh[1] = ngs->bp_table[bp].bp == -1
414  ? -1 : ngs->bp_table[ngs->bp_table[bp].bp].real_wid;
415  newlh[0] = path == -1
416  ? -1 : ngs->bp_table[path].prev_real_wid;
417  newlh[1] = path == -1
418  ? -1 : ngs->bp_table[path].real_wid;
419  /* Actually it's worth checking how often the actual
420  * language model state changes. */
421  if (bplh[0] != newlh[0] || bplh[1] != newlh[1]) {
422  /* It's fairly rare that the actual language model
423  * state changes, but it does happen some
424  * times. */
425  E_DEBUG(1, ("Updating language model state %s,%s => %s,%s frame %d\n",
426  dict_wordstr(ps_search_dict(ngs), bplh[0]),
427  dict_wordstr(ps_search_dict(ngs), bplh[1]),
428  dict_wordstr(ps_search_dict(ngs), newlh[0]),
429  dict_wordstr(ps_search_dict(ngs), newlh[1]),
430  frame_idx));
431  set_real_wid(ngs, bp);
432  }
433  ngs->bp_table[bp].bp = path;
434  }
435  ngs->bp_table[bp].score = score;
436  }
437  /* But do keep track of scores for all right contexts, since
438  * we need them to determine the starting path scores for any
439  * successors of this word exit. */
440  if (ngs->bp_table[bp].s_idx != -1)
441  ngs->bscore_stack[ngs->bp_table[bp].s_idx + rc] = score;
442  }
443  else {
444  int32 i, rcsize;
445  bptbl_t *be;
446 
447  /* This might happen if recognition fails. */
448  if (ngs->bpidx == NO_BP) {
449  E_ERROR("No entries in backpointer table!");
450  return;
451  }
452 
453  /* Expand the backpointer tables if necessary. */
454  if (ngs->bpidx >= ngs->bp_table_size) {
455  ngs->bp_table_size *= 2;
456  ngs->bp_table = ckd_realloc(ngs->bp_table,
457  ngs->bp_table_size
458  * sizeof(*ngs->bp_table));
459  E_INFO("Resized backpointer table to %d entries\n", ngs->bp_table_size);
460  }
461  if (ngs->bss_head >= ngs->bscore_stack_size
462  - bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef)) {
463  ngs->bscore_stack_size *= 2;
464  ngs->bscore_stack = ckd_realloc(ngs->bscore_stack,
465  ngs->bscore_stack_size
466  * sizeof(*ngs->bscore_stack));
467  E_INFO("Resized score stack to %d entries\n", ngs->bscore_stack_size);
468  }
469 
470  ngs->word_lat_idx[w] = ngs->bpidx;
471  be = &(ngs->bp_table[ngs->bpidx]);
472  be->wid = w;
473  be->frame = frame_idx;
474  be->bp = path;
475  be->score = score;
476  be->s_idx = ngs->bss_head;
477  be->valid = TRUE;
478  assert(path != ngs->bpidx);
479 
480  /* DICT2PID */
481  /* Get diphone ID for final phone and number of ssids corresponding to it. */
482  be->last_phone = dict_last_phone(ps_search_dict(ngs),w);
483  if (dict_is_single_phone(ps_search_dict(ngs), w)) {
484  be->last2_phone = -1;
485  be->s_idx = -1;
486  rcsize = 0;
487  }
488  else {
489  be->last2_phone = dict_second_last_phone(ps_search_dict(ngs),w);
490  rcsize = dict2pid_rssid(ps_search_dict2pid(ngs),
491  be->last_phone, be->last2_phone)->n_ssid;
492  }
493  /* Allocate some space on the bscore_stack for all of these triphones. */
494  for (i = 0; i < rcsize; ++i)
495  ngs->bscore_stack[ngs->bss_head + i] = WORST_SCORE;
496  if (rcsize)
497  ngs->bscore_stack[ngs->bss_head + rc] = score;
498  set_real_wid(ngs, ngs->bpidx);
499 
500  ngs->bpidx++;
501  ngs->bss_head += rcsize;
502  }
503 }
504 
505 int
506 ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score)
507 {
508  /* End of backpointers for this frame. */
509  int end_bpidx;
510  int best_exit, bp;
511  int32 best_score;
512 
513  /* No hypothesis means no exit node! */
514  if (ngs->n_frame == 0)
515  return NO_BP;
516 
517  if (frame_idx == -1 || frame_idx >= ngs->n_frame)
518  frame_idx = ngs->n_frame - 1;
519  end_bpidx = ngs->bp_table_idx[frame_idx];
520 
521  best_score = WORST_SCORE;
522  best_exit = NO_BP;
523 
524  /* Scan back to find a frame with some backpointers in it. */
525  while (frame_idx >= 0 && ngs->bp_table_idx[frame_idx] == end_bpidx)
526  --frame_idx;
527  /* This is NOT an error, it just means there is no hypothesis yet. */
528  if (frame_idx < 0)
529  return NO_BP;
530 
531  /* Now find the entry for </s> OR the best scoring entry. */
532  assert(end_bpidx < ngs->bp_table_size);
533  for (bp = ngs->bp_table_idx[frame_idx]; bp < end_bpidx; ++bp) {
534  if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs)
535  || ngs->bp_table[bp].score BETTER_THAN best_score) {
536  best_score = ngs->bp_table[bp].score;
537  best_exit = bp;
538  }
539  if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs))
540  break;
541  }
542 
543  if (out_best_score) {
544  *out_best_score = best_score;
545  }
546  return best_exit;
547 }
548 
549 char const *
551 {
552  ps_search_t *base = ps_search_base(ngs);
553  char *c;
554  size_t len;
555  int bp;
556 
557  if (bpidx == NO_BP)
558  return NULL;
559 
560  bp = bpidx;
561  len = 0;
562  while (bp != NO_BP) {
563  bptbl_t *be = &ngs->bp_table[bp];
564  bp = be->bp;
565  if (dict_real_word(ps_search_dict(ngs), be->wid))
566  len += strlen(dict_basestr(ps_search_dict(ngs), be->wid)) + 1;
567  }
568 
569  ckd_free(base->hyp_str);
570  if (len == 0) {
571  base->hyp_str = NULL;
572  return base->hyp_str;
573  }
574  base->hyp_str = ckd_calloc(1, len);
575 
576  bp = bpidx;
577  c = base->hyp_str + len - 1;
578  while (bp != NO_BP) {
579  bptbl_t *be = &ngs->bp_table[bp];
580  size_t len;
581 
582  bp = be->bp;
583  if (dict_real_word(ps_search_dict(ngs), be->wid)) {
584  len = strlen(dict_basestr(ps_search_dict(ngs), be->wid));
585  c -= len;
586  memcpy(c, dict_basestr(ps_search_dict(ngs), be->wid), len);
587  if (c > base->hyp_str) {
588  --c;
589  *c = ' ';
590  }
591  }
592  }
593 
594  return base->hyp_str;
595 }
596 
597 void
599 {
600  chan_t *hmm, *thmm;
601  xwdssid_t *rssid;
602  int32 i, tmatid, ciphone;
603 
604  /* DICT2PID */
605  /* Get pointer to array of triphones for final diphone. */
606  assert(!dict_is_single_phone(ps_search_dict(ngs), w));
607  ciphone = dict_last_phone(ps_search_dict(ngs),w);
608  rssid = dict2pid_rssid(ps_search_dict2pid(ngs),
609  ciphone,
610  dict_second_last_phone(ps_search_dict(ngs),w));
611  tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone);
612  hmm = ngs->word_chan[w];
613  if ((hmm == NULL) || (hmm_nonmpx_ssid(&hmm->hmm) != rssid->ssid[0])) {
614  hmm = listelem_malloc(ngs->chan_alloc);
615  hmm->next = ngs->word_chan[w];
616  ngs->word_chan[w] = hmm;
617 
618  hmm->info.rc_id = 0;
619  hmm->ciphone = ciphone;
620  hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, rssid->ssid[0], tmatid);
621  E_DEBUG(3,("allocated rc_id 0 ssid %d ciphone %d lc %d word %s\n",
622  rssid->ssid[0], hmm->ciphone,
623  dict_second_last_phone(ps_search_dict(ngs),w),
624  dict_wordstr(ps_search_dict(ngs),w)));
625  }
626  for (i = 1; i < rssid->n_ssid; ++i) {
627  if ((hmm->next == NULL) || (hmm_nonmpx_ssid(&hmm->next->hmm) != rssid->ssid[i])) {
628  thmm = listelem_malloc(ngs->chan_alloc);
629  thmm->next = hmm->next;
630  hmm->next = thmm;
631  hmm = thmm;
632 
633  hmm->info.rc_id = i;
634  hmm->ciphone = ciphone;
635  hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, rssid->ssid[i], tmatid);
636  E_DEBUG(3,("allocated rc_id %d ssid %d ciphone %d lc %d word %s\n",
637  i, rssid->ssid[i], hmm->ciphone,
638  dict_second_last_phone(ps_search_dict(ngs),w),
639  dict_wordstr(ps_search_dict(ngs),w)));
640  }
641  else
642  hmm = hmm->next;
643  }
644 }
645 
646 void
648 {
649  chan_t *hmm, *thmm;
650 
651  for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) {
652  thmm = hmm->next;
653  hmm_deinit(&hmm->hmm);
654  listelem_free(ngs->chan_alloc, hmm);
655  }
656  ngs->word_chan[w] = NULL;
657 }
658 
659 int32
661 {
662  /* DICT2PID */
663  /* Get the mapping from right context phone ID to index in the
664  * right context table and the bscore_stack. */
665  if (pbe->last2_phone == -1) {
666  /* No right context for single phone predecessor words. */
667  return pbe->score;
668  }
669  else {
670  xwdssid_t *rssid;
671  /* Find the index for the last diphone of the previous word +
672  * the first phone of the current word. */
673  rssid = dict2pid_rssid(ps_search_dict2pid(ngs),
674  pbe->last_phone, pbe->last2_phone);
675  /* This may be WORST_SCORE, which means that there was no exit
676  * with rcphone as right context. */
677  return ngs->bscore_stack[pbe->s_idx + rssid->cimap[rcphone]];
678  }
679 }
680 
681 /*
682  * Compute acoustic and LM scores for a BPTable entry (segment).
683  */
684 void
685 ngram_compute_seg_score(ngram_search_t *ngs, bptbl_t *be, float32 lwf,
686  int32 *out_ascr, int32 *out_lscr)
687 {
688  bptbl_t *pbe;
689  int32 start_score;
690 
691  /* Start of utterance. */
692  if (be->bp == NO_BP) {
693  *out_ascr = be->score;
694  *out_lscr = 0;
695  return;
696  }
697 
698  /* Otherwise, calculate lscr and ascr. */
699  pbe = ngs->bp_table + be->bp;
700  start_score = ngram_search_exit_score(ngs, pbe,
701  dict_first_phone(ps_search_dict(ngs),be->wid));
702  assert(start_score BETTER_THAN WORST_SCORE);
703 
704  /* FIXME: These result in positive acoustic scores when filler
705  words have non-filler pronunciations. That whole business
706  is still pretty much broken but at least it doesn't
707  segfault. */
708  if (be->wid == ps_search_silence_wid(ngs)) {
709  *out_lscr = ngs->silpen;
710  }
711  else if (dict_filler_word(ps_search_dict(ngs), be->wid)) {
712  *out_lscr = ngs->fillpen;
713  }
714  else {
715  int32 n_used;
716  *out_lscr = ngram_tg_score(ngs->lmset,
717  be->real_wid,
718  pbe->real_wid,
719  pbe->prev_real_wid,
720  &n_used)>>SENSCR_SHIFT;
721  *out_lscr = *out_lscr * lwf;
722  }
723  *out_ascr = be->score - start_score - *out_lscr;
724 }
725 
726 static int
727 ngram_search_start(ps_search_t *search)
728 {
729  ngram_search_t *ngs = (ngram_search_t *)search;
730 
731  ngs->done = FALSE;
732  ngram_model_flush(ngs->lmset);
733  if (ngs->fwdtree)
734  ngram_fwdtree_start(ngs);
735  else if (ngs->fwdflat)
736  ngram_fwdflat_start(ngs);
737  else
738  return -1;
739  return 0;
740 }
741 
742 static int
743 ngram_search_step(ps_search_t *search, int frame_idx)
744 {
745  ngram_search_t *ngs = (ngram_search_t *)search;
746 
747  if (ngs->fwdtree)
748  return ngram_fwdtree_search(ngs, frame_idx);
749  else if (ngs->fwdflat)
750  return ngram_fwdflat_search(ngs, frame_idx);
751  else
752  return -1;
753 }
754 
755 void
756 dump_bptable(ngram_search_t *ngs)
757 {
758  int i;
759  E_INFO("Backpointer table (%d entries):\n", ngs->bpidx);
760  for (i = 0; i < ngs->bpidx; ++i) {
761  bptbl_t *bpe = ngs->bp_table + i;
762  int j, rcsize;
763 
764  E_INFO_NOFN("%-5d %-10s start %-3d end %-3d score %-8d bp %-3d real_wid %-5d prev_real_wid %-5d",
765  i, dict_wordstr(ps_search_dict(ngs), bpe->wid),
766  (bpe->bp == -1
767  ? 0 : ngs->bp_table[bpe->bp].frame + 1),
768  bpe->frame, bpe->score, bpe->bp,
769  bpe->real_wid, bpe->prev_real_wid);
770 
771  if (bpe->last2_phone == -1)
772  rcsize = 0;
773  else
774  rcsize = dict2pid_rssid(ps_search_dict2pid(ngs),
775  bpe->last_phone, bpe->last2_phone)->n_ssid;
776  if (rcsize) {
777  E_INFOCONT("\tbss");
778  for (j = 0; j < rcsize; ++j)
779  if (ngs->bscore_stack[bpe->s_idx + j] != WORST_SCORE)
780  E_INFOCONT(" %d", bpe->score - ngs->bscore_stack[bpe->s_idx + j]);
781  }
782  E_INFOCONT("\n");
783  }
784 }
785 
786 static int
787 ngram_search_finish(ps_search_t *search)
788 {
789  ngram_search_t *ngs = (ngram_search_t *)search;
790 
791  ngs->n_tot_frame += ngs->n_frame;
792  if (ngs->fwdtree) {
794  /* dump_bptable(ngs); */
795 
796  /* Now do fwdflat search in its entirety, if requested. */
797  if (ngs->fwdflat) {
798  int i;
799  /* Rewind the acoustic model. */
800  if (acmod_rewind(ps_search_acmod(ngs)) < 0)
801  return -1;
802  /* Now redo search. */
803  ngram_fwdflat_start(ngs);
804  i = 0;
805  while (ps_search_acmod(ngs)->n_feat_frame > 0) {
806  int nfr;
807  if ((nfr = ngram_fwdflat_search(ngs, i)) < 0)
808  return nfr;
809  acmod_advance(ps_search_acmod(ngs));
810  ++i;
811  }
813  /* And now, we should have a result... */
814  /* dump_bptable(ngs); */
815  }
816  }
817  else if (ngs->fwdflat) {
819  }
820 
821  /* Mark the current utterance as done. */
822  ngs->done = TRUE;
823  return 0;
824 }
825 
826 static ps_latlink_t *
827 ngram_search_bestpath(ps_search_t *search, int32 *out_score, int backward)
828 {
829  ngram_search_t *ngs = (ngram_search_t *)search;
830 
831  if (search->last_link == NULL) {
832  search->last_link = ps_lattice_bestpath(search->dag, ngs->lmset,
833  ngs->bestpath_fwdtree_lw_ratio,
834  ngs->ascale);
835  if (search->last_link == NULL)
836  return NULL;
837  /* Also calculate betas so we can fill in the posterior
838  * probability field in the segmentation. */
839  if (search->post == 0)
840  search->post = ps_lattice_posterior(search->dag, ngs->lmset,
841  ngs->ascale);
842  }
843  if (out_score)
844  *out_score = search->last_link->path_scr + search->dag->final_node_ascr;
845  return search->last_link;
846 }
847 
848 static char const *
849 ngram_search_hyp(ps_search_t *search, int32 *out_score)
850 {
851  ngram_search_t *ngs = (ngram_search_t *)search;
852 
853  /* Only do bestpath search if the utterance is complete. */
854  if (ngs->bestpath && ngs->done) {
855  ps_lattice_t *dag;
856  ps_latlink_t *link;
857  char const *hyp;
858  double n_speech;
859 
860  ptmr_reset(&ngs->bestpath_perf);
861  ptmr_start(&ngs->bestpath_perf);
862  if ((dag = ngram_search_lattice(search)) == NULL)
863  return NULL;
864  if ((link = ngram_search_bestpath(search, out_score, FALSE)) == NULL)
865  return NULL;
866  hyp = ps_lattice_hyp(dag, link);
867  ptmr_stop(&ngs->bestpath_perf);
868  n_speech = (double)dag->n_frames
869  / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
870  E_INFO("bestpath %.2f CPU %.3f xRT\n",
871  ngs->bestpath_perf.t_cpu,
872  ngs->bestpath_perf.t_cpu / n_speech);
873  E_INFO("bestpath %.2f wall %.3f xRT\n",
874  ngs->bestpath_perf.t_elapsed,
875  ngs->bestpath_perf.t_elapsed / n_speech);
876  return hyp;
877  }
878  else {
879  int32 bpidx;
880 
881  /* fwdtree and fwdflat use same backpointer table. */
882  bpidx = ngram_search_find_exit(ngs, -1, out_score);
883  if (bpidx != NO_BP)
884  return ngram_search_bp_hyp(ngs, bpidx);
885  }
886 
887  return NULL;
888 }
889 
890 static void
891 ngram_search_bp2itor(ps_seg_t *seg, int bp)
892 {
893  ngram_search_t *ngs = (ngram_search_t *)seg->search;
894  bptbl_t *be, *pbe;
895 
896  be = &ngs->bp_table[bp];
897  pbe = be->bp == -1 ? NULL : &ngs->bp_table[be->bp];
898  seg->word = dict_wordstr(ps_search_dict(ngs), be->wid);
899  seg->ef = be->frame;
900  seg->sf = pbe ? pbe->frame + 1 : 0;
901  seg->prob = 0; /* Bogus value... */
902  /* Compute acoustic and LM scores for this segment. */
903  if (pbe == NULL) {
904  seg->ascr = be->score;
905  seg->lscr = 0;
906  seg->lback = 0;
907  }
908  else {
909  int32 start_score;
910 
911  /* Find ending path score of previous word. */
912  start_score = ngram_search_exit_score(ngs, pbe,
913  dict_first_phone(ps_search_dict(ngs), be->wid));
914  assert(start_score BETTER_THAN WORST_SCORE);
915  if (be->wid == ps_search_silence_wid(ngs)) {
916  seg->lscr = ngs->silpen;
917  }
918  else if (dict_filler_word(ps_search_dict(ngs), be->wid)) {
919  seg->lscr = ngs->fillpen;
920  }
921  else {
922  seg->lscr = ngram_tg_score(ngs->lmset,
923  be->real_wid,
924  pbe->real_wid,
925  pbe->prev_real_wid,
926  &seg->lback)>>SENSCR_SHIFT;
927  seg->lscr = (int32)(seg->lscr * seg->lwf);
928  }
929  seg->ascr = be->score - start_score - seg->lscr;
930  }
931 }
932 
933 static void
934 ngram_bp_seg_free(ps_seg_t *seg)
935 {
936  bptbl_seg_t *itor = (bptbl_seg_t *)seg;
937 
938  ckd_free(itor->bpidx);
939  ckd_free(itor);
940 }
941 
942 static ps_seg_t *
943 ngram_bp_seg_next(ps_seg_t *seg)
944 {
945  bptbl_seg_t *itor = (bptbl_seg_t *)seg;
946 
947  if (++itor->cur == itor->n_bpidx) {
948  ngram_bp_seg_free(seg);
949  return NULL;
950  }
951 
952  ngram_search_bp2itor(seg, itor->bpidx[itor->cur]);
953  return seg;
954 }
955 
956 static ps_segfuncs_t ngram_bp_segfuncs = {
957  /* seg_next */ ngram_bp_seg_next,
958  /* seg_free */ ngram_bp_seg_free
959 };
960 
961 static ps_seg_t *
962 ngram_search_bp_iter(ngram_search_t *ngs, int bpidx, float32 lwf)
963 {
964  bptbl_seg_t *itor;
965  int bp, cur;
966 
967  /* Calling this an "iterator" is a bit of a misnomer since we have
968  * to get the entire backtrace in order to produce it. On the
969  * other hand, all we actually need is the bptbl IDs, and we can
970  * allocate a fixed-size array of them. */
971  itor = ckd_calloc(1, sizeof(*itor));
972  itor->base.vt = &ngram_bp_segfuncs;
973  itor->base.search = ps_search_base(ngs);
974  itor->base.lwf = lwf;
975  itor->n_bpidx = 0;
976  bp = bpidx;
977  while (bp != NO_BP) {
978  bptbl_t *be = &ngs->bp_table[bp];
979  bp = be->bp;
980  ++itor->n_bpidx;
981  }
982  if (itor->n_bpidx == 0) {
983  ckd_free(itor);
984  return NULL;
985  }
986  itor->bpidx = ckd_calloc(itor->n_bpidx, sizeof(*itor->bpidx));
987  cur = itor->n_bpidx - 1;
988  bp = bpidx;
989  while (bp != NO_BP) {
990  bptbl_t *be = &ngs->bp_table[bp];
991  itor->bpidx[cur] = bp;
992  bp = be->bp;
993  --cur;
994  }
995 
996  /* Fill in relevant fields for first element. */
997  ngram_search_bp2itor((ps_seg_t *)itor, itor->bpidx[0]);
998 
999  return (ps_seg_t *)itor;
1000 }
1001 
1002 static ps_seg_t *
1003 ngram_search_seg_iter(ps_search_t *search)
1004 {
1005  ngram_search_t *ngs = (ngram_search_t *)search;
1006 
1007  /* Only do bestpath search if the utterance is done. */
1008  if (ngs->bestpath && ngs->done) {
1009  ps_lattice_t *dag;
1010  ps_latlink_t *link;
1011  double n_speech;
1012  ps_seg_t *itor;
1013 
1014  ptmr_reset(&ngs->bestpath_perf);
1015  ptmr_start(&ngs->bestpath_perf);
1016  if ((dag = ngram_search_lattice(search)) == NULL)
1017  return NULL;
1018  if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL)
1019  return NULL;
1020  itor = ps_lattice_seg_iter(dag, link,
1021  ngs->bestpath_fwdtree_lw_ratio);
1022  ptmr_stop(&ngs->bestpath_perf);
1023  n_speech = (double)dag->n_frames
1024  / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
1025  E_INFO("bestpath %.2f CPU %.3f xRT\n",
1026  ngs->bestpath_perf.t_cpu,
1027  ngs->bestpath_perf.t_cpu / n_speech);
1028  E_INFO("bestpath %.2f wall %.3f xRT\n",
1029  ngs->bestpath_perf.t_elapsed,
1030  ngs->bestpath_perf.t_elapsed / n_speech);
1031  return itor;
1032  }
1033  else {
1034  int32 bpidx;
1035 
1036  /* fwdtree and fwdflat use same backpointer table. */
1037  bpidx = ngram_search_find_exit(ngs, -1, NULL);
1038  return ngram_search_bp_iter(ngs, bpidx,
1039  /* but different language weights... */
1040  (ngs->done && ngs->fwdflat)
1041  ? ngs->fwdflat_fwdtree_lw_ratio : 1.0);
1042  }
1043 
1044  return NULL;
1045 }
1046 
1047 static int32
1048 ngram_search_prob(ps_search_t *search)
1049 {
1050  ngram_search_t *ngs = (ngram_search_t *)search;
1051 
1052  /* Only do bestpath search if the utterance is done. */
1053  if (ngs->bestpath && ngs->done) {
1054  ps_lattice_t *dag;
1055  ps_latlink_t *link;
1056 
1057  if ((dag = ngram_search_lattice(search)) == NULL)
1058  return 0;
1059  if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL)
1060  return 0;
1061  return search->post;
1062  }
1063  else {
1064  /* FIXME: Give some kind of good estimate here, eventually. */
1065  return 0;
1066  }
1067 }
1068 
1069 static void
1070 create_dag_nodes(ngram_search_t *ngs, ps_lattice_t *dag)
1071 {
1072  bptbl_t *bp_ptr;
1073  int32 i;
1074 
1075  for (i = 0, bp_ptr = ngs->bp_table; i < ngs->bpidx; ++i, ++bp_ptr) {
1076  int32 sf, ef, wid;
1077  ps_latnode_t *node;
1078 
1079  /* Skip invalid backpointers (these result from -maxwpf pruning) */
1080  if (!bp_ptr->valid)
1081  continue;
1082 
1083  sf = (bp_ptr->bp < 0) ? 0 : ngs->bp_table[bp_ptr->bp].frame + 1;
1084  ef = bp_ptr->frame;
1085  wid = bp_ptr->wid;
1086 
1087  assert(ef < dag->n_frames);
1088  /* Skip non-final </s> entries. */
1089  if ((wid == ps_search_finish_wid(ngs)) && (ef < dag->n_frames - 1))
1090  continue;
1091 
1092  /* Skip if word not in LM */
1093  if ((!dict_filler_word(ps_search_dict(ngs), wid))
1094  && (!ngram_model_set_known_wid(ngs->lmset,
1095  dict_basewid(ps_search_dict(ngs), wid))))
1096  continue;
1097 
1098  /* See if bptbl entry <wid,sf> already in lattice */
1099  for (node = dag->nodes; node; node = node->next) {
1100  if ((node->wid == wid) && (node->sf == sf))
1101  break;
1102  }
1103 
1104  /* For the moment, store bptbl indices in node.{fef,lef} */
1105  if (node)
1106  node->lef = i;
1107  else {
1108  /* New node; link to head of list */
1109  node = listelem_malloc(dag->latnode_alloc);
1110  node->wid = wid;
1111  node->sf = sf; /* This is a frame index. */
1112  node->fef = node->lef = i; /* These are backpointer indices (argh) */
1113  node->reachable = FALSE;
1114  node->entries = NULL;
1115  node->exits = NULL;
1116 
1117  /* NOTE: This creates the list of nodes in reverse
1118  * topological order, i.e. a node always precedes its
1119  * antecedents in this list. */
1120  node->next = dag->nodes;
1121  dag->nodes = node;
1122  ++dag->n_nodes;
1123  }
1124  }
1125 }
1126 
1127 static ps_latnode_t *
1128 find_start_node(ngram_search_t *ngs, ps_lattice_t *dag)
1129 {
1130  ps_latnode_t *node;
1131 
1132  /* Find start node <s>.0 */
1133  for (node = dag->nodes; node; node = node->next) {
1134  if ((node->wid == ps_search_start_wid(ngs)) && (node->sf == 0))
1135  break;
1136  }
1137  if (!node) {
1138  /* This is probably impossible. */
1139  E_ERROR("Couldn't find <s> in first frame\n");
1140  return NULL;
1141  }
1142  return node;
1143 }
1144 
1145 static ps_latnode_t *
1146 find_end_node(ngram_search_t *ngs, ps_lattice_t *dag, float32 lwf)
1147 {
1148  ps_latnode_t *node;
1149  int32 ef, bestbp, bp, bestscore;
1150 
1151  /* Find final node </s>.last_frame; nothing can follow this node */
1152  for (node = dag->nodes; node; node = node->next) {
1153  int32 lef = ngs->bp_table[node->lef].frame;
1154  if ((node->wid == ps_search_finish_wid(ngs))
1155  && (lef == dag->n_frames - 1))
1156  break;
1157  }
1158  if (node != NULL)
1159  return node;
1160 
1161  /* It is quite likely that no </s> exited in the last frame. So,
1162  * find the node corresponding to the best exit. */
1163  /* Find the last frame containing a word exit. */
1164  for (ef = dag->n_frames - 1;
1165  ef >= 0 && ngs->bp_table_idx[ef] == ngs->bpidx;
1166  --ef);
1167  if (ef < 0) {
1168  E_ERROR("Empty backpointer table: can not build DAG.\n");
1169  return NULL;
1170  }
1171 
1172  /* Find best word exit in that frame. */
1173  bestscore = WORST_SCORE;
1174  bestbp = NO_BP;
1175  for (bp = ngs->bp_table_idx[ef]; bp < ngs->bp_table_idx[ef + 1]; ++bp) {
1176  int32 n_used, l_scr, wid, prev_wid;
1177  wid = ngs->bp_table[bp].real_wid;
1178  prev_wid = ngs->bp_table[bp].prev_real_wid;
1179  /* Always prefer </s>, of which there will only be one per frame. */
1180  if (wid == ps_search_finish_wid(ngs)) {
1181  bestbp = bp;
1182  break;
1183  }
1184  l_scr = ngram_tg_score(ngs->lmset, ps_search_finish_wid(ngs),
1185  wid, prev_wid, &n_used) >>SENSCR_SHIFT;
1186  l_scr = l_scr * lwf;
1187  if (ngs->bp_table[bp].score + l_scr BETTER_THAN bestscore) {
1188  bestscore = ngs->bp_table[bp].score + l_scr;
1189  bestbp = bp;
1190  }
1191  }
1192  if (bestbp == NO_BP) {
1193  E_ERROR("No word exits found in last frame (%d), assuming no recognition\n", ef);
1194  return NULL;
1195  }
1196  E_INFO("</s> not found in last frame, using %s.%d instead\n",
1197  dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].wid), ef);
1198 
1199  /* Now find the node that corresponds to it. */
1200  for (node = dag->nodes; node; node = node->next) {
1201  if (node->lef == bestbp)
1202  return node;
1203  }
1204 
1205  /* FIXME: This seems to happen a lot! */
1206  E_ERROR("Failed to find DAG node corresponding to %s\n",
1207  dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].wid));
1208  return NULL;
1209 }
1210 
1211 /*
1212  * Build lattice from bptable.
1213  */
1214 ps_lattice_t *
1216 {
1217  int32 i, score, ascr, lscr;
1218  ps_latnode_t *node, *from, *to;
1219  ngram_search_t *ngs;
1220  ps_lattice_t *dag;
1221  int min_endfr, nlink;
1222  float lwf;
1223 
1224  ngs = (ngram_search_t *)search;
1225  min_endfr = cmd_ln_int32_r(ps_search_config(search), "-min_endfr");
1226 
1227  /* If the best score is WORST_SCORE or worse, there is no way to
1228  * make a lattice. */
1230  return NULL;
1231 
1232  /* Check to see if a lattice has previously been created over the
1233  * same number of frames, and reuse it if so. */
1234  if (search->dag && search->dag->n_frames == ngs->n_frame)
1235  return search->dag;
1236 
1237  /* Nope, create a new one. */
1238  ps_lattice_free(search->dag);
1239  search->dag = NULL;
1240  dag = ps_lattice_init_search(search, ngs->n_frame);
1241  /* Compute these such that they agree with the fwdtree language weight. */
1242  lwf = ngs->fwdflat ? ngs->fwdflat_fwdtree_lw_ratio : 1.0;
1243  create_dag_nodes(ngs, dag);
1244  if ((dag->start = find_start_node(ngs, dag)) == NULL)
1245  goto error_out;
1246  if ((dag->end = find_end_node(ngs, dag, ngs->bestpath_fwdtree_lw_ratio)) == NULL)
1247  goto error_out;
1248  E_INFO("lattice start node %s.%d end node %s.%d\n",
1249  dict_wordstr(search->dict, dag->start->wid), dag->start->sf,
1250  dict_wordstr(search->dict, dag->end->wid), dag->end->sf);
1251 
1252  ngram_compute_seg_score(ngs, ngs->bp_table + dag->end->lef, lwf,
1253  &dag->final_node_ascr, &lscr);
1254 
1255  /*
1256  * At this point, dag->nodes is ordered such that nodes earlier in
1257  * the list can follow (in time) those later in the list, but not
1258  * vice versa (see above - also note that adjacency is purely
1259  * determined by time which is why we can make this claim). Now
1260  * create precedence links and simultanesously mark all nodes that
1261  * can reach dag->end. (All nodes are reached from dag->start
1262  * simply by definition - they were created that way).
1263  *
1264  * Note that this also means that any nodes before dag->end in the
1265  * list can be discarded, meaning that dag->end will always be
1266  * equal to dag->nodes (FIXME: except when loading from a file but
1267  * we can fix that...)
1268  */
1269  i = 0;
1270  while (dag->nodes && dag->nodes != dag->end) {
1271  ps_latnode_t *next = dag->nodes->next;
1272  listelem_free(dag->latnode_alloc, dag->nodes);
1273  dag->nodes = next;
1274  ++i;
1275  }
1276  E_INFO("Eliminated %d nodes before end node\n", i);
1277  dag->end->reachable = TRUE;
1278  nlink = 0;
1279  for (to = dag->end; to; to = to->next) {
1280  int fef, lef;
1281 
1282  /* Skip if not reachable; it will never be reachable from dag->end */
1283  if (!to->reachable)
1284  continue;
1285 
1286  /* Prune nodes with too few endpoints - heuristic
1287  borrowed from Sphinx3 */
1288  fef = ngs->bp_table[to->fef].frame;
1289  lef = ngs->bp_table[to->lef].frame;
1290  if (to != dag->end && lef - fef < min_endfr) {
1291  to->reachable = FALSE;
1292  continue;
1293  }
1294 
1295  /* Find predecessors of to : from->fef+1 <= to->sf <= from->lef+1 */
1296  for (from = to->next; from; from = from->next) {
1297  bptbl_t *from_bpe;
1298 
1299  fef = ngs->bp_table[from->fef].frame;
1300  lef = ngs->bp_table[from->lef].frame;
1301 
1302  if ((to->sf <= fef) || (to->sf > lef + 1))
1303  continue;
1304  if (lef - fef < min_endfr) {
1305  assert(!from->reachable);
1306  continue;
1307  }
1308 
1309  /* Find bptable entry for "from" that exactly precedes "to" */
1310  i = from->fef;
1311  from_bpe = ngs->bp_table + i;
1312  for (; i <= from->lef; i++, from_bpe++) {
1313  if (from_bpe->wid != from->wid)
1314  continue;
1315  if (from_bpe->frame >= to->sf - 1)
1316  break;
1317  }
1318 
1319  if ((i > from->lef) || (from_bpe->frame != to->sf - 1))
1320  continue;
1321 
1322  /* Find acoustic score from.sf->to.sf-1 with right context = to */
1323  /* This gives us from_bpe's best acoustic score. */
1324  ngram_compute_seg_score(ngs, from_bpe, lwf,
1325  &ascr, &lscr);
1326  /* Now find the exact path score for from->to, including
1327  * the appropriate final triphone. In fact this might not
1328  * exist. */
1329  score = ngram_search_exit_score(ngs, from_bpe,
1330  dict_first_phone(ps_search_dict(ngs), to->wid));
1331  /* Does not exist. Can't create a link here. */
1332  if (score == WORST_SCORE)
1333  continue;
1334  /* Adjust the arc score to match the correct triphone. */
1335  else
1336  score = ascr + (score - from_bpe->score);
1337  if (score BETTER_THAN 0) {
1338  /* Scores must be negative, or Bad Things will happen.
1339  In general, they are, except in corner cases
1340  involving filler words. We don't want to throw any
1341  links away so we'll keep these, but with some
1342  arbitrarily improbable but recognizable score. */
1343  ps_lattice_link(dag, from, to, -424242, from_bpe->frame);
1344  ++nlink;
1345  from->reachable = TRUE;
1346  }
1347  else if (score BETTER_THAN WORST_SCORE) {
1348  ps_lattice_link(dag, from, to, score, from_bpe->frame);
1349  ++nlink;
1350  from->reachable = TRUE;
1351  }
1352  }
1353  }
1354 
1355  /* There must be at least one path between dag->start and dag->end */
1356  if (!dag->start->reachable) {
1357  E_ERROR("End node of lattice isolated; unreachable\n");
1358  goto error_out;
1359  }
1360 
1361  for (node = dag->nodes; node; node = node->next) {
1362  /* Change node->{fef,lef} from bptbl indices to frames. */
1363  node->fef = ngs->bp_table[node->fef].frame;
1364  node->lef = ngs->bp_table[node->lef].frame;
1365  /* Find base wid for nodes. */
1366  node->basewid = dict_basewid(search->dict, node->wid);
1367  }
1368 
1369  /* Link nodes with alternate pronunciations at the same timepoint. */
1370  for (node = dag->nodes; node; node = node->next) {
1371  ps_latnode_t *alt;
1372  /* Scan forward to find the next alternate, then stop. */
1373  for (alt = node->next; alt && alt->sf == node->sf; alt = alt->next) {
1374  if (alt->basewid == node->basewid) {
1375  alt->alt = node->alt;
1376  node->alt = alt;
1377  break;
1378  }
1379  }
1380  }
1381  E_INFO("Lattice has %d nodes, %d links\n", dag->n_nodes, nlink);
1382 
1383  /* Minor hack: If the final node is a filler word and not </s>,
1384  * then set its base word ID to </s>, so that the language model
1385  * scores won't be screwed up. */
1386  if (dict_filler_word(ps_search_dict(ngs), dag->end->wid))
1387  dag->end->basewid = ps_search_finish_wid(ngs);
1388 
1389  /* Free nodes unreachable from dag->end and their links */
1391 
1392  /* Add silprob and fillprob to corresponding links */
1393  ps_lattice_penalize_fillers(dag, ngs->silpen, ngs->fillpen);
1394 
1395  search->dag = dag;
1396  return dag;
1397 
1398 error_out:
1399  ps_lattice_free(dag);
1400  return NULL;
1401 }
1402 
1403 void ngram_search_set_lm(ngram_model_t *lm)
1404 {
1405  default_lm = ngram_model_retain(lm);
1406 }
1407 
hmm_t hmm
Basic HMM structure.
Definition: ngram_search.h:65
Internal implementation of PocketSphinx decoder.
void ngram_fwdtree_finish(ngram_search_t *ngs)
Finish fwdtree decoding for an utterance.
int16 reachable
From.
int32 n_frame_alloc
Number of frames allocated in bp_table_idx and friends.
Definition: ngram_search.h:307
int32 wid
Word index.
Definition: ngram_search.h:113
void ngram_fwdtree_deinit(ngram_search_t *ngs)
Release memory associated with fwdtree decoding.
Base structure for search module.
int32 n_nodes
Number of nodes in this lattice.
void ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
Definition: ngram_search.c:598
ps_seg_t * ps_lattice_seg_iter(ps_lattice_t *dag, ps_latlink_t *link, float32 lwf)
Get hypothesis segmentation iterator after bestpath search.
Definition: ps_lattice.c:1006
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
Definition: hmm.c:89
int acmod_rewind(acmod_t *acmod)
Rewind the current utterance, allowing it to be rescored.
Definition: acmod.c:877
int32 lef
Last end frame.
listelem_alloc_t * chan_alloc
For chan_t.
Definition: ngram_search.h:211
void ngram_fwdtree_start(ngram_search_t *ngs)
Start fwdtree decoding for an utterance.
void ngram_search_set_lm(ngram_model_t *lm)
Sets the global language model.
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
void ps_lattice_penalize_fillers(ps_lattice_t *dag, int32 silpen, int32 fillpen)
Insert penalty for fillers.
Definition: ps_lattice.c:106
frame_idx_t frame
start or end frame
Definition: ngram_search.h:110
ps_latnode_t * start
Starting node.
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state]...
Definition: tmat.h:56
hmm_context_t * hmmctx
HMM context.
Definition: ngram_search.h:200
ps_segfuncs_t * vt
V-table of seg methods.
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
uint16 ** sseq
Unique senone sequences (2D array built at load time)
Definition: bin_mdef.h:134
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
Definition: hmm.c:111
int32 lscr
Language model score.
int32 n_words
Number of words known to search (may be less than in the dictionary)
int16 last2_phone
next-to-last phone of this word
Definition: ngram_search.h:120
#define BAD_S3WID
Dictionary word id.
Definition: s3types.h:90
int32 n_ssid
#Unique ssid in above, compressed ssid list
Definition: dict2pid.h:76
frame_idx_t n_frames
Number of frames for this utterance.
int ngram_fwdflat_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
Word graph search implementation.
bitvec_t * word_active
array of active flags for all words.
Definition: ngram_search.h:247
void ngram_fwdflat_finish(ngram_search_t *ngs)
Finish fwdflat decoding for an utterance.
ps_latnode_t * nodes
List of all nodes.
int ngram_fwdflat_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
int32 ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone)
Get the exit score for a backpointer entry with a given right context.
Definition: ngram_search.c:660
listelem_alloc_t * latnode_alloc
Node allocator for this DAG.
int32 prob
Log posterior probability.
latlink_list_t * entries
Links into this node.
POCKETSPHINX_EXPORT int32 ps_lattice_posterior(ps_lattice_t *dag, ngram_model_t *lmset, float32 ascale)
Calculate link posterior probabilities on a word graph.
Definition: ps_lattice.c:1446
struct ps_latnode_s * alt
Node with alternate pronunciation for this word.
char const * word
Word string (pointer into dictionary hash)
int32 ** active_word_list
Array of active multi-phone words for current and next frame.
Definition: ngram_search.h:287
struct chan_s * next
first descendant of this channel; or, in the case of the last phone of a word, the next alternative r...
Definition: ngram_search.h:68
void ngram_search_save_bp(ngram_search_t *ngs, int frame_idx, int32 w, int32 score, int32 path, int32 rc)
Enter a word in the backpointer table.
Definition: ngram_search.c:383
ps_search_t * search
Search object from whence this came.
int32 final_node_ascr
Acoustic score of implicit link exiting final node.
Lexicon tree based Viterbi search.
int ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score)
Find the best word exit for the current frame in the backpointer table.
Definition: ngram_search.c:506
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
int ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx)
Record the current frame&#39;s index in the backpointer table.
Definition: ngram_search.c:329
int32 bp
Back Pointer.
Definition: ngram_search.h:114
int32 rc_id
right-context id for last phone of words
Definition: ngram_search.h:79
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
Definition: dict2pid.h:115
void ngram_fwdflat_start(ngram_search_t *ngs)
Start fwdflat decoding for an utterance.
N-Gram search module structure.
Definition: ngram_search.h:197
int ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
hmm_context_t * hmm_context_init(int32 n_emit_state, uint8 **const *tp, int16 const *senscore, uint16 *const *sseq)
Create an HMM context.
Definition: hmm.c:56
void ps_search_base_free(ps_search_t *search)
Free search.
void ps_lattice_delete_unreachable(ps_lattice_t *dag)
Remove nodes marked as unreachable.
Definition: ps_lattice.c:174
ps_latnode_t * end
Ending node.
frame_idx_t sf
Start frame.
int32 real_wid
wid of this or latest predecessor real word
Definition: ngram_search.h:117
int32 prev_real_wid
wid of second-last real word
Definition: ngram_search.h:118
POCKETSPHINX_EXPORT ps_latlink_t * ps_lattice_bestpath(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, float32 ascale)
Do N-Gram based best-path search on a word graph.
Definition: ps_lattice.c:1215
ps_lattice_t * ngram_search_lattice(ps_search_t *search)
Construct a word lattice from the current hypothesis.
latlink_list_t * exits
Links out of this node.
#define WORST_SCORE
Large &quot;bad&quot; score.
Definition: hmm.h:84
N-Gram based multi-pass search (&quot;FBS&quot;)
tmat_t * tmat
Transition matrices.
Definition: acmod.h:160
frame_idx_t ef
End frame.
int32 ascr
Acoustic score.
int acmod_advance(acmod_t *acmod)
Advance the frame index.
Definition: acmod.c:899
listelem_alloc_t * latnode_alloc
For latnode_t.
Definition: ngram_search.h:213
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
Definition: dict.c:413
void ngram_fwdtree_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdtree decoding.
Segmentation &quot;iterator&quot; for backpointer table results.
Definition: ngram_search.h:126
ps_latnode_t ** frm_wordlist
List of active words in each frame.
Definition: ngram_search.h:316
Lexical tree node data type for the first phone (root) of each dynamic HMM tree structure.
Definition: ngram_search.h:90
Lexical tree node data type.
Definition: ngram_search.h:64
int32 wid
Dictionary word id.
int16 cur
Current position in bpidx.
Definition: ngram_search.h:130
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
a structure for a dictionary.
Definition: dict.h:76
char const * ps_lattice_hyp(ps_lattice_t *dag, ps_latlink_t *link)
Get hypothesis string after bestpath search.
Definition: ps_lattice.c:830
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a &quot;real&quot; word, i.e.
Definition: dict.c:427
void ngram_search_free(ps_search_t *search)
Finalize the N-Gram search module.
Definition: ngram_search.c:289
Word graph structure used in bestpath/nbest search.
#define WORSE_THAN
Is one score worse than another?
Definition: hmm.h:100
int16 n_bpidx
Number of backpointer IDs.
Definition: ngram_search.h:129
int32 best_score
Best Viterbi path score.
Definition: ngram_search.h:325
Back pointer table (forward pass lattice; actually a tree)
Definition: ngram_search.h:109
cross word triphone model structure
Definition: dict2pid.h:73
int ngram_fwdtree_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
void ngram_search_free_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
Definition: ngram_search.c:647
int32 post
Utterance posterior probability.
char * hyp_str
Current hypothesis string.
#define BETTER_THAN
Is one score better than another?
Definition: hmm.h:95
dict_t * dict
Pronunciation dictionary.
void ngram_fwdflat_deinit(ngram_search_t *ngs)
Release memory associated with fwdflat decoding.
int32 s_idx
Start of BScoreStack for various right contexts.
Definition: ngram_search.h:116
int32 fef
First end frame.
int32 n_frame
Number of frames actually present.
Definition: ngram_search.h:308
Flat lexicon based Viterbi search.
ngram_model_t * lmset
Set of language models.
Definition: ngram_search.h:199
uint8 valid
For absolute pruning.
Definition: ngram_search.h:111
int32 lback
Language model backoff.
listelem_alloc_t * root_chan_alloc
For root_chan_t.
Definition: ngram_search.h:212
int32 basewid
Dictionary base word id.
int32 ciphone
ciphone for this node
Definition: ngram_search.h:73
void ngram_fwdflat_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdflat decoding.
ps_lattice_t * ps_lattice_init_search(ps_search_t *search, int n_frame)
Construct an empty word graph with reference to a search structure.
Definition: ps_lattice.c:639
void hmm_context_free(hmm_context_t *ctx)
Free an HMM context.
Definition: hmm.c:80
int32 * bpidx
Sequence of backpointer IDs.
Definition: ngram_search.h:128
chan_t ** word_chan
Channels associated with a given word (only used for right contexts, single-phone words in fwdtree se...
Definition: ngram_search.h:246
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
ps_latlink_t * last_link
Final link in best path.
struct ps_latnode_s * next
Next node in DAG (no ordering implied)
int32 score
Score (best among all right contexts)
Definition: ngram_search.h:115
V-table for search algorithm.
ps_lattice_t * dag
Current hypothesis word graph.
Base structure for hypothesis segmentation iterator.
#define dict_size(d)
Packaged macro access to dictionary members.
Definition: dict.h:151
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
Definition: dict2pid.h:75
POCKETSPHINX_EXPORT int ps_lattice_free(ps_lattice_t *dag)
Free a lattice.
Definition: ps_lattice.c:665
ps_seg_t base
Base structure.
Definition: ngram_search.h:127
float32 ascale
Acoustic score scale for posterior probabilities.
Definition: ngram_search.h:333
ps_search_t * ngram_search_init(const char *name, ngram_model_t *lm, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize the N-Gram search module.
Definition: ngram_search.c:140
Acoustic model structure.
Definition: acmod.h:148
float32 lwf
Language weight factor (for second-pass searches)
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
int acmod_set_grow(acmod_t *acmod, int grow_feat)
Set memory allocation policy for utterance processing.
Definition: acmod.c:410
POCKETSPHINX_EXPORT void ps_lattice_link(ps_lattice_t *dag, ps_latnode_t *from, ps_latnode_t *to, int32 score, int32 ef)
Create a directed link between &quot;from&quot; and &quot;to&quot; nodes, but if a link already exists, choose one with the best link_scr.
Definition: ps_lattice.c:65
s3ssid_t * ssid
Senone Sequence ID list for all context ciphones.
Definition: dict2pid.h:74
frame_idx_t sf
Start frame.
int16 last_phone
last phone of this word
Definition: ngram_search.h:119
char const * ngram_search_bp_hyp(ngram_search_t *ngs, int bpidx)
Backtrace from a given backpointer index to obtain a word hypothesis.
Definition: ngram_search.c:550