PocketSphinx  5prealpha
ngram_search_fwdtree.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
42 /* System headers. */
43 #include <string.h>
44 #include <assert.h>
45 
46 /* SphinxBase headers. */
47 #include <sphinxbase/ckd_alloc.h>
48 #include <sphinxbase/listelem_alloc.h>
49 #include <sphinxbase/err.h>
50 
51 /* Local headers. */
52 #include "ngram_search_fwdtree.h"
53 #include "phone_loop_search.h"
54 
/*
 * Channel evaluation hook.  Set __CHAN_DUMP__ to a non-zero value to
 * evaluate channels through hmm_dump_vit_eval() (passing stderr) instead
 * of hmm_vit_eval(), for debugging.
 */
#define __CHAN_DUMP__ 0
#if !__CHAN_DUMP__
#define chan_v_eval(chan) hmm_vit_eval(&(chan)->hmm)
#else
#define chan_v_eval(chan) hmm_dump_vit_eval(&(chan)->hmm, stderr)
#endif
62 
63 /*
64  * Allocate that part of the search channel tree structure that is independent of the
65  * LM in use.
66  */
67 static void
68 init_search_tree(ngram_search_t *ngs)
69 {
70  int32 w, ndiph, i, n_words, n_ci;
71  dict_t *dict = ps_search_dict(ngs);
72  bitvec_t *dimap;
73 
74  E_INFO("Initializing search tree\n");
75 
76  n_words = ps_search_n_words(ngs);
77  ngs->homophone_set = ckd_calloc(n_words, sizeof(*ngs->homophone_set));
78 
79  /* Find #single phone words, and #unique first diphones (#root channels) in dict. */
80  ndiph = 0;
81  ngs->n_1ph_words = 0;
82  n_ci = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef);
83  /* Allocate a bitvector with flags for each possible diphone. */
84  dimap = bitvec_alloc(n_ci * n_ci);
85  for (w = 0; w < n_words; w++) {
86  if (!dict_real_word(dict, w))
87  continue;
88  if (dict_is_single_phone(dict, w))
89  ++ngs->n_1ph_words;
90  else {
91  int ph0, ph1;
92  ph0 = dict_first_phone(dict, w);
93  ph1 = dict_second_phone(dict, w);
94  /* Increment ndiph the first time we see a diphone. */
95  if (bitvec_is_clear(dimap, ph0 * n_ci + ph1)) {
96  bitvec_set(dimap, ph0 * n_ci + ph1);
97  ++ndiph;
98  }
99  }
100  }
101  E_INFO("%d unique initial diphones\n", ndiph);
102  bitvec_free(dimap);
103 
104  /* Add remaining dict words (</s>, <s>, <sil>, noise words) to single-phone words */
105  ngs->n_1ph_words += dict_num_fillers(dict) + 2;
106  ngs->n_root_chan_alloc = ndiph + 1;
107  /* Verify that these are all *actually* single-phone words,
108  * otherwise really bad things will happen to us. */
109  for (w = 0; w < n_words; ++w) {
110  if (dict_real_word(dict, w))
111  continue;
112  if (!dict_is_single_phone(dict, w)) {
113  E_WARN("Filler word %d = %s has more than one phone, ignoring it.\n",
114  w, dict_wordstr(dict, w));
115  --ngs->n_1ph_words;
116  }
117  }
118 
119  /* Allocate and initialize root channels */
120  ngs->root_chan =
121  ckd_calloc(ngs->n_root_chan_alloc, sizeof(*ngs->root_chan));
122  for (i = 0; i < ngs->n_root_chan_alloc; i++) {
123  hmm_init(ngs->hmmctx, &ngs->root_chan[i].hmm, TRUE, -1, -1);
124  ngs->root_chan[i].penult_phn_wid = -1;
125  ngs->root_chan[i].next = NULL;
126  }
127 
128  /* Permanently allocate and initialize channels for single-phone
129  * words (1/word). */
130  ngs->rhmm_1ph = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->rhmm_1ph));
131  i = 0;
132  for (w = 0; w < n_words; w++) {
133  if (!dict_is_single_phone(dict, w))
134  continue;
135  /* Use SIL as right context for these. */
136  ngs->rhmm_1ph[i].ci2phone = bin_mdef_silphone(ps_search_acmod(ngs)->mdef);
137  ngs->rhmm_1ph[i].ciphone = dict_first_phone(dict, w);
138  hmm_init(ngs->hmmctx, &ngs->rhmm_1ph[i].hmm, TRUE,
139  bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ngs->rhmm_1ph[i].ciphone),
140  bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ngs->rhmm_1ph[i].ciphone));
141  ngs->rhmm_1ph[i].next = NULL;
142 
143  ngs->word_chan[w] = (chan_t *) &(ngs->rhmm_1ph[i]);
144  i++;
145  }
146 
147  ngs->single_phone_wid = ckd_calloc(ngs->n_1ph_words,
148  sizeof(*ngs->single_phone_wid));
149 }
150 
151 /*
152  * One-time initialization of internal channels in HMM tree.
153  */
154 static void
155 init_nonroot_chan(ngram_search_t *ngs, chan_t * hmm, int32 ph, int32 ci, int32 tmatid)
156 {
157  hmm->next = NULL;
158  hmm->alt = NULL;
159  hmm->info.penult_phn_wid = -1;
160  hmm->ciphone = ci;
161  hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, ph, tmatid);
162 }
163 
164 /*
165  * Allocate and initialize search channel-tree structure.
166  * At this point, all the root-channels have been allocated and partly initialized
167  * (as per init_search_tree()), and channels for all the single-phone words have been
168  * allocated and initialized. None of the interior channels of search-trees have
169  * been allocated.
170  * This routine may be called on every utterance, after reinit_search_tree() clears
171  * the search tree created for the previous utterance. Meant for reconfiguring the
172  * search tree to suit the currently active LM.
173  */
174 static void
175 create_search_channels(ngram_search_t *ngs)
176 {
177  chan_t *hmm;
178  root_chan_t *rhmm;
179  int32 w, i, j, p, ph, tmatid;
180  int32 n_words;
181  dict_t *dict = ps_search_dict(ngs);
182  dict2pid_t *d2p = ps_search_dict2pid(ngs);
183 
184  n_words = ps_search_n_words(ngs);
185 
186  E_INFO("Creating search channels\n");
187 
188  for (w = 0; w < n_words; w++)
189  ngs->homophone_set[w] = -1;
190 
191  ngs->n_1ph_LMwords = 0;
192  ngs->n_root_chan = 0;
193  ngs->n_nonroot_chan = 0;
194 
195  for (w = 0; w < n_words; w++) {
196  int ciphone, ci2phone;
197 
198  /* Ignore dictionary words not in LM */
199  if (!ngram_model_set_known_wid(ngs->lmset, dict_basewid(dict, w)))
200  continue;
201 
202  /* Handle single-phone words individually; not in channel tree */
203  if (dict_is_single_phone(dict, w)) {
204  E_DEBUG(1,("single_phone_wid[%d] = %s\n",
205  ngs->n_1ph_LMwords, dict_wordstr(dict, w)));
206  ngs->single_phone_wid[ngs->n_1ph_LMwords++] = w;
207  continue;
208  }
209 
210  /* Find a root channel matching the initial diphone, or
211  * allocate one if not found. */
212  ciphone = dict_first_phone(dict, w);
213  ci2phone = dict_second_phone(dict, w);
214  for (i = 0; i < ngs->n_root_chan; ++i) {
215  if (ngs->root_chan[i].ciphone == ciphone
216  && ngs->root_chan[i].ci2phone == ci2phone)
217  break;
218  }
219  if (i == ngs->n_root_chan) {
220  rhmm = &(ngs->root_chan[ngs->n_root_chan]);
221  rhmm->hmm.tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone);
222  /* Begin with CI phone? Not sure this makes a difference... */
223  hmm_mpx_ssid(&rhmm->hmm, 0) =
224  bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ciphone);
225  rhmm->ciphone = ciphone;
226  rhmm->ci2phone = ci2phone;
227  ngs->n_root_chan++;
228  }
229  else
230  rhmm = &(ngs->root_chan[i]);
231 
232  E_DEBUG(3,("word %s rhmm %d\n", dict_wordstr(dict, w), rhmm - ngs->root_chan));
233  /* Now, rhmm = root channel for w. Go on to remaining phones */
234  if (dict_pronlen(dict, w) == 2) {
235  /* Next phone is the last; not kept in tree; add w to penult_phn_wid set */
236  if ((j = rhmm->penult_phn_wid) < 0)
237  rhmm->penult_phn_wid = w;
238  else {
239  for (; ngs->homophone_set[j] >= 0; j = ngs->homophone_set[j]);
240  ngs->homophone_set[j] = w;
241  }
242  }
243  else {
244  /* Add remaining phones, except the last, to tree */
245  ph = dict2pid_internal(d2p, w, 1);
246  tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, dict_pron(dict, w, 1));
247  hmm = rhmm->next;
248  if (hmm == NULL) {
249  rhmm->next = hmm = listelem_malloc(ngs->chan_alloc);
250  init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, 1), tmatid);
251  ngs->n_nonroot_chan++;
252  }
253  else {
254  chan_t *prev_hmm = NULL;
255 
256  for (; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph); hmm = hmm->alt)
257  prev_hmm = hmm;
258  if (!hmm) { /* thanks, rkm! */
259  prev_hmm->alt = hmm = listelem_malloc(ngs->chan_alloc);
260  init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, 1), tmatid);
261  ngs->n_nonroot_chan++;
262  }
263  }
264  E_DEBUG(3,("phone %s = %d\n",
265  bin_mdef_ciphone_str(ps_search_acmod(ngs)->mdef,
266  dict_second_phone(dict, w)), ph));
267  for (p = 2; p < dict_pronlen(dict, w) - 1; p++) {
268  ph = dict2pid_internal(d2p, w, p);
269  tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, dict_pron(dict, w, p));
270  if (!hmm->next) {
271  hmm->next = listelem_malloc(ngs->chan_alloc);
272  hmm = hmm->next;
273  init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, p), tmatid);
274  ngs->n_nonroot_chan++;
275  }
276  else {
277  chan_t *prev_hmm = NULL;
278 
279  for (hmm = hmm->next; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph);
280  hmm = hmm->alt)
281  prev_hmm = hmm;
282  if (!hmm) { /* thanks, rkm! */
283  prev_hmm->alt = hmm = listelem_malloc(ngs->chan_alloc);
284  init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, p), tmatid);
285  ngs->n_nonroot_chan++;
286  }
287  }
288  E_DEBUG(3,("phone %s = %d\n",
289  bin_mdef_ciphone_str(ps_search_acmod(ngs)->mdef,
290  dict_pron(dict, w, p)), ph));
291  }
292 
293  /* All but last phone of w in tree; add w to hmm->info.penult_phn_wid set */
294  if ((j = hmm->info.penult_phn_wid) < 0)
295  hmm->info.penult_phn_wid = w;
296  else {
297  for (; ngs->homophone_set[j] >= 0; j = ngs->homophone_set[j]);
298  ngs->homophone_set[j] = w;
299  }
300  }
301  }
302 
303  ngs->n_1ph_words = ngs->n_1ph_LMwords;
304 
305  /* Add filler words to the array of 1ph words. */
306  for (w = 0; w < n_words; ++w) {
307  /* Skip anything that doesn't actually have a single phone. */
308  if (!dict_is_single_phone(dict, w))
309  continue;
310  /* Also skip "real words" and things that are in the LM. */
311  if (dict_real_word(dict, w))
312  continue;
313  if (ngram_model_set_known_wid(ngs->lmset, dict_basewid(dict, w)))
314  continue;
315  E_DEBUG(1,("single_phone_wid[%d] = %s\n",
316  ngs->n_1ph_words, dict_wordstr(dict, w)));
317  ngs->single_phone_wid[ngs->n_1ph_words++] = w;
318  }
319 
320  if (ngs->n_nonroot_chan >= ngs->max_nonroot_chan) {
321  /* Give some room for channels for new words added dynamically at run time */
322  ngs->max_nonroot_chan = ngs->n_nonroot_chan + 128;
323  E_INFO("Max nonroot chan increased to %d\n", ngs->max_nonroot_chan);
324 
325  /* Free old active channel list array if any and allocate new one */
326  if (ngs->active_chan_list)
327  ckd_free_2d(ngs->active_chan_list);
328  ngs->active_chan_list = ckd_calloc_2d(2, ngs->max_nonroot_chan,
329  sizeof(**ngs->active_chan_list));
330  }
331 
332  E_INFO("Created %d root, %d non-root channels, %d single-phone words\n",
333  ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
334 
335  if (ngs->n_root_chan + ngs->n_1ph_words == 0)
336  E_ERROR("No word from the language model has pronunciation in the dictionary\n");
337 }
338 
339 static void
340 reinit_search_subtree(ngram_search_t *ngs, chan_t * hmm)
341 {
342  chan_t *child, *sibling;
343 
344  /* First free all children under hmm */
345  for (child = hmm->next; child; child = sibling) {
346  sibling = child->alt;
347  reinit_search_subtree(ngs, child);
348  }
349 
350  /* Now free hmm */
351  hmm_deinit(&hmm->hmm);
352  listelem_free(ngs->chan_alloc, hmm);
353 }
354 
355 /*
356  * Delete search tree by freeing all interior channels within search tree and
357  * restoring root channel state to the init state (i.e., just after init_search_tree()).
358  */
359 static void
360 reinit_search_tree(ngram_search_t *ngs)
361 {
362  int32 i;
363  chan_t *hmm, *sibling;
364 
365  for (i = 0; i < ngs->n_root_chan; i++) {
366  hmm = ngs->root_chan[i].next;
367 
368  while (hmm) {
369  sibling = hmm->alt;
370  reinit_search_subtree(ngs, hmm);
371  hmm = sibling;
372  }
373 
374  ngs->root_chan[i].penult_phn_wid = -1;
375  ngs->root_chan[i].next = NULL;
376  }
377  ngs->n_nonroot_chan = 0;
378 }
379 
380 void
382 {
383  /* Allocate bestbp_rc, lastphn_cand, last_ltrans */
384  ngs->bestbp_rc = ckd_calloc(bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef),
385  sizeof(*ngs->bestbp_rc));
386  ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
387  sizeof(*ngs->lastphn_cand));
388  init_search_tree(ngs);
389  create_search_channels(ngs);
390 }
391 
392 static void
393 deinit_search_tree(ngram_search_t *ngs)
394 {
395  int i, w, n_words;
396 
397  n_words = ps_search_n_words(ngs);
398  for (i = 0; i < ngs->n_root_chan_alloc; i++) {
399  hmm_deinit(&ngs->root_chan[i].hmm);
400  }
401  if (ngs->rhmm_1ph) {
402  for (i = w = 0; w < n_words; ++w) {
403  if (!dict_is_single_phone(ps_search_dict(ngs), w))
404  continue;
405  hmm_deinit(&ngs->rhmm_1ph[i].hmm);
406  ++i;
407  }
408  ckd_free(ngs->rhmm_1ph);
409  ngs->rhmm_1ph = NULL;
410  }
411  ngs->n_root_chan = 0;
412  ngs->n_root_chan_alloc = 0;
413  ckd_free(ngs->root_chan);
414  ngs->root_chan = NULL;
415  ckd_free(ngs->single_phone_wid);
416  ngs->single_phone_wid = NULL;
417  ckd_free(ngs->homophone_set);
418  ngs->homophone_set = NULL;
419 }
420 
421 void
423 {
424  double n_speech = (double)ngs->n_tot_frame
425  / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
426 
427  E_INFO("TOTAL fwdtree %.2f CPU %.3f xRT\n",
428  ngs->fwdtree_perf.t_tot_cpu,
429  ngs->fwdtree_perf.t_tot_cpu / n_speech);
430  E_INFO("TOTAL fwdtree %.2f wall %.3f xRT\n",
431  ngs->fwdtree_perf.t_tot_elapsed,
432  ngs->fwdtree_perf.t_tot_elapsed / n_speech);
433 
434  /* Reset non-root channels. */
435  reinit_search_tree(ngs);
436  /* Free the search tree. */
437  deinit_search_tree(ngs);
438  /* Free other stuff. */
439  ngs->max_nonroot_chan = 0;
440  ckd_free_2d(ngs->active_chan_list);
441  ngs->active_chan_list = NULL;
442  ckd_free(ngs->cand_sf);
443  ngs->cand_sf = NULL;
444  ckd_free(ngs->bestbp_rc);
445  ngs->bestbp_rc = NULL;
446  ckd_free(ngs->lastphn_cand);
447  ngs->lastphn_cand = NULL;
448 }
449 
450 int
452 {
453  /* Reset non-root channels. */
454  reinit_search_tree(ngs);
455  /* Free the search tree. */
456  deinit_search_tree(ngs);
457  /* Reallocate things that depend on the number of words. */
458  ckd_free(ngs->lastphn_cand);
459  ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
460  sizeof(*ngs->lastphn_cand));
461  ckd_free(ngs->word_chan);
462  ngs->word_chan = ckd_calloc(ps_search_n_words(ngs),
463  sizeof(*ngs->word_chan));
464  /* Rebuild the search tree. */
465  init_search_tree(ngs);
466  create_search_channels(ngs);
467  return 0;
468 }
469 
470 void
472 {
473  ps_search_t *base = (ps_search_t *)ngs;
474  int32 i, w, n_words;
475  root_chan_t *rhmm;
476 
477  n_words = ps_search_n_words(ngs);
478 
479  /* Reset utterance statistics. */
480  memset(&ngs->st, 0, sizeof(ngs->st));
481  ptmr_reset(&ngs->fwdtree_perf);
482  ptmr_start(&ngs->fwdtree_perf);
483 
484  /* Reset backpointer table. */
485  ngs->bpidx = 0;
486  ngs->bss_head = 0;
487 
488  /* Reset word lattice. */
489  for (i = 0; i < n_words; ++i)
490  ngs->word_lat_idx[i] = NO_BP;
491 
492  /* Reset active HMM and word lists. */
493  ngs->n_active_chan[0] = ngs->n_active_chan[1] = 0;
494  ngs->n_active_word[0] = ngs->n_active_word[1] = 0;
495 
496  /* Reset scores. */
497  ngs->best_score = 0;
498  ngs->renormalized = 0;
499 
500  /* Reset other stuff. */
501  for (i = 0; i < n_words; i++)
502  ngs->last_ltrans[i].sf = -1;
503  ngs->n_frame = 0;
504 
505  /* Clear the hypothesis string. */
506  ckd_free(base->hyp_str);
507  base->hyp_str = NULL;
508 
509  /* Reset the permanently allocated single-phone words, since they
510  * may have junk left over in them from FWDFLAT. */
511  for (i = 0; i < ngs->n_1ph_words; i++) {
512  w = ngs->single_phone_wid[i];
513  rhmm = (root_chan_t *) ngs->word_chan[w];
514  hmm_clear(&rhmm->hmm);
515  }
516 
517  /* Start search with <s>; word_chan[<s>] is permanently allocated */
518  rhmm = (root_chan_t *) ngs->word_chan[dict_startwid(ps_search_dict(ngs))];
519  hmm_clear(&rhmm->hmm);
520  hmm_enter(&rhmm->hmm, 0, NO_BP, 0);
521 }
522 
523 /*
524  * Mark the active senones for all senones belonging to channels that are active in the
525  * current frame.
526  */
527 static void
528 compute_sen_active(ngram_search_t *ngs, int frame_idx)
529 {
530  root_chan_t *rhmm;
531  chan_t *hmm, **acl;
532  int32 i, w, *awl;
533 
534  acmod_clear_active(ps_search_acmod(ngs));
535 
536  /* Flag active senones for root channels */
537  for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
538  if (hmm_frame(&rhmm->hmm) == frame_idx)
539  acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
540  }
541 
542  /* Flag active senones for nonroot channels in HMM tree */
543  i = ngs->n_active_chan[frame_idx & 0x1];
544  acl = ngs->active_chan_list[frame_idx & 0x1];
545  for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
546  acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
547  }
548 
549  /* Flag active senones for individual word channels */
550  i = ngs->n_active_word[frame_idx & 0x1];
551  awl = ngs->active_word_list[frame_idx & 0x1];
552  for (w = *(awl++); i > 0; --i, w = *(awl++)) {
553  for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
554  acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
555  }
556  }
557  for (i = 0; i < ngs->n_1ph_words; i++) {
558  w = ngs->single_phone_wid[i];
559  rhmm = (root_chan_t *) ngs->word_chan[w];
560 
561  if (hmm_frame(&rhmm->hmm) == frame_idx)
562  acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
563  }
564 }
565 
566 static void
567 renormalize_scores(ngram_search_t *ngs, int frame_idx, int32 norm)
568 {
569  root_chan_t *rhmm;
570  chan_t *hmm, **acl;
571  int32 i, w, *awl;
572 
573  /* Renormalize root channels */
574  for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
575  if (hmm_frame(&rhmm->hmm) == frame_idx) {
576  hmm_normalize(&rhmm->hmm, norm);
577  }
578  }
579 
580  /* Renormalize nonroot channels in HMM tree */
581  i = ngs->n_active_chan[frame_idx & 0x1];
582  acl = ngs->active_chan_list[frame_idx & 0x1];
583  for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
584  hmm_normalize(&hmm->hmm, norm);
585  }
586 
587  /* Renormalize individual word channels */
588  i = ngs->n_active_word[frame_idx & 0x1];
589  awl = ngs->active_word_list[frame_idx & 0x1];
590  for (w = *(awl++); i > 0; --i, w = *(awl++)) {
591  for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
592  hmm_normalize(&hmm->hmm, norm);
593  }
594  }
595  for (i = 0; i < ngs->n_1ph_words; i++) {
596  w = ngs->single_phone_wid[i];
597  rhmm = (root_chan_t *) ngs->word_chan[w];
598  if (hmm_frame(&rhmm->hmm) == frame_idx) {
599  hmm_normalize(&rhmm->hmm, norm);
600  }
601  }
602 
603  ngs->renormalized = TRUE;
604 }
605 
606 static int32
607 eval_root_chan(ngram_search_t *ngs, int frame_idx)
608 {
609  root_chan_t *rhmm;
610  int32 i, bestscore;
611 
612  bestscore = WORST_SCORE;
613  for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
614  if (hmm_frame(&rhmm->hmm) == frame_idx) {
615  int32 score = chan_v_eval(rhmm);
616  if (score BETTER_THAN bestscore)
617  bestscore = score;
618  ++ngs->st.n_root_chan_eval;
619  }
620  }
621  return (bestscore);
622 }
623 
624 static int32
625 eval_nonroot_chan(ngram_search_t *ngs, int frame_idx)
626 {
627  chan_t *hmm, **acl;
628  int32 i, bestscore;
629 
630  i = ngs->n_active_chan[frame_idx & 0x1];
631  acl = ngs->active_chan_list[frame_idx & 0x1];
632  bestscore = WORST_SCORE;
633  ngs->st.n_nonroot_chan_eval += i;
634 
635  for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
636  int32 score = chan_v_eval(hmm);
637  assert(hmm_frame(&hmm->hmm) == frame_idx);
638  if (score BETTER_THAN bestscore)
639  bestscore = score;
640  }
641 
642  return bestscore;
643 }
644 
645 static int32
646 eval_word_chan(ngram_search_t *ngs, int frame_idx)
647 {
648  root_chan_t *rhmm;
649  chan_t *hmm;
650  int32 i, w, bestscore, *awl, j, k;
651 
652  k = 0;
653  bestscore = WORST_SCORE;
654  awl = ngs->active_word_list[frame_idx & 0x1];
655 
656  i = ngs->n_active_word[frame_idx & 0x1];
657  for (w = *(awl++); i > 0; --i, w = *(awl++)) {
658  assert(bitvec_is_set(ngs->word_active, w));
659  bitvec_clear(ngs->word_active, w);
660  assert(ngs->word_chan[w] != NULL);
661 
662  for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
663  int32 score;
664 
665  assert(hmm_frame(&hmm->hmm) == frame_idx);
666  score = chan_v_eval(hmm);
667  /*printf("eval word chan %d score %d\n", w, score); */
668 
669  if (score BETTER_THAN bestscore)
670  bestscore = score;
671 
672  k++;
673  }
674  }
675 
676  /* Similarly for statically allocated single-phone words */
677  j = 0;
678  for (i = 0; i < ngs->n_1ph_words; i++) {
679  int32 score;
680 
681  w = ngs->single_phone_wid[i];
682  rhmm = (root_chan_t *) ngs->word_chan[w];
683  if (hmm_frame(&rhmm->hmm) < frame_idx)
684  continue;
685 
686  score = chan_v_eval(rhmm);
687  /* printf("eval 1ph word chan %d score %d\n", w, score); */
688  if (score BETTER_THAN bestscore && w != ps_search_finish_wid(ngs))
689  bestscore = score;
690 
691  j++;
692  }
693 
694  ngs->st.n_last_chan_eval += k + j;
695  ngs->st.n_nonroot_chan_eval += k + j;
696  ngs->st.n_word_lastchan_eval +=
697  ngs->n_active_word[frame_idx & 0x1] + j;
698 
699  return bestscore;
700 }
701 
702 static int32
703 evaluate_channels(ngram_search_t *ngs, int16 const *senone_scores, int frame_idx)
704 {
705  int32 bs;
706 
707  hmm_context_set_senscore(ngs->hmmctx, senone_scores);
708  ngs->best_score = eval_root_chan(ngs, frame_idx);
709  if ((bs = eval_nonroot_chan(ngs, frame_idx)) BETTER_THAN ngs->best_score)
710  ngs->best_score = bs;
711  if ((bs = eval_word_chan(ngs, frame_idx)) BETTER_THAN ngs->best_score)
712  ngs->best_score = bs;
713  ngs->last_phone_best_score = bs;
714 
715  return ngs->best_score;
716 }
717 
718 /*
719  * Prune currently active root channels for next frame. Also, perform exit
720  * transitions out of them and activate successors.
721  * score[] of pruned root chan set to WORST_SCORE elsewhere.
722  */
723 static void
724 prune_root_chan(ngram_search_t *ngs, int frame_idx)
725 {
726  root_chan_t *rhmm;
727  chan_t *hmm;
728  int32 i, nf, w;
729  int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
730  chan_t **nacl; /* next active list */
731  lastphn_cand_t *candp;
732  phone_loop_search_t *pls;
733 
734  nf = frame_idx + 1;
735  thresh = ngs->best_score + ngs->dynamic_beam;
736  newphone_thresh = ngs->best_score + ngs->pbeam;
737  lastphn_thresh = ngs->best_score + ngs->lpbeam;
738  nacl = ngs->active_chan_list[nf & 0x1];
739  pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
740 
741  for (i = 0, rhmm = ngs->root_chan; i < ngs->n_root_chan; i++, rhmm++) {
742  E_DEBUG(3,("Root channel %d frame %d score %d thresh %d\n",
743  i, hmm_frame(&rhmm->hmm), hmm_bestscore(&rhmm->hmm), thresh));
744  /* First check if this channel was active in current frame */
745  if (hmm_frame(&rhmm->hmm) < frame_idx)
746  continue;
747 
748  if (hmm_bestscore(&rhmm->hmm) BETTER_THAN thresh) {
749  hmm_frame(&rhmm->hmm) = nf; /* rhmm will be active in next frame */
750  E_DEBUG(3,("Preserving root channel %d score %d\n", i, hmm_bestscore(&rhmm->hmm)));
751  /* transitions out of this root channel */
752  /* transition to all next-level channels in the HMM tree */
753  newphone_score = hmm_out_score(&rhmm->hmm) + ngs->pip;
754  if (pls != NULL || newphone_score BETTER_THAN newphone_thresh) {
755  for (hmm = rhmm->next; hmm; hmm = hmm->alt) {
756  int32 pl_newphone_score = newphone_score
757  + phone_loop_search_score(pls, hmm->ciphone);
758  if (pl_newphone_score BETTER_THAN newphone_thresh) {
759  if ((hmm_frame(&hmm->hmm) < frame_idx)
760  || (newphone_score BETTER_THAN hmm_in_score(&hmm->hmm))) {
761  hmm_enter(&hmm->hmm, newphone_score,
762  hmm_out_history(&rhmm->hmm), nf);
763  *(nacl++) = hmm;
764  }
765  }
766  }
767  }
768 
769  /*
770  * Transition to last phone of all words for which this is the
771  * penultimate phone (the last phones may need multiple right contexts).
772  * Remember to remove the temporary newword_penalty.
773  */
774  if (pls != NULL || newphone_score BETTER_THAN lastphn_thresh) {
775  for (w = rhmm->penult_phn_wid; w >= 0;
776  w = ngs->homophone_set[w]) {
777  int32 pl_newphone_score = newphone_score
779  (pls, dict_last_phone(ps_search_dict(ngs),w));
780  E_DEBUG(3,("word %s newphone_score %d\n", dict_wordstr(ps_search_dict(ngs), w), newphone_score));
781  if (pl_newphone_score BETTER_THAN lastphn_thresh) {
782  candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
783  ngs->n_lastphn_cand++;
784  candp->wid = w;
785  candp->score =
786  newphone_score - ngs->nwpen;
787  candp->bp = hmm_out_history(&rhmm->hmm);
788  }
789  }
790  }
791  }
792  }
793  ngs->n_active_chan[nf & 0x1] = (int)(nacl - ngs->active_chan_list[nf & 0x1]);
794 }
795 
796 /*
797  * Prune currently active nonroot channels in HMM tree for next frame. Also, perform
798  * exit transitions out of such channels and activate successors.
799  */
800 static void
801 prune_nonroot_chan(ngram_search_t *ngs, int frame_idx)
802 {
803  chan_t *hmm, *nexthmm;
804  int32 nf, w, i;
805  int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
806  chan_t **acl, **nacl; /* active list, next active list */
807  lastphn_cand_t *candp;
808  phone_loop_search_t *pls;
809 
810  nf = frame_idx + 1;
811 
812  thresh = ngs->best_score + ngs->dynamic_beam;
813  newphone_thresh = ngs->best_score + ngs->pbeam;
814  lastphn_thresh = ngs->best_score + ngs->lpbeam;
815  pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
816 
817  acl = ngs->active_chan_list[frame_idx & 0x1]; /* currently active HMMs in tree */
818  nacl = ngs->active_chan_list[nf & 0x1] + ngs->n_active_chan[nf & 0x1];
819 
820  for (i = ngs->n_active_chan[frame_idx & 0x1], hmm = *(acl++); i > 0;
821  --i, hmm = *(acl++)) {
822  assert(hmm_frame(&hmm->hmm) >= frame_idx);
823 
824  if (hmm_bestscore(&hmm->hmm) BETTER_THAN thresh) {
825  /* retain this channel in next frame */
826  if (hmm_frame(&hmm->hmm) != nf) {
827  hmm_frame(&hmm->hmm) = nf;
828  *(nacl++) = hmm;
829  }
830 
831  /* transition to all next-level channel in the HMM tree */
832  newphone_score = hmm_out_score(&hmm->hmm) + ngs->pip;
833  if (pls != NULL || newphone_score BETTER_THAN newphone_thresh) {
834  for (nexthmm = hmm->next; nexthmm; nexthmm = nexthmm->alt) {
835  int32 pl_newphone_score = newphone_score
836  + phone_loop_search_score(pls, nexthmm->ciphone);
837  if ((pl_newphone_score BETTER_THAN newphone_thresh)
838  && ((hmm_frame(&nexthmm->hmm) < frame_idx)
839  || (newphone_score
840  BETTER_THAN hmm_in_score(&nexthmm->hmm)))) {
841  if (hmm_frame(&nexthmm->hmm) != nf) {
842  /* Keep this HMM on the active list */
843  *(nacl++) = nexthmm;
844  }
845  hmm_enter(&nexthmm->hmm, newphone_score,
846  hmm_out_history(&hmm->hmm), nf);
847  }
848  }
849  }
850 
851  /*
852  * Transition to last phone of all words for which this is the
853  * penultimate phone (the last phones may need multiple right contexts).
854  * Remember to remove the temporary newword_penalty.
855  */
856  if (pls != NULL || newphone_score BETTER_THAN lastphn_thresh) {
857  for (w = hmm->info.penult_phn_wid; w >= 0;
858  w = ngs->homophone_set[w]) {
859  int32 pl_newphone_score = newphone_score
861  (pls, dict_last_phone(ps_search_dict(ngs),w));
862  if (pl_newphone_score BETTER_THAN lastphn_thresh) {
863  candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
864  ngs->n_lastphn_cand++;
865  candp->wid = w;
866  candp->score =
867  newphone_score - ngs->nwpen;
868  candp->bp = hmm_out_history(&hmm->hmm);
869  }
870  }
871  }
872  }
873  else if (hmm_frame(&hmm->hmm) != nf) {
874  hmm_clear(&hmm->hmm);
875  }
876  }
877  ngs->n_active_chan[nf & 0x1] = (int)(nacl - ngs->active_chan_list[nf & 0x1]);
878 }
879 
880 /*
881  * Execute the transition into the last phone for all candidates words emerging from
882  * the HMM tree. Attach LM scores to such transitions.
883  * (Executed after pruning root and non-root, but before pruning word-chan.)
884  */
885 static void
886 last_phone_transition(ngram_search_t *ngs, int frame_idx)
887 {
888  int32 i, j, k, nf, bp, bpend, w;
889  lastphn_cand_t *candp;
890  int32 *nawl;
891  int32 thresh;
892  int32 bestscore, dscr;
893  chan_t *hmm;
894  bptbl_t *bpe;
895  int32 n_cand_sf = 0;
896 
897  nf = frame_idx + 1;
898  nawl = ngs->active_word_list[nf & 0x1];
899  ngs->st.n_lastphn_cand_utt += ngs->n_lastphn_cand;
900 
901  /* For each candidate word (entering its last phone) */
902  /* If best LM score and bp for candidate known use it, else sort cands by startfrm */
903  for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
904  int32 start_score;
905 
906  /* This can happen if recognition fails. */
907  if (candp->bp == -1)
908  continue;
909  /* Backpointer entry for it. */
910  bpe = &(ngs->bp_table[candp->bp]);
911 
912  /* Subtract starting score for candidate, leave it with only word score */
913  start_score = ngram_search_exit_score
914  (ngs, bpe, dict_first_phone(ps_search_dict(ngs), candp->wid));
915  assert(start_score BETTER_THAN WORST_SCORE);
916  candp->score -= start_score;
917 
918  /*
919  * If this candidate not occurred in an earlier frame, prepare for finding
920  * best transition score into last phone; sort by start frame.
921  */
922  /* i.e. if we don't have an entry in last_ltrans for this
923  * <word,sf>, then create one */
924  if (ngs->last_ltrans[candp->wid].sf != bpe->frame + 1) {
925  /* Look for an entry in cand_sf matching the backpointer
926  * for this candidate. */
927  for (j = 0; j < n_cand_sf; j++) {
928  if (ngs->cand_sf[j].bp_ef == bpe->frame)
929  break;
930  }
931  /* Oh, we found one, so chain onto it. */
932  if (j < n_cand_sf)
933  candp->next = ngs->cand_sf[j].cand;
934  else {
935  /* Nope, let's make a new one, allocating cand_sf if necessary. */
936  if (n_cand_sf >= ngs->cand_sf_alloc) {
937  if (ngs->cand_sf_alloc == 0) {
938  ngs->cand_sf =
939  ckd_calloc(CAND_SF_ALLOCSIZE,
940  sizeof(*ngs->cand_sf));
941  ngs->cand_sf_alloc = CAND_SF_ALLOCSIZE;
942  }
943  else {
944  ngs->cand_sf_alloc += CAND_SF_ALLOCSIZE;
945  ngs->cand_sf = ckd_realloc(ngs->cand_sf,
946  ngs->cand_sf_alloc
947  * sizeof(*ngs->cand_sf));
948  E_INFO("cand_sf[] increased to %d entries\n",
949  ngs->cand_sf_alloc);
950  }
951  }
952 
953  /* Use the newly created cand_sf. */
954  j = n_cand_sf++;
955  candp->next = -1; /* End of the chain. */
956  ngs->cand_sf[j].bp_ef = bpe->frame;
957  }
958  /* Update it to point to this candidate. */
959  ngs->cand_sf[j].cand = i;
960 
961  ngs->last_ltrans[candp->wid].dscr = WORST_SCORE;
962  ngs->last_ltrans[candp->wid].sf = bpe->frame + 1;
963  }
964  }
965 
966  /* Compute best LM score and bp for new cands entered in the sorted lists above */
967  for (i = 0; i < n_cand_sf; i++) {
968  /* For the i-th unique end frame... */
969  bp = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef];
970  bpend = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef + 1];
971  for (bpe = &(ngs->bp_table[bp]); bp < bpend; bp++, bpe++) {
972  if (!bpe->valid)
973  continue;
974  /* For each candidate at the start frame find bp->cand transition-score */
975  for (j = ngs->cand_sf[i].cand; j >= 0; j = candp->next) {
976  int32 n_used;
977  candp = &(ngs->lastphn_cand[j]);
978  dscr =
980  (ngs, bpe, dict_first_phone(ps_search_dict(ngs), candp->wid));
981  if (dscr BETTER_THAN WORST_SCORE) {
982  assert(!dict_filler_word(ps_search_dict(ngs), candp->wid));
983  dscr += ngram_tg_score(ngs->lmset,
984  dict_basewid(ps_search_dict(ngs), candp->wid),
985  bpe->real_wid,
986  bpe->prev_real_wid,
987  &n_used)>>SENSCR_SHIFT;
988  }
989 
990  if (dscr BETTER_THAN ngs->last_ltrans[candp->wid].dscr) {
991  ngs->last_ltrans[candp->wid].dscr = dscr;
992  ngs->last_ltrans[candp->wid].bp = bp;
993  }
994  }
995  }
996  }
997 
998  /* Update best transitions for all candidates; also update best lastphone score */
999  bestscore = ngs->last_phone_best_score;
1000  for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
1001  candp->score += ngs->last_ltrans[candp->wid].dscr;
1002  candp->bp = ngs->last_ltrans[candp->wid].bp;
1003 
1004  if (candp->score BETTER_THAN bestscore)
1005  bestscore = candp->score;
1006  }
1007  ngs->last_phone_best_score = bestscore;
1008 
1009  /* At this pt, we know the best entry score (with LM component) for all candidates */
1010  thresh = bestscore + ngs->lponlybeam;
1011  for (i = ngs->n_lastphn_cand, candp = ngs->lastphn_cand; i > 0; --i, candp++) {
1012  if (candp->score BETTER_THAN thresh) {
1013  w = candp->wid;
1014 
1015  ngram_search_alloc_all_rc(ngs, w);
1016 
1017  k = 0;
1018  for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
1019  if ((hmm_frame(&hmm->hmm) < frame_idx)
1020  || (candp->score BETTER_THAN hmm_in_score(&hmm->hmm))) {
1021  assert(hmm_frame(&hmm->hmm) != nf);
1022  hmm_enter(&hmm->hmm,
1023  candp->score, candp->bp, nf);
1024  k++;
1025  }
1026  }
1027  if (k > 0) {
1028  assert(bitvec_is_clear(ngs->word_active, w));
1029  assert(!dict_is_single_phone(ps_search_dict(ngs), w));
1030  *(nawl++) = w;
1031  bitvec_set(ngs->word_active, w);
1032  }
1033  }
1034  }
1035  ngs->n_active_word[nf & 0x1] = (int)(nawl - ngs->active_word_list[nf & 0x1]);
1036 }
1037 
1038 /*
1039  * Prune currently active word channels for next frame. Also, perform exit
1040  * transitions out of such channels and active successors.
1041  */
1042 static void
1043 prune_word_chan(ngram_search_t *ngs, int frame_idx)
1044 {
1045  root_chan_t *rhmm;
1046  chan_t *hmm, *thmm;
1047  chan_t **phmmp; /* previous HMM-pointer */
1048  int32 nf, w, i, k;
1049  int32 newword_thresh, lastphn_thresh;
1050  int32 *awl, *nawl;
1051 
1052  nf = frame_idx + 1;
1053  newword_thresh = ngs->last_phone_best_score + ngs->wbeam;
1054  lastphn_thresh = ngs->last_phone_best_score + ngs->lponlybeam;
1055 
1056  awl = ngs->active_word_list[frame_idx & 0x1];
1057  nawl = ngs->active_word_list[nf & 0x1] + ngs->n_active_word[nf & 0x1];
1058 
1059  /* Dynamically allocated last channels of multi-phone words */
1060  for (i = ngs->n_active_word[frame_idx & 0x1], w = *(awl++); i > 0;
1061  --i, w = *(awl++)) {
1062  k = 0;
1063  phmmp = &(ngs->word_chan[w]);
1064  for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) {
1065  assert(hmm_frame(&hmm->hmm) >= frame_idx);
1066 
1067  thmm = hmm->next;
1068  if (hmm_bestscore(&hmm->hmm) BETTER_THAN lastphn_thresh) {
1069  /* retain this channel in next frame */
1070  hmm_frame(&hmm->hmm) = nf;
1071  k++;
1072  phmmp = &(hmm->next);
1073 
1074  /* Could if ((! skip_alt_frm) || (frame_idx & 0x1)) the following */
1075  if (hmm_out_score(&hmm->hmm) BETTER_THAN newword_thresh) {
1076  /* can exit channel and recognize word */
1077  ngram_search_save_bp(ngs, frame_idx, w,
1078  hmm_out_score(&hmm->hmm),
1079  hmm_out_history(&hmm->hmm),
1080  hmm->info.rc_id);
1081  }
1082  }
1083  else if (hmm_frame(&hmm->hmm) == nf) {
1084  phmmp = &(hmm->next);
1085  }
1086  else {
1087  hmm_deinit(&hmm->hmm);
1088  listelem_free(ngs->chan_alloc, hmm);
1089  *phmmp = thmm;
1090  }
1091  }
1092  if ((k > 0) && (bitvec_is_clear(ngs->word_active, w))) {
1093  assert(!dict_is_single_phone(ps_search_dict(ngs), w));
1094  *(nawl++) = w;
1095  bitvec_set(ngs->word_active, w);
1096  }
1097  }
1098  ngs->n_active_word[nf & 0x1] = (int)(nawl - ngs->active_word_list[nf & 0x1]);
1099 
1100  /*
1101  * Prune permanently allocated single-phone channels.
1102  * NOTES: score[] of pruned channels set to WORST_SCORE elsewhere.
1103  */
1104  for (i = 0; i < ngs->n_1ph_words; i++) {
1105  w = ngs->single_phone_wid[i];
1106  rhmm = (root_chan_t *) ngs->word_chan[w];
1107  E_DEBUG(3,("Single phone word %s frame %d score %d thresh %d outscore %d nwthresh %d\n",
1108  dict_wordstr(ps_search_dict(ngs),w),
1109  hmm_frame(&rhmm->hmm), hmm_bestscore(&rhmm->hmm),
1110  lastphn_thresh, hmm_out_score(&rhmm->hmm), newword_thresh));
1111  if (hmm_frame(&rhmm->hmm) < frame_idx)
1112  continue;
1113  if (hmm_bestscore(&rhmm->hmm) BETTER_THAN lastphn_thresh) {
1114  hmm_frame(&rhmm->hmm) = nf;
1115 
1116  /* Could if ((! skip_alt_frm) || (frame_idx & 0x1)) the following */
1117  if (hmm_out_score(&rhmm->hmm) BETTER_THAN newword_thresh) {
1118  E_DEBUG(4,("Exiting single phone word %s with %d > %d, %d\n",
1119  dict_wordstr(ps_search_dict(ngs),w),
1120  hmm_out_score(&rhmm->hmm),
1121  lastphn_thresh, newword_thresh));
1122  ngram_search_save_bp(ngs, frame_idx, w,
1123  hmm_out_score(&rhmm->hmm),
1124  hmm_out_history(&rhmm->hmm), 0);
1125  }
1126  }
1127  }
1128 }
1129 
1130 static void
1131 prune_channels(ngram_search_t *ngs, int frame_idx)
1132 {
1133  /* Clear last phone candidate list. */
1134  ngs->n_lastphn_cand = 0;
1135  /* Set the dynamic beam based on maxhmmpf here. */
1136  ngs->dynamic_beam = ngs->beam;
1137  if (ngs->maxhmmpf != -1
1138  && ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval > ngs->maxhmmpf) {
1139  /* Build a histogram to approximately prune them. */
1140  int32 bins[256], bw, nhmms, i;
1141  root_chan_t *rhmm;
1142  chan_t **acl, *hmm;
1143 
1144  /* Bins go from zero (best score) to edge of beam. */
1145  bw = -ngs->beam / 256;
1146  memset(bins, 0, sizeof(bins));
1147  /* For each active root channel. */
1148  for (i = 0, rhmm = ngs->root_chan; i < ngs->n_root_chan; i++, rhmm++) {
1149  int32 b;
1150 
1151  /* Put it in a bin according to its bestscore. */
1152  b = (ngs->best_score - hmm_bestscore(&rhmm->hmm)) / bw;
1153  if (b >= 256)
1154  b = 255;
1155  ++bins[b];
1156  }
1157  /* For each active non-root channel. */
1158  acl = ngs->active_chan_list[frame_idx & 0x1]; /* currently active HMMs in tree */
1159  for (i = ngs->n_active_chan[frame_idx & 0x1], hmm = *(acl++);
1160  i > 0; --i, hmm = *(acl++)) {
1161  int32 b;
1162 
1163  /* Put it in a bin according to its bestscore. */
1164  b = (ngs->best_score - hmm_bestscore(&hmm->hmm)) / bw;
1165  if (b >= 256)
1166  b = 255;
1167  ++bins[b];
1168  }
1169  /* Walk down the bins to find the new beam. */
1170  for (i = nhmms = 0; i < 256; ++i) {
1171  nhmms += bins[i];
1172  if (nhmms > ngs->maxhmmpf)
1173  break;
1174  }
1175  ngs->dynamic_beam = -(i * bw);
1176  }
1177 
1178  prune_root_chan(ngs, frame_idx);
1179  prune_nonroot_chan(ngs, frame_idx);
1180  last_phone_transition(ngs, frame_idx);
1181  prune_word_chan(ngs, frame_idx);
1182 }
1183 
1184 /*
1185  * Limit the number of word exits in each frame to maxwpf. And also limit the number of filler
1186  * words to 1.
1187  */
1188 static void
1189 bptable_maxwpf(ngram_search_t *ngs, int frame_idx)
1190 {
1191  int32 bp, n;
1192  int32 bestscr, worstscr;
1193  bptbl_t *bpe, *bestbpe, *worstbpe;
1194 
1195  /* Don't prune if no pruing. */
1196  if (ngs->maxwpf == -1 || ngs->maxwpf == ps_search_n_words(ngs))
1197  return;
1198 
1199  /* Allow only one filler word exit (the best) per frame */
1200  bestscr = (int32) 0x80000000;
1201  bestbpe = NULL;
1202  n = 0;
1203  for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
1204  bpe = &(ngs->bp_table[bp]);
1205  if (dict_filler_word(ps_search_dict(ngs), bpe->wid)) {
1206  if (bpe->score BETTER_THAN bestscr) {
1207  bestscr = bpe->score;
1208  bestbpe = bpe;
1209  }
1210  bpe->valid = FALSE;
1211  n++; /* No. of filler words */
1212  }
1213  }
1214  /* Restore bestbpe to valid state */
1215  if (bestbpe != NULL) {
1216  bestbpe->valid = TRUE;
1217  --n;
1218  }
1219 
1220  /* Allow up to maxwpf best entries to survive; mark the remaining with valid = 0 */
1221  n = (ngs->bpidx
1222  - ngs->bp_table_idx[frame_idx]) - n; /* No. of entries after limiting fillers */
1223  for (; n > ngs->maxwpf; --n) {
1224  /* Find worst BPTable entry */
1225  worstscr = (int32) 0x7fffffff;
1226  worstbpe = NULL;
1227  for (bp = ngs->bp_table_idx[frame_idx]; (bp < ngs->bpidx); bp++) {
1228  bpe = &(ngs->bp_table[bp]);
1229  if (bpe->valid && (bpe->score WORSE_THAN worstscr)) {
1230  worstscr = bpe->score;
1231  worstbpe = bpe;
1232  }
1233  }
1234  /* FIXME: Don't panic! */
1235  if (worstbpe == NULL)
1236  E_FATAL("PANIC: No worst BPtable entry remaining\n");
1237  worstbpe->valid = FALSE;
1238  }
1239 }
1240 
1241 static void
1242 word_transition(ngram_search_t *ngs, int frame_idx)
1243 {
1244  int32 i, k, bp, w, nf;
1245  int32 rc;
1246  int32 thresh, newscore, pl_newscore;
1247  bptbl_t *bpe;
1248  root_chan_t *rhmm;
1249  struct bestbp_rc_s *bestbp_rc_ptr;
1250  phone_loop_search_t *pls;
1251  dict_t *dict = ps_search_dict(ngs);
1252  dict2pid_t *d2p = ps_search_dict2pid(ngs);
1253 
1254  /*
1255  * Transition to start of new word instances (HMM tree roots); but only if words
1256  * other than </s> finished here.
1257  * But, first, find the best starting score for each possible right context phone.
1258  */
1259  for (i = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef) - 1; i >= 0; --i)
1260  ngs->bestbp_rc[i].score = WORST_SCORE;
1261  k = 0;
1262  pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
1263  /* Ugh, this is complicated. Scan all word exits for this frame
1264  * (they have already been created by prune_word_chan()). */
1265  for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
1266  bpe = &(ngs->bp_table[bp]);
1267  ngs->word_lat_idx[bpe->wid] = NO_BP;
1268 
1269  if (bpe->wid == ps_search_finish_wid(ngs))
1270  continue;
1271  k++;
1272 
1273  /* DICT2PID */
1274  /* Array of HMM scores corresponding to all the possible right
1275  * context expansions of the final phone. It's likely that a
1276  * lot of these are going to be missing, actually. */
1277  if (bpe->last2_phone == -1) { /* implies s_idx == -1 */
1278  /* No right context expansion. */
1279  for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
1280  if (bpe->score BETTER_THAN ngs->bestbp_rc[rc].score) {
1281  E_DEBUG(4,("bestbp_rc[0] = %d lc %d\n",
1282  bpe->score, bpe->last_phone));
1283  ngs->bestbp_rc[rc].score = bpe->score;
1284  ngs->bestbp_rc[rc].path = bp;
1285  ngs->bestbp_rc[rc].lc = bpe->last_phone;
1286  }
1287  }
1288  }
1289  else {
1290  xwdssid_t *rssid = dict2pid_rssid(d2p, bpe->last_phone, bpe->last2_phone);
1291  int32 *rcss = &(ngs->bscore_stack[bpe->s_idx]);
1292  for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
1293  if (rcss[rssid->cimap[rc]] BETTER_THAN ngs->bestbp_rc[rc].score) {
1294  E_DEBUG(4,("bestbp_rc[%d] = %d lc %d\n",
1295  rc, rcss[rssid->cimap[rc]], bpe->last_phone));
1296  ngs->bestbp_rc[rc].score = rcss[rssid->cimap[rc]];
1297  ngs->bestbp_rc[rc].path = bp;
1298  ngs->bestbp_rc[rc].lc = bpe->last_phone;
1299  }
1300  }
1301  }
1302  }
1303  if (k == 0)
1304  return;
1305 
1306  nf = frame_idx + 1;
1307  thresh = ngs->best_score + ngs->dynamic_beam;
1308  /*
1309  * Hypothesize successors to words finished in this frame.
1310  * Main dictionary, multi-phone words transition to HMM-trees roots.
1311  */
1312  for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
1313  bestbp_rc_ptr = &(ngs->bestbp_rc[rhmm->ciphone]);
1314 
1315  newscore = bestbp_rc_ptr->score + ngs->nwpen + ngs->pip;
1316  pl_newscore = newscore
1317  + phone_loop_search_score(pls, rhmm->ciphone);
1318  if (pl_newscore BETTER_THAN thresh) {
1319  if ((hmm_frame(&rhmm->hmm) < frame_idx)
1320  || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
1321  hmm_enter(&rhmm->hmm, newscore,
1322  bestbp_rc_ptr->path, nf);
1323  /* DICT2PID: Another place where mpx ssids are entered. */
1324  /* Look up the ssid to use when entering this mpx triphone. */
1325  hmm_mpx_ssid(&rhmm->hmm, 0) =
1326  dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone, bestbp_rc_ptr->lc);
1327  assert(hmm_mpx_ssid(&rhmm->hmm, 0) != BAD_SSID);
1328  }
1329  }
1330  }
1331 
1332  /*
1333  * Single phone words; no right context for these. Cannot use bestbp_rc as
1334  * LM scores have to be included. First find best transition to these words.
1335  */
1336  for (i = 0; i < ngs->n_1ph_LMwords; i++) {
1337  w = ngs->single_phone_wid[i];
1338  ngs->last_ltrans[w].dscr = (int32) 0x80000000;
1339  }
1340  for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
1341  bpe = &(ngs->bp_table[bp]);
1342  if (!bpe->valid)
1343  continue;
1344 
1345  for (i = 0; i < ngs->n_1ph_LMwords; i++) {
1346  int32 n_used;
1347  w = ngs->single_phone_wid[i];
1348  newscore = ngram_search_exit_score
1349  (ngs, bpe, dict_first_phone(dict, w));
1350  E_DEBUG(4, ("initial newscore for %s: %d\n",
1351  dict_wordstr(dict, w), newscore));
1352  if (newscore != WORST_SCORE)
1353  newscore += ngram_tg_score(ngs->lmset,
1354  dict_basewid(dict, w),
1355  bpe->real_wid,
1356  bpe->prev_real_wid,
1357  &n_used)>>SENSCR_SHIFT;
1358 
1359  /* FIXME: Not sure how WORST_SCORE could be better, but it
1360  * apparently happens. */
1361  if (newscore BETTER_THAN ngs->last_ltrans[w].dscr) {
1362  ngs->last_ltrans[w].dscr = newscore;
1363  ngs->last_ltrans[w].bp = bp;
1364  }
1365  }
1366  }
1367 
1368  /* Now transition to in-LM single phone words */
1369  for (i = 0; i < ngs->n_1ph_LMwords; i++) {
1370  w = ngs->single_phone_wid[i];
1371  /* Never transition into the start word (for one thing, it is
1372  a non-event in the language model.) */
1373  if (w == dict_startwid(ps_search_dict(ngs)))
1374  continue;
1375  rhmm = (root_chan_t *) ngs->word_chan[w];
1376  newscore = ngs->last_ltrans[w].dscr + ngs->pip;
1377  pl_newscore = newscore + phone_loop_search_score(pls, rhmm->ciphone);
1378  if (pl_newscore BETTER_THAN thresh) {
1379  bpe = ngs->bp_table + ngs->last_ltrans[w].bp;
1380  if ((hmm_frame(&rhmm->hmm) < frame_idx)
1381  || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
1382  hmm_enter(&rhmm->hmm,
1383  newscore, ngs->last_ltrans[w].bp, nf);
1384  /* DICT2PID: another place where mpx ssids are entered. */
1385  /* Look up the ssid to use when entering this mpx triphone. */
1386  hmm_mpx_ssid(&rhmm->hmm, 0) =
1387  dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone,
1388  dict_last_phone(dict, bpe->wid));
1389  assert(hmm_mpx_ssid(&rhmm->hmm, 0) != BAD_SSID);
1390  }
1391  }
1392  }
1393 
1394  /* Remaining words: <sil>, noise words. No mpx for these! */
1395  w = ps_search_silence_wid(ngs);
1396  rhmm = (root_chan_t *) ngs->word_chan[w];
1397  bestbp_rc_ptr = &(ngs->bestbp_rc[ps_search_acmod(ngs)->mdef->sil]);
1398  newscore = bestbp_rc_ptr->score + ngs->silpen + ngs->pip;
1399  pl_newscore = newscore
1400  + phone_loop_search_score(pls, rhmm->ciphone);
1401  if (pl_newscore BETTER_THAN thresh) {
1402  if ((hmm_frame(&rhmm->hmm) < frame_idx)
1403  || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
1404  hmm_enter(&rhmm->hmm,
1405  newscore, bestbp_rc_ptr->path, nf);
1406  }
1407  }
1408  for (w = dict_filler_start(dict); w <= dict_filler_end(dict); w++) {
1409  if (w == ps_search_silence_wid(ngs))
1410  continue;
1411  /* Never transition into the start word (for one thing, it is
1412  a non-event in the language model.) */
1413  if (w == dict_startwid(ps_search_dict(ngs)))
1414  continue;
1415  rhmm = (root_chan_t *) ngs->word_chan[w];
1416  /* If this was not actually a single-phone word, rhmm will be NULL. */
1417  if (rhmm == NULL)
1418  continue;
1419  newscore = bestbp_rc_ptr->score + ngs->fillpen + ngs->pip;
1420  pl_newscore = newscore
1421  + phone_loop_search_score(pls, rhmm->ciphone);
1422  if (pl_newscore BETTER_THAN thresh) {
1423  if ((hmm_frame(&rhmm->hmm) < frame_idx)
1424  || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
1425  hmm_enter(&rhmm->hmm,
1426  newscore, bestbp_rc_ptr->path, nf);
1427  }
1428  }
1429  }
1430 }
1431 
1432 static void
1433 deactivate_channels(ngram_search_t *ngs, int frame_idx)
1434 {
1435  root_chan_t *rhmm;
1436  int i;
1437 
1438  /* Clear score[] of pruned root channels */
1439  for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
1440  if (hmm_frame(&rhmm->hmm) == frame_idx) {
1441  hmm_clear(&rhmm->hmm);
1442  }
1443  }
1444  /* Clear score[] of pruned single-phone channels */
1445  for (i = 0; i < ngs->n_1ph_words; i++) {
1446  int32 w = ngs->single_phone_wid[i];
1447  rhmm = (root_chan_t *) ngs->word_chan[w];
1448  if (hmm_frame(&rhmm->hmm) == frame_idx) {
1449  hmm_clear(&rhmm->hmm);
1450  }
1451  }
1452 }
1453 
1454 int
1456 {
1457  int16 const *senscr;
1458 
1459  /* Activate our HMMs for the current frame if need be. */
1460  if (!ps_search_acmod(ngs)->compallsen)
1461  compute_sen_active(ngs, frame_idx);
1462 
1463  /* Compute GMM scores for the current frame. */
1464  if ((senscr = acmod_score(ps_search_acmod(ngs), &frame_idx)) == NULL)
1465  return 0;
1466  ngs->st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active;
1467 
1468  /* Mark backpointer table for current frame. */
1469  ngram_search_mark_bptable(ngs, frame_idx);
1470 
1471  /* If the best score is equal to or worse than WORST_SCORE,
1472  * recognition has failed, don't bother to keep trying. */
1473  if (ngs->best_score == WORST_SCORE || ngs->best_score WORSE_THAN WORST_SCORE)
1474  return 0;
1475  /* Renormalize if necessary */
1476  if (ngs->best_score + (2 * ngs->beam) WORSE_THAN WORST_SCORE) {
1477  E_INFO("Renormalizing Scores at frame %d, best score %d\n",
1478  frame_idx, ngs->best_score);
1479  renormalize_scores(ngs, frame_idx, ngs->best_score);
1480  }
1481 
1482  /* Evaluate HMMs */
1483  evaluate_channels(ngs, senscr, frame_idx);
1484  /* Prune HMMs and do phone transitions. */
1485  prune_channels(ngs, frame_idx);
1486  /* Do absolute pruning on word exits. */
1487  bptable_maxwpf(ngs, frame_idx);
1488  /* Do word transitions. */
1489  word_transition(ngs, frame_idx);
1490  /* Deactivate pruned HMMs. */
1491  deactivate_channels(ngs, frame_idx);
1492 
1493  ++ngs->n_frame;
1494  /* Return the number of frames processed. */
1495  return 1;
1496 }
1497 
1498 void
1500 {
1501  int32 i, w, cf, *awl;
1502  root_chan_t *rhmm;
1503  chan_t *hmm, **acl;
1504 
1505  /* This is the number of frames processed. */
1506  cf = ps_search_acmod(ngs)->output_frame;
1507  /* Add a mark in the backpointer table for one past the final frame. */
1508  ngram_search_mark_bptable(ngs, cf);
1509 
1510  /* Deactivate channels lined up for the next frame */
1511  /* First, root channels of HMM tree */
1512  for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
1513  hmm_clear(&rhmm->hmm);
1514  }
1515 
1516  /* nonroot channels of HMM tree */
1517  i = ngs->n_active_chan[cf & 0x1];
1518  acl = ngs->active_chan_list[cf & 0x1];
1519  for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
1520  hmm_clear(&hmm->hmm);
1521  }
1522 
1523  /* word channels */
1524  i = ngs->n_active_word[cf & 0x1];
1525  awl = ngs->active_word_list[cf & 0x1];
1526  for (w = *(awl++); i > 0; --i, w = *(awl++)) {
1527  /* Don't accidentally free single-phone words! */
1528  if (dict_is_single_phone(ps_search_dict(ngs), w))
1529  continue;
1530  bitvec_clear(ngs->word_active, w);
1531  if (ngs->word_chan[w] == NULL)
1532  continue;
1533  ngram_search_free_all_rc(ngs, w);
1534  }
1535 
1536  /*
1537  * The previous search code did a postprocessing of the
1538  * backpointer table here, but we will postpone this until it is
1539  * absolutely necessary, i.e. when generating a word graph.
1540  * Likewise we don't actually have to decide what the exit word is
1541  * until somebody requests a backtrace.
1542  */
1543 
1544  ptmr_stop(&ngs->fwdtree_perf);
1545  /* Print out some statistics. */
1546  if (cf > 0) {
1547  double n_speech = (double)(cf + 1)
1548  / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
1549  E_INFO("%8d words recognized (%d/fr)\n",
1550  ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1));
1551  E_INFO("%8d senones evaluated (%d/fr)\n", ngs->st.n_senone_active_utt,
1552  (ngs->st.n_senone_active_utt + (cf >> 1)) / (cf + 1));
1553  E_INFO("%8d channels searched (%d/fr), %d 1st, %d last\n",
1554  ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval,
1555  (ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval) / (cf + 1),
1556  ngs->st.n_root_chan_eval, ngs->st.n_last_chan_eval);
1557  E_INFO("%8d words for which last channels evaluated (%d/fr)\n",
1558  ngs->st.n_word_lastchan_eval,
1559  ngs->st.n_word_lastchan_eval / (cf + 1));
1560  E_INFO("%8d candidate words for entering last phone (%d/fr)\n",
1561  ngs->st.n_lastphn_cand_utt, ngs->st.n_lastphn_cand_utt / (cf + 1));
1562  E_INFO("fwdtree %.2f CPU %.3f xRT\n",
1563  ngs->fwdtree_perf.t_cpu,
1564  ngs->fwdtree_perf.t_cpu / n_speech);
1565  E_INFO("fwdtree %.2f wall %.3f xRT\n",
1566  ngs->fwdtree_perf.t_elapsed,
1567  ngs->fwdtree_perf.t_elapsed / n_speech);
1568  }
1569  /* dump_bptable(ngs); */
1570 }
hmm_t hmm
Basic HMM structure.
Definition: ngram_search.h:65
void ngram_fwdtree_finish(ngram_search_t *ngs)
Finish fwdtree decoding for an utterance.
int32 wid
Word index.
Definition: ngram_search.h:113
void ngram_fwdtree_deinit(ngram_search_t *ngs)
Release memory associated with fwdtree decoding.
Base structure for search module.
int32 n_nonroot_chan
Number of valid non-root channels.
Definition: ngram_search.h:234
void ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
Definition: ngram_search.c:598
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
Definition: hmm.c:89
chan_t * next
first descendant of this channel
Definition: ngram_search.h:94
listelem_alloc_t * chan_alloc
For chan_t.
Definition: ngram_search.h:211
void ngram_fwdtree_start(ngram_search_t *ngs)
Start fwdtree decoding for an utterance.
const char * bin_mdef_ciphone_str(bin_mdef_t *m, int32 ci)
In: ciphone id for which name wanted.
Definition: bin_mdef.c:737
frame_idx_t frame
start or end frame
Definition: ngram_search.h:110
hmm_context_t * hmmctx
HMM context.
Definition: ngram_search.h:200
int32 n_active_chan[2]
Number entries in active_chan_list.
Definition: ngram_search.h:276
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
Definition: hmm.c:111
int16 last2_phone
next-to-last phone of this word
Definition: ngram_search.h:120
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
Definition: acmod.c:1213
#define BAD_SSID
Invalid senone sequence ID (limited to 16 bits for PocketSphinx).
Definition: bin_mdef.h:94
bitvec_t * word_active
array of active flags for all words.
Definition: ngram_search.h:247
int32 ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone)
Get the exit score for a backpointer entry with a given right context.
Definition: ngram_search.c:660
int16 ciphone
first ciphone of this node; all words rooted at this node begin with this ciphone ...
Definition: ngram_search.h:100
int32 ** active_word_list
Array of active multi-phone words for current and next frame.
Definition: ngram_search.h:287
struct chan_s * next
first descendant of this channel; or, in the case of the last phone of a word, the next alternative r...
Definition: ngram_search.h:68
void ngram_search_save_bp(ngram_search_t *ngs, int frame_idx, int32 w, int32 score, int32 path, int32 rc)
Enter a word in the backpointer table.
Definition: ngram_search.c:383
Lexicon tree based Viterbi search.
int32 * single_phone_wid
list of single-phone word ids
Definition: ngram_search.h:264
int ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx)
Record the current frame's index in the backpointer table.
Definition: ngram_search.c:329
int32 n_root_chan_alloc
Number of root_chan allocated.
Definition: ngram_search.h:232
int16 ci2phone
second ciphone of this node; one root HMM for each unique right context
Definition: ngram_search.h:102
int32 penult_phn_wid
list of words whose last phone follows this one; this field indicates the first of the list; the rest...
Definition: ngram_search.h:75
int32 n_active_word[2]
Number entries in active_word_list.
Definition: ngram_search.h:288
int32 rc_id
right-context id for last phone of words
Definition: ngram_search.h:79
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
Definition: dict2pid.h:115
N-Gram search module structure.
Definition: ngram_search.h:197
int ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
void hmm_normalize(hmm_t *h, int32 bestscr)
Renormalize the scores in this HMM based on the given best score.
Definition: hmm.c:209
int32 max_nonroot_chan
Maximum possible number of non-root channels.
Definition: ngram_search.h:235
int32 last_phone_best_score
Best Viterbi path score for last phone.
Definition: ngram_search.h:326
int32 real_wid
wid of this or latest predecessor real word
Definition: ngram_search.h:117
root_chan_t * rhmm_1ph
Root HMMs for single-phone words.
Definition: ngram_search.h:236
int32 prev_real_wid
wid of second-last real word
Definition: ngram_search.h:118
#define WORST_SCORE
Large "bad" score.
Definition: hmm.h:84
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
Definition: dict.c:413
void ngram_fwdtree_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdtree decoding.
Lexical tree node data type for the first phone (root) of each dynamic HMM tree structure.
Definition: ngram_search.h:90
void hmm_enter(hmm_t *h, int32 score, int32 histid, int frame)
Enter an HMM with the given path score and history ID.
Definition: hmm.c:201
Lexical tree node data type.
Definition: ngram_search.h:64
hmm_t hmm
Basic HMM structure.
Definition: ngram_search.h:91
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
Definition: acmod.c:1197
#define hmm_context_set_senscore(ctx, senscr)
Change the senone score array for a context.
Definition: hmm.h:227
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
chan_t *** active_chan_list
Array of active channels for current and next frame.
Definition: ngram_search.h:275
a structure for a dictionary.
Definition: dict.h:76
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a "real" word, i.e.
Definition: dict.c:427
struct chan_s * alt
sibling; i.e., next descendant of parent HMM
Definition: ngram_search.h:71
#define WORSE_THAN
Is one score worse than another?
Definition: hmm.h:100
s3ssid_t dict2pid_internal(dict2pid_t *d2p, int32 wid, int pos)
Return the senone sequence ID for the given word position.
Definition: dict2pid.c:367
void hmm_clear(hmm_t *h)
Reset the states of the HMM to the invalid condition.
Definition: hmm.c:183
int32 best_score
Best Viterbi path score.
Definition: ngram_search.h:325
Back pointer table (forward pass lattice; actually a tree)
Definition: ngram_search.h:109
int32 n_1ph_LMwords
Number single phone dict words also in LM; these come first in single_phone_wid.
Definition: ngram_search.h:266
cross word triphone model structure
Definition: dict2pid.h:73
int ngram_fwdtree_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
Fast and rough context-independent phoneme loop search.
void ngram_search_free_all_rc(ngram_search_t *ngs, int32 w)
Free last phone channels for all possible right contexts for word w.
Definition: ngram_search.c:647
root_chan_t * root_chan
Search structure of HMM instances.
Definition: ngram_search.h:231
char * hyp_str
Current hypothesis string.
#define BETTER_THAN
Is one score better than another?
Definition: hmm.h:95
int32 s_idx
Start of BScoreStack for various right contexts.
Definition: ngram_search.h:116
int32 n_frame
Number of frames actually present.
Definition: ngram_search.h:308
ngram_model_t * lmset
Set of language models.
Definition: ngram_search.h:199
uint8 valid
For absolute pruning.
Definition: ngram_search.h:111
int32 n_1ph_words
Number single phone words in dict (total)
Definition: ngram_search.h:265
int32 ciphone
ciphone for this node
Definition: ngram_search.h:73
ngram_search_stats_t st
Various statistics for profiling.
Definition: ngram_search.h:335
chan_t ** word_chan
Channels associated with a given word (only used for right contexts, single-phone words in fwdtree se...
Definition: ngram_search.h:246
int32 score
Score (best among all right contexts)
Definition: ngram_search.h:115
int32 n_root_chan
Number of valid root_chan.
Definition: ngram_search.h:233
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
Definition: dict2pid.h:75
int32 * homophone_set
Each node in the HMM tree structure may point to a set of words whose last phone would follow that no...
Definition: ngram_search.h:263
#define dict_pron(d, w, p)
The CI phones of the word w at position p.
Definition: dict.h:165
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
#define phone_loop_search_score(pls, ci)
Return lookahead heuristic score for a specific phone.
Phone loop search structure.
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
Definition: acmod.c:1106
int16 last_phone
last phone of this word
Definition: ngram_search.h:119