PocketSphinx  5prealpha
ps_lattice.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
42 /* System headers. */
43 #include <assert.h>
44 #include <string.h>
45 #include <math.h>
46 
47 /* SphinxBase headers. */
48 #include <sphinxbase/ckd_alloc.h>
49 #include <sphinxbase/listelem_alloc.h>
50 #include <sphinxbase/strfuncs.h>
51 #include <sphinxbase/err.h>
52 #include <sphinxbase/pio.h>
53 
54 /* Local headers. */
55 #include "pocketsphinx_internal.h"
56 #include "ps_lattice_internal.h"
57 #include "ngram_search.h"
58 #include "dict.h"
59 
60 /*
61  * Create a directed link between "from" and "to" nodes, but if a link already exists,
62  * choose one with the best ascr.
63  */
64 void
66  int32 score, int32 ef)
67 {
68  latlink_list_t *fwdlink;
69 
70  /* Look for an existing link between "from" and "to" nodes */
71  for (fwdlink = from->exits; fwdlink; fwdlink = fwdlink->next)
72  if (fwdlink->link->to == to)
73  break;
74 
75  if (fwdlink == NULL) {
76  latlink_list_t *revlink;
77  ps_latlink_t *link;
78 
79  /* No link between the two nodes; create a new one */
80  link = listelem_malloc(dag->latlink_alloc);
81  fwdlink = listelem_malloc(dag->latlink_list_alloc);
82  revlink = listelem_malloc(dag->latlink_list_alloc);
83 
84  link->from = from;
85  link->to = to;
86  link->ascr = score;
87  link->ef = ef;
88  link->best_prev = NULL;
89 
90  fwdlink->link = revlink->link = link;
91  fwdlink->next = from->exits;
92  from->exits = fwdlink;
93  revlink->next = to->entries;
94  to->entries = revlink;
95  }
96  else {
97  /* Link already exists; just retain the best ascr */
98  if (score BETTER_THAN fwdlink->link->ascr) {
99  fwdlink->link->ascr = score;
100  fwdlink->link->ef = ef;
101  }
102  }
103 }
104 
105 void
106 ps_lattice_penalize_fillers(ps_lattice_t *dag, int32 silpen, int32 fillpen)
107 {
108  ps_latnode_t *node;
109 
110  for (node = dag->nodes; node; node = node->next) {
111  latlink_list_t *linklist;
112  if (node != dag->start && node != dag->end && dict_filler_word(dag->dict, node->basewid)) {
113  for (linklist = node->entries; linklist; linklist = linklist->next)
114  linklist->link->ascr += (node->basewid == dag->silence) ? silpen : fillpen;
115  }
116  }
117 }
118 
119 static void
120 delete_node(ps_lattice_t *dag, ps_latnode_t *node)
121 {
122  latlink_list_t *x, *next_x;
123 
124  for (x = node->exits; x; x = next_x) {
125  next_x = x->next;
126  x->link->from = NULL;
127  listelem_free(dag->latlink_list_alloc, x);
128  }
129  for (x = node->entries; x; x = next_x) {
130  next_x = x->next;
131  x->link->to = NULL;
132  listelem_free(dag->latlink_list_alloc, x);
133  }
134  listelem_free(dag->latnode_alloc, node);
135 }
136 
137 
138 static void
139 remove_dangling_links(ps_lattice_t *dag, ps_latnode_t *node)
140 {
141  latlink_list_t *x, *prev_x, *next_x;
142 
143  prev_x = NULL;
144  for (x = node->exits; x; x = next_x) {
145  next_x = x->next;
146  if (x->link->to == NULL) {
147  if (prev_x)
148  prev_x->next = next_x;
149  else
150  node->exits = next_x;
151  listelem_free(dag->latlink_alloc, x->link);
152  listelem_free(dag->latlink_list_alloc, x);
153  }
154  else
155  prev_x = x;
156  }
157  prev_x = NULL;
158  for (x = node->entries; x; x = next_x) {
159  next_x = x->next;
160  if (x->link->from == NULL) {
161  if (prev_x)
162  prev_x->next = next_x;
163  else
164  node->entries = next_x;
165  listelem_free(dag->latlink_alloc, x->link);
166  listelem_free(dag->latlink_list_alloc, x);
167  }
168  else
169  prev_x = x;
170  }
171 }
172 
173 void
175 {
176  ps_latnode_t *node, *prev_node, *next_node;
177  int i;
178 
179  /* Remove unreachable nodes from the list of nodes. */
180  prev_node = NULL;
181  for (node = dag->nodes; node; node = next_node) {
182  next_node = node->next;
183  if (!node->reachable) {
184  if (prev_node)
185  prev_node->next = next_node;
186  else
187  dag->nodes = next_node;
188  /* Delete this node and NULLify links to it. */
189  delete_node(dag, node);
190  }
191  else
192  prev_node = node;
193  }
194 
195  /* Remove all links to and from unreachable nodes. */
196  i = 0;
197  for (node = dag->nodes; node; node = node->next) {
198  /* Assign sequence numbers. */
199  node->id = i++;
200 
201  /* We should obviously not encounter unreachable nodes here! */
202  assert(node->reachable);
203 
204  /* Remove all links that go nowhere. */
205  remove_dangling_links(dag, node);
206  }
207 }
208 
209 int32
210 ps_lattice_write(ps_lattice_t *dag, char const *filename)
211 {
212  FILE *fp;
213  int32 i;
214  ps_latnode_t *d, *initial, *final;
215 
216  initial = dag->start;
217  final = dag->end;
218 
219  E_INFO("Writing lattice file: %s\n", filename);
220  if ((fp = fopen(filename, "w")) == NULL) {
221  E_ERROR_SYSTEM("Failed to open lattice file '%s' for writing", filename);
222  return -1;
223  }
224 
225  /* Stupid Sphinx-III lattice code expects 'getcwd:' here */
226  fprintf(fp, "# getcwd: /this/is/bogus\n");
227  fprintf(fp, "# -logbase %e\n", logmath_get_base(dag->lmath));
228  fprintf(fp, "#\n");
229 
230  fprintf(fp, "Frames %d\n", dag->n_frames);
231  fprintf(fp, "#\n");
232 
233  for (i = 0, d = dag->nodes; d; d = d->next, i++);
234  fprintf(fp,
235  "Nodes %d (NODEID WORD STARTFRAME FIRST-ENDFRAME LAST-ENDFRAME)\n",
236  i);
237  for (i = 0, d = dag->nodes; d; d = d->next, i++) {
238  d->id = i;
239  fprintf(fp, "%d %s %d %d %d ; %d\n",
240  i, dict_wordstr(dag->dict, d->wid),
241  d->sf, d->fef, d->lef, d->node_id);
242  }
243  fprintf(fp, "#\n");
244 
245  fprintf(fp, "Initial %d\nFinal %d\n", initial->id, final->id);
246  fprintf(fp, "#\n");
247 
248  /* Don't bother with this, it's not used by anything. */
249  fprintf(fp, "BestSegAscr %d (NODEID ENDFRAME ASCORE)\n",
250  0 /* #BPTable entries */ );
251  fprintf(fp, "#\n");
252 
253  fprintf(fp, "Edges (FROM-NODEID TO-NODEID ASCORE)\n");
254  for (d = dag->nodes; d; d = d->next) {
255  latlink_list_t *l;
256  for (l = d->exits; l; l = l->next) {
257  if (l->link->ascr WORSE_THAN WORST_SCORE || l->link->ascr BETTER_THAN 0)
258  continue;
259  fprintf(fp, "%d %d %d\n",
260  d->id, l->link->to->id, l->link->ascr << SENSCR_SHIFT);
261  }
262  }
263  fprintf(fp, "End\n");
264  fclose(fp);
265 
266  return 0;
267 }
268 
269 int32
270 ps_lattice_write_htk(ps_lattice_t *dag, char const *filename)
271 {
272  FILE *fp;
273  ps_latnode_t *d, *initial, *final;
274  int32 j, n_links, n_nodes;
275 
276  initial = dag->start;
277  final = dag->end;
278 
279  E_INFO("Writing lattice file: %s\n", filename);
280  if ((fp = fopen(filename, "w")) == NULL) {
281  E_ERROR_SYSTEM("Failed to open lattice file '%s' for writing", filename);
282  return -1;
283  }
284 
285  for (n_links = n_nodes = 0, d = dag->nodes; d; d = d->next) {
286  latlink_list_t *l;
287  if (!d->reachable)
288  continue;
289  d->id = n_nodes;
290  for (l = d->exits; l; l = l->next) {
291  if (l->link->to == NULL || !l->link->to->reachable)
292  continue;
293  if (l->link->ascr WORSE_THAN WORST_SCORE || l->link->ascr BETTER_THAN 0)
294  continue;
295  ++n_links;
296  }
297  ++n_nodes;
298  }
299 
300  fprintf(fp, "# Lattice generated by PocketSphinx\n");
301  fprintf(fp, "#\n# Header\n#\n");
302  fprintf(fp, "VERSION=1.0\n");
303  fprintf(fp, "start=%d\n", initial->id);
304  fprintf(fp, "end=%d\n", final->id);
305  fprintf(fp, "#\n");
306 
307  fprintf(fp, "N=%d\tL=%d\n", n_nodes, n_links);
308  fprintf(fp, "#\n# Node definitions\n#\n");
309  for (d = dag->nodes; d; d = d->next) {
310  char const *word = dict_wordstr(dag->dict, d->wid);
311  char const *c = strrchr(word, '(');
312  int altpron = 1;
313  if (!d->reachable)
314  continue;
315  if (c)
316  altpron = atoi(c + 1);
317  word = dict_basestr(dag->dict, d->wid);
318  if (d->wid == dict_startwid(dag->dict))
319  word = "!SENT_START";
320  else if (d->wid == dict_finishwid(dag->dict))
321  word = "!SENT_END";
322  else if (dict_filler_word(dag->dict, d->wid))
323  word = "!NULL";
324  fprintf(fp, "I=%d\tt=%.2f\tW=%s\tv=%d\n",
325  d->id, (double)d->sf / dag->frate,
326  word, altpron);
327  }
328  fprintf(fp, "#\n# Link definitions\n#\n");
329  for (j = 0, d = dag->nodes; d; d = d->next) {
330  latlink_list_t *l;
331  if (!d->reachable)
332  continue;
333  for (l = d->exits; l; l = l->next) {
334  if (l->link->to == NULL || !l->link->to->reachable)
335  continue;
336  if (l->link->ascr WORSE_THAN WORST_SCORE || l->link->ascr BETTER_THAN 0)
337  continue;
338  fprintf(fp, "J=%d\tS=%d\tE=%d\ta=%f\tp=%g\n", j++,
339  d->id, l->link->to->id,
340  logmath_log_to_ln(dag->lmath, l->link->ascr << SENSCR_SHIFT),
341  logmath_exp(dag->lmath, l->link->alpha + l->link->beta - dag->norm));
342  }
343  }
344  fclose(fp);
345 
346  return 0;
347 }
348 
349 /* Read parameter from a lattice file*/
350 static int
351 dag_param_read(lineiter_t *li, char *param)
352 {
353  int32 n;
354 
355  while ((li = lineiter_next(li)) != NULL) {
356  char *c;
357 
358  /* Ignore comments. */
359  if (li->buf[0] == '#')
360  continue;
361 
362  /* Find the first space. */
363  c = strchr(li->buf, ' ');
364  if (c == NULL) continue;
365 
366  /* Check that the first field equals param and that there's a number after it. */
367  if (strncmp(li->buf, param, strlen(param)) == 0
368  && sscanf(c + 1, "%d", &n) == 1)
369  return n;
370  }
371  return -1;
372 }
373 
374 /* Mark every node that has a path to the argument dagnode as "reachable". */
375 static void
376 dag_mark_reachable(ps_latnode_t * d)
377 {
378  latlink_list_t *l;
379 
380  d->reachable = 1;
381  for (l = d->entries; l; l = l->next)
382  if (l->link->from && !l->link->from->reachable)
383  dag_mark_reachable(l->link->from);
384 }
385 
386 ps_lattice_t *
388  char const *file)
389 {
390  FILE *fp;
391  int32 ispipe;
392  lineiter_t *line;
393  float64 lb;
394  float32 logratio;
395  ps_latnode_t *tail;
396  ps_latnode_t **darray;
397  ps_lattice_t *dag;
398  int i, k, n_nodes;
399  int32 pip, silpen, fillpen;
400 
401  dag = ckd_calloc(1, sizeof(*dag));
402 
403  if (ps) {
404  dag->search = ps->search;
405  dag->dict = dict_retain(ps->dict);
406  dag->lmath = logmath_retain(ps->lmath);
407  dag->frate = cmd_ln_int32_r(dag->search->config, "-frate");
408  }
409  else {
410  dag->dict = dict_init(NULL, NULL);
411  dag->lmath = logmath_init(1.0001, 0, FALSE);
412  dag->frate = 100;
413  }
414  dag->silence = dict_silwid(dag->dict);
415  dag->latnode_alloc = listelem_alloc_init(sizeof(ps_latnode_t));
416  dag->latlink_alloc = listelem_alloc_init(sizeof(ps_latlink_t));
417  dag->latlink_list_alloc = listelem_alloc_init(sizeof(latlink_list_t));
418  dag->refcount = 1;
419 
420  tail = NULL;
421  darray = NULL;
422 
423  E_INFO("Reading DAG file: %s\n", file);
424  if ((fp = fopen_compchk(file, &ispipe)) == NULL) {
425  E_ERROR_SYSTEM("Failed to open DAG file '%s' for reading", file);
426  return NULL;
427  }
428  line = lineiter_start(fp);
429 
430  /* Read and verify logbase (ONE BIG HACK!!) */
431  if (line == NULL) {
432  E_ERROR("Premature EOF(%s)\n", file);
433  goto load_error;
434  }
435  if (strncmp(line->buf, "# getcwd: ", 10) != 0) {
436  E_ERROR("%s does not begin with '# getcwd: '\n%s", file, line->buf);
437  goto load_error;
438  }
439  if ((line = lineiter_next(line)) == NULL) {
440  E_ERROR("Premature EOF(%s)\n", file);
441  goto load_error;
442  }
443  if ((strncmp(line->buf, "# -logbase ", 11) != 0)
444  || (sscanf(line->buf + 11, "%lf", &lb) != 1)) {
445  E_WARN("%s: Cannot find -logbase in header\n", file);
446  lb = 1.0001;
447  }
448  logratio = 1.0f;
449  if (dag->lmath == NULL)
450  dag->lmath = logmath_init(lb, 0, TRUE);
451  else {
452  float32 pb = logmath_get_base(dag->lmath);
453  if (fabs(lb - pb) >= 0.0001) {
454  E_WARN("Inconsistent logbases: %f vs %f: will compensate\n", lb, pb);
455  logratio = (float32)(log(lb) / log(pb));
456  E_INFO("Lattice log ratio: %f\n", logratio);
457  }
458  }
459  /* Read Frames parameter */
460  dag->n_frames = dag_param_read(line, "Frames");
461  if (dag->n_frames <= 0) {
462  E_ERROR("Frames parameter missing or invalid\n");
463  goto load_error;
464  }
465  /* Read Nodes parameter */
466  n_nodes = dag_param_read(line, "Nodes");
467  if (n_nodes <= 0) {
468  E_ERROR("Nodes parameter missing or invalid\n");
469  goto load_error;
470  }
471 
472  /* Read nodes */
473  darray = ckd_calloc(n_nodes, sizeof(*darray));
474  for (i = 0; i < n_nodes; i++) {
475  ps_latnode_t *d;
476  int32 w;
477  int seqid, sf, fef, lef;
478  char wd[256];
479 
480  if ((line = lineiter_next(line)) == NULL) {
481  E_ERROR("Premature EOF while loading Nodes(%s)\n", file);
482  goto load_error;
483  }
484 
485  if ((k =
486  sscanf(line->buf, "%d %255s %d %d %d", &seqid, wd, &sf, &fef,
487  &lef)) != 5) {
488  E_ERROR("Cannot parse line: %s, value of count %d\n", line->buf, k);
489  goto load_error;
490  }
491 
492  w = dict_wordid(dag->dict, wd);
493  if (w < 0) {
494  if (dag->search == NULL) {
495  char *ww = ckd_salloc(wd);
496  if (dict_word2basestr(ww) != -1) {
497  if (dict_wordid(dag->dict, ww) == BAD_S3WID)
498  dict_add_word(dag->dict, ww, NULL, 0);
499  }
500  ckd_free(ww);
501  w = dict_add_word(dag->dict, wd, NULL, 0);
502  }
503  if (w < 0) {
504  E_ERROR("Unknown word in line: %s\n", line->buf);
505  goto load_error;
506  }
507  }
508 
509  if (seqid != i) {
510  E_ERROR("Seqno error: %s\n", line->buf);
511  goto load_error;
512  }
513 
514  d = listelem_malloc(dag->latnode_alloc);
515  darray[i] = d;
516  d->wid = w;
517  d->basewid = dict_basewid(dag->dict, w);
518  d->id = seqid;
519  d->sf = sf;
520  d->fef = fef;
521  d->lef = lef;
522  d->reachable = 0;
523  d->exits = d->entries = NULL;
524  d->next = NULL;
525 
526  if (!dag->nodes)
527  dag->nodes = d;
528  else
529  tail->next = d;
530  tail = d;
531  }
532 
533  /* Read initial node ID */
534  k = dag_param_read(line, "Initial");
535  if ((k < 0) || (k >= n_nodes)) {
536  E_ERROR("Initial node parameter missing or invalid\n");
537  goto load_error;
538  }
539  dag->start = darray[k];
540 
541  /* Read final node ID */
542  k = dag_param_read(line, "Final");
543  if ((k < 0) || (k >= n_nodes)) {
544  E_ERROR("Final node parameter missing or invalid\n");
545  goto load_error;
546  }
547  dag->end = darray[k];
548 
549  /* Read bestsegscore entries and ignore them. */
550  if ((k = dag_param_read(line, "BestSegAscr")) < 0) {
551  E_ERROR("BestSegAscr parameter missing\n");
552  goto load_error;
553  }
554  for (i = 0; i < k; i++) {
555  if ((line = lineiter_next(line)) == NULL) {
556  E_ERROR("Premature EOF while (%s) ignoring BestSegAscr\n",
557  line);
558  goto load_error;
559  }
560  }
561 
562  /* Read in edges. */
563  while ((line = lineiter_next(line)) != NULL) {
564  if (line->buf[0] == '#')
565  continue;
566  if (0 == strncmp(line->buf, "Edges", 5))
567  break;
568  }
569  if (line == NULL) {
570  E_ERROR("Edges missing\n");
571  goto load_error;
572  }
573  while ((line = lineiter_next(line)) != NULL) {
574  int from, to, ascr;
575  ps_latnode_t *pd, *d;
576 
577  if (sscanf(line->buf, "%d %d %d", &from, &to, &ascr) != 3)
578  break;
579  if (ascr WORSE_THAN WORST_SCORE)
580  continue;
581  pd = darray[from];
582  d = darray[to];
583  if (logratio != 1.0f)
584  ascr = (int32)(ascr * logratio);
585  ps_lattice_link(dag, pd, d, ascr, d->sf - 1);
586  }
587  if (strcmp(line->buf, "End\n") != 0) {
588  E_ERROR("Terminating 'End' missing\n");
589  goto load_error;
590  }
591  lineiter_free(line);
592  fclose_comp(fp, ispipe);
593  ckd_free(darray);
594 
595  /* Minor hack: If the final node is a filler word and not </s>,
596  * then set its base word ID to </s>, so that the language model
597  * scores won't be screwed up. */
598  if (dict_filler_word(dag->dict, dag->end->wid))
599  dag->end->basewid = dag->search
600  ? ps_search_finish_wid(dag->search)
601  : dict_wordid(dag->dict, S3_FINISH_WORD);
602 
603  /* Mark reachable from dag->end */
604  dag_mark_reachable(dag->end);
605 
606  /* Free nodes unreachable from dag->end and their links */
608 
609  if (ps) {
610  /* Build links around silence and filler words, since they do
611  * not exist in the language model. FIXME: This is
612  * potentially buggy, as we already do this before outputing
613  * lattices. */
614  pip = logmath_log(dag->lmath, cmd_ln_float32_r(ps->config, "-pip"));
615  silpen = pip + logmath_log(dag->lmath,
616  cmd_ln_float32_r(ps->config, "-silprob"));
617  fillpen = pip + logmath_log(dag->lmath,
618  cmd_ln_float32_r(ps->config, "-fillprob"));
619  ps_lattice_penalize_fillers(dag, silpen, fillpen);
620  }
621 
622  return dag;
623 
624  load_error:
625  E_ERROR("Failed to load %s\n", file);
626  lineiter_free(line);
627  if (fp) fclose_comp(fp, ispipe);
628  ckd_free(darray);
629  return NULL;
630 }
631 
632 int
634 {
635  return dag->n_frames;
636 }
637 
638 ps_lattice_t *
639 ps_lattice_init_search(ps_search_t *search, int n_frame)
640 {
641  ps_lattice_t *dag;
642 
643  dag = ckd_calloc(1, sizeof(*dag));
644  dag->search = search;
645  dag->dict = dict_retain(search->dict);
646  dag->lmath = logmath_retain(search->acmod->lmath);
647  dag->frate = cmd_ln_int32_r(dag->search->config, "-frate");
648  dag->silence = dict_silwid(dag->dict);
649  dag->n_frames = n_frame;
650  dag->latnode_alloc = listelem_alloc_init(sizeof(ps_latnode_t));
651  dag->latlink_alloc = listelem_alloc_init(sizeof(ps_latlink_t));
652  dag->latlink_list_alloc = listelem_alloc_init(sizeof(latlink_list_t));
653  dag->refcount = 1;
654  return dag;
655 }
656 
657 ps_lattice_t *
659 {
660  ++dag->refcount;
661  return dag;
662 }
663 
664 int
666 {
667  if (dag == NULL)
668  return 0;
669  if (--dag->refcount > 0)
670  return dag->refcount;
671  logmath_free(dag->lmath);
672  dict_free(dag->dict);
673  listelem_alloc_free(dag->latnode_alloc);
674  listelem_alloc_free(dag->latlink_alloc);
675  listelem_alloc_free(dag->latlink_list_alloc);
676  ckd_free(dag->hyp_str);
677  ckd_free(dag);
678  return 0;
679 }
680 
681 logmath_t *
683 {
684  return dag->lmath;
685 }
686 
689 {
690  return dag->nodes;
691 }
692 
695 {
696  return itor->next;
697 }
698 
699 void
701 {
702  /* Do absolutely nothing. */
703 }
704 
705 ps_latnode_t *
707 {
708  return itor;
709 }
710 
711 int
712 ps_latnode_times(ps_latnode_t *node, int16 *out_fef, int16 *out_lef)
713 {
714  if (out_fef) *out_fef = (int16)node->fef;
715  if (out_lef) *out_lef = (int16)node->lef;
716  return node->sf;
717 }
718 
719 char const *
721 {
722  return dict_wordstr(dag->dict, node->wid);
723 }
724 
725 char const *
727 {
728  return dict_wordstr(dag->dict, node->basewid);
729 }
730 
731 int32
733  ps_latlink_t **out_link)
734 {
735  latlink_list_t *links;
736  int32 bestpost = logmath_get_zero(dag->lmath);
737 
738  for (links = node->exits; links; links = links->next) {
739  int32 post = links->link->alpha + links->link->beta - dag->norm;
740  if (post > bestpost) {
741  if (out_link) *out_link = links->link;
742  bestpost = post;
743  }
744  }
745  return bestpost;
746 }
747 
750 {
751  return node->exits;
752 }
753 
756 {
757  return node->entries;
758 }
759 
762 {
763  return itor->next;
764 }
765 
766 void
768 {
769  /* Do absolutely nothing. */
770 }
771 
772 ps_latlink_t *
774 {
775  return itor->link;
776 }
777 
778 int
779 ps_latlink_times(ps_latlink_t *link, int16 *out_sf)
780 {
781  if (out_sf) {
782  if (link->from) {
783  *out_sf = link->from->sf;
784  }
785  else {
786  *out_sf = 0;
787  }
788  }
789  return link->ef;
790 }
791 
792 ps_latnode_t *
794 {
795  if (out_src) *out_src = link->from;
796  return link->to;
797 }
798 
799 char const *
801 {
802  if (link->from == NULL)
803  return NULL;
804  return dict_wordstr(dag->dict, link->from->wid);
805 }
806 
807 char const *
809 {
810  if (link->from == NULL)
811  return NULL;
812  return dict_wordstr(dag->dict, link->from->basewid);
813 }
814 
815 ps_latlink_t *
817 {
818  return link->best_prev;
819 }
820 
821 int32
822 ps_latlink_prob(ps_lattice_t *dag, ps_latlink_t *link, int32 *out_ascr)
823 {
824  int32 post = link->alpha + link->beta - dag->norm;
825  if (out_ascr) *out_ascr = link->ascr << SENSCR_SHIFT;
826  return post;
827 }
828 
829 char const *
831 {
832  ps_latlink_t *l;
833  size_t len;
834  char *c;
835 
836  /* Backtrace once to get hypothesis length. */
837  len = 0;
838  /* FIXME: There may not be a search, but actually there should be a dict. */
839  if (dict_real_word(dag->dict, link->to->basewid)) {
840  char *wstr = dict_wordstr(dag->dict, link->to->basewid);
841  if (wstr != NULL)
842  len += strlen(wstr) + 1;
843  }
844  for (l = link; l; l = l->best_prev) {
845  if (dict_real_word(dag->dict, l->from->basewid)) {
846  char *wstr = dict_wordstr(dag->dict, l->from->basewid);
847  if (wstr != NULL)
848  len += strlen(wstr) + 1;
849  }
850  }
851 
852  /* Backtrace again to construct hypothesis string. */
853  ckd_free(dag->hyp_str);
854  dag->hyp_str = ckd_calloc(1, len+1); /* extra one incase the hyp is empty */
855  c = dag->hyp_str + len - 1;
856  if (dict_real_word(dag->dict, link->to->basewid)) {
857  char *wstr = dict_wordstr(dag->dict, link->to->basewid);
858  if (wstr != NULL) {
859  len = strlen(wstr);
860  c -= len;
861  memcpy(c, wstr, len);
862  if (c > dag->hyp_str) {
863  --c;
864  *c = ' ';
865  }
866  }
867  }
868  for (l = link; l; l = l->best_prev) {
869  if (dict_real_word(dag->dict, l->from->basewid)) {
870  char *wstr = dict_wordstr(dag->dict, l->from->basewid);
871  if (wstr != NULL) {
872  len = strlen(wstr);
873  c -= len;
874  memcpy(c, wstr, len);
875  if (c > dag->hyp_str) {
876  --c;
877  *c = ' ';
878  }
879  }
880  }
881  }
882 
883  return dag->hyp_str;
884 }
885 
886 static void
887 ps_lattice_compute_lscr(ps_seg_t *seg, ps_latlink_t *link, int to)
888 {
889  ngram_model_t *lmset;
890 
891  /* Language model score is included in the link score for FSG
892  * search. FIXME: Of course, this is sort of a hack :( */
893  if (0 != strcmp(ps_search_type(seg->search), PS_SEARCH_TYPE_NGRAM)) {
894  seg->lback = 1; /* Unigram... */
895  seg->lscr = 0;
896  return;
897  }
898 
899  lmset = ((ngram_search_t *)seg->search)->lmset;
900 
901  if (link->best_prev == NULL) {
902  if (to) /* Sentence has only two words. */
903  seg->lscr = ngram_bg_score(lmset, link->to->basewid,
904  link->from->basewid, &seg->lback)
905  >> SENSCR_SHIFT;
906  else {/* This is the start symbol, its lscr is always 0. */
907  seg->lscr = 0;
908  seg->lback = 1;
909  }
910  }
911  else {
912  /* Find the two predecessor words. */
913  if (to) {
914  seg->lscr = ngram_tg_score(lmset, link->to->basewid,
915  link->from->basewid,
916  link->best_prev->from->basewid,
917  &seg->lback) >> SENSCR_SHIFT;
918  }
919  else {
920  if (link->best_prev->best_prev)
921  seg->lscr = ngram_tg_score(lmset, link->from->basewid,
922  link->best_prev->from->basewid,
923  link->best_prev->best_prev->from->basewid,
924  &seg->lback) >> SENSCR_SHIFT;
925  else
926  seg->lscr = ngram_bg_score(lmset, link->from->basewid,
927  link->best_prev->from->basewid,
928  &seg->lback) >> SENSCR_SHIFT;
929  }
930  }
931 }
932 
933 static void
934 ps_lattice_link2itor(ps_seg_t *seg, ps_latlink_t *link, int to)
935 {
936  dag_seg_t *itor = (dag_seg_t *)seg;
937  ps_latnode_t *node;
938 
939  if (to) {
940  node = link->to;
941  seg->ef = node->lef;
942  seg->prob = 0; /* norm + beta - norm */
943  }
944  else {
945  latlink_list_t *x;
946  ps_latnode_t *n;
947  logmath_t *lmath = ps_search_acmod(seg->search)->lmath;
948 
949  node = link->from;
950  seg->ef = link->ef;
951  seg->prob = link->alpha + link->beta - itor->norm;
952  /* Sum over all exits for this word and any alternate
953  pronunciations at the same frame. */
954  for (n = node; n; n = n->alt) {
955  for (x = n->exits; x; x = x->next) {
956  if (x->link == link)
957  continue;
958  seg->prob = logmath_add(lmath, seg->prob,
959  x->link->alpha + x->link->beta - itor->norm);
960  }
961  }
962  }
963  seg->word = dict_wordstr(ps_search_dict(seg->search), node->wid);
964  seg->sf = node->sf;
965  seg->ascr = link->ascr << SENSCR_SHIFT;
966  /* Compute language model score from best predecessors. */
967  ps_lattice_compute_lscr(seg, link, to);
968 }
969 
970 static void
971 ps_lattice_seg_free(ps_seg_t *seg)
972 {
973  dag_seg_t *itor = (dag_seg_t *)seg;
974 
975  ckd_free(itor->links);
976  ckd_free(itor);
977 }
978 
979 static ps_seg_t *
980 ps_lattice_seg_next(ps_seg_t *seg)
981 {
982  dag_seg_t *itor = (dag_seg_t *)seg;
983 
984  ++itor->cur;
985  if (itor->cur == itor->n_links + 1) {
986  ps_lattice_seg_free(seg);
987  return NULL;
988  }
989  else if (itor->cur == itor->n_links) {
990  /* Re-use the last link but with the "to" node. */
991  ps_lattice_link2itor(seg, itor->links[itor->cur - 1], TRUE);
992  }
993  else {
994  ps_lattice_link2itor(seg, itor->links[itor->cur], FALSE);
995  }
996 
997  return seg;
998 }
999 
1000 static ps_segfuncs_t ps_lattice_segfuncs = {
1001  /* seg_next */ ps_lattice_seg_next,
1002  /* seg_free */ ps_lattice_seg_free
1003 };
1004 
1005 ps_seg_t *
1007 {
1008  dag_seg_t *itor;
1009  ps_latlink_t *l;
1010  int cur;
1011 
1012  /* Calling this an "iterator" is a bit of a misnomer since we have
1013  * to get the entire backtrace in order to produce it.
1014  */
1015  itor = ckd_calloc(1, sizeof(*itor));
1016  itor->base.vt = &ps_lattice_segfuncs;
1017  itor->base.search = dag->search;
1018  itor->base.lwf = lwf;
1019  itor->n_links = 0;
1020  itor->norm = dag->norm;
1021 
1022  for (l = link; l; l = l->best_prev) {
1023  ++itor->n_links;
1024  }
1025  if (itor->n_links == 0) {
1026  ckd_free(itor);
1027  return NULL;
1028  }
1029 
1030  itor->links = ckd_calloc(itor->n_links, sizeof(*itor->links));
1031  cur = itor->n_links - 1;
1032  for (l = link; l; l = l->best_prev) {
1033  itor->links[cur] = l;
1034  --cur;
1035  }
1036 
1037  ps_lattice_link2itor((ps_seg_t *)itor, itor->links[0], FALSE);
1038  return (ps_seg_t *)itor;
1039 }
1040 
1043 {
1044  latlink_list_t *ll;
1045 
1046  ll = listelem_malloc(dag->latlink_list_alloc);
1047  ll->link = link;
1048  ll->next = next;
1049 
1050  return ll;
1051 }
1052 
1053 void
1055 {
1056  if (dag->q_head == NULL)
1057  dag->q_head = dag->q_tail = latlink_list_new(dag, link, NULL);
1058  else {
1059  dag->q_tail->next = latlink_list_new(dag, link, NULL);
1060  dag->q_tail = dag->q_tail->next;
1061  }
1062 
1063 }
1064 
1065 ps_latlink_t *
1067 {
1068  latlink_list_t *x;
1069  ps_latlink_t *link;
1070 
1071  if (dag->q_head == NULL)
1072  return NULL;
1073  link = dag->q_head->link;
1074  x = dag->q_head->next;
1075  listelem_free(dag->latlink_list_alloc, dag->q_head);
1076  dag->q_head = x;
1077  if (dag->q_head == NULL)
1078  dag->q_tail = NULL;
1079  return link;
1080 }
1081 
1082 void
1084 {
1085  while (ps_lattice_popq(dag)) {
1086  /* Do nothing. */
1087  }
1088 }
1089 
1090 ps_latlink_t *
1092 {
1093  ps_latnode_t *node;
1094  latlink_list_t *x;
1095 
1096  /* Cancel any unfinished traversal. */
1097  ps_lattice_delq(dag);
1098 
1099  /* Initialize node fanin counts and path scores. */
1100  for (node = dag->nodes; node; node = node->next)
1101  node->info.fanin = 0;
1102  for (node = dag->nodes; node; node = node->next) {
1103  for (x = node->exits; x; x = x->next)
1104  (x->link->to->info.fanin)++;
1105  }
1106 
1107  /* Initialize agenda with all exits from start. */
1108  if (start == NULL) start = dag->start;
1109  for (x = start->exits; x; x = x->next)
1110  ps_lattice_pushq(dag, x->link);
1111 
1112  /* Pull the first edge off the queue. */
1113  return ps_lattice_traverse_next(dag, end);
1114 }
1115 
1116 ps_latlink_t *
1118 {
1119  ps_latlink_t *next;
1120 
1121  next = ps_lattice_popq(dag);
1122  if (next == NULL)
1123  return NULL;
1124 
1125  /* Decrease fanin count for destination node and expand outgoing
1126  * edges if all incoming edges have been seen. */
1127  --next->to->info.fanin;
1128  if (next->to->info.fanin == 0) {
1129  latlink_list_t *x;
1130 
1131  if (end == NULL) end = dag->end;
1132  if (next->to == end) {
1133  /* If we have traversed all links entering the end node,
1134  * clear the queue, causing future calls to this function
1135  * to return NULL. */
1136  ps_lattice_delq(dag);
1137  return next;
1138  }
1139 
1140  /* Extend all outgoing edges. */
1141  for (x = next->to->exits; x; x = x->next)
1142  ps_lattice_pushq(dag, x->link);
1143  }
1144  return next;
1145 }
1146 
1147 ps_latlink_t *
1149 {
1150  ps_latnode_t *node;
1151  latlink_list_t *x;
1152 
1153  /* Cancel any unfinished traversal. */
1154  ps_lattice_delq(dag);
1155 
1156  /* Initialize node fanout counts and path scores. */
1157  for (node = dag->nodes; node; node = node->next) {
1158  node->info.fanin = 0;
1159  for (x = node->exits; x; x = x->next)
1160  ++node->info.fanin;
1161  }
1162 
1163  /* Initialize agenda with all entries from end. */
1164  if (end == NULL) end = dag->end;
1165  for (x = end->entries; x; x = x->next)
1166  ps_lattice_pushq(dag, x->link);
1167 
1168  /* Pull the first edge off the queue. */
1169  return ps_lattice_reverse_next(dag, start);
1170 }
1171 
1172 ps_latlink_t *
1174 {
1175  ps_latlink_t *next;
1176 
1177  next = ps_lattice_popq(dag);
1178  if (next == NULL)
1179  return NULL;
1180 
1181  /* Decrease fanout count for source node and expand incoming
1182  * edges if all incoming edges have been seen. */
1183  --next->from->info.fanin;
1184  if (next->from->info.fanin == 0) {
1185  latlink_list_t *x;
1186 
1187  if (start == NULL) start = dag->start;
1188  if (next->from == start) {
1189  /* If we have traversed all links entering the start node,
1190  * clear the queue, causing future calls to this function
1191  * to return NULL. */
1192  ps_lattice_delq(dag);
1193  return next;
1194  }
1195 
1196  /* Extend all outgoing edges. */
1197  for (x = next->from->entries; x; x = x->next)
1198  ps_lattice_pushq(dag, x->link);
1199  }
1200  return next;
1201 }
1202 
1203 /*
1204  * Find the best score from dag->start to end point of any link and
1205  * use it to update links further down the path. This is like
1206  * single-source shortest path search, except that it is done over
1207  * edges rather than nodes, which allows us to do exact trigram scoring.
1208  *
1209  * Helpfully enough, we get half of the posterior probability
1210  * calculation for free that way too. (interesting research topic: is
1211  * there a reliable Viterbi analogue to word-level Forward-Backward
1212  * like there is for state-level? Or, is it just lattice density?)
1213  */
1214 ps_latlink_t *
1215 ps_lattice_bestpath(ps_lattice_t *dag, ngram_model_t *lmset,
1216  float32 lwf, float32 ascale)
1217 {
1218  ps_search_t *search;
1219  ps_latnode_t *node;
1220  ps_latlink_t *link;
1221  ps_latlink_t *bestend;
1222  latlink_list_t *x;
1223  logmath_t *lmath;
1224  int32 bestescr;
1225 
1226  search = dag->search;
1227  lmath = dag->lmath;
1228 
1229  /* Initialize path scores for all links exiting dag->start, and
1230  * set all other scores to the minimum. Also initialize alphas to
1231  * log-zero. */
1232  for (node = dag->nodes; node; node = node->next) {
1233  for (x = node->exits; x; x = x->next) {
1234  x->link->path_scr = MAX_NEG_INT32;
1235  x->link->alpha = logmath_get_zero(lmath);
1236  }
1237  }
1238  for (x = dag->start->exits; x; x = x->next) {
1239  int32 n_used;
1240  int16 to_is_fil;
1241 
1242  to_is_fil = dict_filler_word(ps_search_dict(search), x->link->to->basewid) && x->link->to != dag->end;
1243 
1244  /* Best path points to dag->start, obviously. */
1245  x->link->path_scr = x->link->ascr;
1246  if (lmset && !to_is_fil)
1247  x->link->path_scr += (ngram_bg_score(lmset, x->link->to->basewid,
1248  ps_search_start_wid(search), &n_used) >> SENSCR_SHIFT) * lwf;
1249  x->link->best_prev = NULL;
1250  /* No predecessors for start links. */
1251  x->link->alpha = 0;
1252  }
1253 
1254  /* Traverse the edges in the graph, updating path scores. */
1255  for (link = ps_lattice_traverse_edges(dag, NULL, NULL);
1256  link; link = ps_lattice_traverse_next(dag, NULL)) {
1257  int32 bprob, n_used;
1258  int32 w3_wid, w2_wid;
1259  int16 w3_is_fil, w2_is_fil;
1260  ps_latlink_t *prev_link;
1261 
1262  /* Sanity check, we should not be traversing edges that
1263  * weren't previously updated, otherwise nasty overflows will result. */
1264  assert(link->path_scr != MAX_NEG_INT32);
1265 
1266  /* Find word predecessor if from-word is filler */
1267  w3_wid = link->from->basewid;
1268  w2_wid = link->to->basewid;
1269  w3_is_fil = dict_filler_word(ps_search_dict(search), link->from->basewid) && link->from != dag->start;
1270  w2_is_fil = dict_filler_word(ps_search_dict(search), w2_wid) && link->to != dag->end;
1271  prev_link = link;
1272 
1273  if (w3_is_fil) {
1274  while (prev_link->best_prev != NULL) {
1275  prev_link = prev_link->best_prev;
1276  w3_wid = prev_link->from->basewid;
1277  if (!dict_filler_word(ps_search_dict(search), w3_wid) || prev_link->from == dag->start) {
1278  w3_is_fil = FALSE;
1279  break;
1280  }
1281  }
1282  }
1283 
1284  /* Calculate common bigram probability for all alphas. */
1285  if (lmset && !w3_is_fil && !w2_is_fil)
1286  bprob = ngram_ng_prob(lmset, w2_wid, &w3_wid, 1, &n_used);
1287  else
1288  bprob = 0;
1289  /* Add in this link's acoustic score, which was a constant
1290  factor in previous computations (if any). */
1291  link->alpha += (link->ascr << SENSCR_SHIFT) * ascale;
1292 
1293  if (w2_is_fil) {
1294  w2_is_fil = w3_is_fil;
1295  w3_is_fil = TRUE;
1296  w2_wid = w3_wid;
1297  while (prev_link->best_prev != NULL) {
1298  prev_link = prev_link->best_prev;
1299  w3_wid = prev_link->from->basewid;
1300  if (!dict_filler_word(ps_search_dict(search), w3_wid) || prev_link->from == dag->start) {
1301  w3_is_fil = FALSE;
1302  break;
1303  }
1304  }
1305  }
1306 
1307  /* Update scores for all paths exiting link->to. */
1308  for (x = link->to->exits; x; x = x->next) {
1309  int32 score;
1310  int32 w1_wid;
1311  int16 w1_is_fil;
1312 
1313  w1_wid = x->link->to->basewid;
1314  w1_is_fil = dict_filler_word(ps_search_dict(search), w1_wid) && x->link->to != dag->end;
1315 
1316  /* Update alpha with sum of previous alphas. */
1317  x->link->alpha = logmath_add(lmath, x->link->alpha, link->alpha + bprob);
1318 
1319  /* Update link score with maximum link score. */
1320  score = link->path_scr + x->link->ascr;
1321  /* Calculate language score for bestpath if possible */
1322  if (lmset && !w1_is_fil && !w2_is_fil) {
1323  if (w3_is_fil)
1324  /* partial context available */
1325  score += (ngram_bg_score(lmset, w1_wid, w2_wid, &n_used) >> SENSCR_SHIFT) * lwf;
1326  else
1327  /* full context available */
1328  score += (ngram_tg_score(lmset, w1_wid, w2_wid, w3_wid, &n_used) >> SENSCR_SHIFT) * lwf;
1329  }
1330 
1331  if (score BETTER_THAN x->link->path_scr) {
1332  x->link->path_scr = score;
1333  x->link->best_prev = link;
1334  }
1335  }
1336  }
1337 
1338  /* Find best link entering final node, and calculate normalizer
1339  * for posterior probabilities. */
1340  bestend = NULL;
1341  bestescr = MAX_NEG_INT32;
1342 
1343  /* Normalizer is the alpha for the imaginary link exiting the
1344  final node. */
1345  dag->norm = logmath_get_zero(lmath);
1346  for (x = dag->end->entries; x; x = x->next) {
1347  int32 bprob, n_used;
1348  int32 from_wid;
1349  int16 from_is_fil;
1350 
1351  from_wid = x->link->from->basewid;
1352  from_is_fil = dict_filler_word(ps_search_dict(search), from_wid) && x->link->from != dag->start;
1353  if (from_is_fil) {
1354  ps_latlink_t *prev_link = x->link;
1355  while (prev_link->best_prev != NULL) {
1356  prev_link = prev_link->best_prev;
1357  from_wid = prev_link->from->basewid;
1358  if (!dict_filler_word(ps_search_dict(search), from_wid) || prev_link->from == dag->start) {
1359  from_is_fil = FALSE;
1360  break;
1361  }
1362  }
1363  }
1364 
1365  if (lmset && !from_is_fil)
1366  bprob = ngram_ng_prob(lmset,
1367  x->link->to->basewid,
1368  &from_wid, 1, &n_used);
1369  else
1370  bprob = 0;
1371  dag->norm = logmath_add(lmath, dag->norm, x->link->alpha + bprob);
1372  if (x->link->path_scr BETTER_THAN bestescr) {
1373  bestescr = x->link->path_scr;
1374  bestend = x->link;
1375  }
1376  }
1377  /* FIXME: floating point... */
1378  dag->norm += (int32)(dag->final_node_ascr << SENSCR_SHIFT) * ascale;
1379 
1380  E_INFO("Bestpath score: %d\n", bestescr);
1381  E_INFO("Normalizer P(O) = alpha(%s:%d:%d) = %d\n",
1382  dict_wordstr(dag->search->dict, dag->end->wid),
1383  dag->end->sf, dag->end->lef,
1384  dag->norm);
1385  return bestend;
1386 }
1387 
1388 static int32
1389 ps_lattice_joint(ps_lattice_t *dag, ps_latlink_t *link, float32 ascale)
1390 {
1391  ngram_model_t *lmset;
1392  int32 jprob;
1393 
1394  /* Sort of a hack... */
1395  if (dag->search && 0 == strcmp(ps_search_type(dag->search), PS_SEARCH_TYPE_NGRAM))
1396  lmset = ((ngram_search_t *)dag->search)->lmset;
1397  else
1398  lmset = NULL;
1399 
1400  jprob = (dag->final_node_ascr << SENSCR_SHIFT) * ascale;
1401  while (link) {
1402  if (lmset) {
1403  int lback;
1404  int32 from_wid, to_wid;
1405  int16 from_is_fil, to_is_fil;
1406 
1407  from_wid = link->from->basewid;
1408  to_wid = link->to->basewid;
1409  from_is_fil = dict_filler_word(dag->dict, from_wid) && link->from != dag->start;
1410  to_is_fil = dict_filler_word(dag->dict, to_wid) && link->to != dag->end;
1411 
1412  /* Find word predecessor if from-word is filler */
1413  if (!to_is_fil && from_is_fil) {
1414  ps_latlink_t *prev_link = link;
1415  while (prev_link->best_prev != NULL) {
1416  prev_link = prev_link->best_prev;
1417  from_wid = prev_link->from->basewid;
1418  if (!dict_filler_word(dag->dict, from_wid) || prev_link->from == dag->start) {
1419  from_is_fil = FALSE;
1420  break;
1421  }
1422  }
1423  }
1424 
1425  /* Compute unscaled language model probability. Note that
1426  this is actually not the language model probability
1427  that corresponds to this link, but that is okay,
1428  because we are just taking the sum over all links in
1429  the best path. */
1430  if (!from_is_fil && !to_is_fil)
1431  jprob += ngram_ng_prob(lmset, to_wid,
1432  &from_wid, 1, &lback);
1433  }
1434  /* If there is no language model, we assume that the language
1435  model probability (such as it is) has been included in the
1436  link score. */
1437  jprob += (link->ascr << SENSCR_SHIFT) * ascale;
1438  link = link->best_prev;
1439  }
1440 
1441  E_INFO("Joint P(O,S) = %d P(S|O) = %d\n", jprob, jprob - dag->norm);
1442  return jprob;
1443 }
1444 
1445 int32
1446 ps_lattice_posterior(ps_lattice_t *dag, ngram_model_t *lmset,
1447  float32 ascale)
1448 {
1449  logmath_t *lmath;
1450  ps_latnode_t *node;
1451  ps_latlink_t *link;
1452  latlink_list_t *x;
1453  ps_latlink_t *bestend;
1454  int32 bestescr;
1455 
1456  lmath = dag->lmath;
1457 
1458  /* Reset all betas to zero. */
1459  for (node = dag->nodes; node; node = node->next) {
1460  for (x = node->exits; x; x = x->next) {
1461  x->link->beta = logmath_get_zero(lmath);
1462  }
1463  }
1464 
1465  bestend = NULL;
1466  bestescr = MAX_NEG_INT32;
1467  /* Accumulate backward probabilities for all links. */
1468  for (link = ps_lattice_reverse_edges(dag, NULL, NULL);
1469  link; link = ps_lattice_reverse_next(dag, NULL)) {
1470  int32 bprob, n_used;
1471  int32 from_wid, to_wid;
1472  int16 from_is_fil, to_is_fil;
1473 
1474  from_wid = link->from->basewid;
1475  to_wid = link->to->basewid;
1476  from_is_fil = dict_filler_word(dag->dict, from_wid) && link->from != dag->start;
1477  to_is_fil = dict_filler_word(dag->dict, to_wid) && link->to != dag->end;
1478 
1479  /* Find word predecessor if from-word is filler */
1480  if (!to_is_fil && from_is_fil) {
1481  ps_latlink_t *prev_link = link;
1482  while (prev_link->best_prev != NULL) {
1483  prev_link = prev_link->best_prev;
1484  from_wid = prev_link->from->basewid;
1485  if (!dict_filler_word(dag->dict, from_wid) || prev_link->from == dag->start) {
1486  from_is_fil = FALSE;
1487  break;
1488  }
1489  }
1490  }
1491 
1492  /* Calculate LM probability. */
1493  if (lmset && !from_is_fil && !to_is_fil)
1494  bprob = ngram_ng_prob(lmset, to_wid, &from_wid, 1, &n_used);
1495  else
1496  bprob = 0;
1497 
1498  if (link->to == dag->end) {
1499  /* Track the best path - we will backtrace in order to
1500  calculate the unscaled joint probability for sentence
1501  posterior. */
1502  if (link->path_scr BETTER_THAN bestescr) {
1503  bestescr = link->path_scr;
1504  bestend = link;
1505  }
1506  /* Imaginary exit link from final node has beta = 1.0 */
1507  link->beta = bprob + (dag->final_node_ascr << SENSCR_SHIFT) * ascale;
1508  }
1509  else {
1510  /* Update beta from all outgoing betas. */
1511  for (x = link->to->exits; x; x = x->next) {
1512  link->beta = logmath_add(lmath, link->beta,
1513  x->link->beta + bprob
1514  + (x->link->ascr << SENSCR_SHIFT) * ascale);
1515  }
1516  }
1517  }
1518 
1519  /* Return P(S|O) = P(O,S)/P(O) */
1520  return ps_lattice_joint(dag, bestend, ascale) - dag->norm;
1521 }
1522 
1523 int32
1525 {
1526  ps_latlink_t *link;
1527  int npruned = 0;
1528 
1529  for (link = ps_lattice_traverse_edges(dag, dag->start, dag->end);
1530  link; link = ps_lattice_traverse_next(dag, dag->end)) {
1531  link->from->reachable = FALSE;
1532  if (link->alpha + link->beta - dag->norm < beam) {
1533  latlink_list_t *x, *tmp, *next;
1534  tmp = NULL;
1535  for (x = link->from->exits; x; x = next) {
1536  next = x->next;
1537  if (x->link == link) {
1538  listelem_free(dag->latlink_list_alloc, x);
1539  }
1540  else {
1541  x->next = tmp;
1542  tmp = x;
1543  }
1544  }
1545  link->from->exits = tmp;
1546  tmp = NULL;
1547  for (x = link->to->entries; x; x = next) {
1548  next = x->next;
1549  if (x->link == link) {
1550  listelem_free(dag->latlink_list_alloc, x);
1551  }
1552  else {
1553  x->next = tmp;
1554  tmp = x;
1555  }
1556  }
1557  link->to->entries = tmp;
1558  listelem_free(dag->latlink_alloc, link);
1559  ++npruned;
1560  }
1561  }
1562  dag_mark_reachable(dag->end);
1564  return npruned;
1565 }
1566 
1567 
1568 /* Parameters to prune n-best alternatives search */
1569 #define MAX_PATHS 500 /* Max allowed active paths at any time */
1570 #define MAX_HYP_TRIES 10000
1571 
1572 /*
1573  * For each node in any path between from and end of utt, find the
1574  * best score from "from".sf to end of utt. (NOTE: Uses bigram probs;
1575  * this is an estimate of the best score from "from".) (NOTE #2: yes,
1576  * this is the "heuristic score" used in A* search)
1577  */
1578 static int32
1579 best_rem_score(ps_astar_t *nbest, ps_latnode_t * from)
1580 {
1581  latlink_list_t *x;
1582  int32 bestscore, score;
1583 
1584  if (from->info.rem_score <= 0)
1585  return (from->info.rem_score);
1586 
1587  /* Best score from "from" to end of utt not known; compute from successors */
1588  bestscore = WORST_SCORE;
1589  for (x = from->exits; x; x = x->next) {
1590  int32 n_used;
1591 
1592  score = best_rem_score(nbest, x->link->to);
1593  score += x->link->ascr;
1594  if (nbest->lmset)
1595  score += (ngram_bg_score(nbest->lmset, x->link->to->basewid,
1596  from->basewid, &n_used) >> SENSCR_SHIFT)
1597  * nbest->lwf;
1598  if (score BETTER_THAN bestscore)
1599  bestscore = score;
1600  }
1601  from->info.rem_score = bestscore;
1602 
1603  return bestscore;
1604 }
1605 
1606 /*
1607  * Insert newpath in sorted (by path score) list of paths. But if newpath is
1608  * too far down the list, drop it (FIXME: necessary?)
1609  * total_score = path score (newpath) + rem_score to end of utt.
1610  */
1611 static void
1612 path_insert(ps_astar_t *nbest, ps_latpath_t *newpath, int32 total_score)
1613 {
1614  ps_latpath_t *prev, *p;
1615  int32 i;
1616 
1617  prev = NULL;
1618  for (i = 0, p = nbest->path_list; (i < MAX_PATHS) && p; p = p->next, i++) {
1619  if ((p->score + p->node->info.rem_score) < total_score)
1620  break;
1621  prev = p;
1622  }
1623 
1624  /* newpath should be inserted between prev and p */
1625  if (i < MAX_PATHS) {
1626  /* Insert new partial hyp */
1627  newpath->next = p;
1628  if (!prev)
1629  nbest->path_list = newpath;
1630  else
1631  prev->next = newpath;
1632  if (!p)
1633  nbest->path_tail = newpath;
1634 
1635  nbest->n_path++;
1636  nbest->n_hyp_insert++;
1637  nbest->insert_depth += i;
1638  }
1639  else {
1640  /* newpath score too low; reject it and also prune paths beyond MAX_PATHS */
1641  nbest->path_tail = prev;
1642  prev->next = NULL;
1643  nbest->n_path = MAX_PATHS;
1644  listelem_free(nbest->latpath_alloc, newpath);
1645 
1646  nbest->n_hyp_reject++;
1647  for (; p; p = newpath) {
1648  newpath = p->next;
1649  listelem_free(nbest->latpath_alloc, p);
1650  nbest->n_hyp_reject++;
1651  }
1652  }
1653 }
1654 
1655 /* Find all possible extensions to given partial path */
1656 static void
1657 path_extend(ps_astar_t *nbest, ps_latpath_t * path)
1658 {
1659  latlink_list_t *x;
1660  ps_latpath_t *newpath;
1661  int32 total_score, tail_score;
1662 
1663  /* Consider all successors of path->node */
1664  for (x = path->node->exits; x; x = x->next) {
1665  int32 n_used;
1666 
1667  /* Skip successor if no path from it reaches the final node */
1668  if (x->link->to->info.rem_score <= WORST_SCORE)
1669  continue;
1670 
1671  /* Create path extension and compute exact score for this extension */
1672  newpath = listelem_malloc(nbest->latpath_alloc);
1673  newpath->node = x->link->to;
1674  newpath->parent = path;
1675  newpath->score = path->score + x->link->ascr;
1676  if (nbest->lmset) {
1677  if (path->parent) {
1678  newpath->score += nbest->lwf
1679  * (ngram_tg_score(nbest->lmset, newpath->node->basewid,
1680  path->node->basewid,
1681  path->parent->node->basewid, &n_used)
1682  >> SENSCR_SHIFT);
1683  }
1684  else
1685  newpath->score += nbest->lwf
1686  * (ngram_bg_score(nbest->lmset, newpath->node->basewid,
1687  path->node->basewid, &n_used)
1688  >> SENSCR_SHIFT);
1689  }
1690 
1691  /* Insert new partial path hypothesis into sorted path_list */
1692  nbest->n_hyp_tried++;
1693  total_score = newpath->score + newpath->node->info.rem_score;
1694 
1695  /* First see if hyp would be worse than the worst */
1696  if (nbest->n_path >= MAX_PATHS) {
1697  tail_score =
1698  nbest->path_tail->score
1699  + nbest->path_tail->node->info.rem_score;
1700  if (total_score < tail_score) {
1701  listelem_free(nbest->latpath_alloc, newpath);
1702  nbest->n_hyp_reject++;
1703  continue;
1704  }
1705  }
1706 
1707  path_insert(nbest, newpath, total_score);
1708  }
1709 }
1710 
1711 ps_astar_t *
1713  ngram_model_t *lmset,
1714  float32 lwf,
1715  int sf, int ef,
1716  int w1, int w2)
1717 {
1718  ps_astar_t *nbest;
1719  ps_latnode_t *node;
1720 
1721  nbest = ckd_calloc(1, sizeof(*nbest));
1722  nbest->dag = dag;
1723  nbest->lmset = lmset;
1724  nbest->lwf = lwf;
1725  nbest->sf = sf;
1726  if (ef < 0)
1727  nbest->ef = dag->n_frames + 1;
1728  else
1729  nbest->ef = ef;
1730  nbest->w1 = w1;
1731  nbest->w2 = w2;
1732  nbest->latpath_alloc = listelem_alloc_init(sizeof(ps_latpath_t));
1733 
1734  /* Initialize rem_score (A* heuristic) to default values */
1735  for (node = dag->nodes; node; node = node->next) {
1736  if (node == dag->end)
1737  node->info.rem_score = 0;
1738  else if (node->exits == NULL)
1739  node->info.rem_score = WORST_SCORE;
1740  else
1741  node->info.rem_score = 1; /* +ve => unknown value */
1742  }
1743 
1744  /* Create initial partial hypotheses list consisting of nodes starting at sf */
1745  nbest->path_list = nbest->path_tail = NULL;
1746  for (node = dag->nodes; node; node = node->next) {
1747  if (node->sf == sf) {
1748  ps_latpath_t *path;
1749  int32 n_used;
1750 
1751  best_rem_score(nbest, node);
1752  path = listelem_malloc(nbest->latpath_alloc);
1753  path->node = node;
1754  path->parent = NULL;
1755  if (nbest->lmset)
1756  path->score = nbest->lwf *
1757  ((w1 < 0)
1758  ? ngram_bg_score(nbest->lmset, node->basewid, w2, &n_used)
1759  : ngram_tg_score(nbest->lmset, node->basewid, w2, w1, &n_used));
1760  else
1761  path->score = 0;
1762  path->score >>= SENSCR_SHIFT;
1763  path_insert(nbest, path, path->score + node->info.rem_score);
1764  }
1765  }
1766 
1767  return nbest;
1768 }
1769 
1770 ps_latpath_t *
1772 {
1773  ps_lattice_t *dag;
1774 
1775  dag = nbest->dag;
1776 
1777  /* Pop the top (best) partial hypothesis */
1778  while ((nbest->top = nbest->path_list) != NULL) {
1779  nbest->path_list = nbest->path_list->next;
1780  if (nbest->top == nbest->path_tail)
1781  nbest->path_tail = NULL;
1782  nbest->n_path--;
1783 
1784  /* Complete hypothesis? */
1785  if ((nbest->top->node->sf >= nbest->ef)
1786  || ((nbest->top->node == dag->end) &&
1787  (nbest->ef > dag->end->sf))) {
1788  /* FIXME: Verify that it is non-empty. Also we may want
1789  * to verify that it is actually distinct from other
1790  * paths, since often this is not the case*/
1791  return nbest->top;
1792  }
1793  else {
1794  if (nbest->top->node->fef < nbest->ef)
1795  path_extend(nbest, nbest->top);
1796  }
1797  }
1798 
1799  /* Did not find any more paths to extend. */
1800  return NULL;
1801 }
1802 
1803 char const *
1805 {
1806  ps_search_t *search;
1807  ps_latpath_t *p;
1808  size_t len;
1809  char *c;
1810  char *hyp;
1811 
1812  search = nbest->dag->search;
1813 
1814  /* Backtrace once to get hypothesis length. */
1815  len = 0;
1816  for (p = path; p; p = p->parent) {
1817  if (dict_real_word(ps_search_dict(search), p->node->basewid)) {
1818  char *wstr = dict_wordstr(ps_search_dict(search), p->node->basewid);
1819  if (wstr != NULL)
1820  len += strlen(wstr) + 1;
1821  }
1822  }
1823 
1824  if (len == 0) {
1825  return NULL;
1826  }
1827 
1828  /* Backtrace again to construct hypothesis string. */
1829  hyp = ckd_calloc(1, len);
1830  c = hyp + len - 1;
1831  for (p = path; p; p = p->parent) {
1832  if (dict_real_word(ps_search_dict(search), p->node->basewid)) {
1833  char *wstr = dict_wordstr(ps_search_dict(search), p->node->basewid);
1834  if (wstr != NULL) {
1835  len = strlen(wstr);
1836  c -= len;
1837  memcpy(c, wstr, len);
1838  if (c > hyp) {
1839  --c;
1840  *c = ' ';
1841  }
1842  }
1843  }
1844  }
1845 
1846  nbest->hyps = glist_add_ptr(nbest->hyps, hyp);
1847  return hyp;
1848 }
1849 
1850 static void
1851 ps_astar_node2itor(astar_seg_t *itor)
1852 {
1853  ps_seg_t *seg = (ps_seg_t *)itor;
1854  ps_latnode_t *node;
1855 
1856  assert(itor->cur < itor->n_nodes);
1857  node = itor->nodes[itor->cur];
1858  if (itor->cur == itor->n_nodes - 1)
1859  seg->ef = node->lef;
1860  else
1861  seg->ef = itor->nodes[itor->cur + 1]->sf - 1;
1862  seg->word = dict_wordstr(ps_search_dict(seg->search), node->wid);
1863  seg->sf = node->sf;
1864  seg->prob = 0; /* FIXME: implement forward-backward */
1865 }
1866 
1867 static void
1868 ps_astar_seg_free(ps_seg_t *seg)
1869 {
1870  astar_seg_t *itor = (astar_seg_t *)seg;
1871  ckd_free(itor->nodes);
1872  ckd_free(itor);
1873 }
1874 
1875 static ps_seg_t *
1876 ps_astar_seg_next(ps_seg_t *seg)
1877 {
1878  astar_seg_t *itor = (astar_seg_t *)seg;
1879 
1880  ++itor->cur;
1881  if (itor->cur == itor->n_nodes) {
1882  ps_astar_seg_free(seg);
1883  return NULL;
1884  }
1885  else {
1886  ps_astar_node2itor(itor);
1887  }
1888 
1889  return seg;
1890 }
1891 
1892 static ps_segfuncs_t ps_astar_segfuncs = {
1893  /* seg_next */ ps_astar_seg_next,
1894  /* seg_free */ ps_astar_seg_free
1895 };
1896 
1897 ps_seg_t *
1898 ps_astar_seg_iter(ps_astar_t *astar, ps_latpath_t *path, float32 lwf)
1899 {
1900  astar_seg_t *itor;
1901  ps_latpath_t *p;
1902  int cur;
1903 
1904  /* Backtrace and make an iterator, this should look familiar by now. */
1905  itor = ckd_calloc(1, sizeof(*itor));
1906  itor->base.vt = &ps_astar_segfuncs;
1907  itor->base.search = astar->dag->search;
1908  itor->base.lwf = lwf;
1909  itor->n_nodes = itor->cur = 0;
1910  for (p = path; p; p = p->parent) {
1911  ++itor->n_nodes;
1912  }
1913  itor->nodes = ckd_calloc(itor->n_nodes, sizeof(*itor->nodes));
1914  cur = itor->n_nodes - 1;
1915  for (p = path; p; p = p->parent) {
1916  itor->nodes[cur] = p->node;
1917  --cur;
1918  }
1919 
1920  ps_astar_node2itor(itor);
1921  return (ps_seg_t *)itor;
1922 }
1923 
1924 void
1926 {
1927  gnode_t *gn;
1928 
1929  /* Free all hyps. */
1930  for (gn = nbest->hyps; gn; gn = gnode_next(gn)) {
1931  ckd_free(gnode_ptr(gn));
1932  }
1933  glist_free(nbest->hyps);
1934  /* Free all paths. */
1935  listelem_alloc_free(nbest->latpath_alloc);
1936  /* Free the Henge. */
1937  ckd_free(nbest);
1938 }
1939 
dict_t * dict_init(cmd_ln_t *config, bin_mdef_t *mdef)
Initialize a new dictionary.
Definition: dict.c:252
Internal implementation of PocketSphinx decoder.
int16 reachable
From.
void ps_astar_finish(ps_astar_t *nbest)
Finish N-best search, releasing resources associated with it.
Definition: ps_lattice.c:1925
ps_latpath_t * ps_astar_next(ps_astar_t *nbest)
Find next best hypothesis of A* on a word graph.
Definition: ps_lattice.c:1771
void ps_lattice_delq(ps_lattice_t *dag)
Clear and reset the traversal queue.
Definition: ps_lattice.c:1083
char const * ps_astar_hyp(ps_astar_t *nbest, ps_latpath_t *path)
Get hypothesis string from A* search.
Definition: ps_lattice.c:1804
listelem_alloc_t * latlink_list_alloc
List element allocator for this DAG.
POCKETSPHINX_EXPORT int ps_latlink_times(ps_latlink_t *link, int16 *out_sf)
Get start and end times from a lattice link.
Definition: ps_lattice.c:779
POCKETSPHINX_EXPORT ps_lattice_t * ps_lattice_read(struct ps_decoder_s *ps, char const *file)
Read a lattice from a file on disk.
Definition: ps_lattice.c:387
Base structure for search module.
POCKETSPHINX_EXPORT int ps_lattice_write_htk(ps_lattice_t *dag, char const *filename)
Write a lattice to disk in HTK format.
Definition: ps_lattice.c:270
ps_seg_t * ps_lattice_seg_iter(ps_lattice_t *dag, ps_latlink_t *link, float32 lwf)
Get hypothesis segmentation iterator after bestpath search.
Definition: ps_lattice.c:1006
logmath_t * lmath
Log-math object.
dict_t * dict
Pronunciation dictionary.
int32 fanin
Number nodes with links to this node.
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
Definition: dict.c:399
int32 lef
Last end frame.
acmod_t * acmod
Acoustic model.
int dict_free(dict_t *d)
Release a pointer to a dictionary.
Definition: dict.c:468
int32 id
Unique id for this node.
void ps_lattice_penalize_fillers(ps_lattice_t *dag, int32 silpen, int32 fillpen)
Insert penalty for fillers.
Definition: ps_lattice.c:106
ps_seg_t base
Base structure.
glist_t hyps
List of hypothesis strings.
ps_latnode_t * start
Starting node.
ps_segfuncs_t * vt
V-table of seg methods.
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
ps_latlink_t ** links
Array of lattice links.
int32 lscr
Language model score.
Operations on dictionary.
POCKETSPHINX_EXPORT int32 ps_latlink_prob(ps_lattice_t *dag, ps_latlink_t *link, int32 *out_ascr)
Get acoustic score and posterior probability from a lattice link.
Definition: ps_lattice.c:822
POCKETSPHINX_EXPORT logmath_t * ps_lattice_get_logmath(ps_lattice_t *dag)
Get the log-math computation object for this lattice.
Definition: ps_lattice.c:682
latlink_list_t * q_head
Queue of links for traversal.
ps_search_t * search
Search (if generated by search).
#define BAD_S3WID
Dictionary word id.
Definition: s3types.h:90
frame_idx_t n_frames
Number of frames for this utterance.
POCKETSPHINX_EXPORT ps_latlink_iter_t * ps_latnode_entries(ps_latnode_t *node)
Iterate over entries to this node.
Definition: ps_lattice.c:755
POCKETSPHINX_EXPORT ps_latlink_t * ps_lattice_traverse_next(ps_lattice_t *dag, ps_latnode_t *end)
Get the next link in forward traversal.
Definition: ps_lattice.c:1117
POCKETSPHINX_EXPORT void ps_latlink_iter_free(ps_latlink_iter_t *itor)
Stop iterating over links.
Definition: ps_lattice.c:767
Word graph search implementation.
POCKETSPHINX_EXPORT int ps_lattice_n_frames(ps_lattice_t *dag)
Get the number of frames in the lattice.
Definition: ps_lattice.c:633
int32 frate
Frame rate.
ps_latnode_t * nodes
List of all nodes.
listelem_alloc_t * latnode_alloc
Node allocator for this DAG.
POCKETSPHINX_EXPORT ps_latlink_t * ps_lattice_traverse_edges(ps_lattice_t *dag, ps_latnode_t *start, ps_latnode_t *end)
Start a forward traversal of edges in a word graph.
Definition: ps_lattice.c:1091
A* search structure.
int32 prob
Log posterior probability.
POCKETSPHINX_EXPORT int32 ps_latnode_prob(ps_lattice_t *dag, ps_latnode_t *node, ps_latlink_t **out_link)
Get best posterior probability and associated acoustic score from a lattice node. ...
Definition: ps_lattice.c:732
dict_t * dict_retain(dict_t *d)
Retain a pointer to an dict_t.
Definition: dict.c:461
latlink_list_t * entries
Links into this node.
POCKETSPHINX_EXPORT int32 ps_lattice_posterior(ps_lattice_t *dag, ngram_model_t *lmset, float32 ascale)
Calculate link posterior probabilities on a word graph.
Definition: ps_lattice.c:1446
struct ps_latnode_s * alt
Node with alternate pronunciation for this word.
char const * word
Word string (pointer into dictionary hash)
ps_search_t * search
Search object from whence this came.
int32 final_node_ascr
Acoustic score of implicit link exiting final node.
ps_search_t * search
Currently active search module.
ps_latlink_t * ps_lattice_popq(ps_lattice_t *dag)
Remove an edge from the traversal queue.
Definition: ps_lattice.c:1066
struct ps_latpath_s * next
Pointer to next path in list of paths.
POCKETSPHINX_EXPORT char const * ps_latnode_baseword(ps_lattice_t *dag, ps_latnode_t *node)
Get base word string for this node.
Definition: ps_lattice.c:726
logmath_t * lmath
Log math computation.
void ps_lattice_pushq(ps_lattice_t *dag, ps_latlink_t *link)
Add an edge to the traversal queue.
Definition: ps_lattice.c:1054
N-Gram search module structure.
Definition: ngram_search.h:197
POCKETSPHINX_EXPORT char const * ps_latlink_baseword(ps_lattice_t *dag, ps_latlink_t *link)
Get base word string from a lattice link.
Definition: ps_lattice.c:808
struct ps_latpath_s * parent
Previous element in this path.
Decoder object.
void ps_lattice_delete_unreachable(ps_lattice_t *dag)
Remove nodes marked as unreachable.
Definition: ps_lattice.c:174
int16 n_links
Number of lattice links.
ps_latnode_t * end
Ending node.
frame_idx_t sf
Start frame.
POCKETSPHINX_EXPORT ps_latlink_t * ps_lattice_bestpath(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, float32 ascale)
Do N-Gram based best-path search on a word graph.
Definition: ps_lattice.c:1215
latlink_list_t * exits
Links out of this node.
int32 silence
Silence word ID.
#define WORST_SCORE
Large &quot;bad&quot; score.
Definition: hmm.h:84
N-Gram based multi-pass search (&quot;FBS&quot;)
frame_idx_t ef
End frame.
int32 ascr
Acoustic score.
cmd_ln_t * config
Configuration.
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
Definition: dict.c:413
POCKETSPHINX_EXPORT ps_latnode_iter_t * ps_latnode_iter_next(ps_latnode_iter_t *itor)
Move to next node in iteration.
Definition: ps_lattice.c:694
dict_t * dict
Dictionary for this DAG.
listelem_alloc_t * latpath_alloc
Path allocator for N-best search.
listelem_alloc_t * latlink_alloc
Link allocator for this DAG.
POCKETSPHINX_EXPORT int ps_lattice_write(ps_lattice_t *dag, char const *filename)
Write a lattice to disk.
Definition: ps_lattice.c:210
int32 wid
Dictionary word id.
int32 node_id
Node from fsg model, used to map lattice back to model.
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
POCKETSPHINX_EXPORT ps_latlink_t * ps_latlink_pred(ps_latlink_t *link)
Get predecessor link in best path.
Definition: ps_lattice.c:816
char const * ps_lattice_hyp(ps_lattice_t *dag, ps_latlink_t *link)
Get hypothesis string after bestpath search.
Definition: ps_lattice.c:830
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a &quot;real&quot; word, i.e.
Definition: dict.c:427
POCKETSPHINX_EXPORT ps_latlink_iter_t * ps_latlink_iter_next(ps_latlink_iter_t *itor)
Get next link from a lattice link iterator.
Definition: ps_lattice.c:761
Word graph structure used in bestpath/nbest search.
#define WORSE_THAN
Is one score worse than another?
Definition: hmm.h:100
ps_astar_t * ps_astar_start(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, int sf, int ef, int w1, int w2)
Begin N-Gram based A* search on a word graph.
Definition: ps_lattice.c:1712
int32 norm
Normalizer for posterior probabilities.
POCKETSPHINX_EXPORT char const * ps_latlink_word(ps_lattice_t *dag, ps_latlink_t *link)
Get word string from a lattice link.
Definition: ps_lattice.c:800
Segmentation &quot;iterator&quot; for A* search results.
int refcount
Reference count.
POCKETSPHINX_EXPORT ps_latnode_t * ps_latnode_iter_node(ps_latnode_iter_t *itor)
Get node from iterator.
Definition: ps_lattice.c:706
latlink_list_t * q_tail
Queue of links for traversal.
int16 cur
Current position in bpidx.
Partial path structure used in N-best (A*) search.
Segmentation &quot;iterator&quot; for backpointer table results.
#define BETTER_THAN
Is one score better than another?
Definition: hmm.h:95
dict_t * dict
Pronunciation dictionary.
int32 fef
First end frame.
int32 norm
Normalizer for posterior probabilities.
int32 lback
Language model backoff.
int32 basewid
Dictionary base word id.
latlink_list_t * latlink_list_new(ps_lattice_t *dag, ps_latlink_t *link, latlink_list_t *next)
Create a new lattice link element.
Definition: ps_lattice.c:1042
POCKETSPHINX_EXPORT ps_latnode_t * ps_latlink_nodes(ps_latlink_t *link, ps_latnode_t **out_src)
Get destination and source nodes from a lattice link.
Definition: ps_lattice.c:793
ps_lattice_t * ps_lattice_init_search(ps_search_t *search, int n_frame)
Construct an empty word graph with reference to a search structure.
Definition: ps_lattice.c:639
s3wid_t dict_add_word(dict_t *d, char const *word, s3cipid_t const *p, int32 np)
Add a word with the given ciphone pronunciation list to the dictionary.
Definition: dict.c:80
POCKETSPHINX_EXPORT ps_latlink_t * ps_lattice_reverse_edges(ps_lattice_t *dag, ps_latnode_t *start, ps_latnode_t *end)
Start a reverse traversal of edges in a word graph.
Definition: ps_lattice.c:1148
struct ps_latnode_s * next
Next node in DAG (no ordering implied)
POCKETSPHINX_EXPORT char const * ps_latnode_word(ps_lattice_t *dag, ps_latnode_t *node)
Get word string for this node.
Definition: ps_lattice.c:720
POCKETSPHINX_EXPORT void ps_latnode_iter_free(ps_latnode_iter_t *itor)
Stop iterating over nodes.
Definition: ps_lattice.c:700
int32 rem_score
Estimated best score from node.sf to end.
POCKETSPHINX_EXPORT ps_lattice_t * ps_lattice_retain(ps_lattice_t *dag)
Retain a lattice.
Definition: ps_lattice.c:658
ps_seg_t * ps_astar_seg_iter(ps_astar_t *astar, ps_latpath_t *path, float32 lwf)
Get hypothesis segmentation from A* search.
Definition: ps_lattice.c:1898
char * hyp_str
Current hypothesis string.
POCKETSPHINX_EXPORT ps_latlink_t * ps_lattice_reverse_next(ps_lattice_t *dag, ps_latnode_t *start)
Get the next link in reverse traversal.
Definition: ps_lattice.c:1173
Base structure for hypothesis segmentation iterator.
POCKETSPHINX_EXPORT ps_latnode_iter_t * ps_latnode_iter(ps_lattice_t *dag)
Start iterating over nodes in the lattice.
Definition: ps_lattice.c:688
cmd_ln_t * config
Configuration.
ps_latnode_t * node
Node ending this path.
int32 score
Exact score from start node up to node-&gt;sf.
POCKETSPHINX_EXPORT int ps_lattice_free(ps_lattice_t *dag)
Free a lattice.
Definition: ps_lattice.c:665
float32 lwf
Language weight factor (for second-pass searches)
POCKETSPHINX_EXPORT ps_latlink_iter_t * ps_latnode_exits(ps_latnode_t *node)
Iterate over exits from this node.
Definition: ps_lattice.c:749
POCKETSPHINX_EXPORT void ps_lattice_link(ps_lattice_t *dag, ps_latnode_t *from, ps_latnode_t *to, int32 score, int32 ef)
Create a directed link between &quot;from&quot; and &quot;to&quot; nodes, but if a link already exists, choose one with the best link_scr.
Definition: ps_lattice.c:65
frame_idx_t sf
Start frame.
POCKETSPHINX_EXPORT int ps_latnode_times(ps_latnode_t *node, int16 *out_fef, int16 *out_lef)
Get start and end time range for a node.
Definition: ps_lattice.c:712
POCKETSPHINX_EXPORT int32 ps_lattice_posterior_prune(ps_lattice_t *dag, int32 beam)
Prune all links (and associated nodes) below a certain posterior probability.
Definition: ps_lattice.c:1524
POCKETSPHINX_EXPORT ps_latlink_t * ps_latlink_iter_link(ps_latlink_iter_t *itor)
Get link from iterator.
Definition: ps_lattice.c:773
int32 dict_word2basestr(char *word)
If the given word contains a trailing &quot;(....)&quot; (i.e., a Sphinx-II style alternative pronunciation spe...
Definition: dict.c:442