PocketSphinx  5prealpha
fsg_lextree.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2010 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  *
19  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * ====================================================================
32  *
33  */
41 /* System headers. */
42 #include <stdio.h>
43 #include <string.h>
44 #include <assert.h>
45 
46 /* SphinxBase headers. */
47 #include <sphinxbase/ckd_alloc.h>
48 #include <sphinxbase/err.h>
49 
50 /* Local headers. */
51 #include "fsg_lextree.h"
52 
53 #define __FSG_DBG__ 0
54 
55 /* A linklist structure that is actually used to build local lextrees at grammar nodes */
56 typedef struct fsg_glist_linklist_t {
57  int32 ci, rc;
58  glist_t glist;
59  struct fsg_glist_linklist_t *next;
61 
68 static fsg_pnode_t *fsg_psubtree_init(fsg_lextree_t *tree,
69  fsg_model_t *fsg,
70  int32 from_state,
71  fsg_pnode_t **alloc_head);
72 
77 static void fsg_psubtree_free(fsg_pnode_t *alloc_head);
78 
83 static void fsg_psubtree_dump(fsg_lextree_t *tree, fsg_pnode_t *root, FILE *fp);
84 
88 static void
89 fsg_lextree_lc_rc(fsg_lextree_t *lextree)
90 {
91  int32 s, i, j;
92  int32 n_ci;
93  fsg_model_t *fsg;
94  int32 silcipid;
95  int32 len;
96 
97  silcipid = bin_mdef_silphone(lextree->mdef);
98  assert(silcipid >= 0);
99  n_ci = bin_mdef_n_ciphone(lextree->mdef);
100 
101  fsg = lextree->fsg;
102  /*
103  * lextree->lc[s] = set of left context CIphones for state s. Similarly, rc[s]
104  * for right context CIphones.
105  */
106  lextree->lc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->lc));
107  lextree->rc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->rc));
108  E_INFO("Allocated %d bytes (%d KiB) for left and right context phones\n",
109  fsg->n_state * (n_ci + 1) * 2,
110  fsg->n_state * (n_ci + 1) * 2 / 1024);
111 
112 
113  for (s = 0; s < fsg->n_state; s++) {
114  fsg_arciter_t *itor;
115  for (itor = fsg_model_arcs(fsg, s); itor; itor = fsg_arciter_next(itor)) {
116  fsg_link_t *l = fsg_arciter_get(itor);
117  int32 dictwid;
119  if (fsg_link_wid(l) >= 0) {
120  dictwid = dict_wordid(lextree->dict,
121  fsg_model_word_str(lextree->fsg, l->wid));
122 
123  /*
124  * Add the first CIphone of l->wid to the rclist of state s, and
125  * the last CIphone to lclist of state d.
126  * (Filler phones are a pain to deal with. There is no direct
127  * marking of a filler phone; but only filler words are supposed to
128  * use such phones, so we use that fact. HACK!! FRAGILE!!)
129  *
130  * UPD: tests carsh here if .fsg model used with wrong hmm and
131  * dictionary
132  */
133  if (fsg_model_is_filler(fsg, fsg_link_wid(l))) {
134  /* Filler phone; use silence phone as context */
135  lextree->rc[fsg_link_from_state(l)][silcipid] = 1;
136  lextree->lc[fsg_link_to_state(l)][silcipid] = 1;
137  }
138  else {
139  len = dict_pronlen(lextree->dict, dictwid);
140  lextree->rc[fsg_link_from_state(l)][dict_pron(lextree->dict, dictwid, 0)] = 1;
141  lextree->lc[fsg_link_to_state(l)][dict_pron(lextree->dict, dictwid, len - 1)] = 1;
142  }
143  }
144  }
145  }
146 
147  for (s = 0; s < fsg->n_state; s++) {
148  /*
149  * Add SIL phone to the lclist and rclist of each state. Strictly
150  * speaking, only needed at start and final states, respectively, but
151  * all states considered since the user may change the start and final
152  * states. In any case, most applications would have a silence self
153  * loop at each state, hence these would be needed anyway.
154  */
155  lextree->lc[s][silcipid] = 1;
156  lextree->rc[s][silcipid] = 1;
157  }
158 
159  /*
160  * Propagate lc and rc lists past null transitions. (Since FSG contains
161  * null transitions closure, no need to worry about a chain of successive
162  * null transitions. Right??)
163  *
164  * This can't be joined with the previous loop because we first calculate
165  * contexts and only then we can propagate them.
166  */
167  for (s = 0; s < fsg->n_state; s++) {
168  fsg_arciter_t *itor;
169  for (itor = fsg_model_arcs(fsg, s); itor; itor = fsg_arciter_next(itor)) {
170  fsg_link_t *l = fsg_arciter_get(itor);
171  if (fsg_link_wid(l) < 0) {
172  /*
173  * lclist(d) |= lclist(s), because all the words ending up at s, can
174  * now also end at d, becoming the left context for words leaving d.
175  */
176  for (i = 0; i < n_ci; i++)
177  lextree->lc[fsg_link_to_state(l)][i] |= lextree->lc[fsg_link_from_state(l)][i];
178  /*
179  * Similarly, rclist(s) |= rclist(d), because all the words leaving d
180  * can equivalently leave s, becoming the right context for words
181  * ending up at s.
182  */
183  for (i = 0; i < n_ci; i++)
184  lextree->rc[fsg_link_from_state(l)][i] |= lextree->rc[fsg_link_to_state(l)][i];
185  }
186  }
187  }
188 
189  /* Convert the bit-vector representation into a list */
190  for (s = 0; s < fsg->n_state; s++) {
191  j = 0;
192  for (i = 0; i < n_ci; i++) {
193  if (lextree->lc[s][i]) {
194  lextree->lc[s][j] = i;
195  j++;
196  }
197  }
198  lextree->lc[s][j] = -1; /* Terminate the list */
199 
200  j = 0;
201  for (i = 0; i < n_ci; i++) {
202  if (lextree->rc[s][i]) {
203  lextree->rc[s][j] = i;
204  j++;
205  }
206  }
207  lextree->rc[s][j] = -1; /* Terminate the list */
208  }
209 }
210 
211 /*
212  * For now, allocate the entire lextree statically.
213  */
215 fsg_lextree_init(fsg_model_t * fsg, dict_t *dict, dict2pid_t *d2p,
216  bin_mdef_t *mdef, hmm_context_t *ctx,
217  int32 wip, int32 pip)
218 {
219  int32 s, n_leaves;
220  fsg_lextree_t *lextree;
221  fsg_pnode_t *pn;
222 
223  lextree = ckd_calloc(1, sizeof(fsg_lextree_t));
224  lextree->fsg = fsg;
225  lextree->root = ckd_calloc(fsg_model_n_state(fsg),
226  sizeof(fsg_pnode_t *));
227  lextree->alloc_head = ckd_calloc(fsg_model_n_state(fsg),
228  sizeof(fsg_pnode_t *));
229  lextree->ctx = ctx;
230  lextree->dict = dict;
231  lextree->d2p = d2p;
232  lextree->mdef = mdef;
233  lextree->wip = wip;
234  lextree->pip = pip;
235 
236  /* Compute lc and rc for fsg. */
237  fsg_lextree_lc_rc(lextree);
238 
239  /* Create lextree for each state, i.e. an HMM network that
240  * represents words for all arcs exiting that state. Note that
241  * for a dense grammar such as an N-gram model, this will
242  * rapidly exhaust all available memory. */
243  lextree->n_pnode = 0;
244  n_leaves = 0;
245  for (s = 0; s < fsg_model_n_state(fsg); s++) {
246  lextree->root[s] =
247  fsg_psubtree_init(lextree, fsg, s, &(lextree->alloc_head[s]));
248 
249  for (pn = lextree->alloc_head[s]; pn; pn = pn->alloc_next) {
250  lextree->n_pnode++;
251  if (pn->leaf)
252  ++n_leaves;
253  }
254  }
255  E_INFO("%d HMM nodes in lextree (%d leaves)\n",
256  lextree->n_pnode, n_leaves);
257  E_INFO("Allocated %d bytes (%d KiB) for all lextree nodes\n",
258  lextree->n_pnode * sizeof(fsg_pnode_t),
259  lextree->n_pnode * sizeof(fsg_pnode_t) / 1024);
260  E_INFO("Allocated %d bytes (%d KiB) for lextree leafnodes\n",
261  n_leaves * sizeof(fsg_pnode_t),
262  n_leaves * sizeof(fsg_pnode_t) / 1024);
263 
264 #if __FSG_DBG__
265  fsg_lextree_dump(lextree, stdout);
266 #endif
267 
268  return lextree;
269 }
270 
271 
272 void
273 fsg_lextree_dump(fsg_lextree_t * lextree, FILE * fp)
274 {
275  int32 s;
276 
277  for (s = 0; s < fsg_model_n_state(lextree->fsg); s++) {
278  fprintf(fp, "State %5d root %p\n", s, lextree->root[s]);
279  fsg_psubtree_dump(lextree, lextree->root[s], fp);
280  }
281  fflush(fp);
282 }
283 
284 
285 void
287 {
288  int32 s;
289 
290  if (lextree == NULL)
291  return;
292 
293  if (lextree->fsg)
294  for (s = 0; s < fsg_model_n_state(lextree->fsg); s++)
295  fsg_psubtree_free(lextree->alloc_head[s]);
296 
297  ckd_free_2d(lextree->lc);
298  ckd_free_2d(lextree->rc);
299  ckd_free(lextree->root);
300  ckd_free(lextree->alloc_head);
301  ckd_free(lextree);
302 }
303 
304 /******************************
305  * psubtree stuff starts here *
306  ******************************/
307 
308 void fsg_glist_linklist_free(fsg_glist_linklist_t *glist)
309 {
310  if (glist) {
311  fsg_glist_linklist_t *nxtglist;
312  if (glist->glist)
313  glist_free(glist->glist);
314  nxtglist = glist->next;
315  while (nxtglist) {
316  ckd_free(glist);
317  glist = nxtglist;
318  if (glist->glist)
319  glist_free(glist->glist);
320  nxtglist = glist->next;
321  }
322  ckd_free(glist);
323  }
324  return;
325 }
326 
327 void
329 {
330  int32 i;
331 
332  for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++)
333  ctxt->bv[i] = 0xffffffff;
334 }
335 
337 {
338  int32 i;
339  uint32 res = 0;
340 
341  for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++)
342  res |= (src->bv[i] = ~(sub->bv[i]) & src->bv[i]);
343  return res;
344 }
345 
346 
347 /*
348  * fsg_pnode_ctxt_sub(fsg_pnode_ctxt_t * src, fsg_pnode_ctxt_t * sub)
349  * This has been moved into a macro in fsg_psubtree.h
350  * because it is called so frequently!
351  */
352 
353 
354 /*
355  * Add the word emitted by the given transition (fsglink) to the given lextree
356  * (rooted at root), and return the new lextree root. (There may actually be
357  * several root nodes, maintained in a linked list via fsg_pnode_t.sibling.
358  * "root" is the head of this list.)
359  * lclist, rclist: sets of left and right context phones for this link.
360  * alloc_head: head of a linear list of all allocated pnodes for the parent
361  * FSG state, kept elsewhere and updated by this routine.
362  */
363 static fsg_pnode_t *
364 psubtree_add_trans(fsg_lextree_t *lextree,
365  fsg_pnode_t * root,
366  fsg_glist_linklist_t **curglist,
367  fsg_link_t * fsglink,
368  int16 *lclist, int16 *rclist,
369  fsg_pnode_t ** alloc_head)
370 {
371  int32 silcipid; /* Silence CI phone ID */
372  int32 pronlen; /* Pronunciation length */
373  int32 wid; /* FSG (not dictionary!!) word ID */
374  int32 dictwid; /* Dictionary (not FSG!!) word ID */
375  int32 ssid; /* Senone Sequence ID */
376  int32 tmatid;
377  gnode_t *gn;
378  fsg_pnode_t *pnode, *pred, *head;
379  int32 n_ci, p, lc, rc;
380  glist_t lc_pnodelist; /* Temp pnodes list for different left contexts */
381  glist_t rc_pnodelist; /* Temp pnodes list for different right contexts */
382  int32 i, j;
383  int n_lc_alloc = 0, n_int_alloc = 0, n_rc_alloc = 0;
384 
385  silcipid = bin_mdef_silphone(lextree->mdef);
386  n_ci = bin_mdef_n_ciphone(lextree->mdef);
387 
388  wid = fsg_link_wid(fsglink);
389  assert(wid >= 0); /* Cannot be a null transition */
390  dictwid = dict_wordid(lextree->dict,
391  fsg_model_word_str(lextree->fsg, wid));
392  pronlen = dict_pronlen(lextree->dict, dictwid);
393  assert(pronlen >= 1);
394 
395  assert(lclist[0] >= 0); /* At least one phonetic context provided */
396  assert(rclist[0] >= 0);
397 
398  head = *alloc_head;
399  pred = NULL;
400 
401  if (pronlen == 1) { /* Single-phone word */
402  int ci = dict_first_phone(lextree->dict, dictwid);
403  /* Only non-filler words are mpx */
404  if (!dict_filler_word(lextree->dict, dictwid)) {
405  /*
406  * Left diphone ID for single-phone words already assumes SIL is right
407  * context; only left contexts need to be handled.
408  */
409  lc_pnodelist = NULL;
410 
411  for (i = 0; lclist[i] >= 0; i++) {
412  lc = lclist[i];
413  ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid);
414  tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_first_phone(lextree->dict, dictwid));
415  /* Check if this ssid already allocated for some other context */
416  for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
417  pnode = (fsg_pnode_t *) gnode_ptr(gn);
418 
419  if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) {
420  /* already allocated; share it for this context phone */
421  fsg_pnode_add_ctxt(pnode, lc);
422  break;
423  }
424  }
425 
426  if (!gn) { /* ssid not already allocated */
427  pnode =
428  (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
429  pnode->ctx = lextree->ctx;
430  pnode->next.fsglink = fsglink;
431  pnode->logs2prob =
432  (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT)
433  + lextree->wip + lextree->pip;
434  pnode->ci_ext = dict_first_phone(lextree->dict, dictwid);
435  pnode->ppos = 0;
436  pnode->leaf = TRUE;
437  pnode->sibling = root; /* All root nodes linked together */
438  fsg_pnode_add_ctxt(pnode, lc); /* Initially zeroed by calloc above */
439  pnode->alloc_next = head;
440  head = pnode;
441  root = pnode;
442  ++n_lc_alloc;
443 
444  hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);
445 
446  lc_pnodelist =
447  glist_add_ptr(lc_pnodelist, (void *) pnode);
448  }
449  }
450 
451  glist_free(lc_pnodelist);
452  }
453  else { /* Filler word; no context modelled */
454  ssid = bin_mdef_pid2ssid(lextree->mdef, ci); /* probably the same... */
455  tmatid = bin_mdef_pid2tmatid(lextree->mdef, ci);
456 
457  pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
458  pnode->ctx = lextree->ctx;
459  pnode->next.fsglink = fsglink;
460  pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT)
461  + lextree->wip + lextree->pip;
462  pnode->ci_ext = silcipid; /* Presents SIL as context to neighbors */
463  pnode->ppos = 0;
464  pnode->leaf = TRUE;
465  pnode->sibling = root;
466  fsg_pnode_add_all_ctxt(&(pnode->ctxt));
467  pnode->alloc_next = head;
468  head = pnode;
469  root = pnode;
470  ++n_int_alloc;
471 
472  hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);
473  }
474  }
475  else { /* Multi-phone word */
476  fsg_pnode_t **ssid_pnode_map; /* Temp array of ssid->pnode mapping */
477  ssid_pnode_map =
478  (fsg_pnode_t **) ckd_calloc(n_ci, sizeof(fsg_pnode_t *));
479  lc_pnodelist = NULL;
480  rc_pnodelist = NULL;
481 
482  for (p = 0; p < pronlen; p++) {
483  int ci = dict_pron(lextree->dict, dictwid, p);
484  if (p == 0) { /* Root phone, handle required left contexts */
485  /* Find if we already have an lc_pnodelist for the first phone of this word */
486  fsg_glist_linklist_t *glist;
487 
488  rc = dict_pron(lextree->dict, dictwid, 1);
489  for (glist = *curglist;
490  glist && glist->glist;
491  glist = glist->next) {
492  if (glist->ci == ci && glist->rc == rc)
493  break;
494  }
495  if (glist && glist->glist) {
496  assert(glist->ci == ci && glist->rc == rc);
497  /* We've found a valid glist. Hook to it and move to next phoneme */
498  E_DEBUG(2,("Found match for (%d,%d)\n", ci, rc));
499  lc_pnodelist = glist->glist;
500  /* Set the predecessor node for the future tree first */
501  pred = (fsg_pnode_t *) gnode_ptr(lc_pnodelist);
502  continue;
503  }
504  else {
505  /* Two cases that can bring us here
506  * a. glist == NULL, i.e. end of current list. Create new entry.
507  * b. glist->glist == NULL, i.e. first entry into list.
508  */
509  if (glist == NULL) { /* Case a; reduce it to case b by allocing glist */
510  glist = (fsg_glist_linklist_t*) ckd_calloc(1, sizeof(fsg_glist_linklist_t));
511  glist->next = *curglist;
512  *curglist = glist;
513  }
514  glist->ci = ci;
515  glist->rc = rc;
516  lc_pnodelist = glist->glist = NULL; /* Gets created below */
517  }
518 
519  for (i = 0; lclist[i] >= 0; i++) {
520  lc = lclist[i];
521  ssid = dict2pid_ldiph_lc(lextree->d2p, ci, rc, lc);
522  tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_first_phone(lextree->dict, dictwid));
523  /* Compression is not done by d2p, so we do it
524  * here. This might be slow, but it might not
525  * be... we'll see. */
526  pnode = ssid_pnode_map[0];
527  for (j = 0; j < n_ci && ssid_pnode_map[j] != NULL; ++j) {
528  pnode = ssid_pnode_map[j];
529  if (hmm_nonmpx_ssid(&pnode->hmm) == ssid)
530  break;
531  }
532  assert(j < n_ci);
533  if (!pnode) { /* Allocate pnode for this new ssid */
534  pnode =
535  (fsg_pnode_t *) ckd_calloc(1,
536  sizeof
537  (fsg_pnode_t));
538  pnode->ctx = lextree->ctx;
539  /* This bit is tricky! For now we'll put the prob in the final link only */
540  /* pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT)
541  + lextree->wip + lextree->pip; */
542  pnode->logs2prob = lextree->wip + lextree->pip;
543  pnode->ci_ext = dict_first_phone(lextree->dict, dictwid);
544  pnode->ppos = 0;
545  pnode->leaf = FALSE;
546  pnode->sibling = root; /* All root nodes linked together */
547  pnode->alloc_next = head;
548  head = pnode;
549  root = pnode;
550  ++n_lc_alloc;
551 
552  hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);
553 
554  lc_pnodelist =
555  glist_add_ptr(lc_pnodelist, (void *) pnode);
556  ssid_pnode_map[j] = pnode;
557  }
558  fsg_pnode_add_ctxt(pnode, lc);
559  }
560  /* Put the lc_pnodelist back into glist */
561  glist->glist = lc_pnodelist;
562 
563  /* The predecessor node for the future tree is the root */
564  pred = root;
565  }
566  else if (p != pronlen - 1) { /* Word internal phone */
567  fsg_pnode_t *pnodeyoungest;
568 
569  ssid = dict2pid_internal(lextree->d2p, dictwid, p);
570  tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_pron (lextree->dict, dictwid, p));
571  /* First check if we already have this ssid in our tree */
572  pnode = pred->next.succ;
573  pnodeyoungest = pnode; /* The youngest sibling */
574  while (pnode && (hmm_nonmpx_ssid(&pnode->hmm) != ssid || pnode->leaf)) {
575  pnode = pnode->sibling;
576  }
577  if (pnode && (hmm_nonmpx_ssid(&pnode->hmm) == ssid && !pnode->leaf)) {
578  /* Found the ssid; go to next phoneme */
579  E_DEBUG(2,("Found match for %d\n", ci));
580  pred = pnode;
581  continue;
582  }
583 
584  /* pnode not found, allocate it */
585  pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
586  pnode->ctx = lextree->ctx;
587  pnode->logs2prob = lextree->pip;
588  pnode->ci_ext = dict_pron(lextree->dict, dictwid, p);
589  pnode->ppos = p;
590  pnode->leaf = FALSE;
591  pnode->sibling = pnodeyoungest; /* May be NULL */
592  if (p == 1) { /* Predecessor = set of root nodes for left ctxts */
593  for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
594  pred = (fsg_pnode_t *) gnode_ptr(gn);
595  pred->next.succ = pnode;
596  }
597  }
598  else { /* Predecessor = word internal node */
599  pred->next.succ = pnode;
600  }
601  pnode->alloc_next = head;
602  head = pnode;
603  ++n_int_alloc;
604 
605  hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);
606 
607  pred = pnode;
608  }
609  else { /* Leaf phone, handle required right contexts */
610  /* Note, leaf phones are not part of the tree */
611  xwdssid_t *rssid;
612  memset((void *) ssid_pnode_map, 0,
613  n_ci * sizeof(fsg_pnode_t *));
614  lc = dict_pron(lextree->dict, dictwid, p-1);
615  rssid = dict2pid_rssid(lextree->d2p, ci, lc);
616  tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_pron (lextree->dict, dictwid, p));
617 
618  for (i = 0; rclist[i] >= 0; i++) {
619  rc = rclist[i];
620 
621  j = rssid->cimap[rc];
622  ssid = rssid->ssid[j];
623  pnode = ssid_pnode_map[j];
624 
625  if (!pnode) { /* Allocate pnode for this new ssid */
626  pnode =
627  (fsg_pnode_t *) ckd_calloc(1,
628  sizeof
629  (fsg_pnode_t));
630  pnode->ctx = lextree->ctx;
631  /* We are plugging the word prob here. Ugly */
632  /* pnode->logs2prob = lextree->pip; */
633  pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT)
634  + lextree->pip;
635  pnode->ci_ext = dict_pron(lextree->dict, dictwid, p);
636  pnode->ppos = p;
637  pnode->leaf = TRUE;
638  pnode->sibling = rc_pnodelist ?
639  (fsg_pnode_t *) gnode_ptr(rc_pnodelist) : NULL;
640  pnode->next.fsglink = fsglink;
641  pnode->alloc_next = head;
642  head = pnode;
643  ++n_rc_alloc;
644 
645  hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);
646 
647  rc_pnodelist =
648  glist_add_ptr(rc_pnodelist, (void *) pnode);
649  ssid_pnode_map[j] = pnode;
650  }
651  else {
652  assert(hmm_nonmpx_ssid(&pnode->hmm) == ssid);
653  }
654  fsg_pnode_add_ctxt(pnode, rc);
655  }
656 
657  if (p == 1) { /* Predecessor = set of root nodes for left ctxts */
658  for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
659  pred = (fsg_pnode_t *) gnode_ptr(gn);
660  if (!pred->next.succ)
661  pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
662  else {
663  /* Link to the end of the sibling chain */
664  fsg_pnode_t *succ = pred->next.succ;
665  while (succ->sibling) succ = succ->sibling;
666  succ->sibling = (fsg_pnode_t*) gnode_ptr(rc_pnodelist);
667  /* Since all entries of lc_pnodelist point
668  to the same array, sufficient to update it once */
669  break;
670  }
671  }
672  }
673  else { /* Predecessor = word internal node */
674  if (!pred->next.succ)
675  pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
676  else {
677  /* Link to the end of the sibling chain */
678  fsg_pnode_t *succ = pred->next.succ;
679  while (succ->sibling) succ = succ->sibling;
680  succ->sibling = (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
681  }
682  }
683  }
684  }
685 
686  ckd_free((void *) ssid_pnode_map);
687  /* glist_free(lc_pnodelist); Nope; this gets freed outside */
688  glist_free(rc_pnodelist);
689  }
690 
691  E_DEBUG(2,("Allocated %d HMMs (%d lc, %d rc, %d internal)\n",
692  n_lc_alloc + n_rc_alloc + n_int_alloc,
693  n_lc_alloc, n_rc_alloc, n_int_alloc));
694  *alloc_head = head;
695 
696  return root;
697 }
698 
699 
700 static fsg_pnode_t *
701 fsg_psubtree_init(fsg_lextree_t *lextree,
702  fsg_model_t * fsg, int32 from_state,
703  fsg_pnode_t ** alloc_head)
704 {
705  fsg_arciter_t *itor;
706  fsg_link_t *fsglink;
707  fsg_pnode_t *root;
708  int32 n_ci, n_arc;
709  fsg_glist_linklist_t *glist = NULL;
710 
711  root = NULL;
712  assert(*alloc_head == NULL);
713 
714  n_ci = bin_mdef_n_ciphone(lextree->mdef);
715  if (n_ci > (FSG_PNODE_CTXT_BVSZ * 32)) {
716  E_FATAL
717  ("#phones > %d; increase FSG_PNODE_CTXT_BVSZ and recompile\n",
718  FSG_PNODE_CTXT_BVSZ * 32);
719  }
720 
721  n_arc = 0;
722  for (itor = fsg_model_arcs(fsg, from_state); itor;
723  itor = fsg_arciter_next(itor)) {
724  int32 dst;
725  fsglink = fsg_arciter_get(itor);
726  dst = fsglink->to_state;
727 
728  if (fsg_link_wid(fsglink) < 0)
729  continue;
730 
731  E_DEBUG(2,("Building lextree for arc from %d to %d: %s\n",
732  from_state, dst, fsg_model_word_str(fsg, fsg_link_wid(fsglink))));
733  root = psubtree_add_trans(lextree, root, &glist, fsglink,
734  lextree->lc[from_state],
735  lextree->rc[dst],
736  alloc_head);
737  ++n_arc;
738  }
739  E_DEBUG(2,("State %d has %d outgoing arcs\n", from_state, n_arc));
740 
741  fsg_glist_linklist_free(glist);
742 
743  return root;
744 }
745 
746 
747 static void
748 fsg_psubtree_free(fsg_pnode_t * head)
749 {
750  fsg_pnode_t *next;
751 
752  while (head) {
753  next = head->alloc_next;
754  hmm_deinit(&head->hmm);
755  ckd_free(head);
756  head = next;
757  }
758 }
759 
760 void fsg_psubtree_dump_node(fsg_lextree_t *tree, fsg_pnode_t *node, FILE *fp)
761 {
762  int32 i;
763  fsg_link_t *tl;
764 
765  /* Indentation */
766  for (i = 0; i <= node->ppos; i++)
767  fprintf(fp, " ");
768 
769  fprintf(fp, "%p.@", node); /* Pointer used as node
770  * ID */
771  fprintf(fp, " %5d.SS", hmm_nonmpx_ssid(&node->hmm));
772  fprintf(fp, " %10d.LP", node->logs2prob);
773  fprintf(fp, " %p.SIB", node->sibling);
774  fprintf(fp, " %s.%d", bin_mdef_ciphone_str(tree->mdef, node->ci_ext), node->ppos);
775  if ((node->ppos == 0) || node->leaf) {
776  fprintf(fp, " [");
777  for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++)
778  fprintf(fp, "%08x", node->ctxt.bv[i]);
779  fprintf(fp, "]");
780  }
781  if (node->leaf) {
782  tl = node->next.fsglink;
783  fprintf(fp, " {%s[%d->%d](%d)}",
784  fsg_model_word_str(tree->fsg, tl->wid),
785  tl->from_state, tl->to_state, tl->logs2prob);
786  } else {
787  fprintf(fp, " %p.NXT", node->next.succ);
788  }
789  fprintf(fp, "\n");
790 
791  return;
792 }
793 
794 void
795 fsg_psubtree_dump(fsg_lextree_t *tree, fsg_pnode_t *root, FILE * fp)
796 {
797  fsg_pnode_t *succ;
798 
799  if (root == NULL) return;
800  if (root->ppos == 0) {
801  while(root->sibling && root->sibling->next.succ == root->next.succ) {
802  fsg_psubtree_dump_node(tree, root, fp);
803  root = root->sibling;
804  }
805  fflush(fp);
806  }
807 
808  fsg_psubtree_dump_node(tree, root, fp);
809 
810  if (root->leaf) {
811  if (root->ppos == 0 && root->sibling) { /* For single-phone words */
812  fsg_psubtree_dump(tree, root->sibling,fp);
813  }
814  return;
815  }
816 
817  succ = root->next.succ;
818  while(succ) {
819  fsg_psubtree_dump(tree, succ,fp);
820  succ = succ->sibling;
821  }
822 
823  if (root->ppos == 0) {
824  fsg_psubtree_dump(tree, root->sibling,fp);
825  fflush(fp);
826  }
827 
828  return;
829 }
830 
831 void
833 {
834  hmm_clear(&pnode->hmm);
835 }
void fsg_lextree_free(fsg_lextree_t *lextree)
Free lextrees for an FSG.
Definition: fsg_lextree.c:286
hmm_context_t * ctx
HMM context structure.
Definition: fsg_lextree.h:182
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
Definition: hmm.c:89
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
Definition: dict.c:399
void fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t *ctxt)
Set all flags on in the given context bitvector.
Definition: fsg_lextree.c:328
const char * bin_mdef_ciphone_str(bin_mdef_t *m, int32 ci)
In: ciphone id for which name wanted.
Definition: bin_mdef.c:737
bin_mdef_t * mdef
Model definition (triphone mappings).
Definition: fsg_lextree.h:185
uint32 fsg_pnode_ctxt_sub_generic(fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub)
Generic variant for arbitrary size.
Definition: fsg_lextree.c:336
void fsg_lextree_dump(fsg_lextree_t *lextree, FILE *fp)
Print an FSG lextree to a file for debugging.
Definition: fsg_lextree.c:273
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
Definition: hmm.c:111
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
Definition: dict2pid.h:115
void fsg_psubtree_pnode_deactivate(fsg_pnode_t *pnode)
Mark the given pnode as inactive (for search).
Definition: fsg_lextree.c:832
dict_t * dict
Pronunciation dictionary for this FSG.
Definition: fsg_lextree.h:183
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
Definition: dict.c:413
Shared information between a set of HMMs.
Collection of lextrees for an FSG.
Definition: fsg_lextree.h:180
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
a structure for a dictionary.
Definition: dict.h:76
s3ssid_t dict2pid_internal(dict2pid_t *d2p, int32 wid, int pos)
Return the senone sequence ID for the given word position.
Definition: dict2pid.c:367
void hmm_clear(hmm_t *h)
Reset the states of the HMM to the invalid condition.
Definition: hmm.c:183
cross word triphone model structure
Definition: dict2pid.h:73
int16 ** rc
Right context triphone mappings for FSG.
Definition: fsg_lextree.h:205
int16 ** lc
Left context triphone mappings for FSG.
Definition: fsg_lextree.h:204
fsg_lextree_t * fsg_lextree_init(fsg_model_t *fsg, dict_t *dict, dict2pid_t *d2p, bin_mdef_t *mdef, hmm_context_t *ctx, int32 wip, int32 pip)
Create, initialize, and return a new phonetic lextree for the given FSG.
Definition: fsg_lextree.c:215
fsg_model_t * fsg
The fsg for which this lextree is built.
Definition: fsg_lextree.h:181
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
Definition: dict2pid.h:75
#define dict_pron(d, w, p)
The CI phones of the word w at position p.
Definition: dict.h:165
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
s3ssid_t * ssid
Senone Sequence ID list for all context ciphones.
Definition: dict2pid.h:74
dict2pid_t * d2p
Context-dependent phone mappings for this FSG.
Definition: fsg_lextree.h:184