SphinxBase  5prealpha
fsg_model.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  *
19  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * ====================================================================
32  *
33  */
34 
35 #include <stdio.h>
36 #include <string.h>
37 #include <assert.h>
38 
39 /* SphinxBase headers. */
40 #include "sphinxbase/err.h"
41 #include "sphinxbase/pio.h"
42 #include "sphinxbase/ckd_alloc.h"
43 #include "sphinxbase/prim_type.h"
44 #include "sphinxbase/strfuncs.h"
45 #include "sphinxbase/hash_table.h"
46 #include "sphinxbase/fsg_model.h"
47 #include "sphinxbase/bitvec.h"
48 
56 struct trans_list_s {
57  hash_table_t *null_trans; /* Null transitions keyed by state. */
58  hash_table_t *trans; /* Lists of non-null transitions keyed by state. */
59 };
60 
64 struct fsg_arciter_s {
65  hash_iter_t *itor, *null_itor;
66  gnode_t *gn;
67 };
68 
69 #define FSG_MODEL_BEGIN_DECL "FSG_BEGIN"
70 #define FSG_MODEL_END_DECL "FSG_END"
71 #define FSG_MODEL_N_DECL "N"
72 #define FSG_MODEL_NUM_STATES_DECL "NUM_STATES"
73 #define FSG_MODEL_S_DECL "S"
74 #define FSG_MODEL_START_STATE_DECL "START_STATE"
75 #define FSG_MODEL_F_DECL "F"
76 #define FSG_MODEL_FINAL_STATE_DECL "FINAL_STATE"
77 #define FSG_MODEL_T_DECL "T"
78 #define FSG_MODEL_TRANSITION_DECL "TRANSITION"
79 #define FSG_MODEL_COMMENT_CHAR '#'
80 
81 
82 static int32
83 nextline_str2words(FILE * fp, int32 * lineno,
84  char **lineptr, char ***wordptr)
85 {
86  for (;;) {
87  size_t len;
88  int32 n;
89 
90  ckd_free(*lineptr);
91  if ((*lineptr = fread_line(fp, &len)) == NULL)
92  return -1;
93 
94  (*lineno)++;
95 
96  if ((*lineptr)[0] == FSG_MODEL_COMMENT_CHAR)
97  continue; /* Skip comment lines */
98 
99  n = str2words(*lineptr, NULL, 0);
100  if (n == 0)
101  continue; /* Skip blank lines */
102 
103  /* Abuse of realloc(), but this doesn't have to be fast. */
104  if (*wordptr == NULL)
105  *wordptr = ckd_calloc(n, sizeof(**wordptr));
106  else
107  *wordptr = ckd_realloc(*wordptr, n * sizeof(**wordptr));
108  return str2words(*lineptr, *wordptr, n);
109  }
110 }
111 
112 void
113 fsg_model_trans_add(fsg_model_t * fsg,
114  int32 from, int32 to, int32 logp, int32 wid)
115 {
116  fsg_link_t *link;
117  glist_t gl;
118  gnode_t *gn;
119 
120  if (fsg->trans[from].trans == NULL)
121  fsg->trans[from].trans = hash_table_new(5, HASH_CASE_YES);
122 
123  /* Check for duplicate link (i.e., link already exists with label=wid) */
124  for (gn = gl = fsg_model_trans(fsg, from, to); gn; gn = gnode_next(gn)) {
125  link = (fsg_link_t *) gnode_ptr(gn);
126  if (link->wid == wid) {
127  if (link->logs2prob < logp)
128  link->logs2prob = logp;
129  return;
130  }
131  }
132 
133  /* Create transition object */
134  link = listelem_malloc(fsg->link_alloc);
135  link->from_state = from;
136  link->to_state = to;
137  link->logs2prob = logp;
138  link->wid = wid;
139 
140  /* Add it to the list of transitions and update the hash table */
141  gl = glist_add_ptr(gl, (void *) link);
142  hash_table_replace_bkey(fsg->trans[from].trans,
143  (char const *) &link->to_state,
144  sizeof(link->to_state), gl);
145 }
146 
147 int32
148 fsg_model_tag_trans_add(fsg_model_t * fsg, int32 from, int32 to,
149  int32 logp, int32 wid)
150 {
151  fsg_link_t *link, *link2;
152 
153  /* Check for transition probability */
154  if (logp > 0) {
155  E_FATAL("Null transition prob must be <= 1.0 (state %d -> %d)\n",
156  from, to);
157  }
158 
159  /* Self-loop null transitions (with prob <= 1.0) are redundant */
160  if (from == to)
161  return -1;
162 
163  if (fsg->trans[from].null_trans == NULL)
164  fsg->trans[from].null_trans = hash_table_new(5, HASH_CASE_YES);
165 
166  /* Check for a duplicate link; if found, keep the higher prob */
167  link = fsg_model_null_trans(fsg, from, to);
168  if (link) {
169  if (link->logs2prob < logp) {
170  link->logs2prob = logp;
171  return 0;
172  }
173  else
174  return -1;
175  }
176 
177  /* Create null transition object */
178  link = listelem_malloc(fsg->link_alloc);
179  link->from_state = from;
180  link->to_state = to;
181  link->logs2prob = logp;
182  link->wid = -1;
183 
184  link2 = (fsg_link_t *)
185  hash_table_enter_bkey(fsg->trans[from].null_trans,
186  (char const *) &link->to_state,
187  sizeof(link->to_state), link);
188  assert(link == link2);
189 
190  return 1;
191 }
192 
193 int32
194 fsg_model_null_trans_add(fsg_model_t * fsg, int32 from, int32 to,
195  int32 logp)
196 {
197  return fsg_model_tag_trans_add(fsg, from, to, logp, -1);
198 }
199 
200 glist_t
201 fsg_model_null_trans_closure(fsg_model_t * fsg, glist_t nulls)
202 {
203  gnode_t *gn1;
204  int updated;
205  fsg_link_t *tl1, *tl2;
206  int32 k, n;
207 
208  E_INFO("Computing transitive closure for null transitions\n");
209 
210  /* If our caller didn't give us a list of null-transitions,
211  make such a list. Just loop through all the FSG states,
212  and all the null-transitions in that state (which are kept in
213  their own hash table). */
214  if (nulls == NULL) {
215  int i;
216  for (i = 0; i < fsg->n_state; ++i) {
217  hash_iter_t *itor;
218  hash_table_t *null_trans = fsg->trans[i].null_trans;
219  if (null_trans == NULL)
220  continue;
221  for (itor = hash_table_iter(null_trans);
222  itor != NULL; itor = hash_table_iter_next(itor)) {
223  nulls = glist_add_ptr(nulls, hash_entry_val(itor->ent));
224  }
225  }
226  }
227 
228  /*
229  * Probably not the most efficient closure implementation, in general, but
230  * probably reasonably efficient for a sparse null transition matrix.
231  */
232  n = 0;
233  do {
234  updated = FALSE;
235 
236  for (gn1 = nulls; gn1; gn1 = gnode_next(gn1)) {
237  hash_iter_t *itor;
238 
239  tl1 = (fsg_link_t *) gnode_ptr(gn1);
240  assert(tl1->wid < 0);
241 
242  if (fsg->trans[tl1->to_state].null_trans == NULL)
243  continue;
244 
245  for (itor =
246  hash_table_iter(fsg->trans[tl1->to_state].null_trans);
247  itor; itor = hash_table_iter_next(itor)) {
248 
249  tl2 = (fsg_link_t *) hash_entry_val(itor->ent);
250 
251  k = fsg_model_null_trans_add(fsg,
252  tl1->from_state,
253  tl2->to_state,
254  tl1->logs2prob +
255  tl2->logs2prob);
256  if (k >= 0) {
257  updated = TRUE;
258  if (k > 0) {
259  nulls = glist_add_ptr(nulls, (void *)
260  fsg_model_null_trans
261  (fsg, tl1->from_state,
262  tl2->to_state));
263  n++;
264  }
265  }
266  }
267  }
268  } while (updated);
269 
270  E_INFO("%d null transitions added\n", n);
271 
272  return nulls;
273 }
274 
275 glist_t
276 fsg_model_trans(fsg_model_t * fsg, int32 i, int32 j)
277 {
278  void *val;
279 
280  if (fsg->trans[i].trans == NULL)
281  return NULL;
282  if (hash_table_lookup_bkey(fsg->trans[i].trans, (char const *) &j,
283  sizeof(j), &val) < 0)
284  return NULL;
285  return (glist_t) val;
286 }
287 
288 fsg_link_t *
289 fsg_model_null_trans(fsg_model_t * fsg, int32 i, int32 j)
290 {
291  void *val;
292 
293  if (fsg->trans[i].null_trans == NULL)
294  return NULL;
295  if (hash_table_lookup_bkey(fsg->trans[i].null_trans, (char const *) &j,
296  sizeof(j), &val) < 0)
297  return NULL;
298  return (fsg_link_t *) val;
299 }
300 
302 fsg_model_arcs(fsg_model_t * fsg, int32 i)
303 {
304  fsg_arciter_t *itor;
305 
306  if (fsg->trans[i].trans == NULL && fsg->trans[i].null_trans == NULL)
307  return NULL;
308  itor = ckd_calloc(1, sizeof(*itor));
309  if (fsg->trans[i].null_trans)
310  itor->null_itor = hash_table_iter(fsg->trans[i].null_trans);
311  if (fsg->trans[i].trans)
312  itor->itor = hash_table_iter(fsg->trans[i].trans);
313  if (itor->itor != NULL)
314  itor->gn = hash_entry_val(itor->itor->ent);
315  return itor;
316 }
317 
318 fsg_link_t *
319 fsg_arciter_get(fsg_arciter_t * itor)
320 {
321  /* Iterate over non-null arcs first. */
322  if (itor->gn)
323  return (fsg_link_t *) gnode_ptr(itor->gn);
324  else if (itor->null_itor)
325  return (fsg_link_t *) hash_entry_val(itor->null_itor->ent);
326  else
327  return NULL;
328 }
329 
331 fsg_arciter_next(fsg_arciter_t * itor)
332 {
333  /* Iterate over non-null arcs first. */
334  if (itor->gn) {
335  itor->gn = gnode_next(itor->gn);
336  /* Move to the next destination arc. */
337  if (itor->gn == NULL) {
338  itor->itor = hash_table_iter_next(itor->itor);
339  if (itor->itor != NULL)
340  itor->gn = hash_entry_val(itor->itor->ent);
341  else if (itor->null_itor == NULL)
342  goto stop_iteration;
343  }
344  }
345  else {
346  if (itor->null_itor == NULL)
347  goto stop_iteration;
348  itor->null_itor = hash_table_iter_next(itor->null_itor);
349  if (itor->null_itor == NULL)
350  goto stop_iteration;
351  }
352  return itor;
353  stop_iteration:
354  fsg_arciter_free(itor);
355  return NULL;
356 
357 }
358 
359 void
360 fsg_arciter_free(fsg_arciter_t * itor)
361 {
362  if (itor == NULL)
363  return;
364  hash_table_iter_free(itor->null_itor);
365  hash_table_iter_free(itor->itor);
366  ckd_free(itor);
367 }
368 
369 int
370 fsg_model_word_id(fsg_model_t * fsg, char const *word)
371 {
372  int wid;
373 
374  /* Search for an existing word matching this. */
375  for (wid = 0; wid < fsg->n_word; ++wid) {
376  if (0 == strcmp(fsg->vocab[wid], word))
377  break;
378  }
379  /* If not found, add this to the vocab. */
380  if (wid == fsg->n_word)
381  return -1;
382  return wid;
383 }
384 
385 int
386 fsg_model_word_add(fsg_model_t * fsg, char const *word)
387 {
388  int wid, old_size;
389 
390  /* Search for an existing word matching this. */
391  wid = fsg_model_word_id(fsg, word);
392  /* If not found, add this to the vocab. */
393  if (wid == -1) {
394  wid = fsg->n_word;
395  if (fsg->n_word == fsg->n_word_alloc) {
396  old_size = fsg->n_word_alloc;
397  fsg->n_word_alloc += 10;
398  fsg->vocab = ckd_realloc(fsg->vocab,
399  fsg->n_word_alloc *
400  sizeof(*fsg->vocab));
401  if (fsg->silwords)
402  fsg->silwords =
403  bitvec_realloc(fsg->silwords, old_size,
404  fsg->n_word_alloc);
405  if (fsg->altwords)
406  fsg->altwords =
407  bitvec_realloc(fsg->altwords, old_size,
408  fsg->n_word_alloc);
409  }
410  ++fsg->n_word;
411  fsg->vocab[wid] = ckd_salloc(word);
412  }
413  return wid;
414 }
415 
416 int
417 fsg_model_add_silence(fsg_model_t * fsg, char const *silword,
418  int state, float32 silprob)
419 {
420  int32 logsilp;
421  int n_trans, silwid, src;
422 
423  E_INFO("Adding silence transitions for %s to FSG\n", silword);
424 
425  silwid = fsg_model_word_add(fsg, silword);
426  logsilp = (int32) (logmath_log(fsg->lmath, silprob) * fsg->lw);
427  if (fsg->silwords == NULL)
428  fsg->silwords = bitvec_alloc(fsg->n_word_alloc);
429  bitvec_set(fsg->silwords, silwid);
430 
431  n_trans = 0;
432  if (state == -1) {
433  for (src = 0; src < fsg->n_state; src++) {
434  fsg_model_trans_add(fsg, src, src, logsilp, silwid);
435  ++n_trans;
436  }
437  }
438  else {
439  fsg_model_trans_add(fsg, state, state, logsilp, silwid);
440  ++n_trans;
441  }
442 
443  E_INFO("Added %d silence word transitions\n", n_trans);
444  return n_trans;
445 }
446 
447 int
448 fsg_model_add_alt(fsg_model_t * fsg, char const *baseword,
449  char const *altword)
450 {
451  int i, basewid, altwid;
452  int ntrans;
453 
454  /* FIXME: This will get slow, eventually... */
455  for (basewid = 0; basewid < fsg->n_word; ++basewid)
456  if (0 == strcmp(fsg->vocab[basewid], baseword))
457  break;
458  if (basewid == fsg->n_word) {
459  E_ERROR("Base word %s not present in FSG vocabulary!\n", baseword);
460  return -1;
461  }
462  altwid = fsg_model_word_add(fsg, altword);
463  if (fsg->altwords == NULL)
464  fsg->altwords = bitvec_alloc(fsg->n_word_alloc);
465  bitvec_set(fsg->altwords, altwid);
466  if (fsg_model_is_filler(fsg, basewid)) {
467  if (fsg->silwords == NULL)
468  fsg->silwords = bitvec_alloc(fsg->n_word_alloc);
469  bitvec_set(fsg->silwords, altwid);
470  }
471 
472  E_DEBUG(2, ("Adding alternate word transitions (%s,%s) to FSG\n",
473  baseword, altword));
474 
475  /* Look for all transitions involving baseword and duplicate them. */
476  /* FIXME: This will also get slow, eventually... */
477  ntrans = 0;
478  for (i = 0; i < fsg->n_state; ++i) {
479  hash_iter_t *itor;
480  if (fsg->trans[i].trans == NULL)
481  continue;
482  for (itor = hash_table_iter(fsg->trans[i].trans); itor;
483  itor = hash_table_iter_next(itor)) {
484  glist_t trans;
485  gnode_t *gn;
486 
487  trans = hash_entry_val(itor->ent);
488  for (gn = trans; gn; gn = gnode_next(gn)) {
489  fsg_link_t *fl = gnode_ptr(gn);
490  if (fl->wid == basewid) {
491  fsg_link_t *link;
492 
493  /* Create transition object */
494  link = listelem_malloc(fsg->link_alloc);
495  link->from_state = fl->from_state;
496  link->to_state = fl->to_state;
497  link->logs2prob = fl->logs2prob; /* FIXME!!!??? */
498  link->wid = altwid;
499 
500  trans = glist_add_ptr(trans, (void *) link);
501  ++ntrans;
502  }
503  }
504  hash_entry_val(itor->ent) = trans;
505  }
506  }
507 
508  E_DEBUG(2, ("Added %d alternate word transitions\n", ntrans));
509  return ntrans;
510 }
511 
512 
513 fsg_model_t *
514 fsg_model_init(char const *name, logmath_t * lmath, float32 lw,
515  int32 n_state)
516 {
517  fsg_model_t *fsg;
518 
519  /* Allocate basic stuff. */
520  fsg = ckd_calloc(1, sizeof(*fsg));
521  fsg->refcount = 1;
522  fsg->link_alloc = listelem_alloc_init(sizeof(fsg_link_t));
523  fsg->lmath = lmath;
524  fsg->name = name ? ckd_salloc(name) : NULL;
525  fsg->n_state = n_state;
526  fsg->lw = lw;
527 
528  fsg->trans = ckd_calloc(fsg->n_state, sizeof(*fsg->trans));
529 
530  return fsg;
531 }
532 
533 fsg_model_t *
534 fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw)
535 {
536  fsg_model_t *fsg;
537  hash_table_t *vocab;
538  hash_iter_t *itor;
539  int32 lastwid;
540  char **wordptr;
541  char *lineptr;
542  char *fsgname;
543  int32 lineno;
544  int32 n, i, j;
545  int n_state, n_trans, n_null_trans;
546  glist_t nulls;
547  float32 p;
548 
549  lineno = 0;
550  vocab = hash_table_new(32, FALSE);
551  wordptr = NULL;
552  lineptr = NULL;
553  nulls = NULL;
554  fsgname = NULL;
555  fsg = NULL;
556 
557  /* Scan upto FSG_BEGIN header */
558  for (;;) {
559  n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
560  if (n < 0) {
561  E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL);
562  goto parse_error;
563  }
564 
565  if ((strcmp(wordptr[0], FSG_MODEL_BEGIN_DECL) == 0)) {
566  if (n > 2) {
567  E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n",
568  lineno);
569  goto parse_error;
570  }
571  break;
572  }
573  }
574  /* Save FSG name, or it will get clobbered below :(.
575  * If name is missing, try the default.
576  */
577  if (n == 2) {
578  fsgname = ckd_salloc(wordptr[1]);
579  }
580  else {
581  E_WARN("FSG name is missing\n");
582  fsgname = ckd_salloc("unknown");
583  }
584 
585  /* Read #states */
586  n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
587  if ((n != 2)
588  || ((strcmp(wordptr[0], FSG_MODEL_N_DECL) != 0)
589  && (strcmp(wordptr[0], FSG_MODEL_NUM_STATES_DECL) != 0))
590  || (sscanf(wordptr[1], "%d", &n_state) != 1)
591  || (n_state <= 0)) {
592  E_ERROR
593  ("Line[%d]: #states declaration line missing or malformed\n",
594  lineno);
595  goto parse_error;
596  }
597 
598  /* Now create the FSG. */
599  fsg = fsg_model_init(fsgname, lmath, lw, n_state);
600  ckd_free(fsgname);
601  fsgname = NULL;
602 
603  /* Read start state */
604  n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
605  if ((n != 2)
606  || ((strcmp(wordptr[0], FSG_MODEL_S_DECL) != 0)
607  && (strcmp(wordptr[0], FSG_MODEL_START_STATE_DECL) != 0))
608  || (sscanf(wordptr[1], "%d", &(fsg->start_state)) != 1)
609  || (fsg->start_state < 0)
610  || (fsg->start_state >= fsg->n_state)) {
611  E_ERROR
612  ("Line[%d]: start state declaration line missing or malformed\n",
613  lineno);
614  goto parse_error;
615  }
616 
617  /* Read final state */
618  n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
619  if ((n != 2)
620  || ((strcmp(wordptr[0], FSG_MODEL_F_DECL) != 0)
621  && (strcmp(wordptr[0], FSG_MODEL_FINAL_STATE_DECL) != 0))
622  || (sscanf(wordptr[1], "%d", &(fsg->final_state)) != 1)
623  || (fsg->final_state < 0)
624  || (fsg->final_state >= fsg->n_state)) {
625  E_ERROR
626  ("Line[%d]: final state declaration line missing or malformed\n",
627  lineno);
628  goto parse_error;
629  }
630 
631  /* Read transitions */
632  lastwid = 0;
633  n_trans = n_null_trans = 0;
634  for (;;) {
635  int32 wid, tprob;
636 
637  n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
638  if (n <= 0) {
639  E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
640  lineno);
641  goto parse_error;
642  }
643 
644  if ((strcmp(wordptr[0], FSG_MODEL_END_DECL) == 0)) {
645  break;
646  }
647 
648  if ((strcmp(wordptr[0], FSG_MODEL_T_DECL) == 0)
649  || (strcmp(wordptr[0], FSG_MODEL_TRANSITION_DECL) == 0)) {
650 
651 
652  if (((n != 4) && (n != 5))
653  || (sscanf(wordptr[1], "%d", &i) != 1)
654  || (sscanf(wordptr[2], "%d", &j) != 1)
655  || (i < 0) || (i >= fsg->n_state)
656  || (j < 0) || (j >= fsg->n_state)) {
657  E_ERROR
658  ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
659  lineno);
660  goto parse_error;
661  }
662 
663  p = atof_c(wordptr[3]);
664  if ((p <= 0.0) || (p > 1.0)) {
665  E_ERROR
666  ("Line[%d]: transition spec malformed; Expecting float as transition probability\n",
667  lineno);
668  goto parse_error;
669  }
670  }
671  else {
672  E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
673  lineno);
674  goto parse_error;
675  }
676 
677  tprob = (int32) (logmath_log(lmath, p) * fsg->lw);
678  /* Add word to "dictionary". */
679  if (n > 4) {
680  if (hash_table_lookup_int32(vocab, wordptr[4], &wid) < 0) {
681  (void) hash_table_enter_int32(vocab,
682  ckd_salloc(wordptr[4]),
683  lastwid);
684  wid = lastwid;
685  ++lastwid;
686  }
687  fsg_model_trans_add(fsg, i, j, tprob, wid);
688  ++n_trans;
689  }
690  else {
691  if (fsg_model_null_trans_add(fsg, i, j, tprob) == 1) {
692  ++n_null_trans;
693  nulls =
694  glist_add_ptr(nulls, fsg_model_null_trans(fsg, i, j));
695  }
696  }
697  }
698 
699  E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n",
700  fsg->n_state, hash_table_inuse(vocab), n_trans, n_null_trans);
701 
702 
703  /* Now create a string table from the "dictionary" */
704  fsg->n_word = hash_table_inuse(vocab);
705  fsg->n_word_alloc = fsg->n_word + 10; /* Pad it a bit. */
706  fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab));
707  for (itor = hash_table_iter(vocab); itor;
708  itor = hash_table_iter_next(itor)) {
709  char const *word = hash_entry_key(itor->ent);
710  int32 wid = (int32) (long) hash_entry_val(itor->ent);
711  fsg->vocab[wid] = (char *) word;
712  }
713  hash_table_free(vocab);
714 
715  /* Do transitive closure on null transitions */
716  nulls = fsg_model_null_trans_closure(fsg, nulls);
717  glist_free(nulls);
718 
719  ckd_free(lineptr);
720  ckd_free(wordptr);
721 
722  return fsg;
723 
724  parse_error:
725  for (itor = hash_table_iter(vocab); itor;
726  itor = hash_table_iter_next(itor))
727  ckd_free((char *) hash_entry_key(itor->ent));
728  glist_free(nulls);
729  hash_table_free(vocab);
730  ckd_free(fsgname);
731  ckd_free(lineptr);
732  ckd_free(wordptr);
733  fsg_model_free(fsg);
734  return NULL;
735 }
736 
737 
738 fsg_model_t *
739 fsg_model_readfile(const char *file, logmath_t * lmath, float32 lw)
740 {
741  FILE *fp;
742  fsg_model_t *fsg;
743 
744  if ((fp = fopen(file, "r")) == NULL) {
745  E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file);
746  return NULL;
747  }
748  fsg = fsg_model_read(fp, lmath, lw);
749  fclose(fp);
750  return fsg;
751 }
752 
753 fsg_model_t *
754 fsg_model_retain(fsg_model_t * fsg)
755 {
756  ++fsg->refcount;
757  return fsg;
758 }
759 
760 static void
761 trans_list_free(fsg_model_t * fsg, int32 i)
762 {
763  hash_iter_t *itor;
764 
765  /* FIXME (maybe): FSG links will all get freed when we call
766  * listelem_alloc_free() so don't bother freeing them explicitly
767  * here. */
768  if (fsg->trans[i].trans) {
769  for (itor = hash_table_iter(fsg->trans[i].trans);
770  itor; itor = hash_table_iter_next(itor)) {
771  glist_t gl = (glist_t) hash_entry_val(itor->ent);
772  glist_free(gl);
773  }
774  }
775  hash_table_free(fsg->trans[i].trans);
776  hash_table_free(fsg->trans[i].null_trans);
777 }
778 
779 int
780 fsg_model_free(fsg_model_t * fsg)
781 {
782  int i;
783 
784  if (fsg == NULL)
785  return 0;
786 
787  if (--fsg->refcount > 0)
788  return fsg->refcount;
789 
790  for (i = 0; i < fsg->n_word; ++i)
791  ckd_free(fsg->vocab[i]);
792  for (i = 0; i < fsg->n_state; ++i)
793  trans_list_free(fsg, i);
794  ckd_free(fsg->trans);
795  ckd_free(fsg->vocab);
797  bitvec_free(fsg->silwords);
798  bitvec_free(fsg->altwords);
799  ckd_free(fsg->name);
800  ckd_free(fsg);
801  return 0;
802 }
803 
804 
805 void
806 fsg_model_write(fsg_model_t * fsg, FILE * fp)
807 {
808  int32 i;
809 
810  fprintf(fp, "%s %s\n", FSG_MODEL_BEGIN_DECL,
811  fsg->name ? fsg->name : "");
812  fprintf(fp, "%s %d\n", FSG_MODEL_NUM_STATES_DECL, fsg->n_state);
813  fprintf(fp, "%s %d\n", FSG_MODEL_START_STATE_DECL, fsg->start_state);
814  fprintf(fp, "%s %d\n", FSG_MODEL_FINAL_STATE_DECL, fsg->final_state);
815 
816  for (i = 0; i < fsg->n_state; i++) {
817  fsg_arciter_t *itor;
818 
819  for (itor = fsg_model_arcs(fsg, i); itor;
820  itor = fsg_arciter_next(itor)) {
821  fsg_link_t *tl = fsg_arciter_get(itor);
822 
823  fprintf(fp, "%s %d %d %f %s\n", FSG_MODEL_TRANSITION_DECL,
824  tl->from_state, tl->to_state,
825  logmath_exp(fsg->lmath,
826  (int32) (tl->logs2prob / fsg->lw)),
827  (tl->wid < 0) ? "" : fsg_model_word_str(fsg, tl->wid));
828  }
829  }
830 
831  fprintf(fp, "%s\n", FSG_MODEL_END_DECL);
832 
833  fflush(fp);
834 }
835 
836 void
837 fsg_model_writefile(fsg_model_t * fsg, char const *file)
838 {
839  FILE *fp;
840 
841  assert(fsg);
842 
843  E_INFO("Writing FSG file '%s'\n", file);
844 
845  if ((fp = fopen(file, "w")) == NULL) {
846  E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file);
847  return;
848  }
849 
850  fsg_model_write(fsg, fp);
851 
852  fclose(fp);
853 }
854 
855 static void
856 fsg_model_write_fsm_trans(fsg_model_t * fsg, int i, FILE * fp)
857 {
858  fsg_arciter_t *itor;
859 
860  for (itor = fsg_model_arcs(fsg, i); itor;
861  itor = fsg_arciter_next(itor)) {
862  fsg_link_t *tl = fsg_arciter_get(itor);
863  fprintf(fp, "%d %d %s %f\n",
864  tl->from_state, tl->to_state,
865  (tl->wid < 0) ? "<eps>" : fsg_model_word_str(fsg, tl->wid),
866  -logmath_log_to_ln(fsg->lmath, tl->logs2prob / fsg->lw));
867  }
868 }
869 
870 void
871 fsg_model_write_fsm(fsg_model_t * fsg, FILE * fp)
872 {
873  int i;
874 
875  /* Write transitions from initial state first. */
876  fsg_model_write_fsm_trans(fsg, fsg_model_start_state(fsg), fp);
877 
878  /* Other states. */
879  for (i = 0; i < fsg->n_state; i++) {
880  if (i == fsg_model_start_state(fsg))
881  continue;
882  fsg_model_write_fsm_trans(fsg, i, fp);
883  }
884 
885  /* Final state. */
886  fprintf(fp, "%d 0\n", fsg_model_final_state(fsg));
887 
888  fflush(fp);
889 }
890 
891 void
892 fsg_model_writefile_fsm(fsg_model_t * fsg, char const *file)
893 {
894  FILE *fp;
895 
896  assert(fsg);
897 
898  E_INFO("Writing FSM file '%s'\n", file);
899 
900  if ((fp = fopen(file, "w")) == NULL) {
901  E_ERROR_SYSTEM("Failed to open fsm file '%s' for writing", file);
902  return;
903  }
904 
905  fsg_model_write_fsm(fsg, fp);
906 
907  fclose(fp);
908 }
909 
910 void
911 fsg_model_write_symtab(fsg_model_t * fsg, FILE * file)
912 {
913  int i;
914 
915  fprintf(file, "<eps> 0\n");
916  for (i = 0; i < fsg_model_n_word(fsg); ++i) {
917  fprintf(file, "%s %d\n", fsg_model_word_str(fsg, i), i + 1);
918  }
919  fflush(file);
920 }
921 
922 void
923 fsg_model_writefile_symtab(fsg_model_t * fsg, char const *file)
924 {
925  FILE *fp;
926 
927  assert(fsg);
928 
929  E_INFO("Writing FSM symbol table '%s'\n", file);
930 
931  if ((fp = fopen(file, "w")) == NULL) {
932  E_ERROR("Failed to open symbol table '%s' for writing", file);
933  return;
934  }
935 
936  fsg_model_write_symtab(fsg, fp);
937 
938  fclose(fp);
939 }
#define E_ERROR_SYSTEM(...)
Print error text; Call perror(&quot;&quot;);.
Definition: err.h:99
SPHINXBASE_EXPORT int32 hash_table_lookup_int32(hash_table_t *h, const char *key, int32 *val)
Look up a 32-bit integer value in a hash table.
Definition: hash_table.c:322
int32 start_state
Must be in the range [0..n_state-1].
Definition: fsg_model.h:109
SPHINXBASE_EXPORT void * hash_table_enter_bkey(hash_table_t *h, const char *key, size_t len, void *val)
Like hash_table_enter, but with an explicitly specified key length, instead of a NULL-terminated, C-style key string.
Definition: hash_table.c:535
Miscellaneous useful string functions.
#define E_INFO(...)
Print logging information to standard error stream.
Definition: err.h:114
int refcount
Reference count.
Definition: fsg_model.h:100
hash_entry_t * ent
Current entry in that table.
Definition: hash_table.h:170
int32 final_state
Must be in the range [0..n_state-1].
Definition: fsg_model.h:110
int32 n_word_alloc
Number of words allocated in vocab.
Definition: fsg_model.h:103
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition: ckd_alloc.h:248
#define E_ERROR(...)
Print error message to error log.
Definition: err.h:104
#define hash_table_enter_int32(h, k, v)
Add a 32-bit integer value to a hash table.
Definition: hash_table.h:228
SPHINXBASE_EXPORT int32 hash_table_lookup_bkey(hash_table_t *h, const char *key, size_t len, void **val)
Like hash_lookup, but with an explicitly specified key length, instead of a NULL-terminated, C-style key string.
Definition: hash_table.c:337
float32 lw
Language weight that&#39;s been applied to transition logprobs.
Definition: fsg_model.h:111
#define E_DEBUG(level, x)
Print debugging information to standard error stream.
Definition: err.h:144
listelem_alloc_t * link_alloc
Allocator for FSG links.
Definition: fsg_model.h:114
#define listelem_malloc(le)
Allocate a list element and return pointer to it.
Sphinx&#39;s memory allocation/deallocation routines.
SPHINXBASE_EXPORT void hash_table_iter_free(hash_iter_t *itor)
Delete an unfinished iterator.
Definition: hash_table.c:682
int32 n_word
Number of unique words in this FSG.
Definition: fsg_model.h:102
SPHINXBASE_EXPORT int logmath_log(logmath_t *lmath, float64 p)
Convert linear floating point number to integer log in base B.
Definition: logmath.c:447
A node in a generic list.
Definition: glist.h:100
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter(hash_table_t *h)
Start iterating over key-value pairs in a hash table.
Definition: hash_table.c:646
#define ckd_salloc(ptr)
Macro for ckd_salloc
Definition: ckd_alloc.h:264
#define hash_entry_val(e)
Access macros.
Definition: hash_table.h:175
Basic type definitions used in Sphinx.
SPHINXBASE_EXPORT hash_table_t * hash_table_new(int32 size, int32 casearg)
Allocate a new hash table for a given expected size.
Definition: hash_table.c:158
Adjacency list (opaque) for a state in an FSG.
Definition: fsg_model.c:56
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition: ckd_alloc.c:244
SPHINXBASE_EXPORT glist_t glist_add_ptr(glist_t g, void *ptr)
Create and prepend a new list node, with the given user-defined data, at the HEAD of the given generi...
Definition: glist.c:74
SPHINXBASE_EXPORT void hash_table_free(hash_table_t *h)
Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...
Definition: hash_table.c:688
SPHINXBASE_EXPORT float64 logmath_log_to_ln(logmath_t *lmath, int logb_p)
Convert integer log in base B to natural log (in floating point).
Definition: logmath.c:468
SPHINXBASE_EXPORT double atof_c(char const *str)
Locale independent version of atof().
Definition: strfuncs.c:55
SPHINXBASE_EXPORT char * fread_line(FILE *stream, size_t *out_len)
Read a line of arbitrary length from a file and return it as a newly allocated string.
Definition: pio.c:377
Implementation of arc iterator.
Definition: fsg_model.c:64
SPHINXBASE_EXPORT void glist_free(glist_t g)
Free the given generic list; user-defined data contained within is not automatically freed...
Definition: glist.c:133
#define gnode_ptr(g)
Head of a list of gnodes.
Definition: glist.h:109
Implementation of logging routines.
#define E_WARN(...)
Print warning message to error log.
Definition: err.h:109
int32 n_state
number of states in FSG
Definition: fsg_model.h:108
SPHINXBASE_EXPORT int32 str2words(char *line, char **wptr, int32 n_wptr)
Convert a line to an array of &quot;words&quot;, based on whitespace separators.
Definition: strfuncs.c:123
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter_next(hash_iter_t *itor)
Get the next key-value pair in iteration.
Definition: hash_table.c:656
SPHINXBASE_EXPORT listelem_alloc_t * listelem_alloc_init(size_t elemsize)
Initialize and return a list element allocator.
bitvec_t * altwords
Indicates which words are pronunciation alternates.
Definition: fsg_model.h:106
trans_list_t * trans
Transitions out of each state, if any.
Definition: fsg_model.h:113
SPHINXBASE_EXPORT bitvec_t * bitvec_realloc(bitvec_t *vec, size_t old_len, size_t new_len)
Resize a bit vector, clear the remaining bits.
Definition: bitvec.c:64
Hash table implementation.
#define bitvec_set(v, b)
Set the b-th bit of bit vector v.
Definition: bitvec.h:95
#define E_FATAL(...)
Exit with non-zero status after error message.
Definition: err.h:81
Word level FSG definition.
Definition: fsg_model.h:99
SPHINXBASE_EXPORT void * hash_table_replace_bkey(hash_table_t *h, const char *key, size_t len, void *val)
Like hash_table_replace, but with an explicitly specified key length, instead of a NULL-terminated...
Definition: hash_table.c:548
bitvec_t * silwords
Indicates which words are silence/fillers.
Definition: fsg_model.h:105
SPHINXBASE_EXPORT float64 logmath_exp(logmath_t *lmath, int logb_p)
Convert integer log in base B to linear floating point.
Definition: logmath.c:456
#define ckd_realloc(ptr, sz)
Macro for ckd_realloc
Definition: ckd_alloc.h:258
An implementation of bit vectors.
logmath_t * lmath
Pointer to log math computation object.
Definition: fsg_model.h:107
#define bitvec_alloc(n)
Allocate a bit vector, all bits are clear.
Definition: bitvec.h:75
file IO related operations.
#define bitvec_free(v)
Free a bit vector.
Definition: bitvec.h:87
char * name
A unique string identifier for this FSG.
Definition: fsg_model.h:101
SPHINXBASE_EXPORT void listelem_alloc_free(listelem_alloc_t *le)
Finalize and release all memory associated with a list element allocator.
char ** vocab
Vocabulary for this FSG.
Definition: fsg_model.h:104