SphinxBase  5prealpha
jsgf.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2007 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 #include <string.h>
39 #include <assert.h>
40 
41 #include "sphinxbase/ckd_alloc.h"
42 #include "sphinxbase/strfuncs.h"
43 #include "sphinxbase/hash_table.h"
44 #include "sphinxbase/filename.h"
45 #include "sphinxbase/err.h"
46 #include "sphinxbase/jsgf.h"
47 
48 #include "jsgf_internal.h"
49 #include "jsgf_parser.h"
50 #include "jsgf_scanner.h"
51 
52 extern int yyparse(void *scanner, jsgf_t * jsgf);
53 
61 static int expand_rule(jsgf_t * grammar, jsgf_rule_t * rule,
62  int rule_entry, int rule_exit);
63 
65 jsgf_atom_new(char *name, float weight)
66 {
67  jsgf_atom_t *atom;
68 
69  atom = ckd_calloc(1, sizeof(*atom));
70  atom->name = ckd_salloc(name);
71  atom->weight = weight;
72  return atom;
73 }
74 
75 int
76 jsgf_atom_free(jsgf_atom_t * atom)
77 {
78  if (atom == NULL)
79  return 0;
80  ckd_free(atom->name);
81  ckd_free(atom);
82  return 0;
83 }
84 
85 jsgf_t *
87 {
88  jsgf_t *grammar;
89 
90  grammar = ckd_calloc(1, sizeof(*grammar));
91  /* If this is an imported/subgrammar, then we will share a global
92  * namespace with the parent grammar. */
93  if (parent) {
94  grammar->rules = parent->rules;
95  grammar->imports = parent->imports;
96  grammar->searchpath = parent->searchpath;
97  grammar->parent = parent;
98  }
99  else {
100  grammar->rules = hash_table_new(64, 0);
101  grammar->imports = hash_table_new(16, 0);
102  }
103 
104  return grammar;
105 }
106 
107 void
109 {
110  /* FIXME: Probably should just use refcounting instead. */
111  if (jsgf->parent == NULL) {
112  hash_iter_t *itor;
113  gnode_t *gn;
114 
115  for (itor = hash_table_iter(jsgf->rules); itor;
116  itor = hash_table_iter_next(itor)) {
117  ckd_free((char *) itor->ent->key);
118  jsgf_rule_free((jsgf_rule_t *) itor->ent->val);
119  }
120  hash_table_free(jsgf->rules);
121  for (itor = hash_table_iter(jsgf->imports); itor;
122  itor = hash_table_iter_next(itor)) {
123  ckd_free((char *) itor->ent->key);
124  jsgf_grammar_free((jsgf_t *) itor->ent->val);
125  }
126  hash_table_free(jsgf->imports);
127  for (gn = jsgf->searchpath; gn; gn = gnode_next(gn))
128  ckd_free(gnode_ptr(gn));
129  glist_free(jsgf->searchpath);
130  for (gn = jsgf->links; gn; gn = gnode_next(gn))
131  ckd_free(gnode_ptr(gn));
132  glist_free(jsgf->links);
133  }
134  ckd_free(jsgf->name);
135  ckd_free(jsgf->version);
136  ckd_free(jsgf->charset);
137  ckd_free(jsgf->locale);
138  ckd_free(jsgf);
139 }
140 
141 static void
142 jsgf_rhs_free(jsgf_rhs_t * rhs)
143 {
144  gnode_t *gn;
145 
146  if (rhs == NULL)
147  return;
148 
149  jsgf_rhs_free(rhs->alt);
150  for (gn = rhs->atoms; gn; gn = gnode_next(gn))
151  jsgf_atom_free(gnode_ptr(gn));
152  glist_free(rhs->atoms);
153  ckd_free(rhs);
154 }
155 
156 jsgf_atom_t *
157 jsgf_kleene_new(jsgf_t * jsgf, jsgf_atom_t * atom, int plus)
158 {
159  jsgf_rule_t *rule;
160  jsgf_atom_t *rule_atom;
161  jsgf_rhs_t *rhs;
162 
163  /* Generate an "internal" rule of the form (<NULL> | <name> <g0006>) */
164  /* Or if plus is true, (<name> | <name> <g0006>) */
165  rhs = ckd_calloc(1, sizeof(*rhs));
166  if (plus)
167  rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new(atom->name, 1.0));
168  else
169  rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new("<NULL>", 1.0));
170  rule = jsgf_define_rule(jsgf, NULL, rhs, 0);
171  rule_atom = jsgf_atom_new(rule->name, 1.0);
172  rhs = ckd_calloc(1, sizeof(*rhs));
173  rhs->atoms = glist_add_ptr(NULL, rule_atom);
174  rhs->atoms = glist_add_ptr(rhs->atoms, atom);
175  rule->rhs->alt = rhs;
176 
177  return jsgf_atom_new(rule->name, 1.0);
178 }
179 
180 jsgf_rule_t *
181 jsgf_optional_new(jsgf_t * jsgf, jsgf_rhs_t * exp)
182 {
183  jsgf_rhs_t *rhs = ckd_calloc(1, sizeof(*rhs));
184  jsgf_atom_t *atom = jsgf_atom_new("<NULL>", 1.0);
185  rhs->alt = exp;
186  rhs->atoms = glist_add_ptr(NULL, atom);
187  return jsgf_define_rule(jsgf, NULL, rhs, 0);
188 }
189 
190 void
191 jsgf_add_link(jsgf_t * grammar, jsgf_atom_t * atom, int from, int to)
192 {
193  jsgf_link_t *link;
194 
195  link = ckd_calloc(1, sizeof(*link));
196  link->from = from;
197  link->to = to;
198  link->atom = atom;
199  grammar->links = glist_add_ptr(grammar->links, link);
200 }
201 
202 static char *
203 extract_grammar_name(char *rule_name)
204 {
205  char *dot_pos;
206  char *grammar_name = ckd_salloc(rule_name + 1);
207  if ((dot_pos = strrchr(grammar_name + 1, '.')) == NULL) {
208  ckd_free(grammar_name);
209  return NULL;
210  }
211  *dot_pos = '\0';
212  return grammar_name;
213 }
214 
215 char const *
217 {
218  return jsgf->name;
219 }
220 
221 static char *
222 jsgf_fullname(jsgf_t * jsgf, const char *name)
223 {
224  char *fullname;
225 
226  /* Check if it is already qualified */
227  if (strchr(name + 1, '.'))
228  return ckd_salloc(name);
229 
230  /* Skip leading < in name */
231  fullname = ckd_malloc(strlen(jsgf->name) + strlen(name) + 4);
232  sprintf(fullname, "<%s.%s", jsgf->name, name + 1);
233  return fullname;
234 }
235 
236 static char *
237 jsgf_fullname_from_rule(jsgf_rule_t * rule, const char *name)
238 {
239  char *fullname, *grammar_name;
240 
241  /* Check if it is already qualified */
242  if (strchr(name + 1, '.'))
243  return ckd_salloc(name);
244 
245  /* Skip leading < in name */
246  if ((grammar_name = extract_grammar_name(rule->name)) == NULL)
247  return ckd_salloc(name);
248  fullname = ckd_malloc(strlen(grammar_name) + strlen(name) + 4);
249  sprintf(fullname, "<%s.%s", grammar_name, name + 1);
250  ckd_free(grammar_name);
251 
252  return fullname;
253 }
254 
255 /* Extract as rulename everything after the secondlast dot, if existent.
256  * Because everything before the secondlast dot is the path-specification. */
257 static char *
258 importname2rulename(char *importname)
259 {
260  char *rulename = ckd_salloc(importname);
261  char *last_dotpos;
262  char *secondlast_dotpos;
263 
264  if ((last_dotpos = strrchr(rulename + 1, '.')) != NULL) {
265  *last_dotpos = '\0';
266  if ((secondlast_dotpos = strrchr(rulename + 1, '.')) != NULL) {
267  *last_dotpos = '.';
268  *secondlast_dotpos = '<';
269  secondlast_dotpos = ckd_salloc(secondlast_dotpos);
270  ckd_free(rulename);
271  return secondlast_dotpos;
272  }
273  else {
274  *last_dotpos = '.';
275  return rulename;
276  }
277  }
278  else {
279  return rulename;
280  }
281 }
282 
283 #define NO_NODE -1
284 #define RECURSIVE_NODE -2
285 
294 static int
295 expand_rhs(jsgf_t * grammar, jsgf_rule_t * rule, jsgf_rhs_t * rhs,
296  int rule_entry, int rule_exit)
297 {
298  gnode_t *gn;
299  int lastnode;
300 
301  /* Last node expanded in this sequence. */
302  lastnode = rule_entry;
303 
304  /* Iterate over atoms in rhs and generate links/nodes */
305  for (gn = rhs->atoms; gn; gn = gnode_next(gn)) {
306  jsgf_atom_t *atom = gnode_ptr(gn);
307 
308  if (jsgf_atom_is_rule(atom)) {
309  jsgf_rule_t *subrule;
310  char *fullname;
311  gnode_t *subnode;
312  jsgf_rule_stack_t *rule_stack_entry = NULL;
313 
314  /* Special case for <NULL> and <VOID> pseudo-rules
315  If this is the only atom in the rhs, and it's the
316  first rhs in the rule, then emit a null transition,
317  creating an exit state if needed. */
318  if (0 == strcmp(atom->name, "<NULL>")) {
319  if (gn == rhs->atoms && gnode_next(gn) == NULL) {
320  if (rule_exit == NO_NODE) {
321  jsgf_add_link(grammar, atom,
322  lastnode, grammar->nstate);
323  rule_exit = lastnode = grammar->nstate;
324  ++grammar->nstate;
325  }
326  else {
327  jsgf_add_link(grammar, atom, lastnode, rule_exit);
328  }
329  }
330  continue;
331  }
332  else if (0 == strcmp(atom->name, "<VOID>")) {
333  /* Make this entire RHS unspeakable */
334  return NO_NODE;
335  }
336 
337  fullname = jsgf_fullname_from_rule(rule, atom->name);
339  (grammar->rules, fullname, (void **) &subrule) == -1) {
340  E_ERROR("Undefined rule in RHS: %s\n", fullname);
341  ckd_free(fullname);
342  return NO_NODE;
343  }
344  ckd_free(fullname);
345 
346  /* Look for this subrule in the stack of expanded rules */
347  for (subnode = grammar->rulestack; subnode;
348  subnode = gnode_next(subnode)) {
349  rule_stack_entry =
350  (jsgf_rule_stack_t *) gnode_ptr(subnode);
351  if (rule_stack_entry->rule == subrule)
352  break;
353  }
354 
355  if (subnode != NULL) {
356  /* Allow right-recursion only. */
357  if (gnode_next(gn) != NULL) {
358  E_ERROR
359  ("Only right-recursion is permitted (in %s.%s)\n",
360  grammar->name, rule->name);
361  return NO_NODE;
362  }
363  /* Add a link back to the beginning of this rule instance */
364  E_INFO("Right recursion %s %d => %d\n", atom->name,
365  lastnode, rule_stack_entry->entry);
366  jsgf_add_link(grammar, atom, lastnode,
367  rule_stack_entry->entry);
368 
369  /* Let our caller know that this rhs didn't reach an
370  end state. */
371  lastnode = RECURSIVE_NODE;
372  }
373  else {
374  /* If this is the last atom in this rhs, link its
375  expansion to the parent rule's exit state.
376  Otherwise, create a new exit state for it. */
377  int subruleexit = NO_NODE;
378  if (gnode_next(gn) == NULL && rule_exit >= 0)
379  subruleexit = rule_exit;
380 
381  /* Expand the subrule */
382  lastnode =
383  expand_rule(grammar, subrule, lastnode, subruleexit);
384 
385  if (lastnode == NO_NODE)
386  return NO_NODE;
387  }
388  }
389  else {
390  /* An exit-state is created if this isn't the last atom
391  in the rhs, or if the containing rule doesn't have an
392  exit state yet.
393  Otherwise, the rhs's exit state becomes the containing
394  rule's exit state. */
395  int exitstate;
396  if (gnode_next(gn) == NULL && rule_exit >= 0) {
397  exitstate = rule_exit;
398  }
399  else {
400  exitstate = grammar->nstate;
401  ++grammar->nstate;
402  }
403 
404  /* Add a link for this token */
405  jsgf_add_link(grammar, atom, lastnode, exitstate);
406  lastnode = exitstate;
407  }
408  }
409 
410  return lastnode;
411 }
412 
413 static int
414 expand_rule(jsgf_t * grammar, jsgf_rule_t * rule, int rule_entry,
415  int rule_exit)
416 {
417  jsgf_rule_stack_t *rule_stack_entry;
418  jsgf_rhs_t *rhs;
419 
420  /* Push this rule onto the stack */
421  rule_stack_entry =
423  rule_stack_entry->rule = rule;
424  rule_stack_entry->entry = rule_entry;
425  grammar->rulestack = glist_add_ptr(grammar->rulestack,
426  rule_stack_entry);
427 
428  for (rhs = rule->rhs; rhs; rhs = rhs->alt) {
429  int lastnode;
430 
431  lastnode = expand_rhs(grammar, rule, rhs, rule_entry, rule_exit);
432 
433  if (lastnode == NO_NODE) {
434  return NO_NODE;
435  }
436  else if (lastnode == RECURSIVE_NODE) {
437  /* The rhs ended with right-recursion, i.e. a transition to
438  an earlier state. Nothing needs to happen at this level. */
439  ;
440  }
441  else if (rule_exit == NO_NODE) {
442  /* If this rule doesn't have an exit state yet, use the exit
443  state of its first right-hand-side.
444  All other right-hand-sides will use this exit state. */
445  assert(lastnode >= 0);
446  rule_exit = lastnode;
447  }
448  }
449 
450  /* If no exit-state was created, use the entry-state. */
451  if (rule_exit == NO_NODE) {
452  rule_exit = rule_entry;
453  }
454 
455  /* Pop this rule from the rule stack */
456  ckd_free(gnode_ptr(grammar->rulestack));
457  grammar->rulestack = gnode_free(grammar->rulestack, NULL);
458 
459  return rule_exit;
460 }
461 
464 {
465  return hash_table_iter(grammar->rules);
466 }
467 
468 jsgf_rule_t *
469 jsgf_get_rule(jsgf_t * grammar, char const *name)
470 {
471  void *val;
472  char *fullname;
473 
474  fullname = string_join("<", name, ">", NULL);
475  if (hash_table_lookup(grammar->rules, fullname, &val) < 0) {
476  ckd_free(fullname);
477  return NULL;
478  }
479  ckd_free(fullname);
480  return (jsgf_rule_t *) val;
481 }
482 
483 jsgf_rule_t *
485 {
486  jsgf_rule_iter_t *itor;
487  jsgf_rule_t *public_rule = NULL;
488 
489  for (itor = jsgf_rule_iter(grammar); itor;
490  itor = jsgf_rule_iter_next(itor)) {
491  jsgf_rule_t *rule = jsgf_rule_iter_rule(itor);
492  if (jsgf_rule_public(rule)) {
493  const char *rule_name = jsgf_rule_name(rule);
494  char *dot_pos;
495  if ((dot_pos = strrchr(rule_name + 1, '.')) == NULL) {
496  public_rule = rule;
497  jsgf_rule_iter_free(itor);
498  break;
499  }
500  if (0 ==
501  strncmp(rule_name + 1, jsgf_grammar_name(grammar),
502  dot_pos - rule_name - 1)) {
503  public_rule = rule;
504  jsgf_rule_iter_free(itor);
505  break;
506  }
507  }
508  }
509  return public_rule;
510 }
511 
512 char const *
514 {
515  return rule->name;
516 }
517 
518 int
520 {
521  return rule->is_public;
522 }
523 
524 static fsg_model_t *
525 jsgf_build_fsg_internal(jsgf_t * grammar, jsgf_rule_t * rule,
526  logmath_t * lmath, float32 lw, int do_closure)
527 {
528  fsg_model_t *fsg;
529  glist_t nulls;
530  gnode_t *gn;
531  int rule_entry, rule_exit;
532 
533  /* Clear previous links */
534  for (gn = grammar->links; gn; gn = gnode_next(gn)) {
535  ckd_free(gnode_ptr(gn));
536  }
537  glist_free(grammar->links);
538  grammar->links = NULL;
539  grammar->nstate = 0;
540 
541  /* Create the top-level entry state, and expand the
542  top-level rule. */
543  rule_entry = grammar->nstate++;
544  rule_exit = expand_rule(grammar, rule, rule_entry, NO_NODE);
545 
546  /* If no exit-state was created, create one. */
547  if (rule_exit == NO_NODE) {
548  rule_exit = grammar->nstate++;
549  jsgf_add_link(grammar, NULL, rule_entry, rule_exit);
550  }
551 
552  fsg = fsg_model_init(rule->name, lmath, lw, grammar->nstate);
553  fsg->start_state = rule_entry;
554  fsg->final_state = rule_exit;
555  grammar->links = glist_reverse(grammar->links);
556  for (gn = grammar->links; gn; gn = gnode_next(gn)) {
557  jsgf_link_t *link = gnode_ptr(gn);
558 
559  if (link->atom) {
560  if (jsgf_atom_is_rule(link->atom)) {
561  fsg_model_null_trans_add(fsg, link->from, link->to,
562  logmath_log(lmath,
563  link->atom->weight));
564  }
565  else {
566  int wid = fsg_model_word_add(fsg, link->atom->name);
567  fsg_model_trans_add(fsg, link->from, link->to,
568  logmath_log(lmath, link->atom->weight),
569  wid);
570  }
571  }
572  else {
573  fsg_model_null_trans_add(fsg, link->from, link->to, 0);
574  }
575  }
576  if (do_closure) {
577  nulls = fsg_model_null_trans_closure(fsg, NULL);
578  glist_free(nulls);
579  }
580 
581  return fsg;
582 }
583 
584 fsg_model_t *
586  logmath_t * lmath, float32 lw)
587 {
588  return jsgf_build_fsg_internal(grammar, rule, lmath, lw, TRUE);
589 }
590 
591 fsg_model_t *
593  logmath_t * lmath, float32 lw)
594 {
595  return jsgf_build_fsg_internal(grammar, rule, lmath, lw, FALSE);
596 }
597 
598 fsg_model_t *
599 jsgf_read_file(const char *file, logmath_t * lmath, float32 lw)
600 {
601  fsg_model_t *fsg;
602  jsgf_rule_t *rule;
603  jsgf_t *jsgf;
604  jsgf_rule_iter_t *itor;
605 
606  if ((jsgf = jsgf_parse_file(file, NULL)) == NULL) {
607  E_ERROR("Error parsing file: %s\n", file);
608  return NULL;
609  }
610 
611  rule = NULL;
612  for (itor = jsgf_rule_iter(jsgf); itor;
613  itor = jsgf_rule_iter_next(itor)) {
614  rule = jsgf_rule_iter_rule(itor);
615  if (jsgf_rule_public(rule)) {
616  jsgf_rule_iter_free(itor);
617  break;
618  }
619  }
620  if (rule == NULL) {
621  E_ERROR("No public rules found in %s\n", file);
622  return NULL;
623  }
624  fsg = jsgf_build_fsg(jsgf, rule, lmath, lw);
625  jsgf_grammar_free(jsgf);
626  return fsg;
627 }
628 
629 fsg_model_t *
630 jsgf_read_string(const char *string, logmath_t * lmath, float32 lw)
631 {
632  fsg_model_t *fsg;
633  jsgf_rule_t *rule;
634  jsgf_t *jsgf;
635  jsgf_rule_iter_t *itor;
636 
637  if ((jsgf = jsgf_parse_string(string, NULL)) == NULL) {
638  E_ERROR("Error parsing input string\n");
639  return NULL;
640  }
641 
642  rule = NULL;
643  for (itor = jsgf_rule_iter(jsgf); itor;
644  itor = jsgf_rule_iter_next(itor)) {
645  rule = jsgf_rule_iter_rule(itor);
646  if (jsgf_rule_public(rule)) {
647  jsgf_rule_iter_free(itor);
648  break;
649  }
650  }
651  if (rule == NULL) {
652  jsgf_grammar_free(jsgf);
653  E_ERROR("No public rules found in input string\n");
654  return NULL;
655  }
656  fsg = jsgf_build_fsg(jsgf, rule, lmath, lw);
657  jsgf_grammar_free(jsgf);
658  return fsg;
659 }
660 
661 
662 int
663 jsgf_write_fsg(jsgf_t * grammar, jsgf_rule_t * rule, FILE * outfh)
664 {
665  fsg_model_t *fsg;
666  logmath_t *lmath = logmath_init(1.0001, 0, 0);
667 
668  if ((fsg = jsgf_build_fsg_raw(grammar, rule, lmath, 1.0)) == NULL)
669  goto error_out;
670 
671  fsg_model_write(fsg, outfh);
672  logmath_free(lmath);
673  return 0;
674 
675  error_out:
676  logmath_free(lmath);
677  return -1;
678 }
679 
680 jsgf_rule_t *
681 jsgf_define_rule(jsgf_t * jsgf, char *name, jsgf_rhs_t * rhs,
682  int is_public)
683 {
684  jsgf_rule_t *rule;
685  void *val;
686 
687  if (name == NULL) {
688  name = ckd_malloc(strlen(jsgf->name) + 16);
689  sprintf(name, "<%s.g%05d>", jsgf->name,
690  hash_table_inuse(jsgf->rules));
691  }
692  else {
693  char *newname;
694 
695  newname = jsgf_fullname(jsgf, name);
696  name = newname;
697  }
698 
699  rule = ckd_calloc(1, sizeof(*rule));
700  rule->refcnt = 1;
701  rule->name = ckd_salloc(name);
702  rule->rhs = rhs;
703  rule->is_public = is_public;
704 
705  E_INFO("Defined rule: %s%s\n",
706  rule->is_public ? "PUBLIC " : "", rule->name);
707  val = hash_table_enter(jsgf->rules, name, rule);
708  if (val != (void *) rule) {
709  E_WARN("Multiply defined symbol: %s\n", name);
710  }
711  return rule;
712 }
713 
714 jsgf_rule_t *
715 jsgf_rule_retain(jsgf_rule_t * rule)
716 {
717  ++rule->refcnt;
718  return rule;
719 }
720 
721 int
722 jsgf_rule_free(jsgf_rule_t * rule)
723 {
724  if (rule == NULL)
725  return 0;
726  if (--rule->refcnt > 0)
727  return rule->refcnt;
728  jsgf_rhs_free(rule->rhs);
729  ckd_free(rule->name);
730  ckd_free(rule);
731  return 0;
732 }
733 
734 
735 /* FIXME: This should go in libsphinxutil */
736 static char *
737 path_list_search(glist_t paths, char *path)
738 {
739  gnode_t *gn;
740 
741  for (gn = paths; gn; gn = gnode_next(gn)) {
742  char *fullpath;
743  FILE *tmp;
744 
745  fullpath = string_join(gnode_ptr(gn), "/", path, NULL);
746  tmp = fopen(fullpath, "r");
747  if (tmp != NULL) {
748  fclose(tmp);
749  return fullpath;
750  }
751  else {
752  ckd_free(fullpath);
753  }
754  }
755  return NULL;
756 }
757 
758 jsgf_rule_t *
759 jsgf_import_rule(jsgf_t * jsgf, char *name)
760 {
761  char *c, *path, *newpath;
762  size_t namelen, packlen;
763  void *val;
764  jsgf_t *imp;
765  int import_all;
766 
767  /* Trim the leading and trailing <> */
768  namelen = strlen(name);
769  path = ckd_malloc(namelen - 2 + 6); /* room for a trailing .gram */
770  strcpy(path, name + 1);
771  /* Split off the first part of the name */
772  c = strrchr(path, '.');
773  if (c == NULL) {
774  E_ERROR("Imported rule is not qualified: %s\n", name);
775  ckd_free(path);
776  return NULL;
777  }
778  packlen = c - path;
779  *c = '\0';
780 
781  /* Look for import foo.* */
782  import_all = (strlen(name) > 2
783  && 0 == strcmp(name + namelen - 3, ".*>"));
784 
785  /* Construct a filename. */
786  for (c = path; *c; ++c)
787  if (*c == '.')
788  *c = '/';
789  strcat(path, ".gram");
790  newpath = path_list_search(jsgf->searchpath, path);
791  if (newpath == NULL) {
792  E_ERROR("Failed to find grammar %s\n", path);
793  ckd_free(path);
794  return NULL;
795  }
796  ckd_free(path);
797 
798  path = newpath;
799  E_INFO("Importing %s from %s to %s\n", name, path, jsgf->name);
800 
801  /* FIXME: Also, we need to make sure that path is fully qualified
802  * here, by adding any prefixes from jsgf->name to it. */
803  /* See if we have parsed it already */
804  if (hash_table_lookup(jsgf->imports, path, &val) == 0) {
805  E_INFO("Already imported %s\n", path);
806  imp = val;
807  ckd_free(path);
808  }
809  else {
810  /* If not, parse it. */
811  imp = jsgf_parse_file(path, jsgf);
812  val = hash_table_enter(jsgf->imports, path, imp);
813  if (val != (void *) imp) {
814  E_WARN("Multiply imported file: %s\n", path);
815  }
816  }
817  if (imp != NULL) {
818  hash_iter_t *itor;
819  /* Look for public rules matching rulename. */
820  for (itor = hash_table_iter(imp->rules); itor;
821  itor = hash_table_iter_next(itor)) {
822  hash_entry_t *he = itor->ent;
823  jsgf_rule_t *rule = hash_entry_val(he);
824  int rule_matches;
825  char *rule_name = importname2rulename(name);
826 
827  if (import_all) {
828  /* Match package name (symbol table is shared) */
829  rule_matches =
830  !strncmp(rule_name, rule->name, packlen + 1);
831  }
832  else {
833  /* Exact match */
834  rule_matches = !strcmp(rule_name, rule->name);
835  }
836  ckd_free(rule_name);
837  if (rule->is_public && rule_matches) {
838  void *val;
839  char *newname;
840 
841  /* Link this rule into the current namespace. */
842  c = strrchr(rule->name, '.');
843  assert(c != NULL);
844  newname = jsgf_fullname(jsgf, c);
845 
846  E_INFO("Imported %s\n", newname);
847  val = hash_table_enter(jsgf->rules, newname,
848  jsgf_rule_retain(rule));
849  if (val != (void *) rule) {
850  E_WARN("Multiply defined symbol: %s\n", newname);
851  }
852  if (!import_all) {
853  hash_table_iter_free(itor);
854  return rule;
855  }
856  }
857  }
858  }
859 
860  return NULL;
861 }
862 
863 static void
864 jsgf_set_search_path(jsgf_t * jsgf, const char *filename)
865 {
866  char *jsgf_path;
867 
868 #if !defined(_WIN32_WCE)
869  if ((jsgf_path = getenv("JSGF_PATH")) != NULL) {
870  char *word, *c;
871  /* FIXME: This should be a function in libsphinxbase. */
872  word = jsgf_path = ckd_salloc(jsgf_path);
873  while ((c = strchr(word, ':'))) {
874  *c = '\0';
875  jsgf->searchpath = glist_add_ptr(jsgf->searchpath, word);
876  word = c + 1;
877  }
878  jsgf->searchpath = glist_add_ptr(jsgf->searchpath, word);
879  jsgf->searchpath = glist_reverse(jsgf->searchpath);
880  return;
881  }
882 #endif
883 
884  if (!filename) {
885  jsgf->searchpath =
886  glist_add_ptr(jsgf->searchpath, ckd_salloc("."));
887  return;
888  }
889 
890  jsgf_path = ckd_salloc(filename);
891  path2dirname(filename, jsgf_path);
892  jsgf->searchpath = glist_add_ptr(jsgf->searchpath, jsgf_path);
893 }
894 
895 jsgf_t *
896 jsgf_parse_file(const char *filename, jsgf_t * parent)
897 {
898  yyscan_t yyscanner;
899  jsgf_t *jsgf;
900  int yyrv;
901  FILE *in = NULL;
902 
903  yylex_init(&yyscanner);
904  if (filename == NULL) {
905  yyset_in(stdin, yyscanner);
906  }
907  else {
908  in = fopen(filename, "r");
909  if (in == NULL) {
910  E_ERROR_SYSTEM("Failed to open %s for parsing", filename);
911  return NULL;
912  }
913  yyset_in(in, yyscanner);
914  }
915 
916  jsgf = jsgf_grammar_new(parent);
917 
918  if (!parent)
919  jsgf_set_search_path(jsgf, filename);
920 
921  yyrv = yyparse(yyscanner, jsgf);
922  if (yyrv != 0) {
923  E_ERROR("Failed to parse JSGF grammar from '%s'\n",
924  filename ? filename : "(stdin)");
925  jsgf_grammar_free(jsgf);
926  yylex_destroy(yyscanner);
927  return NULL;
928  }
929  if (in)
930  fclose(in);
931  yylex_destroy(yyscanner);
932 
933  return jsgf;
934 }
935 
936 jsgf_t *
937 jsgf_parse_string(const char *string, jsgf_t * parent)
938 {
939  yyscan_t yyscanner;
940  jsgf_t *jsgf;
941  int yyrv;
942  YY_BUFFER_STATE buf;
943 
944  yylex_init(&yyscanner);
945  buf = yy_scan_string(string, yyscanner);
946 
947  jsgf = jsgf_grammar_new(parent);
948  if (!parent)
949  jsgf_set_search_path(jsgf, NULL);
950 
951  yyrv = yyparse(yyscanner, jsgf);
952  if (yyrv != 0) {
953  E_ERROR("Failed to parse JSGF grammar from input string\n");
954  jsgf_grammar_free(jsgf);
955  yy_delete_buffer(buf, yyscanner);
956  yylex_destroy(yyscanner);
957  return NULL;
958  }
959  yy_delete_buffer(buf, yyscanner);
960  yylex_destroy(yyscanner);
961 
962  return jsgf;
963 }
#define E_ERROR_SYSTEM(...)
Print error text; Call perror(&quot;&quot;);.
Definition: err.h:99
int32 start_state
Must be in the range [0..n_state-1].
Definition: fsg_model.h:109
Miscellaneous useful string functions.
Internal definitions for JSGF grammar compiler.
#define E_INFO(...)
Print logging information to standard error stream.
Definition: err.h:114
hash_entry_t * ent
Current entry in that table.
Definition: hash_table.h:170
SPHINXBASE_EXPORT int32 hash_table_lookup(hash_table_t *h, const char *key, void **val)
Look up a key in a hash table and optionally return the associated value.
Definition: hash_table.c:302
#define jsgf_rule_iter_next(itor)
Advance an iterator to the next rule in the grammar.
Definition: jsgf.h:122
int32 final_state
Must be in the range [0..n_state-1].
Definition: fsg_model.h:110
SPHINXBASE_EXPORT jsgf_rule_t * jsgf_get_rule(jsgf_t *grammar, const char *name)
Get a rule by name from a grammar.
Definition: jsgf.c:469
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition: ckd_alloc.h:248
#define E_ERROR(...)
Print error message to error log.
Definition: err.h:104
SPHINXBASE_EXPORT jsgf_t * jsgf_parse_string(const char *string, jsgf_t *parent)
Parse a JSGF grammar from a string.
Definition: jsgf.c:937
Sphinx&#39;s memory allocation/deallocation routines.
glist_t links
Generated FSG links.
Definition: jsgf_internal.h:88
SPHINXBASE_EXPORT void hash_table_iter_free(hash_iter_t *itor)
Delete an unfinished iterator.
Definition: hash_table.c:682
File names related operation.
SPHINXBASE_EXPORT jsgf_t * jsgf_parse_file(const char *filename, jsgf_t *parent)
Parse a JSGF grammar from a file.
Definition: jsgf.c:896
SPHINXBASE_EXPORT int logmath_log(logmath_t *lmath, float64 p)
Convert linear floating point number to integer log in base B.
Definition: logmath.c:447
SPHINXBASE_EXPORT fsg_model_t * jsgf_build_fsg(jsgf_t *grammar, jsgf_rule_t *rule, logmath_t *lmath, float32 lw)
Build a Sphinx FSG object from a JSGF rule.
Definition: jsgf.c:585
A node in a generic list.
Definition: glist.h:100
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter(hash_table_t *h)
Start iterating over key-value pairs in a hash table.
Definition: hash_table.c:646
SPHINXBASE_EXPORT int logmath_free(logmath_t *lmath)
Free a log table.
Definition: logmath.c:342
#define ckd_salloc(ptr)
Macro for ckd_salloc
Definition: ckd_alloc.h:264
#define hash_entry_val(e)
Access macros.
Definition: hash_table.h:175
SPHINXBASE_EXPORT hash_table_t * hash_table_new(int32 size, int32 casearg)
Allocate a new hash table for a given expected size.
Definition: hash_table.c:158
glist_t searchpath
List of directories to search for grammars.
Definition: jsgf_internal.h:84
SPHINXBASE_EXPORT char const * jsgf_rule_name(jsgf_rule_t *rule)
Get the rule name from a rule.
Definition: jsgf.c:513
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition: ckd_alloc.c:244
SPHINXBASE_EXPORT glist_t glist_add_ptr(glist_t g, void *ptr)
Create and prepend a new list node, with the given user-defined data, at the HEAD of the given generi...
Definition: glist.c:74
char * name
Grammar name.
Definition: jsgf_internal.h:79
jsgf_rhs_t * alt
Linked list of alternates.
SPHINXBASE_EXPORT logmath_t * logmath_init(float64 base, int shift, int use_table)
Initialize a log math computation table.
Definition: logmath.c:62
SPHINXBASE_EXPORT jsgf_t * jsgf_grammar_new(jsgf_t *parent)
Create a new JSGF grammar.
Definition: jsgf.c:86
glist_t rulestack
Stack of currently expanded rules.
Definition: jsgf_internal.h:89
SPHINXBASE_EXPORT void hash_table_free(hash_table_t *h)
Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...
Definition: hash_table.c:688
SPHINXBASE_EXPORT fsg_model_t * jsgf_read_file(const char *file, logmath_t *lmath, float32 lw)
Read JSGF from file and return FSG object from it.
Definition: jsgf.c:599
int nstate
Number of generated states.
Definition: jsgf_internal.h:87
A note by ARCHAN at 20050510: Technically what we use is so-called &quot;hash table with buckets&quot; which is...
Definition: hash_table.h:149
char * charset
JSGF charset (default UTF-8)
Definition: jsgf_internal.h:77
SPHINXBASE_EXPORT fsg_model_t * jsgf_read_string(const char *string, logmath_t *lmath, float32 lw)
Read JSGF from string and return FSG object from it.
Definition: jsgf.c:630
SPHINXBASE_EXPORT glist_t glist_reverse(glist_t g)
Reverse the order of the given glist.
Definition: glist.c:169
jsgf_rhs_t * rhs
Expansion.
SPHINXBASE_EXPORT void glist_free(glist_t g)
Free the given generic list; user-defined data contained within is not automatically freed...
Definition: glist.c:133
SPHINXBASE_EXPORT char const * jsgf_grammar_name(jsgf_t *jsgf)
Get the grammar name from the file.
Definition: jsgf.c:216
int is_public
Is this rule marked &#39;public&#39;?
SPHINXBASE_EXPORT fsg_model_t * jsgf_build_fsg_raw(jsgf_t *grammar, jsgf_rule_t *rule, logmath_t *lmath, float32 lw)
Build a Sphinx FSG object from a JSGF rule.
Definition: jsgf.c:592
SPHINXBASE_EXPORT gnode_t * gnode_free(gnode_t *gn, gnode_t *pred)
Free the given node, gn, of a glist, pred being its predecessor in the list.
Definition: glist.c:257
#define gnode_ptr(g)
Head of a list of gnodes.
Definition: glist.h:109
char * name
Rule name (NULL for an alternation/grouping)
Implementation of logging routines.
SPHINXBASE_EXPORT int jsgf_rule_public(jsgf_rule_t *rule)
Test if a rule is public or not.
Definition: jsgf.c:519
int refcnt
Reference count.
Definition: jsgf_internal.h:99
SPHINXBASE_EXPORT void * hash_table_enter(hash_table_t *h, const char *key, void *val)
Try to add a new entry with given key and associated value to hash table h.
Definition: hash_table.c:501
int entry
The entry-state for this expansion.
Definition: jsgf_internal.h:95
#define E_WARN(...)
Print warning message to error log.
Definition: err.h:109
#define jsgf_rule_iter_rule(itor)
Get the current rule in a rule iterator.
Definition: jsgf.h:127
char * version
JSGF version (from header)
Definition: jsgf_internal.h:76
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter_next(hash_iter_t *itor)
Get the next key-value pair in iteration.
Definition: hash_table.c:656
#define ckd_malloc(sz)
Macro for ckd_malloc
Definition: ckd_alloc.h:253
glist_t atoms
Sequence of items.
SPHINXBASE_EXPORT char * string_join(const char *base,...)
Concatenate a NULL-terminated argument list of strings, returning a newly allocated string...
Definition: strfuncs.c:70
jsgf_t * parent
Parent grammar (if this is an imported one)
Definition: jsgf_internal.h:83
char * name
Rule or token name.
JSGF grammar compiler.
#define jsgf_rule_iter_free(itor)
Free a rule iterator (if the end hasn&#39;t been reached).
Definition: jsgf.h:132
hash_table_t * rules
Defined or imported rules in this grammar.
Definition: jsgf_internal.h:81
jsgf_rule_t * rule
The rule being expanded.
Definition: jsgf_internal.h:94
Hash table implementation.
char * locale
JSGF locale (default C)
Definition: jsgf_internal.h:78
void * val
Key-length; the key string does not have to be a C-style NULL terminated string; it can have arbitrar...
Definition: hash_table.h:155
Word level FSG definition.
Definition: fsg_model.h:99
SPHINXBASE_EXPORT void path2dirname(const char *path, char *dir)
Strip off filename from the given path and copy the directory name into dir Caller must have allocate...
Definition: filename.c:68
SPHINXBASE_EXPORT void jsgf_grammar_free(jsgf_t *jsgf)
Free a JSGF grammar.
Definition: jsgf.c:108
float weight
Weight (default 1)
hash_table_t * imports
Pointers to imported grammars.
Definition: jsgf_internal.h:82
SPHINXBASE_EXPORT jsgf_rule_t * jsgf_get_public_rule(jsgf_t *grammar)
Returns the first public rule of the grammar.
Definition: jsgf.c:484
SPHINXBASE_EXPORT int jsgf_write_fsg(jsgf_t *grammar, jsgf_rule_t *rule, FILE *outfh)
Convert a JSGF rule to Sphinx FSG text form.
Definition: jsgf.c:663
SPHINXBASE_EXPORT jsgf_rule_iter_t * jsgf_rule_iter(jsgf_t *grammar)
Get an iterator over all rules in a grammar.
Definition: jsgf.c:463