49 #include "jsgf_parser.h"
50 #include "jsgf_scanner.h"
52 extern int yyparse(
void *scanner,
jsgf_t * jsgf);
62 int rule_entry,
int rule_exit);
65 jsgf_atom_new(
char *name,
float weight)
111 if (jsgf->
parent == NULL) {
127 for (gn = jsgf->
searchpath; gn; gn = gnode_next(gn))
130 for (gn = jsgf->
links; gn; gn = gnode_next(gn))
149 jsgf_rhs_free(rhs->
alt);
150 for (gn = rhs->
atoms; gn; gn = gnode_next(gn))
170 rule = jsgf_define_rule(jsgf, NULL, rhs, 0);
171 rule_atom = jsgf_atom_new(rule->
name, 1.0);
177 return jsgf_atom_new(rule->
name, 1.0);
187 return jsgf_define_rule(jsgf, NULL, rhs, 0);
203 extract_grammar_name(
char *rule_name)
206 char *grammar_name =
ckd_salloc(rule_name + 1);
207 if ((dot_pos = strrchr(grammar_name + 1,
'.')) == NULL) {
222 jsgf_fullname(
jsgf_t * jsgf,
const char *name)
227 if (strchr(name + 1,
'.'))
232 sprintf(fullname,
"<%s.%s", jsgf->
name, name + 1);
237 jsgf_fullname_from_rule(
jsgf_rule_t * rule,
const char *name)
239 char *fullname, *grammar_name;
242 if (strchr(name + 1,
'.'))
246 if ((grammar_name = extract_grammar_name(rule->
name)) == NULL)
248 fullname =
ckd_malloc(strlen(grammar_name) + strlen(name) + 4);
249 sprintf(fullname,
"<%s.%s", grammar_name, name + 1);
258 importname2rulename(
char *importname)
262 char *secondlast_dotpos;
264 if ((last_dotpos = strrchr(rulename + 1,
'.')) != NULL) {
266 if ((secondlast_dotpos = strrchr(rulename + 1,
'.')) != NULL) {
268 *secondlast_dotpos =
'<';
269 secondlast_dotpos =
ckd_salloc(secondlast_dotpos);
271 return secondlast_dotpos;
284 #define RECURSIVE_NODE -2
296 int rule_entry,
int rule_exit)
302 lastnode = rule_entry;
305 for (gn = rhs->
atoms; gn; gn = gnode_next(gn)) {
308 if (jsgf_atom_is_rule(atom)) {
318 if (0 == strcmp(atom->
name,
"<NULL>")) {
319 if (gn == rhs->
atoms && gnode_next(gn) == NULL) {
320 if (rule_exit == NO_NODE) {
321 jsgf_add_link(grammar, atom,
322 lastnode, grammar->
nstate);
323 rule_exit = lastnode = grammar->
nstate;
327 jsgf_add_link(grammar, atom, lastnode, rule_exit);
332 else if (0 == strcmp(atom->
name,
"<VOID>")) {
337 fullname = jsgf_fullname_from_rule(rule, atom->
name);
339 (grammar->
rules, fullname, (
void **) &subrule) == -1) {
340 E_ERROR(
"Undefined rule in RHS: %s\n", fullname);
347 for (subnode = grammar->
rulestack; subnode;
348 subnode = gnode_next(subnode)) {
351 if (rule_stack_entry->
rule == subrule)
355 if (subnode != NULL) {
357 if (gnode_next(gn) != NULL) {
359 (
"Only right-recursion is permitted (in %s.%s)\n",
364 E_INFO(
"Right recursion %s %d => %d\n", atom->
name,
365 lastnode, rule_stack_entry->
entry);
366 jsgf_add_link(grammar, atom, lastnode,
367 rule_stack_entry->
entry);
371 lastnode = RECURSIVE_NODE;
377 int subruleexit = NO_NODE;
378 if (gnode_next(gn) == NULL && rule_exit >= 0)
379 subruleexit = rule_exit;
383 expand_rule(grammar, subrule, lastnode, subruleexit);
385 if (lastnode == NO_NODE)
396 if (gnode_next(gn) == NULL && rule_exit >= 0) {
397 exitstate = rule_exit;
400 exitstate = grammar->
nstate;
405 jsgf_add_link(grammar, atom, lastnode, exitstate);
406 lastnode = exitstate;
423 rule_stack_entry->
rule = rule;
424 rule_stack_entry->
entry = rule_entry;
428 for (rhs = rule->
rhs; rhs; rhs = rhs->
alt) {
431 lastnode = expand_rhs(grammar, rule, rhs, rule_entry, rule_exit);
433 if (lastnode == NO_NODE) {
436 else if (lastnode == RECURSIVE_NODE) {
441 else if (rule_exit == NO_NODE) {
445 assert(lastnode >= 0);
446 rule_exit = lastnode;
451 if (rule_exit == NO_NODE) {
452 rule_exit = rule_entry;
495 if ((dot_pos = strrchr(rule_name + 1,
'.')) == NULL) {
502 dot_pos - rule_name - 1)) {
526 logmath_t * lmath, float32 lw,
int do_closure)
531 int rule_entry, rule_exit;
534 for (gn = grammar->
links; gn; gn = gnode_next(gn)) {
538 grammar->
links = NULL;
543 rule_entry = grammar->
nstate++;
544 rule_exit = expand_rule(grammar, rule, rule_entry, NO_NODE);
547 if (rule_exit == NO_NODE) {
548 rule_exit = grammar->
nstate++;
549 jsgf_add_link(grammar, NULL, rule_entry, rule_exit);
552 fsg = fsg_model_init(rule->
name, lmath, lw, grammar->
nstate);
556 for (gn = grammar->
links; gn; gn = gnode_next(gn)) {
560 if (jsgf_atom_is_rule(link->
atom)) {
561 fsg_model_null_trans_add(fsg, link->
from, link->
to,
566 int wid = fsg_model_word_add(fsg, link->
atom->
name);
567 fsg_model_trans_add(fsg, link->
from, link->
to,
573 fsg_model_null_trans_add(fsg, link->
from, link->
to, 0);
577 nulls = fsg_model_null_trans_closure(fsg, NULL);
588 return jsgf_build_fsg_internal(grammar, rule, lmath, lw, TRUE);
595 return jsgf_build_fsg_internal(grammar, rule, lmath, lw, FALSE);
607 E_ERROR(
"Error parsing file: %s\n", file);
621 E_ERROR(
"No public rules found in %s\n", file);
638 E_ERROR(
"Error parsing input string\n");
653 E_ERROR(
"No public rules found in input string\n");
671 fsg_model_write(fsg, outfh);
689 sprintf(name,
"<%s.g%05d>", jsgf->
name,
690 hash_table_inuse(jsgf->
rules));
695 newname = jsgf_fullname(jsgf, name);
705 E_INFO(
"Defined rule: %s%s\n",
708 if (val != (
void *) rule) {
709 E_WARN(
"Multiply defined symbol: %s\n", name);
728 jsgf_rhs_free(rule->
rhs);
737 path_list_search(
glist_t paths,
char *path)
741 for (gn = paths; gn; gn = gnode_next(gn)) {
746 tmp = fopen(fullpath,
"r");
759 jsgf_import_rule(
jsgf_t * jsgf,
char *name)
761 char *c, *path, *newpath;
762 size_t namelen, packlen;
768 namelen = strlen(name);
770 strcpy(path, name + 1);
772 c = strrchr(path,
'.');
774 E_ERROR(
"Imported rule is not qualified: %s\n", name);
782 import_all = (strlen(name) > 2
783 && 0 == strcmp(name + namelen - 3,
".*>"));
786 for (c = path; *c; ++c)
789 strcat(path,
".gram");
790 newpath = path_list_search(jsgf->
searchpath, path);
791 if (newpath == NULL) {
792 E_ERROR(
"Failed to find grammar %s\n", path);
799 E_INFO(
"Importing %s from %s to %s\n", name, path, jsgf->
name);
805 E_INFO(
"Already imported %s\n", path);
813 if (val != (
void *) imp) {
814 E_WARN(
"Multiply imported file: %s\n", path);
825 char *rule_name = importname2rulename(name);
830 !strncmp(rule_name, rule->
name, packlen + 1);
834 rule_matches = !strcmp(rule_name, rule->
name);
842 c = strrchr(rule->
name,
'.');
844 newname = jsgf_fullname(jsgf, c);
846 E_INFO(
"Imported %s\n", newname);
848 jsgf_rule_retain(rule));
849 if (val != (
void *) rule) {
850 E_WARN(
"Multiply defined symbol: %s\n", newname);
864 jsgf_set_search_path(
jsgf_t * jsgf,
const char *filename)
868 #if !defined(_WIN32_WCE)
869 if ((jsgf_path = getenv(
"JSGF_PATH")) != NULL) {
873 while ((c = strchr(word,
':'))) {
903 yylex_init(&yyscanner);
904 if (filename == NULL) {
905 yyset_in(stdin, yyscanner);
908 in = fopen(filename,
"r");
913 yyset_in(in, yyscanner);
919 jsgf_set_search_path(jsgf, filename);
921 yyrv = yyparse(yyscanner, jsgf);
923 E_ERROR(
"Failed to parse JSGF grammar from '%s'\n",
924 filename ? filename :
"(stdin)");
926 yylex_destroy(yyscanner);
931 yylex_destroy(yyscanner);
944 yylex_init(&yyscanner);
945 buf = yy_scan_string(
string, yyscanner);
949 jsgf_set_search_path(jsgf, NULL);
951 yyrv = yyparse(yyscanner, jsgf);
953 E_ERROR(
"Failed to parse JSGF grammar from input string\n");
955 yy_delete_buffer(buf, yyscanner);
956 yylex_destroy(yyscanner);
959 yy_delete_buffer(buf, yyscanner);
960 yylex_destroy(yyscanner);
#define E_ERROR_SYSTEM(...)
Print error text, then call perror("").
int32 start_state
Must be in the range [0..n_state-1].
Miscellaneous useful string functions.
Internal definitions for JSGF grammar compiler.
#define E_INFO(...)
Print logging information to standard error stream.
hash_entry_t * ent
Current entry in that table.
SPHINXBASE_EXPORT int32 hash_table_lookup(hash_table_t *h, const char *key, void **val)
Look up a key in a hash table and optionally return the associated value.
#define jsgf_rule_iter_next(itor)
Advance an iterator to the next rule in the grammar.
int32 final_state
Must be in the range [0..n_state-1].
SPHINXBASE_EXPORT jsgf_rule_t * jsgf_get_rule(jsgf_t *grammar, const char *name)
Get a rule by name from a grammar.
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
#define E_ERROR(...)
Print error message to error log.
SPHINXBASE_EXPORT jsgf_t * jsgf_parse_string(const char *string, jsgf_t *parent)
Parse a JSGF grammar from a string.
Sphinx's memory allocation/deallocation routines.
glist_t links
Generated FSG links.
SPHINXBASE_EXPORT void hash_table_iter_free(hash_iter_t *itor)
Delete an unfinished iterator.
File names related operation.
SPHINXBASE_EXPORT jsgf_t * jsgf_parse_file(const char *filename, jsgf_t *parent)
Parse a JSGF grammar from a file.
SPHINXBASE_EXPORT int logmath_log(logmath_t *lmath, float64 p)
Convert linear floating point number to integer log in base B.
SPHINXBASE_EXPORT fsg_model_t * jsgf_build_fsg(jsgf_t *grammar, jsgf_rule_t *rule, logmath_t *lmath, float32 lw)
Build a Sphinx FSG object from a JSGF rule.
A node in a generic list.
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter(hash_table_t *h)
Start iterating over key-value pairs in a hash table.
SPHINXBASE_EXPORT int logmath_free(logmath_t *lmath)
Free a log table.
#define ckd_salloc(ptr)
Macro for ckd_salloc
#define hash_entry_val(e)
Access macros.
SPHINXBASE_EXPORT hash_table_t * hash_table_new(int32 size, int32 casearg)
Allocate a new hash table for a given expected size.
glist_t searchpath
List of directories to search for grammars.
SPHINXBASE_EXPORT char const * jsgf_rule_name(jsgf_rule_t *rule)
Get the rule name from a rule.
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
SPHINXBASE_EXPORT glist_t glist_add_ptr(glist_t g, void *ptr)
Create and prepend a new list node, with the given user-defined data, at the HEAD of the given generic list.
jsgf_rhs_t * alt
Linked list of alternates.
SPHINXBASE_EXPORT logmath_t * logmath_init(float64 base, int shift, int use_table)
Initialize a log math computation table.
SPHINXBASE_EXPORT jsgf_t * jsgf_grammar_new(jsgf_t *parent)
Create a new JSGF grammar.
glist_t rulestack
Stack of currently expanded rules.
SPHINXBASE_EXPORT void hash_table_free(hash_table_t *h)
Free the specified hash table; the caller is responsible for freeing the key strings pointed to by the table entries.
SPHINXBASE_EXPORT fsg_model_t * jsgf_read_file(const char *file, logmath_t *lmath, float32 lw)
Read JSGF from file and return FSG object from it.
int nstate
Number of generated states.
A note by ARCHAN at 20050510: Technically, what we use is a so-called "hash table with buckets", which is...
char * charset
JSGF charset (default UTF-8)
SPHINXBASE_EXPORT fsg_model_t * jsgf_read_string(const char *string, logmath_t *lmath, float32 lw)
Read JSGF from string and return FSG object from it.
SPHINXBASE_EXPORT glist_t glist_reverse(glist_t g)
Reverse the order of the given glist.
jsgf_rhs_t * rhs
Expansion.
SPHINXBASE_EXPORT void glist_free(glist_t g)
Free the given generic list; user-defined data contained within is not automatically freed...
SPHINXBASE_EXPORT char const * jsgf_grammar_name(jsgf_t *jsgf)
Get the grammar name from the file.
int is_public
Is this rule marked 'public'?
SPHINXBASE_EXPORT fsg_model_t * jsgf_build_fsg_raw(jsgf_t *grammar, jsgf_rule_t *rule, logmath_t *lmath, float32 lw)
Build a Sphinx FSG object from a JSGF rule.
SPHINXBASE_EXPORT gnode_t * gnode_free(gnode_t *gn, gnode_t *pred)
Free the given node, gn, of a glist, pred being its predecessor in the list.
#define gnode_ptr(g)
Head of a list of gnodes.
char * name
Rule name (NULL for an alternation/grouping)
Implementation of logging routines.
SPHINXBASE_EXPORT int jsgf_rule_public(jsgf_rule_t *rule)
Test if a rule is public or not.
int refcnt
Reference count.
SPHINXBASE_EXPORT void * hash_table_enter(hash_table_t *h, const char *key, void *val)
Try to add a new entry with given key and associated value to hash table h.
int entry
The entry-state for this expansion.
#define E_WARN(...)
Print warning message to error log.
jsgf_atom_t * atom
Name, tags, weight.
#define jsgf_rule_iter_rule(itor)
Get the current rule in a rule iterator.
char * version
JSGF version (from header)
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter_next(hash_iter_t *itor)
Get the next key-value pair in iteration.
#define ckd_malloc(sz)
Macro for ckd_malloc
glist_t atoms
Sequence of items.
SPHINXBASE_EXPORT char * string_join(const char *base,...)
Concatenate a NULL-terminated argument list of strings, returning a newly allocated string...
jsgf_t * parent
Parent grammar (if this is an imported one)
char * name
Rule or token name.
#define jsgf_rule_iter_free(itor)
Free a rule iterator (if the end hasn't been reached).
hash_table_t * rules
Defined or imported rules in this grammar.
jsgf_rule_t * rule
The rule being expanded.
Hash table implementation.
char * locale
JSGF locale (default C)
void * val
Key-length; the key string does not have to be a C-style NULL terminated string; it can have arbitrar...
Word level FSG definition.
SPHINXBASE_EXPORT void path2dirname(const char *path, char *dir)
Strip off the filename from the given path and copy the directory name into dir. The caller must have allocated dir.
SPHINXBASE_EXPORT void jsgf_grammar_free(jsgf_t *jsgf)
Free a JSGF grammar.
float weight
Weight (default 1)
hash_table_t * imports
Pointers to imported grammars.
SPHINXBASE_EXPORT jsgf_rule_t * jsgf_get_public_rule(jsgf_t *grammar)
Returns the first public rule of the grammar.
SPHINXBASE_EXPORT int jsgf_write_fsg(jsgf_t *grammar, jsgf_rule_t *rule, FILE *outfh)
Convert a JSGF rule to Sphinx FSG text form.
SPHINXBASE_EXPORT jsgf_rule_iter_t * jsgf_rule_iter(jsgf_t *grammar)
Get an iterator over all rules in a grammar.