42 #include <sphinxbase/pio.h>
43 #include <sphinxbase/strfuncs.h>
50 #define DEFAULT_NUM_PHONE (MAX_S3CIPID+1)
53 #define snprintf sprintf_s
56 extern const char *
const cmu6_lts_phone_table[];
59 dict_ciphone_id(
dict_t * d,
const char *str)
72 assert((wid >= 0) && (wid < d->n_word));
73 assert((pos >= 0) && (pos < d->word[wid].pronlen));
88 E_INFO(
"Reallocating to %d KiB for word entries\n",
98 wordp->
word = (
char *) ckd_salloc(word);
101 wword = ckd_salloc(word);
106 if (hash_table_lookup_int32(d->
ht, wword, &w) < 0) {
107 E_ERROR(
"Missing base word for: %s\n", word);
109 ckd_free(wordp->
word);
126 ckd_free(wordp->
word);
149 dict_read(FILE * fp,
dict_t * d)
157 size_t stralloc, phnalloc;
160 p = (
s3cipid_t *) ckd_calloc(maxwd + 4,
sizeof(*p));
161 wptr = (
char **) ckd_calloc(maxwd,
sizeof(
char *));
164 stralloc = phnalloc = 0;
165 for (li = lineiter_start(fp); li; li = lineiter_next(li)) {
167 if (0 == strncmp(li->buf,
"##", 2)
168 || 0 == strncmp(li->buf,
";;", 2))
171 if ((nwd = str2words(li->buf, wptr, maxwd)) < 0) {
173 nwd = str2words(li->buf, NULL, 0);
176 p = (
s3cipid_t *) ckd_realloc(p, (maxwd + 4) *
sizeof(*p));
177 wptr = (
char **) ckd_realloc(wptr, maxwd *
sizeof(*wptr));
184 E_ERROR(
"Line %d: No pronunciation for word '%s'; ignored\n",
191 for (i = 1; i < nwd; i++) {
192 p[i - 1] = dict_ciphone_id(d, wptr[i]);
193 if (NOT_S3CIPID(p[i - 1])) {
194 E_ERROR(
"Line %d: Phone '%s' is mising in the acoustic model; word '%s' ignored\n",
195 lineno, wptr[i], wptr[0]);
204 (
"Line %d: Failed to add the word '%s' (duplicate?); ignored\n",
207 stralloc += strlen(d->
word[w].
word);
212 E_INFO(
"Dictionary size %d, allocated %d KiB for strings, %d KiB for phones\n",
213 dict_size(d), (
int)stralloc / 1024, (
int)phnalloc / 1024);
226 if ((fh = fopen(filename,
"w")) == NULL) {
227 E_ERROR_SYSTEM(
"Failed to open '%s'", filename);
230 for (i = 0; i < dict->
n_word; ++i) {
235 for (phlen = j = 0; j < dict_pronlen(dict, i); ++j)
237 phones = ckd_calloc(1, phlen);
238 for (j = 0; j < dict_pronlen(dict, i); ++j) {
240 if (j != dict_pronlen(dict, i) - 1)
243 fprintf(fh,
"%-30s %s\n", dict_wordstr(dict, i), phones);
259 char const *dictfile = NULL, *fillerfile = NULL;
262 dictfile = cmd_ln_str_r(config,
"-dict");
263 fillerfile = cmd_ln_str_r(config,
"_fdict");
274 if ((fp = fopen(dictfile,
"r")) == NULL) {
275 E_ERROR_SYSTEM(
"Failed to open dictionary file '%s' for reading", dictfile);
278 for (li = lineiter_start(fp); li; li = lineiter_next(li)) {
279 if (0 != strncmp(li->buf,
"##", 2)
280 && 0 != strncmp(li->buf,
";;", 2))
283 fseek(fp, 0L, SEEK_SET);
288 if ((fp2 = fopen(fillerfile,
"r")) == NULL) {
289 E_ERROR_SYSTEM(
"Failed to open filler dictionary file '%s' for reading", fillerfile);
293 for (li = lineiter_start(fp2); li; li = lineiter_next(li)) {
294 if (0 != strncmp(li->buf,
"##", 2)
295 && 0 != strncmp(li->buf,
";;", 2))
298 fseek(fp2, 0L, SEEK_SET);
308 (n + S3DICT_INC_SZ < MAX_S3WID) ? n + S3DICT_INC_SZ : MAX_S3WID;
309 if (n >= MAX_S3WID) {
310 E_ERROR(
"Number of words in dictionaries (%d) exceeds limit (%d)\n", n,
318 E_INFO(
"Allocating %d * %d bytes (%d KiB) for word entries\n",
327 if (config && cmd_ln_exists_r(config,
"-dictcase"))
328 d->nocase = cmd_ln_boolean_r(config,
"-dictcase");
333 E_INFO(
"Reading main dictionary: %s\n", dictfile);
336 E_INFO(
"%d words read\n", d->
n_word);
340 E_ERROR(
"Remove sentence start word '<s>' from the dictionary\n");
345 E_ERROR(
"Remove sentence start word '</s>' from the dictionary\n");
350 E_ERROR(
"Remove silence word '<sil>' from the dictionary\n");
358 E_INFO(
"Reading filler dictionary: %s\n", fillerfile);
364 sil = bin_mdef_silphone(mdef);
386 E_ERROR(
"Word '%s' must occur (only) in filler dictionary\n",
406 if (hash_table_lookup_int32(d->
ht, word, &w) < 0)
416 assert((w >= 0) && (w < d->n_word));
418 w = dict_basewid(d, w);
430 assert((w >= 0) && (w < d->n_word));
432 w = dict_basewid(d, w);
447 if (word[len - 1] ==
')') {
448 for (i = len - 2; (i > 0) && (word[i] !=
'('); --i);
479 for (i = 0; i < d->
n_word; i++) {
482 ckd_free((
void *) word->
word);
484 ckd_free((
void *) word->
ciphone);
488 ckd_free((
void *) d->
word);
490 hash_table_free(d->
ht);
493 ckd_free((
void *) d);
501 E_INFO_NOFN(
"Initialization of dict_t, report:\n");
502 E_INFO_NOFN(
"Max word: %d\n", d->
max_words);
503 E_INFO_NOFN(
"No of word: %d\n", d->
n_word);
dict_t * dict_init(cmd_ln_t *config, bin_mdef_t *mdef)
Initialize a new dictionary.
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
char * word
Ascii word string.
const char * bin_mdef_ciphone_str(bin_mdef_t *m, int32 ci)
In: ciphone id for which name wanted.
int dict_free(dict_t *d)
Release a pointer to a dictionary.
int bin_mdef_ciphone_id(bin_mdef_t *m, const char *ciphone)
Context-independent phone lookup.
Operations on dictionary.
#define BAD_S3WID
Dictionary word id.
int32 n_word
Number of occupied entries in dict, i.e., excluding empty slots.
const char * dict_ciphone_str(dict_t *d, s3wid_t wid, int32 pos)
Return value: CI phone string for the given word, phone position.
dict_t * dict_retain(dict_t *d)
Retain a pointer to a dict_t.
int32 filler_end
Last filler word id (read from filler dict)
int dict_write(dict_t *dict, char const *filename, char const *format)
Write dictionary to a file.
s3wid_t startwid
FOR INTERNAL-USE ONLY.
s3wid_t silwid
FOR INTERNAL-USE ONLY.
s3wid_t alt
Next alternative pronunciation id, NOT_S3WID if none.
int16 s3cipid_t
Size definitions for more semantically meaningful units.
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
a structure for one dictionary word.
s3wid_t basewid
Base pronunciation id.
s3wid_t finishwid
FOR INTERNAL-USE ONLY.
a structure for a dictionary.
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a "real" word, i.e., neither a filler word nor the sentence start/finish word.
int32 filler_start
First filler word id (read from filler dict)
int bin_mdef_ciphone_id_nocase(bin_mdef_t *m, const char *ciphone)
Case-insensitive context-independent phone lookup.
int bin_mdef_free(bin_mdef_t *m)
Release a pointer to a binary mdef.
int32 max_words
Number of entries allocated in dict, including empty slots.
s3cipid_t * ciphone
Pronunciation.
bin_mdef_t * bin_mdef_retain(bin_mdef_t *m)
Retain a pointer to a bin_mdef_t.
dictword_t * word
Array of entries in dictionary.
s3wid_t dict_add_word(dict_t *d, char const *word, s3cipid_t const *p, int32 np)
Add a word with the given ciphone pronunciation list to the dictionary.
int32 pronlen
Pronunciation length.
bin_mdef_t * mdef
Model definition used for phone IDs; NULL if none used.
#define dict_size(d)
Packaged macro access to dictionary members.
hash_table_t * ht
Hash table for mapping word strings to word ids.
void dict_report(dict_t *d)
Report a dictionary structure.
int32 dict_word2basestr(char *word)
If the given word contains a trailing "(....)" (i.e., a Sphinx-II style alternative pronunciation specification), strip that trailing portion from it.