PocketSphinx  5prealpha
dict.h
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 #ifndef _S3_DICT_H_
39 #define _S3_DICT_H_
40 
45 /* SphinxBase headers. */
46 #include <sphinxbase/hash_table.h>
47 
48 /* Local headers. */
49 #include "s3types.h"
50 #include "bin_mdef.h"
51 #include "pocketsphinx_export.h"
52 
53 #define S3DICT_INC_SZ 4096
54 
55 #ifdef __cplusplus
56 extern "C" {
57 #endif
58 
/** A structure for one dictionary word. */
63 typedef struct {
64  char *word; /**< Ascii word string. */
65  s3cipid_t *ciphone; /**< Pronunciation (indexed by the dict_pron()/dict_*_phone() macros). */
66  int32 pronlen; /**< Pronunciation length. */
67  s3wid_t alt; /**< Next alternative pronunciation id, NOT_S3WID if none. */
68  s3wid_t basewid; /**< Base pronunciation id. */
69 } dictword_t;
70 
/** A structure for a dictionary. */
76 typedef struct {
77  int refcnt; /**< NOTE(review): presumably the reference count managed by dict_retain()/dict_free() - confirm in dict.c. */
78  bin_mdef_t *mdef; /**< Model definition used for phone IDs; NULL if none used. */
79  dictword_t *word; /**< Array of entries in dictionary. */
80  hash_table_t *ht; /**< Hash table for mapping word strings to word ids. */
81  int32 max_words; /**< Number of entries allocated in dict, including empty slots. */
82  int32 n_word; /**< Number of occupied entries in dict, i.e. excluding empty slots. */
83  int32 filler_start; /**< First filler word id (read from filler dict). */
84  int32 filler_end; /**< Last filler word id (read from filler dict). */
85  s3wid_t startwid; /**< FOR INTERNAL-USE ONLY. */
86  s3wid_t finishwid; /**< FOR INTERNAL-USE ONLY. */
87  s3wid_t silwid; /**< FOR INTERNAL-USE ONLY. */
88  int nocase; /**< NOTE(review): presumably non-zero when word lookup ignores case - confirm in dict.c. */
89 } dict_t;
90 
91 
/**
 * Initialize a new dictionary.
 *
 * @param config Command-line/configuration object.
 * @param mdef   Model definition; NOTE(review): presumably the one kept in
 *               dict_t.mdef and used for phone IDs - confirm in dict.c.
 * @return Pointer to the new dict_t (failure value is not visible in this
 *         header - see dict.c).
 */
103 dict_t *dict_init(cmd_ln_t *config,
104  bin_mdef_t *mdef
105  );
106 
/**
 * Write dictionary to a file.
 *
 * @param dict     Dictionary to write.
 * @param filename Name of the output file.
 * @param format   Output format selector (accepted values are defined in dict.c).
 * @return Integer status; the exact convention is not visible in this header.
 */
110 int dict_write(dict_t *dict, char const *filename, char const *format);
111 
/**
 * Return word id for given word string if present.
 *
 * @param d    Dictionary to search.
 * @param word Word string to look up.
 * @return The word id; the value returned for an absent word is defined in
 *         dict.c (NOTE(review): presumably an invalid-id sentinel - confirm).
 */
113 POCKETSPHINX_EXPORT
114 s3wid_t dict_wordid(dict_t *d, const char *word);
115 
/**
 * Return 1 if w is a filler word, 0 if not.
 *
 * @param d Dictionary the word id belongs to.
 * @param w Word id to test.
 */
120 int dict_filler_word(dict_t *d,
121  s3wid_t w
122  );
123 
/**
 * Test if w is a "real" word.
 *
 * The precise definition of "real" is given with the implementation in
 * dict.c; it is not visible in this header.
 *
 * @param d Dictionary the word id belongs to.
 * @param w Word id to test.
 */
127 POCKETSPHINX_EXPORT
128 int dict_real_word(dict_t *d,
129  s3wid_t w
130  );
131 
/**
 * Add a word with the given ciphone pronunciation list to the dictionary.
 *
 * @param d    Dictionary to add to.
 * @param word Word string.
 * @param p    Ciphone pronunciation list (not modified through this pointer).
 * @param np   NOTE(review): presumably the number of entries in p - confirm in dict.c.
 * @return A word id (NOTE(review): presumably that of the new entry; the
 *         failure value is not visible in this header).
 */
136 s3wid_t dict_add_word(dict_t *d,
137  char const *word,
138  s3cipid_t const *p,
139  int32 np
140  );
141 
/**
 * Return value: CI phone string for the given word, phone position.
 *
 * @param d   Dictionary containing the word.
 * @param wid Word id.
 * @param pos Phone position within the word's pronunciation.
 */
145 const char *dict_ciphone_str(dict_t *d,
146  s3wid_t wid,
147  int32 pos
148  );
149 
/*
 * Accessor macros for dict_t and its word entries.
 *
 * None of these macros range-check the word id or phone index they are
 * given (dict_wordstr() only rejects negative ids), and several expand
 * their arguments more than once, so avoid arguments with side effects.
 */
/* Number of occupied entries in the dictionary (dict_t.n_word). */
151 #define dict_size(d) ((d)->n_word)
/*
 * Difference between the last and first filler word ids.
 * NOTE(review): filler_end is documented as the *last* filler id; if that
 * id is inclusive, this is one less than the number of ids in the filler
 * range - confirm the intended meaning against dict.c.
 */
152 #define dict_num_fillers(d) (dict_filler_end(d) - dict_filler_start(d))
153 
/*
 * Dictionary size minus the filler id span minus 2.
 * NOTE(review): the constant 2 presumably accounts for special entries
 * such as the start/finish words - confirm.
 */
158 #define dict_num_real_words(d) \
159  (dict_size(d) - (dict_filler_end(d) - dict_filler_start(d)) - 2)
/* Base pronunciation id of word w. */
160 #define dict_basewid(d,w) ((d)->word[w].basewid)
/* Word string of w, or NULL when w is negative. */
161 #define dict_wordstr(d,w) ((w) < 0 ? NULL : (d)->word[w].word)
/* Word string of the base pronunciation of w. */
162 #define dict_basestr(d,w) ((d)->word[dict_basewid(d,w)].word)
/* Next alternative pronunciation id of w (dictword_t.alt). */
163 #define dict_nextalt(d,w) ((d)->word[w].alt)
/* Pronunciation length of w. */
164 #define dict_pronlen(d,w) ((d)->word[w].pronlen)
/* Ciphone at position p in the pronunciation of w. */
165 #define dict_pron(d,w,p) ((d)->word[w].ciphone[p])
/* First and last filler word ids. */
166 #define dict_filler_start(d) ((d)->filler_start)
167 #define dict_filler_end(d) ((d)->filler_end)
/* Ids stored in dict_t.startwid, dict_t.finishwid and dict_t.silwid. */
168 #define dict_startwid(d) ((d)->startwid)
169 #define dict_finishwid(d) ((d)->finishwid)
170 #define dict_silwid(d) ((d)->silwid)
/* True when the pronunciation of w has exactly one phone. */
171 #define dict_is_single_phone(d,w) ((d)->word[w].pronlen == 1)
/*
 * Positional phone accessors. dict_second_phone() and
 * dict_second_last_phone() index ciphone[1] and ciphone[pronlen - 2], so
 * they are only meaningful when pronlen is at least 2.
 */
172 #define dict_first_phone(d,w) ((d)->word[w].ciphone[0])
173 #define dict_second_phone(d,w) ((d)->word[w].ciphone[1])
174 #define dict_second_last_phone(d,w) ((d)->word[w].ciphone[(d)->word[w].pronlen - 2])
175 #define dict_last_phone(d,w) ((d)->word[w].ciphone[(d)->word[w].pronlen - 1])
176 
177 /* Hard-coded special words: fixed word strings for reserved dictionary entries. */
/* NOTE(review): presumably the entries behind dict_t.startwid, finishwid and silwid - confirm in dict.c. */
178 #define S3_START_WORD "<s>"
179 #define S3_FINISH_WORD "</s>"
180 #define S3_SILENCE_WORD "<sil>"
/* NOTE(review): presumably the placeholder string for unknown words - confirm usage. */
181 #define S3_UNKNOWN_WORD "<UNK>"
182 
/**
 * Handle a word that contains a trailing "(....)", i.e. a Sphinx-II style
 * alternative pronunciation specification.
 *
 * @param word Word string, passed as a non-const pointer.
 *             NOTE(review): presumably modified in place to drop the
 *             trailing portion - confirm in dict.c.
 * @return int32 result; its meaning is documented with the implementation
 *         in dict.c and is not visible in this header.
 */
190 int32 dict_word2basestr(char *word);
191 
/**
 * Retain a pointer to a dict_t.
 *
 * @param d Dictionary to retain.
 * @return A dict_t pointer (NOTE(review): presumably d itself - confirm in dict.c).
 */
196 dict_t *dict_retain(dict_t *d);
/**
 * Release a pointer to a dictionary.
 *
 * @param d Dictionary to release.
 * @return Integer result; its meaning is not visible in this header
 *         (NOTE(review): presumably the remaining reference count - confirm).
 */
200 int dict_free(dict_t *d);
201 
/**
 * Report a dictionary structure.
 *
 * @param d Dictionary to report on.
 */
203 void dict_report(dict_t *d
204  );
205 
206 #ifdef __cplusplus
207 }
208 #endif
209 
210 #endif
dict_t * dict_init(cmd_ln_t *config, bin_mdef_t *mdef)
Initialize a new dictionary.
Definition: dict.c:252
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
Definition: dict.c:399
char * word
Ascii word string.
Definition: dict.h:64
int dict_free(dict_t *d)
Release a pointer to a dictionary.
Definition: dict.c:468
Binary format model definition files, with support for heterogeneous topologies and variable-size N-p...
int32 n_word
Number of occupied entries in dict, i.e., excluding empty slots
Definition: dict.h:82
const char * dict_ciphone_str(dict_t *d, s3wid_t wid, int32 pos)
Return value: CI phone string for the given word, phone position.
Definition: dict.c:69
dict_t * dict_retain(dict_t *d)
Retain a pointer to a dict_t.
Definition: dict.c:461
int32 filler_end
Last filler word id (read from filler dict)
Definition: dict.h:84
int dict_write(dict_t *dict, char const *filename, char const *format)
Write dictionary to a file.
Definition: dict.c:221
s3wid_t startwid
FOR INTERNAL-USE ONLY.
Definition: dict.h:85
s3wid_t silwid
FOR INTERNAL-USE ONLY.
Definition: dict.h:87
s3wid_t alt
Next alternative pronunciation id, NOT_S3WID if none.
Definition: dict.h:67
int16 s3cipid_t
Size definitions for more semantically meaningful units.
Definition: s3types.h:63
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
Definition: dict.c:413
Size definitions for semantically meaningful units.
a structure for one dictionary word.
Definition: dict.h:63
s3wid_t basewid
Base pronunciation id.
Definition: dict.h:68
s3wid_t finishwid
FOR INTERNAL-USE ONLY.
Definition: dict.h:86
a structure for a dictionary.
Definition: dict.h:76
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a "real" word, i.e.
Definition: dict.c:427
int32 filler_start
First filler word id (read from filler dict)
Definition: dict.h:83
int32 max_words
Number of entries allocated in dict, including empty slots
Definition: dict.h:81
s3cipid_t * ciphone
Pronunciation.
Definition: dict.h:65
dictword_t * word
Array of entries in dictionary.
Definition: dict.h:79
s3wid_t dict_add_word(dict_t *d, char const *word, s3cipid_t const *p, int32 np)
Add a word with the given ciphone pronunciation list to the dictionary.
Definition: dict.c:80
int32 pronlen
Pronunciation length.
Definition: dict.h:66
bin_mdef_t * mdef
Model definition used for phone IDs; NULL if none used.
Definition: dict.h:78
hash_table_t * ht
Hash table for mapping word strings to word ids.
Definition: dict.h:80
void dict_report(dict_t *d)
Report a dictionary structure.
Definition: dict.c:499
int32 dict_word2basestr(char *word)
If the given word contains a trailing "(....)" (i.e., a Sphinx-II style alternative pronunciation spe...
Definition: dict.c:442