PocketSphinx  5prealpha
cmdln_macro.h
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2006 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 /* cmdln_macro.h - Command line definitions for PocketSphinx */
39 
40 #ifndef __PS_CMDLN_MACRO_H__
41 #define __PS_CMDLN_MACRO_H__
42 
43 #include <sphinxbase/cmd_ln.h>
44 #include <sphinxbase/feat.h>
45 #include <sphinxbase/fe.h>
46 
/**
 * Complete PocketSphinx option list.
 *
 * Expands to a comma-separated sequence: first the two function-like
 * macros waveform_to_cepstral_command_line_macro() and
 * cepstral_to_feature_command_line_macro() (not defined in this file;
 * presumably supplied by the sphinxbase fe.h / feat.h headers included
 * above -- TODO confirm), then the eight POCKETSPHINX_*_OPTIONS groups
 * in the order written here.
 *
 * The group macros are defined further down in this file; that works
 * because a macro body is only expanded at the point of use.
 * The expansion carries no trailing comma and no terminating entry.
 */
48 #define POCKETSPHINX_OPTIONS \
49  waveform_to_cepstral_command_line_macro(), \
50  cepstral_to_feature_command_line_macro(), \
51  POCKETSPHINX_ACMOD_OPTIONS, \
52  POCKETSPHINX_BEAM_OPTIONS, \
53  POCKETSPHINX_SEARCH_OPTIONS, \
54  POCKETSPHINX_DICT_OPTIONS, \
55  POCKETSPHINX_NGRAM_OPTIONS, \
56  POCKETSPHINX_FSG_OPTIONS, \
57  POCKETSPHINX_KWS_OPTIONS, \
58  POCKETSPHINX_DEBUG_OPTIONS
59 
/**
 * Logging and debugging options: -logfn, -debug, -mfclogdir, -rawlogdir
 * and -senlogdir.
 *
 * Each entry is a brace-enclosed initializer with four fields: the option
 * name, an ARG_* type constant, a third field that is NULL for every entry
 * in this group (presumably the default value -- TODO confirm), and the
 * help text.  The struct being initialized is not declared in this file
 * (presumably arg_t from sphinxbase/cmd_ln.h -- TODO confirm).
 *
 * No comma follows the last entry; the user of the macro supplies one
 * if more entries come after it.
 */
61 #define POCKETSPHINX_DEBUG_OPTIONS \
62  { "-logfn", \
63  ARG_STRING, \
64  NULL, \
65  "File to write log messages in" }, \
66  { "-debug", \
67  ARG_INT32, \
68  NULL, \
69  "Verbosity level for debugging messages" }, \
70  { "-mfclogdir", \
71  ARG_STRING, \
72  NULL, \
73  "Directory to log feature files to" \
74  }, \
75  { "-rawlogdir", \
76  ARG_STRING, \
77  NULL, \
78  "Directory to log raw audio files to" }, \
79  { "-senlogdir", \
80  ARG_STRING, \
81  NULL, \
82  "Directory to log senone score files to" \
83  }
84 
/**
 * Beam-width and phoneme-lookahead options.
 *
 * Entries: -beam, -wbeam, -pbeam, -lpbeam, -lponlybeam, -fwdflatbeam,
 * -fwdflatwbeam, -pl_window, -pl_beam, -pl_pbeam, -pl_pip, -pl_weight.
 * Each entry has four fields: option name, ARG_* type constant, a string
 * that is presumably the default value (TODO confirm against the struct
 * definition, which is not in this file), and the help text.
 *
 * Types are ARG_FLOAT64 except -pl_window (ARG_INT32) and -pl_pip
 * (ARG_FLOAT32).  Per the -beam help text, smaller values mean a wider
 * beam.
 *
 * NOTE(review): the last entry ends with a line-continuation backslash,
 * so the line that follows it is spliced into the macro definition and
 * is expected to stay blank.
 */
86 #define POCKETSPHINX_BEAM_OPTIONS \
87 { "-beam", \
88  ARG_FLOAT64, \
89  "1e-48", \
90  "Beam width applied to every frame in Viterbi search (smaller values mean wider beam)" }, \
91 { "-wbeam", \
92  ARG_FLOAT64, \
93  "7e-29", \
94  "Beam width applied to word exits" }, \
95 { "-pbeam", \
96  ARG_FLOAT64, \
97  "1e-48", \
98  "Beam width applied to phone transitions" }, \
99 { "-lpbeam", \
100  ARG_FLOAT64, \
101  "1e-40", \
102  "Beam width applied to last phone in words" }, \
103 { "-lponlybeam", \
104  ARG_FLOAT64, \
105  "7e-29", \
106  "Beam width applied to last phone in single-phone words" }, \
107 { "-fwdflatbeam", \
108  ARG_FLOAT64, \
109  "1e-64", \
110  "Beam width applied to every frame in second-pass flat search" }, \
111 { "-fwdflatwbeam", \
112  ARG_FLOAT64, \
113  "7e-29", \
114  "Beam width applied to word exits in second-pass flat search" }, \
115 { "-pl_window", \
116  ARG_INT32, \
117  "5", \
118  "Phoneme lookahead window size, in frames" }, \
119 { "-pl_beam", \
120  ARG_FLOAT64, \
121  "1e-10", \
122  "Beam width applied to phone loop search for lookahead" }, \
123 { "-pl_pbeam", \
124  ARG_FLOAT64, \
125  "1e-10", \
126  "Beam width applied to phone loop transitions for lookahead" }, \
127 { "-pl_pip", \
128  ARG_FLOAT32, \
129  "1.0", \
130  "Phone insertion penalty for phone loop" }, \
131 { "-pl_weight", \
132  ARG_FLOAT64, \
133  "3.0", \
134  "Weight for phoneme lookahead penalties" } \
135 
136 
/**
 * Search-control options other than beam widths.
 *
 * Entries: -compallsen, -fwdtree, -fwdflat, -bestpath, -backtrace
 * (all ARG_BOOLEAN) and -latsize, -maxwpf, -maxhmmpf, -min_endfr,
 * -fwdflatefwid, -fwdflatsfwin (all ARG_INT32).
 * Each entry has four fields: option name, ARG_* type constant, a string
 * that is presumably the default value (TODO confirm against the struct
 * definition, which is not in this file), and the help text.
 *
 * Per the help texts, -fwdtree, -fwdflat and -bestpath are the first,
 * second and third passes, and -1 for -maxwpf / -maxhmmpf means no
 * pruning.
 *
 * NOTE(review): the -fwdflatsfwin help string ends with a stray trailing
 * space.  No comma follows the last entry.
 */
137 #define POCKETSPHINX_SEARCH_OPTIONS \
138 { "-compallsen", \
139  ARG_BOOLEAN, \
140  "no", \
141  "Compute all senone scores in every frame (can be faster when there are many senones)" }, \
142 { "-fwdtree", \
143  ARG_BOOLEAN, \
144  "yes", \
145  "Run forward lexicon-tree search (1st pass)" }, \
146 { "-fwdflat", \
147  ARG_BOOLEAN, \
148  "yes", \
149  "Run forward flat-lexicon search over word lattice (2nd pass)" }, \
150 { "-bestpath", \
151  ARG_BOOLEAN, \
152  "yes", \
153  "Run bestpath (Dijkstra) search over word lattice (3rd pass)" }, \
154 { "-backtrace", \
155  ARG_BOOLEAN, \
156  "no", \
157  "Print results and backtraces to log." }, \
158 { "-latsize", \
159  ARG_INT32, \
160  "5000", \
161  "Initial backpointer table size" }, \
162 { "-maxwpf", \
163  ARG_INT32, \
164  "-1", \
165  "Maximum number of distinct word exits at each frame (or -1 for no pruning)" }, \
166 { "-maxhmmpf", \
167  ARG_INT32, \
168  "30000", \
169  "Maximum number of active HMMs to maintain at each frame (or -1 for no pruning)" }, \
170 { "-min_endfr", \
171  ARG_INT32, \
172  "0", \
173  "Nodes ignored in lattice construction if they persist for fewer than N frames" }, \
174 { "-fwdflatefwid", \
175  ARG_INT32, \
176  "4", \
177  "Minimum number of end frames for a word to be searched in fwdflat search" }, \
178 { "-fwdflatsfwin", \
179  ARG_INT32, \
180  "25", \
181  "Window of frames in lattice to search for successor words in fwdflat search " }
182 
/**
 * Keyphrase-spotting options: -keyphrase, -kws, -kws_plp, -kws_delay
 * and -kws_threshold.
 *
 * Each entry has four fields: option name, ARG_* type constant, a third
 * field that is either NULL or a string (presumably the default value --
 * TODO confirm against the struct definition, which is not in this file),
 * and the help text.  -keyphrase and -kws have NULL in the third field.
 *
 * No comma follows the last entry.
 */
184 #define POCKETSPHINX_KWS_OPTIONS \
185 { "-keyphrase", \
186  ARG_STRING, \
187  NULL, \
188  "Keyphrase to spot"}, \
189 { "-kws", \
190  ARG_STRING, \
191  NULL, \
192  "A file with keyphrases to spot, one per line"}, \
193 { "-kws_plp", \
194  ARG_FLOAT64, \
195  "1e-1", \
196  "Phone loop probability for keyphrase spotting" }, \
197 { "-kws_delay", \
198  ARG_INT32, \
199  "10", \
200  "Delay to wait for best detection score" }, \
201 { "-kws_threshold", \
202  ARG_FLOAT64, \
203  "1", \
204  "Threshold for p(hyp)/p(alternatives) ratio" }
205 
/**
 * Finite-state-grammar options: -fsg, -jsgf, -toprule, -fsgusealtpron
 * and -fsgusefiller.
 *
 * Each entry has four fields: option name, ARG_* type constant, a third
 * field that is either NULL or a string (presumably the default value --
 * TODO confirm against the struct definition, which is not in this file),
 * and the help text.  The three ARG_STRING entries have NULL in the third
 * field; the two ARG_BOOLEAN entries have "yes".
 *
 * No comma follows the last entry.
 */
207 #define POCKETSPHINX_FSG_OPTIONS \
208  { "-fsg", \
209  ARG_STRING, \
210  NULL, \
211  "Sphinx format finite state grammar file"}, \
212 { "-jsgf", \
213  ARG_STRING, \
214  NULL, \
215  "JSGF grammar file" }, \
216 { "-toprule", \
217  ARG_STRING, \
218  NULL, \
219  "Start rule for JSGF (first public rule is default)" }, \
220 { "-fsgusealtpron", \
221  ARG_BOOLEAN, \
222  "yes", \
223  "Add alternate pronunciations to FSG"}, \
224 { "-fsgusefiller", \
225  ARG_BOOLEAN, \
226  "yes", \
227  "Insert filler words at each state."}
228 
/**
 * Language-model options.
 *
 * Entries: -allphone, -allphone_ci, -lm, -lmctl, -lmname, -lw,
 * -fwdflatlw, -bestpathlw, -ascale, -wip, -nwpen, -pip, -uw, -silprob,
 * -fillprob.  Besides model file selection, the group also carries the
 * language weights, insertion penalties and silence/filler probabilities.
 * Each entry has four fields: option name, ARG_* type constant, a third
 * field that is either NULL or a string (presumably the default value --
 * TODO confirm against the struct definition, which is not in this file),
 * and the help text.  Every numeric entry here is ARG_FLOAT32.
 *
 * NOTE(review): the last entry ends with a line-continuation backslash,
 * so the line that follows it is spliced into the macro definition and
 * is expected to stay blank.
 */
230 #define POCKETSPHINX_NGRAM_OPTIONS \
231 { "-allphone", \
232  ARG_STRING, \
233  NULL, \
234  "Perform phoneme decoding with phonetic lm" }, \
235 { "-allphone_ci", \
236  ARG_BOOLEAN, \
237  "no", \
238  "Perform phoneme decoding with phonetic lm and context-independent units only" }, \
239 { "-lm", \
240  ARG_STRING, \
241  NULL, \
242  "Word trigram language model input file" }, \
243 { "-lmctl", \
244  ARG_STRING, \
245  NULL, \
246  "Specify a set of language model"}, \
247 { "-lmname", \
248  ARG_STRING, \
249  NULL, \
250  "Which language model in -lmctl to use by default"}, \
251 { "-lw", \
252  ARG_FLOAT32, \
253  "6.5", \
254  "Language model probability weight" }, \
255 { "-fwdflatlw", \
256  ARG_FLOAT32, \
257  "8.5", \
258  "Language model probability weight for flat lexicon (2nd pass) decoding" }, \
259 { "-bestpathlw", \
260  ARG_FLOAT32, \
261  "9.5", \
262  "Language model probability weight for bestpath search" }, \
263 { "-ascale", \
264  ARG_FLOAT32, \
265  "20.0", \
266  "Inverse of acoustic model scale for confidence score calculation" }, \
267 { "-wip", \
268  ARG_FLOAT32, \
269  "0.65", \
270  "Word insertion penalty" }, \
271 { "-nwpen", \
272  ARG_FLOAT32, \
273  "1.0", \
274  "New word transition penalty" }, \
275 { "-pip", \
276  ARG_FLOAT32, \
277  "1.0", \
278  "Phone insertion penalty" }, \
279 { "-uw", \
280  ARG_FLOAT32, \
281  "1.0", \
282  "Unigram weight" }, \
283 { "-silprob", \
284  ARG_FLOAT32, \
285  "0.005", \
286  "Silence word transition probability" }, \
287 { "-fillprob", \
288  ARG_FLOAT32, \
289  "1e-8", \
290  "Filler word transition probability" } \
291 
292 
/**
 * Pronunciation-dictionary options: -dict, -fdict and -dictcase.
 *
 * Each entry has four fields: option name, type constant, a third field
 * that is either NULL or a string (presumably the default value -- TODO
 * confirm against the struct definition, which is not in this file), and
 * the help text.
 *
 * -dict is the only option in this file typed REQARG_STRING rather than
 * ARG_STRING (presumably marking it as a required argument -- TODO
 * confirm in sphinxbase/cmd_ln.h).
 *
 * NOTE(review): the last entry ends with a line-continuation backslash,
 * so the line that follows it is spliced into the macro definition and
 * is expected to stay blank.
 */
293 #define POCKETSPHINX_DICT_OPTIONS \
294  { "-dict", \
295  REQARG_STRING, \
296  NULL, \
297  "Main pronunciation dictionary (lexicon) input file" }, \
298  { "-fdict", \
299  ARG_STRING, \
300  NULL, \
301  "Noise word pronunciation dictionary input file" }, \
302  { "-dictcase", \
303  ARG_BOOLEAN, \
304  "no", \
305  "Dictionary is case sensitive (NOTE: case insensitivity applies to ASCII characters only)" } \
306 
307 
/**
 * Acoustic-model options.
 *
 * Entries: -hmm, -featparams, -mdef, -senmgau, -tmat, -tmatfloor, -mean,
 * -var, -varfloor, -mixw, -mixwfloor, -aw, -sendump, -mllr, -mmap, -ds,
 * -topn, -topn_beam, -logbase.
 * Each entry has four fields: option name, ARG_* type constant, a third
 * field that is either NULL or a string (presumably the default value --
 * TODO confirm against the struct definition, which is not in this file),
 * and the help text.  Every file/directory option (ARG_STRING) has NULL
 * in the third field.
 *
 * -topn_beam is typed ARG_STRING even though its third field is "0";
 * its help text says the value may also be a per-feature list.
 *
 * No comma follows the last entry.
 */
308 #define POCKETSPHINX_ACMOD_OPTIONS \
309 { "-hmm", \
310  ARG_STRING, \
311  NULL, \
312  "Directory containing acoustic model files."}, \
313 { "-featparams", \
314  ARG_STRING, \
315  NULL, \
316  "File containing feature extraction parameters."}, \
317 { "-mdef", \
318  ARG_STRING, \
319  NULL, \
320  "Model definition input file" }, \
321 { "-senmgau", \
322  ARG_STRING, \
323  NULL, \
324  "Senone to codebook mapping input file (usually not needed)" }, \
325 { "-tmat", \
326  ARG_STRING, \
327  NULL, \
328  "HMM state transition matrix input file" }, \
329 { "-tmatfloor", \
330  ARG_FLOAT32, \
331  "0.0001", \
332  "HMM state transition probability floor (applied to -tmat file)" }, \
333 { "-mean", \
334  ARG_STRING, \
335  NULL, \
336  "Mixture gaussian means input file" }, \
337 { "-var", \
338  ARG_STRING, \
339  NULL, \
340  "Mixture gaussian variances input file" }, \
341 { "-varfloor", \
342  ARG_FLOAT32, \
343  "0.0001", \
344  "Mixture gaussian variance floor (applied to data from -var file)" }, \
345 { "-mixw", \
346  ARG_STRING, \
347  NULL, \
348  "Senone mixture weights input file (uncompressed)" }, \
349 { "-mixwfloor", \
350  ARG_FLOAT32, \
351  "0.0000001", \
352  "Senone mixture weights floor (applied to data from -mixw file)" }, \
353 { "-aw", \
354  ARG_INT32, \
355  "1", \
356  "Inverse weight applied to acoustic scores." }, \
357 { "-sendump", \
358  ARG_STRING, \
359  NULL, \
360  "Senone dump (compressed mixture weights) input file" }, \
361 { "-mllr", \
362  ARG_STRING, \
363  NULL, \
364  "MLLR transformation to apply to means and variances" }, \
365 { "-mmap", \
366  ARG_BOOLEAN, \
367  "yes", \
368  "Use memory-mapped I/O (if possible) for model files" }, \
369 { "-ds", \
370  ARG_INT32, \
371  "1", \
372  "Frame GMM computation downsampling ratio" }, \
373 { "-topn", \
374  ARG_INT32, \
375  "4", \
376  "Maximum number of top Gaussians to use in scoring." }, \
377 { "-topn_beam", \
378  ARG_STRING, \
379  "0", \
380  "Beam width used to determine top-N Gaussians (or a list, per-feature)" },\
381 { "-logbase", \
382  ARG_FLOAT32, \
383  "1.0001", \
384  "Base in which all log-likelihoods calculated" }
385 
/**
 * All-empty option entry: NULL name, type 0, NULL third field, NULL help
 * text.  It has the same four-field shape as the entries in the
 * POCKETSPHINX_*_OPTIONS groups, none of which includes it (presumably
 * callers append it to terminate an option table -- TODO confirm).
 */
386 #define CMDLN_EMPTY_OPTION { NULL, 0, NULL, NULL }
387 
388 #endif /* __PS_CMDLN_MACRO_H__ */