PocketSphinx  5prealpha
ngram_search.h
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
42 #ifndef __NGRAM_SEARCH_H__
43 #define __NGRAM_SEARCH_H__
44 
45 /* SphinxBase headers. */
46 #include <sphinxbase/cmd_ln.h>
47 #include <sphinxbase/logmath.h>
48 #include <sphinxbase/ngram_model.h>
49 #include <sphinxbase/listelem_alloc.h>
50 #include <sphinxbase/err.h>
51 
52 /* Local headers. */
53 #include "pocketsphinx_internal.h"
54 #include "hmm.h"
55 
64 typedef struct chan_s {
68  struct chan_s *next;
71  struct chan_s *alt;
73  int32 ciphone;
74  union {
79  int32 rc_id;
80  } info;
81 } chan_t;
82 
90 typedef struct root_chan_s {
96  int32 penult_phn_wid;
97  int32 this_phn_wid;
100  int16 ciphone;
102  int16 ci2phone;
104 } root_chan_t;
105 
109 typedef struct bptbl_s {
111  uint8 valid;
112  uint8 refcnt;
113  int32 wid;
114  int32 bp;
115  int32 score;
116  int32 s_idx;
117  int32 real_wid;
119  int16 last_phone;
120  int16 last2_phone;
121 } bptbl_t;
122 
126 typedef struct bptbl_seg_s {
128  int32 *bpidx;
129  int16 n_bpidx;
130  int16 cur;
131 } bptbl_seg_t;
132 
133 /*
134  * Candidates words for entering their last phones. Cleared and rebuilt in each
135  * frame.
136  * NOTE: candidates can only be multi-phone, real dictionary words.
137  */
138 typedef struct lastphn_cand_s {
139  int32 wid;
140  int32 score;
141  int32 bp;
142  int32 next; /* next candidate starting at the same frame */
144 
145 /*
146  * Since the same instance of a word (i.e., <word,start-frame>) reaches its last
147  * phone several times, we can compute its best BP and LM transition score info
148  * just the first time and cache it for future occurrences. Structure for such
149  * a cache.
150  */
151 typedef struct {
152  int32 sf; /* Start frame */
153  int32 dscr; /* Delta-score upon entering last phone */
154  int32 bp; /* Best BP */
155 } last_ltrans_t;
156 
157 #define CAND_SF_ALLOCSIZE 32
158 typedef struct {
159  int32 bp_ef;
160  int32 cand;
161 } cand_sf_t;
162 
163 /*
164  * Structure for reorganizing the BP table entries in the current frame according
165  * to distinct right context ci-phones. Each entry contains the best BP entry for
166  * a given right context. Each successor word will pick up the correct entry based
167  * on its first ci-phone.
168  */
169 typedef struct bestbp_rc_s {
170  int32 score;
171  int32 path; /* BP table index corresponding to this entry */
172  int32 lc; /* right most ci-phone of above BP entry word */
173 } bestbp_rc_t;
174 
175 #define NO_BP -1
176 
180 typedef struct ngram_search_stats_s {
181  int32 n_phone_eval;
182  int32 n_root_chan_eval;
183  int32 n_nonroot_chan_eval;
184  int32 n_last_chan_eval;
185  int32 n_word_lastchan_eval;
186  int32 n_lastphn_cand_utt;
187  int32 n_fwdflat_chan;
188  int32 n_fwdflat_words;
189  int32 n_fwdflat_word_transition;
190  int32 n_senone_active_utt;
192 
193 
198  ps_search_t base;
199  ngram_model_t *lmset;
202  /* Flags to quickly indicate which passes are enabled. */
203  uint8 fwdtree;
204  uint8 fwdflat;
205  uint8 bestpath;
206 
207  /* State of procesing. */
208  uint8 done;
209 
210  /* Allocators */
211  listelem_alloc_t *chan_alloc;
212  listelem_alloc_t *root_chan_alloc;
213  listelem_alloc_t *latnode_alloc;
233  int32 n_root_chan;
247  bitvec_t *word_active;
265  int32 n_1ph_words;
276  int32 n_active_chan[2];
288  int32 n_active_word[2];
290  /*
291  * FIXME: Document all of these bits.
292  */
293  lastphn_cand_t *lastphn_cand;
294  int32 n_lastphn_cand;
295  last_ltrans_t *last_ltrans; /* one per word */
296  int32 cand_sf_alloc;
297  cand_sf_t *cand_sf;
298  bestbp_rc_t *bestbp_rc;
299 
300  bptbl_t *bp_table; /* Forward pass lattice */
301  int32 bpidx; /* First free BPTable entry */
302  int32 bp_table_size;
303  int32 *bscore_stack; /* Score stack for all possible right contexts */
304  int32 bss_head; /* First free BScoreStack entry */
305  int32 bscore_stack_size;
306 
308  int32 n_frame;
309  int32 *bp_table_idx; /* First BPTable entry for each frame */
310  int32 *word_lat_idx; /* BPTable index for any word in current frame;
311  cleared before each frame */
312 
313  /*
314  * Flat lexicon (2nd pass) search stuff.
315  */
318  bitvec_t *expand_word_flag;
319  int32 *expand_word_list;
320  int32 n_expand_words;
321  int32 min_ef_width;
322  int32 max_sf_win;
323  float32 fwdflat_fwdtree_lw_ratio;
324 
325  int32 best_score;
327  int32 renormalized;
328 
329  /*
330  * DAG (3rd pass) search stuff.
331  */
332  float32 bestpath_fwdtree_lw_ratio;
333  float32 ascale;
336  ptmr_t fwdtree_perf;
337  ptmr_t fwdflat_perf;
338  ptmr_t bestpath_perf;
339  int32 n_tot_frame;
340 
341  /* A collection of beam widths. */
342  int32 beam;
343  int32 dynamic_beam;
344  int32 pbeam;
345  int32 wbeam;
346  int32 lpbeam;
347  int32 lponlybeam;
348  int32 fwdflatbeam;
349  int32 fwdflatwbeam;
350  int32 fillpen;
351  int32 silpen;
352  int32 wip;
353  int32 nwpen;
354  int32 pip;
355  int32 maxwpf;
356  int32 maxhmmpf;
357 };
358 typedef struct ngram_search_s ngram_search_t;
359 
363 ps_search_t *ngram_search_init(const char *name,
364  ngram_model_t *lm,
365  cmd_ln_t *config,
366  acmod_t *acmod,
367  dict_t *dict,
368  dict2pid_t *d2p);
369 
373 void ngram_search_free(ps_search_t *ngs);
374 
380 int ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx);
381 
385 void ngram_search_save_bp(ngram_search_t *ngs, int frame_idx, int32 w,
386  int32 score, int32 path, int32 rc);
387 
391 void ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w);
392 
396 void ngram_search_free_all_rc(ngram_search_t *ngs, int32 w);
397 
403 int ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score);
404 
410 char const *ngram_search_bp_hyp(ngram_search_t *ngs, int bpidx);
411 
415 void ngram_compute_seg_scores(ngram_search_t *ngs, float32 lwf);
416 
421 
425 int32 ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone);
426 
432 void ngram_search_set_lm(ngram_model_t *lm);
433 
434 #endif /* __NGRAM_SEARCH_H__ */
hmm_t hmm
Basic HMM structure.
Definition: ngram_search.h:65
Internal implementation of PocketSphinx decoder.
int32 n_frame_alloc
Number of frames allocated in bp_table_idx and friends.
Definition: ngram_search.h:307
int32 wid
Word index.
Definition: ngram_search.h:113
struct chan_s chan_t
Lexical tree node data type.
Base structure for search module.
int32 n_nonroot_chan
Number of valid non-root channels.
Definition: ngram_search.h:234
void ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
Definition: ngram_search.c:598
chan_t * next
first descendant of this channel
Definition: ngram_search.h:94
listelem_alloc_t * chan_alloc
For chan_t.
Definition: ngram_search.h:211
void ngram_search_set_lm(ngram_model_t *lm)
Sets the global language model.
An individual HMM among the HMM search space.
frame_idx_t frame
start or end frame
Definition: ngram_search.h:110
struct root_chan_s root_chan_t
Lexical tree node data type for the first phone (root) of each dynamic HMM tree structure.
hmm_context_t * hmmctx
HMM context.
Definition: ngram_search.h:200
int32 n_active_chan[2]
Number entries in active_chan_list.
Definition: ngram_search.h:276
int16 last2_phone
next-to-last phone of this word
Definition: ngram_search.h:120
bitvec_t * word_active
array of active flags for all words.
Definition: ngram_search.h:247
struct bptbl_seg_s bptbl_seg_t
Segmentation &quot;iterator&quot; for backpointer table results.
int32 ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone)
Get the exit score for a backpointer entry with a given right context.
Definition: ngram_search.c:660
Implementation of HMM base structure.
int16 ciphone
first ciphone of this node; all words rooted at this node begin with this ciphone ...
Definition: ngram_search.h:100
int32 ** active_word_list
Array of active multi-phone words for current and next frame.
Definition: ngram_search.h:287
struct chan_s * next
first descendant of this channel; or, in the case of the last phone of a word, the next alternative r...
Definition: ngram_search.h:68
void ngram_search_save_bp(ngram_search_t *ngs, int frame_idx, int32 w, int32 score, int32 path, int32 rc)
Enter a word in the backpointer table.
Definition: ngram_search.c:383
Various statistics for profiling.
Definition: ngram_search.h:180
int ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score)
Find the best word exit for the current frame in the backpointer table.
Definition: ngram_search.c:506
int32 * single_phone_wid
list of single-phone word ids
Definition: ngram_search.h:264
int ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx)
Record the current frame&#39;s index in the backpointer table.
Definition: ngram_search.c:329
int32 n_root_chan_alloc
Number of root_chan allocated.
Definition: ngram_search.h:232
int16 ci2phone
second ciphone of this node; one root HMM for each unique right context
Definition: ngram_search.h:102
int32 bp
Back Pointer.
Definition: ngram_search.h:114
int32 penult_phn_wid
list of words whose last phone follows this one; this field indicates the first of the list; the rest...
Definition: ngram_search.h:75
int32 n_active_word[2]
Number entries in active_word_list.
Definition: ngram_search.h:288
int32 rc_id
right-context id for last phone of words
Definition: ngram_search.h:79
N-Gram search module structure.
Definition: ngram_search.h:197
int32 max_nonroot_chan
Maximum possible number of non-root channels.
Definition: ngram_search.h:235
int32 last_phone_best_score
Best Viterbi path score for last phone.
Definition: ngram_search.h:326
int32 real_wid
wid of this or latest predecessor real word
Definition: ngram_search.h:117
root_chan_t * rhmm_1ph
Root HMMs for single-phone words.
Definition: ngram_search.h:236
int32 prev_real_wid
wid of second-last real word
Definition: ngram_search.h:118
ps_lattice_t * ngram_search_lattice(ps_search_t *search)
Construct a word lattice from the current hypothesis.
listelem_alloc_t * latnode_alloc
For latnode_t.
Definition: ngram_search.h:213
Shared information between a set of HMMs.
Segmentation &quot;iterator&quot; for backpointer table results.
Definition: ngram_search.h:126
uint8 refcnt
Reference count (number of successors)
Definition: ngram_search.h:112
ps_latnode_t ** frm_wordlist
List of active words in each frame.
Definition: ngram_search.h:316
Lexical tree node data type for the first phone (root) of each dynamic HMM tree structure.
Definition: ngram_search.h:90
Lexical tree node data type.
Definition: ngram_search.h:64
hmm_t hmm
Basic HMM structure.
Definition: ngram_search.h:91
int32 this_phn_wid
list of words consisting of this single phone; actually the first of the list, like penult_phn_wid; -...
Definition: ngram_search.h:97
int16 cur
Current position in bpidx.
Definition: ngram_search.h:130
chan_t *** active_chan_list
Array of active channels for current and next frame.
Definition: ngram_search.h:275
a structure for a dictionary.
Definition: dict.h:76
struct chan_s * alt
sibling; i.e., next descendant of parent HMM
Definition: ngram_search.h:71
void ngram_search_free(ps_search_t *search)
Finalize the N-Gram search module.
Definition: ngram_search.c:289
Word graph structure used in bestpath/nbest search.
struct bptbl_s bptbl_t
Back pointer table (forward pass lattice; actually a tree)
int16 n_bpidx
Number of backpointer IDs.
Definition: ngram_search.h:129
int32 best_score
Best Viterbi path score.
Definition: ngram_search.h:325
Back pointer table (forward pass lattice; actually a tree)
Definition: ngram_search.h:109
int32 n_1ph_LMwords
Number single phone dict words also in LM; these come first in single_phone_wid.
Definition: ngram_search.h:266
void ngram_compute_seg_scores(ngram_search_t *ngs, float32 lwf)
Compute language and acoustic scores for backpointer table entries.
void ngram_search_free_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
Definition: ngram_search.c:647
root_chan_t * root_chan
Search structure of HMM instances.
Definition: ngram_search.h:231
int32 s_idx
Start of BScoreStack for various right contexts.
Definition: ngram_search.h:116
int32 frame_idx_t
Type for frame index values.
Definition: hmm.h:64
int32 n_frame
Number of frames actually present.
Definition: ngram_search.h:308
ngram_model_t * lmset
Set of language models.
Definition: ngram_search.h:199
uint8 valid
For absolute pruning.
Definition: ngram_search.h:111
int32 n_1ph_words
Number single phone words in dict (total)
Definition: ngram_search.h:265
listelem_alloc_t * root_chan_alloc
For root_chan_t.
Definition: ngram_search.h:212
int32 ciphone
ciphone for this node
Definition: ngram_search.h:73
int32 * bpidx
Sequence of backpointer IDs.
Definition: ngram_search.h:128
ngram_search_stats_t st
Various statistics for profiling.
Definition: ngram_search.h:335
chan_t ** word_chan
Channels associated with a given word (only used for right contexts, single-phone words in fwdtree se...
Definition: ngram_search.h:246
int32 * fwdflat_wordlist
List of active word IDs for utterance.
Definition: ngram_search.h:317
int32 score
Score (best among all right contexts)
Definition: ngram_search.h:115
int32 n_root_chan
Number of valid root_chan.
Definition: ngram_search.h:233
Base structure for hypothesis segmentation iterator.
int32 * homophone_set
Each node in the HMM tree structure may point to a set of words whose last phone would follow that no...
Definition: ngram_search.h:263
ps_seg_t base
Base structure.
Definition: ngram_search.h:127
float32 ascale
Acoustic score scale for posterior probabilities.
Definition: ngram_search.h:333
ps_search_t * ngram_search_init(const char *name, ngram_model_t *lm, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize the N-Gram search module.
Definition: ngram_search.c:140
Acoustic model structure.
Definition: acmod.h:148
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
struct ngram_search_stats_s ngram_search_stats_t
Various statistics for profiling.
int16 last_phone
last phone of this word
Definition: ngram_search.h:119
char const * ngram_search_bp_hyp(ngram_search_t *ngs, int bpidx)
Backtrace from a given backpointer index to obtain a word hypothesis.
Definition: ngram_search.c:550