47 #include <sphinxbase/ckd_alloc.h>
48 #include <sphinxbase/listelem_alloc.h>
49 #include <sphinxbase/err.h>
59 static int ngram_search_step(
ps_search_t *search,
int frame_idx);
60 static int ngram_search_finish(
ps_search_t *search);
62 static char const *ngram_search_hyp(
ps_search_t *search, int32 *out_score);
63 static int32 ngram_search_prob(
ps_search_t *search);
75 ngram_search_seg_iter,
78 static ngram_model_t *default_lm;
87 n_words = ps_search_n_words(ngs);
88 words = (
char const**)ckd_calloc(n_words,
sizeof(*words));
90 for (i = 0; i < n_words; ++i)
91 words[i] = dict_wordstr(ps_search_dict(ngs), i);
92 ngram_model_set_map_words(ngs->
lmset, words, n_words);
102 config = ps_search_config(ngs);
103 acmod = ps_search_acmod(ngs);
106 ngs->beam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-beam"))>>
SENSCR_SHIFT;
107 ngs->wbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-wbeam"))>>
SENSCR_SHIFT;
108 ngs->pbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-pbeam"))>>
SENSCR_SHIFT;
109 ngs->lpbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-lpbeam"))>>
SENSCR_SHIFT;
110 ngs->lponlybeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-lponlybeam"))>>
SENSCR_SHIFT;
111 ngs->fwdflatbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-fwdflatbeam"))>>
SENSCR_SHIFT;
112 ngs->fwdflatwbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-fwdflatwbeam"))>>
SENSCR_SHIFT;
115 ngs->maxwpf = cmd_ln_int32_r(config,
"-maxwpf");
116 ngs->maxhmmpf = cmd_ln_int32_r(config,
"-maxhmmpf");
119 ngs->wip = logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-wip")) >>
SENSCR_SHIFT;
120 ngs->nwpen = logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-nwpen")) >>
SENSCR_SHIFT;
121 ngs->pip = logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-pip")) >>
SENSCR_SHIFT;
122 ngs->silpen = ngs->pip
123 + (logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-silprob"))>>
SENSCR_SHIFT);
124 ngs->fillpen = ngs->pip
125 + (logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-fillprob"))>>
SENSCR_SHIFT);
128 ngs->fwdflat_fwdtree_lw_ratio =
129 cmd_ln_float32_r(config,
"-fwdflatlw")
130 / cmd_ln_float32_r(config,
"-lw");
131 ngs->bestpath_fwdtree_lw_ratio =
132 cmd_ln_float32_r(config,
"-bestpathlw")
133 / cmd_ln_float32_r(config,
"-lw");
136 ngs->
ascale = 1.0 / cmd_ln_float32_r(config,
"-ascale");
148 static char *lmname =
"default";
153 cmd_ln_boolean_r(config,
"-fwdtree"));
155 ngs = ckd_calloc(1,
sizeof(*ngs));
156 ps_search_init(&ngs->base, &ngram_funcs, PS_SEARCH_TYPE_NGRAM, name, config, acmod, dict, d2p);
160 if (ngs->
hmmctx == NULL) {
161 ps_search_free(ps_search_base(ngs));
169 ngram_search_calc_beams(ngs);
174 ngs->word_lat_idx = ckd_calloc(
dict_size(dict),
175 sizeof(*ngs->word_lat_idx));
177 ngs->last_ltrans = ckd_calloc(
dict_size(dict),
178 sizeof(*ngs->last_ltrans));
182 ngs->bp_table_size = cmd_ln_int32_r(config,
"-latsize");
183 ngs->bp_table = ckd_calloc(ngs->bp_table_size,
184 sizeof(*ngs->bp_table));
186 ngs->bscore_stack_size = ngs->bp_table_size * 20;
187 ngs->bscore_stack = ckd_calloc(ngs->bscore_stack_size,
188 sizeof(*ngs->bscore_stack));
191 sizeof(*ngs->bp_table_idx));
198 ngs->
lmset = ngram_model_set_init(config, &lm, &lmname, NULL, 1);
202 if (ngram_wid(ngs->
lmset, S3_FINISH_WORD) ==
203 ngram_unknown_wid(ngs->
lmset))
205 E_ERROR(
"Language model/set does not contain </s>, "
206 "recognition will fail\n");
211 ngram_search_update_widmap(ngs);
214 if (cmd_ln_boolean_r(config,
"-fwdtree")) {
217 ngs->fwdtree_perf.name =
"fwdtree";
218 ptmr_init(&ngs->fwdtree_perf);
220 if (cmd_ln_boolean_r(config,
"-fwdflat")) {
223 ngs->fwdflat_perf.name =
"fwdflat";
224 ptmr_init(&ngs->fwdflat_perf);
226 if (cmd_ln_boolean_r(config,
"-bestpath")) {
227 ngs->bestpath = TRUE;
228 ngs->bestpath_perf.name =
"bestpath";
229 ptmr_init(&ngs->bestpath_perf);
251 ckd_free(ngs->word_lat_idx);
253 ckd_free(ngs->last_ltrans);
255 ngs->word_lat_idx = ckd_calloc(search->
n_words,
sizeof(*ngs->word_lat_idx));
257 ngs->last_ltrans = ckd_calloc(search->
n_words,
sizeof(*ngs->last_ltrans));
259 = ckd_calloc_2d(2, search->
n_words,
266 if (ngs->
lmset == NULL)
270 ngram_search_calc_beams(ngs);
273 ngram_search_update_widmap(ngs);
298 double n_speech = (double)ngs->n_tot_frame
299 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
301 E_INFO(
"TOTAL bestpath %.2f CPU %.3f xRT\n",
302 ngs->bestpath_perf.t_tot_cpu,
303 ngs->bestpath_perf.t_tot_cpu / n_speech);
304 E_INFO(
"TOTAL bestpath %.2f wall %.3f xRT\n",
305 ngs->bestpath_perf.t_tot_elapsed,
306 ngs->bestpath_perf.t_tot_elapsed / n_speech);
314 ngram_model_free(ngs->
lmset);
317 ckd_free(ngs->word_lat_idx);
319 ckd_free(ngs->bp_table);
320 ckd_free(ngs->bscore_stack);
321 if (ngs->bp_table_idx != NULL)
322 ckd_free(ngs->bp_table_idx - 1);
324 ckd_free(ngs->last_ltrans);
333 ngs->bp_table_idx = ckd_realloc(ngs->bp_table_idx - 1,
335 *
sizeof(*ngs->bp_table_idx));
343 ngs->bp_table_idx[frame_idx] = ngs->bpidx;
353 ent = ngs->bp_table + bp;
354 if (ent->
bp == NO_BP)
357 prev = ngs->bp_table + ent->
bp;
366 ent->
real_wid = dict_basewid(ps_search_dict(ngs),
372 ent->
real_wid = dict_basewid(ps_search_dict(ngs), ent->
wid);
380 #define NGRAM_HISTORY_LONG_WORD 2000
384 int32 w, int32 score, int32 path, int32 rc)
391 bp = ngs->word_lat_idx[w];
394 if (frame_idx - ngs->bp_table[path].
frame > NGRAM_HISTORY_LONG_WORD) {
395 E_WARN(
"Word '%s' survived for %d frames, potential overpruning\n", dict_wordstr(ps_search_dict(ngs), w),
396 frame_idx - ngs->bp_table[path].
frame);
404 if (ngs->bp_table[bp].
bp != path) {
405 int32 bplh[2], newlh[2];
409 E_DEBUG(2,(
"Updating path history %d => %d frame %d\n",
410 ngs->bp_table[bp].
bp, path, frame_idx));
411 bplh[0] = ngs->bp_table[bp].
bp == -1
413 bplh[1] = ngs->bp_table[bp].
bp == -1
414 ? -1 : ngs->bp_table[ngs->bp_table[bp].
bp].
real_wid;
415 newlh[0] = path == -1
417 newlh[1] = path == -1
418 ? -1 : ngs->bp_table[path].
real_wid;
421 if (bplh[0] != newlh[0] || bplh[1] != newlh[1]) {
425 E_DEBUG(1, (
"Updating language model state %s,%s => %s,%s frame %d\n",
426 dict_wordstr(ps_search_dict(ngs), bplh[0]),
427 dict_wordstr(ps_search_dict(ngs), bplh[1]),
428 dict_wordstr(ps_search_dict(ngs), newlh[0]),
429 dict_wordstr(ps_search_dict(ngs), newlh[1]),
431 set_real_wid(ngs, bp);
433 ngs->bp_table[bp].
bp = path;
435 ngs->bp_table[bp].
score = score;
440 if (ngs->bp_table[bp].
s_idx != -1)
441 ngs->bscore_stack[ngs->bp_table[bp].
s_idx + rc] = score;
448 if (ngs->bpidx == NO_BP) {
449 E_ERROR(
"No entries in backpointer table!");
454 if (ngs->bpidx >= ngs->bp_table_size) {
455 ngs->bp_table_size *= 2;
456 ngs->bp_table = ckd_realloc(ngs->bp_table,
458 *
sizeof(*ngs->bp_table));
459 E_INFO(
"Resized backpointer table to %d entries\n", ngs->bp_table_size);
461 if (ngs->bss_head >= ngs->bscore_stack_size
462 - bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef)) {
463 ngs->bscore_stack_size *= 2;
464 ngs->bscore_stack = ckd_realloc(ngs->bscore_stack,
465 ngs->bscore_stack_size
466 *
sizeof(*ngs->bscore_stack));
467 E_INFO(
"Resized score stack to %d entries\n", ngs->bscore_stack_size);
470 ngs->word_lat_idx[w] = ngs->bpidx;
471 be = &(ngs->bp_table[ngs->bpidx]);
473 be->
frame = frame_idx;
476 be->
s_idx = ngs->bss_head;
478 assert(path != ngs->bpidx);
482 be->
last_phone = dict_last_phone(ps_search_dict(ngs),w);
483 if (dict_is_single_phone(ps_search_dict(ngs), w)) {
489 be->
last2_phone = dict_second_last_phone(ps_search_dict(ngs),w);
494 for (i = 0; i < rcsize; ++i)
495 ngs->bscore_stack[ngs->bss_head + i] =
WORST_SCORE;
497 ngs->bscore_stack[ngs->bss_head + rc] = score;
498 set_real_wid(ngs, ngs->bpidx);
501 ngs->bss_head += rcsize;
517 if (frame_idx == -1 || frame_idx >= ngs->
n_frame)
519 end_bpidx = ngs->bp_table_idx[frame_idx];
525 while (frame_idx >= 0 && ngs->bp_table_idx[frame_idx] == end_bpidx)
532 assert(end_bpidx < ngs->bp_table_size);
533 for (bp = ngs->bp_table_idx[frame_idx]; bp < end_bpidx; ++bp) {
534 if (ngs->bp_table[bp].
wid == ps_search_finish_wid(ngs)
536 best_score = ngs->bp_table[bp].
score;
539 if (ngs->bp_table[bp].
wid == ps_search_finish_wid(ngs))
543 if (out_best_score) {
544 *out_best_score = best_score;
562 while (bp != NO_BP) {
563 bptbl_t *be = &ngs->bp_table[bp];
566 len += strlen(dict_basestr(ps_search_dict(ngs), be->
wid)) + 1;
574 base->
hyp_str = ckd_calloc(1, len);
578 while (bp != NO_BP) {
579 bptbl_t *be = &ngs->bp_table[bp];
584 len = strlen(dict_basestr(ps_search_dict(ngs), be->
wid));
586 memcpy(c, dict_basestr(ps_search_dict(ngs), be->
wid), len);
602 int32 i, tmatid, ciphone;
606 assert(!dict_is_single_phone(ps_search_dict(ngs), w));
607 ciphone = dict_last_phone(ps_search_dict(ngs),w);
610 dict_second_last_phone(ps_search_dict(ngs),w));
611 tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone);
613 if ((hmm == NULL) || (hmm_nonmpx_ssid(&hmm->
hmm) != rssid->
ssid[0])) {
621 E_DEBUG(3,(
"allocated rc_id 0 ssid %d ciphone %d lc %d word %s\n",
623 dict_second_last_phone(ps_search_dict(ngs),w),
624 dict_wordstr(ps_search_dict(ngs),w)));
626 for (i = 1; i < rssid->
n_ssid; ++i) {
627 if ((hmm->
next == NULL) || (hmm_nonmpx_ssid(&hmm->
next->
hmm) != rssid->
ssid[i])) {
636 E_DEBUG(3,(
"allocated rc_id %d ssid %d ciphone %d lc %d word %s\n",
638 dict_second_last_phone(ps_search_dict(ngs),w),
639 dict_wordstr(ps_search_dict(ngs),w)));
651 for (hmm = ngs->
word_chan[w]; hmm; hmm = thmm) {
677 return ngs->bscore_stack[pbe->
s_idx + rssid->
cimap[rcphone]];
686 int32 *out_ascr, int32 *out_lscr)
692 if (be->
bp == NO_BP) {
693 *out_ascr = be->
score;
699 pbe = ngs->bp_table + be->
bp;
701 dict_first_phone(ps_search_dict(ngs),be->
wid));
708 if (be->
wid == ps_search_silence_wid(ngs)) {
709 *out_lscr = ngs->silpen;
712 *out_lscr = ngs->fillpen;
716 *out_lscr = ngram_tg_score(ngs->
lmset,
721 *out_lscr = *out_lscr * lwf;
723 *out_ascr = be->
score - start_score - *out_lscr;
732 ngram_model_flush(ngs->
lmset);
735 else if (ngs->fwdflat)
743 ngram_search_step(
ps_search_t *search,
int frame_idx)
749 else if (ngs->fwdflat)
759 E_INFO(
"Backpointer table (%d entries):\n", ngs->bpidx);
760 for (i = 0; i < ngs->bpidx; ++i) {
761 bptbl_t *bpe = ngs->bp_table + i;
764 E_INFO_NOFN(
"%-5d %-10s start %-3d end %-3d score %-8d bp %-3d real_wid %-5d prev_real_wid %-5d",
765 i, dict_wordstr(ps_search_dict(ngs), bpe->
wid),
767 ? 0 : ngs->bp_table[bpe->
bp].
frame + 1),
778 for (j = 0; j < rcsize; ++j)
780 E_INFOCONT(
" %d", bpe->
score - ngs->bscore_stack[bpe->
s_idx + j]);
791 ngs->n_tot_frame += ngs->
n_frame;
805 while (ps_search_acmod(ngs)->n_feat_frame > 0) {
817 else if (ngs->fwdflat) {
827 ngram_search_bestpath(
ps_search_t *search, int32 *out_score,
int backward)
833 ngs->bestpath_fwdtree_lw_ratio,
839 if (search->
post == 0)
849 ngram_search_hyp(
ps_search_t *search, int32 *out_score)
854 if (ngs->bestpath && ngs->done) {
860 ptmr_reset(&ngs->bestpath_perf);
861 ptmr_start(&ngs->bestpath_perf);
864 if ((link = ngram_search_bestpath(search, out_score, FALSE)) == NULL)
867 ptmr_stop(&ngs->bestpath_perf);
869 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
870 E_INFO(
"bestpath %.2f CPU %.3f xRT\n",
871 ngs->bestpath_perf.t_cpu,
872 ngs->bestpath_perf.t_cpu / n_speech);
873 E_INFO(
"bestpath %.2f wall %.3f xRT\n",
874 ngs->bestpath_perf.t_elapsed,
875 ngs->bestpath_perf.t_elapsed / n_speech);
891 ngram_search_bp2itor(
ps_seg_t *seg,
int bp)
896 be = &ngs->bp_table[bp];
897 pbe = be->
bp == -1 ? NULL : &ngs->bp_table[be->
bp];
898 seg->
word = dict_wordstr(ps_search_dict(ngs), be->
wid);
900 seg->
sf = pbe ? pbe->
frame + 1 : 0;
913 dict_first_phone(ps_search_dict(ngs), be->
wid));
915 if (be->
wid == ps_search_silence_wid(ngs)) {
916 seg->
lscr = ngs->silpen;
919 seg->
lscr = ngs->fillpen;
938 ckd_free(itor->
bpidx);
948 ngram_bp_seg_free(seg);
952 ngram_search_bp2itor(seg, itor->
bpidx[itor->
cur]);
971 itor = ckd_calloc(1,
sizeof(*itor));
972 itor->
base.
vt = &ngram_bp_segfuncs;
977 while (bp != NO_BP) {
978 bptbl_t *be = &ngs->bp_table[bp];
989 while (bp != NO_BP) {
990 bptbl_t *be = &ngs->bp_table[bp];
991 itor->
bpidx[cur] = bp;
1008 if (ngs->bestpath && ngs->done) {
1014 ptmr_reset(&ngs->bestpath_perf);
1015 ptmr_start(&ngs->bestpath_perf);
1018 if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL)
1021 ngs->bestpath_fwdtree_lw_ratio);
1022 ptmr_stop(&ngs->bestpath_perf);
1024 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
1025 E_INFO(
"bestpath %.2f CPU %.3f xRT\n",
1026 ngs->bestpath_perf.t_cpu,
1027 ngs->bestpath_perf.t_cpu / n_speech);
1028 E_INFO(
"bestpath %.2f wall %.3f xRT\n",
1029 ngs->bestpath_perf.t_elapsed,
1030 ngs->bestpath_perf.t_elapsed / n_speech);
1038 return ngram_search_bp_iter(ngs, bpidx,
1040 (ngs->done && ngs->fwdflat)
1041 ? ngs->fwdflat_fwdtree_lw_ratio : 1.0);
1053 if (ngs->bestpath && ngs->done) {
1059 if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL)
1061 return search->
post;
1075 for (i = 0, bp_ptr = ngs->bp_table; i < ngs->bpidx; ++i, ++bp_ptr) {
1083 sf = (bp_ptr->
bp < 0) ? 0 : ngs->bp_table[bp_ptr->
bp].
frame + 1;
1087 assert(ef < dag->n_frames);
1089 if ((wid == ps_search_finish_wid(ngs)) && (ef < dag->
n_frames - 1))
1094 && (!ngram_model_set_known_wid(ngs->
lmset,
1095 dict_basewid(ps_search_dict(ngs), wid))))
1099 for (node = dag->
nodes; node; node = node->
next) {
1100 if ((node->
wid == wid) && (node->
sf == sf))
1112 node->
fef = node->
lef = i;
1133 for (node = dag->
nodes; node; node = node->
next) {
1134 if ((node->
wid == ps_search_start_wid(ngs)) && (node->
sf == 0))
1139 E_ERROR(
"Couldn't find <s> in first frame\n");
1149 int32 ef, bestbp, bp, bestscore;
1152 for (node = dag->
nodes; node; node = node->
next) {
1153 int32 lef = ngs->bp_table[node->
lef].
frame;
1154 if ((node->
wid == ps_search_finish_wid(ngs))
1165 ef >= 0 && ngs->bp_table_idx[ef] == ngs->bpidx;
1168 E_ERROR(
"Empty backpointer table: can not build DAG.\n");
1175 for (bp = ngs->bp_table_idx[ef]; bp < ngs->bp_table_idx[ef + 1]; ++bp) {
1176 int32 n_used, l_scr, wid, prev_wid;
1180 if (wid == ps_search_finish_wid(ngs)) {
1184 l_scr = ngram_tg_score(ngs->
lmset, ps_search_finish_wid(ngs),
1186 l_scr = l_scr * lwf;
1188 bestscore = ngs->bp_table[bp].
score + l_scr;
1192 if (bestbp == NO_BP) {
1193 E_ERROR(
"No word exits found in last frame (%d), assuming no recognition\n", ef);
1196 E_INFO(
"</s> not found in last frame, using %s.%d instead\n",
1197 dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].
wid), ef);
1200 for (node = dag->
nodes; node; node = node->
next) {
1201 if (node->
lef == bestbp)
1206 E_ERROR(
"Failed to find DAG node corresponding to %s\n",
1207 dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].
wid));
1217 int32 i, score, ascr, lscr;
1221 int min_endfr, nlink;
1225 min_endfr = cmd_ln_int32_r(ps_search_config(search),
"-min_endfr");
1242 lwf = ngs->fwdflat ? ngs->fwdflat_fwdtree_lw_ratio : 1.0;
1243 create_dag_nodes(ngs, dag);
1244 if ((dag->
start = find_start_node(ngs, dag)) == NULL)
1246 if ((dag->
end = find_end_node(ngs, dag, ngs->bestpath_fwdtree_lw_ratio)) == NULL)
1248 E_INFO(
"lattice start node %s.%d end node %s.%d\n",
1252 ngram_compute_seg_score(ngs, ngs->bp_table + dag->
end->
lef, lwf,
1276 E_INFO(
"Eliminated %d nodes before end node\n", i);
1279 for (to = dag->
end; to; to = to->
next) {
1288 fef = ngs->bp_table[to->
fef].
frame;
1289 lef = ngs->bp_table[to->
lef].
frame;
1290 if (to != dag->
end && lef - fef < min_endfr) {
1296 for (from = to->
next; from; from = from->
next) {
1299 fef = ngs->bp_table[from->
fef].
frame;
1300 lef = ngs->bp_table[from->
lef].
frame;
1302 if ((to->
sf <= fef) || (to->
sf > lef + 1))
1304 if (lef - fef < min_endfr) {
1311 from_bpe = ngs->bp_table + i;
1312 for (; i <= from->
lef; i++, from_bpe++) {
1313 if (from_bpe->
wid != from->
wid)
1315 if (from_bpe->
frame >= to->
sf - 1)
1319 if ((i > from->
lef) || (from_bpe->
frame != to->
sf - 1))
1324 ngram_compute_seg_score(ngs, from_bpe, lwf,
1330 dict_first_phone(ps_search_dict(ngs), to->
wid));
1332 if (score == WORST_SCORE)
1336 score = ascr + (score - from_bpe->
score);
1347 else if (score BETTER_THAN WORST_SCORE) {
1357 E_ERROR(
"End node of lattice isolated; unreachable\n");
1361 for (node = dag->
nodes; node; node = node->
next) {
1370 for (node = dag->
nodes; node; node = node->
next) {
1373 for (alt = node->
next; alt && alt->
sf == node->
sf; alt = alt->
next) {
1381 E_INFO(
"Lattice has %d nodes, %d links\n", dag->
n_nodes, nlink);
1387 dag->
end->
basewid = ps_search_finish_wid(ngs);
1405 default_lm = ngram_model_retain(lm);
hmm_t hmm
Basic HMM structure.
Internal implementation of PocketSphinx decoder.
void ngram_fwdtree_finish(ngram_search_t *ngs)
Finish fwdtree decoding for an utterance.
int32 n_frame_alloc
Number of frames allocated in bp_table_idx and friends.
void ngram_fwdtree_deinit(ngram_search_t *ngs)
Release memory associated with fwdtree decoding.
Base structure for search module.
int32 n_nodes
Number of nodes in this lattice.
void ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
ps_seg_t * ps_lattice_seg_iter(ps_lattice_t *dag, ps_latlink_t *link, float32 lwf)
Get hypothesis segmentation iterator after bestpath search.
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
int acmod_rewind(acmod_t *acmod)
Rewind the current utterance, allowing it to be rescored.
listelem_alloc_t * chan_alloc
For chan_t.
void ngram_fwdtree_start(ngram_search_t *ngs)
Start fwdtree decoding for an utterance.
void ngram_search_set_lm(ngram_model_t *lm)
Sets the global language model.
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
void ps_lattice_penalize_fillers(ps_lattice_t *dag, int32 silpen, int32 fillpen)
Insert penalty for fillers.
frame_idx_t frame
start or end frame
ps_latnode_t * start
Starting node.
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state]...
hmm_context_t * hmmctx
HMM context.
ps_segfuncs_t * vt
V-table of seg methods.
logmath_t * lmath
Log-math computation.
uint16 ** sseq
Unique senone sequences (2D array built at load time)
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
int32 lscr
Language model score.
int32 n_words
Number of words known to search (may be less than in the dictionary)
int16 last2_phone
next-to-last phone of this word
#define BAD_S3WID
Dictionary word id.
int32 n_ssid
#Unique ssid in above, compressed ssid list
frame_idx_t n_frames
Number of frames for this utterance.
int ngram_fwdflat_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
Word graph search implementation.
bitvec_t * word_active
array of active flags for all words.
void ngram_fwdflat_finish(ngram_search_t *ngs)
Finish fwdflat decoding for an utterance.
ps_latnode_t * nodes
List of all nodes.
int ngram_fwdflat_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
int32 ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone)
Get the exit score for a backpointer entry with a given right context.
listelem_alloc_t * latnode_alloc
Node allocator for this DAG.
int32 prob
Log posterior probability.
latlink_list_t * entries
Links into this node.
POCKETSPHINX_EXPORT int32 ps_lattice_posterior(ps_lattice_t *dag, ngram_model_t *lmset, float32 ascale)
Calculate link posterior probabilities on a word graph.
struct ps_latnode_s * alt
Node with alternate pronunciation for this word.
char const * word
Word string (pointer into dictionary hash)
int32 ** active_word_list
Array of active multi-phone words for current and next frame.
struct chan_s * next
first descendant of this channel; or, in the case of the last phone of a word, the next alternative r...
void ngram_search_save_bp(ngram_search_t *ngs, int frame_idx, int32 w, int32 score, int32 path, int32 rc)
Enter a word in the backpointer table.
ps_search_t * search
Search object from whence this came.
int32 final_node_ascr
Acoustic score of implicit link exiting final node.
Lexicon tree based Viterbi search.
int ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score)
Find the best word exit for the current frame in the backpointer table.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
int ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx)
Record the current frame's index in the backpointer table.
int32 rc_id
right-context id for last phone of words
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
void ngram_fwdflat_start(ngram_search_t *ngs)
Start fwdflat decoding for an utterance.
N-Gram search module structure.
int ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
hmm_context_t * hmm_context_init(int32 n_emit_state, uint8 **const *tp, int16 const *senscore, uint16 *const *sseq)
Create an HMM context.
void ps_search_base_free(ps_search_t *search)
Free search.
void ps_lattice_delete_unreachable(ps_lattice_t *dag)
Remove nodes marked as unreachable.
ps_latnode_t * end
Ending node.
frame_idx_t sf
Start frame.
int32 real_wid
wid of this or latest predecessor real word
int32 prev_real_wid
wid of second-last real word
POCKETSPHINX_EXPORT ps_latlink_t * ps_lattice_bestpath(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, float32 ascale)
Do N-Gram based best-path search on a word graph.
ps_lattice_t * ngram_search_lattice(ps_search_t *search)
Construct a word lattice from the current hypothesis.
latlink_list_t * exits
Links out of this node.
#define WORST_SCORE
Large "bad" score.
N-Gram based multi-pass search ("FBS")
tmat_t * tmat
Transition matrices.
int32 ascr
Acoustic score.
int acmod_advance(acmod_t *acmod)
Advance the frame index.
listelem_alloc_t * latnode_alloc
For latnode_t.
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
void ngram_fwdtree_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdtree decoding.
Segmentation "iterator" for backpointer table results.
ps_latnode_t ** frm_wordlist
List of active words in each frame.
int32 path_scr
Best path score from root of DAG.
Lexical tree node data type for the first phone (root) of each dynamic HMM tree structure.
Lexical tree node data type.
int32 wid
Dictionary word id.
int16 cur
Current position in bpidx.
#define SENSCR_SHIFT
Shift count for senone scores.
a structure for a dictionary.
char const * ps_lattice_hyp(ps_lattice_t *dag, ps_latlink_t *link)
Get hypothesis string after bestpath search.
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a "real" word, i.e.
void ngram_search_free(ps_search_t *search)
Finalize the N-Gram search module.
Word graph structure used in bestpath/nbest search.
#define WORSE_THAN
Is one score worse than another?
int16 n_bpidx
Number of backpointer IDs.
int32 best_score
Best Viterbi path score.
Back pointer table (forward pass lattice; actually a tree)
cross word triphone model structure
int ngram_fwdtree_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
void ngram_search_free_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
int32 post
Utterance posterior probability.
char * hyp_str
Current hypothesis string.
#define BETTER_THAN
Is one score better than another?
dict_t * dict
Pronunciation dictionary.
void ngram_fwdflat_deinit(ngram_search_t *ngs)
Release memory associated with fwdflat decoding.
int32 s_idx
Start of BScoreStack for various right contexts.
int32 fef
First end frame.
int32 n_frame
Number of frames actually present.
Flat lexicon based Viterbi search.
ngram_model_t * lmset
Set of language models.
uint8 valid
For absolute pruning.
int32 lback
Language model backoff.
listelem_alloc_t * root_chan_alloc
For root_chan_t.
int32 basewid
Dictionary base word id.
int32 ciphone
ciphone for this node
void ngram_fwdflat_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdflat decoding.
ps_lattice_t * ps_lattice_init_search(ps_search_t *search, int n_frame)
Construct an empty word graph with reference to a search structure.
void hmm_context_free(hmm_context_t *ctx)
Free an HMM context.
int32 * bpidx
Sequence of backpointer IDs.
chan_t ** word_chan
Channels associated with a given word (only used for right contexts, single-phone words in fwdtree se...
bin_mdef_t * mdef
Model definition.
ps_latlink_t * last_link
Final link in best path.
struct ps_latnode_s * next
Next node in DAG (no ordering implied)
int32 score
Score (best among all right contexts)
V-table for search algorithm.
ps_lattice_t * dag
Current hypothesis word graph.
Base structure for hypothesis segmentation iterator.
#define dict_size(d)
Packaged macro access to dictionary members.
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
POCKETSPHINX_EXPORT int ps_lattice_free(ps_lattice_t *dag)
Free a lattice.
ps_seg_t base
Base structure.
float32 ascale
Acoustic score scale for posterior probabilities.
ps_search_t * ngram_search_init(const char *name, ngram_model_t *lm, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize the N-Gram search module.
Acoustic model structure.
float32 lwf
Language weight factor (for second-pass searches)
Building composite triphone (as well as word internal triphones) with the dictionary.
int acmod_set_grow(acmod_t *acmod, int grow_feat)
Set memory allocation policy for utterance processing.
POCKETSPHINX_EXPORT void ps_lattice_link(ps_lattice_t *dag, ps_latnode_t *from, ps_latnode_t *to, int32 score, int32 ef)
Create a directed link between "from" and "to" nodes, but if a link already exists, choose one with the best link_scr.
s3ssid_t * ssid
Senone Sequence ID list for all context ciphones.
frame_idx_t sf
Start frame.
int16 last_phone
last phone of this word
char const * ngram_search_bp_hyp(ngram_search_t *ngs, int bpidx)
Backtrace from a given backpointer index to obtain a word hypothesis.