47 #include <sphinxbase/ckd_alloc.h>
48 #include <sphinxbase/listelem_alloc.h>
49 #include <sphinxbase/err.h>
56 #define __CHAN_DUMP__ 0
58 #define chan_v_eval(chan) hmm_dump_vit_eval(&(chan)->hmm, stderr)
60 #define chan_v_eval(chan) hmm_vit_eval(&(chan)->hmm)
70 int32 w, ndiph, i, n_words, n_ci;
71 dict_t *dict = ps_search_dict(ngs);
74 E_INFO(
"Initializing search tree\n");
76 n_words = ps_search_n_words(ngs);
82 n_ci = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef);
84 dimap = bitvec_alloc(n_ci * n_ci);
85 for (w = 0; w < n_words; w++) {
88 if (dict_is_single_phone(dict, w))
92 ph0 = dict_first_phone(dict, w);
93 ph1 = dict_second_phone(dict, w);
95 if (bitvec_is_clear(dimap, ph0 * n_ci + ph1)) {
96 bitvec_set(dimap, ph0 * n_ci + ph1);
101 E_INFO(
"%d unique initial diphones\n", ndiph);
109 for (w = 0; w < n_words; ++w) {
112 if (!dict_is_single_phone(dict, w)) {
113 E_WARN(
"Filler word %d = %s has more than one phone, ignoring it.\n",
114 w, dict_wordstr(dict, w));
132 for (w = 0; w < n_words; w++) {
133 if (!dict_is_single_phone(dict, w))
139 bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ngs->
rhmm_1ph[i].
ciphone),
140 bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ngs->
rhmm_1ph[i].
ciphone));
179 int32 w, i, j, p, ph, tmatid;
181 dict_t *dict = ps_search_dict(ngs);
184 n_words = ps_search_n_words(ngs);
186 E_INFO(
"Creating search channels\n");
188 for (w = 0; w < n_words; w++)
195 for (w = 0; w < n_words; w++) {
196 int ciphone, ci2phone;
199 if (!ngram_model_set_known_wid(ngs->
lmset, dict_basewid(dict, w)))
203 if (dict_is_single_phone(dict, w)) {
204 E_DEBUG(1,(
"single_phone_wid[%d] = %s\n",
212 ciphone = dict_first_phone(dict, w);
213 ci2phone = dict_second_phone(dict, w);
221 rhmm->
hmm.tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone);
223 hmm_mpx_ssid(&rhmm->
hmm, 0) =
224 bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ciphone);
232 E_DEBUG(3,(
"word %s rhmm %d\n", dict_wordstr(dict, w), rhmm - ngs->
root_chan));
234 if (dict_pronlen(dict, w) == 2) {
236 if ((j = rhmm->penult_phn_wid) < 0)
237 rhmm->penult_phn_wid = w;
246 tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef,
dict_pron(dict, w, 1));
250 init_nonroot_chan(ngs, hmm, ph,
dict_pron(dict, w, 1), tmatid);
256 for (; hmm && (hmm_nonmpx_ssid(&hmm->
hmm) != ph); hmm = hmm->
alt)
260 init_nonroot_chan(ngs, hmm, ph,
dict_pron(dict, w, 1), tmatid);
264 E_DEBUG(3,(
"phone %s = %d\n",
266 dict_second_phone(dict, w)), ph));
267 for (p = 2; p < dict_pronlen(dict, w) - 1; p++) {
269 tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef,
dict_pron(dict, w, p));
273 init_nonroot_chan(ngs, hmm, ph,
dict_pron(dict, w, p), tmatid);
279 for (hmm = hmm->next; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph);
284 init_nonroot_chan(ngs, hmm, ph,
dict_pron(dict, w, p), tmatid);
288 E_DEBUG(3,(
"phone %s = %d\n",
294 if ((j = hmm->info.penult_phn_wid) < 0)
295 hmm->info.penult_phn_wid = w;
306 for (w = 0; w < n_words; ++w) {
308 if (!dict_is_single_phone(dict, w))
313 if (ngram_model_set_known_wid(ngs->
lmset, dict_basewid(dict, w)))
315 E_DEBUG(1,(
"single_phone_wid[%d] = %s\n",
332 E_INFO(
"Created %d root, %d non-root channels, %d single-phone words\n",
336 E_ERROR(
"No word from the language model has pronunciation in the dictionary\n");
345 for (child = hmm->
next; child; child = sibling) {
346 sibling = child->
alt;
347 reinit_search_subtree(ngs, child);
370 reinit_search_subtree(ngs, hmm);
384 ngs->bestbp_rc = ckd_calloc(bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef),
385 sizeof(*ngs->bestbp_rc));
386 ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
387 sizeof(*ngs->lastphn_cand));
388 init_search_tree(ngs);
389 create_search_channels(ngs);
397 n_words = ps_search_n_words(ngs);
402 for (i = w = 0; w < n_words; ++w) {
403 if (!dict_is_single_phone(ps_search_dict(ngs), w))
424 double n_speech = (double)ngs->n_tot_frame
425 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
427 E_INFO(
"TOTAL fwdtree %.2f CPU %.3f xRT\n",
428 ngs->fwdtree_perf.t_tot_cpu,
429 ngs->fwdtree_perf.t_tot_cpu / n_speech);
430 E_INFO(
"TOTAL fwdtree %.2f wall %.3f xRT\n",
431 ngs->fwdtree_perf.t_tot_elapsed,
432 ngs->fwdtree_perf.t_tot_elapsed / n_speech);
435 reinit_search_tree(ngs);
437 deinit_search_tree(ngs);
442 ckd_free(ngs->cand_sf);
444 ckd_free(ngs->bestbp_rc);
445 ngs->bestbp_rc = NULL;
446 ckd_free(ngs->lastphn_cand);
447 ngs->lastphn_cand = NULL;
454 reinit_search_tree(ngs);
456 deinit_search_tree(ngs);
458 ckd_free(ngs->lastphn_cand);
459 ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
460 sizeof(*ngs->lastphn_cand));
462 ngs->
word_chan = ckd_calloc(ps_search_n_words(ngs),
465 init_search_tree(ngs);
466 create_search_channels(ngs);
477 n_words = ps_search_n_words(ngs);
480 memset(&ngs->
st, 0,
sizeof(ngs->
st));
481 ptmr_reset(&ngs->fwdtree_perf);
482 ptmr_start(&ngs->fwdtree_perf);
489 for (i = 0; i < n_words; ++i)
490 ngs->word_lat_idx[i] = NO_BP;
498 ngs->renormalized = 0;
501 for (i = 0; i < n_words; i++)
502 ngs->last_ltrans[i].sf = -1;
538 if (hmm_frame(&rhmm->
hmm) == frame_idx)
545 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
552 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
561 if (hmm_frame(&rhmm->
hmm) == frame_idx)
567 renormalize_scores(
ngram_search_t *ngs,
int frame_idx, int32 norm)
575 if (hmm_frame(&rhmm->
hmm) == frame_idx) {
583 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
590 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
598 if (hmm_frame(&rhmm->
hmm) == frame_idx) {
603 ngs->renormalized = TRUE;
614 if (hmm_frame(&rhmm->
hmm) == frame_idx) {
615 int32 score = chan_v_eval(rhmm);
618 ++ngs->
st.n_root_chan_eval;
633 ngs->
st.n_nonroot_chan_eval += i;
635 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
636 int32 score = chan_v_eval(hmm);
637 assert(hmm_frame(&hmm->hmm) == frame_idx);
650 int32 i, w, bestscore, *awl, j, k;
657 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
665 assert(hmm_frame(&hmm->
hmm) == frame_idx);
666 score = chan_v_eval(hmm);
683 if (hmm_frame(&rhmm->
hmm) < frame_idx)
686 score = chan_v_eval(rhmm);
688 if (score
BETTER_THAN bestscore && w != ps_search_finish_wid(ngs))
694 ngs->
st.n_last_chan_eval += k + j;
695 ngs->
st.n_nonroot_chan_eval += k + j;
696 ngs->
st.n_word_lastchan_eval +=
703 evaluate_channels(
ngram_search_t *ngs, int16
const *senone_scores,
int frame_idx)
708 ngs->
best_score = eval_root_chan(ngs, frame_idx);
711 if ((bs = eval_word_chan(ngs, frame_idx)) BETTER_THAN ngs->
best_score)
729 int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
736 newphone_thresh = ngs->
best_score + ngs->pbeam;
737 lastphn_thresh = ngs->
best_score + ngs->lpbeam;
742 E_DEBUG(3,(
"Root channel %d frame %d score %d thresh %d\n",
743 i, hmm_frame(&rhmm->
hmm), hmm_bestscore(&rhmm->
hmm), thresh));
745 if (hmm_frame(&rhmm->
hmm) < frame_idx)
748 if (hmm_bestscore(&rhmm->
hmm) BETTER_THAN thresh) {
749 hmm_frame(&rhmm->
hmm) = nf;
750 E_DEBUG(3,(
"Preserving root channel %d score %d\n", i, hmm_bestscore(&rhmm->
hmm)));
753 newphone_score = hmm_out_score(&rhmm->
hmm) + ngs->pip;
754 if (pls != NULL || newphone_score BETTER_THAN newphone_thresh) {
755 for (hmm = rhmm->
next; hmm; hmm = hmm->
alt) {
756 int32 pl_newphone_score = newphone_score
758 if (pl_newphone_score BETTER_THAN newphone_thresh) {
759 if ((hmm_frame(&hmm->
hmm) < frame_idx)
760 || (newphone_score BETTER_THAN hmm_in_score(&hmm->
hmm))) {
762 hmm_out_history(&rhmm->
hmm), nf);
774 if (pls != NULL || newphone_score BETTER_THAN lastphn_thresh) {
775 for (w = rhmm->penult_phn_wid; w >= 0;
777 int32 pl_newphone_score = newphone_score
779 (pls, dict_last_phone(ps_search_dict(ngs),w));
780 E_DEBUG(3,(
"word %s newphone_score %d\n", dict_wordstr(ps_search_dict(ngs), w), newphone_score));
781 if (pl_newphone_score BETTER_THAN lastphn_thresh) {
782 candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
783 ngs->n_lastphn_cand++;
786 newphone_score - ngs->nwpen;
787 candp->bp = hmm_out_history(&rhmm->
hmm);
805 int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
813 newphone_thresh = ngs->
best_score + ngs->pbeam;
814 lastphn_thresh = ngs->
best_score + ngs->lpbeam;
820 for (i = ngs->
n_active_chan[frame_idx & 0x1], hmm = *(acl++); i > 0;
821 --i, hmm = *(acl++)) {
822 assert(hmm_frame(&hmm->hmm) >= frame_idx);
824 if (hmm_bestscore(&hmm->hmm) BETTER_THAN thresh) {
826 if (hmm_frame(&hmm->hmm) != nf) {
827 hmm_frame(&hmm->hmm) = nf;
832 newphone_score = hmm_out_score(&hmm->hmm) + ngs->pip;
833 if (pls != NULL || newphone_score BETTER_THAN newphone_thresh) {
834 for (nexthmm = hmm->
next; nexthmm; nexthmm = nexthmm->
alt) {
835 int32 pl_newphone_score = newphone_score
837 if ((pl_newphone_score BETTER_THAN newphone_thresh)
838 && ((hmm_frame(&nexthmm->
hmm) < frame_idx)
840 BETTER_THAN hmm_in_score(&nexthmm->
hmm)))) {
841 if (hmm_frame(&nexthmm->
hmm) != nf) {
846 hmm_out_history(&hmm->hmm), nf);
856 if (pls != NULL || newphone_score BETTER_THAN lastphn_thresh) {
857 for (w = hmm->info.penult_phn_wid; w >= 0;
859 int32 pl_newphone_score = newphone_score
861 (pls, dict_last_phone(ps_search_dict(ngs),w));
862 if (pl_newphone_score BETTER_THAN lastphn_thresh) {
863 candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
864 ngs->n_lastphn_cand++;
867 newphone_score - ngs->nwpen;
868 candp->bp = hmm_out_history(&hmm->hmm);
873 else if (hmm_frame(&hmm->hmm) != nf) {
888 int32 i, j, k, nf, bp, bpend, w;
892 int32 bestscore, dscr;
899 ngs->
st.n_lastphn_cand_utt += ngs->n_lastphn_cand;
903 for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
910 bpe = &(ngs->bp_table[candp->bp]);
914 (ngs, bpe, dict_first_phone(ps_search_dict(ngs), candp->wid));
916 candp->score -= start_score;
924 if (ngs->last_ltrans[candp->wid].sf != bpe->
frame + 1) {
927 for (j = 0; j < n_cand_sf; j++) {
928 if (ngs->cand_sf[j].bp_ef == bpe->
frame)
933 candp->next = ngs->cand_sf[j].cand;
936 if (n_cand_sf >= ngs->cand_sf_alloc) {
937 if (ngs->cand_sf_alloc == 0) {
939 ckd_calloc(CAND_SF_ALLOCSIZE,
940 sizeof(*ngs->cand_sf));
941 ngs->cand_sf_alloc = CAND_SF_ALLOCSIZE;
944 ngs->cand_sf_alloc += CAND_SF_ALLOCSIZE;
945 ngs->cand_sf = ckd_realloc(ngs->cand_sf,
947 *
sizeof(*ngs->cand_sf));
948 E_INFO(
"cand_sf[] increased to %d entries\n",
956 ngs->cand_sf[j].bp_ef = bpe->
frame;
959 ngs->cand_sf[j].cand = i;
962 ngs->last_ltrans[candp->wid].sf = bpe->
frame + 1;
967 for (i = 0; i < n_cand_sf; i++) {
969 bp = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef];
970 bpend = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef + 1];
971 for (bpe = &(ngs->bp_table[bp]); bp < bpend; bp++, bpe++) {
975 for (j = ngs->cand_sf[i].cand; j >= 0; j = candp->next) {
977 candp = &(ngs->lastphn_cand[j]);
980 (ngs, bpe, dict_first_phone(ps_search_dict(ngs), candp->wid));
983 dscr += ngram_tg_score(ngs->
lmset,
984 dict_basewid(ps_search_dict(ngs), candp->wid),
990 if (dscr BETTER_THAN ngs->last_ltrans[candp->wid].dscr) {
991 ngs->last_ltrans[candp->wid].dscr = dscr;
992 ngs->last_ltrans[candp->wid].bp = bp;
1000 for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
1001 candp->score += ngs->last_ltrans[candp->wid].dscr;
1002 candp->bp = ngs->last_ltrans[candp->wid].bp;
1004 if (candp->score BETTER_THAN bestscore)
1005 bestscore = candp->score;
1010 thresh = bestscore + ngs->lponlybeam;
1011 for (i = ngs->n_lastphn_cand, candp = ngs->lastphn_cand; i > 0; --i, candp++) {
1012 if (candp->score BETTER_THAN thresh) {
1019 if ((hmm_frame(&hmm->
hmm) < frame_idx)
1020 || (candp->score BETTER_THAN hmm_in_score(&hmm->
hmm))) {
1021 assert(hmm_frame(&hmm->
hmm) != nf);
1023 candp->score, candp->bp, nf);
1029 assert(!dict_is_single_phone(ps_search_dict(ngs), w));
1049 int32 newword_thresh, lastphn_thresh;
1060 for (i = ngs->
n_active_word[frame_idx & 0x1], w = *(awl++); i > 0;
1061 --i, w = *(awl++)) {
1064 for (hmm = ngs->
word_chan[w]; hmm; hmm = thmm) {
1065 assert(hmm_frame(&hmm->
hmm) >= frame_idx);
1068 if (hmm_bestscore(&hmm->
hmm) BETTER_THAN lastphn_thresh) {
1070 hmm_frame(&hmm->
hmm) = nf;
1072 phmmp = &(hmm->
next);
1075 if (hmm_out_score(&hmm->
hmm) BETTER_THAN newword_thresh) {
1078 hmm_out_score(&hmm->
hmm),
1079 hmm_out_history(&hmm->
hmm),
1083 else if (hmm_frame(&hmm->
hmm) == nf) {
1084 phmmp = &(hmm->
next);
1092 if ((k > 0) && (bitvec_is_clear(ngs->
word_active, w))) {
1093 assert(!dict_is_single_phone(ps_search_dict(ngs), w));
1107 E_DEBUG(3,(
"Single phone word %s frame %d score %d thresh %d outscore %d nwthresh %d\n",
1108 dict_wordstr(ps_search_dict(ngs),w),
1109 hmm_frame(&rhmm->
hmm), hmm_bestscore(&rhmm->
hmm),
1110 lastphn_thresh, hmm_out_score(&rhmm->
hmm), newword_thresh));
1111 if (hmm_frame(&rhmm->
hmm) < frame_idx)
1113 if (hmm_bestscore(&rhmm->
hmm) BETTER_THAN lastphn_thresh) {
1114 hmm_frame(&rhmm->
hmm) = nf;
1117 if (hmm_out_score(&rhmm->
hmm) BETTER_THAN newword_thresh) {
1118 E_DEBUG(4,(
"Exiting single phone word %s with %d > %d, %d\n",
1119 dict_wordstr(ps_search_dict(ngs),w),
1120 hmm_out_score(&rhmm->
hmm),
1121 lastphn_thresh, newword_thresh));
1123 hmm_out_score(&rhmm->
hmm),
1124 hmm_out_history(&rhmm->
hmm), 0);
1134 ngs->n_lastphn_cand = 0;
1136 ngs->dynamic_beam = ngs->beam;
1137 if (ngs->maxhmmpf != -1
1138 && ngs->
st.n_root_chan_eval + ngs->
st.n_nonroot_chan_eval > ngs->maxhmmpf) {
1140 int32 bins[256], bw, nhmms, i;
1145 bw = -ngs->beam / 256;
1146 memset(bins, 0,
sizeof(bins));
1159 for (i = ngs->
n_active_chan[frame_idx & 0x1], hmm = *(acl++);
1160 i > 0; --i, hmm = *(acl++)) {
1164 b = (ngs->
best_score - hmm_bestscore(&hmm->hmm)) / bw;
1170 for (i = nhmms = 0; i < 256; ++i) {
1172 if (nhmms > ngs->maxhmmpf)
1175 ngs->dynamic_beam = -(i * bw);
1178 prune_root_chan(ngs, frame_idx);
1179 prune_nonroot_chan(ngs, frame_idx);
1180 last_phone_transition(ngs, frame_idx);
1181 prune_word_chan(ngs, frame_idx);
1192 int32 bestscr, worstscr;
1193 bptbl_t *bpe, *bestbpe, *worstbpe;
1196 if (ngs->maxwpf == -1 || ngs->maxwpf == ps_search_n_words(ngs))
1200 bestscr = (int32) 0x80000000;
1203 for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
1204 bpe = &(ngs->bp_table[bp]);
1206 if (bpe->
score BETTER_THAN bestscr) {
1207 bestscr = bpe->
score;
1215 if (bestbpe != NULL) {
1216 bestbpe->
valid = TRUE;
1222 - ngs->bp_table_idx[frame_idx]) - n;
1223 for (; n > ngs->maxwpf; --n) {
1225 worstscr = (int32) 0x7fffffff;
1227 for (bp = ngs->bp_table_idx[frame_idx]; (bp < ngs->bpidx); bp++) {
1228 bpe = &(ngs->bp_table[bp]);
1230 worstscr = bpe->
score;
1235 if (worstbpe == NULL)
1236 E_FATAL(
"PANIC: No worst BPtable entry remaining\n");
1237 worstbpe->
valid = FALSE;
1244 int32 i, k, bp, w, nf;
1246 int32 thresh, newscore, pl_newscore;
1251 dict_t *dict = ps_search_dict(ngs);
1259 for (i = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef) - 1; i >= 0; --i)
1260 ngs->bestbp_rc[i].score = WORST_SCORE;
1265 for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
1266 bpe = &(ngs->bp_table[bp]);
1267 ngs->word_lat_idx[bpe->
wid] = NO_BP;
1269 if (bpe->
wid == ps_search_finish_wid(ngs))
1279 for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
1280 if (bpe->
score BETTER_THAN ngs->bestbp_rc[rc].score) {
1281 E_DEBUG(4,(
"bestbp_rc[0] = %d lc %d\n",
1283 ngs->bestbp_rc[rc].score = bpe->
score;
1284 ngs->bestbp_rc[rc].path = bp;
1291 int32 *rcss = &(ngs->bscore_stack[bpe->
s_idx]);
1292 for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
1293 if (rcss[rssid->
cimap[rc]] BETTER_THAN ngs->bestbp_rc[rc].score) {
1294 E_DEBUG(4,(
"bestbp_rc[%d] = %d lc %d\n",
1296 ngs->bestbp_rc[rc].score = rcss[rssid->
cimap[rc]];
1297 ngs->bestbp_rc[rc].path = bp;
1307 thresh = ngs->
best_score + ngs->dynamic_beam;
1313 bestbp_rc_ptr = &(ngs->bestbp_rc[rhmm->
ciphone]);
1315 newscore = bestbp_rc_ptr->score + ngs->nwpen + ngs->pip;
1316 pl_newscore = newscore
1318 if (pl_newscore BETTER_THAN thresh) {
1319 if ((hmm_frame(&rhmm->
hmm) < frame_idx)
1320 || (newscore BETTER_THAN hmm_in_score(&rhmm->
hmm))) {
1322 bestbp_rc_ptr->path, nf);
1325 hmm_mpx_ssid(&rhmm->
hmm, 0) =
1326 dict2pid_ldiph_lc(d2p, rhmm->
ciphone, rhmm->
ci2phone, bestbp_rc_ptr->lc);
1338 ngs->last_ltrans[w].dscr = (int32) 0x80000000;
1340 for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
1341 bpe = &(ngs->bp_table[bp]);
1349 (ngs, bpe, dict_first_phone(dict, w));
1350 E_DEBUG(4, (
"initial newscore for %s: %d\n",
1351 dict_wordstr(dict, w), newscore));
1352 if (newscore != WORST_SCORE)
1353 newscore += ngram_tg_score(ngs->
lmset,
1354 dict_basewid(dict, w),
1361 if (newscore BETTER_THAN ngs->last_ltrans[w].dscr) {
1362 ngs->last_ltrans[w].dscr = newscore;
1363 ngs->last_ltrans[w].bp = bp;
1373 if (w == dict_startwid(ps_search_dict(ngs)))
1376 newscore = ngs->last_ltrans[w].dscr + ngs->pip;
1378 if (pl_newscore BETTER_THAN thresh) {
1379 bpe = ngs->bp_table + ngs->last_ltrans[w].bp;
1380 if ((hmm_frame(&rhmm->
hmm) < frame_idx)
1381 || (newscore BETTER_THAN hmm_in_score(&rhmm->
hmm))) {
1383 newscore, ngs->last_ltrans[w].bp, nf);
1386 hmm_mpx_ssid(&rhmm->
hmm, 0) =
1388 dict_last_phone(dict, bpe->
wid));
1395 w = ps_search_silence_wid(ngs);
1397 bestbp_rc_ptr = &(ngs->bestbp_rc[ps_search_acmod(ngs)->mdef->sil]);
1398 newscore = bestbp_rc_ptr->score + ngs->silpen + ngs->pip;
1399 pl_newscore = newscore
1401 if (pl_newscore BETTER_THAN thresh) {
1402 if ((hmm_frame(&rhmm->
hmm) < frame_idx)
1403 || (newscore BETTER_THAN hmm_in_score(&rhmm->
hmm))) {
1405 newscore, bestbp_rc_ptr->path, nf);
1408 for (w = dict_filler_start(dict); w <= dict_filler_end(dict); w++) {
1409 if (w == ps_search_silence_wid(ngs))
1413 if (w == dict_startwid(ps_search_dict(ngs)))
1419 newscore = bestbp_rc_ptr->score + ngs->fillpen + ngs->pip;
1420 pl_newscore = newscore
1422 if (pl_newscore BETTER_THAN thresh) {
1423 if ((hmm_frame(&rhmm->
hmm) < frame_idx)
1424 || (newscore BETTER_THAN hmm_in_score(&rhmm->
hmm))) {
1426 newscore, bestbp_rc_ptr->path, nf);
1440 if (hmm_frame(&rhmm->
hmm) == frame_idx) {
1448 if (hmm_frame(&rhmm->
hmm) == frame_idx) {
1457 int16
const *senscr;
1460 if (!ps_search_acmod(ngs)->compallsen)
1461 compute_sen_active(ngs, frame_idx);
1464 if ((senscr =
acmod_score(ps_search_acmod(ngs), &frame_idx)) == NULL)
1466 ngs->
st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active;
1477 E_INFO(
"Renormalizing Scores at frame %d, best score %d\n",
1479 renormalize_scores(ngs, frame_idx, ngs->
best_score);
1483 evaluate_channels(ngs, senscr, frame_idx);
1485 prune_channels(ngs, frame_idx);
1487 bptable_maxwpf(ngs, frame_idx);
1489 word_transition(ngs, frame_idx);
1491 deactivate_channels(ngs, frame_idx);
1501 int32 i, w, cf, *awl;
1506 cf = ps_search_acmod(ngs)->output_frame;
1519 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
1526 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
1528 if (dict_is_single_phone(ps_search_dict(ngs), w))
1544 ptmr_stop(&ngs->fwdtree_perf);
1547 double n_speech = (double)(cf + 1)
1548 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
1549 E_INFO(
"%8d words recognized (%d/fr)\n",
1550 ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1));
1551 E_INFO(
"%8d senones evaluated (%d/fr)\n", ngs->
st.n_senone_active_utt,
1552 (ngs->
st.n_senone_active_utt + (cf >> 1)) / (cf + 1));
1553 E_INFO(
"%8d channels searched (%d/fr), %d 1st, %d last\n",
1554 ngs->
st.n_root_chan_eval + ngs->
st.n_nonroot_chan_eval,
1555 (ngs->
st.n_root_chan_eval + ngs->
st.n_nonroot_chan_eval) / (cf + 1),
1556 ngs->
st.n_root_chan_eval, ngs->
st.n_last_chan_eval);
1557 E_INFO(
"%8d words for which last channels evaluated (%d/fr)\n",
1558 ngs->
st.n_word_lastchan_eval,
1559 ngs->
st.n_word_lastchan_eval / (cf + 1));
1560 E_INFO(
"%8d candidate words for entering last phone (%d/fr)\n",
1561 ngs->
st.n_lastphn_cand_utt, ngs->
st.n_lastphn_cand_utt / (cf + 1));
1562 E_INFO(
"fwdtree %.2f CPU %.3f xRT\n",
1563 ngs->fwdtree_perf.t_cpu,
1564 ngs->fwdtree_perf.t_cpu / n_speech);
1565 E_INFO(
"fwdtree %.2f wall %.3f xRT\n",
1566 ngs->fwdtree_perf.t_elapsed,
1567 ngs->fwdtree_perf.t_elapsed / n_speech);
hmm_t hmm
Basic HMM structure.
void ngram_fwdtree_finish(ngram_search_t *ngs)
Finish fwdtree decoding for an utterance.
void ngram_fwdtree_deinit(ngram_search_t *ngs)
Release memory associated with fwdtree decoding.
Base structure for search module.
int32 n_nonroot_chan
Number of valid non-root channels.
void ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
chan_t * next
first descendant of this channel
listelem_alloc_t * chan_alloc
For chan_t.
void ngram_fwdtree_start(ngram_search_t *ngs)
Start fwdtree decoding for an utterance.
const char * bin_mdef_ciphone_str(bin_mdef_t *m, int32 ci)
In: ciphone id for which name wanted.
frame_idx_t frame
start or end frame
hmm_context_t * hmmctx
HMM context.
int32 n_active_chan[2]
Number of entries in active_chan_list.
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
int16 last2_phone
next-to-last phone of this word
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
#define BAD_SSID
Invalid senone sequence ID (limited to 16 bits for PocketSphinx).
bitvec_t * word_active
array of active flags for all words.
int32 ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone)
Get the exit score for a backpointer entry with a given right context.
int16 ciphone
first ciphone of this node; all words rooted at this node begin with this ciphone ...
int32 ** active_word_list
Array of active multi-phone words for current and next frame.
struct chan_s * next
first descendant of this channel; or, in the case of the last phone of a word, the next alternative r...
void ngram_search_save_bp(ngram_search_t *ngs, int frame_idx, int32 w, int32 score, int32 path, int32 rc)
Enter a word in the backpointer table.
Lexicon tree based Viterbi search.
int32 * single_phone_wid
list of single-phone word ids
int ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx)
Record the current frame's index in the backpointer table.
int32 n_root_chan_alloc
Number of root_chan allocated.
int16 ci2phone
second ciphone of this node; one root HMM for each unique right context
int32 penult_phn_wid
list of words whose last phone follows this one; this field indicates the first of the list; the rest...
int32 n_active_word[2]
Number of entries in active_word_list.
int32 rc_id
right-context id for last phone of words
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
N-Gram search module structure.
int ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
void hmm_normalize(hmm_t *h, int32 bestscr)
Renormalize the scores in this HMM based on the given best score.
int32 max_nonroot_chan
Maximum possible number of non-root channels.
int32 last_phone_best_score
Best Viterbi path score for last phone.
int32 real_wid
wid of this or latest predecessor real word
root_chan_t * rhmm_1ph
Root HMMs for single-phone words.
int32 prev_real_wid
wid of second-last real word
#define WORST_SCORE
Large "bad" score.
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
void ngram_fwdtree_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdtree decoding.
Lexical tree node data type for the first phone (root) of each dynamic HMM tree structure.
void hmm_enter(hmm_t *h, int32 score, int32 histid, int frame)
Enter an HMM with the given path score and history ID.
Lexical tree node data type.
hmm_t hmm
Basic HMM structure.
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
#define hmm_context_set_senscore(ctx, senscr)
Change the senone score array for a context.
#define SENSCR_SHIFT
Shift count for senone scores.
chan_t *** active_chan_list
Array of active channels for current and next frame.
a structure for a dictionary.
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a "real" word, i.e.
struct chan_s * alt
sibling; i.e., next descendant of parent HMM
#define WORSE_THAN
Is one score worse than another?
s3ssid_t dict2pid_internal(dict2pid_t *d2p, int32 wid, int pos)
Return the senone sequence ID for the given word position.
void hmm_clear(hmm_t *h)
Reset the states of the HMM to the invalid condition.
int32 best_score
Best Viterbi path score.
Back pointer table (forward pass lattice; actually a tree)
int32 n_1ph_LMwords
Number of single-phone dict words also in LM; these come first in single_phone_wid.
cross word triphone model structure
int ngram_fwdtree_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
Fast and rough context-independent phoneme loop search.
void ngram_search_free_all_rc(ngram_search_t *ngs, int32 w)
Free last phone channels for all possible right contexts for word w.
root_chan_t * root_chan
Search structure of HMM instances.
char * hyp_str
Current hypothesis string.
#define BETTER_THAN
Is one score better than another?
int32 s_idx
Start of BScoreStack for various right contexts.
int32 n_frame
Number of frames actually present.
ngram_model_t * lmset
Set of language models.
uint8 valid
For absolute pruning.
int32 n_1ph_words
Number of single-phone words in dict (total)
int32 ciphone
ciphone for this node
ngram_search_stats_t st
Various statistics for profiling.
chan_t ** word_chan
Channels associated with a given word (only used for right contexts, single-phone words in fwdtree se...
int32 score
Score (best among all right contexts)
int32 n_root_chan
Number of valid root_chan.
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
int32 * homophone_set
Each node in the HMM tree structure may point to a set of words whose last phone would follow that no...
#define dict_pron(d, w, p)
The CI phones of the word w at position p.
Building composite triphone (as well as word internal triphones) with the dictionary.
#define phone_loop_search_score(pls, ci)
Return lookahead heuristic score for a specific phone.
Phone loop search structure.
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
int16 last_phone
last phone of this word