42 #include <sphinxbase/err.h>
43 #include <sphinxbase/ckd_alloc.h>
44 #include <sphinxbase/strfuncs.h>
45 #include <sphinxbase/pio.h>
46 #include <sphinxbase/cmd_ln.h>
49 #include "allphone_search.h"
67 allphone_search_seg_free(
ps_seg_t * seg)
77 seg->
ascr = phseg->score;
78 seg->
lscr = phseg->tscore;
83 allphone_search_seg_next(
ps_seg_t * seg)
88 itor->seg = itor->seg->next;
90 if (itor->seg == NULL) {
91 allphone_search_seg_free(seg);
94 phseg = gnode_ptr(itor->seg);
95 allphone_search_fill_iter(seg, phseg);
101 allphone_search_seg_next,
102 allphone_search_seg_free
112 allphone_backtrace(allphs, allphs->
frame - 1, NULL);
113 if (allphs->segments == NULL)
118 iter->base.
vt = &fsg_segfuncs;
119 iter->base.
search = search;
120 iter->seg = allphs->segments;
121 allphone_search_fill_iter((
ps_seg_t *)iter, gnode_ptr(iter->seg));
127 allphone_search_start,
128 allphone_search_step,
129 allphone_search_finish,
130 allphone_search_reinit,
131 allphone_search_free,
132 allphone_search_lattice,
134 allphone_search_prob,
135 allphone_search_seg_iter,
152 for (p = ci_phmm[bin_mdef_pid2ci(mdef, pid)]; p; p = p->
next) {
153 if (mdef_pid2tmatid(mdef, p->
pid) == mdef_pid2tmatid(mdef, pid))
154 if (mdef_pid2ssid(mdef, p->
pid) == mdef_pid2ssid(mdef, pid))
175 rclist = (int32 *) ckd_calloc(mdef->
n_ciphone + 1,
sizeof(int32));
179 for (ci = 0; ci < mdef->
n_ciphone; ci++) {
180 for (p = ci_phmm[ci]; p; p = p->
next) {
183 for (rc = 0; rc < mdef->
n_ciphone; rc++) {
184 if (bitvec_is_set(p->
rc, rc))
190 for (i = 0; IS_S3CIPID(rclist[i]); i++) {
191 for (p2 = ci_phmm[rclist[i]]; p2; p2 = p2->
next) {
192 if (bitvec_is_set(p2->
lc, ci)) {
194 l = (
plink_t *) ckd_calloc(1,
sizeof(*l));
229 (
phmm_t **) ckd_calloc(bin_mdef_n_ciphone(mdef),
sizeof(
phmm_t *));
231 (
phmm_t **) ckd_calloc(bin_mdef_n_phone(mdef),
sizeof(
phmm_t *));
235 nphone = allphs->
ci_only ? bin_mdef_n_ciphone(mdef) : bin_mdef_n_phone(mdef);
236 E_INFO(
"Building PHMM net of %d phones\n", nphone);
237 for (pid = 0; pid < nphone; pid++) {
238 if ((p = phmm_lookup(allphs, pid)) == NULL) {
240 p = (
phmm_t *) ckd_calloc(1,
sizeof(*p));
242 mdef_pid2ssid(mdef, pid), mdef->
phone[pid].
tmat);
244 p->
ci = bin_mdef_pid2ci(mdef, pid);
254 lrc_size = bitvec_size(bin_mdef_n_ciphone(mdef));
255 lc = ckd_calloc(n_phmm * 2 * lrc_size,
sizeof(bitvec_t));
256 rc = lc + (n_phmm * lrc_size);
257 for (ci = 0; ci < mdef->
n_ciphone; ci++) {
268 (
s3cipid_t *) ckd_calloc(bin_mdef_n_ciphone(mdef) + 1,
273 for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++) {
275 bitvec_set_all(p->
lc, bin_mdef_n_ciphone(mdef));
276 bitvec_set_all(p->
rc, bin_mdef_n_ciphone(mdef));
277 if (mdef->
phone[ci].info.
ci.filler) {
285 for (pid = bin_mdef_n_ciphone(mdef); pid < nphone;
289 if (mdef->
phone[mdef->
phone[pid].info.cd.ctx[1]].info.
ci.filler) {
290 for (i = 0; IS_S3CIPID(filler[i]); i++)
291 bitvec_set(p->
lc, filler[i]);
294 bitvec_set(p->
lc, mdef->
phone[pid].info.cd.ctx[1]);
296 if (mdef->
phone[mdef->
phone[pid].info.cd.ctx[2]].info.
ci.filler) {
297 for (i = 0; IS_S3CIPID(filler[i]); i++)
298 bitvec_set(p->
rc, filler[i]);
301 bitvec_set(p->
rc, mdef->
phone[pid].info.cd.ctx[2]);
307 n_link = phmm_link(allphs);
309 E_INFO(
"%d nodes, %d links\n", n_phmm, n_link);
323 for (ci = 0; ci < mdef_n_ciphone(mdef); ++ci) {
326 for (p = allphs->
ci_phmm[ci]; p; p = next) {
330 for (l = p->
succlist; l; l = lnext) {
357 for (ci = 0; ci < mdef->
n_ciphone; ci++) {
358 for (p = ci_phmm[(
unsigned) ci]; p; p = p->
next) {
359 if (hmm_frame(&(p->
hmm)) == allphs->
frame) {
385 th = best + allphs->
pbeam;
388 mdef = ps_search_acmod(allphs)->mdef;
389 curfrm = allphs->
frame;
395 for (ci = 0; ci < mdef->
n_ciphone; ci++) {
396 for (p = ci_phmm[(
unsigned) ci]; p; p = p->
next) {
397 if (hmm_frame(&(p->
hmm)) == curfrm) {
399 if (hmm_bestscore(&(p->
hmm)) >= th) {
401 h = (
history_t *) ckd_calloc(1,
sizeof(*h));
404 h->
hist = hmm_out_history(&(p->
hmm));
405 h->
score = hmm_out_score(&(p->
hmm));
414 blkarray_list_get(history, h->
hist);
416 if (pred->
hist > 0) {
418 blkarray_list_get(history,
421 ngram_tg_score(allphs->
lm,
432 ngram_bg_score(allphs->
lm,
449 blkarray_list_append(history, h);
452 hmm_frame(&(p->
hmm)) = nf;
465 int32 frame_history_start)
470 int32 newscore, nf, curfrm;
474 curfrm = allphs->
frame;
479 for (hist_idx = frame_history_start;
480 hist_idx < blkarray_list_n_valid(allphs->
history); hist_idx++) {
481 h = blkarray_list_get(allphs->
history, hist_idx);
496 ngram_tg_score(allphs->
lm,
503 tscore = ngram_bg_score(allphs->
lm,
510 newscore = h->
score + tscore;
511 if ((newscore > best + allphs->beam)
512 && (newscore > hmm_in_score(&(to->
hmm)))) {
520 allphone_search_init(
const char *name,
530 ps_search_init(ps_search_base(allphs), &allphone_funcs, PS_SEARCH_TYPE_ALLPHONE, name, config, acmod,
536 if (allphs->
hmmctx == NULL) {
537 ps_search_free(ps_search_base(allphs));
541 allphs->
ci_only = cmd_ln_boolean_r(config,
"-allphone_ci");
542 allphs->lw = cmd_ln_float32_r(config,
"-lw");
549 allphs->
lm = ngram_model_retain(lm);
554 if (silwid == ngram_unknown_wid(allphs->
lm)) {
555 E_ERROR(
"Phonetic LM does not have SIL phone in vocabulary\n");
565 ngram_wid(allphs->
lm,
568 if (allphs->
ci2lmwid[i] == ngram_unknown_wid(allphs->
lm))
574 (
"Failed to load language model specified in -allphone, doing unconstrained phone-loop decoding\n");
577 (acmod->
lmath, cmd_ln_float32_r(config,
"-pip"))
583 allphs->segments = NULL;
588 (int32) logmath_log(acmod->
lmath,
589 cmd_ln_float64_r(config,
"-beam"))
593 (int32) logmath_log(acmod->
lmath,
594 cmd_ln_float64_r(config,
"-pbeam"))
598 allphs->
history = blkarray_list_init();
601 allphs->
ascale = 1.0 / cmd_ln_float32_r(config,
"-ascale");
603 E_INFO(
"Allphone(beam: %d, pbeam: %d)\n", allphs->beam, allphs->
pbeam);
605 ptmr_init(&allphs->
perf);
621 (
"-lm argument missing; doing unconstrained phone-loop decoding\n");
625 cmd_ln_float32_r(search->
config,
640 / cmd_ln_int32_r(ps_search_config(allphs),
"-frate");
642 E_INFO(
"TOTAL allphone %.2f CPU %.3f xRT\n",
643 allphs->
perf.t_tot_cpu,
644 allphs->
perf.t_tot_cpu / n_speech);
645 E_INFO(
"TOTAL allphone %.2f wall %.3f xRT\n",
646 allphs->
perf.t_tot_elapsed,
647 allphs->
perf.t_tot_elapsed / n_speech);
654 ngram_model_free(allphs->
lm);
658 blkarray_list_free(allphs->
history);
675 for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++) {
676 for (p = allphs->
ci_phmm[(
unsigned) ci]; p; p = p->
next) {
685 blkarray_list_reset(allphs->
history);
689 ci = bin_mdef_silphone(mdef);
697 ptmr_reset(&allphs->
perf);
698 ptmr_start(&allphs->
perf);
711 acmod = ps_search_acmod(allphs);
715 for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++)
717 if (hmm_frame(&(p->
hmm)) == allphs->
frame)
722 allphone_search_step(
ps_search_t * search,
int frame_idx)
724 int32 bestscr, frame_history_start;
730 allphone_search_sen_active(allphs);
733 bestscr = phmm_eval_all(allphs, senscr);
735 frame_history_start = blkarray_list_n_valid(allphs->
history);
736 phmm_exit(allphs, bestscr);
737 phmm_trans(allphs, bestscr, frame_history_start);
747 int32 score = h->
score;
751 score -= pred->
score;
761 for (gn = allphs->segments; gn; gn = gn->next) {
762 ckd_free(gnode_ptr(gn));
764 glist_free(allphs->segments);
765 allphs->segments = NULL;
771 int32 best, hist_idx, best_idx;
777 allphone_clear_segments(allphs);
781 hist_idx = blkarray_list_n_valid(allphs->
history) - 1;
782 while (hist_idx > 0) {
783 h = blkarray_list_get(allphs->
history, hist_idx);
785 frm = last_frm = h->
ef;
795 best = (int32) 0x80000000;
797 while (frm == last_frm && hist_idx > 0) {
798 h = blkarray_list_get(allphs->
history, hist_idx);
800 if (h->
score > best && frm == last_frm) {
814 while (best_idx > 0) {
815 h = blkarray_list_get(allphs->
history, best_idx);
821 h->
hist))->ef + 1 : 0;
823 s->score = ascore(allphs, h);
825 allphs->segments = glist_add_ptr(allphs->segments, s);
842 n_hist = blkarray_list_n_valid(allphs->
history);
844 (
"%d frames, %d HMMs (%d/fr), %d senones (%d/fr), %d history entries (%d/fr)\n",
849 n_hist, (allphs->
frame > 0) ? n_hist / allphs->
frame : 0);
852 allphone_backtrace(allphs, allphs->
frame - 1, NULL);
855 ptmr_stop(&allphs->
perf);
857 cf = ps_search_acmod(allphs)->output_frame;
859 double n_speech = (double) (cf + 1)
860 / cmd_ln_int32_r(ps_search_config(allphs),
"-frate");
861 E_INFO(
"allphone %.2f CPU %.3f xRT\n",
862 allphs->
perf.t_cpu, allphs->
perf.t_cpu / n_speech);
863 E_INFO(
"allphone %.2f wall %.3f xRT\n",
864 allphs->
perf.t_elapsed, allphs->
perf.t_elapsed / n_speech);
872 allphone_search_hyp(
ps_search_t * search, int32 * out_score)
877 const char *phone_str;
879 int len, hyp_idx, phone_idx;
889 allphone_backtrace(allphs, allphs->
frame - 1, out_score);
890 if (allphs->segments == NULL) {
894 len = glist_count(allphs->segments) * 10;
896 search->
hyp_str = (
char *) ckd_calloc(len,
sizeof(*search->
hyp_str));
898 for (gn = allphs->segments; gn; gn = gn->next) {
902 while (phone_str[phone_idx] !=
'\0')
903 search->
hyp_str[hyp_idx++] = phone_str[phone_idx++];
904 search->
hyp_str[hyp_idx++] =
' ';
906 search->
hyp_str[--hyp_idx] =
'\0';
907 E_INFO(
"Hyp: %s\n", search->
hyp_str);
Internal implementation of PocketSphinx decoder.
History (paths) information at any point in allphone Viterbi search.
struct phmm_s * next
Next unique PHMM for same parent basephone.
Base structure for search module.
ptmr_t perf
Performance counter.
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
s3cipid_t ci
Parent basephone for this PHMM.
int32 n_sen_eval
Total senones evaluated this utt.
const char * bin_mdef_ciphone_str(bin_mdef_t *m, int32 ci)
In: ciphone id for which name wanted.
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
int n_senone_active
Number of active GMMs.
acmod_t * acmod
Acoustic model.
An individual HMM among the HMM search space.
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state]...
ps_segfuncs_t * vt
V-table of seg methods.
logmath_t * lmath
Log-math computation.
uint16 ** sseq
Unique senone sequences (2D array built at load time)
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
int32 lscr
Language model score.
s3pid_t pid
Phone id (temporary).
int32 tmat
Transition matrix ID.
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
int32 hist
Previous history entry.
bitvec_t * lc
Set (bit-vector) of left context phones seen for this PHMM.
int32 score
Path score for this path.
#define BAD_S3CIPID
Ci phone id.
char const * word
Word string (pointer into dictionary hash)
ps_search_t * search
Search object from whence this came.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
int32 * ci2lmwid
Mapping of CI phones to LM word IDs.
int32 inspen
Language weights.
int32 hmm_vit_eval(hmm_t *hmm)
Viterbi evaluation of given HMM.
struct mdef_entry_s::@0::@1 ci
CI phone information - attributes (just "filler" for now)
uint8 compallsen
Compute all senones?
bitvec_t * rc
Set (bit-vector) of right context phones seen for this PHMM.
hmm_context_t * hmm_context_init(int32 n_emit_state, uint8 **const *tp, int16 const *senscore, uint16 *const *sseq)
Create an HMM context.
void ps_search_base_free(ps_search_t *search)
Free search.
Implementation of allphone search structure.
cmd_ln_t * config
Configuration.
int16 s3cipid_t
Size definitions for more semantically meaningful units.
#define WORST_SCORE
Large "bad" score.
tmat_t * tmat
Transition matrices.
int32 ascr
Acoustic score.
int32 tscore
Transition score for this path.
hmm_context_t * hmmctx
HMM context.
void hmm_enter(hmm_t *h, int32 score, int32 histid, int frame)
Enter an HMM with the given path score and history ID.
Segment iterator over list of phseg.
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
#define S3_SILENCE_CIPHONE
Hard-coded silence CI phone name.
struct plink_s * next
Next link for parent PHMM node.
#define hmm_context_set_senscore(ctx, senscr)
Change the senone score array for a context.
#define SENSCR_SHIFT
Shift count for senone scores.
A structure for a dictionary.
float32 ascale
Acoustic score scale for posterior probabilities.
int32 n_tot_frame
Total number of frames processed.
ngram_model_t * lm
Ngram model set.
Word graph structure used in bestpath/nbest search.
hmm_t hmm
Base HMM structure.
void hmm_clear(hmm_t *h)
Reset the states of the HMM to the invalid condition.
char * hyp_str
Current hypothesis string.
int32 ci_only
Use context-independent phones for decoding.
int32 n_hmm_eval
Total HMMs evaluated this utt.
struct plink_s * succlist
List of successor PHMM nodes.
Models a single unique <senone-sequence, tmat> pair.
phmm_t * phmm
PHMM ending this path.
phmm_t * phmm
Successor PHMM node.
void hmm_context_free(hmm_context_t *ctx)
Free an HMM context.
int32 pbeam
Effective beams after applying beam_factor.
bin_mdef_t * mdef
Model definition.
V-table for search algorithm.
mdef_entry_t * phone
All phone structures.
blkarray_list_t * history
List of history nodes allocated in each frame.
Base structure for hypothesis segmentation iterator.
int32 n_ciphone
Number of base (CI) phones.
cmd_ln_t * config
Configuration.
phmm_t ** ci_phmm
PHMM lists (for each CI phone)
Acoustic model structure.
Phone level segmentation information.
Building composite triphone (as well as word internal triphones) with the dictionary.
List of links from a PHMM node to its successors; one link per successor.
frame_idx_t sf
Start frame.
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
frame_idx_t frame
Current frame.