71 for (i = 0; i < sas->
n_phones; ++i) {
75 if (hmm_frame(hmm) < frame_idx)
88 int nf = frame_idx + 1;
92 for (i = 0; i < sas->
n_phones; ++i) {
94 if (hmm_frame(hmm) < frame_idx)
103 int nf = frame_idx + 1;
106 for (i = 0; i < sas->
n_phones - 1; ++i) {
108 int32 newphone_score;
111 if (hmm_frame(hmm) != nf)
114 newphone_score = hmm_out_score(hmm);
117 if (hmm_frame(nhmm) < frame_idx
118 || newphone_score
BETTER_THAN hmm_in_score(nhmm)) {
119 hmm_enter(nhmm, newphone_score, hmm_out_history(hmm), nf);
124 #define TOKEN_STEP 20
145 extend_tokenstack(sas, frame_idx);
149 for (i = 0; i < sas->
n_phones; ++i) {
153 if (hmm_frame(hmm) < frame_idx)
155 for (j = 0; j < sas->
hmmctx->n_emit_state; ++j) {
156 int state_idx = i * sas->
hmmctx->n_emit_state + j;
158 tokens[state_idx].id = hmm_history(hmm, j);
159 tokens[state_idx].score = hmm_score(hmm, j);
161 hmm_history(hmm, j) = state_idx;
167 state_align_search_step(
ps_search_t *search,
int frame_idx)
170 acmod_t *acmod = ps_search_acmod(search);
182 E_INFO(
"Renormalizing Scores at frame %d, best score %d\n",
184 renormalize_hmms(sas, frame_idx, sas->
best_score);
188 sas->
best_score = evaluate_hmms(sas, senscr, frame_idx);
189 prune_hmms(sas, frame_idx);
192 phone_transition(sas, frame_idx);
195 record_transitions(sas, frame_idx);
198 sas->
frame = frame_idx;
211 int last_frame, cur_frame;
215 last.id = cur.id = hmm_out_history(final_phone);
216 last.score = hmm_out_score(final_phone);
217 if (last.id == 0xffff) {
218 E_ERROR(
"Failed to reach final state in alignment\n");
222 last_frame = sas->
frame + 1;
223 for (cur_frame = sas->
frame - 1; cur_frame >= 0; --cur_frame) {
226 if (cur.id != last.id) {
228 assert(itor != NULL);
230 ent->start = cur_frame + 1;
231 ent->duration = last_frame - ent->start;
232 ent->score = last.score - cur.score;
233 E_DEBUG(1,(
"state %d start %d end %d\n", last.id,
234 ent->start, last_frame));
236 last_frame = cur_frame + 1;
241 assert(itor != NULL);
244 ent->duration = last_frame;
245 E_DEBUG(1,(
"state %d start %d end %d\n", 0,
246 ent->start, last_frame));
272 state_align_search_start,
273 state_align_search_step,
274 state_align_search_finish,
275 state_align_search_reinit,
276 state_align_search_free,
284 state_align_search_init(
const char *name,
293 sas = ckd_calloc(1,
sizeof(*sas));
295 PS_SEARCH_TYPE_STATE_ALIGN, name,
296 config, acmod, al->d2p->
dict, al->d2p);
299 if (sas->
hmmctx == NULL) {
313 ent->id.pid.ssid, ent->id.pid.tmatid);
315 return ps_search_base(sas);
int ps_alignment_n_states(ps_alignment_t *al)
Number of states.
int n_phones
Number of HMMs (phones).
Base structure for search module.
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
An individual HMM among the HMM search space.
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state]...
uint16 ** sseq
Unique senone sequences (2D array built at load time)
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
ps_alignment_iter_t * ps_alignment_iter_goto(ps_alignment_iter_t *itor, int pos)
Move alignment iterator to given index.
ps_alignment_iter_t * ps_alignment_iter_next(ps_alignment_iter_t *itor)
Move an alignment iterator forward.
int n_fr_alloc
Number of frames of tokens allocated.
hmm_t * hmms
Vector of HMMs corresponding to phone level.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
int32 hmm_vit_eval(hmm_t *hmm)
Viterbi evaluation of given HMM.
int ps_alignment_propagate(ps_alignment_t *al)
Propagate timing information up from state sequence.
void hmm_normalize(hmm_t *h, int32 bestscr)
Renormalize the scores in this HMM based on the given best score.
hmm_context_t * hmm_context_init(int32 n_emit_state, uint8 **const *tp, int16 const *senscore, uint16 *const *sseq)
Create an HMM context.
void ps_search_base_free(ps_search_t *search)
Free search.
int frame
Current frame being processed.
#define WORST_SCORE
Large "bad" score.
tmat_t * tmat
Transition matrices.
state_align_hist_t * tokens
Tokens (backpointers) for state alignment.
State (and phone and word) alignment search.
void hmm_enter(hmm_t *h, int32 score, int32 histid, int frame)
Enter an HMM with the given path score and history ID.
dict_t * dict
Dictionary this table refers to.
#define hmm_context_set_senscore(ctx, senscr)
Change the senone score array for a context.
a structure for a dictionary.
#define WORSE_THAN
Is one score worse than another?
ps_alignment_t * al
Alignment structure being operated on.
#define BETTER_THAN
Is one score better than another?
int32 best_score
Best score in current frame.
Phone loop search structure.
ps_alignment_iter_t * ps_alignment_states(ps_alignment_t *al)
Iterate over the alignment starting at the first state.
void hmm_context_free(hmm_context_t *ctx)
Free an HMM context.
bin_mdef_t * mdef
Model definition.
V-table for search algorithm.
int ps_alignment_n_phones(ps_alignment_t *al)
Number of phones.
int ps_alignment_iter_free(ps_alignment_iter_t *itor)
Release an iterator before completing all iterations.
ps_alignment_iter_t * ps_alignment_phones(ps_alignment_t *al)
Iterate over the alignment starting at the first phone.
Acoustic model structure.
Building composite triphone (as well as word internal triphones) with the dictionary.
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
ps_alignment_entry_t * ps_alignment_iter_get(ps_alignment_iter_t *itor)
Get the alignment entry pointed to by an iterator.
hmm_context_t * hmmctx
HMM context structure.
int n_emit_state
Number of emitting states (tokens per frame)