PocketSphinx  5prealpha
allphone_search.c
1 /* ====================================================================
2  * Copyright (c) 2014 Carnegie Mellon University. All rights
3  * reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in
14  * the documentation and/or other materials provided with the
15  * distribution.
16  *
17  *
18  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
19  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
22  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * ====================================================================
31  *
32  */
33 
34 /*
35 * allphone_search.c -- Search for phonetic decoding.
36 */
37 
38 #include <stdio.h>
39 #include <string.h>
40 #include <assert.h>
41 
42 #include <sphinxbase/err.h>
43 #include <sphinxbase/ckd_alloc.h>
44 #include <sphinxbase/strfuncs.h>
45 #include <sphinxbase/pio.h>
46 #include <sphinxbase/cmd_ln.h>
47 
48 #include "pocketsphinx_internal.h"
49 #include "allphone_search.h"
50 
51 static ps_lattice_t *
52 allphone_search_lattice(ps_search_t * search)
53 {
54  return NULL;
55 }
56 
57 static int
58 allphone_search_prob(ps_search_t * search)
59 {
60  return 0;
61 }
62 
63 static void
64 allphone_backtrace(allphone_search_t * allphs, int32 f, int32 *out_score);
65 
66 static void
67 allphone_search_seg_free(ps_seg_t * seg)
68 {
69  ckd_free(seg);
70 }
71 
72 static void
73 allphone_search_fill_iter(ps_seg_t *seg, phseg_t *phseg)
74 {
75  seg->sf = phseg->sf;
76  seg->ef = phseg->ef;
77  seg->ascr = phseg->score;
78  seg->lscr = phseg->tscore;
79  seg->word = bin_mdef_ciphone_str(ps_search_acmod(seg->search)->mdef, phseg->ci);
80 }
81 
82 static ps_seg_t *
83 allphone_search_seg_next(ps_seg_t * seg)
84 {
85  phseg_iter_t *itor = (phseg_iter_t *) seg;
86  phseg_t *phseg;
87 
88  itor->seg = itor->seg->next;
89 
90  if (itor->seg == NULL) {
91  allphone_search_seg_free(seg);
92  return NULL;
93  }
94  phseg = gnode_ptr(itor->seg);
95  allphone_search_fill_iter(seg, phseg);
96 
97  return seg;
98 }
99 
100 static ps_segfuncs_t fsg_segfuncs = {
101  /* seg_next */ allphone_search_seg_next,
102  /* seg_free */ allphone_search_seg_free
103 };
104 
105 
106 static ps_seg_t *
107 allphone_search_seg_iter(ps_search_t * search)
108 {
109  allphone_search_t *allphs = (allphone_search_t *) search;
110  phseg_iter_t *iter;
111 
112  allphone_backtrace(allphs, allphs->frame - 1, NULL);
113  if (allphs->segments == NULL)
114  return NULL;
115 
116  iter = ckd_calloc(1, sizeof(phseg_iter_t));
117 
118  iter->base.vt = &fsg_segfuncs;
119  iter->base.search = search;
120  iter->seg = allphs->segments;
121  allphone_search_fill_iter((ps_seg_t *)iter, gnode_ptr(iter->seg));
122 
123  return (ps_seg_t *) iter;
124 }
125 
126 static ps_searchfuncs_t allphone_funcs = {
127  /* start: */ allphone_search_start,
128  /* step: */ allphone_search_step,
129  /* finish: */ allphone_search_finish,
130  /* reinit: */ allphone_search_reinit,
131  /* free: */ allphone_search_free,
132  /* lattice: */ allphone_search_lattice,
133  /* hyp: */ allphone_search_hyp,
134  /* prob: */ allphone_search_prob,
135  /* seg_iter: */ allphone_search_seg_iter,
136 };
137 
142 static phmm_t *
143 phmm_lookup(allphone_search_t * allphs, s3pid_t pid)
144 {
145  phmm_t *p;
146  bin_mdef_t *mdef;
147  phmm_t **ci_phmm;
148 
149  mdef = ((ps_search_t *) allphs)->acmod->mdef;
150  ci_phmm = allphs->ci_phmm;
151 
152  for (p = ci_phmm[bin_mdef_pid2ci(mdef, pid)]; p; p = p->next) {
153  if (mdef_pid2tmatid(mdef, p->pid) == mdef_pid2tmatid(mdef, pid))
154  if (mdef_pid2ssid(mdef, p->pid) == mdef_pid2ssid(mdef, pid))
155  return p;
156  }
157 
158  return NULL;
159 }
160 
161 static int32
162 phmm_link(allphone_search_t * allphs)
163 {
164  s3cipid_t ci, rc;
165  phmm_t *p, *p2;
166  int32 *rclist;
167  int32 i, n_link;
168  plink_t *l;
169  bin_mdef_t *mdef;
170  phmm_t **ci_phmm;
171 
172  mdef = ((ps_search_t *) allphs)->acmod->mdef;
173  ci_phmm = allphs->ci_phmm;
174 
175  rclist = (int32 *) ckd_calloc(mdef->n_ciphone + 1, sizeof(int32));
176 
177  /* Create successor links between PHMM nodes */
178  n_link = 0;
179  for (ci = 0; ci < mdef->n_ciphone; ci++) {
180  for (p = ci_phmm[ci]; p; p = p->next) {
181  /* Build rclist for p */
182  i = 0;
183  for (rc = 0; rc < mdef->n_ciphone; rc++) {
184  if (bitvec_is_set(p->rc, rc))
185  rclist[i++] = rc;
186  }
187  rclist[i] = BAD_S3CIPID;
188 
189  /* For each rc in rclist, transition to PHMMs for rc if left context = ci */
190  for (i = 0; IS_S3CIPID(rclist[i]); i++) {
191  for (p2 = ci_phmm[rclist[i]]; p2; p2 = p2->next) {
192  if (bitvec_is_set(p2->lc, ci)) {
193  /* transition from p to p2 */
194  l = (plink_t *) ckd_calloc(1, sizeof(*l));
195  l->phmm = p2;
196  l->next = p->succlist;
197  p->succlist = l;
198 
199  n_link++;
200  }
201  }
202  }
203  }
204  }
205 
206  ckd_free(rclist);
207 
208  return n_link;
209 }
210 
214 static int
215 phmm_build(allphone_search_t * allphs)
216 {
217  phmm_t *p, **pid2phmm;
218  bin_mdef_t *mdef;
219  int32 lrc_size;
220  uint32 *lc, *rc;
221  s3pid_t pid;
222  s3cipid_t ci;
223  s3cipid_t *filler;
224  int n_phmm, n_link;
225  int i, nphone;
226 
227  mdef = ((ps_search_t *) allphs)->acmod->mdef;
228  allphs->ci_phmm =
229  (phmm_t **) ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(phmm_t *));
230  pid2phmm =
231  (phmm_t **) ckd_calloc(bin_mdef_n_phone(mdef), sizeof(phmm_t *));
232 
233  /* For each unique ciphone/triphone entry in mdef, create a PHMM node */
234  n_phmm = 0;
235  nphone = allphs->ci_only ? bin_mdef_n_ciphone(mdef) : bin_mdef_n_phone(mdef);
236  E_INFO("Building PHMM net of %d phones\n", nphone);
237  for (pid = 0; pid < nphone; pid++) {
238  if ((p = phmm_lookup(allphs, pid)) == NULL) {
239  /* not found, should be created */
240  p = (phmm_t *) ckd_calloc(1, sizeof(*p));
241  hmm_init(allphs->hmmctx, &(p->hmm), FALSE,
242  mdef_pid2ssid(mdef, pid), mdef->phone[pid].tmat);
243  p->pid = pid;
244  p->ci = bin_mdef_pid2ci(mdef, pid);
245  p->succlist = NULL;
246  p->next = allphs->ci_phmm[p->ci];
247  allphs->ci_phmm[p->ci] = p;
248  n_phmm++;
249  }
250  pid2phmm[pid] = p;
251  }
252 
253  /* Fill out bitvecs of each PHMM node, alloc continuous memory chunk for context bitvectors */
254  lrc_size = bitvec_size(bin_mdef_n_ciphone(mdef));
255  lc = ckd_calloc(n_phmm * 2 * lrc_size, sizeof(bitvec_t));
256  rc = lc + (n_phmm * lrc_size);
257  for (ci = 0; ci < mdef->n_ciphone; ci++) {
258  for (p = allphs->ci_phmm[ci]; p; p = p->next) {
259  p->lc = lc;
260  lc += lrc_size;
261  p->rc = rc;
262  rc += lrc_size;
263  }
264  }
265 
266  /* Fill out lc and rc bitmaps (remember to map all fillers to each other!!) */
267  filler =
268  (s3cipid_t *) ckd_calloc(bin_mdef_n_ciphone(mdef) + 1,
269  sizeof(s3cipid_t));
270 
271  /* Connect fillers */
272  i = 0;
273  for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++) {
274  p = pid2phmm[ci];
275  bitvec_set_all(p->lc, bin_mdef_n_ciphone(mdef));
276  bitvec_set_all(p->rc, bin_mdef_n_ciphone(mdef));
277  if (mdef->phone[ci].info.ci.filler) {
278  filler[i++] = ci;
279  }
280  }
281  filler[i] = BAD_S3CIPID;
282 
283 
284  /* Loop over cdphones only if ci_only is not set */
285  for (pid = bin_mdef_n_ciphone(mdef); pid < nphone;
286  pid++) {
287  p = pid2phmm[pid];
288 
289  if (mdef->phone[mdef->phone[pid].info.cd.ctx[1]].info.ci.filler) {
290  for (i = 0; IS_S3CIPID(filler[i]); i++)
291  bitvec_set(p->lc, filler[i]);
292  }
293  else
294  bitvec_set(p->lc, mdef->phone[pid].info.cd.ctx[1]);
295 
296  if (mdef->phone[mdef->phone[pid].info.cd.ctx[2]].info.ci.filler) {
297  for (i = 0; IS_S3CIPID(filler[i]); i++)
298  bitvec_set(p->rc, filler[i]);
299  }
300  else
301  bitvec_set(p->rc, mdef->phone[pid].info.cd.ctx[2]);
302  }
303  ckd_free(pid2phmm);
304  ckd_free(filler);
305 
306  /* Create links between PHMM nodes */
307  n_link = phmm_link(allphs);
308 
309  E_INFO("%d nodes, %d links\n", n_phmm, n_link);
310  return 0;
311 }
312 
313 static void
314 phmm_free(allphone_search_t * allphs)
315 {
316  s3cipid_t ci;
317  bin_mdef_t *mdef;
318 
319  if (!allphs->ci_phmm)
320  return;
321  ckd_free(allphs->ci_phmm[0]->lc);
322  mdef = ((ps_search_t *) allphs)->acmod->mdef;
323  for (ci = 0; ci < mdef_n_ciphone(mdef); ++ci) {
324  phmm_t *p, *next;
325 
326  for (p = allphs->ci_phmm[ci]; p; p = next) {
327  plink_t *l, *lnext;
328 
329  next = p->next;
330  for (l = p->succlist; l; l = lnext) {
331  lnext = l->next;
332  ckd_free(l);
333  }
334  hmm_deinit(&(p->hmm));
335  ckd_free(p);
336  }
337  }
338  ckd_free(allphs->ci_phmm);
339 }
340 
342 static int32
343 phmm_eval_all(allphone_search_t * allphs, const int16 * senscr)
344 {
345  s3cipid_t ci;
346  phmm_t *p;
347  int32 best;
348  bin_mdef_t *mdef;
349  phmm_t **ci_phmm;
350 
351  mdef = ((ps_search_t *) allphs)->acmod->mdef;
352  ci_phmm = allphs->ci_phmm;
353 
354  best = WORST_SCORE;
355 
356  hmm_context_set_senscore(allphs->hmmctx, senscr);
357  for (ci = 0; ci < mdef->n_ciphone; ci++) {
358  for (p = ci_phmm[(unsigned) ci]; p; p = p->next) {
359  if (hmm_frame(&(p->hmm)) == allphs->frame) {
360  int32 score;
361  allphs->n_hmm_eval++;
362  score = hmm_vit_eval((hmm_t *) p);
363  if (score > best)
364  best = score;
365  }
366  }
367  }
368 
369  return best;
370 }
371 
372 static void
373 phmm_exit(allphone_search_t * allphs, int32 best)
374 {
375  s3cipid_t ci;
376  phmm_t *p;
377  int32 th, nf;
378  history_t *h;
379  blkarray_list_t *history;
380  bin_mdef_t *mdef;
381  int32 curfrm;
382  phmm_t **ci_phmm;
383  int32 *ci2lmwid;
384 
385  th = best + allphs->pbeam;
386 
387  history = allphs->history;
388  mdef = ps_search_acmod(allphs)->mdef;
389  curfrm = allphs->frame;
390  ci_phmm = allphs->ci_phmm;
391  ci2lmwid = allphs->ci2lmwid;
392 
393  nf = curfrm + 1;
394 
395  for (ci = 0; ci < mdef->n_ciphone; ci++) {
396  for (p = ci_phmm[(unsigned) ci]; p; p = p->next) {
397  if (hmm_frame(&(p->hmm)) == curfrm) {
398 
399  if (hmm_bestscore(&(p->hmm)) >= th) {
400 
401  h = (history_t *) ckd_calloc(1, sizeof(*h));
402  h->ef = curfrm;
403  h->phmm = p;
404  h->hist = hmm_out_history(&(p->hmm));
405  h->score = hmm_out_score(&(p->hmm));
406 
407  if (!allphs->lm) {
408  h->tscore = allphs->inspen;
409  }
410  else {
411  if (h->hist > 0) {
412  int32 n_used;
413  history_t *pred =
414  blkarray_list_get(history, h->hist);
415 
416  if (pred->hist > 0) {
417  history_t *pred_pred =
418  blkarray_list_get(history,
419  h->hist);
420  h->tscore =
421  ngram_tg_score(allphs->lm,
422  ci2lmwid
423  [pred_pred->phmm->ci],
424  ci2lmwid[pred->
425  phmm->ci],
426  ci2lmwid[p->ci],
427  &n_used) >>
428  SENSCR_SHIFT;
429  }
430  else {
431  h->tscore =
432  ngram_bg_score(allphs->lm,
433  ci2lmwid
434  [pred->phmm->ci],
435  ci2lmwid[p->ci],
436  &n_used) >>
437  SENSCR_SHIFT;
438  }
439  }
440  else {
441  /*
442  * This is the beginning SIL and in srch_allphone_begin()
443  * it's inscore is set to 0.
444  */
445  h->tscore = 0;
446  }
447  }
448 
449  blkarray_list_append(history, h);
450 
451  /* Mark PHMM active in next frame */
452  hmm_frame(&(p->hmm)) = nf;
453  }
454  else {
455  /* Reset state scores */
456  hmm_clear(&(p->hmm));
457  }
458  }
459  }
460  }
461 }
462 
463 static void
464 phmm_trans(allphone_search_t * allphs, int32 best,
465  int32 frame_history_start)
466 {
467  history_t *h;
468  phmm_t *from, *to;
469  plink_t *l;
470  int32 newscore, nf, curfrm;
471  int32 *ci2lmwid;
472  int32 hist_idx;
473 
474  curfrm = allphs->frame;
475  nf = curfrm + 1;
476  ci2lmwid = allphs->ci2lmwid;
477 
478  /* Transition from exited nodes to initial states of HMMs */
479  for (hist_idx = frame_history_start;
480  hist_idx < blkarray_list_n_valid(allphs->history); hist_idx++) {
481  h = blkarray_list_get(allphs->history, hist_idx);
482  from = h->phmm;
483  for (l = from->succlist; l; l = l->next) {
484  int32 tscore;
485  to = l->phmm;
486 
487  /* No LM, just use uniform (insertion penalty). */
488  if (!allphs->lm)
489  tscore = allphs->inspen;
490  else {
491  int32 n_used;
492  if (h->hist > 0) {
493  history_t *pred =
494  blkarray_list_get(allphs->history, h->hist);
495  tscore =
496  ngram_tg_score(allphs->lm,
497  ci2lmwid[pred->phmm->ci],
498  ci2lmwid[from->ci],
499  ci2lmwid[to->ci],
500  &n_used) >> SENSCR_SHIFT;
501  }
502  else {
503  tscore = ngram_bg_score(allphs->lm,
504  ci2lmwid[from->ci],
505  ci2lmwid[to->ci],
506  &n_used) >> SENSCR_SHIFT;
507  }
508  }
509 
510  newscore = h->score + tscore;
511  if ((newscore > best + allphs->beam)
512  && (newscore > hmm_in_score(&(to->hmm)))) {
513  hmm_enter(&(to->hmm), newscore, hist_idx, nf);
514  }
515  }
516  }
517 }
518 
519 ps_search_t *
520 allphone_search_init(const char *name,
521  ngram_model_t * lm,
522  cmd_ln_t * config,
523  acmod_t * acmod, dict_t * dict, dict2pid_t * d2p)
524 {
525  int i;
526  bin_mdef_t *mdef;
527  allphone_search_t *allphs;
528 
529  allphs = (allphone_search_t *) ckd_calloc(1, sizeof(*allphs));
530  ps_search_init(ps_search_base(allphs), &allphone_funcs, PS_SEARCH_TYPE_ALLPHONE, name, config, acmod,
531  dict, d2p);
532  mdef = acmod->mdef;
533 
534  allphs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(mdef),
535  acmod->tmat->tp, NULL, mdef->sseq);
536  if (allphs->hmmctx == NULL) {
537  ps_search_free(ps_search_base(allphs));
538  return NULL;
539  }
540 
541  allphs->ci_only = cmd_ln_boolean_r(config, "-allphone_ci");
542  allphs->lw = cmd_ln_float32_r(config, "-lw");
543 
544  phmm_build(allphs);
545 
546  if (lm) {
547  int32 silwid;
548 
549  allphs->lm = ngram_model_retain(lm);
550 
551  silwid = ngram_wid(allphs->lm, bin_mdef_ciphone_str(mdef,
552  mdef_silphone
553  (mdef)));
554  if (silwid == ngram_unknown_wid(allphs->lm)) {
555  E_ERROR("Phonetic LM does not have SIL phone in vocabulary\n");
556  allphone_search_free((ps_search_t *) allphs);
557  return NULL;
558  }
559 
560  allphs->ci2lmwid =
561  (int32 *) ckd_calloc(mdef->n_ciphone,
562  sizeof(*allphs->ci2lmwid));
563  for (i = 0; i < mdef->n_ciphone; i++) {
564  allphs->ci2lmwid[i] =
565  ngram_wid(allphs->lm,
566  (char *) bin_mdef_ciphone_str(mdef, i));
567  /* Map filler phones and other missing phones to silence if not found */
568  if (allphs->ci2lmwid[i] == ngram_unknown_wid(allphs->lm))
569  allphs->ci2lmwid[i] = silwid;
570  }
571  }
572  else {
573  E_WARN
574  ("Failed to load language model specified in -allphone, doing unconstrained phone-loop decoding\n");
575  allphs->inspen =
576  (int32) (logmath_log
577  (acmod->lmath, cmd_ln_float32_r(config, "-pip"))
578  * allphs->lw) >> SENSCR_SHIFT;
579  }
580 
581  allphs->n_tot_frame = 0;
582  allphs->frame = -1;
583  allphs->segments = NULL;
584 
585  /* Get search pruning parameters */
586  allphs->beam
587  =
588  (int32) logmath_log(acmod->lmath,
589  cmd_ln_float64_r(config, "-beam"))
590  >> SENSCR_SHIFT;
591  allphs->pbeam
592  =
593  (int32) logmath_log(acmod->lmath,
594  cmd_ln_float64_r(config, "-pbeam"))
595  >> SENSCR_SHIFT;
596 
597  /* LM related weights/penalties */
598  allphs->history = blkarray_list_init();
599 
600  /* Acoustic score scale for posterior probabilities. */
601  allphs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale");
602 
603  E_INFO("Allphone(beam: %d, pbeam: %d)\n", allphs->beam, allphs->pbeam);
604 
605  ptmr_init(&allphs->perf);
606 
607  return (ps_search_t *) allphs;
608 }
609 
610 int
611 allphone_search_reinit(ps_search_t * search, dict_t * dict,
612  dict2pid_t * d2p)
613 {
614  allphone_search_t *allphs = (allphone_search_t *) search;
615 
616  /* Free old dict2pid, dict */
617  ps_search_base_reinit(search, dict, d2p);
618 
619  if (!allphs->lm) {
620  E_WARN
621  ("-lm argument missing; doing unconstrained phone-loop decoding\n");
622  allphs->inspen =
623  (int32) (logmath_log
624  (search->acmod->lmath,
625  cmd_ln_float32_r(search->config,
626  "-pip")) *
627  allphs->lw) >> SENSCR_SHIFT;
628  }
629 
630  return 0;
631 }
632 
633 void
634 allphone_search_free(ps_search_t * search)
635 {
636  allphone_search_t *allphs = (allphone_search_t *) search;
637 
638 
639  double n_speech = (double)allphs->n_tot_frame
640  / cmd_ln_int32_r(ps_search_config(allphs), "-frate");
641 
642  E_INFO("TOTAL allphone %.2f CPU %.3f xRT\n",
643  allphs->perf.t_tot_cpu,
644  allphs->perf.t_tot_cpu / n_speech);
645  E_INFO("TOTAL allphone %.2f wall %.3f xRT\n",
646  allphs->perf.t_tot_elapsed,
647  allphs->perf.t_tot_elapsed / n_speech);
648 
649  ps_search_base_free(search);
650 
651  hmm_context_free(allphs->hmmctx);
652  phmm_free(allphs);
653  if (allphs->lm)
654  ngram_model_free(allphs->lm);
655  if (allphs->ci2lmwid)
656  ckd_free(allphs->ci2lmwid);
657  if (allphs->history)
658  blkarray_list_free(allphs->history);
659 
660  ckd_free(allphs);
661 }
662 
663 int
664 allphone_search_start(ps_search_t * search)
665 {
666  allphone_search_t *allphs;
667  bin_mdef_t *mdef;
668  s3cipid_t ci;
669  phmm_t *p;
670 
671  allphs = (allphone_search_t *) search;
672  mdef = search->acmod->mdef;
673 
674  /* Reset all HMMs. */
675  for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++) {
676  for (p = allphs->ci_phmm[(unsigned) ci]; p; p = p->next) {
677  hmm_clear(&(p->hmm));
678  }
679  }
680 
681  allphs->n_hmm_eval = 0;
682  allphs->n_sen_eval = 0;
683 
684  /* Free history nodes, if any */
685  blkarray_list_reset(allphs->history);
686 
687  /* Initialize start state of the SILENCE PHMM */
688  allphs->frame = 0;
689  ci = bin_mdef_silphone(mdef);
690  if (NOT_S3CIPID(ci))
691  E_FATAL("Cannot find CI-phone %s\n", S3_SILENCE_CIPHONE);
692  for (p = allphs->ci_phmm[ci]; p && (p->pid != ci); p = p->next);
693  if (!p)
694  E_FATAL("Cannot find HMM for %s\n", S3_SILENCE_CIPHONE);
695  hmm_enter(&(p->hmm), 0, 0, allphs->frame);
696 
697  ptmr_reset(&allphs->perf);
698  ptmr_start(&allphs->perf);
699 
700  return 0;
701 }
702 
703 static void
704 allphone_search_sen_active(allphone_search_t * allphs)
705 {
706  acmod_t *acmod;
707  bin_mdef_t *mdef;
708  phmm_t *p;
709  int32 ci;
710 
711  acmod = ps_search_acmod(allphs);
712  mdef = acmod->mdef;
713 
714  acmod_clear_active(acmod);
715  for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++)
716  for (p = allphs->ci_phmm[ci]; p; p = p->next)
717  if (hmm_frame(&(p->hmm)) == allphs->frame)
718  acmod_activate_hmm(acmod, &(p->hmm));
719 }
720 
721 int
722 allphone_search_step(ps_search_t * search, int frame_idx)
723 {
724  int32 bestscr, frame_history_start;
725  const int16 *senscr;
726  allphone_search_t *allphs = (allphone_search_t *) search;
727  acmod_t *acmod = search->acmod;
728 
729  if (!acmod->compallsen)
730  allphone_search_sen_active(allphs);
731  senscr = acmod_score(acmod, &frame_idx);
732  allphs->n_sen_eval += acmod->n_senone_active;
733  bestscr = phmm_eval_all(allphs, senscr);
734 
735  frame_history_start = blkarray_list_n_valid(allphs->history);
736  phmm_exit(allphs, bestscr);
737  phmm_trans(allphs, bestscr, frame_history_start);
738 
739  allphs->frame++;
740 
741  return 0;
742 }
743 
744 static int32
745 ascore(allphone_search_t * allphs, history_t * h)
746 {
747  int32 score = h->score;
748 
749  if (h->hist > 0) {
750  history_t *pred = blkarray_list_get(allphs->history, h->hist);
751  score -= pred->score;
752  }
753 
754  return score - h->tscore;
755 }
756 
757 static void
758 allphone_clear_segments(allphone_search_t * allphs)
759 {
760  gnode_t *gn;
761  for (gn = allphs->segments; gn; gn = gn->next) {
762  ckd_free(gnode_ptr(gn));
763  }
764  glist_free(allphs->segments);
765  allphs->segments = NULL;
766 }
767 
768 static void
769 allphone_backtrace(allphone_search_t * allphs, int32 f, int32 *out_score)
770 {
771  int32 best, hist_idx, best_idx;
772  int32 frm, last_frm;
773  history_t *h;
774  phseg_t *s;
775 
776  /* Clear old list */
777  allphone_clear_segments(allphs);
778 
779  frm = last_frm = f;
780  /* Find the first history entry for the requested frame */
781  hist_idx = blkarray_list_n_valid(allphs->history) - 1;
782  while (hist_idx > 0) {
783  h = blkarray_list_get(allphs->history, hist_idx);
784  if (h->ef <= f) {
785  frm = last_frm = h->ef;
786  break;
787  }
788  hist_idx--;
789  }
790 
791  if (hist_idx < 0)
792  return;
793 
794  /* Find bestscore */
795  best = (int32) 0x80000000;
796  best_idx = -1;
797  while (frm == last_frm && hist_idx > 0) {
798  h = blkarray_list_get(allphs->history, hist_idx);
799  frm = h->ef;
800  if (h->score > best && frm == last_frm) {
801  best = h->score;
802  best_idx = hist_idx;
803  }
804  hist_idx--;
805  }
806 
807  if (best_idx < 0)
808  return;
809 
810  if (out_score)
811  *out_score = best;
812 
813  /* Backtrace */
814  while (best_idx > 0) {
815  h = blkarray_list_get(allphs->history, best_idx);
816  s = (phseg_t *) ckd_calloc(1, sizeof(phseg_t));
817  s->ci = h->phmm->ci;
818  s->sf =
819  (h->hist >
820  0) ? ((history_t *) blkarray_list_get(allphs->history,
821  h->hist))->ef + 1 : 0;
822  s->ef = h->ef;
823  s->score = ascore(allphs, h);
824  s->tscore = h->tscore;
825  allphs->segments = glist_add_ptr(allphs->segments, s);
826 
827  best_idx = h->hist;
828  }
829 
830  return;
831 }
832 
833 int
834 allphone_search_finish(ps_search_t * search)
835 {
836  allphone_search_t *allphs;
837  int32 cf, n_hist;
838 
839  allphs = (allphone_search_t *) search;
840 
841  allphs->n_tot_frame += allphs->frame;
842  n_hist = blkarray_list_n_valid(allphs->history);
843  E_INFO
844  ("%d frames, %d HMMs (%d/fr), %d senones (%d/fr), %d history entries (%d/fr)\n",
845  allphs->frame, allphs->n_hmm_eval,
846  (allphs->frame > 0) ? allphs->n_hmm_eval / allphs->frame : 0,
847  allphs->n_sen_eval,
848  (allphs->frame > 0) ? allphs->n_sen_eval / allphs->frame : 0,
849  n_hist, (allphs->frame > 0) ? n_hist / allphs->frame : 0);
850 
851  /* Now backtrace. */
852  allphone_backtrace(allphs, allphs->frame - 1, NULL);
853 
854  /* Print out some statistics. */
855  ptmr_stop(&allphs->perf);
856  /* This is the number of frames processed. */
857  cf = ps_search_acmod(allphs)->output_frame;
858  if (cf > 0) {
859  double n_speech = (double) (cf + 1)
860  / cmd_ln_int32_r(ps_search_config(allphs), "-frate");
861  E_INFO("allphone %.2f CPU %.3f xRT\n",
862  allphs->perf.t_cpu, allphs->perf.t_cpu / n_speech);
863  E_INFO("allphone %.2f wall %.3f xRT\n",
864  allphs->perf.t_elapsed, allphs->perf.t_elapsed / n_speech);
865  }
866 
867 
868  return 0;
869 }
870 
871 char const *
872 allphone_search_hyp(ps_search_t * search, int32 * out_score)
873 {
874  allphone_search_t *allphs;
875  phseg_t *p;
876  gnode_t *gn;
877  const char *phone_str;
878  bin_mdef_t *mdef;
879  int len, hyp_idx, phone_idx;
880 
881  allphs = (allphone_search_t *) search;
882  mdef = search->acmod->mdef;
883 
884  /* Create hypothesis */
885  if (search->hyp_str)
886  ckd_free(search->hyp_str);
887  search->hyp_str = NULL;
888 
889  allphone_backtrace(allphs, allphs->frame - 1, out_score);
890  if (allphs->segments == NULL) {
891  return NULL;
892  }
893 
894  len = glist_count(allphs->segments) * 10; /* maximum length of one phone with spacebar */
895 
896  search->hyp_str = (char *) ckd_calloc(len, sizeof(*search->hyp_str));
897  hyp_idx = 0;
898  for (gn = allphs->segments; gn; gn = gn->next) {
899  p = gnode_ptr(gn);
900  phone_str = bin_mdef_ciphone_str(mdef, p->ci);
901  phone_idx = 0;
902  while (phone_str[phone_idx] != '\0')
903  search->hyp_str[hyp_idx++] = phone_str[phone_idx++];
904  search->hyp_str[hyp_idx++] = ' ';
905  }
906  search->hyp_str[--hyp_idx] = '\0';
907  E_INFO("Hyp: %s\n", search->hyp_str);
908  return search->hyp_str;
909 }
Internal implementation of PocketSphinx decoder.
History (paths) information at any point in allphone Viterbi search.
struct phmm_s * next
Next unique PHMM for same parent basephone.
Base structure for search module.
ptmr_t perf
Performance counter.
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
Definition: hmm.c:89
s3cipid_t ci
Parent basephone for this PHMM.
int32 n_sen_eval
Total senones evaluated this utt.
const char * bin_mdef_ciphone_str(bin_mdef_t *m, int32 ci)
In: ciphone id for which name wanted.
Definition: bin_mdef.c:737
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
int n_senone_active
Number of active GMMs.
Definition: acmod.h:169
acmod_t * acmod
Acoustic model.
An individual HMM among the HMM search space.
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state]...
Definition: tmat.h:56
ps_segfuncs_t * vt
V-table of seg methods.
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
uint16 ** sseq
Unique senone sequences (2D array built at load time)
Definition: bin_mdef.h:134
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
Definition: hmm.c:111
int32 lscr
Language model score.
s3pid_t pid
Phone id (temp.
int32 tmat
Transition matrix ID.
Definition: bin_mdef.h:75
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
Definition: acmod.c:1213
int32 hist
Previous history entry.
bitvec_t * lc
Set (bit-vector) of left context phones seen for this PHMM.
int32 score
Path score for this path.
#define BAD_S3CIPID
Ci phone id.
Definition: s3types.h:64
char const * word
Word string (pointer into dictionary hash)
ps_search_t * search
Search object from whence this came.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
int32 * ci2lmwid
Mapping of CI phones to LM word IDs.
int32 inspen
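Phone insertion penalty (derived from -pip and the language weight); used as the transition score when no language model is loaded.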
int32 hmm_vit_eval(hmm_t *hmm)
Viterbi evaluation of given HMM.
Definition: hmm.c:789
struct mdef_entry_s::@0::@1 ci
CI phone information - attributes (just "filler" for now)
frame_idx_t ef
End frame.
uint8 compallsen
Compute all senones?
Definition: acmod.h:188
bitvec_t * rc
Set (bit-vector) of right context phones seen for this PHMM.
hmm_context_t * hmm_context_init(int32 n_emit_state, uint8 **const *tp, int16 const *senscore, uint16 *const *sseq)
Create an HMM context.
Definition: hmm.c:56
void ps_search_base_free(ps_search_t *search)
Free search.
Implementation of allphone search structure.
cmd_ln_t * config
Configuration.
Definition: acmod.h:150
int16 s3cipid_t
Size definitions for more semantically meaningful units.
Definition: s3types.h:63
#define WORST_SCORE
Large "bad" score.
Definition: hmm.h:84
tmat_t * tmat
Transition matrices.
Definition: acmod.h:160
frame_idx_t ef
End frame.
int32 ascr
Acoustic score.
int32 tscore
Transition score for this path.
hmm_context_t * hmmctx
HMM context.
void hmm_enter(hmm_t *h, int32 score, int32 histid, int frame)
Enter an HMM with the given path score and history ID.
Definition: hmm.c:201
Segment iterator over list of phseg.
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
Definition: acmod.c:1197
#define S3_SILENCE_CIPHONE
Hard-coded silence CI phone name.
Definition: mdef.h:81
#define hmm_context_set_senscore(ctx, senscr)
Change the senone score array for a context.
Definition: hmm.h:227
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
a structure for a dictionary.
Definition: dict.h:76
float32 ascale
Acoustic score scale for posterior probabilities.
int32 n_tot_frame
Total number of frames processed.
ngram_model_t * lm
Ngram model set.
Word graph structure used in bestpath/nbest search.
hmm_t hmm
Base HMM structure.
void hmm_clear(hmm_t *h)
Reset the states of the HMM to the invalid condition.
Definition: hmm.c:183
char * hyp_str
Current hypothesis string.
int32 ci_only
Use context-independent phones for decoding.
int32 n_hmm_eval
Total HMMs evaluated this utt.
struct plink_s * succlist
List of successor PHMM nodes.
Models a single unique <senone-sequence, tmat> pair.
phmm_t * phmm
PHMM ending this path.
void hmm_context_free(hmm_context_t *ctx)
Free an HMM context.
Definition: hmm.c:80
int32 pbeam
Effective beams after applying beam_factor.
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
V-table for search algorithm.
mdef_entry_t * phone
All phone structures.
Definition: bin_mdef.h:133
blkarray_list_t * history
List of history nodes allocated in each frame.
Base structure for hypothesis segmentation iterator.
int32 n_ciphone
Number of base (CI) phones.
Definition: bin_mdef.h:119
cmd_ln_t * config
Configuration.
phmm_t ** ci_phmm
PHMM lists (for each CI phone)
Acoustic model structure.
Definition: acmod.h:148
Phone level segmentation information.
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
frame_idx_t sf
Start frame.
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
Definition: acmod.c:1106
frame_idx_t frame
Current frame.