PocketSphinx  5prealpha
kws_search.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2013 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  *
19  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * ====================================================================
32  *
33  */
34 
35 /*
36 * kws_search.c -- Search object for key phrase spotting.
37 */
38 
39 #include <stdio.h>
40 #include <string.h>
41 #include <assert.h>
42 
43 #include <sphinxbase/err.h>
44 #include <sphinxbase/ckd_alloc.h>
45 #include <sphinxbase/strfuncs.h>
46 #include <sphinxbase/pio.h>
47 #include <sphinxbase/cmd_ln.h>
48 
49 #include "pocketsphinx_internal.h"
50 #include "kws_search.h"
51 
/* True when the HMM's frame stamp is positive. */
#define hmm_is_active(hmm) ((hmm)->frame > 0)
/* Address of the n-th entry of a keyphrase's hmms array. */
#define kws_nth_hmm(keyphrase,n) (&((keyphrase)->hmms[n]))

/* Value selected experimentally as the maximum difference between the
triphone score and the phone loop score. It is subtracted in the
confidence computation to make sure that the confidence value is less
than 1. This might be different for different models. Corresponds to a
threshold of about 1e+50. */
#define KWS_MAX 1500
61 
62 static ps_lattice_t *
63 kws_search_lattice(ps_search_t * search)
64 {
65  return NULL;
66 }
67 
68 static int
69 kws_search_prob(ps_search_t * search)
70 {
71  return 0;
72 }
73 
74 static void
75 kws_seg_free(ps_seg_t *seg)
76 {
77  kws_seg_t *itor = (kws_seg_t *)seg;
78  ckd_free(itor);
79 }
80 
81 static void
82 kws_seg_fill(kws_seg_t *itor)
83 {
84  kws_detection_t* detection = (kws_detection_t*)gnode_ptr(itor->detection);
85 
86  itor->base.word = detection->keyphrase;
87  itor->base.sf = detection->sf;
88  itor->base.ef = detection->ef;
89  itor->base.prob = detection->prob;
90  itor->base.ascr = detection->ascr;
91  itor->base.lscr = 0;
92 }
93 
94 static ps_seg_t *
95 kws_seg_next(ps_seg_t *seg)
96 {
97  kws_seg_t *itor = (kws_seg_t *)seg;
98 
99  gnode_t *detect_head = gnode_next(itor->detection);
100  while (detect_head != NULL && ((kws_detection_t*)gnode_ptr(detect_head))->ef > itor->last_frame)
101  detect_head = gnode_next(detect_head);
102  itor->detection = detect_head;
103 
104  if (!itor->detection) {
105  kws_seg_free(seg);
106  return NULL;
107  }
108 
109  kws_seg_fill(itor);
110 
111  return seg;
112 }
113 
/* Segment-iterator function table installed on iterators created by
 * kws_search_seg_iter(). */
static ps_segfuncs_t kws_segfuncs = {
    /* seg_next */ kws_seg_next,
    /* seg_free */ kws_seg_free
};
118 
119 static ps_seg_t *
120 kws_search_seg_iter(ps_search_t * search)
121 {
122  kws_search_t *kwss = (kws_search_t *)search;
123  kws_seg_t *itor;
124  gnode_t *detect_head = kwss->detections->detect_list;
125 
126  while (detect_head != NULL && ((kws_detection_t*)gnode_ptr(detect_head))->ef > kwss->frame - kwss->delay)
127  detect_head = gnode_next(detect_head);
128 
129  if (!detect_head)
130  return NULL;
131 
132  itor = (kws_seg_t *)ckd_calloc(1, sizeof(*itor));
133  itor->base.vt = &kws_segfuncs;
134  itor->base.search = search;
135  itor->base.lwf = 1.0;
136  itor->detection = detect_head;
137  itor->last_frame = kwss->frame - kwss->delay;
138  kws_seg_fill(itor);
139  return (ps_seg_t *)itor;
140 }
141 
/* Search function table handed to ps_search_init() by kws_search_init();
 * each slot points at the KWS implementation in this file. */
static ps_searchfuncs_t kws_funcs = {
    /* start: */ kws_search_start,
    /* step: */ kws_search_step,
    /* finish: */ kws_search_finish,
    /* reinit: */ kws_search_reinit,
    /* free: */ kws_search_free,
    /* lattice: */ kws_search_lattice,
    /* hyp: */ kws_search_hyp,
    /* prob: */ kws_search_prob,
    /* seg_iter: */ kws_search_seg_iter,
};
153 
154 
155 /* Activate senones for scoring */
156 static void
157 kws_search_sen_active(kws_search_t * kwss)
158 {
159  int i;
160  gnode_t *gn;
161 
162  acmod_clear_active(ps_search_acmod(kwss));
163 
164  /* active phone loop hmms */
165  for (i = 0; i < kwss->n_pl; i++)
166  acmod_activate_hmm(ps_search_acmod(kwss), &kwss->pl_hmms[i]);
167 
168  /* activate hmms in active nodes */
169  for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) {
170  kws_keyphrase_t *keyphrase = gnode_ptr(gn);
171  for (i = 0; i < keyphrase->n_hmms; i++) {
172  if (hmm_is_active(kws_nth_hmm(keyphrase, i)))
173  acmod_activate_hmm(ps_search_acmod(kwss), kws_nth_hmm(keyphrase, i));
174  }
175  }
176 }
177 
178 /*
179 * Evaluate all the active HMMs.
180 * (Executed once per frame.)
181 */
182 static void
183 kws_search_hmm_eval(kws_search_t * kwss, int16 const *senscr)
184 {
185  int32 i;
186  gnode_t *gn;
187  int32 bestscore = WORST_SCORE;
188 
189  hmm_context_set_senscore(kwss->hmmctx, senscr);
190 
191  /* evaluate hmms from phone loop */
192  for (i = 0; i < kwss->n_pl; ++i) {
193  hmm_t *hmm = &kwss->pl_hmms[i];
194  int32 score;
195 
196  score = hmm_vit_eval(hmm);
197  if (score BETTER_THAN bestscore)
198  bestscore = score;
199  }
200  /* evaluate hmms for active nodes */
201  for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) {
202  kws_keyphrase_t *keyphrase = gnode_ptr(gn);
203  for (i = 0; i < keyphrase->n_hmms; i++) {
204  hmm_t *hmm = kws_nth_hmm(keyphrase, i);
205 
206  if (hmm_is_active(hmm)) {
207  int32 score;
208  score = hmm_vit_eval(hmm);
209  if (score BETTER_THAN bestscore)
210  bestscore = score;
211  }
212  }
213  }
214 
215  kwss->bestscore = bestscore;
216 }
217 
218 /*
219 * (Beam) prune the just evaluated HMMs, determine which ones remain
220 * active. Executed once per frame.
221 */
222 static void
223 kws_search_hmm_prune(kws_search_t * kwss)
224 {
225  int32 thresh, i;
226  gnode_t *gn;
227 
228  thresh = kwss->bestscore + kwss->beam;
229 
230  for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) {
231  kws_keyphrase_t *keyphrase = gnode_ptr(gn);
232  for (i = 0; i < keyphrase->n_hmms; i++) {
233  hmm_t *hmm = kws_nth_hmm(keyphrase, i);
234  if (hmm_is_active(hmm) && hmm_bestscore(hmm) < thresh)
235  hmm_clear(hmm);
236  }
237  }
238 }
239 
240 
244 static void
245 kws_search_trans(kws_search_t * kwss)
246 {
247  hmm_t *pl_best_hmm = NULL;
248  int32 best_out_score = WORST_SCORE;
249  int i;
250  gnode_t *gn;
251 
252  /* select best hmm in phone-loop to be a predecessor */
253  for (i = 0; i < kwss->n_pl; i++)
254  if (hmm_out_score(&kwss->pl_hmms[i]) BETTER_THAN best_out_score) {
255  best_out_score = hmm_out_score(&kwss->pl_hmms[i]);
256  pl_best_hmm = &kwss->pl_hmms[i];
257  }
258 
259  /* out probs are not ready yet */
260  if (!pl_best_hmm)
261  return;
262 
263  /* Check whether keyphrase wasn't spotted yet */
264  for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) {
265  kws_keyphrase_t *keyphrase = gnode_ptr(gn);
266  hmm_t *last_hmm;
267 
268  if (keyphrase->n_hmms < 1)
269  continue;
270 
271  last_hmm = kws_nth_hmm(keyphrase, keyphrase->n_hmms - 1);
272 
273  if (hmm_is_active(last_hmm)
274  && hmm_out_score(pl_best_hmm) BETTER_THAN WORST_SCORE) {
275 
276  if (hmm_out_score(last_hmm) - hmm_out_score(pl_best_hmm)
277  >= keyphrase->threshold) {
278 
279  int32 prob = hmm_out_score(last_hmm) - hmm_out_score(pl_best_hmm) - KWS_MAX;
280  kws_detections_add(kwss->detections, keyphrase->word,
281  hmm_out_history(last_hmm),
282  kwss->frame, prob,
283  hmm_out_score(last_hmm));
284  } /* keyphrase is spotted */
285  } /* last hmm of keyphrase is active */
286  } /* keyphrase loop */
287 
288  /* Make transition for all phone loop hmms */
289  for (i = 0; i < kwss->n_pl; i++) {
290  if (hmm_out_score(pl_best_hmm) + kwss->plp BETTER_THAN
291  hmm_in_score(&kwss->pl_hmms[i])) {
292  hmm_enter(&kwss->pl_hmms[i],
293  hmm_out_score(pl_best_hmm) + kwss->plp,
294  hmm_out_history(pl_best_hmm), kwss->frame + 1);
295  }
296  }
297 
298  /* Activate new keyphrase nodes, enter their hmms */
299  for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) {
300  kws_keyphrase_t *keyphrase = gnode_ptr(gn);
301 
302  if (keyphrase->n_hmms < 1)
303  continue;
304 
305  for (i = keyphrase->n_hmms - 1; i > 0; i--) {
306  hmm_t *pred_hmm = kws_nth_hmm(keyphrase, i - 1);
307  hmm_t *hmm = kws_nth_hmm(keyphrase, i);
308 
309  if (hmm_is_active(pred_hmm)) {
310  if (!hmm_is_active(hmm)
311  || hmm_out_score(pred_hmm) BETTER_THAN
312  hmm_in_score(hmm))
313  hmm_enter(hmm, hmm_out_score(pred_hmm),
314  hmm_out_history(pred_hmm), kwss->frame + 1);
315  }
316  }
317 
318  /* Enter keyphrase start node from phone loop */
319  if (hmm_out_score(pl_best_hmm) BETTER_THAN
320  hmm_in_score(kws_nth_hmm(keyphrase, 0)))
321  hmm_enter(kws_nth_hmm(keyphrase, 0), hmm_out_score(pl_best_hmm),
322  kwss->frame, kwss->frame + 1);
323  }
324 }
325 
326 static int
327 kws_search_read_list(kws_search_t *kwss, const char* keyfile)
328 {
329  FILE *list_file;
330  lineiter_t *li;
331  char *line;
332 
333  if ((list_file = fopen(keyfile, "r")) == NULL) {
334  E_ERROR_SYSTEM("Failed to open keyphrase file '%s'", keyfile);
335  return -1;
336  }
337 
338  kwss->keyphrases = NULL;
339 
340  /* read keyphrases */
341  for (li = lineiter_start_clean(list_file); li; li = lineiter_next(li)) {
342  size_t begin, end;
343  kws_keyphrase_t *keyphrase;
344 
345  if (li->len == 0)
346  continue;
347 
348  keyphrase = ckd_calloc(1, sizeof(kws_keyphrase_t));
349 
350  line = li->buf;
351  end = strlen(line) - 1;
352  begin = end - 1;
353  if (line[end] == '/') {
354  while (line[begin] != '/' && begin > 0)
355  begin--;
356  line[end] = 0;
357  line[begin] = 0;
358  keyphrase->threshold = (int32) logmath_log(kwss->base.acmod->lmath, atof_c(line + begin + 1))
359  >> SENSCR_SHIFT;
360  } else {
361  keyphrase->threshold = kwss->def_threshold;
362  }
363 
364  keyphrase->word = ckd_salloc(line);
365 
366  kwss->keyphrases = glist_add_ptr(kwss->keyphrases, keyphrase);
367  }
368 
369  fclose(list_file);
370  return 0;
371 }
372 
373 ps_search_t *
374 kws_search_init(const char *name,
375  const char *keyphrase,
376  const char *keyfile,
377  cmd_ln_t * config,
378  acmod_t * acmod, dict_t * dict, dict2pid_t * d2p)
379 {
380  kws_search_t *kwss = (kws_search_t *) ckd_calloc(1, sizeof(*kwss));
381  ps_search_init(ps_search_base(kwss), &kws_funcs, PS_SEARCH_TYPE_KWS, name, config, acmod, dict,
382  d2p);
383 
384  kwss->detections = (kws_detections_t *)ckd_calloc(1, sizeof(*kwss->detections));
385 
386  kwss->beam =
387  (int32) logmath_log(acmod->lmath,
388  cmd_ln_float64_r(config,
389  "-beam")) >> SENSCR_SHIFT;
390 
391  kwss->plp =
392  (int32) logmath_log(acmod->lmath,
393  cmd_ln_float32_r(config,
394  "-kws_plp")) >> SENSCR_SHIFT;
395 
396 
397  kwss->def_threshold =
398  (int32) logmath_log(acmod->lmath,
399  cmd_ln_float64_r(config,
400  "-kws_threshold")) >>
401  SENSCR_SHIFT;
402 
403  kwss->delay = (int32) cmd_ln_int32_r(config, "-kws_delay");
404 
405  E_INFO("KWS(beam: %d, plp: %d, default threshold %d, delay %d)\n",
406  kwss->beam, kwss->plp, kwss->def_threshold, kwss->delay);
407 
408  if (keyfile) {
409  if (kws_search_read_list(kwss, keyfile) < 0) {
410  E_ERROR("Failed to create kws search\n");
411  kws_search_free(ps_search_base(kwss));
412  return NULL;
413  }
414  } else {
415  kws_keyphrase_t *k = ckd_calloc(1, sizeof(kws_keyphrase_t));
416  k->threshold = kwss->def_threshold;
417  k->word = ckd_salloc(keyphrase);
418  kwss->keyphrases = glist_add_ptr(NULL, k);
419  }
420 
421  /* Reinit for provided keyphrase */
422  if (kws_search_reinit(ps_search_base(kwss),
423  ps_search_dict(kwss),
424  ps_search_dict2pid(kwss)) < 0) {
425  ps_search_free(ps_search_base(kwss));
426  return NULL;
427  }
428 
429  ptmr_init(&kwss->perf);
430 
431  return ps_search_base(kwss);
432 }
433 
434 void
435 kws_search_free(ps_search_t * search)
436 {
437  kws_search_t *kwss;
438  double n_speech;
439  gnode_t *gn;
440 
441  kwss = (kws_search_t *) search;
442 
443  n_speech = (double)kwss->n_tot_frame
444  / cmd_ln_int32_r(ps_search_config(kwss), "-frate");
445 
446  E_INFO("TOTAL kws %.2f CPU %.3f xRT\n",
447  kwss->perf.t_tot_cpu,
448  kwss->perf.t_tot_cpu / n_speech);
449  E_INFO("TOTAL kws %.2f wall %.3f xRT\n",
450  kwss->perf.t_tot_elapsed,
451  kwss->perf.t_tot_elapsed / n_speech);
452 
453 
454  ps_search_base_free(search);
455  hmm_context_free(kwss->hmmctx);
456  kws_detections_reset(kwss->detections);
457  ckd_free(kwss->detections);
458 
459  ckd_free(kwss->pl_hmms);
460  for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) {
461  kws_keyphrase_t *keyphrase = gnode_ptr(gn);
462  ckd_free(keyphrase->hmms);
463  ckd_free(keyphrase->word);
464  ckd_free(keyphrase);
465  }
466  glist_free(kwss->keyphrases);
467  ckd_free(kwss);
468 }
469 
470 int
471 kws_search_reinit(ps_search_t * search, dict_t * dict, dict2pid_t * d2p)
472 {
473  char **wrdptr;
474  char *tmp_keyphrase;
475  int32 wid, pronlen, in_dict;
476  int32 n_hmms, n_wrds;
477  int32 ssid, tmatid;
478  int i, j, p;
479  kws_search_t *kwss = (kws_search_t *) search;
480  bin_mdef_t *mdef = search->acmod->mdef;
481  int32 silcipid = bin_mdef_silphone(mdef);
482  gnode_t *gn;
483 
484  /* Free old dict2pid, dict */
485  ps_search_base_reinit(search, dict, d2p);
486 
487  /* Initialize HMM context. */
488  if (kwss->hmmctx)
489  hmm_context_free(kwss->hmmctx);
490  kwss->hmmctx =
491  hmm_context_init(bin_mdef_n_emit_state(search->acmod->mdef),
492  search->acmod->tmat->tp, NULL,
493  search->acmod->mdef->sseq);
494  if (kwss->hmmctx == NULL)
495  return -1;
496 
497  /* Initialize phone loop HMMs. */
498  if (kwss->pl_hmms) {
499  for (i = 0; i < kwss->n_pl; ++i)
500  hmm_deinit((hmm_t *) & kwss->pl_hmms[i]);
501  ckd_free(kwss->pl_hmms);
502  }
503  kwss->n_pl = bin_mdef_n_ciphone(search->acmod->mdef);
504  kwss->pl_hmms =
505  (hmm_t *) ckd_calloc(kwss->n_pl, sizeof(*kwss->pl_hmms));
506  for (i = 0; i < kwss->n_pl; ++i) {
507  hmm_init(kwss->hmmctx, (hmm_t *) & kwss->pl_hmms[i],
508  FALSE,
509  bin_mdef_pid2ssid(search->acmod->mdef, i),
510  bin_mdef_pid2tmatid(search->acmod->mdef, i));
511  }
512 
513  for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) {
514  kws_keyphrase_t *keyphrase = gnode_ptr(gn);
515 
516  /* Initialize keyphrase HMMs */
517  tmp_keyphrase = (char *) ckd_salloc(keyphrase->word);
518  n_wrds = str2words(tmp_keyphrase, NULL, 0);
519  wrdptr = (char **) ckd_calloc(n_wrds, sizeof(*wrdptr));
520  str2words(tmp_keyphrase, wrdptr, n_wrds);
521 
522  /* count amount of hmms */
523  n_hmms = 0;
524  in_dict = TRUE;
525  for (i = 0; i < n_wrds; i++) {
526  wid = dict_wordid(dict, wrdptr[i]);
527  if (wid == BAD_S3WID) {
528  E_ERROR("Word '%s' in phrase '%s' is missing in the dictionary\n", wrdptr[i], keyphrase->word);
529  in_dict = FALSE;
530  break;
531  }
532  pronlen = dict_pronlen(dict, wid);
533  n_hmms += pronlen;
534  }
535 
536  if (!in_dict) {
537  ckd_free(wrdptr);
538  ckd_free(tmp_keyphrase);
539  continue;
540  }
541 
542  /* allocate node array */
543  if (keyphrase->hmms)
544  ckd_free(keyphrase->hmms);
545  keyphrase->hmms = (hmm_t *) ckd_calloc(n_hmms, sizeof(hmm_t));
546  keyphrase->n_hmms = n_hmms;
547 
548  /* fill node array */
549  j = 0;
550  for (i = 0; i < n_wrds; i++) {
551  wid = dict_wordid(dict, wrdptr[i]);
552  pronlen = dict_pronlen(dict, wid);
553  for (p = 0; p < pronlen; p++) {
554  int32 ci = dict_pron(dict, wid, p);
555  if (p == 0) {
556  /* first phone of word */
557  int32 rc =
558  pronlen > 1 ? dict_pron(dict, wid, 1) : silcipid;
559  ssid = dict2pid_ldiph_lc(d2p, ci, rc, silcipid);
560  }
561  else if (p == pronlen - 1) {
562  /* last phone of the word */
563  int32 lc = dict_pron(dict, wid, p - 1);
564  xwdssid_t *rssid = dict2pid_rssid(d2p, ci, lc);
565  int j = rssid->cimap[silcipid];
566  ssid = rssid->ssid[j];
567  }
568  else {
569  /* word internal phone */
570  ssid = dict2pid_internal(d2p, wid, p);
571  }
572  tmatid = bin_mdef_pid2tmatid(mdef, ci);
573  hmm_init(kwss->hmmctx, &keyphrase->hmms[j], FALSE, ssid,
574  tmatid);
575  j++;
576  }
577  }
578 
579  ckd_free(wrdptr);
580  ckd_free(tmp_keyphrase);
581  }
582 
583 
584 
585  return 0;
586 }
587 
588 int
589 kws_search_start(ps_search_t * search)
590 {
591  int i;
592  kws_search_t *kwss = (kws_search_t *) search;
593 
594  kwss->frame = 0;
595  kwss->bestscore = 0;
596  kws_detections_reset(kwss->detections);
597 
598  /* Reset and enter all phone-loop HMMs. */
599  for (i = 0; i < kwss->n_pl; ++i) {
600  hmm_t *hmm = (hmm_t *) & kwss->pl_hmms[i];
601  hmm_clear(hmm);
602  hmm_enter(hmm, 0, -1, 0);
603  }
604 
605  ptmr_reset(&kwss->perf);
606  ptmr_start(&kwss->perf);
607 
608  return 0;
609 }
610 
611 int
612 kws_search_step(ps_search_t * search, int frame_idx)
613 {
614  int16 const *senscr;
615  kws_search_t *kwss = (kws_search_t *) search;
616  acmod_t *acmod = search->acmod;
617 
618  /* Activate senones */
619  if (!acmod->compallsen)
620  kws_search_sen_active(kwss);
621 
622  /* Calculate senone scores for current frame. */
623  senscr = acmod_score(acmod, &frame_idx);
624 
625  /* Evaluate hmms in phone loop and in active keyphrase nodes */
626  kws_search_hmm_eval(kwss, senscr);
627 
628  /* Prune hmms with low prob */
629  kws_search_hmm_prune(kwss);
630 
631  /* Do hmms transitions */
632  kws_search_trans(kwss);
633 
634  ++kwss->frame;
635  return 0;
636 }
637 
638 int
639 kws_search_finish(ps_search_t * search)
640 {
641  kws_search_t *kwss;
642  int32 cf;
643 
644  kwss = (kws_search_t *) search;
645 
646  kwss->n_tot_frame += kwss->frame;
647 
648  /* Print out some statistics. */
649  ptmr_stop(&kwss->perf);
650  /* This is the number of frames processed. */
651  cf = ps_search_acmod(kwss)->output_frame;
652  if (cf > 0) {
653  double n_speech = (double) (cf + 1)
654  / cmd_ln_int32_r(ps_search_config(kwss), "-frate");
655  E_INFO("kws %.2f CPU %.3f xRT\n",
656  kwss->perf.t_cpu, kwss->perf.t_cpu / n_speech);
657  E_INFO("kws %.2f wall %.3f xRT\n",
658  kwss->perf.t_elapsed, kwss->perf.t_elapsed / n_speech);
659  }
660 
661  return 0;
662 }
663 
664 char const *
665 kws_search_hyp(ps_search_t * search, int32 * out_score)
666 {
667  kws_search_t *kwss = (kws_search_t *) search;
668  if (out_score)
669  *out_score = 0;
670 
671  if (search->hyp_str)
672  ckd_free(search->hyp_str);
673  search->hyp_str = kws_detections_hyp_str(kwss->detections, kwss->frame, kwss->delay);
674 
675  return search->hyp_str;
676 }
677 
678 char *
679 kws_search_get_keyphrases(ps_search_t * search)
680 {
681  int c, len;
682  kws_search_t *kwss;
683  char* line;
684  gnode_t *gn;
685 
686  kwss = (kws_search_t *) search;
687 
688  len = 0;
689  for (gn = kwss->keyphrases; gn; gn = gnode_next(gn))
690  len += strlen(((kws_keyphrase_t *)gnode_ptr(gn))->word) + 1;
691 
692  c = 0;
693  line = (char *)ckd_calloc(len, sizeof(*line));
694  for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) {
695  const char *str = ((kws_keyphrase_t *)gnode_ptr(gn))->word;
696  memcpy(&line[c], str, strlen(str));
697  c += strlen(str);
698  line[c++] = '\n';
699  }
700  line[--c] = '\0';
701 
702  return line;
703 }
Internal implementation of PocketSphinx decoder.
frame_idx_t last_frame
Last frame to raise the detection.
Definition: kws_search.h:57
Base structure for search module.
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
Definition: hmm.c:89
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
Definition: dict.c:399
glist_t keyphrases
Keyphrases to spot.
Definition: kws_search.h:75
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
acmod_t * acmod
Acoustic model.
An individual HMM among the HMM search space.
gnode_t * detection
Keyphrase detection correspondent to segment.
Definition: kws_search.h:56
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state]...
Definition: tmat.h:56
int32 plp
Phone loop probability.
Definition: kws_search.h:82
ps_segfuncs_t * vt
V-table of seg methods.
int32 def_threshold
default threshold for p(hyp)/p(altern) ratio
Definition: kws_search.h:84
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
uint16 ** sseq
Unique senone sequences (2D array built at load time)
Definition: bin_mdef.h:134
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
Definition: hmm.c:111
int32 lscr
Language model score.
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
Definition: acmod.c:1213
#define BAD_S3WID
Dictionary word id.
Definition: s3types.h:90
Segmentation "iterator" for KWS history.
Definition: kws_search.h:54
int32 bestscore
For beam pruning.
Definition: kws_search.h:83
int32 prob
Log posterior probability.
char const * word
Word string (pointer into dictionary hash)
ps_search_t * search
Search object from whence this came.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
hmm_t * pl_hmms
Phone loop hmms - hmms of CI phones.
Definition: kws_search.h:88
int32 hmm_vit_eval(hmm_t *hmm)
Viterbi evaluation of given HMM.
Definition: hmm.c:789
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
Definition: dict2pid.h:115
hmm_context_t * hmmctx
HMM context.
Definition: kws_search.h:73
uint8 compallsen
Compute all senones?
Definition: acmod.h:188
int32 delay
Delay to wait for best detection score.
Definition: kws_search.h:85
hmm_context_t * hmm_context_init(int32 n_emit_state, uint8 **const *tp, int16 const *senscore, uint16 *const *sseq)
Create an HMM context.
Definition: hmm.c:56
void ps_search_base_free(ps_search_t *search)
Free search.
#define WORST_SCORE
Large "bad" score.
Definition: hmm.h:84
tmat_t * tmat
Transition matrices.
Definition: acmod.h:160
frame_idx_t ef
End frame.
int32 ascr
Acoustic score.
void hmm_enter(hmm_t *h, int32 score, int32 histid, int frame)
Enter an HMM with the given path score and history ID.
Definition: hmm.c:201
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
Definition: acmod.c:1197
ps_seg_t base
Base structure.
Definition: kws_search.h:55
#define hmm_context_set_senscore(ctx, senscr)
Change the senone score array for a context.
Definition: hmm.h:227
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
a structure for a dictionary.
Definition: dict.h:76
Word graph structure used in bestpath/nbest search.
s3ssid_t dict2pid_internal(dict2pid_t *d2p, int32 wid, int pos)
Return the senone sequence ID for the given word position.
Definition: dict2pid.c:367
void hmm_clear(hmm_t *h)
Reset the states of the HMM to the invalid condition.
Definition: hmm.c:183
cross word triphone model structure
Definition: dict2pid.h:73
char * hyp_str
Current hypothesis string.
frame_idx_t frame
Frame index.
Definition: kws_search.h:78
#define BETTER_THAN
Is one score better than another?
Definition: hmm.h:95
int32 n_pl
Number of CI phones.
Definition: kws_search.h:87
void hmm_context_free(hmm_context_t *ctx)
Free an HMM context.
Definition: hmm.c:80
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
Implementation of KWS search structure.
Definition: kws_search.h:70
kws_detections_t * detections
Keyword spotting history.
Definition: kws_search.h:77
V-table for search algorithm.
ptmr_t perf
Performance counter.
Definition: kws_search.h:90
Base structure for hypothesis segmentation iterator.
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
Definition: dict2pid.h:75
#define dict_pron(d, w, p)
The CI phones of the word w at position p.
Definition: dict.h:165
Acoustic model structure.
Definition: acmod.h:148
float32 lwf
Language weight factor (for second-pass searches)
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
s3ssid_t * ssid
Senone Sequence ID list for all context ciphones.
Definition: dict2pid.h:74
frame_idx_t sf
Start frame.
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
Definition: acmod.c:1106