PocketSphinx  5prealpha
ps_alignment.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2010 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
42 /* System headers. */
43 
44 /* SphinxBase headers. */
45 #include <sphinxbase/ckd_alloc.h>
46 
47 /* Local headers. */
48 #include "ps_alignment.h"
49 
52 {
53  ps_alignment_t *al = ckd_calloc(1, sizeof(*al));
54  al->d2p = dict2pid_retain(d2p);
55  return al;
56 }
57 
58 int
60 {
61  if (al == NULL)
62  return 0;
63  dict2pid_free(al->d2p);
64  ckd_free(al->word.seq);
65  ckd_free(al->sseq.seq);
66  ckd_free(al->state.seq);
67  ckd_free(al);
68  return 0;
69 }
70 
71 #define VECTOR_GROW 10
72 static void *
73 vector_grow_one(void *ptr, uint16 *n_alloc, uint16 *n, size_t item_size)
74 {
75  int newsize = *n + 1;
76  if (newsize < *n_alloc) {
77  *n += 1;
78  return ptr;
79  }
80  newsize += VECTOR_GROW;
81  if (newsize > 0xffff)
82  return NULL;
83  ptr = ckd_realloc(ptr, newsize * item_size);
84  *n += 1;
85  *n_alloc = newsize;
86  return ptr;
87 }
88 
89 static ps_alignment_entry_t *
90 ps_alignment_vector_grow_one(ps_alignment_vector_t *vec)
91 {
92  void *ptr;
93  ptr = vector_grow_one(vec->seq, &vec->n_alloc,
94  &vec->n_ent, sizeof(*vec->seq));
95  if (ptr == NULL)
96  return NULL;
97  vec->seq = ptr;
98  return vec->seq + vec->n_ent - 1;
99 }
100 
101 static void
102 ps_alignment_vector_empty(ps_alignment_vector_t *vec)
103 {
104  vec->n_ent = 0;
105 }
106 
107 int
109  int32 wid, int duration)
110 {
112 
113  if ((ent = ps_alignment_vector_grow_one(&al->word)) == NULL)
114  return 0;
115  ent->id.wid = wid;
116  if (al->word.n_ent > 1)
117  ent->start = ent[-1].start + ent[-1].duration;
118  else
119  ent->start = 0;
120  ent->duration = duration;
121  ent->score = 0;
122  ent->parent = PS_ALIGNMENT_NONE;
123  ent->child = PS_ALIGNMENT_NONE;
124 
125  return al->word.n_ent;
126 }
127 
128 int
130 {
131  dict2pid_t *d2p;
132  dict_t *dict;
133  bin_mdef_t *mdef;
134  int i, lc;
135 
136  /* Clear phone and state sequences. */
137  ps_alignment_vector_empty(&al->sseq);
138  ps_alignment_vector_empty(&al->state);
139 
140  /* For each word, expand to phones/senone sequences. */
141  d2p = al->d2p;
142  dict = d2p->dict;
143  mdef = d2p->mdef;
144  lc = bin_mdef_silphone(mdef);
145  for (i = 0; i < al->word.n_ent; ++i) {
146  ps_alignment_entry_t *went = al->word.seq + i;
147  ps_alignment_entry_t *sent;
148  int wid = went->id.wid;
149  int len = dict_pronlen(dict, wid);
150  int j, rc;
151 
152  if (i < al->word.n_ent - 1)
153  rc = dict_first_phone(dict, al->word.seq[i+1].id.wid);
154  else
155  rc = bin_mdef_silphone(mdef);
156 
157  /* First phone. */
158  if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
159  E_ERROR("Failed to add phone entry!\n");
160  return -1;
161  }
162  sent->id.pid.cipid = dict_first_phone(dict, wid);
163  sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
164  sent->start = went->start;
165  sent->duration = went->duration;
166  sent->score = 0;
167  sent->parent = i;
168  went->child = (uint16)(sent - al->sseq.seq);
169  if (len == 1)
170  sent->id.pid.ssid
171  = dict2pid_lrdiph_rc(d2p, sent->id.pid.cipid, lc, rc);
172  else
173  sent->id.pid.ssid
174  = dict2pid_ldiph_lc(d2p, sent->id.pid.cipid,
175  dict_second_phone(dict, wid), lc);
176  assert(sent->id.pid.ssid != BAD_SSID);
177 
178  /* Internal phones. */
179  for (j = 1; j < len - 1; ++j) {
180  if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
181  E_ERROR("Failed to add phone entry!\n");
182  return -1;
183  }
184  sent->id.pid.cipid = dict_pron(dict, wid, j);
185  sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
186  sent->id.pid.ssid = dict2pid_internal(d2p, wid, j);
187  assert(sent->id.pid.ssid != BAD_SSID);
188  sent->start = went->start;
189  sent->duration = went->duration;
190  sent->score = 0;
191  sent->parent = i;
192  }
193 
194  /* Last phone. */
195  if (j < len) {
196  xwdssid_t *rssid;
197  assert(j == len - 1);
198  if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
199  E_ERROR("Failed to add phone entry!\n");
200  return -1;
201  }
202  sent->id.pid.cipid = dict_last_phone(dict, wid);
203  sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
204  rssid = dict2pid_rssid(d2p, sent->id.pid.cipid,
205  dict_second_last_phone(dict, wid));
206  sent->id.pid.ssid = rssid->ssid[rssid->cimap[rc]];
207  assert(sent->id.pid.ssid != BAD_SSID);
208  sent->start = went->start;
209  sent->duration = went->duration;
210  sent->score = 0;
211  sent->parent = i;
212  }
213  /* Update lc. Could just use sent->id.pid.cipid here but that
214  * seems needlessly obscure. */
215  lc = dict_last_phone(dict, wid);
216  }
217 
218  /* For each senone sequence, expand to senones. (we could do this
219  * nested above but this makes it more clear and easier to
220  * refactor) */
221  for (i = 0; i < al->sseq.n_ent; ++i) {
222  ps_alignment_entry_t *pent = al->sseq.seq + i;
223  ps_alignment_entry_t *sent;
224  int j;
225 
226  for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) {
227  if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) {
228  E_ERROR("Failed to add state entry!\n");
229  return -1;
230  }
231  sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j);
232  assert(sent->id.senid != BAD_SENID);
233  sent->start = pent->start;
234  sent->duration = pent->duration;
235  sent->score = 0;
236  sent->parent = i;
237  if (j == 0)
238  pent->child = (uint16)(sent - al->state.seq);
239  }
240  }
241 
242  return 0;
243 }
244 
245 /* FIXME: Somewhat the same as the above function, needs refactoring */
246 int
248 {
249  dict2pid_t *d2p;
250  dict_t *dict;
251  bin_mdef_t *mdef;
252  int i;
253 
254  /* Clear phone and state sequences. */
255  ps_alignment_vector_empty(&al->sseq);
256  ps_alignment_vector_empty(&al->state);
257 
258  /* For each word, expand to phones/senone sequences. */
259  d2p = al->d2p;
260  dict = d2p->dict;
261  mdef = d2p->mdef;
262  for (i = 0; i < al->word.n_ent; ++i) {
263  ps_alignment_entry_t *went = al->word.seq + i;
264  ps_alignment_entry_t *sent;
265  int wid = went->id.wid;
266  int len = dict_pronlen(dict, wid);
267  int j;
268 
269  for (j = 0; j < len; ++j) {
270  if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
271  E_ERROR("Failed to add phone entry!\n");
272  return -1;
273  }
274  sent->id.pid.cipid = dict_pron(dict, wid, j);
275  sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
276  sent->id.pid.ssid = bin_mdef_pid2ssid(mdef, sent->id.pid.cipid);
277  assert(sent->id.pid.ssid != BAD_SSID);
278  sent->start = went->start;
279  sent->duration = went->duration;
280  sent->score = 0;
281  sent->parent = i;
282  }
283  }
284 
285  /* For each senone sequence, expand to senones. (we could do this
286  * nested above but this makes it more clear and easier to
287  * refactor) */
288  for (i = 0; i < al->sseq.n_ent; ++i) {
289  ps_alignment_entry_t *pent = al->sseq.seq + i;
290  ps_alignment_entry_t *sent;
291  int j;
292 
293  for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) {
294  if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) {
295  E_ERROR("Failed to add state entry!\n");
296  return -1;
297  }
298  sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j);
299  assert(sent->id.senid != BAD_SENID);
300  sent->start = pent->start;
301  sent->duration = pent->duration;
302  sent->score = 0;
303  sent->parent = i;
304  if (j == 0)
305  pent->child = (uint16)(sent - al->state.seq);
306  }
307  }
308 
309  return 0;
310 }
311 
312 int
314 {
315  ps_alignment_entry_t *last_ent = NULL;
316  int i;
317 
318  /* Propagate duration up from states to phones. */
319  for (i = 0; i < al->state.n_ent; ++i) {
320  ps_alignment_entry_t *sent = al->state.seq + i;
321  ps_alignment_entry_t *pent = al->sseq.seq + sent->parent;
322  if (pent != last_ent) {
323  pent->start = sent->start;
324  pent->duration = 0;
325  pent->score = 0;
326  }
327  pent->duration += sent->duration;
328  pent->score += sent->score;
329  last_ent = pent;
330  }
331 
332  /* Propagate duration up from phones to words. */
333  last_ent = NULL;
334  for (i = 0; i < al->sseq.n_ent; ++i) {
335  ps_alignment_entry_t *pent = al->sseq.seq + i;
336  ps_alignment_entry_t *went = al->word.seq + pent->parent;
337  if (went != last_ent) {
338  went->start = pent->start;
339  went->duration = 0;
340  went->score = 0;
341  }
342  went->duration += pent->duration;
343  went->score += pent->score;
344  last_ent = went;
345  }
346 
347  return 0;
348 }
349 
350 int
352 {
353  return (int)al->word.n_ent;
354 }
355 
356 int
358 {
359  return (int)al->sseq.n_ent;
360 }
361 
362 int
364 {
365  return (int)al->state.n_ent;
366 }
367 
370 {
371  ps_alignment_iter_t *itor;
372 
373  if (al->word.n_ent == 0)
374  return NULL;
375  itor = ckd_calloc(1, sizeof(*itor));
376  itor->al = al;
377  itor->vec = &al->word;
378  itor->pos = 0;
379  return itor;
380 }
381 
384 {
385  ps_alignment_iter_t *itor;
386 
387  if (al->sseq.n_ent == 0)
388  return NULL;
389  itor = ckd_calloc(1, sizeof(*itor));
390  itor->al = al;
391  itor->vec = &al->sseq;
392  itor->pos = 0;
393  return itor;
394 }
395 
398 {
399  ps_alignment_iter_t *itor;
400 
401  if (al->state.n_ent == 0)
402  return NULL;
403  itor = ckd_calloc(1, sizeof(*itor));
404  itor->al = al;
405  itor->vec = &al->state;
406  itor->pos = 0;
407  return itor;
408 }
409 
412 {
413  return itor->vec->seq + itor->pos;
414 }
415 
416 int
418 {
419  ckd_free(itor);
420  return 0;
421 }
422 
425 {
426  if (itor == NULL)
427  return NULL;
428  if (pos >= itor->vec->n_ent) {
430  return NULL;
431  }
432  itor->pos = pos;
433  return itor;
434 }
435 
438 {
439  if (itor == NULL)
440  return NULL;
441  if (++itor->pos >= itor->vec->n_ent) {
443  return NULL;
444  }
445  return itor;
446 }
447 
450 {
451  if (itor == NULL)
452  return NULL;
453  if (--itor->pos < 0) {
455  return NULL;
456  }
457  return itor;
458 }
459 
462 {
463  ps_alignment_iter_t *itor2;
464  if (itor == NULL)
465  return NULL;
466  if (itor->vec == &itor->al->word)
467  return NULL;
468  if (itor->vec->seq[itor->pos].parent == PS_ALIGNMENT_NONE)
469  return NULL;
470  itor2 = ckd_calloc(1, sizeof(*itor2));
471  itor2->al = itor->al;
472  itor2->pos = itor->vec->seq[itor->pos].parent;
473  if (itor->vec == &itor->al->sseq)
474  itor2->vec = &itor->al->word;
475  else
476  itor2->vec = &itor->al->sseq;
477  return itor2;
478 }
479 
482 {
483  ps_alignment_iter_t *itor2;
484  if (itor == NULL)
485  return NULL;
486  if (itor->vec == &itor->al->state)
487  return NULL;
488  if (itor->vec->seq[itor->pos].child == PS_ALIGNMENT_NONE)
489  return NULL;
490  itor2 = ckd_calloc(1, sizeof(*itor2));
491  itor2->al = itor->al;
492  itor2->pos = itor->vec->seq[itor->pos].child;
493  if (itor->vec == &itor->al->word)
494  itor2->vec = &itor->al->sseq;
495  else
496  itor2->vec = &itor->al->state;
497  return itor2;
498 }
int ps_alignment_n_states(ps_alignment_t *al)
Number of states.
Definition: ps_alignment.c:363
Definition: ps_alignment.h:56
int ps_alignment_populate_ci(ps_alignment_t *al)
Populate lower layers using context-independent phones.
Definition: ps_alignment.c:247
ps_alignment_t * ps_alignment_init(dict2pid_t *d2p)
Create a new, empty alignment.
Definition: ps_alignment.c:51
ps_alignment_iter_t * ps_alignment_iter_goto(ps_alignment_iter_t *itor, int pos)
Move alignment iterator to given index.
Definition: ps_alignment.c:424
#define BAD_SSID
Invalid senone sequence ID (limited to 16 bits for PocketSphinx).
Definition: bin_mdef.h:94
ps_alignment_iter_t * ps_alignment_iter_next(ps_alignment_iter_t *itor)
Move an alignment iterator forward.
Definition: ps_alignment.c:437
int dict2pid_free(dict2pid_t *d2p)
Free the memory dict2pid structure.
Definition: dict2pid.c:507
int ps_alignment_free(ps_alignment_t *al)
Release an alignment.
Definition: ps_alignment.c:59
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
Definition: dict2pid.h:115
int ps_alignment_propagate(ps_alignment_t *al)
Propagate timing information up from state sequence.
Definition: ps_alignment.c:313
#define BAD_SENID
Invalid senone ID (limited to 16 bits for PocketSphinx).
Definition: bin_mdef.h:98
bin_mdef_t * mdef
Model definition, used to generate internal ssids on the fly.
Definition: dict2pid.h:87
ps_alignment_iter_t * ps_alignment_words(ps_alignment_t *al)
Iterate over the alignment starting at the first word.
Definition: ps_alignment.c:369
dict_t * dict
Dictionary this table refers to.
Definition: dict2pid.h:89
a structure for a dictionary.
Definition: dict.h:76
s3ssid_t dict2pid_internal(dict2pid_t *d2p, int32 wid, int pos)
Return the senone sequence ID for the given word position.
Definition: dict2pid.c:367
cross word triphone model structure
Definition: dict2pid.h:73
dict2pid_t * dict2pid_retain(dict2pid_t *d2p)
Retain a pointer to dict2pid.
Definition: dict2pid.c:500
ps_alignment_iter_t * ps_alignment_iter_prev(ps_alignment_iter_t *itor)
Move an alignment iterator back.
Definition: ps_alignment.c:449
ps_alignment_iter_t * ps_alignment_states(ps_alignment_t *al)
Iterate over the alignment starting at the first state.
Definition: ps_alignment.c:397
ps_alignment_iter_t * ps_alignment_iter_up(ps_alignment_iter_t *itor)
Get a new iterator starting at the parent of the current node.
Definition: ps_alignment.c:461
ps_alignment_iter_t * ps_alignment_iter_down(ps_alignment_iter_t *itor)
Get a new iterator starting at the first child of the current node.
Definition: ps_alignment.c:481
int ps_alignment_n_phones(ps_alignment_t *al)
Number of phones.
Definition: ps_alignment.c:357
int ps_alignment_iter_free(ps_alignment_iter_t *itor)
Release an iterator before completing all iterations.
Definition: ps_alignment.c:417
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
Definition: dict2pid.h:75
Multi-level alignment structure.
int ps_alignment_populate(ps_alignment_t *al)
Populate lower layers using available word information.
Definition: ps_alignment.c:129
#define dict_pron(d, w, p)
The CI phones of the word w at position p.
Definition: dict.h:165
ps_alignment_iter_t * ps_alignment_phones(ps_alignment_t *al)
Iterate over the alignment starting at the first phone.
Definition: ps_alignment.c:383
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
s3ssid_t * ssid
Senone Sequence ID list for all context ciphones.
Definition: dict2pid.h:74
int ps_alignment_add_word(ps_alignment_t *al, int32 wid, int duration)
Append a word.
Definition: ps_alignment.c:108
ps_alignment_entry_t * ps_alignment_iter_get(ps_alignment_iter_t *itor)
Get the alignment entry pointed to by an iterator.
Definition: ps_alignment.c:411
int ps_alignment_n_words(ps_alignment_t *al)
Number of words.
Definition: ps_alignment.c:351