PocketSphinx  5prealpha
bin_mdef.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2005 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 /*********************************************************************
38  *
39  * File: bin_mdef.c
40  *
41  * Description:
42  * Binary format model definition files, with support for
43  * heterogeneous topologies and variable-size N-phones
44  *
45  * Author:
46  * David Huggins-Daines <dhuggins@cs.cmu.edu>
47  *********************************************************************/
48 
49 /* System headers. */
50 #include <stdio.h>
51 #include <string.h>
52 #include <assert.h>
53 
54 /* SphinxBase headers. */
55 #include <sphinxbase/prim_type.h>
56 #include <sphinxbase/ckd_alloc.h>
57 #include <sphinxbase/byteorder.h>
58 #include <sphinxbase/case.h>
59 #include <sphinxbase/err.h>
60 
61 /* Local headers. */
62 #include "mdef.h"
63 #include "bin_mdef.h"
64 
65 bin_mdef_t *
66 bin_mdef_read_text(cmd_ln_t *config, const char *filename)
67 {
68  bin_mdef_t *bmdef;
69  mdef_t *mdef;
70  int i, nodes, ci_idx, lc_idx, rc_idx;
71  int nchars;
72 
73  if ((mdef = mdef_init((char *) filename, TRUE)) == NULL)
74  return NULL;
75 
76  /* Enforce some limits. */
77  if (mdef->n_sen > BAD_SENID) {
78  E_ERROR("Number of senones exceeds limit: %d > %d\n",
79  mdef->n_sen, BAD_SENID);
80  mdef_free(mdef);
81  return NULL;
82  }
83  if (mdef->n_sseq > BAD_SSID) {
84  E_ERROR("Number of senone sequences exceeds limit: %d > %d\n",
85  mdef->n_sseq, BAD_SSID);
86  mdef_free(mdef);
87  return NULL;
88  }
89  /* We use uint8 for ciphones */
90  if (mdef->n_ciphone > 255) {
91  E_ERROR("Number of phones exceeds limit: %d > %d\n",
92  mdef->n_ciphone, 255);
93  mdef_free(mdef);
94  return NULL;
95  }
96 
97  bmdef = ckd_calloc(1, sizeof(*bmdef));
98  bmdef->refcnt = 1;
99 
100  /* Easy stuff. The mdef.c code has done the heavy lifting for us. */
101  bmdef->n_ciphone = mdef->n_ciphone;
102  bmdef->n_phone = mdef->n_phone;
103  bmdef->n_emit_state = mdef->n_emit_state;
104  bmdef->n_ci_sen = mdef->n_ci_sen;
105  bmdef->n_sen = mdef->n_sen;
106  bmdef->n_tmat = mdef->n_tmat;
107  bmdef->n_sseq = mdef->n_sseq;
108  bmdef->sseq = mdef->sseq;
109  bmdef->cd2cisen = mdef->cd2cisen;
110  bmdef->sen2cimap = mdef->sen2cimap;
111  bmdef->n_ctx = 3; /* Triphones only. */
112  bmdef->sil = mdef->sil;
113  mdef->sseq = NULL; /* We are taking over this one. */
114  mdef->cd2cisen = NULL; /* And this one. */
115  mdef->sen2cimap = NULL; /* And this one. */
116 
117  /* Get the phone names. If they are not sorted
118  * ASCII-betically then we are in a world of hurt and
119  * therefore will simply refuse to continue. */
120  bmdef->ciname = ckd_calloc(bmdef->n_ciphone, sizeof(*bmdef->ciname));
121  nchars = 0;
122  for (i = 0; i < bmdef->n_ciphone; ++i)
123  nchars += strlen(mdef->ciphone[i].name) + 1;
124  bmdef->ciname[0] = ckd_calloc(nchars, 1);
125  strcpy(bmdef->ciname[0], mdef->ciphone[0].name);
126  for (i = 1; i < bmdef->n_ciphone; ++i) {
127  bmdef->ciname[i] =
128  bmdef->ciname[i - 1] + strlen(bmdef->ciname[i - 1]) + 1;
129  strcpy(bmdef->ciname[i], mdef->ciphone[i].name);
130  if (i > 0 && strcmp(bmdef->ciname[i - 1], bmdef->ciname[i]) > 0) {
131  /* FIXME: there should be a solution to this, actually. */
132  E_ERROR("Phone names are not in sorted order, sorry.");
133  bin_mdef_free(bmdef);
134  return NULL;
135  }
136  }
137 
138  /* Copy over phone information. */
139  bmdef->phone = ckd_calloc(bmdef->n_phone, sizeof(*bmdef->phone));
140  for (i = 0; i < mdef->n_phone; ++i) {
141  bmdef->phone[i].ssid = mdef->phone[i].ssid;
142  bmdef->phone[i].tmat = mdef->phone[i].tmat;
143  if (i < bmdef->n_ciphone) {
144  bmdef->phone[i].info.ci.filler = mdef->ciphone[i].filler;
145  }
146  else {
147  bmdef->phone[i].info.cd.wpos = mdef->phone[i].wpos;
148  bmdef->phone[i].info.cd.ctx[0] = mdef->phone[i].ci;
149  bmdef->phone[i].info.cd.ctx[1] = mdef->phone[i].lc;
150  bmdef->phone[i].info.cd.ctx[2] = mdef->phone[i].rc;
151  }
152  }
153 
154  /* Walk the wpos_ci_lclist once to find the total number of
155  * nodes and the starting locations for each level. */
156  nodes = lc_idx = ci_idx = rc_idx = 0;
157  for (i = 0; i < N_WORD_POSN; ++i) {
158  int j;
159  for (j = 0; j < mdef->n_ciphone; ++j) {
160  ph_lc_t *lc;
161 
162  for (lc = mdef->wpos_ci_lclist[i][j]; lc; lc = lc->next) {
163  ph_rc_t *rc;
164  for (rc = lc->rclist; rc; rc = rc->next) {
165  ++nodes; /* RC node */
166  }
167  ++nodes; /* LC node */
168  ++rc_idx; /* Start of RC nodes (after LC nodes) */
169  }
170  ++nodes; /* CI node */
171  ++lc_idx; /* Start of LC nodes (after CI nodes) */
172  ++rc_idx; /* Start of RC nodes (after CI and LC nodes) */
173  }
174  ++nodes; /* wpos node */
175  ++ci_idx; /* Start of CI nodes (after wpos nodes) */
176  ++lc_idx; /* Start of LC nodes (after CI nodes) */
177  ++rc_idx; /* STart of RC nodes (after wpos, CI, and LC nodes) */
178  }
179  E_INFO("Allocating %d * %d bytes (%d KiB) for CD tree\n",
180  nodes, sizeof(*bmdef->cd_tree),
181  nodes * sizeof(*bmdef->cd_tree) / 1024);
182  bmdef->n_cd_tree = nodes;
183  bmdef->cd_tree = ckd_calloc(nodes, sizeof(*bmdef->cd_tree));
184  for (i = 0; i < N_WORD_POSN; ++i) {
185  int j;
186 
187  bmdef->cd_tree[i].ctx = i;
188  bmdef->cd_tree[i].n_down = mdef->n_ciphone;
189  bmdef->cd_tree[i].c.down = ci_idx;
190 #if 0
191  E_INFO("%d => %c (%d@%d)\n",
192  i, (WPOS_NAME)[i],
193  bmdef->cd_tree[i].n_down, bmdef->cd_tree[i].c.down);
194 #endif
195 
196  /* Now we can build the rest of the tree. */
197  for (j = 0; j < mdef->n_ciphone; ++j) {
198  ph_lc_t *lc;
199 
200  bmdef->cd_tree[ci_idx].ctx = j;
201  bmdef->cd_tree[ci_idx].c.down = lc_idx;
202  for (lc = mdef->wpos_ci_lclist[i][j]; lc; lc = lc->next) {
203  ph_rc_t *rc;
204 
205  bmdef->cd_tree[lc_idx].ctx = lc->lc;
206  bmdef->cd_tree[lc_idx].c.down = rc_idx;
207  for (rc = lc->rclist; rc; rc = rc->next) {
208  bmdef->cd_tree[rc_idx].ctx = rc->rc;
209  bmdef->cd_tree[rc_idx].n_down = 0;
210  bmdef->cd_tree[rc_idx].c.pid = rc->pid;
211 #if 0
212  E_INFO("%d => %s %s %s %c (%d@%d)\n",
213  rc_idx,
214  bmdef->ciname[j],
215  bmdef->ciname[lc->lc],
216  bmdef->ciname[rc->rc],
217  (WPOS_NAME)[i],
218  bmdef->cd_tree[rc_idx].n_down,
219  bmdef->cd_tree[rc_idx].c.down);
220 #endif
221 
222  ++bmdef->cd_tree[lc_idx].n_down;
223  ++rc_idx;
224  }
225  /* If there are no triphones here,
226  * this is considered a leafnode, so
227  * set the pid to -1. */
228  if (bmdef->cd_tree[lc_idx].n_down == 0)
229  bmdef->cd_tree[lc_idx].c.pid = -1;
230 #if 0
231  E_INFO("%d => %s %s %c (%d@%d)\n",
232  lc_idx,
233  bmdef->ciname[j],
234  bmdef->ciname[lc->lc],
235  (WPOS_NAME)[i],
236  bmdef->cd_tree[lc_idx].n_down,
237  bmdef->cd_tree[lc_idx].c.down);
238 #endif
239 
240  ++bmdef->cd_tree[ci_idx].n_down;
241  ++lc_idx;
242  }
243 
244  /* As above, so below. */
245  if (bmdef->cd_tree[ci_idx].n_down == 0)
246  bmdef->cd_tree[ci_idx].c.pid = -1;
247 #if 0
248  E_INFO("%d => %d=%s (%d@%d)\n",
249  ci_idx, j, bmdef->ciname[j],
250  bmdef->cd_tree[ci_idx].n_down,
251  bmdef->cd_tree[ci_idx].c.down);
252 #endif
253 
254  ++ci_idx;
255  }
256  }
257 
258  mdef_free(mdef);
259 
260  bmdef->alloc_mode = BIN_MDEF_FROM_TEXT;
261  return bmdef;
262 }
263 
264 bin_mdef_t *
266 {
267  ++m->refcnt;
268  return m;
269 }
270 
271 int
273 {
274  if (m == NULL)
275  return 0;
276  if (--m->refcnt > 0)
277  return m->refcnt;
278 
279  switch (m->alloc_mode) {
280  case BIN_MDEF_FROM_TEXT:
281  ckd_free(m->ciname[0]);
282  ckd_free(m->sseq[0]);
283  ckd_free(m->phone);
284  ckd_free(m->cd_tree);
285  break;
286  case BIN_MDEF_IN_MEMORY:
287  ckd_free(m->ciname[0]);
288  break;
289  case BIN_MDEF_ON_DISK:
290  break;
291  }
292  if (m->filemap)
293  mmio_file_unmap(m->filemap);
294  ckd_free(m->cd2cisen);
295  ckd_free(m->sen2cimap);
296  ckd_free(m->ciname);
297  ckd_free(m->sseq);
298  ckd_free(m);
299  return 0;
300 }
301 
/*
 * Human-readable description of the binary file layout.
 * bin_mdef_write() stores this text in the file, preceded by its length
 * rounded up to a 4-byte boundary and followed by zero padding;
 * bin_mdef_read() skips over it using that stored length.
 */
302 static const char format_desc[] =
303  "BEGIN FILE FORMAT DESCRIPTION\n"
304  "int32 n_ciphone; /**< Number of base (CI) phones */\n"
305  "int32 n_phone; /**< Number of base (CI) phones + (CD) triphones */\n"
306  "int32 n_emit_state; /**< Number of emitting states per phone (0 if heterogeneous) */\n"
307  "int32 n_ci_sen; /**< Number of CI senones; these are the first */\n"
308  "int32 n_sen; /**< Number of senones (CI+CD) */\n"
309  "int32 n_tmat; /**< Number of transition matrices */\n"
310  "int32 n_sseq; /**< Number of unique senone sequences */\n"
311  "int32 n_ctx; /**< Number of phones of context */\n"
312  "int32 n_cd_tree; /**< Number of nodes in CD tree structure */\n"
313  "int32 sil; /**< CI phone ID for silence */\n"
314  "char ciphones[][]; /**< CI phone strings (null-terminated) */\n"
315  "char padding[]; /**< Padding to a 4-bytes boundary */\n"
316  "struct { int16 ctx; int16 n_down; int32 pid/down } cd_tree[];\n"
317  "struct { int32 ssid; int32 tmat; int8 attr[4] } phones[];\n"
318  "int16 sseq[]; /**< Unique senone sequences */\n"
319  "int8 sseq_len[]; /**< Number of states in each sseq (none if homogeneous) */\n"
320  "END FILE FORMAT DESCRIPTION\n";
321 
322 bin_mdef_t *
323 bin_mdef_read(cmd_ln_t *config, const char *filename)
324 {
325  bin_mdef_t *m;
326  FILE *fh;
327  size_t tree_start;
328  int32 val, i, do_mmap, swap;
329  long pos, end;
330  int32 *sseq_size;
331 
332  /* Try to read it as text first. */
333  if ((m = bin_mdef_read_text(config, filename)) != NULL)
334  return m;
335 
336  E_INFO("Reading binary model definition: %s\n", filename);
337  if ((fh = fopen(filename, "rb")) == NULL)
338  return NULL;
339 
340  if (fread(&val, 4, 1, fh) != 1) {
341  fclose(fh);
342  E_ERROR_SYSTEM("Failed to read byte-order marker from %s\n",
343  filename);
344  return NULL;
345  }
346  swap = 0;
347  if (val == BIN_MDEF_OTHER_ENDIAN) {
348  swap = 1;
349  E_INFO("Must byte-swap %s\n", filename);
350  }
351  if (fread(&val, 4, 1, fh) != 1) {
352  fclose(fh);
353  E_ERROR_SYSTEM("Failed to read version from %s\n", filename);
354  return NULL;
355  }
356  if (swap)
357  SWAP_INT32(&val);
358  if (val > BIN_MDEF_FORMAT_VERSION) {
359  E_ERROR("File format version %d for %s is newer than library\n",
360  val, filename);
361  fclose(fh);
362  return NULL;
363  }
364  if (fread(&val, 4, 1, fh) != 1) {
365  fclose(fh);
366  E_ERROR_SYSTEM("Failed to read header length from %s\n", filename);
367  return NULL;
368  }
369  if (swap)
370  SWAP_INT32(&val);
371  /* Skip format descriptor. */
372  fseek(fh, val, SEEK_CUR);
373 
374  /* Finally allocate it. */
375  m = ckd_calloc(1, sizeof(*m));
376  m->refcnt = 1;
377 
378  /* Check these, to make gcc/glibc shut up. */
379 #define FREAD_SWAP32_CHK(dest) \
380  if (fread((dest), 4, 1, fh) != 1) { \
381  fclose(fh); \
382  ckd_free(m); \
383  E_ERROR_SYSTEM("Failed to read %s from %s\n", #dest, filename); \
384  return NULL; \
385  } \
386  if (swap) SWAP_INT32(dest);
387 
388  FREAD_SWAP32_CHK(&m->n_ciphone);
389  FREAD_SWAP32_CHK(&m->n_phone);
390  FREAD_SWAP32_CHK(&m->n_emit_state);
391  FREAD_SWAP32_CHK(&m->n_ci_sen);
392  FREAD_SWAP32_CHK(&m->n_sen);
393  FREAD_SWAP32_CHK(&m->n_tmat);
394  FREAD_SWAP32_CHK(&m->n_sseq);
395  FREAD_SWAP32_CHK(&m->n_ctx);
396  FREAD_SWAP32_CHK(&m->n_cd_tree);
397  FREAD_SWAP32_CHK(&m->sil);
398 
399  /* CI names are first in the file. */
400  m->ciname = ckd_calloc(m->n_ciphone, sizeof(*m->ciname));
401 
402  /* Decide whether to read in the whole file or mmap it. */
403  do_mmap = config ? cmd_ln_boolean_r(config, "-mmap") : TRUE;
404  if (swap) {
405  E_WARN("-mmap specified, but mdef is other-endian. Will not memory-map.\n");
406  do_mmap = FALSE;
407  }
408  /* Actually try to mmap it. */
409  if (do_mmap) {
410  m->filemap = mmio_file_read(filename);
411  if (m->filemap == NULL)
412  do_mmap = FALSE;
413  }
414  pos = ftell(fh);
415  if (do_mmap) {
416  /* Get the base pointer from the memory map. */
417  m->ciname[0] = (char *)mmio_file_ptr(m->filemap) + pos;
418  /* Success! */
419  m->alloc_mode = BIN_MDEF_ON_DISK;
420  }
421  else {
422  /* Read everything into memory. */
423  m->alloc_mode = BIN_MDEF_IN_MEMORY;
424  fseek(fh, 0, SEEK_END);
425  end = ftell(fh);
426  fseek(fh, pos, SEEK_SET);
427  m->ciname[0] = ckd_malloc(end - pos);
428  if (fread(m->ciname[0], 1, end - pos, fh) != end - pos)
429  E_FATAL("Failed to read %d bytes of data from %s\n", end - pos, filename);
430  }
431 
432  for (i = 1; i < m->n_ciphone; ++i)
433  m->ciname[i] = m->ciname[i - 1] + strlen(m->ciname[i - 1]) + 1;
434 
435  /* Skip past the padding. */
436  tree_start =
437  m->ciname[i - 1] + strlen(m->ciname[i - 1]) + 1 - m->ciname[0];
438  tree_start = (tree_start + 3) & ~3;
439  m->cd_tree = (cd_tree_t *) (m->ciname[0] + tree_start);
440  if (swap) {
441  for (i = 0; i < m->n_cd_tree; ++i) {
442  SWAP_INT16(&m->cd_tree[i].ctx);
443  SWAP_INT16(&m->cd_tree[i].n_down);
444  SWAP_INT32(&m->cd_tree[i].c.down);
445  }
446  }
447  m->phone = (mdef_entry_t *) (m->cd_tree + m->n_cd_tree);
448  if (swap) {
449  for (i = 0; i < m->n_phone; ++i) {
450  SWAP_INT32(&m->phone[i].ssid);
451  SWAP_INT32(&m->phone[i].tmat);
452  }
453  }
454  sseq_size = (int32 *) (m->phone + m->n_phone);
455  if (swap)
456  SWAP_INT32(sseq_size);
457  m->sseq = ckd_calloc(m->n_sseq, sizeof(*m->sseq));
458  m->sseq[0] = (uint16 *) (sseq_size + 1);
459  if (swap) {
460  for (i = 0; i < *sseq_size; ++i)
461  SWAP_INT16(m->sseq[0] + i);
462  }
463  if (m->n_emit_state) {
464  for (i = 1; i < m->n_sseq; ++i)
465  m->sseq[i] = m->sseq[0] + i * m->n_emit_state;
466  }
467  else {
468  m->sseq_len = (uint8 *) (m->sseq[0] + *sseq_size);
469  for (i = 1; i < m->n_sseq; ++i)
470  m->sseq[i] = m->sseq[i - 1] + m->sseq_len[i - 1];
471  }
472 
473  /* Now build the CD-to-CI mappings using the senone sequences.
474  * This is the only really accurate way to do it, though it is
475  * still inaccurate in the case of heterogeneous topologies or
476  * cross-state tying. */
477  m->cd2cisen = (int16 *) ckd_malloc(m->n_sen * sizeof(*m->cd2cisen));
478  m->sen2cimap = (int16 *) ckd_malloc(m->n_sen * sizeof(*m->sen2cimap));
479 
480  /* Default mappings (identity, none) */
481  for (i = 0; i < m->n_ci_sen; ++i)
482  m->cd2cisen[i] = i;
483  for (; i < m->n_sen; ++i)
484  m->cd2cisen[i] = -1;
485  for (i = 0; i < m->n_sen; ++i)
486  m->sen2cimap[i] = -1;
487  for (i = 0; i < m->n_phone; ++i) {
488  int32 j, ssid = m->phone[i].ssid;
489 
490  for (j = 0; j < bin_mdef_n_emit_state_phone(m, i); ++j) {
491  int s = bin_mdef_sseq2sen(m, ssid, j);
492  int ci = bin_mdef_pid2ci(m, i);
493  /* Take the first one and warn if we have cross-state tying. */
494  if (m->sen2cimap[s] == -1)
495  m->sen2cimap[s] = ci;
496  if (m->sen2cimap[s] != ci)
497  E_WARN
498  ("Senone %d is shared between multiple base phones\n",
499  s);
500 
501  if (j > bin_mdef_n_emit_state_phone(m, ci))
502  E_WARN("CD phone %d has fewer states than CI phone %d\n",
503  i, ci);
504  else
505  m->cd2cisen[s] =
506  bin_mdef_sseq2sen(m, m->phone[ci].ssid, j);
507  }
508  }
509 
510  /* Set the silence phone. */
512 
513  E_INFO
514  ("%d CI-phone, %d CD-phone, %d emitstate/phone, %d CI-sen, %d Sen, %d Sen-Seq\n",
515  m->n_ciphone, m->n_phone - m->n_ciphone, m->n_emit_state,
516  m->n_ci_sen, m->n_sen, m->n_sseq);
517  fclose(fh);
518  return m;
519 }
520 
521 int
522 bin_mdef_write(bin_mdef_t * m, const char *filename)
523 {
524  FILE *fh;
525  int32 val, i;
526 
527  if ((fh = fopen(filename, "wb")) == NULL)
528  return -1;
529 
530  /* Byteorder marker. */
531  val = BIN_MDEF_NATIVE_ENDIAN;
532  fwrite(&val, 1, 4, fh);
533  /* Version. */
534  val = BIN_MDEF_FORMAT_VERSION;
535  fwrite(&val, 1, sizeof(val), fh);
536 
537  /* Round the format descriptor size up to a 4-byte boundary. */
538  val = ((sizeof(format_desc) + 3) & ~3);
539  fwrite(&val, 1, sizeof(val), fh);
540  fwrite(format_desc, 1, sizeof(format_desc), fh);
541  /* Pad it with zeros. */
542  i = 0;
543  fwrite(&i, 1, val - sizeof(format_desc), fh);
544 
545  /* Binary header things. */
546  fwrite(&m->n_ciphone, 4, 1, fh);
547  fwrite(&m->n_phone, 4, 1, fh);
548  fwrite(&m->n_emit_state, 4, 1, fh);
549  fwrite(&m->n_ci_sen, 4, 1, fh);
550  fwrite(&m->n_sen, 4, 1, fh);
551  fwrite(&m->n_tmat, 4, 1, fh);
552  fwrite(&m->n_sseq, 4, 1, fh);
553  fwrite(&m->n_ctx, 4, 1, fh);
554  fwrite(&m->n_cd_tree, 4, 1, fh);
555  /* Write this as a 32-bit value to preserve alignment for the
556  * non-mmap case (we want things aligned both from the
557  * beginning of the file and the beginning of the phone
558  * strings). */
559  val = m->sil;
560  fwrite(&val, 4, 1, fh);
561 
562  /* Phone strings. */
563  for (i = 0; i < m->n_ciphone; ++i)
564  fwrite(m->ciname[i], 1, strlen(m->ciname[i]) + 1, fh);
565  /* Pad with zeros. */
566  val = (ftell(fh) + 3) & ~3;
567  i = 0;
568  fwrite(&i, 1, val - ftell(fh), fh);
569 
570  /* Write CD-tree */
571  fwrite(m->cd_tree, sizeof(*m->cd_tree), m->n_cd_tree, fh);
572  /* Write phones */
573  fwrite(m->phone, sizeof(*m->phone), m->n_phone, fh);
574  if (m->n_emit_state) {
575  /* Write size of sseq */
576  val = m->n_sseq * m->n_emit_state;
577  fwrite(&val, 4, 1, fh);
578 
579  /* Write sseq */
580  fwrite(m->sseq[0], sizeof(**m->sseq),
581  m->n_sseq * m->n_emit_state, fh);
582  }
583  else {
584  int32 n;
585 
586  /* Calcluate size of sseq */
587  n = 0;
588  for (i = 0; i < m->n_sseq; ++i)
589  n += m->sseq_len[i];
590 
591  /* Write size of sseq */
592  fwrite(&n, 4, 1, fh);
593 
594  /* Write sseq */
595  fwrite(m->sseq[0], sizeof(**m->sseq), n, fh);
596 
597  /* Write sseq_len */
598  fwrite(m->sseq_len, 1, m->n_sseq, fh);
599  }
600  fclose(fh);
601 
602  return 0;
603 }
604 
605 int
606 bin_mdef_write_text(bin_mdef_t * m, const char *filename)
607 {
608  FILE *fh;
609  int p, i, n_total_state;
610 
611  if (strcmp(filename, "-") == 0)
612  fh = stdout;
613  else {
614  if ((fh = fopen(filename, "w")) == NULL)
615  return -1;
616  }
617 
618  fprintf(fh, "0.3\n");
619  fprintf(fh, "%d n_base\n", m->n_ciphone);
620  fprintf(fh, "%d n_tri\n", m->n_phone - m->n_ciphone);
621  if (m->n_emit_state)
622  n_total_state = m->n_phone * (m->n_emit_state + 1);
623  else {
624  n_total_state = 0;
625  for (i = 0; i < m->n_phone; ++i)
626  n_total_state += m->sseq_len[m->phone[i].ssid] + 1;
627  }
628  fprintf(fh, "%d n_state_map\n", n_total_state);
629  fprintf(fh, "%d n_tied_state\n", m->n_sen);
630  fprintf(fh, "%d n_tied_ci_state\n", m->n_ci_sen);
631  fprintf(fh, "%d n_tied_tmat\n", m->n_tmat);
632  fprintf(fh, "#\n# Columns definitions\n");
633  fprintf(fh, "#%4s %3s %3s %1s %6s %4s %s\n",
634  "base", "lft", "rt", "p", "attrib", "tmat",
635  " ... state id's ...");
636 
637  for (p = 0; p < m->n_ciphone; p++) {
638  int n_state;
639 
640  fprintf(fh, "%5s %3s %3s %1s", m->ciname[p], "-", "-", "-");
641 
642  if (bin_mdef_is_fillerphone(m, p))
643  fprintf(fh, " %6s", "filler");
644  else
645  fprintf(fh, " %6s", "n/a");
646  fprintf(fh, " %4d", m->phone[p].tmat);
647 
648  if (m->n_emit_state)
649  n_state = m->n_emit_state;
650  else
651  n_state = m->sseq_len[m->phone[p].ssid];
652  for (i = 0; i < n_state; i++) {
653  fprintf(fh, " %6u", m->sseq[m->phone[p].ssid][i]);
654  }
655  fprintf(fh, " N\n");
656  }
657 
658 
659  for (; p < m->n_phone; p++) {
660  int n_state;
661 
662  fprintf(fh, "%5s %3s %3s %c",
663  m->ciname[m->phone[p].info.cd.ctx[0]],
664  m->ciname[m->phone[p].info.cd.ctx[1]],
665  m->ciname[m->phone[p].info.cd.ctx[2]],
666  (WPOS_NAME)[m->phone[p].info.cd.wpos]);
667 
668  if (bin_mdef_is_fillerphone(m, p))
669  fprintf(fh, " %6s", "filler");
670  else
671  fprintf(fh, " %6s", "n/a");
672  fprintf(fh, " %4d", m->phone[p].tmat);
673 
674 
675  if (m->n_emit_state)
676  n_state = m->n_emit_state;
677  else
678  n_state = m->sseq_len[m->phone[p].ssid];
679  for (i = 0; i < n_state; i++) {
680  fprintf(fh, " %6u", m->sseq[m->phone[p].ssid][i]);
681  }
682  fprintf(fh, " N\n");
683  }
684 
685  if (strcmp(filename, "-") != 0)
686  fclose(fh);
687  return 0;
688 }
689 
690 int
691 bin_mdef_ciphone_id(bin_mdef_t * m, const char *ciphone)
692 {
693  int low, mid, high;
694 
695  /* Exact binary search on m->ciphone */
696  low = 0;
697  high = m->n_ciphone;
698  while (low < high) {
699  int c;
700 
701  mid = (low + high) / 2;
702  c = strcmp(ciphone, m->ciname[mid]);
703  if (c == 0)
704  return mid;
705  else if (c > 0)
706  low = mid + 1;
707  else if (c < 0)
708  high = mid;
709  }
710  return -1;
711 }
712 
713 int
714 bin_mdef_ciphone_id_nocase(bin_mdef_t * m, const char *ciphone)
715 {
716  int low, mid, high;
717 
718  /* Exact binary search on m->ciphone */
719  low = 0;
720  high = m->n_ciphone;
721  while (low < high) {
722  int c;
723 
724  mid = (low + high) / 2;
725  c = strcmp_nocase(ciphone, m->ciname[mid]);
726  if (c == 0)
727  return mid;
728  else if (c > 0)
729  low = mid + 1;
730  else if (c < 0)
731  high = mid;
732  }
733  return -1;
734 }
735 
736 const char *
738 {
739  assert(m != NULL);
740  assert(ci < m->n_ciphone);
741  return m->ciname[ci];
742 }
743 
744 int
745 bin_mdef_phone_id(bin_mdef_t * m, int32 ci, int32 lc, int32 rc, int32 wpos)
746 {
747  cd_tree_t *cd_tree;
748  int level, max;
749  int16 ctx[4];
750 
751  assert(m);
752 
753  /* In the future, we might back off when context is not available,
754  * but for now we'll just return the CI phone. */
755  if (lc < 0 || rc < 0)
756  return ci;
757 
758  assert((ci >= 0) && (ci < m->n_ciphone));
759  assert((lc >= 0) && (lc < m->n_ciphone));
760  assert((rc >= 0) && (rc < m->n_ciphone));
761  assert((wpos >= 0) && (wpos < N_WORD_POSN));
762 
763  /* Create a context list, mapping fillers to silence. */
764  ctx[0] = wpos;
765  ctx[1] = ci;
766  ctx[2] = (m->sil >= 0
767  && m->phone[lc].info.ci.filler) ? m->sil : lc;
768  ctx[3] = (m->sil >= 0
769  && m->phone[rc].info.ci.filler) ? m->sil : rc;
770 
771  /* Walk down the cd_tree. */
772  cd_tree = m->cd_tree;
773  level = 0; /* What level we are on. */
774  max = N_WORD_POSN; /* Number of nodes on this level. */
775  while (level < 4) {
776  int i;
777 
778 #if 0
779  E_INFO("Looking for context %d=%s in %d at %d\n",
780  ctx[level], m->ciname[ctx[level]],
781  max, cd_tree - m->cd_tree);
782 #endif
783  for (i = 0; i < max; ++i) {
784 #if 0
785  E_INFO("Look at context %d=%s at %d\n",
786  cd_tree[i].ctx,
787  m->ciname[cd_tree[i].ctx], cd_tree + i - m->cd_tree);
788 #endif
789  if (cd_tree[i].ctx == ctx[level])
790  break;
791  }
792  if (i == max)
793  return -1;
794 #if 0
795  E_INFO("Found context %d=%s at %d, n_down=%d, down=%d\n",
796  ctx[level], m->ciname[ctx[level]],
797  cd_tree + i - m->cd_tree,
798  cd_tree[i].n_down, cd_tree[i].c.down);
799 #endif
800  /* Leaf node, stop here. */
801  if (cd_tree[i].n_down == 0)
802  return cd_tree[i].c.pid;
803 
804  /* Go down one level. */
805  max = cd_tree[i].n_down;
806  cd_tree = m->cd_tree + cd_tree[i].c.down;
807  ++level;
808  }
809  /* We probably shouldn't get here. */
810  return -1;
811 }
812 
813 int
814 bin_mdef_phone_id_nearest(bin_mdef_t * m, int32 b, int32 l, int32 r, int32 pos)
815 {
816  int p, tmppos;
817 
818 
819 
820  /* In the future, we might back off when context is not available,
821  * but for now we'll just return the CI phone. */
822  if (l < 0 || r < 0)
823  return b;
824 
825  p = bin_mdef_phone_id(m, b, l, r, pos);
826  if (p >= 0)
827  return p;
828 
829  /* Exact triphone not found; backoff to other word positions */
830  for (tmppos = 0; tmppos < N_WORD_POSN; tmppos++) {
831  if (tmppos != pos) {
832  p = bin_mdef_phone_id(m, b, l, r, tmppos);
833  if (p >= 0)
834  return p;
835  }
836  }
837 
838  /* Nothing yet; backoff to silence phone if non-silence filler context */
839  /* In addition, backoff to silence phone on left/right if in beginning/end position */
840  if (m->sil >= 0) {
841  int newl = l, newr = r;
842  if (m->phone[(int)l].info.ci.filler
843  || pos == WORD_POSN_BEGIN || pos == WORD_POSN_SINGLE)
844  newl = m->sil;
845  if (m->phone[(int)r].info.ci.filler
846  || pos == WORD_POSN_END || pos == WORD_POSN_SINGLE)
847  newr = m->sil;
848  if ((newl != l) || (newr != r)) {
849  p = bin_mdef_phone_id(m, b, newl, newr, pos);
850  if (p >= 0)
851  return p;
852 
853  for (tmppos = 0; tmppos < N_WORD_POSN; tmppos++) {
854  if (tmppos != pos) {
855  p = bin_mdef_phone_id(m, b, newl, newr, tmppos);
856  if (p >= 0)
857  return p;
858  }
859  }
860  }
861  }
862 
863  /* Nothing yet; backoff to base phone */
864  return b;
865 }
866 
867 int
868 bin_mdef_phone_str(bin_mdef_t * m, int pid, char *buf)
869 {
870  char *wpos_name;
871 
872  assert(m);
873  assert((pid >= 0) && (pid < m->n_phone));
874  wpos_name = WPOS_NAME;
875 
876  buf[0] = '\0';
877  if (pid < m->n_ciphone)
878  sprintf(buf, "%s", bin_mdef_ciphone_str(m, pid));
879  else {
880  sprintf(buf, "%s %s %s %c",
881  bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[0]),
882  bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[1]),
883  bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[2]),
884  wpos_name[m->phone[pid].info.cd.wpos]);
885  }
886  return 0;
887 }
int32 n_sseq
No.
Definition: mdef.h:148
The main model definition structure.
Definition: mdef.h:135
int16 ctx
Context (word position or CI phone)
Definition: bin_mdef.h:105
int16 n_down
Number of children (0 for leafnode)
Definition: bin_mdef.h:106
int32 n_ciphone
number basephones actually present
Definition: mdef.h:136
POCKETSPHINX_EXPORT int bin_mdef_write(bin_mdef_t *m, const char *filename)
Write a binary mdef to a file.
Definition: bin_mdef.c:522
const char * bin_mdef_ciphone_str(bin_mdef_t *m, int32 ci)
In: ciphone id for which name wanted.
Definition: bin_mdef.c:737
word_posn_t wpos
Word position.
Definition: mdef.h:103
int32 n_tmat
Number of transition matrices.
Definition: bin_mdef.h:124
Single phone word (i.e.
Definition: mdef.h:76
int32 n_sen
Number of senones (CI+CD)
Definition: bin_mdef.h:123
int32 ssid
Senone sequence ID.
Definition: bin_mdef.h:74
int bin_mdef_ciphone_id(bin_mdef_t *m, const char *ciphone)
Context-independent phone lookup.
Definition: bin_mdef.c:691
int32 n_cd_tree
Number of nodes in cd_tree (below)
Definition: bin_mdef.h:127
uint16 ** sseq
Unique senone sequences (2D array built at load time)
Definition: bin_mdef.h:134
#define WPOS_NAME
Printable code for each word position above.
Definition: mdef.h:80
int32 tmat
Transition matrix ID.
Definition: bin_mdef.h:75
cd_tree_t * cd_tree
Tree mapping CD phones to phone IDs.
Definition: bin_mdef.h:132
Binary format model definition files, with support for heterogeneous topologies and variable-size N-p...
#define BAD_SSID
Invalid senone sequence ID (limited to 16 bits for PocketSphinx).
Definition: bin_mdef.h:94
int32 filler
Whether a filler phone; if so, can be substituted by silence phone in left or right context position...
Definition: mdef.h:89
Ending phone of word.
Definition: mdef.h:75
mmio_file_t * filemap
File map for this file (if any)
Definition: bin_mdef.h:130
int32 n_phone
number basephones + number triphones actually present
Definition: mdef.h:137
int16 rc
Base, left, right context ciphones.
Definition: mdef.h:102
char * name
The name of the CI phone.
Definition: mdef.h:88
int16 * cd2cisen
Parent CI-senone id for each senone; the first n_ci_sen are identity mappings; the CD-senones are con...
Definition: mdef.h:150
int32 n_phone
Number of base (CI) phones + (CD) triphones.
Definition: bin_mdef.h:120
uint8 * sseq_len
Number of states in each sseq (NULL for homogeneous)
Definition: bin_mdef.h:135
struct mdef_entry_s::@0::@1 ci
CI phone information - attributes (just "filler" for now)
int32 n_sseq
Number of unique senone sequences.
Definition: bin_mdef.h:125
char ** ciname
CI phone names.
Definition: bin_mdef.h:131
POCKETSPHINX_EXPORT bin_mdef_t * bin_mdef_read(cmd_ln_t *config, const char *filename)
Read a binary mdef from a file.
Definition: bin_mdef.c:323
int32 n_ctx
Number of phones of context.
Definition: bin_mdef.h:126
void mdef_free(mdef_t *mdef)
Free an mdef_t.
Definition: mdef.c:720
#define N_WORD_POSN
total # of word positions (excluding undefined)
Definition: mdef.h:79
int32 n_ci_sen
Number of CI senones; these are the first.
Definition: bin_mdef.h:122
int32 n_ci_sen
number CI senones; these are the first
Definition: mdef.h:139
int32 ssid
State sequence (or senone sequence) ID, considering the n_emit_state senone-ids are a unit...
Definition: mdef.h:98
#define BAD_SENID
Invalid senone ID (limited to 16 bits for PocketSphinx).
Definition: bin_mdef.h:98
int bin_mdef_phone_id(bin_mdef_t *m, int32 b, int32 l, int32 r, int32 pos)
In: Word position.
Definition: bin_mdef.c:745
int16 sil
CI phone ID for silence.
Definition: bin_mdef.h:128
int16 * cd2cisen
Parent CI-senone id for each senone.
Definition: bin_mdef.h:138
#define S3_SILENCE_CIPHONE
Hard-coded silence CI phone name.
Definition: mdef.h:81
enum bin_mdef_s::@4 alloc_mode
Allocation mode for this object.
int32 n_emit_state
Number of emitting states per phone (0 for heterogeneous)
Definition: bin_mdef.h:121
Structures for storing the left context.
Definition: bin_mdef.h:73
uint16 ** sseq
Unique state (or senone) sequences in this model, shared among all phones/triphones.
Definition: mdef.h:146
mdef_t * mdef_init(char *mdeffile, int breport)
Initialize the phone structure from the given model definition file.
int32 down
Next level of the tree (offset from start of cd_trees)
Definition: bin_mdef.h:109
int bin_mdef_ciphone_id_nocase(bin_mdef_t *m, const char *ciphone)
Case-insensitive context-independent phone lookup.
Definition: bin_mdef.c:714
int16 * sen2cimap
Parent CI-phone for each senone (CI or CD)
Definition: bin_mdef.h:139
int bin_mdef_free(bin_mdef_t *m)
Release a pointer to a binary mdef.
Definition: bin_mdef.c:272
Model definition.
ciphone_t * ciphone
CI-phone information for all ciphones.
Definition: mdef.h:144
POCKETSPHINX_EXPORT bin_mdef_t * bin_mdef_read_text(cmd_ln_t *config, const char *filename)
Read a text mdef from a file (creating an in-memory binary mdef).
Definition: bin_mdef.c:66
bin_mdef_t * bin_mdef_retain(bin_mdef_t *m)
Retain a pointer to a bin_mdef_t.
Definition: bin_mdef.c:265
int32 pid
Phone ID (leafnode)
Definition: bin_mdef.h:108
int32 n_sen
number senones (CI+CD)
Definition: mdef.h:140
Beginning phone of word.
Definition: mdef.h:74
int32 tmat
Transition matrix id.
Definition: mdef.h:101
int bin_mdef_phone_str(bin_mdef_t *m, int pid, char *buf)
Create a phone string for the given phone (base or triphone) id in the given buf. ...
Definition: bin_mdef.c:868
int16 * sen2cimap
Parent CI-phone for each senone (CI or CD)
Definition: mdef.h:153
mdef_entry_t * phone
All phone structures.
Definition: bin_mdef.h:133
POCKETSPHINX_EXPORT int bin_mdef_write_text(bin_mdef_t *m, const char *filename)
Write a binary mdef to a text file.
Definition: bin_mdef.c:606
int32 n_ciphone
Number of base (CI) phones.
Definition: bin_mdef.h:119
int32 n_tmat
number transition matrices
Definition: mdef.h:141
int32 n_emit_state
number emitting states per phone
Definition: mdef.h:138
int16 sil
SILENCE_CIPHONE id.
Definition: mdef.h:155
ph_lc_t *** wpos_ci_lclist
wpos_ci_lclist[wpos][ci] = list of lc for <wpos,ci>.
Definition: mdef.h:157
Structures needed for mapping <ci,lc,rc,wpos> into pid.
phone_t * phone
Information for all ciphones and triphones.
Definition: mdef.h:145