PocketSphinx  5prealpha
bin_mdef.h
Go to the documentation of this file.
1 /* -*- c-file-style: "linux" -*- */
2 /* ====================================================================
3  * Copyright (c) 2005 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
45 #ifndef __BIN_MDEF_H__
46 #define __BIN_MDEF_H__
47 
48 #ifdef __cplusplus
49 extern "C" {
50 #endif /* __cplusplus */
51 
52 /* SphinxBase headers. */
53 #include <sphinxbase/mmio.h>
54 #include <sphinxbase/cmd_ln.h>
55 #include <pocketsphinx_export.h>
56 
57 #include "mdef.h"
58 
59 #define BIN_MDEF_FORMAT_VERSION 1
60 /* Little-endian machines will write "BMDF" to disk, big-endian ones "FDMB". */
61 #define BIN_MDEF_NATIVE_ENDIAN 0x46444d42 /* 'BMDF' in little-endian order */
62 #define BIN_MDEF_OTHER_ENDIAN 0x424d4446 /* 'BMDF' in big-endian order */
/* __ATTRIBUTE_PACKED expands to the GNU packed attribute when __GNUC__ is
 * defined and to nothing otherwise, so structures tagged with it (see
 * struct mdef_entry_s) are only explicitly packed under GNU-compatible
 * compilers. */
63 #ifdef __GNUC__
64 #define __ATTRIBUTE_PACKED __attribute__((packed))
65 #else
66 #define __ATTRIBUTE_PACKED
67 #endif
68 
/* One phone entry: a senone sequence ID, a transition matrix ID, and a
 * 4-byte union holding either context-independent (ci) or
 * context-dependent (cd) information.  Declared with __ATTRIBUTE_PACKED. */
72 typedef struct mdef_entry_s mdef_entry_t;
73 struct mdef_entry_s {
74  int32 ssid; /**< Senone sequence ID. */
75  int32 tmat; /**< Transition matrix ID. */
76  /* FIXME: is any of this actually necessary? */
77  union {
79  struct {
80  uint8 filler; /* Read by bin_mdef_is_fillerphone(). */
81  uint8 reserved[3];
82  } ci; /**< CI phone information - attributes (just "filler" for now) */
84  struct {
85  uint8 wpos; /* NOTE(review): presumably the word position - confirm. */
86  uint8 ctx[3]; /**< quintphones will require hacking; ctx[0] is read as the CI phone id by bin_mdef_pid2ci(). */
87  } cd;
88  } info;
89 } __ATTRIBUTE_PACKED;
90 
/* NOTE(review): presumably the sentinel for an invalid senone sequence ID;
 * 0xffff is the largest 16-bit unsigned value - confirm against users. */
94 #define BAD_SSID 0xffff
95 
/* NOTE(review): presumably the sentinel for an invalid senone ID; senone
 * sequences are stored as uint16 (see bin_mdef_s.sseq) - confirm. */
98 #define BAD_SENID 0xffff
99 
/* Node of the tree mapping CD phones to phone IDs.  A node is either a
 * leaf (n_down == 0, c.pid valid) or an interior node (c.down valid). */
103 typedef struct cd_tree_s cd_tree_t;
104 struct cd_tree_s {
105  int16 ctx; /**< Context (word position or CI phone) */
106  int16 n_down; /**< Number of children (0 for leafnode) */
107  union {
108  int32 pid; /**< Phone ID (leafnode) */
109  int32 down; /**< Next level of the tree (offset from start of cd_trees) */
110  } c;
111 };
112 
/* Binary model definition object.  The cd_tree and phone members (header
 * lines 132-133) are restored here: the accessor macros in this header
 * dereference (m)->phone, so the structure is unusable without them. */
116 typedef struct bin_mdef_s bin_mdef_t;
117 struct bin_mdef_s {
118  int refcnt; /* NOTE(review): presumably the count managed by bin_mdef_retain()/bin_mdef_free() - confirm. */
119  int32 n_ciphone; /**< Number of base (CI) phones. */
120  int32 n_phone; /**< Number of base (CI) phones + (CD) triphones. */
121  int32 n_emit_state; /**< Number of emitting states per phone (0 for heterogeneous) */
122  int32 n_ci_sen; /**< Number of CI senones; these are the first. */
123  int32 n_sen; /**< Number of senones (CI+CD) */
124  int32 n_tmat; /**< Number of transition matrices. */
125  int32 n_sseq; /**< Number of unique senone sequences. */
126  int32 n_ctx; /**< Number of phones of context. */
127  int32 n_cd_tree; /**< Number of nodes in cd_tree (below) */
128  int16 sil; /**< CI phone ID for silence. */
130  mmio_file_t *filemap; /**< File map for this file (if any) */
131  char **ciname; /**< CI phone names. */
132  cd_tree_t *cd_tree; /**< Tree mapping CD phones to phone IDs. */
133  mdef_entry_t *phone; /**< All phone structures. */
134  uint16 **sseq; /**< Unique senone sequences (2D array built at load time) */
135  uint8 *sseq_len; /**< Number of states in each sseq (NULL for homogeneous) */
137  /* These two are not stored on disk, but are generated at load time. */
138  int16 *cd2cisen; /**< Parent CI-senone id for each senone. */
139  int16 *sen2cimap; /**< Parent CI-phone for each senone (CI or CD) */
142  enum { BIN_MDEF_FROM_TEXT, BIN_MDEF_IN_MEMORY, BIN_MDEF_ON_DISK } alloc_mode; /**< Allocation mode for this object. */
143 };
144 
/* Accessor macros for bin_mdef_t.  They are plain macros: arguments may be
 * evaluated more than once, so do not pass expressions with side effects. */
/* Filler attribute of phone p: read directly when p is a CI phone
 * (p < n_ciphone), otherwise read from the CI phone stored in ctx[0] of p. */
145 #define bin_mdef_is_fillerphone(m,p) (((p) < (m)->n_ciphone) \
146  ? (m)->phone[p].info.ci.filler \
147  : (m)->phone[(m)->phone[p].info.cd.ctx[0]].info.ci.filler)
/* True when p indexes a CI phone, i.e. p < n_ciphone. */
148 #define bin_mdef_is_ciphone(m,p) ((p) < (m)->n_ciphone)
/* Plain field accessors for the counts stored in the structure. */
149 #define bin_mdef_n_ciphone(m) ((m)->n_ciphone)
150 #define bin_mdef_n_phone(m) ((m)->n_phone)
151 #define bin_mdef_n_sseq(m) ((m)->n_sseq)
152 #define bin_mdef_n_emit_state(m) ((m)->n_emit_state)
/* Number of emitting states for phone p: the global n_emit_state when it is
 * nonzero, otherwise the per-sequence length sseq_len[] for p's ssid. */
153 #define bin_mdef_n_emit_state_phone(m,p) ((m)->n_emit_state ? (m)->n_emit_state \
154  : (m)->sseq_len[(m)->phone[p].ssid])
155 #define bin_mdef_n_sen(m) ((m)->n_sen)
156 #define bin_mdef_n_tmat(m) ((m)->n_tmat)
/* Per-phone lookups into the phone[] array. */
157 #define bin_mdef_pid2ssid(m,p) ((m)->phone[p].ssid)
158 #define bin_mdef_pid2tmatid(m,p) ((m)->phone[p].tmat)
159 #define bin_mdef_silphone(m) ((m)->sil)
160 #define bin_mdef_sen2cimap(m,s) ((m)->sen2cimap[s])
/* Senone at position pos of senone sequence ss. */
161 #define bin_mdef_sseq2sen(m,ss,pos) ((m)->sseq[ss][pos])
/* CI phone id for phone p: p itself when it is a CI phone, otherwise the
 * value stored in ctx[0] of its CD information. */
162 #define bin_mdef_pid2ci(m,p) (((p) < (m)->n_ciphone) ? (p) \
163  : (m)->phone[p].info.cd.ctx[0])
164 
/* Read a binary mdef from a file. */
168 POCKETSPHINX_EXPORT
169 bin_mdef_t *bin_mdef_read(cmd_ln_t *config, const char *filename);
/* Read a text mdef from a file (creating an in-memory binary mdef). */
173 POCKETSPHINX_EXPORT
174 bin_mdef_t *bin_mdef_read_text(cmd_ln_t *config, const char *filename);
/* Write a binary mdef to a file. */
178 POCKETSPHINX_EXPORT
179 int bin_mdef_write(bin_mdef_t *m, const char *filename);
/* Write a binary mdef to a text file. */
183 POCKETSPHINX_EXPORT
184 int bin_mdef_write_text(bin_mdef_t *m, const char *filename);
/* Retain a pointer to a bin_mdef_t. */
188 bin_mdef_t *bin_mdef_retain(bin_mdef_t *m);
/* Release a pointer to a binary mdef. */
192 int bin_mdef_free(bin_mdef_t *m);
193 
/* Context-independent phone lookup. */
198 int bin_mdef_ciphone_id(bin_mdef_t *m,
199  const char *ciphone);
/* Case-insensitive context-independent phone lookup. */
205 int bin_mdef_ciphone_id_nocase(bin_mdef_t *m,
206  const char *ciphone);
208 /* Return value: READ-ONLY ciphone string name for the given ciphone id */
209 const char *bin_mdef_ciphone_str(bin_mdef_t *m,
210  int32 ci);
212 /* Return value: phone id for the given constituents if found, else -1 */
213 int bin_mdef_phone_id(bin_mdef_t *m,
214  int32 b,
215  int32 l,
216  int32 r,
217  int32 pos);
219 /* Look up a phone id, backing off to other word positions. */
220 int bin_mdef_phone_id_nearest(bin_mdef_t * m, int32 b,
221  int32 l, int32 r, int32 pos);
222 
/* Create a phone string for the given phone (base or triphone) id in the
 * given buf. */
228 int bin_mdef_phone_str(bin_mdef_t *m,
229  int pid,
230  char *buf);
/* Close the extern "C" block opened at the top of this header.  A linkage
 * specification is not followed by ';' - the stray semicolon is an empty
 * declaration that pedantic C++ compilers warn about. */
232 #ifdef __cplusplus
233 } /* extern "C" */
234 #endif /* __cplusplus */
235 
236 #endif /* __BIN_MDEF_H__ */
int16 ctx
Context (word position or CI phone)
Definition: bin_mdef.h:105
int16 n_down
Number of children (0 for leafnode)
Definition: bin_mdef.h:106
POCKETSPHINX_EXPORT int bin_mdef_write(bin_mdef_t *m, const char *filename)
Write a binary mdef to a file.
Definition: bin_mdef.c:522
const char * bin_mdef_ciphone_str(bin_mdef_t *m, int32 ci)
Return the READ-ONLY ciphone string name for the given ciphone id (ci: ciphone id for which name wanted).
Definition: bin_mdef.c:737
int32 n_tmat
Number of transition matrices.
Definition: bin_mdef.h:124
int32 n_sen
Number of senones (CI+CD)
Definition: bin_mdef.h:123
int32 ssid
Senone sequence ID.
Definition: bin_mdef.h:74
int bin_mdef_ciphone_id(bin_mdef_t *m, const char *ciphone)
Context-independent phone lookup.
Definition: bin_mdef.c:691
int32 n_cd_tree
Number of nodes in cd_tree (below)
Definition: bin_mdef.h:127
uint16 ** sseq
Unique senone sequences (2D array built at load time)
Definition: bin_mdef.h:134
int32 tmat
Transition matrix ID.
Definition: bin_mdef.h:75
cd_tree_t * cd_tree
Tree mapping CD phones to phone IDs.
Definition: bin_mdef.h:132
uint8 ctx[3]
quintphones will require hacking
Definition: bin_mdef.h:86
mmio_file_t * filemap
File map for this file (if any)
Definition: bin_mdef.h:130
int32 n_phone
Number of base (CI) phones + (CD) triphones.
Definition: bin_mdef.h:120
uint8 * sseq_len
Number of states in each sseq (NULL for homogeneous)
Definition: bin_mdef.h:135
struct mdef_entry_s::@0::@1 ci
CI phone information - attributes (just "filler" for now)
int32 n_sseq
Number of unique senone sequences.
Definition: bin_mdef.h:125
char ** ciname
CI phone names.
Definition: bin_mdef.h:131
POCKETSPHINX_EXPORT bin_mdef_t * bin_mdef_read(cmd_ln_t *config, const char *filename)
Read a binary mdef from a file.
Definition: bin_mdef.c:323
int32 n_ctx
Number of phones of context.
Definition: bin_mdef.h:126
int32 n_ci_sen
Number of CI senones; these are the first.
Definition: bin_mdef.h:122
int bin_mdef_phone_id(bin_mdef_t *m, int32 b, int32 l, int32 r, int32 pos)
Return the phone id for the given constituents if found, else -1 (pos: word position).
Definition: bin_mdef.c:745
int16 sil
CI phone ID for silence.
Definition: bin_mdef.h:128
int16 * cd2cisen
Parent CI-senone id for each senone.
Definition: bin_mdef.h:138
enum bin_mdef_s::@4 alloc_mode
Allocation mode for this object.
int32 n_emit_state
Number of emitting states per phone (0 for heterogeneous)
Definition: bin_mdef.h:121
Definition: bin_mdef.h:73
int32 down
Next level of the tree (offset from start of cd_trees)
Definition: bin_mdef.h:109
int bin_mdef_ciphone_id_nocase(bin_mdef_t *m, const char *ciphone)
Case-insensitive context-independent phone lookup.
Definition: bin_mdef.c:714
int16 * sen2cimap
Parent CI-phone for each senone (CI or CD)
Definition: bin_mdef.h:139
int bin_mdef_free(bin_mdef_t *m)
Release a pointer to a binary mdef.
Definition: bin_mdef.c:272
Model definition.
POCKETSPHINX_EXPORT bin_mdef_t * bin_mdef_read_text(cmd_ln_t *config, const char *filename)
Read a text mdef from a file (creating an in-memory binary mdef).
Definition: bin_mdef.c:66
bin_mdef_t * bin_mdef_retain(bin_mdef_t *m)
Retain a pointer to a bin_mdef_t.
Definition: bin_mdef.c:265
int32 pid
Phone ID (leafnode)
Definition: bin_mdef.h:108
int bin_mdef_phone_str(bin_mdef_t *m, int pid, char *buf)
Create a phone string for the given phone (base or triphone) id in the given buf. ...
Definition: bin_mdef.c:868
mdef_entry_t * phone
All phone structures.
Definition: bin_mdef.h:133
POCKETSPHINX_EXPORT int bin_mdef_write_text(bin_mdef_t *m, const char *filename)
Write a binary mdef to a text file.
Definition: bin_mdef.c:606
int32 n_ciphone
Number of base (CI) phones.
Definition: bin_mdef.h:119