PocketSphinx  5prealpha
dict.h File Reference

Operations on dictionary. More...

#include <sphinxbase/hash_table.h>
#include "s3types.h"
#include "bin_mdef.h"
#include "pocketsphinx_export.h"

Go to the source code of this file.

Data Structures

struct  dictword_t
 a structure for one dictionary word. More...
 
struct  dict_t
 a structure for a dictionary. More...
 

Macros

#define S3DICT_INC_SZ   4096
 
#define dict_size(d)   ((d)->n_word)
 Packaged macro access to dictionary members.
 
#define dict_num_fillers(d)   (dict_filler_end(d) - dict_filler_start(d))
 
#define dict_num_real_words(d)   (dict_size(d) - (dict_filler_end(d) - dict_filler_start(d)) - 2)
 Number of "real words" in the dictionary. More...
 
#define dict_basewid(d, w)   ((d)->word[w].basewid)
 
#define dict_wordstr(d, w)   ((w) < 0 ? NULL : (d)->word[w].word)
 
#define dict_basestr(d, w)   ((d)->word[dict_basewid(d,w)].word)
 
#define dict_nextalt(d, w)   ((d)->word[w].alt)
 
#define dict_pronlen(d, w)   ((d)->word[w].pronlen)
 
#define dict_pron(d, w, p)   ((d)->word[w].ciphone[p])
 The CI phones of the word w at position p.
 
#define dict_filler_start(d)   ((d)->filler_start)
 
#define dict_filler_end(d)   ((d)->filler_end)
 
#define dict_startwid(d)   ((d)->startwid)
 
#define dict_finishwid(d)   ((d)->finishwid)
 
#define dict_silwid(d)   ((d)->silwid)
 
#define dict_is_single_phone(d, w)   ((d)->word[w].pronlen == 1)
 
#define dict_first_phone(d, w)   ((d)->word[w].ciphone[0])
 
#define dict_second_phone(d, w)   ((d)->word[w].ciphone[1])
 
#define dict_second_last_phone(d, w)   ((d)->word[w].ciphone[(d)->word[w].pronlen - 2])
 
#define dict_last_phone(d, w)   ((d)->word[w].ciphone[(d)->word[w].pronlen - 1])
 
#define S3_START_WORD   "<s>"
 
#define S3_FINISH_WORD   "</s>"
 
#define S3_SILENCE_WORD   "<sil>"
 
#define S3_UNKNOWN_WORD   "<UNK>"
 

Functions

dict_t * dict_init (cmd_ln_t *config, bin_mdef_t *mdef)
 Initialize a new dictionary. More...
 
int dict_write (dict_t *dict, char const *filename, char const *format)
 Write dictionary to a file.
 
POCKETSPHINX_EXPORT s3wid_t dict_wordid (dict_t *d, const char *word)
 Return word id for given word string if present. More...
 
int dict_filler_word (dict_t *d, s3wid_t w)
 Return 1 if w is a filler word, 0 if not. More...
 
POCKETSPHINX_EXPORT int dict_real_word (dict_t *d, s3wid_t w)
 Test if w is a "real" word, i.e. neither a filler word nor START/FINISH. More...
 
s3wid_t dict_add_word (dict_t *d, char const *word, s3cipid_t const *p, int32 np)
 Add a word with the given ciphone pronunciation list to the dictionary. More...
 
const char * dict_ciphone_str (dict_t *d, s3wid_t wid, int32 pos)
 Return value: CI phone string for the given word, phone position. More...
 
int32 dict_word2basestr (char *word)
 If the given word contains a trailing "(....)" (i.e., a Sphinx-II style alternative pronunciation specification), strip that trailing portion from it. More...
 
dict_t * dict_retain (dict_t *d)
 Retain a pointer to a dict_t.
 
int dict_free (dict_t *d)
 Release a pointer to a dictionary.
 
void dict_report (dict_t *d)
 Report a dictionary structure. More...
 

Detailed Description

Operations on dictionary.

Definition in file dict.h.

Macro Definition Documentation

#define dict_num_real_words (   d)    (dict_size(d) - (dict_filler_end(d) - dict_filler_start(d)) - 2)

Number of "real words" in the dictionary.

This is the number of words that are not fillers, <s>, or </s>.

Definition at line 158 of file dict.h.

Function Documentation

s3wid_t dict_add_word ( dict_t *  d,
char const *  word,
s3cipid_t const *  p,
int32  np 
)

Add a word with the given ciphone pronunciation list to the dictionary.

Return value: Result word id if successful, BAD_S3WID otherwise

Parameters
d     The dictionary structure.
word  The word.
p     The pronunciation.
np    Number of phones.

Definition at line 80 of file dict.c.

References dictword_t::alt, BAD_S3WID, dictword_t::basewid, dictword_t::ciphone, dict_word2basestr(), dict_t::ht, dict_t::max_words, dict_t::n_word, dictword_t::pronlen, dictword_t::word, and dict_t::word.

Referenced by dict_init(), ps_add_word(), and ps_lattice_read().

const char* dict_ciphone_str ( dict_t *  d,
s3wid_t  wid,
int32  pos 
)

Return value: CI phone string for the given word, phone position.

Parameters
d    In: Dictionary to look up
wid  In: Component word being looked up
pos  In: Pronunciation phone position

Definition at line 69 of file dict.c.

References bin_mdef_ciphone_str(), dictword_t::ciphone, dict_t::mdef, and dict_t::word.

Referenced by dict_write(), and ps_lookup_word().

int dict_filler_word ( dict_t *  d,
s3wid_t  w 
)

Return 1 if w is a filler word, 0 if not.

A filler word is one that was read in from the filler dictionary; however, sentence START and FINISH words are not filler words.

Parameters
d  The dictionary structure
w  The word ID

Definition at line 413 of file dict.c.

References dict_t::filler_start, dict_t::finishwid, and dict_t::startwid.

Referenced by dict_init(), ngram_search_lattice(), ps_lattice_bestpath(), ps_lattice_penalize_fillers(), ps_lattice_posterior(), ps_lattice_read(), and ps_lattice_write_htk().

dict_t* dict_init ( cmd_ln_t *  config,
bin_mdef_t *  mdef 
)

Initialize a new dictionary.

If config and mdef are supplied, then the dictionary will be read from the files specified by the -dict and -fdict options in config, with case sensitivity determined by the -dictcase option.

Otherwise an empty case-sensitive dictionary will be created.

Return ptr to dict_t if successful, NULL otherwise.

Parameters
config  Configuration (-dict, -fdict, -dictcase) or NULL
mdef    For looking up CI phone IDs (or NULL)

Definition at line 252 of file dict.c.

References BAD_S3WID, bin_mdef_retain(), dict_add_word(), dict_filler_word(), dict_free(), dict_wordid(), dict_t::filler_end, dict_t::filler_start, dict_t::finishwid, dict_t::ht, dict_t::max_words, dict_t::mdef, dict_t::n_word, dict_t::silwid, dict_t::startwid, and dict_t::word.

Referenced by ps_lattice_read(), ps_load_dict(), and ps_reinit().

POCKETSPHINX_EXPORT int dict_real_word ( dict_t *  d,
s3wid_t  w 
)

Test if w is a "real" word, i.e. neither a filler word nor START/FINISH.

Parameters
d  The dictionary structure
w  The word ID

Definition at line 427 of file dict.c.

References dict_t::filler_start, dict_t::finishwid, and dict_t::startwid.

Referenced by dict_write(), ngram_search_bp_hyp(), ps_astar_hyp(), and ps_lattice_hyp().

void dict_report ( dict_t *  d)

Report a dictionary structure.

Parameters
d  A dictionary structure

Definition at line 499 of file dict.c.

References dict_t::max_words, and dict_t::n_word.

int32 dict_word2basestr ( char *  word)

If the given word contains a trailing "(....)" (i.e., a Sphinx-II style alternative pronunciation specification), strip that trailing portion from it.

Note that the given string is modified. Return value: If string was modified, the character position at which the original string was truncated; otherwise -1.

Definition at line 442 of file dict.c.

Referenced by dict_add_word(), and ps_lattice_read().

POCKETSPHINX_EXPORT s3wid_t dict_wordid ( dict_t *  d,
const char *  word 
)

Return word id for given word string if present.

Otherwise return BAD_S3WID

Definition at line 399 of file dict.c.

References BAD_S3WID, and dict_t::ht.

Referenced by dict_init(), ps_lattice_read(), and ps_lookup_word().