PocketSphinx  5.0.0rc5
A small speech recognizer
Macros | Typedefs | Enumerations | Functions
model.h File Reference

Public API for language models. More...

#include <stdio.h>
#include <pocketsphinx/prim_type.h>
#include <pocketsphinx/logmath.h>
#include <pocketsphinx/export.h>

Go to the source code of this file.

Macros

#define NGRAM_INVALID_WID   -1
 

Typedefs

typedef struct cmd_ln_s ps_config_t
 
typedef struct jsgf_s jsgf_t
 
typedef struct jsgf_rule_s jsgf_rule_t
 
typedef struct fsg_model_s fsg_model_t
 
typedef struct ngram_model_s ngram_model_t
 
typedef struct hash_iter_s jsgf_rule_iter_t
 
typedef struct ngram_class_s ngram_class_t
 
typedef enum ngram_file_type_e ngram_file_type_t
 File types for N-Gram files.
 
typedef enum ngram_case_e ngram_case_t
 
typedef struct ngram_iter_s ngram_iter_t
 
typedef struct ngram_model_set_iter_s ngram_model_set_iter_t
 

Enumerations

enum  ngram_file_type_e { NGRAM_INVALID = -1, NGRAM_AUTO, NGRAM_ARPA, NGRAM_BIN }
 
enum  ngram_case_e { NGRAM_UPPER, NGRAM_LOWER }
 

Functions

POCKETSPHINX_EXPORT jsgf_t * jsgf_parse_file (const char *filename, jsgf_t *parent)
 
POCKETSPHINX_EXPORT jsgf_t * jsgf_parse_string (const char *string, jsgf_t *parent)
 
POCKETSPHINX_EXPORT const char * jsgf_grammar_name (jsgf_t *jsgf)
 
POCKETSPHINX_EXPORT void jsgf_grammar_free (jsgf_t *jsgf)
 
POCKETSPHINX_EXPORT jsgf_rule_t * jsgf_get_rule (jsgf_t *grammar, const char *name)
 
POCKETSPHINX_EXPORT jsgf_rule_t * jsgf_get_public_rule (jsgf_t *grammar)
 
POCKETSPHINX_EXPORT const char * jsgf_rule_name (jsgf_rule_t *rule)
 
POCKETSPHINX_EXPORT int jsgf_rule_public (jsgf_rule_t *rule)
 
POCKETSPHINX_EXPORT jsgf_rule_iter_t * jsgf_rule_iter (jsgf_t *grammar)
 
POCKETSPHINX_EXPORT jsgf_rule_iter_t * jsgf_rule_iter_next (jsgf_rule_iter_t *itor)
 
POCKETSPHINX_EXPORT jsgf_rule_t * jsgf_rule_iter_rule (jsgf_rule_iter_t *itor)
 
POCKETSPHINX_EXPORT void jsgf_rule_iter_free (jsgf_rule_iter_t *itor)
 
POCKETSPHINX_EXPORT fsg_model_t * jsgf_build_fsg (jsgf_t *grammar, jsgf_rule_t *rule, logmath_t *lmath, float32 lw)
 
POCKETSPHINX_EXPORT fsg_model_t * jsgf_read_file (const char *file, logmath_t *lmath, float32 lw)
 
POCKETSPHINX_EXPORT fsg_model_t * jsgf_read_string (const char *string, logmath_t *lmath, float32 lw)
 
POCKETSPHINX_EXPORT int jsgf_write_fsg (jsgf_t *grammar, jsgf_rule_t *rule, FILE *outfh)
 
POCKETSPHINX_EXPORT fsg_model_t * fsg_model_retain (fsg_model_t *fsg)
 
POCKETSPHINX_EXPORT int fsg_model_free (fsg_model_t *fsg)
 
POCKETSPHINX_EXPORT ngram_model_t * ngram_model_read (ps_config_t *config, const char *file_name, ngram_file_type_t file_type, logmath_t *lmath)
 
POCKETSPHINX_EXPORT int ngram_model_write (ngram_model_t *model, const char *file_name, ngram_file_type_t format)
 
POCKETSPHINX_EXPORT ngram_file_type_t ngram_file_name_to_type (const char *file_name)
 
POCKETSPHINX_EXPORT ngram_file_type_t ngram_str_to_type (const char *str_name)
 
POCKETSPHINX_EXPORT const char * ngram_type_to_str (int type)
 
POCKETSPHINX_EXPORT ngram_model_t * ngram_model_retain (ngram_model_t *model)
 
POCKETSPHINX_EXPORT int ngram_model_free (ngram_model_t *model)
 
POCKETSPHINX_EXPORT int ngram_model_casefold (ngram_model_t *model, int kase)
 
POCKETSPHINX_EXPORT int ngram_model_apply_weights (ngram_model_t *model, float32 lw, float32 wip)
 
POCKETSPHINX_EXPORT float32 ngram_model_get_weights (ngram_model_t *model, int32 *out_log_wip)
 
POCKETSPHINX_EXPORT int32 ngram_score (ngram_model_t *model, const char *word,...)
 
POCKETSPHINX_EXPORT int32 ngram_tg_score (ngram_model_t *model, int32 w3, int32 w2, int32 w1, int32 *n_used)
 
POCKETSPHINX_EXPORT int32 ngram_bg_score (ngram_model_t *model, int32 w2, int32 w1, int32 *n_used)
 
POCKETSPHINX_EXPORT int32 ngram_ng_score (ngram_model_t *model, int32 wid, int32 *history, int32 n_hist, int32 *n_used)
 
POCKETSPHINX_EXPORT int32 ngram_probv (ngram_model_t *model, const char *word,...)
 
POCKETSPHINX_EXPORT int32 ngram_prob (ngram_model_t *model, const char *const *words, int32 n)
 
POCKETSPHINX_EXPORT int32 ngram_ng_prob (ngram_model_t *model, int32 wid, int32 *history, int32 n_hist, int32 *n_used)
 
POCKETSPHINX_EXPORT int32 ngram_score_to_prob (ngram_model_t *model, int32 score)
 
POCKETSPHINX_EXPORT int32 ngram_wid (ngram_model_t *model, const char *word)
 
const POCKETSPHINX_EXPORT char * ngram_word (ngram_model_t *model, int32 wid)
 
POCKETSPHINX_EXPORT int32 ngram_unknown_wid (ngram_model_t *model)
 
POCKETSPHINX_EXPORT int32 ngram_zero (ngram_model_t *model)
 
POCKETSPHINX_EXPORT int32 ngram_model_get_size (ngram_model_t *model)
 
POCKETSPHINX_EXPORT const uint32 * ngram_model_get_counts (ngram_model_t *model)
 
POCKETSPHINX_EXPORT ngram_iter_t * ngram_model_mgrams (ngram_model_t *model, int m)
 
POCKETSPHINX_EXPORT ngram_iter_t * ngram_iter (ngram_model_t *model, const char *word,...)
 
POCKETSPHINX_EXPORT ngram_iter_t * ngram_ng_iter (ngram_model_t *model, int32 wid, int32 *history, int32 n_hist)
 
POCKETSPHINX_EXPORT const int32 * ngram_iter_get (ngram_iter_t *itor, int32 *out_score, int32 *out_bowt)
 
POCKETSPHINX_EXPORT ngram_iter_t * ngram_iter_successors (ngram_iter_t *itor)
 
POCKETSPHINX_EXPORT ngram_iter_t * ngram_iter_next (ngram_iter_t *itor)
 
POCKETSPHINX_EXPORT void ngram_iter_free (ngram_iter_t *itor)
 
POCKETSPHINX_EXPORT int32 ngram_model_add_word (ngram_model_t *model, const char *word, float32 weight)
 
POCKETSPHINX_EXPORT int32 ngram_model_read_classdef (ngram_model_t *model, const char *file_name)
 
POCKETSPHINX_EXPORT int32 ngram_model_add_class (ngram_model_t *model, const char *classname, float32 classweight, char **words, const float32 *weights, int32 n_words)
 
POCKETSPHINX_EXPORT int32 ngram_model_add_class_word (ngram_model_t *model, const char *classname, const char *word, float32 weight)
 
POCKETSPHINX_EXPORT ngram_model_t * ngram_model_set_init (ps_config_t *config, ngram_model_t **models, char **names, const float32 *weights, int32 n_models)
 
POCKETSPHINX_EXPORT ngram_model_t * ngram_model_set_read (ps_config_t *config, const char *lmctlfile, logmath_t *lmath)
 
POCKETSPHINX_EXPORT int32 ngram_model_set_count (ngram_model_t *set)
 
POCKETSPHINX_EXPORT ngram_model_set_iter_t * ngram_model_set_iter (ngram_model_t *set)
 
POCKETSPHINX_EXPORT ngram_model_set_iter_t * ngram_model_set_iter_next (ngram_model_set_iter_t *itor)
 
POCKETSPHINX_EXPORT void ngram_model_set_iter_free (ngram_model_set_iter_t *itor)
 
POCKETSPHINX_EXPORT ngram_model_t * ngram_model_set_iter_model (ngram_model_set_iter_t *itor, char const **lmname)
 
POCKETSPHINX_EXPORT ngram_model_t * ngram_model_set_select (ngram_model_t *set, const char *name)
 
POCKETSPHINX_EXPORT ngram_model_t * ngram_model_set_lookup (ngram_model_t *set, const char *name)
 
const POCKETSPHINX_EXPORT char * ngram_model_set_current (ngram_model_t *set)
 
POCKETSPHINX_EXPORT ngram_model_t * ngram_model_set_interp (ngram_model_t *set, const char **names, const float32 *weights)
 
POCKETSPHINX_EXPORT ngram_model_t * ngram_model_set_add (ngram_model_t *set, ngram_model_t *model, const char *name, float32 weight, int reuse_widmap)
 
POCKETSPHINX_EXPORT ngram_model_t * ngram_model_set_remove (ngram_model_t *set, const char *name, int reuse_widmap)
 
POCKETSPHINX_EXPORT void ngram_model_set_map_words (ngram_model_t *set, const char **words, int32 n_words)
 
POCKETSPHINX_EXPORT int32 ngram_model_set_current_wid (ngram_model_t *set, int32 set_wid)
 
POCKETSPHINX_EXPORT int32 ngram_model_set_known_wid (ngram_model_t *set, int32 set_wid)
 
POCKETSPHINX_EXPORT void ngram_model_flush (ngram_model_t *lm)
 

Detailed Description

Public API for language models.

Macro Definition Documentation

◆ NGRAM_INVALID_WID

#define NGRAM_INVALID_WID   -1

Impossible word ID

Typedef Documentation

◆ jsgf_rule_iter_t

typedef struct hash_iter_s jsgf_rule_iter_t

Iterator over rules in a grammar.

◆ ngram_case_t

typedef enum ngram_case_e ngram_case_t

Constants for case folding.

◆ ngram_iter_t

typedef struct ngram_iter_s ngram_iter_t

M-gram iterator object.

◆ ngram_model_set_iter_t

typedef struct ngram_model_set_iter_s ngram_model_set_iter_t

Iterator over language models in a set.

Enumeration Type Documentation

◆ ngram_case_e

Constants for case folding.

◆ ngram_file_type_e

Enumerator
NGRAM_INVALID 

Not a valid file type.

NGRAM_AUTO 

Determine file type automatically.

NGRAM_ARPA 

ARPABO text format (the standard).

NGRAM_BIN 

Sphinx .DMP format.

Function Documentation

◆ fsg_model_free()

POCKETSPHINX_EXPORT int fsg_model_free ( fsg_model_t * fsg)

Free the given word FSG.

Returns
new reference count (0 if freed completely)

◆ fsg_model_retain()

POCKETSPHINX_EXPORT fsg_model_t* fsg_model_retain ( fsg_model_t * fsg)

Retain ownership of an FSG.

Returns
Pointer to retained FSG.

◆ jsgf_build_fsg()

POCKETSPHINX_EXPORT fsg_model_t* jsgf_build_fsg ( jsgf_t * grammar,
jsgf_rule_t * rule,
logmath_t * lmath,
float32  lw 
)

Build a Sphinx FSG object from a JSGF rule.

◆ jsgf_get_public_rule()

POCKETSPHINX_EXPORT jsgf_rule_t* jsgf_get_public_rule ( jsgf_t * grammar)

Returns the first public rule of the grammar

◆ jsgf_get_rule()

POCKETSPHINX_EXPORT jsgf_rule_t* jsgf_get_rule ( jsgf_t * grammar,
const char *  name 
)

Get a rule by name from a grammar. Name should not contain brackets.

◆ jsgf_grammar_free()

POCKETSPHINX_EXPORT void jsgf_grammar_free ( jsgf_t * jsgf)

Free a JSGF grammar.

◆ jsgf_grammar_name()

POCKETSPHINX_EXPORT const char* jsgf_grammar_name ( jsgf_t * jsgf)

Get the grammar name from the file.

◆ jsgf_parse_file()

POCKETSPHINX_EXPORT jsgf_t* jsgf_parse_file ( const char *  filename,
jsgf_t * parent
)

Parse a JSGF grammar from a file.

Parameters
filename: the name of the file to parse.
parent: optional parent grammar for this one (NULL, usually).
Returns
new JSGF grammar object, or NULL on failure.

◆ jsgf_parse_string()

POCKETSPHINX_EXPORT jsgf_t* jsgf_parse_string ( const char *  string,
jsgf_t * parent
)

Parse a JSGF grammar from a string.

Parameters
string: 0-terminated string with grammar.
parent: optional parent grammar for this one (NULL, usually).
Returns
new JSGF grammar object, or NULL on failure.

◆ jsgf_read_file()

POCKETSPHINX_EXPORT fsg_model_t* jsgf_read_file ( const char *  file,
logmath_t * lmath,
float32  lw 
)

Read JSGF from file and return FSG object from it.

This function looks for the first public rule in the JSGF grammar and constructs an FSG from it.

◆ jsgf_read_string()

POCKETSPHINX_EXPORT fsg_model_t* jsgf_read_string ( const char *  string,
logmath_t * lmath,
float32  lw 
)

Read JSGF from string and return FSG object from it.

This function looks for the first public rule in the JSGF grammar and constructs an FSG from it.

◆ jsgf_rule_iter()

POCKETSPHINX_EXPORT jsgf_rule_iter_t* jsgf_rule_iter ( jsgf_t * grammar)

Get an iterator over all rules in a grammar.

◆ jsgf_rule_iter_free()

POCKETSPHINX_EXPORT void jsgf_rule_iter_free ( jsgf_rule_iter_t * itor)

Free a rule iterator (if the end hasn't been reached).

◆ jsgf_rule_iter_next()

POCKETSPHINX_EXPORT jsgf_rule_iter_t* jsgf_rule_iter_next ( jsgf_rule_iter_t * itor)

Advance an iterator to the next rule in the grammar.

◆ jsgf_rule_iter_rule()

POCKETSPHINX_EXPORT jsgf_rule_t* jsgf_rule_iter_rule ( jsgf_rule_iter_t * itor)

Get the current rule in a rule iterator.

◆ jsgf_rule_name()

POCKETSPHINX_EXPORT const char* jsgf_rule_name ( jsgf_rule_t * rule)

Get the rule name from a rule.

◆ jsgf_rule_public()

POCKETSPHINX_EXPORT int jsgf_rule_public ( jsgf_rule_t * rule)

Test if a rule is public or not.

◆ jsgf_write_fsg()

POCKETSPHINX_EXPORT int jsgf_write_fsg ( jsgf_t * grammar,
jsgf_rule_t * rule,
FILE *  outfh 
)

Convert a JSGF rule to Sphinx FSG text form.

This does a direct conversion without doing transitive closure on null transitions and so forth.

◆ ngram_bg_score()

POCKETSPHINX_EXPORT int32 ngram_bg_score ( ngram_model_t * model,
int32  w2,
int32  w1,
int32 *  n_used 
)

Quick bigram score lookup.

◆ ngram_file_name_to_type()

POCKETSPHINX_EXPORT ngram_file_type_t ngram_file_name_to_type ( const char *  file_name)

Guess the file type for an N-Gram model from the filename.

Returns
the guessed file type, or NGRAM_INVALID if none could be guessed.

◆ ngram_iter()

POCKETSPHINX_EXPORT ngram_iter_t* ngram_iter ( ngram_model_t * model,
const char *  word,
  ... 
)

Get an iterator over M-grams pointing to the specified M-gram.

◆ ngram_iter_free()

POCKETSPHINX_EXPORT void ngram_iter_free ( ngram_iter_t * itor)

Terminate an M-gram iterator.

◆ ngram_iter_get()

POCKETSPHINX_EXPORT const int32* ngram_iter_get ( ngram_iter_t * itor,
int32 *  out_score,
int32 *  out_bowt 
)

Get information from the current M-gram in an iterator.

Parameters
out_score: Output: Score for this M-gram (including any word penalty and language weight).
out_bowt: Output: Backoff weight for this M-gram.
Returns
read-only array of word IDs.

◆ ngram_iter_next()

POCKETSPHINX_EXPORT ngram_iter_t* ngram_iter_next ( ngram_iter_t * itor)

Advance an M-gram iterator.

◆ ngram_iter_successors()

POCKETSPHINX_EXPORT ngram_iter_t* ngram_iter_successors ( ngram_iter_t * itor)

Iterate over all M-gram successors of an M-1-gram.

Parameters
itor: Iterator pointing to the M-1-gram to get successors of.

◆ ngram_model_add_class()

POCKETSPHINX_EXPORT int32 ngram_model_add_class ( ngram_model_t * model,
const char *  classname,
float32  classweight,
char **  words,
const float32 *  weights,
int32  n_words 
)

Add a new class to a language model.

If classname already exists in the unigram set for model, then it will be converted to a class tag, and classweight will be ignored. Otherwise, a new unigram will be created as in ngram_model_add_word().

◆ ngram_model_add_class_word()

POCKETSPHINX_EXPORT int32 ngram_model_add_class_word ( ngram_model_t * model,
const char *  classname,
const char *  word,
float32  weight 
)

Add a word to a class in a language model.

Parameters
model: The model to add a word to.
classname: Name of the class to add this word to.
word: Text of the word to add.
weight: Weight of this word relative to the within-class uniform distribution.
Returns
The word ID for the new word.

◆ ngram_model_add_word()

POCKETSPHINX_EXPORT int32 ngram_model_add_word ( ngram_model_t * model,
const char *  word,
float32  weight 
)

Add a word (unigram) to the language model.

Note
The semantics of this are not particularly well-defined for model sets, and may be subject to change. Currently this will add the word to all of the submodels.
Parameters
model: The model to add a word to.
word: Text of the word to add.
weight: Weight of this word relative to the uniform distribution.
Returns
The word ID for the new word.

◆ ngram_model_apply_weights()

POCKETSPHINX_EXPORT int ngram_model_apply_weights ( ngram_model_t * model,
float32  lw,
float32  wip 
)

Apply a language weight and word insertion penalty to a language model.

This will change the values output by ngram_score() and friends. This is done for efficiency since in decoding, these are the only values we actually need. Call ngram_prob() if you want the "raw" N-Gram probability estimate.

To remove all weighting, call ngram_model_apply_weights(model, 1.0, 1.0).

◆ ngram_model_casefold()

POCKETSPHINX_EXPORT int ngram_model_casefold ( ngram_model_t * model,
int  kase 
)

Case-fold word strings in an N-Gram model.

WARNING: This is not Unicode aware, so any non-ASCII characters will not be converted.

◆ ngram_model_flush()

POCKETSPHINX_EXPORT void ngram_model_flush ( ngram_model_t * lm)

Flush any cached N-Gram information

◆ ngram_model_free()

POCKETSPHINX_EXPORT int ngram_model_free ( ngram_model_t * model)

Release memory associated with an N-Gram model.

Returns
new reference count (0 if freed completely)

◆ ngram_model_get_counts()

POCKETSPHINX_EXPORT const uint32* ngram_model_get_counts ( ngram_model_t * model)

Get the counts of the various N-grams in the model.

◆ ngram_model_get_size()

POCKETSPHINX_EXPORT int32 ngram_model_get_size ( ngram_model_t * model)

Get the order of the N-gram model (i.e. the "N" in "N-gram")

◆ ngram_model_get_weights()

POCKETSPHINX_EXPORT float32 ngram_model_get_weights ( ngram_model_t * model,
int32 *  out_log_wip 
)

Get the current weights from a language model.

Parameters
model: The model in question.
out_log_wip: Output: (optional) logarithm of word insertion penalty.
Returns
language weight.

◆ ngram_model_mgrams()

POCKETSPHINX_EXPORT ngram_iter_t* ngram_model_mgrams ( ngram_model_t * model,
int  m 
)

Iterate over all M-grams.

Parameters
model: Language model to query.
m: Order of the M-Grams requested minus one (i.e. order of the history)
Returns
An iterator over the requested M-grams, or NULL if no N-grams of order M+1 exist.

◆ ngram_model_read()

POCKETSPHINX_EXPORT ngram_model_t* ngram_model_read ( ps_config_t * config,
const char *  file_name,
ngram_file_type_t  file_type,
logmath_t * lmath
)

Read an N-Gram model from a file on disk.

Parameters
config: Optional pointer to a set of command-line arguments. Recognized arguments are:
  • -mmap (boolean) whether to use memory-mapped I/O
  • -lw (float32) language weight to apply to the model
  • -wip (float32) word insertion penalty to apply to the model
file_name: path to the file to read.
file_type: type of the file, or NGRAM_AUTO to determine automatically.
lmath: Log-math parameters to use for probability calculations. Ownership of this object is assumed by the newly created ngram_model_t, and you should not attempt to free it manually. If you wish to reuse it elsewhere, you must retain it with logmath_retain().
Returns
newly created ngram_model_t.

◆ ngram_model_read_classdef()

POCKETSPHINX_EXPORT int32 ngram_model_read_classdef ( ngram_model_t * model,
const char *  file_name 
)

Read a class definition file and add classes to a language model.

This function assumes that the class tags have already been defined as unigrams in the language model. All words in the class definition will be added to the vocabulary as special in-class words. For this reason it is necessary that they not have the same names as any words in the general unigram distribution. The convention is to suffix them with ":class_tag", where class_tag is the class tag minus the enclosing square brackets.

Returns
0 for success, <0 for error

◆ ngram_model_retain()

POCKETSPHINX_EXPORT ngram_model_t* ngram_model_retain ( ngram_model_t * model)

Retain ownership of an N-Gram model.

Returns
Pointer to retained model.

◆ ngram_model_set_add()

POCKETSPHINX_EXPORT ngram_model_t* ngram_model_set_add ( ngram_model_t * set,
ngram_model_t * model,
const char *  name,
float32  weight,
int  reuse_widmap 
)

Add a language model to a set.

Parameters
set: The language model set to add to.
model: The language model to add.
name: The name to associate with this model.
weight: Interpolation weight for this model, relative to the uniform distribution. 1.0 is a safe value.
reuse_widmap: Reuse the existing word-ID mapping in set. Any new words present in model will not be added to the word-ID mapping in this case.

◆ ngram_model_set_count()

POCKETSPHINX_EXPORT int32 ngram_model_set_count ( ngram_model_t * set)

Returns the number of language models in a set.

◆ ngram_model_set_current()

const POCKETSPHINX_EXPORT char* ngram_model_set_current ( ngram_model_t * set)

Get the current language model name, if any.

◆ ngram_model_set_current_wid()

POCKETSPHINX_EXPORT int32 ngram_model_set_current_wid ( ngram_model_t * set,
int32  set_wid 
)

Query the word-ID mapping for the current language model.

Returns
the local word ID in the current language model, or NGRAM_INVALID_WID if set_wid is invalid or interpolation is enabled.

◆ ngram_model_set_init()

POCKETSPHINX_EXPORT ngram_model_t* ngram_model_set_init ( ps_config_t * config,
ngram_model_t **  models,
char **  names,
const float32 *  weights,
int32  n_models 
)

Create a set of language models sharing a common space of word IDs.

This function creates a meta-language model which groups together a set of language models, synchronizing word IDs between them. To use this language model, you can either select a submodel to use exclusively using ngram_model_set_select(), or interpolate between scores from all models. To do the latter, you can either pass a non-NULL value of the weights parameter, or re-activate interpolation later on by calling ngram_model_set_interp().

In order to make this efficient, there are some restrictions on the models that can be grouped together. The most important (and currently the only) one is that they must all share the same log-math parameters.

Parameters
config: Any configuration parameters to be shared between models.
models: Array of pointers to previously created language models.
names: Array of strings to use as unique identifiers for LMs.
weights: Array of weights to use in interpolating LMs, or NULL for no interpolation.
n_models: Number of elements in the arrays passed to this function.

◆ ngram_model_set_interp()

POCKETSPHINX_EXPORT ngram_model_t* ngram_model_set_interp ( ngram_model_t * set,
const char **  names,
const float32 *  weights 
)

Set interpolation weights for a set and enables interpolation.

If weights is NULL, any previously initialized set of weights will be used. If no weights were specified to ngram_model_set_init(), then a uniform distribution will be used.

◆ ngram_model_set_iter()

POCKETSPHINX_EXPORT ngram_model_set_iter_t* ngram_model_set_iter ( ngram_model_t * set)

Begin iterating over language models in a set.

Returns
iterator pointing to the first language model, or NULL if no models remain.

◆ ngram_model_set_iter_free()

POCKETSPHINX_EXPORT void ngram_model_set_iter_free ( ngram_model_set_iter_t * itor)

Finish iteration over a language model set.

◆ ngram_model_set_iter_model()

POCKETSPHINX_EXPORT ngram_model_t* ngram_model_set_iter_model ( ngram_model_set_iter_t * itor,
char const **  lmname 
)

Get language model and associated name from an iterator.

Parameters
itor: the iterator
lmname: Output: string name associated with this language model.
Returns
Language model pointed to by this iterator.

◆ ngram_model_set_iter_next()

POCKETSPHINX_EXPORT ngram_model_set_iter_t* ngram_model_set_iter_next ( ngram_model_set_iter_t * itor)

Move to the next language model in a set.

Returns
iterator pointing to the next language model, or NULL if no models remain.

◆ ngram_model_set_known_wid()

POCKETSPHINX_EXPORT int32 ngram_model_set_known_wid ( ngram_model_t * set,
int32  set_wid 
)

Test whether a word ID corresponds to a known word in the current state of the language model set.

Returns
If there is a current language model, returns non-zero if set_wid corresponds to a known word in that language model. Otherwise, returns non-zero if set_wid corresponds to a known word in any language model.

◆ ngram_model_set_lookup()

POCKETSPHINX_EXPORT ngram_model_t* ngram_model_set_lookup ( ngram_model_t * set,
const char *  name 
)

Look up a language model by name from a set.

Returns
language model corresponding to name, or NULL if no language model by that name exists.

◆ ngram_model_set_map_words()

POCKETSPHINX_EXPORT void ngram_model_set_map_words ( ngram_model_t * set,
const char **  words,
int32  n_words 
)

Set the word-to-ID mapping for this model set.

◆ ngram_model_set_read()

POCKETSPHINX_EXPORT ngram_model_t* ngram_model_set_read ( ps_config_t * config,
const char *  lmctlfile,
logmath_t * lmath
)

Read a set of language models from a control file.

This function creates a language model set from a "control file" of the type used in Sphinx-II and Sphinx-III. File format (optional parts are enclosed in []):

  [{ LMClassFileName LMClassFilename ... }]
  TrigramLMFileName LMName [{ LMClassName LMClassName ... }]
  TrigramLMFileName LMName [{ LMClassName LMClassName ... }]
  ...
(There should be whitespace around the { and } delimiters.)

This is an extension of the older format that had only TrigramLMFileName and LMName pairs. The new format allows a set of LMClass files to be read in and referred to by the trigram LMs.

No "comments" allowed in this file.

Parameters
config: Configuration parameters.
lmctlfile: Path to the language model control file.
lmath: Log-math parameters to use for probability calculations. Ownership of this object is assumed by the newly created ngram_model_t, and you should not attempt to free it manually. If you wish to reuse it elsewhere, you must retain it with logmath_retain().
Returns
newly created language model set.

◆ ngram_model_set_remove()

POCKETSPHINX_EXPORT ngram_model_t* ngram_model_set_remove ( ngram_model_t * set,
const char *  name,
int  reuse_widmap 
)

Remove a language model from a set.

Parameters
set: The language model set to remove from.
name: The name associated with the model to remove.
reuse_widmap: Reuse the existing word-ID mapping in set.

◆ ngram_model_set_select()

POCKETSPHINX_EXPORT ngram_model_t* ngram_model_set_select ( ngram_model_t * set,
const char *  name 
)

Select a single language model from a set for scoring.

Returns
the newly selected language model, or NULL if no language model by that name exists.

◆ ngram_model_write()

POCKETSPHINX_EXPORT int ngram_model_write ( ngram_model_t * model,
const char *  file_name,
ngram_file_type_t  format 
)

Write an N-Gram model to disk.

Returns
0 for success, <0 on error

◆ ngram_ng_iter()

POCKETSPHINX_EXPORT ngram_iter_t* ngram_ng_iter ( ngram_model_t * model,
int32  wid,
int32 *  history,
int32  n_hist 
)

Get an iterator over M-grams pointing to the specified M-gram.

◆ ngram_ng_prob()

POCKETSPHINX_EXPORT int32 ngram_ng_prob ( ngram_model_t * model,
int32  wid,
int32 *  history,
int32  n_hist,
int32 *  n_used 
)

Quick "raw" probability lookup for a general N-Gram.

See documentation for ngram_ng_score() and ngram_model_apply_weights() for an explanation of this.

◆ ngram_ng_score()

POCKETSPHINX_EXPORT int32 ngram_ng_score ( ngram_model_t * model,
int32  wid,
int32 *  history,
int32  n_hist,
int32 *  n_used 
)

Quick general N-Gram score lookup.

◆ ngram_prob()

POCKETSPHINX_EXPORT int32 ngram_prob ( ngram_model_t * model,
const char *const *  words,
int32  n 
)

Get the "raw" log-probability for a general N-Gram.

This returns the log-probability of an N-Gram, as defined in the language model file, before any language weighting, interpolation, or insertion penalty has been applied.

Note
When backing off to a unigram from a bigram or trigram, the unigram weight (interpolation with uniform) is not removed.

◆ ngram_probv()

POCKETSPHINX_EXPORT int32 ngram_probv ( ngram_model_t * model,
const char *  word,
  ... 
)

Get the "raw" log-probability for a general N-Gram.

This returns the log-probability of an N-Gram, as defined in the language model file, before any language weighting, interpolation, or insertion penalty has been applied.

Note
When backing off to a unigram from a bigram or trigram, the unigram weight (interpolation with uniform) is not removed.

◆ ngram_score()

POCKETSPHINX_EXPORT int32 ngram_score ( ngram_model_t * model,
const char *  word,
  ... 
)

Get the score (scaled, interpolated log-probability) for a general N-Gram.

The argument list consists of the history words (as null-terminated strings) of the N-Gram, in reverse order, followed by NULL. Therefore, if you wanted to get the N-Gram score for "a whole joy", you would call:

 score = ngram_score(model, "joy", "whole", "a", NULL);

This is not the function to use in decoding, because it has some overhead for looking up words. Use ngram_ng_score(), ngram_tg_score(), or ngram_bg_score() instead. In the future there will probably be a version that takes a general language model state object, to support suffix-array LM and things like that.

If one of the words is not in the LM's vocabulary, the result will depend on whether this is an open or closed vocabulary language model. For an open-vocabulary model, unknown words are all mapped to the unigram <UNK> which has a non-zero probability and also participates in higher-order N-Grams. Therefore, you will get a score of some sort in this case.

For a closed-vocabulary model, unknown words are impossible and thus have zero probability. Therefore, if word is unknown, this function will return a "zero" log-probability, i.e. a large negative number. To obtain this number for comparison, call ngram_zero().

◆ ngram_score_to_prob()

POCKETSPHINX_EXPORT int32 ngram_score_to_prob ( ngram_model_t * model,
int32  score 
)

Convert score to "raw" log-probability.

Note
The unigram weight (interpolation with uniform) is not removed, since there is no way to know which order of N-Gram generated score.
Parameters
model: The N-Gram model from which score was obtained.
score: The N-Gram score to convert
Returns
The raw log-probability value.

◆ ngram_str_to_type()

POCKETSPHINX_EXPORT ngram_file_type_t ngram_str_to_type ( const char *  str_name)

Get the N-Gram file type from a string.

Returns
file type, or NGRAM_INVALID if no such file type exists.

◆ ngram_tg_score()

POCKETSPHINX_EXPORT int32 ngram_tg_score ( ngram_model_t * model,
int32  w3,
int32  w2,
int32  w1,
int32 *  n_used 
)

Quick trigram score lookup.

◆ ngram_type_to_str()

POCKETSPHINX_EXPORT const char* ngram_type_to_str ( int  type)

Get the canonical name for an N-Gram file type.

Returns
read-only string with the name for this file type, or NULL if no such type exists.

◆ ngram_unknown_wid()

POCKETSPHINX_EXPORT int32 ngram_unknown_wid ( ngram_model_t * model)

Get the unknown word ID for a language model.

Language models can be either "open vocabulary" or "closed vocabulary". The difference is that the former assigns a fixed non-zero unigram probability to unknown words, while the latter does not allow unknown words (or, equivalently, it assigns them zero probability). If this is a closed vocabulary model, this function will return NGRAM_INVALID_WID.

Returns
The ID for the unknown word, or NGRAM_INVALID_WID if none exists.

◆ ngram_wid()

POCKETSPHINX_EXPORT int32 ngram_wid ( ngram_model_t * model,
const char *  word 
)

Look up numerical word ID.

◆ ngram_word()

const POCKETSPHINX_EXPORT char* ngram_word ( ngram_model_t * model,
int32  wid 
)

Look up word string for numerical word ID.

◆ ngram_zero()

POCKETSPHINX_EXPORT int32 ngram_zero ( ngram_model_t * model)

Get the "zero" log-probability value for a language model.