public class NgramTrieModel extends java.lang.Object implements LanguageModel
Modifier and Type | Class and Description |
---|---|
static class |
NgramTrieModel.TrieRange
Structure to keep ngram indexes range for trie traversal
|
static class |
NgramTrieModel.TrieUnigram
Structure that keeps unigram instance data in trie.
|
Modifier and Type | Field and Description |
---|---|
protected boolean |
applyLanguageWeightAndWip |
protected boolean |
clearCacheAfterUtterance |
protected int[] |
counts |
protected int |
curDepth |
protected Dictionary |
dictionary |
protected java.lang.String |
format |
protected float |
languageWeight |
protected java.util.logging.Logger |
logger |
protected LogMath |
logMath |
protected float |
logWip |
protected int |
maxDepth |
protected int |
ngramCacheSize |
protected java.lang.String |
ngramLogFile |
static java.lang.String |
PROP_APPLY_LANGUAGE_WEIGHT_AND_WIP
The property that controls whether or not the language model will apply
the language weight and word insertion probability
|
static java.lang.String |
PROP_CLEAR_CACHES_AFTER_UTTERANCE
The property that controls whether the ngram caches are cleared after
every utterance
|
static java.lang.String |
PROP_LANGUAGE_WEIGHT
The property that defines the language weight for the search
|
static java.lang.String |
PROP_NGRAM_CACHE_SIZE
The property that defines the maximum number of ngrams to be cached
|
static java.lang.String |
PROP_QUERY_LOG_FILE
The property for the name of the file that logs all the queried N-grams.
|
static java.lang.String |
PROP_WORD_INSERTION_PROBABILITY
Word insertion probability property
|
protected NgramTrieQuant |
quant |
protected NgramTrie |
trie |
protected java.util.Map<Word,java.lang.Integer> |
unigramIDMap |
protected NgramTrieModel.TrieUnigram[] |
unigrams |
protected float |
unigramWeight |
protected java.lang.String[] |
words |
PROP_DICTIONARY, PROP_LOCATION, PROP_MAX_DEPTH, PROP_UNIGRAM_WEIGHT
Constructor and Description |
---|
NgramTrieModel() |
NgramTrieModel(java.lang.String format,
java.net.URL location,
java.lang.String ngramLogFile,
int maxNGramCacheSize,
boolean clearCacheAfterUtterance,
int maxDepth,
Dictionary dictionary,
boolean applyLanguageWeightAndWip,
float languageWeight,
double wip,
float unigramWeight) |
Modifier and Type | Method and Description |
---|---|
void |
allocate()
Create the language model
|
void |
deallocate()
Deallocate resources allocated to this language model
|
int |
getMaxDepth()
Returns the maximum depth of the language model
|
int |
getNGramHits()
Returns the number of NGram hits.
|
int |
getNGramMisses()
Returns the number of times an NGram is queried, but there is no such
NGram in the LM (in which case it uses the backoff probabilities).
|
float |
getProbability(WordSequence wordSequence)
Gets the ngram probability of the word sequence represented by the word
list
|
float |
getSmear(WordSequence wordSequence)
Gets the smear term for the given wordSequence
|
java.util.Set<java.lang.String> |
getVocabulary()
Returns the set of words in the language model.
|
void |
newProperties(PropertySheet ps)
This method is called when this configurable component needs to be reconfigured.
|
void |
onUtteranceEnd()
Called by lexicon after recognition.
|
@S4String(mandatory=false) public static final java.lang.String PROP_QUERY_LOG_FILE
@S4Integer(defaultValue=100000) public static final java.lang.String PROP_NGRAM_CACHE_SIZE
@S4Boolean(defaultValue=false) public static final java.lang.String PROP_CLEAR_CACHES_AFTER_UTTERANCE
@S4Double(defaultValue=1.0) public static final java.lang.String PROP_LANGUAGE_WEIGHT
@S4Boolean(defaultValue=false) public static final java.lang.String PROP_APPLY_LANGUAGE_WEIGHT_AND_WIP
@S4Double(defaultValue=1.0) public static final java.lang.String PROP_WORD_INSERTION_PROBABILITY
protected java.util.logging.Logger logger
protected LogMath logMath
protected int maxDepth
protected int curDepth
protected int[] counts
protected int ngramCacheSize
protected boolean clearCacheAfterUtterance
protected Dictionary dictionary
protected java.lang.String format
protected boolean applyLanguageWeightAndWip
protected float languageWeight
protected float unigramWeight
protected float logWip
protected java.lang.String ngramLogFile
protected NgramTrieModel.TrieUnigram[] unigrams
protected java.lang.String[] words
protected NgramTrieQuant quant
protected NgramTrie trie
protected java.util.Map<Word,java.lang.Integer> unigramIDMap
public NgramTrieModel(java.lang.String format, java.net.URL location, java.lang.String ngramLogFile, int maxNGramCacheSize, boolean clearCacheAfterUtterance, int maxDepth, Dictionary dictionary, boolean applyLanguageWeightAndWip, float languageWeight, double wip, float unigramWeight)
public NgramTrieModel()
public void newProperties(PropertySheet ps) throws PropertyException
Configurable
newProperties
in interface Configurable
ps
- a property sheet holding the new data
PropertyException
- if there is a problem with the properties.
public void allocate() throws java.io.IOException
LanguageModel
allocate
in interface LanguageModel
java.io.IOException
- if an error occurs
public void deallocate() throws java.io.IOException
LanguageModel
deallocate
in interface LanguageModel
java.io.IOException
- if an error occurs
public float getProbability(WordSequence wordSequence)
getProbability
in interface LanguageModel
wordSequence
- the word sequence
public float getSmear(WordSequence wordSequence)
getSmear
in interface LanguageModel
wordSequence
- the word sequence
public java.util.Set<java.lang.String> getVocabulary()
getVocabulary
in interface LanguageModel
public int getNGramMisses()
public int getNGramHits()
public int getMaxDepth()
getMaxDepth
in interface LanguageModel
public void onUtteranceEnd()
onUtteranceEnd
in interface LanguageModel