public class TextDictionary extends java.lang.Object implements Dictionary
The format of the ASCII dictionary is the word, followed by spaces or a tab, followed by the pronunciation(s). For example, a digits dictionary will look like:
ONE HH W AH N
ONE(2) W AH N
TWO T UW
THREE TH R IY
FOUR F AO R
FIVE F AY V
SIX S IH K S
SEVEN S EH V AH N
EIGHT EY T
NINE N AY N
ZERO Z IH R OW
ZERO(2) Z IY R OW
OH OW
In the above example, the words "one" and "zero" have two pronunciations each.
Modifier and Type | Field and Description |
---|---|
protected java.util.List<java.net.URL> |
addendaUrlList |
protected boolean |
allocated |
protected java.util.Map<java.lang.String,java.lang.String> |
dictionary |
protected static java.lang.String |
FILLER_TAG |
protected java.net.URL |
fillerDictionaryFile |
protected java.util.Set<java.lang.String> |
fillerWords |
protected G2PConverter |
g2pDecoder |
protected int |
g2pMaxPron |
protected java.net.URL |
g2pModelFile |
protected java.util.logging.Logger |
logger |
protected UnitManager |
unitManager |
protected java.util.Map<java.lang.String,Word> |
wordDictionary |
protected java.net.URL |
wordDictionaryFile |
PROP_ADDENDA, PROP_DICTIONARY, PROP_FILLER_DICTIONARY, PROP_G2P_MAX_PRONUNCIATIONS, PROP_G2P_MODEL_PATH, PROP_UNIT_MANAGER, PROP_WORD_REPLACEMENT, SENTENCE_END_SPELLING, SENTENCE_START_SPELLING, SILENCE_SPELLING
Constructor and Description |
---|
TextDictionary() |
TextDictionary(java.lang.String wordDictionaryFile,
java.lang.String fillerDictionaryFile,
java.util.List<java.net.URL> addendaUrlList,
boolean addSilEndingPronunciation,
java.lang.String wordReplacement,
UnitManager unitManager) |
TextDictionary(java.net.URL wordDictionaryFile,
java.net.URL fillerDictionaryFile,
java.util.List<java.net.URL> addendaUrlList,
boolean addSilEndingPronunciation,
java.lang.String wordReplacement,
UnitManager unitManager,
java.net.URL g2pModelFile,
int g2pMaxPron) |
TextDictionary(java.net.URL wordDictionaryFile,
java.net.URL fillerDictionaryFile,
java.util.List<java.net.URL> addendaUrlList,
java.lang.String wordReplacement,
UnitManager unitManager) |
Modifier and Type | Method and Description |
---|---|
void |
allocate()
Allocates the dictionary
|
void |
deallocate()
Deallocates the dictionary
|
void |
dump()
Dumps this TextDictionary to System.out.
|
protected Unit |
getCIUnit(java.lang.String name,
boolean isFiller)
Gets a context independent unit.
|
java.net.URL |
getFillerDictionaryFile()
Get the filler dictionary file
|
Word[] |
getFillerWords()
Gets the set of all filler words in the dictionary
|
Word |
getSentenceEndWord()
Returns the sentence end word.
|
Word |
getSentenceStartWord()
Returns the sentence start word.
|
Word |
getSilenceWord()
Returns the silence word.
|
Word |
getWord(java.lang.String text)
Returns a Word object based on the spelling and its classification.
|
java.net.URL |
getWordDictionaryFile()
Get the word dictionary file
|
protected void |
loadDictionary(java.io.InputStream inputStream,
boolean isFillerDict)
Loads the given simple dictionary from the given InputStream.
|
void |
newProperties(PropertySheet ps)
This method is called when this configurable component needs to be reconfigured.
|
java.lang.String |
toString()
Returns a string representation of this TextDictionary in alphabetical
order.
|
protected java.util.logging.Logger logger
protected java.net.URL wordDictionaryFile
protected java.net.URL fillerDictionaryFile
protected java.util.List<java.net.URL> addendaUrlList
protected java.net.URL g2pModelFile
protected int g2pMaxPron
protected UnitManager unitManager
protected java.util.Map<java.lang.String,java.lang.String> dictionary
protected java.util.Map<java.lang.String,Word> wordDictionary
protected G2PConverter g2pDecoder
protected static final java.lang.String FILLER_TAG
protected java.util.Set<java.lang.String> fillerWords
protected boolean allocated
public TextDictionary(java.lang.String wordDictionaryFile, java.lang.String fillerDictionaryFile, java.util.List<java.net.URL> addendaUrlList, boolean addSilEndingPronunciation, java.lang.String wordReplacement, UnitManager unitManager) throws java.net.MalformedURLException, java.lang.ClassNotFoundException
java.net.MalformedURLException
java.lang.ClassNotFoundException
public TextDictionary(java.net.URL wordDictionaryFile, java.net.URL fillerDictionaryFile, java.util.List<java.net.URL> addendaUrlList, java.lang.String wordReplacement, UnitManager unitManager)
public TextDictionary(java.net.URL wordDictionaryFile, java.net.URL fillerDictionaryFile, java.util.List<java.net.URL> addendaUrlList, boolean addSilEndingPronunciation, java.lang.String wordReplacement, UnitManager unitManager, java.net.URL g2pModelFile, int g2pMaxPron)
public TextDictionary()
public void newProperties(PropertySheet ps) throws PropertyException
Configurable
newProperties
in interface Configurable
ps
- a property sheet holding the new data
PropertyException
- if there is a problem with the properties.
public java.net.URL getWordDictionaryFile()
public java.net.URL getFillerDictionaryFile()
public void allocate() throws java.io.IOException
Dictionary
allocate
in interface Dictionary
java.io.IOException
- if there is trouble loading the dictionary
public void deallocate()
Dictionary
deallocate
in interface Dictionary
protected void loadDictionary(java.io.InputStream inputStream, boolean isFillerDict) throws java.io.IOException
inputStream
- the InputStream of the dictionary
isFillerDict
- true if this is a filler dictionary, false otherwise
java.io.IOException
- if there is an error reading the dictionary
protected Unit getCIUnit(java.lang.String name, boolean isFiller)
name
- the name of the unit
isFiller
- if true, the unit is a filler unit
public Word getSentenceStartWord()
getSentenceStartWord
in interface Dictionary
public Word getSentenceEndWord()
getSentenceEndWord
in interface Dictionary
public Word getSilenceWord()
getSilenceWord
in interface Dictionary
public Word getWord(java.lang.String text)
getWord
in interface Dictionary
text
- the spelling of the word of interest.
Word
public java.lang.String toString()
toString
in class java.lang.Object
public Word[] getFillerWords()
getFillerWords
in interface Dictionary
public void dump()