SphinxBase  5prealpha
lm_trie.h
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2015 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 #ifndef __LM_TRIE_H__
39 #define __LM_TRIE_H__
40 
41 #include <sphinxbase/pio.h>
42 #include <sphinxbase/bitarr.h>
43 
44 #include "ngram_model_internal.h"
45 #include "lm_trie_quant.h"
46 
/**
 * Record kept for a single unigram: two float values and one unsigned
 * 32-bit value. lm_trie_t stores these as an array (its `unigrams` member).
 *
 * NOTE(review): prob/bo presumably hold the word's probability and backoff
 * weight, and next the position where this word's successors start in the
 * next-order array -- confirm against lm_trie.c.
 */
47 typedef struct unigram_s {
48  float prob;
49  float bo;
50  uint32 next;
51 } unigram_t;
52 
/**
 * Pair of unsigned 32-bit positions delimiting a range of nodes. Passed by
 * value to lm_trie_fill_raw_ngram().
 *
 * NOTE(review): whether `end` is inclusive or one-past-the-last is not
 * visible in this header -- confirm against the implementation.
 */
53 typedef struct node_range_s {
54  uint32 begin;
55  uint32 end;
56 } node_range_t;
57 
/**
 * Common part of a trie level; embedded as the first member of both
 * middle_t and longest_t.
 *
 * Holds two bit widths (word_bits, total_bits), a 32-bit word mask, a byte
 * pointer to the level's storage (base), an insertion counter
 * (insert_index) and a vocabulary bound (max_vocab).
 *
 * NOTE(review): word_bits is presumably the number of bits used for a word
 * id and total_bits the size of one packed record; ownership of the memory
 * behind `base` is not visible here -- confirm against lm_trie.c.
 */
58 typedef struct base_s {
59  uint8 word_bits;
60  uint8 total_bits;
61  uint32 word_mask;
62  uint8 *base;
63  uint32 insert_index;
64  uint32 max_vocab;
65 } base_t;
66 
/**
 * Trie level that extends base_t with a bitarr_mask_t (next_mask), a
 * quantization bit width (quant_bits) and an untyped pointer (next_source).
 * lm_trie_t refers to these through its middle_begin/middle_end pointers.
 *
 * NOTE(review): next_source presumably points at the following level
 * (another middle_t or the longest_t), which is why it is a void pointer;
 * next_mask presumably describes the bits of the "next" offset field --
 * confirm against lm_trie.c.
 */
67 typedef struct middle_s {
68  base_t base;
69  bitarr_mask_t next_mask;
70  uint8 quant_bits;
71  void *next_source;
72 } middle_t;
73 
/**
 * Trie level consisting of a base_t plus a quantization bit width. Unlike
 * middle_t it carries no next_mask/next_source members.
 *
 * NOTE(review): presumably the level for the highest n-gram order, which
 * has no successors -- confirm against lm_trie.c.
 */
74 typedef struct longest_s {
75  base_t base;
76  uint8 quant_bits;
77 } longest_t;
78 
/**
 * Top-level trie object handled by every lm_trie_* function declared in
 * this header.
 *
 * Members: a byte buffer and its size (ngram_mem, ngram_mem_size), an array
 * of unigram records, a begin/end pointer pair over middle_t entries, a
 * pointer to a longest_t, a pointer to the quantizer state, and two
 * fixed-size caches dimensioned by NGRAM_MAX_ORDER (backoff_cache holds
 * floats, hist_cache holds unsigned 32-bit values and is one entry shorter).
 *
 * NOTE(review): middle_begin/middle_end presumably delimit the levels for
 * the intermediate orders and `longest` the highest order; whether the
 * levels' `base` pointers point into ngram_mem, and what exactly the caches
 * memoize between lm_trie_score() calls, is not visible here -- confirm
 * against lm_trie.c.
 */
79 typedef struct lm_trie_s {
80  uint8 *ngram_mem;
81  size_t ngram_mem_size;
82  unigram_t *unigrams;
83  middle_t *middle_begin;
84  middle_t *middle_end;
85  longest_t *longest;
86  lm_trie_quant_t *quant;
87 
88  float backoff_cache[NGRAM_MAX_ORDER];
89  uint32 hist_cache[NGRAM_MAX_ORDER - 1];
90 } lm_trie_t;
91 
/**
 * Returns a pointer to an lm_trie_t given a unigram count and a model order.
 *
 * @param unigram_count number of unigrams.
 * @param order         n-gram order of the model.
 * @return pointer to an lm_trie_t.
 *
 * NOTE(review): presumably allocates the trie and the caller releases it
 * with lm_trie_free(); failure behaviour is not visible here -- confirm.
 */
95 lm_trie_t *lm_trie_create(uint32 unigram_count, int order);
96 
/**
 * Returns a pointer to an lm_trie_t given a counts array, a model order and
 * a stdio stream.
 *
 * @param counts array of unsigned 32-bit counts.
 * @param order  n-gram order of the model.
 * @param fp     stdio stream.
 * @return pointer to an lm_trie_t.
 *
 * NOTE(review): presumably loads a trie previously saved with
 * lm_trie_write_bin(), with counts holding one entry per order; the
 * expected stream position and error handling are not visible here --
 * confirm.
 */
97 lm_trie_t *lm_trie_read_bin(uint32 * counts, int order, FILE * fp);
98 
/**
 * Takes a trie, a unigram count and a stdio stream; returns nothing.
 *
 * @param trie          trie object.
 * @param unigram_count number of unigrams.
 * @param fp            stdio stream.
 *
 * NOTE(review): presumably serializes the trie to fp in the format read by
 * lm_trie_read_bin(); write errors cannot be reported through the void
 * return -- confirm how they are handled.
 */
99 void lm_trie_write_bin(lm_trie_t * trie, uint32 unigram_count, FILE * fp);
100 
/**
 * Takes a trie pointer; returns nothing.
 *
 * @param trie trie object.
 *
 * NOTE(review): presumably releases the trie and the memory it owns;
 * whether a NULL argument is accepted is not visible here -- confirm.
 */
101 void lm_trie_free(lm_trie_t * trie);
102 
/**
 * Takes a trie, an array of pointers to raw n-grams, two count arrays and
 * a model order; returns nothing.
 *
 * @param trie       trie object.
 * @param raw_ngrams array of pointers to ngram_raw_t.
 * @param counts     array of unsigned 32-bit counts.
 * @param out_counts second array of unsigned 32-bit counts.
 * @param order      n-gram order of the model.
 *
 * NOTE(review): presumably fills the trie from the raw n-grams, with
 * raw_ngrams indexed per order and out_counts receiving the counts actually
 * stored; the exact indexing is not visible here -- confirm.
 */
103 void lm_trie_build(lm_trie_t * trie, ngram_raw_t ** raw_ngrams,
104  uint32 * counts, uint32 *out_counts, int order);
105 
/**
 * Takes a trie, a raw n-gram array with an index pointer, a counts array,
 * a node range (by value), a history array with its length, and two order
 * values; returns nothing.
 *
 * @param trie          trie object.
 * @param raw_ngrams    array of ngram_raw_t.
 * @param raw_ngram_idx pointer to an unsigned 32-bit index.
 * @param counts        array of unsigned 32-bit counts.
 * @param range         begin/end node range.
 * @param hist          array of unsigned 32-bit history values.
 * @param n_hist        history length.
 * @param order         an n-gram order.
 * @param max_order     a second, maximum n-gram order.
 *
 * NOTE(review): presumably the inverse of building -- walks the trie nodes
 * in `range` and writes them into raw_ngrams, advancing *raw_ngram_idx,
 * possibly recursing from `order` up to `max_order`; none of this is
 * visible in the header -- confirm against lm_trie.c.
 */
106 void lm_trie_fill_raw_ngram(lm_trie_t * trie,
107  ngram_raw_t * raw_ngrams, uint32 * raw_ngram_idx,
108  uint32 * counts, node_range_t range, uint32 * hist,
109  int n_hist, int order, int max_order);
110 
/**
 * Returns a float given a trie, a model order, a word id, a history array
 * with its length, and an int32 output pointer.
 *
 * @param trie   trie object.
 * @param order  n-gram order of the model.
 * @param wid    word id.
 * @param hist   array of history word ids.
 * @param n_hist number of entries in hist.
 * @param n_used pointer to an int32.
 * @return a float value.
 *
 * NOTE(review): presumably the return value is the language-model score of
 * wid given hist, and *n_used receives the number of history words actually
 * used after backing off; the ordering of hist (most recent first or last)
 * and the score's units are not visible here -- confirm.
 */
111 float lm_trie_score(lm_trie_t * trie, int order, int32 wid, int32 * hist,
112  int32 n_hist, int32 * n_used);
113 
114 #endif /* __LM_TRIE_H__ */
Definition: lm_trie.h:58
Structure that specifies bits required to efficiently store certain data.
Definition: bitarr.h:65
An implementation of a bit array - memory-efficient storage for integer and float data.
File I/O related operations.