45 #if defined(__ADSPBLACKFIN__)
46 #elif !defined(_WIN32_WCE)
47 #include <sys/types.h>
51 #include <sphinx_config.h>
52 #include <sphinxbase/cmd_ln.h>
53 #include <sphinxbase/fixpoint.h>
54 #include <sphinxbase/ckd_alloc.h>
55 #include <sphinxbase/bio.h>
56 #include <sphinxbase/err.h>
57 #include <sphinxbase/prim_type.h>
66 ptm_mgau_mllr_transform,
/*
 * Per-dimension helpers for the Gaussian distance computation.
 *
 * Both macros expand to bare statements (with the trailing ';' built in)
 * that read and write variables of the expanding scope: obs, mean, var,
 * diff, sqdiff, compl and d must all be visible where they are used.
 *
 * COMPUTE_GMM_MAP(_idx) stores obs[_idx] - mean[_idx] in diff[_idx], the
 * MFCCMUL product of that difference with itself in sqdiff[_idx], and the
 * MFCCMUL product of sqdiff[_idx] with var[_idx] in compl[_idx].
 *
 * NOTE(review): the expansion is three separate statements and is not
 * wrapped in do { ... } while (0), so it is only safe as a full statement
 * inside a braced block, never as the unbraced body of an if/else/loop.
 */
70 #define COMPUTE_GMM_MAP(_idx) \
71 diff[_idx] = obs[_idx] - mean[_idx]; \
72 sqdiff[_idx] = MFCCMUL(diff[_idx], diff[_idx]); \
73 compl[_idx] = MFCCMUL(sqdiff[_idx], var[_idx]);
/*
 * COMPUTE_GMM_REDUCE(_idx) folds the term produced by COMPUTE_GMM_MAP for
 * the same index into the running value d via GMMSUB (defined outside
 * this chunk).
 */
74 #define COMPUTE_GMM_REDUCE(_idx) \
75 d = GMMSUB(d, compl[_idx]);
78 insertion_sort_topn(
ptm_topn_t *topn,
int i, int32 d)
87 for (j = i - 1; j >= 0 && d > topn[j].
score; j--) {
88 topn[j + 1] = topn[j];
94 eval_topn(
ptm_mgau_t *s,
int cb,
int feat, mfcc_t *z)
99 topn = s->
f->
topn[cb][feat];
102 for (i = 0; i < s->max_topn; i++) {
103 mfcc_t *mean, diff[4], sqdiff[4], compl[4];
109 mean = s->
g->
mean[cb][feat][0] + cw * ceplen;
110 var = s->
g->
var[cb][feat][0] + cw * ceplen;
111 d = s->
g->
det[cb][feat][cw];
113 for (j = 0; j < ceplen % 4; ++j) {
114 diff[0] = *obs++ - *mean++;
115 sqdiff[0] = MFCCMUL(diff[0], diff[0]);
116 compl[0] = MFCCMUL(sqdiff[0], *var);
122 for (;j < ceplen; j += 4) {
127 COMPUTE_GMM_REDUCE(0);
128 COMPUTE_GMM_REDUCE(1);
129 COMPUTE_GMM_REDUCE(2);
130 COMPUTE_GMM_REDUCE(3);
135 insertion_sort_topn(topn, i, (int32)d);
138 return topn[0].
score;
147 for (*cur = worst - 1; *cur >= best && intd >= (*cur)->
score; --*cur)
148 memcpy(*cur + 1, *cur,
sizeof(**cur));
151 (*cur)->score = intd;
155 eval_cb(
ptm_mgau_t *s,
int cb,
int feat, mfcc_t *z)
159 mfcc_t *var, *det, *detP, *detE;
162 best = topn = s->
f->
topn[cb][feat];
163 worst = topn + (s->max_topn - 1);
164 mean = s->
g->
mean[cb][feat][0];
165 var = s->
g->
var[cb][feat][0];
166 det = s->
g->
det[cb][feat];
170 for (detP = det; detP < detE; ++detP) {
171 mfcc_t diff[4], sqdiff[4], compl[4];
178 thresh = (mfcc_t) worst->
score;
180 cw = (
int)(detP - det);
185 for (j = 0; (j < ceplen % 4) && (d >= thresh); ++j) {
186 diff[0] = *obs++ - *mean++;
187 sqdiff[0] = MFCCMUL(diff[0], diff[0]);
188 compl[0] = MFCCMUL(sqdiff[0], *var++);
194 for (; j < ceplen && d >= thresh; j += 4) {
199 COMPUTE_GMM_REDUCE(0);
200 COMPUTE_GMM_REDUCE(1);
201 COMPUTE_GMM_REDUCE(2);
202 COMPUTE_GMM_REDUCE(3);
209 mean += (ceplen - j);
215 for (i = 0; i < s->max_topn; i++) {
217 if (topn[i].cw == cw)
222 insertion_sort_cb(&cur, worst, best, cw, (int32)d);
232 ptm_mgau_codebook_eval(
ptm_mgau_t *s, mfcc_t **z,
int frame)
237 for (i = 0; i < s->
g->
n_mgau; ++i)
238 for (j = 0; j < s->
g->
n_feat; ++j)
239 eval_topn(s, i, j, z[j]);
242 if (frame % s->ds_ratio)
246 for (i = 0; i < s->
g->
n_mgau; ++i) {
249 for (j = 0; j < s->
g->
n_feat; ++j) {
250 eval_cb(s, i, j, z[j]);
266 ptm_mgau_codebook_norm(
ptm_mgau_t *s, mfcc_t **z,
int frame)
270 for (j = 0; j < s->
g->
n_feat; ++j) {
272 for (i = 0; i < s->
g->
n_mgau; ++i) {
279 for (i = 0; i < s->
g->
n_mgau; ++i) {
283 for (k = 0; k < s->max_topn; ++k) {
297 ptm_mgau_calc_cb_active(
ptm_mgau_t *s, uint8 *senone_active,
298 int32 n_senone_active,
int compallsen)
307 for (lastsen = i = 0; i < n_senone_active; ++i) {
308 int sen = senone_active[i] + lastsen;
313 E_DEBUG(1, (
"Active codebooks:"));
314 for (i = 0; i < s->
g->
n_mgau; ++i) {
317 E_DEBUGCONT(1, (
" %d", i));
319 E_DEBUGCONT(1, (
"\n"));
327 ptm_mgau_senone_eval(
ptm_mgau_t *s, int16 *senone_scores,
328 uint8 *senone_active, int32 n_senone_active,
331 int i, lastsen, bestscore;
333 memset(senone_scores, 0, s->
n_sen *
sizeof(*senone_scores));
340 n_senone_active = s->
n_sen;
341 bestscore = 0x7fffffff;
342 for (lastsen = i = 0; i < n_senone_active; ++i) {
349 sen = senone_active[i] + lastsen;
359 for (f = 0; f < s->
g->
n_feat; ++f) {
360 for (j = 0; j < s->max_topn; ++j) {
368 for (f = 0; f < s->
g->
n_feat; ++f) {
371 topn = s->
f->
topn[cb][f];
372 for (j = 0; j < s->max_topn; ++j) {
376 int dcw = s->
mixw[f][topn[j].
cw][sen/2];
377 dcw = (dcw & 1) ? dcw >> 4 : dcw & 0x0f;
378 mixw = s->mixw_cb[dcw];
381 mixw = s->
mixw[f][topn[j].
cw][sen];
384 fden = mixw + topn[j].
score;
387 mixw + topn[j].
score);
388 E_DEBUG(3, (
"fden[%d][%d] l+= %d + %d = %d\n",
389 sen, f, mixw, topn[j].score, fden));
393 if (ascore < bestscore) bestscore = ascore;
394 senone_scores[sen] = ascore;
398 for (i = 0; i < s->
n_sen; ++i) {
399 senone_scores[i] -= bestscore;
410 int16 *senone_scores,
411 uint8 *senone_active,
412 int32 n_senone_active,
413 mfcc_t ** featbuf, int32 frame,
426 s->
f = s->
hist + fast_eval_idx;
430 if (frame >= ps_mgau_base(ps)->frame_idx) {
435 if (fast_eval_idx == 0)
438 lastf = s->
hist + fast_eval_idx - 1;
440 memcpy(s->
f->
topn[0][0], lastf->
topn[0][0],
444 ptm_mgau_calc_cb_active(s, senone_active, n_senone_active, compallsen);
446 ptm_mgau_codebook_eval(s, featbuf, frame);
447 ptm_mgau_codebook_norm(s, featbuf, frame);
450 ptm_mgau_senone_eval(s, senone_scores, senone_active,
451 n_senone_active, compallsen);
462 int32 do_swap, do_mmap;
467 int n_sen = bin_mdef_n_sen(mdef);
471 do_mmap = cmd_ln_boolean_r(s->
config,
"-mmap");
473 if ((fp = fopen(file,
"rb")) == NULL)
476 E_INFO(
"Loading senones from dump file %s\n", file);
478 if (fread(&n,
sizeof(int32), 1, fp) != 1) {
479 E_ERROR_SYSTEM(
"Failed to read title size from %s", file);
484 if (n < 1 || n > 999) {
486 if (n < 1 || n > 999) {
487 E_ERROR(
"Title length %x in dump file %s out of range\n", n, file);
492 if (fread(line,
sizeof(
char), n, fp) != n) {
493 E_ERROR_SYSTEM(
"Cannot read title");
496 if (line[n - 1] !=
'\0') {
497 E_ERROR(
"Bad title in dump file\n");
500 E_INFO(
"%s\n", line);
503 if (fread(&n,
sizeof(n), 1, fp) != 1) {
504 E_ERROR_SYSTEM(
"Failed to read header size from %s", file);
507 if (do_swap) SWAP_INT32(&n);
508 if (fread(line,
sizeof(
char), n, fp) != n) {
509 E_ERROR_SYSTEM(
"Cannot read header");
512 if (line[n - 1] !=
'\0') {
513 E_ERROR(
"Bad header in dump file\n");
519 if (fread(&n,
sizeof(n), 1, fp) != 1) {
520 E_ERROR_SYSTEM(
"Failed to read header string size from %s", file);
523 if (do_swap) SWAP_INT32(&n);
526 if (fread(line,
sizeof(
char), n, fp) != n) {
527 E_ERROR_SYSTEM(
"Cannot read header");
531 if (!strncmp(line,
"feature_count ", strlen(
"feature_count "))) {
532 n_feat = atoi(line + strlen(
"feature_count "));
534 if (!strncmp(line,
"mixture_count ", strlen(
"mixture_count "))) {
535 n_density = atoi(line + strlen(
"mixture_count "));
537 if (!strncmp(line,
"model_count ", strlen(
"model_count "))) {
538 n_sen = atoi(line + strlen(
"model_count "));
540 if (!strncmp(line,
"cluster_count ", strlen(
"cluster_count "))) {
541 n_clust = atoi(line + strlen(
"cluster_count "));
543 if (!strncmp(line,
"cluster_bits ", strlen(
"cluster_bits "))) {
544 n_bits = atoi(line + strlen(
"cluster_bits "));
553 if (fread(&r,
sizeof(r), 1, fp) != 1) {
554 E_ERROR_SYSTEM(
"Cannot read #rows");
557 if (do_swap) SWAP_INT32(&r);
558 if (fread(&c,
sizeof(c), 1, fp) != 1) {
559 E_ERROR_SYSTEM(
"Cannot read #columns");
562 if (do_swap) SWAP_INT32(&c);
563 E_INFO(
"Rows: %d, Columns: %d\n", r, c);
567 E_ERROR(
"Number of feature streams mismatch: %d != %d\n",
572 E_ERROR(
"Number of densities mismatch: %d != %d\n",
576 if (n_sen != s->
n_sen) {
577 E_ERROR(
"Number of senones mismatch: %d != %d\n",
582 if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) {
583 E_ERROR(
"Cluster count must be 0, 15, or 16\n");
/* n_bits comes from the "cluster_bits " header entry and may only be 4 or 8.
 * Report it under its own name: the cluster *count* (n_clust) is validated
 * separately just above with its own message. */
589 if (!((n_bits == 8) || (n_bits == 4))) {
590 E_ERROR(
"Cluster bits must be 4 or 8\n");
595 E_INFO(
"Using memory-mapped I/O for senones\n");
601 s->sendump_mmap = mmio_file_read(file);
604 s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
611 s->mixw_cb = ckd_calloc(1, n_clust);
612 if (fread(s->mixw_cb, 1, n_clust, fp) != (
size_t) n_clust) {
613 E_ERROR(
"Failed to read %d bytes from sendump\n", n_clust);
620 if (s->sendump_mmap) {
621 s->
mixw = ckd_calloc_2d(n_feat, n_density,
sizeof(*s->
mixw));
622 for (n = 0; n < n_feat; n++) {
625 step = (step + 1) / 2;
626 for (i = 0; i < r; i++) {
627 s->
mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
633 s->
mixw = ckd_calloc_3d(n_feat, n_density, n_sen,
sizeof(***s->
mixw));
635 for (n = 0; n < n_feat; n++) {
638 step = (step + 1) / 2;
639 for (i = 0; i < r; i++) {
640 if (fread(s->
mixw[n][i],
sizeof(***s->
mixw), step, fp)
642 E_ERROR(
"Failed to read %d bytes from sendump\n", step);
657 read_mixw(
ptm_mgau_t * s,
char const *file_name,
double SmoothMin)
659 char **argname, **argval;
662 int32 byteswap, chksum_present;
671 E_INFO(
"Reading mixture weights file '%s'\n", file_name);
673 if ((fp = fopen(file_name,
"rb")) == NULL)
674 E_FATAL_SYSTEM(
"Failed to open mixture file '%s' for reading", file_name);
677 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
678 E_FATAL(
"Failed to read header from '%s'\n", file_name);
682 for (i = 0; argname[i]; i++) {
683 if (strcmp(argname[i],
"version") == 0) {
684 if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0)
685 E_WARN(
"Version mismatch(%s): %s, expecting %s\n",
686 file_name, argval[i], MGAU_MIXW_VERSION);
688 else if (strcmp(argname[i],
"chksum0") == 0) {
692 bio_hdrarg_free(argname, argval);
693 argname = argval = NULL;
698 if ((bio_fread(&n_sen,
sizeof(int32), 1, fp, byteswap, &chksum) != 1)
699 || (bio_fread(&n_feat,
sizeof(int32), 1, fp, byteswap, &chksum) !=
701 || (bio_fread(&n_comp,
sizeof(int32), 1, fp, byteswap, &chksum) !=
703 || (bio_fread(&n,
sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
704 E_FATAL(
"bio_fread(%s) (arraysize) failed\n", file_name);
707 E_FATAL(
"#Features streams(%d) != %d\n", n_feat, s->
g->
n_feat);
/* n is the total element count read from the file header; it must equal
 * n_sen * n_feat * n_comp.  The diagnostic reports n itself: i is only the
 * leftover index of the header-argument loop and says nothing about the
 * array size. */
708 if (n != n_sen * n_feat * n_comp) {
710 (
"%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n",
711 file_name, n, n_sen, n_feat, n_comp);
721 n_sen,
sizeof(***s->
mixw));
724 pdf = (float32 *) ckd_calloc(n_comp,
sizeof(float32));
728 for (i = 0; i < n_sen; i++) {
729 for (f = 0; f < n_feat; f++) {
730 if (bio_fread((
void *) pdf,
sizeof(float32),
731 n_comp, fp, byteswap, &chksum) != n_comp) {
732 E_FATAL(
"bio_fread(%s) (arraydata) failed\n", file_name);
736 if (vector_sum_norm(pdf, n_comp) <= 0.0)
738 vector_floor(pdf, n_comp, SmoothMin);
739 vector_sum_norm(pdf, n_comp);
742 for (c = 0; c < n_comp; c++) {
745 qscr = -logmath_log(s->lmath_8b, pdf[c]);
748 s->
mixw[f][c][i] = qscr;
753 E_WARN(
"Weight normalization failed for %d mixture weights components\n", n_err);
758 bio_verify_chksum(fp, byteswap, chksum);
760 if (fread(&eofchk, 1, 1, fp) == 1)
761 E_FATAL(
"More data than expected in %s\n", file_name);
765 E_INFO(
"Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp);
774 char const *sendump_path;
777 s = ckd_calloc(1,
sizeof(*s));
780 s->lmath = logmath_retain(acmod->
lmath);
783 if (s->lmath_8b == NULL)
786 if (logmath_get_width(s->lmath_8b) != 1) {
787 E_ERROR(
"Log base %f is too small to represent add table in 8 bits\n",
788 logmath_get_base(s->lmath_8b));
794 cmd_ln_str_r(s->
config,
"_var"),
795 cmd_ln_float32_r(s->
config,
"-varfloor"),
796 s->lmath)) == NULL) {
797 E_ERROR(
"Failed to read means and variances\n");
804 E_INFO(
"Number of codebooks exceeds 256: %d\n", s->
g->
n_mgau);
807 if (s->
g->
n_mgau != bin_mdef_n_ciphone(mdef)) {
808 E_INFO(
"Number of codebooks doesn't match number of ciphones, doesn't look like PTM: %d != %d\n", s->
g->
n_mgau, bin_mdef_n_ciphone(mdef));
812 if (s->
g->
n_feat != feat_dimension1(acmod->
fcb)) {
813 E_ERROR(
"Number of streams does not match: %d != %d\n",
817 for (i = 0; i < s->
g->
n_feat; ++i) {
818 if (s->
g->
featlen[i] != feat_dimension2(acmod->
fcb, i)) {
819 E_ERROR(
"Dimension of stream %d does not match: %d != %d\n",
825 if ((sendump_path = cmd_ln_str_r(s->
config,
"_sendump"))) {
826 if (read_sendump(s, acmod->
mdef, sendump_path) < 0) {
831 if (read_mixw(s, cmd_ln_str_r(s->
config,
"_mixw"),
832 cmd_ln_float32_r(s->
config,
"-mixwfloor")) < 0) {
836 s->ds_ratio = cmd_ln_int32_r(s->
config,
"-ds");
837 s->max_topn = cmd_ln_int32_r(s->
config,
"-topn");
838 E_INFO(
"Maximum top-N: %d\n", s->max_topn);
843 for (i = 0; i < s->
n_sen; ++i)
844 s->
sen2cb[i] = bin_mdef_sen2cimap(acmod->
mdef, i);
859 for (j = 0; j < s->
g->
n_mgau; ++j) {
860 for (k = 0; k < s->
g->
n_feat; ++k) {
861 for (m = 0; m < s->max_topn; ++m) {
875 ps->
vt = &ptm_mgau_funcs;
878 ptm_mgau_free(ps_mgau_base(s));
896 logmath_free(s->lmath);
897 logmath_free(s->lmath_8b);
898 if (s->sendump_mmap) {
899 ckd_free_2d(s->
mixw);
900 mmio_file_unmap(s->sendump_mmap);
903 ckd_free_3d(s->
mixw);
int32 n_density
Number of Gaussian densities in each codebook-feature stream.
ptm_topn_t *** topn
Top-N for each codebook (mgau x feature x topn)
void gauden_free(gauden_t *g)
Release memory allocated by gauden_init.
mfcc_t *** det
log(determinant) for each variance vector; actually, log(sqrt(2*pi*det))
uint8 * sen2cb
Senone to codebook mapping.
logmath_t * lmath
Log-math computation.
int n_fast_hist
Number of past frames tracked.
gauden_t * g
Set of Gaussians.
int32 gauden_mllr_transform(gauden_t *s, ps_mllr_t *mllr, cmd_ln_t *config)
Transform Gaussians according to an MLLR matrix (or, eventually, more).
gauden_t * gauden_init(char const *meanfile, char const *varfile, float32 varfloor, logmath_t *lmath)
Read mixture Gaussian codebooks from the given files.
int ptm_mgau_frame_eval(ps_mgau_t *s, int16 *senone_scores, uint8 *senone_active, int32 n_senone_active, mfcc_t **featbuf, int32 frame, int32 compallsen)
Compute senone scores for the active senones.
Fast phonetically-tied mixture evaluation.
cmd_ln_t * config
Configuration.
#define WORST_SCORE
Large "bad" score.
int32 * featlen
Feature length for each feature stream.
#define GMMSUB(a, b)
Subtract GMM component b (assumed to be positive) and saturate.
int32 n_mgau
Number of codebooks.
Feature space linear transform structure.
#define SENSCR_SHIFT
Shift count for senone scores.
mfcc_t **** mean
mean[codebook][feature][codeword] vector
feat_t * fcb
Dynamic feature computation.
cmd_ln_t * config
Configuration parameters.
uint8 *** mixw
Mixture weight distributions by feature, codeword, senone.
ptm_fast_eval_t * hist
Fast evaluation info for past frames.
int32 n_feat
Number of feature streams in each codebook.
ptm_fast_eval_t * f
Fast eval info for current frame.
ps_mgaufuncs_t * vt
vtable of mgau functions.
LOGMATH_INLINE int fast_logmath_add(logmath_t *lmath, int mlx, int mly)
Quickly log-add two negated log probabilities.
bin_mdef_t * mdef
Model definition.
bitvec_t * mgau_active
Set of active codebooks.
#define MAX_NEG_ASCR
Maximum negated acoustic score value.
int32 n_sen
Number of senones.
#define MAX_NEG_MIXW
Maximum negated mixture weight value.
Acoustic model structure.
mfcc_t **** var
Indexed like mean ([codebook][feature][codeword]); diagonal covariance vector only.
Common code shared between SC and PTM (tied-state) models.