SphinxBase  5prealpha
fe_interface.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1996-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 #include <stdio.h>
38 #include <string.h>
39 #include <math.h>
40 #include <stdlib.h>
41 #include <assert.h>
42 
43 #ifdef HAVE_CONFIG_H
44 #include <config.h>
45 #endif
46 
47 #include "sphinxbase/prim_type.h"
48 #include "sphinxbase/byteorder.h"
49 #include "sphinxbase/fixpoint.h"
50 #include "sphinxbase/genrand.h"
51 #include "sphinxbase/err.h"
52 #include "sphinxbase/cmd_ln.h"
53 #include "sphinxbase/ckd_alloc.h"
54 
55 #include "fe_internal.h"
56 #include "fe_warp.h"
57 
/* Table of command-line argument definitions for the front end.  The
 * entries are produced by waveform_to_cepstral_command_line_macro() and
 * the table is closed by an all-NULL/zero entry.  fe_get_args() hands
 * this table out to callers. */
58 static const arg_t fe_args[] = {
59  waveform_to_cepstral_command_line_macro(),
60  { NULL, 0, NULL, NULL }
61 };
62 
63 int
64 fe_parse_general_params(cmd_ln_t *config, fe_t * fe)
65 {
66  int j, frate;
67 
68  fe->config = config;
69  fe->sampling_rate = cmd_ln_float32_r(config, "-samprate");
70  frate = cmd_ln_int32_r(config, "-frate");
71  if (frate > MAX_INT16 || frate > fe->sampling_rate || frate < 1) {
72  E_ERROR
73  ("Frame rate %d can not be bigger than sample rate %.02f\n",
74  frate, fe->sampling_rate);
75  return -1;
76  }
77 
78  fe->frame_rate = (int16)frate;
79  if (cmd_ln_boolean_r(config, "-dither")) {
80  fe->dither = 1;
81  fe->dither_seed = cmd_ln_int32_r(config, "-seed");
82  }
83 #ifdef WORDS_BIGENDIAN
84  fe->swap = strcmp("big", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1;
85 #else
86  fe->swap = strcmp("little", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1;
87 #endif
88  fe->window_length = cmd_ln_float32_r(config, "-wlen");
89  fe->pre_emphasis_alpha = cmd_ln_float32_r(config, "-alpha");
90 
91  fe->num_cepstra = (uint8)cmd_ln_int32_r(config, "-ncep");
92  fe->fft_size = (int16)cmd_ln_int32_r(config, "-nfft");
93 
94  /* Check FFT size, compute FFT order (log_2(n)) */
95  for (j = fe->fft_size, fe->fft_order = 0; j > 1; j >>= 1, fe->fft_order++) {
96  if (((j % 2) != 0) || (fe->fft_size <= 0)) {
97  E_ERROR("fft: number of points must be a power of 2 (is %d)\n",
98  fe->fft_size);
99  return -1;
100  }
101  }
102  /* Verify that FFT size is greater or equal to window length. */
103  if (fe->fft_size < (int)(fe->window_length * fe->sampling_rate)) {
104  E_ERROR("FFT: Number of points must be greater or equal to frame size (%d samples)\n",
105  (int)(fe->window_length * fe->sampling_rate));
106  return -1;
107  }
108 
109  fe->pre_speech = (int16)cmd_ln_int32_r(config, "-vad_prespeech");
110  fe->post_speech = (int16)cmd_ln_int32_r(config, "-vad_postspeech");
111  fe->start_speech = (int16)cmd_ln_int32_r(config, "-vad_startspeech");
112  fe->vad_threshold = cmd_ln_float32_r(config, "-vad_threshold");
113 
114  fe->remove_dc = cmd_ln_boolean_r(config, "-remove_dc");
115  fe->remove_noise = cmd_ln_boolean_r(config, "-remove_noise");
116  fe->remove_silence = cmd_ln_boolean_r(config, "-remove_silence");
117 
118  if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "dct"))
119  fe->transform = DCT_II;
120  else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "legacy"))
121  fe->transform = LEGACY_DCT;
122  else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "htk"))
123  fe->transform = DCT_HTK;
124  else {
125  E_ERROR("Invalid transform type (values are 'dct', 'legacy', 'htk')\n");
126  return -1;
127  }
128 
129  if (cmd_ln_boolean_r(config, "-logspec"))
130  fe->log_spec = RAW_LOG_SPEC;
131  if (cmd_ln_boolean_r(config, "-smoothspec"))
132  fe->log_spec = SMOOTH_LOG_SPEC;
133 
134  return 0;
135 }
136 
137 static int
138 fe_parse_melfb_params(cmd_ln_t *config, fe_t *fe, melfb_t * mel)
139 {
140  mel->sampling_rate = fe->sampling_rate;
141  mel->fft_size = fe->fft_size;
142  mel->num_cepstra = fe->num_cepstra;
143  mel->num_filters = cmd_ln_int32_r(config, "-nfilt");
144 
145  if (fe->log_spec)
146  fe->feature_dimension = mel->num_filters;
147  else
148  fe->feature_dimension = fe->num_cepstra;
149 
150  mel->upper_filt_freq = cmd_ln_float32_r(config, "-upperf");
151  mel->lower_filt_freq = cmd_ln_float32_r(config, "-lowerf");
152 
153  mel->doublewide = cmd_ln_boolean_r(config, "-doublebw");
154 
155  mel->warp_type = cmd_ln_str_r(config, "-warp_type");
156  mel->warp_params = cmd_ln_str_r(config, "-warp_params");
157  mel->lifter_val = cmd_ln_int32_r(config, "-lifter");
158 
159  mel->unit_area = cmd_ln_boolean_r(config, "-unit_area");
160  mel->round_filters = cmd_ln_boolean_r(config, "-round_filters");
161 
162  if (fe_warp_set(mel, mel->warp_type) != FE_SUCCESS) {
163  E_ERROR("Failed to initialize the warping function.\n");
164  return -1;
165  }
166  fe_warp_set_parameters(mel, mel->warp_params, mel->sampling_rate);
167  return 0;
168 }
169 
170 void
171 fe_print_current(fe_t const *fe)
172 {
173  E_INFO("Current FE Parameters:\n");
174  E_INFO("\tSampling Rate: %f\n", fe->sampling_rate);
175  E_INFO("\tFrame Size: %d\n", fe->frame_size);
176  E_INFO("\tFrame Shift: %d\n", fe->frame_shift);
177  E_INFO("\tFFT Size: %d\n", fe->fft_size);
178  E_INFO("\tLower Frequency: %g\n",
179  fe->mel_fb->lower_filt_freq);
180  E_INFO("\tUpper Frequency: %g\n",
181  fe->mel_fb->upper_filt_freq);
182  E_INFO("\tNumber of filters: %d\n", fe->mel_fb->num_filters);
183  E_INFO("\tNumber of Overflow Samps: %d\n", fe->num_overflow_samps);
184  E_INFO("Will %sremove DC offset at frame level\n",
185  fe->remove_dc ? "" : "not ");
186  if (fe->dither) {
187  E_INFO("Will add dither to audio\n");
188  E_INFO("Dither seeded with %d\n", fe->dither_seed);
189  }
190  else {
191  E_INFO("Will not add dither to audio\n");
192  }
193  if (fe->mel_fb->lifter_val) {
194  E_INFO("Will apply sine-curve liftering, period %d\n",
195  fe->mel_fb->lifter_val);
196  }
197  E_INFO("Will %snormalize filters to unit area\n",
198  fe->mel_fb->unit_area ? "" : "not ");
199  E_INFO("Will %sround filter frequencies to DFT points\n",
200  fe->mel_fb->round_filters ? "" : "not ");
201  E_INFO("Will %suse double bandwidth in mel filter\n",
202  fe->mel_fb->doublewide ? "" : "not ");
203 }
204 
205 fe_t *
206 fe_init_auto()
207 {
208  return fe_init_auto_r(cmd_ln_get());
209 }
210 
211 fe_t *
212 fe_init_auto_r(cmd_ln_t *config)
213 {
214  fe_t *fe;
215  int prespch_frame_len;
216 
217  fe = (fe_t*)ckd_calloc(1, sizeof(*fe));
218  fe->refcount = 1;
219 
220  /* transfer params to front end */
221  if (fe_parse_general_params(cmd_ln_retain(config), fe) < 0) {
222  fe_free(fe);
223  return NULL;
224  }
225 
226  /* compute remaining fe parameters */
227  /* We add 0.5 so approximate the float with the closest
228  * integer. E.g., 2.3 is truncate to 2, whereas 3.7 becomes 4
229  */
230  fe->frame_shift = (int32) (fe->sampling_rate / fe->frame_rate + 0.5);
231  fe->frame_size = (int32) (fe->window_length * fe->sampling_rate + 0.5);
232  fe->pre_emphasis_prior = 0;
233 
234  fe_start_stream(fe);
235 
236  assert (fe->frame_shift > 1);
237 
238  if (fe->frame_size < fe->frame_shift) {
239  E_ERROR
240  ("Frame size %d (-wlen) must be greater than frame shift %d (-frate)\n",
241  fe->frame_size, fe->frame_shift);
242  fe_free(fe);
243  return NULL;
244  }
245 
246 
247  if (fe->frame_size > (fe->fft_size)) {
248  E_ERROR
249  ("Number of FFT points has to be a power of 2 higher than %d, it is %d\n",
250  fe->frame_size, fe->fft_size);
251  fe_free(fe);
252  return NULL;
253  }
254 
255  if (fe->dither)
256  fe_init_dither(fe->dither_seed);
257 
258  /* establish buffers for overflow samps and hamming window */
259  fe->overflow_samps = ckd_calloc(fe->frame_size, sizeof(int16));
260  fe->hamming_window = ckd_calloc(fe->frame_size/2, sizeof(window_t));
261 
262  /* create hamming window */
263  fe_create_hamming(fe->hamming_window, fe->frame_size);
264 
265  /* init and fill appropriate filter structure */
266  fe->mel_fb = ckd_calloc(1, sizeof(*fe->mel_fb));
267 
268  /* transfer params to mel fb */
269  fe_parse_melfb_params(config, fe, fe->mel_fb);
270 
271  if (fe->mel_fb->upper_filt_freq > fe->sampling_rate / 2 + 1.0) {
272  E_ERROR("Upper frequency %.1f is higher than samprate/2 (%.1f)\n",
273  fe->mel_fb->upper_filt_freq, fe->sampling_rate / 2);
274  fe_free(fe);
275  return NULL;
276  }
277 
278  fe_build_melfilters(fe->mel_fb);
279 
280  fe_compute_melcosine(fe->mel_fb);
281  if (fe->remove_noise || fe->remove_silence)
282  fe->noise_stats = fe_init_noisestats(fe->mel_fb->num_filters);
283 
284  fe->vad_data = (vad_data_t*)ckd_calloc(1, sizeof(*fe->vad_data));
285  prespch_frame_len = fe->log_spec != RAW_LOG_SPEC ? fe->num_cepstra : fe->mel_fb->num_filters;
286  fe->vad_data->prespch_buf = fe_prespch_init(fe->pre_speech + 1, prespch_frame_len, fe->frame_shift);
287 
288  /* Create temporary FFT, spectrum and mel-spectrum buffers. */
289  /* FIXME: Gosh there are a lot of these. */
290  fe->spch = ckd_calloc(fe->frame_size, sizeof(*fe->spch));
291  fe->frame = ckd_calloc(fe->fft_size, sizeof(*fe->frame));
292  fe->spec = ckd_calloc(fe->fft_size, sizeof(*fe->spec));
293  fe->mfspec = ckd_calloc(fe->mel_fb->num_filters, sizeof(*fe->mfspec));
294 
295  /* create twiddle factors */
296  fe->ccc = ckd_calloc(fe->fft_size / 4, sizeof(*fe->ccc));
297  fe->sss = ckd_calloc(fe->fft_size / 4, sizeof(*fe->sss));
298  fe_create_twiddle(fe);
299 
300  if (cmd_ln_boolean_r(config, "-verbose")) {
301  fe_print_current(fe);
302  }
303 
304  /*** Initialize the overflow buffers ***/
305  fe_start_utt(fe);
306  return fe;
307 }
308 
309 arg_t const *
310 fe_get_args(void)
311 {
312  return fe_args;
313 }
314 
315 const cmd_ln_t *
316 fe_get_config(fe_t *fe)
317 {
318  return fe->config;
319 }
320 
321 void
322 fe_init_dither(int32 seed)
323 {
324  E_INFO("Using %d as the seed.\n", seed);
325  s3_rand_seed(seed);
326 }
327 
328 static void
329 fe_reset_vad_data(vad_data_t * vad_data)
330 {
331  vad_data->in_speech = 0;
332  vad_data->pre_speech_frames = 0;
333  vad_data->post_speech_frames = 0;
334  fe_prespch_reset_cep(vad_data->prespch_buf);
335 }
336 
337 int32
338 fe_start_utt(fe_t * fe)
339 {
340  fe->num_overflow_samps = 0;
341  memset(fe->overflow_samps, 0, fe->frame_size * sizeof(int16));
342  fe->pre_emphasis_prior = 0;
343  fe_reset_vad_data(fe->vad_data);
344  return 0;
345 }
346 
347 void
348 fe_start_stream(fe_t *fe)
349 {
350  fe->num_processed_samps = 0;
351  fe_reset_noisestats(fe->noise_stats);
352 }
353 
354 int
355 fe_get_output_size(fe_t *fe)
356 {
357  return (int)fe->feature_dimension;
358 }
359 
360 void
361 fe_get_input_size(fe_t *fe, int *out_frame_shift,
362  int *out_frame_size)
363 {
364  if (out_frame_shift)
365  *out_frame_shift = fe->frame_shift;
366  if (out_frame_size)
367  *out_frame_size = fe->frame_size;
368 }
369 
370 uint8
371 fe_get_vad_state(fe_t *fe)
372 {
373  return fe->vad_data->in_speech;
374 }
375 
376 int
377 fe_process_frames(fe_t *fe,
378  int16 const **inout_spch,
379  size_t *inout_nsamps,
380  mfcc_t **buf_cep,
381  int32 *inout_nframes,
382  int32 *out_frameidx)
383 {
384  return fe_process_frames_ext(fe, inout_spch, inout_nsamps, buf_cep, inout_nframes, NULL, NULL, out_frameidx);
385 }
386 
387 
391 static int
392 fe_copy_from_prespch(fe_t *fe, int32 *inout_nframes, mfcc_t **buf_cep, int outidx)
393 {
394  while ((*inout_nframes) > 0 && fe_prespch_read_cep(fe->vad_data->prespch_buf, buf_cep[outidx]) > 0) {
395  outidx++;
396  (*inout_nframes)--;
397  }
398  return outidx;
399 }
400 
404 static int
405 fe_check_prespeech(fe_t *fe, int32 *inout_nframes, mfcc_t **buf_cep, int outidx, int32 *out_frameidx, size_t *inout_nsamps, int orig_nsamps)
406 {
407  if (fe->vad_data->in_speech) {
408  if (fe_prespch_ncep(fe->vad_data->prespch_buf) > 0) {
409 
410  /* Previous frame triggered vad into speech state. Last frame is in the end of
411  prespeech buffer, so overwrite it */
412  outidx = fe_copy_from_prespch(fe, inout_nframes, buf_cep, outidx);
413 
414  /* Sets the start frame for the returned data so that caller can update timings */
415  if (out_frameidx) {
416  *out_frameidx = (fe->num_processed_samps + orig_nsamps - *inout_nsamps) / fe->frame_shift - fe->pre_speech;
417  }
418  } else {
419  outidx++;
420  (*inout_nframes)--;
421  }
422  }
423  /* Amount of data behind the original input which is still needed. */
424  if (fe->num_overflow_samps > 0)
425  fe->num_overflow_samps -= fe->frame_shift;
426 
427  return outidx;
428 }
429 
/*
 * Convert a block of input samples into output frames.
 *
 * Two modes, selected by buf_cep:
 *  - buf_cep == NULL: nothing is processed.  The maximum number of frames
 *    that the given number of samples (plus buffered overflow samples)
 *    could produce is stored in *inout_nframes and also returned; frames
 *    held in the pre-speech buffer are added when not in speech.
 *  - buf_cep != NULL: up to *inout_nframes frames are written to buf_cep.
 *    On return *inout_nframes holds the number of frames written,
 *    *inout_spch / *inout_nsamps have been advanced past the consumed
 *    samples, and the function returns 0.
 *
 * out_frameidx, when non-NULL, is set to 0 up front and may be updated by
 * fe_check_prespeech().  voiced_spch is only tested against NULL here (the
 * result is passed to fe_write_frame()); voiced_spch_nsamps is not used
 * in this function.
 */
430 int
431 fe_process_frames_ext(fe_t *fe,
432  int16 const **inout_spch,
433  size_t *inout_nsamps,
434  mfcc_t **buf_cep,
435  int32 *inout_nframes,
436  int16 *voiced_spch,
437  int32 *voiced_spch_nsamps,
438  int32 *out_frameidx)
439 {
440  int outidx, n_overflow, orig_n_overflow;
441  int16 const *orig_spch;
442  size_t orig_nsamps;
443 
444  /* The logic here is pretty complex, please be careful with modifications */
445 
446  /* FIXME: Dump PCM data if needed */
447 
448  /* In the special case where there is no output buffer, return the
449  * maximum number of frames which would be generated. */
450  if (buf_cep == NULL) {
451  if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size)
452  *inout_nframes = 0;
453  else
454  *inout_nframes = 1
455  + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size)
456  / fe->frame_shift);
 /* Frames waiting in the pre-speech buffer are counted as well when
  * not currently in speech. */
457  if (!fe->vad_data->in_speech)
458  *inout_nframes += fe_prespch_ncep(fe->vad_data->prespch_buf);
459  return *inout_nframes;
460  }
461 
462  if (out_frameidx)
463  *out_frameidx = 0;
464 
465  /* Are there not enough samples to make at least 1 frame? */
466  if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size) {
467  if (*inout_nsamps > 0) {
468  /* Append them to the overflow buffer. */
469  memcpy(fe->overflow_samps + fe->num_overflow_samps,
470  *inout_spch, *inout_nsamps * (sizeof(int16)));
471  fe->num_overflow_samps += *inout_nsamps;
472  fe->num_processed_samps += *inout_nsamps;
473  *inout_spch += *inout_nsamps;
474  *inout_nsamps = 0;
475  }
476  /* We produced no frames of output, sorry! */
477  *inout_nframes = 0;
478  return 0;
479  }
480 
481  /* Can't write a frame? Then do nothing! */
482  if (*inout_nframes < 1) {
483  *inout_nframes = 0;
484  return 0;
485  }
486 
487  /* Index of output frame. */
488  outidx = 0;
489 
490  /* Try to read from prespeech buffer */
491  if (fe->vad_data->in_speech && fe_prespch_ncep(fe->vad_data->prespch_buf) > 0) {
492  outidx = fe_copy_from_prespch(fe, inout_nframes, buf_cep, outidx);
493  if ((*inout_nframes) < 1) {
494  /* mfcc buffer is filled from prespeech buffer */
495  *inout_nframes = outidx;
496  return 0;
497  }
498  }
499 
500  /* Keep track of the original start of the buffer. */
501  orig_spch = *inout_spch;
502  orig_nsamps = *inout_nsamps;
503  orig_n_overflow = fe->num_overflow_samps;
504 
505  /* Start processing, taking care of any incoming overflow. */
506  if (fe->num_overflow_samps > 0) {
 /* Number of new samples needed to complete the buffered frame. */
507  int offset = fe->frame_size - fe->num_overflow_samps;
508  /* Append start of spch to overflow samples to make a full frame. */
509  memcpy(fe->overflow_samps + fe->num_overflow_samps,
510  *inout_spch, offset * sizeof(**inout_spch));
511  fe_read_frame(fe, fe->overflow_samps, fe->frame_size);
512  /* Update input-output pointers and counters. */
513  *inout_spch += offset;
514  *inout_nsamps -= offset;
515  } else {
516  fe_read_frame(fe, *inout_spch, fe->frame_size);
517  /* Update input-output pointers and counters. */
518  *inout_spch += fe->frame_size;
519  *inout_nsamps -= fe->frame_size;
520  }
521 
 /* Write the first frame; fe_check_prespeech() decides whether it
  * advances the output index and decrements the overflow count. */
522  fe_write_frame(fe, buf_cep[outidx], voiced_spch != NULL);
523  outidx = fe_check_prespeech(fe, inout_nframes, buf_cep, outidx, out_frameidx, inout_nsamps, orig_nsamps);
524 
525  /* Process all remaining frames. */
526  while (*inout_nframes > 0 && *inout_nsamps >= (size_t)fe->frame_shift) {
527  fe_shift_frame(fe, *inout_spch, fe->frame_shift);
528  fe_write_frame(fe, buf_cep[outidx], voiced_spch != NULL);
529 
530  outidx = fe_check_prespeech(fe, inout_nframes, buf_cep, outidx, out_frameidx, inout_nsamps, orig_nsamps);
531 
532  /* Update input-output pointers and counters. */
533  *inout_spch += fe->frame_shift;
534  *inout_nsamps -= fe->frame_shift;
535  }
536 
537  /* How many relevant overflow samples are there left? */
538  if (fe->num_overflow_samps <= 0) {
539  /* Maximum number of overflow samples past *inout_spch to save. */
540  n_overflow = *inout_nsamps;
541  if (n_overflow > fe->frame_shift)
542  n_overflow = fe->frame_shift;
543  fe->num_overflow_samps = fe->frame_size - fe->frame_shift;
544  /* Make sure this isn't an illegal read! */
545  if (fe->num_overflow_samps > *inout_spch - orig_spch)
546  fe->num_overflow_samps = *inout_spch - orig_spch;
547  fe->num_overflow_samps += n_overflow;
548  if (fe->num_overflow_samps > 0) {
 /* The copy starts (frame_size - frame_shift) samples before the
  * current input position. */
549  memcpy(fe->overflow_samps,
550  *inout_spch - (fe->frame_size - fe->frame_shift),
551  fe->num_overflow_samps * sizeof(**inout_spch));
552  /* Update the input pointer to cover this stuff. */
553  *inout_spch += n_overflow;
554  *inout_nsamps -= n_overflow;
555  }
556  } else {
557  /* There is still some relevant data left in the overflow buffer. */
558  /* Shift existing data to the beginning. */
559  memmove(fe->overflow_samps,
560  fe->overflow_samps + orig_n_overflow - fe->num_overflow_samps,
561  fe->num_overflow_samps * sizeof(*fe->overflow_samps));
562  /* Copy in whatever we had in the original speech buffer. */
563  n_overflow = *inout_spch - orig_spch + *inout_nsamps;
564  if (n_overflow > fe->frame_size - fe->num_overflow_samps)
565  n_overflow = fe->frame_size - fe->num_overflow_samps;
566  memcpy(fe->overflow_samps + fe->num_overflow_samps,
567  orig_spch, n_overflow * sizeof(*orig_spch));
568  fe->num_overflow_samps += n_overflow;
569  /* Advance the input pointers. */
570  if (n_overflow > *inout_spch - orig_spch) {
571  n_overflow -= (*inout_spch - orig_spch);
572  *inout_spch += n_overflow;
573  *inout_nsamps -= n_overflow;
574  }
575  }
576 
577  /* Finally update the frame counter with the number of frames
578  * and global sample counter with number of samples we processed */
579  *inout_nframes = outidx; /* FIXME: Not sure why I wrote it this way... */
580  fe->num_processed_samps += orig_nsamps - *inout_nsamps;
581 
582  return 0;
583 }
584 
585 int
586 fe_process_utt(fe_t * fe, int16 const * spch, size_t nsamps,
587  mfcc_t *** cep_block, int32 * nframes)
588 {
589  mfcc_t **cep;
590  int rv;
591 
592  /* Figure out how many frames we will need. */
593  fe_process_frames(fe, NULL, &nsamps, NULL, nframes, NULL);
594  /* Create the output buffer (it has to exist, even if there are no output frames). */
595  if (*nframes)
596  cep = (mfcc_t **)ckd_calloc_2d(*nframes, fe->feature_dimension, sizeof(**cep));
597  else
598  cep = (mfcc_t **)ckd_calloc_2d(1, fe->feature_dimension, sizeof(**cep));
599  /* Now just call fe_process_frames() with the allocated buffer. */
600  rv = fe_process_frames(fe, &spch, &nsamps, cep, nframes, NULL);
601  *cep_block = cep;
602 
603  return rv;
604 }
605 
606 
607 int32
608 fe_end_utt(fe_t * fe, mfcc_t * cepvector, int32 * nframes)
609 {
610  /* Process any remaining data, not very accurate for the VAD */
611  *nframes = 0;
612  if (fe->num_overflow_samps > 0) {
613  fe_read_frame(fe, fe->overflow_samps, fe->num_overflow_samps);
614  fe_write_frame(fe, cepvector, FALSE);
615  if (fe->vad_data->in_speech)
616  *nframes = 1;
617  }
618 
619  /* reset overflow buffers... */
620  fe->num_overflow_samps = 0;
621 
622  return 0;
623 }
624 
625 fe_t *
626 fe_retain(fe_t *fe)
627 {
628  ++fe->refcount;
629  return fe;
630 }
631 
632 int
633 fe_free(fe_t * fe)
634 {
635  if (fe == NULL)
636  return 0;
637  if (--fe->refcount > 0)
638  return fe->refcount;
639 
640  /* kill FE instance - free everything... */
641  if (fe->mel_fb) {
642  if (fe->mel_fb->mel_cosine)
643  fe_free_2d((void *) fe->mel_fb->mel_cosine);
644  ckd_free(fe->mel_fb->lifter);
645  ckd_free(fe->mel_fb->spec_start);
646  ckd_free(fe->mel_fb->filt_start);
647  ckd_free(fe->mel_fb->filt_width);
648  ckd_free(fe->mel_fb->filt_coeffs);
649  ckd_free(fe->mel_fb);
650  }
651  ckd_free(fe->spch);
652  ckd_free(fe->frame);
653  ckd_free(fe->ccc);
654  ckd_free(fe->sss);
655  ckd_free(fe->spec);
656  ckd_free(fe->mfspec);
657  ckd_free(fe->overflow_samps);
658  ckd_free(fe->hamming_window);
659 
660  if (fe->noise_stats)
661  fe_free_noisestats(fe->noise_stats);
662 
663  if (fe->vad_data) {
664  fe_prespch_free(fe->vad_data->prespch_buf);
665  ckd_free(fe->vad_data);
666  }
667 
668  cmd_ln_free_r(fe->config);
669  ckd_free(fe);
670 
671  return 0;
672 }
673 
677 int32
678 fe_mfcc_to_float(fe_t * fe,
679  mfcc_t ** input, float32 ** output, int32 nframes)
680 {
681  int32 i;
682 
683 #ifndef FIXED_POINT
684  if ((void *) input == (void *) output)
685  return nframes * fe->feature_dimension;
686 #endif
687  for (i = 0; i < nframes * fe->feature_dimension; ++i)
688  output[0][i] = MFCC2FLOAT(input[0][i]);
689 
690  return i;
691 }
692 
696 int32
697 fe_float_to_mfcc(fe_t * fe,
698  float32 ** input, mfcc_t ** output, int32 nframes)
699 {
700  int32 i;
701 
702 #ifndef FIXED_POINT
703  if ((void *) input == (void *) output)
704  return nframes * fe->feature_dimension;
705 #endif
706  for (i = 0; i < nframes * fe->feature_dimension; ++i)
707  output[0][i] = FLOAT2MFCC(input[0][i]);
708 
709  return i;
710 }
711 
712 int32
713 fe_logspec_to_mfcc(fe_t * fe, const mfcc_t * fr_spec, mfcc_t * fr_cep)
714 {
715 #ifdef FIXED_POINT
716  fe_spec2cep(fe, fr_spec, fr_cep);
717 #else /* ! FIXED_POINT */
718  powspec_t *powspec;
719  int32 i;
720 
721  powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t));
722  for (i = 0; i < fe->mel_fb->num_filters; ++i)
723  powspec[i] = (powspec_t) fr_spec[i];
724  fe_spec2cep(fe, powspec, fr_cep);
725  ckd_free(powspec);
726 #endif /* ! FIXED_POINT */
727  return 0;
728 }
729 
730 int32
731 fe_logspec_dct2(fe_t * fe, const mfcc_t * fr_spec, mfcc_t * fr_cep)
732 {
733 #ifdef FIXED_POINT
734  fe_dct2(fe, fr_spec, fr_cep, 0);
735 #else /* ! FIXED_POINT */
736  powspec_t *powspec;
737  int32 i;
738 
739  powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t));
740  for (i = 0; i < fe->mel_fb->num_filters; ++i)
741  powspec[i] = (powspec_t) fr_spec[i];
742  fe_dct2(fe, powspec, fr_cep, 0);
743  ckd_free(powspec);
744 #endif /* ! FIXED_POINT */
745  return 0;
746 }
747 
748 int32
749 fe_mfcc_dct3(fe_t * fe, const mfcc_t * fr_cep, mfcc_t * fr_spec)
750 {
751 #ifdef FIXED_POINT
752  fe_dct3(fe, fr_cep, fr_spec);
753 #else /* ! FIXED_POINT */
754  powspec_t *powspec;
755  int32 i;
756 
757  powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t));
758  fe_dct3(fe, fr_cep, powspec);
759  for (i = 0; i < fe->mel_fb->num_filters; ++i)
760  fr_spec[i] = (mfcc_t) powspec[i];
761  ckd_free(powspec);
762 #endif /* ! FIXED_POINT */
763  return 0;
764 }
Command-line and other configuration parsing and handling.
SPHINXBASE_EXPORT cmd_ln_t * cmd_ln_retain(cmd_ln_t *cmdln)
Retain ownership of a command-line argument set.
Definition: cmd_ln.c:1039
#define E_INFO(...)
Print logging information to standard error stream.
Definition: err.h:114
#define ckd_calloc_2d(d1, d2, sz)
Macro for ckd_calloc_2d
Definition: ckd_alloc.h:270
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition: ckd_alloc.h:248
#define E_ERROR(...)
Print error message to error log.
Definition: err.h:104
Base Struct to hold all structure for MFCC computation.
Definition: fe_internal.h:75
Sphinx's memory allocation/deallocation routines.
SPHINXBASE_EXPORT int cmd_ln_free_r(cmd_ln_t *cmdln)
Release a command-line argument set and all associated strings.
Definition: cmd_ln.c:1046
Basic type definitions used in Sphinx.
SPHINXBASE_EXPORT char const * cmd_ln_str_r(cmd_ln_t *cmdln, char const *name)
Retrieve a string from a command-line object.
Definition: cmd_ln.c:949
#define s3_rand_seed(s)
Macros to simplify calling of random generator function.
Definition: genrand.h:144
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition: ckd_alloc.c:244
SPHINXBASE_EXPORT cmd_ln_t * cmd_ln_get(void)
Retrieve the global cmd_ln_t object used by non-re-entrant functions.
Definition: cmd_ln.c:493
Implementation of logging routines.
Argument definition structure.
High performance portable random generator created by Takuji Nishimura and Makoto Matsumoto...
Opaque structure used to hold the results of command-line parsing.
#define ckd_malloc(sz)
Macro for ckd_malloc
Definition: ckd_alloc.h:253
#define cmd_ln_boolean_r(c, n)
Retrieve a boolean value from a command-line object.
Definition: cmd_ln.h:334
Structure for the front-end computation.
Definition: fe_internal.h:117