48 #include <sphinxbase/byteorder.h>
53 static arg_t defn[] = {
57 "Single audio input file" },
62 "Single text output file (standard output will be used if not given)" },
67 "Control file for batch processing" },
72 "If a control file was specified, the number of utterances to skip at the head of the file" },
77 "If a control file was specified, the number of utterances to process (see -nskip too)" },
82 "Input directory, input file names are relative to this, if defined" },
87 "Input extension to be applied to all input files" },
92 "Output directory, output files are relative to this" },
97 "Output extension to be applied to all output files" },
102 "Defines input format as NIST sphere" },
107 "Defines input format as raw binary data" },
112 "Defines input format as Microsoft Wav (RIFF)" },
117 "Sampling rate of audio data (will be determined automatically if 0)" },
122 "Endianness of audio data (will be determined automatically if not given)" },
127 "Frame shift: number of seconds between each analysis frame." },
132 "Number of seconds in each analysis frame (needs to be greater than twice the longest period you wish to detect - to detect down to 80Hz you need a frame length of 2.0/80 = 0.025)." },
137 "Number of frames on either side of the current frame to use for smoothing." },
142 "Threshold of normalized difference under which to search for the fundamental period." },
147 "Fraction of the best local estimate to use as a search range for smoothing." },
149 { NULL, 0, NULL, NULL }
152 static int extract_pitch(
const char *in,
const char *out);
153 static int run_control_file(
const char *ctl);
156 main(
int argc,
char *argv[])
175 guess_file_type(
char const *file, FILE *infh)
179 fseek(infh, 0, SEEK_SET);
180 if (fread(header, 1, 4, infh) != 4) {
184 if (0 == memcmp(header,
"RIFF", 4)) {
185 E_INFO(
"%s appears to be a WAV file\n", file);
190 else if (0 == memcmp(header,
"NIST", 4)) {
191 E_INFO(
"%s appears to be a NIST SPHERE file\n", file);
197 E_INFO(
"%s appears to be raw data\n", file);
202 fseek(infh, 0, SEEK_SET);
206 #define TRY_FREAD(ptr, size, nmemb, stream) \
207 if (fread(ptr, size, nmemb, stream) != (nmemb)) { \
208 E_ERROR_SYSTEM("Failed to read %d bytes", size * nmemb); \
213 read_riff_header(FILE *infh)
216 int32 intval, header_len;
223 TRY_FREAD(
id, 1, 4, infh);
225 TRY_FREAD(&intval, 4, 1, infh);
227 TRY_FREAD(
id, 1, 4, infh);
228 if (0 != memcmp(
id,
"WAVE", 4)) {
229 E_ERROR(
"This is not a WAVE file\n");
233 TRY_FREAD(
id, 1, 4, infh);
234 if (0 != memcmp(
id,
"fmt ", 4)) {
235 E_ERROR(
"Format chunk missing\n");
239 TRY_FREAD(&intval, 4, 1, infh);
244 TRY_FREAD(&shortval, 2, 1, infh);
245 SWAP_LE_16(&shortval);
247 E_ERROR(
"WAVE file is not in PCM format\n");
252 TRY_FREAD(&shortval, 2, 1, infh);
253 SWAP_LE_16(&shortval);
255 E_ERROR(
"WAVE file is not single channel\n");
260 TRY_FREAD(&intval, 4, 1, infh);
265 E_WARN(
"WAVE file sampling rate %d != -samprate %d\n",
270 TRY_FREAD(&intval, 4, 1, infh);
273 TRY_FREAD(&shortval, 2, 1, infh);
276 TRY_FREAD(&shortval, 2, 1, infh);
277 SWAP_LE_16(&shortval);
278 if (shortval != 16) {
279 E_ERROR(
"WAVE file is not 16-bit\n");
285 fseek(infh, header_len - 16, SEEK_CUR);
289 TRY_FREAD(
id, 1, 4, infh);
290 if (0 == memcmp(
id,
"data", 4)) {
292 TRY_FREAD(&intval, 4, 1, infh);
298 TRY_FREAD(&intval, 4, 1, infh);
300 fseek(infh, intval, SEEK_CUR);
311 read_nist_header(FILE *infh)
316 TRY_FREAD(hdr, 1, 1024, infh);
321 if ((line = strstr(hdr,
"sample_rate")) == NULL) {
322 E_ERROR(
"No sampling rate in NIST header!\n");
325 c = strchr(line,
'\n');
327 c = strrchr(line,
' ');
329 E_ERROR(
"Could not find sampling rate!\n");
336 E_WARN(
"NIST file sampling rate %d != -samprate %d\n",
340 if (line + strlen(line) < hdr + 1023)
341 line[strlen(line)] =
' ';
342 if ((line = strstr(hdr,
"sample_byte_format")) == NULL) {
343 E_ERROR(
"No sample byte format in NIST header!\n");
346 c = strchr(line,
'\n');
348 c = strrchr(line,
' ');
350 E_ERROR(
"Could not find sample byte order!\n");
354 if (0 == memcmp(c,
"01", 2)) {
357 else if (0 == memcmp(c,
"10", 2)) {
361 E_ERROR(
"Unknown byte order %s\n", c);
372 extract_pitch(
const char *in,
const char *out)
374 FILE *infh = NULL, *outfh = NULL;
375 size_t flen, fshift, nsamps;
378 uint16 period, bestdiff;
382 if ((outfh = fopen(out,
"w")) == NULL) {
390 if ((infh = fopen(in,
"rb")) == NULL) {
400 if (guess_file_type(in, infh) < 0)
407 if (read_riff_header(infh) < 0)
411 if (read_nist_header(infh) < 0)
431 E_ERROR(
"Failed to initialize YIN\n");
436 if (fread(buf,
sizeof(*buf), flen, infh) != flen) {
441 while (!feof(infh)) {
444 if (
yin_read(yin, &period, &bestdiff)) {
445 fprintf(outfh,
"%.3f %.2f %.2f\n",
449 bestdiff > 32768 ? 0.0 : 1.0 - (
double)bestdiff / 32768,
451 period == 0 ? sps : (
double)sps / period);
455 memmove(buf, buf + fshift, (flen - fshift) *
sizeof(*buf));
456 if (fread(buf + flen - fshift,
sizeof(*buf), fshift, infh) != fshift) {
462 while (
yin_read(yin, &period, &bestdiff)) {
463 fprintf(outfh,
"%.3f %.2f %.2f\n",
467 bestdiff > 32768 ? 0.0 : 1.0 - (
double)bestdiff / 32768,
469 period == 0 ? sps : (
double)sps / period);
476 if (outfh && outfh != stdout)
484 if (infh) fclose(infh);
485 if (outfh && outfh != stdout)
491 run_control_file(
const char *ctl)
495 char *di, *dout, *ei, *eio;
497 int rv, guess_type, guess_sps, guess_endian;
510 guess_endian = (
cmd_ln_str(
"-input_endian") == NULL);
512 if ((ctlfh = fopen(ctl,
"r")) == NULL) {
533 while ((line =
fread_line(ctlfh, &len)) != NULL) {
534 char *infile, *outfile;
546 if (line[len-1] ==
'\n')
563 rv = extract_pitch(infile, outfile);
SPHINXBASE_EXPORT void yin_end(yin_t *pe)
Mark the end of an utterance.
#define E_ERROR_SYSTEM(...)
Print error text, then call perror("").
Command-line and other configuration parsing and handling.
Miscellaneous useful string functions.
#define E_INFO(...)
Print logging information to standard error stream.
#define cmd_ln_set_int32(n, i)
Set a 32-bit integer value in the global command line.
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
#define E_ERROR(...)
Print error message to error log.
Sphinx's memory allocation/deallocation routines.
#define cmd_ln_set_boolean(n, b)
Set a boolean value in the global command line.
#define ARG_STRING
String argument (optional).
#define ckd_salloc(ptr)
Macro for ckd_salloc
SPHINXBASE_EXPORT int yin_read(yin_t *pe, uint16 *out_period, uint16 *out_bestdiff)
Read a raw estimated pitch value from the pitch estimator.
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
SPHINXBASE_EXPORT void yin_free(yin_t *pe)
Free a moving-window pitch estimator.
SPHINXBASE_EXPORT void yin_start(yin_t *pe)
Start processing an utterance.
SPHINXBASE_EXPORT char * fread_line(FILE *stream, size_t *out_len)
Read a line of arbitrary length from a file and return it as a newly allocated string.
SPHINXBASE_EXPORT yin_t * yin_init(int frame_size, float search_threshold, float search_range, int smooth_window)
Initialize moving-window pitch estimation.
Implementation of logging routines.
#define ARG_BOOLEAN
Boolean (true/false) argument (optional).
Argument definition structure.
#define E_WARN(...)
Print warning message to error log.
Implementation of pitch estimation.
SPHINXBASE_EXPORT void yin_write(yin_t *pe, int16 const *frame)
Feed a frame of data to the pitch estimator.
SPHINXBASE_EXPORT char * string_join(const char *base,...)
Concatenate a NULL-terminated argument list of strings, returning a newly allocated string...
#define cmd_ln_boolean(name)
Retrieve a boolean from the global command line.
SPHINXBASE_EXPORT int32 cmd_ln_parse(const arg_t *defn, int32 argc, char *argv[], int32 strict)
Non-reentrant version of cmd_ln_parse().
SPHINXBASE_EXPORT void cmd_ln_free(void)
Free the global command line, if any exists.
#define cmd_ln_set_str(n, s)
Set a string in the global command line.
#define cmd_ln_str(name)
Retrieve a string from the global command line.
File I/O related operations.
#define cmd_ln_int32(name)
Retrieve a 32-bit integer from the global command line.
#define cmd_ln_float32(name)
Retrieve a 32-bit float from the global command line.