Speech recognition with live audio input and endpointing.
This file shows how to use PocketSphinx in conjunction with sox to detect and recognize speech from the default audio input device.
Unlike the single-input-file example, this program reads audio continuously from a live input stream. To compile it, assuming you have built the library as in these directions, you can run:
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <pocketsphinx.h>
/*
 * Set to 1 by catch_sig() to ask the main loop to stop.
 *
 * Declared volatile sig_atomic_t because it is written from a signal
 * handler and read from normal code; that is the only object type the
 * C standard guarantees can be safely shared in this way.
 */
static volatile sig_atomic_t global_done = 0;

/**
 * Signal handler: request termination of the main loop.
 *
 * Only sets a flag; no other work is done in signal context.
 *
 * @param signum Signal number (unused).
 */
static void
catch_sig(int signum)
{
    (void)signum;
    global_done = 1;
}
#ifdef WIN32
/* The Microsoft C runtime names the pipe functions with a leading underscore. */
#define popen _popen
#define pclose _pclose
/* _popen() defaults to text mode, which would translate bytes in the raw
 * sample stream; request binary mode explicitly. */
#define POPEN_READ_MODE "rb"
#else
/* POSIX popen() only specifies "r" and "w". */
#define POPEN_READ_MODE "r"
#endif

/* Command template; %d is replaced by the sample rate.  See the sox manual
 * for the meaning of the individual options. */
#define SOXCMD "sox -q -r %d -c 1 -b 16 -e signed-integer -d -t raw -"

/**
 * Start sox as a child process and return a stream reading its output.
 *
 * The command line is built from SOXCMD with the given sample rate.
 * Every failure is reported through E_FATAL_SYSTEM.
 * NOTE(review): this relies on E_FATAL_SYSTEM terminating the process
 * (see err.h); if it can return, the code below would continue with
 * invalid state.
 *
 * @param sample_rate Sample rate substituted into the sox command line.
 * @return Stream opened with popen(); the caller closes it with pclose().
 */
static FILE *
popen_sox(int sample_rate)
{
    char *soxcmd;
    int len;
    FILE *sox;

    /* First pass only measures the formatted length. */
    len = snprintf(NULL, 0, SOXCMD, sample_rate);
    if (len < 0)
        E_FATAL_SYSTEM("snprintf() failed");
    if ((soxcmd = malloc((size_t)len + 1)) == NULL)
        E_FATAL_SYSTEM("Failed to allocate string");
    if (snprintf(soxcmd, (size_t)len + 1, SOXCMD, sample_rate) != len)
        E_FATAL_SYSTEM("snprintf() failed");
    if ((sox = popen(soxcmd, POPEN_READ_MODE)) == NULL)
        E_FATAL_SYSTEM("Failed to popen(%s)", soxcmd);
    /* The command string is no longer needed once the pipe is open. */
    free(soxcmd);

    return sox;
}
int
main(int argc, char *argv[])
{
FILE *sox;
short *frame;
size_t frame_size;
(void)argc; (void)argv;
config = ps_config_init(NULL);
ps_default_search_args(config);
if ((decoder = ps_init(config)) == NULL)
E_FATAL(
"PocketSphinx decoder init failed\n");
if ((ep = ps_endpointer_init(0, 0.0, 0, 0, 0)) == NULL)
E_FATAL(
"PocketSphinx endpointer init failed\n");
if ((frame = malloc(frame_size * sizeof(frame[0]))) == NULL)
if (signal(SIGINT, catch_sig) == SIG_ERR)
while (!global_done) {
const int16 *speech;
int prev_in_speech = ps_endpointer_in_speech(ep);
size_t len, end_samples;
if ((len = fread(frame, sizeof(frame[0]),
frame_size, sox)) != frame_size) {
if (len > 0) {
speech = ps_endpointer_end_stream(ep, frame,
frame_size,
&end_samples);
}
else
break;
} else {
speech = ps_endpointer_process(ep, frame);
}
if (speech != NULL) {
const char *hyp;
if (!prev_in_speech) {
fprintf(stderr, "Speech start at %.2f\n",
ps_endpointer_speech_start(ep));
ps_start_utt(decoder);
}
if (ps_process_raw(decoder, speech, frame_size, FALSE, FALSE) < 0)
E_FATAL(
"ps_process_raw() failed\n");
if ((hyp = ps_get_hyp(decoder, NULL)) != NULL)
fprintf(stderr, "PARTIAL RESULT: %s\n", hyp);
if (!ps_endpointer_in_speech(ep)) {
fprintf(stderr, "Speech end at %.2f\n",
ps_endpointer_speech_end(ep));
ps_end_utt(decoder);
if ((hyp = ps_get_hyp(decoder, NULL)) != NULL)
printf("%s\n", hyp);
}
}
}
free(frame);
if (pclose(sox) < 0)
ps_endpointer_free(ep);
ps_free(decoder);
ps_config_free(config);
return 0;
}
#define ps_endpointer_sample_rate(ep)
Definition endpointer.h:163
#define ps_endpointer_frame_size(ep)
Definition endpointer.h:153
#define E_FATAL(...)
Definition err.h:80
#define E_FATAL_SYSTEM(...)
Definition err.h:89
#define E_ERROR_SYSTEM(...)
Definition err.h:98
Speech recognizer object.
Simple voice activity detection based endpointing.