2010-04-18 15 views
6

Domanda: Sto cercando di utilizzare il motore di sintesi vocale eSpeak. Su Linux sono riuscito a farlo funzionare bene (codice sotto). Ora volevo portare questo programma di base anche su Windows, ma è quasi impossibile... Come si utilizza la versione SAPI/DLL di eSpeak su Windows?

Parte del problema è che la DLL di Windows consente solo AUDIO_OUTPUT_SYNCHRONOUS, il che significa che richiede una callback, ma non riesco a capire come riprodurre l'audio dalla callback... All'inizio il programma andava in crash, poi ho capito che serviva una funzione di callback; ora ho i dati nella funzione di callback, ma non so come riprodurli, dato che non sono un file wav né vengono riprodotti automaticamente come su Linux.

Il sito SourceForge è piuttosto inutile, perché in sostanza dice di utilizzare la versione SAPI, ma poi non c'è alcun esempio su come utilizzare la DLL SAPI di eSpeak...

Comunque, ecco il mio codice, chiunque può aiutare ?

#ifdef __cplusplus 
#include <cstdio> 
#include <cstdlib> 
#include <cstring> 
#else 
#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 
#endif 

#include <assert.h> 
#include <ctype.h> 

//#include "speak_lib.h" 
#include "espeak/speak_lib.h" 

// libespeak-dev: /usr/include/espeak/speak_lib.h 
// apt-get install libespeak-dev 
// apt-get install libportaudio-dev 

// g++ -o mine mine.cpp -lespeak 
// g++ -o mine mine.cpp -I/usr/include/espeak/ -lespeak 
// gcc -o mine mine.cpp -I/usr/include/espeak/ -lespeak 


char voicename[40]; 
int samplerate; 
int quiet = 0; 
static char genders[4] = {' ','M','F',' '}; 

//const char *data_path = "/usr/share/"; // /usr/share/espeak-data/ 
const char *data_path = NULL; // use default path for espeak-data 


/*
 * Compare the tail of `s` against `sub`.
 *
 * Returns 0 when `s` ends with `sub`; otherwise the non-zero memcmp()
 * result for the last strlen(sub) bytes of `s`. When `sub` is longer than
 * `s` it cannot be a suffix, so a non-zero value is returned without
 * reading memory in front of `s`.
 */
int strrcmp(const char *s, const char *sub)
{
    size_t slen = strlen(s);
    size_t sublen = strlen(sub);

    /* Guard: s + slen - sublen would point before the start of s. */
    if (sublen > slen)
        return 1;

    return memcmp(s + (slen - sublen), sub, sublen);
}


/*
 * Copy the NUL-terminated string `source` into `dest`, terminator included.
 *
 * Returns a pointer to the terminating NUL written into `dest`, so the
 * caller can keep appending at that position. Both pointers must be
 * non-NULL and distinct (checked with assert).
 */
char * strrcpy(char *dest, const char *source)
{
    assert(dest != NULL);
    assert(source != NULL);
    assert(dest != source);

    char *out = dest;
    const char *in = source;

    for (;;)
    {
        char ch = *in++;
        *out = ch;
        if (ch == '\0')
            return out; /* points at the terminator just written */
        ++out;
    }
}

/*
 * Look up a voice name for a language short sign such as "DE".
 *
 * The short sign is lower-cased and compared against the tail of each
 * entry's `languages` field in the list returned by espeak_ListVoices(NULL).
 * The `name` of the first matching voice is returned; if nothing matches
 * (or pszShortSign is NULL) the string "default" is returned.
 *
 * The result lives in a static buffer of LANGUAGE_LENGTH bytes: it is
 * overwritten by the next call and longer names are truncated to fit.
 */
const char* GetLanguageVoiceName(const char* pszShortSign)
{
#define LANGUAGE_LENGTH 30
    static char szReturnValue[LANGUAGE_LENGTH];
    memset(szReturnValue, 0, LANGUAGE_LENGTH);

    if (pszShortSign != NULL)
    {
        // Stop one short of the buffer size so the key stays NUL-terminated.
        // tolower() needs an unsigned char value to be well defined.
        for (int i = 0; i < LANGUAGE_LENGTH - 1 && pszShortSign[i] != '\0'; ++i)
            szReturnValue[i] = (char) tolower((unsigned char) pszShortSign[i]);

        const size_t keyLen = strlen(szReturnValue);
        const espeak_VOICE **voices = espeak_ListVoices(NULL);

        const espeak_VOICE *v;
        for (int ix = 0; voices != NULL && (v = voices[ix]) != NULL; ix++)
        {
            if (v->languages == NULL || v->name == NULL)
                continue;

            // A field shorter than the key cannot end with it; skipping it
            // also keeps strrcmp from looking in front of the string.
            if (strlen(v->languages) < keyLen)
                continue;

            if (!strrcmp(v->languages, szReturnValue))
            {
                // Bounded copy: truncate rather than overflow the buffer.
                size_t nameLen = strlen(v->name);
                if (nameLen > LANGUAGE_LENGTH - 1)
                    nameLen = LANGUAGE_LENGTH - 1;
                memcpy(szReturnValue, v->name, nameLen);
                szReturnValue[nameLen] = '\0';
                return szReturnValue;
            }
        } // End for
    }

    strcpy(szReturnValue, "default");
    return szReturnValue;
} // End function GetLanguageVoiceName


/*
 * Print every voice in the list returned by espeak_ListVoices(NULL) to
 * stdout: its `languages` field, age, gender letter and name, followed by
 * a blank separator.
 */
void ListVoices()
{
    const espeak_VOICE **voices = espeak_ListVoices(NULL);
    if (voices == NULL)
        return;

    const espeak_VOICE *v;
    for (int ix = 0; (v = voices[ix]) != NULL; ix++)
    {
        // genders has a fixed number of entries; print a blank for any
        // value that would index past the table.
        char genderLetter = ((size_t) v->gender < sizeof(genders)) ? genders[v->gender] : ' ';

        printf("Shortsign: %s\n", v->languages);
        printf("age: %d\n", v->age);
        printf("gender: %c\n", genderLetter);
        printf("name: %s\n", v->name);
        printf("\n\n");
    } // End for
} // End function ListVoices


/*
 * Demo entry point: initialise eSpeak for direct playback, speak one
 * sentence with the voice found for "DE", then one word with the voice
 * found for "EN", and shut the library down.
 *
 * Returns EXIT_FAILURE when espeak_Initialize reports a non-positive
 * sample rate, EXIT_SUCCESS otherwise.
 */
int main()
{
    printf("Hello World!\n");
    const char* szVersionInfo = espeak_Info(NULL);

    printf("Espeak version: %s\n", szVersionInfo);
    samplerate = espeak_Initialize(AUDIO_OUTPUT_PLAYBACK,0,data_path,0);
    if (samplerate <= 0)
    {
        // Nothing below can work without an initialised library.
        printf("Unable to initialize espeak\n");
        return EXIT_FAILURE;
    }

    // espeak --voices
    strcpy(voicename, GetLanguageVoiceName("DE"));

    if(espeak_SetVoiceByName(voicename) != EE_OK)
    {
        printf("Espeak setvoice error...\n");
    }

    static char word[200] = "Unnamed Player wurde zum Opfer von GSG9";
    int speed = 220;
    // NOTE(review): the comment originally attached here cited a 0-100
    // volume range, which 500 exceeds; confirm the intended value.
    int volume = 500;
    int pitch = 50; // base pitch, range 0-100. 50=normal

    // espeak.cpp 625
    espeak_SetParameter(espeakRATE, speed, 0);
    espeak_SetParameter(espeakVOLUME,volume,0);
    espeak_SetParameter(espeakPITCH,pitch,0);
    // espeakRANGE: pitch range, range 0-100. 0-monotone, 50=normal
    // espeakPUNCTUATION: which punctuation characters to announce:
    //     value in espeak_PUNCT_TYPE (none, all, some),
    espeak_VOICE *voice_spec = espeak_GetCurrentVoice();
    if (voice_spec != NULL)
    {
        voice_spec->gender=2; // 0=none 1=male, 2=female,
        espeak_SetVoiceByProperties(voice_spec);
    }

    // The size passed to espeak_Synth includes the terminating NUL.
    if (espeak_Synth((char*) word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, NULL, NULL) != EE_OK)
    {
        printf("Espeak synth error...\n");
    }
    espeak_Synchronize();

    strcpy(voicename, GetLanguageVoiceName("EN"));
    espeak_SetVoiceByName(voicename);
    strcpy(word, "Googlebot");

    if (espeak_Synth((char*) word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, NULL, NULL) != EE_OK)
    {
        printf("Espeak synth error...\n");
    }
    espeak_Synchronize();

    espeak_Terminate();
    printf("Espeak terminated\n");
    return EXIT_SUCCESS;
}

/* 
if(espeak_SetVoiceByName(voicename) != EE_OK) 
{ 
    memset(&voice_select,0,sizeof(voice_select)); 
    voice_select.languages = voicename; 
    if(espeak_SetVoiceByProperties(&voice_select) != EE_OK) 
    { 
     fprintf(stderr,"%svoice '%s'\n",err_load,voicename); 
     exit(2); 
    } 
} 
*/ 

Il codice di cui sopra è per Linux. Il codice che segue è circa per quanto ho ottenuto su Vista x64 (32 bit emu):

#ifdef __cplusplus 
#include <cstdio> 
#include <cstdlib> 
#include <cstring> 
#else 
#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 
#endif 

#include <assert.h> 
#include <ctype.h> 

#include "speak_lib.h" 
//#include "espeak/speak_lib.h" 

// libespeak-dev: /usr/include/espeak/speak_lib.h 
// apt-get install libespeak-dev 
// apt-get install libportaudio-dev 

// g++ -o mine mine.cpp -lespeak 
// g++ -o mine mine.cpp -I/usr/include/espeak/ -lespeak 
// gcc -o mine mine.cpp -I/usr/include/espeak/ -lespeak 


char voicename[40]; 
int iSampleRate; 
int quiet = 0; 
static char genders[4] = {' ','M','F',' '}; 

//const char *data_path = "/usr/share/"; // /usr/share/espeak-data/ 
//const char *data_path = NULL; // use default path for espeak-data 
const char *data_path = "C:\\Users\\Username\\Desktop\\espeak-1.43-source\\espeak-1.43-source\\"; 


/*
 * Compare the tail of `s` against `sub`.
 *
 * Returns 0 when `s` ends with `sub`; otherwise the non-zero memcmp()
 * result for the last strlen(sub) bytes of `s`. When `sub` is longer than
 * `s` it cannot be a suffix, so a non-zero value is returned without
 * reading memory in front of `s`.
 */
int strrcmp(const char *s, const char *sub)
{
    size_t slen = strlen(s);
    size_t sublen = strlen(sub);

    /* Guard: s + slen - sublen would point before the start of s. */
    if (sublen > slen)
        return 1;

    return memcmp(s + (slen - sublen), sub, sublen);
}


/*
 * Copy the NUL-terminated string `source` into `dest`, terminator included.
 *
 * Returns a pointer to the terminating NUL written into `dest`, so the
 * caller can keep appending at that position. Both pointers must be
 * non-NULL and distinct (checked with assert).
 */
char * strrcpy(char *dest, const char *source)
{
    assert(dest != NULL);
    assert(source != NULL);
    assert(dest != source);

    char *out = dest;
    const char *in = source;

    for (;;)
    {
        char ch = *in++;
        *out = ch;
        if (ch == '\0')
            return out; /* points at the terminator just written */
        ++out;
    }
}

/*
 * Look up a voice name for a language short sign such as "DE".
 *
 * The short sign is lower-cased and compared against the tail of each
 * entry's `languages` field in the list returned by espeak_ListVoices(NULL).
 * The `name` of the first matching voice is returned; if nothing matches
 * (or pszShortSign is NULL) the string "default" is returned.
 *
 * The result lives in a static buffer of LANGUAGE_LENGTH bytes: it is
 * overwritten by the next call and longer names are truncated to fit.
 */
const char* GetLanguageVoiceName(const char* pszShortSign)
{
#define LANGUAGE_LENGTH 30
    static char szReturnValue[LANGUAGE_LENGTH];
    memset(szReturnValue, 0, LANGUAGE_LENGTH);

    if (pszShortSign != NULL)
    {
        // Stop one short of the buffer size so the key stays NUL-terminated.
        // tolower() needs an unsigned char value to be well defined.
        for (int i = 0; i < LANGUAGE_LENGTH - 1 && pszShortSign[i] != '\0'; ++i)
            szReturnValue[i] = (char) tolower((unsigned char) pszShortSign[i]);

        const size_t keyLen = strlen(szReturnValue);
        const espeak_VOICE **voices = espeak_ListVoices(NULL);

        const espeak_VOICE *v;
        for (int ix = 0; voices != NULL && (v = voices[ix]) != NULL; ix++)
        {
            if (v->languages == NULL || v->name == NULL)
                continue;

            // A field shorter than the key cannot end with it; skipping it
            // also keeps strrcmp from looking in front of the string.
            if (strlen(v->languages) < keyLen)
                continue;

            if (!strrcmp(v->languages, szReturnValue))
            {
                // Bounded copy: truncate rather than overflow the buffer.
                size_t nameLen = strlen(v->name);
                if (nameLen > LANGUAGE_LENGTH - 1)
                    nameLen = LANGUAGE_LENGTH - 1;
                memcpy(szReturnValue, v->name, nameLen);
                szReturnValue[nameLen] = '\0';
                return szReturnValue;
            }
        } // End for
    }

    strcpy(szReturnValue, "default");
    return szReturnValue;
} // End function GetLanguageVoiceName


/*
 * Print every voice in the list returned by espeak_ListVoices(NULL) to
 * stdout: its `languages` field, age, gender letter and name, followed by
 * a blank separator.
 */
void ListVoices()
{
    const espeak_VOICE **voices = espeak_ListVoices(NULL);
    if (voices == NULL)
        return;

    const espeak_VOICE *v;
    for (int ix = 0; (v = voices[ix]) != NULL; ix++)
    {
        // genders has a fixed number of entries; print a blank for any
        // value that would index past the table.
        char genderLetter = ((size_t) v->gender < sizeof(genders)) ? genders[v->gender] : ' ';

        printf("Shortsign: %s\n", v->languages);
        printf("age: %d\n", v->age);
        printf("gender: %c\n", genderLetter);
        printf("name: %s\n", v->name);
        printf("\n\n");
    } // End for
} // End function ListVoices


/*
 * Synthesis callback that only logs what it receives.
 *
 * Prints the number of samples handed over, notes when `wav` is NULL, and
 * prints the byte size of the chunk. The actual audio write is still
 * commented out, so nothing is played from here.
 *
 * Returns 0 so that synthesis continues (1 would abort it).
 */
#define LOGI(x) printf("%s\n", x)
static int AndroidEspeakDirectSpeechCallback(short *wav, int numsamples, espeak_EVENT *events)
{
    char buf[100];
    (void) events; // unused in this callback

    sprintf(buf, "AndroidEspeakDirectSpeechCallback: %d samples", numsamples);
    LOGI(buf);

    if (wav == NULL)
    {
        LOGI("Null: speech has completed");
    }

    if (numsamples > 0)
    {
        //audout->write(wav, sizeof(short) * numsamples);
        // sizeof yields a size_t, which must not be printed with %d;
        // convert explicitly and use the matching specifier.
        sprintf(buf, "AudioTrack wrote: %lu bytes", (unsigned long) (sizeof(short) * numsamples));
        LOGI(buf);
    }

    return 0; // continue synthesis (1 is to abort)
}


/*
 * Synthesis callback that appends each chunk of samples to "myfile1.wav".
 *
 * Only the raw 16-bit samples are written; this function emits no file
 * header. The file is opened in append mode for each chunk and closed
 * again before returning, so no handle is left open between callbacks.
 *
 * Returns 0 to continue synthesis, or 1 (abort) when the file cannot be
 * opened or the chunk cannot be written completely.
 */
static int AndroidEspeakSynthToFileCallback(short *wav, int numsamples,espeak_EVENT *events)
{
    char buf[100];
    (void) events; // unused: the target file name is fixed below

    sprintf(buf, "AndroidEspeakSynthToFileCallback: %d samples", numsamples);
    LOGI(buf);

    if (wav == NULL)
    {
        LOGI("Null: speech has completed");
        return 0; // nothing to write
    }

    if (numsamples <= 0)
    {
        return 0; // empty chunk, keep going
    }

    FILE* fp = fopen("myfile1.wav", "ab");
    if (fp == NULL)
    {
        LOGI("Unable to open myfile1.wav");
        return 1; // abort synthesis
    }

    // Write all of the samples, then release the handle straight away.
    size_t written = fwrite(wav, sizeof(short), (size_t) numsamples, fp);
    int closeResult = fclose(fp);

    if (written != (size_t) numsamples || closeResult != 0)
    {
        LOGI("Unable to write samples to myfile1.wav");
        return 1; // abort synthesis
    }

    return 0; // continue synthesis (1 is to abort)
}



/*
 * Demo entry point for the Windows build: initialise eSpeak in
 * AUDIO_OUTPUT_SYNCHRONOUS mode, register the file-writing callback,
 * synthesise one sentence with the "default" voice and shut down.
 *
 * Returns EXIT_FAILURE when espeak_Initialize reports a non-positive
 * sample rate, EXIT_SUCCESS otherwise.
 */
int main()
{
    printf("Hello World!\n");
    const char* szVersionInfo = espeak_Info(NULL);

    printf("Espeak version: %s\n", szVersionInfo);

    iSampleRate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 4096, data_path, 0);
    if (iSampleRate <= 0)
    {
        // Newline added so the message is not glued to whatever follows.
        printf("Unable to initialize espeak\n");
        return EXIT_FAILURE;
    }

    //ListVoices();

    // espeak --voices
    strcpy(voicename, "default");

    if(espeak_SetVoiceByName(voicename) != EE_OK)
    {
        printf("Espeak setvoice error...\n");
    }

    static char word[200] = "Unnamed Player wurde zum Opfer von GSG9";
    int speed = 220;
    // NOTE(review): the comment originally attached here cited a 0-100
    // volume range, which 500 exceeds; confirm the intended value.
    int volume = 500;
    int pitch = 50; // base pitch, range 0-100. 50=normal

    // espeak.cpp 625
    espeak_SetParameter(espeakRATE, speed, 0);
    espeak_SetParameter(espeakVOLUME,volume,0);
    espeak_SetParameter(espeakPITCH,pitch,0);
    // espeakRANGE: pitch range, range 0-100. 0-monotone, 50=normal
    // espeakPUNCTUATION: which punctuation characters to announce:
    //     value in espeak_PUNCT_TYPE (none, all, some),

    // The samples are delivered to this callback instead of being played.
    espeak_SetSynthCallback(AndroidEspeakSynthToFileCallback);

    unsigned int unique_identifier;
    // The size passed to espeak_Synth includes the terminating NUL.
    espeak_ERROR err = espeak_Synth((char*) word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, &unique_identifier, NULL);
    if (err != EE_OK)
    {
        printf("Espeak synth error...\n");
    }

    err = espeak_Synchronize();
    if (err != EE_OK)
    {
        printf("Espeak synchronize error...\n");
    }

    // espeak_Cancel();
    espeak_Terminate();
    printf("Espeak terminated\n");
    system("pause");
    return EXIT_SUCCESS;
}
+1

+1 per pubblicare un codice di esempio su come utilizzare espeak come libreria. Ho avuto difficoltà a trovare esempi. Grazie. – Noremac

risposta

1

Hai provato a passare il buffer che ottieni nella tua callback a sndPlaySound()?

Declare Function sndPlaySound Lib "winmm.dll" Alias "sndPlaySoundA" (ByVal lpszSoundName As String, ByVal uFlags As Long) As Long 

La chiamata WINAPI standard è la seguente:

sndPlaySound(buffer[0], SND_ASYNC | SND_MEMORY) 

In alternativa, se si dispone di un file wav che ha l'audio da riprodurre:

sndPlaySound(filename, SND_ASYNC) 

PlaySound ha un ASYNC modalità che non bloccherebbe l'esecuzione del programma mentre viene riprodotto l'audio.

NOTA: L'ho usato in VB e i frammenti di cui sopra sono per l'uso in VB. Se stai codificando in VC++, potresti doverli modificare di conseguenza. Ma l'intenzione di base rimane la stessa; per passare il buffer a sndPlaySound con il flag impostato ASYNC.

Buona FORTUNA !!

+0

sndPlaySound (wav, SND_ASYNC | SND_MEMORY); Grazie, richiede il collegamento a winmm.lib in C++, ma ho provato e non funziona (nessun crash, ma nessun suono, ho anche provato SND _) ... sndPlaySound (wav [0], SND_ASYNC | SND_MEMORY); arresti anomali –

+0

Dato che stai usando FILE * user_data = fopen ("myfile1.wav", "ab");, l'output si trova in myfile1.wav. Hai provato a riprodurlo usando sndPlaySound (filename, SND_ASYNC)? Prova a riprodurre myfile1.wav nel tuo media player e controlla se contiene dati vocali. Sono curioso... – TheCodeArtist

+0

Non è un file wav, sono solo i dati grezzi... non viene riprodotto nemmeno nel media player –

3

Sono necessarie diverse modifiche al codice sorgente perché la libreria per Windows offra le stesse funzionalità di quella per Linux. Ho elencato le modifiche qui. È disponibile anche il binario pronto all'uso.

Tutte le patch e la descrizione sono state inviate anche ad espeak maintainer (pubblicamente, tramite mailing list e patch tracker), quindi forse in futuro sarà disponibile direttamente.

Problemi correlati