ailia_speech  1.5.2.0
ailia_speech.h
Go to the documentation of this file.
1 
9 #ifndef INCLUDED_AILIA_SPEECH
10 #define INCLUDED_AILIA_SPEECH
11 
12 /* Error codes and structures */
13 
14 #include "ailia.h"
15 #include "ailia_tokenizer.h"
16 
17 /* Calling convention: AILIA_API expands to nothing on the x64, Apple, Android, Linux and Nintendo SDK targets tested below, and to __stdcall on every other target */
18 
19 #if defined(_WIN64) || defined(_M_X64) || defined(__amd64__) || defined(__x86_64__) || defined(__APPLE__) || \
20  defined(__ANDROID__) || defined(ANDROID) || defined(__linux__) || defined(NN_NINTENDO_SDK)
21 #define AILIA_API
22 #else
23 #define AILIA_API __stdcall
24 #endif
25 
26 #ifdef __cplusplus
27 extern "C" {
28 #endif
29 
30 /****************************************************************
31  * Model type definitions (presumably the values accepted as model_type by ailiaSpeechOpenModelFileA/W; confirm)
32  **/
33 
43 #define AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_TINY (0)
44 
54 #define AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_BASE (1)
55 
65 #define AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_SMALL (2)
66 
76 #define AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_MEDIUM (3)
77 
87 #define AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_LARGE (4)
88 
98 #define AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_LARGE_V3 (5)
99 
109 #define AILIA_SPEECH_MODEL_TYPE_SENSEVOICE_SMALL (10)
110 
111 /****************************************************************
112  * Task definitions (presumably the values accepted as task by ailiaSpeechCreate; confirm)
113  **/
114 
124 #define AILIA_SPEECH_TASK_TRANSCRIBE (0)
125 
135 #define AILIA_SPEECH_TASK_TRANSLATE (1)
136 
137 /****************************************************************
138  * Constraint definitions (presumably the values accepted as type by ailiaSpeechSetConstraint; confirm)
139  **/
140 
150 #define AILIA_SPEECH_CONSTRAINT_CHARACTERS (0)
151 
161 #define AILIA_SPEECH_CONSTRAINT_WORDS (1)
162 
163 /****************************************************************
164  * Flag definitions (presumably the values accepted as flags by ailiaSpeechCreate; confirm)
165  **/
166 
176 #define AILIA_SPEECH_FLAG_NONE (0)
177 
187 #define AILIA_SPEECH_FLAG_LIVE (1)
188 
189 /****************************************************************
190  * VAD definitions (presumably the values accepted as vad_type by ailiaSpeechOpenVadFileA/W; confirm)
191  **/
192 
202 #define AILIA_SPEECH_VAD_TYPE_SILERO (0)
203 
204 /****************************************************************
205  * Diarization definitions (presumably the values accepted as type by ailiaSpeechOpenDiarizationFileA/W; confirm)
206  **/
207 
217 #define AILIA_SPEECH_DIARIZATION_TYPE_PYANNOTE_AUDIO (0)
218 
219 /****************************************************************
220  * Dictionary definitions (presumably the values accepted as dictionary_type by ailiaSpeechOpenDictionaryFileA/W; confirm)
221  **/
222 
232 #define AILIA_SPEECH_DICTIONARY_TYPE_REPLACE (0)
233 
234 /****************************************************************
235  * Post-process definitions (presumably the values accepted as post_process_type by ailiaSpeechOpenPostProcessFileA/W; confirm)
236  **/
237 
247 #define AILIA_SPEECH_POST_PROCESS_TYPE_T5 (0)
248 
262 #define AILIA_SPEECH_POST_PROCESS_TYPE_FUGUMT_EN_JA (1)
263 
277 #define AILIA_SPEECH_POST_PROCESS_TYPE_FUGUMT_JA_EN (2)
278 
279 /****************************************************************
280  * API callback definitions
281  **/
282 
283 // APIs that must be connected; AILIA_SPEECH_USER_API is __stdcall only on 32-bit Windows (_WIN32 without _WIN64) and empty elsewhere
284 
285 #if defined(_WIN32) && !defined(_WIN64)
286 #define AILIA_SPEECH_USER_API __stdcall
287 #else
288 #define AILIA_SPEECH_USER_API
289 #endif
290 
291 // ailia.audio API
293 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_AUDIO_GET_MEL_SPECTROGRAM)(void*, const void*, int, int, int, int, int, int, int, int, float, int, float, float, int, int, int);
294 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_AUDIO_RESAMPLE)(void*, const void*, int, int, int, int);
296 
297 // ailia.tokenizer API
298 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_CREATE)(struct AILIATokenizer**, int, int);
299 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_OPEN_MODEL_FILE_A)(struct AILIATokenizer*, const char*);
300 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_OPEN_MODEL_FILE_W)(struct AILIATokenizer*, const wchar_t*);
301 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_ENCODE)(struct AILIATokenizer*, const char*);
302 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TOKEN_COUNT)(struct AILIATokenizer*, unsigned int*);
303 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TOKENS)(struct AILIATokenizer*, int*, unsigned int);
304 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_DECODE)(struct AILIATokenizer*, const int*, unsigned int);
305 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TEXT_LENGTH)(struct AILIATokenizer*, unsigned int*);
306 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TEXT)(struct AILIATokenizer*, char*, unsigned int);
308 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_UTF8_TO_UTF32)(unsigned int*, unsigned int*, const char*, unsigned int);
309 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_UTF32_TO_UTF8)(char*, unsigned int*, unsigned int);
310 
311 // ailia API
312 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_CREATE)(struct AILIANetwork**, int, int);
313 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_OPEN_WEIGHT_FILE_A)(struct AILIANetwork*, const char*);
314 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_OPEN_WEIGHT_FILE_W)(struct AILIANetwork*, const wchar_t*);
315 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_OPEN_WEIGHT_MEM)(struct AILIANetwork*, const void*, unsigned int);
316 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_SET_MEMORY_MODE)(struct AILIANetwork*, unsigned int);
317 typedef void(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_DESTROY)(struct AILIANetwork*);
318 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_UPDATE)(struct AILIANetwork*);
319 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_GET_BLOB_INDEX_BY_INPUT_INDEX)(struct AILIANetwork*, unsigned int*, unsigned int);
320 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_GET_BLOB_INDEX_BY_OUTPUT_INDEX)(struct AILIANetwork*, unsigned int*, unsigned int);
321 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_GET_BLOB_DATA)(struct AILIANetwork*, void*, unsigned int, unsigned int);
322 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_SET_INPUT_BLOB_DATA)(struct AILIANetwork*, const void*, unsigned int, unsigned int);
323 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_SET_INPUT_BLOB_SHAPE)(struct AILIANetwork*, const AILIAShape*, unsigned int, unsigned int);
324 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_GET_BLOB_SHAPE)(struct AILIANetwork*, AILIAShape*, unsigned int, unsigned int);
325 typedef const char*(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_GET_ERROR_DETAIL)(struct AILIANetwork*);
326 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_COPY_BLOB_DATA)(struct AILIANetwork* dst_net, unsigned int dst_blob_idx, struct AILIANetwork* src_net, unsigned int src_blob_idx);
327 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_GET_ENVIRONMENT)(AILIAEnvironment** env, unsigned int env_idx, unsigned int version);
328 
338 #define AILIA_SPEECH_API_CALLBACK_VERSION (6)
339 
340 /* APIコールバック関数構造体 */
341 typedef struct _AILIASpeechApiCallback {
375 
376 /****************************************************************
377  * 中間情報取得コールバック
378  **/
379 
395 typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_INTERMEDIATE_CALLBACK)(void* handle, const char* text);
396 
397 /****************************************************************
398  * Network object instance
399  **/
400 
401 struct AILIASpeech; /* forward declaration only: the layout is not visible in this header */
402 
412 #define AILIA_SPEECH_TEXT_VERSION (2) /* presumably the value passed as version to ailiaSpeechGetText / ailiaSpeechSetText; confirm */
413 
423 #define AILIA_SPEECH_SPEAKER_ID_UNKNOWN 0xFFFFFFFF /* presumably the speaker_id value of AILIASpeechText when no speaker is identified; confirm */
424 
425 typedef struct _AILIASpeechText { /* one text entry, read with ailiaSpeechGetText and written with ailiaSpeechSetText */
426  const char* text; /* text of this entry */
427  float time_stamp_begin; /* begin time stamp (unit not shown in this listing) */
428  float time_stamp_end; /* end time stamp (unit not shown in this listing) */
429  unsigned int speaker_id; /* speaker identifier; see AILIA_SPEECH_SPEAKER_ID_UNKNOWN */
430  const char* language; /* language of this entry (string format not shown in this listing) */
431  float confidence; /* confidence value (range not shown in this listing) */
432 } AILIASpeechText;
433 
434 /****************************************************************
435  * Speech2Text API
436  **/
437 
469 int AILIA_API ailiaSpeechCreate(struct AILIASpeech** net, int env_id, int num_thread, int memory_mode, int task, int flags, AILIASpeechApiCallback callback, int version); /* Creates a network instance. */
470 
490 int AILIA_API ailiaSpeechOpenModelFileA(struct AILIASpeech* net, const char* encoder_path, const char* decoder_path, int model_type); /* Sets models into a network instance (char paths). */
491 
515 int AILIA_API ailiaSpeechOpenModelFileW(struct AILIASpeech* net, const wchar_t* encoder_path, const wchar_t* decoder_path, int model_type); /* Sets models into a network instance (wchar_t paths). */
516 
538 int AILIA_API ailiaSpeechOpenVadFileA(struct AILIASpeech* net, const char* vad_path, int vad_type); /* Sets the VAD model for voice activity detection (char path). */
539 
561 int AILIA_API ailiaSpeechOpenVadFileW(struct AILIASpeech* net, const wchar_t* vad_path, int vad_type); /* Sets the VAD model for voice activity detection (wchar_t path). */
562 
580 int AILIA_API ailiaSpeechOpenDictionaryFileA(struct AILIASpeech* net, const char* dictionary_path, int dictionary_type); /* Sets the dictionary for error correction (char path). */
581 
599 int AILIA_API ailiaSpeechOpenDictionaryFileW(struct AILIASpeech* net, const wchar_t* dictionary_path, int dictionary_type); /* Sets the dictionary for error correction (wchar_t path). */
600 
626 int AILIA_API ailiaSpeechOpenPostProcessFileA(struct AILIASpeech* net, const char* encoder_path, const char* decoder_path, const char* source_path, const char* target_path, const char* prefix, int post_process_type); /* Sets the AI model for post process (MBCS paths). */
627 
653 int AILIA_API ailiaSpeechOpenPostProcessFileW(struct AILIASpeech* net, const wchar_t* encoder_path, const wchar_t* decoder_path, const wchar_t* source_path, const wchar_t* target_path, const char* prefix, int post_process_type); /* Sets the AI model for post process (UTF16 paths; prefix stays const char*). */
654 
674 int AILIA_API ailiaSpeechOpenDiarizationFileA(struct AILIASpeech* net, const char* segmentation_path, const char* embedding_path, int type); /* Sets the AI model for speaker diarization (MBCS paths). */
675 
695 int AILIA_API ailiaSpeechOpenDiarizationFileW(struct AILIASpeech* net, const wchar_t* segmentation_path, const wchar_t* embedding_path, int type); /* Sets the AI model for speaker diarization (UTF16 paths). */
696 
718 int AILIA_API
719 ailiaSpeechPushInputData(struct AILIASpeech* net, const float* src, unsigned int channels, unsigned int samples, unsigned int sampling_rate); /* Pushes PCM data to the queue. */
720 
740 int AILIA_API
741 ailiaSpeechFinalizeInputData(struct AILIASpeech* net); /* Finalizes the input PCM data pushed to the queue. */
742 
758 int AILIA_API ailiaSpeechBuffered(struct AILIASpeech* net, unsigned int* buffered); /* Determines if there is enough data to perform speech recognition; result is written through buffered. */
759 
775 int AILIA_API
776 ailiaSpeechComplete(struct AILIASpeech* net, unsigned int* complete); /* Determines whether all data has been processed; result is written through complete. */
777 
793 int AILIA_API
794 ailiaSpeechSetPrompt(struct AILIASpeech* net, const char* prompt); /* Sets the prompt. */
795 
813 int AILIA_API
814 ailiaSpeechSetConstraint(struct AILIASpeech* net, const char* constraint, int type); /* Sets the constraint; type is presumably an AILIA_SPEECH_CONSTRAINT_* value (confirm). */
815 
835 int AILIA_API
836 ailiaSpeechSetLanguage(struct AILIASpeech* net, const char* language); /* Sets the language. */
837 
862 int AILIA_API
863 ailiaSpeechSetSilentThreshold(struct AILIASpeech* net, float silent_threshold, float speech_sec, float no_speech_sec); /* Sets the silent threshold. */
864 
888 int AILIA_API
889 ailiaSpeechSetIntermediateCallback(struct AILIASpeech* net, AILIA_SPEECH_USER_API_INTERMEDIATE_CALLBACK callback, void* handle); /* Sets a callback to get intermediate results of recognition; handle is presumably forwarded to the callback as its first argument (confirm). */
890 
908 int AILIA_API
909 ailiaSpeechTranscribe(struct AILIASpeech* net); /* Performs speech recognition. */
910 
928 int AILIA_API
929 ailiaSpeechPostProcess(struct AILIASpeech* net); /* Executes the post process. */
930 
946 int AILIA_API
947 ailiaSpeechGetTextCount(struct AILIASpeech* net, unsigned int* count); /* Gets the recognized text count; result is written through count. */
948 
974 int AILIA_API
975 ailiaSpeechGetText(struct AILIASpeech* net, AILIASpeechText* text, unsigned int version, unsigned int idx); /* Gets the recognized text at index idx. */
976 
1004 int AILIA_API
1005 ailiaSpeechSetText(struct AILIASpeech* net, const AILIASpeechText* text, unsigned int version, unsigned int idx); /* Sets the postprocess text at index idx. */
1006 
1016 void AILIA_API ailiaSpeechDestroy(struct AILIASpeech* net); /* Destroys the network instance. */
1017 
1039 int AILIA_API ailiaSpeechResetTranscribeState(struct AILIASpeech* net); /* Resets the network instance. */
1040 
1062 const char* AILIA_API ailiaSpeechGetErrorDetail(struct AILIASpeech* net); /* Returns the details of errors. */
1063 
1064 #ifdef UNICODE /* generic ailiaSpeechOpen*File names map to the W (wchar_t) variants when UNICODE is defined, otherwise to the A (char) variants */
1065 #define ailiaSpeechOpenModelFile ailiaSpeechOpenModelFileW
1066 #define ailiaSpeechOpenVadFile ailiaSpeechOpenVadFileW
1067 #define ailiaSpeechOpenDiarizationFile ailiaSpeechOpenDiarizationFileW
1068 #define ailiaSpeechOpenDictionaryFile ailiaSpeechOpenDictionaryFileW
1069 #define ailiaSpeechOpenPostProcessFile ailiaSpeechOpenPostProcessFileW
1070 #else
1071 #define ailiaSpeechOpenModelFile ailiaSpeechOpenModelFileA
1072 #define ailiaSpeechOpenVadFile ailiaSpeechOpenVadFileA
1073 #define ailiaSpeechOpenDiarizationFile ailiaSpeechOpenDiarizationFileA
1074 #define ailiaSpeechOpenDictionaryFile ailiaSpeechOpenDictionaryFileA
1075 #define ailiaSpeechOpenPostProcessFile ailiaSpeechOpenPostProcessFileA
1076 #endif
1077 
1078 #ifdef __cplusplus
1079 }
1080 #endif
1081 #endif /* !defined(INCLUDED_AILIA_SPEECH) */
int AILIA_API ailiaSpeechOpenVadFileA(struct AILIASpeech *net, const char *vad_path, int vad_type)
Set vad model for voice activity detection.
void AILIA_API ailiaSpeechDestroy(struct AILIASpeech *net)
It destroys the network instance.
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_SET_INPUT_BLOB_DATA)(struct AILIANetwork *, const void *, unsigned int, unsigned int)
Definition: ailia_speech.h:322
int AILIA_API ailiaSpeechBuffered(struct AILIASpeech *net, unsigned int *buffered)
Determines if there is enough data to perform speech recognition.
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_AUDIO_RESAMPLE)(void *, const void *, int, int, int, int)
Definition: ailia_speech.h:294
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_TOKENIZER_OPEN_MODEL_FILE_A)(struct AILIATokenizer *, const char *)
Definition: ailia_speech.h:299
int AILIA_API ailiaSpeechOpenVadFileW(struct AILIASpeech *net, const wchar_t *vad_path, int vad_type)
Set vad model for voice activity detection.
#define AILIA_API
Definition: ailia_speech.h:23
int AILIA_API ailiaSpeechSetText(struct AILIASpeech *net, const AILIASpeechText *text, unsigned int version, unsigned int idx)
Set postprocess text.
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TOKEN_COUNT)(struct AILIATokenizer *, unsigned int *)
Definition: ailia_speech.h:302
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_GET_BLOB_SHAPE)(struct AILIANetwork *, AILIAShape *, unsigned int, unsigned int)
Definition: ailia_speech.h:324
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_TOKENIZER_ENCODE)(struct AILIATokenizer *, const char *)
Definition: ailia_speech.h:301
struct _AILIASpeechApiCallback AILIASpeechApiCallback
void(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_TOKENIZER_DESTROY)(struct AILIATokenizer *)
Definition: ailia_speech.h:307
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_CREATE)(struct AILIANetwork **, int, int)
Definition: ailia_speech.h:312
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_AUDIO_GET_RESAMPLE_LEN)(int *, int, int, int)
Definition: ailia_speech.h:295
int AILIA_API ailiaSpeechPushInputData(struct AILIASpeech *net, const float *src, unsigned int channels, unsigned int samples, unsigned int sampling_rate)
Push PCM data to queue.
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_AUDIO_GET_FRAME_LEN)(int *, int, int, int, int)
Definition: ailia_speech.h:292
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_INTERMEDIATE_CALLBACK)(void *handle, const char *text)
Notify the status during the inference.
Definition: ailia_speech.h:395
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TEXT_LENGTH)(struct AILIATokenizer *, unsigned int *)
Definition: ailia_speech.h:305
int AILIA_API ailiaSpeechOpenModelFileA(struct AILIASpeech *net, const char *encoder_path, const char *decoder_path, int model_type)
Set models into a network instance.
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_TOKENIZER_DECODE)(struct AILIATokenizer *, const int *, unsigned int)
Definition: ailia_speech.h:304
int AILIA_API ailiaSpeechGetText(struct AILIASpeech *net, AILIASpeechText *text, unsigned int version, unsigned int idx)
Get recognized text.
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_GET_BLOB_INDEX_BY_INPUT_INDEX)(struct AILIANetwork *, unsigned int *, unsigned int)
Definition: ailia_speech.h:319
int AILIA_API ailiaSpeechSetIntermediateCallback(struct AILIASpeech *net, AILIA_SPEECH_USER_API_INTERMEDIATE_CALLBACK callback, void *handle)
Set a callback to get intermediate results of recognition.
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_TOKENIZER_UTF32_TO_UTF8)(char *, unsigned int *, unsigned int)
Definition: ailia_speech.h:309
int AILIA_API ailiaSpeechOpenDiarizationFileW(struct AILIASpeech *net, const wchar_t *segmentation_path, const wchar_t *embedding_path, int type)
Set AI model for speaker diarization (UTF16)
#define AILIA_SPEECH_USER_API
Definition: ailia_speech.h:288
int AILIA_API ailiaSpeechSetLanguage(struct AILIASpeech *net, const char *language)
Set language.
int AILIA_API ailiaSpeechPostProcess(struct AILIASpeech *net)
Execute post process.
struct _AILIASpeechText AILIASpeechText
int AILIA_API ailiaSpeechOpenDictionaryFileW(struct AILIASpeech *net, const wchar_t *dictionary_path, int dictionary_type)
Set dictionary for error correction.
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_COPY_BLOB_DATA)(struct AILIANetwork *dst_net, unsigned int dst_blob_idx, struct AILIANetwork *src_net, unsigned int src_blob_idx)
Definition: ailia_speech.h:326
int AILIA_API ailiaSpeechGetTextCount(struct AILIASpeech *net, unsigned int *count)
Get recognized text count.
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_OPEN_WEIGHT_FILE_A)(struct AILIANetwork *, const char *)
Definition: ailia_speech.h:313
void(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_DESTROY)(struct AILIANetwork *)
Definition: ailia_speech.h:317
int AILIA_API ailiaSpeechOpenPostProcessFileA(struct AILIASpeech *net, const char *encoder_path, const char *decoder_path, const char *source_path, const char *target_path, const char *prefix, int post_process_type)
Set AI model for post process (MBCS)
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_AUDIO_GET_MEL_SPECTROGRAM)(void *, const void *, int, int, int, int, int, int, int, int, float, int, float, float, int, int, int)
Definition: ailia_speech.h:293
int AILIA_API ailiaSpeechSetConstraint(struct AILIASpeech *net, const char *constraint, int type)
Set constraint.
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_TOKENIZER_OPEN_MODEL_FILE_W)(struct AILIATokenizer *, const wchar_t *)
Definition: ailia_speech.h:300
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_GET_BLOB_DATA)(struct AILIANetwork *, void *, unsigned int, unsigned int)
Definition: ailia_speech.h:321
int AILIA_API ailiaSpeechResetTranscribeState(struct AILIASpeech *net)
It resets the network instance.
int AILIA_API ailiaSpeechOpenModelFileW(struct AILIASpeech *net, const wchar_t *encoder_path, const wchar_t *decoder_path, int model_type)
Set models into a network instance.
int AILIA_API ailiaSpeechComplete(struct AILIASpeech *net, unsigned int *complete)
Determines whether all data has been processed.
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_SET_MEMORY_MODE)(struct AILIANetwork *, unsigned int)
Definition: ailia_speech.h:316
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TEXT)(struct AILIATokenizer *, char *, unsigned int)
Definition: ailia_speech.h:306
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_TOKENIZER_UTF8_TO_UTF32)(unsigned int *, unsigned int *, const char *, unsigned int)
Definition: ailia_speech.h:308
int AILIA_API ailiaSpeechFinalizeInputData(struct AILIASpeech *net)
Finalize input PCM data to queue.
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_TOKENIZER_CREATE)(struct AILIATokenizer **, int, int)
Definition: ailia_speech.h:298
const char *AILIA_API ailiaSpeechGetErrorDetail(struct AILIASpeech *net)
Returns the details of errors.
int AILIA_API ailiaSpeechCreate(struct AILIASpeech **net, int env_id, int num_thread, int memory_mode, int task, int flags, AILIASpeechApiCallback callback, int version)
Creates a network instance.
int AILIA_API ailiaSpeechTranscribe(struct AILIASpeech *net)
Perform speech recognition.
int AILIA_API ailiaSpeechOpenPostProcessFileW(struct AILIASpeech *net, const wchar_t *encoder_path, const wchar_t *decoder_path, const wchar_t *source_path, const wchar_t *target_path, const char *prefix, int post_process_type)
Set AI model for post process (UTF16)
int AILIA_API ailiaSpeechSetSilentThreshold(struct AILIASpeech *net, float silent_threshold, float speech_sec, float no_speech_sec)
Set silent threshold.
int AILIA_API ailiaSpeechOpenDiarizationFileA(struct AILIASpeech *net, const char *segmentation_path, const char *embedding_path, int type)
Set AI model for speaker diarization (MBCS)
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_GET_BLOB_INDEX_BY_OUTPUT_INDEX)(struct AILIANetwork *, unsigned int *, unsigned int)
Definition: ailia_speech.h:320
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_SET_INPUT_BLOB_SHAPE)(struct AILIANetwork *, const AILIAShape *, unsigned int, unsigned int)
Definition: ailia_speech.h:323
const char *(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_GET_ERROR_DETAIL)(struct AILIANetwork *)
Definition: ailia_speech.h:325
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TOKENS)(struct AILIATokenizer *, int *, unsigned int)
Definition: ailia_speech.h:303
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_UPDATE)(struct AILIANetwork *)
Definition: ailia_speech.h:318
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_OPEN_WEIGHT_FILE_W)(struct AILIANetwork *, const wchar_t *)
Definition: ailia_speech.h:314
int AILIA_API ailiaSpeechOpenDictionaryFileA(struct AILIASpeech *net, const char *dictionary_path, int dictionary_type)
Set dictionary for error correction.
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_GET_ENVIRONMENT)(AILIAEnvironment **env, unsigned int env_idx, unsigned int version)
Definition: ailia_speech.h:327
int AILIA_API ailiaSpeechSetPrompt(struct AILIASpeech *net, const char *prompt)
Set prompt.
int(AILIA_SPEECH_USER_API * AILIA_SPEECH_USER_API_AILIA_OPEN_WEIGHT_MEM)(struct AILIANetwork *, const void *, unsigned int)
Definition: ailia_speech.h:315
_AILIASpeechApiCallback
Definition: ailia_speech.h:341
AILIA_SPEECH_USER_API_AILIA_AUDIO_RESAMPLE ailiaAudioResample
Definition: ailia_speech.h:344
AILIA_SPEECH_USER_API_AILIA_SET_MEMORY_MODE ailiaSetMemoryMode
Definition: ailia_speech.h:362
AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TOKEN_COUNT ailiaTokenizerGetTokenCount
Definition: ailia_speech.h:350
AILIA_SPEECH_USER_API_AILIA_GET_BLOB_SHAPE ailiaGetBlobShape
Definition: ailia_speech.h:370
AILIA_SPEECH_USER_API_AILIA_TOKENIZER_DESTROY ailiaTokenizerDestroy
Definition: ailia_speech.h:355
AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TEXT_LENGTH ailiaTokenizerGetTextLength
Definition: ailia_speech.h:353
AILIA_SPEECH_USER_API_AILIA_TOKENIZER_UTF32_TO_UTF8 ailiaTokenizerUtf32ToUtf8
Definition: ailia_speech.h:357
AILIA_SPEECH_USER_API_AILIA_TOKENIZER_ENCODE ailiaTokenizerEncode
Definition: ailia_speech.h:349
AILIA_SPEECH_USER_API_AILIA_GET_BLOB_INDEX_BY_INPUT_INDEX ailiaGetBlobIndexByInputIndex
Definition: ailia_speech.h:365
AILIA_SPEECH_USER_API_AILIA_UPDATE ailiaUpdate
Definition: ailia_speech.h:364
AILIA_SPEECH_USER_API_AILIA_GET_BLOB_DATA ailiaGetBlobData
Definition: ailia_speech.h:367
AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TEXT ailiaTokenizerGetText
Definition: ailia_speech.h:354
AILIA_SPEECH_USER_API_AILIA_AUDIO_GET_RESAMPLE_LEN ailiaAudioGetResampleLen
Definition: ailia_speech.h:345
AILIA_SPEECH_USER_API_AILIA_CREATE ailiaCreate
Definition: ailia_speech.h:358
AILIA_SPEECH_USER_API_AILIA_OPEN_WEIGHT_FILE_A ailiaOpenWeightFileA
Definition: ailia_speech.h:359
AILIA_SPEECH_USER_API_AILIA_TOKENIZER_UTF8_TO_UTF32 ailiaTokenizerUtf8ToUtf32
Definition: ailia_speech.h:356
AILIA_SPEECH_USER_API_AILIA_SET_INPUT_BLOB_SHAPE ailiaSetInputBlobShape
Definition: ailia_speech.h:369
AILIA_SPEECH_USER_API_AILIA_SET_INPUT_BLOB_DATA ailiaSetInputBlobData
Definition: ailia_speech.h:368
AILIA_SPEECH_USER_API_AILIA_AUDIO_GET_FRAME_LEN ailiaAudioGetFrameLen
Definition: ailia_speech.h:342
AILIA_SPEECH_USER_API_AILIA_AUDIO_GET_MEL_SPECTROGRAM ailiaAudioGetMelSpectrogram
Definition: ailia_speech.h:343
AILIA_SPEECH_USER_API_AILIA_OPEN_WEIGHT_MEM ailiaOpenWeightMem
Definition: ailia_speech.h:361
AILIA_SPEECH_USER_API_AILIA_TOKENIZER_DECODE ailiaTokenizerDecode
Definition: ailia_speech.h:352
AILIA_SPEECH_USER_API_AILIA_OPEN_WEIGHT_FILE_W ailiaOpenWeightFileW
Definition: ailia_speech.h:360
AILIA_SPEECH_USER_API_AILIA_DESTROY ailiaDestroy
Definition: ailia_speech.h:363
AILIA_SPEECH_USER_API_AILIA_COPY_BLOB_DATA ailiaCopyBlobData
Definition: ailia_speech.h:372
AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TOKENS ailiaTokenizerGetTokens
Definition: ailia_speech.h:351
AILIA_SPEECH_USER_API_AILIA_GET_BLOB_INDEX_BY_OUTPUT_INDEX ailiaGetBlobIndexByOutputIndex
Definition: ailia_speech.h:366
AILIA_SPEECH_USER_API_AILIA_GET_ENVIRONMENT ailiaGetEnvironment
Definition: ailia_speech.h:373
AILIA_SPEECH_USER_API_AILIA_TOKENIZER_OPEN_MODEL_FILE_W ailiaTokenizerOpenModelFileW
Definition: ailia_speech.h:348
AILIA_SPEECH_USER_API_AILIA_TOKENIZER_OPEN_MODEL_FILE_A ailiaTokenizerOpenModelFileA
Definition: ailia_speech.h:347
AILIA_SPEECH_USER_API_AILIA_GET_ERROR_DETAIL ailiaGetErrorDetail
Definition: ailia_speech.h:371
AILIA_SPEECH_USER_API_AILIA_TOKENIZER_CREATE ailiaTokenizerCreate
Definition: ailia_speech.h:346
_AILIASpeechText
Definition: ailia_speech.h:425
unsigned int speaker_id
Definition: ailia_speech.h:429
float confidence
Definition: ailia_speech.h:431
float time_stamp_end
Definition: ailia_speech.h:428
const char * language
Definition: ailia_speech.h:430
const char * text
Definition: ailia_speech.h:426
float time_stamp_begin
Definition: ailia_speech.h:427