diff --git a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/AvatarCore_STT.Build.cs b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/AvatarCore_STT.Build.cs
index f111eb3..8533c4f 100644
--- a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/AvatarCore_STT.Build.cs
+++ b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/AvatarCore_STT.Build.cs
@@ -39,26 +39,26 @@ public class AvatarCore_STT : ModuleRules
         PublicIncludePaths.AddRange(
             new string[] {
                 // ... add public include paths required here ...
-                Path.Combine(ModuleDirectory, "ThirdParty", "portaudio", "include"),
+                Path.Combine(ModuleDirectory, "..", "ThirdParty", "portaudio", "include"),
             }
             );
 
-        PublicIncludePaths.Add(Path.Combine(ModuleDirectory, "ThirdParty", "fvad", "include"));
-        PublicAdditionalLibraries.Add(Path.Combine(ModuleDirectory, "ThirdParty", "fvad", "lib", "fvad.lib"));
+        PublicIncludePaths.Add(Path.Combine(ModuleDirectory, "..", "ThirdParty", "fvad", "include"));
+        PublicAdditionalLibraries.Add(Path.Combine(ModuleDirectory, "..", "ThirdParty", "fvad", "lib", "fvad.lib"));
 
-        PublicIncludePaths.Add(Path.Combine(ModuleDirectory, "ThirdParty", "SpeexDSP", "include"));
-        PublicAdditionalLibraries.Add(Path.Combine(ModuleDirectory, "ThirdParty", "SpeexDSP", "lib", "libspeexdsp.dll.a"));
-        PublicDelayLoadDLLs.Add(Path.Combine(ModuleDirectory, "ThirdParty", "SpeexDSP", "lib", "libspeexdsp-1.dll"));
-        RuntimeDependencies.Add(Path.Combine(@"$(BinaryOutputDir)", "libspeexdsp-1.dll"), Path.Combine(ModuleDirectory, "ThirdParty", "SpeexDSP", "lib", "libspeexdsp-1.dll"));
+        PublicIncludePaths.Add(Path.Combine(ModuleDirectory, "..", "ThirdParty", "SpeexDSP", "include"));
+        PublicAdditionalLibraries.Add(Path.Combine(ModuleDirectory, "..", "ThirdParty", "SpeexDSP", "lib", "libspeexdsp.dll.a"));
+        PublicDelayLoadDLLs.Add(Path.Combine(ModuleDirectory, "..", "ThirdParty", "SpeexDSP", "lib", "libspeexdsp-1.dll"));
+        RuntimeDependencies.Add(Path.Combine(@"$(BinaryOutputDir)", "libspeexdsp-1.dll"), Path.Combine(ModuleDirectory, "..", "ThirdParty", "SpeexDSP", "lib", "libspeexdsp-1.dll"));
 
         CppStandard = CppStandardVersion.Cpp20;
 
-        PublicIncludePaths.Add(Path.Combine(ModuleDirectory, "ThirdParty", "AzureWrapper", "include", "c_api"));
-        PublicIncludePaths.Add(Path.Combine(ModuleDirectory, "ThirdParty", "AzureWrapper", "include", "cxx_api"));
-        PublicAdditionalLibraries.Add(Path.Combine(ModuleDirectory, "ThirdParty", "AzureWrapper", "libs", "Microsoft.CognitiveServices.Speech.core.lib"));
+        PublicIncludePaths.Add(Path.Combine(ModuleDirectory, "..", "ThirdParty", "AzureWrapper", "include", "c_api"));
+        PublicIncludePaths.Add(Path.Combine(ModuleDirectory, "..", "ThirdParty", "AzureWrapper", "include", "cxx_api"));
+        PublicAdditionalLibraries.Add(Path.Combine(ModuleDirectory, "..", "ThirdParty", "AzureWrapper", "libs", "Microsoft.CognitiveServices.Speech.core.lib"));
 
         foreach (string DynamicLib in GetDynamicLibraries())
         {
-            PublicDelayLoadDLLs.Add(Path.Combine(ModuleDirectory, "ThirdParty", "AzureWrapper", "libs", "Runtime", DynamicLib));
-            RuntimeDependencies.Add(Path.Combine(@"$(BinaryOutputDir)", DynamicLib), Path.Combine(ModuleDirectory, "ThirdParty", "AzureWrapper", "libs", "Runtime", DynamicLib));
+            PublicDelayLoadDLLs.Add(Path.Combine(ModuleDirectory, "..", "ThirdParty", "AzureWrapper", "libs", "Runtime", DynamicLib));
+            RuntimeDependencies.Add(Path.Combine(@"$(BinaryOutputDir)", DynamicLib), Path.Combine(ModuleDirectory, "..", "ThirdParty", "AzureWrapper", "libs", "Runtime", DynamicLib));
         }
 
         PrivateIncludePaths.AddRange(
@@ -80,7 +80,7 @@ public class AvatarCore_STT : ModuleRules
         // PortAudio linking for Win64
         if (Target.Platform == UnrealTargetPlatform.Win64)
         {
-            string PortAudioLibDir = Path.Combine(ModuleDirectory, "ThirdParty", "portaudio", "lib");
+            string PortAudioLibDir = Path.Combine(ModuleDirectory, "..", "ThirdParty", "portaudio", "lib");
             PublicAdditionalLibraries.Add(Path.Combine(PortAudioLibDir, "portaudio_x64.lib")); // Link .lib for static symbols
             string PortAudioDllPath = Path.Combine(PortAudioLibDir, "portaudio_x64.dll");
             PublicDelayLoadDLLs.Add(PortAudioDllPath);
@@ -95,6 +95,9 @@ public class AvatarCore_STT : ModuleRules
                 "Slate",
                 "SlateCore",
                 "AvatarCore_AI",
+                "HTTP",
+                "Json",
+                "JsonUtilities",
                 // ... add private dependencies that you statically link with here ...
             }
             );
diff --git a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/STTProcessorAzure.cpp b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/STTProcessorAzure.cpp
index 2bff435..78db7d0 100644
--- a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/STTProcessorAzure.cpp
+++ b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/STTProcessorAzure.cpp
@@ -51,7 +51,7 @@ void USTTProcessorAzure::InitSTTProcessor(USTTManagerBase* BaseSTTManager, USTTB
     if(bDebugMode)
         STTManager->OnSTTLog.Broadcast(TEXT("SpeechConfig initialized successfully."));
 
-    AzureRunnable = MakeUnique(config, audioConfig, AzureProcessorConfig->AzurePhraseList, this, true);
+    AzureRunnable = MakeUnique(config, audioConfig, STTManager->GetSpecialWords(), this, true);
 }
 
 void USTTProcessorAzure::ClearSTTProcessor()
@@ -118,7 +118,7 @@ void USTTProcessorAzure::StartRecognition()
     StopRecognition(true); //In case there is something else running
     intermediateResult = "";
     USTTProcessorBase::OnTranscriptionStarted();
-    AzureRunnable = MakeUnique(config, audioConfig, AzureProcessorConfig->AzurePhraseList, this, false);
+    AzureRunnable = MakeUnique(config, audioConfig, STTManager->GetSpecialWords(), this, false);
 }
 
 void USTTProcessorAzure::StopRecognition(bool Forced)
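Note on the STTProcessorAzure.cpp change above: the hard-coded AzurePhraseList is replaced by the combined word list from USTTManagerBase::GetSpecialWords(), so the Azure and Whisper paths now draw on one vocabulary source. The runnable that receives this list is not part of this diff, so the sketch below is illustrative only; it shows the usual way such a list is applied with the Azure Speech C++ SDK's PhraseListGrammar, with ApplyPhraseList and Recognizer as placeholder names.

// Illustrative sketch, not code from this plugin. Assumes the runnable holds a
// Microsoft::CognitiveServices::Speech::SpeechRecognizer; the exact integration may differ.
#include "CoreMinimal.h"
#include <speechapi_cxx.h>

using namespace Microsoft::CognitiveServices::Speech;

static void ApplyPhraseList(const std::shared_ptr<SpeechRecognizer>& Recognizer, const TArray<FString>& SpecialWords)
{
    // A phrase list biases recognition toward domain vocabulary such as "b.ReX" or "Bruce B."
    std::shared_ptr<PhraseListGrammar> PhraseList = PhraseListGrammar::FromRecognizer(Recognizer);
    for (const FString& Word : SpecialWords)
    {
        PhraseList->AddPhrase(TCHAR_TO_UTF8(*Word));
    }
}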
diff --git a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Whisper/STTProcessorWhisper.cpp b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Whisper/STTProcessorWhisper.cpp
new file mode 100644
index 0000000..cbc1069
--- /dev/null
+++ b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Whisper/STTProcessorWhisper.cpp
@@ -0,0 +1,438 @@
+// Fill out your copyright notice in the Description page of Project Settings.
+
+
+#include "Processor/Whisper/STTProcessorWhisper.h"
+#include "STTManagerBase.h"
+#include "HttpModule.h"
+#include "Interfaces/IHttpRequest.h"
+#include "Interfaces/IHttpResponse.h"
+#include "Serialization/JsonSerializer.h"
+#include "Dom/JsonObject.h"
+
+namespace
+{
+    static void AppendStringToBody(TArray<uint8>& Body, const FString& Str)
+    {
+        FTCHARToUTF8 Converter(*Str);
+        Body.Append(reinterpret_cast<const uint8*>(Converter.Get()), Converter.Length());
+    }
+
+    static const FString TranscribeModelEnumToString(EOpenAITranscriptionModel Model)
+    {
+        switch (Model)
+        {
+        case EOpenAITranscriptionModel::Whisper1:
+            return TEXT("whisper-1");
+            break;
+
+        case EOpenAITranscriptionModel::TranscribeMini4o:
+            return TEXT("gpt-4o-mini-transcribe");
+            break;
+
+        case EOpenAITranscriptionModel::Transcribe4o:
+            return TEXT("gpt-4o-transcribe");
+            break;
+
+        default:
+            return TEXT("");
+            break;
+        }
+    }
+}
+
+void USTTProcessorWhisper::InitSTTProcessor(USTTManagerBase* BaseSTTManager, USTTBaseProcessorConfig* InProcessorConfig, bool InDebugMode)
+{
+    USTTProcessorBase::InitSTTProcessor(BaseSTTManager, InProcessorConfig, InDebugMode);
+    WhisperProcessorConfig = Cast<USTTWhisperProcessorConfig>(InProcessorConfig);
+    if(!WhisperProcessorConfig)
+    {
+        if (IsValid(STTManager))
+            STTManager->OnSTTError.Broadcast(TEXT("Whisper Processor Config is invalid."));
+        return;
+    }
+
+    if (WhisperProcessorConfig->OpenAI_API_Key.IsEmpty()) {
+        if (IsValid(STTManager))
+            STTManager->OnSTTError.Broadcast(TEXT("OpenAI API Key not set. Needs to be done before initializing modules."));
+        return;
+    }
+
+    NormalizeWhisperURL();
+
+    if (IsValid(STTManager))
+    {
+        STTManager->OnSpeechStateChanged.AddUniqueDynamic(this, &USTTProcessorWhisper::OnSpeechStateChanged);
+    }
+
+    PerformHealthCheck();
+}
+
+void USTTProcessorWhisper::ClearSTTProcessor()
+{
+    USTTProcessorBase::ClearSTTProcessor();
+    BufferedPCMData.Empty();
+    bHasBufferedAudioInformation = false;
+
+    for (TSharedPtr<IHttpRequest, ESPMode::ThreadSafe>& Request : ActiveRequests)
+    {
+        if (Request.IsValid())
+        {
+            Request->OnProcessRequestComplete().Unbind();
+            Request->CancelRequest();
+        }
+    }
+    ActiveRequests.Empty();
+}
+
+void USTTProcessorWhisper::DestroySTTProcessor()
+{
+    ClearSTTProcessor();
+    STTManager = nullptr;
+}
+
+void USTTProcessorWhisper::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation)
+{
+    if (CurrentTalkingState != ESTTTalkingState::TALKING)
+        return;
+
+    if (PCMData.Num() == 0)
+        return;
+
+    if (!bHasBufferedAudioInformation)
+    {
+        BufferedAudioInformation = AudioInformation;
+        bHasBufferedAudioInformation = true;
+    }
+    else if (BufferedAudioInformation.SampleRate != AudioInformation.SampleRate ||
+        BufferedAudioInformation.NumChannels != AudioInformation.NumChannels)
+    {
+        BufferedPCMData.Empty();
+        BufferedAudioInformation = AudioInformation;
+    }
+
+    const int64 BytesPerSample = sizeof(int16);
+    const int64 CurrentBytes = static_cast<int64>(BufferedPCMData.Num()) * BytesPerSample;
+    const int64 NewBytes = static_cast<int64>(PCMData.Num()) * BytesPerSample;
+    const int64 MaxUploadBytes = static_cast<int64>(25) * 1024 * 1024;
+    const int64 MaxAudioBytes = MaxUploadBytes - 1024;
+
+    if (CurrentBytes + NewBytes > MaxAudioBytes)
+    {
+        int64 ExcessBytes = CurrentBytes + NewBytes - MaxAudioBytes;
+        int64 SamplesToRemove = (ExcessBytes + BytesPerSample - 1) / BytesPerSample;
+
+        if (SamplesToRemove >= BufferedPCMData.Num())
+        {
+            BufferedPCMData.Empty();
+            if (NewBytes > MaxAudioBytes)
+            {
+                int64 NewSamplesAllowed = MaxAudioBytes / BytesPerSample;
+                if (NewSamplesAllowed > 0 && NewSamplesAllowed < PCMData.Num())
+                {
+                    int32 StartIndex = PCMData.Num() - static_cast<int32>(NewSamplesAllowed);
+                    BufferedPCMData.Append(&PCMData[StartIndex], static_cast<int32>(NewSamplesAllowed));
+                }
+            }
+            else
+            {
+                BufferedPCMData.Append(PCMData);
+            }
+        }
+        else
+        {
+            BufferedPCMData.RemoveAt(0, static_cast<int32>(SamplesToRemove), EAllowShrinking::No);
+            BufferedPCMData.Append(PCMData);
+        }
+    }
+    else
+    {
+        BufferedPCMData.Append(PCMData);
+    }
+}
+
+void USTTProcessorWhisper::OnSpeechStateChanged(ESTTTalkingState TalkingState)
+{
+    CurrentTalkingState = TalkingState;
+
+    if (TalkingState == ESTTTalkingState::BLOCKED)
+    {
+        ClearSTTProcessor();
+        return;
+    }
+
+    if (TalkingState == ESTTTalkingState::SILENCE || TalkingState == ESTTTalkingState::TRANSCRIBING)
+    {
+        StartTranscriptionFromBuffer();
+    }
+}
+
+void USTTProcessorWhisper::StartTranscriptionFromBuffer()
+{
+
+    if (BufferedPCMData.Num() == 0 || !bHasBufferedAudioInformation)
+        return;
+
+    TArray<int16> PCMDataCopy = BufferedPCMData;
+    FAudioInformation AudioInfoCopy = BufferedAudioInformation;
+    BufferedPCMData.Empty();
+    bHasBufferedAudioInformation = false;
+
+    // Require at least x seconds of audio before sending to Whisper
+    if (AudioInfoCopy.SampleRate > 0 && AudioInfoCopy.NumChannels > 0)
+    {
+        const float Frames = static_cast<float>(PCMDataCopy.Num()) / static_cast<float>(AudioInfoCopy.NumChannels);
+        const float DurationSeconds = Frames / static_cast<float>(AudioInfoCopy.SampleRate);
+        if (DurationSeconds < WhisperProcessorConfig->MinDuration)
+        {
+            return;
+        }
+    }
+
+    TArray<uint8> WavData;
+    if (!BuildWavFromPCM(PCMDataCopy, AudioInfoCopy, WavData))
+    {
+        if (IsValid(STTManager))
+            STTManager->OnSTTError.Broadcast(TEXT("Failed to build WAV data for Whisper transcription."));
+        return;
+    }
+
+    const int64 MaxUploadBytes = static_cast<int64>(25) * 1024 * 1024;
+    if (WavData.Num() > MaxUploadBytes)
+    {
+        if (IsValid(STTManager))
+            STTManager->OnSTTError.Broadcast(TEXT("Whisper audio size exceeds 25MB limit. Transcription aborted."));
+        return;
+    }
+
+    SendWhisperRequest(MoveTemp(WavData));
+}
+
+bool USTTProcessorWhisper::BuildWavFromPCM(const TArray<int16>& PCMData, const FAudioInformation& AudioInformation, TArray<uint8>& OutWavData) const
+{
+    if (PCMData.Num() == 0)
+        return false;
+
+    if (AudioInformation.SampleRate <= 0 || AudioInformation.NumChannels <= 0)
+        return false;
+
+    const uint32 BitsPerSample = 16;
+    const uint32 BytesPerSample = BitsPerSample / 8;
+    const uint32 NumSamples = static_cast<uint32>(PCMData.Num());
+    const uint32 ByteRate = static_cast<uint32>(AudioInformation.SampleRate) * static_cast<uint32>(AudioInformation.NumChannels) * BytesPerSample;
+    const uint32 BlockAlign = static_cast<uint32>(AudioInformation.NumChannels) * BytesPerSample;
+    const uint32 Subchunk1Size = 16;
+    const uint32 AudioFormat = 1;
+    const uint32 Subchunk2Size = NumSamples * BytesPerSample;
+    const uint32 ChunkSize = 4 + (8 + Subchunk1Size) + (8 + Subchunk2Size);
+
+    OutWavData.Reset();
+    OutWavData.Reserve(ChunkSize + 8);
+
+    auto AppendLittleEndian = [&OutWavData](uint32 Value, int32 ByteCount)
+    {
+        for (int32 i = 0; i < ByteCount; i++)
+        {
+            OutWavData.Add(static_cast<uint8>((Value >> (i * 8)) & 0xFF));
+        }
+    };
+
+    OutWavData.Append(reinterpret_cast<const uint8*>("RIFF"), 4);
+    AppendLittleEndian(ChunkSize, 4);
+    OutWavData.Append(reinterpret_cast<const uint8*>("WAVE"), 4);
+
+    OutWavData.Append(reinterpret_cast<const uint8*>("fmt "), 4);
+    AppendLittleEndian(Subchunk1Size, 4);
+    AppendLittleEndian(AudioFormat, 2);
+    AppendLittleEndian(static_cast<uint32>(AudioInformation.NumChannels), 2);
+    AppendLittleEndian(static_cast<uint32>(AudioInformation.SampleRate), 4);
+    AppendLittleEndian(ByteRate, 4);
+    AppendLittleEndian(BlockAlign, 2);
+    AppendLittleEndian(BitsPerSample, 2);
+
+    OutWavData.Append(reinterpret_cast<const uint8*>("data"), 4);
+    AppendLittleEndian(Subchunk2Size, 4);
+
+    const uint8* PCMDataPtr = reinterpret_cast<const uint8*>(PCMData.GetData());
+    OutWavData.Append(PCMDataPtr, Subchunk2Size);
+
+    return true;
+}
+
+void USTTProcessorWhisper::BuildMultipartBody(const TArray<uint8>& WavData, const FString& Boundary, TArray<uint8>& OutBody, const FString& Prompt) const
+{
+    OutBody.Reset();
+
+    FString BoundaryLine = FString::Printf(TEXT("--%s\r\n"), *Boundary);
+    AppendStringToBody(OutBody, BoundaryLine);
+    AppendStringToBody(OutBody, TEXT("Content-Disposition: form-data; name=\"model\"\r\n\r\n"));
+    AppendStringToBody(OutBody, TranscribeModelEnumToString(WhisperProcessorConfig->Model) + TEXT("\r\n"));
+
+    if (!Prompt.IsEmpty())
+    {
+        BoundaryLine = FString::Printf(TEXT("--%s\r\n"), *Boundary);
+        AppendStringToBody(OutBody, BoundaryLine);
+        AppendStringToBody(OutBody, TEXT("Content-Disposition: form-data; name=\"prompt\"\r\n\r\n"));
+        AppendStringToBody(OutBody, Prompt + TEXT("\r\n"));
+    }
+
+    BoundaryLine = FString::Printf(TEXT("--%s\r\n"), *Boundary);
+    AppendStringToBody(OutBody, BoundaryLine);
+    AppendStringToBody(OutBody, TEXT("Content-Disposition: form-data; name=\"file\"; filename=\"audio.wav\"\r\n"));
+    AppendStringToBody(OutBody, TEXT("Content-Type: audio/wav\r\n\r\n"));
+    OutBody.Append(WavData);
+    AppendStringToBody(OutBody, TEXT("\r\n"));
+
+    BoundaryLine = FString::Printf(TEXT("--%s--\r\n"), *Boundary);
+    AppendStringToBody(OutBody, BoundaryLine);
+}
+
+void USTTProcessorWhisper::NormalizeWhisperURL()
+{
+    if (!WhisperProcessorConfig)
+        return;
+
+    NormalizedWhisperURL = WhisperProcessorConfig->WhisperURL;
+    if (!NormalizedWhisperURL.StartsWith(TEXT("http://")) && !NormalizedWhisperURL.StartsWith(TEXT("https://")))
+    {
+        NormalizedWhisperURL = FString::Printf(TEXT("https://%s"), *NormalizedWhisperURL);
+    }
+}
+
+void USTTProcessorWhisper::PerformHealthCheck()
+{
+    if (!WhisperProcessorConfig)
+        return;
+
+    if (NormalizedWhisperURL.IsEmpty())
+        NormalizeWhisperURL();
+
+    FHttpModule& HttpModule = FHttpModule::Get();
+    TSharedRef<IHttpRequest, ESPMode::ThreadSafe> Request = HttpModule.CreateRequest();
+    Request->SetURL(NormalizedWhisperURL);
+    Request->SetVerb(TEXT("GET"));
+    FString AuthHeader = FString::Printf(TEXT("Bearer %s"), *WhisperProcessorConfig->OpenAI_API_Key);
+    Request->SetHeader(TEXT("Authorization"), AuthHeader);
+    Request->OnProcessRequestComplete().BindLambda([
+        WeakManager = TWeakObjectPtr<USTTManagerBase>(STTManager)](FHttpRequestPtr Req, FHttpResponsePtr Resp, bool bWasSuccessful)
+        {
+            if (!WeakManager.IsValid())
+                return;
+
+            USTTManagerBase* Manager = WeakManager.Get();
+            if (!bWasSuccessful || !Resp.IsValid())
+            {
+                Manager->OnSTTError.Broadcast(TEXT("Whisper initialization check failed: URL not reachable."));
+                return;
+            }
+
+            int32 StatusCode = Resp->GetResponseCode();
+            if (StatusCode == 401)
+            {
+                Manager->OnSTTError.Broadcast(TEXT("Whisper initialization check failed: API key invalid (401)."));
+            }
+        });
+
+    Request->ProcessRequest();
+}
+
+void USTTProcessorWhisper::SendWhisperRequest(TArray<uint8>&& WavData)
+{
+    if (!WhisperProcessorConfig)
+        return;
+
+    if (NormalizedWhisperURL.IsEmpty())
+        NormalizeWhisperURL();
+
+    FHttpModule& HttpModule = FHttpModule::Get();
+    TSharedRef<IHttpRequest, ESPMode::ThreadSafe> Request = HttpModule.CreateRequest();
+    Request->SetURL(NormalizedWhisperURL);
+    Request->SetVerb(TEXT("POST"));
+
+    FString AuthHeader = FString::Printf(TEXT("Bearer %s"), *WhisperProcessorConfig->OpenAI_API_Key);
+    Request->SetHeader(TEXT("Authorization"), AuthHeader);
+    Request->SetHeader(TEXT("Accept"), TEXT("application/json"));
+
+    FString Boundary = TEXT("----AvatarCoreSTTWhisperBoundary");
+    FString ContentType = FString::Printf(TEXT("multipart/form-data; boundary=%s"), *Boundary);
+    Request->SetHeader(TEXT("Content-Type"), ContentType);
+
+    FString Prompt;
+    if (IsValid(STTManager))
+    {
+        Prompt = STTManager->GetSpecialWordsAsString();
+    }
+
+    TArray<uint8> Body;
+    BuildMultipartBody(WavData, Boundary, Body, Prompt);
+    Request->SetContent(Body);
+
+    OnTranscriptionStarted();
+    int32 TranscriptionId = TranscriptionCounter;
+
+    TWeakObjectPtr<USTTProcessorWhisper> WeakThis(this);
+    TSharedPtr<IHttpRequest, ESPMode::ThreadSafe> RequestPtr = Request;
+    Request->OnProcessRequestComplete().BindLambda([
+        WeakThis,
+        RequestPtr,
+        TranscriptionId](FHttpRequestPtr Req, FHttpResponsePtr Resp, bool bWasSuccessful)
+        {
+            if (!WeakThis.IsValid())
+                return;
+
+            USTTProcessorWhisper* Self = WeakThis.Get();
+            Self->ActiveRequests.Remove(RequestPtr);
+
+            if (!bWasSuccessful || !Resp.IsValid())
+            {
+                if (IsValid(Self->STTManager))
+                    Self->STTManager->OnSTTError.Broadcast(TEXT("Whisper request failed or no response received."));
+                Self->bTranscriptionRunning = false;
+                return;
+            }
+
+            int32 StatusCode = Resp->GetResponseCode();
+            if (StatusCode == 401)
+            {
+                if (IsValid(Self->STTManager))
+                    Self->STTManager->OnSTTError.Broadcast(TEXT("Whisper request unauthorized (401). Please check API key."));
+                Self->bTranscriptionRunning = false;
+                return;
+            }
+            else if (StatusCode < 200 || StatusCode >= 300)
+            {
+                FString ErrorBody = Resp->GetContentAsString();
+                if (IsValid(Self->STTManager))
+                {
+                    FString ErrorMessage = FString::Printf(TEXT("Whisper request failed with HTTP %d: %s"), StatusCode, *ErrorBody);
+                    Self->STTManager->OnSTTError.Broadcast(ErrorMessage);
+                }
+                Self->bTranscriptionRunning = false;
+                return;
+            }
+
+            FString JsonString = Resp->GetContentAsString();
+            TSharedPtr<FJsonObject> RootObject;
+            TSharedRef<TJsonReader<>> Reader = TJsonReaderFactory<>::Create(JsonString);
+            if (!FJsonSerializer::Deserialize(Reader, RootObject) || !RootObject.IsValid())
+            {
+                if (IsValid(Self->STTManager))
+                    Self->STTManager->OnSTTError.Broadcast(TEXT("Failed to parse Whisper JSON response."));
+                Self->bTranscriptionRunning = false;
+                return;
+            }
+
+            FString Text;
+            if (!RootObject->TryGetStringField(TEXT("text"), Text))
+            {
+                if (IsValid(Self->STTManager))
+                    Self->STTManager->OnSTTError.Broadcast(TEXT("Whisper response JSON does not contain 'text' field."));
+                Self->bTranscriptionRunning = false;
+                return;
+            }
+
+            Self->OnTranscriptionResult(TranscriptionId, Text);
+        });
+
+    ActiveRequests.Add(RequestPtr);
+    Request->ProcessRequest();
+}
\ No newline at end of file
diff --git a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Whisper/STTWhisperProcessorConfig.cpp b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Whisper/STTWhisperProcessorConfig.cpp
new file mode 100644
index 0000000..19a81e2
--- /dev/null
+++ b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Whisper/STTWhisperProcessorConfig.cpp
@@ -0,0 +1,10 @@
+// Fill out your copyright notice in the Description page of Project Settings.
+
+
+#include "Processor/Whisper/STTWhisperProcessorConfig.h"
+#include "Processor/Whisper/STTProcessorWhisper.h"
+
+USTTWhisperProcessorConfig::USTTWhisperProcessorConfig(const FObjectInitializer& ObjectInitializer)
+{
+    STTProcessorClass = USTTProcessorWhisper::StaticClass();
+}
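For reference, the multipart/form-data payload assembled by BuildMultipartBody and posted by SendWhisperRequest above looks roughly like this on the wire. The model value shown is the config default (gpt-4o-transcribe), the prompt is an example of the comma-separated special-word string, and the WAV bytes are elided:

------AvatarCoreSTTWhisperBoundary
Content-Disposition: form-data; name="model"

gpt-4o-transcribe
------AvatarCoreSTTWhisperBoundary
Content-Disposition: form-data; name="prompt"

Kira, Aki, b.ReX
------AvatarCoreSTTWhisperBoundary
Content-Disposition: form-data; name="file"; filename="audio.wav"
Content-Type: audio/wav

(binary WAV data)
------AvatarCoreSTTWhisperBoundary--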
diff --git a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/STTManagerBase.cpp b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/STTManagerBase.cpp
index d761513..50a1c19 100644
--- a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/STTManagerBase.cpp
+++ b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/STTManagerBase.cpp
@@ -182,15 +182,24 @@ void USTTManagerBase::RemoveSpecialWords(const TArray<FString> WordsToRemove)
 
 TArray<FString> USTTManagerBase::GetSpecialWords()
 {
-    return SpecialWords;
+    TArray<FString> CombinedSpecialWords;
+    CombinedSpecialWords.Append(ProcessorConfig->BaseSettings.STTSpecialWords);
+    CombinedSpecialWords.Append(SpecialWords);
+    return CombinedSpecialWords;
 }
 
 FString USTTManagerBase::GetSpecialWordsAsString()
 {
     FString SpecialWordsString;
-    for (int i = 0; i < SpecialWords.Num(); i++) {
-        SpecialWordsString.Append(SpecialWords[i]);
-        if (i < SpecialWords.Num() - 1)
+
+    // Combine the global special words from the STT settings with the special words added at runtime (Kira, Aki, etc.)
+    TArray<FString> CombinedSpecialWords;
+    CombinedSpecialWords.Append(ProcessorConfig->BaseSettings.STTSpecialWords);
+    CombinedSpecialWords.Append(SpecialWords);
+
+    for (int i = 0; i < CombinedSpecialWords.Num(); i++) {
+        SpecialWordsString.Append(CombinedSpecialWords[i]);
+        if (i < CombinedSpecialWords.Num() - 1)
             SpecialWordsString.Append(TEXT(", "));
     }
     return SpecialWordsString;
diff --git a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Azure/STTAzureProcessorConfig.h b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Azure/STTAzureProcessorConfig.h
index fa66143..7e69364 100644
--- a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Azure/STTAzureProcessorConfig.h
+++ b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Azure/STTAzureProcessorConfig.h
@@ -37,8 +37,6 @@ public:
 
     USTTAzureProcessorConfig(const FObjectInitializer& ObjectInitializer);
 
-    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreSTT|Azure", meta = (ExposeOnSpawn = "true"))
-    TArray<FString> AzurePhraseList = { TEXT("b.ReX"), TEXT("Bruce B.") };
     UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreSTT|Azure", meta = (ExposeOnSpawn = "true"))
     FString AzureAPIKey = "";
     UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreSTT|Azure", meta = (ExposeOnSpawn = "true"))
diff --git a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/STTProcessorBase.h b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/STTProcessorBase.h
index d0a26f5..a802b16 100644
--- a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/STTProcessorBase.h
+++ b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/STTProcessorBase.h
@@ -24,7 +24,7 @@ public:
     virtual void InitSTTProcessor(USTTManagerBase* BaseSTTManager, USTTBaseProcessorConfig* InPreprocessorConfig, bool InDebugMode = false) { STTManager = BaseSTTManager; PreprocessorConfig = InPreprocessorConfig; bDebugMode = InDebugMode; };
 
     UFUNCTION()
-    void ClearSTTProcessor();
+    virtual void ClearSTTProcessor();
 
     UFUNCTION()
     virtual void DestroySTTProcessor() {};
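The header diffs that follow add the Whisper processor's public interface. As background for BuildWavFromPCM earlier in this diff: it writes the canonical 44-byte little-endian PCM WAV header in front of the raw 16-bit samples. A minimal sketch of that layout is below; the struct name is illustrative and not a type in this plugin, the field names are the conventional RIFF ones.

// Sketch of the canonical 44-byte PCM WAV header that BuildWavFromPCM emits field by field.
// All multi-byte fields are little-endian.
#include "CoreMinimal.h"

#pragma pack(push, 1)
struct FWavHeaderSketch
{
    char   RiffId[4];       // "RIFF"
    uint32 ChunkSize;       // 4 + (8 + 16) + (8 + DataSize)
    char   WaveId[4];       // "WAVE"
    char   FmtId[4];        // "fmt "
    uint32 Subchunk1Size;   // 16 for PCM
    uint16 AudioFormat;     // 1 = uncompressed PCM
    uint16 NumChannels;
    uint32 SampleRate;
    uint32 ByteRate;        // SampleRate * NumChannels * BytesPerSample
    uint16 BlockAlign;      // NumChannels * BytesPerSample
    uint16 BitsPerSample;   // 16
    char   DataId[4];       // "data"
    uint32 DataSize;        // NumSamples * BytesPerSample
};
#pragma pack(pop)
static_assert(sizeof(FWavHeaderSketch) == 44, "PCM WAV header is 44 bytes");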
diff --git a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Whisper/STTProcessorWhisper.h b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Whisper/STTProcessorWhisper.h
new file mode 100644
index 0000000..2010520
--- /dev/null
+++ b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Whisper/STTProcessorWhisper.h
@@ -0,0 +1,47 @@
+// Fill out your copyright notice in the Description page of Project Settings.
+
+#pragma once
+
+#include "CoreMinimal.h"
+#include "Processor/STTProcessorBase.h"
+#include "Processor/Whisper/STTWhisperProcessorConfig.h"
+#include "STTProcessorWhisper.generated.h"
+
+/**
+ *
+ */
+UCLASS()
+class USTTProcessorWhisper : public USTTProcessorBase
+{
+    GENERATED_BODY()
+
+public:
+
+    void InitSTTProcessor(USTTManagerBase* BaseSTTManager, USTTBaseProcessorConfig* InProcessorConfig, bool InDebugMode = false) override;
+    virtual void ClearSTTProcessor() override;
+    virtual void DestroySTTProcessor() override;
+
+    virtual void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation) override;
+
+    UFUNCTION()
+    void OnSpeechStateChanged(ESTTTalkingState TalkingState);
+
+private:
+
+    void StartTranscriptionFromBuffer();
+    bool BuildWavFromPCM(const TArray<int16>& PCMData, const FAudioInformation& AudioInformation, TArray<uint8>& OutWavData) const;
+    void BuildMultipartBody(const TArray<uint8>& WavData, const FString& Boundary, TArray<uint8>& OutBody, const FString& Prompt) const;
+    void NormalizeWhisperURL();
+    void PerformHealthCheck();
+    void SendWhisperRequest(TArray<uint8>&& WavData);
+
+private:
+
+    USTTWhisperProcessorConfig* WhisperProcessorConfig = nullptr;
+    FString NormalizedWhisperURL;
+    TArray<int16> BufferedPCMData;
+    FAudioInformation BufferedAudioInformation;
+    bool bHasBufferedAudioInformation = false;
+    TArray<TSharedPtr<IHttpRequest, ESPMode::ThreadSafe>> ActiveRequests;
+    ESTTTalkingState CurrentTalkingState = ESTTTalkingState::SILENCE;
+};
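A rough usage sketch for the config type defined in the next file. How the config object is handed to the STT manager is not shown in this diff, so the InitSTT call below is a hypothetical stand-in for the plugin's existing entry point; only the USTTWhisperProcessorConfig fields themselves come from this change.

// Hypothetical setup code, e.g. from inside an actor or component that owns the STT manager.
USTTWhisperProcessorConfig* Config = NewObject<USTTWhisperProcessorConfig>(this);
Config->OpenAI_API_Key = TEXT("sk-...");                                    // must be set before the module is initialized
Config->WhisperURL = TEXT("api.openai.com/v1/audio/transcriptions");       // scheme is prepended by NormalizeWhisperURL()
Config->Model = EOpenAITranscriptionModel::TranscribeMini4o;               // or Whisper1 / Transcribe4o
Config->MinDuration = 0.75f;                                               // clips shorter than this (seconds) are dropped
// STTManager->InitSTT(Config);  // hypothetical call; the real entry point lives elsewhere in the plugin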
diff --git a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Whisper/STTWhisperProcessorConfig.h b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Whisper/STTWhisperProcessorConfig.h
new file mode 100644
index 0000000..dcd6948
--- /dev/null
+++ b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Whisper/STTWhisperProcessorConfig.h
@@ -0,0 +1,38 @@
+// Fill out your copyright notice in the Description page of Project Settings.
+
+#pragma once
+
+#include "CoreMinimal.h"
+#include "Processor/STTBaseProcessorConfig.h"
+#include "STTWhisperProcessorConfig.generated.h"
+
+UENUM(BlueprintType)
+enum class EOpenAITranscriptionModel : uint8
+{
+    Whisper1 UMETA(DisplayName = "Whisper-1"),
+    TranscribeMini4o UMETA(DisplayName = "4o Transcribe Mini"),
+    Transcribe4o UMETA(DisplayName = "4o Transcribe")
+};
+
+/**
+ *
+ */
+UCLASS()
+class USTTWhisperProcessorConfig : public USTTBaseProcessorConfig
+{
+    GENERATED_BODY()
+
+public:
+
+    USTTWhisperProcessorConfig(const FObjectInitializer& ObjectInitializer);
+
+    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreSTT|Whisper", meta = (ExposeOnSpawn = "true"))
+    FString OpenAI_API_Key = "";
+    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreSTT|Whisper", meta = (ExposeOnSpawn = "true"))
+    FString WhisperURL = "api.openai.com/v1/audio/transcriptions";
+    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreSTT|Whisper", meta = (ExposeOnSpawn = "true"))
+    EOpenAITranscriptionModel Model = EOpenAITranscriptionModel::Transcribe4o;
+    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreSTT|Whisper", meta = (ExposeOnSpawn = "true"))
+    float MinDuration = 0.75f;
+
+};
diff --git a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/STTStructs.h b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/STTStructs.h
index 5e117c1..7bf75a2 100644
--- a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/STTStructs.h
+++ b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/STTStructs.h
@@ -123,4 +123,7 @@ struct FSTTBaseSettings
     FSpeexDSPSettings SpeexDSPSettings;
     UPROPERTY(EditAnywhere, BlueprintReadWrite, meta = (ToolTip = "Transcriptions to always change to another word.", Category = "STT|Base"))
     TArray STTReplacements;
+    UPROPERTY(EditAnywhere, BlueprintReadWrite, meta = (ToolTip = "Special words that the transcription service needs to know (e.g. b.ReX or Bruce-B).", Category = "STT|Base"))
+    TArray<FString> STTSpecialWords;
+
 };
\ No newline at end of file
diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/CMakeLists.txt b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/CMakeLists.txt
new file mode 100644
index 0000000..725b6bc
--- /dev/null
+++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/CMakeLists.txt
@@ -0,0 +1,8 @@
+cmake_minimum_required(VERSION 3.19)
+
+project(c_headers)
+
+set(SRC_DIR "${PROJECT_SOURCE_DIR}")
+add_library(${PROJECT_NAME} INTERFACE ${SPEECH_C_API_HEADERS})
+target_include_directories(${PROJECT_NAME} INTERFACE ${PROJECT_SOURCE_DIR})
+set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER api)
diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/azac_api_c_common.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/azac_api_c_common.h
new file mode 100644
index 0000000..a1e1571
--- /dev/null
+++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/azac_api_c_common.h
@@ -0,0 +1,81 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// See https://aka.ms/azai/license202106 for the full license information.
+// + +#pragma once + +// TODO: TFS#3671215 - Vision: C/C++ azac_api* files are in shared include directory, speech and vision share + +#include +#include + +#ifdef __cplusplus +#define AZAC_EXTERN_C extern "C" +#else +#define AZAC_EXTERN_C +#endif + +#ifdef _WIN32 +#define AZAC_DLL_EXPORT __declspec(dllexport) +#define AZAC_DLL_IMPORT __declspec(dllimport) +#define AZAC_API_NOTHROW __declspec(nothrow) +#define AZAC_API_RESULTTYPE AZACHR +#define AZAC_API_CALLTYPE __stdcall +#define AZAC_API_VCALLTYPE __cdecl +#else +#define AZAC_DLL_EXPORT __attribute__ ((__visibility__("default"))) +#define AZAC_DLL_IMPORT +#define AZAC_API_NOTHROW __attribute__((nothrow)) +#define AZAC_API_RESULTTYPE AZACHR +#define AZAC_API_CALLTYPE +#define AZAC_API_VCALLTYPE __attribute__((cdecl)) +#endif + +#ifdef AZAC_CONFIG_EXPORTAPIS +#define AZAC_API_EXPORT AZAC_DLL_EXPORT +#endif +#ifdef AZAC_CONFIG_IMPORTAPIS +#define AZAC_API_EXPORT AZAC_DLL_IMPORT +#endif +#ifdef AZAC_CONFIG_STATIC_LINK_APIS +#define AZAC_API_EXPORT +#endif +#ifndef AZAC_API_EXPORT +#define AZAC_API_EXPORT AZAC_DLL_IMPORT +#endif + +#define AZAC_API AZAC_EXTERN_C AZAC_API_EXPORT AZAC_API_RESULTTYPE AZAC_API_NOTHROW AZAC_API_CALLTYPE +#define AZAC_API_(type) AZAC_EXTERN_C AZAC_API_EXPORT type AZAC_API_NOTHROW AZAC_API_CALLTYPE +#define AZAC_API__(type) AZAC_EXTERN_C AZAC_API_EXPORT AZAC_API_NOTHROW type AZAC_API_CALLTYPE +#define AZAC_APIV AZAC_EXTERN_C AZAC_API_EXPORT AZAC_API_NOTHROW AZAC_API_RESULTTYPE AZAC_API_VCALLTYPE +#define AZAC_APIV_(type) AZAC_EXTERN_C AZAC_API_EXPORT AZAC_API_NOTHROW type AZAC_API_VCALLTYPE +#define AZAC_API_PRIVATE AZAC_EXTERN_C AZAC_API_RESULTTYPE AZAC_API_NOTHROW AZAC_API_CALLTYPE +#define AZAC_API_PRIVATE_(type) AZAC_EXTERN_C type AZAC_API_NOTHROW AZAC_API_CALLTYPE + +struct _azac_empty {}; +typedef struct _azac_empty* _azachandle; +typedef _azachandle AZAC_HANDLE; + +#define AZAC_HANDLE_INVALID ((AZAC_HANDLE)-1) +#define AZAC_HANDLE_RESERVED1 ((AZAC_HANDLE)+1) + +#ifndef AZAC_SUPPRESS_DIAGNOSTICS_INCLUDE_FROM_COMMON +#define AZAC_SUPPRESS_COMMON_INCLUDE_FROM_DIAGNOSTICS +#include +#undef AZAC_SUPPRESS_COMMON_INCLUDE_FROM_DIAGNOSTICS +#endif + +#ifndef AZAC_SUPPRESS_ERROR_INCLUDE_FROM_COMMON +#define AZAC_SUPPRESS_COMMON_INCLUDE_FROM_ERROR +#include +#undef AZAC_SUPPRESS_COMMON_INCLUDE_FROM_ERROR +#endif + +#ifndef AZAC_SUPPRESS_DEBUG_INCLUDE_FROM_COMMON +#define AZAC_SUPPRESS_COMMON_INCLUDE_FROM_DEBUG +#include +#undef AZAC_SUPPRESS_COMMON_INCLUDE_FROM_DEBUG +#endif + +#define AZACPROPERTYBAGHANDLE AZAC_HANDLE diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/azac_api_c_diagnostics.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/azac_api_c_diagnostics.h new file mode 100644 index 0000000..1059860 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/azac_api_c_diagnostics.h @@ -0,0 +1,80 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/azai/license202106 for the full license information. 
+// + +#pragma once + +// TODO: TFS#3671215 - Vision: C/C++ azac_api* files are in shared include directory, speech and vision share + +#ifndef AZAC_SUPPRESS_COMMON_INCLUDE_FROM_DIAGNOSTICS +#define AZAC_SUPPRESS_DIAGNOSTICS_INCLUDE_FROM_COMMON +#include +#undef AZAC_SUPPRESS_DIAGNOSTICS_INCLUDE_FROM_COMMON +#endif + +#include +#include + +// +// APIs to manage logging to file +// +AZAC_API diagnostics_log_start_logging(AZAC_HANDLE hpropbag, void* reserved); +AZAC_API diagnostics_log_apply_properties(AZAC_HANDLE hpropbag, void* reserved); +AZAC_API diagnostics_log_stop_logging(); + +// +// APIs to manage logging events +// +typedef void(*DIAGNOSTICS_CALLBACK_FUNC)(const char *logLine); +AZAC_API diagnostics_logmessage_set_callback(DIAGNOSTICS_CALLBACK_FUNC callback); +AZAC_API diagnostics_logmessage_set_filters(const char* filters); + +// +// APIs to managed eventSource events +// +typedef void(*DIAGNOSTICS_EVENTSOURCE_CALLBACK_FUNC)(const char *logLine, const int level); +AZAC_API diagnostics_eventsource_logmessage_set_callback(DIAGNOSTICS_EVENTSOURCE_CALLBACK_FUNC callback); +AZAC_API diagnostics_eventsource_logmessage_set_filters(const char* filters); + +// +// APIs to manage logging to memory +// +AZAC_API_(void) diagnostics_log_memory_start_logging(); +AZAC_API_(void) diagnostics_log_memory_stop_logging(); +AZAC_API_(void) diagnostics_log_memory_set_filters(const char* filters); + +// The binding layers use these to implement a dump to vector of strings or an output stream +AZAC_API_(size_t) diagnostics_log_memory_get_line_num_oldest(); +AZAC_API_(size_t) diagnostics_log_memory_get_line_num_newest(); +AZAC_API__(const char*) diagnostics_log_memory_get_line(size_t lineNum); + +// Dump to file, std out or std err with optional prefix string +AZAC_API diagnostics_log_memory_dump_to_stderr(); // This calls diagnostics_log_memory_dump(nullptr, nullptr, false, true) +AZAC_API diagnostics_log_memory_dump(const char* filename, const char* linePrefix, bool emitToStdOut, bool emitToStdErr); +AZAC_API diagnostics_log_memory_dump_on_exit(const char* filename, const char* linePrefix, bool emitToStdOut, bool emitToStdErr); + +// +// APIs to manage logging to the console +// +AZAC_API_(void) diagnostics_log_console_start_logging(bool logToStderr); +AZAC_API_(void) diagnostics_log_console_stop_logging(); +AZAC_API_(void) diagnostics_log_console_set_filters(const char* filters); + +// +// APIs to log a string +// +AZAC_API_(void) diagnostics_log_format_message(char* buffer, size_t bufferSize, int level, const char* pszTitle, const char* fileName, const int lineNumber, const char* pszFormat, va_list argptr); +AZAC_API_(void) diagnostics_log_trace_string(int level, const char* pszTitle, const char* fileName, const int lineNumber, const char* psz); +AZAC_API_(void) diagnostics_log_trace_message(int level, const char* pszTitle, const char* fileName, const int lineNumber, const char* pszFormat, ...); +AZAC_API_(void) diagnostics_log_trace_message2(int level, const char* pszTitle, const char* fileName, const int lineNumber, const char* pszFormat, va_list argptr); + +AZAC_API_(void) diagnostics_set_log_level(const char * logger, const char * level); +AZAC_API_(bool) diagnostics_is_log_level_enabled(int level); + +// +// Memory tracking API's +// +AZAC_API_(size_t) diagnostics_get_handle_count(); +AZAC_API__(const char*) diagnostics_get_handle_info(); +AZAC_API diagnostics_free_string(const char* value); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/azac_api_c_error.h 
b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/azac_api_c_error.h new file mode 100644 index 0000000..e0653d7 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/azac_api_c_error.h @@ -0,0 +1,24 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/azai/vision/license for the full license information. +// + +#pragma once + +// TODO: TFS#3671215 - Vision: C/C++ azac_api* files are in shared include directory, speech and vision share + +#ifndef AZAC_SUPPRESS_COMMON_INCLUDE_FROM_ERROR +#define AZAC_SUPPRESS_ERROR_INCLUDE_FROM_COMMON +#include +#undef AZAC_SUPPRESS_ERROR_INCLUDE_FROM_COMMON +#endif + +typedef const char * const_char_ptr; + +AZAC_API_(const_char_ptr) error_get_message(AZAC_HANDLE errorHandle); + +AZAC_API_(const_char_ptr) error_get_call_stack(AZAC_HANDLE errorHandle); + +AZAC_API error_get_error_code(AZAC_HANDLE errorHandle); + +AZAC_API error_release(AZAC_HANDLE errorHandle); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/azac_api_c_pal.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/azac_api_c_pal.h new file mode 100644 index 0000000..b67fd3a --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/azac_api_c_pal.h @@ -0,0 +1,13 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/azai/license202106 for the full license information. +// + +#pragma once + +// TODO: TFS#3671215 - Vision: C/C++ azac_api* files are in shared include directory, speech and vision share + +#include "azac_api_c_common.h" + +AZAC_API_(size_t) pal_wstring_to_string(char * dst, const wchar_t * src, size_t dstSize); +AZAC_API_(size_t) pal_string_to_wstring(wchar_t * dst, const char * src, size_t dstSize); \ No newline at end of file diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/azac_debug.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/azac_debug.h new file mode 100644 index 0000000..c5ff05f --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/azac_debug.h @@ -0,0 +1,845 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/azai/license202106 for the full license information. +// + +#pragma once +#include +#include + +// TODO: TFS#3671215 - Vision: C/C++ azac_api* files are in shared include directory, speech and vision share + +#ifndef _MSC_VER +// macros in this header generate a bunch of +// "ISO C++11 requires at least one argument for the "..." in a variadic macro" errors. +// system_header pragma is the only mechanism that helps to suppress them. +// https://stackoverflow.com/questions/35587137/how-to-suppress-gcc-variadic-macro-argument-warning-for-zero-arguments-for-a-par +// TODO: try to make macros standard-compliant. +#pragma GCC system_header +#endif + +#ifndef __cplusplus +#define static_assert _Static_assert +#endif + +#define UNUSED(x) (void)(x) + +//------------------------------------------------------- +// Re-enabled ability to compile out all macros... +// However, currently still need to keep all macros until +// final review of all macros is complete. 
+//------------------------------------------------------- + +#define AZAC_CONFIG_TRACE_INCLUDE_DBG_WITH_ALL 1 + +#ifdef AZAC_CONFIG_TRACE_INCLUDE_DBG_WITH_ALL +#if defined(AZAC_CONFIG_TRACE_ALL) && !defined(AZAC_CONFIG_DBG_TRACE_ALL) && (!defined(DEBUG) || !defined(_DEBUG)) +#define AZAC_CONFIG_DBG_TRACE_ALL 1 +#endif +#endif + +//------------------------------------------------------- +// AZAC_ and AZAC_DBG_ macro configuration +//------------------------------------------------------- + +#ifdef AZAC_CONFIG_DBG_TRACE_ALL +#define AZAC_CONFIG_DBG_TRACE_VERBOSE 1 +#define AZAC_CONFIG_DBG_TRACE_INFO 1 +#define AZAC_CONFIG_DBG_TRACE_WARNING 1 +#define AZAC_CONFIG_DBG_TRACE_ERROR 1 +#define AZAC_CONFIG_DBG_TRACE_FUNCTION 1 +#define AZAC_CONFIG_DBG_TRACE_SCOPE 1 +#define AZAC_CONFIG_DBG_TRACE_ASSERT 1 +#define AZAC_CONFIG_DBG_TRACE_VERIFY 1 +#ifndef AZAC_CONFIG_TRACE_ALL +#define AZAC_CONFIG_TRACE_ALL 1 +#endif +#endif + +#ifdef AZAC_CONFIG_TRACE_ALL +#define AZAC_CONFIG_TRACE_VERBOSE 1 +#define AZAC_CONFIG_TRACE_INFO 1 +#define AZAC_CONFIG_TRACE_WARNING 1 +#define AZAC_CONFIG_TRACE_ERROR 1 +#define AZAC_CONFIG_TRACE_FUNCTION 1 +#define AZAC_CONFIG_TRACE_SCOPE 1 +#define AZAC_CONFIG_TRACE_THROW_ON_FAIL 1 +#define AZAC_CONFIG_TRACE_REPORT_ON_FAIL 1 +#define AZAC_CONFIG_TRACE_RETURN_ON_FAIL 1 +#define AZAC_CONFIG_TRACE_EXITFN_ON_FAIL 1 +#endif + +//----------------------------------------------------------- +// AZAC_TRACE macro common implementations +//----------------------------------------------------------- + +#define __AZAC_TRACE_LEVEL_INFO 0x08 // Trace_Info +#define __AZAC_TRACE_LEVEL_WARNING 0x04 // Trace_Warning +#define __AZAC_TRACE_LEVEL_ERROR 0x02 // Trace_Error +#define __AZAC_TRACE_LEVEL_VERBOSE 0x10 // Trace_Verbose + +#ifndef __AZAC_DO_TRACE_IMPL +#ifdef __cplusplus +#include +#include +#include +#include +inline void __azac_do_trace_message(int level, const char* pszTitle, const char* fileName, const int lineNumber, const char* pszFormat, ...) throw() +{ + UNUSED(level); + + bool logToConsole = false; +#if defined(DEBUG) || defined(_DEBUG) + logToConsole = true; +#endif + + if (!logToConsole) + { + return; + } + + try + { + va_list argptr; + va_start(argptr, pszFormat); + + std::string format; + while (*pszFormat == '\n' || *pszFormat == '\r') + { + if (*pszFormat == '\r') + { + pszTitle = nullptr; + } + + format += *pszFormat++; + } + + if (pszTitle != nullptr) + { + format += pszTitle; + } + + std::string fileNameOnly(fileName); + std::replace(fileNameOnly.begin(), fileNameOnly.end(), '\\', '/'); + + std::string fileNameLineNumber = " " + fileNameOnly.substr(fileNameOnly.find_last_of('/', std::string::npos) + 1) + ":" + std::to_string(lineNumber) + " "; + + format += fileNameLineNumber; + + format += pszFormat; + + if (format.length() < 1 || format[format.length() - 1] != '\n') + { + format += "\n"; + } + + vfprintf(stderr, format.c_str(), argptr); + + va_end(argptr); + } + catch(...) + { + } +} +#define __AZAC_DO_TRACE_IMPL __azac_do_trace_message +#else // __cplusplus +#define __AZAC_DO_TRACE_IMPL +#endif // __cplusplus +#endif + +#define __AZAC_DOTRACE(level, title, fileName, lineNumber, ...) \ + do { \ + __AZAC_DO_TRACE_IMPL(level, title, fileName, lineNumber, ##__VA_ARGS__); \ + } while (0) + +#define __AZAC_TRACE_INFO(title, fileName, lineNumber, msg, ...) __AZAC_DOTRACE(__AZAC_TRACE_LEVEL_INFO, title, fileName, lineNumber, msg, ##__VA_ARGS__) +#define __AZAC_TRACE_INFO_IF(cond, title, fileName, lineNumber, msg, ...) 
\ + do { \ + int fCond = !!(cond); \ + if (fCond) { \ + __AZAC_TRACE_INFO(title, fileName, lineNumber, msg, ##__VA_ARGS__); \ + } } while (0) + +#define __AZAC_TRACE_WARNING(title, fileName, lineNumber, msg, ...) __AZAC_DOTRACE(__AZAC_TRACE_LEVEL_WARNING, title, fileName, lineNumber, msg, ##__VA_ARGS__) +#define __AZAC_TRACE_WARNING_IF(cond, title, fileName, lineNumber, msg, ...) \ + do { \ + int fCond = !!(cond); \ + if (fCond) { \ + __AZAC_TRACE_WARNING(title, fileName, lineNumber, msg, ##__VA_ARGS__); \ + } } while (0) + +#define __AZAC_TRACE_ERROR(title, fileName, lineNumber, msg, ...) __AZAC_DOTRACE(__AZAC_TRACE_LEVEL_ERROR, title, fileName, lineNumber, msg, ##__VA_ARGS__) +#define __AZAC_TRACE_ERROR_IF(cond, title, fileName, lineNumber, msg, ...) \ + do { \ + int fCond = !!(cond); \ + if (fCond) { \ + __AZAC_TRACE_ERROR(title, fileName, lineNumber, msg, ##__VA_ARGS__); \ + } } while (0) + +#define __AZAC_TRACE_VERBOSE(title, fileName, lineNumber, msg, ...) __AZAC_DOTRACE(__AZAC_TRACE_LEVEL_VERBOSE, title, fileName, lineNumber, msg, ##__VA_ARGS__) +#define __AZAC_TRACE_VERBOSE_IF(cond, title, fileName, lineNumber, msg, ...) \ + do { \ + int fCond = !!(cond); \ + if (fCond) { \ + __AZAC_TRACE_VERBOSE(title, fileName, lineNumber, msg, ##__VA_ARGS__); \ + } } while (0) + +#define ___AZAC_EXPR_AS_STRING(_String) "" #_String +#define __AZAC_EXPR_AS_STRING(_String) ___AZAC_EXPR_AS_STRING(_String) + +#define __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x) __AZAC_TRACE_ERROR(title, fileName, lineNumber, __AZAC_EXPR_AS_STRING(hr) " = 0x%0" PRIxPTR, x) + +#define __AZAC_REPORT_ON_FAIL(title, fileName, lineNumber, hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + } } while (0) +#define __AZAC_REPORT_ON_FAIL_IFNOT(title, fileName, lineNumber, hr, hrNot) \ + do { \ + AZACHR x = hr; \ + if (x != hrNot) { \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + } } } while (0) + +#define __AZAC_T_RETURN_HR(title, fileName, lineNumber, hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + } \ + return x; \ + } while (0) +#define __AZAC_T_RETURN_HR_IF(title, fileName, lineNumber, hr, cond) \ + do { \ + int fCond = !!(cond); \ + if (fCond) { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + } \ + return x; \ + } } while (0) +#define __AZAC_T_RETURN_ON_FAIL(title, fileName, lineNumber, hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + return x; \ + } } while (0) +#define __AZAC_T_RETURN_ON_FAIL_IF_NOT(title, fileName, lineNumber, hr, hrNot) \ + do { \ + AZACHR x = hr; \ + if (x != hrNot) { \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + return x; \ + } } } while (0) +#define __AZAC_RETURN_HR(hr) return hr +#define __AZAC_RETURN_HR_IF(hr, cond) \ + do { \ + int fCond = !!(cond); \ + if (fCond) { \ + return hr; \ + } } while (0) +#define __AZAC_RETURN_ON_FAIL(hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + return x; \ + } } while (0) +#define __AZAC_RETURN_ON_FAIL_IF_NOT(hr, hrNot) \ + do { \ + AZACHR x = hr; \ + if (x != hrNot) { \ + if (AZAC_FAILED(x)) { \ + return x; \ + } } } while (0) + +#define __AZAC_T_EXITFN_HR(title, fileName, lineNumber, hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + } 
\ + goto AZAC_EXITFN_CLEANUP; \ + } while (0) +#define __AZAC_T_EXITFN_HR_IF(title, fileName, lineNumber, hr, cond) \ + do { \ + int fCond = !!(cond); \ + if (fCond) { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + } \ + goto AZAC_EXITFN_CLEANUP; \ + } } while (0) +#define __AZAC_T_EXITFN_ON_FAIL(title, fileName, lineNumber, hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + goto AZAC_EXITFN_CLEANUP; \ + } } while (0) +#define __AZAC_T_EXITFN_ON_FAIL_IF_NOT(title, fileName, lineNumber, hr, hrNot) \ + do { \ + AZACHR x = hr; \ + if (x != hrNot) { \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + goto AZAC_EXITFN_CLEANUP; \ + } } } while (0) + +#define __AZAC_EXITFN_HR(hr) \ + do { \ + AZACHR x = hr; \ + goto AZAC_EXITFN_CLEANUP; \ + } while (0) +#define __AZAC_EXITFN_HR_IF(hr, cond) \ + do { \ + int fCond = !!(cond); \ + if (fCond) { \ + AZACHR x = hr; \ + goto AZAC_EXITFN_CLEANUP; \ + } } while (0) +#define __AZAC_EXITFN_ON_FAIL(hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + goto AZAC_EXITFN_CLEANUP; \ + } } while (0) +#define __AZAC_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) \ + do { \ + AZACHR x = hr; \ + if (x != hrNot) { \ + if (AZAC_FAILED(x)) { \ + goto AZAC_EXITFN_CLEANUP; \ + } } } while (0) + +#define __AZAC_TRACE_ASSERT(title, fileName, lineNumber, expr) __AZAC_TRACE_ERROR(title, fileName, lineNumber, __AZAC_EXPR_AS_STRING(expr) " = false") +#define __AZAC_TRACE_ASSERT_MSG(title, fileName, lineNumber, expr, ...) __AZAC_TRACE_ERROR(title, fileName, lineNumber, __AZAC_EXPR_AS_STRING(expr) " = false; " __VA_ARGS__) + +#define __AZAC_DBG_ASSERT(title, fileName, lineNumber, expr) \ + do { \ + int fCond = !!(expr); \ + if (!fCond) { \ + __AZAC_TRACE_ASSERT(title, fileName, lineNumber, expr); \ + abort(); \ + } } while (0) +#define __AZAC_DBG_ASSERT_WITH_MESSAGE(title, fileName, lineNumber, expr, ...) \ + do { \ + int fCond = !!(expr); \ + if (!fCond) { \ + __AZAC_TRACE_ASSERT_MSG(title, fileName, lineNumber, expr, ##__VA_ARGS__); \ + abort(); \ + } } while (0) + +#define __AZAC_DBG_VERIFY(title, fileName, lineNumber, expr) \ + do { \ + int fCond = !!(expr); \ + if (!fCond) { \ + __AZAC_TRACE_ASSERT(title, fileName, lineNumber, expr); \ + abort(); \ + } } while (0) +#define __AZAC_DBG_VERIFY_WITH_MESSAGE(title, fileName, lineNumber, expr, ...) 
\ + do { \ + int fCond = !!(expr); \ + if (!fCond) { \ + __AZAC_TRACE_ASSERT_MSG(title, fileName, lineNumber, expr, ##__VA_ARGS__); \ + abort(); \ + } } while (0) + +#ifdef __cplusplus + +#include +#define __AZAC_TRACE_SCOPE(t1, fileName, lineNumber, t2, x, y) \ + __AZAC_TRACE_INFO(t1, fileName, lineNumber, "%s", x); \ + auto evaluateYInScopeInMacros##lineNumber = y; \ + auto leavingScopePrinterInMacros##lineNumber = [&evaluateYInScopeInMacros##lineNumber](int*) -> void { \ + __AZAC_TRACE_INFO(t2, fileName, lineNumber, "%s", evaluateYInScopeInMacros##lineNumber); \ + }; \ + std::unique_ptr onExit##lineNumber((int*)1, leavingScopePrinterInMacros##lineNumber) + +#ifndef __AZAC_THROW_HR_IMPL +#define __AZAC_THROW_HR_IMPL(hr) __azac_rethrow(hr) +#endif +#ifndef __AZAC_THROW_HR +#define __AZAC_THROW_HR(hr) __AZAC_THROW_HR_IMPL(hr) +#endif + +#ifndef __AZAC_LOG_HR_IMPL +#define __AZAC_LOG_HR_IMPL(hr) __azac_log_only(hr) +#endif +#ifndef __AZAC_LOG_HR +#define __AZAC_LOG_HR(hr) __AZAC_LOG_HR_IMPL(hr) +#endif + +#define __AZAC_T_LOG_ON_FAIL(title, fileName, lineNumber, hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + __AZAC_LOG_HR(x); \ + } } while (0) +#define __AZAC_T_THROW_ON_FAIL(title, fileName, lineNumber, hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + __AZAC_THROW_HR(x); \ + } } while (0) +#define __AZAC_T_THROW_ON_FAIL_IF_NOT(title, fileName, lineNumber, hr, hrNot) \ + do { \ + AZACHR x = hr; \ + if (x != hrNot) { \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + __AZAC_THROW_HR(x); \ + } } } while (0) +#define __AZAC_T_THROW_HR_IF(title, fileName, lineNumber, hr, cond) \ + do { \ + int fCond = !!(cond); \ + if (fCond) { \ + AZACHR x = hr; \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + __AZAC_THROW_HR(x); \ + } } while (0) +#define __AZAC_T_THROW_HR(title, fileName, lineNumber, hr) \ + do { \ + AZACHR x = hr; \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + __AZAC_THROW_HR(x); \ + } while (0) + + +#define __AZAC_LOG_ON_FAIL(hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_LOG_HR(x); \ + } } while (0) +#define __AZAC_THROW_ON_FAIL(hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_THROW_HR(x); \ + } } while (0) +#define __AZAC_THROW_ON_FAIL_IF_NOT(hr, hrNot) \ + do { \ + AZACHR x = hr; \ + if (x != hrNot) { \ + if (AZAC_FAILED(x)) { \ + __AZAC_THROW_HR(x); \ + } } } while (0) +#define __AZAC_THROW_HR_IF(hr, cond) \ + do { \ + int fCond = !!(cond); \ + if (fCond) { \ + AZACHR x = hr; \ + __AZAC_THROW_HR(x); \ + } } while (0) + +#endif // __cplusplus + + + +//------------------------------------------------------- +// AZAC_ macro definitions +//------------------------------------------------------- + +#ifdef AZAC_CONFIG_TRACE_VERBOSE +#define AZAC_TRACE_VERBOSE(msg, ...) __AZAC_TRACE_VERBOSE("AZAC_TRACE_VERBOSE: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define AZAC_TRACE_VERBOSE_IF(cond, msg, ...) __AZAC_TRACE_VERBOSE_IF(cond, "AZAC_TRACE_VERBOSE: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define AZAC_TRACE_VERBOSE(...) +#define AZAC_TRACE_VERBOSE_IF(...) +#endif + +#ifdef AZAC_CONFIG_DBG_TRACE_VERBOSE +#define AZAC_DBG_TRACE_VERBOSE(msg, ...) __AZAC_TRACE_VERBOSE("AZAC_DBG_TRACE_VERBOSE: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define AZAC_DBG_TRACE_VERBOSE_IF(cond, msg, ...) 
__AZAC_TRACE_VERBOSE_IF(cond, "AZAC_DBG_TRACE_VERBOSE: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define AZAC_DBG_TRACE_VERBOSE(...) +#define AZAC_DBG_TRACE_VERBOSE_IF(...) +#endif + +#ifdef AZAC_CONFIG_TRACE_INFO +#define AZAC_TRACE_INFO(msg, ...) __AZAC_TRACE_INFO("AZAC_TRACE_INFO: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define AZAC_TRACE_INFO_IF(cond, msg, ...) __AZAC_TRACE_INFO_IF(cond, "AZAC_TRACE_INFO: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define AZAC_TRACE_INFO(...) +#define AZAC_TRACE_INFO_IF(...) +#endif + +#ifdef AZAC_CONFIG_DBG_TRACE_INFO +#define AZAC_DBG_TRACE_INFO(msg, ...) __AZAC_TRACE_INFO("AZAC_DBG_TRACE_INFO: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define AZAC_DBG_TRACE_INFO_IF(cond, msg, ...) __AZAC_TRACE_INFO_IF(cond, "AZAC_DBG_TRACE_INFO: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define AZAC_DBG_TRACE_INFO(...) +#define AZAC_DBG_TRACE_INFO_IF(...) +#endif + +#ifdef AZAC_CONFIG_TRACE_WARNING +#define AZAC_TRACE_WARNING(msg, ...) __AZAC_TRACE_WARNING("AZAC_TRACE_WARNING:", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define AZAC_TRACE_WARNING_IF(cond, msg, ...) __AZAC_TRACE_WARNING_IF(cond, "AZAC_TRACE_WARNING:", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define AZAC_TRACE_WARNING(...) +#define AZAC_TRACE_WARNING_IF(...) +#endif + +#ifdef AZAC_CONFIG_DBG_TRACE_WARNING +#define AZAC_DBG_TRACE_WARNING(msg, ...) __AZAC_TRACE_WARNING("AZAC_DBG_TRACE_WARNING:", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define AZAC_DBG_TRACE_WARNING_IF(cond, msg, ...) __AZAC_TRACE_WARNING_IF(cond, "AZAC_DBG_TRACE_WARNING:", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define AZAC_DBG_TRACE_WARNING(...) +#define AZAC_DBG_TRACE_WARNING_IF(...) +#endif + +#ifdef AZAC_CONFIG_TRACE_ERROR +#define AZAC_TRACE_ERROR(msg, ...) __AZAC_TRACE_ERROR("AZAC_TRACE_ERROR: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define AZAC_TRACE_ERROR_IF(cond, msg, ...) __AZAC_TRACE_ERROR_IF(cond, "AZAC_TRACE_ERROR: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define AZAC_TRACE_ERROR(...) +#define AZAC_TRACE_ERROR_IF(...) +#endif + +#ifdef AZAC_CONFIG_DBG_TRACE_ERROR +#define AZAC_DBG_TRACE_ERROR(msg, ...) __AZAC_TRACE_ERROR("AZAC_DBG_TRACE_ERROR: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define AZAC_DBG_TRACE_ERROR_IF(cond, msg, ...) __AZAC_TRACE_ERROR_IF(cond, "AZAC_DBG_TRACE_ERROR: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define AZAC_DBG_TRACE_ERROR(...) +#define AZAC_DBG_TRACE_ERROR_IF(...) +#endif + +#ifdef AZAC_CONFIG_TRACE_FUNCTION +#define AZAC_TRACE_FUNCTION(...) __AZAC_TRACE_VERBOSE("AZAC_TRACE_FUNCTION: ", __FILE__, __LINE__, __FUNCTION__) +#else +#define AZAC_TRACE_FUNCTION(...) +#endif + +#ifdef AZAC_CONFIG_DBG_TRACE_FUNCTION +#define AZAC_DBG_TRACE_FUNCTION(...) __AZAC_TRACE_VERBOSE("AZAC_DBG_TRACE_FUNCTION: ", __FILE__, __LINE__, __FUNCTION__) +#else +#define AZAC_DBG_TRACE_FUNCTION(...) 
+#endif + +#ifdef AZAC_CONFIG_TRACE_REPORT_ON_FAIL +#define AZAC_REPORT_ON_FAIL(hr) __AZAC_REPORT_ON_FAIL("AZAC_REPORT_ON_FAIL: ", __FILE__, __LINE__, hr) +#define AZAC_REPORT_ON_FAIL_IFNOT(hr, hrNot) __AZAC_REPORT_ON_FAIL_IFNOT("AZAC_REPORT_ON_FAIL: ", __FILE__, __LINE__, hr, hrNot) +#else +#define AZAC_REPORT_ON_FAIL(hr) UNUSED(hr) +#define AZAC_REPORT_ON_FAIL_IFNOT(hr, hrNot) UNUSED(hr); UNUSED(hrNot) +#endif + +#ifdef AZAC_CONFIG_TRACE_RETURN_ON_FAIL +#define AZAC_RETURN_HR(hr) __AZAC_T_RETURN_HR("AZAC_RETURN_ON_FAIL: ", __FILE__, __LINE__, hr) +#define AZAC_RETURN_HR_IF(hr, cond) __AZAC_T_RETURN_HR_IF("AZAC_RETURN_ON_FAIL: ", __FILE__, __LINE__, hr, cond) +#define AZAC_RETURN_ON_FAIL(hr) __AZAC_T_RETURN_ON_FAIL("AZAC_RETURN_ON_FAIL: ", __FILE__, __LINE__, hr) +#define AZAC_RETURN_ON_FAIL_IF_NOT(hr, hrNot) __AZAC_T_RETURN_ON_FAIL_IF_NOT("AZAC_RETURN_ON_FAIL: ", __FILE__, __LINE__, hr, hrNot) +#else +#define AZAC_RETURN_HR(hr) __AZAC_RETURN_HR(hr) +#define AZAC_RETURN_HR_IF(hr, cond) __AZAC_RETURN_HR_IF(hr, cond) +#define AZAC_RETURN_ON_FAIL(hr) __AZAC_RETURN_ON_FAIL(hr) +#define AZAC_RETURN_ON_FAIL_IF_NOT(hr, hrNot) __AZAC_RETURN_ON_FAIL_IF_NOT(hr, hrNot) +#endif + +#define AZAC_IFTRUE_RETURN_HR(cond, hr) AZAC_RETURN_HR_IF(hr, cond) +#define AZAC_IFFALSE_RETURN_HR(cond, hr) AZAC_RETURN_HR_IF(hr, !(cond)) +#define AZAC_IFFAILED_RETURN_HR(hr) AZAC_RETURN_ON_FAIL(hr) +#define AZAC_IFFAILED_RETURN_HR_IFNOT(hr, hrNot) AZAC_RETURN_ON_FAIL_IF_NOT(hr, hrNot) + +#ifdef AZAC_CONFIG_TRACE_EXITFN_ON_FAIL +#define AZAC_EXITFN_HR(hr) __AZAC_T_EXITFN_HR("AZAC_EXITFN_ON_FAIL: ", __FILE__, __LINE__, hr) +#define AZAC_EXITFN_HR_IF(hr, cond) __AZAC_T_EXITFN_HR_IF("AZAC_EXITFN_ON_FAIL: ", __FILE__, __LINE__, hr, cond) +#define AZAC_EXITFN_ON_FAIL(hr) __AZAC_T_EXITFN_ON_FAIL("AZAC_EXITFN_ON_FAIL: ", __FILE__, __LINE__, hr) +#define AZAC_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) __AZAC_EXITFN_ON_FAIL_IF_NOT("AZAC_EXITFN_ON_FAIL: ", __FILE__, __LINE__, hr, hrNot) +#else +#define AZAC_EXITFN_HR(hr) __AZAC_EXITFN_HR(hr) +#define AZAC_EXITFN_HR_IF(hr, cond) __AZAC_EXITFN_HR_IF(hr, cond) +#define AZAC_EXITFN_ON_FAIL(hr) __AZAC_EXITFN_ON_FAIL(hr) +#define AZAC_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) __AZAC_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) +#endif + +#define AZAC_IFTRUE_EXITFN_WHR(cond, hr) AZAC_EXITFN_HR_IF(hr, cond) +#define AZAC_IFFALSE_EXITFN_WHR(cond, hr) AZAC_EXITFN_HR_IF(hr, !(cond)) +#define AZAC_IFFAILED_EXITFN_WHR(hr) AZAC_EXITFN_ON_FAIL(hr) +#define AZAC_IFFAILED_EXITFN_WHR_IFNOT(hr, hrNot) AZAC_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) + +#define AZAC_IFTRUE_EXITFN_CLEANUP(cond, expr) \ + do { \ + int fCondT = !!(cond); \ + if (fCondT) { \ + expr; \ + goto AZAC_EXITFN_CLEANUP; \ + } } while (0) + +#define AZAC_IFFALSE_EXITFN_CLEANUP(cond, expr) \ + do { \ + int fCondF = !!(cond); \ + if (!fCondF) { \ + expr; \ + goto AZAC_EXITFN_CLEANUP; \ + } } while (0) + +#if defined(AZAC_CONFIG_DBG_TRACE_ASSERT) && (defined(DEBUG) || defined(_DEBUG)) +#define AZAC_DBG_ASSERT(expr) __AZAC_DBG_ASSERT("AZAC_ASSERT: ", __FILE__, __LINE__, expr) +#define AZAC_DBG_ASSERT_WITH_MESSAGE(expr, ...) __AZAC_DBG_ASSERT_WITH_MESSAGE("AZAC_ASSERT: ", __FILE__, __LINE__, expr, ##__VA_ARGS__) +#else +#define AZAC_DBG_ASSERT(expr) +#define AZAC_DBG_ASSERT_WITH_MESSAGE(expr, ...) +#endif + +#if defined(AZAC_CONFIG_DBG_TRACE_VERIFY) && (defined(DEBUG) || defined(_DEBUG)) +#define AZAC_DBG_VERIFY(expr) __AZAC_DBG_VERIFY("AZAC_VERIFY: ", __FILE__, __LINE__, expr) +#define AZAC_DBG_VERIFY_WITH_MESSAGE(expr, ...) 
__AZAC_DBG_VERIFY_WITH_MESSAGE("AZAC_VERIFY: ", __FILE__, __LINE__, expr, ##__VA_ARGS__) +#else +#define AZAC_DBG_VERIFY(expr) (expr) +#define AZAC_DBG_VERIFY_WITH_MESSAGE(expr, ...) (expr) +#endif + +#define AZAC_IFTRUE(cond, expr) \ + do { \ + int fCondT = !!(cond); \ + if (fCondT) { \ + expr; \ + } } while (0) + +#define AZAC_IFFALSE(cond, expr) \ + do { \ + int fCondF = !!(cond); \ + if (!fCondF) { \ + expr; \ + } } while (0) + +// handle circular dependency +#ifndef AZAC_SUPPRESS_COMMON_INCLUDE_FROM_DEBUG +#define AZAC_SUPPRESS_DEBUG_INCLUDE_FROM_COMMON +#include +#undef AZAC_SUPPRESS_DEBUG_INCLUDE_FROM_COMMON +#endif + +#ifdef __cplusplus + +#ifdef AZAC_CONFIG_TRACE_SCOPE +#define AZAC_TRACE_SCOPE(x, y) __AZAC_TRACE_SCOPE("AZAC_TRACE_SCOPE_ENTER: ", __FILE__, __LINE__, "AZAC_TRACE_SCOPE_EXIT: ", x, y) +#else +#define AZAC_TRACE_SCOPE(x, y) +#endif + +#ifdef AZAC_CONFIG_DBG_TRACE_SCOPE +#define AZAC_DBG_TRACE_SCOPE(x, y) __AZAC_TRACE_SCOPE("AZAC_DBG_TRACE_SCOPE_ENTER: ", __FILE__, __LINE__, "AZAC_DBG_TRACE_SCOPE_EXIT: ", x, y) +#else +#define AZAC_DBG_TRACE_SCOPE(x, y) +#endif + +#ifdef AZAC_CONFIG_TRACE_THROW_ON_FAIL +#define AZAC_THROW_ON_FAIL(hr) __AZAC_T_THROW_ON_FAIL("AZAC_THROW_ON_FAIL: ", __FILE__, __LINE__, hr) +#define AZAC_THROW_ON_FAIL_IF_NOT(hr, hrNot) __AZAC_T_THROW_ON_FAIL_IF_NOT("AZAC_THROW_ON_FAIL: ", __FILE__, __LINE__, hr, hrNot) +#define AZAC_LOG_ON_FAIL(hr) __AZAC_T_LOG_ON_FAIL("AZAC_LOG_ON_FAIL: ", __FILE__, __LINE__, hr) +#define AZAC_THROW_HR_IF(hr, cond) __AZAC_T_THROW_HR_IF("AZAC_THROW_HR_IF: ", __FILE__, __LINE__, hr, cond) +#define AZAC_THROW_HR(hr) __AZAC_T_THROW_HR("AZAC_THROW_HR: ", __FILE__, __LINE__, hr) +#else +#define AZAC_THROW_ON_FAIL(hr) __AZAC_THROW_ON_FAIL(hr) +#define AZAC_THROW_ON_FAIL_IF_NOT(hr, hrNot) __AZAC_THROW_ON_FAIL_IF_NOT(hr, hrNot) +#define AZAC_LOG_ON_FAIL(hr) __AZAC_LOG_ON_FAIL(hr) +#define AZAC_THROW_HR_IF(hr, cond) __AZAC_THROW_HR_IF(hr, cond) +#define AZAC_THROW_HR(hr) __AZAC_THROW_HR(hr) +#endif + +#define AZAC_IFTRUE_THROW_HR(cond, hr) AZAC_THROW_HR_IF(hr, cond) +#define AZAC_IFFALSE_THROW_HR(cond, hr) AZAC_THROW_HR_IF(hr, !(cond)) +#define AZAC_IFFAILED_THROW_HR(hr) AZAC_THROW_ON_FAIL(hr) +#define AZAC_IFFAILED_THROW_HR_IFNOT(hr, hrNot) AZAC_THROW_ON_FAIL_IF_NOT(hr, hrNot) + +#include +#include +#include +#include + +inline void __azac_handle_native_ex(AZACHR hr, bool throwException) +{ + AZAC_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + auto handle = reinterpret_cast(hr); + auto error = error_get_error_code(handle); + if (error == AZAC_ERR_NONE) + { + if (throwException) + { + throw hr; + } + else + { + // do nothing. This is already logged by the macros that call this function + return; + } + } + + std::string errorMsg; + try + { + auto callstack = error_get_call_stack(handle); + auto what = error_get_message(handle); + + if (what) + { + errorMsg += what; + } + else + { + errorMsg += "Exception with error code: "; + errorMsg += std::to_string(error); + } + + if (callstack) + { + errorMsg += callstack; + } + } + catch (...) 
+ { + error_release(handle); + throw hr; + } + + error_release(handle); + if (throwException) + { + throw std::runtime_error(errorMsg); + } + else + { + AZAC_TRACE_ERROR("Error details: %s", errorMsg.c_str()); + } +} + +inline void __azac_log_only(AZACHR hr) +{ + __azac_handle_native_ex(hr, false); +} + +inline void __azac_rethrow(AZACHR hr) +{ + __azac_handle_native_ex(hr, true); +} + +#else // __cplusplus + +#define AZAC_TRACE_SCOPE(x, y) static_assert(false) +#define AZAC_DBG_TRACE_SCOPE(x, y) static_assert(false) +#define AZAC_LOG_ON_FAIL(hr) static_assert(false) +#define AZAC_THROW_ON_FAIL(hr) static_assert(false) +#define AZAC_THROW_ON_FAIL_IF_NOT(hr, hrNot) static_assert(false) +#define AZAC_THROW_HR_IF(hr, cond) static_assert(false) +#define AZAC_THROW_HR(hr) static_assert(false) +#define AZAC_IFTRUE_THROW_HR(cond, hr) static_assert(false) +#define AZAC_IFFALSE_THROW_HR(cond, hr) static_assert(false) +#define AZAC_IFFAILED_THROW_HR(hr) static_assert(false) +#define AZAC_IFFAILED_THROW_HR_IFNOT(hr, hrNot) static_assert(false) + +#endif // __cplusplus + +//--------------------------------------------------------------------------- + +#ifdef __AZAC_DEBUG_H_EXAMPLES_IN_MAIN + +void main() +{ + int x = 4; + printf("%s = %d\n", __AZAC_EXPR_AS_STRING(x + 3), x + 3); + + AZAC_TRACE_INFO("hello there"); + AZAC_TRACE_ERROR("hello there"); + AZAC_TRACE_WARNING("hello there"); + AZAC_TRACE_VERBOSE("hello there"); + + AZAC_TRACE_INFO("hello there %d", 5); + AZAC_TRACE_ERROR("hello there %d", 5); + AZAC_TRACE_WARNING("hello there %d", 5); + AZAC_TRACE_VERBOSE("hello there %d", 5); + + AZAC_TRACE_INFO_IF(false, "hello there false"); + AZAC_TRACE_ERROR_IF(false, "hello there false"); + AZAC_TRACE_WARNING_IF(false, "hello there false"); + AZAC_TRACE_VERBOSE_IF(false, "hello there false"); + + AZAC_TRACE_INFO_IF(false, "hello there false %d", 5); + AZAC_TRACE_ERROR_IF(false, "hello there false %d", 5); + AZAC_TRACE_WARNING_IF(false, "hello there false %d", 5); + AZAC_TRACE_VERBOSE_IF(false, "hello there false %d", 5); + + AZAC_TRACE_INFO_IF(true, "hello there true"); + AZAC_TRACE_ERROR_IF(true, "hello there true"); + AZAC_TRACE_WARNING_IF(true, "hello there true"); + AZAC_TRACE_VERBOSE_IF(true, "hello there true"); + + AZAC_TRACE_INFO_IF(true, "hello there true %d", 5); + AZAC_TRACE_ERROR_IF(true, "hello there true %d", 5); + AZAC_TRACE_WARNING_IF(true, "hello there true %d", 5); + AZAC_TRACE_VERBOSE_IF(true, "hello there true %d", 5); + + AZAC_DBG_TRACE_INFO("hello there"); + AZAC_DBG_TRACE_ERROR("hello there"); + AZAC_DBG_TRACE_WARNING("hello there"); + AZAC_DBG_TRACE_VERBOSE("hello there"); + + AZAC_DBG_TRACE_INFO("hello there %d", 5); + AZAC_DBG_TRACE_ERROR("hello there %d", 5); + AZAC_DBG_TRACE_WARNING("hello there %d", 5); + AZAC_DBG_TRACE_VERBOSE("hello there %d", 5); + + AZAC_DBG_TRACE_INFO_IF(false, "hello there false"); + AZAC_DBG_TRACE_ERROR_IF(false, "hello there false"); + AZAC_DBG_TRACE_WARNING_IF(false, "hello there false"); + AZAC_DBG_TRACE_VERBOSE_IF(false, "hello there false"); + + AZAC_DBG_TRACE_INFO_IF(false, "hello there false %d", 5); + AZAC_DBG_TRACE_ERROR_IF(false, "hello there false %d", 5); + AZAC_DBG_TRACE_WARNING_IF(false, "hello there false %d", 5); + AZAC_DBG_TRACE_VERBOSE_IF(false, "hello there false %d", 5); + + AZAC_DBG_TRACE_INFO_IF(true, "hello there true"); + AZAC_DBG_TRACE_ERROR_IF(true, "hello there true"); + AZAC_DBG_TRACE_WARNING_IF(true, "hello there true"); + AZAC_DBG_TRACE_VERBOSE_IF(true, "hello there true"); + + AZAC_DBG_TRACE_INFO_IF(true, "hello there 
true %d", 5); + AZAC_DBG_TRACE_ERROR_IF(true, "hello there true %d", 5); + AZAC_DBG_TRACE_WARNING_IF(true, "hello there true %d", 5); + AZAC_DBG_TRACE_VERBOSE_IF(true, "hello there true %d", 5); + + AZAC_TRACE_SCOPE("A", "B"); + + AZAC_TRACE_FUNCTION(); + AZAC_DBG_TRACE_FUNCTION(); + + AZAC_DBG_ASSERT(false); + AZAC_DBG_ASSERT(true); + + AZAC_DBG_ASSERT_WITH_MESSAGE(false, "HEY!"); + AZAC_DBG_ASSERT_WITH_MESSAGE(true, "HEY!!"); + + AZAC_DBG_VERIFY(false); + AZAC_DBG_VERIFY(true); + + AZAC_DBG_VERIFY_WITH_MESSAGE(false, "HEY!"); + AZAC_DBG_VERIFY_WITH_MESSAGE(true, "HEY!!"); + + AZACHR hr1 { 0x80001111 }; + AZACHR hr2 { 0x00001111 }; + + AZAC_TRACE_VERBOSE("Testing out AZAC_REPORT_ON_FAIL, should see two failures..."); + AZAC_REPORT_ON_FAIL(hr1); + AZAC_REPORT_ON_FAIL_IFNOT(hr1, 0x80001000); + AZAC_TRACE_VERBOSE("Testing out AZAC_REPORT_ON_FAIL, should see two failures... Done!"); + + AZAC_TRACE_VERBOSE("Testing out AZAC_REPORT_ON_FAIL, should see zero failures..."); + AZAC_REPORT_ON_FAIL(hr2); + AZAC_REPORT_ON_FAIL_IFNOT(hr1, 0x80001111); + AZAC_REPORT_ON_FAIL_IFNOT(hr2, 0x80001111); + AZAC_REPORT_ON_FAIL_IFNOT(hr2, 0x80001000); + AZAC_TRACE_VERBOSE("Testing out AZAC_REPORT_ON_FAIL, should see zero failures... Done!"); +} + +#endif diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/azac_error.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/azac_error.h new file mode 100644 index 0000000..61ec189 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/azac_error.h @@ -0,0 +1,455 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/azai/license202106 for the full license information. + +#pragma once + +// TODO: TFS#3671215 - Vision: C/C++ azac_api* files are in shared include directory, speech and vision share + +#include + +/// +/// Type definition for Azure AI Core result codes. +/// +typedef uintptr_t AZACHR; + +/// +/// Default result code indicating no error. +/// +#define AZAC_ERR_NONE 0 + +/// +/// Declare and initialize result code variable. +/// +#define AZAC_INIT_HR(hr) AZACHR hr = AZAC_ERR_NONE; \ + (void)(hr) + +/// +/// Check if result code indicates success. +/// +#define AZAC_SUCCEEDED(x) ((x) == AZAC_ERR_NONE) + +/// +/// Check if result code indicates error. +/// +#define AZAC_FAILED(x) (!AZAC_SUCCEEDED(x)) + +/// +/// Base macros for all error codes. +/// +#define __AZAC_ERRCODE_FAILED(x) (x) + +/// +/// The function is not implemented. +/// +#define AZAC_ERR_NOT_IMPL __AZAC_ERRCODE_FAILED(0xfff) + +/// +/// The object has not been properly initialized. +/// +#define AZAC_ERR_UNINITIALIZED __AZAC_ERRCODE_FAILED(0x001) + +/// +/// The object has already been initialized. +/// +#define AZAC_ERR_ALREADY_INITIALIZED __AZAC_ERRCODE_FAILED(0x002) + +/// +/// An unhandled exception was detected. +/// +#define AZAC_ERR_UNHANDLED_EXCEPTION __AZAC_ERRCODE_FAILED(0x003) + +/// +/// The object or property was not found. +/// +#define AZAC_ERR_NOT_FOUND __AZAC_ERRCODE_FAILED(0x004) + +/// +/// One or more arguments are not valid. +/// +#define AZAC_ERR_INVALID_ARG __AZAC_ERRCODE_FAILED(0x005) + +/// +/// The specified timeout value has elapsed. +/// +#define AZAC_ERR_TIMEOUT __AZAC_ERRCODE_FAILED(0x006) + +/// +/// The asynchronous operation is already in progress. +/// +#define AZAC_ERR_ALREADY_IN_PROGRESS __AZAC_ERRCODE_FAILED(0x007) + +/// +/// The attempt to open the file failed. 
+/// +#define AZAC_ERR_FILE_OPEN_FAILED __AZAC_ERRCODE_FAILED(0x008) + +/// +/// The end of the file was reached unexpectedly. +/// +#define AZAC_ERR_UNEXPECTED_EOF __AZAC_ERRCODE_FAILED(0x009) + +/// +/// Invalid audio header encountered. +/// +#define AZAC_ERR_INVALID_HEADER __AZAC_ERRCODE_FAILED(0x00a) + +/// +/// The requested operation cannot be performed while audio is pumping +/// +#define AZAC_ERR_AUDIO_IS_PUMPING __AZAC_ERRCODE_FAILED(0x00b) + +/// +/// Unsupported audio format. +/// +#define AZAC_ERR_UNSUPPORTED_FORMAT __AZAC_ERRCODE_FAILED(0x00c) + +/// +/// Operation aborted. +/// +#define AZAC_ERR_ABORT __AZAC_ERRCODE_FAILED(0x00d) + +/// +/// Microphone is not available. +/// +#define AZAC_ERR_MIC_NOT_AVAILABLE __AZAC_ERRCODE_FAILED(0x00e) + +/// +/// An invalid state was encountered. +/// +#define AZAC_ERR_INVALID_STATE __AZAC_ERRCODE_FAILED(0x00f) + +/// +/// Attempting to create a UUID failed. +/// +#define AZAC_ERR_UUID_CREATE_FAILED __AZAC_ERRCODE_FAILED(0x010) + +/// +/// An unexpected session state transition was encountered when setting the session audio format. +/// +/// +/// Valid transitions are: +/// * WaitForPumpSetFormatStart --> ProcessingAudio (at the beginning of stream) +/// * StoppingPump --> WaitForAdapterCompletedSetFormatStop (at the end of stream) +/// * ProcessingAudio --> WaitForAdapterCompletedSetFormatStop (when the stream runs out of data) +/// All other state transitions are invalid. +/// +#define AZAC_ERR_SETFORMAT_UNEXPECTED_STATE_TRANSITION __AZAC_ERRCODE_FAILED(0x011) + +/// +/// An unexpected session state was encountered in while processing audio. +/// +/// +/// Valid states to encounter are: +/// * ProcessingAudio: We're allowed to process audio while in this state. +/// * StoppingPump: We're allowed to be called to process audio, but we'll ignore the data passed in while we're attempting to stop the pump. +/// All other states are invalid while processing audio. +/// +#define AZAC_ERR_PROCESS_AUDIO_INVALID_STATE __AZAC_ERRCODE_FAILED(0x012) + +/// +/// An unexpected state transition was encountered while attempting to start recognizing. +/// +/// +/// A valid transition is: +/// * Idle --> WaitForPumpSetFormatStart +/// All other state transitions are invalid when attempting to start recognizing +/// +#define AZAC_ERR_START_RECOGNIZING_INVALID_STATE_TRANSITION __AZAC_ERRCODE_FAILED(0x013) + +/// +/// An unexpected error was encountered when trying to create an internal object. +/// +#define AZAC_ERR_UNEXPECTED_CREATE_OBJECT_FAILURE __AZAC_ERRCODE_FAILED(0x014) + +/// +/// An error in the audio-capturing system. +/// +#define AZAC_ERR_MIC_ERROR __AZAC_ERRCODE_FAILED(0x015) + +/// +/// The requested operation cannot be performed; there is no audio input. +/// +#define AZAC_ERR_NO_AUDIO_INPUT __AZAC_ERRCODE_FAILED(0x016) + +/// +/// An unexpected error was encountered when trying to access the USP site. +/// +#define AZAC_ERR_UNEXPECTED_USP_SITE_FAILURE __AZAC_ERRCODE_FAILED(0x017) + +/// +/// An unexpected error was encountered when trying to access the LU site. +/// +#define AZAC_ERR_UNEXPECTED_LU_SITE_FAILURE __AZAC_ERRCODE_FAILED(0x018) + +/// +/// The buffer is too small. +/// +#define AZAC_ERR_BUFFER_TOO_SMALL __AZAC_ERRCODE_FAILED(0x019) + +/// +/// A method failed to allocate memory. +/// +#define AZAC_ERR_OUT_OF_MEMORY __AZAC_ERRCODE_FAILED(0x01A) + +/// +/// An unexpected runtime error occurred. +/// +#define AZAC_ERR_RUNTIME_ERROR __AZAC_ERRCODE_FAILED(0x01B) + +/// +/// The url specified is invalid. 
+/// +#define AZAC_ERR_INVALID_URL __AZAC_ERRCODE_FAILED(0x01C) + +/// +/// The region specified is invalid or missing. +/// +#define AZAC_ERR_INVALID_REGION __AZAC_ERRCODE_FAILED(0x01D) + +/// +/// Switch between single shot and continuous recognition is not supported. +/// +#define AZAC_ERR_SWITCH_MODE_NOT_ALLOWED __AZAC_ERRCODE_FAILED(0x01E) + +/// +/// Changing connection status is not supported in the current recognition state. +/// +#define AZAC_ERR_CHANGE_CONNECTION_STATUS_NOT_ALLOWED __AZAC_ERRCODE_FAILED(0x01F) + +/// +/// Explicit connection management is not supported by the specified recognizer. +/// +#define AZAC_ERR_EXPLICIT_CONNECTION_NOT_SUPPORTED_BY_RECOGNIZER __AZAC_ERRCODE_FAILED(0x020) + +/// +/// The handle is invalid. +/// +#define AZAC_ERR_INVALID_HANDLE __AZAC_ERRCODE_FAILED(0x021) + +/// +/// The recognizer is invalid. +/// +#define AZAC_ERR_INVALID_RECOGNIZER __AZAC_ERRCODE_FAILED(0x022) + +/// +/// The value is out of range. +/// Added in version 1.3.0. +/// +#define AZAC_ERR_OUT_OF_RANGE __AZAC_ERRCODE_FAILED(0x023) + +/// +/// Extension library not found. +/// Added in version 1.3.0. +/// +#define AZAC_ERR_EXTENSION_LIBRARY_NOT_FOUND __AZAC_ERRCODE_FAILED(0x024) + +/// +/// An unexpected error was encountered when trying to access the TTS engine site. +/// Added in version 1.4.0. +/// +#define AZAC_ERR_UNEXPECTED_TTS_ENGINE_SITE_FAILURE __AZAC_ERRCODE_FAILED(0x025) + +/// +/// An unexpected error was encountered when trying to access the audio output stream. +/// Added in version 1.4.0. +/// +#define AZAC_ERR_UNEXPECTED_AUDIO_OUTPUT_FAILURE __AZAC_ERRCODE_FAILED(0x026) + +/// +/// Gstreamer internal error. +/// Added in version 1.4.0. +/// +#define AZAC_ERR_GSTREAMER_INTERNAL_ERROR __AZAC_ERRCODE_FAILED(0x027) + +/// +/// Compressed container format not supported. +/// Added in version 1.4.0. +/// +#define AZAC_ERR_CONTAINER_FORMAT_NOT_SUPPORTED_ERROR __AZAC_ERRCODE_FAILED(0x028) + +/// +/// Codec extension or gstreamer not found. +/// Added in version 1.4.0. +/// +#define AZAC_ERR_GSTREAMER_NOT_FOUND_ERROR __AZAC_ERRCODE_FAILED(0x029) + +/// +/// The language specified is missing. +/// Added in version 1.5.0. +/// +#define AZAC_ERR_INVALID_LANGUAGE __AZAC_ERRCODE_FAILED(0x02A) + +/// +/// The API is not applicable. +/// Added in version 1.5.0. +/// +#define AZAC_ERR_UNSUPPORTED_API_ERROR __AZAC_ERRCODE_FAILED(0x02B) + +/// +/// The ring buffer is unavailable. +/// Added in version 1.8.0. +/// +#define AZAC_ERR_RINGBUFFER_DATA_UNAVAILABLE __AZAC_ERRCODE_FAILED(0x02C) + +/// +/// An unexpected error was encountered when trying to access the Conversation site. +/// Added in version 1.5.0. +/// +#define AZAC_ERR_UNEXPECTED_CONVERSATION_SITE_FAILURE __AZAC_ERRCODE_FAILED(0x030) + +/// +/// An unexpected error was encountered when trying to access the Conversation site. +/// Added in version 1.8.0. +/// +#define AZAC_ERR_UNEXPECTED_CONVERSATION_TRANSLATOR_SITE_FAILURE __AZAC_ERRCODE_FAILED(0x031) + +/// +/// An asynchronous operation was canceled before it was executed. +/// Added in version 1.8.0. +/// +#define AZAC_ERR_CANCELED __AZAC_ERRCODE_FAILED(0x032) + +/// +/// Codec for compression could not be initialized. +/// Added in version 1.10.0. +/// +#define AZAC_ERR_COMPRESS_AUDIO_CODEC_INITIFAILED __AZAC_ERRCODE_FAILED(0x033) + +/// +/// Data not available. +/// Added in version 1.10.0. +/// +#define AZAC_ERR_DATA_NOT_AVAILABLE __AZAC_ERRCODE_FAILED(0x034) + +/// +/// Invalid result reason. 
+/// Added in version 1.12.0 +/// +#define AZAC_ERR_INVALID_RESULT_REASON __AZAC_ERRCODE_FAILED(0x035) + +/// +/// An unexpected error was encountered when trying to access the RNN-T site. +/// +#define AZAC_ERR_UNEXPECTED_RNNT_SITE_FAILURE __AZAC_ERRCODE_FAILED(0x036) + +/// +/// Sending of a network message failed. +/// +#define AZAC_ERR_NETWORK_SEND_FAILED __AZAC_ERRCODE_FAILED(0x037) + +/// +/// Audio extension library not found. +/// Added in version 1.16.0. +/// +#define AZAC_ERR_AUDIO_SYS_LIBRARY_NOT_FOUND __AZAC_ERRCODE_FAILED(0x038) + +/// +/// An error in the audio-rendering system. +/// Added in version 1.20.0 +/// +#define AZAC_ERR_LOUDSPEAKER_ERROR __AZAC_ERRCODE_FAILED(0x039) + +/// +/// An unexpected error was encountered when trying to access the Vision site. +/// Added in version 1.15.0. +/// +#define AZAC_ERR_VISION_SITE_FAILURE __AZAC_ERRCODE_FAILED(0x050) + +/// +/// Stream number provided was invalid in the current context. +/// Added in version 1.15.0. +/// +#define AZAC_ERR_MEDIA_INVALID_STREAM __AZAC_ERRCODE_FAILED(0x060) + +/// +/// Offset required is invalid in the current context. +/// Added in version 1.15.0. +/// +#define AZAC_ERR_MEDIA_INVALID_OFFSET __AZAC_ERRCODE_FAILED(0x061) + +/// +/// No more data is available in source. +/// Added in version 1.15.0. +/// +#define AZAC_ERR_MEDIA_NO_MORE_DATA __AZAC_ERRCODE_FAILED(0x062) + +/// +/// Source has not been started. +/// Added in version 1.15.0. +/// +#define AZAC_ERR_MEDIA_NOT_STARTED __AZAC_ERRCODE_FAILED(0x063) + +/// +/// Source has already been started. +/// Added in version 1.15.0. +/// +#define AZAC_ERR_MEDIA_ALREADY_STARTED __AZAC_ERRCODE_FAILED(0x064) + +/// +/// Media device creation failed. +/// Added in version 1.18.0. +/// +#define AZAC_ERR_MEDIA_DEVICE_CREATION_FAILED __AZAC_ERRCODE_FAILED(0x065) + +/// +/// No devices of the selected category are available. +/// Added in version 1.18.0. +/// +#define AZAC_ERR_MEDIA_NO_DEVICE_AVAILABLE __AZAC_ERRCODE_FAILED(0x066) + +/// +/// Enabled Voice Activity Detection while using keyword recognition is not allowed. +/// +#define AZAC_ERR_VAD_COULD_NOT_USE_WITH_KEYWORD_RECOGNIZER __AZAC_ERRCODE_FAILED(0x067) + +/// +/// The specified RecoEngineAdapter could not be created. +/// +#define AZAC_ERR_COULD_NOT_CREATE_ENGINE_ADAPTER __AZAC_ERRCODE_FAILED(0x070) + +/// +/// The input file has a size of 0 bytes. +/// +#define AZAC_ERR_INPUT_FILE_SIZE_IS_ZERO_BYTES __AZAC_ERRCODE_FAILED(0x072) + +/// +/// Cannot open the input media file for reading. Does it exist? +/// +#define AZAC_ERR_FAILED_TO_OPEN_INPUT_FILE_FOR_READING __AZAC_ERRCODE_FAILED(0x073) + +/// +/// Failed to read from the input media file. +/// +#define AZAC_ERR_FAILED_TO_READ_FROM_INPUT_FILE __AZAC_ERRCODE_FAILED(0x074) + +/// +/// Input media file is too large. +/// +#define AZAC_ERR_INPUT_FILE_TOO_LARGE __AZAC_ERRCODE_FAILED(0x075) + +/// +/// The input URL is unsupported. It should start with `http://`, `https://` or `rtsp://`. +/// +#define AZAC_ERR_UNSUPPORTED_URL_PROTOCOL __AZAC_ERRCODE_FAILED(0x076) + +/// +/// The Nullable value is empty. Check HasValue() before getting the value. +/// +#define AZAC_ERR_EMPTY_NULLABLE __AZAC_ERRCODE_FAILED(0x077) + +/// +/// The given model version string is not in the expected format. The format +/// is specified by the regular expression `^(latest|\d{4}-\d{2}-\d{2})(-preview)?$`. 
+/// +#define AZAC_ERR_INVALID_MODEL_VERSION_FORMAT __AZAC_ERRCODE_FAILED(0x078) + +/// +/// Malformed network message +/// +#define AZAC_ERR_NETWORK_MALFORMED __AZAC_ERRCODE_FAILED(0x090) + +/// +/// Unexpected message received +/// +#define AZAC_ERR_NETWORK_PROTOCOL_VIOLATION __AZAC_ERRCODE_FAILED(0x091) diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c.h new file mode 100644 index 0000000..61a4a5b --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c.h @@ -0,0 +1,51 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c.h: Master include header for public C API declarations +// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_audio_config.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_audio_config.h new file mode 100644 index 0000000..a8d042e --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_audio_config.h @@ -0,0 +1,27 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
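
speechapi_c.h above is the master include for the whole C API; the individual include targets are elided in this diff, but a consumer normally pulls in only this one header. A sketch, assuming the AzureWrapper include directory is on the compiler's include path:

// speech_client.cpp — hypothetical consumer translation unit
#include <speechapi_c.h>   // brings in all speechapi_c_*.h declarations shown in this diff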
+// +// speechapi_c_audio_config.h: Public API declarations for audio configuration related C methods and types +// + +#pragma once +#include + + +SPXAPI_(bool) audio_config_is_handle_valid(SPXAUDIOCONFIGHANDLE haudioConfig); +SPXAPI audio_config_create_audio_input_from_default_microphone(SPXAUDIOCONFIGHANDLE* haudioConfig); +SPXAPI audio_config_create_audio_input_from_a_microphone(SPXAUDIOCONFIGHANDLE* haudioConfig, const char* deviceName); +SPXAPI audio_config_create_audio_input_from_wav_file_name(SPXAUDIOCONFIGHANDLE* haudioConfig, const char* fileName); +SPXAPI audio_config_create_audio_input_from_stream(SPXAUDIOCONFIGHANDLE* haudioConfig, SPXAUDIOSTREAMHANDLE haudioStream); +SPXAPI audio_config_create_push_audio_input_stream(SPXAUDIOCONFIGHANDLE* haudioConfig, SPXAUDIOSTREAMHANDLE* haudioStream, SPXAUDIOSTREAMFORMATHANDLE hformat); +SPXAPI audio_config_create_pull_audio_input_stream(SPXAUDIOCONFIGHANDLE* haudioConfig, SPXAUDIOSTREAMHANDLE* haudioStream, SPXAUDIOSTREAMFORMATHANDLE hformat); +SPXAPI audio_config_create_audio_output_from_default_speaker(SPXAUDIOCONFIGHANDLE* haudioConfig); +SPXAPI audio_config_create_audio_output_from_a_speaker(SPXAUDIOCONFIGHANDLE* haudioConfig, const char* deviceName); +SPXAPI audio_config_create_audio_output_from_wav_file_name(SPXAUDIOCONFIGHANDLE* haudioConfig, const char* fileName); +SPXAPI audio_config_create_audio_output_from_stream(SPXAUDIOCONFIGHANDLE* haudioConfig, SPXAUDIOSTREAMHANDLE haudioStream); +SPXAPI audio_config_set_audio_processing_options(SPXAUDIOCONFIGHANDLE haudioConfig, SPXAUDIOPROCESSINGOPTIONSHANDLE haudioProcessingOptions); +SPXAPI audio_config_get_audio_processing_options(SPXAUDIOCONFIGHANDLE haudioConfig, SPXAUDIOPROCESSINGOPTIONSHANDLE* haudioProcessingOptions); +SPXAPI audio_config_release(SPXAUDIOCONFIGHANDLE haudioConfig); +SPXAPI audio_config_get_property_bag(SPXAUDIOCONFIGHANDLE haudioConfig, SPXPROPERTYBAGHANDLE* hpropbag); + diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_audio_processing_options.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_audio_processing_options.h new file mode 100644 index 0000000..ece9933 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_audio_processing_options.h @@ -0,0 +1,173 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_audio_processing_options.h: Public API declarations for audio processing options related C methods and types +// + +#pragma once +#include + +/// +/// Types of preset microphone array geometries. +/// See [Microphone Array Recommendations](/azure/cognitive-services/speech-service/speech-devices-sdk-microphone) for more details. +/// +typedef enum +{ + /// + /// Indicates that no geometry specified. Speech SDK will determine the microphone array geometry. + /// + AudioProcessingOptions_PresetMicrophoneArrayGeometry_Uninitialized, + /// + /// Indicates a microphone array with one microphone in the center and six microphones evenly spaced + /// in a circle with radius approximately equal to 42.5 mm. + /// + AudioProcessingOptions_PresetMicrophoneArrayGeometry_Circular7, + /// + /// Indicates a microphone array with one microphone in the center and three microphones evenly spaced + /// in a circle with radius approximately equal to 42.5 mm. 
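
A short sketch of the audio-config functions declared above: open a WAV file as audio input and release the handle when done. It assumes the SPXHR values returned by these entry points follow the same success convention as AZACHR (0 means success), and SPXHANDLE_INVALID comes from speechapi_c_common.h further down in this diff.

// Sketch: create an audio input from a WAV file and release it after use.
void UseWavInput()
{
    SPXAUDIOCONFIGHANDLE haudio = SPXHANDLE_INVALID;
    if (AZAC_SUCCEEDED(audio_config_create_audio_input_from_wav_file_name(&haudio, "utterance.wav"))
        && audio_config_is_handle_valid(haudio))
    {
        // ... hand haudio to a recognizer factory here ...
        audio_config_release(haudio);
    }
}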
+ /// + AudioProcessingOptions_PresetMicrophoneArrayGeometry_Circular4, + /// + /// Indicates a microphone array with four linearly placed microphones with 40 mm spacing between them. + /// + AudioProcessingOptions_PresetMicrophoneArrayGeometry_Linear4, + /// + /// Indicates a microphone array with two linearly placed microphones with 40 mm spacing between them. + /// + AudioProcessingOptions_PresetMicrophoneArrayGeometry_Linear2, + /// + /// Indicates a microphone array with a single microphone. + /// + AudioProcessingOptions_PresetMicrophoneArrayGeometry_Mono, + /// + /// Indicates a microphone array with custom geometry. + /// + AudioProcessingOptions_PresetMicrophoneArrayGeometry_Custom +} AudioProcessingOptions_PresetMicrophoneArrayGeometry; + +/// +/// Types of microphone arrays. +/// +typedef enum +{ + AudioProcessingOptions_MicrophoneArrayType_Linear, + AudioProcessingOptions_MicrophoneArrayType_Planar +} AudioProcessingOptions_MicrophoneArrayType; + +/// +/// Defines speaker reference channel position in input audio. +/// +typedef enum +{ + /// + /// Indicates that the input audio does not have a speaker reference channel. + /// + AudioProcessingOptions_SpeakerReferenceChannel_None, + /// + /// Indicates that the last channel in the input audio corresponds to the speaker + /// reference for echo cancellation. + /// + AudioProcessingOptions_SpeakerReferenceChannel_LastChannel +} AudioProcessingOptions_SpeakerReferenceChannel; + +#pragma pack(push, 1) + +/// +/// Represents coordinates of a microphone. +/// +typedef struct +{ + /// + /// X-coordinate of the microphone in millimeters. + /// + int X; + /// + /// Y-coordinate of the microphone in millimeters. + /// + int Y; + /// + /// Z-coordinate of the microphone in millimeters. + /// + int Z; +} AudioProcessingOptions_MicrophoneCoordinates; + +/// +/// Represents the geometry of a microphone array. +/// +typedef struct +{ + /// + /// Type of microphone array. + /// + AudioProcessingOptions_MicrophoneArrayType microphoneArrayType; + /// + /// Start angle for beamforming in degrees. + /// + uint16_t beamformingStartAngle; + /// + /// End angle for beamforming in degrees. + /// + uint16_t beamformingEndAngle; + /// + /// Number of microphones in the microphone array. + /// + uint16_t numberOfMicrophones; + /// + /// Coordinates of microphones in the microphone array. + /// + AudioProcessingOptions_MicrophoneCoordinates* microphoneCoordinates; +} AudioProcessingOptions_MicrophoneArrayGeometry; + +#pragma pack(pop) + +/// +/// Disables built-in input audio processing. +/// +const int AUDIO_INPUT_PROCESSING_NONE = 0x00000000; +/// +/// Enables default built-in input audio processing. +/// +const int AUDIO_INPUT_PROCESSING_ENABLE_DEFAULT = 0x00000001; +/// +/// Disables dereverberation in the default audio processing pipeline. +/// +const int AUDIO_INPUT_PROCESSING_DISABLE_DEREVERBERATION = 0x00000002; +/// +/// Disables noise suppression in the default audio processing pipeline. +/// +const int AUDIO_INPUT_PROCESSING_DISABLE_NOISE_SUPPRESSION = 0x00000004; +/// +/// Disables automatic gain control in the default audio processing pipeline. +/// +const int AUDIO_INPUT_PROCESSING_DISABLE_GAIN_CONTROL = 0x00000008; +/// +/// Disables echo cancellation in the default audio processing pipeline. +/// +const int AUDIO_INPUT_PROCESSING_DISABLE_ECHO_CANCELLATION = 0x00000010; +/// +/// Enables voice activity detection in input audio processing. 
+/// +const int AUDIO_INPUT_PROCESSING_ENABLE_VOICE_ACTIVITY_DETECTION = 0x00000020; +/// +/// Enables the new version (V2) of input audio processing with improved echo cancellation performance. +/// This flag is mutually exclusive with AUDIO_INPUT_PROCESSING_ENABLE_DEFAULT flag. +/// AUDIO_INPUT_PROCESSING_DISABLE_* flags do not affect this pipeline. +/// This feature is currently in preview and only available for Windows x64 and ARM64 platform. +/// +const int AUDIO_INPUT_PROCESSING_ENABLE_V2 = 0x00000040; + +SPXAPI_(bool) audio_processing_options_is_handle_valid(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions); +SPXAPI audio_processing_options_create(SPXAUDIOPROCESSINGOPTIONSHANDLE* hoptions, int audioProcessingFlags); +SPXAPI audio_processing_options_create_from_preset_microphone_array_geometry(SPXAUDIOPROCESSINGOPTIONSHANDLE* hoptions, int audioProcessingFlags, AudioProcessingOptions_PresetMicrophoneArrayGeometry microphoneArrayGeometry, AudioProcessingOptions_SpeakerReferenceChannel speakerReferenceChannel); +SPXAPI audio_processing_options_create_from_microphone_array_geometry(SPXAUDIOPROCESSINGOPTIONSHANDLE* hoptions, int audioProcessingFlags, const AudioProcessingOptions_MicrophoneArrayGeometry* microphoneArrayGeometry, AudioProcessingOptions_SpeakerReferenceChannel speakerReferenceChannel); +SPXAPI audio_processing_options_get_audio_processing_flags(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions, int* audioProcessingFlags); +SPXAPI audio_processing_options_get_preset_microphone_array_geometry(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions, AudioProcessingOptions_PresetMicrophoneArrayGeometry* microphoneArrayGeometry); +SPXAPI audio_processing_options_get_microphone_array_type(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions, AudioProcessingOptions_MicrophoneArrayType* microphoneArrayType); +SPXAPI audio_processing_options_get_beamforming_start_angle(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions, uint16_t* startAngle); +SPXAPI audio_processing_options_get_beamforming_end_angle(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions, uint16_t* endAngle); +SPXAPI audio_processing_options_get_microphone_count(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions, uint16_t* microphoneCount); +SPXAPI audio_processing_options_get_microphone_coordinates(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions, AudioProcessingOptions_MicrophoneCoordinates* microphoneCoordinates, uint16_t microphoneCount); +SPXAPI audio_processing_options_get_speaker_reference_channel(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions, AudioProcessingOptions_SpeakerReferenceChannel* speakerReferenceChannel); +SPXAPI audio_processing_options_release(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions); +SPXAPI audio_processing_options_get_property_bag(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions, SPXPROPERTYBAGHANDLE* hpropbag); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_audio_stream.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_audio_stream.h new file mode 100644 index 0000000..0056480 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_audio_stream.h @@ -0,0 +1,67 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
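
A sketch tying the geometry types and flag constants above together: describe a two-microphone linear array (coordinates in millimeters) and create processing options with the default pipeline. The coordinate values and beamforming angles are illustrative, and the geometry struct is assumed to be read (copied) during the create call.

// Sketch: audio processing options for a hypothetical 2-mic linear array, 40 mm spacing.
SPXAUDIOPROCESSINGOPTIONSHANDLE CreateLinear2Options()
{
    AudioProcessingOptions_MicrophoneCoordinates mics[2] = {
        { -20, 0, 0 },
        {  20, 0, 0 }
    };

    AudioProcessingOptions_MicrophoneArrayGeometry geometry = {};
    geometry.microphoneArrayType   = AudioProcessingOptions_MicrophoneArrayType_Linear;
    geometry.beamformingStartAngle = 0;
    geometry.beamformingEndAngle   = 180;
    geometry.numberOfMicrophones   = 2;
    geometry.microphoneCoordinates = mics;

    SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions = SPXHANDLE_INVALID;
    audio_processing_options_create_from_microphone_array_geometry(
        &hoptions,
        AUDIO_INPUT_PROCESSING_ENABLE_DEFAULT,
        &geometry,
        AudioProcessingOptions_SpeakerReferenceChannel_None);
    return hoptions;   // caller releases with audio_processing_options_release()
}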
+// +// speechapi_c_audio_stream.h: Public API declarations for audio stream related C methods and types +// + +#pragma once +#include +#include + +typedef enum +{ + StreamStatus_Unknown = 0, + StreamStatus_NoData = 1, + StreamStatus_PartialData = 2, + StreamStatus_AllData = 3, + StreamStatus_Canceled = 4 +} Stream_Status; + +// audio_stream +SPXAPI_(bool) audio_stream_is_handle_valid(SPXAUDIOSTREAMHANDLE haudioStream); +SPXAPI audio_stream_create_push_audio_input_stream(SPXAUDIOSTREAMHANDLE* haudioStream, SPXAUDIOSTREAMFORMATHANDLE hformat); +SPXAPI audio_stream_create_pull_audio_input_stream(SPXAUDIOSTREAMHANDLE* haudioStream, SPXAUDIOSTREAMFORMATHANDLE hformat); +SPXAPI audio_stream_create_pull_audio_output_stream(SPXAUDIOSTREAMHANDLE* haudioStream); +SPXAPI audio_stream_create_push_audio_output_stream(SPXAUDIOSTREAMHANDLE* haudioStream); +SPXAPI audio_stream_release(SPXAUDIOSTREAMHANDLE haudioStream); + +// pull_audio_input_stream +typedef int (*CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK)(void* pvContext, uint8_t* buffer, uint32_t size); +typedef void (*CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK)(void* pvContext); +typedef void (*CUSTOM_AUDIO_PULL_STREAM_GET_PROPERTY_CALLBACK)(void* pvContext, int id, uint8_t* value, uint32_t size); +SPXAPI pull_audio_input_stream_set_callbacks(SPXAUDIOSTREAMHANDLE haudioStream, void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback); +SPXAPI pull_audio_input_stream_set_getproperty_callback(SPXAUDIOSTREAMHANDLE haudioStream, void* pvContext, CUSTOM_AUDIO_PULL_STREAM_GET_PROPERTY_CALLBACK getPropertyCallback); + +// push_audio_input_stream +SPXAPI push_audio_input_stream_write(SPXAUDIOSTREAMHANDLE haudioStream, uint8_t* buffer, uint32_t size); +SPXAPI push_audio_input_stream_close(SPXAUDIOSTREAMHANDLE haudioStream); +SPXAPI push_audio_input_stream_set_property_by_id(SPXAUDIOSTREAMHANDLE haudioStream, int id, const char* value); +SPXAPI push_audio_input_stream_set_property_by_name(SPXAUDIOSTREAMHANDLE haudioStream, const char* name, const char* value); + +// pull audio output stream +SPXAPI pull_audio_output_stream_read(SPXAUDIOSTREAMHANDLE haudioStream, uint8_t* buffer, uint32_t bufferSize, uint32_t* pfilledSize); + +// push_audio_output_stream +typedef int(*CUSTOM_AUDIO_PUSH_STREAM_WRITE_CALLBACK)(void* pvContext, uint8_t* buffer, uint32_t size); +typedef void(*CUSTOM_AUDIO_PUSH_STREAM_CLOSE_CALLBACK)(void* pvContext); +SPXAPI push_audio_output_stream_set_callbacks(SPXAUDIOSTREAMHANDLE haudioStream, void* pvContext, CUSTOM_AUDIO_PUSH_STREAM_WRITE_CALLBACK writeCallback, CUSTOM_AUDIO_PUSH_STREAM_CLOSE_CALLBACK closeCallback); + +// audio data stream +SPXAPI_(bool) audio_data_stream_is_handle_valid(SPXAUDIOSTREAMHANDLE haudioStream); +SPXAPI audio_data_stream_create_from_file(SPXAUDIOSTREAMHANDLE* haudioStream, const char* fileName); +SPXAPI audio_data_stream_create_from_result(SPXAUDIOSTREAMHANDLE* haudioStream, SPXRESULTHANDLE hresult); +SPXAPI audio_data_stream_create_from_keyword_result(SPXAUDIOSTREAMHANDLE* audioStreamHandle, SPXRESULTHANDLE resultHandle); +SPXAPI audio_data_stream_get_status(SPXAUDIOSTREAMHANDLE haudioStream, Stream_Status* status); +SPXAPI audio_data_stream_get_reason_canceled(SPXAUDIOSTREAMHANDLE haudioStream, Result_CancellationReason* reason); +SPXAPI audio_data_stream_get_canceled_error_code(SPXAUDIOSTREAMHANDLE haudioStream, Result_CancellationErrorCode* errorCode); +SPXAPI_(bool) audio_data_stream_can_read_data(SPXAUDIOSTREAMHANDLE haudioStream, uint32_t 
requestedSize); +SPXAPI_(bool) audio_data_stream_can_read_data_from_position(SPXAUDIOSTREAMHANDLE haudioStream, uint32_t requestedSize, uint32_t position); +SPXAPI audio_data_stream_read(SPXAUDIOSTREAMHANDLE haudioStream, uint8_t* buffer, uint32_t bufferSize, uint32_t* pfilledSize); +SPXAPI audio_data_stream_read_from_position(SPXAUDIOSTREAMHANDLE haudioStream, uint8_t* buffer, uint32_t bufferSize, uint32_t position, uint32_t* pfilledSize); +SPXAPI audio_data_stream_save_to_wave_file(SPXAUDIOSTREAMHANDLE haudioStream, const char* fileName); +SPXAPI audio_data_stream_get_position(SPXAUDIOSTREAMHANDLE haudioStream, uint32_t* position); +SPXAPI audio_data_stream_set_position(SPXAUDIOSTREAMHANDLE haudioStream, uint32_t position); +SPXAPI audio_data_stream_detach_input(SPXAUDIOSTREAMHANDLE audioStreamHandle); +SPXAPI audio_data_stream_get_property_bag(SPXAUDIOSTREAMHANDLE haudioStream, SPXPROPERTYBAGHANDLE* hpropbag); +SPXAPI audio_data_stream_release(SPXAUDIOSTREAMHANDLE haudioStream); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_audio_stream_format.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_audio_stream_format.h new file mode 100644 index 0000000..ac1e8e4 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_audio_stream_format.h @@ -0,0 +1,93 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_audio_stream_format.h: Public API declarations for audio stream format related C methods and types +// + +#pragma once +#include + +/// +/// Defines supported audio stream container format. +/// Changed in version 1.4.0. +/// +enum Audio_Stream_Container_Format +{ + /// + /// Stream ContainerFormat definition for OGG OPUS. + /// + StreamFormat_Ogg_Opus = 0x101, + + /// + /// Stream ContainerFormat definition for MP3. + /// + StreamFormat_Mp3 = 0x102, + + /// + /// Stream ContainerFormat definition for FLAC. Added in version 1.7.0. + /// + StreamFormat_Flac = 0x103, + + /// + /// Stream ContainerFormat definition for ALAW. Added in version 1.7.0. + /// + StreamFormat_Alaw = 0x104, + + /// + /// Stream ContainerFormat definition for MULAW. Added in version 1.7.0. + /// + StreamFormat_Mulaw = 0x105, + + /// + /// Stream ContainerFormat definition for AMRNB. Currently not supported. + /// + StreamFormat_Amrnb = 0x106, + + /// + /// Stream ContainerFormat definition for AMRWB. Currently not supported. + /// + StreamFormat_Amrwb = 0x107, + + /// + /// Stream ContainerFormat definition for any other or unknown format. + /// + StreamFormat_Any = 0x108, +}; + +/// +/// Defines supported audio stream wave format in WAV container. +/// +enum Audio_Stream_Wave_Format +{ + /// + /// Stream WaveFormat definition for PCM (pulse-code modulated) data in integer format. + /// + StreamWaveFormat_PCM = 0x0001, + + /// + /// Stream WaveFormat definition for A-law-encoded format. + /// + StreamWaveFormat_ALAW = 0x0006, + + /// + /// Stream WaveFormat definition for Mu-law-encoded format. + /// + StreamWaveFormat_MULAW = 0x0007, + + /// + /// Stream WaveFormat definition for G.722-encoded format. 
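
A sketch of the audio-data-stream read loop enabled by the declarations above; the SPXRESULTHANDLE is assumed to come from a synthesis or keyword-recognition call elsewhere, and the 3200-byte buffer size is just an example (100 ms of 16 kHz / 16-bit mono audio).

// Sketch: drain all audio from a data stream wrapped around an existing result handle.
void DrainAudio(SPXRESULTHANDLE hresult)
{
    SPXAUDIOSTREAMHANDLE hstream = SPXHANDLE_INVALID;
    if (!AZAC_SUCCEEDED(audio_data_stream_create_from_result(&hstream, hresult)))
        return;

    uint8_t buffer[3200];
    uint32_t filled = 0;
    while (AZAC_SUCCEEDED(audio_data_stream_read(hstream, buffer, (uint32_t)sizeof(buffer), &filled))
           && filled > 0)
    {
        // hand `filled` bytes to a player or file writer here
    }
    audio_data_stream_release(hstream);
}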
+ /// + StreamWaveFormat_G722 = 0x028F +}; + +typedef enum Audio_Stream_Container_Format Audio_Stream_Container_Format; +typedef enum Audio_Stream_Wave_Format Audio_Stream_Wave_Format; + +SPXAPI_(bool) audio_stream_format_is_handle_valid(SPXAUDIOSTREAMFORMATHANDLE hformat); +SPXAPI audio_stream_format_create_from_default_input(SPXAUDIOSTREAMFORMATHANDLE* hformat); +SPXAPI audio_stream_format_create_from_waveformat(SPXAUDIOSTREAMFORMATHANDLE* hformat, uint32_t samplesPerSecond, uint8_t bitsPerSample, uint8_t channels, Audio_Stream_Wave_Format waveFormat); +SPXAPI audio_stream_format_create_from_waveformat_pcm(SPXAUDIOSTREAMFORMATHANDLE* hformat, uint32_t samplesPerSecond, uint8_t bitsPerSample, uint8_t channels); +SPXAPI audio_stream_format_create_from_default_output(SPXAUDIOSTREAMFORMATHANDLE* hformat); +SPXAPI audio_stream_format_create_from_compressed_format(SPXAUDIOSTREAMFORMATHANDLE* hformat, Audio_Stream_Container_Format compressedFormat); +SPXAPI audio_stream_format_release(SPXAUDIOSTREAMFORMATHANDLE hformat); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_auto_detect_source_lang_config.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_auto_detect_source_lang_config.h new file mode 100644 index 0000000..f62c5eb --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_auto_detect_source_lang_config.h @@ -0,0 +1,15 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include + +SPXAPI create_auto_detect_source_lang_config_from_open_range(SPXAUTODETECTSOURCELANGCONFIGHANDLE* hAutoDetectSourceLanguageconfig); +SPXAPI create_auto_detect_source_lang_config_from_languages(SPXAUTODETECTSOURCELANGCONFIGHANDLE* hAutoDetectSourceLanguageconfig, const char* languages); +SPXAPI create_auto_detect_source_lang_config_from_source_lang_config(SPXAUTODETECTSOURCELANGCONFIGHANDLE* hAutoDetectSourceLanguageconfig, SPXSOURCELANGCONFIGHANDLE hSourceLanguageConfig); +SPXAPI add_source_lang_config_to_auto_detect_source_lang_config(SPXAUTODETECTSOURCELANGCONFIGHANDLE hAutoDetectSourceLanguageconfig, SPXSOURCELANGCONFIGHANDLE hSourceLanguageConfig); +SPXAPI_(bool) auto_detect_source_lang_config_is_handle_valid(SPXAUTODETECTSOURCELANGCONFIGHANDLE hAutoDetectSourceLanguageconfig); +SPXAPI auto_detect_source_lang_config_release(SPXAUTODETECTSOURCELANGCONFIGHANDLE hAutoDetectSourceLanguageconfig); +SPXAPI auto_detect_source_lang_config_get_property_bag(SPXAUTODETECTSOURCELANGCONFIGHANDLE hAutoDetectSourceLanguageconfig, SPXPROPERTYBAGHANDLE* hpropbag); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_common.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_common.h new file mode 100644 index 0000000..ebfb802 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_common.h @@ -0,0 +1,81 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
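
A sketch of the input direction using the stream-format functions above together with the push-stream functions from speechapi_c_audio_stream.h: create a 16 kHz / 16-bit / mono PCM format and a push input stream for captured audio. Releasing the format handle right after stream creation is an assumption (the stream is presumed to keep whatever it needs).

// Sketch: PCM push audio input stream for feeding captured audio to a recognizer.
void CreatePushStream(SPXAUDIOSTREAMHANDLE* phstream)
{
    SPXAUDIOSTREAMFORMATHANDLE hformat = SPXHANDLE_INVALID;
    audio_stream_format_create_from_waveformat_pcm(&hformat, 16000, 16, 1);
    audio_stream_create_push_audio_input_stream(phstream, hformat);
    audio_stream_format_release(hformat);   // assumed safe once the stream has been created
}
// per captured buffer:   push_audio_input_stream_write(hstream, pcmBytes, pcmByteCount);
// at end of capture:     push_audio_input_stream_close(hstream);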
+// +// speechapi_c_common.h: Public API declarations for global C definitions and typedefs +// + +#pragma once + +#include +#include // must include after spxdebug.h or speechapi*.h (can NOT be included before) +#include +#include + +#define SPX_EXTERN_C AZAC_EXTERN_C +#ifndef SPXAPI_EXPORT +#define SPXAPI_EXPORT AZAC_API_EXPORT +#endif + +#define SPXAPI_NOTHROW AZAC_API_NOTHROW +#define SPXAPI_RESULTTYPE SPXHR +#define SPXAPI_CALLTYPE AZAC_API_CALLTYPE +#define SPXAPI_VCALLTYPE AZAC_VCALLTYPE + +#define SPXDLL_EXPORT AZAC_DLL_EXPORT + +#define SPXAPI SPX_EXTERN_C SPXAPI_EXPORT SPXAPI_RESULTTYPE SPXAPI_NOTHROW SPXAPI_CALLTYPE +#define SPXAPI_(type) SPX_EXTERN_C SPXAPI_EXPORT type SPXAPI_NOTHROW SPXAPI_CALLTYPE +#define SPXAPI__(type) SPX_EXTERN_C SPXAPI_EXPORT SPXAPI_NOTHROW type SPXAPI_CALLTYPE + +#define SPXAPIV SPX_EXTERN_C SPXAPI_EXPORT SPXAPI_NOTHROW SPXAPI_RESULTTYPE SPXAPI_VCALLTYPE +#define SPXAPIV_(type) SPX_EXTERN_C SPXAPI_EXPORT SPXAPI_NOTHROW type SPXAPI_VCALLTYPE + +#define SPXAPI_PRIVATE SPX_EXTERN_C SPXAPI_RESULTTYPE SPXAPI_NOTHROW SPXAPI_CALLTYPE +#define SPXAPI_PRIVATE_(type) SPX_EXTERN_C type SPXAPI_NOTHROW SPXAPI_CALLTYPE + +#define _spx_empty _azac_empty +#define _spxhandle _azac_handle +#define SPXHANDLE AZAC_HANDLE +#define SPXERRORHANDLE AZAC_HANDLE + +#define SPXPROPERTYBAGHANDLE AZAC_HANDLE +typedef SPXHANDLE SPXASYNCHANDLE; +typedef SPXHANDLE SPXFACTORYHANDLE; +typedef SPXHANDLE SPXRECOHANDLE; +typedef SPXHANDLE SPXSYNTHHANDLE; +typedef SPXHANDLE SPXRESULTHANDLE; +typedef SPXHANDLE SPXEVENTHANDLE; +typedef SPXHANDLE SPXSESSIONHANDLE; +typedef SPXHANDLE SPXTRIGGERHANDLE; +typedef SPXHANDLE SPXLUMODELHANDLE; +typedef SPXHANDLE SPXKEYWORDHANDLE; +typedef SPXHANDLE SPXAUDIOSTREAMFORMATHANDLE; +typedef SPXHANDLE SPXAUDIOSTREAMHANDLE; +typedef SPXHANDLE SPXAUDIOCONFIGHANDLE; +typedef SPXHANDLE SPXSPEECHCONFIGHANDLE; +typedef SPXHANDLE SPXCONNECTIONHANDLE; +typedef SPXHANDLE SPXCONNECTIONMESSAGEHANDLE; +typedef SPXHANDLE SPXACTIVITYHANDLE; +typedef SPXHANDLE SPXACTIVITYJSONHANDLE; +typedef SPXHANDLE SPXGRAMMARHANDLE; +typedef SPXHANDLE SPXPHRASEHANDLE; +typedef SPXHANDLE SPXUSERHANDLE; +typedef SPXHANDLE SPXPARTICIPANTHANDLE; +typedef SPXHANDLE SPXAUTODETECTSOURCELANGCONFIGHANDLE; +typedef SPXHANDLE SPXSOURCELANGCONFIGHANDLE; +typedef SPXHANDLE SPXCONVERSATIONHANDLE; +typedef SPXHANDLE SPXMEETINGHANDLE; +typedef SPXHANDLE SPXCONVERSATIONTRANSLATORHANDLE; +typedef SPXHANDLE SPXVOICEPROFILECLIENTHANDLE; +typedef SPXHANDLE SPXVOICEPROFILEHANDLE; +typedef SPXHANDLE SPXSPEAKERIDHANDLE; +typedef SPXHANDLE SPXSIMODELHANDLE; +typedef SPXHANDLE SPXSVMODELHANDLE; +typedef SPXHANDLE SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE; +typedef SPXHANDLE SPXAUDIOPROCESSINGOPTIONSHANDLE; +typedef SPXHANDLE SPXSPEECHRECOMODELHANDLE; +typedef SPXHANDLE SPXREQUESTHANDLE; + +#define SPXHANDLE_INVALID ((SPXHANDLE)-1) +#define SPXHANDLE_RESERVED1 ((SPXHANDLE)+1) diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_connection.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_connection.h new file mode 100644 index 0000000..3bb69c9 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_connection.h @@ -0,0 +1,46 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
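
All of the SPX*HANDLE typedefs above alias the same opaque SPXHANDLE, so the distinct names only document intent to callers; handles start out as SPXHANDLE_INVALID and are checked and released through the matching *_is_handle_valid / *_release entry points. A sketch using the audio-config pair from earlier in this diff:

// Sketch: the common init / validity-check / release pattern for opaque handles.
void ResetAudioConfig(SPXAUDIOCONFIGHANDLE& haudio)
{
    if (audio_config_is_handle_valid(haudio))   // false for SPXHANDLE_INVALID
    {
        audio_config_release(haudio);
    }
    haudio = SPXHANDLE_INVALID;
}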
+// + +#pragma once +#include + +SPXAPI connection_from_recognizer(SPXRECOHANDLE recognizerHandle, SPXCONNECTIONHANDLE* connectionHandle); +SPXAPI connection_from_conversation_translator(SPXCONVERSATIONTRANSLATORHANDLE convTransHandle, SPXCONNECTIONHANDLE* connectionHandle); +SPXAPI connection_from_dialog_service_connector(SPXRECOHANDLE convTransHandle, SPXCONNECTIONHANDLE* connectionHandle); +SPXAPI connection_from_speech_synthesizer(SPXSYNTHHANDLE synthesizerHandle, SPXCONNECTIONHANDLE* connectionHandle); + +SPXAPI_(bool) connection_handle_is_valid(SPXCONNECTIONHANDLE handle); +SPXAPI connection_handle_release(SPXCONNECTIONHANDLE handle); +SPXAPI connection_async_handle_release(SPXASYNCHANDLE hasync); + +SPXAPI connection_open(SPXCONNECTIONHANDLE handle, bool forContinuousRecognition); +SPXAPI connection_close(SPXCONNECTIONHANDLE handle); +SPXAPI connection_set_message_property(SPXCONNECTIONHANDLE handle, const char* path, const char* name, const char* value); +SPXAPI connection_send_message(SPXCONNECTIONHANDLE handle, const char* path, const char* payload); +SPXAPI connection_send_message_async(SPXCONNECTIONHANDLE handle, const char* path, const char* payload, SPXASYNCHANDLE* phasync); + +SPXAPI connection_send_message_data(SPXCONNECTIONHANDLE handle, const char* path, uint8_t* data, uint32_t size); +SPXAPI connection_send_message_data_async(SPXCONNECTIONHANDLE handle, const char* path, uint8_t* data, uint32_t size, SPXASYNCHANDLE* phasync); + +SPXAPI connection_send_message_wait_for(SPXASYNCHANDLE hasync, uint32_t milliseconds); + +SPXAPI connection_get_property_bag(SPXRECOHANDLE hconn, SPXPROPERTYBAGHANDLE* hpropbag); + +typedef void(*CONNECTION_CALLBACK_FUNC)(SPXEVENTHANDLE event, void* context); +SPXAPI connection_connected_set_callback(SPXCONNECTIONHANDLE connection, CONNECTION_CALLBACK_FUNC callback, void* context); +SPXAPI connection_disconnected_set_callback(SPXCONNECTIONHANDLE connection, CONNECTION_CALLBACK_FUNC callback, void* context); +SPXAPI connection_message_received_set_callback(SPXCONNECTIONHANDLE connection, CONNECTION_CALLBACK_FUNC callback, void* context); + +SPXAPI_(bool) connection_message_received_event_handle_is_valid(SPXEVENTHANDLE hevent); +SPXAPI connection_message_received_event_handle_release(SPXEVENTHANDLE hevent); + +SPXAPI connection_message_received_event_get_message(SPXEVENTHANDLE hevent, SPXCONNECTIONMESSAGEHANDLE* hcm); + +SPXAPI_(bool) connection_message_handle_is_valid(SPXCONNECTIONMESSAGEHANDLE handle); +SPXAPI connection_message_handle_release(SPXCONNECTIONMESSAGEHANDLE handle); + +SPXAPI connection_message_get_property_bag(SPXCONNECTIONMESSAGEHANDLE hcm, SPXPROPERTYBAGHANDLE* hpropbag); +SPXAPI connection_message_get_data(SPXCONNECTIONMESSAGEHANDLE hcm, uint8_t* data, uint32_t size); +SPXAPI_(uint32_t) connection_message_get_data_size(SPXCONNECTIONMESSAGEHANDLE hcm); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_conversation.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_conversation.h new file mode 100644 index 0000000..be12f34 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_conversation.h @@ -0,0 +1,28 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
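
A sketch of the connection API declared above: obtain a Connection from an existing recognizer handle, register connected/disconnected callbacks, and open it for continuous recognition. The recognizer handle is assumed to come from a recognizer factory elsewhere in the SDK.

// Sketch: watch connect/disconnect events for a recognizer handle obtained elsewhere.
static void OnConnectionEvent(SPXEVENTHANDLE hevent, void* context)
{
    (void)hevent;
    (void)context;   // e.g. update UI or retry state here
}

void WatchConnection(SPXRECOHANDLE hreco)
{
    SPXCONNECTIONHANDLE hconn = SPXHANDLE_INVALID;
    if (!AZAC_SUCCEEDED(connection_from_recognizer(hreco, &hconn)))
        return;
    connection_connected_set_callback(hconn, OnConnectionEvent, nullptr);
    connection_disconnected_set_callback(hconn, OnConnectionEvent, nullptr);
    connection_open(hconn, /*forContinuousRecognition=*/true);
    // keep hconn alive while notifications are wanted, then:
    connection_handle_release(hconn);
}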
+// +// speechapi_c_conversation.h: Public API declarations for conversation related C methods and typedefs +// + +#pragma once +#include + +SPXAPI conversation_create_from_config(SPXCONVERSATIONHANDLE* phconv, SPXSPEECHCONFIGHANDLE hspeechconfig, const char* id); +SPXAPI conversation_update_participant_by_user_id(SPXCONVERSATIONHANDLE hconv, bool add, const char* userId); +SPXAPI conversation_update_participant_by_user(SPXCONVERSATIONHANDLE hconv, bool add, SPXUSERHANDLE huser); +SPXAPI conversation_update_participant(SPXCONVERSATIONHANDLE hconv, bool add, SPXPARTICIPANTHANDLE hparticipant); +SPXAPI conversation_get_conversation_id(SPXCONVERSATIONHANDLE hconv, char* id, size_t size); +SPXAPI conversation_end_conversation(SPXCONVERSATIONHANDLE hconv); +SPXAPI conversation_get_property_bag(SPXCONVERSATIONHANDLE hconv, SPXPROPERTYBAGHANDLE* phpropbag); +SPXAPI conversation_release_handle(SPXHANDLE handle); + +SPXAPI conversation_start_conversation(SPXCONVERSATIONHANDLE hconv); +SPXAPI conversation_delete_conversation(SPXCONVERSATIONHANDLE hconv); +SPXAPI conversation_lock_conversation(SPXCONVERSATIONHANDLE hconv); +SPXAPI conversation_unlock_conversation(SPXCONVERSATIONHANDLE hconv); +SPXAPI conversation_mute_all_participants(SPXCONVERSATIONHANDLE hconv); +SPXAPI conversation_unmute_all_participants(SPXCONVERSATIONHANDLE hconv); +SPXAPI conversation_mute_participant(SPXCONVERSATIONHANDLE hconv, const char * participantId); +SPXAPI conversation_unmute_participant(SPXCONVERSATIONHANDLE hconv, const char * participantId); + diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_conversation_transcription_result.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_conversation_transcription_result.h new file mode 100644 index 0000000..7bcc7b1 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_conversation_transcription_result.h @@ -0,0 +1,11 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_conversation_transcriber_result.h: Public API declarations for ConversationTranscriberResult related C methods and enumerations +// + +#pragma once +#include + +SPXAPI conversation_transcription_result_get_speaker_id(SPXRESULTHANDLE hresult, char* pszSpeakerId, uint32_t cchSpeakerId); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_conversation_translator.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_conversation_translator.h new file mode 100644 index 0000000..0b1881b --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_conversation_translator.h @@ -0,0 +1,63 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
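
A host-side lifecycle sketch for the conversation functions above. The speech config handle is assumed to come from a speech_config_* factory not shown in this diff, and the conversation id and participant id strings are placeholders.

// Sketch: create, start, populate, and end a conversation.
void RunConversation(SPXSPEECHCONFIGHANDLE hspeechconfig)
{
    SPXCONVERSATIONHANDLE hconv = SPXHANDLE_INVALID;
    if (!AZAC_SUCCEEDED(conversation_create_from_config(&hconv, hspeechconfig, "example-conversation-id")))
        return;
    conversation_start_conversation(hconv);
    conversation_update_participant_by_user_id(hconv, /*add=*/true, "guest-01");
    // ... attach a transcriber or translator here ...
    conversation_end_conversation(hconv);
    conversation_release_handle(hconv);
}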
+// +// speechapi_c_conversation_translator.h: Public API declarations for conversation translator related C methods and typedefs +// + +#pragma once +#include +#include + +#ifdef __cplusplus +#include +typedef Microsoft::CognitiveServices::Speech::Transcription::ParticipantChangedReason ParticipantChangedReason; +#else +#include +#endif + +typedef void(*PCONV_TRANS_CALLBACK)(SPXCONVERSATIONTRANSLATORHANDLE hConvTrans, SPXEVENTHANDLE hEvent, void* pvContext); + +SPXAPI conversation_translator_create_from_config(SPXCONVERSATIONTRANSLATORHANDLE* phandle, SPXAUDIOCONFIGHANDLE haudioinput); +SPXAPI conversation_translator_get_property_bag(SPXCONVERSATIONTRANSLATORHANDLE hconvtranslator, SPXPROPERTYBAGHANDLE* phpropertyBag); + +SPXAPI conversation_translator_join(SPXCONVERSATIONTRANSLATORHANDLE hconvtranslator, SPXCONVERSATIONHANDLE hconv, const char* psznickname); +SPXAPI conversation_translator_join_with_id(SPXCONVERSATIONTRANSLATORHANDLE hconvtranslator, const char *pszconversationid, const char* psznickname, const char * pszlang); +SPXAPI conversation_translator_start_transcribing(SPXCONVERSATIONTRANSLATORHANDLE hconvtranslator); +SPXAPI conversation_translator_stop_transcribing(SPXCONVERSATIONTRANSLATORHANDLE hconvtranslator); +SPXAPI conversation_translator_send_text_message(SPXCONVERSATIONTRANSLATORHANDLE hconvtranslator, const char *pszmessage); +SPXAPI conversation_translator_leave(SPXCONVERSATIONTRANSLATORHANDLE hconvtranslator); +SPXAPI conversation_translator_set_authorization_token(SPXCONVERSATIONTRANSLATORHANDLE hconvtranslator, const char* pszAuthToken, const char* pszRegion); + +SPXAPI_(bool) conversation_translator_handle_is_valid(SPXCONVERSATIONTRANSLATORHANDLE handle); +SPXAPI conversation_translator_handle_release(SPXHANDLE handle); + +SPXAPI conversation_translator_session_started_set_callback(SPXCONVERSATIONTRANSLATORHANDLE hConvTrans, PCONV_TRANS_CALLBACK pCallback, void* pvContext); +SPXAPI conversation_translator_session_stopped_set_callback(SPXCONVERSATIONTRANSLATORHANDLE hConvTrans, PCONV_TRANS_CALLBACK pCallback, void* pvContext); +SPXAPI conversation_translator_canceled_set_callback(SPXCONVERSATIONTRANSLATORHANDLE hConvTrans, PCONV_TRANS_CALLBACK pCallback, void* pvContext); +SPXAPI conversation_translator_participants_changed_set_callback(SPXCONVERSATIONTRANSLATORHANDLE hConvTrans, PCONV_TRANS_CALLBACK pCallback, void* pvContext); +SPXAPI conversation_translator_conversation_expiration_set_callback(SPXCONVERSATIONTRANSLATORHANDLE hConvTrans, PCONV_TRANS_CALLBACK pCallback, void* pvContext); +SPXAPI conversation_translator_transcribing_set_callback(SPXCONVERSATIONTRANSLATORHANDLE hConvTrans, PCONV_TRANS_CALLBACK pCallback, void* pvContext); +SPXAPI conversation_translator_transcribed_set_callback(SPXCONVERSATIONTRANSLATORHANDLE hConvTrans, PCONV_TRANS_CALLBACK pCallback, void* pvContext); +SPXAPI conversation_translator_text_message_recevied_set_callback(SPXCONVERSATIONTRANSLATORHANDLE hConvTrans, PCONV_TRANS_CALLBACK pCallback, void* pvContext); + +SPXAPI conversation_translator_connection_connected_set_callback(SPXCONNECTIONHANDLE hConnection, CONNECTION_CALLBACK_FUNC pCallback, void * pvContext); +SPXAPI conversation_translator_connection_disconnected_set_callback(SPXCONNECTIONHANDLE hConnection, CONNECTION_CALLBACK_FUNC pCallback, void * pvContext); + +SPXAPI_(bool) conversation_translator_event_handle_is_valid(SPXCONVERSATIONTRANSLATORHANDLE handle); +SPXAPI conversation_translator_event_handle_release(SPXHANDLE handle); + +SPXAPI 
conversation_translator_event_get_expiration_time(SPXEVENTHANDLE hevent, int32_t* pexpirationminutes); +SPXAPI conversation_translator_event_get_participant_changed_reason(SPXEVENTHANDLE hevent, ParticipantChangedReason* preason); +SPXAPI conversation_translator_event_get_participant_changed_at_index(SPXEVENTHANDLE hevent, int index, SPXPARTICIPANTHANDLE* phparticipant); + +SPXAPI conversation_translator_result_get_user_id(SPXRESULTHANDLE hresult, char* pszUserId, uint32_t cchUserId); + +SPXAPI conversation_translator_result_get_original_lang(SPXRESULTHANDLE hresult, char * psz, uint32_t * pcch); + +SPXAPI conversation_translator_participant_get_avatar(SPXEVENTHANDLE hevent, char * psz, uint32_t * pcch); +SPXAPI conversation_translator_participant_get_displayname(SPXEVENTHANDLE hevent, char * psz, uint32_t * pcch); +SPXAPI conversation_translator_participant_get_id(SPXEVENTHANDLE hevent, char * psz, uint32_t * pcch); +SPXAPI conversation_translator_participant_get_is_muted(SPXEVENTHANDLE hevent, bool * pMuted); +SPXAPI conversation_translator_participant_get_is_host(SPXEVENTHANDLE hevent, bool * pIsHost); +SPXAPI conversation_translator_participant_get_is_using_tts(SPXEVENTHANDLE hevent, bool * ptts); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_diagnostics.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_diagnostics.h new file mode 100644 index 0000000..25b1a73 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_diagnostics.h @@ -0,0 +1,8 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include +#include // must include after spxdebug.h or speechapi*.h (can NOT be included before) diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_dialog_service_config.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_dialog_service_config.h new file mode 100644 index 0000000..4fb8b35 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_dialog_service_config.h @@ -0,0 +1,15 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
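
A sketch of the conversation-translator flow declared above: join an existing conversation by id, transcribe, send a text message, and leave. The audio config handle is assumed to come from the audio_config_* functions earlier in this diff, and the conversation id, nickname, and language strings are placeholders.

// Sketch: join a conversation as a guest and send one text message.
void JoinAndGreet(SPXAUDIOCONFIGHANDLE haudioinput)
{
    SPXCONVERSATIONTRANSLATORHANDLE htrans = SPXHANDLE_INVALID;
    if (!AZAC_SUCCEEDED(conversation_translator_create_from_config(&htrans, haudioinput)))
        return;
    conversation_translator_join_with_id(htrans, "example-conversation-id", "guest", "en-US");
    conversation_translator_start_transcribing(htrans);
    conversation_translator_send_text_message(htrans, "hello from the C API");
    conversation_translator_stop_transcribing(htrans);
    conversation_translator_leave(htrans);
    conversation_translator_handle_release(htrans);
}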
+// +// speechapi_c_dialog_service_config.h: Public API declarations for dialog service connector configuration related C methods and types +// +#pragma once + +#include + +SPXAPI bot_framework_config_from_subscription(SPXSPEECHCONFIGHANDLE* ph_config, const char* subscription, const char* region, const char *bot_Id); +SPXAPI bot_framework_config_from_authorization_token(SPXSPEECHCONFIGHANDLE* ph_config, const char* auth_token, const char* region, const char* bot_Id); + +SPXAPI custom_commands_config_from_subscription(SPXSPEECHCONFIGHANDLE* ph_dialog_service_config, const char* app_id, const char *subscription, const char* region); +SPXAPI custom_commands_config_from_authorization_token(SPXSPEECHCONFIGHANDLE* ph_dialog_service_config, const char* app_id, const char *auth_token, const char* region); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_dialog_service_connector.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_dialog_service_connector.h new file mode 100644 index 0000000..94e0fbf --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_dialog_service_connector.h @@ -0,0 +1,92 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_dialog_service_connector.h: Public API declaration for Dialog Service Connector related C methods. +// + +#pragma once +#include + +SPXAPI_(bool) dialog_service_connector_handle_is_valid(SPXRECOHANDLE h_connector); +SPXAPI dialog_service_connector_handle_release(SPXRECOHANDLE h_connector); + +SPXAPI_(bool) dialog_service_connector_async_handle_is_valid(SPXASYNCHANDLE h_async); +SPXAPI dialog_service_connector_async_handle_release(SPXASYNCHANDLE h_async); + +SPXAPI_(bool) dialog_service_connector_async_void_handle_is_valid(SPXASYNCHANDLE h_async); +SPXAPI dialog_service_connector_async_void_handle_release(SPXASYNCHANDLE h_async); + +SPXAPI_(bool) dialog_service_connector_async_string_handle_is_valid(SPXASYNCHANDLE h_async); +SPXAPI dialog_service_connector_async_string_handle_release(SPXASYNCHANDLE h_async); + +SPXAPI_(bool) dialog_service_connector_async_reco_result_handle_is_valid(SPXASYNCHANDLE h_async); +SPXAPI dialog_service_connector_async_reco_result_handle_release(SPXASYNCHANDLE h_async); + +SPXAPI_(bool) dialog_service_connector_activity_received_event_handle_is_valid(SPXEVENTHANDLE h_event); +SPXAPI dialog_service_connector_activity_received_event_release(SPXEVENTHANDLE h_event); + +SPXAPI_(bool) dialog_service_connector_turn_status_received_handle_is_valid(SPXEVENTHANDLE h_event); +SPXAPI dialog_service_connector_turn_status_received_release(SPXEVENTHANDLE h_event); + +SPXAPI dialog_service_connector_get_property_bag(SPXRECOHANDLE h_connector, SPXPROPERTYBAGHANDLE* h_prop_bag); + +SPXAPI dialog_service_connector_connect(SPXRECOHANDLE h_connector); +SPXAPI dialog_service_connector_connect_async(SPXRECOHANDLE h_connector, SPXASYNCHANDLE* p_async); +SPXAPI dialog_service_connector_connect_async_wait_for(SPXASYNCHANDLE h_async, uint32_t milliseconds); + +SPXAPI dialog_service_connector_disconnect(SPXRECOHANDLE h_connector); +SPXAPI dialog_service_connector_disconnect_async(SPXRECOHANDLE h_connector, SPXASYNCHANDLE* p_async); +SPXAPI dialog_service_connector_disconnect_async_wait_for(SPXASYNCHANDLE h_async, uint32_t milliseconds); + +SPXAPI dialog_service_connector_send_activity(SPXRECOHANDLE h_connector, const 
char* activity, char* interaction_id); +SPXAPI dialog_service_connector_send_activity_async(SPXRECOHANDLE h_connector, const char* activity, SPXASYNCHANDLE* p_async); +SPXAPI dialog_service_connector_send_activity_async_wait_for(SPXASYNCHANDLE h_async, uint32_t milliseconds, char* interaction_id); + +SPXAPI dialog_service_connector_start_keyword_recognition(SPXRECOHANDLE h_connector, SPXKEYWORDHANDLE h_keyword); +SPXAPI dialog_service_connector_start_keyword_recognition_async(SPXRECOHANDLE h_connector, SPXKEYWORDHANDLE h_keyword, SPXASYNCHANDLE* p_async); +SPXAPI dialog_service_connector_start_keyword_recognition_async_wait_for(SPXASYNCHANDLE h_async, uint32_t milliseconds); + +SPXAPI dialog_service_connector_stop_keyword_recognition(SPXRECOHANDLE h_connector); +SPXAPI dialog_service_connector_stop_keyword_recognition_async(SPXRECOHANDLE h_connector, SPXASYNCHANDLE* p_async); +SPXAPI dialog_service_connector_stop_keyword_recognition_async_wait_for(SPXASYNCHANDLE h_async, uint32_t milliseconds); + +SPXAPI dialog_service_connector_listen_once(SPXRECOHANDLE h_connector, SPXRESULTHANDLE* p_result); +SPXAPI dialog_service_connector_listen_once_async(SPXRECOHANDLE h_connector, SPXASYNCHANDLE* p_async); +SPXAPI dialog_service_connector_listen_once_async_wait_for(SPXASYNCHANDLE h_async, uint32_t milliseconds, SPXRESULTHANDLE* p_result); + +SPXAPI dialog_service_connector_start_continuous_listening(SPXRECOHANDLE h_connector); +SPXAPI dialog_service_connector_start_continuous_listening_async(SPXRECOHANDLE h_connector, SPXASYNCHANDLE* p_async); + +SPXAPI dialog_service_connector_stop_listening(SPXRECOHANDLE h_connector); +SPXAPI dialog_service_connector_stop_listening_async(SPXRECOHANDLE h_connector, SPXASYNCHANDLE* p_async); + +typedef void(*PSESSION_CALLBACK_FUNC)(SPXRECOHANDLE h_connector, SPXEVENTHANDLE h_event, void* pv_context); + +SPXAPI dialog_service_connector_session_started_set_callback(SPXRECOHANDLE h_connector, PSESSION_CALLBACK_FUNC p_callback, void *pv_context); +SPXAPI dialog_service_connector_session_stopped_set_callback(SPXRECOHANDLE h_connector, PSESSION_CALLBACK_FUNC p_callback, void *pv_context); + +SPXAPI dialog_service_connector_speech_start_detected_set_callback(SPXRECOHANDLE h_connector, PSESSION_CALLBACK_FUNC p_callback, void* pv_context); +SPXAPI dialog_service_connector_speech_end_detected_set_callback(SPXRECOHANDLE h_connector, PSESSION_CALLBACK_FUNC p_callback, void* pv_context); + +typedef void(*PRECOGNITION_CALLBACK_FUNC)(SPXRECOHANDLE h_connector, SPXEVENTHANDLE h_event, void* pv_context); + +SPXAPI dialog_service_connector_recognized_set_callback(SPXRECOHANDLE h_connector, PRECOGNITION_CALLBACK_FUNC p_callback, void *pv_context); +SPXAPI dialog_service_connector_recognizing_set_callback(SPXRECOHANDLE h_connector, PRECOGNITION_CALLBACK_FUNC p_callback, void *pv_context); +SPXAPI dialog_service_connector_canceled_set_callback(SPXRECOHANDLE h_connector, PRECOGNITION_CALLBACK_FUNC p_callback, void *pv_context); +SPXAPI dialog_service_connector_activity_received_set_callback(SPXRECOHANDLE h_connector, PRECOGNITION_CALLBACK_FUNC p_callback, void *pv_context); +SPXAPI dialog_service_connector_turn_status_received_set_callback(SPXRECOHANDLE h_connector, PRECOGNITION_CALLBACK_FUNC p_callback, void* pv_context); + +SPXAPI dialog_service_connector_activity_received_event_get_activity_size(SPXEVENTHANDLE h_event, size_t* size); +SPXAPI dialog_service_connector_activity_received_event_get_activity(SPXEVENTHANDLE h_event, char* p_activity, size_t size); +SPXAPI_(bool) 
dialog_service_connector_activity_received_event_has_audio(SPXEVENTHANDLE h_event); +SPXAPI dialog_service_connector_activity_received_event_get_audio(SPXEVENTHANDLE h_event, SPXAUDIOSTREAMHANDLE* p_audio); + +SPXAPI dialog_service_connector_turn_status_received_get_interaction_id_size(SPXEVENTHANDLE h_event, size_t* size); +SPXAPI dialog_service_connector_turn_status_received_get_interaction_id(SPXEVENTHANDLE h_event, char* p_interaction_id, size_t size); +SPXAPI dialog_service_connector_turn_status_received_get_conversation_id_size(SPXEVENTHANDLE h_event, size_t* size); +SPXAPI dialog_service_connector_turn_status_received_get_conversation_id(SPXEVENTHANDLE h_event, char* p_interaction_id, size_t size); +SPXAPI dialog_service_connector_turn_status_received_get_status(SPXEVENTHANDLE h_event, int* p_status); + +SPXAPI dialog_service_connector_recognized_size(SPXEVENTHANDLE h_event, uint32_t* size); +SPXAPI dialog_service_connector_recognized_get_result(SPXEVENTHANDLE h_event, uint32_t* size); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_embedded_speech_config.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_embedded_speech_config.h new file mode 100644 index 0000000..fb45fc1 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_embedded_speech_config.h @@ -0,0 +1,21 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include +#include +#include +#include + +SPXAPI embedded_speech_config_create(SPXSPEECHCONFIGHANDLE* hconfig); +SPXAPI embedded_speech_config_add_path(SPXSPEECHCONFIGHANDLE hconfig, const char* path); +SPXAPI embedded_speech_config_get_num_speech_reco_models(SPXSPEECHCONFIGHANDLE hconfig, uint32_t* numModels); +SPXAPI embedded_speech_config_get_speech_reco_model(SPXSPEECHCONFIGHANDLE hconfig, uint32_t index, SPXSPEECHRECOMODELHANDLE* hmodel); +SPXAPI embedded_speech_config_get_num_speech_translation_models(SPXSPEECHCONFIGHANDLE hconfig, uint32_t* numModels); +SPXAPI embedded_speech_config_get_speech_translation_model(SPXSPEECHCONFIGHANDLE hconfig, uint32_t index, SPXSPEECHRECOMODELHANDLE* hmodel); +SPXAPI embedded_speech_config_set_speech_recognition_model(SPXSPEECHCONFIGHANDLE hconfig, const char* name, const char* license); +SPXAPI embedded_speech_config_set_speech_synthesis_voice(SPXSPEECHCONFIGHANDLE hconfig, const char* name, const char* license); +SPXAPI embedded_speech_config_set_speech_translation_model(SPXSPEECHCONFIGHANDLE hconfig, const char* name, const char* license); +SPXAPI embedded_speech_config_set_keyword_recognition_model(SPXSPEECHCONFIGHANDLE hconfig, const char* name, const char* license); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_error.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_error.h new file mode 100644 index 0000000..0a581e3 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_error.h @@ -0,0 +1,9 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
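// Usage sketch (illustrative only, not part of the SDK headers): receiving a Bot Framework
// activity through the dialog service connector C API declared above. The callback
// registration and the size/copy getter pair follow the declarations in
// speechapi_c_dialog_service_connector.h; the callback is assumed to own and release the
// event handle, connector creation is out of scope, and return codes are ignored for brevity.
#include <stdio.h>
#include <stdlib.h>
#include "speechapi_c_dialog_service_connector.h"

static void OnActivityReceived(SPXRECOHANDLE h_connector, SPXEVENTHANDLE h_event, void* pv_context)
{
    (void)h_connector; (void)pv_context;

    // Query the payload size first, then copy the JSON activity into a caller-owned buffer.
    size_t size = 0;
    dialog_service_connector_activity_received_event_get_activity_size(h_event, &size);

    char* activity = (char*)malloc(size + 1); // +1 for a defensive terminator
    if (activity != NULL)
    {
        dialog_service_connector_activity_received_event_get_activity(h_event, activity, size);
        activity[size] = '\0';
        printf("activity: %s\n", activity);
        free(activity);
    }

    // Release the event handle once the payload has been consumed.
    dialog_service_connector_activity_received_event_release(h_event);
}

static void RegisterActivityCallback(SPXRECOHANDLE h_connector)
{
    dialog_service_connector_activity_received_set_callback(h_connector, OnActivityReceived, NULL);
}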
+// +// + +#pragma once +#include +#include // must include after spxdebug.h or speechapi*.h (can NOT be included before) diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_ext_audiocompression.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_ext_audiocompression.h new file mode 100644 index 0000000..8af2d78 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_ext_audiocompression.h @@ -0,0 +1,105 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include "speechapi_c_common.h" + +const char CODECCREATEEXPORTNAME[] = "codec_create"; +struct codec_c_interface; +typedef struct codec_c_interface* codec_c_interface_P; +typedef codec_c_interface_P SPXCODECCTYPE; + +/*! \cond INTERNAL */ + +/** + * The SPX_CODEC_CLIENT_GET_PROPERTY represents the function reading a property value + * @param id Property id. + * @param buffer caller provided buffer to receive the value of the property + * @param buffersize buffer size. If buffer is passed as null it will return the required buffer size. + * @param codecContext A pointer to caller data provided through the codec_create call. + * @return A return code or zero if successful. + */ +typedef SPXAPI_RESULTTYPE (SPXAPI_CALLTYPE *SPX_CODEC_CLIENT_GET_PROPERTY)(const char* id, char* buffer, uint64_t* buffersize, void* codecContext); + +/** + * The AUDIO_ENCODER_ONENCODEDDATA type represents an application-defined + * status callback function used to provide the encoded data. + * @param pBuffer audio data buffer. + * @param bytesToWrite The length of pBuffer in bytes. + * @param duration_100nanos The duration of the audio sample + * @param pContext A pointer to the application-defined callback context. + */ +typedef void(SPXAPI_CALLTYPE *AUDIO_ENCODER_ONENCODEDDATA)(const uint8_t* pBuffer, size_t bytesToWrite, uint64_t duration_100nanos, void* pContext); + +struct codec_c_interface +{ + /** + * @param codec codec Object returned by the codec_create call to be initialized + * @param inputSamplesPerSecond sample rate for the input audio + * @param inputBitsPerSample bits per sample for the input audio + * @param inputChannels number of channel of the input audio + * @param dataCallback An application defined callback. + * @param pContext A pointer to the application-defined callback context. + * @return A return code or zero if successful. + */ + SPXAPI_RESULTTYPE (SPXAPI_CALLTYPE *init)( + SPXCODECCTYPE codec, + uint32_t inputSamplesPerSecond, + uint8_t inputBitsPerSample, + uint8_t inputChannels, + AUDIO_ENCODER_ONENCODEDDATA datacallback, + void* pContext); + + /** + * @param codec codec object returned by the codec_create call. + * @param buffer caller provided buffer to receive the value of the property + * @param buffersize buffer size. If buffer is passed as null it will return the required buffer size. + * @return A return code or zero if successful. + */ + SPXAPI_RESULTTYPE (SPXAPI_CALLTYPE* get_format_type)(SPXCODECCTYPE codec, char* buffer, uint64_t* buffersize); + + /** + * Encodes raw PCM data. + * @param codec codec object returned by the codec_create call. + * @param pBuffer The PCM data. + * @param bytesToWrite The length pBuffer. + * @return A return code or zero if successful. 
+ */ + SPXAPI_RESULTTYPE (SPXAPI_CALLTYPE *encode) (SPXCODECCTYPE codec, const uint8_t* pBuffer, size_t bytesToWrite); + + /** + * Flushes the encoder. + * @param codec codec object returned by the codec_create call. + * @return A return code or zero if successful. + */ + SPXAPI_RESULTTYPE(SPXAPI_CALLTYPE* flush)(SPXCODECCTYPE codec); + + /** + * Terminate the encoded stream immediately + * @param codec codec object returned by the codec_create call. + * @return A return code or zero if successful. + */ + SPXAPI_RESULTTYPE (SPXAPI_CALLTYPE *endstream)(SPXCODECCTYPE codec); + + /** + * Destroys the encoder. The codec object should not be used anymore after this call. + * @param codec codec object returned by the codec_create call. + * @return A return code or zero if successful. + */ + SPXAPI_RESULTTYPE (SPXAPI_CALLTYPE *destroy) (SPXCODECCTYPE codec); +}; + +/** +* Creates a codec object. This method needs to be exported from the dll +* @param codecid - codec id, can be null or empty if the library implements only one codec. +* @param codecContext - context to be used to call back to the caller +* @param property_read_func - function to read properties +* @return A codec object +*/ + +SPX_EXTERN_C SPXDLL_EXPORT SPXCODECCTYPE codec_create(const char* codecid, void* codecContext, SPX_CODEC_CLIENT_GET_PROPERTY property_read_func); +typedef SPXCODECCTYPE (*PCODEC_CREATE_FUNC)(const char* codecid, void* codecContext, SPX_CODEC_CLIENT_GET_PROPERTY property_read_func); + +/*! \endcond */ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_factory.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_factory.h new file mode 100644 index 0000000..43e8b4b --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_factory.h @@ -0,0 +1,29 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
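// Usage sketch (illustrative only, not part of the SDK headers): a minimal pass-through
// implementation of the codec_c_interface declared in speechapi_c_ext_audiocompression.h
// above. It forwards raw PCM unchanged to the SDK's data callback; the "audio/x-raw" format
// string and the single static instance are assumptions made for brevity, and zero is
// assumed to be the success return code.
#include <stdint.h>
#include <string.h>
#include "speechapi_c_ext_audiocompression.h"

static AUDIO_ENCODER_ONENCODEDDATA s_dataCallback = NULL;
static void* s_dataContext = NULL;
static uint32_t s_samplesPerSecond = 16000;
static uint8_t s_bytesPerFrame = 2; // channels * bitsPerSample / 8

static SPXAPI_RESULTTYPE SPXAPI_CALLTYPE PassthroughInit(SPXCODECCTYPE codec,
    uint32_t inputSamplesPerSecond, uint8_t inputBitsPerSample, uint8_t inputChannels,
    AUDIO_ENCODER_ONENCODEDDATA dataCallback, void* pContext)
{
    (void)codec;
    s_samplesPerSecond = inputSamplesPerSecond;
    s_bytesPerFrame = (uint8_t)((inputBitsPerSample / 8) * inputChannels);
    s_dataCallback = dataCallback;
    s_dataContext = pContext;
    return 0;
}

static SPXAPI_RESULTTYPE SPXAPI_CALLTYPE PassthroughGetFormatType(SPXCODECCTYPE codec, char* buffer, uint64_t* buffersize)
{
    (void)codec;
    const char format[] = "audio/x-raw"; // hypothetical format identifier
    if (buffer == NULL) { *buffersize = sizeof(format); return 0; }
    if (*buffersize < sizeof(format)) { return 1; } // nonzero signals failure
    memcpy(buffer, format, sizeof(format));
    return 0;
}

static SPXAPI_RESULTTYPE SPXAPI_CALLTYPE PassthroughEncode(SPXCODECCTYPE codec, const uint8_t* pBuffer, size_t bytesToWrite)
{
    (void)codec;
    // Duration of this chunk in 100-nanosecond units: frames / sampleRate seconds.
    uint64_t frames = s_bytesPerFrame ? (bytesToWrite / s_bytesPerFrame) : 0;
    uint64_t duration_100nanos = s_samplesPerSecond ? (frames * 10000000ULL) / s_samplesPerSecond : 0;
    if (s_dataCallback != NULL)
    {
        s_dataCallback(pBuffer, bytesToWrite, duration_100nanos, s_dataContext);
    }
    return 0;
}

static SPXAPI_RESULTTYPE SPXAPI_CALLTYPE PassthroughFlush(SPXCODECCTYPE codec) { (void)codec; return 0; }
static SPXAPI_RESULTTYPE SPXAPI_CALLTYPE PassthroughEndStream(SPXCODECCTYPE codec) { (void)codec; return 0; }
static SPXAPI_RESULTTYPE SPXAPI_CALLTYPE PassthroughDestroy(SPXCODECCTYPE codec) { (void)codec; return 0; }

static struct codec_c_interface s_passthroughCodec = {
    .init = PassthroughInit,
    .get_format_type = PassthroughGetFormatType,
    .encode = PassthroughEncode,
    .flush = PassthroughFlush,
    .endstream = PassthroughEndStream,
    .destroy = PassthroughDestroy
};

// The exported factory the SDK looks up by CODECCREATEEXPORTNAME ("codec_create").
SPX_EXTERN_C SPXDLL_EXPORT SPXCODECCTYPE codec_create(const char* codecid, void* codecContext,
    SPX_CODEC_CLIENT_GET_PROPERTY property_read_func)
{
    (void)codecid; (void)codecContext; (void)property_read_func;
    return &s_passthroughCodec;
}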
+// + +#pragma once +#include + +SPXAPI recognizer_create_speech_recognizer_from_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_create_speech_recognizer_from_auto_detect_source_lang_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUTODETECTSOURCELANGCONFIGHANDLE hautoDetectSourceLangConfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_create_speech_recognizer_from_source_lang_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXSOURCELANGCONFIGHANDLE hSourceLangConfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_create_translation_recognizer_from_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_create_translation_recognizer_from_auto_detect_source_lang_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUTODETECTSOURCELANGCONFIGHANDLE hautoDetectSourceLangConfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_create_intent_recognizer_from_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_create_keyword_recognizer_from_audio_config(SPXRECOHANDLE* phreco, SPXAUDIOCONFIGHANDLE haudio); +SPXAPI recognizer_create_source_language_recognizer_from_auto_detect_source_lang_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUTODETECTSOURCELANGCONFIGHANDLE hautoDetectSourceLangConfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI synthesizer_create_speech_synthesizer_from_config(SPXSYNTHHANDLE* phsynth, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUDIOCONFIGHANDLE haudioOuput); +SPXAPI synthesizer_create_speech_synthesizer_from_auto_detect_source_lang_config(SPXSYNTHHANDLE* phsynth, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUTODETECTSOURCELANGCONFIGHANDLE hautoDetectSourceLangConfig, SPXAUDIOCONFIGHANDLE haudioOutput); +SPXAPI dialog_service_connector_create_dialog_service_connector_from_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUDIOCONFIGHANDLE haudioInput); +//SPXAPI recognizer_create_conversation_transcriber_from_config(SPXRECOHANDLE* phreco, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_create_conversation_transcriber_from_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_create_conversation_transcriber_from_auto_detect_source_lang_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUTODETECTSOURCELANGCONFIGHANDLE hautoDetectSourceLangConfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_create_conversation_transcriber_from_source_lang_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXSOURCELANGCONFIGHANDLE hSourceLangConfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_join_conversation(SPXCONVERSATIONHANDLE hconv, SPXRECOHANDLE hreco); +SPXAPI recognizer_leave_conversation(SPXRECOHANDLE hreco); +SPXAPI recognizer_create_meeting_transcriber_from_config(SPXRECOHANDLE* phreco, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_join_meeting(SPXMEETINGHANDLE hmeeting, SPXRECOHANDLE hreco); +SPXAPI recognizer_leave_meeting(SPXRECOHANDLE hreco); +SPXAPI transcriber_get_participants_list(SPXRECOHANDLE hreco, SPXPARTICIPANTHANDLE* participants, int size); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_grammar.h 
b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_grammar.h new file mode 100644 index 0000000..2833055 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_grammar.h @@ -0,0 +1,33 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_grammar.h: Public API declarations for Grammar related C methods and typedefs +// + +#pragma once +#include + +typedef enum +{ + // A Recognition Factor will apply to grammars that are referenced as individual words. + PartialPhrase = 1 +} GrammarList_RecognitionFactorScope; + +SPXAPI_(bool) grammar_handle_is_valid(SPXGRAMMARHANDLE hgrammar); +SPXAPI phrase_list_grammar_from_recognizer_by_name(SPXGRAMMARHANDLE* hgrammar, SPXRECOHANDLE hreco, const char* name); +SPXAPI grammar_handle_release(SPXGRAMMARHANDLE hgrammar); + +SPXAPI phrase_list_grammar_add_phrase(SPXGRAMMARHANDLE hgrammar, SPXPHRASEHANDLE hphrase); +SPXAPI phrase_list_grammar_clear(SPXGRAMMARHANDLE hgrammar); + +SPXAPI_(bool) grammar_phrase_handle_is_valid(SPXPHRASEHANDLE hphrase); +SPXAPI grammar_phrase_create_from_text(SPXPHRASEHANDLE* hphrase, const char* phrase); +SPXAPI grammar_phrase_handle_release(SPXPHRASEHANDLE hphrase); + +SPXAPI grammar_create_from_storage_id(SPXGRAMMARHANDLE *hgrammarlist, const char *id); +SPXAPI grammar_list_from_recognizer(SPXGRAMMARHANDLE *hgrammarlist, SPXRECOHANDLE hreco); +SPXAPI grammar_list_add_grammar(SPXGRAMMARHANDLE hgrammarlist, SPXGRAMMARHANDLE hgrammar); +SPXAPI grammar_list_set_recognition_factor(SPXGRAMMARHANDLE hgrammarlist, double factor, GrammarList_RecognitionFactorScope scope); +SPXAPI class_language_model_from_storage_id(SPXGRAMMARHANDLE* hclm, const char *storageid); +SPXAPI class_language_model_assign_class(SPXGRAMMARHANDLE hclm, const char *classname, SPXGRAMMARHANDLE hgrammar); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_hybrid_speech_config.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_hybrid_speech_config.h new file mode 100644 index 0000000..945c3ee --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_hybrid_speech_config.h @@ -0,0 +1,9 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include + +SPXAPI hybrid_speech_config_create(SPXSPEECHCONFIGHANDLE* hconfig, SPXSPEECHCONFIGHANDLE hcloudSpeechConfig, SPXSPEECHCONFIGHANDLE hembeddedSpeechConfig); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_intent_recognizer.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_intent_recognizer.h new file mode 100644 index 0000000..241cf99 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_intent_recognizer.h @@ -0,0 +1,16 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
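// Usage sketch (illustrative only, not part of the SDK headers): biasing recognition with
// a phrase list via the grammar C API in speechapi_c_grammar.h above. Passing an empty
// string as the phrase list name is an assumption; return codes are ignored for brevity.
#include "speechapi_c_grammar.h"

static void AddBoostPhrase(SPXRECOHANDLE hreco, const char* phraseText)
{
    SPXGRAMMARHANDLE hgrammar = NULL;
    SPXPHRASEHANDLE hphrase = NULL;

    // Obtain the recognizer's phrase list grammar, add one phrase, then release the
    // local handles; the grammar itself stays attached to the recognizer.
    phrase_list_grammar_from_recognizer_by_name(&hgrammar, hreco, "");
    grammar_phrase_create_from_text(&hphrase, phraseText);
    phrase_list_grammar_add_phrase(hgrammar, hphrase);

    grammar_phrase_handle_release(hphrase);
    grammar_handle_release(hgrammar);
}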
+// +// speechapi_c_intent_recognizer.h: Public API declarations for IntentRecognizer related C methods and typedefs +// + +#pragma once +#include + +SPXAPI intent_recognizer_add_intent(SPXRECOHANDLE hreco, const char* intentId, SPXTRIGGERHANDLE htrigger); +SPXAPI intent_recognizer_add_intent_with_model_id(SPXRECOHANDLE hreco, SPXTRIGGERHANDLE htrigger, const char* modelId); +SPXAPI intent_recognizer_recognize_text_once(SPXRECOHANDLE hreco, const char* text, SPXRESULTHANDLE* hresult); +SPXAPI intent_recognizer_clear_language_models(SPXRECOHANDLE hreco); +SPXAPI intent_recognizer_import_pattern_matching_model(SPXRECOHANDLE hreco, const char* jsonData); +SPXAPI intent_recognizer_add_conversational_language_understanding_model(SPXRECOHANDLE hreco, const char* languageResourceKey, const char* endpoint, const char* projectName, const char* deploymentName); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_intent_result.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_intent_result.h new file mode 100644 index 0000000..491121b --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_intent_result.h @@ -0,0 +1,11 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_intent_result.h: Public API declarations for IntentResult related C methods and enumerations +// + +#pragma once +#include + +SPXAPI intent_result_get_intent_id(SPXRESULTHANDLE hresult, char* pszIntentId, uint32_t cchIntentId); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_intent_trigger.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_intent_trigger.h new file mode 100644 index 0000000..50a49e5 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_intent_trigger.h @@ -0,0 +1,17 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_intent_trigger.h: Public API declarations for IntentTrigger related C methods and typedefs +// + +#pragma once +#include + + +SPXAPI_(bool) intent_trigger_handle_is_valid(SPXTRIGGERHANDLE htrigger); + +SPXAPI intent_trigger_create_from_phrase(SPXTRIGGERHANDLE* htrigger, const char* phrase); +SPXAPI intent_trigger_create_from_language_understanding_model(SPXTRIGGERHANDLE* htrigger, SPXLUMODELHANDLE hlumodel, const char* intentName); + +SPXAPI intent_trigger_handle_release(SPXTRIGGERHANDLE htrigger); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_json.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_json.h new file mode 100644 index 0000000..02e27ba --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_json.h @@ -0,0 +1,37 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/azai/vision/license for the full license information. 
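// Usage sketch (illustrative only, not part of the SDK headers): wiring a simple phrase
// trigger to an intent recognizer and reading back the matched intent id, using the
// declarations from speechapi_c_intent_recognizer.h, speechapi_c_intent_trigger.h and
// speechapi_c_intent_result.h above. The phrase and intent id are hypothetical; return
// codes are ignored for brevity.
#include <stdint.h>
#include <stdio.h>
#include "speechapi_c_intent_recognizer.h"
#include "speechapi_c_intent_trigger.h"
#include "speechapi_c_intent_result.h"
#include "speechapi_c_recognizer.h"

static void RecognizeTurnOnIntent(SPXRECOHANDLE hIntentReco)
{
    // Map the literal phrase to the intent id "HomeAutomation.TurnOn".
    SPXTRIGGERHANDLE htrigger = NULL;
    intent_trigger_create_from_phrase(&htrigger, "turn on the lights");
    intent_recognizer_add_intent(hIntentReco, "HomeAutomation.TurnOn", htrigger);
    intent_trigger_handle_release(htrigger);

    // Run one recognition and read back which intent (if any) matched.
    SPXRESULTHANDLE hresult = NULL;
    recognizer_recognize_once(hIntentReco, &hresult);

    char intentId[256] = { 0 };
    intent_result_get_intent_id(hresult, intentId, (uint32_t)sizeof(intentId));
    printf("intent: %s\n", intentId);

    recognizer_result_handle_release(hresult);
}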
+// + +#pragma once +#include + +SPXAPI__(const char*) ai_core_string_create(const char* str, size_t size); +SPXAPI_(void) ai_core_string_free(const char* str); + +SPXAPI_(int) ai_core_json_parser_create(SPXHANDLE* parser, const char* json, size_t jsize); // returns item for root +SPXAPI_(bool) ai_core_json_parser_handle_is_valid(SPXHANDLE parser); +SPXAPI ai_core_json_parser_handle_release(SPXHANDLE parser); + +SPXAPI_(int) ai_core_json_builder_create(SPXHANDLE* builder, const char* json, size_t jsize); // returns item for root +SPXAPI_(bool) ai_core_json_builder_handle_is_valid(SPXHANDLE builder); +SPXAPI ai_core_json_builder_handle_release(SPXHANDLE builder); + +SPXAPI_(int) ai_core_json_item_count(SPXHANDLE parserOrBuilder, int item); +SPXAPI_(int) ai_core_json_item_at(SPXHANDLE parserOrBuilder, int item, int index, const char* find); // returns item found +SPXAPI_(int) ai_core_json_item_next(SPXHANDLE parserOrBuilder, int item); // returns next item +SPXAPI_(int) ai_core_json_item_name(SPXHANDLE parserOrBuilder, int item); // returns item representing name of item specified + +SPXAPI_(int) ai_core_json_value_kind(SPXHANDLE parserOrBuilder, int item); +SPXAPI_(bool) ai_core_json_value_as_bool(SPXHANDLE parserOrBuilder, int item, bool defaultValue); +SPXAPI_(double) ai_core_json_value_as_double(SPXHANDLE parserOrBuilder, int item, double defaultValue); +SPXAPI_(int64_t) ai_core_json_value_as_int(SPXHANDLE parserOrBuilder, int item, int64_t defaultValue); +SPXAPI_(uint64_t) ai_core_json_value_as_uint(SPXHANDLE parserOrBuilder, int item, uint64_t defaultValue); + +SPXAPI__(const char*) ai_core_json_value_as_string_ptr(SPXHANDLE parserOrBuilder, int item, size_t* size); + +SPXAPI__(const char*) ai_core_json_value_as_string_copy(SPXHANDLE parserOrBuilder, int item, const char* defaultValue); +SPXAPI__(const char*) ai_core_json_value_as_json_copy(SPXHANDLE parserOrBuilder, int item); + +SPXAPI_(int) ai_core_json_builder_item_add(SPXHANDLE builder, int item, int index, const char* find); +SPXAPI ai_core_json_builder_item_set(SPXHANDLE builder, int item, const char* json, size_t jsize, int kind, const char* str, size_t ssize, bool boolean, int integer, double number); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_keyword_recognition_model.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_keyword_recognition_model.h new file mode 100644 index 0000000..45f1ae3 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_keyword_recognition_model.h @@ -0,0 +1,17 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_c_keyword_recognition_model.h: Public API declarations for KeywordRecognitionModel related C methods and typedefs +// + +#pragma once +#include + + +SPXAPI_(bool) keyword_recognition_model_handle_is_valid(SPXKEYWORDHANDLE hkeyword); +SPXAPI keyword_recognition_model_handle_release(SPXKEYWORDHANDLE hkeyword); + +SPXAPI keyword_recognition_model_create_from_file(const char* fileName, SPXKEYWORDHANDLE* phkwmodel); +SPXAPI keyword_recognition_model_create_from_config(SPXSPEECHCONFIGHANDLE hconfig, SPXKEYWORDHANDLE* phkwmodel); +SPXAPI keyword_recognition_model_add_user_defined_wake_word(SPXKEYWORDHANDLE hkwmodel, const char* wakeWord); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_language_understanding_model.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_language_understanding_model.h new file mode 100644 index 0000000..c86d5b4 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_language_understanding_model.h @@ -0,0 +1,18 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_language_understanding_model.h: Public API declarations for LanguageUnderstandingModel related C methods and typedefs +// + +#pragma once +#include + +SPXAPI_(bool) language_understanding_model_handle_is_valid(SPXLUMODELHANDLE hlumodel); + +SPXAPI language_understanding_model_create_from_uri(SPXLUMODELHANDLE* hlumodel, const char* uri); +SPXAPI language_understanding_model_create_from_app_id(SPXLUMODELHANDLE* hlumodel, const char* appId); +SPXAPI language_understanding_model_create_from_subscription(SPXLUMODELHANDLE* hlumodel, const char* subscriptionKey, const char* appId, const char* region); + +SPXAPI language_understanding_model__handle_release(SPXLUMODELHANDLE hlumodel); +SPXAPI__(const char *) language_understanding_model_get_model_id(SPXLUMODELHANDLE hlumodel); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_meeting.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_meeting.h new file mode 100644 index 0000000..b39a60f --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_meeting.h @@ -0,0 +1,28 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
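// Usage sketch (illustrative only, not part of the SDK headers): loading a keyword model
// from a table file and starting keyword-triggered recognition on an existing recognizer,
// using speechapi_c_keyword_recognition_model.h above and speechapi_c_recognizer.h below.
// The model path is hypothetical; return codes are ignored for brevity.
#include "speechapi_c_keyword_recognition_model.h"
#include "speechapi_c_recognizer.h"

static void StartWakeWord(SPXRECOHANDLE hreco)
{
    SPXKEYWORDHANDLE hkeyword = NULL;
    keyword_recognition_model_create_from_file("Keyword/kws.table", &hkeyword);

    // The recognizer now only surfaces results after the keyword is spotted.
    recognizer_start_keyword_recognition(hreco, hkeyword);

    // Released here on the assumption that the recognizer keeps its own reference
    // to the model once keyword recognition has started.
    keyword_recognition_model_handle_release(hkeyword);
}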
+// +// speechapi_c_meeting.h: Public API declarations for meeting related C methods and typedefs +// + +#pragma once +#include + +SPXAPI meeting_create_from_config(SPXMEETINGHANDLE* phmeeting, SPXSPEECHCONFIGHANDLE hspeechconfig, const char* id); +SPXAPI meeting_update_participant_by_user_id(SPXMEETINGHANDLE hconv, bool add, const char* userId); +SPXAPI meeting_update_participant_by_user(SPXMEETINGHANDLE hconv, bool add, SPXUSERHANDLE huser); +SPXAPI meeting_update_participant(SPXMEETINGHANDLE hconv, bool add, SPXPARTICIPANTHANDLE hparticipant); +SPXAPI meeting_get_meeting_id(SPXMEETINGHANDLE hconv, char* id, size_t size); +SPXAPI meeting_end_meeting(SPXMEETINGHANDLE hconv); +SPXAPI meeting_get_property_bag(SPXMEETINGHANDLE hconv, SPXPROPERTYBAGHANDLE* phpropbag); +SPXAPI meeting_release_handle(SPXHANDLE handle); + +SPXAPI meeting_start_meeting(SPXMEETINGHANDLE hconv); +SPXAPI meeting_delete_meeting(SPXMEETINGHANDLE hconv); +SPXAPI meeting_lock_meeting(SPXMEETINGHANDLE hconv); +SPXAPI meeting_unlock_meeting(SPXMEETINGHANDLE hconv); +SPXAPI meeting_mute_all_participants(SPXMEETINGHANDLE hconv); +SPXAPI meeting_unmute_all_participants(SPXMEETINGHANDLE hconv); +SPXAPI meeting_mute_participant(SPXMEETINGHANDLE hconv, const char * participantId); +SPXAPI meeting_unmute_participant(SPXMEETINGHANDLE hconv, const char * participantId); + diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_meeting_transcription_result.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_meeting_transcription_result.h new file mode 100644 index 0000000..bcfdd35 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_meeting_transcription_result.h @@ -0,0 +1,12 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_meeting_transcriber_result.h: Public API declarations for MeetingTranscriberResult related C methods and enumerations +// + +#pragma once +#include + +SPXAPI meeting_transcription_result_get_user_id(SPXRESULTHANDLE hresult, char* pszUserId, uint32_t cchUserId); +SPXAPI meeting_transcription_result_get_utterance_id(SPXRESULTHANDLE hresult, char* pszUtteranceId, uint32_t cchUtteranceId); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_operations.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_operations.h new file mode 100644 index 0000000..ed556d7 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_operations.h @@ -0,0 +1,12 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_operations.h: Public API declaration for common operation methods in the C API layer. 
+// + +#pragma once +#include + +SPXAPI speechapi_async_handle_release(SPXASYNCHANDLE h_async); +SPXAPI speechapi_async_wait_for(SPXASYNCHANDLE h_async, uint32_t milliseconds); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_participant.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_participant.h new file mode 100644 index 0000000..1ddb4c8 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_participant.h @@ -0,0 +1,15 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_participant.h: Public API declarations for conversation transcriber participant related C methods and enumerations +// + +#pragma once +#include + +SPXAPI participant_create_handle(SPXPARTICIPANTHANDLE* hparticipant, const char* userId, const char* preferred_language, const char* voice_signature); +SPXAPI participant_release_handle(SPXPARTICIPANTHANDLE hparticipant); +SPXAPI participant_set_preferred_langugage(SPXPARTICIPANTHANDLE hparticipant, const char* preferred_language); +SPXAPI participant_set_voice_signature(SPXPARTICIPANTHANDLE hparticipant, const char* voice_signature); +SPXAPI participant_get_property_bag(SPXPARTICIPANTHANDLE hparticipant, SPXPROPERTYBAGHANDLE* hpropbag); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_pattern_matching_model.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_pattern_matching_model.h new file mode 100644 index 0000000..264f063 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_pattern_matching_model.h @@ -0,0 +1,33 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_pattern_matching_model.h: Public API declarations for PatternMatchingModel related C methods and typedefs +// + +#pragma once +#include + +SPXAPI_(bool) pattern_matching_model_handle_is_valid(SPXLUMODELHANDLE hlumodel); + +SPXAPI pattern_matching_model_create(SPXLUMODELHANDLE* hlumodel, SPXRECOHANDLE hIntentReco, const char* id); +SPXAPI pattern_matching_model_create_from_id(SPXLUMODELHANDLE* hlumodel, const char* id); + +typedef SPXAPI_RESULTTYPE(SPXAPI_CALLTYPE* PATTERN_MATCHING_MODEL_GET_STR_FROM_INDEX)(void* context, size_t index, const char** str, size_t* size); + +SPXAPI pattern_matching_model_add_entity( + SPXLUMODELHANDLE hlumodel, + const char* id, + int32_t type, + int32_t mode, + size_t numPhrases, + void* phraseContext, + PATTERN_MATCHING_MODEL_GET_STR_FROM_INDEX phraseGetter); + +SPXAPI pattern_matching_model_add_intent( + SPXLUMODELHANDLE hlumodel, + const char* id, + uint32_t priority, + size_t numPhrases, + void* phraseContext, + PATTERN_MATCHING_MODEL_GET_STR_FROM_INDEX phraseGetter); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_pronunciation_assessment_config.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_pronunciation_assessment_config.h new file mode 100644 index 0000000..1d5df85 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_pronunciation_assessment_config.h @@ -0,0 +1,33 @@ +// +// Copyright (c) Microsoft. All rights reserved. 
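// Usage sketch (illustrative only, not part of the SDK headers): supplying intent phrases
// to a pattern matching model through the PATTERN_MATCHING_MODEL_GET_STR_FROM_INDEX
// callback declared above. Zero is assumed to be the success return code; the model id,
// intent id and phrases are hypothetical, and return codes of the SPXAPI calls are
// ignored for brevity.
#include <string.h>
#include "speechapi_c_pattern_matching_model.h"

static const char* s_turnOnPhrases[] = {
    "turn on the {thing}",
    "switch the {thing} on"
};

// Hands phrase `index` back to the SDK as a (pointer, length) pair.
static SPXAPI_RESULTTYPE SPXAPI_CALLTYPE GetTurnOnPhrase(void* context, size_t index, const char** str, size_t* size)
{
    (void)context;
    *str = s_turnOnPhrases[index];
    *size = strlen(s_turnOnPhrases[index]);
    return 0;
}

static void BuildPatternModel(SPXRECOHANDLE hIntentReco)
{
    SPXLUMODELHANDLE hlumodel = NULL;
    pattern_matching_model_create(&hlumodel, hIntentReco, "HomeAutomationModel");

    pattern_matching_model_add_intent(
        hlumodel,
        "HomeAutomation.TurnOn",                              // intent id
        1,                                                    // priority
        sizeof(s_turnOnPhrases) / sizeof(s_turnOnPhrases[0]), // numPhrases
        NULL,                                                 // phraseContext passed to the getter
        GetTurnOnPhrase);
}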
+// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include + +typedef enum +{ + PronunciationAssessmentGradingSystem_FivePoint = 1, + PronunciationAssessmentGradingSystem_HundredMark = 2 +} Pronunciation_Assessment_Grading_System; + +typedef enum +{ + PronunciationAssessmentGranularity_Phoneme = 1, + PronunciationAssessmentGranularity_Word = 2, + PronunciationAssessmentGranularity_FullText = 3 +} Pronunciation_Assessment_Granularity; + +SPXAPI create_pronunciation_assessment_config(SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE* hPronunciationAssessmentConfig, + const char* referenceText, + Pronunciation_Assessment_Grading_System gradingSystem, + Pronunciation_Assessment_Granularity granularity, + bool enableMiscue); +SPXAPI create_pronunciation_assessment_config_from_json(SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE* hPronunciationAssessmentConfig, const char* json); +SPXAPI_(bool) pronunciation_assessment_config_is_handle_valid(SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE hPronunciationAssessmentConfig); +SPXAPI pronunciation_assessment_config_release(SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE hPronunciationAssessmentConfig); +SPXAPI pronunciation_assessment_config_get_property_bag( + SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE hPronunciationAssessmentConfig, SPXPROPERTYBAGHANDLE* hpropbag); +SPXAPI__(const char*) pronunciation_assessment_config_to_json(SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE hPronunciationAssessmentConfig); +SPXAPI pronunciation_assessment_config_apply_to_recognizer(SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE hPronunciationAssessmentConfig, SPXRECOHANDLE hreco); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_property_bag.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_property_bag.h new file mode 100644 index 0000000..39413df --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_property_bag.h @@ -0,0 +1,159 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_property_bag.h: Public API declarations for Property Bag related C methods +// + +#pragma once +#include + +SPXAPI property_bag_create(SPXPROPERTYBAGHANDLE* hpropbag); +SPXAPI_(bool) property_bag_is_valid(SPXPROPERTYBAGHANDLE hpropbag); +SPXAPI property_bag_set_string(SPXPROPERTYBAGHANDLE hpropbag, int id, const char* name, const char* value); +SPXAPI__(const char*) property_bag_get_string(SPXPROPERTYBAGHANDLE hpropbag, int id, const char* name, const char* defaultValue); +SPXAPI property_bag_free_string(const char* value); +SPXAPI property_bag_release(SPXPROPERTYBAGHANDLE hpropbag); +SPXAPI property_bag_copy(SPXPROPERTYBAGHANDLE hfrom, SPXPROPERTYBAGHANDLE hto); + +// NOTE: Currently this enum is duplicated with C++ side, +// because SWIG cannot properly resolve conditional compilation. 
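// Usage sketch (illustrative only, not part of the SDK headers): reading the detailed JSON
// payload attached to a recognition result through the property bag API declared above.
// Passing the numeric property id with a NULL name is an assumed convention; strings
// returned by property_bag_get_string are released with property_bag_free_string.
// Return codes are ignored for brevity.
#include <stdio.h>
#include "speechapi_c_property_bag.h"
#include "speechapi_c_result.h"

static void PrintResultJson(SPXRESULTHANDLE hresult)
{
    SPXPROPERTYBAGHANDLE hpropbag = NULL;
    result_get_property_bag(hresult, &hpropbag);

    const char* json = property_bag_get_string(hpropbag, 5000 /* SpeechServiceResponse_JsonResult */, NULL, "");
    printf("json: %s\n", json);

    property_bag_free_string(json);
    property_bag_release(hpropbag);
}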
+#ifndef __cplusplus +enum PropertyId +{ + SpeechServiceConnection_Key = 1000, + SpeechServiceConnection_Endpoint = 1001, + SpeechServiceConnection_Region = 1002, + SpeechServiceAuthorization_Token = 1003, + SpeechServiceAuthorization_Type = 1004, + SpeechServiceConnection_EndpointId = 1005, + SpeechServiceConnection_Host = 1006, + + SpeechServiceConnection_ProxyHostName = 1100, + SpeechServiceConnection_ProxyPort = 1101, + SpeechServiceConnection_ProxyUserName = 1102, + SpeechServiceConnection_ProxyPassword = 1103, + SpeechServiceConnection_Url = 1104, + SpeechServiceConnection_ProxyHostBypass = 1105, + + SpeechServiceConnection_TranslationToLanguages = 2000, + SpeechServiceConnection_TranslationVoice = 2001, + SpeechServiceConnection_TranslationFeatures = 2002, + SpeechServiceConnection_IntentRegion = 2003, + + SpeechServiceConnection_RecoMode = 3000, + SpeechServiceConnection_RecoLanguage = 3001, + Speech_SessionId = 3002, + SpeechServiceConnection_UserDefinedQueryParameters = 3003, + SpeechServiceConnection_RecoModelBackend = 3004, + SpeechServiceConnection_RecoModelName = 3005, + SpeechServiceConnection_RecoModelKey = 3006, + SpeechServiceConnection_RecoModelIniFile = 3007, + + SpeechServiceConnection_SynthLanguage = 3100, + SpeechServiceConnection_SynthVoice = 3101, + SpeechServiceConnection_SynthOutputFormat = 3102, + SpeechServiceConnection_SynthEnableCompressedAudioTransmission = 3103, + SpeechServiceConnection_SynthBackend = 3110, + SpeechServiceConnection_SynthOfflineDataPath = 3112, + SpeechServiceConnection_SynthOfflineVoice = 3113, + SpeechServiceConnection_SynthModelKey = 3114, + SpeechServiceConnection_VoicesListEndpoint = 3130, + + SpeechServiceConnection_InitialSilenceTimeoutMs = 3200, + SpeechServiceConnection_EndSilenceTimeoutMs = 3201, + SpeechServiceConnection_EnableAudioLogging = 3202, + SpeechServiceConnection_LanguageIdMode = 3205, + SpeechServiceConnection_TranslationCategoryId = 3206, + + SpeechServiceConnection_AutoDetectSourceLanguages = 3300, + SpeechServiceConnection_AutoDetectSourceLanguageResult = 3301, + + SpeechServiceResponse_RequestDetailedResultTrueFalse = 4000, + SpeechServiceResponse_RequestProfanityFilterTrueFalse = 4001, + SpeechServiceResponse_ProfanityOption = 4002, + SpeechServiceResponse_PostProcessingOption = 4003, + SpeechServiceResponse_RequestWordLevelTimestamps = 4004, + SpeechServiceResponse_StablePartialResultThreshold = 4005, + SpeechServiceResponse_OutputFormatOption = 4006, + SpeechServiceResponse_RequestSnr = 4007, + + SpeechServiceResponse_TranslationRequestStablePartialResult = 4100, + + SpeechServiceResponse_RequestWordBoundary = 4200, + SpeechServiceResponse_RequestPunctuationBoundary = 4201, + SpeechServiceResponse_RequestSentenceBoundary = 4202, + SpeechServiceResponse_SynthesisEventsSyncToAudio = 4210, + + SpeechServiceResponse_JsonResult = 5000, + SpeechServiceResponse_JsonErrorDetails = 5001, + SpeechServiceResponse_RecognitionLatencyMs = 5002, + SpeechServiceResponse_RecognitionBackend = 5003, + + SpeechServiceResponse_SynthesisFirstByteLatencyMs = 5010, + SpeechServiceResponse_SynthesisFinishLatencyMs = 5011, + SpeechServiceResponse_SynthesisUnderrunTimeMs = 5012, + SpeechServiceResponse_SynthesisConnectionLatencyMs = 5013, + SpeechServiceResponse_SynthesisNetworkLatencyMs = 5014, + SpeechServiceResponse_SynthesisServiceLatencyMs = 5015, + SpeechServiceResponse_DiarizeIntermediateResults = 5025, + + CancellationDetails_Reason = 6000, + CancellationDetails_ReasonText = 6001, + CancellationDetails_ReasonDetailedText = 6002, 
+ + LanguageUnderstandingServiceResponse_JsonResult = 7000, + + AudioConfig_DeviceNameForCapture = 8000, + AudioConfig_NumberOfChannelsForCapture = 8001, + AudioConfig_SampleRateForCapture = 8002, + AudioConfig_BitsPerSampleForCapture = 8003, + AudioConfig_AudioSource = 8004, + AudioConfig_DeviceNameForRender = 8005, + AudioConfig_PlaybackBufferLengthInMs = 8006, + + Speech_LogFilename = 9001, + Speech_SegmentationSilenceTimeoutMs = 9002, + Speech_SegmentationMaximumTimeMs = 9003, + Speech_SegmentationStrategy = 9004, + + Conversation_ApplicationId = 10000, + Conversation_DialogType = 10001, + Conversation_Initial_Silence_Timeout = 10002, + Conversation_From_Id = 10003, + Conversation_Conversation_Id = 10004, + Conversation_Custom_Voice_Deployment_Ids = 10005, + Conversation_Speech_Activity_Template = 10006, + Conversation_ParticipantId = 10007, + DataBuffer_TimeStamp = 11001, + DataBuffer_UserId = 11002, + + PronunciationAssessment_ReferenceText = 12001, + PronunciationAssessment_GradingSystem = 12002, + PronunciationAssessment_Granularity = 12003, + PronunciationAssessment_EnableMiscue = 12005, + PronunciationAssessment_PhonemeAlphabet = 12006, + PronunciationAssessment_NBestPhonemeCount = 12007, + PronunciationAssessment_EnableProsodyAssessment = 12008, + PronunciationAssessment_Json = 12009, + PronunciationAssessment_Params = 12010, + PronunciationAssessment_ContentTopic = 12020, + SpeakerRecognition_Api_Version = 13001, + + SpeechTranslation_ModelName = 13100, + SpeechTranslation_ModelKey = 13101, + + KeywordRecognition_ModelName = 13200, + KeywordRecognition_ModelKey = 13201, + + EmbeddedSpeech_EnablePerformanceMetrics = 13300 +}; + +typedef enum _ParticipantChangedReason +{ + JoinedConversation, + LeftConversation, + Updated +} ParticipantChangedReason; +#endif + diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_recognizer.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_recognizer.h new file mode 100644 index 0000000..69caba0 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_recognizer.h @@ -0,0 +1,66 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information.
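// Usage sketch (illustrative only, not part of the SDK headers): creating a speech
// recognizer from existing config handles (speechapi_c_factory.h above) and running
// continuous recognition with a recognized callback (declarations below). How the speech
// and audio config handles are obtained is out of scope; the callback is assumed to own
// and release the event handle, and return codes are ignored for brevity.
#include <stdint.h>
#include <stdio.h>
#include "speechapi_c_factory.h"
#include "speechapi_c_recognizer.h"
#include "speechapi_c_result.h"

static void OnRecognized(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext)
{
    (void)hreco; (void)pvContext;

    SPXRESULTHANDLE hresult = NULL;
    recognizer_recognition_event_get_result(hevent, &hresult);

    char text[1024] = { 0 };
    result_get_text(hresult, text, (uint32_t)sizeof(text));
    printf("recognized: %s\n", text);

    recognizer_result_handle_release(hresult);
    recognizer_event_handle_release(hevent);
}

static void RunContinuousRecognition(SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUDIOCONFIGHANDLE haudioinput)
{
    SPXRECOHANDLE hreco = NULL;
    recognizer_create_speech_recognizer_from_config(&hreco, hspeechconfig, haudioinput);

    recognizer_recognized_set_callback(hreco, OnRecognized, NULL);
    recognizer_start_continuous_recognition(hreco);

    // ... audio flows and OnRecognized fires for each final result ...

    recognizer_stop_continuous_recognition(hreco);
    recognizer_handle_release(hreco);
}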
+// +// speechapi_c_recognizer.h: Public API declarations for Recognizer related C methods and typedefs +// + +#pragma once +#include + + +SPXAPI_(bool) recognizer_handle_is_valid(SPXRECOHANDLE hreco); +SPXAPI recognizer_handle_release(SPXRECOHANDLE hreco); + +SPXAPI_(bool) recognizer_async_handle_is_valid(SPXASYNCHANDLE hasync); +SPXAPI recognizer_async_handle_release(SPXASYNCHANDLE hasync); + +SPXAPI_(bool) recognizer_result_handle_is_valid(SPXRESULTHANDLE hresult); +SPXAPI recognizer_result_handle_release(SPXRESULTHANDLE hresult); + +SPXAPI_(bool) recognizer_event_handle_is_valid(SPXEVENTHANDLE hevent); +SPXAPI recognizer_event_handle_release(SPXEVENTHANDLE hevent); + +SPXAPI recognizer_get_property_bag(SPXRECOHANDLE hreco, SPXPROPERTYBAGHANDLE* hpropbag); + +SPXAPI recognizer_recognize_once(SPXRECOHANDLE hreco, SPXRESULTHANDLE* phresult); +SPXAPI recognizer_recognize_once_async(SPXRECOHANDLE hreco, SPXASYNCHANDLE* phasync); +SPXAPI recognizer_recognize_text_once_async(SPXRECOHANDLE hreco, const char* text, SPXASYNCHANDLE* phasync); +SPXAPI recognizer_recognize_once_async_wait_for(SPXASYNCHANDLE hasync, uint32_t milliseconds, SPXRESULTHANDLE* phresult); + +SPXAPI recognizer_start_continuous_recognition(SPXRECOHANDLE hreco); +SPXAPI recognizer_start_continuous_recognition_async(SPXRECOHANDLE hreco, SPXASYNCHANDLE* phasync); +SPXAPI recognizer_start_continuous_recognition_async_wait_for(SPXASYNCHANDLE hasync, uint32_t milliseconds); + +SPXAPI recognizer_stop_continuous_recognition(SPXRECOHANDLE hreco); +SPXAPI recognizer_stop_continuous_recognition_async(SPXRECOHANDLE hreco, SPXASYNCHANDLE* phasync); +SPXAPI recognizer_stop_continuous_recognition_async_wait_for(SPXASYNCHANDLE hasync, uint32_t milliseconds); + +SPXAPI recognizer_start_keyword_recognition(SPXRECOHANDLE hreco, SPXKEYWORDHANDLE hkeyword); +SPXAPI recognizer_start_keyword_recognition_async(SPXRECOHANDLE hreco, SPXKEYWORDHANDLE hkeyword, SPXASYNCHANDLE* phasync); +SPXAPI recognizer_start_keyword_recognition_async_wait_for(SPXASYNCHANDLE hasync, uint32_t milliseconds); + +SPXAPI recognizer_recognize_keyword_once(SPXRECOHANDLE hreco, SPXKEYWORDHANDLE hkeyword, SPXRESULTHANDLE* phresult); +SPXAPI recognizer_recognize_keyword_once_async(SPXRECOHANDLE hreco, SPXKEYWORDHANDLE hkeyword, SPXASYNCHANDLE* phasync); +SPXAPI recognizer_recognize_keyword_once_async_wait_for(SPXASYNCHANDLE hasync, uint32_t milliseconds, SPXRESULTHANDLE* phresult); + +SPXAPI recognizer_stop_keyword_recognition(SPXRECOHANDLE hreco); +SPXAPI recognizer_stop_keyword_recognition_async(SPXRECOHANDLE hreco, SPXASYNCHANDLE* phasync); +SPXAPI recognizer_stop_keyword_recognition_async_wait_for(SPXASYNCHANDLE hasync, uint32_t milliseconds); + +typedef void (*PSESSION_CALLBACK_FUNC)(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext); +SPXAPI recognizer_session_started_set_callback(SPXRECOHANDLE hreco, PSESSION_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI recognizer_session_stopped_set_callback(SPXRECOHANDLE hreco, PSESSION_CALLBACK_FUNC pCallback, void* pvContext); + +typedef void (*PRECOGNITION_CALLBACK_FUNC)(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext); +SPXAPI recognizer_recognizing_set_callback(SPXRECOHANDLE hreco, PRECOGNITION_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI recognizer_recognized_set_callback(SPXRECOHANDLE hreco, PRECOGNITION_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI recognizer_canceled_set_callback(SPXRECOHANDLE hreco, PRECOGNITION_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI 
recognizer_speech_start_detected_set_callback(SPXRECOHANDLE hreco, PRECOGNITION_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI recognizer_speech_end_detected_set_callback(SPXRECOHANDLE hreco, PRECOGNITION_CALLBACK_FUNC pCallback, void* pvContext); + +SPXAPI recognizer_session_event_get_session_id(SPXEVENTHANDLE hevent, char* pszSessionId, uint32_t cchSessionId); +SPXAPI recognizer_recognition_event_get_offset(SPXEVENTHANDLE hevent, uint64_t *pszOffset); +SPXAPI recognizer_recognition_event_get_result(SPXEVENTHANDLE hevent, SPXRESULTHANDLE* phresult); + +SPXAPI recognizer_connection_event_get_property_bag(SPXEVENTHANDLE hevent, SPXPROPERTYBAGHANDLE* hpropbag); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_result.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_result.h new file mode 100644 index 0000000..26572eb --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_result.h @@ -0,0 +1,108 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_result.h: Public API declarations for Result related C methods and enumerations +// + +#pragma once +#include + +enum Result_Reason +{ + ResultReason_NoMatch = 0, + ResultReason_Canceled = 1, + ResultReason_RecognizingSpeech = 2, + ResultReason_RecognizedSpeech = 3, + ResultReason_RecognizingIntent = 4, + ResultReason_RecognizedIntent = 5, + ResultReason_TranslatingSpeech = 6, + ResultReason_TranslatedSpeech = 7, + ResultReason_SynthesizingAudio = 8, + ResultReason_SynthesizingAudioComplete = 9, + ResultReason_RecognizingKeyword = 10, + ResultReason_RecognizedKeyword = 11, + ResultReason_SynthesizingAudioStart = 12 +}; +typedef enum Result_Reason Result_Reason; + +enum Result_CancellationReason +{ + CancellationReason_Error = 1, + CancellationReason_EndOfStream = 2, + CancellationReason_UserCancelled = 3, +}; + +typedef enum Result_CancellationReason Result_CancellationReason; + +enum Result_CancellationErrorCode +{ + CancellationErrorCode_NoError = 0, + CancellationErrorCode_AuthenticationFailure = 1, + CancellationErrorCode_BadRequest = 2, + CancellationErrorCode_TooManyRequests = 3, + CancellationErrorCode_Forbidden = 4, + CancellationErrorCode_ConnectionFailure = 5, + CancellationErrorCode_ServiceTimeout = 6, + CancellationErrorCode_ServiceError = 7, + CancellationErrorCode_ServiceUnavailable = 8, + CancellationErrorCode_RuntimeError = 9 +}; +typedef enum Result_CancellationErrorCode Result_CancellationErrorCode; + +enum Result_NoMatchReason +{ + NoMatchReason_NotRecognized = 1, + NoMatchReason_InitialSilenceTimeout = 2, + NoMatchReason_InitialBabbleTimeout = 3, + NoMatchReason_KeywordNotRecognized = 4, + NoMatchReason_EndSilenceTimeout = 5 +}; +typedef enum Result_NoMatchReason Result_NoMatchReason; + +enum Synthesis_VoiceType +{ + SynthesisVoiceType_OnlineNeural = 1, + SynthesisVoiceType_OnlineStandard = 2, + SynthesisVoiceType_OfflineNeural = 3, + SynthesisVoiceType_OfflineStandard = 4 +}; +typedef enum Synthesis_VoiceType Synthesis_VoiceType; + +SPXAPI result_get_reason(SPXRESULTHANDLE hresult, Result_Reason* reason); +SPXAPI result_get_reason_canceled(SPXRESULTHANDLE hresult, Result_CancellationReason* reason); +SPXAPI result_get_canceled_error_code(SPXRESULTHANDLE hresult, Result_CancellationErrorCode* errorCode); +SPXAPI result_get_no_match_reason(SPXRESULTHANDLE hresult, Result_NoMatchReason* 
reason); + +SPXAPI result_get_result_id(SPXRESULTHANDLE hresult, char* pszResultId, uint32_t cchResultId); + +SPXAPI result_get_text(SPXRESULTHANDLE hresult, char* pszText, uint32_t cchText); +SPXAPI result_get_offset(SPXRESULTHANDLE hresult, uint64_t* offset); +SPXAPI result_get_duration(SPXRESULTHANDLE hresult, uint64_t* duration); + +SPXAPI result_get_property_bag(SPXRESULTHANDLE hresult, SPXPROPERTYBAGHANDLE* hpropbag); + +SPXAPI synth_result_get_result_id(SPXRESULTHANDLE hresult, char* resultId, uint32_t resultIdLength); +SPXAPI synth_result_get_reason(SPXRESULTHANDLE hresult, Result_Reason* reason); +SPXAPI synth_result_get_reason_canceled(SPXRESULTHANDLE hresult, Result_CancellationReason* reason); +SPXAPI synth_result_get_canceled_error_code(SPXRESULTHANDLE hresult, Result_CancellationErrorCode* errorCode); +SPXAPI synth_result_get_audio_data(SPXRESULTHANDLE hresult, uint8_t* buffer, uint32_t bufferSize, uint32_t* filledSize); +SPXAPI synth_result_get_audio_length_duration(SPXRESULTHANDLE hresult, uint32_t* audioLength, uint64_t* audioDuration); +SPXAPI synth_result_get_audio_format(SPXRESULTHANDLE hresult, SPXAUDIOSTREAMFORMATHANDLE* hformat); +SPXAPI synth_result_get_property_bag(SPXRESULTHANDLE hresult, SPXPROPERTYBAGHANDLE* hpropbag); + +SPXAPI synthesis_voices_result_get_result_id(SPXRESULTHANDLE hresult, char* resultId, uint32_t resultIdLength); +SPXAPI synthesis_voices_result_get_reason(SPXRESULTHANDLE hresult, Result_Reason* reason); +SPXAPI synthesis_voices_result_get_voice_num(SPXRESULTHANDLE hresult, uint32_t* voiceNum); +SPXAPI synthesis_voices_result_get_voice_info(SPXRESULTHANDLE hresult, uint32_t index, SPXRESULTHANDLE* hVoiceInfo); +SPXAPI synthesis_voices_result_get_property_bag(SPXRESULTHANDLE hresult, SPXPROPERTYBAGHANDLE* hpropbag); + +SPXAPI voice_info_handle_release(SPXRESULTHANDLE hVoiceInfo); +SPXAPI__(const char*) voice_info_get_name(SPXRESULTHANDLE hVoiceInfo); +SPXAPI__(const char*) voice_info_get_locale(SPXRESULTHANDLE hVoiceInfo); +SPXAPI__(const char*) voice_info_get_short_name(SPXRESULTHANDLE hVoiceInfo); +SPXAPI__(const char*) voice_info_get_local_name(SPXRESULTHANDLE hVoiceInfo); +SPXAPI__(const char*) voice_info_get_style_list(SPXRESULTHANDLE hVoiceInfo); +SPXAPI__(const char*) voice_info_get_voice_path(SPXRESULTHANDLE hVoiceInfo); +SPXAPI voice_info_get_voice_type(SPXRESULTHANDLE hVoiceInfo, Synthesis_VoiceType* voiceType); +SPXAPI voice_info_get_property_bag(SPXRESULTHANDLE hVoiceInfo, SPXPROPERTYBAGHANDLE* hpropbag); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_session.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_session.h new file mode 100644 index 0000000..c0a8186 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_session.h @@ -0,0 +1,16 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
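// Usage sketch (illustrative only, not part of the SDK headers): inspecting why a result
// carried no recognized speech, using the reason and cancellation getters and enums
// declared in speechapi_c_result.h above. Return codes are ignored for brevity.
#include <stdio.h>
#include "speechapi_c_result.h"

static void ExplainResult(SPXRESULTHANDLE hresult)
{
    Result_Reason reason;
    result_get_reason(hresult, &reason);

    if (reason == ResultReason_Canceled)
    {
        Result_CancellationReason cancelReason;
        Result_CancellationErrorCode errorCode;
        result_get_reason_canceled(hresult, &cancelReason);
        result_get_canceled_error_code(hresult, &errorCode);
        printf("canceled: reason=%d error=%d\n", (int)cancelReason, (int)errorCode);
    }
    else if (reason == ResultReason_NoMatch)
    {
        Result_NoMatchReason noMatch;
        result_get_no_match_reason(hresult, &noMatch);
        printf("no match: reason=%d\n", (int)noMatch);
    }
}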
+// +// speechapi_c_session.h: Public API declarations for Session related C methods +// + +#pragma once +#include + +SPXAPI session_from_recognizer(SPXRECOHANDLE hreco, SPXSESSIONHANDLE* phsession); + +SPXAPI_(bool) session_handle_is_valid(SPXSESSIONHANDLE hsession); +SPXAPI session_handle_release(SPXSESSIONHANDLE hsession); + +SPXAPI session_get_property_bag(SPXSESSIONHANDLE hsession, SPXPROPERTYBAGHANDLE* hpropbag); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_source_lang_config.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_source_lang_config.h new file mode 100644 index 0000000..53da418 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_source_lang_config.h @@ -0,0 +1,13 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include + +SPXAPI source_lang_config_from_language(SPXSOURCELANGCONFIGHANDLE* hconfig, const char* language); +SPXAPI source_lang_config_from_language_and_endpointId(SPXSOURCELANGCONFIGHANDLE* hconfig, const char* language, const char* endpointId); +SPXAPI_(bool) source_lang_config_is_handle_valid(SPXSOURCELANGCONFIGHANDLE hconfig); +SPXAPI source_lang_config_release(SPXSOURCELANGCONFIGHANDLE hconfig); +SPXAPI source_lang_config_get_property_bag(SPXSOURCELANGCONFIGHANDLE hconfig, SPXPROPERTYBAGHANDLE* hpropbag); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_speaker_recognition.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_speaker_recognition.h new file mode 100644 index 0000000..84d4b74 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_speaker_recognition.h @@ -0,0 +1,37 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_speaker_recogntion.h: c API declarations for speaker recognition. 
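// Usage sketch (illustrative only, not part of the SDK headers): pinning the recognition
// language with a source language config (speechapi_c_source_lang_config.h above) when
// creating a recognizer via speechapi_c_factory.h. The "de-DE" locale is hypothetical;
// return codes are ignored for brevity.
#include "speechapi_c_source_lang_config.h"
#include "speechapi_c_factory.h"
#include "speechapi_c_recognizer.h"

static SPXRECOHANDLE CreateGermanRecognizer(SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUDIOCONFIGHANDLE haudioinput)
{
    SPXSOURCELANGCONFIGHANDLE hlang = NULL;
    SPXRECOHANDLE hreco = NULL;

    source_lang_config_from_language(&hlang, "de-DE");
    recognizer_create_speech_recognizer_from_source_lang_config(&hreco, hspeechconfig, hlang, haudioinput);

    // Released here on the assumption that the recognizer copies what it needs
    // from the language config during creation.
    source_lang_config_release(hlang);
    return hreco;
}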
+// + +#pragma once + +#include + +SPXAPI create_voice_profile_client_from_config(SPXVOICEPROFILECLIENTHANDLE* phclient, SPXSPEECHCONFIGHANDLE hSpeechConfig); +SPXAPI voice_profile_client_release_handle(SPXVOICEPROFILECLIENTHANDLE hVoiceClient); +SPXAPI create_voice_profile(SPXVOICEPROFILECLIENTHANDLE hVoiceProfileClient, int id, const char* locale, SPXVOICEPROFILEHANDLE* pProfileHandle); + +SPXAPI enroll_voice_profile(SPXVOICEPROFILECLIENTHANDLE hVoiceProfileClient, SPXVOICEPROFILEHANDLE hProfileHandle, SPXAUDIOCONFIGHANDLE hAudioInput, SPXRESULTHANDLE* phresult); +SPXAPI voice_profile_client_get_property_bag(SPXVOICEPROFILECLIENTHANDLE hVoiceProfileClient, SPXPROPERTYBAGHANDLE* hpropbag); +SPXAPI create_voice_profile_from_id_and_type(SPXVOICEPROFILEHANDLE* phVoiceProfile, const char* id, int type); +SPXAPI voice_profile_get_id(SPXVOICEPROFILEHANDLE hVoiceProfile, char* psz, uint32_t* pcch); +SPXAPI voice_profile_get_type(SPXVOICEPROFILEHANDLE hVoiceProfile, int* ptype); +SPXAPI voice_profile_release_handle(SPXVOICEPROFILEHANDLE hVoiceProfile); +SPXAPI voice_profile_get_property_bag(SPXVOICEPROFILEHANDLE voiceprofilehandle, SPXPROPERTYBAGHANDLE* pProperties); +SPXAPI delete_voice_profile(SPXVOICEPROFILECLIENTHANDLE hclient, SPXVOICEPROFILEHANDLE hProfileHandle, SPXRESULTHANDLE* phresult); +SPXAPI reset_voice_profile(SPXVOICEPROFILECLIENTHANDLE hVoiceProfileClient, SPXVOICEPROFILEHANDLE hProfileHandle, SPXRESULTHANDLE* phresult); +SPXAPI get_profiles_json(SPXVOICEPROFILECLIENTHANDLE hVoiceProfileClient, int type, char** ppsz, size_t* pcch); +SPXAPI retrieve_enrollment_result(SPXVOICEPROFILECLIENTHANDLE hVoiceProfileClient, const char* pId, int type, SPXVOICEPROFILEHANDLE* phVoiceProfile); +SPXAPI get_activation_phrases(SPXVOICEPROFILECLIENTHANDLE hVoiceProfileClient, const char* pLocale, int type, SPXRESULTHANDLE* phresult); +SPXAPI recognizer_create_speaker_recognizer_from_config(SPXSPEAKERIDHANDLE* phspeakerid, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI speaker_recognizer_release_handle(SPXSPEAKERIDHANDLE phspeakerid); +SPXAPI speaker_recognizer_get_property_bag(SPXSPEAKERIDHANDLE phspeakerid, SPXPROPERTYBAGHANDLE* hpropbag); +SPXAPI speaker_identification_model_create(SPXSIMODELHANDLE* psimodel); +SPXAPI speaker_identification_model_add_profile(SPXSIMODELHANDLE hsimodel, SPXVOICEPROFILEHANDLE hprofile); +SPXAPI speaker_identification_model_release_handle(SPXSIMODELHANDLE hmodel); +SPXAPI speaker_recognizer_identify(SPXSPEAKERIDHANDLE phspeakerid, SPXSIMODELHANDLE hsimodel, SPXRESULTHANDLE* phresult); +SPXAPI speaker_recognizer_verify(SPXSPEAKERIDHANDLE phspeakerid, SPXSVMODELHANDLE hsvmodel, SPXRESULTHANDLE* phresult); +SPXAPI speaker_verification_model_create(SPXSVMODELHANDLE* psvmodel, SPXVOICEPROFILEHANDLE hprofile); +SPXAPI speaker_verification_model_release_handle(SPXSVMODELHANDLE hsvmodel); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_speech_config.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_speech_config.h new file mode 100644 index 0000000..ef2c521 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_speech_config.h @@ -0,0 +1,171 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
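Usage note (editorial): the speaker-recognition declarations above imply an enroll-then-verify flow. The sketch below strings the calls together in that assumed order; the profile-type integer, the speech-config and audio-config handles, and the omission of result-handle cleanup are all assumptions rather than something this header documents.

// Sketch only: verify a speaker against a freshly enrolled voice profile.
void VerifySpeakerSketch(SPXSPEECHCONFIGHANDLE hSpeechConfig, SPXAUDIOCONFIGHANDLE hAudioInput, int profileType)
{
    SPXVOICEPROFILECLIENTHANDLE hclient = SPXHANDLE_INVALID;
    SPXVOICEPROFILEHANDLE hprofile = SPXHANDLE_INVALID;
    SPXSVMODELHANDLE hmodel = SPXHANDLE_INVALID;
    SPXSPEAKERIDHANDLE hspeaker = SPXHANDLE_INVALID;
    SPXRESULTHANDLE hresult = SPXHANDLE_INVALID;

    SPX_THROW_ON_FAIL(create_voice_profile_client_from_config(&hclient, hSpeechConfig));
    SPX_THROW_ON_FAIL(create_voice_profile(hclient, profileType, "en-US", &hprofile));
    SPX_THROW_ON_FAIL(enroll_voice_profile(hclient, hprofile, hAudioInput, &hresult));   // repeat until enrollment completes

    SPX_THROW_ON_FAIL(speaker_verification_model_create(&hmodel, hprofile));
    SPX_THROW_ON_FAIL(recognizer_create_speaker_recognizer_from_config(&hspeaker, hSpeechConfig, hAudioInput));
    SPX_THROW_ON_FAIL(speaker_recognizer_verify(hspeaker, hmodel, &hresult));

    speaker_verification_model_release_handle(hmodel);
    speaker_recognizer_release_handle(hspeaker);
    voice_profile_release_handle(hprofile);
    voice_profile_client_release_handle(hclient);
}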
+// + +#pragma once +#include + +typedef enum { SpeechOutputFormat_Simple = 0, SpeechOutputFormat_Detailed = 1 } SpeechOutputFormat; + +typedef enum +{ + // raw-8khz-8bit-mono-mulaw + SpeechSynthesisOutputFormat_Raw8Khz8BitMonoMULaw = 1, + + // riff-16khz-16kbps-mono-siren + // Unsupported by the service. Do not use this value. + SpeechSynthesisOutputFormat_Riff16Khz16KbpsMonoSiren = 2, + + // audio-16khz-16kbps-mono-siren + // Unsupported by the service. Do not use this value. + SpeechSynthesisOutputFormat_Audio16Khz16KbpsMonoSiren = 3, + + // audio-16khz-32kbitrate-mono-mp3 + SpeechSynthesisOutputFormat_Audio16Khz32KBitRateMonoMp3 = 4, + + // audio-16khz-128kbitrate-mono-mp3 + SpeechSynthesisOutputFormat_Audio16Khz128KBitRateMonoMp3 = 5, + + // audio-16khz-64kbitrate-mono-mp3 + SpeechSynthesisOutputFormat_Audio16Khz64KBitRateMonoMp3 = 6, + + // audio-24khz-48kbitrate-mono-mp3 + SpeechSynthesisOutputFormat_Audio24Khz48KBitRateMonoMp3 = 7, + + // audio-24khz-96kbitrate-mono-mp3 + SpeechSynthesisOutputFormat_Audio24Khz96KBitRateMonoMp3 = 8, + + // audio-24khz-160kbitrate-mono-mp3 + SpeechSynthesisOutputFormat_Audio24Khz160KBitRateMonoMp3 = 9, + + // raw-16khz-16bit-mono-truesilk + SpeechSynthesisOutputFormat_Raw16Khz16BitMonoTrueSilk = 10, + + // riff-16khz-16bit-mono-pcm + SpeechSynthesisOutputFormat_Riff16Khz16BitMonoPcm = 11, + + // riff-8khz-16bit-mono-pcm + SpeechSynthesisOutputFormat_Riff8Khz16BitMonoPcm = 12, + + // riff-24khz-16bit-mono-pcm + SpeechSynthesisOutputFormat_Riff24Khz16BitMonoPcm = 13, + + // riff-8khz-8bit-mono-mulaw + SpeechSynthesisOutputFormat_Riff8Khz8BitMonoMULaw = 14, + + // raw-16khz-16bit-mono-pcm + SpeechSynthesisOutputFormat_Raw16Khz16BitMonoPcm = 15, + + // raw-24khz-16bit-mono-pcm + SpeechSynthesisOutputFormat_Raw24Khz16BitMonoPcm = 16, + + // raw-8khz-16bit-mono-pcm + SpeechSynthesisOutputFormat_Raw8Khz16BitMonoPcm = 17, + + // ogg-16khz-16bit-mono-opus + SpeechSynthesisOutputFormat_Ogg16khz16BitMonoOpus = 18, + + // ogg-24khz-24bit-mono-opus + SpeechSynthesisOutputFormat_Ogg24Khz16BitMonoOpus = 19, + + // raw-48khz-16bit-mono-pcm + SpeechSynthesisOutputFormat_Raw48Khz16BitMonoPcm = 20, + + // riff-48khz-16bit-mono-pcm + SpeechSynthesisOutputFormat_Riff48Khz16BitMonoPcm = 21, + + // audio-48khz-96kbitrate-mono-mp3 + SpeechSynthesisOutputFormat_Audio48Khz96KBitRateMonoMp3 = 22, + + // audio-48khz-192kbitrate-mono-mp3 + SpeechSynthesisOutputFormat_Audio48Khz192KBitRateMonoMp3 = 23, + + // ogg-48khz-16bit-mono-opus + SpeechSynthesisOutputFormat_Ogg48Khz16BitMonoOpus = 24, + + // webm-16khz-16bit-mono-opus + SpeechSynthesisOutputFormat_Webm16Khz16BitMonoOpus = 25, + + // webm-24khz-16bit-mono-opus + SpeechSynthesisOutputFormat_Webm24Khz16BitMonoOpus = 26, + + // raw-24khz-16bit-mono-truesilk + SpeechSynthesisOutputFormat_Raw24Khz16BitMonoTrueSilk = 27, + + // raw-8khz-8bit-mono-alaw + SpeechSynthesisOutputFormat_Raw8Khz8BitMonoALaw = 28, + + // riff-8khz-8bit-mono-alaw + SpeechSynthesisOutputFormat_Riff8Khz8BitMonoALaw = 29, + + // webm-24khz-16bit-24kbps-mono-opus + // Audio compressed by OPUS codec in a WebM container, with bitrate of 24kbps, optimized for IoT scenario. + SpeechSynthesisOutputFormat_Webm24Khz16Bit24KbpsMonoOpus = 30, + + // audio-16khz-16bit-32kbps-mono-opus + // Audio compressed by OPUS codec without container, with bitrate of 32kbps. + SpeechSynthesisOutputFormat_Audio16Khz16Bit32KbpsMonoOpus = 31, + + // audio-24khz-48bit-mono-opus + // Audio compressed by OPUS codec without container, with bitrate of 48kbps. 
+ SpeechSynthesisOutputFormat_Audio24Khz16Bit48KbpsMonoOpus = 32, + + // audio-24khz-24bit-mono-opus + // Audio compressed by OPUS codec without container, with bitrate of 24kbps. + SpeechSynthesisOutputFormat_Audio24Khz16Bit24KbpsMonoOpus = 33, + + // raw-22050hz-16bit-mono-pcm + // Raw PCM audio at 22050Hz sampling rate and 16-bit depth. + SpeechSynthesisOutputFormat_Raw22050Hz16BitMonoPcm = 34, + + // riff-22050hz-16bit-mono-pcm + // PCM audio at 22050Hz sampling rate and 16-bit depth, with RIFF header. + SpeechSynthesisOutputFormat_Riff22050Hz16BitMonoPcm = 35, + + // raw-44100hz-16bit-mono-pcm + // Raw PCM audio at 44100Hz sampling rate and 16-bit depth. + SpeechSynthesisOutputFormat_Raw44100Hz16BitMonoPcm = 36, + + // riff-44100hz-16bit-mono-pcm + // PCM audio at 44100Hz sampling rate and 16-bit depth, with RIFF header. + SpeechSynthesisOutputFormat_Riff44100Hz16BitMonoPcm = 37, + + /// amr-wb-16000hz + /// AMR-WB audio at 16kHz sampling rate. + /// (Added in 1.24.0) + SpeechSynthesisOutputFormat_AmrWb16000Hz = 38, + + /// g722-16khz-64kbps + /// G.722 audio at 16kHz sampling rate and 64kbps bitrate. + /// (Added in 1.38.0) + SpeechSynthesisOutputFormat_G72216Khz64Kbps = 39, +} Speech_Synthesis_Output_Format; + +typedef enum +{ + // Using URI query parameter to pass property settings to service. + SpeechConfig_ServicePropertyChannel_UriQueryParameter = 0, + + // Using HttpHeader to set a key/value in a HTTP header. + SpeechConfig_ServicePropertyChannel_HttpHeader = 1 +} SpeechConfig_ServicePropertyChannel; + +typedef enum +{ + SpeechConfig_ProfanityMasked = 0, + SpeechConfig_ProfanityRemoved = 1, + SpeechConfig_ProfanityRaw = 2 +} SpeechConfig_ProfanityOption; + +SPXAPI_(bool) speech_config_is_handle_valid(SPXSPEECHCONFIGHANDLE hconfig); +SPXAPI speech_config_from_subscription(SPXSPEECHCONFIGHANDLE* hconfig, const char* subscription, const char* region); +SPXAPI speech_config_from_authorization_token(SPXSPEECHCONFIGHANDLE* hconfig, const char* authToken, const char* region); +SPXAPI speech_config_from_endpoint(SPXSPEECHCONFIGHANDLE * hconfig, const char* endpoint, const char* subscription); +SPXAPI speech_config_from_host(SPXSPEECHCONFIGHANDLE* hconfig, const char* host, const char* subscription); +SPXAPI speech_config_release(SPXSPEECHCONFIGHANDLE hconfig); +SPXAPI speech_config_get_property_bag(SPXSPEECHCONFIGHANDLE hconfig, SPXPROPERTYBAGHANDLE* hpropbag); +SPXAPI speech_config_set_audio_output_format(SPXSPEECHCONFIGHANDLE hconfig, Speech_Synthesis_Output_Format formatId); +SPXAPI speech_config_set_service_property(SPXSPEECHCONFIGHANDLE configHandle, const char* propertyName, const char* propertyValue, SpeechConfig_ServicePropertyChannel channel); +SPXAPI speech_config_set_profanity(SPXSPEECHCONFIGHANDLE configHandle, SpeechConfig_ProfanityOption profanity); + diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_speech_recognition_model.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_speech_recognition_model.h new file mode 100644 index 0000000..0c0dada --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_speech_recognition_model.h @@ -0,0 +1,13 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
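Usage note (editorial): a typical caller creates the config, picks one of the synthesis output formats enumerated above, and sets a profanity option before building a recognizer or synthesizer. A short sketch follows; the key and region are placeholders, and SPX_THROW_ON_FAIL comes from spxdebug.h later in this diff.

SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID;
SPX_THROW_ON_FAIL(speech_config_from_subscription(&hconfig, "<subscription-key>", "<region>"));

// e.g. 24 kHz RIFF PCM, which is easy to hand to an engine-side audio component
SPX_THROW_ON_FAIL(speech_config_set_audio_output_format(hconfig, SpeechSynthesisOutputFormat_Riff24Khz16BitMonoPcm));
SPX_THROW_ON_FAIL(speech_config_set_profanity(hconfig, SpeechConfig_ProfanityMasked));

// ... create a recognizer or synthesizer from hconfig ...
speech_config_release(hconfig);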
+// + +#pragma once +#include + +SPXAPI speech_recognition_model_handle_release(SPXSPEECHRECOMODELHANDLE hmodel); +SPXAPI__(const char*) speech_recognition_model_get_name(SPXSPEECHRECOMODELHANDLE hmodel); +SPXAPI__(const char*) speech_recognition_model_get_locales(SPXSPEECHRECOMODELHANDLE hmodel); +SPXAPI__(const char*) speech_recognition_model_get_path(SPXSPEECHRECOMODELHANDLE hmodel); +SPXAPI__(const char*) speech_recognition_model_get_version(SPXSPEECHRECOMODELHANDLE hmodel); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_speech_translation_config.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_speech_translation_config.h new file mode 100644 index 0000000..ed2ea7c --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_speech_translation_config.h @@ -0,0 +1,16 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include + +SPXAPI speech_translation_config_from_subscription(SPXSPEECHCONFIGHANDLE* configHandle, const char* subscription, const char* region); +SPXAPI speech_translation_config_from_authorization_token(SPXSPEECHCONFIGHANDLE* configHandle, const char* authToken, const char* region); +SPXAPI speech_translation_config_from_endpoint(SPXSPEECHCONFIGHANDLE* configHandle, const char* endpoint, const char* subscription); +SPXAPI speech_translation_config_from_host(SPXSPEECHCONFIGHANDLE* configHandle, const char* host, const char* subscription); + +SPXAPI speech_translation_config_add_target_language(SPXSPEECHCONFIGHANDLE configHandle, const char* language); +SPXAPI speech_translation_config_remove_target_language(SPXSPEECHCONFIGHANDLE configHandle, const char* language); +SPXAPI speech_translation_config_set_custom_model_category_id(SPXSPEECHCONFIGHANDLE configHandle, const char* categoryId); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_speech_translation_model.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_speech_translation_model.h new file mode 100644 index 0000000..cca8d4d --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_speech_translation_model.h @@ -0,0 +1,14 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
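Usage note (editorial): the translation config above reuses the speech-config handle type, so target languages are added to (or removed from) an SPXSPEECHCONFIGHANDLE before the recognizer is created. Sketch, with placeholder credentials; releasing via the shared speech_config_release is an assumption based on the common handle type.

SPXSPEECHCONFIGHANDLE htranslation = SPXHANDLE_INVALID;
SPX_THROW_ON_FAIL(speech_translation_config_from_subscription(&htranslation, "<subscription-key>", "<region>"));
SPX_THROW_ON_FAIL(speech_translation_config_add_target_language(htranslation, "de"));
SPX_THROW_ON_FAIL(speech_translation_config_add_target_language(htranslation, "fr"));
SPX_THROW_ON_FAIL(speech_translation_config_remove_target_language(htranslation, "fr"));   // targets can be dropped again
// ... create a translation recognizer from htranslation, then release the config ...
speech_config_release(htranslation);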
+// + +#pragma once +#include + +SPXAPI speech_translation_model_handle_release(SPXSPEECHRECOMODELHANDLE hmodel); +SPXAPI__(const char*) speech_translation_model_get_name(SPXSPEECHRECOMODELHANDLE hmodel); +SPXAPI__(const char*) speech_translation_model_get_source_languages(SPXSPEECHRECOMODELHANDLE hmodel); +SPXAPI__(const char*) speech_translation_model_get_target_languages(SPXSPEECHRECOMODELHANDLE hmodel); +SPXAPI__(const char*) speech_translation_model_get_path(SPXSPEECHRECOMODELHANDLE hmodel); +SPXAPI__(const char*) speech_translation_model_get_version(SPXSPEECHRECOMODELHANDLE hmodel); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_synthesis_request.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_synthesis_request.h new file mode 100644 index 0000000..90af519 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_synthesis_request.h @@ -0,0 +1,17 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include + +SPXAPI speech_synthesis_request_create(bool textStreamingEnabled, bool isSSML, const char* inputText, uint32_t textLength, SPXREQUESTHANDLE* hrequest); +SPXAPI speech_synthesis_request_set_voice(SPXREQUESTHANDLE hrequest, const char* voice, const char* personalVoice, const char* modelName); +SPXAPI speech_synthesis_request_send_text_piece(SPXREQUESTHANDLE hrequest, const char* text, uint32_t textLength); +SPXAPI speech_synthesis_request_finish(SPXREQUESTHANDLE hrequest); +SPXAPI speech_synthesis_request_handle_is_valid(SPXREQUESTHANDLE hrequest); +SPXAPI speech_synthesis_request_release(SPXREQUESTHANDLE hrequest); + +SPXAPI speech_synthesis_request_get_property_bag(SPXREQUESTHANDLE hrequest, SPXPROPERTYBAGHANDLE* hpropbag); + diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_synthesizer.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_synthesizer.h new file mode 100644 index 0000000..af1385d --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_synthesizer.h @@ -0,0 +1,74 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
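Usage note (editorial): the request API above suggests a streaming flow in which text is pushed piecewise and then finished. The sketch below is an assumption-heavy reading of those signatures: the nullptr/0 initial text for a streaming request, the voice name, and the point at which the request handle may be released are not documented in this excerpt.

SPXREQUESTHANDLE hrequest = SPXHANDLE_INVALID;
SPX_THROW_ON_FAIL(speech_synthesis_request_create(
    /*textStreamingEnabled*/ true, /*isSSML*/ false, /*inputText*/ nullptr, /*textLength*/ 0, &hrequest));
SPX_THROW_ON_FAIL(speech_synthesis_request_set_voice(hrequest, "en-US-JennyNeural", nullptr, nullptr));

// Feed text incrementally as it becomes available, then mark the request complete.
const char piece[] = "Hello from the avatar.";
SPX_THROW_ON_FAIL(speech_synthesis_request_send_text_piece(hrequest, piece, (uint32_t)(sizeof(piece) - 1)));
SPX_THROW_ON_FAIL(speech_synthesis_request_finish(hrequest));

// ... hand hrequest to synthesizer_speak_request_async (declared below), then release ...
speech_synthesis_request_release(hrequest);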
+// +// speechapi_c_synthesizer.h: Public API declarations for Synthesizer related C methods and typedefs +// + +#pragma once +#include +#include +#include + + +enum SpeechSynthesis_BoundaryType +{ + SpeechSynthesis_BoundaryType_Word = 0, + SpeechSynthesis_BoundaryType_Punctuation = 1, + SpeechSynthesis_BoundaryType_Sentence = 2 +}; +typedef enum SpeechSynthesis_BoundaryType SpeechSynthesis_BoundaryType; + +SPXAPI_(bool) synthesizer_handle_is_valid(SPXSYNTHHANDLE hsynth); +SPXAPI synthesizer_handle_release(SPXSYNTHHANDLE hsynth); + +SPXAPI_(bool) synthesizer_async_handle_is_valid(SPXASYNCHANDLE hasync); +SPXAPI synthesizer_async_handle_release(SPXASYNCHANDLE hasync); + +SPXAPI_(bool) synthesizer_result_handle_is_valid(SPXRESULTHANDLE hresult); +SPXAPI synthesizer_result_handle_release(SPXRESULTHANDLE hresult); + +SPXAPI_(bool) synthesizer_event_handle_is_valid(SPXEVENTHANDLE hevent); +SPXAPI synthesizer_event_handle_release(SPXEVENTHANDLE hevent); + +SPXAPI synthesizer_get_property_bag(SPXSYNTHHANDLE hsynth, SPXPROPERTYBAGHANDLE* hpropbag); + +SPXAPI synthesizer_speak_text(SPXSYNTHHANDLE hsynth, const char* text, uint32_t textLength, SPXRESULTHANDLE* phresult); +SPXAPI synthesizer_speak_ssml(SPXSYNTHHANDLE hsynth, const char* ssml, uint32_t ssmlLength, SPXRESULTHANDLE* phresult); +SPXAPI synthesizer_speak_request(SPXSYNTHHANDLE hsynth, SPXREQUESTHANDLE hrequest, SPXRESULTHANDLE* phresult); +SPXAPI synthesizer_speak_text_async(SPXSYNTHHANDLE hsynth, const char* text, uint32_t textLength, SPXASYNCHANDLE* phasync); +SPXAPI synthesizer_speak_ssml_async(SPXSYNTHHANDLE hsynth, const char* ssml, uint32_t ssmlLength, SPXASYNCHANDLE* phasync); +SPXAPI synthesizer_speak_request_async(SPXSYNTHHANDLE hsynth, SPXREQUESTHANDLE hrequest, SPXASYNCHANDLE* phasync); +SPXAPI synthesizer_start_speaking_text(SPXSYNTHHANDLE hsynth, const char* text, uint32_t textLength, SPXRESULTHANDLE* phresult); +SPXAPI synthesizer_start_speaking_ssml(SPXSYNTHHANDLE hsynth, const char* ssml, uint32_t ssmlLength, SPXRESULTHANDLE* phresult); +SPXAPI synthesizer_start_speaking_request(SPXSYNTHHANDLE hsynth, SPXREQUESTHANDLE hrequest, SPXRESULTHANDLE* phresult); +SPXAPI synthesizer_start_speaking_text_async(SPXSYNTHHANDLE hsynth, const char* text, uint32_t textLength, SPXASYNCHANDLE* phasync); +SPXAPI synthesizer_start_speaking_ssml_async(SPXSYNTHHANDLE hsynth, const char* ssml, uint32_t ssmlLength, SPXASYNCHANDLE* phasync); +SPXAPI synthesizer_speak_async_wait_for(SPXASYNCHANDLE hasync, uint32_t milliseconds, SPXRESULTHANDLE* phresult); +SPXAPI synthesizer_stop_speaking(SPXSYNTHHANDLE hsynth); +SPXAPI synthesizer_stop_speaking_async(SPXSYNTHHANDLE hsynth, SPXASYNCHANDLE* phasync); +SPXAPI synthesizer_stop_speaking_async_wait_for(SPXASYNCHANDLE hasync, uint32_t milliseconds); + +SPXAPI synthesizer_get_voices_list(SPXSYNTHHANDLE hsynth, const char* locale, SPXRESULTHANDLE* phresult); +SPXAPI synthesizer_get_voices_list_async(SPXSYNTHHANDLE hsynth, const char* locale, SPXASYNCHANDLE* phasync); +SPXAPI synthesizer_get_voices_list_async_wait_for(SPXASYNCHANDLE hasync, uint32_t milliseconds, SPXRESULTHANDLE* phresult); + +typedef void(*PSYNTHESIS_CALLBACK_FUNC)(SPXSYNTHHANDLE hsynth, SPXEVENTHANDLE hevent, void* pvContext); +SPXAPI synthesizer_started_set_callback(SPXSYNTHHANDLE hsynth, PSYNTHESIS_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI synthesizer_synthesizing_set_callback(SPXSYNTHHANDLE hsynth, PSYNTHESIS_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI synthesizer_completed_set_callback(SPXSYNTHHANDLE hsynth, 
PSYNTHESIS_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI synthesizer_canceled_set_callback(SPXSYNTHHANDLE hsynth, PSYNTHESIS_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI synthesizer_word_boundary_set_callback(SPXSYNTHHANDLE hsynth, PSYNTHESIS_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI synthesizer_viseme_received_set_callback(SPXSYNTHHANDLE hsynth, PSYNTHESIS_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI synthesizer_bookmark_reached_set_callback(SPXSYNTHHANDLE hsynth, PSYNTHESIS_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI synthesizer_connection_connected_set_callback(SPXCONNECTIONHANDLE hConnection, CONNECTION_CALLBACK_FUNC pCallback, void * pvContext); +SPXAPI synthesizer_connection_disconnected_set_callback(SPXCONNECTIONHANDLE hConnection, CONNECTION_CALLBACK_FUNC pCallback, void * pvContext); + +SPXAPI synthesizer_synthesis_event_get_result(SPXEVENTHANDLE hevent, SPXRESULTHANDLE* phresult); +SPXAPI synthesizer_word_boundary_event_get_values(SPXEVENTHANDLE hevent, uint64_t *pAudioOffset, uint64_t *pDuration, + uint32_t *pTextOffset, uint32_t *pWordLength, SpeechSynthesis_BoundaryType *pBoundaryType); +SPXAPI synthesizer_event_get_result_id(SPXEVENTHANDLE hEvent, char* resultId, uint32_t resultIdLength); +SPXAPI__(const char*) synthesizer_event_get_text(SPXEVENTHANDLE hEvent); +SPXAPI synthesizer_viseme_event_get_values(SPXEVENTHANDLE hevent, uint64_t* pAudioOffset, uint32_t* pVisemeId); +SPXAPI__(const char*) synthesizer_viseme_event_get_animation(SPXEVENTHANDLE hEvent); +SPXAPI synthesizer_bookmark_event_get_values(SPXEVENTHANDLE hevent, uint64_t* pAudioOffset); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_translation_recognizer.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_translation_recognizer.h new file mode 100644 index 0000000..8912ffe --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_translation_recognizer.h @@ -0,0 +1,17 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + + +#pragma once +#include + + +// Todo: Translation recognizer management API. + +typedef void(*PTRANSLATIONSYNTHESIS_CALLBACK_FUNC)(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext); +SPXAPI translator_synthesizing_audio_set_callback(SPXRECOHANDLE hreco, PTRANSLATIONSYNTHESIS_CALLBACK_FUNC pCallback, void* pvContext); + +SPXAPI translator_add_target_language(SPXRECOHANDLE hreco, const char* language); +SPXAPI translator_remove_target_language(SPXRECOHANDLE hreco, const char* language); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_translation_result.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_translation_result.h new file mode 100644 index 0000000..60bcae0 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_translation_result.h @@ -0,0 +1,14 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
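Usage note (editorial): for an avatar, the viseme callback above is the interesting hook, since it delivers (audio offset, viseme id) pairs that can drive mouth shapes. The sketch below wires it up around a blocking speak call; event-handle ownership inside the callback and the units of the offset are assumptions, and strlen/SPX_THROW_ON_FAIL come from <cstring> and spxdebug.h respectively.

#include <cstring>

// Sketch only: speak one line of text and receive viseme events while it renders.
static void OnViseme(SPXSYNTHHANDLE /*hsynth*/, SPXEVENTHANDLE hevent, void* /*pvContext*/)
{
    uint64_t audioOffset = 0;   // assumed ticks into the synthesized audio
    uint32_t visemeId = 0;
    if (SPX_SUCCEEDED(synthesizer_viseme_event_get_values(hevent, &audioOffset, &visemeId)))
    {
        // map (audioOffset, visemeId) onto the avatar's mouth-shape track here
    }
}

void SpeakWithVisemes(SPXSYNTHHANDLE hsynth, const char* text)
{
    SPX_THROW_ON_FAIL(synthesizer_viseme_received_set_callback(hsynth, OnViseme, nullptr));

    SPXRESULTHANDLE hresult = SPXHANDLE_INVALID;
    SPX_THROW_ON_FAIL(synthesizer_speak_text(hsynth, text, (uint32_t)std::strlen(text), &hresult));
    synthesizer_result_handle_release(hresult);
}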
+// + +#pragma once +#include + +SPXAPI translation_text_result_get_translation_count(SPXRESULTHANDLE handle, size_t * size); +SPXAPI translation_text_result_get_translation(SPXRESULTHANDLE handle, size_t index, char * language, char * text, size_t * language_size, size_t * text_size); + +// audioBuffer: point to the header for storing synthesis audio data. The parameter lengthPointer points to the variable saving the size of buffer. On return, *lengthPointer is set to the size of the buffer returned. +// If textBuffer is nullptr or the length is smaller than the size required, the function returns SPXERR_BUFFER_TOO_SMALL. +SPXAPI translation_synthesis_result_get_audio_data(SPXRESULTHANDLE handle, uint8_t* audioBuffer, size_t* lengthPointer); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_user.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_user.h new file mode 100644 index 0000000..bd4d705 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/speechapi_c_user.h @@ -0,0 +1,13 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_user.h: Public API declarations for user related C methods and enumerations +// + +#pragma once +#include + +SPXAPI user_create_from_id(const char* user_id, SPXUSERHANDLE* huser); +SPXAPI user_release_handle(SPXUSERHANDLE huser); +SPXAPI user_get_id(SPXUSERHANDLE huser, char* user_id, size_t user_id_size); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/spxdebug.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/spxdebug.h new file mode 100644 index 0000000..6e0131b --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/spxdebug.h @@ -0,0 +1,548 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// spxdebug.h: Public API definitions for global C Trace/Debug methods and related #defines +// + +#pragma once + +//------------------------------------------------------- +// Re-enabled ability to compile out all macros... +// However, currently still need to keep all macros until +// final review of all macros is complete. 
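Usage note (editorial): the comment on translation_synthesis_result_get_audio_data above describes a query-then-copy contract. The sketch below assumes the required size is reported back through lengthPointer on the too-small call, which the comment implies but does not state outright; SPXHR and SPXERR_BUFFER_TOO_SMALL are defined in spxerror.h later in this diff.

#include <cstdint>
#include <vector>

// Sketch only: copy the synthesized translation audio out of a result handle.
std::vector<uint8_t> CopyTranslationAudio(SPXRESULTHANDLE hresult)
{
    std::vector<uint8_t> audio;
    size_t required = 0;
    SPXHR hr = translation_synthesis_result_get_audio_data(hresult, nullptr, &required);
    if (hr == SPXERR_BUFFER_TOO_SMALL && required > 0)
    {
        audio.resize(required);
        size_t filled = audio.size();
        SPX_THROW_ON_FAIL(translation_synthesis_result_get_audio_data(hresult, audio.data(), &filled));
        audio.resize(filled);   // keep only the bytes actually written
    }
    return audio;
}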
+//------------------------------------------------------- +#define SPX_CONFIG_TRACE_INCLUDE_DBG_WITH_ALL 1 + +#ifdef SPX_CONFIG_TRACE_INCLUDE_DBG_WITH_ALL +#if defined(SPX_CONFIG_TRACE_ALL) && !defined(SPX_CONFIG_DBG_TRACE_ALL) && (!defined(DEBUG) || !defined(_DEBUG)) +#define SPX_CONFIG_DBG_TRACE_ALL 1 +#endif +#endif + +//------------------------------------------------------- +// SPX_ and AZAC_ compatibility section +// (must preceed #include ) +//------------------------------------------------------- + +#if defined(SPX_CONFIG_DBG_TRACE_ALL) && !defined(AZAC_CONFIG_DBG_TRACE_ALL) +#define AZAC_CONFIG_DBG_TRACE_ALL SPX_CONFIG_DBG_TRACE_ALL +#elif !defined(SPX_CONFIG_DBG_TRACE_ALL) && defined(AZAC_CONFIG_DBG_TRACE_ALL) +#define SPX_CONFIG_DBG_TRACE_ALL AZAC_CONFIG_DBG_TRACE_ALL +#endif + +#if defined(SPX_CONFIG_DBG_TRACE_VERBOSE) && !defined(AZAC_CONFIG_DBG_TRACE_VERBOSE) +#define AZAC_CONFIG_DBG_TRACE_VERBOSE SPX_CONFIG_DBG_TRACE_VERBOSE +#elif !defined(SPX_CONFIG_DBG_TRACE_VERBOSE) && defined(AZAC_CONFIG_DBG_TRACE_VERBOSE) +#define SPX_CONFIG_DBG_TRACE_VERBOSE AZAC_CONFIG_DBG_TRACE_VERBOSE +#endif + +#if defined(SPX_CONFIG_DBG_TRACE_INFO) && !defined(AZAC_CONFIG_DBG_TRACE_INFO) +#define AZAC_CONFIG_DBG_TRACE_INFO SPX_CONFIG_DBG_TRACE_INFO +#elif !defined(SPX_CONFIG_DBG_TRACE_INFO) && defined(AZAC_CONFIG_DBG_TRACE_INFO) +#define SPX_CONFIG_DBG_TRACE_INFO AZAC_CONFIG_DBG_TRACE_INFO +#endif + +#if defined(SPX_CONFIG_DBG_TRACE_WARNING) && !defined(AZAC_CONFIG_DBG_TRACE_WARNING) +#define AZAC_CONFIG_DBG_TRACE_WARNING SPX_CONFIG_DBG_TRACE_WARNING +#elif !defined(SPX_CONFIG_DBG_TRACE_WARNING) && defined(AZAC_CONFIG_DBG_TRACE_WARNING) +#define SPX_CONFIG_DBG_TRACE_WARNING AZAC_CONFIG_DBG_TRACE_WARNING +#endif + +#if defined(SPX_CONFIG_DBG_TRACE_ERROR) && !defined(AZAC_CONFIG_DBG_TRACE_ERROR) +#define AZAC_CONFIG_DBG_TRACE_ERROR SPX_CONFIG_DBG_TRACE_ERROR +#elif !defined(SPX_CONFIG_DBG_TRACE_ERROR) && defined(AZAC_CONFIG_DBG_TRACE_ERROR) +#define SPX_CONFIG_DBG_TRACE_ERROR AZAC_CONFIG_DBG_TRACE_ERROR +#endif + +#if defined(SPX_CONFIG_DBG_TRACE_FUNCTION) && !defined(AZAC_CONFIG_DBG_TRACE_FUNCTION) +#define AZAC_CONFIG_DBG_TRACE_FUNCTION SPX_CONFIG_DBG_TRACE_FUNCTION +#elif !defined(SPX_CONFIG_DBG_TRACE_FUNCTION) && defined(AZAC_CONFIG_DBG_TRACE_FUNCTION) +#define SPX_CONFIG_DBG_TRACE_FUNCTION AZAC_CONFIG_DBG_TRACE_FUNCTION +#endif + +#if defined(SPX_CONFIG_DBG_TRACE_SCOPE) && !defined(AZAC_CONFIG_DBG_TRACE_SCOPE) +#define AZAC_CONFIG_DBG_TRACE_SCOPE SPX_CONFIG_DBG_TRACE_SCOPE +#elif !defined(SPX_CONFIG_DBG_TRACE_SCOPE) && defined(AZAC_CONFIG_DBG_TRACE_SCOPE) +#define SPX_CONFIG_DBG_TRACE_SCOPE AZAC_CONFIG_DBG_TRACE_SCOPE +#endif + +#if defined(SPX_CONFIG_DBG_TRACE_ASSERT) && !defined(AZAC_CONFIG_DBG_TRACE_ASSERT) +#define AZAC_CONFIG_DBG_TRACE_ASSERT SPX_CONFIG_DBG_TRACE_ASSERT +#elif !defined(SPX_CONFIG_DBG_TRACE_ASSERT) && defined(AZAC_CONFIG_DBG_TRACE_ASSERT) +#define SPX_CONFIG_DBG_TRACE_ASSERT AZAC_CONFIG_DBG_TRACE_ASSERT +#endif + +#if defined(SPX_CONFIG_DBG_TRACE_VERIFY) && !defined(AZAC_CONFIG_DBG_TRACE_VERIFY) +#define AZAC_CONFIG_DBG_TRACE_VERIFY SPX_CONFIG_DBG_TRACE_VERIFY +#elif !defined(SPX_CONFIG_DBG_TRACE_VERIFY) && defined(AZAC_CONFIG_DBG_TRACE_VERIFY) +#define SPX_CONFIG_DBG_TRACE_VERIFY AZAC_CONFIG_DBG_TRACE_VERIFY +#endif + +#if defined(SPX_CONFIG_TRACE_ALL) && !defined(AZAC_CONFIG_TRACE_ALL) +#define AZAC_CONFIG_TRACE_ALL SPX_CONFIG_TRACE_ALL +#elif !defined(SPX_CONFIG_TRACE_ALL) && defined(AZAC_CONFIG_TRACE_ALL) +#define SPX_CONFIG_TRACE_ALL AZAC_CONFIG_TRACE_ALL +#endif + 
+#if defined(SPX_CONFIG_TRACE_VERBOSE) && !defined(AZAC_CONFIG_TRACE_VERBOSE) +#define AZAC_CONFIG_TRACE_VERBOSE SPX_CONFIG_TRACE_VERBOSE +#elif !defined(SPX_CONFIG_TRACE_VERBOSE) && defined(AZAC_CONFIG_TRACE_VERBOSE) +#define SPX_CONFIG_TRACE_VERBOSE AZAC_CONFIG_TRACE_VERBOSE +#endif + +#if defined(SPX_CONFIG_TRACE_INFO) && !defined(AZAC_CONFIG_TRACE_INFO) +#define AZAC_CONFIG_TRACE_INFO SPX_CONFIG_TRACE_INFO +#elif !defined(SPX_CONFIG_TRACE_INFO) && defined(AZAC_CONFIG_TRACE_INFO) +#define SPX_CONFIG_TRACE_INFO AZAC_CONFIG_TRACE_INFO +#endif + +#if defined(SPX_CONFIG_TRACE_WARNING) && !defined(AZAC_CONFIG_TRACE_WARNING) +#define AZAC_CONFIG_TRACE_WARNING SPX_CONFIG_TRACE_WARNING +#elif !defined(SPX_CONFIG_TRACE_WARNING) && defined(AZAC_CONFIG_TRACE_WARNING) +#define SPX_CONFIG_TRACE_WARNING AZAC_CONFIG_TRACE_WARNING +#endif + +#if defined(SPX_CONFIG_TRACE_ERROR) && !defined(AZAC_CONFIG_TRACE_ERROR) +#define AZAC_CONFIG_TRACE_ERROR SPX_CONFIG_TRACE_ERROR +#elif !defined(SPX_CONFIG_TRACE_ERROR) && defined(AZAC_CONFIG_TRACE_ERROR) +#define SPX_CONFIG_TRACE_ERROR AZAC_CONFIG_TRACE_ERROR +#endif + +#if defined(SPX_CONFIG_TRACE_FUNCTION) && !defined(AZAC_CONFIG_TRACE_FUNCTION) +#define AZAC_CONFIG_TRACE_FUNCTION SPX_CONFIG_TRACE_FUNCTION +#elif !defined(SPX_CONFIG_TRACE_FUNCTION) && defined(AZAC_CONFIG_TRACE_FUNCTION) +#define SPX_CONFIG_TRACE_FUNCTION AZAC_CONFIG_TRACE_FUNCTION +#endif + +#if defined(SPX_CONFIG_TRACE_SCOPE) && !defined(AZAC_CONFIG_TRACE_SCOPE) +#define AZAC_CONFIG_TRACE_SCOPE SPX_CONFIG_TRACE_SCOPE +#elif !defined(SPX_CONFIG_TRACE_SCOPE) && defined(AZAC_CONFIG_TRACE_SCOPE) +#define SPX_CONFIG_TRACE_SCOPE AZAC_CONFIG_TRACE_SCOPE +#endif + +#if defined(SPX_CONFIG_TRACE_THROW_ON_FAIL) && !defined(AZAC_CONFIG_TRACE_THROW_ON_FAIL) +#define AZAC_CONFIG_TRACE_THROW_ON_FAIL SPX_CONFIG_TRACE_THROW_ON_FAIL +#elif !defined(SPX_CONFIG_TRACE_THROW_ON_FAIL) && defined(AZAC_CONFIG_TRACE_THROW_ON_FAIL) +#define SPX_CONFIG_TRACE_THROW_ON_FAIL AZAC_CONFIG_TRACE_THROW_ON_FAIL +#endif + +#if defined(SPX_CONFIG_TRACE_REPORT_ON_FAIL) && !defined(AZAC_CONFIG_TRACE_REPORT_ON_FAIL) +#define AZAC_CONFIG_TRACE_REPORT_ON_FAIL SPX_CONFIG_TRACE_REPORT_ON_FAIL +#elif !defined(SPX_CONFIG_TRACE_REPORT_ON_FAIL) && defined(AZAC_CONFIG_TRACE_REPORT_ON_FAIL) +#define SPX_CONFIG_TRACE_REPORT_ON_FAIL AZAC_CONFIG_TRACE_REPORT_ON_FAIL +#endif + +#if defined(SPX_CONFIG_TRACE_RETURN_ON_FAIL) && !defined(AZAC_CONFIG_TRACE_RETURN_ON_FAIL) +#define AZAC_CONFIG_TRACE_RETURN_ON_FAIL SPX_CONFIG_TRACE_RETURN_ON_FAIL +#elif !defined(SPX_CONFIG_TRACE_RETURN_ON_FAIL) && defined(AZAC_CONFIG_TRACE_RETURN_ON_FAIL) +#define SPX_CONFIG_TRACE_RETURN_ON_FAIL AZAC_CONFIG_TRACE_RETURN_ON_FAIL +#endif + +#if defined(SPX_CONFIG_TRACE_EXITFN_ON_FAIL) && !defined(AZAC_CONFIG_TRACE_EXITFN_ON_FAIL) +#define AZAC_CONFIG_TRACE_EXITFN_ON_FAIL SPX_CONFIG_TRACE_EXITFN_ON_FAIL +#elif !defined(SPX_CONFIG_TRACE_EXITFN_ON_FAIL) && defined(AZAC_CONFIG_TRACE_EXITFN_ON_FAIL) +#define SPX_CONFIG_TRACE_EXITFN_ON_FAIL AZAC_CONFIG_TRACE_EXITFN_ON_FAIL +#endif + +#if !defined(__AZAC_THROW_HR_IMPL) && defined(__SPX_THROW_HR_IMPL) +#define __AZAC_THROW_HR_IMPL __SPX_THROW_HR_IMPL +#elif !defined(__SPX_THROW_HR_IMPL) && defined(__AZAC_THROW_HR_IMPL) +#define __SPX_THROW_HR_IMPL __AZAC_THROW_HR_IMPL +#elif !defined(__AZAC_THROW_HR_IMPL) && !defined(__SPX_THROW_HR_IMPL) +#define __AZAC_THROW_HR_IMPL __azac_rethrow +#define __SPX_THROW_HR_IMPL __azac_rethrow +#else +#error Both __AZAC_THROW_HR_IMPL and __SPX_THROW_HR_IMPL cannot be defined at the same time +#endif 
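Usage note (editorial): the paired SPX_/AZAC_ blocks above mean a client can opt into tracing with either prefix and the other follows. A minimal sketch of switching the release-build trace macros on for one translation unit; the include name and ordering are assumptions based on this diff.

// Sketch only: define the config symbol before the debug header is pulled in.
#define SPX_CONFIG_TRACE_ALL 1
#include "spxdebug.h"   // or indirectly via the speechapi headers

void PumpAudioOnce()
{
    SPX_TRACE_INFO("pumping one audio buffer");   // compiled in because SPX_CONFIG_TRACE_ALL is set
}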
+ +//------------------------------------------------------- +// SPX_ and SPX_DBG_ macro configuration +//------------------------------------------------------- + +#ifdef SPX_CONFIG_DBG_TRACE_ALL +#define SPX_CONFIG_DBG_TRACE_VERBOSE 1 +#define SPX_CONFIG_DBG_TRACE_INFO 1 +#define SPX_CONFIG_DBG_TRACE_WARNING 1 +#define SPX_CONFIG_DBG_TRACE_ERROR 1 +#define SPX_CONFIG_DBG_TRACE_FUNCTION 1 +#define SPX_CONFIG_DBG_TRACE_SCOPE 1 +#define SPX_CONFIG_DBG_TRACE_ASSERT 1 +#define SPX_CONFIG_DBG_TRACE_VERIFY 1 +#ifndef SPX_CONFIG_TRACE_ALL +#define SPX_CONFIG_TRACE_ALL 1 +#endif +#endif // SPX_CONFIG_DBG_TRACE_ALL + +#ifdef SPX_CONFIG_TRACE_ALL +#define SPX_CONFIG_TRACE_VERBOSE 1 +#define SPX_CONFIG_TRACE_INFO 1 +#define SPX_CONFIG_TRACE_WARNING 1 +#define SPX_CONFIG_TRACE_ERROR 1 +#define SPX_CONFIG_TRACE_FUNCTION 1 +#define SPX_CONFIG_TRACE_SCOPE 1 +#define SPX_CONFIG_TRACE_THROW_ON_FAIL 1 +#define SPX_CONFIG_TRACE_REPORT_ON_FAIL 1 +#define SPX_CONFIG_TRACE_RETURN_ON_FAIL 1 +#define SPX_CONFIG_TRACE_EXITFN_ON_FAIL 1 +#endif // SPX_CONFIG_TRACE_ALL + +//------------------------------------------------------- +// #include section ... +// (must come after everything above) +//------------------------------------------------------- + +#include +#include +#include + +#ifndef _MSC_VER +// macros in this header generate a bunch of +// "ISO C++11 requires at least one argument for the "..." in a variadic macro" errors. +// system_header pragma is the only mechanism that helps to suppress them. +// https://stackoverflow.com/questions/35587137/how-to-suppress-gcc-variadic-macro-argument-warning-for-zero-arguments-for-a-par +// TODO: try to make macros standard-compliant. +#pragma GCC system_header +#endif + +//----------------------------------------------------------- +// SPX_TRACE macro common implementations +//----------------------------------------------------------- + +#define __SPX_TRACE_LEVEL_INFO __AZAC_TRACE_LEVEL_INFO // Trace_Info +#define __SPX_TRACE_LEVEL_WARNING __AZAC_TRACE_LEVEL_WARNING // Trace_Warning +#define __SPX_TRACE_LEVEL_ERROR __AZAC_TRACE_LEVEL_ERROR // Trace_Error +#define __SPX_TRACE_LEVEL_VERBOSE __AZAC_TRACE_LEVEL_VERBOSE // Trace_Verbose + +#ifndef __SPX_DO_TRACE_IMPL +#define __SPX_DO_TRACE_IMPL __AZAC_DO_TRACE_IMPL +#endif + +#define __SPX_DOTRACE(level, title, fileName, lineNumber, ...) \ + __AZAC_DOTRACE(level, title, fileName, lineNumber, ##__VA_ARGS__) + +#define __SPX_TRACE_INFO(title, fileName, lineNumber, msg, ...) \ + __AZAC_TRACE_INFO(title, fileName, lineNumber, msg, ##__VA_ARGS__) + +#define __SPX_TRACE_INFO_IF(cond, title, fileName, lineNumber, msg, ...) \ + __AZAC_TRACE_INFO_IF(cond, title, fileName, lineNumber, msg, ##__VA_ARGS__) + +#define __SPX_TRACE_WARNING(title, fileName, lineNumber, msg, ...) \ + __AZAC_TRACE_WARNING(title, fileName, lineNumber, msg, ##__VA_ARGS__) + +#define __SPX_TRACE_WARNING_IF(cond, title, fileName, lineNumber, msg, ...) \ + __AZAC_TRACE_WARNING_IF(cond, title, fileName, lineNumber, msg, ##__VA_ARGS__) + +#define __SPX_TRACE_ERROR(title, fileName, lineNumber, msg, ...) \ + __AZAC_TRACE_ERROR(title, fileName, lineNumber, msg, ##__VA_ARGS__) + +#define __SPX_TRACE_ERROR_IF(cond, title, fileName, lineNumber, msg, ...) \ + __AZAC_TRACE_ERROR_IF(cond, title, fileName, lineNumber, msg, ##__VA_ARGS__) + +#define __SPX_TRACE_VERBOSE(title, fileName, lineNumber, msg, ...) \ + __AZAC_TRACE_VERBOSE(title, fileName, lineNumber, msg, ##__VA_ARGS__) + +#define __SPX_TRACE_VERBOSE_IF(cond, title, fileName, lineNumber, msg, ...) 
\ + __AZAC_TRACE_VERBOSE_IF(cond, title, fileName, lineNumber, msg, ##__VA_ARGS__) + +#define ___SPX_EXPR_AS_STRING(_String) \ + ___AZAC_EXPR_AS_STRING(_String) + +#define __SPX_EXPR_AS_STRING(_String) \ + __AZAC_EXPR_AS_STRING(_String) + +#define __SPX_TRACE_HR(title, fileName, lineNumber, hr, x) \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x) + +#define __SPX_REPORT_ON_FAIL(title, fileName, lineNumber, hr) \ + __AZAC_REPORT_ON_FAIL(title, fileName, lineNumber, hr) + +#define __SPX_REPORT_ON_FAIL_IFNOT(title, fileName, lineNumber, hr, hrNot) \ + __AZAC_REPORT_ON_FAIL_IFNOT(title, fileName, lineNumber, hr, hrNot) + +#define __SPX_T_RETURN_HR(title, fileName, lineNumber, hr) \ + __AZAC_T_RETURN_HR(title, fileName, lineNumber, hr) + +#define __SPX_T_RETURN_HR_IF(title, fileName, lineNumber, hr, cond) \ + __AZAC_T_RETURN_HR_IF(title, fileName, lineNumber, hr, cond) + +#define __SPX_T_RETURN_ON_FAIL(title, fileName, lineNumber, hr) \ + __AZAC_T_RETURN_ON_FAIL(title, fileName, lineNumber, hr) + +#define __SPX_T_RETURN_ON_FAIL_IF_NOT(title, fileName, lineNumber, hr, hrNot) \ + __AZAC_T_RETURN_ON_FAIL_IF_NOT(title, fileName, lineNumber, hr, hrNot) + +#define __SPX_RETURN_HR(hr) \ + __AZAC_RETURN_HR(hr) + +#define __SPX_RETURN_HR_IF(hr, cond) \ + __AZAC_RETURN_HR_IF(hr, cond) + +#define __SPX_RETURN_ON_FAIL(hr) \ + __AZAC_RETURN_ON_FAIL(hr) + +#define __SPX_RETURN_ON_FAIL_IF_NOT(hr, hrNot) \ + __AZAC_RETURN_ON_FAIL_IF_NOT(hr, hrNot) + +#define SPX_EXITFN_CLEANUP AZAC_EXITFN_CLEANUP + +#define __SPX_T_EXITFN_HR(title, fileName, lineNumber, hr) \ + __AZAC_T_EXITFN_HR(title, fileName, lineNumber, hr) + +#define __SPX_T_EXITFN_HR_IF(title, fileName, lineNumber, hr, cond) \ + __AZAC_T_EXITFN_HR_IF(title, fileName, lineNumber, hr, cond) + +#define __SPX_T_EXITFN_ON_FAIL(title, fileName, lineNumber, hr) \ + __AZAC_T_EXITFN_ON_FAIL(title, fileName, lineNumber, hr) + +#define __SPX_T_EXITFN_ON_FAIL_IF_NOT(title, fileName, lineNumber, hr, hrNot) \ + __AZAC_T_EXITFN_ON_FAIL_IF_NOT(title, fileName, lineNumber, hr, hrNot) + +#define __SPX_EXITFN_HR(hr) \ + __AZAC_EXITFN_HR(hr) + +#define __SPX_EXITFN_HR_IF(hr, cond) \ + __AZAC_EXITFN_HR_IF(hr, cond) + +#define __SPX_EXITFN_ON_FAIL(hr) \ + __AZAC_EXITFN_ON_FAIL(hr) + +#define __SPX_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) \ + __AZAC_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) + +#define __SPX_TRACE_ASSERT(title, fileName, lineNumber, expr) \ + __AZAC_TRACE_ASSERT(title, fileName, lineNumber, expr) + +#define __SPX_TRACE_ASSERT_MSG(title, fileName, lineNumber, expr, ...) \ + __AZAC_TRACE_ASSERT_MSG(title, fileName, lineNumber, expr, ##__VA_ARGS__) + +#define __SPX_DBG_ASSERT(title, fileName, lineNumber, expr) \ + __AZAC_DBG_ASSERT(title, fileName, lineNumber, expr) + +#define __SPX_DBG_ASSERT_WITH_MESSAGE(title, fileName, lineNumber, expr, ...) \ + __AZAC_DBG_ASSERT_WITH_MESSAGE(title, fileName, lineNumber, expr, ##__VA_ARGS__) + +#define __SPX_DBG_VERIFY(title, fileName, lineNumber, expr) \ + __AZAC_DBG_VERIFY(title, fileName, lineNumber, expr) + +#define __SPX_DBG_VERIFY_WITH_MESSAGE(title, fileName, lineNumber, expr, ...) 
\ + __AZAC_DBG_VERIFY_WITH_MESSAGE(title, fileName, lineNumber, expr, ##__VA_ARGS__) + +#ifdef __cplusplus + +#define __SPX_TRACE_SCOPE(t1, fileName, lineNumber, t2, x, y) \ + __AZAC_TRACE_SCOPE(t1, fileName, lineNumber, t2, x, y) + +#ifndef __SPX_THROW_HR +#define __SPX_THROW_HR(hr) __SPX_THROW_HR_IMPL(hr) +#endif + +#define __SPX_T_THROW_ON_FAIL(title, fileName, lineNumber, hr) \ + __AZAC_T_THROW_ON_FAIL(title, fileName, lineNumber, hr) + +#define __SPX_T_THROW_ON_FAIL_IF_NOT(title, fileName, lineNumber, hr, hrNot) \ + __AZAC_T_THROW_ON_FAIL_IF_NOT(title, fileName, lineNumber, hr, hrNot) + +#define __SPX_T_THROW_HR_IF(title, fileName, lineNumber, hr, cond) \ + __AZAC_T_THROW_HR_IF(title, fileName, lineNumber, hr, cond) + +#define __SPX_T_THROW_HR(title, fileName, lineNumber, hr) \ + __AZAC_T_THROW_HR(title, fileName, lineNumber, hr) + +#define __SPX_THROW_ON_FAIL(hr) \ + __AZAC_THROW_ON_FAIL(hr) + +#define __SPX_THROW_ON_FAIL_IF_NOT(hr, hrNot) \ + __AZAC_THROW_ON_FAIL_IF_NOT(hr, hrNot) + +#define __SPX_THROW_HR_IF(hr, cond) \ + __AZAC_THROW_HR_IF(hr, cond) + +#endif // __cplusplus + + +//------------------------------------------------------- +// SPX_ macro definitions +//------------------------------------------------------- + +#ifdef SPX_CONFIG_TRACE_VERBOSE +#define SPX_TRACE_VERBOSE(msg, ...) __SPX_TRACE_VERBOSE("SPX_TRACE_VERBOSE: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define SPX_TRACE_VERBOSE_IF(cond, msg, ...) __SPX_TRACE_VERBOSE_IF(cond, "SPX_TRACE_VERBOSE: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define SPX_TRACE_VERBOSE(...) +#define SPX_TRACE_VERBOSE_IF(...) +#endif + +#ifdef SPX_CONFIG_DBG_TRACE_VERBOSE +#define SPX_DBG_TRACE_VERBOSE(msg, ...) __SPX_TRACE_VERBOSE("SPX_DBG_TRACE_VERBOSE: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define SPX_DBG_TRACE_VERBOSE_IF(cond, msg, ...) __SPX_TRACE_VERBOSE_IF(cond, "SPX_DBG_TRACE_VERBOSE: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define SPX_DBG_TRACE_VERBOSE(...) +#define SPX_DBG_TRACE_VERBOSE_IF(...) +#endif + +#ifdef SPX_CONFIG_TRACE_INFO +#define SPX_TRACE_INFO(msg, ...) __SPX_TRACE_INFO("SPX_TRACE_INFO: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define SPX_TRACE_INFO_IF(cond, msg, ...) __SPX_TRACE_INFO_IF(cond, "SPX_TRACE_INFO: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define SPX_TRACE_INFO(...) +#define SPX_TRACE_INFO_IF(...) +#endif + +#ifdef SPX_CONFIG_DBG_TRACE_INFO +#define SPX_DBG_TRACE_INFO(msg, ...) __SPX_TRACE_INFO("SPX_DBG_TRACE_INFO: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define SPX_DBG_TRACE_INFO_IF(cond, msg, ...) __SPX_TRACE_INFO_IF(cond, "SPX_DBG_TRACE_INFO: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define SPX_DBG_TRACE_INFO(...) +#define SPX_DBG_TRACE_INFO_IF(...) +#endif + +#ifdef SPX_CONFIG_TRACE_WARNING +#define SPX_TRACE_WARNING(msg, ...) __SPX_TRACE_WARNING("SPX_TRACE_WARNING:", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define SPX_TRACE_WARNING_IF(cond, msg, ...) __SPX_TRACE_WARNING_IF(cond, "SPX_TRACE_WARNING:", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define SPX_TRACE_WARNING(...) +#define SPX_TRACE_WARNING_IF(...) +#endif + +#ifdef SPX_CONFIG_DBG_TRACE_WARNING +#define SPX_DBG_TRACE_WARNING(msg, ...) __SPX_TRACE_WARNING("SPX_DBG_TRACE_WARNING:", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define SPX_DBG_TRACE_WARNING_IF(cond, msg, ...) __SPX_TRACE_WARNING_IF(cond, "SPX_DBG_TRACE_WARNING:", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define SPX_DBG_TRACE_WARNING(...) +#define SPX_DBG_TRACE_WARNING_IF(...) 
+#endif + +#ifdef SPX_CONFIG_TRACE_ERROR +#define SPX_TRACE_ERROR(msg, ...) __SPX_TRACE_ERROR("SPX_TRACE_ERROR: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define SPX_TRACE_ERROR_IF(cond, msg, ...) __SPX_TRACE_ERROR_IF(cond, "SPX_TRACE_ERROR: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define SPX_TRACE_ERROR(...) +#define SPX_TRACE_ERROR_IF(...) +#endif + +#ifdef SPX_CONFIG_DBG_TRACE_ERROR +#define SPX_DBG_TRACE_ERROR(msg, ...) __SPX_TRACE_ERROR("SPX_DBG_TRACE_ERROR: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define SPX_DBG_TRACE_ERROR_IF(cond, msg, ...) __SPX_TRACE_ERROR_IF(cond, "SPX_DBG_TRACE_ERROR: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define SPX_DBG_TRACE_ERROR(...) +#define SPX_DBG_TRACE_ERROR_IF(...) +#endif + +#ifdef SPX_CONFIG_TRACE_FUNCTION +#define SPX_TRACE_FUNCTION(...) __SPX_TRACE_VERBOSE("SPX_TRACE_FUNCTION: ", __FILE__, __LINE__, __FUNCTION__) +#else +#define SPX_TRACE_FUNCTION(...) +#endif + +#ifdef SPX_CONFIG_DBG_TRACE_FUNCTION +#define SPX_DBG_TRACE_FUNCTION(...) __SPX_TRACE_VERBOSE("SPX_DBG_TRACE_FUNCTION: ", __FILE__, __LINE__, __FUNCTION__) +#else +#define SPX_DBG_TRACE_FUNCTION(...) +#endif + +#ifdef SPX_CONFIG_TRACE_REPORT_ON_FAIL +#define SPX_REPORT_ON_FAIL(hr) __SPX_REPORT_ON_FAIL("SPX_REPORT_ON_FAIL: ", __FILE__, __LINE__, hr) +#define SPX_REPORT_ON_FAIL_IFNOT(hr, hrNot) __SPX_REPORT_ON_FAIL_IFNOT("SPX_REPORT_ON_FAIL: ", __FILE__, __LINE__, hr, hrNot) +#else +#define SPX_REPORT_ON_FAIL(hr) UNUSED(hr) +#define SPX_REPORT_ON_FAIL_IFNOT(hr, hrNot) UNUSED(hr); UNUSED(hrNot) +#endif + +#ifdef SPX_CONFIG_TRACE_RETURN_ON_FAIL +#define SPX_RETURN_HR(hr) __SPX_T_RETURN_HR("SPX_RETURN_ON_FAIL: ", __FILE__, __LINE__, hr) +#define SPX_RETURN_HR_IF(hr, cond) __SPX_T_RETURN_HR_IF("SPX_RETURN_ON_FAIL: ", __FILE__, __LINE__, hr, cond) +#define SPX_RETURN_ON_FAIL(hr) __SPX_T_RETURN_ON_FAIL("SPX_RETURN_ON_FAIL: ", __FILE__, __LINE__, hr) +#define SPX_RETURN_ON_FAIL_IF_NOT(hr, hrNot) __SPX_T_RETURN_ON_FAIL_IF_NOT("SPX_RETURN_ON_FAIL: ", __FILE__, __LINE__, hr, hrNot) +#else +#define SPX_RETURN_HR(hr) __SPX_RETURN_HR(hr) +#define SPX_RETURN_HR_IF(hr, cond) __SPX_RETURN_HR_IF(hr, cond) +#define SPX_RETURN_ON_FAIL(hr) __SPX_RETURN_ON_FAIL(hr) +#define SPX_RETURN_ON_FAIL_IF_NOT(hr, hrNot) __SPX_RETURN_ON_FAIL_IF_NOT(hr, hrNot) +#endif + +#define SPX_IFTRUE_RETURN_HR(cond, hr) SPX_RETURN_HR_IF(hr, cond) +#define SPX_IFFALSE_RETURN_HR(cond, hr) SPX_RETURN_HR_IF(hr, !(cond)) +#define SPX_IFFAILED_RETURN_HR(hr) SPX_RETURN_ON_FAIL(hr) +#define SPX_IFFAILED_RETURN_HR_IFNOT(hr, hrNot) SPX_RETURN_ON_FAIL_IF_NOT(hr, hrNot) + +#ifdef SPX_CONFIG_TRACE_EXITFN_ON_FAIL +#define SPX_EXITFN_HR(hr) __SPX_T_EXITFN_HR("SPX_EXITFN_ON_FAIL: ", __FILE__, __LINE__, hr) +#define SPX_EXITFN_HR_IF(hr, cond) __SPX_T_EXITFN_HR_IF("SPX_EXITFN_ON_FAIL: ", __FILE__, __LINE__, hr, cond) +#define SPX_EXITFN_ON_FAIL(hr) __SPX_T_EXITFN_ON_FAIL("SPX_EXITFN_ON_FAIL: ", __FILE__, __LINE__, hr) +#define SPX_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) __SPX_T_EXITFN_ON_FAIL_IF_NOT("SPX_EXITFN_ON_FAIL: ", __FILE__, __LINE__, hr, hrNot) +#else +#define SPX_EXITFN_HR(hr) __SPX_EXITFN_HR(hr) +#define SPX_EXITFN_HR_IF(hr, cond) __SPX_EXITFN_HR_IF(hr, cond) +#define SPX_EXITFN_ON_FAIL(hr) __SPX_EXITFN_ON_FAIL(hr) +#define SPX_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) __SPX_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) +#endif + +#define SPX_IFTRUE_EXITFN_WHR(cond, hr) SPX_EXITFN_HR_IF(hr, cond) +#define SPX_IFFALSE_EXITFN_WHR(cond, hr) SPX_EXITFN_HR_IF(hr, !(cond)) +#define SPX_IFFAILED_EXITFN_WHR(hr) SPX_EXITFN_ON_FAIL(hr) +#define 
SPX_IFFAILED_EXITFN_WHR_IFNOT(hr, hrNot) SPX_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) + +#define SPX_IFTRUE_EXITFN_CLEANUP(cond, expr) AZAC_IFTRUE_EXITFN_CLEANUP(cond, expr) +#define SPX_IFFALSE_EXITFN_CLEANUP(cond, expr) AZAC_IFFALSE_EXITFN_CLEANUP(cond, expr) + +#if defined(SPX_CONFIG_DBG_TRACE_ASSERT) && (defined(DEBUG) || defined(_DEBUG)) +#define SPX_DBG_ASSERT(expr) __SPX_DBG_ASSERT("SPX_ASSERT: ", __FILE__, __LINE__, expr) +#define SPX_DBG_ASSERT_WITH_MESSAGE(expr, ...) __SPX_DBG_ASSERT_WITH_MESSAGE("SPX_ASSERT: ", __FILE__, __LINE__, expr, ##__VA_ARGS__) +#else +#define SPX_DBG_ASSERT(expr) +#define SPX_DBG_ASSERT_WITH_MESSAGE(expr, ...) +#endif + +#if defined(SPX_CONFIG_DBG_TRACE_VERIFY) && (defined(DEBUG) || defined(_DEBUG)) +#define SPX_DBG_VERIFY(expr) __SPX_DBG_VERIFY("SPX_VERIFY: ", __FILE__, __LINE__, expr) +#define SPX_DBG_VERIFY_WITH_MESSAGE(expr, ...) __SPX_DBG_VERIFY_WITH_MESSAGE("SPX_VERIFY: ", __FILE__, __LINE__, expr, ##__VA_ARGS__) +#else +#define SPX_DBG_VERIFY(expr) (expr) +#define SPX_DBG_VERIFY_WITH_MESSAGE(expr, ...) (expr) +#endif + +#define SPX_IFTRUE(cond, expr) AZAC_IFTRUE(cond, expr) +#define SPX_IFFALSE(cond, expr) AZAC_IFFALSE(cond, expr) + +#ifdef __cplusplus + +#ifdef SPX_CONFIG_TRACE_SCOPE +#define SPX_TRACE_SCOPE(x, y) __SPX_TRACE_SCOPE("SPX_TRACE_SCOPE_ENTER: ", __FILE__, __LINE__, "SPX_TRACE_SCOPE_EXIT: ", x, y) +#else +#define SPX_TRACE_SCOPE(x, y) +#endif + +#ifdef SPX_CONFIG_DBG_TRACE_SCOPE +#define SPX_DBG_TRACE_SCOPE(x, y) __SPX_TRACE_SCOPE("SPX_DBG_TRACE_SCOPE_ENTER: ", __FILE__, __LINE__, "SPX_DBG_TRACE_SCOPE_EXIT: ", x, y) +#else +#define SPX_DBG_TRACE_SCOPE(x, y) +#endif + +#ifdef SPX_CONFIG_TRACE_THROW_ON_FAIL +#define SPX_THROW_ON_FAIL(hr) __SPX_T_THROW_ON_FAIL("SPX_THROW_ON_FAIL: ", __FILE__, __LINE__, hr) +#define SPX_THROW_ON_FAIL_IF_NOT(hr, hrNot) __SPX_T_THROW_ON_FAIL_IF_NOT("SPX_THROW_ON_FAIL: ", __FILE__, __LINE__, hr, hrNot) +#define SPX_THROW_HR_IF(hr, cond) __SPX_T_THROW_HR_IF("SPX_THROW_HR_IF: ", __FILE__, __LINE__, hr, cond) +#define SPX_THROW_HR(hr) __SPX_T_THROW_HR("SPX_THROW_HR: ", __FILE__, __LINE__, hr) +#else +#define SPX_THROW_ON_FAIL(hr) __SPX_THROW_ON_FAIL(hr) +#define SPX_THROW_ON_FAIL_IF_NOT(hr, hrNot) __SPX_THROW_ON_FAIL_IF_NOT(hr, hrNot) +#define SPX_THROW_HR_IF(hr, cond) __SPX_THROW_HR_IF(hr, cond) +#define SPX_THROW_HR(hr) __SPX_THROW_HR(hr) +#endif + +#define SPX_IFFAILED_THROW_HR(hr) SPX_THROW_ON_FAIL(hr) +#define SPX_IFFAILED_THROW_HR_IFNOT(hr, hrNot) SPX_THROW_ON_FAIL_IF_NOT(hr, hrNot) + +#else // __cplusplus + +#define SPX_TRACE_SCOPE(x, y) static_assert(false) +#define SPX_DBG_TRACE_SCOPE(x, y) static_assert(false) +#define SPX_THROW_ON_FAIL(hr) static_assert(false) +#define SPX_THROW_ON_FAIL_IF_NOT(hr, hrNot) static_assert(false) +#define SPX_THROW_HR_IF(hr, cond) static_assert(false) +#define SPX_THROW_HR(hr) static_assert(false) +#define SPX_IFFAILED_THROW_HR(hr) static_assert(false) +#define SPX_IFFAILED_THROW_HR_IFNOT(hr, hrNot) static_assert(false) + +#endif // __cplusplus diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/spxerror.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/spxerror.h new file mode 100644 index 0000000..760ec06 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/c_api/spxerror.h @@ -0,0 +1,449 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
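Usage note (editorial): in C++ callers the throw-on-fail and scope-trace macros above are normally combined with the SPXERR_ codes defined just below. A short sketch:

// Sketch only: create a speech config handle or throw on any failure.
SPXSPEECHCONFIGHANDLE CreateConfigOrThrow(const char* key, const char* region)
{
    SPX_DBG_TRACE_SCOPE("creating speech config", "speech config created");
    SPX_THROW_HR_IF(SPXERR_INVALID_ARG, key == nullptr || region == nullptr);

    SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID;
    SPX_THROW_ON_FAIL(speech_config_from_subscription(&hconfig, key, region));
    return hconfig;
}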
+// + +#pragma once + +#include // must include after spxdebug.h or speechapi*.h (can NOT be included before) + +#define SPXHR AZACHR +#define SPX_NOERROR AZAC_ERR_NONE +#define SPX_INIT_HR(hr) AZAC_INIT_HR(hr) +#define SPX_SUCCEEDED(x) AZAC_SUCCEEDED(x) +#define SPX_FAILED(x) AZAC_FAILED(x) +#define __SPX_ERRCODE_FAILED(x) __AZAC_ERRCODE_FAILED(x) + +/// +/// The function is not implemented. +/// +#define SPXERR_NOT_IMPL \ + AZAC_ERR_NOT_IMPL + +/// +/// The object has not been properly initialized. +/// +#define SPXERR_UNINITIALIZED \ + AZAC_ERR_UNINITIALIZED + +/// +/// The object has already been initialized. +/// +#define SPXERR_ALREADY_INITIALIZED \ + AZAC_ERR_ALREADY_INITIALIZED + +/// +/// An unhandled exception was detected. +/// +#define SPXERR_UNHANDLED_EXCEPTION \ + AZAC_ERR_UNHANDLED_EXCEPTION + +/// +/// The object or property was not found. +/// +#define SPXERR_NOT_FOUND \ + AZAC_ERR_NOT_FOUND + +/// +/// One or more arguments are not valid. +/// +#define SPXERR_INVALID_ARG \ + AZAC_ERR_INVALID_ARG + +/// +/// The specified timeout value has elapsed. +/// +#define SPXERR_TIMEOUT \ + AZAC_ERR_TIMEOUT + +/// +/// The asynchronous operation is already in progress. +/// +#define SPXERR_ALREADY_IN_PROGRESS \ + AZAC_ERR_ALREADY_IN_PROGRESS + +/// +/// The attempt to open the file failed. +/// +#define SPXERR_FILE_OPEN_FAILED \ + AZAC_ERR_FILE_OPEN_FAILED + +/// +/// The end of the file was reached unexpectedly. +/// +#define SPXERR_UNEXPECTED_EOF \ + AZAC_ERR_UNEXPECTED_EOF + +/// +/// Invalid audio header encountered. +/// +#define SPXERR_INVALID_HEADER \ + AZAC_ERR_INVALID_HEADER + +/// +/// The requested operation cannot be performed while audio is pumping +/// +#define SPXERR_AUDIO_IS_PUMPING \ + AZAC_ERR_AUDIO_IS_PUMPING + +/// +/// Unsupported audio format. +/// +#define SPXERR_UNSUPPORTED_FORMAT \ + AZAC_ERR_UNSUPPORTED_FORMAT + +/// +/// Operation aborted. +/// +#define SPXERR_ABORT \ + AZAC_ERR_ABORT + +/// +/// Microphone is not available. +/// +#define SPXERR_MIC_NOT_AVAILABLE \ + AZAC_ERR_MIC_NOT_AVAILABLE + +/// +/// An invalid state was encountered. +/// +#define SPXERR_INVALID_STATE \ + AZAC_ERR_INVALID_STATE + +/// +/// Attempting to create a UUID failed. +/// +#define SPXERR_UUID_CREATE_FAILED \ + AZAC_ERR_UUID_CREATE_FAILED + +/// +/// An unexpected session state transition was encountered when setting the session audio format. +/// +/// +/// Valid transitions are: +/// * WaitForPumpSetFormatStart --> ProcessingAudio (at the beginning of stream) +/// * StoppingPump --> WaitForAdapterCompletedSetFormatStop (at the end of stream) +/// * ProcessingAudio --> WaitForAdapterCompletedSetFormatStop (when the stream runs out of data) +/// All other state transitions are invalid. +/// +#define SPXERR_SETFORMAT_UNEXPECTED_STATE_TRANSITION \ + AZAC_ERR_SETFORMAT_UNEXPECTED_STATE_TRANSITION + +/// +/// An unexpected session state was encountered in while processing audio. +/// +/// +/// Valid states to encounter are: +/// * ProcessingAudio: We're allowed to process audio while in this state. +/// * StoppingPump: We're allowed to be called to process audio, but we'll ignore the data passed in while we're attempting to stop the pump. +/// All other states are invalid while processing audio. +/// +#define SPXERR_PROCESS_AUDIO_INVALID_STATE \ + AZAC_ERR_PROCESS_AUDIO_INVALID_STATE + +/// +/// An unexpected state transition was encountered while attempting to start recognizing. 
+/// +/// +/// A valid transition is: +/// * Idle --> WaitForPumpSetFormatStart +/// All other state transitions are invalid when attempting to start recognizing +/// +#define SPXERR_START_RECOGNIZING_INVALID_STATE_TRANSITION \ + AZAC_ERR_START_RECOGNIZING_INVALID_STATE_TRANSITION + +/// +/// An unexpected error was encountered when trying to create an internal object. +/// +#define SPXERR_UNEXPECTED_CREATE_OBJECT_FAILURE \ + AZAC_ERR_UNEXPECTED_CREATE_OBJECT_FAILURE + +/// +/// An error in the audio-capturing system. +/// +#define SPXERR_MIC_ERROR \ + AZAC_ERR_MIC_ERROR + +/// +/// The requested operation cannot be performed; there is no audio input. +/// +#define SPXERR_NO_AUDIO_INPUT \ + AZAC_ERR_NO_AUDIO_INPUT + +/// +/// An unexpected error was encountered when trying to access the USP site. +/// +#define SPXERR_UNEXPECTED_USP_SITE_FAILURE \ + AZAC_ERR_UNEXPECTED_USP_SITE_FAILURE + +/// +/// An unexpected error was encountered when trying to access the LuAdapterSite site. +/// +#define SPXERR_UNEXPECTED_LU_SITE_FAILURE \ + AZAC_ERR_UNEXPECTED_LU_SITE_FAILURE + +/// +/// The buffer is too small. +/// +#define SPXERR_BUFFER_TOO_SMALL \ + AZAC_ERR_BUFFER_TOO_SMALL + +/// +/// A method failed to allocate memory. +/// +#define SPXERR_OUT_OF_MEMORY \ + AZAC_ERR_OUT_OF_MEMORY + +/// +/// An unexpected runtime error occurred. +/// +#define SPXERR_RUNTIME_ERROR \ + AZAC_ERR_RUNTIME_ERROR + +/// +/// The url specified is invalid. +/// +#define SPXERR_INVALID_URL \ + AZAC_ERR_INVALID_URL + +/// +/// The region specified is invalid or missing. +/// +#define SPXERR_INVALID_REGION \ + AZAC_ERR_INVALID_REGION + +/// +/// Switch between single shot and continuous recognition is not supported. +/// +#define SPXERR_SWITCH_MODE_NOT_ALLOWED \ + AZAC_ERR_SWITCH_MODE_NOT_ALLOWED + +/// +/// Changing connection status is not supported in the current recognition state. +/// +#define SPXERR_CHANGE_CONNECTION_STATUS_NOT_ALLOWED \ + AZAC_ERR_CHANGE_CONNECTION_STATUS_NOT_ALLOWED + +/// +/// Explicit connection management is not supported by the specified recognizer. +/// +#define SPXERR_EXPLICIT_CONNECTION_NOT_SUPPORTED_BY_RECOGNIZER \ + AZAC_ERR_EXPLICIT_CONNECTION_NOT_SUPPORTED_BY_RECOGNIZER + +/// +/// The handle is invalid. +/// +#define SPXERR_INVALID_HANDLE \ + AZAC_ERR_INVALID_HANDLE + +/// +/// The recognizer is invalid. +/// +#define SPXERR_INVALID_RECOGNIZER \ + AZAC_ERR_INVALID_RECOGNIZER + +/// +/// The value is out of range. +/// Added in version 1.3.0. +/// +#define SPXERR_OUT_OF_RANGE \ + AZAC_ERR_OUT_OF_RANGE + +/// +/// Extension library not found. +/// Added in version 1.3.0. +/// +#define SPXERR_EXTENSION_LIBRARY_NOT_FOUND \ + AZAC_ERR_EXTENSION_LIBRARY_NOT_FOUND + +/// +/// An unexpected error was encountered when trying to access the TTS engine site. +/// Added in version 1.4.0. +/// +#define SPXERR_UNEXPECTED_TTS_ENGINE_SITE_FAILURE \ + AZAC_ERR_UNEXPECTED_TTS_ENGINE_SITE_FAILURE + +/// +/// An unexpected error was encountered when trying to access the audio output stream. +/// Added in version 1.4.0. +/// +#define SPXERR_UNEXPECTED_AUDIO_OUTPUT_FAILURE \ + AZAC_ERR_UNEXPECTED_AUDIO_OUTPUT_FAILURE + +/// +/// Gstreamer internal error. +/// Added in version 1.4.0. +/// +#define SPXERR_GSTREAMER_INTERNAL_ERROR \ + AZAC_ERR_GSTREAMER_INTERNAL_ERROR + +/// +/// Compressed container format not supported. +/// Added in version 1.4.0. +/// +#define SPXERR_CONTAINER_FORMAT_NOT_SUPPORTED_ERROR \ + AZAC_ERR_CONTAINER_FORMAT_NOT_SUPPORTED_ERROR + +/// +/// Codec extension or gstreamer not found. 
+/// Added in version 1.4.0. +/// +#define SPXERR_GSTREAMER_NOT_FOUND_ERROR \ + AZAC_ERR_GSTREAMER_NOT_FOUND_ERROR + +/// +/// The language specified is missing. +/// Added in version 1.5.0. +/// +#define SPXERR_INVALID_LANGUAGE \ + AZAC_ERR_INVALID_LANGUAGE + +/// +/// The API is not applicable. +/// Added in version 1.5.0. +/// +#define SPXERR_UNSUPPORTED_API_ERROR \ + AZAC_ERR_UNSUPPORTED_API_ERROR + +/// +/// The ring buffer is unavailable. +/// Added in version 1.8.0. +/// +#define SPXERR_RINGBUFFER_DATA_UNAVAILABLE \ + AZAC_ERR_RINGBUFFER_DATA_UNAVAILABLE + +/// +/// An unexpected error was encountered when trying to access the Conversation site. +/// Added in version 1.5.0. +/// +#define SPXERR_UNEXPECTED_CONVERSATION_SITE_FAILURE \ + AZAC_ERR_UNEXPECTED_CONVERSATION_SITE_FAILURE + +/// +/// An unexpected error was encountered when trying to access the Conversation site. +/// Added in version 1.8.0. +/// +#define SPXERR_UNEXPECTED_CONVERSATION_TRANSLATOR_SITE_FAILURE \ + AZAC_ERR_UNEXPECTED_CONVERSATION_TRANSLATOR_SITE_FAILURE + +/// +/// An asynchronous operation was canceled before it was executed. +/// Added in version 1.8.0. +/// +#define SPXERR_CANCELED \ + AZAC_ERR_CANCELED + +/// +/// Codec for compression could not be initialized. +/// Added in version 1.10.0. +/// +#define SPXERR_COMPRESS_AUDIO_CODEC_INITIFAILED \ + AZAC_ERR_COMPRESS_AUDIO_CODEC_INITIFAILED + +/// +/// Data not available. +/// Added in version 1.10.0. +/// +#define SPXERR_DATA_NOT_AVAILABLE \ + AZAC_ERR_DATA_NOT_AVAILABLE + +/// +/// Invalid result reason. +/// Added in version 1.12.0 +/// +#define SPXERR_INVALID_RESULT_REASON \ + AZAC_ERR_INVALID_RESULT_REASON + +/// +/// An unexpected error was encountered when trying to access the RNN-T site. +/// +#define SPXERR_UNEXPECTED_RNNT_SITE_FAILURE \ + AZAC_ERR_UNEXPECTED_RNNT_SITE_FAILURE + +/// +/// Sending of a network message failed. +/// +#define SPXERR_NETWORK_SEND_FAILED \ + AZAC_ERR_NETWORK_SEND_FAILED + +/// +/// Audio extension library not found. +/// Added in version 1.16.0. +/// +#define SPXERR_AUDIO_SYS_LIBRARY_NOT_FOUND \ + AZAC_ERR_AUDIO_SYS_LIBRARY_NOT_FOUND + +/// +/// An error in the audio-rendering system. +/// Added in version 1.20.0 +/// +#define SPXERR_LOUDSPEAKER_ERROR \ + AZAC_ERR_LOUDSPEAKER_ERROR + +/// +/// An unexpected error was encountered when trying to access the Vision site. +/// Added in version 1.15.0. +/// +#define SPXERR_VISION_SITE_FAILURE \ + AZAC_ERR_VISION_SITE_FAILURE + +/// +/// Stream number provided was invalid in the current context. +/// Added in version 1.15.0. +/// +#define SPXERR_MEDIA_INVALID_STREAM \ + AZAC_ERR_MEDIA_INVALID_STREAM + +/// +/// Offset required is invalid in the current context. +/// Added in version 1.15.0. +/// +#define SPXERR_MEDIA_INVALID_OFFSET \ + AZAC_ERR_MEDIA_INVALID_OFFSET + +/// +/// No more data is available in source. +/// Added in version 1.15.0. +/// +#define SPXERR_MEDIA_NO_MORE_DATA \ + AZAC_ERR_MEDIA_NO_MORE_DATA + +/// +/// Source has not been started. +/// Added in version 1.15.0. +/// +#define SPXERR_MEDIA_NOT_STARTED \ + AZAC_ERR_MEDIA_NOT_STARTED + +/// +/// Source has already been started. +/// Added in version 1.15.0. +/// +#define SPXERR_MEDIA_ALREADY_STARTED \ + AZAC_ERR_MEDIA_ALREADY_STARTED + +/// +/// Media device creation failed. +/// Added in version 1.18.0. +/// +#define SPXERR_MEDIA_DEVICE_CREATION_FAILED \ + AZAC_ERR_MEDIA_DEVICE_CREATION_FAILED + +/// +/// No devices of the selected category are available. +/// Added in version 1.18.0. 
+/// +#define SPXERR_MEDIA_NO_DEVICE_AVAILABLE \ + AZAC_ERR_MEDIA_NO_DEVICE_AVAILABLE + +/// +/// Enabled Voice Activity Detection while using keyword recognition is not allowed. +/// +#define SPXERR_VAD_COULD_NOT_USE_WITH_KEYWORD_RECOGNIZER \ + AZAC_ERR_VAD_COULD_NOT_USE_WITH_KEYWORD_RECOGNIZER + +/// +/// The specified RecoEngineAdapter could not be created. +/// +#define SPXERR_COULD_NOT_CREATE_ENGINE_ADAPTER \ + AZAC_ERR_COULD_NOT_CREATE_ENGINE_ADAPTER diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/CMakeLists.txt b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/CMakeLists.txt new file mode 100644 index 0000000..c5ee7ae --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/CMakeLists.txt @@ -0,0 +1,9 @@ +cmake_minimum_required(VERSION 3.19) + +project(cxx_headers) + +set(SRC_DIR "${PROJECT_SOURCE_DIR}") +add_library(${PROJECT_NAME} INTERFACE ${SPEECH_CXX_API_HEADERS}) +target_include_directories(${PROJECT_NAME} INTERFACE ${PROJECT_SOURCE_DIR}) +target_link_libraries(${PROJECT_NAME} INTERFACE c_headers) +set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER api) diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/azac_api_cxx_common.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/azac_api_cxx_common.h new file mode 100644 index 0000000..f097d2e --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/azac_api_cxx_common.h @@ -0,0 +1,82 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/azai/license202106 for the full license information. +// + +#pragma once + +// TODO: TFS#3671215 - Vision: C/C++ azac_api* files are in shared include directory, speech and vision share + +#include +#include +#include +#include +#include +#include +#include + +#define AZAC_DISABLE_COPY_AND_MOVE(T) \ + /** \brief Disable copy constructor */ \ + T(const T&) = delete; \ + /** \brief Disable copy assignment */ \ + T& operator=(const T&) = delete; \ + /** \brief Disable move constructor */ \ + T(T&&) = delete; \ + /** \brief Disable move assignment */ \ + T& operator=(T&&) = delete + +#define AZAC_DISABLE_DEFAULT_CTORS(T) \ + /** \brief Disable default constructor */ \ + T() = delete; \ + AZAC_DISABLE_COPY_AND_MOVE(T) + +#if defined(__GNUG__) && defined(__linux__) && !defined(ANDROID) && !defined(__ANDROID__) +#include +#define SHOULD_HANDLE_FORCED_UNWIND 1 +#endif + +/*! \cond INTERNAL */ + +namespace Azure { +namespace AI { +namespace Core { +namespace _detail { + +template +class ProtectedAccess : public T +{ +public: + + static AZAC_HANDLE HandleFromPtr(T* ptr) { + if (ptr == nullptr) + { + return nullptr; + } + auto access = static_cast(ptr); + return (AZAC_HANDLE)(*access); + } + + static AZAC_HANDLE HandleFromConstPtr(const T* ptr) { + if (ptr == nullptr) + { + return nullptr; + } + auto access = static_cast(ptr); + return (AZAC_HANDLE)(*access); + } + + template + static std::shared_ptr FromHandle(AZAC_HANDLE handle, Args... extras) { + return T::FromHandle(handle, extras...); + } + + template + static std::shared_ptr Create(Args&&... args) { + return T::Create(std::forward(args)...); + } + +}; + +} } } } // Azure::AI::Core::Details + +/*! 
\endcond */ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx.h new file mode 100644 index 0000000..44fe059 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx.h @@ -0,0 +1,117 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx.h: Master include header for public C++ API declarations +// + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_audio_config.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_audio_config.h new file mode 100644 index 0000000..abfd623 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_audio_config.h @@ -0,0 +1,338 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_audio_config.h: Public API declarations for AudioConfig C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Audio { + + +/// +/// Represents audio input or output configuration. Audio input can be from a microphone, file, +/// or input stream. Audio output can be to a speaker, audio file output in WAV format, or output +/// stream. +/// +class AudioConfig +{ +public: + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXAUDIOCONFIGHANDLE() const { return m_haudioConfig.get(); } + + /// + /// Creates an AudioConfig object representing the default microphone on the system. + /// + /// A shared pointer to the AudioConfig object + static std::shared_ptr FromDefaultMicrophoneInput() + { + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_input_from_default_microphone(&haudioConfig)); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing the default microphone on the system. + /// + /// Audio processing options. 
+ /// A shared pointer to the AudioConfig object + static std::shared_ptr FromDefaultMicrophoneInput(std::shared_ptr audioProcessingOptions) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, audioProcessingOptions == nullptr); + + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_input_from_default_microphone(&haudioConfig)); + SPX_THROW_ON_FAIL(audio_config_set_audio_processing_options(haudioConfig, static_cast(*audioProcessingOptions.get()))); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing a specific microphone on the system. + /// Added in version 1.3.0. + /// + /// Specifies the device name. Please refer to this page on how to retrieve platform-specific microphone names. + /// A shared pointer to the AudioConfig object + static std::shared_ptr FromMicrophoneInput(const SPXSTRING& deviceName) + { + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_input_from_a_microphone(&haudioConfig, Utils::ToUTF8(deviceName).c_str())); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing a specific microphone on the system. + /// + /// Specifies the device name. Please refer to this page on how to retrieve platform-specific microphone names. + /// Audio processing options. + /// A shared pointer to the AudioConfig object + static std::shared_ptr FromMicrophoneInput(const SPXSTRING& deviceName, std::shared_ptr audioProcessingOptions) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, audioProcessingOptions == nullptr); + + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_input_from_a_microphone(&haudioConfig, Utils::ToUTF8(deviceName).c_str())); + SPX_THROW_ON_FAIL(audio_config_set_audio_processing_options(haudioConfig, static_cast(*audioProcessingOptions.get()))); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing the specified file. + /// + /// Specifies the audio input file. + /// A shared pointer to the AudioConfig object + static std::shared_ptr FromWavFileInput(const SPXSTRING& fileName) + { + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_input_from_wav_file_name(&haudioConfig, Utils::ToUTF8(fileName).c_str())); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing the specified file. + /// + /// Specifies the audio input file. + /// Audio processing options. + /// A shared pointer to the AudioConfig object + static std::shared_ptr FromWavFileInput(const SPXSTRING& fileName, std::shared_ptr audioProcessingOptions) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, audioProcessingOptions == nullptr); + + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_input_from_wav_file_name(&haudioConfig, Utils::ToUTF8(fileName).c_str())); + SPX_THROW_ON_FAIL(audio_config_set_audio_processing_options(haudioConfig, static_cast(*audioProcessingOptions.get()))); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing the specified stream. + /// + /// Specifies the custom audio input stream. 
+ /// A shared pointer to the AudioConfig object + static std::shared_ptr FromStreamInput(std::shared_ptr stream) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, stream == nullptr); + + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_input_from_stream(&haudioConfig, GetStreamHandle(stream))); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing the specified stream. + /// + /// Specifies the custom audio input stream. + /// Audio processing options. + /// A shared pointer to the AudioConfig object + static std::shared_ptr FromStreamInput(std::shared_ptr stream, std::shared_ptr audioProcessingOptions) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, stream == nullptr); + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, audioProcessingOptions == nullptr); + + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_input_from_stream(&haudioConfig, GetStreamHandle(stream))); + SPX_THROW_ON_FAIL(audio_config_set_audio_processing_options(haudioConfig, static_cast(*audioProcessingOptions.get()))); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing the default audio output device (speaker) on the system. + /// Added in version 1.4.0 + /// + /// A shared pointer to the AudioConfig object + static std::shared_ptr FromDefaultSpeakerOutput() + { + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_output_from_default_speaker(&haudioConfig)); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing a specific speaker on the system. + /// Added in version 1.14.0. + /// + /// Specifies the device name. Please refer to this page on how to retrieve platform-specific audio device names. + /// A shared pointer to the AudioConfig object + static std::shared_ptr FromSpeakerOutput(const SPXSTRING& deviceName) + { + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_output_from_a_speaker(&haudioConfig, Utils::ToUTF8(deviceName).c_str())); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing the specified file for audio output. + /// Added in version 1.4.0 + /// + /// Specifies the audio output file. The parent directory must already exist. + /// A shared pointer to the AudioConfig object + static std::shared_ptr FromWavFileOutput(const SPXSTRING& fileName) + { + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_output_from_wav_file_name(&haudioConfig, Utils::ToUTF8(fileName).c_str())); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing the specified output stream. + /// Added in version 1.4.0 + /// + /// Specifies the custom audio output stream. 
+ /// A shared pointer to the AudioConfig object + static std::shared_ptr FromStreamOutput(std::shared_ptr stream) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, stream == nullptr); + + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_output_from_stream(&haudioConfig, GetOutputStreamHandle(stream))); + + auto config = new AudioConfig(haudioConfig); + config->m_outputStream = stream; + return std::shared_ptr(config); + } + + /// + /// Sets a property value by name. + /// + /// The property name. + /// The property value. + void SetProperty(const SPXSTRING& name, const SPXSTRING& value) + { + property_bag_set_string(m_propertybag, -1, Utils::ToUTF8(name).c_str(), Utils::ToUTF8(value).c_str()); + } + + /// + /// Sets a property value by ID. + /// + /// The property id. + /// The property value. + void SetProperty(PropertyId id, const SPXSTRING& value) + { + property_bag_set_string(m_propertybag, static_cast(id), nullptr, Utils::ToUTF8(value).c_str()); + } + + /// + /// Gets a property value by name. + /// + /// The parameter name. + /// The property value. + SPXSTRING GetProperty(const SPXSTRING& name) const + { + const char* value = property_bag_get_string(m_propertybag, -1, Utils::ToUTF8(name).c_str(), ""); + return Utils::ToSPXString(Utils::CopyAndFreePropertyString(value)); + } + + /// + /// Gets a property value by ID. + /// + /// The parameter id. + /// The property value. + SPXSTRING GetProperty(PropertyId id) const + { + const char* value = property_bag_get_string(m_propertybag, static_cast(id), nullptr, ""); + return Utils::ToSPXString(Utils::CopyAndFreePropertyString(value)); + } + + /// + /// Gets an instance of AudioProcessingOptions class which contains the parameters for audio processing used by Speech SDK. + /// + /// A shared pointer to the AudioProcessingOptions object. + std::shared_ptr GetAudioProcessingOptions() const + { + SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_get_audio_processing_options(m_haudioConfig, &hoptions)); + + return std::make_shared(hoptions); + } + + /// + /// Destructs the object. + /// + virtual ~AudioConfig() + { + property_bag_release(m_propertybag); + } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit AudioConfig(SPXAUDIOCONFIGHANDLE haudioConfig) + : m_haudioConfig(haudioConfig) + { + SPX_THROW_ON_FAIL(audio_config_get_property_bag(m_haudioConfig, &m_propertybag)); + } + + /// + /// Internal helper method to get the audio stream format handle. + /// + static SPXAUDIOSTREAMHANDLE GetStreamHandle(std::shared_ptr stream) { return (SPXAUDIOSTREAMHANDLE)(*stream.get()); } + + /// + /// Internal helper method to get the audio output stream format handle. + /// + static SPXAUDIOSTREAMHANDLE GetOutputStreamHandle(std::shared_ptr stream) { return (SPXAUDIOSTREAMHANDLE)(*stream.get()); } + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(AudioConfig); + + /// + /// Internal member variable that holds the smart handle. 
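// --- Editorial illustration (not part of the vendored diff) ------------------------------
// A hedged usage sketch for the AudioConfig factories declared above: pairing an input
// AudioConfig with a SpeechConfig-driven recognizer. The subscription key, region, and file
// name are placeholders supplied by the caller, not values from this repository.
#include <speechapi_cxx.h>

using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Audio;

inline void SketchAudioConfigUsage()
{
    auto speechConfig = SpeechConfig::FromSubscription("<subscription-key>", "<region>");

    // Capture from the system default microphone...
    auto micInput = AudioConfig::FromDefaultMicrophoneInput();
    auto micRecognizer = SpeechRecognizer::FromConfig(speechConfig, micInput);

    // ...or recognize from a WAV file instead.
    auto fileInput = AudioConfig::FromWavFileInput("utterance.wav");
    auto fileRecognizer = SpeechRecognizer::FromConfig(speechConfig, fileInput);

    auto result = fileRecognizer->RecognizeOnceAsync().get();
    (void)result;
    (void)micRecognizer;
}
// -----------------------------------------------------------------------------------------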
+ /// + SmartHandle m_haudioConfig; + + /// + /// Internal member variable that holds the properties of the audio config + /// + SPXPROPERTYBAGHANDLE m_propertybag; + + std::shared_ptr m_stream; + std::shared_ptr m_outputStream; +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Audio diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_audio_data_stream.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_audio_data_stream.h new file mode 100644 index 0000000..de3d3b7 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_audio_data_stream.h @@ -0,0 +1,239 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_audio_data_stream.h: Public API declarations for AudioDataStream C++ class +// + +#pragma once + +#include + +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +class SpeechSynthesisResult; +class KeywordRecognitionResult; + +/// +/// Represents audio data stream used for operating audio data as a stream. +/// Added in version 1.4.0 +/// +class AudioDataStream : public std::enable_shared_from_this +{ +private: + + /// + /// Internal member variable that holds the smart handle. + /// + SmartHandle m_haudioStream; + + /*! \cond PRIVATE */ + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXAUDIOSTREAMHANDLE hstream) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + audio_data_stream_get_property_bag(hstream, &hpropbag); + return hpropbag; + }()) + { + } + }; + + /// + /// Internal member variable that holds the properties associating to the audio data stream. + /// + PrivatePropertyCollection m_properties; + + /*! \endcond */ + +public: + + /// + /// Destroy the instance. + /// + ~AudioDataStream() + { + DetachInput(); + } + + /// + /// Creates a memory backed AudioDataStream for the specified audio input file. + /// Added in version 1.14.0 + /// + /// Specifies the audio input file. + /// A shared pointer to AudioDataStream + static std::shared_ptr FromWavFileInput(const SPXSTRING& fileName) + { + SPXAUDIOSTREAMHANDLE hstream = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_data_stream_create_from_file(&hstream, Utils::ToUTF8(fileName).c_str())); + + auto stream = new AudioDataStream(hstream); + return std::shared_ptr(stream); + } + + /// + /// Creates a memory backed AudioDataStream from given speech synthesis result. + /// + /// The speech synthesis result. + /// A shared pointer to AudioDataStream + static std::shared_ptr FromResult(std::shared_ptr result); + + /// + /// Obtains the memory backed AudioDataStream associated with a given KeywordRecognition result. + /// + /// The keyword recognition result. + /// An audio stream with the input to the KeywordRecognizer starting from right before the Keyword. + static std::shared_ptr FromResult(std::shared_ptr result); + + /// + /// Get current status of the audio data stream. + /// + /// Current status + StreamStatus GetStatus() + { + Stream_Status status = StreamStatus_Unknown; + SPX_THROW_ON_FAIL(audio_data_stream_get_status(m_haudioStream, &status)); + return (StreamStatus)status; + } + + /// + /// Check whether the stream has enough data to be read. + /// + /// The requested data size in bytes. 
+ /// A bool indicating whether the stream has enough data to be read. + bool CanReadData(uint32_t bytesRequested) + { + return audio_data_stream_can_read_data(m_haudioStream, bytesRequested); + } + + /// + /// Check whether the stream has enough data to be read, starting from the specified position. + /// + /// The position counting from start of the stream. + /// The requested data size in bytes. + /// A bool indicating whether the stream has enough data to be read. + bool CanReadData(uint32_t pos, uint32_t bytesRequested) + { + return audio_data_stream_can_read_data_from_position(m_haudioStream, bytesRequested, pos); + } + + /// + /// Reads a chunk of the audio data and fill it to given buffer + /// + /// A buffer to receive read data. + /// Size of the buffer. + /// Size of data filled to the buffer, 0 means end of stream + uint32_t ReadData(uint8_t* buffer, uint32_t bufferSize) + { + uint32_t filledSize = 0; + SPX_THROW_ON_FAIL(audio_data_stream_read(m_haudioStream, buffer, bufferSize, &filledSize)); + + return filledSize; + } + + /// + /// Reads a chunk of the audio data and fill it to given buffer, starting from the specified position. + /// + /// The position counting from start of the stream. + /// A buffer to receive read data. + /// Size of the buffer. + /// Size of data filled to the buffer, 0 means end of stream + uint32_t ReadData(uint32_t pos, uint8_t* buffer, uint32_t bufferSize) + { + uint32_t filledSize = 0; + SPX_THROW_ON_FAIL(audio_data_stream_read_from_position(m_haudioStream, buffer, bufferSize, pos, &filledSize)); + + return filledSize; + } + + /// + /// Save the audio data to a file, synchronously. + /// + /// The file name with full path. + void SaveToWavFile(const SPXSTRING& fileName) + { + SPX_THROW_ON_FAIL(audio_data_stream_save_to_wave_file(m_haudioStream, Utils::ToUTF8(fileName).c_str())); + } + + /// + /// Save the audio data to a file, asynchronously. + /// + /// The file name with full path. + /// An asynchronous operation representing the saving. + std::future SaveToWavFileAsync(const SPXSTRING& fileName) + { + auto keepAlive = this->shared_from_this(); + + auto future = std::async(std::launch::async, [keepAlive, this, fileName]() -> void { + SPX_THROW_ON_FAIL(audio_data_stream_save_to_wave_file(m_haudioStream, Utils::ToUTF8(fileName).c_str())); + }); + + return future; + } + + /// + /// Get current position of the audio data stream. + /// + /// Current position + uint32_t GetPosition() + { + uint32_t position = 0; + SPX_THROW_ON_FAIL(audio_data_stream_get_position(m_haudioStream, &position)); + return position; + } + + /// + /// Set current position of the audio data stream. + /// + /// Position to be set. + void SetPosition(uint32_t pos) + { + SPX_THROW_ON_FAIL(audio_data_stream_set_position(m_haudioStream, pos)); + } + + /// + /// Stops any more data from getting to the stream. + /// + void DetachInput() + { + SPX_THROW_ON_FAIL(audio_data_stream_detach_input(m_haudioStream)); + } + + /// + /// Explicit conversion operator. + /// + /// A handle. + explicit operator SPXAUDIOSTREAMHANDLE() { return m_haudioStream; } + + /// + /// Collection of additional SpeechSynthesisResult properties. + /// + const PropertyCollection& Properties; + +private: + + /// + /// Internal constructor. Creates a new instance using the provided handle. 
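// --- Editorial illustration (not part of the vendored diff) ------------------------------
// A hedged sketch of draining an AudioDataStream produced by speech synthesis, using the
// ReadData/SetPosition/SaveToWavFile members declared above. The synthesis result is assumed
// to come from a SpeechSynthesizer configured elsewhere in the plugin.
#include <speechapi_cxx.h>
#include <cstdint>
#include <memory>
#include <vector>

using namespace Microsoft::CognitiveServices::Speech;

inline void SketchAudioDataStream(std::shared_ptr<SpeechSynthesisResult> synthesisResult)
{
    auto stream = AudioDataStream::FromResult(synthesisResult);

    std::vector<uint8_t> chunk(4096);
    uint32_t bytesRead = 0;
    while ((bytesRead = stream->ReadData(chunk.data(), static_cast<uint32_t>(chunk.size()))) > 0)
    {
        // Hand the raw PCM chunk to the engine's playback or lip-sync path here.
    }

    // Rewind and persist the same audio to disk for debugging.
    stream->SetPosition(0);
    stream->SaveToWavFile("synth_debug.wav");
}
// -----------------------------------------------------------------------------------------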
+ /// + explicit AudioDataStream(SPXAUDIOSTREAMHANDLE haudioStream) : + m_haudioStream(haudioStream), + m_properties(haudioStream), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_audio_processing_options.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_audio_processing_options.h new file mode 100644 index 0000000..de3354b --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_audio_processing_options.h @@ -0,0 +1,358 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_audio_processing_options.h: Public API declarations for AudioProcessingOptions and related C++ classes +// + +#pragma once +#include +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Audio { + +/// +/// Types of preset microphone array geometries. +/// See [Microphone Array Recommendations](/azure/cognitive-services/speech-service/speech-devices-sdk-microphone) for more details. +/// +enum class PresetMicrophoneArrayGeometry +{ + /// + /// Indicates that no geometry specified. Speech SDK will determine the microphone array geometry. + /// + Uninitialized, + /// + /// Indicates a microphone array with one microphone in the center and six microphones evenly spaced + /// in a circle with radius approximately equal to 42.5 mm. + /// + Circular7, + /// + /// Indicates a microphone array with one microphone in the center and three microphones evenly spaced + /// in a circle with radius approximately equal to 42.5 mm. + /// + Circular4, + /// + /// Indicates a microphone array with four linearly placed microphones with 40 mm spacing between them. + /// + Linear4, + /// + /// Indicates a microphone array with two linearly placed microphones with 40 mm spacing between them. + /// + Linear2, + /// + /// Indicates a microphone array with a single microphone. + /// + Mono, + /// + /// Indicates a microphone array with custom geometry. + /// + Custom +}; + +/// +/// Types of microphone arrays. +/// +enum class MicrophoneArrayType +{ + /// + /// Indicates that the microphone array has microphones in a straight line. + /// + Linear, + /// + /// Indicates that the microphone array has microphones in a plane. + /// + Planar +}; + +/// +/// Defines speaker reference channel position in input audio. +/// +enum class SpeakerReferenceChannel +{ + /// + /// Indicates that the input audio does not have a speaker reference channel. + /// + None, + /// + /// Indicates that the last channel in the input audio corresponds to the speaker + /// reference for echo cancellation. + /// + LastChannel +}; + +typedef AudioProcessingOptions_MicrophoneCoordinates MicrophoneCoordinates; + +/// +/// Represents the geometry of a microphone array. +/// +struct MicrophoneArrayGeometry +{ + /// + /// Type of microphone array. + /// + MicrophoneArrayType microphoneArrayType; + /// + /// Start angle for beamforming in degrees. + /// + uint16_t beamformingStartAngle; + /// + /// End angle for beamforming in degrees. + /// + uint16_t beamformingEndAngle; + /// + /// Coordinates of microphones in the microphone array. 
+ /// + std::vector microphoneCoordinates; + + /// + /// Creates a new instance of MicrophoneArrayGeometry. + /// Beamforming start angle is set to zero. + /// Beamforming end angle is set to 180 degrees if microphoneArrayType is Linear, otherwise it is set to 360 degrees. + /// + /// Type of microphone array. + /// Coordinates of microphones in the microphone array. + MicrophoneArrayGeometry(MicrophoneArrayType microphoneArrayType, const std::vector& microphoneCoordinates) + { + this->microphoneArrayType = microphoneArrayType; + this->beamformingStartAngle = 0; + this->beamformingEndAngle = (microphoneArrayType == MicrophoneArrayType::Linear) ? 180 : 360; + this->microphoneCoordinates.resize(microphoneCoordinates.size()); + for (size_t i = 0; i < microphoneCoordinates.size(); i++) + { + this->microphoneCoordinates[i] = microphoneCoordinates[i]; + } + } + + /// + /// Creates a new instance of MicrophoneArrayGeometry. + /// + /// Type of microphone array. + /// Start angle for beamforming in degrees. + /// End angle for beamforming in degrees. + /// Coordinates of microphones in the microphone array. + MicrophoneArrayGeometry(MicrophoneArrayType microphoneArrayType, uint16_t beamformingStartAngle, uint16_t beamformingEndAngle, const std::vector& microphoneCoordinates) + { + this->microphoneArrayType = microphoneArrayType; + this->beamformingStartAngle = beamformingStartAngle; + this->beamformingEndAngle = beamformingEndAngle; + this->microphoneCoordinates.resize(microphoneCoordinates.size()); + for (size_t i = 0; i < microphoneCoordinates.size(); i++) + { + this->microphoneCoordinates[i] = microphoneCoordinates[i]; + } + } +}; + +/// +/// Represents audio processing options used with audio config class. +/// +class AudioProcessingOptions +{ +public: + + /// + /// Creates a new instance using the provided handle. + /// + /// A handle to audio processing options. + explicit AudioProcessingOptions(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions) + : m_hoptions(hoptions) + { + SPX_THROW_ON_FAIL(audio_processing_options_get_property_bag(m_hoptions, &m_propertybag)); + } + + /// + /// Destructs an instance of the AudioProcessingOptions class. + /// + ~AudioProcessingOptions() = default; + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXAUDIOPROCESSINGOPTIONSHANDLE() const { return m_hoptions.get(); } + + /// + /// Creates a new instance of the AudioProcessingOptions class. + /// + /// Specifies flags to control the audio processing performed by Speech SDK. It is bitwise OR of AUDIO_INPUT_PROCESSING_XXX constants. + /// The newly created AudioProcessingOptions wrapped inside a std::shared_ptr. + /// + /// This function should only be used when the audio input is from a microphone array. + /// On Windows, this function will try to query the microphone array geometry from the audio driver. Audio data is also read from speaker reference channel. + /// On Linux, it assumes that the microphone is a single channel microphone. + /// + static std::shared_ptr Create(int audioProcessingFlags) + { + SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_processing_options_create(&hoptions, audioProcessingFlags)); + + auto options = new AudioProcessingOptions(hoptions); + return std::shared_ptr(options); + } + + /// + /// Creates a new instance of the AudioProcessingOptions class with preset microphone array geometry. + /// + /// Specifies flags to control the audio processing performed by Speech SDK. 
It is bitwise OR of AUDIO_INPUT_PROCESSING_XXX constants. + /// Specifies the type of microphone array geometry. + /// Specifies the speaker reference channel position in the input audio. + /// The newly created AudioProcessingOptions wrapped inside a std::shared_ptr. + static std::shared_ptr Create(int audioProcessingFlags, PresetMicrophoneArrayGeometry microphoneArrayGeometry, SpeakerReferenceChannel speakerReferenceChannel = SpeakerReferenceChannel::None) + { + SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_processing_options_create_from_preset_microphone_array_geometry(&hoptions, audioProcessingFlags, (AudioProcessingOptions_PresetMicrophoneArrayGeometry)microphoneArrayGeometry, (AudioProcessingOptions_SpeakerReferenceChannel)speakerReferenceChannel)); + + auto options = new AudioProcessingOptions(hoptions); + return std::shared_ptr(options); + } + + /// + /// Creates a new instance of the AudioProcessingOptions class with microphone array geometry. + /// + /// Specifies flags to control the audio processing performed by Speech SDK. It is bitwise OR of AUDIO_INPUT_PROCESSING_XXX constants. + /// Specifies the microphone array geometry. + /// Specifies the speaker reference channel position in the input audio. + /// The newly created AudioProcessingOptions wrapped inside a std::shared_ptr. + static std::shared_ptr Create(int audioProcessingFlags, MicrophoneArrayGeometry microphoneArrayGeometry, SpeakerReferenceChannel speakerReferenceChannel = SpeakerReferenceChannel::None) + { + AudioProcessingOptions_MicrophoneArrayGeometry geometry + { + (AudioProcessingOptions_MicrophoneArrayType)microphoneArrayGeometry.microphoneArrayType, + microphoneArrayGeometry.beamformingStartAngle, + microphoneArrayGeometry.beamformingEndAngle, + (uint16_t)microphoneArrayGeometry.microphoneCoordinates.size(), + microphoneArrayGeometry.microphoneCoordinates.data() + }; + + SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_processing_options_create_from_microphone_array_geometry(&hoptions, audioProcessingFlags, &geometry, (AudioProcessingOptions_SpeakerReferenceChannel)speakerReferenceChannel)); + + auto options = new AudioProcessingOptions(hoptions); + return std::shared_ptr(options); + } + + /// + /// Returns the type of audio processing performed by Speech SDK. + /// + /// Bitwise OR of AUDIO_INPUT_PROCESSING_XXX constant flags indicating the input audio processing performed by Speech SDK. + int GetAudioProcessingFlags() const + { + int audioProcessingFlags; + SPX_THROW_ON_FAIL(audio_processing_options_get_audio_processing_flags(m_hoptions, &audioProcessingFlags)); + return audioProcessingFlags; + } + + /// + /// Returns the microphone array geometry of the microphone used for audio input. + /// + /// A value of type PresetMicrophoneArrayGeometry enum. + PresetMicrophoneArrayGeometry GetPresetMicrophoneArrayGeometry() const + { + PresetMicrophoneArrayGeometry microphoneArrayGeometry = PresetMicrophoneArrayGeometry::Uninitialized; + SPX_THROW_ON_FAIL(audio_processing_options_get_preset_microphone_array_geometry(m_hoptions, (AudioProcessingOptions_PresetMicrophoneArrayGeometry*)µphoneArrayGeometry)); + return microphoneArrayGeometry; + } + + /// + /// Returns the microphone array type of the microphone used for audio input. + /// + /// A value of type MicrophoneArrayType enum. 
+ MicrophoneArrayType GetMicrophoneArrayType() const + { + MicrophoneArrayType microphoneArrayType = MicrophoneArrayType::Linear; + SPX_THROW_ON_FAIL(audio_processing_options_get_microphone_array_type(m_hoptions, (AudioProcessingOptions_MicrophoneArrayType*)µphoneArrayType)); + return microphoneArrayType; + } + + /// + /// Returns the start angle used for beamforming. + /// + /// Beamforming start angle. + uint16_t GetBeamformingStartAngle() const + { + uint16_t startAngle; + SPX_THROW_ON_FAIL(audio_processing_options_get_beamforming_start_angle(m_hoptions, &startAngle)); + return startAngle; + } + + /// + /// Returns the end angle used for beamforming. + /// + /// Beamforming end angle. + uint16_t GetBeamformingEndAngle() const + { + uint16_t endAngle; + SPX_THROW_ON_FAIL(audio_processing_options_get_beamforming_end_angle(m_hoptions, &endAngle)); + return endAngle; + } + + /// + /// Returns the coordinates of microphones in the microphone array used for audio input. + /// + /// A std::vector of MicrophoneCoordinates elements. + std::vector GetMicrophoneCoordinates() const + { + uint16_t microphoneCount; + SPX_THROW_ON_FAIL(audio_processing_options_get_microphone_count(m_hoptions, µphoneCount)); + + std::vector microphoneCoordinates(microphoneCount); + SPX_THROW_ON_FAIL(audio_processing_options_get_microphone_coordinates(m_hoptions, microphoneCoordinates.data(), microphoneCount)); + return microphoneCoordinates; + } + + /// + /// Returns the speaker reference channel position in the audio input. + /// + /// A value of type SpeakerReferenceChannel enum. + SpeakerReferenceChannel GetSpeakerReferenceChannel() const + { + SpeakerReferenceChannel speakerReferenceChannel = SpeakerReferenceChannel::None; + SPX_THROW_ON_FAIL(audio_processing_options_get_speaker_reference_channel(m_hoptions, (AudioProcessingOptions_SpeakerReferenceChannel*)&speakerReferenceChannel)); + return speakerReferenceChannel; + } + + /// + /// Sets a property value by name. + /// + /// The property name. + /// The property value. + void SetProperty(const SPXSTRING& name, const SPXSTRING& value) + { + property_bag_set_string(m_propertybag, -1, Utils::ToUTF8(name).c_str(), Utils::ToUTF8(value).c_str()); + } + + /// + /// Gets a property value by name. + /// + /// The parameter name. + /// The property value. + SPXSTRING GetProperty(const SPXSTRING& name) const + { + const char* value = property_bag_get_string(m_propertybag, -1, Utils::ToUTF8(name).c_str(), ""); + return Utils::ToSPXString(Utils::CopyAndFreePropertyString(value)); + } + +private: + + DISABLE_COPY_AND_MOVE(AudioProcessingOptions); + + /// + /// Internal member variable that holds the smart handle. + /// + SmartHandle m_hoptions; + + /// + /// Internal member variable that holds the properties of the audio processing options. + /// + SmartHandle m_propertybag; +}; + +} } } } // Microsoft::CognitiveServices::Speech::Audio diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_audio_stream.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_audio_stream.h new file mode 100644 index 0000000..6a3f7d2 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_audio_stream.h @@ -0,0 +1,995 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
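// --- Editorial illustration (not part of the vendored diff) ------------------------------
// A hedged sketch of attaching AudioProcessingOptions to microphone input. The
// AUDIO_INPUT_PROCESSING_ENABLE_DEFAULT flag used here is one of the AUDIO_INPUT_PROCESSING_XXX
// constants referenced by the doc comments above; it is assumed to be provided by the bundled
// C API headers.
#include <speechapi_cxx.h>
#include <memory>

using namespace Microsoft::CognitiveServices::Speech::Audio;

inline std::shared_ptr<AudioConfig> SketchProcessedMicrophoneInput()
{
    auto options = AudioProcessingOptions::Create(AUDIO_INPUT_PROCESSING_ENABLE_DEFAULT);
    return AudioConfig::FromDefaultMicrophoneInput(options);
}
// -----------------------------------------------------------------------------------------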
+// +// speechapi_cxx_audio_stream.h: Public API declarations for AudioInputStream / AudioOutputStream and related C++ classes +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +namespace Dialog { + class ActivityReceivedEventArgs; +} + +namespace Audio { + + + +class PullAudioInputStreamCallback; +class PushAudioInputStream; +class PullAudioInputStream; +class PushAudioOutputStreamCallback; +class PushAudioOutputStream; +class PullAudioOutputStream; + + +/// +/// Represents audio input stream used for custom audio input configurations. +/// +class AudioInputStream +{ +public: + + using ReadCallbackFunction_Type = ::std::function; + using CloseCallbackFunction_Type = ::std::function; + /// Added in version 1.5.0. + using GetPropertyCallbackFunction_Type = std::function; + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXAUDIOSTREAMHANDLE() const { return m_haudioStream.get(); } + + /// + /// Creates a memory backed PushAudioInputStream using the default format (16 kHz, 16 bit, mono PCM). + /// + /// A shared pointer to PushAudioInputStream + static std::shared_ptr CreatePushStream(); + + /// + /// Creates a memory backed PushAudioInputStream with the specified audio format. + /// + /// Audio stream format. + /// A shared pointer to PushAudioInputStream + static std::shared_ptr CreatePushStream(std::shared_ptr format); + + /// + /// Creates a PullAudioInputStream that delegates to the specified callback functions for Read() and Close() methods, using the default format (16 kHz, 16 bit, mono PCM). + /// + /// Context pointer to use when invoking the callbacks. + /// Read callback. + /// Close callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr CreatePullStream(void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback = nullptr); + + /// + /// Creates a PullAudioInputStream that delegates to the specified callback functions for Read(), Close() and GetProperty() methods + /// Added in version 1.5.0. + /// + /// Context pointer to use when invoking the callbacks. + /// Read callback. + /// Close callback. + /// GetProperty callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr CreatePullStream(void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback, CUSTOM_AUDIO_PULL_STREAM_GET_PROPERTY_CALLBACK getPropertyCallback); + + /// Creates a PullAudioInputStream that delegates to the specified callback functions for Read() and Close() methods, using the default format (16 kHz, 16 bit, mono PCM). + /// + /// Read callback. + /// Close callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr CreatePullStream(ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback = nullptr); + + /// + /// Creates a PullAudioInputStream that delegates to the specified callback functions for Read(), Close() and GetProperty() methods. + /// Added in version 1.5.0. + /// + /// Read callback. + /// Close callback. + /// Get property callback. 
+ /// A shared pointer to PullAudioInputStream + static std::shared_ptr CreatePullStream(ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback, GetPropertyCallbackFunction_Type getPropertyCallback); + + + /// + /// Creates a PullAudioInputStream that delegates to the specified callback interface for the Read() and Close() methods, using the default format (16 kHz, 16 bit, mono PCM). + /// + /// Shared pointer to PullAudioInputStreamCallback instance. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr CreatePullStream(std::shared_ptr callback); + + /// + /// Creates a PullAudioInputStream that delegates to the specified callback functions for Read() and Close() methods. + /// + /// Audio stream format. + /// Context pointer to use when invoking the callbacks. + /// Read callback. + /// Close callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr CreatePullStream(std::shared_ptr format, void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback = nullptr); + + /// + /// Creates a PullAudioInputStream that delegates to the specified callback functions for Read(), Close() and GetProperty() methods. + /// Added in version 1.5.0. + /// + /// Audio stream format. + /// Context pointer to use when invoking the callbacks. + /// Read callback. + /// Close callback. + /// Get property callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr CreatePullStream(std::shared_ptr format, void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback, CUSTOM_AUDIO_PULL_STREAM_GET_PROPERTY_CALLBACK getPropertyCallback); + + /// Creates a PullAudioInputStream that delegates to the specified callback functions for Read() and Close() methods. + /// + /// Audio stream format. + /// Read callback. + /// Close callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr CreatePullStream(std::shared_ptr format, ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback = nullptr); + + /// + /// Creates a PullAudioInputStream that delegates to the specified callback functions for Read() and Close() methods. + /// Added in version 1.5.0. + /// + /// Audio stream format. + /// Read callback. + /// Close callback. + /// Get property callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr CreatePullStream(std::shared_ptr format, ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback, GetPropertyCallbackFunction_Type getPropertyCallback); + + + /// + /// Creates a PullAudioInputStream that delegates to the specified callback interface for the Read() and Close() methods, using the specified format. + /// + /// Audio stream format. + /// Shared pointer to PullAudioInputStreamCallback instance. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr CreatePullStream(std::shared_ptr format, std::shared_ptr callback); + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit AudioInputStream(SPXAUDIOSTREAMHANDLE haudioStream) : m_haudioStream(haudioStream) { } + + /// + /// Destructor, does nothing. + /// + virtual ~AudioInputStream() {} + + /// + /// Internal helper method to get the default format if the specified format is nullptr. 
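// --- Editorial illustration (not part of the vendored diff) ------------------------------
// A hedged sketch of pushing captured PCM (for example, frames delivered by the plugin's
// PortAudio capture path) into the SDK through a push stream. The default stream format is
// 16 kHz, 16-bit, mono PCM, matching the factory documented above; the function and parameter
// names are our own.
#include <speechapi_cxx.h>
#include <cstdint>

using namespace Microsoft::CognitiveServices::Speech::Audio;

inline void SketchPushStream(uint8_t* pcmData, uint32_t pcmSize)
{
    auto pushStream = AudioInputStream::CreatePushStream();
    auto audioInput = AudioConfig::FromStreamInput(pushStream);

    pushStream->Write(pcmData, pcmSize); // the SDK copies the buffer internally
    pushStream->Close();                 // signals end of stream to any attached recognizer
    (void)audioInput;
}
// -----------------------------------------------------------------------------------------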
+ /// + static std::shared_ptr UseDefaultFormatIfNull(std::shared_ptr format) { return format != nullptr ? format : AudioStreamFormat::GetDefaultInputFormat(); } + + /// + /// Internal helper method to get the audio stream format handle. + /// + static SPXAUDIOSTREAMFORMATHANDLE GetFormatHandle(std::shared_ptr format) { return (SPXAUDIOSTREAMFORMATHANDLE)(*format.get()); } + + /// + /// Internal member variable that holds the smart handle. + /// + SmartHandle m_haudioStream; + + protected: + static constexpr size_t m_maxPropertyLen = 1024; + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(AudioInputStream); +}; + + +/// +/// Represents memory backed push audio input stream used for custom audio input configurations. +/// +class PushAudioInputStream : public AudioInputStream +{ +public: + + /// + /// Destructor; closes the underlying stream if not already closed. + /// + virtual ~PushAudioInputStream() + { + if (audio_stream_is_handle_valid(m_haudioStream)) + { + CloseStream(); + } + } + + /// + /// Creates a memory backed PushAudioInputStream using the default format (16 kHz, 16 bit, mono PCM). + /// + /// A shared pointer to PushAudioInputStream + static std::shared_ptr Create() + { + return Create(nullptr); + } + + /// + /// Creates a memory backed PushAudioInputStream with the specified audio format. + /// + /// Audio stream format. + /// A shared pointer to PushAudioInputStream + static std::shared_ptr Create(std::shared_ptr format) + { + format = UseDefaultFormatIfNull(format); + + SPXAUDIOSTREAMHANDLE haudioStream = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_stream_create_push_audio_input_stream(&haudioStream, GetFormatHandle(format))); + + auto stream = new PushAudioInputStream(haudioStream); + return std::shared_ptr(stream); + } + + /// + /// Writes the audio data specified by making an internal copy of the data. + /// Note: The dataBuffer should not contain any audio header. + /// + /// The pointer to the audio buffer of which this function will make a copy. + /// The size of the buffer. + void Write(uint8_t* dataBuffer, uint32_t size) + { + SPX_THROW_ON_FAIL(push_audio_input_stream_write(m_haudioStream, dataBuffer, size)); + } + + /// + /// Set value of a property. The properties of the audio data should be set before writing the audio data. + /// Added in version 1.5.0. + /// + /// The id of property. See + /// value to set + void SetProperty(PropertyId id, const SPXSTRING& value) + { + SPX_THROW_ON_FAIL(push_audio_input_stream_set_property_by_id(m_haudioStream, static_cast(id), Utils::ToUTF8(value).c_str())); + } + + /// + /// Set value of a property. The properties of the audio data should be set before writing the audio data. + /// Added in version 1.5.0. + /// + /// The name of property. + /// value to set + void SetProperty(const SPXSTRING& propertyName, const SPXSTRING& value) + { + SPX_THROW_ON_FAIL(push_audio_input_stream_set_property_by_name(m_haudioStream, Utils::ToUTF8(propertyName.c_str()), Utils::ToUTF8(value.c_str()))); + } + + /// + /// Closes the stream. + /// + void Close() { SPX_THROW_ON_FAIL(CloseStream()); } + + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit PushAudioInputStream(SPXAUDIOSTREAMHANDLE haudioStream) : AudioInputStream(haudioStream) { } + + /*! 
\endcond */ + +private: + + DISABLE_COPY_AND_MOVE(PushAudioInputStream); + + SPXHR CloseStream() { return push_audio_input_stream_close(m_haudioStream); } +}; + + +/// +/// An interface that defines callback methods for an audio input stream. +/// +/// +/// Derive from this class and implement its function to provide your own +/// data as an audio input stream. +/// +class PullAudioInputStreamCallback +{ +public: + + /// + /// Destructor, does nothing. + /// + virtual ~PullAudioInputStreamCallback() {} + + /// + /// This function is called to synchronously get data from the audio stream. + /// Note: The dataBuffer returned by Read() should not contain any audio header. + /// + /// The pointer to the buffer to which to copy the audio data. + /// The size of the buffer. + /// The number of bytes copied into the buffer, or zero to indicate end of stream + virtual int Read(uint8_t* dataBuffer, uint32_t size) = 0; + + /// + /// This function is called to synchronously to get meta information associated to stream data, such as TimeStamp or UserId . + /// Added in version 1.5.0. + /// + /// The id of the property. + /// The value of the property. + virtual SPXSTRING GetProperty(PropertyId id) + { + if (PropertyId::DataBuffer_TimeStamp == id) + { + return ""; + } + else if (PropertyId::DataBuffer_UserId == id) + { + return ""; + } + else + { + return ""; + } + } + + /// + /// This function is called to close the audio stream. + /// + /// + virtual void Close() = 0; + +protected: + + /*! \cond PROTECTED */ + + /// + /// Constructor, does nothing. + /// + PullAudioInputStreamCallback() {}; + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(PullAudioInputStreamCallback); +}; + + +/// +/// Pull audio input stream class. +/// +class PullAudioInputStream : public AudioInputStream +{ +public: + + /// + /// Creates a PullAudioInputStream utilizing the specified Read() and Close() "C" callback functions pointers + /// Note: The dataBuffer returned by Read() should not contain any audio header. + /// + /// Context pointer to use when invoking the callbacks. + /// Read callback. + /// Close callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr Create(void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback = nullptr) + { + return Create(nullptr, pvContext, readCallback, closeCallback); + } + + /// + /// Creates a PullAudioInputStream utilizing the specified Read(), Close() and GetProperty() "C" callback functions pointers + /// Note: The dataBuffer returned by Read() should not contain any audio header. + /// Added in version 1.5.0. + /// + /// Context pointer to use when invoking the callbacks. + /// Read callback. + /// Close callback. + /// Get property callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr Create(void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback, CUSTOM_AUDIO_PULL_STREAM_GET_PROPERTY_CALLBACK getPropertyCallback) + { + return Create(nullptr, pvContext, readCallback, closeCallback, getPropertyCallback); + } + + /// Creates a PullAudioInputStream utilizing the specified Read() and Close() callback functions. + /// Note: The dataBuffer returned by Read() should not contain any audio header. + /// + /// Read callback. + /// Close callback. 
+ /// A shared pointer to PullAudioInputStream + static std::shared_ptr Create(ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback = nullptr) + { + return Create(nullptr, readCallback, closeCallback); + } + + /// + /// Creates a PullAudioInputStream utilizing the specified Read(), Close() and GetProperty() callback functions. + /// Note: The dataBuffer returned by Read() should not contain any audio header. + /// Added in version 1.5.0. + /// + /// Read callback. + /// Close callback. + /// Get property callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr Create(ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback, GetPropertyCallbackFunction_Type getPropertyCallback) + { + return Create(nullptr, readCallback, closeCallback, getPropertyCallback); + } + + /// + /// Creates a PullAudioInputStream utilizing the specified Close() callback function. + /// + /// Shared pointer to PullAudioInputStreamCallback instance. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr Create(std::shared_ptr callback) + { + return Create(nullptr, callback); + } + + /// + /// Creates a PullAudioInputStream utilizing the specified Read() and Close() "C" callback functions pointers + /// Note: The dataBuffer returned by Read() should not contain any audio header. + /// + /// Audio stream format. + /// Context pointer to use when invoking the callbacks. + /// Read callback. + /// Close callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr Create(std::shared_ptr format, void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback = nullptr) + { + return Create(format, + [=](uint8_t* buffer, uint32_t size) -> int { return readCallback(pvContext, buffer, size); }, + [=]() { if (closeCallback != nullptr) { closeCallback(pvContext); } }); + } + + /// + /// Creates a PullAudioInputStream utilizing the specified Read(), Close() and GetProperty() "C" callback functions pointers + /// Note: The dataBuffer returned by Read() should not contain any audio header. + /// + /// Audio stream format. + /// Context pointer to use when invoking the callbacks. + /// Read callback. + /// Close callback. + /// Get property callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr Create(std::shared_ptr format, void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback, CUSTOM_AUDIO_PULL_STREAM_GET_PROPERTY_CALLBACK getPropertyCallback) + { + return Create(format, + [=](uint8_t* buffer, uint32_t size) -> int { return readCallback(pvContext, buffer, size); }, + [=]() { if (closeCallback != nullptr) { closeCallback(pvContext); } }, + [=](PropertyId id) -> SPXSTRING + { + uint8_t result[m_maxPropertyLen]; + getPropertyCallback(pvContext, static_cast(id), result, m_maxPropertyLen); + return reinterpret_cast(result); + }); + } + + /// + /// Creates a PullAudioInputStream utilizing the specified Read() and Close() callback functions. + /// Note: The dataBuffer returned by Read() should not contain any audio header. + /// + /// Audio stream format. + /// Read callback. + /// Close callback. 
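// --- Editorial illustration (not part of the vendored diff) ------------------------------
// A hedged sketch of the lambda-based Create() overloads above. The read callback must fill
// the buffer with header-less PCM and return 0 at end of stream; the optional property
// callback can attach per-buffer metadata such as PropertyId::DataBuffer_TimeStamp. The
// placeholder bodies are ours, not SDK behaviour.
#include <speechapi_cxx.h>
#include <cstdint>
#include <cstring>
#include <memory>

using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Audio;

inline std::shared_ptr<PullAudioInputStream> SketchPullStream()
{
    auto readCallback = [](uint8_t* buffer, uint32_t size) -> int
    {
        // Placeholder: real code would copy pending capture data here and return the byte count.
        std::memset(buffer, 0, size);
        return static_cast<int>(size);
    };
    auto closeCallback = []() { /* release plugin-side capture resources here */ };
    auto getPropertyCallback = [](PropertyId id) -> SPXSTRING
    {
        (void)id; // e.g. return a capture timestamp for PropertyId::DataBuffer_TimeStamp
        return SPXSTRING();
    };

    return PullAudioInputStream::Create(readCallback, closeCallback, getPropertyCallback);
}
// -----------------------------------------------------------------------------------------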
+ /// A shared pointer to PullAudioInputStream + static std::shared_ptr Create(std::shared_ptr format, ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback = nullptr) + { + auto wrapper = std::make_shared(readCallback, closeCallback); + return Create(format, wrapper); + } + + /// + /// Creates a PullAudioInputStream utilizing the specified Read(), Close() and GetProperty() callback functions. + /// Note: The dataBuffer returned by Read() should not contain any audio header. + /// Added in version 1.5.0. + /// + /// Audio stream format. + /// Read callback. + /// Close callback. + /// Get property callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr Create(std::shared_ptr format, ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback, GetPropertyCallbackFunction_Type getPropertyCallback) + { + auto wrapper = std::make_shared(readCallback, closeCallback, getPropertyCallback); + return Create(format, wrapper); + } + + /// + /// Creates a PullAudioInputStream utilizing the specified Read() and Close() callback functions. + /// + /// Audio stream format. + /// Shared pointer to PullAudioInputStreamCallback instance. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr Create(std::shared_ptr format, std::shared_ptr callback) + { + format = UseDefaultFormatIfNull(format); + + SPXAUDIOSTREAMHANDLE haudioStream = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_stream_create_pull_audio_input_stream(&haudioStream, GetFormatHandle(format))); + + auto stream = new PullAudioInputStream(haudioStream); + SPX_THROW_ON_FAIL(pull_audio_input_stream_set_callbacks(haudioStream, stream, ReadCallbackWrapper, CloseCallbackWrapper)); + SPX_THROW_ON_FAIL(pull_audio_input_stream_set_getproperty_callback(haudioStream, stream, GetPropertyCallbackWrapper)); + + stream->m_callback = callback; + + return std::shared_ptr(stream); + } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit PullAudioInputStream(SPXAUDIOSTREAMHANDLE haudioStream) : AudioInputStream(haudioStream) { } + + class FunctionCallbackWrapper : public PullAudioInputStreamCallback + { + public: + + FunctionCallbackWrapper(ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback, GetPropertyCallbackFunction_Type getPropertyCallback = nullptr) : + m_readCallback(readCallback), + m_closeCallback(closeCallback), + m_getPropertyCallback(getPropertyCallback) + { + }; + + /// Note: The dataBuffer returned by Read() should not contain any audio header. + int Read(uint8_t* dataBuffer, uint32_t size) override { return m_readCallback(dataBuffer, size); } + void Close() override { if (m_closeCallback != nullptr) m_closeCallback(); }; + SPXSTRING GetProperty(PropertyId id) override + { + if (m_getPropertyCallback != nullptr) + { + return m_getPropertyCallback(id); + } + else + { + return ""; + } + } + + private: + + DISABLE_COPY_AND_MOVE(FunctionCallbackWrapper); + + ReadCallbackFunction_Type m_readCallback; + CloseCallbackFunction_Type m_closeCallback; + GetPropertyCallbackFunction_Type m_getPropertyCallback; + + }; + + /*! 
\endcond */ + +private: + + DISABLE_COPY_AND_MOVE(PullAudioInputStream); + + static int ReadCallbackWrapper(void* pvContext, uint8_t* dataBuffer, uint32_t size) + { + PullAudioInputStream* ptr = (PullAudioInputStream*)pvContext; + return ptr->m_callback->Read(dataBuffer, size); + } + + static void CloseCallbackWrapper(void* pvContext) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + PullAudioInputStream* ptr = (PullAudioInputStream*)pvContext; + ptr->m_callback->Close(); + } + + static void GetPropertyCallbackWrapper(void *pvContext, int id, uint8_t* result, uint32_t size) + { + PullAudioInputStream* ptr = (PullAudioInputStream*)pvContext; + auto value = ptr->m_callback->GetProperty(static_cast(id)); + auto valueSize = value.size() + 1; + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, valueSize > size); + std::memcpy(result, value.c_str(), valueSize); + } + + std::shared_ptr m_callback; +}; + + +inline std::shared_ptr AudioInputStream::CreatePushStream() +{ + return PushAudioInputStream::Create(); +} + +inline std::shared_ptr AudioInputStream::CreatePushStream(std::shared_ptr format) +{ + return PushAudioInputStream::Create(format); +} + +inline std::shared_ptr AudioInputStream::CreatePullStream(void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback) +{ + return PullAudioInputStream::Create(pvContext, readCallback, closeCallback); +} + +inline std::shared_ptr AudioInputStream::CreatePullStream(void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback, CUSTOM_AUDIO_PULL_STREAM_GET_PROPERTY_CALLBACK getPropertyCallback) +{ + return PullAudioInputStream::Create(pvContext, readCallback, closeCallback, getPropertyCallback); +} + +inline std::shared_ptr AudioInputStream::CreatePullStream(ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback) +{ + return PullAudioInputStream::Create(readCallback, closeCallback); +} + +inline std::shared_ptr AudioInputStream::CreatePullStream(ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback, GetPropertyCallbackFunction_Type getPropertyCallback) +{ + return PullAudioInputStream::Create(readCallback, closeCallback, getPropertyCallback); +} + +inline std::shared_ptr AudioInputStream::CreatePullStream(std::shared_ptr callback) +{ + return PullAudioInputStream::Create(callback); +} + +inline std::shared_ptr AudioInputStream::CreatePullStream(std::shared_ptr format, void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback) +{ + return PullAudioInputStream::Create(format, pvContext, readCallback, closeCallback); +} + +inline std::shared_ptr AudioInputStream::CreatePullStream(std::shared_ptr format, void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback, CUSTOM_AUDIO_PULL_STREAM_GET_PROPERTY_CALLBACK getPropertyCallback) +{ + return PullAudioInputStream::Create(format, pvContext, readCallback, closeCallback, getPropertyCallback); +} + +inline std::shared_ptr AudioInputStream::CreatePullStream(std::shared_ptr format, ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback) +{ + return PullAudioInputStream::Create(format, readCallback, closeCallback); +} + +inline std::shared_ptr AudioInputStream::CreatePullStream(std::shared_ptr format, ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback, 
GetPropertyCallbackFunction_Type getPropertyCallback) +{ + return PullAudioInputStream::Create(format, readCallback, closeCallback, getPropertyCallback); +} + +inline std::shared_ptr AudioInputStream::CreatePullStream(std::shared_ptr format, std::shared_ptr callback) +{ + return PullAudioInputStream::Create(format, callback); +} + + +/// +/// Represents audio output stream used for custom audio output configurations. +/// Updated in version 1.7.0 +/// +class AudioOutputStream +{ +public: + + using WriteCallbackFunction_Type = ::std::function; + using CloseCallbackFunction_Type = ::std::function; + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXAUDIOSTREAMHANDLE() const { return m_haudioStream.get(); } + + /// + /// Creates a memory backed PullAudioOutputStream. + /// + /// A shared pointer to PullAudioOutputStream + static std::shared_ptr CreatePullStream(); + + /// + /// Creates a PushAudioOutputStream that delegates to the specified callback functions for Write() and Close() methods. + /// + /// Context pointer to use when invoking the callbacks. + /// Write callback. + /// Close callback. + /// A shared pointer to PushAudioOutputStream + static std::shared_ptr CreatePushStream(void* pvContext, CUSTOM_AUDIO_PUSH_STREAM_WRITE_CALLBACK writeCallback, CUSTOM_AUDIO_PUSH_STREAM_CLOSE_CALLBACK closeCallback = nullptr); + + /// + /// Creates a PushAudioOutputStream that delegates to the specified callback functions for Write() and Close() methods. + /// + /// Write callback. + /// Close callback. + /// A shared pointer to PushAudioOutputStream + static std::shared_ptr CreatePushStream(WriteCallbackFunction_Type writeCallback, CloseCallbackFunction_Type closeCallback = nullptr); + + /// + /// Creates a PushAudioOutputStream that delegates to the specified callback interface for Write() and Close() methods. + /// + /// Shared pointer to PushAudioOutputStreamCallback instance. + /// A shared pointer to PushAudioOutputStream + static std::shared_ptr CreatePushStream(std::shared_ptr callback); + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit AudioOutputStream(SPXAUDIOSTREAMHANDLE haudioStream) : m_haudioStream(haudioStream) { } + + /// + /// Destructor, does nothing. + /// + virtual ~AudioOutputStream() {} + + /// + /// Internal member variable that holds the smart handle. + /// + SmartHandle m_haudioStream; + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(AudioOutputStream); +}; + + +/// +/// Represents memory backed pull audio output stream used for custom audio output. +/// Updated in version 1.7.0 +/// +class PullAudioOutputStream : public AudioOutputStream +{ +public: + friend class Dialog::ActivityReceivedEventArgs; + + /// + /// Creates a memory backed PullAudioOutputStream. + /// + /// A shared pointer to PullAudioOutputStream + static std::shared_ptr Create() + { + SPXAUDIOSTREAMHANDLE haudioStream = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_stream_create_pull_audio_output_stream(&haudioStream)); + + auto stream = new PullAudioOutputStream(haudioStream); + return std::shared_ptr(stream); + } + + /// + /// Reads a chunk of the audio data and fill it to given buffer + /// + /// A buffer to receive read data. + /// Size of the buffer. 
+ /// Size of data filled to the buffer, 0 means end of stream + inline uint32_t Read(uint8_t* buffer, uint32_t bufferSize) + { + uint32_t filledSize = 0; + SPX_THROW_ON_FAIL(pull_audio_output_stream_read(m_haudioStream, buffer, bufferSize, &filledSize)); + + return filledSize; + } + + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit PullAudioOutputStream(SPXAUDIOSTREAMHANDLE haudioStream) : AudioOutputStream(haudioStream) { } + + /*! \endcond */ + + +private: + + template + static std::shared_ptr SpxAllocSharedBuffer(size_t sizeInBytes) + { + auto ptr = reinterpret_cast(new uint8_t[sizeInBytes]); + auto deleter = [](T* p) { delete[] reinterpret_cast(p); }; + + std::shared_ptr buffer(ptr, deleter); + return buffer; + } + + +private: + + DISABLE_COPY_AND_MOVE(PullAudioOutputStream); + + std::vector m_audioData; +}; + + +/// +/// An interface that defines callback methods for an audio output stream. +/// Updated in version 1.7.0 +/// +/// +/// Derive from this class and implement its function to provide your own +/// data as an audio output stream. +/// +class PushAudioOutputStreamCallback +{ +public: + + /// + /// Destructor, does nothing. + /// + virtual ~PushAudioOutputStreamCallback() {} + + /// + /// This function is called to synchronously put data to the audio stream. + /// + /// The pointer to the buffer from which to consume the audio data. + /// The size of the buffer. + /// The number of bytes consumed from the buffer + virtual int Write(uint8_t* dataBuffer, uint32_t size) = 0; + + /// + /// This function is called to close the audio stream. + /// + /// + virtual void Close() = 0; + +protected: + + /*! \cond PROTECTED */ + + /// + /// Constructor, does nothing. + /// + PushAudioOutputStreamCallback() {}; + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(PushAudioOutputStreamCallback); +}; + + +/// +/// Push audio output stream class. +/// Added in version 1.4.0 +/// +class PushAudioOutputStream : public AudioOutputStream +{ +public: + + /// + /// Creates a PushAudioOutputStream utilizing the specified Write() and Close() "C" callback functions pointers + /// + /// Context pointer to use when invoking the callbacks. + /// Write callback. + /// Close callback. + /// A shared pointer to PushAudioOutputStream + static std::shared_ptr Create(void* pvContext, CUSTOM_AUDIO_PUSH_STREAM_WRITE_CALLBACK writeCallback, CUSTOM_AUDIO_PUSH_STREAM_CLOSE_CALLBACK closeCallback = nullptr) + { + return Create( + [=](uint8_t* buffer, uint32_t size) -> int { return writeCallback(pvContext, buffer, size); }, + [=]() { if (closeCallback != nullptr) { closeCallback(pvContext); } }); + } + + /// + /// Creates a PushAudioOutputStream utilizing the specified Write() and Close() callback functions. + /// + /// Write callback. + /// Close callback. + /// A shared pointer to PushAudioOutputStream + static std::shared_ptr Create(WriteCallbackFunction_Type writeCallback, CloseCallbackFunction_Type closeCallback = nullptr) + { + auto wrapper = std::make_shared(writeCallback, closeCallback); + return Create(wrapper); + } + + /// + /// Creates a PushAudioOutputStream utilizing the specified callback interface with Write() and Close() callback function. + /// + /// Shared pointer to PushAudioOutputStreamCallback instance. 
+ /// A shared pointer to PushAudioOutputStream + static std::shared_ptr Create(std::shared_ptr callback) + { + SPXAUDIOSTREAMHANDLE haudioStream = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_stream_create_push_audio_output_stream(&haudioStream)); + + auto stream = new PushAudioOutputStream(haudioStream); + SPX_THROW_ON_FAIL(push_audio_output_stream_set_callbacks(haudioStream, stream, WriteCallbackWrapper, CloseCallbackWrapper)); + stream->m_callback = callback; + + return std::shared_ptr(stream); + } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit PushAudioOutputStream(SPXAUDIOSTREAMHANDLE haudioStream) : AudioOutputStream(haudioStream) { } + + class FunctionCallbackWrapper : public PushAudioOutputStreamCallback + { + public: + + FunctionCallbackWrapper(WriteCallbackFunction_Type writeCallback, CloseCallbackFunction_Type closeCallback) : + m_writeCallback(writeCallback), + m_closeCallback(closeCallback) + { + }; + + int Write(uint8_t* dataBuffer, uint32_t size) override { return m_writeCallback(dataBuffer, size); } + void Close() override { if (m_closeCallback != nullptr) m_closeCallback(); }; + + private: + + DISABLE_COPY_AND_MOVE(FunctionCallbackWrapper); + + WriteCallbackFunction_Type m_writeCallback; + CloseCallbackFunction_Type m_closeCallback; + }; + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(PushAudioOutputStream); + + static int WriteCallbackWrapper(void* pvContext, uint8_t* dataBuffer, uint32_t size) + { + PushAudioOutputStream* ptr = (PushAudioOutputStream*)pvContext; + return ptr->m_callback->Write(dataBuffer, size); + } + + static void CloseCallbackWrapper(void* pvContext) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + PushAudioOutputStream* ptr = (PushAudioOutputStream*)pvContext; + ptr->m_callback->Close(); + } + + std::shared_ptr m_callback; +}; + + +inline std::shared_ptr AudioOutputStream::CreatePullStream() +{ + return PullAudioOutputStream::Create(); +} + +inline std::shared_ptr AudioOutputStream::CreatePushStream(void* pvContext, CUSTOM_AUDIO_PUSH_STREAM_WRITE_CALLBACK writeCallback, CUSTOM_AUDIO_PUSH_STREAM_CLOSE_CALLBACK closeCallback) +{ + return PushAudioOutputStream::Create(pvContext, writeCallback, closeCallback); +} + +inline std::shared_ptr AudioOutputStream::CreatePushStream(WriteCallbackFunction_Type writeCallback, CloseCallbackFunction_Type closeCallback) +{ + return PushAudioOutputStream::Create(writeCallback, closeCallback); +} + +inline std::shared_ptr AudioOutputStream::CreatePushStream(std::shared_ptr callback) +{ + return PushAudioOutputStream::Create(callback); +} + + +} } } } // Microsoft::CognitiveServices::Speech::Audio diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_audio_stream_format.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_audio_stream_format.h new file mode 100644 index 0000000..ea2156b --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_audio_stream_format.h @@ -0,0 +1,215 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
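// Editor's note: an illustrative sketch (not part of the vendored headers in this patch) showing how the
// pull-stream and stream-format APIs above are typically combined. The raw-PCM file source, subscription
// key, and region below are placeholder assumptions for the example only.
#include <fstream>
#include <memory>
#include <string>
#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Audio;

std::shared_ptr<SpeechRecognizer> MakeRecognizerFromRawPcm(const std::string& rawPcmPath)
{
    // 16 kHz, 16-bit, mono PCM matches the service's default input format.
    auto format = AudioStreamFormat::GetWaveFormatPCM(16000, 16, 1);
    auto file = std::make_shared<std::ifstream>(rawPcmPath, std::ios::binary);

    // The read callback fills the buffer with headerless PCM; returning 0 signals end of stream.
    auto pullStream = AudioInputStream::CreatePullStream(format,
        [file](uint8_t* buffer, uint32_t size) -> int
        {
            file->read(reinterpret_cast<char*>(buffer), size);
            return static_cast<int>(file->gcount());
        },
        [file]() { file->close(); });

    auto audioConfig = AudioConfig::FromStreamInput(pullStream);
    auto speechConfig = SpeechConfig::FromSubscription("<subscription-key>", "<region>"); // placeholders
    return SpeechRecognizer::FromConfig(speechConfig, audioConfig);
}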
+// +// speechapi_cxx_audio_stream_format.h: Public API declarations for AudioStreamFormat and related C++ classes +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Audio { + +/// +/// Defines supported audio stream container format. +/// Changed in version 1.4.0. +/// +enum class AudioStreamContainerFormat +{ + /// + /// Stream ContainerFormat definition for OGG OPUS. + /// + OGG_OPUS = 0x101, + + /// + /// Stream ContainerFormat definition for MP3. + /// + MP3 = 0x102, + + /// + /// Stream ContainerFormat definition for FLAC. Added in version 1.7.0. + /// + FLAC = 0x103, + + /// + /// Stream ContainerFormat definition for ALAW. Added in version 1.7.0. + /// + ALAW = 0x104, + + /// + /// Stream ContainerFormat definition for MULAW. Added in version 1.7.0. + /// + MULAW = 0x105, + + /// + /// Stream ContainerFormat definition for AMRNB. Currently not supported. + /// + AMRNB = 0x106, + + /// + /// Stream ContainerFormat definition for AMRWB. Currently not supported. + /// + AMRWB = 0x107, + + /// + /// Stream ContainerFormat definition for any other or unknown format. + /// + ANY = 0x108 +}; + +/// +/// Represents the format specified inside WAV container. +/// +enum class AudioStreamWaveFormat +{ + /// + /// AudioStreamWaveFormat definition for PCM (pulse-code modulated) data in integer format. + /// + PCM = 0x0001, + + /// + /// AudioStreamWaveFormat definition A-law-encoded format. + /// + ALAW = 0x0006, + + /// + /// AudioStreamWaveFormat definition for Mu-law-encoded format. + /// + MULAW = 0x0007, + + /// + /// AudioStreamWaveFormat definition for G.722-encoded format. + /// + G722 = 0x028F +}; + +/// +/// Class to represent the audio stream format used for custom audio input configurations. +/// Updated in version 1.5.0. +/// +class AudioStreamFormat +{ +public: + + /// + /// Destructor, does nothing. + /// + virtual ~AudioStreamFormat() {} + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXAUDIOSTREAMFORMATHANDLE() const { return m_hformat.get(); } + + /// + /// Creates an audio stream format object representing the default audio stream format (16 kHz, 16 bit, mono PCM). + /// + /// A shared pointer to AudioStreamFormat + static std::shared_ptr GetDefaultInputFormat() + { + SPXAUDIOSTREAMFORMATHANDLE hformat = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_stream_format_create_from_default_input(&hformat)); + + auto format = new AudioStreamFormat(hformat); + return std::shared_ptr(format); + } + + /// + /// Creates an audio stream format object with the specified PCM waveformat characteristics. + /// + /// Samples per second. + /// Bits per sample. + /// Number of channels in the waveform-audio data. + /// The format specified inside the WAV container. + /// A shared pointer to AudioStreamFormat + static std::shared_ptr GetWaveFormat(uint32_t samplesPerSecond, uint8_t bitsPerSample, uint8_t channels, AudioStreamWaveFormat waveFormat) + { + SPXAUDIOSTREAMFORMATHANDLE hformat = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_stream_format_create_from_waveformat(&hformat, samplesPerSecond, bitsPerSample, channels, (Audio_Stream_Wave_Format)waveFormat)); + + auto format = new AudioStreamFormat(hformat); + return std::shared_ptr(format); + } + + /// + /// Creates an audio stream format object with the specified PCM waveformat characteristics. + /// + /// Samples per second. + /// Bits per sample. 
+ /// Number of channels in the waveform-audio data. + /// A shared pointer to AudioStreamFormat + static std::shared_ptr GetWaveFormatPCM(uint32_t samplesPerSecond, uint8_t bitsPerSample = 16, uint8_t channels = 1) + { + SPXAUDIOSTREAMFORMATHANDLE hformat = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_stream_format_create_from_waveformat(&hformat, samplesPerSecond, bitsPerSample, channels, Audio_Stream_Wave_Format::StreamWaveFormat_PCM)); + + auto format = new AudioStreamFormat(hformat); + return std::shared_ptr(format); + } + + /// + /// Creates an audio stream format object representing the default audio stream format (16 kHz, 16 bit, mono PCM). + /// Added in version 1.4.0 + /// + /// A shared pointer to AudioStreamFormat + static std::shared_ptr GetDefaultOutputFormat() + { + SPXAUDIOSTREAMFORMATHANDLE hformat = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_stream_format_create_from_default_output(&hformat)); + + auto format = new AudioStreamFormat(hformat); + return std::shared_ptr(format); + } + + /// + /// Creates an audio stream format object with the specified compressed audio container format, to be used as input format. + /// Support added in 1.4.0. + /// + /// + /// Formats are defined in AudioStreamContainerFormat enum. + /// + /// Compressed format type. + /// A shared pointer to AudioStreamFormat. + static std::shared_ptr GetCompressedFormat(AudioStreamContainerFormat compressedFormat) + { + SPXAUDIOSTREAMFORMATHANDLE hformat = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_stream_format_create_from_compressed_format(&hformat, (Audio_Stream_Container_Format)compressedFormat)); + + auto format = new AudioStreamFormat(hformat); + return std::shared_ptr(format); + } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit AudioStreamFormat(SPXAUDIOSTREAMFORMATHANDLE hformat) : m_hformat(hformat) { } + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(AudioStreamFormat); + + /// + /// Internal member variable that holds the smart handle. + /// + SmartHandle m_hformat; +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Audio diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_auto_detect_source_lang_config.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_auto_detect_source_lang_config.h new file mode 100644 index 0000000..e68f3fc --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_auto_detect_source_lang_config.h @@ -0,0 +1,141 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class that defines auto detection source configuration +/// Updated in 1.13.0 +/// +class AutoDetectSourceLanguageConfig +{ +public: + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. 
+ explicit operator SPXAUTODETECTSOURCELANGCONFIGHANDLE() const { return m_hconfig; } + + /// + /// Creates an instance of the AutoDetectSourceLanguageConfig with open range as source languages + /// Note: only , embedded speech translation and multilingual support source language auto detection from open range, + /// for , please use AutoDetectSourceLanguageConfig with specific source languages. + /// Added in 1.13.0 + /// + /// A shared pointer to the new AutoDetectSourceLanguageConfig instance. + static std::shared_ptr FromOpenRange() + { + SPXAUTODETECTSOURCELANGCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(create_auto_detect_source_lang_config_from_open_range(&hconfig)); + auto ptr = new AutoDetectSourceLanguageConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of the AutoDetectSourceLanguageConfig with source languages + /// + /// The list of source languages. + /// A shared pointer to the new AutoDetectSourceLanguageConfig instance. + static std::shared_ptr FromLanguages(const std::vector& languages) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, languages.empty()); + SPXAUTODETECTSOURCELANGCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + + std::string languagesStr; + bool isFirst = true; + for (const SPXSTRING& language : languages) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, language.empty()); + if (!isFirst) + { + languagesStr += ","; + } + isFirst = false; + languagesStr += Utils::ToUTF8(language); + } + SPX_THROW_ON_FAIL(create_auto_detect_source_lang_config_from_languages(&hconfig, languagesStr.c_str())); + auto ptr = new AutoDetectSourceLanguageConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of the AutoDetectSourceLanguageConfig with a list of source language config + /// + /// The list of source languages config + /// A shared pointer to the new AutoDetectSourceLanguageConfig instance. + static std::shared_ptr FromSourceLanguageConfigs(std::vector> configList) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, configList.empty()); + SPXAUTODETECTSOURCELANGCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + + bool isFirst = true; + for (const auto& config : configList) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, config == nullptr); + if (isFirst) + { + SPX_THROW_ON_FAIL(create_auto_detect_source_lang_config_from_source_lang_config(&hconfig, Utils::HandleOrInvalid(config))); + isFirst = false; + } + else + { + SPX_THROW_ON_FAIL(add_source_lang_config_to_auto_detect_source_lang_config(hconfig, Utils::HandleOrInvalid(config))); + } + } + auto ptr = new AutoDetectSourceLanguageConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Destructs the object. + /// + virtual ~AutoDetectSourceLanguageConfig() + { + auto_detect_source_lang_config_release(m_hconfig); + property_bag_release(m_propertybag); + } + +private: + + /// + /// Internal constructor. Creates a new instance using the provided handle. 
+ /// + explicit AutoDetectSourceLanguageConfig(SPXAUTODETECTSOURCELANGCONFIGHANDLE hconfig) + :m_hconfig(hconfig) + { + SPX_THROW_ON_FAIL(auto_detect_source_lang_config_get_property_bag(hconfig, &m_propertybag)); + } + + /// + /// Internal member variable that holds the config + /// + SPXAUTODETECTSOURCELANGCONFIGHANDLE m_hconfig; + + /// + /// Internal member variable that holds the properties of the speech config + /// + SPXPROPERTYBAGHANDLE m_propertybag; + + DISABLE_COPY_AND_MOVE(AutoDetectSourceLanguageConfig); +}; + +}}} + diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_auto_detect_source_lang_result.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_auto_detect_source_lang_result.h new file mode 100644 index 0000000..538ea58 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_auto_detect_source_lang_result.h @@ -0,0 +1,85 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Contains auto detected source language result +/// Added in 1.8.0 +/// +class AutoDetectSourceLanguageResult +{ +public: + + /// + /// Creates an instance of AutoDetectSourceLanguageResult object for the speech recognition result. + /// + /// The speech recognition result. + /// A shared pointer to AutoDetectSourceLanguageResult. + static std::shared_ptr FromResult(std::shared_ptr result) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, result == nullptr); + auto ptr = new AutoDetectSourceLanguageResult(result); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of AutoDetectSourceLanguageResult object for the speech translation result. + /// + /// The speech translation result. + /// A shared pointer to AutoDetectSourceLanguageResult. + static std::shared_ptr FromResult(std::shared_ptr result) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, result == nullptr); + auto ptr = new AutoDetectSourceLanguageResult(result); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of AutoDetectSourceLanguageResult object for the convesation transcription result. + /// + /// The conversation transcription result. + /// A shared pointer to AutoDetectSourceLanguageResult. + static std::shared_ptr FromResult(std::shared_ptr result) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, result == nullptr); + auto ptr = new AutoDetectSourceLanguageResult(result); + return std::shared_ptr(ptr); + } + + /// + /// The language value + /// If this is empty, it means the system fails to detect the source language automatically + /// + const SPXSTRING Language; + +protected: + + /*! \cond PROTECTED */ + // Using RecognitionResult pointer, so this can cover all classes that inherit from RecognitionResult + AutoDetectSourceLanguageResult(std::shared_ptr result) : + Language(result->Properties.GetProperty(PropertyId::SpeechServiceConnection_AutoDetectSourceLanguageResult)) + { + } + + /*! 
\endcond */ + +private: + + DISABLE_DEFAULT_CTORS(AutoDetectSourceLanguageResult); +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_class_language_model.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_class_language_model.h new file mode 100644 index 0000000..a3099cc --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_class_language_model.h @@ -0,0 +1,70 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_class_language_model.h: Public API declarations for ClassLanguageModel C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Represents a list of grammars for dynamic grammar scenarios. +/// Added in version 1.7.0. +/// +/// +/// ClassLanguageModels are only usable in specific scenarios and are not generally available. +/// +class ClassLanguageModel : public Grammar +{ +public: + + /// + /// Creates a class language model from a storage ID. + /// + /// The persisted storage ID of the language model. + /// The grammar list associated with the recognizer. + /// + /// Creating a ClassLanguageModel from a storage ID is only usable in specific scenarios and is not generally available. + /// + static std::shared_ptr FromStorageId(const SPXSTRING& storageId) + { + SPXGRAMMARHANDLE hgrammar = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(class_language_model_from_storage_id(&hgrammar, Utils::ToUTF8(storageId.c_str()))); + + return std::make_shared(hgrammar); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Class Language Model handle. + explicit ClassLanguageModel(SPXGRAMMARHANDLE hgrammar = SPXHANDLE_INVALID) : Grammar(hgrammar) { } + + /// + /// Assigns a grammar to a class in the language mode. + /// + /// Name of the class to assign the grammar to. + /// Grammar to assign. + template + void AssignClass(const SPXSTRING& className, std::shared_ptr grammar) + { + SPX_THROW_ON_FAIL(class_language_model_assign_class(m_hgrammar.get(), Utils::ToUTF8(className.c_str()), (SPXPHRASEHANDLE)(*grammar.get()))); + } + +private: + + DISABLE_COPY_AND_MOVE(ClassLanguageModel); +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_common.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_common.h new file mode 100644 index 0000000..2e8d382 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_common.h @@ -0,0 +1,16 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
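// Editor's note: a brief usage sketch (not part of the vendored headers) tying together the
// AutoDetectSourceLanguageConfig and AutoDetectSourceLanguageResult classes above. The candidate
// languages, subscription key, and region are placeholder assumptions.
#include <iostream>
#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech;

void RecognizeOnceWithLanguageDetection()
{
    auto speechConfig = SpeechConfig::FromSubscription("<subscription-key>", "<region>"); // placeholders
    auto autoDetectConfig = AutoDetectSourceLanguageConfig::FromLanguages({ "en-US", "de-DE", "ja-JP" });

    auto recognizer = SpeechRecognizer::FromConfig(speechConfig, autoDetectConfig);
    auto result = recognizer->RecognizeOnceAsync().get();

    // Language is empty when the service could not detect the source language.
    auto detected = AutoDetectSourceLanguageResult::FromResult(result);
    std::cout << "[" << detected->Language << "] " << result->Text << "\n";
}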
+// +// speechapi_cxx_common.h: Public API declarations for global C++ APIs/namespaces +// + +#pragma once + +#include +#include +#include +#include // must include after spxdebug.h or speechapi*.h (can NOT be included before) + +#define DISABLE_COPY_AND_MOVE(T) AZAC_DISABLE_COPY_AND_MOVE(T) +#define DISABLE_DEFAULT_CTORS(T) AZAC_DISABLE_DEFAULT_CTORS(T) diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_connection.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_connection.h new file mode 100644 index 0000000..c072254 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_connection.h @@ -0,0 +1,346 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Connection is a proxy class for managing connection to the speech service of the specified Recognizer. +/// By default, a Recognizer autonomously manages connection to service when needed. +/// The Connection class provides additional methods for users to explicitly open or close a connection and +/// to subscribe to connection status changes. +/// The use of Connection is optional. It is intended for scenarios where fine tuning of application +/// behavior based on connection status is needed. Users can optionally call Open() to manually +/// initiate a service connection before starting recognition on the Recognizer associated with this Connection. +/// After starting a recognition, calling Open() or Close() might fail. This will not impact +/// the Recognizer or the ongoing recognition. Connection might drop for various reasons, the Recognizer will +/// always try to reinstitute the connection as required to guarantee ongoing operations. In all these cases +/// Connected/Disconnected events will indicate the change of the connection status. +/// Updated in version 1.17.0. +/// +class Connection : public std::enable_shared_from_this +{ + +public: + /// + /// Gets the Connection instance from the specified recognizer. + /// + /// The recognizer associated with the connection. + /// The Connection instance of the recognizer. + static std::shared_ptr FromRecognizer(std::shared_ptr recognizer) + { + SPX_INIT_HR(hr); + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, recognizer == nullptr); + + SPXCONNECTIONHANDLE handle = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(hr = ::connection_from_recognizer(recognizer->m_hreco, &handle)); + + return std::make_shared(handle); + } + + /// + /// Gets the Connection instance from the specified conversation translator. + /// + /// The conversation translator associated with the connection. + /// The Connection instance of the conversation translator. + static std::shared_ptr FromConversationTranslator(std::shared_ptr convTrans) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, convTrans == nullptr); + + SPXCONNECTIONHANDLE handle = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::connection_from_conversation_translator(convTrans->m_handle, &handle)); + + return std::make_shared(handle); + } + + /// + /// Gets the Connection instance from the specified dialog service connector, used for observing and managing + /// connection and disconnection from the speech service. 
+ /// + /// The dialog service connector associated with the connection. + /// The Connection instance of the dialog service connector. + static std::shared_ptr FromDialogServiceConnector(std::shared_ptr dialogServiceConnector) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, dialogServiceConnector == nullptr); + + SPXCONNECTIONHANDLE handle = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::connection_from_dialog_service_connector(dialogServiceConnector->m_handle, &handle)); + + return std::make_shared(handle); + } + + /// + /// Gets the Connection instance from the specified speech synthesizer. + /// Added in version 1.17.0 + /// + /// The speech synthesizer associated with the connection. + /// The Connection instance of the speech synthesizer. + static std::shared_ptr FromSpeechSynthesizer(std::shared_ptr synthesizer) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, synthesizer == nullptr); + + SPXCONNECTIONHANDLE handle = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::connection_from_speech_synthesizer(synthesizer->m_hsynth, &handle)); + + return std::make_shared(handle); + } + + /// + /// Starts to set up connection to the service. + /// Users can optionally call Open() to manually set up a connection in advance before starting recognition/synthesis on the + /// Recognizer/Synthesizer associated with this Connection. After starting recognition, calling Open() might fail, depending on + /// the process state of the Recognizer/Synthesizer. But the failure does not affect the state of the associated Recognizer/Synthesizer. + /// Note: On return, the connection might not be ready yet. Please subscribe to the Connected event to + /// be notified when the connection is established. + /// + /// Indicates whether the connection is used for continuous recognition or single-shot recognition. It takes no effect if the connection is from SpeechSynthsizer. + void Open(bool forContinuousRecognition) + { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_connectionHandle == SPXHANDLE_INVALID); + SPX_THROW_ON_FAIL(::connection_open(m_connectionHandle, forContinuousRecognition)); + } + + /// + /// Closes the connection the service. + /// Users can optionally call Close() to manually shutdown the connection of the associated Recognizer/Synthesizer. The call + /// might fail, depending on the process state of the Recognizer/Synthesizer. But the failure does not affect the state of the + /// associated Recognizer/Synthesizer. + /// + void Close() + { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_connectionHandle == SPXHANDLE_INVALID); + SPX_THROW_ON_FAIL(::connection_close(m_connectionHandle)); + } + + /// + /// Appends a parameter in a message to service. + /// Added in version 1.7.0. + /// + /// the message path. + /// Name of the property. + /// Value of the property. This is a json string. + /// void. + void SetMessageProperty(const SPXSTRING& path, const SPXSTRING& propertyName, const SPXSTRING& propertyValue) + { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_connectionHandle == SPXHANDLE_INVALID); + SPX_THROW_ON_FAIL(::connection_set_message_property(m_connectionHandle, Utils::ToUTF8(path).c_str(), Utils::ToUTF8(propertyName).c_str(), Utils::ToUTF8(propertyValue).c_str())); + } + + /// + /// Send a message to the speech service. + /// Added in version 1.7.0. + /// + /// The path of the message. + /// The payload of the message. This is a json string. + /// An empty future. 
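// Editor's note: an illustrative sketch (not part of the vendored header) of the SetMessageProperty /
// SendMessageAsync pair documented above. The "speech.context" path and the JSON payloads are
// assumptions chosen only for the example.
#include <memory>
#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech;

void SendContextMessage(std::shared_ptr<SpeechRecognizer> recognizer)
{
    auto connection = Connection::FromRecognizer(recognizer);

    // Attach an extra property to outgoing "speech.context" messages, then send a custom payload.
    connection->SetMessageProperty("speech.context", "phraseDetection", R"({"mode":"Interactive"})");
    connection->SendMessageAsync("speech.context", R"({"custom":{"exampleKey":"exampleValue"}})").get();
}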
+ std::future SendMessageAsync(const SPXSTRING& path, const SPXSTRING& payload) + { + auto keep_alive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keep_alive, this, path, payload]() -> void { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_connectionHandle == SPXHANDLE_INVALID); + SPX_THROW_ON_FAIL(::connection_send_message(m_connectionHandle, Utils::ToUTF8(path.c_str()), Utils::ToUTF8(payload.c_str()))); + }); + return future; + } + + /// + /// Send a binary message to the speech service. + /// This method doesn't work for the connection of SpeechSynthesizer. + /// Added in version 1.10.0. + /// + /// The path of the message. + /// The binary payload of the message. + /// The size of the binary payload. + /// An empty future. + std::future SendMessageAsync(const SPXSTRING& path, uint8_t* payload, uint32_t size) + { + auto keep_alive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keep_alive, this, path, payload, size]() -> void { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_connectionHandle == SPXHANDLE_INVALID); + SPX_THROW_ON_FAIL(::connection_send_message_data(m_connectionHandle, Utils::ToUTF8(path.c_str()), payload, size)); + }); + return future; + } + + /// + /// The Connected event to indicate that the recognizer is connected to service. + /// + EventSignal Connected; + + /// + /// The Disconnected event to indicate that the recognizer is disconnected from service. + /// + EventSignal Disconnected; + + /// + /// The MessageReceived event to indicate that the underlying protocol received a message from the service. + /// Added in version 1.10.0. + /// + EventSignal MessageReceived; + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// The connection handle. + explicit Connection(SPXCONNECTIONHANDLE handle) : + Connected(GetConnectionEventConnectionsChangedCallback(), GetConnectionEventConnectionsChangedCallback()), + Disconnected(GetConnectionEventConnectionsChangedCallback(), GetConnectionEventConnectionsChangedCallback()), + MessageReceived(GetConnectionMessageEventConnectionsChangedCallback(), GetConnectionMessageEventConnectionsChangedCallback()), + m_connectionHandle(handle) + { + SPX_DBG_TRACE_FUNCTION(); + } + + /// + /// Destructor. + /// + ~Connection() + { + SPX_DBG_TRACE_FUNCTION(); + + try + { + Disconnected.DisconnectAll(); + Connected.DisconnectAll(); + } + catch (const std::exception& ex) + { + SPX_TRACE_ERROR("Exception caught in ~Connection(): %s", ex.what()); + (void)ex; + } + catch (...) + { + SPX_TRACE_ERROR("Unknown exception happened during ~Connection()."); + } + + if (m_connectionHandle != SPXHANDLE_INVALID) + { + ::connection_handle_release(m_connectionHandle); + m_connectionHandle = SPXHANDLE_INVALID; + } + } + +private: + DISABLE_COPY_AND_MOVE(Connection); + + SPXCONNECTIONHANDLE m_connectionHandle; + + static void FireConnectionEvent(bool firingConnectedEvent, SPXEVENTHANDLE event, void* context) + { + std::exception_ptr p; + try + { + std::unique_ptr connectionEvent{ new ConnectionEventArgs(event) }; + + auto connection = static_cast(context); + auto keepAlive = connection->shared_from_this(); + if (firingConnectedEvent) + { + connection->Connected.Signal(*connectionEvent.get()); + } + else + { + connection->Disconnected.Signal(*connectionEvent.get()); + } + } + +#ifdef SHOULD_HANDLE_FORCED_UNWIND + // Currently Python forcibly kills the thread by throwing __forced_unwind, + // taking care we propagate this exception further. 
+ catch (abi::__forced_unwind&) + { + SPX_TRACE_ERROR("__forced_unwind exception caught in FireConnectionEvent."); + throw; + } +#endif + catch (...) + { + if (recognizer_event_handle_is_valid(event)) { + recognizer_event_handle_release(event); + } + SPX_TRACE_ERROR("Caught exception in FireConnectionEvent(%s). Will rethrow later.", firingConnectedEvent ? "Connected" : "Disconnected"); + throw; + } + + // ConnectionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(event)); + recognizer_event_handle_release(event); + } + + static void FireEvent_Connected(SPXEVENTHANDLE event, void* context) + { + FireConnectionEvent(true, event, context); + } + + static void FireEvent_Disconnected(SPXEVENTHANDLE event, void* context) + { + FireConnectionEvent(false, event, context); + } + + static void FireEvent_MessageReceived(SPXEVENTHANDLE event, void* context) + { + std::unique_ptr connectionEvent { new ConnectionMessageEventArgs(event) }; + + auto connection = static_cast(context); + auto keepAlive = connection->shared_from_this(); + connection->MessageReceived.Signal(*connectionEvent.get()); + } + + void ConnectionEventConnectionsChanged(const EventSignal& connectionEvent) + { + if (m_connectionHandle != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_connectionHandle=0x%8p", __FUNCTION__, (void*)m_connectionHandle); + SPX_DBG_TRACE_VERBOSE_IF(!::connection_handle_is_valid(m_connectionHandle), "%s: m_connectionHandle is INVALID!!!", __FUNCTION__); + + if (&connectionEvent == &Connected) + { + SPX_THROW_ON_FAIL(connection_connected_set_callback(m_connectionHandle, Connected.IsConnected() ? FireEvent_Connected : nullptr, this)); + } + else if (&connectionEvent == &Disconnected) + { + SPX_THROW_ON_FAIL(connection_disconnected_set_callback(m_connectionHandle, Disconnected.IsConnected() ? FireEvent_Disconnected : nullptr, this)); + } + } + } + + void ConnectionMessageEventConnectionsChanged(const EventSignal& connectionEvent) + { + if (m_connectionHandle != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_connectionHandle=0x%8p", __FUNCTION__, (void*)m_connectionHandle); + SPX_DBG_TRACE_VERBOSE_IF(!::connection_handle_is_valid(m_connectionHandle), "%s: m_connectionHandle is INVALID!!!", __FUNCTION__); + + if (&connectionEvent == &MessageReceived) + { + SPX_THROW_ON_FAIL(connection_message_received_set_callback(m_connectionHandle, MessageReceived.IsConnected() ? FireEvent_MessageReceived : nullptr, this)); + } + } + } + + inline std::function&)> GetConnectionEventConnectionsChangedCallback() + { + return [=, this](const EventSignal& connectionEvent) { this->ConnectionEventConnectionsChanged(connectionEvent); }; + } + + inline std::function&)> GetConnectionMessageEventConnectionsChangedCallback() + { + return [=, this](const EventSignal& connectionEvent) { this->ConnectionMessageEventConnectionsChanged(connectionEvent); }; + } +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_connection_eventargs.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_connection_eventargs.h new file mode 100644 index 0000000..1e56f25 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_connection_eventargs.h @@ -0,0 +1,68 @@ +// +// Copyright (c) Microsoft. All rights reserved. 
+// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Provides data for the ConnectionEvent. +/// Added in version 1.2.0. +/// +class ConnectionEventArgs : public SessionEventArgs +{ +protected: + /*! \cond PRIVATE */ + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXEVENTHANDLE hevent) : + PropertyCollection([=]() + { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + recognizer_connection_event_get_property_bag(hevent, &hpropbag); + return hpropbag; + }()) + {} + }; + + PrivatePropertyCollection m_properties; + /*! \endcond */ + +public: + + /// + /// Constructor. + /// + /// Event handle. + explicit ConnectionEventArgs(SPXEVENTHANDLE hevent) : + SessionEventArgs(hevent), + m_properties(hevent), + Properties(m_properties) + { + }; + + /// + virtual ~ConnectionEventArgs() {} + + /// + /// Collection of additional properties. + /// + const PropertyCollection& Properties; + +private: + + DISABLE_COPY_AND_MOVE(ConnectionEventArgs); +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_connection_message.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_connection_message.h new file mode 100644 index 0000000..a1a9469 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_connection_message.h @@ -0,0 +1,152 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_connection_message.h: Public API declarations for ConnectionMessage C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// ConnectionMessage represents implementation specific messages sent to and received from +/// the speech service. These messages are provided for debugging purposes and should not +/// be used for production use cases with the Azure Cognitive Services Speech Service. +/// Messages sent to and received from the Speech Service are subject to change without +/// notice. This includes message contents, headers, payloads, ordering, etc. +/// Added in version 1.10.0. +/// +class ConnectionMessage +{ +private: + + /*! \cond PRIVATE */ + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXCONNECTIONMESSAGEHANDLE hcm) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + ::connection_message_get_property_bag(hcm, &hpropbag); + return hpropbag; + }()) + { + } + }; + + SPXCONNECTIONMESSAGEHANDLE m_hcm; + PrivatePropertyCollection m_properties; + + /*! \endcond */ + +public: + + /// + /// Constructor. + /// + /// Event handle. + explicit ConnectionMessage(SPXCONNECTIONMESSAGEHANDLE hcm) : + m_hcm(hcm), + m_properties(hcm), + Properties(m_properties) + { + }; + + /// + /// Destructor. + /// + virtual ~ConnectionMessage() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hcm); + SPX_THROW_ON_FAIL(::connection_message_handle_release(m_hcm)); + } + + /// + /// Gets the message path. + /// + /// An std::string containing the message path. 
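// Editor's note: an illustrative sketch (not part of the vendored headers) of inspecting a received
// ConnectionMessage through the accessors declared in this header; such a handler is typically attached
// via connection->MessageReceived. The logging itself is an assumption for the example.
#include <iostream>
#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech;

void LogConnectionMessage(const ConnectionMessageEventArgs& e)
{
    auto message = e.GetMessage();
    if (message->IsTextMessage())
    {
        std::cout << message->GetPath() << ": " << message->GetTextMessage() << "\n";
    }
    else if (message->IsBinaryMessage())
    {
        std::cout << message->GetPath() << ": " << message->GetBinaryMessage().size() << " bytes\n";
    }
}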
+ std::string GetPath() const + { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_hcm == SPXHANDLE_INVALID); + return m_properties.GetProperty("connection.message.path"); + } + + /// + /// Checks to see if the ConnectionMessage is a text message. + /// See also IsBinaryMessage(). + /// + /// A bool indicated if the message payload is text. + bool IsTextMessage() const + { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_hcm == SPXHANDLE_INVALID); + return m_properties.GetProperty("connection.message.type") == "text"; + } + + /// + /// Checks to see if the ConnectionMessage is a binary message. + /// See also GetBinaryMessage(). + /// + /// A bool indicated if the message payload is binary. + bool IsBinaryMessage() const + { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_hcm == SPXHANDLE_INVALID); + return m_properties.GetProperty("connection.message.type") == "binary"; + } + + /// + /// Gets the text message payload. Typically the text message content-type is + /// application/json. To determine other content-types use + /// Properties.GetProperty("Content-Type"). + /// + /// An std::string containing the text message. + std::string GetTextMessage() const + { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_hcm == SPXHANDLE_INVALID); + return m_properties.GetProperty("connection.message.text.message"); + } + + /// + /// Gets the binary message payload. + /// + /// An std::vector containing the binary message. + std::vector GetBinaryMessage() const + { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_hcm == SPXHANDLE_INVALID); + auto size = ::connection_message_get_data_size(m_hcm); + + std::vector message(size); + SPX_THROW_ON_FAIL(::connection_message_get_data(m_hcm, message.data(), size)); + + return message; + } + + /// + /// A collection of properties and their values defined for this . + /// Message headers can be accessed via this collection (e.g. "Content-Type"). + /// + PropertyCollection& Properties; + +private: + + /*! \cond PRIVATE */ + + DISABLE_COPY_AND_MOVE(ConnectionMessage); + + /*! \endcond */ +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_connection_message_eventargs.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_connection_message_eventargs.h new file mode 100644 index 0000000..3ff6f79 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_connection_message_eventargs.h @@ -0,0 +1,79 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_connection_message_eventargs.h: Public API declarations for ConnectionMessageEventArgs C++ base class +// + +#pragma once +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Provides data for the ConnectionMessageEvent +/// +class ConnectionMessageEventArgs : public EventArgs +{ +private: + + /*! \cond PRIVATE */ + + SPXEVENTHANDLE m_hevent; + std::shared_ptr m_message; + + /*! \endcond */ + +public: + + /// + /// Constructor. Creates a new instance using the provided handle. + /// + /// Event handle. + explicit ConnectionMessageEventArgs(SPXEVENTHANDLE hevent) : + m_hevent(hevent), + m_message(std::make_shared(MessageHandleFromEventHandle(hevent))) + { + }; + + /// + /// Destructor. 
+ /// + virtual ~ConnectionMessageEventArgs() + { + SPX_THROW_ON_FAIL(::connection_message_received_event_handle_release(m_hevent)); + } + + /// + /// Gets the associated with this . + /// + /// An `std::shared` containing the message. + std::shared_ptr GetMessage() const { return m_message; } + +private: + + /*! \cond PRIVATE */ + + DISABLE_COPY_AND_MOVE(ConnectionMessageEventArgs); + + SPXCONNECTIONMESSAGEHANDLE MessageHandleFromEventHandle(SPXEVENTHANDLE hevent) + { + SPXCONNECTIONMESSAGEHANDLE hcm = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::connection_message_received_event_get_message(hevent, &hcm)); + return hcm; + } + + /*! \endcond */ + +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversation.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversation.h new file mode 100644 index 0000000..339f22c --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversation.h @@ -0,0 +1,340 @@ + +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_conversation.h: Public API declarations for Conversation C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + +/// +/// Class for conversation. +/// Added in version 1.8.0 +/// +class Conversation : public std::enable_shared_from_this +{ +public: + + static constexpr size_t MAX_CONVERSATION_ID_LEN = 1024; + + /// + /// Create a conversation using a speech config and an optional conversation id. + /// + /// A shared smart pointer of a speech config object. + /// Conversation Id. + /// A shared smart pointer of the created conversation object. + static std::future> CreateConversationAsync(std::shared_ptr speechConfig, const SPXSTRING& conversationId = SPXSTRING()) + { + auto future = std::async(std::launch::async, [conversationId, speechConfig]() -> std::shared_ptr { + SPXCONVERSATIONHANDLE hconversation; + SPX_THROW_ON_FAIL(conversation_create_from_config(&hconversation, (SPXSPEECHCONFIGHANDLE)(*speechConfig), Utils::ToUTF8(conversationId).c_str())); + return std::make_shared(hconversation); + }); + return future; + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Recognizer handle. + explicit Conversation(SPXCONVERSATIONHANDLE hconversation) : + m_hconversation(hconversation), + m_properties(hconversation), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /// + /// Destructor. + /// + ~Conversation() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + ::conversation_release_handle(m_hconversation); + m_hconversation = SPXHANDLE_INVALID; + } + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXCONVERSATIONHANDLE () const { return m_hconversation; } + + /// + /// Get the conversation id. + /// + /// Conversation id. 
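// Editor's note: an illustrative sketch (not part of the vendored header) of creating a conversation and
// reading its id as documented below. The conversation id, participant id, subscription key, and region
// are placeholder assumptions.
#include <iostream>
#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Transcription;

void RunExampleConversation()
{
    auto speechConfig = SpeechConfig::FromSubscription("<subscription-key>", "<region>"); // placeholders
    auto conversation = Conversation::CreateConversationAsync(speechConfig, "exampleConversationId").get();

    std::cout << "Conversation id: " << conversation->GetConversationId() << "\n";

    conversation->StartConversationAsync().get();
    conversation->AddParticipantAsync("user@example.com").get();   // hypothetical participant id
    conversation->EndConversationAsync().get();
}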
+ SPXSTRING GetConversationId() + { + char id[MAX_CONVERSATION_ID_LEN + 1]; + std::memset(id, 0, MAX_CONVERSATION_ID_LEN); + SPX_THROW_ON_FAIL(conversation_get_conversation_id(m_hconversation, id, MAX_CONVERSATION_ID_LEN)); + return id; + } + + /// + /// Add a participant to a conversation using the user's id. + /// + /// Note: The returned participant can be used to remove. If the client changes the participant's attributes, + /// the changed attributes are passed on to the service only when the participant is added again. + /// + /// A user id. + /// a shared smart pointer of the participant. + std::future> AddParticipantAsync(const SPXSTRING& userId) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, userId]() -> std::shared_ptr { + const auto participant = Participant::From(userId); + SPX_THROW_ON_FAIL(conversation_update_participant(m_hconversation, true, (SPXPARTICIPANTHANDLE)(*participant))); + return participant; + }); + return future; + } + + /// + /// Add a participant to a conversation using the User object. + /// + /// A shared smart pointer to a User object. + /// The passed in User object. + std::future> AddParticipantAsync(const std::shared_ptr& user) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, user]() -> std::shared_ptr { + SPX_THROW_ON_FAIL(conversation_update_participant_by_user(m_hconversation, true, (SPXUSERHANDLE)(*user))); + return user; + }); + return future; + } + + /// + /// Add a participant to a conversation using the participant object + /// + /// A shared smart pointer to a participant object. + /// The passed in participant object. + std::future> AddParticipantAsync(const std::shared_ptr& participant) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, participant]() -> std::shared_ptr { + SPX_THROW_ON_FAIL(conversation_update_participant(m_hconversation, true, (SPXPARTICIPANTHANDLE)(*participant))); + return participant; + }); + return future; + } + + /// + /// Remove a participant from a conversation using the participant object + /// + /// A shared smart pointer of a participant object. + /// An empty future. + std::future RemoveParticipantAsync(const std::shared_ptr& participant) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, participant]() -> void { + SPX_THROW_ON_FAIL(conversation_update_participant(m_hconversation, false, (SPXPARTICIPANTHANDLE)(*participant))); + }); + return future; + } + + /// + /// Remove a participant from a conversation using the User object + /// + /// A smart pointer of a User. + /// An empty future. + std::future RemoveParticipantAsync(const std::shared_ptr& user) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, user]() -> void { + SPX_THROW_ON_FAIL(conversation_update_participant_by_user(m_hconversation, false, SPXUSERHANDLE(*user))); + }); + return future; + } + + /// + /// Remove a participant from a conversation using a user id string. + /// + /// A user id. + /// An empty future. 
+ std::future RemoveParticipantAsync(const SPXSTRING& userId) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, userId]() -> void { + SPX_THROW_ON_FAIL(conversation_update_participant_by_user_id(m_hconversation, false, Utils::ToUTF8(userId.c_str()))); + }); + return future; + } + + /// + /// Ends the current conversation. + /// + /// An empty future. + std::future EndConversationAsync() + { + return RunAsync(::conversation_end_conversation); + } + + /// + /// Sets the authorization token that will be used for connecting the server. + /// + /// The authorization token. + void SetAuthorizationToken(const SPXSTRING& token) + { + Properties.SetProperty(PropertyId::SpeechServiceAuthorization_Token, token); + } + + /// + /// Gets the authorization token. + /// + /// Authorization token + SPXSTRING GetAuthorizationToken() + { + return Properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token, SPXSTRING()); + } + + /// + /// Start the conversation. + /// + /// An empty future. + std::future StartConversationAsync() + { + return RunAsync(::conversation_start_conversation); + } + + /// + /// Deletes the conversation. Any participants that are still part of the converation + /// will be ejected after this call. + /// + /// An empty future. + std::future DeleteConversationAsync() + { + return RunAsync(::conversation_delete_conversation); + } + + /// + /// Locks the conversation. After this no new participants will be able to join. + /// + /// An empty future. + std::future LockConversationAsync() + { + return RunAsync(::conversation_lock_conversation); + } + + /// + /// Unlocks the conversation. + /// + /// An empty future. + std::future UnlockConversationAsync() + { + return RunAsync(::conversation_unlock_conversation); + } + + /// + /// Mutes all participants except for the host. This prevents others from generating + /// transcriptions, or sending text messages. + /// + /// An empty future. + std::future MuteAllParticipantsAsync() + { + return RunAsync(::conversation_mute_all_participants); + } + + /// + /// Allows other participants to generate transcriptions, or send text messages. + /// + /// An empty future. + std::future UnmuteAllParticipantsAsync() + { + return RunAsync(::conversation_unmute_all_participants); + } + + /// + /// Mutes a particular participant. This will prevent them generating new transcriptions, + /// or sending text messages. + /// + /// The identifier for the participant. + /// An empty future. + std::future MuteParticipantAsync(const SPXSTRING& participantId) + { + return RunAsync([participantId = Utils::ToUTF8(participantId)](auto handle) + { + return ::conversation_mute_participant(handle, participantId.c_str()); + }); + } + + /// + /// Unmutes a particular participant. + /// + /// The identifier for the participant. + /// An empty future. + std::future UnmuteParticipantAsync(const SPXSTRING& participantId) + { + return RunAsync([participantId = Utils::ToUTF8(participantId)](auto handle) + { + return ::conversation_unmute_participant(handle, participantId.c_str()); + }); + } + +private: + + /*! 
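// ---------------------------------------------------------------------------
// Illustrative usage sketch (editorial annotation, not part of the vendored
// header). It shows how the Conversation API declared above could be driven
// from host code; the subscription key, region, conversation id and user id
// are placeholders, and only methods declared in this header are used.
//
//   using namespace Microsoft::CognitiveServices::Speech;
//   using namespace Microsoft::CognitiveServices::Speech::Transcription;
//
//   auto speechConfig = SpeechConfig::FromSubscription("YourSubscriptionKey", "YourServiceRegion");
//   auto conversation = Conversation::CreateConversationAsync(speechConfig, "MyConversationId").get();
//   conversation->StartConversationAsync().get();
//
//   auto participant = conversation->AddParticipantAsync("katie@example.com").get();
//   conversation->MuteParticipantAsync("katie@example.com").get();
//   conversation->UnmuteParticipantAsync("katie@example.com").get();
//   conversation->RemoveParticipantAsync(participant).get();
//
//   conversation->EndConversationAsync().get();
//   conversation->DeleteConversationAsync().get();
// ---------------------------------------------------------------------------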
\cond PRIVATE */ + + SPXCONVERSATIONHANDLE m_hconversation; + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXCONVERSATIONHANDLE hconv) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + conversation_get_property_bag(hconv, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + inline std::future RunAsync(std::function func) + { + auto keepalive = this->shared_from_this(); + return std::async(std::launch::async, [keepalive, this, func]() + { + SPX_THROW_ON_FAIL(func(m_hconversation)); + }); + } + + /*! \endcond */ + +public: + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; + +}; + +}}}} diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversation_transcriber.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversation_transcriber.h new file mode 100644 index 0000000..8d376a4 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversation_transcriber.h @@ -0,0 +1,509 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_conversation_transcriber.h: Public API declarations for ConversationTranscriber C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + +class Session; + +/// +/// Class for ConversationTranscribers. +/// +class ConversationTranscriber final : public Recognizer +{ +public: + /// + /// Create a conversation transcriber from a speech config + /// + /// Speech configuration. + /// A smart pointer wrapped conversation transcriber pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechconfig, std::nullptr_t) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_conversation_transcriber_from_config( + &hreco, + Utils::HandleOrInvalid(speechconfig), + Utils::HandleOrInvalid(nullptr))); + return std::make_shared(hreco); + } + + /// + /// Create a conversation transcriber from a speech config and audio config. + /// + /// Speech configuration. + /// Audio configuration. + /// A smart pointer wrapped conversation transcriber pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechconfig, std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_conversation_transcriber_from_config( + &hreco, + Utils::HandleOrInvalid(speechconfig), + Utils::HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Create a conversation transcriber from a speech config, auto detection source language config and audio config + /// + /// Speech configuration. + /// Auto detection source language config. + /// Audio configuration. + /// A smart pointer wrapped conversation trasncriber pointer. 
+ static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr autoDetectSourceLangConfig, + std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_conversation_transcriber_from_auto_detect_source_lang_config( + &hreco, + Utils::HandleOrInvalid(speechconfig), + Utils::HandleOrInvalid(autoDetectSourceLangConfig), + Utils::HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Create a conversation transcriber from a speech config, source language config and audio config + /// + /// Speech configuration. + /// Source language config. + /// Audio configuration. + /// A smart pointer wrapped conversation transcriber pointer. + static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr sourceLanguageConfig, + std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_conversation_transcriber_from_source_lang_config( + &hreco, + Utils::HandleOrInvalid(speechconfig), + Utils::HandleOrInvalid(sourceLanguageConfig), + Utils::HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Create a conversation transcriber from a speech config, source language and audio config + /// + /// Speech configuration. + /// Source language. + /// Audio configuration. + /// A smart pointer wrapped conversation transcriber pointer. + static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + const SPXSTRING& sourceLanguage, + std::shared_ptr audioInput = nullptr) + { + return FromConfig(speechconfig, SourceLanguageConfig::FromLanguage(sourceLanguage), audioInput); + } + + /// + /// Asynchronously starts a conversation transcribing. + /// + /// An empty future. + std::future StartTranscribingAsync() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() -> void { + SPX_INIT_HR(hr); + SPX_THROW_ON_FAIL(hr = recognizer_async_handle_release(m_hasyncStartContinuous)); // close any unfinished previous attempt + + SPX_EXITFN_ON_FAIL(hr = recognizer_start_continuous_recognition_async(m_hreco, &m_hasyncStartContinuous)); + SPX_EXITFN_ON_FAIL(hr = recognizer_start_continuous_recognition_async_wait_for(m_hasyncStartContinuous, UINT32_MAX)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = recognizer_async_handle_release(m_hasyncStartContinuous); + SPX_REPORT_ON_FAIL(releaseHr); + m_hasyncStartContinuous = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(hr); + }); + + return future; + } + + /// + /// Asynchronously stops a conversation transcribing. + /// + /// An empty future. + std::future StopTranscribingAsync() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() -> void { + SPX_INIT_HR(hr); + SPX_THROW_ON_FAIL(hr = recognizer_async_handle_release(m_hasyncStopContinuous)); // close any unfinished previous attempt + + SPX_EXITFN_ON_FAIL(hr = recognizer_stop_continuous_recognition_async(m_hreco, &m_hasyncStopContinuous)); + SPX_EXITFN_ON_FAIL(hr = recognizer_stop_continuous_recognition_async_wait_for(m_hasyncStopContinuous, UINT32_MAX)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = recognizer_async_handle_release(m_hasyncStopContinuous); + SPX_REPORT_ON_FAIL(releaseHr); + m_hasyncStopContinuous = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(hr); + }); + + return future; + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Recognizer handle. 
+ explicit ConversationTranscriber(SPXRECOHANDLE hreco) throw() : + Recognizer(hreco), + SessionStarted(GetSessionEventConnectionsChangedCallback()), + SessionStopped(GetSessionEventConnectionsChangedCallback()), + SpeechStartDetected(GetRecognitionEventConnectionsChangedCallback()), + SpeechEndDetected(GetRecognitionEventConnectionsChangedCallback()), + Transcribing(GetRecoEventConnectionsChangedCallback()), + Transcribed(GetRecoEventConnectionsChangedCallback()), + Canceled(GetRecoCanceledEventConnectionsChangedCallback()), + m_hasyncStartContinuous(SPXHANDLE_INVALID), + m_hasyncStopContinuous(SPXHANDLE_INVALID), + m_properties(hreco), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /// + /// Destructor. + /// + ~ConversationTranscriber() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + TermRecognizer(); + } + + /// + /// Signal for events indicating the start of a recognition session (operation). + /// + EventSignal SessionStarted; + + /// + /// Signal for events indicating the end of a recognition session (operation). + /// + EventSignal SessionStopped; + + /// + /// Signal for events indicating the start of speech. + /// + EventSignal SpeechStartDetected; + + /// + /// Signal for events indicating the end of speech. + /// + EventSignal SpeechEndDetected; + + /// + /// Signal for events containing intermediate recognition results. + /// + EventSignal Transcribing; + + /// + /// Signal for events containing final recognition results. + /// (indicating a successful recognition attempt). + /// + EventSignal Transcribed; + + /// + /// Signal for events containing canceled recognition results + /// (indicating a recognition attempt that was canceled as a result or a direct cancellation request + /// or, alternatively, a transport or protocol failure). + /// + EventSignal Canceled; + + /// + /// Sets the authorization token that will be used for connecting the server. + /// + /// The authorization token. + void SetAuthorizationToken(const SPXSTRING& token) + { + Properties.SetProperty(PropertyId::SpeechServiceAuthorization_Token, token); + } + + /// + /// Gets the authorization token. + /// + /// Authorization token + SPXSTRING GetAuthorizationToken() + { + return Properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token, SPXSTRING()); + } + + protected: + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Recognizer handle. 
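// ---------------------------------------------------------------------------
// Illustrative usage sketch (editorial annotation, not part of the vendored
// header). It wires up the ConversationTranscriber declared above and runs a
// start/stop cycle; EventSignal::Connect is assumed to be available as in the
// public Speech SDK, and the key/region values are placeholders.
//
//   using namespace Microsoft::CognitiveServices::Speech;
//   using namespace Microsoft::CognitiveServices::Speech::Transcription;
//
//   auto speechConfig = SpeechConfig::FromSubscription("YourSubscriptionKey", "YourServiceRegion");
//   auto audioConfig  = Audio::AudioConfig::FromDefaultMicrophoneInput();
//   auto transcriber  = ConversationTranscriber::FromConfig(speechConfig, audioConfig);
//
//   transcriber->Transcribed.Connect([](const ConversationTranscriptionEventArgs& e)
//   {
//       // Final results arrive here; e.Result is declared in
//       // speechapi_cxx_conversation_transcription_eventargs.h later in this diff.
//   });
//   transcriber->Canceled.Connect([](const ConversationTranscriptionCanceledEventArgs& e)
//   {
//       // Inspect e.Reason / e.ErrorDetails on failures.
//   });
//
//   // Depending on the SDK version, the transcriber may first have to be
//   // joined to a Conversation before transcription is started.
//   transcriber->StartTranscribingAsync().get();
//   // ... capture audio ...
//   transcriber->StopTranscribingAsync().get();
// ---------------------------------------------------------------------------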
+ virtual void TermRecognizer() override + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + // Disconnect the event signals in reverse construction order + Canceled.DisconnectAll(); + Transcribed.DisconnectAll(); + Transcribing.DisconnectAll(); + SpeechEndDetected.DisconnectAll(); + SpeechStartDetected.DisconnectAll(); + SessionStopped.DisconnectAll(); + SessionStarted.DisconnectAll(); + + // Close the async handles we have open for Recognize, StartContinuous, and StopContinuous + for (auto handle : { &m_hasyncStartContinuous, &m_hasyncStopContinuous }) + { + if (*handle != SPXHANDLE_INVALID && ::recognizer_async_handle_is_valid(*handle)) + { + ::recognizer_async_handle_release(*handle); + *handle = SPXHANDLE_INVALID; + } + } + + // Ask the base to term + Recognizer::TermRecognizer(); + } + + void RecoEventConnectionsChanged(const EventSignal& recoEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&recoEvent == &Transcribing) + { + recognizer_recognizing_set_callback(m_hreco, Transcribing.IsConnected() ? FireEvent_Transcribing : nullptr, this); + } + else if (&recoEvent == &Transcribed) + { + recognizer_recognized_set_callback(m_hreco, Transcribed.IsConnected() ? FireEvent_Transcribed : nullptr, this); + } + } + } + + static void FireEvent_Transcribing(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new ConversationTranscriptionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Transcribing.Signal(*recoEvent.get()); + } + + static void FireEvent_Transcribed(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new ConversationTranscriptionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Transcribed.Signal(*recoEvent.get()); + } + + void RecoCanceledEventConnectionsChanged(const EventSignal& recoEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&recoEvent == &Canceled) + { + recognizer_canceled_set_callback(m_hreco, Canceled.IsConnected() ? FireEvent_Canceled : nullptr, this); + } + } + } + + static void FireEvent_Canceled(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + + auto ptr = new ConversationTranscriptionCanceledEventArgs(hevent); + std::shared_ptr recoEvent(ptr); + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Canceled.Signal(*ptr); + } + + void SessionEventConnectionsChanged(const EventSignal& sessionEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&sessionEvent == &SessionStarted) + { + recognizer_session_started_set_callback(m_hreco, SessionStarted.IsConnected() ? FireEvent_SessionStarted : nullptr, this); + } + else if (&sessionEvent == &SessionStopped) + { + recognizer_session_stopped_set_callback(m_hreco, SessionStopped.IsConnected() ? 
FireEvent_SessionStopped : nullptr, this); + } + } + } + + static void FireEvent_SessionStarted(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr sessionEvent{ new SessionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SessionStarted.Signal(*sessionEvent.get()); + + // SessionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + static void FireEvent_SessionStopped(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr sessionEvent{ new SessionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SessionStopped.Signal(*sessionEvent.get()); + + // SessionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + void RecognitionEventConnectionsChanged(const EventSignal& recognitionEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&recognitionEvent == &SpeechStartDetected) + { + recognizer_speech_start_detected_set_callback(m_hreco, SpeechStartDetected.IsConnected() ? FireEvent_SpeechStartDetected : nullptr, this); + } + else if (&recognitionEvent == &SpeechEndDetected) + { + recognizer_speech_end_detected_set_callback(m_hreco, SpeechEndDetected.IsConnected() ? FireEvent_SpeechEndDetected : nullptr, this); + } + } + } + + static void FireEvent_SpeechStartDetected(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new RecognitionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SpeechStartDetected.Signal(*recoEvent.get()); + + // RecognitionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + static void FireEvent_SpeechEndDetected(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new RecognitionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SpeechEndDetected.Signal(*recoEvent.get()); + + // RecognitionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + /*! 
\endcond */ + +private: + + SPXASYNCHANDLE m_hasyncStartContinuous; + SPXASYNCHANDLE m_hasyncStopContinuous; + + DISABLE_DEFAULT_CTORS(ConversationTranscriber); + friend class Microsoft::CognitiveServices::Speech::Session; + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRECOHANDLE hreco) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + recognizer_get_property_bag(hreco, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + inline std::function&)> GetSessionEventConnectionsChangedCallback() + { + return [=, this](const EventSignal& sessionEvent) { this->SessionEventConnectionsChanged(sessionEvent); }; + } + + inline std::function&)> GetRecoEventConnectionsChangedCallback() + { + return [=, this](const EventSignal& recoEvent) { this->RecoEventConnectionsChanged(recoEvent); }; + } + + inline std::function&)> GetRecoCanceledEventConnectionsChangedCallback() + { + return [=, this](const EventSignal& recoEvent) { this->RecoCanceledEventConnectionsChanged(recoEvent); }; + } + + inline std::function&)> GetRecognitionEventConnectionsChangedCallback() + { + return [=, this](const EventSignal& recoEvent) { this->RecognitionEventConnectionsChanged(recoEvent); }; + } + +public: + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Transcription diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversation_transcription_eventargs.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversation_transcription_eventargs.h new file mode 100644 index 0000000..dd03343 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversation_transcription_eventargs.h @@ -0,0 +1,165 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_conversation_transcription_eventargs.h: Public API declarations for ConversationTranscriptionEventArgs C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + +/// +/// Class for conversation transcriber event arguments. +/// +class ConversationTranscriptionEventArgs : public RecognitionEventArgs +{ +private: + + SPXEVENTHANDLE m_hevent; + std::shared_ptr m_result; + +public: + + /// + /// Constructor. + /// + /// Event handle + explicit ConversationTranscriptionEventArgs(SPXEVENTHANDLE hevent) : + RecognitionEventArgs(hevent), + m_hevent(hevent), + m_result(std::make_shared(ResultHandleFromEventHandle(hevent))), + Result(m_result) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + }; + + /// + virtual ~ConversationTranscriptionEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + SPX_THROW_ON_FAIL(recognizer_event_handle_release(m_hevent)); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + /// + /// Conversation transcriber result. + /// + std::shared_ptr Result; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +protected: +#endif + + /*! \cond PROTECTED */ + + /// + /// Conversation transcriber result. 
+ /// + std::shared_ptr GetResult() const { return m_result; } + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(ConversationTranscriptionEventArgs); + + SPXRESULTHANDLE ResultHandleFromEventHandle(SPXEVENTHANDLE hevent) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(recognizer_recognition_event_get_result(hevent, &hresult)); + return hresult; + } +}; + + +/// +/// Class for conversation transcriber canceled event arguments. +/// +class ConversationTranscriptionCanceledEventArgs : public ConversationTranscriptionEventArgs +{ +private: + + std::shared_ptr m_cancellation; + CancellationReason m_cancellationReason; + CancellationErrorCode m_errorCode; + +public: + + /// + /// Constructor. + /// + /// Event handle + explicit ConversationTranscriptionCanceledEventArgs(SPXEVENTHANDLE hevent) : + ConversationTranscriptionEventArgs(hevent), + m_cancellation(CancellationDetails::FromResult(GetResult())), + m_cancellationReason(m_cancellation->Reason), + m_errorCode(m_cancellation->ErrorCode), + Reason(m_cancellationReason), + ErrorCode(m_errorCode), + ErrorDetails(m_cancellation->ErrorDetails) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + + /// + virtual ~ConversationTranscriptionCanceledEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-private-field" +#endif + /// + /// The reason the result was canceled. + /// + const CancellationReason& Reason; + + /// + /// The error code in case of an unsuccessful recognition ( is set to Error). + /// If Reason is not Error, ErrorCode is set to NoError. + /// + const CancellationErrorCode& ErrorCode; + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + + /// + /// The error message in case of an unsuccessful recognition ( is set to Error). + /// + const SPXSTRING ErrorDetails; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +private: +#endif + /// + /// CancellationDetails. + /// + std::shared_ptr GetCancellationDetails() const { return m_cancellation; } + +private: + + DISABLE_DEFAULT_CTORS(ConversationTranscriptionCanceledEventArgs); +}; +} } } } // Microsoft::CognitiveServices::Speech::Transcription diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversation_transcription_result.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversation_transcription_result.h new file mode 100644 index 0000000..c655c8f --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversation_transcription_result.h @@ -0,0 +1,72 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_conversation_transcription_result.h: Public API declarations for ConversationTranscription C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + +/// +/// Represents the result of a conversation transcriber. +/// +class ConversationTranscriptionResult final : public RecognitionResult +{ +public: + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Result handle. 
+ explicit ConversationTranscriptionResult(SPXRESULTHANDLE hresult) : + RecognitionResult(hresult), + SpeakerId(m_speakerId) + { + PopulateSpeakerFields(hresult, &m_speakerId); + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p) -- resultid=%s; reason=0x%x; text=%s, speakerid=%s, utteranceid=%s", __FUNCTION__, (void*)this, (void*)Handle, Utils::ToUTF8(ResultId).c_str(), Reason, Utils::ToUTF8(Text).c_str(), Utils::ToUTF8(SpeakerId).c_str()); + } + + /// + /// Destructor. + /// + ~ConversationTranscriptionResult() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)Handle); + } + + /// + /// Unique Speaker id. + /// + const SPXSTRING& SpeakerId; + +private: + DISABLE_DEFAULT_CTORS(ConversationTranscriptionResult); + + void PopulateSpeakerFields(SPXRESULTHANDLE hresult, SPXSTRING* pspeakerId) + { + SPX_INIT_HR(hr); + + const size_t maxCharCount = 1024; + char sz[maxCharCount + 1] = {}; + + if (pspeakerId != nullptr && recognizer_result_handle_is_valid(hresult)) + { + SPX_THROW_ON_FAIL(hr = conversation_transcription_result_get_speaker_id(hresult, sz, maxCharCount)); + *pspeakerId = Utils::ToSPXString(sz); + } + } + + SPXSTRING m_speakerId; +}; + +} } } } // Microsoft::CognitiveServices::Speech::Transcription diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversation_translator.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversation_translator.h new file mode 100644 index 0000000..d23b53b --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversation_translator.h @@ -0,0 +1,448 @@ + +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_conversation_translator.h: Public API declarations for ConversationTranslator C++ class +// + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +// Forward decl: facilitates friend use of Connection::FromConversationTranslator +class Connection; + +namespace Transcription { + + /// + /// A conversation translator that enables a connected experience where participants can use their + /// own devices to see everyone else's recognitions and IMs in their own languages. Participants + /// can also speak and send IMs to others. + /// Added in 1.9.0 + /// + class ConversationTranslator : public std::enable_shared_from_this + { + private: + /*! \cond PRIVATE */ + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXCONVERSATIONHANDLE hconvtrans) : + PropertyCollection([hconvtrans]() + { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + conversation_translator_get_property_bag(hconvtrans, &hpropbag); + return hpropbag; + }()) + {} + }; + + SPXCONVERSATIONTRANSLATORHANDLE m_handle; + PrivatePropertyCollection m_properties; + /*! \endcond */ + + public: + /// + /// Creates a conversation translator from an audio config + /// + /// Audio configuration. + /// Smart pointer to conversation translator instance. 
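// ---------------------------------------------------------------------------
// Illustrative usage sketch (editorial annotation, not part of the vendored
// header). It shows how a Transcribed handler might consume the
// ConversationTranscriptionResult declared just above; it assumes SPXSTRING is
// std::string (the default build configuration) and that EventSignal::Connect
// is available as in the public Speech SDK.
//
//   #include <iostream>
//
//   transcriber->Transcribed.Connect([](const ConversationTranscriptionEventArgs& e)
//   {
//       auto result = e.Result;   // shared pointer to a ConversationTranscriptionResult
//       if (result->Reason == ResultReason::RecognizedSpeech)
//       {
//           // SpeakerId identifies who spoke; Text is inherited from RecognitionResult.
//           std::cout << result->SpeakerId << ": " << result->Text << std::endl;
//       }
//   });
// ---------------------------------------------------------------------------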
+ static std::shared_ptr FromConfig(std::shared_ptr audioConfig = nullptr) + { + SPXCONVERSATIONTRANSLATORHANDLE handle; + SPX_THROW_ON_FAIL(::conversation_translator_create_from_config( + &handle, + Utils::HandleOrInvalid(audioConfig) + )); + return std::shared_ptr(new ConversationTranslator(handle)); + } + + /// + /// Destructor + /// + virtual ~ConversationTranslator() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + // disconnect callbacks in reverse order + TextMessageReceived.DisconnectAll(); + Transcribed.DisconnectAll(); + Transcribing.DisconnectAll(); + ConversationExpiration.DisconnectAll(); + ParticipantsChanged.DisconnectAll(); + Canceled.DisconnectAll(); + SessionStopped.DisconnectAll(); + SessionStarted.DisconnectAll(); + + ::conversation_translator_handle_release(m_handle); + m_handle = SPXHANDLE_INVALID; + } + + /// + /// Signal for events indicating the start of a transcription session (operation). + /// + EventSignal SessionStarted; + + /// + /// Signal for events indicating the end of a transcription session (operation). + /// + EventSignal SessionStopped; + + /// + /// Signal for events containing canceled recognition results + /// (indicating a recognition attempt that was canceled as a result or a direct cancellation request + /// or, alternatively, a transport or protocol failure). + /// + EventSignal Canceled; + + /// + /// Signal for events indicating the conversation participants have changed. + /// + EventSignal ParticipantsChanged; + + /// + /// Signal for event indicating how many minutes are left until a conversation expires. + /// + EventSignal ConversationExpiration; + + /// + /// Signal for events containing intermediate translated conversation transcription results. + /// + EventSignal Transcribing; + + /// + /// Signal for events containing final translated conversation transcription results. + /// (indicating a successful recognition attempt). + /// + EventSignal Transcribed; + + /// + /// Raised when a text message is received from the conversation. + /// + EventSignal TextMessageReceived; + + /// + /// Joins a conversation. After you call this, you will start receiving events. + /// + /// The conversation instance to use. This instance can be used by the + /// host to manage the conversation. + /// The display name to use for the current participant in the conversation. + /// An asynchronous operation. + std::future JoinConversationAsync(std::shared_ptr conversation, const SPXSTRING& nickname) + { + return RunAsync([conversation, nickname](auto handle) + { + return ::conversation_translator_join( + handle, + Utils::HandleOrInvalid(conversation), + Utils::ToUTF8(nickname).c_str()); + }); + } + + /// + /// Joins a conversation. After you call this, you will start receiving events. + /// + /// The identifier of the conversation you want to join. + /// The display name of the current participant in the conversation. + /// The language the participant is using. + /// An asynchronous operation. + std::future JoinConversationAsync(const SPXSTRING& conversationId, const SPXSTRING& nickname, const SPXSTRING& language) + { + return RunAsync([conversationId, nickname, language](auto handle) + { + return ::conversation_translator_join_with_id( + handle, + Utils::ToUTF8(conversationId).c_str(), + Utils::ToUTF8(nickname).c_str(), + Utils::ToUTF8(language).c_str()); + }); + } + + /// + /// Starts sending audio to the conversation service for speech recognition. + /// + /// An asynchronous operation. 
+ std::future StartTranscribingAsync() + { + return RunAsync(::conversation_translator_start_transcribing); + } + + /// + /// Stops sending audio to the conversation service. + /// + /// An asynchronous operation. + std::future StopTranscribingAsync() + { + return RunAsync(::conversation_translator_stop_transcribing); + } + + /// + /// Sends an instant message to all participants in the conversation. This instant message + /// will be translated into each participant's text language. + /// + /// The message to send. + /// An asynchronous operation. + std::future SendTextMessageAsync(const SPXSTRING& message) + { + return RunAsync([message](auto handle) + { + return ::conversation_translator_send_text_message( + handle, + Utils::ToUTF8(message).c_str()); + }); + } + + /// + /// Leaves the current conversation. After this is called, you will no longer receive any events. + /// + /// An asynchronous operation. + std::future LeaveConversationAsync() + { + return RunAsync(::conversation_translator_leave); + } + + /// + /// Sets the Cognitive Speech authorization token that will be used for connecting to the server. + /// + /// The authorization token. + /// The Azure region for this token. + void SetAuthorizationToken(const SPXSTRING& authToken, const SPXSTRING& region) + { + SPX_THROW_ON_FAIL(::conversation_translator_set_authorization_token( + m_handle, + Utils::ToUTF8(authToken).c_str(), + Utils::ToUTF8(region).c_str())); + } + + /// + /// Gets the authorization token. + /// + /// Authorization token + SPXSTRING GetAuthorizationToken() + { + return m_properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token); + } + + /// + /// Gets your participant identifier + /// + /// Participant ID + SPXSTRING GetParticipantId() + { + return m_properties.GetProperty(PropertyId::Conversation_ParticipantId); + } + + /// + /// A collection of properties and their values defined for this . 
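// ---------------------------------------------------------------------------
// Illustrative usage sketch (editorial annotation, not part of the vendored
// header). It joins an existing conversation with the ConversationTranslator
// declared above, streams audio, sends an instant message and leaves; the
// conversation id, nickname and language are placeholders, and
// EventSignal::Connect plus the ConversationTranslationEventArgs type
// (declared later in this diff) are assumed from the public Speech SDK.
//
//   using namespace Microsoft::CognitiveServices::Speech;
//   using namespace Microsoft::CognitiveServices::Speech::Transcription;
//
//   auto audioConfig = Audio::AudioConfig::FromDefaultMicrophoneInput();
//   auto translator  = ConversationTranslator::FromConfig(audioConfig);
//
//   translator->Transcribed.Connect([](const ConversationTranslationEventArgs& e) { /* final translations */ });
//   translator->TextMessageReceived.Connect([](const ConversationTranslationEventArgs& e) { /* instant messages */ });
//
//   translator->JoinConversationAsync("ConversationId", "AvatarHost", "en-US").get();
//   translator->StartTranscribingAsync().get();
//   translator->SendTextMessageAsync("Hello from the avatar client").get();
//   translator->StopTranscribingAsync().get();
//   translator->LeaveConversationAsync().get();
// ---------------------------------------------------------------------------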
+ /// + PropertyCollection& Properties; + + protected: + explicit ConversationTranslator(SPXCONVERSATIONTRANSLATORHANDLE handle) : + m_handle(handle), + m_properties(handle), + SessionStarted(BindHandler(&ConversationTranslator::OnSessionEventChanged)), + SessionStopped(BindHandler(&ConversationTranslator::OnSessionEventChanged)), + Canceled(BindHandler(&ConversationTranslator::OnCanceledEventChanged)), + ParticipantsChanged(BindHandler(&ConversationTranslator::OnParticipantsEventChanged)), + ConversationExpiration(BindHandler(&ConversationTranslator::OnExpirationEventChanged)), + Transcribing(BindHandler(&ConversationTranslator::OnTranscriptionEventChanged)), + Transcribed(BindHandler(&ConversationTranslator::OnTranscriptionEventChanged)), + TextMessageReceived(BindHandler(&ConversationTranslator::OnTextMessageEventChanged)), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + static inline bool ValidateHandle(SPXCONVERSATIONTRANSLATORHANDLE handle, const char* function) + { + UNUSED(function); // not used in release builds + SPX_DBG_TRACE_VERBOSE("%s: handle=0x%8p", function, (void*)handle); + bool valid = ::conversation_translator_handle_is_valid(handle); + SPX_DBG_TRACE_VERBOSE_IF(!valid, "%s: handle is INVALID!!!", function); + return valid; + } + + void OnSessionEventChanged(const EventSignal& evt) + { + if (!ValidateHandle(m_handle, __FUNCTION__)) return; + + PCONV_TRANS_CALLBACK callback = nullptr; + + if (&evt == &SessionStarted) + { + if (SessionStarted.IsConnected()) + { + callback = [](auto, auto b, auto c) { FireEvent(b, c, &ConversationTranslator::SessionStarted); }; + } + + conversation_translator_session_started_set_callback(m_handle, callback, this); + } + else if (&evt == &SessionStopped) + { + if (SessionStopped.IsConnected()) + { + callback = [](auto, auto b, auto c) { FireEvent(b, c, &ConversationTranslator::SessionStopped); }; + } + + conversation_translator_session_stopped_set_callback(m_handle, callback, this); + } + } + + void OnCanceledEventChanged(const EventSignal&) + { + if (!ValidateHandle(m_handle, __FUNCTION__)) return; + + PCONV_TRANS_CALLBACK callback = nullptr; + if (Canceled.IsConnected()) + { + callback = [](auto, auto b, auto c) { FireEvent(b, c, &ConversationTranslator::Canceled); }; + } + + conversation_translator_canceled_set_callback(m_handle, callback, this); + } + + void OnParticipantsEventChanged(const EventSignal&) + { + if (!ValidateHandle(m_handle, __FUNCTION__)) return; + + PCONV_TRANS_CALLBACK callback = nullptr; + if (ParticipantsChanged.IsConnected()) + { + callback = [](auto, auto b, auto c) { FireEvent(b, c, &ConversationTranslator::ParticipantsChanged); }; + } + + conversation_translator_participants_changed_set_callback(m_handle, callback, this); + } + + void OnExpirationEventChanged(const EventSignal&) + { + if (!ValidateHandle(m_handle, __FUNCTION__)) return; + + PCONV_TRANS_CALLBACK callback = nullptr; + if (ConversationExpiration.IsConnected()) + { + callback = [](auto, auto b, auto c) { FireEvent(b, c, &ConversationTranslator::ConversationExpiration); }; + } + + conversation_translator_conversation_expiration_set_callback(m_handle, callback, this); + } + + void OnTranscriptionEventChanged(const EventSignal& evt) + { + if (!ValidateHandle(m_handle, __FUNCTION__)) return; + + PCONV_TRANS_CALLBACK callback = nullptr; + if (&evt == &Transcribing) + { + if (Transcribing.IsConnected()) + { + callback = [](auto, auto b, auto c) { FireEvent(b, c, &ConversationTranslator::Transcribing); }; + } + + 
conversation_translator_transcribing_set_callback(m_handle, callback, this); + } + else + { + if (Transcribed.IsConnected()) + { + callback = [](auto, auto b, auto c) { FireEvent(b, c, &ConversationTranslator::Transcribed); }; + } + + conversation_translator_transcribed_set_callback(m_handle, callback, this); + } + } + + void OnTextMessageEventChanged(const EventSignal&) + { + if (!ValidateHandle(m_handle, __FUNCTION__)) return; + + PCONV_TRANS_CALLBACK callback = nullptr; + if (TextMessageReceived.IsConnected()) + { + callback = [](auto, auto b, auto c) { FireEvent(b, c, &ConversationTranslator::TextMessageReceived); }; + } + + conversation_translator_text_message_recevied_set_callback(m_handle, callback, this); + } + + private: + /*! \cond PRIVATE */ + + friend class Microsoft::CognitiveServices::Speech::Connection; + + DISABLE_DEFAULT_CTORS(ConversationTranslator); + + inline std::future RunAsync(std::function func) + { + auto keepalive = this->shared_from_this(); + return std::async(std::launch::async, [keepalive, this, func]() + { + SPX_THROW_ON_FAIL(func(m_handle)); + }); + } + + template + inline std::function BindHandler(void (ConversationTranslator::*func)(TArg)) + { + return [this, func](TArg arg) + { + (this->*func)(arg); + }; + } + + static inline void FreeEventHandle(SPXEVENTHANDLE hEvt) + { + if (::conversation_translator_event_handle_is_valid(hEvt)) + { + ::conversation_translator_event_handle_release(hEvt); + } + } + + template + static inline void FireEvent(SPXEVENTHANDLE hEvt, void* pCtxt, EventSignal ConversationTranslator::*pEvent) + { + try + { + auto pThis = static_cast(pCtxt); + SPX_DBG_ASSERT(pThis != nullptr); + auto keepAlive = pThis->shared_from_this(); + + T eventArgs(hEvt); + (pThis->*pEvent).Signal(eventArgs); + + // event classes don't properly release the handles so do that here + FreeEventHandle(hEvt); + } + catch (std::exception& ex) + { + UNUSED(ex); + FreeEventHandle(hEvt); + throw; + } + catch (...) + { + FreeEventHandle(hEvt); + throw; + } + } + + /*! \endcond */ + }; + +}}}} diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversation_translator_events.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversation_translator_events.h new file mode 100644 index 0000000..0bc817c --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversation_translator_events.h @@ -0,0 +1,262 @@ + +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_cxx_conversation_translator_events.h: Public C++ class API declarations for ConversationTranslator related events +// + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + + /// + /// Helper class with additional methods + /// Added in 1.9.0 + /// + class EventHelper + { + protected: + template + static TVal GetValue(THandle hevent, SPXHR(SPXAPI_CALLTYPE * func)(THandle hevent, TVal* ptr)) + { + TVal value; + SPX_THROW_ON_FAIL(func(hevent, &value)); + return value; + } + + template + static SPXSTRING GetStringValue(THandle hevent, SPXHR(SPXAPI_CALLTYPE * func)(THandle hevent, char * psz, uint32_t cch)) + { + const uint32_t maxCharCount = 1024; + char sz[maxCharCount + 1]; + SPX_THROW_ON_FAIL(func(hevent, sz, maxCharCount)); + return Utils::ToSPXString(sz); + } + + template + static SPXSTRING GetStringValue(THandle hevent, SPXHR(SPXAPI_CALLTYPE* func)(THandle hevent, char* psz, uint32_t* pcch)) + { + // query the string length + uint32_t length = 0; + SPX_THROW_ON_FAIL(func(hevent, nullptr, &length)); + + // retrieve the string + std::unique_ptr buffer(new char[length]); + SPX_THROW_ON_FAIL(func(hevent, buffer.get(), &length)); + return Utils::ToSPXString(buffer.get()); + } + }; + + /// + /// Represents the result of a conversation translator recognition, or text message. + /// Added in 1.9.0 + /// + class ConversationTranslationResult : public Translation::TranslationRecognitionResult, public EventHelper + { + private: + SPXSTRING m_participantId; + SPXSTRING m_originalLang; + + public: + explicit ConversationTranslationResult(SPXRESULTHANDLE resultHandle) : + Translation::TranslationRecognitionResult(resultHandle), + m_participantId(GetStringValue(resultHandle, conversation_translator_result_get_user_id)), + m_originalLang(GetStringValue(resultHandle, conversation_translator_result_get_original_lang)), + ParticipantId(m_participantId), + OriginalLanguage(m_originalLang) + { + } + + /// + /// The unique participant identifier + /// + const SPXSTRING& ParticipantId; + + /// + /// Gets the language that the original recognition or text message is in + /// + const SPXSTRING& OriginalLanguage; + + private: + DISABLE_COPY_AND_MOVE(ConversationTranslationResult); + }; + + /// + /// Event arguments for the ConversationExpiration event. + /// Added in 1.9.0 + /// + class ConversationExpirationEventArgs : public SessionEventArgs, public EventHelper + { + private: + std::chrono::minutes m_expirationTime; + + public: + /// + /// Creates a new instance. + /// + /// The event handle. + explicit ConversationExpirationEventArgs(SPXEVENTHANDLE hevent) : + SessionEventArgs(hevent), + m_expirationTime(std::chrono::minutes(GetValue(hevent, conversation_translator_event_get_expiration_time))), + ExpirationTime(m_expirationTime) + { + } + + /// + /// How many minutes are left until the conversation expires + /// + const std::chrono::minutes& ExpirationTime; + + private: + DISABLE_COPY_AND_MOVE(ConversationExpirationEventArgs); + }; + + /// + /// Event arguments for the ParticipantsChanged event. + /// Added in 1.9.0 + /// + class ConversationParticipantsChangedEventArgs : public SessionEventArgs, public EventHelper + { + private: + ParticipantChangedReason m_reason; + std::vector> m_participants; + + public: + /// + /// Creates a new instance. + /// + /// The event handle. 
+ explicit ConversationParticipantsChangedEventArgs(SPXEVENTHANDLE hevent) : + SessionEventArgs(hevent), + m_reason(GetValue(hevent, conversation_translator_event_get_participant_changed_reason)), + m_participants(GetParticipants(hevent)), + Reason(m_reason), + Participants(m_participants) + { + } + + /// + /// Why the participant changed event was raised (e.g. a participant joined) + /// + const ParticipantChangedReason& Reason; + + /// + /// The participant(s) that joined, left, or were updated + /// + const std::vector>& Participants; + + protected: + /*! \cond PROTECTED */ + + std::vector> GetParticipants(SPXEVENTHANDLE hevent) + { + std::vector> list; + + SPXPARTICIPANTHANDLE hparticipant = nullptr; + for (int i = 0; hparticipant != SPXHANDLE_INVALID; i++) + { + SPX_THROW_ON_FAIL(conversation_translator_event_get_participant_changed_at_index(hevent, i, &hparticipant)); + if (hparticipant != SPXHANDLE_INVALID) + { + list.push_back(std::make_shared(hparticipant)); + + // the Participant object correctly frees the handle so we don't need to do anything + // special here + } + } + + return list; + } + + /*! \endcond */ + + private: + DISABLE_COPY_AND_MOVE(ConversationParticipantsChangedEventArgs); + }; + + /// + /// Event arguments for the ConversationTranslator , + /// , or + /// events. + /// Added in 1.9.0 + /// + class ConversationTranslationEventArgs : public RecognitionEventArgs, public EventHelper + { + private: + std::shared_ptr m_result; + + public: + /// + /// Creates a new instance. + /// + /// The event handle returned by the C-API. + explicit ConversationTranslationEventArgs(SPXEVENTHANDLE hevent) + : RecognitionEventArgs(hevent), + m_result(std::make_shared(GetValue(hevent, recognizer_recognition_event_get_result))), + Result(m_result) + { + } + +#if defined(BINDING_OBJECTIVE_C) + private: +#endif + /// + /// Contains the conversation translation result. This could be for a canceled event, + /// a speech recognition, or a received text message. + /// + std::shared_ptr Result; + +#if defined(BINDING_OBJECTIVE_C) + public: +#else + protected: +#endif + + /*! \cond PROTECTED */ + + /// + /// Contains the conversation translation result. This could be for a canceled event, + /// a speech recognition, or a received text message. + /// + std::shared_ptr GetResult() const { return m_result; } + + /*! \endcond */ + + private: + DISABLE_COPY_AND_MOVE(ConversationTranslationEventArgs); + }; + + + /// + /// Event arguments for the conversation translator canceled event. + /// Added in 1.9.0 + /// + class ConversationTranslationCanceledEventArgs : public ConversationTranscriptionCanceledEventArgs + { + public: + /// + /// Creates a new instance. + /// + /// The event handle. + explicit ConversationTranslationCanceledEventArgs(SPXEVENTHANDLE hevent) : + ConversationTranscriptionCanceledEventArgs(hevent) + { } + }; + +}}}} diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversational_language_understanding_model.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversational_language_understanding_model.h new file mode 100644 index 0000000..1a2eb54 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_conversational_language_understanding_model.h @@ -0,0 +1,89 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license201809 for the full license information. 
+// +// speechapi_cxx_conversational_language_understanding_model.h: Public API declarations for PatternMatchingModel C++ class +// + +#pragma once +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Intent { + + /// + /// Represents a Conversational Language Understanding used for intent recognition. + /// + class ConversationalLanguageUnderstandingModel : public LanguageUnderstandingModel + { + public: + + /// + /// Creates a Conversational Language Understanding (CLU) model using the specified model ID. + /// + /// The Azure Language resource key. + /// The Azure Language resource endpoint. + /// The Conversational Language Understanding project name. + /// The Conversational Language Understanding deployment name. + /// A shared pointer to the Conversational Language Understanding model. + static std::shared_ptr FromResource(const SPXSTRING& languageResourceKey, const SPXSTRING& endpoint, const SPXSTRING& projectName, const SPXSTRING& deploymentName) + { + return std::shared_ptr { + new ConversationalLanguageUnderstandingModel(languageResourceKey, endpoint, projectName, deploymentName) + }; + } + + /// + /// Returns id for this model. Defaults to projectName-deploymentName. + /// + /// A string representing the id of this model. + SPXSTRING GetModelId() const { return m_modelId; } + + /// + /// Sets the id for this model. Defaults to projectName-deploymentName. + /// + /// A string representing the id of this model. + void SetModelId(SPXSTRING value) { m_modelId = value; } + + /// + /// This is the Azure language resource key to be used with this model. + /// + SPXSTRING languageResourceKey; + + /// + /// Conversational Language Understanding deployment endpoint to contact. + /// + SPXSTRING endpoint; + + /// + /// Conversational Language Understanding project name. + /// + SPXSTRING projectName; + + /// + /// Conversational Language Understanding deployment name. + /// + SPXSTRING deploymentName; + + private: + DISABLE_COPY_AND_MOVE(ConversationalLanguageUnderstandingModel); + + ConversationalLanguageUnderstandingModel(const SPXSTRING& languageResourceKey, const SPXSTRING& endpoint, const SPXSTRING& projectName, const SPXSTRING& deploymentName) : + LanguageUnderstandingModel(LanguageUnderstandingModelType::ConversationalLanguageUnderstandingModel), + languageResourceKey(languageResourceKey), + endpoint(endpoint), + projectName(projectName), + deploymentName(deploymentName) + { + m_modelId = projectName + "-" + deploymentName; + } + + SPXSTRING m_modelId; +}; + +} } } } // Microsoft::CognitiveServices::Speech::Intent diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_dialog_service_config.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_dialog_service_config.h new file mode 100644 index 0000000..641daaf --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_dialog_service_config.h @@ -0,0 +1,268 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +#pragma once + +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Dialog { + +/// +/// Class that defines base configurations for the dialog service connector object. +/// +class DialogServiceConfig +{ +protected: + /*! 
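// ---------------------------------------------------------------------------
// Illustrative usage sketch (editorial annotation, not part of the vendored
// header). It builds the ConversationalLanguageUnderstandingModel declared
// earlier in this diff; the resource key, endpoint, project and deployment
// names are placeholders.
//
//   using namespace Microsoft::CognitiveServices::Speech::Intent;
//
//   auto cluModel = ConversationalLanguageUnderstandingModel::FromResource(
//       "YourLanguageResourceKey",
//       "https://YourLanguageResource.cognitiveservices.azure.com",
//       "YourProjectName",
//       "YourDeploymentName");
//
//   // The model id defaults to "<project>-<deployment>" but can be overridden
//   // before the model is handed to an intent recognizer.
//   cluModel->SetModelId("AvatarIntents");
// ---------------------------------------------------------------------------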
\cond PROTECTED */ + inline explicit DialogServiceConfig(SPXSPEECHCONFIGHANDLE h_config) : m_config{ h_config } + { + } + SpeechConfig m_config; + /*! \endcond */ + +public: + /// + /// Default destructor. + /// + virtual ~DialogServiceConfig() = default; + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXSPEECHCONFIGHANDLE() const { return static_cast(m_config); } + + /// + /// Sets a property value by name. + /// + /// The property name. + /// The property value. + void SetProperty(const SPXSTRING& name, const SPXSTRING& value) + { + m_config.SetProperty(name, value); + } + + /// + /// Sets a property value by ID. + /// + /// The property id. + /// The property value. + void SetProperty(PropertyId id, const SPXSTRING& value) + { + m_config.SetProperty(id, value); + } + + /// + /// Gets a property value by name. + /// + /// The parameter name. + /// The property value. + SPXSTRING GetProperty(const SPXSTRING& name) const + { + return m_config.GetProperty(name); + } + + /// + /// Gets a property value by ID. + /// + /// The parameter id. + /// The property value. + SPXSTRING GetProperty(PropertyId id) const + { + return m_config.GetProperty(id); + } + + /// + /// Sets a property value that will be passed to service using the specified channel. + /// + /// The property name. + /// The property value. + /// The channel used to pass the specified property to service. + void SetServiceProperty(const SPXSTRING& name, const SPXSTRING& value, ServicePropertyChannel channel) + { + m_config.SetServiceProperty(name, value, channel); + } + + + /// + /// Sets proxy configuration + /// + /// Note: Proxy functionality is not available on macOS. This function will have no effect on this platform. + /// + /// The host name of the proxy server, without the protocol scheme (`http://`) + /// The port number of the proxy server + /// The user name of the proxy server + /// The password of the proxy server + void SetProxy(const SPXSTRING& proxyHostName, uint32_t proxyPort, const SPXSTRING& proxyUserName = SPXSTRING(), const SPXSTRING& proxyPassword = SPXSTRING()) + { + m_config.SetProxy(proxyHostName, proxyPort, proxyUserName, proxyPassword); + } + + /// + /// Set the input language to the connector. + /// + /// Specifies the name of spoken language to be recognized in BCP-47 format. + void SetLanguage(const SPXSTRING& lang) + { + SetProperty(PropertyId::SpeechServiceConnection_RecoLanguage, lang); + } + + /// + /// Gets the input language to the connector. + /// The language is specified in BCP-47 format. + /// + /// The connetor language. + SPXSTRING GetLanguage() const + { + return GetProperty(PropertyId::SpeechServiceConnection_RecoLanguage); + } + +}; + +/// +/// Class that defines configurations for the dialog service connector object for using a Bot Framework backend. +/// +class BotFrameworkConfig final : public DialogServiceConfig +{ +public: + /// + /// Creates a bot framework service config instance with the specified subscription key and region. + /// + /// Subscription key associated with the bot + /// The region name (see the region page). + /// A shared pointer to the new bot framework config. 
+ inline static std::shared_ptr FromSubscription(const SPXSTRING& subscription, const SPXSTRING& region) + { + SPXSPEECHCONFIGHANDLE h_config = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(bot_framework_config_from_subscription(&h_config, Utils::ToUTF8(subscription).c_str(), Utils::ToUTF8(region).c_str(), nullptr)); + return std::shared_ptr{ new BotFrameworkConfig(h_config) }; + } + + /// + /// Creates a bot framework service config instance with the specified subscription key and region. + /// + /// Subscription key associated with the bot + /// The region name (see the region page). + /// Identifier used to select a bot associated with this subscription. + /// A shared pointer to the new bot framework config. + inline static std::shared_ptr FromSubscription(const SPXSTRING& subscription, const SPXSTRING& region, const SPXSTRING& bot_Id) + { + SPXSPEECHCONFIGHANDLE h_config = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(bot_framework_config_from_subscription(&h_config, Utils::ToUTF8(subscription).c_str(), Utils::ToUTF8(region).c_str(), Utils::ToUTF8(bot_Id).c_str())); + return std::shared_ptr{ new BotFrameworkConfig(h_config) }; + } + + /// + /// Creates a bot framework service config instance with the specified authorization token and region. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. + /// As configuration values are copied when creating a new connector, the new token value will not apply to connectors that have already been created. + /// For connectors that have been created before, you need to set authorization token of the corresponding connector + /// to refresh the token. Otherwise, the connectors will encounter errors during operation. + /// + /// The authorization token. + /// The region name (see the region page). + /// A shared pointer to the new bot framework config. + inline static std::shared_ptr FromAuthorizationToken(const SPXSTRING& authToken, const SPXSTRING& region) + { + SPXSPEECHCONFIGHANDLE h_config = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(bot_framework_config_from_authorization_token(&h_config, Utils::ToUTF8(authToken).c_str(), Utils::ToUTF8(region).c_str(), nullptr)); + return std::shared_ptr{ new BotFrameworkConfig(h_config) }; + } + + /// + /// Creates a bot framework service config instance with the specified authorization token and region. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. + /// As configuration values are copied when creating a new connector, the new token value will not apply to connectors that have already been created. + /// For connectors that have been created before, you need to set authorization token of the corresponding connector + /// to refresh the token. Otherwise, the connectors will encounter errors during operation. + /// + /// The authorization token. + /// The region name (see the region page). + /// Identifier used to select a bot associated with this subscription. + /// A shared pointer to the new bot framework config. 
+ inline static std::shared_ptr FromAuthorizationToken(const SPXSTRING& authToken, const SPXSTRING& region, const SPXSTRING& bot_Id) + { + SPXSPEECHCONFIGHANDLE h_config = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(bot_framework_config_from_authorization_token(&h_config, Utils::ToUTF8(authToken).c_str(), Utils::ToUTF8(region).c_str(), Utils::ToUTF8(bot_Id).c_str())); + return std::shared_ptr{ new BotFrameworkConfig(h_config) }; + } +private: + inline explicit BotFrameworkConfig(SPXSPEECHCONFIGHANDLE h_config): DialogServiceConfig{ h_config } + { + } +}; + +/// +/// Class that defines configurations for the dialog service connector object for using a CustomCommands backend. +/// +class CustomCommandsConfig: public DialogServiceConfig +{ +public: + /// + /// Creates a Custom Commands config instance with the specified application id, subscription key and region. + /// + /// Custom Commands application id. + /// Subscription key associated with the bot + /// The region name (see the region page). + /// A shared pointer to the new bot framework config. + inline static std::shared_ptr FromSubscription(const SPXSTRING& appId, const SPXSTRING& subscription, const SPXSTRING& region) + { + SPXSPEECHCONFIGHANDLE h_config = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(custom_commands_config_from_subscription(&h_config, Utils::ToUTF8(appId).c_str(), Utils::ToUTF8(subscription).c_str(), Utils::ToUTF8(region).c_str())); + return std::shared_ptr{ new CustomCommandsConfig(h_config) }; + } + + /// + /// Creates a Custom Commands config instance with the specified application id authorization token and region. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. + /// As configuration values are copied when creating a new connector, the new token value will not apply to connectors that have already been created. + /// For connectors that have been created before, you need to set authorization token of the corresponding connector + /// to refresh the token. Otherwise, the connectors will encounter errors during operation. + /// + /// Custom Commands application id. + /// The authorization token. + /// The region name (see the region page). + /// A shared pointer to the new bot framework config. + inline static std::shared_ptr FromAuthorizationToken(const SPXSTRING& appId, const SPXSTRING& authToken, const SPXSTRING& region) + { + SPXSPEECHCONFIGHANDLE h_config = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(custom_commands_config_from_authorization_token(&h_config, Utils::ToUTF8(appId).c_str(), Utils::ToUTF8(authToken).c_str(), Utils::ToUTF8(region).c_str())); + return std::shared_ptr{ new CustomCommandsConfig(h_config) }; + } + + /// + /// Sets the corresponding backend application identifier. + /// + /// Application identifier. + inline void SetApplicationId(const SPXSTRING& applicationId) + { + SetProperty(PropertyId::Conversation_ApplicationId, applicationId); + } + + /// + /// Gets the application identifier. + /// + /// Speech Channel Secret Key. 
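// Usage sketch (illustrative only; the key, region, and application id below
// are placeholders): creating configs for the two supported dialog backends.
#include <speechapi_cxx.h>

using namespace Microsoft::CognitiveServices::Speech::Dialog;

void CreateDialogConfigs()
{
    // Direct Line Speech (Bot Framework): subscription key + region.
    auto botConfig = BotFrameworkConfig::FromSubscription("YourSubscriptionKey", "YourRegion");

    // Custom Commands: additionally requires the Custom Commands application id.
    auto commandsConfig = CustomCommandsConfig::FromSubscription(
        "YourCustomCommandsAppId", "YourSubscriptionKey", "YourRegion");

    // The application id can also be changed after construction.
    commandsConfig->SetApplicationId("AnotherAppId");
}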
+ inline SPXSTRING GetApplicationId() const + { + return GetProperty(PropertyId::Conversation_ApplicationId); + } + +private: + inline explicit CustomCommandsConfig(SPXSPEECHCONFIGHANDLE h_config): DialogServiceConfig{ h_config } + { + } +}; + +} } } } diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_dialog_service_connector.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_dialog_service_connector.h new file mode 100644 index 0000000..5c77c2c --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_dialog_service_connector.h @@ -0,0 +1,547 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_dialog_service_connector.h: Public API declarations for DialogServiceConnector C++ base class +// + +#pragma once +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +// Forward decl: facilities friend use use of Connection::FromDialogServiceConnector +class Connection; + +namespace Dialog { + +/// +/// Object used to connect DirectLineSpeech or CustomCommands. +/// +/// +/// Objects of this type are created via the factory method. +/// +class DialogServiceConnector : public std::enable_shared_from_this, public Utils::NonCopyable, public Utils::NonMovable +{ +public: + /// + /// Destroys the instance. + /// + virtual ~DialogServiceConnector() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + // Disconnect the event signals in reverse construction order + TurnStatusReceived.DisconnectAll(); + ActivityReceived.DisconnectAll(); + Canceled.DisconnectAll(); + SpeechEndDetected.DisconnectAll(); + SpeechStartDetected.DisconnectAll(); + SessionStopped.DisconnectAll(); + SessionStarted.DisconnectAll(); + Recognizing.DisconnectAll(); + Recognized.DisconnectAll(); + + if (m_handle != SPXHANDLE_INVALID) + { + ::dialog_service_connector_handle_release(m_handle); + SPX_DBG_TRACE_VERBOSE("%s: m_handle=0x%8p", __FUNCTION__, (void*)m_handle); + m_handle = SPXHANDLE_INVALID; + } + } + + /// + /// Creates a dialog service connector from a and an . + /// Users should use this function to create a dialog service connector. + /// + /// Dialog service config. + /// Audio config. + /// The shared smart pointer of the created dialog service connector. + /// + /// + /// auto audioConfig = Audio::AudioConfig::FromDefaultMicrophoneInput(); + /// auto config = CustomCommandsConfig::FromAuthorizationToken("my_app_id","my_auth_token", "my_region"); + /// auto connector = DialogServiceConnector::FromConfig(config, audioConfig); + /// + /// + /// + /// When speaking of we are referring to one of the classes that inherit from it. + /// The specific class to be used depends on the dialog backend being used: + ///
+ /// - BotFrameworkConfig for DirectLineSpeech
+ /// - CustomCommandsConfig for CustomCommands
+ ///
+ static std::shared_ptr FromConfig(std::shared_ptr connectorConfig, std::shared_ptr audioConfig = nullptr) + { + SPXRECOHANDLE h_connector; + SPX_THROW_ON_FAIL(::dialog_service_connector_create_dialog_service_connector_from_config( + &h_connector, + Utils::HandleOrInvalid(connectorConfig), + Utils::HandleOrInvalid(audioConfig) + )); + return std::shared_ptr { new DialogServiceConnector(h_connector) }; + } + + /// + /// Connects with the back end. + /// + /// An asynchronous operation that starts the connection. + std::future ConnectAsync() + { + auto keep_alive = this->shared_from_this(); + return std::async(std::launch::async, [keep_alive, this]() + { + SPX_THROW_ON_FAIL(::dialog_service_connector_connect(m_handle)); + }); + } + + /// + /// Disconnects from the back end. + /// + /// An asynchronous operation that starts the disconnection. + std::future DisconnectAsync() + { + auto keep_alive = this->shared_from_this(); + return std::async(std::launch::async, [keep_alive, this]() + { + SPX_THROW_ON_FAIL(::dialog_service_connector_disconnect(m_handle)); + }); + } + + /// + /// Sends an activity to the backing dialog. + /// + /// Activity to send + /// An asynchronous operation that starts the operation. + std::future SendActivityAsync(const std::string& activity) + { + auto keep_alive = this->shared_from_this(); + return std::async(std::launch::async, [keep_alive, activity, this]() + { + std::array buffer; + SPX_THROW_ON_FAIL(::dialog_service_connector_send_activity(m_handle, activity.c_str(), buffer.data())); + return std::string{ buffer.data() }; + }); + } + + /// + /// Initiates keyword recognition. + /// + /// Specifies the keyword model to be used. + /// An asynchronous operation that starts the operation. + std::future StartKeywordRecognitionAsync(std::shared_ptr model) + { + auto keep_alive = this->shared_from_this(); + auto h_model = Utils::HandleOrInvalid(model); + return std::async(std::launch::async, [keep_alive, h_model, this]() + { + SPX_THROW_ON_FAIL(dialog_service_connector_start_keyword_recognition(m_handle, h_model)); + }); + } + + /// + /// Stop keyword recognition. + /// + /// An asynchronous operation that starts the operation. + std::future StopKeywordRecognitionAsync() + { + auto keep_alive = this->shared_from_this(); + return std::async(std::launch::async, [keep_alive, this]() + { + SPX_THROW_ON_FAIL(dialog_service_connector_stop_keyword_recognition(m_handle)); + }); + } + + /// + /// Starts a listening session that will terminate after the first utterance. + /// + /// An asynchronous operation that starts the operation. + std::future> ListenOnceAsync() + { + auto keep_alive = this->shared_from_this(); + return std::async(std::launch::async, [keep_alive, this]() + { + SPX_INIT_HR(hr); + + SPXRECOHANDLE h_result = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(dialog_service_connector_listen_once(m_handle, &h_result)); + + return std::make_shared(h_result); + }); + } + + /// + /// Requests that an active listening operation immediately finish, interrupting any ongoing + /// speaking, and provide a result reflecting whatever audio data has been captured so far. + /// + /// A task representing the asynchronous operation that stops an active listening session. 
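// Usage sketch (illustrative only; "YourSubscriptionKey", "YourRegion", and
// the activity JSON are placeholders): one complete connector round trip using
// the factory and async methods above and the event signals declared below.
#include <iostream>
#include <speechapi_cxx.h>

using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Dialog;

void RunSingleDialogTurn()
{
    auto config = BotFrameworkConfig::FromSubscription("YourSubscriptionKey", "YourRegion");
    auto audio = Audio::AudioConfig::FromDefaultMicrophoneInput();
    auto connector = DialogServiceConnector::FromConfig(config, audio);

    // Subscribe before connecting so no events are missed.
    connector->Recognized.Connect([](const SpeechRecognitionEventArgs& e)
    {
        std::cout << "Recognized: " << e.Result->Text << std::endl;
    });
    connector->ActivityReceived.Connect([](const ActivityReceivedEventArgs& e)
    {
        std::cout << "Activity: " << e.GetActivity() << std::endl;
    });

    connector->ConnectAsync().get();

    // One listening turn; the bot's reply arrives through ActivityReceived.
    auto result = connector->ListenOnceAsync().get();
    std::cout << "Final text: " << result->Text << std::endl;

    // A client-originated activity can also be pushed directly.
    connector->SendActivityAsync(R"({"type":"message","text":"hello"})").get();

    connector->DisconnectAsync().get();
}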
+ std::future StopListeningAsync() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() -> void { + SPX_INIT_HR(hr); + // close any unfinished previous attempt + SPX_THROW_ON_FAIL(hr = speechapi_async_handle_release(m_hasyncStopContinuous)); + SPX_EXITFN_ON_FAIL(hr = dialog_service_connector_stop_listening_async(m_handle, &m_hasyncStopContinuous)); + SPX_EXITFN_ON_FAIL(hr = speechapi_async_wait_for(m_hasyncStopContinuous, UINT32_MAX)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = speechapi_async_handle_release(m_hasyncStopContinuous); + SPX_REPORT_ON_FAIL(releaseHr); + m_hasyncStopContinuous = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(hr); + }); + + return future; + } + + /// + /// Sets the authorization token that will be used for connecting to the service. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. + /// Otherwise, the connector will encounter errors during its operation. + /// + /// The authorization token. + void SetAuthorizationToken(const SPXSTRING& token) + { + Properties.SetProperty(PropertyId::SpeechServiceAuthorization_Token, token); + } + + /// + /// Gets the authorization token. + /// + /// Authorization token + SPXSTRING GetAuthorizationToken() + { + return Properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token, SPXSTRING()); + } + + /// + /// Sets a JSON template that will be provided to the speech service for the next conversation. The service will + /// attempt to merge this template into all activities sent to the dialog backend, whether originated by the + /// client with SendActivityAsync or generated by the service, as is the case with speech-to-text results. + /// + /// + /// The activity payload, as a JSON string, to be merged into all applicable activity messages. + /// + void SetSpeechActivityTemplate(const SPXSTRING& activityTemplate) + { + Properties.SetProperty(PropertyId::Conversation_Speech_Activity_Template, activityTemplate); + } + + /// + /// Gets the JSON template that will be provided to the speech service for the next conversation. The service will + /// attempt to merge this template into all activities sent to the dialog backend, whether originated by the + /// client with SendActivityAsync or generated by the service, as is the case with speech-to-text results. + /// + /// The JSON activity template currently set that will be used on subsequent requests. + SPXSTRING GetSpeechActivityTemplate() + { + return Properties.GetProperty(PropertyId::Conversation_Speech_Activity_Template, SPXSTRING()); + } + + /// + /// Signal for events containing speech recognition results. + /// + EventSignal Recognized; + + /// + /// Signal for events containing intermediate recognition results. + /// + EventSignal Recognizing; + + /// + /// Signals that indicates the start of a listening session. + /// + EventSignal SessionStarted; + + /// + /// Signal that indicates the end of a listening session. + /// + EventSignal SessionStopped; + + /// + /// Signal that indicates the first detection of speech data in the current phrase. + /// + EventSignal SpeechStartDetected; + + /// + /// Signal that indicates the detected end of the current phrase's speech data. + /// + EventSignal SpeechEndDetected; + + /// + /// Signal for events relating to the cancellation of an interaction. The event indicates if the reason is a direct cancellation or an error. 
+ /// + EventSignal Canceled; + + /// + /// Signals that an activity was received from the backend + /// + EventSignal ActivityReceived; + + /// + /// Signals that a turn status update was received from the backend + /// + EventSignal TurnStatusReceived; + +private: + /*! \cond PROTECTED */ + template + std::function&)> Callback(F f) + { + return [=, this](const EventSignal& evt) + { + (this->*f)(evt); + }; + } + + static void FireEvent_Recognized(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + SpeechRecognitionEventArgs event{ h_event }; + keep_alive->Recognized.Signal(event); + /* Not releasing the handle as SpeechRecognitionEventArgs manages it */ + } + + static void FireEvent_Recognizing(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + SpeechRecognitionEventArgs event{ h_event }; + keep_alive->Recognizing.Signal(event); + /* Not releasing the handle as SpeechRecognitionEventArgs manages it */ + } + + void RecognizerEventConnectionChanged(const EventSignal& reco_event) + { + if (m_handle != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_handle=0x%8p", __FUNCTION__, (void*)m_handle); + SPX_DBG_TRACE_VERBOSE_IF(!::dialog_service_connector_handle_is_valid(m_handle), "%s: m_handle is INVALID!!!", __FUNCTION__); + + if (&reco_event == &Recognizing) + { + ::dialog_service_connector_recognizing_set_callback(m_handle, Recognizing.IsConnected() ? DialogServiceConnector::FireEvent_Recognizing : nullptr, this); + } + else if (&reco_event == &Recognized) + { + ::dialog_service_connector_recognized_set_callback(m_handle, Recognized.IsConnected() ? DialogServiceConnector::FireEvent_Recognized : nullptr, this); + } + } + } + + static void FireEvent_SessionStarted(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + SessionEventArgs event{ h_event }; + keep_alive->SessionStarted.Signal(event); + + SPX_DBG_ASSERT(::recognizer_event_handle_is_valid(h_event)); + /* Releasing the event handle as SessionEventArgs doesn't keep the handle */ + ::recognizer_event_handle_release(h_event); + } + + static void FireEvent_SessionStopped(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + SessionEventArgs event{ h_event }; + keep_alive->SessionStopped.Signal(event); + + SPX_DBG_ASSERT(::recognizer_event_handle_is_valid(h_event)); + /* Releasing the event handle as SessionEventArgs doesn't keep the handle */ + ::recognizer_event_handle_release(h_event); + } + + void SessionEventConnectionChanged(const EventSignal& session_event) + { + if (m_handle != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_handle=0x%8p", __FUNCTION__, (void*)m_handle); + SPX_DBG_TRACE_VERBOSE_IF(!::dialog_service_connector_handle_is_valid(m_handle), "%s: m_handle is INVALID!!!", __FUNCTION__); + + if (&session_event == &SessionStarted) + { + ::dialog_service_connector_session_started_set_callback(m_handle, SessionStarted.IsConnected() ? DialogServiceConnector::FireEvent_SessionStarted : nullptr, this); + } + else if (&session_event == &SessionStopped) + { + ::dialog_service_connector_session_stopped_set_callback(m_handle, SessionStopped.IsConnected() ? 
DialogServiceConnector::FireEvent_SessionStopped : nullptr, this); + } + } + } + + static void FireEvent_SpeechStartDetected(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + RecognitionEventArgs event{ h_event }; + keep_alive->SpeechStartDetected.Signal(event); + + SPX_DBG_ASSERT(::recognizer_event_handle_is_valid(h_event)); + /* Releasing the event handle as RecognitionEventArgs doesn't manage handle lifetime */ + ::recognizer_event_handle_release(h_event); + } + + static void FireEvent_SpeechEndDetected(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + RecognitionEventArgs event{ h_event }; + keep_alive->SpeechEndDetected.Signal(event); + + SPX_DBG_ASSERT(::recognizer_event_handle_is_valid(h_event)); + /* Releasing the event handle as RecognitionEventArgs doesn't manage handle lifetime */ + ::recognizer_event_handle_release(h_event); + } + + void SpeechDetectionEventConnectionChanged(const EventSignal& speech_detection_event) + { + if (m_handle != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_handle=0x%8p", __FUNCTION__, (void*)m_handle); + SPX_DBG_TRACE_VERBOSE_IF(!::dialog_service_connector_handle_is_valid(m_handle), "%s: m_handle is INVALID!!!", __FUNCTION__); + + if (&speech_detection_event == &SpeechStartDetected) + { + ::dialog_service_connector_speech_start_detected_set_callback(m_handle, SpeechStartDetected.IsConnected() ? DialogServiceConnector::FireEvent_SpeechStartDetected : nullptr, this); + } + else if (&speech_detection_event == &SpeechEndDetected) + { + ::dialog_service_connector_speech_end_detected_set_callback(m_handle, SpeechEndDetected.IsConnected() ? DialogServiceConnector::FireEvent_SpeechEndDetected : nullptr, this); + } + } + } + + static void FireEvent_Canceled(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + SpeechRecognitionCanceledEventArgs event{ h_event }; + keep_alive->Canceled.Signal(event); + /* Not releasing the handle as SpeechRecognitionCanceledEventArgs manages it */ + } + + void CanceledEventConnectionChanged(const EventSignal& canceled_event) + { + if (m_handle != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_handle=0x%8p", __FUNCTION__, (void*)m_handle); + SPX_DBG_TRACE_VERBOSE_IF(!::dialog_service_connector_handle_is_valid(m_handle), "%s: m_handle is INVALID!!!", __FUNCTION__); + + if (&canceled_event == &Canceled) + { + ::dialog_service_connector_canceled_set_callback(m_handle, Canceled.IsConnected() ? DialogServiceConnector::FireEvent_Canceled : nullptr, this); + } + } + } + + static void FireEvent_ActivityReceived(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + ActivityReceivedEventArgs event{ h_event }; + keep_alive->ActivityReceived.Signal(event); + /* Not releasing the handle as ActivityReceivedEventArgs manages it */ + } + + void ActivityReceivedConnectionChanged(const EventSignal& activity_event) + { + if (m_handle != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_handle); + SPX_DBG_TRACE_VERBOSE_IF(!::dialog_service_connector_handle_is_valid(m_handle), "%s: m_handle is INVALID!!!", __FUNCTION__); + + if (&activity_event == &ActivityReceived) + { + ::dialog_service_connector_activity_received_set_callback(m_handle, ActivityReceived.IsConnected() ? 
DialogServiceConnector::FireEvent_ActivityReceived : nullptr, this); + } + } + } + + static void FireEvent_TurnStatusReceived(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + TurnStatusReceivedEventArgs event{ h_event }; + keep_alive->TurnStatusReceived.Signal(event); + /* Not releasing the handle as TurnStatusReceivedEventArgs manages it */ + } + + void TurnStatusReceivedConnectionChanged(const EventSignal& turn_status_event) + { + if (m_handle != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_handle); + SPX_DBG_TRACE_VERBOSE_IF(!::dialog_service_connector_handle_is_valid(m_handle), "%s: m_handle is INVALID!!!", __FUNCTION__); + + if (&turn_status_event == &TurnStatusReceived) + { + ::dialog_service_connector_turn_status_received_set_callback(m_handle, TurnStatusReceived.IsConnected() ? DialogServiceConnector::FireEvent_TurnStatusReceived : nullptr, this); + } + } + } + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRECOHANDLE h_connector) : + PropertyCollection( + [=](){ + SPXPROPERTYBAGHANDLE h_prop_bag = SPXHANDLE_INVALID; + dialog_service_connector_get_property_bag(h_connector, &h_prop_bag); + return h_prop_bag; + }()) + { + } + }; + + inline explicit DialogServiceConnector(SPXRECOHANDLE handle) : + Recognized{ Callback(&DialogServiceConnector::RecognizerEventConnectionChanged) }, + Recognizing{ Callback(&DialogServiceConnector::RecognizerEventConnectionChanged) }, + SessionStarted{ Callback(&DialogServiceConnector::SessionEventConnectionChanged) }, + SessionStopped{ Callback(&DialogServiceConnector::SessionEventConnectionChanged) }, + SpeechStartDetected{ Callback(&DialogServiceConnector::SpeechDetectionEventConnectionChanged) }, + SpeechEndDetected{ Callback(&DialogServiceConnector::SpeechDetectionEventConnectionChanged) }, + Canceled{ Callback(&DialogServiceConnector::CanceledEventConnectionChanged) }, + ActivityReceived{ Callback(&DialogServiceConnector::ActivityReceivedConnectionChanged) }, + TurnStatusReceived{ Callback(&DialogServiceConnector::TurnStatusReceivedConnectionChanged) }, + m_handle{ handle }, + m_properties{ handle }, + Properties{ m_properties } + { + } + +private: + friend class Microsoft::CognitiveServices::Speech::Connection; + SPXRECOHANDLE m_handle; + SPXASYNCHANDLE m_hasyncStopContinuous; + + PrivatePropertyCollection m_properties; + /*! \endcond */ +public: + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; +}; + +} } } } diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_dialog_service_connector_eventargs.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_dialog_service_connector_eventargs.h new file mode 100644 index 0000000..f55f611 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_dialog_service_connector_eventargs.h @@ -0,0 +1,148 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once + +#include +#include + +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Dialog { + +// Forward declarations +class DialogServiceConnector; + +/// +/// Class for activity received event arguments. 
+/// +class ActivityReceivedEventArgs: public std::enable_shared_from_this +{ +public: + friend DialogServiceConnector; + /// + /// Releases the event. + /// + inline ~ActivityReceivedEventArgs() + { + SPX_THROW_ON_FAIL(::dialog_service_connector_activity_received_event_release(m_handle)); + } + + /// + /// Gets the activity associated with the event. + /// + /// The serialized activity activity. + inline std::string GetActivity() const + { + size_t size; + SPX_THROW_ON_FAIL(::dialog_service_connector_activity_received_event_get_activity_size(m_handle, &size)); + auto ptr = std::make_unique(size + 1); + SPX_THROW_ON_FAIL(::dialog_service_connector_activity_received_event_get_activity(m_handle, ptr.get(), size + 1)); + return std::string{ ptr.get() }; + } + + /// + /// Gets the audio associated with the event. + /// + /// The audio. + inline std::shared_ptr GetAudio() const + { + SPXAUDIOSTREAMHANDLE h_audio{ SPXHANDLE_INVALID }; + SPX_THROW_ON_FAIL(::dialog_service_connector_activity_received_event_get_audio(m_handle, &h_audio)); + if (h_audio == SPXHANDLE_INVALID) + { + return nullptr; + } + return std::shared_ptr(new Audio::PullAudioOutputStream(h_audio) ); + } + + /// + /// Checks if the event contains audio. + /// + /// True if the event contains audio, false otherwise. + inline bool HasAudio() const + { + return ::dialog_service_connector_activity_received_event_has_audio(m_handle); + } +private: + /*! \cond PROTECTED */ + inline ActivityReceivedEventArgs(SPXEVENTHANDLE h_event) : m_handle{ h_event } + { + } + + SPXEVENTHANDLE m_handle; + /*! \endcond */ +}; + +/// +/// Class for turn status event arguments. +/// +class TurnStatusReceivedEventArgs : public std::enable_shared_from_this +{ +public: + friend DialogServiceConnector; + /// + /// Releases the event. + /// + inline ~TurnStatusReceivedEventArgs() + { + SPX_THROW_ON_FAIL(::dialog_service_connector_turn_status_received_release(m_handle)); + } + + /// + /// Retrieves the interaction ID associated with this turn status event. Interaction generally correspond + /// to a single input signal (e.g. voice utterance) or data/activity transaction and will correlate to + /// 'replyToId' fields in Bot Framework activities. + /// + /// The interaction ID associated with the turn status. + inline std::string GetInteractionId() const + { + size_t size = 0; + SPX_THROW_ON_FAIL(::dialog_service_connector_turn_status_received_get_interaction_id_size(m_handle, &size)); + auto ptr = std::make_unique(size + 1); + SPX_THROW_ON_FAIL(::dialog_service_connector_turn_status_received_get_interaction_id(m_handle, ptr.get(), size + 1)); + return std::string{ ptr.get() }; + } + + /// + /// Retrieves the conversation ID associated with this turn status event. Conversations may span multiple + /// interactions and are the unit which a client may request resume/retry upon. + /// + /// The conversation ID associated with the turn status. + inline std::string GetConversationId() const + { + size_t size = 0; + SPX_THROW_ON_FAIL(::dialog_service_connector_turn_status_received_get_conversation_id_size(m_handle, &size)); + auto ptr = std::make_unique(size + 1); + SPX_THROW_ON_FAIL(::dialog_service_connector_turn_status_received_get_conversation_id(m_handle, ptr.get(), size + 1)); + return std::string{ ptr.get() }; + } + + /// + /// Retrieves the numeric status code associated with this turn status event. These generally correspond to + /// standard HTTP status codes such as 200 (OK), 400 (Failure/Bad Request), and 429 (Timeout/Throttled). 
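// Usage sketch (illustrative only): inspecting an ActivityReceivedEventArgs in
// an event handler, including draining any synthesized audio attached to the
// activity. The 4 KB read buffer size is an arbitrary choice for the sketch.
#include <array>
#include <cstdint>
#include <iostream>
#include <speechapi_cxx.h>

using namespace Microsoft::CognitiveServices::Speech::Dialog;

void HandleActivity(const ActivityReceivedEventArgs& e)
{
    std::cout << "Activity JSON: " << e.GetActivity() << std::endl;

    if (e.HasAudio())
    {
        auto stream = e.GetAudio(); // PullAudioOutputStream
        std::array<uint8_t, 4096> buffer{};
        while (stream->Read(buffer.data(), static_cast<uint32_t>(buffer.size())) > 0)
        {
            // Hand each PCM chunk to a player or file writer here.
        }
    }
}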
+ /// + /// The status code associated with this event, analolgous to standard HTTP codes. + inline int GetStatusCode() const + { + int cApiStatus = 404; + SPX_THROW_ON_FAIL(::dialog_service_connector_turn_status_received_get_status(m_handle, &cApiStatus)); + return cApiStatus; + } + +private: + /*! \cond PROTECTED */ + inline TurnStatusReceivedEventArgs(SPXEVENTHANDLE h_event) : m_handle{ h_event } + { + } + + SPXEVENTHANDLE m_handle; + /*! \endcond */ +}; + +} } } } diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_embedded_speech_config.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_embedded_speech_config.h new file mode 100644 index 0000000..61454a2 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_embedded_speech_config.h @@ -0,0 +1,324 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_embedded_speech_config.h: Public API declarations for EmbeddedSpeechConfig C++ class +// + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class that defines embedded (offline) speech configuration. +/// +class EmbeddedSpeechConfig +{ +protected: + /*! \cond PROTECTED */ + + SpeechConfig m_config; + + /*! \endcond */ + +public: + /// + /// Internal operator used to get the underlying handle value. + /// + /// A handle. + explicit operator SPXSPEECHCONFIGHANDLE() const + { + return static_cast(m_config); + } + + /// + /// Creates an instance of the embedded speech config with a specified offline model path. + /// + /// The folder path to search for offline models. + /// This can be a root path under which several models are located in subfolders, + /// or a direct path to a specific model folder. + /// + /// A shared pointer to the new embedded speech config instance. + static std::shared_ptr FromPath(const SPXSTRING& path) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, path.empty()); + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(embedded_speech_config_create(&hconfig)); + SPX_THROW_ON_FAIL(embedded_speech_config_add_path(hconfig, Utils::ToUTF8(path).c_str())); + + auto ptr = new EmbeddedSpeechConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of the embedded speech config with specified offline model paths. + /// + /// The folder paths to search for offline models. + /// These can be root paths under which several models are located in subfolders, + /// or direct paths to specific model folders. + /// + /// A shared pointer to the new embedded speech config instance. + static std::shared_ptr FromPaths(const std::vector& paths) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, paths.empty()); + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(embedded_speech_config_create(&hconfig)); + for (const SPXSTRING& path : paths) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, path.empty()); + SPX_THROW_ON_FAIL(embedded_speech_config_add_path(hconfig, Utils::ToUTF8(path).c_str())); + } + + auto ptr = new EmbeddedSpeechConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Gets a list of available speech recognition models. + /// + /// Speech recognition model info. 
+ std::vector> GetSpeechRecognitionModels() + { + std::vector> models; + + uint32_t numModels = 0; + SPX_THROW_ON_FAIL(embedded_speech_config_get_num_speech_reco_models(static_cast(m_config), &numModels)); + + for (uint32_t i = 0; i < numModels; i++) + { + SPXSPEECHRECOMODELHANDLE hmodel = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(embedded_speech_config_get_speech_reco_model(static_cast(m_config), i, &hmodel)); + + auto model = std::make_shared(hmodel); + models.push_back(model); + } + + return models; + } + + /// + /// Sets the model for speech recognition. + /// + /// The model name. + /// The license text. + void SetSpeechRecognitionModel(const SPXSTRING& name, const SPXSTRING& license) + { + SPX_THROW_ON_FAIL(embedded_speech_config_set_speech_recognition_model( + static_cast(m_config), Utils::ToUTF8(name).c_str(), Utils::ToUTF8(license).c_str())); + } + + /// + /// Gets the model name for speech recognition. + /// + /// The speech recognition model name. + SPXSTRING GetSpeechRecognitionModelName() const + { + return GetProperty(PropertyId::SpeechServiceConnection_RecoModelName); + } + + /// + /// Sets the speech recognition output format. + /// + /// Speech recognition output format (simple or detailed). + void SetSpeechRecognitionOutputFormat(OutputFormat format) + { + m_config.SetOutputFormat(format); + } + + /// + /// Gets the speech recognition output format. + /// + /// Speech recognition output format (simple or detailed). + OutputFormat GetSpeechRecognitionOutputFormat() const + { + return m_config.GetOutputFormat(); + } + + /// + /// Sets the profanity option. This can be used to remove profane words or mask them. + /// + /// Profanity option value. + void SetProfanity(ProfanityOption profanity) + { + m_config.SetProfanity(profanity); + } + + /// + /// Sets the voice for embedded speech synthesis. + /// + /// The voice name of the embedded speech synthesis. + /// The license text. + void SetSpeechSynthesisVoice(const SPXSTRING& name, const SPXSTRING& license) + { + SPX_THROW_ON_FAIL(embedded_speech_config_set_speech_synthesis_voice( + static_cast(m_config), Utils::ToUTF8(name).c_str(), Utils::ToUTF8(license).c_str())); + } + + /// + /// Gets the voice name for embedded speech synthesis. + /// + /// The speech synthesis model name, i.e. the voice name. + SPXSTRING GetSpeechSynthesisVoiceName() const + { + return GetProperty(PropertyId::SpeechServiceConnection_SynthOfflineVoice); + } + + /// + /// Sets the speech synthesis output format (e.g. Riff16Khz16BitMonoPcm). + /// + /// Specifies the output format ID + void SetSpeechSynthesisOutputFormat(SpeechSynthesisOutputFormat formatId) + { + m_config.SetSpeechSynthesisOutputFormat(formatId); + } + + /// + /// Gets the speech synthesis output format. + /// + /// The speech synthesis output format. + SPXSTRING GetSpeechSynthesisOutputFormat() const + { + return m_config.GetSpeechSynthesisOutputFormat(); + } + + /// + /// Gets a list of available speech translation models. + /// + /// Speech translation model info. 
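// Usage sketch (illustrative only; the model path, model name, and license
// text are placeholders): creating an embedded (offline) speech config and
// selecting a recognition model.
#include <iostream>
#include <speechapi_cxx.h>

using namespace Microsoft::CognitiveServices::Speech;

void ConfigureEmbeddedRecognition()
{
    auto config = EmbeddedSpeechConfig::FromPath("C:/EmbeddedSpeechModels");

    // Enumerate whatever models were found under the path.
    auto models = config->GetSpeechRecognitionModels();
    std::cout << "Found " << models.size() << " embedded recognition model(s)" << std::endl;

    // Select a model by name; embedded models ship with a license text.
    config->SetSpeechRecognitionModel("Your-Embedded-Model-Name", "YourModelLicenseText");
    config->SetSpeechRecognitionOutputFormat(OutputFormat::Detailed);
}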
+ std::vector> GetSpeechTranslationModels() + { + std::vector> models; + + uint32_t numModels = 0; + SPX_THROW_ON_FAIL(embedded_speech_config_get_num_speech_translation_models(static_cast(m_config), &numModels)); + + for (uint32_t i = 0; i < numModels; i++) + { + SPXSPEECHRECOMODELHANDLE hmodel = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(embedded_speech_config_get_speech_translation_model(static_cast(m_config), i, &hmodel)); + + auto model = std::make_shared(hmodel); + models.push_back(model); + } + + return models; + } + + /// + /// Sets the model for speech translation. + /// + /// Model name. + /// License text. + void SetSpeechTranslationModel(const SPXSTRING& name, const SPXSTRING& license) + { + SPX_THROW_ON_FAIL(embedded_speech_config_set_speech_translation_model( + static_cast(m_config), Utils::ToUTF8(name).c_str(), Utils::ToUTF8(license).c_str())); + } + + /// + /// Gets the model name for speech translation. + /// + /// The speech translation model name. + SPXSTRING GetSpeechTranslationModelName() const + { + return GetProperty(PropertyId::SpeechTranslation_ModelName); + } + + /// + /// Sets the model for keyword recognition. + /// This is for customer specific models that are tailored for detecting + /// wake words and direct commands. + /// + /// Model name. + /// License text. + void SetKeywordRecognitionModel(const SPXSTRING& name, const SPXSTRING& license) + { + SPX_THROW_ON_FAIL(embedded_speech_config_set_keyword_recognition_model( + static_cast(m_config), Utils::ToUTF8(name).c_str(), Utils::ToUTF8(license).c_str())); + } + + /// + /// Gets the model name for keyword recognition. + /// + /// The keyword recognition model name. + SPXSTRING GetKeywordRecognitionModelName() const + { + return GetProperty(PropertyId::KeywordRecognition_ModelName); + } + + /// + /// Sets a property value by name. + /// + /// The property name. + /// The property value. + void SetProperty(const SPXSTRING& name, const SPXSTRING& value) + { + m_config.SetProperty(name, value); + } + + /// + /// Sets a property value by ID. + /// + /// The property id. + /// The property value. + void SetProperty(PropertyId id, const SPXSTRING& value) + { + m_config.SetProperty(id, value); + } + + /// + /// Gets a property value by name. + /// + /// The parameter name. + /// The property value. + SPXSTRING GetProperty(const SPXSTRING& name) const + { + return m_config.GetProperty(name); + } + + /// + /// Gets a property value by ID. + /// + /// The parameter id. + /// The property value. + SPXSTRING GetProperty(PropertyId id) const + { + return m_config.GetProperty(id); + } + + /// + /// Destructs the object. + /// + virtual ~EmbeddedSpeechConfig() = default; + +protected: + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + inline explicit EmbeddedSpeechConfig(SPXSPEECHCONFIGHANDLE hconfig) : m_config(hconfig) + { + } + + /*! \endcond */ + +private: + DISABLE_COPY_AND_MOVE(EmbeddedSpeechConfig); + + }; + +}}} diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_enums.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_enums.h new file mode 100644 index 0000000..2f54177 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_enums.h @@ -0,0 +1,1685 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_cxx_enums.h: Public API declarations for C++ enumerations +// + +#pragma once + +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +constexpr const char* TrueString = "true"; +constexpr const char* FalseString = "false"; +constexpr const char CommaDelim = ','; + +/// +/// Defines speech property ids. +/// Changed in version 1.4.0. +/// +enum class PropertyId +{ + /// + /// The Cognitive Services Speech Service subscription key. If you are using an intent recognizer, you need + /// to specify the LUIS endpoint key for your particular LUIS app. Under normal circumstances, you shouldn't + /// have to use this property directly. + /// Instead, use . + /// + SpeechServiceConnection_Key = 1000, + + /// + /// The Cognitive Services Speech Service endpoint (url). Under normal circumstances, you shouldn't + /// have to use this property directly. + /// Instead, use . + /// NOTE: This endpoint is not the same as the endpoint used to obtain an access token. + /// + SpeechServiceConnection_Endpoint = 1001, + + /// + /// The Cognitive Services Speech Service region. Under normal circumstances, you shouldn't have to + /// use this property directly. + /// Instead, use , , + /// , . + /// + SpeechServiceConnection_Region = 1002, + + /// + /// The Cognitive Services Speech Service authorization token (aka access token). Under normal circumstances, + /// you shouldn't have to use this property directly. + /// Instead, use , + /// , , + /// . + /// + SpeechServiceAuthorization_Token = 1003, + + /// + /// The Cognitive Services Speech Service authorization type. Currently unused. + /// + SpeechServiceAuthorization_Type = 1004, + + /// + /// The Cognitive Services Custom Speech or Custom Voice Service endpoint id. Under normal circumstances, you shouldn't + /// have to use this property directly. + /// Instead use . + /// NOTE: The endpoint id is available in the Custom Speech Portal, listed under Endpoint Details. + /// + SpeechServiceConnection_EndpointId = 1005, + + /// + /// The Cognitive Services Speech Service host (url). Under normal circumstances, you shouldn't + /// have to use this property directly. + /// Instead, use . + /// + SpeechServiceConnection_Host = 1006, + + /// + /// The host name of the proxy server used to connect to the Cognitive Services Speech Service. Under normal circumstances, + /// you shouldn't have to use this property directly. + /// Instead, use . + /// NOTE: This property id was added in version 1.1.0. + /// + SpeechServiceConnection_ProxyHostName = 1100, + + /// + /// The port of the proxy server used to connect to the Cognitive Services Speech Service. Under normal circumstances, + /// you shouldn't have to use this property directly. + /// Instead, use . + /// NOTE: This property id was added in version 1.1.0. + /// + SpeechServiceConnection_ProxyPort = 1101, + + /// + /// The user name of the proxy server used to connect to the Cognitive Services Speech Service. Under normal circumstances, + /// you shouldn't have to use this property directly. + /// Instead, use . + /// NOTE: This property id was added in version 1.1.0. + /// + SpeechServiceConnection_ProxyUserName = 1102, + + /// + /// The password of the proxy server used to connect to the Cognitive Services Speech Service. Under normal circumstances, + /// you shouldn't have to use this property directly. + /// Instead, use . + /// NOTE: This property id was added in version 1.1.0. 
+ /// + SpeechServiceConnection_ProxyPassword = 1103, + + /// + /// The URL string built from speech configuration. + /// This property is intended to be read-only. The SDK is using it internally. + /// NOTE: Added in version 1.5.0. + /// + SpeechServiceConnection_Url = 1104, + + /// + /// Specifies the list of hosts for which proxies should not be used. This setting overrides all other configurations. + /// Hostnames are separated by commas and are matched in a case-insensitive manner. Wildcards are not supported. + /// + SpeechServiceConnection_ProxyHostBypass = 1105, + + /// + /// The list of comma separated languages used as target translation languages. Under normal circumstances, + /// you shouldn't have to use this property directly. Instead use + /// and . + /// + SpeechServiceConnection_TranslationToLanguages = 2000, + + /// + /// The name of the Cognitive Service Text to Speech Service voice. Under normal circumstances, you shouldn't have to use this + /// property directly. Instead use . + /// NOTE: Valid voice names can be found here. + /// + SpeechServiceConnection_TranslationVoice = 2001, + + /// + /// Translation features. For internal use. + /// + SpeechServiceConnection_TranslationFeatures = 2002, + + /// + /// The Language Understanding Service region. Under normal circumstances, you shouldn't have to use this property directly. + /// Instead use . + /// + SpeechServiceConnection_IntentRegion = 2003, + + /// + /// The Cognitive Services Speech Service recognition mode. Can be "INTERACTIVE", "CONVERSATION", "DICTATION". + /// This property is intended to be read-only. The SDK is using it internally. + /// + SpeechServiceConnection_RecoMode = 3000, + + /// + /// The spoken language to be recognized (in BCP-47 format). Under normal circumstances, you shouldn't have to use this property + /// directly. + /// Instead, use . + /// + SpeechServiceConnection_RecoLanguage = 3001, + + /// + /// The session id. This id is a universally unique identifier (aka UUID) representing a specific binding of an audio input stream + /// and the underlying speech recognition instance to which it is bound. Under normal circumstances, you shouldn't have to use this + /// property directly. + /// Instead use . + /// + Speech_SessionId = 3002, + + /// + /// The query parameters provided by users. They will be passed to service as URL query parameters. + /// Added in version 1.5.0 + /// + SpeechServiceConnection_UserDefinedQueryParameters = 3003, + + /// + /// The string to specify the backend to be used for speech recognition; + /// allowed options are online and offline. + /// Under normal circumstances, you shouldn't use this property directly. + /// Currently the offline option is only valid when EmbeddedSpeechConfig is used. + /// Added in version 1.19.0 + /// + SpeechServiceConnection_RecoBackend = 3004, + + /// + /// The name of the model to be used for speech recognition. + /// Under normal circumstances, you shouldn't use this property directly. + /// Currently this is only valid when EmbeddedSpeechConfig is used. + /// Added in version 1.19.0 + /// + SpeechServiceConnection_RecoModelName = 3005, + + /// + /// This property is deprecated. + /// + SpeechServiceConnection_RecoModelKey = 3006, + + /// + /// The path to the ini file of the model to be used for speech recognition. + /// Under normal circumstances, you shouldn't use this property directly. + /// Currently this is only valid when EmbeddedSpeechConfig is used. 
+ /// Added in version 1.19.0 + /// + SpeechServiceConnection_RecoModelIniFile = 3007, + + /// + /// The spoken language to be synthesized (e.g. en-US) + /// Added in version 1.4.0 + /// + SpeechServiceConnection_SynthLanguage = 3100, + + /// + /// The name of the TTS voice to be used for speech synthesis + /// Added in version 1.4.0 + /// + SpeechServiceConnection_SynthVoice = 3101, + + /// + /// The string to specify TTS output audio format + /// Added in version 1.4.0 + /// + SpeechServiceConnection_SynthOutputFormat = 3102, + + /// + /// Indicates if use compressed audio format for speech synthesis audio transmission. + /// This property only affects when SpeechServiceConnection_SynthOutputFormat is set to a pcm format. + /// If this property is not set and GStreamer is available, SDK will use compressed format for synthesized audio transmission, + /// and decode it. You can set this property to "false" to use raw pcm format for transmission on wire. + /// Added in version 1.16.0 + /// + SpeechServiceConnection_SynthEnableCompressedAudioTransmission = 3103, + + /// + /// The string to specify TTS backend; valid options are online and offline. + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use or + /// to set the synthesis backend to offline. + /// Added in version 1.19.0 + /// + SpeechServiceConnection_SynthBackend = 3110, + + /// + /// The data file path(s) for offline synthesis engine; only valid when synthesis backend is offline. + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use or . + /// Added in version 1.19.0 + /// + SpeechServiceConnection_SynthOfflineDataPath = 3112, + + /// + /// The name of the offline TTS voice to be used for speech synthesis + /// Under normal circumstances, you shouldn't use this property directly. + /// Instead, use and . + /// Added in version 1.19.0 + /// + SpeechServiceConnection_SynthOfflineVoice = 3113, + + /// + /// This property is deprecated. + /// + SpeechServiceConnection_SynthModelKey = 3114, + + /// + /// The Cognitive Services Speech Service voices list api endpoint (url). Under normal circumstances, + /// you don't need to specify this property, SDK will construct it based on the region/host/endpoint of . + /// Added in version 1.16.0 + /// + SpeechServiceConnection_VoicesListEndpoint = 3130, + + /// + /// The initial silence timeout value (in milliseconds) used by the service. + /// Added in version 1.5.0 + /// + SpeechServiceConnection_InitialSilenceTimeoutMs = 3200, + + /// + /// The end silence timeout value (in milliseconds) used by the service. + /// Added in version 1.5.0 + /// + SpeechServiceConnection_EndSilenceTimeoutMs = 3201, + + /// + /// A boolean value specifying whether audio logging is enabled in the service or not. + /// Audio and content logs are stored either in Microsoft-owned storage, or in your own storage account linked + /// to your Cognitive Services subscription (Bring Your Own Storage (BYOS) enabled Speech resource). + /// Added in version 1.5.0. + /// + SpeechServiceConnection_EnableAudioLogging = 3202, + + /// + /// The speech service connection language identifier mode. + /// Can be "AtStart" (the default), or "Continuous". See [Language + /// Identification](https://aka.ms/speech/lid?pivots=programming-language-cpp) document. + /// Added in 1.25.0 + /// + SpeechServiceConnection_LanguageIdMode = 3205, + + /// + /// The speech service connection translation categoryId. 
+ /// + SpeechServiceConnection_TranslationCategoryId = 3206, + + /// + /// The auto detect source languages + /// Added in version 1.8.0 + /// + SpeechServiceConnection_AutoDetectSourceLanguages = 3300, + + /// + /// The auto detect source language result + /// Added in version 1.8.0 + /// + SpeechServiceConnection_AutoDetectSourceLanguageResult = 3301, + + /// + /// The requested Cognitive Services Speech Service response output format (simple or detailed). Under normal circumstances, you shouldn't have + /// to use this property directly. + /// Instead use . + /// + SpeechServiceResponse_RequestDetailedResultTrueFalse = 4000, + + /// + /// The requested Cognitive Services Speech Service response output profanity level. Currently unused. + /// + SpeechServiceResponse_RequestProfanityFilterTrueFalse = 4001, + + /// + /// The requested Cognitive Services Speech Service response output profanity setting. + /// Allowed values are "masked", "removed", and "raw". + /// Added in version 1.5.0. + /// + SpeechServiceResponse_ProfanityOption = 4002, + + /// + /// A string value specifying which post processing option should be used by service. + /// Allowed values are "TrueText". + /// Added in version 1.5.0 + /// + SpeechServiceResponse_PostProcessingOption = 4003, + + /// + /// A boolean value specifying whether to include word-level timestamps in the response result. + /// Added in version 1.5.0 + /// + SpeechServiceResponse_RequestWordLevelTimestamps = 4004, + + /// + /// The number of times a word has to be in partial results to be returned. + /// Added in version 1.5.0 + /// + SpeechServiceResponse_StablePartialResultThreshold = 4005, + + /// + /// A string value specifying the output format option in the response result. Internal use only. + /// Added in version 1.5.0. + /// + SpeechServiceResponse_OutputFormatOption = 4006, + + /// + /// A boolean value specifying whether to include SNR (signal to noise ratio) in the response result. + /// Added in version 1.18.0 + /// + SpeechServiceResponse_RequestSnr = 4007, + + /// + /// A boolean value to request for stabilizing translation partial results by omitting words in the end. + /// Added in version 1.5.0. + /// + SpeechServiceResponse_TranslationRequestStablePartialResult = 4100, + + /// + /// A boolean value specifying whether to request WordBoundary events. + /// Added in version 1.21.0. + /// + SpeechServiceResponse_RequestWordBoundary = 4200, + + /// + /// A boolean value specifying whether to request punctuation boundary in WordBoundary Events. Default is true. + /// Added in version 1.21.0. + /// + SpeechServiceResponse_RequestPunctuationBoundary = 4201, + + /// + /// A boolean value specifying whether to request sentence boundary in WordBoundary Events. Default is false. + /// Added in version 1.21.0. + /// + SpeechServiceResponse_RequestSentenceBoundary = 4202, + + /// + /// A boolean value specifying whether the SDK should synchronize synthesis metadata events, + /// (e.g. word boundary, viseme, etc.) to the audio playback. This only takes effect when the audio is played through the SDK. + /// Default is true. + /// If set to false, the SDK will fire the events as they come from the service, which may be out of sync with the audio playback. + /// Added in version 1.31.0. + /// + SpeechServiceResponse_SynthesisEventsSyncToAudio = 4210, + + /// + /// The Cognitive Services Speech Service response output (in JSON format). This property is available on recognition result objects only. 
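// Usage sketch (illustrative only): reading service response properties off a
// recognition result via the ids in this enum. Assumes an already-configured
// SpeechRecognizer from the SDK's speechapi_cxx_speech_recognizer.h.
#include <iostream>
#include <memory>
#include <speechapi_cxx.h>

using namespace Microsoft::CognitiveServices::Speech;

void DumpResponseDetails(const std::shared_ptr<SpeechRecognizer>& recognizer)
{
    auto result = recognizer->RecognizeOnceAsync().get();

    // Raw service payload (JSON) and the measured recognition latency.
    auto json = result->Properties.GetProperty(PropertyId::SpeechServiceResponse_JsonResult);
    auto latencyMs = result->Properties.GetProperty(PropertyId::SpeechServiceResponse_RecognitionLatencyMs);

    std::cout << "Latency (ms): " << latencyMs << std::endl;
    std::cout << "JSON: " << json << std::endl;
}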
+ /// + SpeechServiceResponse_JsonResult = 5000, + + /// + /// The Cognitive Services Speech Service error details (in JSON format). Under normal circumstances, you shouldn't have to + /// use this property directly. + /// Instead, use . + /// + SpeechServiceResponse_JsonErrorDetails = 5001, + + /// + /// The recognition latency in milliseconds. Read-only, available on final speech/translation/intent results. + /// This measures the latency between when an audio input is received by the SDK, and the moment the final result is received from the service. + /// The SDK computes the time difference between the last audio fragment from the audio input that is contributing to the final result, and the time the final result is received from the speech service. + /// Added in version 1.3.0. + /// + SpeechServiceResponse_RecognitionLatencyMs = 5002, + + /// + /// The recognition backend. Read-only, available on speech recognition results. + /// This indicates whether cloud (online) or embedded (offline) recognition was used to produce the result. + /// + SpeechServiceResponse_RecognitionBackend = 5003, + + /// + /// The speech synthesis first byte latency in milliseconds. Read-only, available on final speech synthesis results. + /// This measures the latency between when the synthesis is started to be processed, and the moment the first byte audio is available. + /// Added in version 1.17.0. + /// + SpeechServiceResponse_SynthesisFirstByteLatencyMs = 5010, + + /// + /// The speech synthesis all bytes latency in milliseconds. Read-only, available on final speech synthesis results. + /// This measures the latency between when the synthesis is started to be processed, and the moment the whole audio is synthesized. + /// Added in version 1.17.0. + /// + SpeechServiceResponse_SynthesisFinishLatencyMs = 5011, + + /// + /// The underrun time for speech synthesis in milliseconds. Read-only, available on results in SynthesisCompleted events. + /// This measures the total underrun time from is filled to synthesis completed. + /// Added in version 1.17.0. + /// + SpeechServiceResponse_SynthesisUnderrunTimeMs = 5012, + + /// + /// The speech synthesis connection latency in milliseconds. Read-only, available on final speech synthesis results. + /// This measures the latency between when the synthesis is started to be processed, and the moment the HTTP/WebSocket connection is established. + /// Added in version 1.26.0. + /// + SpeechServiceResponse_SynthesisConnectionLatencyMs = 5013, + + /// + /// The speech synthesis network latency in milliseconds. Read-only, available on final speech synthesis results. + /// This measures the network round trip time. + /// Added in version 1.26.0. + /// + SpeechServiceResponse_SynthesisNetworkLatencyMs = 5014, + + /// + /// The speech synthesis service latency in milliseconds. Read-only, available on final speech synthesis results. + /// This measures the service processing time to synthesize the first byte of audio. + /// Added in version 1.26.0. + /// + SpeechServiceResponse_SynthesisServiceLatencyMs = 5015, + + /// + /// Indicates which backend the synthesis is finished by. Read-only, available on speech synthesis results, except for the result in SynthesisStarted event + /// Added in version 1.17.0. + /// + SpeechServiceResponse_SynthesisBackend = 5020, + + /// + /// Determines if intermediate results contain speaker identification. + /// + /// + /// + /// Allowed values are "true" or "false". 
If set to "true", the intermediate results will contain speaker identification. + /// The default value if unset or set to an invalid value is "false". + /// + /// + /// This is currently only supported for scenarios using the + /// + /// + /// Adding in version 1.40. + /// + /// + SpeechServiceResponse_DiarizeIntermediateResults = 5025, + + /// + /// The cancellation reason. Currently unused. + /// + CancellationDetails_Reason = 6000, + + /// + /// The cancellation text. Currently unused. + /// + CancellationDetails_ReasonText = 6001, + + /// + /// The cancellation detailed text. Currently unused. + /// + CancellationDetails_ReasonDetailedText = 6002, + + /// + /// The Language Understanding Service response output (in JSON format). Available via . + /// + LanguageUnderstandingServiceResponse_JsonResult = 7000, + + /// + /// The device name for audio capture. Under normal circumstances, you shouldn't have to + /// use this property directly. + /// Instead, use . + /// NOTE: This property id was added in version 1.3.0. + /// + AudioConfig_DeviceNameForCapture = 8000, + + /// + /// The number of channels for audio capture. Internal use only. + /// NOTE: This property id was added in version 1.3.0. + /// + AudioConfig_NumberOfChannelsForCapture = 8001, + + /// + /// The sample rate (in Hz) for audio capture. Internal use only. + /// NOTE: This property id was added in version 1.3.0. + /// + AudioConfig_SampleRateForCapture = 8002, + + /// + /// The number of bits of each sample for audio capture. Internal use only. + /// NOTE: This property id was added in version 1.3.0. + /// + AudioConfig_BitsPerSampleForCapture = 8003, + + /// + /// The audio source. Allowed values are "Microphones", "File", and "Stream". + /// Added in version 1.3.0. + /// + AudioConfig_AudioSource = 8004, + + /// + /// The device name for audio render. Under normal circumstances, you shouldn't have to + /// use this property directly. + /// Instead, use . + /// Added in version 1.14.0 + /// + AudioConfig_DeviceNameForRender = 8005, + + /// + /// Playback buffer length in milliseconds, default is 50 milliseconds. + /// + AudioConfig_PlaybackBufferLengthInMs = 8006, + + /// + /// Audio processing options in JSON format. + /// + AudioConfig_AudioProcessingOptions = 8007, + + /// + /// The file name to write logs. + /// Added in version 1.4.0. + /// + Speech_LogFilename = 9001, + + /// + /// A duration of detected silence, measured in milliseconds, after which speech-to-text will determine a spoken + /// phrase has ended and generate a final Recognized result. Configuring this timeout may be helpful in situations + /// where spoken input is significantly faster or slower than usual and default segmentation behavior consistently + /// yields results that are too long or too short. Segmentation timeout values that are inappropriately high or low + /// can negatively affect speech-to-text accuracy; this property should be carefully configured and the resulting + /// behavior should be thoroughly validated as intended. + /// + /// For more information about timeout configuration that includes discussion of default behaviors, please visit + /// https://aka.ms/csspeech/timeouts. + /// + Speech_SegmentationSilenceTimeoutMs = 9002, + + /// + /// The maximum length of a spoken phrase when using the "Time" segmentation strategy. + /// As the length of a spoken phrase approaches this value, the will begin being reduced until either the phrase silence timeout is hit or the phrase reaches the maximum length. 
+ /// + Speech_SegmentationMaximumTimeMs = 9003, + + /// + /// The strategy used to determine when a spoken phrase has ended and a final Recognized result should be generated. + /// Allowed values are "Default", "Time", and "Semantic". + /// + /// + /// Valid values are: + /// + /// + /// Default + /// Use the default strategy and settings as determined by the Speech Service. Use in most situations. + /// + /// + /// Time + /// Uses a time based strategy where the amount of silence between speech is used to determine when to generate a final result. + /// + /// + /// Semantic + /// Uses an AI model to deterine the end of a spoken phrase based on the content of the phrase. + /// + /// + /// + /// When using the time strategy, the property can be used to adjust the amount of silence needed to determine the end of a spoken phrase, + /// and the property can be used to adjust the maximum length of a spoken phrase. + /// + /// + /// The semantic strategy has no control properties available. + /// + /// + Speech_SegmentationStrategy = 9004, + + /// + /// Identifier used to connect to the backend service. + /// Added in version 1.5.0. + /// + Conversation_ApplicationId = 10000, + + /// + /// Type of dialog backend to connect to. + /// Added in version 1.7.0. + /// + Conversation_DialogType = 10001, + + /// + /// Silence timeout for listening + /// Added in version 1.5.0. + /// + Conversation_Initial_Silence_Timeout = 10002, + + /// + /// From id to be used on speech recognition activities + /// Added in version 1.5.0. + /// + Conversation_From_Id = 10003, + + /// + /// ConversationId for the session. + /// Added in version 1.8.0. + /// + Conversation_Conversation_Id = 10004, + + /// + /// Comma separated list of custom voice deployment ids. + /// Added in version 1.8.0. + /// + Conversation_Custom_Voice_Deployment_Ids = 10005, + + /// + /// Speech activity template, stamp properties in the template on the activity generated by the service for speech. + /// Added in version 1.10.0. + /// + Conversation_Speech_Activity_Template = 10006, + + /// + /// Your participant identifier in the current conversation. + /// Added in version 1.13.0 + /// + Conversation_ParticipantId = 10007, + + // If specified as true, request that the service send MessageStatus payloads via the ActivityReceived event + // handler. These messages communicate the outcome of ITurnContext resolution from the dialog system. + // Added in version 1.14.0. + Conversation_Request_Bot_Status_Messages = 10008, + + // Additional identifying information, such as a Direct Line token, used to authenticate with the backend service. + // Added in version 1.16.0. + Conversation_Connection_Id = 10009, + + /// + /// The time stamp associated to data buffer written by client when using Pull/Push audio input streams. + /// The time stamp is a 64-bit value with a resolution of 90 kHz. It is the same as the presentation timestamp in an MPEG transport stream. See https://en.wikipedia.org/wiki/Presentation_timestamp + /// Added in version 1.5.0. + /// + DataBuffer_TimeStamp = 11001, + + /// + /// The user id associated to data buffer written by client when using Pull/Push audio input streams. + /// Added in version 1.5.0. + /// + DataBuffer_UserId = 11002, + + /// + /// The reference text of the audio for pronunciation evaluation. 
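+ ///
+ /// Example: a rough sketch of configuring pronunciation assessment. The PronunciationAssessmentConfig
+ /// helper, its Create/ApplyTo signatures and the recognizer variable are assumptions taken from other
+ /// SDK headers and may differ; only the grading system and granularity enum values come from this file.
+ /// The helper builds the JSON behind these PronunciationAssessment_* properties, so they rarely need
+ /// to be set by hand.
+ ///
+ ///     auto paConfig = PronunciationAssessmentConfig::Create(
+ ///         "good morning",                                    // reference text
+ ///         PronunciationAssessmentGradingSystem::HundredMark,
+ ///         PronunciationAssessmentGranularity::Phoneme,
+ ///         true);                                             // enable miscue calculation
+ ///     paConfig->ApplyTo(recognizer);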
+ /// For this and the following pronunciation assessment parameters, see the table + /// [Pronunciation assessment parameters](/azure/cognitive-services/speech-service/rest-speech-to-text-short#pronunciation-assessment-parameters). + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use or . + /// Added in version 1.14.0 + /// + PronunciationAssessment_ReferenceText = 12001, + + /// + /// The point system for pronunciation score calibration (FivePoint or HundredMark). + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use . + /// Added in version 1.14.0 + /// + PronunciationAssessment_GradingSystem = 12002, + + /// + /// The pronunciation evaluation granularity (Phoneme, Word, or FullText). + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use . + /// Added in version 1.14.0 + /// + PronunciationAssessment_Granularity = 12003, + + /// + /// Defines if enable miscue calculation. + /// With this enabled, the pronounced words will be compared to the reference text, + /// and will be marked with omission/insertion based on the comparison. The default setting is False. + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use . + /// Added in version 1.14.0 + /// + PronunciationAssessment_EnableMiscue = 12005, + + /// + /// The pronunciation evaluation phoneme alphabet. The valid values are "SAPI" (default) and "IPA" + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use . + /// Added in version 1.20.0 + /// + PronunciationAssessment_PhonemeAlphabet = 12006, + + /// + /// The pronunciation evaluation nbest phoneme count. + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use . + /// Added in version 1.20.0 + /// + PronunciationAssessment_NBestPhonemeCount = 12007, + + /// + /// Whether to enable prosody assessment. + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use . + /// Added in version 1.33.0 + /// + PronunciationAssessment_EnableProsodyAssessment = 12008, + + /// + /// The json string of pronunciation assessment parameters + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use . + /// Added in version 1.14.0 + /// + PronunciationAssessment_Json = 12009, + + /// + /// Pronunciation assessment parameters. + /// This property is intended to be read-only. The SDK is using it internally. + /// Added in version 1.14.0 + /// + PronunciationAssessment_Params = 12010, + + /// + /// The content topic of the pronunciation assessment. + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use . + /// Added in version 1.33.0 + /// + PronunciationAssessment_ContentTopic = 12020, + + /// + /// Speaker Recognition backend API version. + /// This property is added to allow testing and use of previous versions of Speaker Recognition APIs, where applicable. + /// Added in version 1.18.0 + /// + SpeakerRecognition_Api_Version = 13001, + + /// + /// The name of a model to be used for speech translation. + /// Do not use this property directly. + /// Currently this is only valid when EmbeddedSpeechConfig is used. + /// + SpeechTranslation_ModelName = 13100, + + /// + /// This property is deprecated. 
+ /// + SpeechTranslation_ModelKey = 13101, + + /// + /// The name of a model to be used for keyword recognition. + /// Do not use this property directly. + /// Currently this is only valid when EmbeddedSpeechConfig is used. + /// + KeywordRecognition_ModelName = 13200, + + /// + /// This property is deprecated. + /// + KeywordRecognition_ModelKey = 13201, + + /// + /// Enable the collection of embedded speech performance metrics which can + /// be used to evaluate the capability of a device to use embedded speech. + /// The collected data is included in results from specific scenarios like + /// speech recognition. + /// The default setting is "false". Note that metrics may not be available + /// from all embedded speech scenarios. + /// + EmbeddedSpeech_EnablePerformanceMetrics = 13300, + + /// + /// The pitch of the synthesized speech. + /// + SpeechSynthesisRequest_Pitch = 14001, + + /// + /// The rate of the synthesized speech. + /// + SpeechSynthesisRequest_Rate = 14002, + + /// + /// The volume of the synthesized speech. + /// + SpeechSynthesisRequest_Volume = 14003, +}; + +/// +/// Output format. +/// +enum class OutputFormat +{ + Simple = 0, + Detailed = 1 +}; + +/// +/// Removes profanity (swearing), or replaces letters of profane words with stars. +/// Added in version 1.5.0. +/// +enum class ProfanityOption +{ + /// + /// Replaces letters in profane words with star characters. + /// + Masked = 0, + /// + /// Removes profane words. + /// + Removed = 1, + /// + /// Does nothing to profane words. + /// + Raw = 2 +}; + +/// +/// Specifies the possible reasons a recognition result might be generated. +/// +enum class ResultReason +{ + /// + /// Indicates speech could not be recognized. More details can be found in the NoMatchDetails object. + /// + NoMatch = 0, + + /// + /// Indicates that the recognition was canceled. More details can be found using the CancellationDetails object. + /// + Canceled = 1, + + /// + /// Indicates the speech result contains hypothesis text. + /// + RecognizingSpeech = 2, + + /// + /// Indicates the speech result contains final text that has been recognized. + /// Speech Recognition is now complete for this phrase. + /// + RecognizedSpeech = 3, + + /// + /// Indicates the intent result contains hypothesis text and intent. + /// + RecognizingIntent = 4, + + /// + /// Indicates the intent result contains final text and intent. + /// Speech Recognition and Intent determination are now complete for this phrase. + /// + RecognizedIntent = 5, + + /// + /// Indicates the translation result contains hypothesis text and its translation(s). + /// + TranslatingSpeech = 6, + + /// + /// Indicates the translation result contains final text and corresponding translation(s). + /// Speech Recognition and Translation are now complete for this phrase. + /// + TranslatedSpeech = 7, + + /// + /// Indicates the synthesized audio result contains a non-zero amount of audio data + /// + SynthesizingAudio = 8, + + /// + /// Indicates the synthesized audio is now complete for this phrase. + /// + SynthesizingAudioCompleted = 9, + + /// + /// Indicates the speech result contains (unverified) keyword text. + /// Added in version 1.3.0 + /// + RecognizingKeyword = 10, + + /// + /// Indicates that keyword recognition completed recognizing the given keyword. 
+ /// Added in version 1.3.0 + /// + RecognizedKeyword = 11, + + /// + /// Indicates the speech synthesis is now started + /// Added in version 1.4.0 + /// + SynthesizingAudioStarted = 12, + + /// + /// Indicates the transcription result contains hypothesis text and its translation(s) for + /// other participants in the conversation. + /// Added in version 1.8.0 + /// + TranslatingParticipantSpeech = 13, + + /// + /// Indicates the transcription result contains final text and corresponding translation(s) + /// for other participants in the conversation. Speech Recognition and Translation are now + /// complete for this phrase. + /// Added in version 1.8.0 + /// + TranslatedParticipantSpeech = 14, + + /// + /// Indicates the transcription result contains the instant message and corresponding + /// translation(s). + /// Added in version 1.8.0 + /// + TranslatedInstantMessage = 15, + + /// + /// Indicates the transcription result contains the instant message for other participants + /// in the conversation and corresponding translation(s). + /// Added in version 1.8.0 + /// + TranslatedParticipantInstantMessage = 16, + + /// + /// Indicates the voice profile is being enrolling and customers need to send more audio to create a voice profile. + /// Added in version 1.12.0 + /// + EnrollingVoiceProfile = 17, + + /// + /// The voice profile has been enrolled. + /// Added in version 1.12.0 + /// + EnrolledVoiceProfile = 18, + + /// + /// Indicates successful identification of some speakers. + /// Added in version 1.12.0 + /// + RecognizedSpeakers = 19, + + /// + /// Indicates successfully verified one speaker. + /// Added in version 1.12.0 + /// + RecognizedSpeaker = 20, + + /// + /// Indicates a voice profile has been reset successfully. + /// Added in version 1.12.0 + /// + ResetVoiceProfile = 21, + + /// + /// Indicates a voice profile has been deleted successfully. + /// Added in version 1.12.0 + /// + DeletedVoiceProfile = 22, + + /// + /// Indicates the voices list has been retrieved successfully. + /// Added in version 1.16.0 + /// + VoicesListRetrieved = 23 +}; + +/// +/// Defines the possible reasons a recognition result might be canceled. +/// +enum class CancellationReason +{ + /// + /// Indicates that an error occurred during speech recognition. + /// + Error = 1, + + /// + /// Indicates that the end of the audio stream was reached. + /// + EndOfStream = 2, + + /// + /// Indicates that request was cancelled by the user. + /// Added in version 1.14.0 + /// + CancelledByUser = 3, +}; + +/// +/// Defines error code in case that CancellationReason is Error. +/// Added in version 1.1.0. +/// +enum class CancellationErrorCode +{ + /// + /// No error. + /// If CancellationReason is EndOfStream, CancellationErrorCode + /// is set to NoError. + /// + NoError = 0, + + /// + /// Indicates an authentication error. + /// An authentication error occurs if subscription key or authorization token is invalid, expired, + /// or does not match the region being used. + /// + AuthenticationFailure = 1, + + /// + /// Indicates that one or more recognition parameters are invalid or the audio format is not supported. + /// + BadRequest = 2, + + /// + /// Indicates that the number of parallel requests exceeded the number of allowed concurrent transcriptions for the subscription. + /// + TooManyRequests = 3, + + /// + /// Indicates that the free subscription used by the request ran out of quota. + /// + Forbidden = 4, + + /// + /// Indicates a connection error. 
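+ ///
+ /// Example: a compact sketch of how the ResultReason, CancellationReason and CancellationErrorCode
+ /// values defined in this header are typically inspected after a single recognition. The recognizer,
+ /// RecognizeOnceAsync and CancellationDetails::FromResult are assumed from the SDK's recognizer and
+ /// result headers.
+ ///
+ ///     auto result = recognizer->RecognizeOnceAsync().get();
+ ///     if (result->Reason == ResultReason::RecognizedSpeech)
+ ///     {
+ ///         // Final text is available in result->Text.
+ ///     }
+ ///     else if (result->Reason == ResultReason::Canceled)
+ ///     {
+ ///         auto details = CancellationDetails::FromResult(result);
+ ///         if (details->Reason == CancellationReason::Error &&
+ ///             details->ErrorCode == CancellationErrorCode::ConnectionFailure)
+ ///         {
+ ///             // Network problem: inspect details->ErrorDetails and consider retrying.
+ ///         }
+ ///     }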
+ /// + ConnectionFailure = 5, + + /// + /// Indicates a time-out error when waiting for response from service. + /// + ServiceTimeout = 6, + + /// + /// Indicates that an error is returned by the service. + /// + ServiceError = 7, + + /// + /// Indicates that the service is currently unavailable. + /// + ServiceUnavailable = 8, + + /// + /// Indicates an unexpected runtime error. + /// + RuntimeError = 9, + + /// + /// Indicates the Speech Service is temporarily requesting a reconnect to a different endpoint. + /// + /// Used internally + ServiceRedirectTemporary = 10, + + /// + /// Indicates the Speech Service is permanently requesting a reconnect to a different endpoint. + /// + /// Used internally + ServiceRedirectPermanent = 11, + + /// + /// Indicates the embedded speech (SR or TTS) model is not available or corrupted. + /// + EmbeddedModelError = 12, +}; + +/// +/// Defines the possible reasons a recognition result might not be recognized. +/// +enum class NoMatchReason +{ + /// + /// Indicates that speech was detected, but not recognized. + /// + NotRecognized = 1, + + /// + /// Indicates that the start of the audio stream contained only silence, and the service timed out waiting for speech. + /// + InitialSilenceTimeout = 2, + + /// + /// Indicates that the start of the audio stream contained only noise, and the service timed out waiting for speech. + /// + InitialBabbleTimeout = 3, + + /// + /// Indicates that the spotted keyword has been rejected by the keyword verification service. + /// Added in version 1.5.0. + /// + KeywordNotRecognized = 4, + + /// + /// Indicates that the audio stream contained only silence after the last recognized phrase. + /// + EndSilenceTimeout = 5 +}; + +/// +/// Defines the possible types for an activity json value. +/// Added in version 1.5.0 +/// +enum class ActivityJSONType : int +{ + Null = 0, + Object = 1, + Array = 2, + String = 3, + Double = 4, + UInt = 5, + Int = 6, + Boolean = 7 +}; + + +/// +/// Defines the possible speech synthesis output audio formats. +/// Updated in version 1.19.0 +/// +enum class SpeechSynthesisOutputFormat +{ + /// + /// raw-8khz-8bit-mono-mulaw + /// + Raw8Khz8BitMonoMULaw = 1, + + /// + /// riff-16khz-16kbps-mono-siren + /// Unsupported by the service. Do not use this value. + /// + Riff16Khz16KbpsMonoSiren = 2, + + /// + /// audio-16khz-16kbps-mono-siren + /// Unsupported by the service. Do not use this value. 
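+ ///
+ /// Example: a short sketch of requesting one of the supported formats in this enum for synthesis.
+ /// SetSpeechSynthesisOutputFormat, the SpeechSynthesizer factory, SpeakTextAsync and GetAudioData are
+ /// assumed from the SDK's config and synthesizer headers; the format and reason values come from this file.
+ ///
+ ///     config->SetSpeechSynthesisOutputFormat(SpeechSynthesisOutputFormat::Riff24Khz16BitMonoPcm);
+ ///     auto synthesizer = SpeechSynthesizer::FromConfig(config);
+ ///     auto result = synthesizer->SpeakTextAsync("Hello world").get();
+ ///     if (result->Reason == ResultReason::SynthesizingAudioCompleted)
+ ///     {
+ ///         // result->GetAudioData() returns the audio bytes in the requested format.
+ ///     }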
+ /// + Audio16Khz16KbpsMonoSiren = 3, + + /// + /// audio-16khz-32kbitrate-mono-mp3 + /// + Audio16Khz32KBitRateMonoMp3 = 4, + + /// + /// audio-16khz-128kbitrate-mono-mp3 + /// + Audio16Khz128KBitRateMonoMp3 = 5, + + /// + /// audio-16khz-64kbitrate-mono-mp3 + /// + Audio16Khz64KBitRateMonoMp3 = 6, + + /// + /// audio-24khz-48kbitrate-mono-mp3 + /// + Audio24Khz48KBitRateMonoMp3 =7, + + /// + /// audio-24khz-96kbitrate-mono-mp3 + /// + Audio24Khz96KBitRateMonoMp3 = 8, + + /// + /// audio-24khz-160kbitrate-mono-mp3 + /// + Audio24Khz160KBitRateMonoMp3 = 9, + + /// + /// raw-16khz-16bit-mono-truesilk + /// + Raw16Khz16BitMonoTrueSilk = 10, + + /// + /// riff-16khz-16bit-mono-pcm + /// + Riff16Khz16BitMonoPcm = 11, + + /// + /// riff-8khz-16bit-mono-pcm + /// + Riff8Khz16BitMonoPcm = 12, + + /// + /// riff-24khz-16bit-mono-pcm + /// + Riff24Khz16BitMonoPcm = 13, + + /// + /// riff-8khz-8bit-mono-mulaw + /// + Riff8Khz8BitMonoMULaw = 14, + + /// + /// raw-16khz-16bit-mono-pcm + /// + Raw16Khz16BitMonoPcm = 15, + + /// + /// raw-24khz-16bit-mono-pcm + /// + Raw24Khz16BitMonoPcm = 16, + + /// + /// raw-8khz-16bit-mono-pcm + /// + Raw8Khz16BitMonoPcm = 17, + + /// + /// ogg-16khz-16bit-mono-opus + /// + Ogg16Khz16BitMonoOpus = 18, + + /// + /// ogg-24khz-16bit-mono-opus + /// + Ogg24Khz16BitMonoOpus = 19, + + /// + /// raw-48khz-16bit-mono-pcm + /// + Raw48Khz16BitMonoPcm = 20, + + /// + /// riff-48khz-16bit-mono-pcm + /// + Riff48Khz16BitMonoPcm = 21, + + /// + /// audio-48khz-96kbitrate-mono-mp3 + /// + Audio48Khz96KBitRateMonoMp3 = 22, + + /// + /// audio-48khz-192kbitrate-mono-mp3 + /// + Audio48Khz192KBitRateMonoMp3 = 23, + + /// + /// ogg-48khz-16bit-mono-opus + /// Added in version 1.16.0 + /// + Ogg48Khz16BitMonoOpus = 24, + + /// + /// webm-16khz-16bit-mono-opus + /// Added in version 1.16.0 + /// + Webm16Khz16BitMonoOpus = 25, + + /// + /// webm-24khz-16bit-mono-opus + /// Added in version 1.16.0 + /// + Webm24Khz16BitMonoOpus = 26, + + /// + /// raw-24khz-16bit-mono-truesilk + /// Added in version 1.17.0 + /// + Raw24Khz16BitMonoTrueSilk = 27, + + /// + /// raw-8khz-8bit-mono-alaw + /// Added in version 1.17.0 + /// + Raw8Khz8BitMonoALaw = 28, + + /// + /// riff-8khz-8bit-mono-alaw + /// Added in version 1.17.0 + /// + Riff8Khz8BitMonoALaw = 29, + + /// + /// webm-24khz-16bit-24kbps-mono-opus + /// Audio compressed by OPUS codec in a WebM container, with bitrate of 24kbps, optimized for IoT scenario. + /// (Added in 1.19.0) + /// + Webm24Khz16Bit24KbpsMonoOpus = 30, + + /// + /// audio-16khz-16bit-32kbps-mono-opus + /// Audio compressed by OPUS codec without container, with bitrate of 32kbps. + /// (Added in 1.20.0) + /// + Audio16Khz16Bit32KbpsMonoOpus = 31, + + /// + /// audio-24khz-16bit-48kbps-mono-opus + /// Audio compressed by OPUS codec without container, with bitrate of 48kbps. + /// (Added in 1.20.0) + /// + Audio24Khz16Bit48KbpsMonoOpus = 32, + + /// + /// audio-24khz-16bit-24kbps-mono-opus + /// Audio compressed by OPUS codec without container, with bitrate of 24kbps. + /// (Added in 1.20.0) + /// + Audio24Khz16Bit24KbpsMonoOpus = 33, + + /// + /// raw-22050hz-16bit-mono-pcm + /// Raw PCM audio at 22050Hz sampling rate and 16-bit depth. + /// (Added in 1.22.0) + /// + Raw22050Hz16BitMonoPcm = 34, + + /// + /// riff-22050hz-16bit-mono-pcm + /// PCM audio at 22050Hz sampling rate and 16-bit depth, with RIFF header. 
+ /// (Added in 1.22.0) + /// + Riff22050Hz16BitMonoPcm = 35, + + /// + /// raw-44100hz-16bit-mono-pcm + /// Raw PCM audio at 44100Hz sampling rate and 16-bit depth. + /// (Added in 1.22.0) + /// + Raw44100Hz16BitMonoPcm = 36, + + /// + /// riff-44100hz-16bit-mono-pcm + /// PCM audio at 44100Hz sampling rate and 16-bit depth, with RIFF header. + /// (Added in 1.22.0) + /// + Riff44100Hz16BitMonoPcm = 37, + + /// + /// amr-wb-16000hz + /// AMR-WB audio at 16kHz sampling rate. + /// (Added in 1.24.0) + /// + AmrWb16000Hz = 38, + + /// + /// g722-16khz-64kbps + /// G.722 audio at 16kHz sampling rate and 64kbps bitrate. + /// (Added in 1.38.0) + /// + G72216Khz64Kbps = 39 +}; + +/// +/// Defines the possible status of audio data stream. +/// Added in version 1.4.0 +/// +enum class StreamStatus +{ + /// + /// The audio data stream status is unknown + /// + Unknown = 0, + + /// + /// The audio data stream contains no data + /// + NoData = 1, + + /// + /// The audio data stream contains partial data of a speak request + /// + PartialData = 2, + + /// + /// The audio data stream contains all data of a speak request + /// + AllData = 3, + + /// + /// The audio data stream was canceled + /// + Canceled = 4 +}; + +/// +/// Defines channels used to pass property settings to service. +/// Added in version 1.5.0. +/// +enum class ServicePropertyChannel +{ + /// + /// Uses URI query parameter to pass property settings to service. + /// + UriQueryParameter = 0, + + /// + /// Uses HttpHeader to set a key/value in a HTTP header. + /// + HttpHeader = 1 +}; + +namespace Transcription +{ + /// + /// Why the participant changed event was raised + /// Added in version 1.8.0 + /// + enum class ParticipantChangedReason + { + /// + /// Participant has joined the conversation + /// + JoinedConversation = 0, + + /// + /// Participant has left the conversation. This could be voluntary, or involuntary + /// (e.g. they are experiencing networking issues) + /// + LeftConversation = 1, + + /// + /// The participants' state has changed (e.g. they became muted, changed their nickname) + /// + Updated = 2 + }; +} + +namespace Intent +{ + /// + /// Used to define the type of entity used for intent recognition. + /// + enum class EntityType + { + /// + /// This will match any text that fills the slot. + /// + Any = 0, + /// + /// This will match text that is contained within the list or any text if the mode is set to "fuzzy". + /// + List = 1, + /// + /// This will match cardinal and ordinal integers. + /// + PrebuiltInteger = 2 + }; + + /// + /// Used to define the type of entity used for intent recognition. + /// + enum class EntityMatchMode + { + /// + /// This is the basic or default mode of matching based on the EntityType + /// + Basic = 0, + /// + /// This will match only exact matches within the entities phrases. + /// + Strict = 1, + /// + /// This will match text within the slot the entity is in, but not require anything from that text. + /// + Fuzzy = 2 + }; + + /// + /// Used to define the greediness of the entity. + /// + enum class EntityGreed + { + /// + /// Lazy will match as little as possible. + /// + Lazy = 0, + /// + /// Greedy will match as much as possible. + /// + Greedy = 1, + }; +} +/// +/// Defines voice profile types +/// +enum class VoiceProfileType +{ + /// + /// Text independent speaker identification. + /// + TextIndependentIdentification = 1, + + /// + /// Text dependent speaker verification. + /// + TextDependentVerification = 2, + + /// + /// Text independent verification. 
+ /// + TextIndependentVerification = 3 +}; + +/// +/// Defines the scope that a Recognition Factor is applied to. +/// +enum class RecognitionFactorScope +{ + /// + /// A Recognition Factor will apply to grammars that can be referenced as individual partial phrases. + /// + /// + /// Currently only applies to PhraseListGrammars + /// + PartialPhrase = 1, +}; + +/// +/// Defines the point system for pronunciation score calibration; default value is FivePoint. +/// Added in version 1.14.0 +/// +enum class PronunciationAssessmentGradingSystem +{ + /// + /// Five point calibration + /// + FivePoint = 1, + + /// + /// Hundred mark + /// + HundredMark = 2 +}; + +/// +/// Defines the pronunciation evaluation granularity; default value is Phoneme. +/// Added in version 1.14.0 +/// +enum class PronunciationAssessmentGranularity +{ + /// + /// Shows the score on the full text, word and phoneme level + /// + Phoneme = 1, + + /// + /// Shows the score on the full text and word level + /// + Word = 2, + + /// + /// Shows the score on the full text level only + /// + FullText = 3 +}; + +/// +/// Defines the type of synthesis voices +/// Added in version 1.16.0 +/// +enum class SynthesisVoiceType +{ + /// + /// Online neural voice + /// + OnlineNeural = 1, + + /// + /// Online standard voice + /// + OnlineStandard = 2, + + /// + /// Offline neural voice + /// + OfflineNeural = 3, + + /// + /// Offline standard voice + /// + OfflineStandard = 4 +}; + +/// +/// Defines the gender of synthesis voices +/// Added in version 1.17.0 +/// +enum class SynthesisVoiceGender +{ + /// + /// Gender unknown. + /// + Unknown = 0, + + /// + /// Female voice + /// + Female = 1, + + /// + /// Male voice + /// + Male = 2 +}; + +/// +/// Defines the boundary type of speech synthesis boundary event +/// Added in version 1.21.0 +/// +enum class SpeechSynthesisBoundaryType +{ + /// + /// Word boundary + /// + Word = 0, + + /// + /// Punctuation boundary + /// + Punctuation = 1, + + /// + /// Sentence boundary + /// + Sentence = 2 +}; + +/// +/// The strategy used to determine when a spoken phrase has ended and a final Recognized result should be generated. +/// Allowed values are "Default", "Time", and "Semantic". +/// +enum class SegmentationStrategy +{ + /// + /// Use the default strategy and settings as determined by the Speech Service. Use in most situations. + /// + Default = 0, + + /// + /// Uses a time based strategy where the amount of silence between speech is used to determine when to generate a final result. + /// + /// + /// When using the time strategy, the property can be used to adjust the amount of silence needed to determine the end of a spoken phrase, + /// and the property can be used to adjust the maximum length of a spoken phrase. + /// + Time = 1, + + /// + /// Uses an AI model to deterine the end of a spoken phrase based on the content of the phrase. + /// + /// + /// The semantic strategy has no control properties available. + /// + Semantic = 2 +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_event_logger.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_event_logger.h new file mode 100644 index 0000000..4e67cb1 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_event_logger.h @@ -0,0 +1,108 @@ +// +// Copyright (c) Microsoft. All rights reserved. 
+// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Diagnostics { +namespace Logging { + +/// +/// Class with static methods to control callback-based SDK logging. +/// Turning on logging while running your Speech SDK scenario provides +/// detailed information from the SDK's core native components. If you +/// report an issue to Microsoft, you may be asked to provide logs to help +/// Microsoft diagnose the issue. Your application should not take dependency +/// on particular log strings, as they may change from one SDK release to another +/// without notice. +/// Use EventLogger when you want to get access to new log strings as soon +/// as they are available, and you need to further process them. For example, +/// integrating Speech SDK logs with your existing logging collection system. +/// Added in version 1.20.0 +/// +/// Event logging is a process wide construct. That means that if (for example) +/// you have multiple speech recognizer objects running in parallel, you can only register +/// one callback function to receive interleaved logs from all recognizers. You cannot register +/// a separate callback for each recognizer. +class EventLogger +{ +public: + using CallbackFunction_Type = ::std::function; + + /// + /// Register a callback function that will be invoked for each new log messages. + /// + /// callback function to call. Set a nullptr value + /// to stop the Event Logger. + /// You can only register one callback function. This call will happen on a working thread of the SDK, + /// so the log string should be copied somewhere for further processing by another thread, and the function should return immediately. + /// No heavy processing or network calls should be done in this callback function. + static void SetCallback(CallbackFunction_Type callback = nullptr) + { + AZAC_THROW_ON_FAIL(diagnostics_logmessage_set_callback(nullptr == callback ? nullptr : LineLogged)); + + SetOrGet(true, callback); + } + + /// + /// Sets or clears filters for callbacks. + /// Once filters are set, the callback will be invoked only if the log string + /// contains at least one of the strings specified by the filters. The match is case sensitive. + /// + /// Optional. Filters to use, or an empty list to clear previously set filters + static void SetFilters(std::initializer_list filters = {}) + { + std::string str = ""; + + if (filters.size() > 0) + { + std::ostringstream filtersCollapsed; + std::copy(filters.begin(), filters.end(), std::ostream_iterator(filtersCollapsed, ";")); + str = filtersCollapsed.str(); + } + + AZAC_THROW_ON_FAIL(diagnostics_logmessage_set_filters(str.c_str())); + } + + /// + /// Sets the level of the messages to be captured by the logger + /// + /// Maximum level of detail to be captured by the logger. 
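+ ///
+ /// Example: a minimal sketch of wiring up the event logger. The Level enum value is assumed from the
+ /// SDK's logging level header, the filter strings are placeholders, and MyLogQueue stands in for your
+ /// own thread-safe sink; SetCallback, SetFilters and SetLevel are declared in this class. The callback
+ /// runs on an SDK worker thread, so it should only hand the line off and return.
+ ///
+ ///     EventLogger::SetLevel(Level::Verbose);
+ ///     EventLogger::SetFilters({ "error", "websocket" });    // only lines containing these substrings
+ ///     EventLogger::SetCallback([](const std::string& line)
+ ///     {
+ ///         MyLogQueue::Push(line);    // copy the line; process it on another thread
+ ///     });
+ ///     // ... run the Speech SDK scenario ...
+ ///     EventLogger::SetCallback();    // passing no callback stops event logging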
+ static void SetLevel(Level level) + { + const auto levelStr = Details::LevelToString(level); + diagnostics_set_log_level("event", levelStr); + } + +private: + static CallbackFunction_Type SetOrGet(bool set, CallbackFunction_Type callback) + { + static CallbackFunction_Type staticCallback = nullptr; + if (set) + { + staticCallback = callback; + } + return staticCallback; + } + + static void LineLogged(const char* line) + { + auto callback = SetOrGet(false, nullptr); + if (nullptr != callback) + { + callback(line); + } + } +}; +}}}}} diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_eventargs.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_eventargs.h new file mode 100644 index 0000000..8142268 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_eventargs.h @@ -0,0 +1,47 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_eventargs.h: Public API declarations for EventArgs C++ base class +// + +#pragma once +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Base class for event arguments. +/// +class EventArgs +{ +public: + + /// + /// Destructor. + /// + virtual ~EventArgs() {} + +protected: + + /*! \cond PROTECTED */ + + /// + /// Constructor. + /// + EventArgs() {}; + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(EventArgs); +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_eventsignal.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_eventsignal.h new file mode 100644 index 0000000..5c544a6 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_eventsignal.h @@ -0,0 +1,202 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_eventsignal.h: Public API declarations for the EventSignal class. This derives from +// EventSignalBase and uses runtime type information (RTTI) to facilitate management and disconnection of handlers +// without explicit callback token management. +// + +#pragma once +#include +#include +#include +#include + +#include + +// TODO: TFS#3671067 - Vision: Consider moving majority of EventSignal to AI::Core::Details namespace, and refactoring Vision::Core::Events to inherit, and relay to private base + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Clients can connect to the event signal to receive events, or disconnect from the event signal to stop receiving events. +/// +/// +/// At construction time, connect and disconnect callbacks can be provided that are called when +/// the number of connected clients changes from zero to one or one to zero, respectively. +/// +// +template +class EventSignal : public EventSignalBase +{ +public: + /// + /// Callback type that is used for signalling the event to connected clients. + /// + using CallbackFunction = std::function; + + /// + /// A monotonically increasing token used for registration, tracking, and unregistration of callbacks. 
+ /// + using CallbackToken = uint32_t; + + /// + /// Type for callbacks used when any client connects to the signal (the number of connected clients changes from zero to one) or + /// the last client disconnects from the signal (the number of connected clients changes from one to zero). + /// + using NotifyCallback_Type = std::function&)>; + + /// + /// Constructs an event signal with empty register and disconnect callbacks. + /// + EventSignal() : EventSignal(nullptr) + { + } + + /// + /// Constructor. + /// + /// Callback to invoke if the number of connected clients changes from zero to one, or one to zero + EventSignal(NotifyCallback_Type connectedAndDisconnected) + : EventSignal(connectedAndDisconnected, connectedAndDisconnected) + { + } + + /// + /// Constructor. + /// + /// Callback to invoke if the number of connected clients changes from zero to one. + /// Callback to invoke if the number of connected clients changes from one to zero. + EventSignal(NotifyCallback_Type connected, NotifyCallback_Type disconnected) + : EventSignalBase() + , m_firstConnectedCallback(connected) + , m_lastDisconnectedCallback(disconnected) + { + } + + /// + /// Addition assignment operator overload. + /// Connects the provided callback to the event signal, see also . + /// + /// Callback to connect. + /// Event signal reference. + EventSignal& operator+=(CallbackFunction callback) + { + Connect(callback); + return *this; + } + + /// + /// Subtraction assignment operator overload. + /// Disconnects the provided callback from the event signal, see also . + /// + /// Callback to disconnect. + /// Event signal reference. + EventSignal& operator-=(CallbackFunction callback) + { + Disconnect(callback); + return *this; + } + + /// + /// Connects given callback function to the event signal, to be invoked when the event is signalled. + /// + /// + /// When the number of connected clients changes from zero to one, the connect callback will be called, if provided. + /// + /// Callback to connect. + void Connect(CallbackFunction callback) + { + std::unique_lock lock(m_mutex); + + auto shouldFireFirstConnected = m_callbacks.empty() && m_firstConnectedCallback != nullptr; + + (void)EventSignalBase::RegisterCallback(callback); + + lock.unlock(); + + if (shouldFireFirstConnected) + { + m_firstConnectedCallback(*this); + } + } + +#ifndef AZAC_CONFIG_CXX_NO_RTTI + /// + /// Disconnects given callback. + /// + /// + /// When the number of connected clients changes from one to zero, the disconnect callback will be called, if provided. + /// + /// Callback function. + void Disconnect(CallbackFunction callback) + { + std::unique_lock lock(m_mutex); + + auto itMatchingCallback = std::find_if( + m_callbacks.begin(), + m_callbacks.end(), + [&](const std::pair& item) + { + return callback.target_type() == item.second.target_type(); + }); + + auto removeHappened = EventSignal::UnregisterCallback(itMatchingCallback->first); + lock.unlock(); + if (removeHappened && m_callbacks.empty() && m_lastDisconnectedCallback != nullptr) + { + m_lastDisconnectedCallback(*this); + } + } +#else + void Disconnect(CallbackFunction) + { + // Callback disconnection without a stored token requires runtime type information. + // To remove callbacks with RTTI disabled, use UnregisterCallback(token). + SPX_THROW_HR(SPXERR_NOT_IMPL); + } +#endif + + /// + /// Disconnects all registered callbacks. 
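+ ///
+ /// Example: a brief sketch of the usual way these signals are consumed, through the event members of a
+ /// recognizer. The SpeechRecognizer and its Recognized / Canceled events and their argument types are
+ /// assumed from the SDK's recognizer headers; operator+= and DisconnectAll are declared in this class.
+ ///
+ ///     recognizer->Recognized += [](const SpeechRecognitionEventArgs& e)
+ ///     {
+ ///         // e.Result->Text holds the recognized phrase.
+ ///     };
+ ///     recognizer->Canceled += [](const SpeechRecognitionCanceledEventArgs& e)
+ ///     {
+ ///         // Inspect e.Reason and e.ErrorDetails here.
+ ///     };
+ ///     // On teardown, drop every registered handler at once:
+ ///     recognizer->Recognized.DisconnectAll();
+ ///     recognizer->Canceled.DisconnectAll();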
+ /// + void DisconnectAll() + { + std::unique_lock lock(m_mutex); + auto shouldFireLastDisconnected = !m_callbacks.empty() && m_lastDisconnectedCallback != nullptr; + + EventSignal::UnregisterAllCallbacks(); + + lock.unlock(); + + if (shouldFireLastDisconnected) + { + m_lastDisconnectedCallback(*this); + } + } + + /// + /// Signals the event with given arguments to all connected callbacks. + /// + /// Event arguments to signal. + void Signal(T t) + { + EventSignalBase::Signal(t); + } + +private: + using EventSignalBase::m_mutex; + using EventSignalBase::m_callbacks; + + NotifyCallback_Type m_firstConnectedCallback; + NotifyCallback_Type m_lastDisconnectedCallback; + + EventSignal(const EventSignal&) = delete; + EventSignal(const EventSignal&&) = delete; + EventSignal& operator=(const EventSignal&) = delete; +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_eventsignalbase.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_eventsignalbase.h new file mode 100644 index 0000000..0a0de7e --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_eventsignalbase.h @@ -0,0 +1,166 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_eventsignalbase.h: Public API declarations for EventSignalBase C++ template class +// + +#pragma once +#include +#include +#include +#include +#include + +// TODO: TFS#3671067 - Vision: Consider moving majority of EventSignal to AI::Core::Details namespace, and refactoring Vision::Core::Events to inherit, and relay to private base + +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Clients can connect to the event signal to receive events, or disconnect from the event signal to stop receiving events. +/// +/// +/// At construction time, connect and disconnect callbacks can be provided that are called when +/// the number of connected clients changes from zero to one or one to zero, respectively. +/// +// +template +class EventSignalBase +{ +public: + /// + /// Constructs an event signal with empty connect and disconnect actions. + /// + EventSignalBase() : + m_nextCallbackToken(0) + { + } + + /// + /// Destructor. + /// + virtual ~EventSignalBase() + { + UnregisterAllCallbacks(); + } + + /// + /// Callback type that is used for signalling the event to connected clients. + /// + using CallbackFunction = std::function; + + /// + /// The argument type for the callback event + /// + using CallbackArgument = T; + + /// + /// A monotonically increasing token used for registration, tracking, and unregistration of callbacks. + /// + using CallbackToken = uint32_t; + + /// + /// Registers a callback to this EventSignalBase and assigns it a unique token. + /// + /// The callback to register. + /// + /// The new token associated with this registration that can be used for subsequent unregistration. + /// + CallbackToken RegisterCallback(CallbackFunction callback) + { + std::unique_lock lock(m_mutex); + + auto token = m_nextCallbackToken; + m_nextCallbackToken++; + + m_callbacks.emplace(token, callback); + + return token; + } + + /// + /// If present, unregisters a callback from this EventSource associated with the provided token. Tokens are + /// returned from RegisterCallback at the time of registration. 
+ /// + /// + /// The token associated with the callback to be removed. This token is provided by the return value of + /// RegisterCallback at the time of registration. + /// + /// A value indicating whether any callback was unregistered in response to this request. + bool UnregisterCallback(CallbackToken token) + { + std::unique_lock lock(m_mutex); + return (bool)m_callbacks.erase(token); + } + + /// + /// Function call operator. + /// Signals the event with given arguments to connected clients, see also . + /// + /// Event arguments to signal. + void operator()(T t) + { + Signal(t); + } + + /// + /// Unregisters all registered callbacks. + /// + void UnregisterAllCallbacks() + { + std::unique_lock lock(m_mutex); + m_callbacks.clear(); + } + + /// + /// Signals the event with given arguments to all connected callbacks. + /// + /// Event arguments to signal. + void Signal(T t) + { + std::unique_lock lock(m_mutex); + + auto callbacksSnapshot = m_callbacks; + for (auto callbackCopyPair : callbacksSnapshot) + { + // now, while a callback is in progress, it can disconnect itself and any other connected + // callback. Check to see if the next one stored in the copy container is still connected. + bool stillConnected = (std::find_if(m_callbacks.begin(), m_callbacks.end(), + [&](const std::pair item) { + return callbackCopyPair.first == item.first; + }) != m_callbacks.end()); + + if (stillConnected) + { + callbackCopyPair.second(t); + } + } + } + + /// + /// Checks if a callback is connected. + /// + /// true if a callback is connected + bool IsConnected() const + { + std::unique_lock lock(m_mutex); + return !m_callbacks.empty(); + } + +protected: + std::map m_callbacks; + CallbackToken m_nextCallbackToken; + mutable std::recursive_mutex m_mutex; + +private: + EventSignalBase(const EventSignalBase&) = delete; + EventSignalBase(const EventSignalBase&&) = delete; + EventSignalBase& operator=(const EventSignalBase&) = delete; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_file_logger.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_file_logger.h new file mode 100644 index 0000000..7638d09 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_file_logger.h @@ -0,0 +1,115 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Diagnostics { +namespace Logging { + +/// +/// Class with static methods to control file-based SDK logging. +/// Turning on logging while running your Speech SDK scenario provides +/// detailed information from the SDK's core native components. If you +/// report an issue to Microsoft, you may be asked to provide logs to help +/// Microsoft diagnose the issue. Your application should not take dependency +/// on particular log strings, as they may change from one SDK release to another +/// without notice. +/// FileLogger is the simplest logging solution and suitable for diagnosing +/// most on-device issues when running Speech SDK. +/// Added in version 1.20.0 +/// +/// File logging is a process wide construct. 
That means that if (for example) +/// you have multiple speech recognizer objects running in parallel, there will be one +/// log file containing interleaved logs lines from all recognizers. You cannot get a +/// separate log file for each recognizer. +class FileLogger +{ +public: + /// + /// Starts logging to a file. + /// + /// Path to a log file on local disk + /// Optional. If true, appends to existing log file. If false, creates a new log file + /// Note that each write operation to the file is immediately followed by a flush to disk. + /// For typical usage (e.g. one Speech Recognizer and a Solid State Drive (SSD)) this should not + /// cause performace issues. You may however want to avoid file logging when running many Speech + /// SDK recognizers or other SDK objects simultaneously. Use MemoryLogger or EventLogger instead. + static void Start(const SPXSTRING& filePath, bool append = false) + { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, filePath.empty()); + + SPX_THROW_ON_FAIL(property_bag_create(&hpropbag)); + SPX_THROW_ON_FAIL(property_bag_set_string(hpropbag, -1, "SPEECH-LogFilename", Utils::ToUTF8(filePath).c_str())); + SPX_THROW_ON_FAIL(property_bag_set_string(hpropbag, -1, "SPEECH-AppendToLogFile", append ? "1" : "0")); + SPX_THROW_ON_FAIL(diagnostics_log_start_logging(hpropbag, nullptr)); + SPX_THROW_ON_FAIL(property_bag_release(hpropbag)); + } + + /// + /// Stops logging to a file. + /// + /// This call is optional. If logging as been started, + /// the log file will be written when the process exists normally. + static void Stop() + { + SPX_THROW_ON_FAIL(diagnostics_log_stop_logging()); + } + + /// + /// Sets or clears the filters that apply to file logging. + /// Once filters are set, the callback will be invoked only if the log string + /// contains at least one of the strings specified by the filters. The match is case sensitive. + /// + /// Optional. Filters to use, or an empty list to remove previously set filters. + static void SetFilters(std::initializer_list filters = {}) + { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(property_bag_create(&hpropbag)); + + PropBagSetFilter(hpropbag, filters); + + SPX_THROW_ON_FAIL(diagnostics_log_apply_properties(hpropbag, nullptr)); + SPX_THROW_ON_FAIL(property_bag_release(hpropbag)); + } + + /// + /// Sets the level of the messages to be captured by the logger + /// + /// Maximum level of detail to be captured by the logger. + static void SetLevel(Level level) + { + const auto levelStr = Details::LevelToString(level); + diagnostics_set_log_level("memory", levelStr); + } + +private: + static void PropBagSetFilter(AZAC_HANDLE hpropbag, std::initializer_list filters) + { + std::string str = ""; + + if (filters.size() > 0) + { + std::ostringstream filtersCollapsed; + std::copy(filters.begin(), filters.end(), std::ostream_iterator(filtersCollapsed, ";")); + str = filtersCollapsed.str(); + } + + SPX_THROW_ON_FAIL(property_bag_set_string(hpropbag, -1, "SPEECH-LogFileFilters", str.c_str())); + } +}; + +}}}}} diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_grammar.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_grammar.h new file mode 100644 index 0000000..056e0a1 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_grammar.h @@ -0,0 +1,70 @@ +// +// Copyright (c) Microsoft. All rights reserved. 
+// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_grammar.h: Public API declarations for Grammar C++ class +// + +#pragma once +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Represents base class grammar for customizing speech recognition. +/// Added in version 1.5.0. +/// +class Grammar +{ +public: + + /// + /// Creates a grammar from a storage ID. + /// Added in version 1.7.0. + /// + /// The persisted storage ID of the language model. + /// The grammar. + /// + /// Creating a grammar from a storage ID is only usable in specific scenarios and is not generally possible. + /// + static std::shared_ptr FromStorageId(const SPXSTRING& storageId) + { + SPXGRAMMARHANDLE hgrammar = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(grammar_create_from_storage_id(&hgrammar, Utils::ToUTF8(storageId.c_str()))); + + return std::make_shared(hgrammar); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Grammar handle. + explicit Grammar(SPXGRAMMARHANDLE hgrammar = SPXHANDLE_INVALID) : m_hgrammar(hgrammar) { } + + /// + /// Destructor, does nothing. + /// + virtual ~Grammar() { } + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. + explicit operator SPXGRAMMARHANDLE() { return m_hgrammar; } + +protected: + /*! \cond PROTECTED */ + DISABLE_COPY_AND_MOVE(Grammar); + + SmartHandle m_hgrammar; + /*! \endcond */ +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_grammar_list.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_grammar_list.h new file mode 100644 index 0000000..1118bca --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_grammar_list.h @@ -0,0 +1,90 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_grammar_list.h: Public API declarations for GrammarList C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Represents a list of grammars for dynamic grammar scenarios. +/// Added in version 1.7.0. +/// +/// +/// GrammarLists are only usable in specific scenarios and are not generally available. +/// +class GrammarList : public Grammar +{ +public: + + /// + /// Creates a grammar lsit for the specified recognizer. + /// + /// The recognizer from which to obtain the grammar list. + /// The grammar list associated with the recognizer. + /// + /// Creating a grammar list from a recognizer is only usable in specific scenarios and is not generally available. + /// + template + static std::shared_ptr FromRecognizer(std::shared_ptr recognizer) + { + SPXRECOHANDLE hreco = recognizer != nullptr + ? (SPXRECOHANDLE)(*recognizer.get()) + : SPXHANDLE_INVALID; + + SPXGRAMMARHANDLE hgrammar = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(grammar_list_from_recognizer(&hgrammar, hreco)); + + return std::make_shared(hgrammar); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// GrammarList handle. 
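+ ///
+ /// Example: a rough sketch of attaching a grammar to a recognizer through a grammar list, keeping in
+ /// mind the note above that this is only usable in specific scenarios. The recognizer variable and the
+ /// storage ID string are placeholders; FromRecognizer, Add and SetRecognitionFactor are declared in this
+ /// class, and Grammar::FromStorageId in the Grammar base header.
+ ///
+ ///     auto grammarList = GrammarList::FromRecognizer(recognizer);
+ ///     auto clm = Grammar::FromStorageId("YourClassLanguageModelStorageId");
+ ///     grammarList->Add(clm);
+ ///     // Double the default weight applied to the supplied grammars for partial phrases.
+ ///     grammarList->SetRecognitionFactor(2.0, RecognitionFactorScope::PartialPhrase);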
+ explicit GrammarList(SPXGRAMMARHANDLE hgrammar = SPXHANDLE_INVALID) : Grammar(hgrammar) { } + + /// + /// Adds a single grammar to the current grammar list + /// + /// The grammar to add + /// + /// Currently Class Language Models are the only support grammars to add. + /// + template + void Add(std::shared_ptr grammar) + { + SPX_THROW_ON_FAIL(grammar_list_add_grammar(m_hgrammar.get(), (SPXGRAMMARHANDLE)(*grammar.get()))); + } + + /// + /// Sets the Recognition Factor applied to all grammars in a recognizer's GrammarList + /// + /// The RecognitionFactor to apply + /// The scope for the Recognition Factor being set + /// + /// The Recognition Factor is a numerical value greater than 0 modifies the default weight applied to supplied grammars. + /// Setting the Recognition Factor to 0 will disable the supplied grammars. + /// The default Recognition Factor is 1. + /// + void SetRecognitionFactor(double factor, RecognitionFactorScope scope) + { + SPX_THROW_ON_FAIL(grammar_list_set_recognition_factor(m_hgrammar.get(), factor, (GrammarList_RecognitionFactorScope)scope)); + } + +private: + DISABLE_COPY_AND_MOVE(GrammarList); +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_grammar_phrase.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_grammar_phrase.h new file mode 100644 index 0000000..2c35b9d --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_grammar_phrase.h @@ -0,0 +1,64 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_phrase_grammar.h: Public API declarations for GrammarPhrase C++ class +// + +#pragma once +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Represents a phrase that may be spoken by the user. +/// Added in version 1.5.0. +/// +class GrammarPhrase +{ +public: + + /// + /// Creates a grammar phrase using the specified phrase text. + /// + /// The text representing a phrase that may be spoken by the user. + /// A shared pointer to a grammar phrase. + static std::shared_ptr From(const SPXSTRING& text) + { + SPXPHRASEHANDLE hphrase = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(grammar_phrase_create_from_text(&hphrase, Utils::ToUTF8(text).c_str())); + return std::make_shared(hphrase); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Grammar phrase handle. + explicit GrammarPhrase(SPXPHRASEHANDLE hphrase) : m_hphrase(hphrase) { }; + + /// + /// Virtual destructor + /// + virtual ~GrammarPhrase() { } + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. + explicit operator SPXPHRASEHANDLE() { return m_hphrase; } + +private: + + DISABLE_DEFAULT_CTORS(GrammarPhrase); + + SmartHandle m_hphrase; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_hybrid_speech_config.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_hybrid_speech_config.h new file mode 100644 index 0000000..39ca52e --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_hybrid_speech_config.h @@ -0,0 +1,161 @@ +// +// Copyright (c) Microsoft. 
All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_hybrid_speech_config.h: Public API declarations for HybridSpeechConfig C++ class +// +#pragma once + +#include + +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class that defines hybrid (cloud and embedded) configurations for speech recognition or speech synthesis. +/// +class HybridSpeechConfig +{ +protected: + /*! \cond PROTECTED */ + + SpeechConfig m_config; + + /*! \endcond */ + +public: + /// + /// Internal operator used to get the underlying handle value. + /// + /// A handle. + explicit operator SPXSPEECHCONFIGHANDLE() const + { + return static_cast(m_config); + } + + /// + /// Creates an instance of the hybrid speech config with specified cloud and embedded speech configs. + /// + /// A shared smart pointer of a cloud speech config. + /// A shared smart pointer of an embedded speech config. + /// A shared pointer to the new hybrid speech config instance. + static std::shared_ptr FromConfigs( + std::shared_ptr cloudSpeechConfig, + std::shared_ptr embeddedSpeechConfig) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(hybrid_speech_config_create( + &hconfig, + Utils::HandleOrInvalid(cloudSpeechConfig), + Utils::HandleOrInvalid(embeddedSpeechConfig))); + + auto ptr = new HybridSpeechConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Sets the speech recognition output format. + /// + /// Speech recognition output format (simple or detailed). + void SetSpeechRecognitionOutputFormat(OutputFormat format) + { + m_config.SetOutputFormat(format); + } + + /// + /// Gets the speech recognition output format. + /// + /// Speech recognition output format (simple or detailed). + OutputFormat GetSpeechRecognitionOutputFormat() const + { + return m_config.GetOutputFormat(); + } + + /// + /// Sets the speech synthesis output format (e.g. Riff16Khz16BitMonoPcm). + /// + /// Specifies the output format ID + void SetSpeechSynthesisOutputFormat(SpeechSynthesisOutputFormat formatId) + { + m_config.SetSpeechSynthesisOutputFormat(formatId); + } + + /// + /// Gets the speech synthesis output format. + /// + /// The speech synthesis output format. + SPXSTRING GetSpeechSynthesisOutputFormat() const + { + return m_config.GetSpeechSynthesisOutputFormat(); + } + + /// + /// Sets a property value by name. + /// + /// The property name. + /// The property value. + void SetProperty(const SPXSTRING& name, const SPXSTRING& value) + { + m_config.SetProperty(name, value); + } + + /// + /// Sets a property value by ID. + /// + /// The property id. + /// The property value. + void SetProperty(PropertyId id, const SPXSTRING& value) + { + m_config.SetProperty(id, value); + } + + /// + /// Gets a property value by name. + /// + /// The parameter name. + /// The property value. + SPXSTRING GetProperty(const SPXSTRING& name) const + { + return m_config.GetProperty(name); + } + + /// + /// Gets a property value by ID. + /// + /// The parameter id. + /// The property value. + SPXSTRING GetProperty(PropertyId id) const + { + return m_config.GetProperty(id); + } + + /// + /// Destructs the object. + /// + virtual ~HybridSpeechConfig() = default; + +protected: + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + inline explicit HybridSpeechConfig(SPXSPEECHCONFIGHANDLE hconfig) : m_config(hconfig) + { + } + + /*! 
\endcond */ + +private: + DISABLE_COPY_AND_MOVE(HybridSpeechConfig); + + }; + +}}} diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_intent_recognition_eventargs.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_intent_recognition_eventargs.h new file mode 100644 index 0000000..dda06e8 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_intent_recognition_eventargs.h @@ -0,0 +1,169 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_intent_recognition_eventargs.h: Public API declarations for IntentRecognitionEventArgs C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Intent { + + +/// +/// Class for intent recognition event arguments. +/// +class IntentRecognitionEventArgs : public RecognitionEventArgs +{ +private: + + SPXEVENTHANDLE m_hevent; + std::shared_ptr m_result; + +public: + + /// + /// Constructor. + /// + /// Event handle + explicit IntentRecognitionEventArgs(SPXEVENTHANDLE hevent) : + RecognitionEventArgs(hevent), + m_hevent(hevent), + m_result(std::make_shared(IntentResultHandleFromEventHandle(hevent))), + Result(m_result) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + }; + + /// + virtual ~IntentRecognitionEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + SPX_THROW_ON_FAIL(recognizer_event_handle_release(m_hevent)); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + /// + /// Intent recognition event result. + /// + std::shared_ptr Result; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +protected: +#endif + + /*! \cond PROTECTED */ + + /// + /// Intent recognition event result. + /// + std::shared_ptr GetResult() const { return m_result; } + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(IntentRecognitionEventArgs); + + SPXRESULTHANDLE IntentResultHandleFromEventHandle(SPXEVENTHANDLE hevent) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(recognizer_recognition_event_get_result(hevent, &hresult)); + return hresult; + } +}; + + +/// +/// Class for intent recognition canceled event arguments. +/// +class IntentRecognitionCanceledEventArgs final : public IntentRecognitionEventArgs +{ +private: + + std::shared_ptr m_cancellation; + CancellationReason m_cancellationReason; + CancellationErrorCode m_errorCode; + +public: + + /// + /// Constructor. 
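For orientation, a minimal usage sketch of the HybridSpeechConfig declared above. The umbrella header speechapi_cxx.h, EmbeddedSpeechConfig::FromPath, the model folder, and a SpeechRecognizer::FromConfig overload accepting a hybrid config are assumptions here, not something this diff establishes.

#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Audio;

std::shared_ptr<SpeechRecognizer> MakeHybridRecognizer()
{
    // Cloud config: real key/region would come from project settings (placeholders here).
    auto cloud = SpeechConfig::FromSubscription("<subscription-key>", "<region>");

    // Embedded (on-device) config; FromPath and the model folder are assumptions.
    auto embedded = EmbeddedSpeechConfig::FromPath("Models/");

    // Combine both; output-format calls are forwarded to the wrapped SpeechConfig.
    auto hybrid = HybridSpeechConfig::FromConfigs(cloud, embedded);
    hybrid->SetSpeechRecognitionOutputFormat(OutputFormat::Detailed);

    // Assumes SpeechRecognizer::FromConfig has an overload taking a HybridSpeechConfig.
    return SpeechRecognizer::FromConfig(hybrid, AudioConfig::FromDefaultMicrophoneInput());
}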
+ /// + /// Event handle + explicit IntentRecognitionCanceledEventArgs(SPXEVENTHANDLE hevent) : + IntentRecognitionEventArgs(hevent), + m_cancellation(CancellationDetails::FromResult(GetResult())), + m_cancellationReason(m_cancellation->Reason), + m_errorCode(m_cancellation->ErrorCode), + Reason(m_cancellationReason), + ErrorCode(m_errorCode), + ErrorDetails(m_cancellation->ErrorDetails) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + + /// + virtual ~IntentRecognitionCanceledEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-private-field" +#endif + /// + /// The reason the result was canceled. + /// + const CancellationReason& Reason; + + /// + /// The error code in case of an unsuccessful recognition ( is set to Error). + /// If Reason is not Error, ErrorCode is set to NoError. + /// Added in version 1.1.0. + /// + const CancellationErrorCode& ErrorCode; + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + + /// + /// The error message in case of an unsuccessful recognition ( is set to Error). + /// + const SPXSTRING ErrorDetails; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +private: +#endif + /// + /// CancellationDetails. + /// + std::shared_ptr GetCancellationDetails() const { return m_cancellation; } + +private: + + DISABLE_DEFAULT_CTORS(IntentRecognitionCanceledEventArgs); +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Intent diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_intent_recognition_result.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_intent_recognition_result.h new file mode 100644 index 0000000..360e846 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_intent_recognition_result.h @@ -0,0 +1,119 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_intent_recognition_result.h: Public API declarations for IntentRecognitionResult C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +#include "speechapi_c_json.h" + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Intent { + +/// +/// Represents the result of an intent recognition. +/// +class IntentRecognitionResult final : public RecognitionResult +{ +public: + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Result handle. + explicit IntentRecognitionResult(SPXRESULTHANDLE hresult) : + RecognitionResult(hresult), + IntentId(m_intentId) + { + PopulateIntentFields(hresult, &m_intentId); + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p) -- resultid=%s; reason=0x%x; text=%s", __FUNCTION__, (void*)this, (void*)Handle, Utils::ToUTF8(ResultId).c_str(), Reason, Utils::ToUTF8(Text).c_str()); + } + + /// + /// A call to return a map of the entities found in the utterance. + /// + /// + /// A map with the entity name as a key and containing the value of the entity found in the utterance. + /// + /// + /// This currently does not report LUIS entities. + /// + const std::map& GetEntities() const + { + return m_entities; + } + + /// + /// Destructor. 
+ /// + ~IntentRecognitionResult() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)Handle); + } + + /// + /// Unique intent id. + /// + const SPXSTRING& IntentId; + +private: + DISABLE_DEFAULT_CTORS(IntentRecognitionResult); + + void PopulateIntentFields(SPXRESULTHANDLE hresult, SPXSTRING* pintentId) + { + SPX_INIT_HR(hr); + + const size_t maxCharCount = 1024; + char sz[maxCharCount+1] = {}; + + if (pintentId != nullptr && recognizer_result_handle_is_valid(hresult)) + { + SPX_THROW_ON_FAIL(hr = intent_result_get_intent_id(hresult, sz, maxCharCount)); + *pintentId = Utils::ToSPXString(sz); + } + + auto jsonSLE = Properties.GetProperty("LanguageUnderstandingSLE_JsonResult"); + SPXHANDLE parserHandle = SPXHANDLE_INVALID; + auto scopeGuard = Utils::MakeScopeGuard([&parserHandle]() + { + if (parserHandle != SPXHANDLE_INVALID) + { + ai_core_json_parser_handle_release(parserHandle); + } + }); + + auto root = ai_core_json_parser_create(&parserHandle, jsonSLE.c_str(), jsonSLE.size()); + int count = ai_core_json_item_count(parserHandle, root); + for (int i = 0; i < count; i++) + { + auto itemInt = ai_core_json_item_at(parserHandle, root, i, nullptr); + auto nameInt = ai_core_json_item_name(parserHandle, itemInt); + + // Need to use string copy here to force the ajv json parser to convert back to utf8. + auto name = ai_core_json_value_as_string_copy(parserHandle, nameInt, ""); + auto value = ai_core_json_value_as_string_copy(parserHandle, itemInt, ""); + if (value != nullptr && name != nullptr) + { + m_entities[name] = value; + } + } + + } + + SPXSTRING m_intentId; + std::map m_entities; +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Recognition::Intent diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_intent_recognizer.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_intent_recognizer.h new file mode 100644 index 0000000..473053d --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_intent_recognizer.h @@ -0,0 +1,513 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_intent_recognizer.h: Public API declarations for IntentRecognizer C++ class +// + +#pragma once +#include +#include +#include +#include "speechapi_c_json.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Intent { + +/// +/// In addition to performing speech-to-text recognition, the IntentRecognizer extracts structured information +/// about the intent of the speaker, which can be used to drive further actions using dedicated intent triggers +/// (see ). +/// + class IntentRecognizer : public AsyncRecognizer + { + public: + + using BaseType = AsyncRecognizer; + + /// + /// Creates an intent recognizer from a speech config and an audio config. + /// Users should use this function to create a new instance of an intent recognizer. + /// + /// Speech configuration. + /// Audio configuration. + /// Instance of intent recognizer. 
+ static std::shared_ptr FromConfig(std::shared_ptr speechConfig, std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_intent_recognizer_from_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Creates an intent recognizer from an embedded speech config and an audio config. + /// Users should use this function to create a new instance of an intent recognizer. + /// Added in version 1.19.0 + /// + /// Embedded speech configuration. + /// Audio configuration. + /// Instance of intent recognizer. + static std::shared_ptr FromConfig(std::shared_ptr speechConfig, std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_intent_recognizer_from_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Recognizer handle. + explicit IntentRecognizer(SPXRECOHANDLE hreco) : BaseType(hreco), Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /// + /// destructor + /// + ~IntentRecognizer() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + TermRecognizer(); + } + + /// + /// Starts intent recognition, and returns after a single utterance is recognized. The end of a + /// single utterance is determined by listening for silence at the end or until a maximum of about 30 + /// seconds of audio is processed. The task returns the recognition text as result. + /// Note: Since RecognizeOnceAsync() returns only a single utterance, it is suitable only for single + /// shot recognition like command or query. + /// For long-running multi-utterance recognition, use StartContinuousRecognitionAsync() instead.. + /// + /// Future containing result value (a shared pointer to IntentRecognitionResult) + /// of the asynchronous intent recognition. + /// + std::future> RecognizeOnceAsync() override + { + return BaseType::RecognizeOnceAsyncInternal(); + } + + /// + /// Starts intent recognition, and generates a result from the text passed in. This is useful for testing and other times when the speech input + /// is not tied to the IntentRecognizer. + /// Note: The Intent Service does not currently support this so it is only valid for offline pattern matching or exact matching intents. + /// + /// The text to be evaluated. + /// Future containing result value (a shared pointer to IntentRecognitionResult) + /// of the asynchronous intent recognition. + /// + std::future> RecognizeOnceAsync(SPXSTRING text) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, text]() -> std::shared_ptr { + SPX_INIT_HR(hr); + + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(hr = intent_recognizer_recognize_text_once(m_hreco, Utils::ToUTF8(text).c_str(), &hresult)); + + return std::make_shared(hresult); + }); + return future; + } + + /// + /// Asynchronously initiates continuous intent recognition operation. + /// + /// An empty future. + std::future StartContinuousRecognitionAsync() override + { + return BaseType::StartContinuousRecognitionAsyncInternal(); + } + + /// + /// Asynchronously terminates ongoing continuous intent recognition operation. + /// + /// An empty future. 
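A short sketch of the single-shot flow declared above, exercising the text-based RecognizeOnceAsync overload against a simple pattern intent; the phrase, intent id, and subscription placeholders are illustrative only.

#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Intent;

void RecognizeCheckboxCommand()
{
    auto config = SpeechConfig::FromSubscription("<subscription-key>", "<region>");
    auto recognizer = IntentRecognizer::FromConfig(config);

    // Simple pattern intent; {checkboxName} becomes an entity in the result.
    recognizer->AddIntent("click the {checkboxName} checkbox", "ClickCheckbox");

    // Per the remarks above, text-based recognition applies to offline pattern matching.
    auto result = recognizer->RecognizeOnceAsync("click the subscribe checkbox").get();
    if (result->IntentId == "ClickCheckbox")
    {
        auto entities = result->GetEntities();      // e.g. {"checkboxName": "subscribe"}
        auto checkbox = entities.at("checkboxName");
        (void)checkbox;                             // hand off to plugin logic here
    }
}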
+ std::future StopContinuousRecognitionAsync() override + { + return BaseType::StopContinuousRecognitionAsyncInternal(); + } + + /// + /// Asynchronously initiates keyword recognition operation. + /// + /// Specifies the keyword model to be used. + /// An empty future. + std::future StartKeywordRecognitionAsync(std::shared_ptr model) override + { + return BaseType::StartKeywordRecognitionAsyncInternal(model); + } + + /// + /// Asynchronously terminates keyword recognition operation. + /// + /// An empty future. + std::future StopKeywordRecognitionAsync() override + { + return BaseType::StopKeywordRecognitionAsyncInternal(); + } + + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; + + /// + /// Adds a simple phrase that may be spoken by the user, indicating a specific user intent. + /// This simple phrase can be a pattern including and enitity surrounded by braces. Such as "click the {checkboxName} checkbox". + /// + /// + /// The phrase corresponding to the intent. + /// Once recognized, the IntentRecognitionResult's IntentId property will match the simplePhrase specified here. + /// If any entities are specified and matched, they will be available in the IntentResult->GetEntities() call. + /// + void AddIntent(const SPXSTRING& simplePhrase) + { + auto trigger = IntentTrigger::From(simplePhrase); + return AddIntent(trigger, simplePhrase); + } + + /// + /// Adds a simple phrase that may be spoken by the user, indicating a specific user intent. + /// This simple phrase can be a pattern including and enitity surrounded by braces. Such as "click the {checkboxName} checkbox". + /// + /// The phrase corresponding to the intent. + /// A custom id string to be returned in the IntentRecognitionResult's IntentId property. + /// Once recognized, the result's intent id will match the id supplied here. + /// If any entities are specified and matched, they will be available in the IntentResult->GetEntities() call. + /// + void AddIntent(const SPXSTRING& simplePhrase, const SPXSTRING& intentId) + { + auto trigger = IntentTrigger::From(simplePhrase); + return AddIntent(trigger, intentId); + } + + /// + /// Adds a single intent by name from the specified Language Understanding Model. + /// For PatternMatchingModel and ConversationalLanguageUnderstandingModel types, this will clear + /// any existing models before enabling it. For these types, the intentName is ignored. + /// + /// The language understanding model containing the intent. + /// The name of the single intent to be included from the language understanding model. + /// Once recognized, the IntentRecognitionResult's IntentId property will contain the intentName specified here. 
+ void AddIntent(std::shared_ptr model, const SPXSTRING& intentName) + { + switch (model->GetModelType()) + { + case LanguageUnderstandingModel::LanguageUnderstandingModelType::LanguageUnderstandingModel: + { + auto trigger = IntentTrigger::From(model, intentName); + AddIntent(trigger, intentName); + break; + } + case LanguageUnderstandingModel::LanguageUnderstandingModelType::PatternMatchingModel: + { + intent_recognizer_clear_language_models(m_hreco); + AddPatternMatchingModel(model); + break; + } + case LanguageUnderstandingModel::LanguageUnderstandingModelType::ConversationalLanguageUnderstandingModel: + { + intent_recognizer_clear_language_models(m_hreco); + auto cluModel = static_cast(model.get()); + intent_recognizer_add_conversational_language_understanding_model( + m_hreco, + cluModel->languageResourceKey.c_str(), + cluModel->endpoint.c_str(), + cluModel->projectName.c_str(), + cluModel->deploymentName.c_str()); + break; + } + default: + break; + } + } + + /// + /// Adds a single intent by name from the specified Language Understanding Model. + /// For PatternMatchingModel and ConversationalLanguageUnderstandingModel types, this will clear + /// any existing models before enabling it. For these types, the intentName and intentId are ignored. + /// + /// The language understanding model containing the intent. + /// The name of the single intent to be included from the language understanding model. + /// A custom id string to be returned in the IntentRecognitionResult's IntentId property. + void AddIntent(std::shared_ptr model, const SPXSTRING& intentName, const SPXSTRING& intentId) + { + switch (model->GetModelType()) + { + case LanguageUnderstandingModel::LanguageUnderstandingModelType::LanguageUnderstandingModel: + { + auto trigger = IntentTrigger::From(model, intentName); + AddIntent(trigger, intentId); + break; + } + case LanguageUnderstandingModel::LanguageUnderstandingModelType::PatternMatchingModel: + { + intent_recognizer_clear_language_models(m_hreco); + AddPatternMatchingModel(model); + break; + } + case LanguageUnderstandingModel::LanguageUnderstandingModelType::ConversationalLanguageUnderstandingModel: + { + intent_recognizer_clear_language_models(m_hreco); + auto cluModel = static_cast(model.get()); + intent_recognizer_add_conversational_language_understanding_model( + m_hreco, + cluModel->languageResourceKey.c_str(), + cluModel->endpoint.c_str(), + cluModel->projectName.c_str(), + cluModel->deploymentName.c_str()); + break; + } + default: + break; + } + } + + /// + /// Adds all intents from the specified Language Understanding Model. + /// For PatternMatchingModel and ConversationalLanguageUnderstandingModel types, this will clear + /// any existing models before enabling it. + /// + /// The language understanding model containing the intents. + /// Once recognized, the IntentRecognitionResult's IntentId property will contain the name of the intent recognized. 
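For hosted models, the same recognizer can take intents from a LanguageUnderstandingModel (declared later in this diff); a hedged sketch with a placeholder LUIS app id and intent name.

#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech::Intent;

void AddLuisIntents(const std::shared_ptr<IntentRecognizer>& recognizer)
{
    // The LUIS application id is a placeholder; FromAppId is declared in this diff.
    auto model = LanguageUnderstandingModel::FromAppId("<luis-app-id>");

    // Subscribe to one named intent under a custom id, then to everything else.
    recognizer->AddIntent(model, "BookFlight", "book-flight");
    recognizer->AddAllIntents(model);
}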
+ void AddAllIntents(std::shared_ptr model) + { + switch (model->GetModelType()) + { + case LanguageUnderstandingModel::LanguageUnderstandingModelType::LanguageUnderstandingModel: + { + auto trigger = IntentTrigger::From(model); + AddIntent(trigger, SPXSTRING_EMPTY); + break; + } + case LanguageUnderstandingModel::LanguageUnderstandingModelType::PatternMatchingModel: + { + intent_recognizer_clear_language_models(m_hreco); + AddPatternMatchingModel(model); + break; + } + case LanguageUnderstandingModel::LanguageUnderstandingModelType::ConversationalLanguageUnderstandingModel: + { + intent_recognizer_clear_language_models(m_hreco); + auto cluModel = static_cast(model.get()); + intent_recognizer_add_conversational_language_understanding_model( + m_hreco, + cluModel->languageResourceKey.c_str(), + cluModel->endpoint.c_str(), + cluModel->projectName.c_str(), + cluModel->deploymentName.c_str()); + break; + } + default: + break; + } + } + + /// + /// Adds all intents from the specified Language Understanding Model. + /// For PatternMatchingModel and ConversationalLanguageUnderstandingModel types, this will clear + /// any existing models before enabling it. + /// + /// The language understanding model containing the intents. + /// A custom string id to be returned in the IntentRecognitionResult's IntentId property. + void AddAllIntents(std::shared_ptr model, const SPXSTRING& intentId) + { + switch (model->GetModelType()) + { + case LanguageUnderstandingModel::LanguageUnderstandingModelType::LanguageUnderstandingModel: + { + auto trigger = IntentTrigger::From(model); + AddIntent(trigger, intentId); + break; + } + case LanguageUnderstandingModel::LanguageUnderstandingModelType::PatternMatchingModel: + { + intent_recognizer_clear_language_models(m_hreco); + AddPatternMatchingModel(model); + break; + } + case LanguageUnderstandingModel::LanguageUnderstandingModelType::ConversationalLanguageUnderstandingModel: + { + intent_recognizer_clear_language_models(m_hreco); + auto cluModel = static_cast(model.get()); + intent_recognizer_add_conversational_language_understanding_model( + m_hreco, + cluModel->languageResourceKey.c_str(), + cluModel->endpoint.c_str(), + cluModel->projectName.c_str(), + cluModel->deploymentName.c_str()); + break; + } + default: + break; + } + } + + /// + /// Adds the IntentTrigger specified. + /// + /// The IntentTrigger corresponding to the intent. + /// A custom string id to be returned in the IntentRecognitionResult's IntentId property. + void AddIntent(std::shared_ptr trigger, const SPXSTRING& intentId) + { + SPX_THROW_ON_FAIL(intent_recognizer_add_intent(m_hreco, Utils::ToUTF8(intentId).c_str(), (SPXTRIGGERHANDLE)(*trigger.get()))); + } + + /// + /// Sets the authorization token that will be used for connecting to the service. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. + /// Otherwise, the recognizer will encounter errors during recognition. + /// + /// A string that represents the authorization token. + void SetAuthorizationToken(const SPXSTRING& token) + { + Properties.SetProperty(PropertyId::SpeechServiceAuthorization_Token, token); + } + + /// + /// Gets the authorization token. 
+ /// + /// Authorization token + SPXSTRING GetAuthorizationToken() + { + return Properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token, SPXSTRING()); + } + + /// + /// Takes a collection of language understanding models, makes a copy of them, and applies them to the recognizer. This application + /// happens at different times depending on the language understanding model type. + /// Simple Language Models will become active almost immediately whereas + /// language understanding models utilizing LUIS will become active on the next Speech turn. + /// This replaces any previously applied models. + /// + /// A vector of shared pointers to LanguageUnderstandingModels. + /// True if the application of the models takes effect immediately. Otherwise false. + bool ApplyLanguageModels(const std::vector>& collection) + { + bool result = true; + SPXTRIGGERHANDLE htrigger = SPXHANDLE_INVALID; + + // Clear existing language models. + SPX_THROW_ON_FAIL(intent_recognizer_clear_language_models(m_hreco)); + + // Add the new ones. + for (auto model : collection) + { + switch (model->GetModelType()) + { + case LanguageUnderstandingModel::LanguageUnderstandingModelType::LanguageUnderstandingModel: + SPX_THROW_ON_FAIL(intent_trigger_create_from_language_understanding_model(&htrigger, static_cast(*model), nullptr)); + intent_recognizer_add_intent(m_hreco, nullptr, htrigger); + result = false; + break; + case LanguageUnderstandingModel::LanguageUnderstandingModelType::PatternMatchingModel: + { + AddPatternMatchingModel(model); + break; + } + case LanguageUnderstandingModel::LanguageUnderstandingModelType::ConversationalLanguageUnderstandingModel: + { + auto cluModel = static_cast(model.get()); + intent_recognizer_add_conversational_language_understanding_model( + m_hreco, + cluModel->languageResourceKey.c_str(), + cluModel->endpoint.c_str(), + cluModel->projectName.c_str(), + cluModel->deploymentName.c_str()); + break; + } + default: + break; + } + + } + return result; + } + +private: + void AddPatternMatchingModel(const std::shared_ptr& luModel) const + { + auto model = static_cast(luModel.get()); + std::string modelId = model->GetModelId(); + + Utils::AbiHandle hModel(language_understanding_model__handle_release); + SPX_THROW_ON_FAIL(pattern_matching_model_create(&hModel, m_hreco, modelId.c_str())); + + PATTERN_MATCHING_MODEL_GET_STR_FROM_INDEX vectorGetter = [](void* context, size_t index, const char** phrase, size_t* phraseLen) -> AZACHR + { + try + { + SPX_RETURN_HR_IF(SPXERR_INVALID_ARG, context == nullptr || phrase == nullptr || phraseLen == nullptr); + + auto phrases = static_cast*>(context); + SPX_RETURN_HR_IF(SPXERR_OUT_OF_RANGE, index >= phrases->size()); + + *phrase = phrases->at(index).c_str(); + *phraseLen = phrases->at(index).length(); + return SPX_NOERROR; + } + catch (...) 
+ { + return SPXERR_UNHANDLED_EXCEPTION; + } + }; + + for (const auto& entity : model->Entities) + { + SPX_THROW_ON_FAIL(pattern_matching_model_add_entity( + hModel, + entity.Id.c_str(), + (int)entity.Type, + (int)entity.Mode, + entity.Phrases.size(), + (void*)&entity.Phrases, + vectorGetter)); + } + + for (const auto& intent : model->Intents) + { + SPX_THROW_ON_FAIL(pattern_matching_model_add_intent( + hModel, + intent.Id.c_str(), + 0, // no priority at the moment so set to 0 + intent.Phrases.size(), + (void*)&intent.Phrases, + vectorGetter)); + } + + Utils::AbiHandle hTrigger(intent_trigger_handle_release); + SPX_THROW_ON_FAIL(intent_trigger_create_from_language_understanding_model(&hTrigger, hModel, "")); + + SPX_THROW_ON_FAIL(intent_recognizer_add_intent_with_model_id(m_hreco, hTrigger, modelId.c_str())); + } + + DISABLE_COPY_AND_MOVE(IntentRecognizer); + + friend class Microsoft::CognitiveServices::Speech::Session; +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Intent diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_intent_trigger.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_intent_trigger.h new file mode 100644 index 0000000..b67babd --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_intent_trigger.h @@ -0,0 +1,87 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_intent_trigger.h: Public API declarations for IntentTrigger C++ class +// + +#pragma once +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Intent { + +/// +/// Represents an intent trigger. +/// +class IntentTrigger +{ +public: + + /// + /// Creates an intent trigger using the specified phrase. + /// + /// The simple phrase to create an intent trigger for. + /// A shared pointer to an intent trigger. + static std::shared_ptr From(const SPXSTRING& simplePhrase) + { + SPXTRIGGERHANDLE htrigger = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(intent_trigger_create_from_phrase(&htrigger, Utils::ToUTF8(simplePhrase).c_str())); + return std::make_shared(htrigger); + } + + /// + /// Creates an intent trigger using the specified LanguageUnderstandingModel. + /// + /// The LanguageUnderstandingModel to create an intent trigger for. + /// A shared pointer to an intent trigger. + static std::shared_ptr From(std::shared_ptr model) + { + SPXTRIGGERHANDLE htrigger = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(intent_trigger_create_from_language_understanding_model(&htrigger, (SPXLUMODELHANDLE)(*model.get()), nullptr)); + return std::make_shared(htrigger); + } + + /// + /// Creates an intent trigger using the specified LanguageUnderstandingModel and an intent name. + /// + /// The LanguageUnderstandingModel to create an intent trigger for. + /// The intent name to create an intent trigger for. + /// A shared pointer to an intent trigger. 
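The IntentTrigger factories declared here can also be used directly instead of the AddIntent convenience overloads; a brief sketch with placeholder names.

#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech::Intent;

void AddIntentViaTrigger(const std::shared_ptr<IntentRecognizer>& recognizer,
                         const std::shared_ptr<LanguageUnderstandingModel>& model)
{
    // Build the trigger explicitly; equivalent to the AddIntent(model, name, id) overload.
    // "BookFlight" / "book-flight" are placeholder names.
    auto trigger = IntentTrigger::From(model, "BookFlight");
    recognizer->AddIntent(trigger, "book-flight");
}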
+ static std::shared_ptr From(std::shared_ptr model, const SPXSTRING& intentName) + { + SPXTRIGGERHANDLE htrigger = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(intent_trigger_create_from_language_understanding_model(&htrigger, (SPXLUMODELHANDLE)(*model.get()), Utils::ToUTF8(intentName).c_str())); + return std::make_shared(htrigger); + } + + /// + /// Virtual destructor + /// + virtual ~IntentTrigger() { intent_trigger_handle_release(m_htrigger); m_htrigger = SPXHANDLE_INVALID; } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Trigger handle. + explicit IntentTrigger(SPXTRIGGERHANDLE htrigger) : m_htrigger(htrigger) { }; + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. + explicit operator SPXTRIGGERHANDLE() { return m_htrigger; } + +private: + DISABLE_DEFAULT_CTORS(IntentTrigger); + + SPXTRIGGERHANDLE m_htrigger; +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Intent diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_keyword_recognition_eventargs.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_keyword_recognition_eventargs.h new file mode 100644 index 0000000..24411d5 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_keyword_recognition_eventargs.h @@ -0,0 +1,86 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_keyword_recognition_eventargs.h: Public API declarations for KeywordRecognitionEventArgs C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class for the events emmited by the . +/// +class KeywordRecognitionEventArgs : public RecognitionEventArgs +{ +private: + + SPXEVENTHANDLE m_hevent; + std::shared_ptr m_result; + +public: + + /// + /// Constructor. + /// + /// Event handle + explicit KeywordRecognitionEventArgs(SPXEVENTHANDLE hevent) : + RecognitionEventArgs(hevent), + m_hevent(hevent), + m_result(std::make_shared(ResultHandleFromEventHandle(hevent))), + Result(m_result) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + }; + + /// + virtual ~KeywordRecognitionEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + SPX_THROW_ON_FAIL(recognizer_event_handle_release(m_hevent)); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + /// + /// Keyword recognition event result. + /// + std::shared_ptr Result; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +protected: +#endif + + /*! \cond PROTECTED */ + + /// + /// Speech recognition event result. + /// + std::shared_ptr GetResult() const { return m_result; } + + /*! 
\endcond */ + +private: + + DISABLE_DEFAULT_CTORS(KeywordRecognitionEventArgs); + + SPXRESULTHANDLE ResultHandleFromEventHandle(SPXEVENTHANDLE hevent) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(recognizer_recognition_event_get_result(hevent, &hresult)); + return hresult; + } +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_keyword_recognition_model.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_keyword_recognition_model.h new file mode 100644 index 0000000..afa56cd --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_keyword_recognition_model.h @@ -0,0 +1,101 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_keyword_recognition_model.h: Public API declarations for KeywordRecognitionModel C++ class +// + +#pragma once +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Represents keyword recognition model used with StartKeywordRecognitionAsync methods. +/// +class KeywordRecognitionModel +{ +public: + + /// + /// Creates a keyword recognition model using the specified file. + /// + /// The file name of the keyword recognition model. + /// A shared pointer to keyword recognition model. + static std::shared_ptr FromFile(const SPXSTRING& fileName) + { + SPXKEYWORDHANDLE hkeyword = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(keyword_recognition_model_create_from_file(Utils::ToUTF8(fileName).c_str(), &hkeyword)); + return std::make_shared(hkeyword); + } + + /// + /// Creates a keyword recognition model using the specified embedded speech config. + /// + /// Embedded speech config. + /// A shared pointer to keyword recognition model. + static std::shared_ptr FromConfig(std::shared_ptr embeddedSpeechConfig) + { + SPXKEYWORDHANDLE hkeyword = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(keyword_recognition_model_create_from_config( + Utils::HandleOrInvalid(embeddedSpeechConfig), &hkeyword)); + + return std::make_shared(hkeyword); + } + + /// + /// Creates a keyword recognition model using the specified embedded speech config + /// and user-defined wake words. + /// + /// Embedded speech config. + /// User-defined wake words. + /// A shared pointer to keyword recognition model. + static std::shared_ptr FromConfig( + std::shared_ptr embeddedSpeechConfig, const std::vector& userDefinedWakeWords) + { + SPXKEYWORDHANDLE hkeyword = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(keyword_recognition_model_create_from_config( + Utils::HandleOrInvalid(embeddedSpeechConfig), &hkeyword)); + + for (const SPXSTRING& wakeWord : userDefinedWakeWords) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, wakeWord.empty()); + SPX_THROW_ON_FAIL(keyword_recognition_model_add_user_defined_wake_word( + static_cast(hkeyword), Utils::ToUTF8(wakeWord).c_str())); + } + + return std::make_shared(hkeyword); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Keyword handle. + explicit KeywordRecognitionModel(SPXKEYWORDHANDLE hkeyword = SPXHANDLE_INVALID) : m_hkwmodel(hkeyword) { } + + /// + /// Virtual destructor. + /// + virtual ~KeywordRecognitionModel() { keyword_recognition_model_handle_release(m_hkwmodel); } + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. 
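A small sketch of the FromConfig overload above that registers user-defined wake words; the EmbeddedSpeechConfig::FromPath call and the wake word string are assumptions.

#include <vector>
#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech;

std::shared_ptr<KeywordRecognitionModel> MakeWakeWordModel()
{
    // Embedded config creation is assumed (FromPath); the wake word itself is illustrative.
    auto embedded = EmbeddedSpeechConfig::FromPath("Models/");
    return KeywordRecognitionModel::FromConfig(embedded, std::vector<SPXSTRING>{ "Hey Avatar" });
}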
+ explicit operator SPXKEYWORDHANDLE() { return m_hkwmodel; } + +private: + + DISABLE_COPY_AND_MOVE(KeywordRecognitionModel); + + SPXKEYWORDHANDLE m_hkwmodel; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_keyword_recognition_result.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_keyword_recognition_result.h new file mode 100644 index 0000000..ddaa7ee --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_keyword_recognition_result.h @@ -0,0 +1,44 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_keyword_recognition_result.h: Public API declarations for the KeywordRecognitionResult C++ class +// + +#pragma once + +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class that defines the results emitted by the . +/// +class KeywordRecognitionResult : public RecognitionResult +{ +public: + + explicit KeywordRecognitionResult(SPXRESULTHANDLE hresult) : + RecognitionResult(hresult) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p) -- resultid=%s; reason=0x%x; text=%s", __FUNCTION__, (void*)this, (void*)Handle, Utils::ToUTF8(ResultId).c_str(), Reason, Utils::ToUTF8(Text).c_str()); + } + + virtual ~KeywordRecognitionResult() = default; + +private: + DISABLE_DEFAULT_CTORS(KeywordRecognitionResult); +}; + +inline std::shared_ptr AudioDataStream::FromResult(std::shared_ptr result) +{ + auto resultHandle = result != nullptr ? static_cast(*result) : SPXHANDLE_INVALID; + auto streamHandle = Utils::CallFactoryMethodLeft(audio_data_stream_create_from_keyword_result, resultHandle); + return std::shared_ptr{ new AudioDataStream(streamHandle) }; +} + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_keyword_recognizer.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_keyword_recognizer.h new file mode 100644 index 0000000..3807868 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_keyword_recognizer.h @@ -0,0 +1,213 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_keyword_recognizer.h: Public API declarations for KeywordRecognizer C++ class +// +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +namespace Utils { + template + std::function&)> Callback(U* callee, F f) + { + return [=](const EventSignal& evt) + { + (callee->*f)(evt); + }; + } +} + +/// +/// Recognizer type that is specialized to only handle keyword activation. +/// +/// +/// First, the object needs to be instantiated: +/// +/// auto audioConfig = AudioConfig::FromMicrophoneInput(); // Or an alternative input +/// auto recognizer = KeywordRecognizer::FromConfig(audioConfig); +/// +/// (optional) Then, the events need to be wired in order to receive notifications: +/// +/// recognizer->Recognized += [](const KeywordRecognitionEventArgs& event) +/// { +/// // Your logic here... 
+/// }; +/// +/// And finally, recognition needs to be started. +/// +/// auto keywordModel = KeywordRecognitionModel::FromFile(modelPath); +/// auto resultFuture = recognizer->RecognizeKeywordOnceAsync(keywordModel); +/// resultFuture.wait(); +/// auto result = resultFuture.get(); +/// +///
+///
+///
+///
+///
+///
+///
+class KeywordRecognizer: public std::enable_shared_from_this +{ +public: + /// + /// Creates a KeywordRecognizer from an . The config is intended + /// to define the audio input to be used by the recognizer object. + /// + /// Defines the audio input to be used by the recognizer. + /// A new KeywordRecognizer that will consume audio from the specified input. + /// + /// If no audio config is provided, it will be equivalent to calling with a config constructed with + /// + /// + inline static std::shared_ptr FromConfig(std::shared_ptr audioConfig = nullptr) + { + auto hreco = Utils::CallFactoryMethodLeft( + ::recognizer_create_keyword_recognizer_from_audio_config, + Utils::HandleOrInvalid(audioConfig)); + return std::shared_ptr(new KeywordRecognizer(hreco)); + } + + /// + /// Destructor. + /// + ~KeywordRecognizer() + { + Canceled.DisconnectAll(); + Recognized.DisconnectAll(); + recognizer_handle_release(m_handle); + } + + /// + /// Starts a keyword recognition session. This session will last until the first keyword is recognized. When this happens, + /// a event will be raised and the session will end. To rearm the keyword, the method needs to be called + /// again after the event is emitted. + /// + /// The that describes the keyword we want to detect. + /// A future that resolves to a that resolves once a keyword is detected. + /// + /// Note that if no keyword is detected in the input, the task will never resolve (unless is called. + /// + inline std::future> RecognizeOnceAsync(std::shared_ptr model) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, model, this]() + { + auto modelHandle = static_cast(*model); + + SPXRESULTHANDLE result = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(recognizer_recognize_keyword_once(m_handle, modelHandle, &result)); + + return std::make_shared(result); + }); + return future; + } + + /// + /// Stops a currently active keyword recognition session. + /// + /// A future that resolves when the active session (if any) is stopped. + inline std::future StopRecognitionAsync() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() + { + SPX_THROW_ON_FAIL(recognizer_stop_keyword_recognition(m_handle)); + }); + return future; + } + + /// + /// Signal for events related to the recognition of keywords. + /// + EventSignal Recognized; + + /// + /// Signal for events relating to the cancellation of an interaction. The event indicates if the reason is a direct cancellation or an error. + /// + EventSignal Canceled; + +private: + /*! 
\cond PROTECTED */ + + static void FireEvent_Recognized(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + KeywordRecognitionEventArgs event{ h_event }; + keep_alive->Recognized.Signal(event); + } + + static void FireEvent_Canceled(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + SpeechRecognitionCanceledEventArgs event{ h_event }; + keep_alive->Canceled.Signal(event); + } + + void RecognizerEventConnectionChanged(const EventSignal& reco_event) + { + if (m_handle != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_handle=0x%8p", __FUNCTION__, (void*)m_handle); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_handle), "%s: m_handle is INVALID!!!", __FUNCTION__); + + if (&reco_event== &Recognized) + { + ::recognizer_recognized_set_callback(m_handle, Recognized.IsConnected() ? KeywordRecognizer::FireEvent_Recognized : nullptr, this); + } + } + } + + void CanceledEventConnectionChanged(const EventSignal& canceled_event) + { + if (m_handle != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_handle=0x%8p", __FUNCTION__, (void*)m_handle); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_handle), "%s: m_handle is INVALID!!!", __FUNCTION__); + + if (&canceled_event == &Canceled) + { + ::recognizer_canceled_set_callback(m_handle, Canceled.IsConnected() ? KeywordRecognizer::FireEvent_Canceled : nullptr, this); + } + } + } + + inline explicit KeywordRecognizer(SPXRECOHANDLE handle): + Recognized{ Utils::Callback(this, &KeywordRecognizer::RecognizerEventConnectionChanged) }, + Canceled{ Utils::Callback(this, &KeywordRecognizer::CanceledEventConnectionChanged) }, + m_properties{ Utils::CallFactoryMethodRight(recognizer_get_property_bag, handle) }, + m_handle{ handle }, + Properties { m_properties } + { + } + + PropertyCollection m_properties; + SPXRECOHANDLE m_handle; + /*! \endcond */ + +public: + /// + /// A collection of properties and their values defined for this . + /// + const PropertyCollection& Properties; +}; + + +} } } diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_language_understanding_model.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_language_understanding_model.h new file mode 100644 index 0000000..343a6b6 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_language_understanding_model.h @@ -0,0 +1,113 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_language_understanding_model.h: Public API declarations for LanguageUnderstandingModel C++ class +// + +#pragma once +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Intent { + +/// +/// Represents language understanding model used for intent recognition. +/// +class LanguageUnderstandingModel +{ +public: + + enum class LanguageUnderstandingModelType + { + PatternMatchingModel, + LanguageUnderstandingModel, + ConversationalLanguageUnderstandingModel + }; + + /// + /// Creates a language understanding (LUIS) model using the specified endpoint url. + /// + /// The endpoint url of a language understanding model. + /// A shared pointer to language understanding model. 
+ static std::shared_ptr FromEndpoint(const SPXSTRING& uri) + { + SPXLUMODELHANDLE hlumodel = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(language_understanding_model_create_from_uri(&hlumodel, Utils::ToUTF8(uri).c_str())); + return std::make_shared(hlumodel); + } + + /// + /// Creates a language understanding model using the specified app id. + /// + /// A string that represents the application id of Language Understanding service. + /// A shared pointer to language understanding model. + static std::shared_ptr FromAppId(const SPXSTRING& appId) + { + SPXLUMODELHANDLE hlumodel = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(language_understanding_model_create_from_app_id(&hlumodel, Utils::ToUTF8(appId).c_str())); + return std::make_shared(hlumodel); + } + + /// + /// Creates a language understanding model using the specified hostname, subscription key and application id. + /// + /// A string that represents the subscription key of Language Understanding service. + /// A string that represents the application id of Language Understanding service. + /// A String that represents the region of the Language Understanding service (see the region page). + /// A shared pointer to language understanding model. + static std::shared_ptr FromSubscription(const SPXSTRING& subscriptionKey, const SPXSTRING& appId, const SPXSTRING& region) + { + SPXLUMODELHANDLE hlumodel = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(language_understanding_model_create_from_subscription(&hlumodel, Utils::ToUTF8(subscriptionKey).c_str(), Utils::ToUTF8(appId).c_str(), Utils::ToUTF8(region).c_str())); + return std::make_shared(hlumodel); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Language understanding model handle. + explicit LanguageUnderstandingModel(SPXLUMODELHANDLE hlumodel = SPXHANDLE_INVALID) : m_type(LanguageUnderstandingModelType::LanguageUnderstandingModel), m_hlumodel(hlumodel) { } + + /// + /// Virtual destructor. + /// + virtual ~LanguageUnderstandingModel() { language_understanding_model__handle_release(m_hlumodel); } + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. + explicit operator SPXLUMODELHANDLE() const { return m_hlumodel; } + + /// + /// Returns id for this model. + /// + /// An string representing the id of this model. + virtual SPXSTRING GetModelId() const { return Utils::ToSPXString(language_understanding_model_get_model_id(m_hlumodel)); } + + /// + /// Gets the model type. + /// + /// An enum representing the type of the model. + LanguageUnderstandingModelType GetModelType() const { return m_type; } +protected: + /// + /// Protected constructor for base classes to set type. + /// + /// Language understanding model type. + LanguageUnderstandingModel(LanguageUnderstandingModelType type) : m_type(type), m_hlumodel(SPXHANDLE_INVALID){} + + LanguageUnderstandingModelType m_type; +private: + DISABLE_COPY_AND_MOVE(LanguageUnderstandingModel); + + SPXLUMODELHANDLE m_hlumodel; +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Intent diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_log_level.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_log_level.h new file mode 100644 index 0000000..ed130df --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_log_level.h @@ -0,0 +1,66 @@ +// +// Copyright (c) Microsoft. All rights reserved. 
+// See https://aka.ms/azai/vision/license for the full license information. +// + +#pragma once + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Diagnostics { +namespace Logging { + +/// +/// Defines the different available log levels. +/// +/// +/// This is used by different loggers to set the maximum level of detail they will output. +/// +/// +/// +/// +/// +/// +/// +enum class Level +{ + /// + /// Error logging level. Only errors will be logged. + /// + Error, + + /// + /// Warning logging level. Only errors and warnings will be logged. + /// + Warning, + + /// + /// Informational logging level. Only errors, warnings and informational log messages will be logged. + /// + Info, + + /// + /// Verbose logging level. All log messages will be logged. + /// + Verbose +}; + +/*! \cond INTERNAL */ +namespace Details +{ + inline const char * LevelToString(Level level) + { + switch (level) + { + case Level::Error: return "error"; + case Level::Warning: return "warning"; + case Level::Info: return "info"; + default: + case Level::Verbose: return "verbose"; + } + } +} +/*! \endcond */ + +}}}}} \ No newline at end of file diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_meeting.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_meeting.h new file mode 100644 index 0000000..1111e26 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_meeting.h @@ -0,0 +1,340 @@ + +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_meeting.h: Public API declarations for Meeting C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + +/// +/// Class for meeting. +/// +class Meeting : public std::enable_shared_from_this +{ +public: + + static constexpr size_t MAX_MEETING_ID_LEN = 1024; + + /// + /// Create a meeting using a speech config and a meeting id. + /// + /// A shared smart pointer of a speech config object. + /// meeting Id. + /// A shared smart pointer of the created meeting object. + static std::future> CreateMeetingAsync(std::shared_ptr speechConfig, const SPXSTRING& meetingId) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, meetingId.empty()); + auto future = std::async(std::launch::async, [meetingId, speechConfig]() -> std::shared_ptr { + SPXMEETINGHANDLE hmeeting; + SPX_THROW_ON_FAIL(meeting_create_from_config(&hmeeting, (SPXSPEECHCONFIGHANDLE)(*speechConfig), Utils::ToUTF8(meetingId).c_str())); + return std::make_shared(hmeeting); + }); + return future; + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Recognizer handle. + explicit Meeting(SPXMEETINGHANDLE hmeeting) : + m_hmeeting(hmeeting), + m_properties(hmeeting), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /// + /// Destructor. + /// + ~Meeting() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + ::meeting_release_handle(m_hmeeting); + m_hmeeting = SPXHANDLE_INVALID; + } + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. 
+ explicit operator SPXMEETINGHANDLE () const { return m_hmeeting; } + + /// + /// Get the meeting id. + /// + /// Meeting id. + SPXSTRING GetMeetingId() + { + char id[MAX_MEETING_ID_LEN + 1]; + std::memset(id, 0, MAX_MEETING_ID_LEN); + SPX_THROW_ON_FAIL(meeting_get_meeting_id(m_hmeeting, id, MAX_MEETING_ID_LEN)); + return id; + } + + /// + /// Add a participant to a meeting using the user's id. + /// + /// Note: The returned participant can be used to remove. If the client changes the participant's attributes, + /// the changed attributes are passed on to the service only when the participant is added again. + /// + /// A user id. + /// a shared smart pointer of the participant. + std::future> AddParticipantAsync(const SPXSTRING& userId) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, userId]() -> std::shared_ptr { + const auto participant = Participant::From(userId); + SPX_THROW_ON_FAIL(meeting_update_participant(m_hmeeting, true, (SPXPARTICIPANTHANDLE)(*participant))); + return participant; + }); + return future; + } + + /// + /// Add a participant to a meeting using the User object. + /// + /// A shared smart pointer to a User object. + /// The passed in User object. + std::future> AddParticipantAsync(const std::shared_ptr& user) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, user]() -> std::shared_ptr { + SPX_THROW_ON_FAIL(meeting_update_participant_by_user(m_hmeeting, true, (SPXUSERHANDLE)(*user))); + return user; + }); + return future; + } + + /// + /// Add a participant to a meeting using the participant object + /// + /// A shared smart pointer to a participant object. + /// The passed in participant object. + std::future> AddParticipantAsync(const std::shared_ptr& participant) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, participant]() -> std::shared_ptr { + SPX_THROW_ON_FAIL(meeting_update_participant(m_hmeeting, true, (SPXPARTICIPANTHANDLE)(*participant))); + return participant; + }); + return future; + } + + /// + /// Remove a participant from a meeting using the participant object + /// + /// A shared smart pointer of a participant object. + /// An empty future. + std::future RemoveParticipantAsync(const std::shared_ptr& participant) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, participant]() -> void { + SPX_THROW_ON_FAIL(meeting_update_participant(m_hmeeting, false, (SPXPARTICIPANTHANDLE)(*participant))); + }); + return future; + } + + /// + /// Remove a participant from a meeting using the User object + /// + /// A smart pointer of a User. + /// An empty future. + std::future RemoveParticipantAsync(const std::shared_ptr& user) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, user]() -> void { + SPX_THROW_ON_FAIL(meeting_update_participant_by_user(m_hmeeting, false, SPXUSERHANDLE(*user))); + }); + return future; + } + + /// + /// Remove a participant from a meeting using a user id string. + /// + /// A user id. + /// An empty future. 
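A compressed sketch of the meeting lifecycle calls declared in this class; the subscription values, meeting id, and user id are placeholders.

#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Transcription;

std::shared_ptr<Meeting> StartDemoMeeting()
{
    auto config = SpeechConfig::FromSubscription("<subscription-key>", "<region>");

    // Meeting ids are caller-supplied; this one is illustrative.
    auto meeting = Meeting::CreateMeetingAsync(config, "demo-meeting-001").get();

    meeting->StartMeetingAsync().get();
    meeting->AddParticipantAsync("user-42@example.com").get();   // add by user id
    return meeting;
}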
+ std::future RemoveParticipantAsync(const SPXSTRING& userId) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, userId]() -> void { + SPX_THROW_ON_FAIL(meeting_update_participant_by_user_id(m_hmeeting, false, Utils::ToUTF8(userId.c_str()))); + }); + return future; + } + + /// + /// Ends the current meeting. + /// + /// An empty future. + std::future EndMeetingAsync() + { + return RunAsync(::meeting_end_meeting); + } + + /// + /// Sets the authorization token that will be used for connecting the server. + /// + /// The authorization token. + void SetAuthorizationToken(const SPXSTRING& token) + { + Properties.SetProperty(PropertyId::SpeechServiceAuthorization_Token, token); + } + + /// + /// Gets the authorization token. + /// + /// Authorization token + SPXSTRING GetAuthorizationToken() + { + return Properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token, SPXSTRING()); + } + + /// + /// Start the meeting. + /// + /// An empty future. + std::future StartMeetingAsync() + { + return RunAsync(::meeting_start_meeting); + } + + /// + /// Deletes the meeting. Any participants that are still part of the meeting + /// will be ejected after this call. + /// + /// An empty future. + std::future DeleteMeetingAsync() + { + return RunAsync(::meeting_delete_meeting); + } + + /// + /// Locks the meeting. After this no new participants will be able to join. + /// + /// An empty future. + std::future LockMeetingAsync() + { + return RunAsync(::meeting_lock_meeting); + } + + /// + /// Unlocks the meeting. + /// + /// An empty future. + std::future UnlockMeetingAsync() + { + return RunAsync(::meeting_unlock_meeting); + } + + /// + /// Mutes all participants except for the host. This prevents others from generating + /// transcriptions, or sending text messages. + /// + /// An empty future. + std::future MuteAllParticipantsAsync() + { + return RunAsync(::meeting_mute_all_participants); + } + + /// + /// Allows other participants to generate transcriptions, or send text messages. + /// + /// An empty future. + std::future UnmuteAllParticipantsAsync() + { + return RunAsync(::meeting_unmute_all_participants); + } + + /// + /// Mutes a particular participant. This will prevent them generating new transcriptions, + /// or sending text messages. + /// + /// The identifier for the participant. + /// An empty future. + std::future MuteParticipantAsync(const SPXSTRING& participantId) + { + return RunAsync([participantId = Utils::ToUTF8(participantId)](auto handle) + { + return ::meeting_mute_participant(handle, participantId.c_str()); + }); + } + + /// + /// Unmutes a particular participant. + /// + /// The identifier for the participant. + /// An empty future. + std::future UnmuteParticipantAsync(const SPXSTRING& participantId) + { + return RunAsync([participantId = Utils::ToUTF8(participantId)](auto handle) + { + return ::meeting_unmute_participant(handle, participantId.c_str()); + }); + } + +private: + + /*! 
\cond PRIVATE */ + + SPXMEETINGHANDLE m_hmeeting; + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXMEETINGHANDLE hmeeting) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + meeting_get_property_bag(hmeeting, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + inline std::future RunAsync(std::function func) + { + auto keepalive = this->shared_from_this(); + return std::async(std::launch::async, [keepalive, this, func]() + { + SPX_THROW_ON_FAIL(func(m_hmeeting)); + }); + } + + /*! \endcond */ + +public: + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; + +}; + +}}}} diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_meeting_transcriber.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_meeting_transcriber.h new file mode 100644 index 0000000..01609bf --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_meeting_transcriber.h @@ -0,0 +1,467 @@ + +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_meeting_transcriber.h: Public API declarations for MeetingTranscriber C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + +class Session; + +/// +/// Class for meeting transcriber. +/// +class MeetingTranscriber : public Recognizer +{ +public: + + /// + /// Create a meeting transcriber from an audio config. + /// + /// Audio configuration. + /// A smart pointer wrapped meeting transcriber pointer. + static std::shared_ptr FromConfig(std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::recognizer_create_meeting_transcriber_from_config( &hreco, + Utils::HandleOrInvalid(audioInput))); + + return std::make_shared(hreco); + } + + /// + /// Join a meeting. + /// + /// A smart pointer of the meeting to be joined. + /// An empty future. + std::future JoinMeetingAsync(std::shared_ptr meeting) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, meeting]() -> void { + SPX_THROW_ON_FAIL(::recognizer_join_meeting(Utils::HandleOrInvalid(meeting), m_hreco)); + }); + + return future; + } + + /// + /// Leave a meeting. + /// + /// Note: After leaving a meeting, no transcribing or transcribed events will be sent to end users. End users need to join a meeting to get the events again. + /// + /// An empty future. + std::future LeaveMeetingAsync() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() -> void { + SPX_THROW_ON_FAIL(::recognizer_leave_meeting(m_hreco)); + }); + + return future; + } + + /// + /// Asynchronously starts a meeting transcribing. + /// + /// An empty future. 
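// ---------------------------------------------------------------------------
// Usage sketch (illustration only; not part of this SDK header). Assumes a
// std::shared_ptr<Meeting> named `meeting` already exists and that the default
// microphone is a suitable audio source; error handling is omitted.
//
//     auto audioInput  = Audio::AudioConfig::FromDefaultMicrophoneInput();
//     auto transcriber = MeetingTranscriber::FromConfig(audioInput);
//
//     transcriber->JoinMeetingAsync(meeting).get();
//     transcriber->StartTranscribingAsync().get();   // declared just below
//
//     // ... consume Transcribing/Transcribed events while audio is processed ...
//
//     transcriber->StopTranscribingAsync().get();
//     transcriber->LeaveMeetingAsync().get();
// ---------------------------------------------------------------------------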
+ std::future StartTranscribingAsync() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() -> void { + SPX_INIT_HR(hr); + SPX_THROW_ON_FAIL(hr = recognizer_async_handle_release(m_hasyncStartContinuous)); // close any unfinished previous attempt + + SPX_EXITFN_ON_FAIL(hr = recognizer_start_continuous_recognition_async(m_hreco, &m_hasyncStartContinuous)); + SPX_EXITFN_ON_FAIL(hr = recognizer_start_continuous_recognition_async_wait_for(m_hasyncStartContinuous, UINT32_MAX)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = recognizer_async_handle_release(m_hasyncStartContinuous); + SPX_REPORT_ON_FAIL(releaseHr); + m_hasyncStartContinuous = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(hr); + }); + + return future; + } + + /// + /// Asynchronously stops a meeting transcribing. + /// + /// An empty future. + std::future StopTranscribingAsync() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() -> void { + + SPX_THROW_ON_FAIL(::recognizer_leave_meeting(m_hreco)); + + SPX_INIT_HR(hr); + SPX_THROW_ON_FAIL(hr = recognizer_async_handle_release(m_hasyncStopContinuous)); // close any unfinished previous attempt + + SPX_EXITFN_ON_FAIL(hr = recognizer_stop_continuous_recognition_async(m_hreco, &m_hasyncStopContinuous)); + SPX_EXITFN_ON_FAIL(hr = recognizer_stop_continuous_recognition_async_wait_for(m_hasyncStopContinuous, UINT32_MAX)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = recognizer_async_handle_release(m_hasyncStopContinuous); + SPX_REPORT_ON_FAIL(releaseHr); + m_hasyncStopContinuous = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(hr); + }); + + return future; + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Recognizer handle. + explicit MeetingTranscriber(SPXRECOHANDLE hreco) throw() : + Recognizer(hreco), + SessionStarted(GetSessionEventConnectionsChangedCallback()), + SessionStopped(GetSessionEventConnectionsChangedCallback()), + SpeechStartDetected(GetRecognitionEventConnectionsChangedCallback()), + SpeechEndDetected(GetRecognitionEventConnectionsChangedCallback()), + Transcribing(GetRecoEventConnectionsChangedCallback()), + Transcribed(GetRecoEventConnectionsChangedCallback()), + Canceled(GetRecoCanceledEventConnectionsChangedCallback()), + m_hasyncStartContinuous(SPXHANDLE_INVALID), + m_hasyncStopContinuous(SPXHANDLE_INVALID), + m_properties(hreco), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /// + /// Destructor. + /// + ~MeetingTranscriber() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + TermRecognizer(); + } + + /// + /// Signal for events indicating the start of a recognition session (operation). + /// + EventSignal SessionStarted; + + /// + /// Signal for events indicating the end of a recognition session (operation). + /// + EventSignal SessionStopped; + + /// + /// Signal for events indicating the start of speech. + /// + EventSignal SpeechStartDetected; + + /// + /// Signal for events indicating the end of speech. + /// + EventSignal SpeechEndDetected; + + /// + /// Signal for events containing intermediate recognition results. + /// + EventSignal Transcribing; + + /// + /// Signal for events containing final recognition results. + /// (indicating a successful recognition attempt). 
+ /// + EventSignal Transcribed; + + /// + /// Signal for events containing canceled recognition results + /// (indicating a recognition attempt that was canceled as a result or a direct cancellation request + /// or, alternatively, a transport or protocol failure). + /// + EventSignal Canceled; + + /// + /// Sets the authorization token that will be used for connecting the server. + /// + /// The authorization token. + void SetAuthorizationToken(const SPXSTRING& token) + { + Properties.SetProperty(PropertyId::SpeechServiceAuthorization_Token, token); + } + + /// + /// Gets the authorization token. + /// + /// Authorization token + SPXSTRING GetAuthorizationToken() + { + return Properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token, SPXSTRING()); + } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Recognizer handle. + virtual void TermRecognizer() override + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + // Disconnect the event signals in reverse construction order + Canceled.DisconnectAll(); + Transcribed.DisconnectAll(); + Transcribing.DisconnectAll(); + SpeechEndDetected.DisconnectAll(); + SpeechStartDetected.DisconnectAll(); + SessionStopped.DisconnectAll(); + SessionStarted.DisconnectAll(); + + // Close the async handles we have open for Recognize, StartContinuous, and StopContinuous + for (auto handle : { &m_hasyncStartContinuous, &m_hasyncStopContinuous }) + { + if (*handle != SPXHANDLE_INVALID && ::recognizer_async_handle_is_valid(*handle)) + { + ::recognizer_async_handle_release(*handle); + *handle = SPXHANDLE_INVALID; + } + } + + // Ask the base to term + Recognizer::TermRecognizer(); + } + + void RecoEventConnectionsChanged(const EventSignal& recoEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&recoEvent == &Transcribing) + { + recognizer_recognizing_set_callback(m_hreco, Transcribing.IsConnected() ? FireEvent_Transcribing : nullptr, this); + } + else if (&recoEvent == &Transcribed) + { + recognizer_recognized_set_callback(m_hreco, Transcribed.IsConnected() ? FireEvent_Transcribed : nullptr, this); + } + } + } + + static void FireEvent_Transcribing(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new MeetingTranscriptionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Transcribing.Signal(*recoEvent.get()); + } + + static void FireEvent_Transcribed(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new MeetingTranscriptionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Transcribed.Signal(*recoEvent.get()); + } + + void RecoCanceledEventConnectionsChanged(const EventSignal& recoEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&recoEvent == &Canceled) + { + recognizer_canceled_set_callback(m_hreco, Canceled.IsConnected() ? 
FireEvent_Canceled : nullptr, this); + } + } + } + + static void FireEvent_Canceled(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + + auto ptr = new MeetingTranscriptionCanceledEventArgs(hevent); + std::shared_ptr recoEvent(ptr); + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Canceled.Signal(*ptr); + } + + void SessionEventConnectionsChanged(const EventSignal& sessionEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&sessionEvent == &SessionStarted) + { + recognizer_session_started_set_callback(m_hreco, SessionStarted.IsConnected() ? FireEvent_SessionStarted : nullptr, this); + } + else if (&sessionEvent == &SessionStopped) + { + recognizer_session_stopped_set_callback(m_hreco, SessionStopped.IsConnected() ? FireEvent_SessionStopped : nullptr, this); + } + } + } + + static void FireEvent_SessionStarted(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr sessionEvent{ new SessionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SessionStarted.Signal(*sessionEvent.get()); + + // SessionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + static void FireEvent_SessionStopped(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr sessionEvent{ new SessionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SessionStopped.Signal(*sessionEvent.get()); + + // SessionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + void RecognitionEventConnectionsChanged(const EventSignal& recognitionEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&recognitionEvent == &SpeechStartDetected) + { + recognizer_speech_start_detected_set_callback(m_hreco, SpeechStartDetected.IsConnected() ? FireEvent_SpeechStartDetected : nullptr, this); + } + else if (&recognitionEvent == &SpeechEndDetected) + { + recognizer_speech_end_detected_set_callback(m_hreco, SpeechEndDetected.IsConnected() ? FireEvent_SpeechEndDetected : nullptr, this); + } + } + } + + static void FireEvent_SpeechStartDetected(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new RecognitionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SpeechStartDetected.Signal(*recoEvent.get()); + + // RecognitionEventArgs doesn't hold hevent, and thus can't release it properly ... 
release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + static void FireEvent_SpeechEndDetected(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new RecognitionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SpeechEndDetected.Signal(*recoEvent.get()); + + // RecognitionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + /*! \endcond */ + +private: + + SPXASYNCHANDLE m_hasyncStartContinuous; + SPXASYNCHANDLE m_hasyncStopContinuous; + + DISABLE_DEFAULT_CTORS(MeetingTranscriber); + friend class Microsoft::CognitiveServices::Speech::Session; + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRECOHANDLE hreco) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + recognizer_get_property_bag(hreco, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + inline std::function&)> GetSessionEventConnectionsChangedCallback() + { + return [=, this](const EventSignal& sessionEvent) { this->SessionEventConnectionsChanged(sessionEvent); }; + } + + inline std::function&)> GetRecoEventConnectionsChangedCallback() + { + return [=, this](const EventSignal& recoEvent) { this->RecoEventConnectionsChanged(recoEvent); }; + } + + inline std::function&)> GetRecoCanceledEventConnectionsChangedCallback() + { + return [=, this](const EventSignal& recoEvent) { this->RecoCanceledEventConnectionsChanged(recoEvent); }; + } + + inline std::function&)> GetRecognitionEventConnectionsChangedCallback() + { + return [=, this](const EventSignal& recoEvent) { this->RecognitionEventConnectionsChanged(recoEvent); }; + } + +public: + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; +}; + +} } } } // Microsoft::CognitiveServices::Speech::Transcription diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_meeting_transcription_eventargs.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_meeting_transcription_eventargs.h new file mode 100644 index 0000000..4ff1db1 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_meeting_transcription_eventargs.h @@ -0,0 +1,168 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_meeting_transcription_eventargs.h: Public API declarations for MeetingTranscriptionEventArgs C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + +/// +/// Class for meeting transcriber event arguments. +/// +class MeetingTranscriptionEventArgs : public RecognitionEventArgs +{ +private: + + SPXEVENTHANDLE m_hevent; + std::shared_ptr m_result; + +public: + + /// + /// Constructor. 
+ /// + /// Event handle + explicit MeetingTranscriptionEventArgs(SPXEVENTHANDLE hevent) : + RecognitionEventArgs(hevent), + m_hevent(hevent), + m_result(std::make_shared(ResultHandleFromEventHandle(hevent))), + Result(m_result) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + }; + + /// + virtual ~MeetingTranscriptionEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + SPX_THROW_ON_FAIL(recognizer_event_handle_release(m_hevent)); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + /// + /// Meeting transcriber result. + /// + std::shared_ptr Result; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +protected: +#endif + + /*! \cond PROTECTED */ + + /// + /// Meeting transcriber result. + /// + std::shared_ptr GetResult() const { return m_result; } + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(MeetingTranscriptionEventArgs); + + SPXRESULTHANDLE ResultHandleFromEventHandle(SPXEVENTHANDLE hevent) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(recognizer_recognition_event_get_result(hevent, &hresult)); + return hresult; + } +}; + + +/// +/// Class for meeting transcriber canceled event arguments. +/// Added in version 1.5.0. +/// +class MeetingTranscriptionCanceledEventArgs : public MeetingTranscriptionEventArgs +{ +private: + + std::shared_ptr m_cancellation; + CancellationReason m_cancellationReason; + CancellationErrorCode m_errorCode; + +public: + + /// + /// Constructor. + /// + /// Event handle + explicit MeetingTranscriptionCanceledEventArgs(SPXEVENTHANDLE hevent) : + MeetingTranscriptionEventArgs(hevent), + m_cancellation(CancellationDetails::FromResult(GetResult())), + m_cancellationReason(m_cancellation->Reason), + m_errorCode(m_cancellation->ErrorCode), + Reason(m_cancellationReason), + ErrorCode(m_errorCode), + ErrorDetails(m_cancellation->ErrorDetails) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + + /// + virtual ~MeetingTranscriptionCanceledEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-private-field" +#endif + /// + /// The reason the result was canceled. + /// + const CancellationReason& Reason; + + /// + /// The error code in case of an unsuccessful recognition ( is set to Error). + /// If Reason is not Error, ErrorCode is set to NoError. + /// + const CancellationErrorCode& ErrorCode; + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + + /// + /// The error message in case of an unsuccessful recognition ( is set to Error). + /// + const SPXSTRING ErrorDetails; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +private: +#endif + /// + /// CancellationDetails. 
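// ---------------------------------------------------------------------------
// Usage sketch (illustration only; not part of this SDK header). Assumes a
// MeetingTranscriber named `transcriber` and that SPXSTRING maps to
// std::string so ErrorDetails can be streamed directly.
//
//     transcriber->Canceled += [](const MeetingTranscriptionCanceledEventArgs& e)
//     {
//         if (e.Reason == CancellationReason::Error)
//         {
//             std::cout << "Transcription canceled: " << e.ErrorDetails << std::endl;
//         }
//     };
// ---------------------------------------------------------------------------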
+ /// + std::shared_ptr GetCancellationDetails() const { return m_cancellation; } + +private: + + DISABLE_DEFAULT_CTORS(MeetingTranscriptionCanceledEventArgs); +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Transcription diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_meeting_transcription_result.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_meeting_transcription_result.h new file mode 100644 index 0000000..9b54fb8 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_meeting_transcription_result.h @@ -0,0 +1,96 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_meeting_transcriber_result.h: Public API declarations for MeetingTranscription C++ class +// + +#pragma once +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + +/// +/// Represents the result of a meeting transcriber. +/// +class MeetingTranscriptionResult final : public RecognitionResult +{ +public: + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Result handle. + explicit MeetingTranscriptionResult(SPXRESULTHANDLE hresult) : + RecognitionResult(hresult), + UserId(m_userId), + UtteranceId(m_utteranceId) + { + PopulateSpeakerFields(hresult, &m_userId); + PopulateUtteranceFields(hresult, &m_utteranceId); + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p) -- resultid=%s; reason=0x%x; text=%s, userid=%s, utteranceid=%s", __FUNCTION__, (void*)this, (void*)Handle, Utils::ToUTF8(ResultId).c_str(), Reason, Utils::ToUTF8(Text).c_str(), Utils::ToUTF8(UserId).c_str(), Utils::ToUTF8(UtteranceId).c_str()); + } + + /// + /// Destructor. + /// + ~MeetingTranscriptionResult() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)Handle); + } + + /// + /// Unique Speaker id. + /// + const SPXSTRING& UserId; + + /// + /// Unique id that is consistent across all the intermediates and final speech recognition result from one user. 
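// ---------------------------------------------------------------------------
// Usage sketch (illustration only; not part of this SDK header). Assumes a
// MeetingTranscriber named `transcriber` and that SPXSTRING maps to std::string.
//
//     transcriber->Transcribed += [](const MeetingTranscriptionEventArgs& e)
//     {
//         // Text comes from the RecognitionResult base class; UserId and
//         // UtteranceId are specific to MeetingTranscriptionResult.
//         std::cout << e.Result->UserId << " (" << e.Result->UtteranceId << "): "
//                   << e.Result->Text << std::endl;
//     };
// ---------------------------------------------------------------------------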
+ /// + const SPXSTRING& UtteranceId; + +private: + DISABLE_DEFAULT_CTORS(MeetingTranscriptionResult); + + void PopulateSpeakerFields(SPXRESULTHANDLE hresult, SPXSTRING* puserId) + { + SPX_INIT_HR(hr); + + const size_t maxCharCount = 1024; + char sz[maxCharCount + 1] = {}; + + if (puserId != nullptr && recognizer_result_handle_is_valid(hresult)) + { + SPX_THROW_ON_FAIL(hr = meeting_transcription_result_get_user_id(hresult, sz, maxCharCount)); + *puserId = Utils::ToSPXString(sz); + } + } + + void PopulateUtteranceFields(SPXRESULTHANDLE hresult, SPXSTRING* putteranceId) + { + SPX_INIT_HR(hr); + + const size_t maxCharCount = 1024; + char sz[maxCharCount + 1] = {}; + + if (putteranceId != nullptr && recognizer_result_handle_is_valid(hresult)) + { + SPX_THROW_ON_FAIL(hr = meeting_transcription_result_get_utterance_id(hresult, sz, maxCharCount)); + *putteranceId = Utils::ToSPXString(sz); + } + } + + SPXSTRING m_userId; + SPXSTRING m_utteranceId; +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Transcription diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_memory_logger.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_memory_logger.h new file mode 100644 index 0000000..2e1644f --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_memory_logger.h @@ -0,0 +1,163 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Diagnostics { +namespace Logging { + +/// +/// Class with static methods to control SDK logging into an in-memory buffer. +/// Turning on logging while running your Speech SDK scenario provides +/// detailed information from the SDK's core native components. If you +/// report an issue to Microsoft, you may be asked to provide logs to help +/// Microsoft diagnose the issue. Your application should not take dependency +/// on particular log strings, as they may change from one SDK release to another +/// without notice. +/// MemoryLogger is designed for the case where you want to get access to logs +/// that were taken in the short duration before some unexpected event happens. +/// For example, if you are running a Speech Recognizer, you may want to dump the MemoryLogger +/// after getting an event indicating recognition was canceled due to some error. +/// The size of the memory buffer is fixed at 2MB and cannot be changed. This is +/// a "ring" buffer, that is, new log strings written replace the oldest ones +/// in the buffer. +/// Added in version 1.20.0 +/// +/// Memory logging is a process wide construct. That means that if (for example) +/// you have multiple speech recognizer objects running in parallel, there will be one +/// memory buffer containing interleaved logs from all recognizers. You cannot get a +/// separate logs for each recognizer. +class MemoryLogger +{ +public: + /// + /// Starts logging into the internal memory buffer. + /// + static void Start() + { + diagnostics_log_memory_start_logging(); + } + + /// + /// Stops logging into the internal memory buffer. + /// + static void Stop() + { + diagnostics_log_memory_stop_logging(); + } + + /// + /// Sets or clears filters for memory logging. 
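// ---------------------------------------------------------------------------
// Usage sketch (illustration only; not part of this SDK header). The file name
// is a placeholder; Dump() is declared further below in this class.
//
//     MemoryLogger::Start();
//
//     // ... run the speech scenario under investigation ...
//
//     // Write the in-memory ring buffer to disk, e.g. after a Canceled event.
//     MemoryLogger::Dump("speech-sdk-memory-log.txt");
//     MemoryLogger::Stop();
// ---------------------------------------------------------------------------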
+ /// Once filters are set, memory logger will only be updated with log strings + /// containing at least one of the strings specified by the filters. The match is case sensitive. + /// + /// Optional. Filters to use, or an empty list to remove previously set filters. + static void SetFilters(std::initializer_list filters = {}) + { + std::string collapsedFilters = MemoryLogger::CollapseFilters(filters); + + diagnostics_log_memory_set_filters(collapsedFilters.c_str()); + } + + /// + /// Writes the content of the whole memory buffer to the specified file. + /// It does not block other SDK threads from continuing to log into the buffer. + /// + /// Path to a log file on local disk. + /// This does not reset (clear) the memory buffer. + static void Dump(const SPXSTRING& filePath) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, filePath.empty()); + + SPX_THROW_ON_FAIL(diagnostics_log_memory_dump(Utils::ToUTF8(filePath).c_str(), nullptr, false, false)); + } + + /// + /// Writes the content of the whole memory buffer to an object that implements std::ostream. + /// For example, std::cout (for console output). + /// It does not block other SDK threads from continuing to log into the buffer. + /// + /// std::ostream object to write to. + /// This does not reset (clear) the memory buffer. + static void Dump(std::ostream& outStream) + { + auto start = diagnostics_log_memory_get_line_num_oldest(); + auto stop = diagnostics_log_memory_get_line_num_newest(); + for (auto i = start; + i < stop; + i++) + { + const char* line = diagnostics_log_memory_get_line(i); + if (line) + { + outStream << line; + } + } + } + + /// + /// Returns the content of the whole memory buffer as a vector of strings. + /// It does not block other SDK threads from continuing to log into the buffer. + /// + /// A vector with the contents of the memory buffer copied into it. + /// This does not reset (clear) the memory buffer. + static std::vector Dump() + { + std::vector results; + + auto start = diagnostics_log_memory_get_line_num_oldest(); + auto stop = diagnostics_log_memory_get_line_num_newest(); + for (auto i = start; + i < stop; + i++) + { + const char* line = diagnostics_log_memory_get_line(i); + if (line) + { + results.push_back(line); + } + } + + return results; + } + + /// + /// Sets the level of the messages to be captured by the logger + /// + /// Maximum level of detail to be captured by the logger. + static void SetLevel(Level level) + { + const auto levelStr = Details::LevelToString(level); + diagnostics_set_log_level("memory", levelStr); + } + +private: + static std::string CollapseFilters(std::initializer_list filters) + { + std::string str = ""; + + if (filters.size() > 0) + { + std::ostringstream filtersCollapsed; + std::copy(filters.begin(), filters.end(), std::ostream_iterator(filtersCollapsed, ";")); + str = filtersCollapsed.str(); + } + + return str; + } +}; + +}}}}} diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_participant.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_participant.h new file mode 100644 index 0000000..b87ee40 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_participant.h @@ -0,0 +1,222 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_cxx_participant.h: Public API declarations for Participant C++ class +// + +#pragma once +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + +/// +/// Represents a participant in a conversation. +/// Added in version 1.5.0. +/// +class Participant +{ +private: + SPXPARTICIPANTHANDLE m_hparticipant; + SPXSTRING m_avatar; + SPXSTRING m_id; + SPXSTRING m_displayName; + bool m_isTts; + bool m_isMuted; + bool m_isHost; + +public: + /// + /// Create a participant using user id, her/his preferred language and her/his voice signature. + /// If voice signature is empty then user will not be identified. + /// + /// A user ids. + /// The preferred languages of the user. It can be optional. + /// The voice signature of the user. It can be optional. + /// A smart pointer of Participant + static std::shared_ptr From(const SPXSTRING& userId, const SPXSTRING& preferredLanguage = {}, const SPXSTRING& voiceSignature = {}) + { + SPXPARTICIPANTHANDLE hparticipant = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(participant_create_handle(&hparticipant, Utils::ToUTF8(userId.c_str()), Utils::ToUTF8(preferredLanguage.c_str()), Utils::ToUTF8(voiceSignature.c_str()))); + return std::make_shared(hparticipant); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// participant handle. + explicit Participant(SPXPARTICIPANTHANDLE hparticipant = SPXHANDLE_INVALID) : + m_hparticipant(hparticipant), + m_avatar(), + m_id(), + m_displayName(), + m_isTts(false), + m_isMuted(false), + m_isHost(false), + Id(m_id), + Avatar(m_avatar), + DisplayName(m_displayName), + IsUsingTts(m_isTts), + IsMuted(m_isMuted), + IsHost(m_isHost), + m_properties(hparticipant), + Properties(m_properties) + { + LoadConversationParticipantProperties(hparticipant); + } + + /// + /// Virtual destructor. + /// + virtual ~Participant() { participant_release_handle(m_hparticipant); } + + /// + /// Get the identifier for the participant. + /// + const SPXSTRING& Id; + + /// + /// Gets the colour of the user's avatar as an HTML hex string (e.g. FF0000 for red). + /// + const SPXSTRING& Avatar; + + /// + /// The participant's display name. Please note that each participant within the same conversation must + /// have a different display name. Duplicate names within the same conversation are not allowed. You can + /// use the Id property as another way to refer to each participant. + /// + const SPXSTRING& DisplayName; + + /// + /// Gets whether or not the participant is using Text To Speech (TTS). + /// + const bool& IsUsingTts; + + /// + /// Gets whether or not this participant is muted. + /// + const bool& IsMuted; + + /// + /// Gets whether or not this participant is the host. + /// + const bool& IsHost; + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXPARTICIPANTHANDLE() const { return m_hparticipant; } + + /// + /// Set preferred language. + /// + /// The preferred language, such as "en-us". + void SetPreferredLanguage(const std::string& preferredLanguage) + { + SPX_THROW_ON_FAIL(participant_set_preferred_langugage(m_hparticipant, Utils::ToUTF8(preferredLanguage.c_str()))); + } + + /// + /// Set voice signature. + /// If voice signature is empty then user will not be identified. + /// + /// The participant's voice signature." 
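// ---------------------------------------------------------------------------
// Usage sketch (illustration only; not part of this SDK header). The user id is
// a placeholder and `voiceSignatureJson` is a hypothetical string obtained from
// a voice signature enrollment step.
//
//     auto participant = Participant::From("carol@contoso.com");
//     participant->SetPreferredLanguage("en-US");
//     participant->SetVoiceSignature(voiceSignatureJson);  // declared just below; optional
// ---------------------------------------------------------------------------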
+ void SetVoiceSignature(const std::string& voiceSignature) + { + SPX_THROW_ON_FAIL(participant_set_voice_signature(m_hparticipant, Utils::ToUTF8(voiceSignature.c_str()))); + } + +private: + + /*! \cond PRIVATE */ + + DISABLE_COPY_AND_MOVE(Participant); + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXPARTICIPANTHANDLE hparticipant) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + participant_get_property_bag(hparticipant, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + SPXSTRING TryLoadString(SPXEVENTHANDLE hevent, SPXHR(SPXAPI_CALLTYPE * func)(SPXEVENTHANDLE, char*, uint32_t *)) + { + std::unique_ptr psz; + try + { + // query the string length + uint32_t length = 0; + + // don't use SPX_THROW_ON_FAIL since that creates a handle for exceptions that will leak + // since we don't care about them. + SPXHR hr = func(hevent, nullptr, &length); + if (SPX_FAILED(hr) || length == 0) + { + return SPXSTRING{}; + } + + psz = std::unique_ptr(new char[length]); + hr = func(hevent, psz.get(), &length); + if (SPX_FAILED(hr)) + { + return SPXSTRING{}; + } + + return Utils::ToSPXString(psz.get()); + } + catch (...) + { + // ignore errors since not all participants have the properties we need + return SPXSTRING{}; + } + } + + void LoadConversationParticipantProperties(SPXPARTICIPANTHANDLE hParticipant) + { + m_id = TryLoadString(hParticipant, conversation_translator_participant_get_id); + m_avatar = TryLoadString(hParticipant, conversation_translator_participant_get_avatar); + m_displayName = TryLoadString(hParticipant, conversation_translator_participant_get_displayname); + + bool val; + if (SPX_SUCCEEDED(conversation_translator_participant_get_is_using_tts(hParticipant, &val))) + { + m_isTts = val; + } + + if (SPX_SUCCEEDED(conversation_translator_participant_get_is_muted(hParticipant, &val))) + { + m_isMuted = val; + } + + if (SPX_SUCCEEDED(conversation_translator_participant_get_is_host(hParticipant, &val))) + { + m_isHost = val; + } + } + + /*! \endcond */ + +public: + + /// + /// Collection of additional participant properties. + /// + PropertyCollection& Properties; +}; + +}}}} diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_pattern_matching_entity.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_pattern_matching_entity.h new file mode 100644 index 0000000..823518f --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_pattern_matching_entity.h @@ -0,0 +1,46 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_pattern_matching_entity.h: Public API declarations for PatternMatchingEntity C++ struct +// + +#pragma once +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Intent { + +/// +/// Represents a pattern matching entity used for intent recognition. +/// +struct PatternMatchingEntity +{ + /// + /// An Id used to define this Entity if it is matched. This id must appear in an intent phrase + /// or it will never be matched. + /// + SPXSTRING Id; + + /// + /// The Type of this Entity. + /// + EntityType Type; + + /// + /// The EntityMatchMode of this Entity. 
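// ---------------------------------------------------------------------------
// Usage sketch (illustration only; not part of this SDK header). Builds a list
// entity using the Mode and Phrases members declared just below.
//
//     PatternMatchingEntity drinkSize;
//     drinkSize.Id      = "size";
//     drinkSize.Type    = EntityType::List;
//     drinkSize.Mode    = EntityMatchMode::Strict;
//     drinkSize.Phrases = { "small", "medium", "large" };
// ---------------------------------------------------------------------------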
+ /// + EntityMatchMode Mode; + + /// + /// If the Type is List these phrases will be used as the list. + /// + std::vector Phrases; + +}; + +} } } } // Microsoft::CognitiveServices::Speech::Intent diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_pattern_matching_intent.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_pattern_matching_intent.h new file mode 100644 index 0000000..0670291 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_pattern_matching_intent.h @@ -0,0 +1,36 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_pattern_matching_intent.h: Public API declarations for PatternMatchingIntent C++ struct +// + +#pragma once +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Intent { + +/// +/// Represents a pattern matching intent used for intent recognition. +/// +struct PatternMatchingIntent +{ + /// + /// Phrases and patterns that will trigger this intent. At least one phrase must exist to be able to + /// apply this intent to an IntentRecognizer. + /// + std::vector Phrases; + + /// + /// An Id used to define this Intent if it is matched. If no Id is specified, then the first phrase in Phrases + /// will be used. + /// + SPXSTRING Id; +}; + +} } } } // Microsoft::CognitiveServices::Speech::Intent diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_pattern_matching_model.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_pattern_matching_model.h new file mode 100644 index 0000000..2cb8954 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_pattern_matching_model.h @@ -0,0 +1,372 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_pattern_matching_model.h: Public API declarations for PatternMatchingModel C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Intent { + + /// + /// Represents a pattern matching model used for intent recognition. + /// + class PatternMatchingModel : public LanguageUnderstandingModel + { + public: + + /// + /// Creates a pattern matching model using the specified model ID. + /// + /// A string that represents a unique Id for this model. + /// A shared pointer to pattern matching model. + static std::shared_ptr FromModelId(const SPXSTRING& modelId) + { + return std::shared_ptr { + new PatternMatchingModel(modelId) + }; + } + + /// + /// Creates a pattern matching model using the specified .json file. This should follow the Microsoft LUIS JSON export schema. + /// + /// A string that representing the path to a '.json' file. + /// A shared pointer to pattern matching model. 
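// ---------------------------------------------------------------------------
// Usage sketch (illustration only; not part of this SDK header). Populates a
// model by hand using the Intents and Entities collections declared below; the
// "{deviceName}" placeholder marks where the entity may appear in a phrase.
//
//     auto model = PatternMatchingModel::FromModelId("homeAutomationModel");
//
//     PatternMatchingIntent turnOn;
//     turnOn.Id      = "TurnOn";
//     turnOn.Phrases = { "turn on the {deviceName}", "switch the {deviceName} on" };
//     model->Intents.push_back(turnOn);
//
//     PatternMatchingEntity device;
//     device.Id   = "deviceName";
//     device.Type = EntityType::Any;
//     device.Mode = EntityMatchMode::Basic;
//     model->Entities.push_back(device);
// ---------------------------------------------------------------------------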
+ static std::shared_ptr FromJSONFile(const SPXSTRING& filepath) + { + FILE* fp; + int err; +#ifdef _MSC_VER + err = fopen_s(&fp, filepath.c_str(), "r"); +#else + fp = fopen(filepath.c_str(), "r"); + if (fp == NULL) + { + err = -1; + } + else + { + err = 0; + } +#endif + if (err == 0 && fp != NULL) + { + char buffer[1024] = {}; + size_t numread = 0; + std::string fileContents = ""; +#ifdef _MSC_VER + while ((numread = fread_s((void**)&buffer, sizeof(buffer), sizeof(char), sizeof(buffer), fp)) != 0) +#else + while ((numread = fread((void**)&buffer, sizeof(char), sizeof(buffer), fp)) != 0) +#endif + { + fileContents.append(buffer, numread); + } + fclose(fp); + return ParseJSONFile(fileContents); + } + else + { + SPX_TRACE_ERROR("Attempt to read %s failed.", SPXERR_FILE_OPEN_FAILED, filepath.c_str()); + return nullptr; + } + } + + /// + /// Creates a PatternMatchingModel using the specified istream pointing to an .json file in the LUIS json format. + /// This assumes the stream is already open and has permission to read. + /// + /// A stream that representing a '.json' file. + /// A shared pointer to pattern matching model. + static std::shared_ptr FromJSONFileStream(std::istream& iStream) + { + std::istreambuf_iterator iterator{iStream}; + std::string str(iterator, {}); + return ParseJSONFile(str); + } + + /// + /// Returns id for this model. + /// + /// A string representing the id of this model. + SPXSTRING GetModelId() const { return m_modelId; } + + /// + /// This container of Intents is used to define all the Intents this model will look for. + /// + std::vector Intents; + + /// + /// This container of Intents is used to define all the Intents this model will look for. + /// + std::vector Entities; + + private: + DISABLE_COPY_AND_MOVE(PatternMatchingModel); + + PatternMatchingModel(const SPXSTRING& modelId) : LanguageUnderstandingModel(LanguageUnderstandingModelType::PatternMatchingModel), m_modelId(modelId) {} + + SPXSTRING m_modelId; + + static std::shared_ptr ParseJSONFile(const std::string& fileContents) + { + auto model = std::shared_ptr(new PatternMatchingModel("")); + AZAC_HANDLE parserHandle; + auto root = ai_core_json_parser_create(&parserHandle, fileContents.c_str(), fileContents.size()); + if (!ai_core_json_parser_handle_is_valid(parserHandle)) + { + SPX_TRACE_ERROR("Attempt to parse language understanding json file failed.", SPXERR_UNSUPPORTED_FORMAT); + return nullptr; + } + int count = ai_core_json_item_count(parserHandle, root); + for (int i = 0; i < count; i++) + { + auto itemInt = ai_core_json_item_at(parserHandle, root, i, nullptr); + auto nameInt = ai_core_json_item_name(parserHandle, itemInt); + size_t nameSize; + auto name = ai_core_json_value_as_string_ptr(parserHandle, nameInt, &nameSize); + + size_t valueSize = 0; + auto value = ai_core_json_value_as_string_ptr(parserHandle, itemInt, &valueSize); + if (name != nullptr) + { + auto nameStr = std::string(name, nameSize); + if (nameStr == "luis_schema_version") + { + // We support any version that we are able to pull data out of. 
+ } + else if (nameStr == "prebuiltEntities") + { + int prebuiltcount = ai_core_json_item_count(parserHandle, itemInt); + for (int j = 0; j < prebuiltcount; j++) + { + ParsePrebuiltEntityJson(parserHandle, model, itemInt, j); + } + } + else if (nameStr == "name") + { + model->m_modelId = std::string(value, valueSize); + } + else if (nameStr == "patternAnyEntities" || nameStr == "entities") + { + int anyCount = ai_core_json_item_count(parserHandle, itemInt); + for (int j = 0; j < anyCount; j++) + { + ParseEntityJson(parserHandle, model, itemInt, j); + } + } + else if (nameStr == "patterns") + { + int patternCount = ai_core_json_item_count(parserHandle, itemInt); + for (int j = 0; j < patternCount; j++) + { + ParsePatternJson(parserHandle, model, itemInt, j); + } + } + else if (nameStr == "closedLists") + { + int listCount = ai_core_json_item_count(parserHandle, itemInt); + for (int j = 0; j < listCount; j++) + { + ParseListEntityJson(parserHandle, model, itemInt, j); + } + } + } + } + return model; + } + + static void ParsePrebuiltEntityJson(AZAC_HANDLE parserHandle, std::shared_ptr model, int itemInt, int index) + { + auto subItemInt = ai_core_json_item_at(parserHandle, itemInt, index, nullptr); + int subItemCount = ai_core_json_item_count(parserHandle, subItemInt); + size_t nameSize = 0; + size_t valueSize = 0; + for (int subItemIndex = 0; subItemIndex < subItemCount; subItemIndex++) + { + auto prebuiltPairInt = ai_core_json_item_at(parserHandle, subItemInt, subItemIndex, nullptr); + auto nameInt = ai_core_json_item_name(parserHandle, prebuiltPairInt); + auto name = ai_core_json_value_as_string_ptr(parserHandle, nameInt, &nameSize); + if (name != nullptr) + { + auto nameStr = std::string(name, nameSize); + auto value = ai_core_json_value_as_string_ptr(parserHandle, prebuiltPairInt, &valueSize); + if (nameStr == "name" && value != nullptr) + { + auto valueStr = std::string(value, valueSize); + if (valueStr == "number") + { + model->Entities.push_back({ "number", EntityType::PrebuiltInteger, EntityMatchMode::Basic, {} }); + } + // ignore any other prebuilt types as they are not supported. + } + } + } + } + + static void ParseEntityJson(AZAC_HANDLE parserHandle, std::shared_ptr model, int itemInt, int index) + { + auto subItemInt = ai_core_json_item_at(parserHandle, itemInt, index, nullptr); + int subItemCount = ai_core_json_item_count(parserHandle, subItemInt); + size_t nameSize = 0; + size_t valueSize = 0; + for (int subItemIndex = 0; subItemIndex < subItemCount; subItemIndex++) + { + auto entityPairInt = ai_core_json_item_at(parserHandle, subItemInt, subItemIndex, nullptr); + auto nameInt = ai_core_json_item_name(parserHandle, entityPairInt); + auto name = ai_core_json_value_as_string_ptr(parserHandle, nameInt, &nameSize); + if (name != nullptr) + { + auto nameStr = std::string(name, nameSize); + auto value = ai_core_json_value_as_string_ptr(parserHandle, entityPairInt, &valueSize); + if (nameStr == "name" && value != nullptr) + { + model->Entities.push_back({ std::string(value, valueSize), EntityType::Any, EntityMatchMode::Basic, {}}); + } + // ignore any other pairs since we only care about the name. + } + } + } + + static void ParseListEntityJson(AZAC_HANDLE parserHandle, std::shared_ptr model, int itemInt, int index) + { + auto subItemInt = ai_core_json_item_at(parserHandle, itemInt, index, nullptr); + int subItemCount = ai_core_json_item_count(parserHandle, subItemInt); + size_t nameSize = 0; + size_t valueSize = 0; + // Default to Strict matching. 
+ PatternMatchingEntity entity{ "", EntityType::List, EntityMatchMode::Strict, {} }; + for (int subItemIndex = 0; subItemIndex < subItemCount; subItemIndex++) + { + auto listPairInt = ai_core_json_item_at(parserHandle, subItemInt, subItemIndex, nullptr); + auto nameInt = ai_core_json_item_name(parserHandle, listPairInt); + auto name = ai_core_json_value_as_string_ptr(parserHandle, nameInt, &nameSize); + if (name != nullptr) + { + auto nameStr = std::string(name, nameSize); + if (nameStr == "name") + { + auto value = ai_core_json_value_as_string_ptr(parserHandle, listPairInt, &valueSize); + if (value != nullptr) + { + entity.Id = std::string(value, valueSize); + } + } + if (nameStr == "subLists") + { + ParseSubList(parserHandle, entity, listPairInt); + } + // ignore any other pairs since we only care about the name. + } + } + model->Entities.push_back(entity); + } + + static void ParseSubList(AZAC_HANDLE parserHandle, PatternMatchingEntity& entity, int listPairInt) + { + size_t nameSize = 0; + size_t valueSize = 0; + auto subListCount = ai_core_json_item_count(parserHandle, listPairInt); + for (int subListIndex = 0; subListIndex < subListCount; subListIndex++) + { + auto subListItemInt = ai_core_json_item_at(parserHandle, listPairInt, subListIndex, nullptr); + auto subListItemCount = ai_core_json_item_count(parserHandle, subListItemInt); + for (int subListItemIndex = 0; subListItemIndex < subListItemCount; subListItemIndex++) + { + auto subListPairInt = ai_core_json_item_at(parserHandle, subListItemInt, subListItemIndex, nullptr); + auto nameInt = ai_core_json_item_name(parserHandle, subListPairInt); + auto name = ai_core_json_value_as_string_ptr(parserHandle, nameInt, &nameSize); + if (name != nullptr) + { + auto nameStr = std::string(name, nameSize); + if (nameStr == "canonicalForm") + { + auto value = ai_core_json_value_as_string_ptr(parserHandle, subListPairInt, &valueSize); + if (value != nullptr) + { + entity.Phrases.push_back(std::string(value, valueSize)); + } + } + else if (nameStr == "list") + { + auto subListSynonymInt = ai_core_json_item_at(parserHandle, subListItemInt, subListItemIndex, nullptr); + auto subListSynonymItemCount = ai_core_json_item_count(parserHandle, subListSynonymInt); + for (int subListSynonymIndex = 0; subListSynonymIndex < subListSynonymItemCount; subListSynonymIndex++) + { + auto subListSynonymEntryInt = ai_core_json_item_at(parserHandle, subListSynonymInt, subListSynonymIndex, nullptr); + auto value = ai_core_json_value_as_string_ptr(parserHandle, subListSynonymEntryInt, &valueSize); + if (value != nullptr) + { + entity.Phrases.push_back(std::string(value, valueSize)); + } + } + } + } + } + } + } + + static void ParsePatternJson(AZAC_HANDLE parserHandle, std::shared_ptr model, int itemInt, int index) + { + auto subItemInt = ai_core_json_item_at(parserHandle, itemInt, index, nullptr); + int subItemCount = ai_core_json_item_count(parserHandle, subItemInt); + size_t nameSize = 0; + size_t valueSize = 0; + std::string patternStr, intentIdStr; + for (int subItemIndex = 0; subItemIndex < subItemCount; subItemIndex++) + { + auto entityPairInt = ai_core_json_item_at(parserHandle, subItemInt, subItemIndex, nullptr); + auto nameInt = ai_core_json_item_name(parserHandle, entityPairInt); + auto name = ai_core_json_value_as_string_ptr(parserHandle, nameInt, &nameSize); + if (name != nullptr) + { + auto nameStr = std::string(name, nameSize); + auto value = ai_core_json_value_as_string_ptr(parserHandle, entityPairInt, &valueSize); + if (value != nullptr) + { + if 
(nameStr == "pattern") + { + patternStr = std::string(value, valueSize); + } + else if (nameStr == "intent") + { + intentIdStr = std::string(value, valueSize); + } + } + // ignore any other pairs since we only care about the name. + } + } + if (!patternStr.empty() && !intentIdStr.empty()) + { + bool added = false; + for (auto& intent : model->Intents) + { + if (intent.Id == intentIdStr) + { + intent.Phrases.push_back(patternStr); + added = true; + break; + } + } + if (!added) + { + model->Intents.push_back({ {patternStr}, intentIdStr}); + } + } + } + +}; + +} } } } // Microsoft::CognitiveServices::Speech::Intent diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_phrase_list_grammar.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_phrase_list_grammar.h new file mode 100644 index 0000000..dbcece2 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_phrase_list_grammar.h @@ -0,0 +1,92 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_phrase_list_grammar.h: Public API declarations for PhraseListGrammar C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Represents a phrase list grammar for dynamic grammar scenarios. +/// Added in version 1.5.0. +/// +class PhraseListGrammar : public Grammar +{ +public: + + /// + /// Creates a phrase list grammar for the specified recognizer. + /// + /// The recognizer from which to obtain the phrase list grammar. + /// A shared pointer to phrase list grammar. + template + static std::shared_ptr FromRecognizer(std::shared_ptr recognizer) + { + return FromRecognizer(recognizer, Utils::ToSPXString(nullptr)); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Phrase list grammar handle. + explicit PhraseListGrammar(SPXGRAMMARHANDLE hgrammar = SPXHANDLE_INVALID) : Grammar(hgrammar) { } + + /// + /// Adds a simple phrase that may be spoken by the user. + /// + /// The phrase to be added. + void AddPhrase(const SPXSTRING& text) + { + auto phrase = GrammarPhrase::From(text); + SPX_THROW_ON_FAIL(phrase_list_grammar_add_phrase(m_hgrammar.get(), (SPXPHRASEHANDLE)(*phrase.get()))); + } + + /// + /// Clears all phrases from the phrase list grammar. + /// + void Clear() + { + SPX_THROW_ON_FAIL(phrase_list_grammar_clear(m_hgrammar.get())); + } + +protected: + + /// + /// Internal. Creates a phrase list grammar for the specified recognizer, with the specified name. + /// + /// The recognizer from which to obtain the phrase list grammar. + /// The name of the phrase list grammar to create. + /// A shared pointer to phrase list grammar. + template + static std::shared_ptr FromRecognizer(std::shared_ptr recognizer, const SPXSTRING& name) + { + SPXRECOHANDLE hreco = recognizer != nullptr + ? 
(SPXRECOHANDLE)(*recognizer.get()) + : SPXHANDLE_INVALID; + + SPXGRAMMARHANDLE hgrammar = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(phrase_list_grammar_from_recognizer_by_name(&hgrammar, hreco, Utils::ToUTF8(name.c_str()))); + + return std::make_shared(hgrammar); + } + + +private: + + DISABLE_COPY_AND_MOVE(PhraseListGrammar); +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_pronunciation_assessment_config.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_pronunciation_assessment_config.h new file mode 100644 index 0000000..88ddc99 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_pronunciation_assessment_config.h @@ -0,0 +1,222 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once + +#include + +#include "speechapi_cxx_properties.h" +#include "speechapi_cxx_string_helpers.h" +#include "speechapi_cxx_utils.h" +#include "speechapi_cxx_common.h" +#include "speechapi_cxx_enums.h" +#include +#include "speechapi_c_pronunciation_assessment_config.h" + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class that defines pronunciation assessment configuration +/// Added in 1.14.0 +/// +class PronunciationAssessmentConfig +{ +public: + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE() const { return m_hconfig; } + + /// + /// Creates an instance of the PronunciationAssessmentConfig + /// For parameter details, see the table + /// [Pronunciation assessment parameters](/azure/cognitive-services/speech-service/rest-speech-to-text-short#pronunciation-assessment-parameters). + /// + /// The reference text + /// The point system for score calibration + /// The evaluation granularity + /// If enables miscue calculation. When true, the pronounced words are compared to the reference text, and are marked with omission/insertion based on the comparison; when false, the recognized text will always be reference text. + /// A shared pointer to the new PronunciationAssessmentConfig instance. + static std::shared_ptr Create(const std::string& referenceText, + PronunciationAssessmentGradingSystem gradingSystem = + PronunciationAssessmentGradingSystem::FivePoint, + PronunciationAssessmentGranularity granularity = + PronunciationAssessmentGranularity::Phoneme, + bool enableMiscue = false) + { + SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL( + create_pronunciation_assessment_config(&hconfig, Utils::ToUTF8(referenceText).c_str(), + static_cast(gradingSystem), + static_cast(granularity), + enableMiscue)); + const auto ptr = new PronunciationAssessmentConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of the PronunciationAssessmentConfig + /// For parameters details, see the table + /// [Pronunciation assessment parameters](/azure/cognitive-services/speech-service/rest-speech-to-text-short#pronunciation-assessment-parameters). + /// + /// The reference text + /// The point system for score calibration + /// The evaluation granularity + /// If enables miscue calculation + /// A shared pointer to the new PronunciationAssessmentConfig instance. 
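// ---------------------------------------------------------------------------
// Usage sketch (illustration only; not part of this SDK header). The reference
// text is a placeholder.
//
//     auto paConfig = PronunciationAssessmentConfig::Create(
//         "good morning",                                    // reference text
//         PronunciationAssessmentGradingSystem::HundredMark,
//         PronunciationAssessmentGranularity::Phoneme,
//         /* enableMiscue */ true);
// ---------------------------------------------------------------------------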
+ static std::shared_ptr Create(const std::wstring& referenceText, + PronunciationAssessmentGradingSystem gradingSystem = + PronunciationAssessmentGradingSystem::FivePoint, + PronunciationAssessmentGranularity granularity = + PronunciationAssessmentGranularity::Phoneme, + bool enableMiscue = false) + { + return Create(Utils::ToUTF8(referenceText), gradingSystem, granularity, enableMiscue); + } + + /// + /// Creates an instance of the PronunciationAssessmentConfig from json. See the table + /// [Pronunciation assessment parameters](/azure/cognitive-services/speech-service/rest-speech-to-text-short#pronunciation-assessment-parameters). + /// + /// The json string containing the pronunciation assessment parameters. + /// A shared pointer to the new PronunciationAssessmentConfig instance. + static std::shared_ptr CreateFromJson(const SPXSTRING& json) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, json.empty()); + SPXAUTODETECTSOURCELANGCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(create_pronunciation_assessment_config_from_json(&hconfig, Utils::ToUTF8(json).c_str())); + const auto ptr = new PronunciationAssessmentConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Gets to json string of pronunciation assessment parameters. + /// + /// json string of pronunciation assessment parameters. + SPXSTRING ToJson() const + { + const char* jsonCch = pronunciation_assessment_config_to_json(m_hconfig); + return Utils::ToSPXString(Utils::CopyAndFreePropertyString(jsonCch)); + } + + /// + /// Gets the reference text. + /// + /// The reference text. + SPXSTRING GetReferenceText() + { + const char* value = property_bag_get_string(m_propertybag, static_cast(PropertyId::PronunciationAssessment_ReferenceText), nullptr, ""); + return Utils::ToSPXString(Utils::CopyAndFreePropertyString(value)); + } + + /// + /// Sets the reference text. + /// + /// The reference text. + void SetReferenceText(const std::string& referenceText) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::PronunciationAssessment_ReferenceText), nullptr, referenceText.c_str()); + } + + /// + /// Sets the reference text. + /// + /// The reference text. + void SetReferenceText(const std::wstring& referenceText) + { + SetReferenceText(Utils::ToUTF8(referenceText)); + } + + /// + /// Sets phoneme alphabet. Valid values are: "SAPI" (default) and "IPA". + /// + /// Added in version 1.20.0. + /// The phoneme alphabet. + void SetPhonemeAlphabet(const SPXSTRING& phonemeAlphabet) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::PronunciationAssessment_PhonemeAlphabet), nullptr, Utils::ToUTF8(phonemeAlphabet).c_str()); + } + + /// + /// Sets nbest phoneme count in the result. + /// + /// Added in version 1.20.0. + /// The nbest phoneme count. + void SetNBestPhonemeCount(int count) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::PronunciationAssessment_NBestPhonemeCount), nullptr, std::to_string(count).c_str()); + } + + /// + /// Enables prosody assessment. + /// + /// Added in version 1.33.0. + void EnableProsodyAssessment() + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::PronunciationAssessment_EnableProsodyAssessment), nullptr, "true"); + } + + /// + /// Enables the content assessment with topic. + /// + /// Added in version 1.33.0. + /// The content topic. 
+ void EnableContentAssessmentWithTopic(const SPXSTRING& contentTopic) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::PronunciationAssessment_ContentTopic), nullptr, Utils::ToUTF8(contentTopic).c_str()); + } + + /// + /// Applies the settings in this config to a Recognizer. + /// + /// The target Recognizer. + void ApplyTo(std::shared_ptr recognizer) const + { + SPX_INIT_HR(hr); + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, recognizer == nullptr); + + SPX_THROW_ON_FAIL(hr =::pronunciation_assessment_config_apply_to_recognizer(m_hconfig, recognizer->m_hreco)); + } + + /// + /// Destructs the object. + /// + virtual ~PronunciationAssessmentConfig() + { + pronunciation_assessment_config_release(m_hconfig); + property_bag_release(m_propertybag); + } + +private: + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit PronunciationAssessmentConfig(SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE hconfig) + :m_hconfig(hconfig) + { + SPX_THROW_ON_FAIL(pronunciation_assessment_config_get_property_bag(hconfig, &m_propertybag)); + } + + /// + /// Internal member variable that holds the config + /// + SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE m_hconfig; + + /// + /// Internal member variable that holds the properties of the speech config + /// + SPXPROPERTYBAGHANDLE m_propertybag; + + DISABLE_COPY_AND_MOVE(PronunciationAssessmentConfig); +}; + +}}} + diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_pronunciation_assessment_result.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_pronunciation_assessment_result.h new file mode 100644 index 0000000..aedbcb7 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_pronunciation_assessment_result.h @@ -0,0 +1,142 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_pronunciation_assessment_result.h: Public API declarations for PronunciationAssessmentResult C++ class +// + +#pragma once +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class for content assessment results. +/// +class PronunciationContentAssessmentResult +{ +public: + /// + /// The score indicating the grammar of the given speech. + /// + const double GrammarScore; + + /// + /// The score indicating the vocabulary of the given speech. + /// + const double VocabularyScore; + + /// + /// The score indicating the topic of the given speech. + /// + const double TopicScore; + + /*! \cond INTERNAL */ + + PronunciationContentAssessmentResult(const PropertyCollection& properties) : + GrammarScore(std::stod(properties.GetProperty("ContentAssessment_GrammarScore", "-1"))), + VocabularyScore(std::stod(properties.GetProperty("ContentAssessment_VocabularyScore", "-1"))), + TopicScore(std::stod(properties.GetProperty("ContentAssessment_TopicScore", "-1"))) + { + } + + /*! \endcond */ + +}; + + +/// +/// Class for pronunciation assessment results. +/// +class PronunciationAssessmentResult +{ +public: + + /// + /// Creates a pronunciation assessment result object from recognition result + /// If nullptr is returned, it means the assessment is failed. + /// + /// recognition result + /// A shared pointer to the created PronunciationAssessmentResult instance. 
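A usage sketch for the pronunciation assessment config above, read back through the PronunciationAssessmentResult declared in the result header added just below. SpeechConfig, SpeechRecognizer and AudioConfig are assumed from the other headers in this wrapper; key, region and the WAV path are placeholders:

#include <speechapi_cxx.h>
#include <iostream>
using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Audio;

static void AssessPronunciationSketch()
{
    auto speechConfig = SpeechConfig::FromSubscription("<key>", "<region>");
    auto audioConfig  = AudioConfig::FromWavFileInput("utterance.wav");
    auto recognizer   = SpeechRecognizer::FromConfig(speechConfig, audioConfig);

    // Score the utterance against the expected text, with miscue marking enabled.
    auto paConfig = PronunciationAssessmentConfig::Create(
        "good morning",
        PronunciationAssessmentGradingSystem::HundredMark,
        PronunciationAssessmentGranularity::Phoneme,
        true /* enableMiscue */);
    paConfig->ApplyTo(recognizer);

    auto result   = recognizer->RecognizeOnceAsync().get();
    auto paResult = PronunciationAssessmentResult::FromResult(result);
    if (paResult != nullptr) // nullptr: no assessment payload came back
    {
        std::cout << "accuracy "      << paResult->AccuracyScore
                  << " fluency "      << paResult->FluencyScore
                  << " completeness " << paResult->CompletenessScore
                  << " overall "      << paResult->PronunciationScore << "\n";
    }
}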
+ static std::shared_ptr FromResult(std::shared_ptr result) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, result == nullptr); + if (result->Properties.GetProperty("AccuracyScore").empty() && result->Properties.GetProperty("ContentAssessment_GrammarScore").empty()) + { + return nullptr; + } + auto ptr = new PronunciationAssessmentResult(result->Properties); + return std::shared_ptr(ptr); + } + + /// + /// The score indicating the pronunciation accuracy of the given speech, which indicates + /// how closely the phonemes match a native speaker's pronunciation. + /// If this is less 0, it means the pronunciation assessment failed. + /// + const double AccuracyScore; + + /// + /// The overall score indicating the pronunciation quality of the given speech. + /// This is calculated from AccuracyScore, FluencyScore and CompletenessScore with weight. + /// If this is less 0, it means the pronunciation assessment failed. + /// + const double PronunciationScore; + + /// + /// The score indicating the completeness of the given speech by calculating the ratio of pronounced words towards entire input. + /// If this is less 0, it means the pronunciation assessment failed. + /// + const double CompletenessScore; + + /// + /// The score indicating the fluency of the given speech. + /// If this is less 0, it means the pronunciation assessment failed. + /// + const double FluencyScore; + + /// + /// The score indicating the prosody of the given speech. + /// If this is less 0, it means the prosody assessment is not enabled. + /// + const double ProsodyScore; + + /// + /// The content assessment result. Only available when content assessment is enabled. + /// + std::shared_ptr ContentAssessmentResult; + + +protected: + + /*! \cond PROTECTED */ + + + explicit PronunciationAssessmentResult(const PropertyCollection& properties) : + AccuracyScore(std::stod(properties.GetProperty("AccuracyScore", "-1"))), + PronunciationScore(std::stod(properties.GetProperty("PronScore", "-1"))), + CompletenessScore(std::stod(properties.GetProperty("CompletenessScore", "-1"))), + FluencyScore(std::stod(properties.GetProperty("FluencyScore", "-1"))), + ProsodyScore(std::stod(properties.GetProperty("ProsodyScore", "-1"))) + { + if (!properties.GetProperty("ContentAssessment_GrammarScore").empty()) + { + this->ContentAssessmentResult = std::make_shared(properties); + } + } + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(PronunciationAssessmentResult); +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_properties.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_properties.h new file mode 100644 index 0000000..5a8a7b8 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_properties.h @@ -0,0 +1,99 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +class KeywordRecognizer; + +/// +/// Class to retrieve or set a property value from a property collection. +/// +class PropertyCollection +{ +public: + + /// + /// Destructor. + /// + ~PropertyCollection() + { + if (property_bag_is_valid(m_propbag)) + { + property_bag_release(m_propbag); + m_propbag = SPXHANDLE_INVALID; + } + } + + /// + /// Set value of a property. + /// + /// The id of the property. 
See + /// value to set + void SetProperty(PropertyId propertyID, const SPXSTRING& value) + { + property_bag_set_string(m_propbag, (int)propertyID, NULL, Utils::ToUTF8(value).c_str()); + } + + /// + /// Set value of a property. + /// + /// The name of property. + /// value to set + void SetProperty(const SPXSTRING& propertyName, const SPXSTRING& value) + { + property_bag_set_string(m_propbag, -1, Utils::ToUTF8(propertyName).c_str(), Utils::ToUTF8(value).c_str()); + } + + /// + /// Returns value of a property. + /// If the property value is not defined, the specified default value is returned. + /// + /// The id of the property. See + /// The default value which is returned if no value is defined for the property (empty string by default). + /// value of the property. + SPXSTRING GetProperty(PropertyId propertyID, const SPXSTRING& defaultValue = SPXSTRING()) const + { + const char* propCch = property_bag_get_string(m_propbag, static_cast(propertyID), nullptr, Utils::ToUTF8(defaultValue).c_str()); + return Utils::ToSPXString(Utils::CopyAndFreePropertyString(propCch)); + } + + /// + /// Returns value of a property. + /// If the property value is not defined, the specified default value is returned. + /// + /// The name of the property. + /// The default value which is returned if no value is defined for the property (empty string by default). + /// value of the property. + SPXSTRING GetProperty(const SPXSTRING& propertyName, const SPXSTRING& defaultValue = SPXSTRING()) const + { + const char* propCch = property_bag_get_string(m_propbag, -1, Utils::ToUTF8(propertyName).c_str(), Utils::ToUTF8(defaultValue).c_str()); + return Utils::ToSPXString(Utils::CopyAndFreePropertyString(propCch)); + } + +protected: + friend class KeywordRecognizer; + + /*! \cond PROTECTED */ + + PropertyCollection(SPXPROPERTYBAGHANDLE propbag) : m_propbag(propbag) {} + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(PropertyCollection); + + SPXPROPERTYBAGHANDLE m_propbag; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_recognition_async_recognizer.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_recognition_async_recognizer.h new file mode 100644 index 0000000..ebb63d4 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_recognition_async_recognizer.h @@ -0,0 +1,473 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_recognition_async_recognizer.h: Public API declarations for AsyncRecognizer C++ template class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// AsyncRecognizer abstract base class. +/// +template +class AsyncRecognizer : public Recognizer +{ +public: + + /// + /// Performs recognition in a non-blocking (asynchronous) mode. + /// + /// Future containing result value (a shared pointer to RecoResult) + /// of the asynchronous recognition. + /// + virtual std::future> RecognizeOnceAsync() = 0; + + /// + /// Asynchronously initiates continuous recognition operation. + /// + /// An empty future. + virtual std::future StartContinuousRecognitionAsync() = 0; + + /// + /// Asynchronously terminates ongoing continuous recognition operation. 
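The PropertyCollection above is the surface exposed as the Properties member on the recognizer, result and config classes in these headers; entries can be addressed either by PropertyId or by a free-form name. A small sketch, assuming a SpeechRecognizer from the other headers in this wrapper (the custom key is illustrative, not a service-defined one):

#include <speechapi_cxx.h>
#include <memory>
using namespace Microsoft::CognitiveServices::Speech;

static void PropertySketch(std::shared_ptr<SpeechRecognizer> recognizer)
{
    // Well-known ids ...
    recognizer->Properties.SetProperty(PropertyId::SpeechServiceConnection_RecoLanguage, "en-US");
    // ... or arbitrary names.
    recognizer->Properties.SetProperty("MyPlugin.Setting", "42");

    auto result = recognizer->RecognizeOnceAsync().get();
    // The raw service JSON rides along on the result's property bag.
    auto json = result->Properties.GetProperty(PropertyId::SpeechServiceResponse_JsonResult, "{}");
    (void)json;
}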
+ /// + /// An empty future. + virtual std::future StopContinuousRecognitionAsync() = 0; + + /// + /// Asynchronously initiates keyword recognition operation. + /// + /// The keyword recognition model that specifies the keyword to be recognized. + /// An asynchronous operation that starts the keyword recognition. + virtual std::future StartKeywordRecognitionAsync(std::shared_ptr model) = 0; + + /// + /// Asynchronously terminates ongoing keyword recognition operation. + /// + /// An empty future. + virtual std::future StopKeywordRecognitionAsync() = 0; + + /// + /// Signal for events indicating the start of a recognition session (operation). + /// + EventSignal SessionStarted; + + /// + /// Signal for events indicating the end of a recognition session (operation). + /// + EventSignal SessionStopped; + + /// + /// Signal for events indicating the start of speech. + /// + EventSignal SpeechStartDetected; + + /// + /// Signal for events indicating the end of speech. + /// + EventSignal SpeechEndDetected; + + /// + /// Signal for events containing intermediate recognition results. + /// + EventSignal Recognizing; + + /// + /// Signal for events containing final recognition results. + /// (indicating a successful recognition attempt). + /// + EventSignal Recognized; + + /// + /// Signal for events containing canceled recognition results + /// (indicating a recognition attempt that was canceled as a result or a direct cancellation request + /// or, alternatively, a transport or protocol failure). + /// + EventSignal Canceled; + +protected: + + /*! \cond PROTECTED */ + + explicit AsyncRecognizer(SPXRECOHANDLE hreco) throw() : + Recognizer(hreco), + SessionStarted(GetSessionEventConnectionsChangedCallback()), + SessionStopped(GetSessionEventConnectionsChangedCallback()), + SpeechStartDetected(GetRecognitionEventConnectionsChangedCallback()), + SpeechEndDetected(GetRecognitionEventConnectionsChangedCallback()), + Recognizing(GetRecoEventConnectionsChangedCallback()), + Recognized(GetRecoEventConnectionsChangedCallback()), + Canceled(GetRecoCanceledEventConnectionsChangedCallback()), + m_properties(hreco), + m_hasyncRecognize(SPXHANDLE_INVALID), + m_hasyncStartContinuous(SPXHANDLE_INVALID), + m_hasyncStopContinuous(SPXHANDLE_INVALID), + m_hasyncStartKeyword(SPXHANDLE_INVALID), + m_hasyncStopKeyword(SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + }; + + virtual ~AsyncRecognizer() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + TermRecognizer(); + }; + + virtual void TermRecognizer() override + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + // Disconnect the event signals in reverse construction order + Canceled.DisconnectAll(); + Recognized.DisconnectAll(); + Recognizing.DisconnectAll(); + SpeechEndDetected.DisconnectAll(); + SpeechStartDetected.DisconnectAll(); + SessionStopped.DisconnectAll(); + SessionStarted.DisconnectAll(); + + // Close the async handles we have open for Recognize, StartContinuous, and StopContinuous + for (auto handle : { &m_hasyncRecognize, &m_hasyncStartContinuous, &m_hasyncStopContinuous }) + { + if (*handle != SPXHANDLE_INVALID && ::recognizer_async_handle_is_valid(*handle)) + { + ::recognizer_async_handle_release(*handle); + *handle = SPXHANDLE_INVALID; + } + } + + // Ask the base to term + Recognizer::TermRecognizer(); + } + + std::future> RecognizeOnceAsyncInternal() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() -> std::shared_ptr { + SPX_INIT_HR(hr); + + 
SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(hr = recognizer_recognize_once(m_hreco, &hresult)); + + return std::make_shared(hresult); + }); + + return future; + }; + + std::future StartContinuousRecognitionAsyncInternal() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() -> void { + SPX_INIT_HR(hr); + SPX_THROW_ON_FAIL(hr = recognizer_async_handle_release(m_hasyncStartContinuous)); // close any unfinished previous attempt + + SPX_EXITFN_ON_FAIL(hr = recognizer_start_continuous_recognition_async(m_hreco, &m_hasyncStartContinuous)); + SPX_EXITFN_ON_FAIL(hr = recognizer_start_continuous_recognition_async_wait_for(m_hasyncStartContinuous, UINT32_MAX)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = recognizer_async_handle_release(m_hasyncStartContinuous); + SPX_REPORT_ON_FAIL(releaseHr); + m_hasyncStartContinuous = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(hr); + }); + + return future; + }; + + std::future StopContinuousRecognitionAsyncInternal() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() -> void { + SPX_INIT_HR(hr); + SPX_THROW_ON_FAIL(hr = recognizer_async_handle_release(m_hasyncStopContinuous)); // close any unfinished previous attempt + + SPX_EXITFN_ON_FAIL(hr = recognizer_stop_continuous_recognition_async(m_hreco, &m_hasyncStopContinuous)); + SPX_EXITFN_ON_FAIL(hr = recognizer_stop_continuous_recognition_async_wait_for(m_hasyncStopContinuous, UINT32_MAX)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = recognizer_async_handle_release(m_hasyncStopContinuous); + SPX_REPORT_ON_FAIL(releaseHr); + m_hasyncStopContinuous = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(hr); + }); + + return future; + } + + std::future StartKeywordRecognitionAsyncInternal(std::shared_ptr model) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, model, this]() -> void { + SPX_INIT_HR(hr); + SPX_THROW_ON_FAIL(hr = recognizer_async_handle_release(m_hasyncStartKeyword)); // close any unfinished previous attempt + + auto hkeyword = (SPXKEYWORDHANDLE)(*model.get()); + SPX_EXITFN_ON_FAIL(hr = recognizer_start_keyword_recognition_async(m_hreco, hkeyword, &m_hasyncStartKeyword)); + SPX_EXITFN_ON_FAIL(hr = recognizer_start_keyword_recognition_async_wait_for(m_hasyncStartKeyword, UINT32_MAX)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = recognizer_async_handle_release(m_hasyncStartKeyword); + SPX_REPORT_ON_FAIL(releaseHr); + m_hasyncStartKeyword = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(hr); + }); + + return future; + }; + + std::future StopKeywordRecognitionAsyncInternal() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() -> void { + SPX_INIT_HR(hr); + SPX_THROW_ON_FAIL(hr = recognizer_async_handle_release(m_hasyncStopKeyword)); // close any unfinished previous attempt + + SPX_EXITFN_ON_FAIL(hr = recognizer_stop_keyword_recognition_async(m_hreco, &m_hasyncStopKeyword)); + SPX_EXITFN_ON_FAIL(hr = recognizer_stop_keyword_recognition_async_wait_for(m_hasyncStopKeyword, UINT32_MAX)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = recognizer_async_handle_release(m_hasyncStopKeyword); + SPX_REPORT_ON_FAIL(releaseHr); + m_hasyncStartKeyword = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(hr); + }); + + return future; + }; + + virtual void RecoEventConnectionsChanged(const EventSignal& recoEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: 
m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&recoEvent == &Recognizing) + { + recognizer_recognizing_set_callback(m_hreco, Recognizing.IsConnected() ? AsyncRecognizer::FireEvent_Recognizing: nullptr, this); + } + else if (&recoEvent == &Recognized) + { + recognizer_recognized_set_callback(m_hreco, Recognized.IsConnected() ? AsyncRecognizer::FireEvent_Recognized: nullptr, this); + } + } + } + + virtual void RecoCanceledEventConnectionsChanged(const EventSignal& recoEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&recoEvent == &Canceled) + { + recognizer_canceled_set_callback(m_hreco, Canceled.IsConnected() ? AsyncRecognizer::FireEvent_Canceled : nullptr, this); + } + } + } + + virtual void RecognitionEventConnectionsChanged(const EventSignal& recognitionEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&recognitionEvent == &SpeechStartDetected) + { + recognizer_speech_start_detected_set_callback(m_hreco, SpeechStartDetected.IsConnected() ? AsyncRecognizer::FireEvent_SpeechStartDetected : nullptr, this); + } + else if (&recognitionEvent == &SpeechEndDetected) + { + recognizer_speech_end_detected_set_callback(m_hreco, SpeechEndDetected.IsConnected() ? AsyncRecognizer::FireEvent_SpeechEndDetected : nullptr, this); + } + } + } + + virtual void SessionEventConnectionsChanged(const EventSignal& sessionEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&sessionEvent == &SessionStarted) + { + recognizer_session_started_set_callback(m_hreco, SessionStarted.IsConnected() ? AsyncRecognizer::FireEvent_SessionStarted: nullptr, this); + } + else if (&sessionEvent == &SessionStopped) + { + recognizer_session_stopped_set_callback(m_hreco, SessionStopped.IsConnected() ? AsyncRecognizer::FireEvent_SessionStopped : nullptr, this); + } + } + } + + static void FireEvent_SessionStarted(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr sessionEvent { new SessionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SessionStarted.Signal(*sessionEvent.get()); + + // SessionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + static void FireEvent_SessionStopped(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr sessionEvent { new SessionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SessionStopped.Signal(*sessionEvent.get()); + + // SessionEventArgs doesn't hold hevent, and thus can't release it properly ... 
release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + static void FireEvent_SpeechStartDetected(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new RecognitionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SpeechStartDetected.Signal(*recoEvent.get()); + + // RecognitionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + static void FireEvent_SpeechEndDetected(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new RecognitionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SpeechEndDetected.Signal(*recoEvent.get()); + + // RecognitionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + static void FireEvent_Recognizing(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent { new RecoEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Recognizing.Signal(*recoEvent.get()); + } + + static void FireEvent_Recognized(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent { new RecoEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Recognized.Signal(*recoEvent.get()); + } + + static void FireEvent_Canceled(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + + auto ptr = new RecoCanceledEventArgs(hevent); + std::shared_ptr recoEvent(ptr); + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Canceled.Signal(*ptr); + } + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRECOHANDLE hreco) : + PropertyCollection( + [=](){ + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + recognizer_get_property_bag(hreco, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + SPXASYNCHANDLE m_hasyncRecognize; + SPXASYNCHANDLE m_hasyncStartContinuous; + SPXASYNCHANDLE m_hasyncStopContinuous; + SPXASYNCHANDLE m_hasyncStartKeyword; + SPXASYNCHANDLE m_hasyncStopKeyword; + + template + static Handle HandleOrInvalid(std::shared_ptr audioInput) + { + return audioInput == nullptr + ? (Handle)SPXHANDLE_INVALID + : (Handle)(*audioInput.get()); + } + + /*! 
\endcond */ + +private: + + DISABLE_DEFAULT_CTORS(AsyncRecognizer); + + inline std::function&)> GetSessionEventConnectionsChangedCallback() + { + return [=, this](const EventSignal& sessionEvent) { this->SessionEventConnectionsChanged(sessionEvent); }; + } + + inline std::function&)> GetRecoEventConnectionsChangedCallback() + { + return [=, this](const EventSignal& recoEvent) { this->RecoEventConnectionsChanged(recoEvent); }; + } + + inline std::function&)> GetRecoCanceledEventConnectionsChangedCallback() + { + return [=, this](const EventSignal& recoEvent) { this->RecoCanceledEventConnectionsChanged(recoEvent); }; + } + + inline std::function&)> GetRecognitionEventConnectionsChangedCallback() + { + return [=, this](const EventSignal& recoEvent) { this->RecognitionEventConnectionsChanged(recoEvent); }; + } +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_recognition_base_async_recognizer.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_recognition_base_async_recognizer.h new file mode 100644 index 0000000..5723e46 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_recognition_base_async_recognizer.h @@ -0,0 +1,53 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_recognition_base_async_recognizer.h: Public API declarations for BaseAsyncRecognizer C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// BaseAsyncRecognizer class. +/// +class BaseAsyncRecognizer : public AsyncRecognizer +{ +protected: + + /*! \cond PROTECTED */ + + using BaseType = AsyncRecognizer; + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit BaseAsyncRecognizer(SPXRECOHANDLE hreco) : + BaseType(hreco) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + ~BaseAsyncRecognizer() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + TermRecognizer(); + } + + DISABLE_DEFAULT_CTORS(BaseAsyncRecognizer); + + /*! \endcond */ +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_recognition_eventargs.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_recognition_eventargs.h new file mode 100644 index 0000000..59713c8 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_recognition_eventargs.h @@ -0,0 +1,68 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_recognition_eventargs.h: Public API declarations for RecognitionEventArgs C++ base class +// + +#pragma once +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Provides data for the RecognitionEvent. +/// +class RecognitionEventArgs : public SessionEventArgs +{ +public: + + /// + /// Constructor. Creates a new instance using the provided handle. + /// + /// Event handle. 
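The event signals and Start/Stop methods on AsyncRecognizer above are what the concrete recognizers (for example the SDK's SpeechRecognizer) inherit. A typical continuous-recognition sketch; SpeechRecognizer and its event-args types are assumed from the other headers in this wrapper:

#include <speechapi_cxx.h>
#include <iostream>
#include <memory>
using namespace Microsoft::CognitiveServices::Speech;

static void ContinuousRecognitionSketch(std::shared_ptr<SpeechRecognizer> recognizer)
{
    recognizer->Recognizing.Connect([](const SpeechRecognitionEventArgs& e)
    {
        std::cout << "partial: " << e.Result->Text << "\n";
    });
    recognizer->Recognized.Connect([](const SpeechRecognitionEventArgs& e)
    {
        if (e.Result->Reason == ResultReason::RecognizedSpeech)
            std::cout << "final: " << e.Result->Text << "\n";
    });
    recognizer->Canceled.Connect([](const SpeechRecognitionCanceledEventArgs& e)
    {
        std::cout << "canceled: " << e.ErrorDetails << "\n";
    });
    recognizer->SessionStopped.Connect([](const SessionEventArgs& e)
    {
        std::cout << "session " << e.SessionId << " stopped\n";
    });

    recognizer->StartContinuousRecognitionAsync().get();
    // ... audio flows until the app decides to stop ...
    recognizer->StopContinuousRecognitionAsync().get();
}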
+ explicit RecognitionEventArgs(SPXEVENTHANDLE hevent) : + SessionEventArgs(hevent), + Offset(m_offset), + m_offset(GetOffset(hevent)) + { + }; + + /// + virtual ~RecognitionEventArgs() {} + + /// + /// The offset of recognition event + /// + const uint64_t& Offset; + +protected: + + /*! \cond PROTECTED */ + + /// + /// Extract offset from given event handle + /// + static uint64_t GetOffset(SPXEVENTHANDLE hevent) + { + uint64_t offset = 0; + SPX_THROW_ON_FAIL(recognizer_recognition_event_get_offset(hevent, &offset)); + return offset; + } + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(RecognitionEventArgs); + uint64_t m_offset; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_recognition_result.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_recognition_result.h new file mode 100644 index 0000000..f8b8f6e --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_recognition_result.h @@ -0,0 +1,310 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_recognition_result.h: Public API declarations for RecognitionResult C++ base class and related enum class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Contains detailed information about result of a recognition operation. +/// +class RecognitionResult +{ +private: + + /*! \cond PRIVATE */ + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRESULTHANDLE hresult) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + result_get_property_bag(hresult, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + /*! \endcond */ + +public: + + /// + /// Virtual destructor. + /// + virtual ~RecognitionResult() + { + ::recognizer_result_handle_release(m_hresult); + m_hresult = SPXHANDLE_INVALID; + }; + + /// + /// Unique result id. + /// + const SPXSTRING& ResultId; + + /// + /// Recognition reason. + /// + const Speech::ResultReason& Reason; + + /// + /// Normalized text generated by a speech recognition engine from recognized input. + /// + const SPXSTRING& Text; + + /// + /// Duration of recognized speech in ticks. + /// A single tick represents one hundred nanoseconds or one ten-millionth of a second. + /// + /// Duration of recognized speech in ticks. + uint64_t Duration() const { return m_duration; } + + /// + /// Offset of the recognized speech in ticks. + /// A single tick represents one hundred nanoseconds or one ten-millionth of a second. + /// + /// Offset of the recognized speech in ticks. + uint64_t Offset() const { return m_offset; } + + /// + /// Collection of additional RecognitionResult properties. + /// + const PropertyCollection& Properties; + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. + explicit operator SPXRESULTHANDLE() { return m_hresult; } + + +protected: + + /*! 
\cond PROTECTED */ + + explicit RecognitionResult(SPXRESULTHANDLE hresult) : + m_properties(hresult), + ResultId(m_resultId), + Reason(m_reason), + Text(m_text), + Properties(m_properties), + Handle(m_hresult), + m_hresult(hresult) + { + PopulateResultFields(hresult, &m_resultId, &m_reason, &m_text); + } + + const SPXRESULTHANDLE& Handle; + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(RecognitionResult); + + void PopulateResultFields(SPXRESULTHANDLE hresult, SPXSTRING* resultId, Speech::ResultReason* reason, SPXSTRING* text) + { + + SPX_INIT_HR(hr); + + const size_t maxCharCount = 2048; + char sz[maxCharCount + 1] = {}; + + if (resultId != nullptr) + { + SPX_THROW_ON_FAIL(hr = result_get_result_id(hresult, sz, maxCharCount)); + *resultId = Utils::ToSPXString(sz); + } + + if (reason != nullptr) + { + Result_Reason resultReason; + SPX_THROW_ON_FAIL(hr = result_get_reason(hresult, &resultReason)); + *reason = (Speech::ResultReason)resultReason; + } + + if (text != nullptr) + { + SPX_THROW_ON_FAIL(hr = result_get_text(hresult, sz, maxCharCount)); + *text = Utils::ToSPXString(sz); + } + + SPX_THROW_ON_FAIL(hr = result_get_offset(hresult, &m_offset)); + SPX_THROW_ON_FAIL(hr = result_get_duration(hresult, &m_duration)); + } + + SPXRESULTHANDLE m_hresult; + + SPXSTRING m_resultId; + Speech::ResultReason m_reason; + SPXSTRING m_text; + uint64_t m_offset; + uint64_t m_duration; +}; + + +/// +/// Contains detailed information about why a result was canceled. +/// +class CancellationDetails +{ +private: + + CancellationReason m_reason; + CancellationErrorCode m_errorCode; + +public: + + /// + /// Creates an instance of CancellationDetails object for the canceled RecognitionResult. + /// + /// The result that was canceled. + /// A shared pointer to CancellationDetails. + static std::shared_ptr FromResult(std::shared_ptr result) + { + // VSTS 1407221 + // SPX_THROW_HR_IF(result->Reason != ResultReason::Canceled, SPXERR_INVALID_ARG); + auto ptr = new CancellationDetails(result.get()); + auto cancellation = std::shared_ptr(ptr); + return cancellation; + } + + /// + /// The reason the result was canceled. + /// + const CancellationReason& Reason; + + /// + /// The error code in case of an unsuccessful recognition ( is set to Error). + /// If Reason is not Error, ErrorCode is set to NoError. + /// Added in version 1.1.0. + /// + const CancellationErrorCode& ErrorCode; + + /// + /// The error message in case of an unsuccessful recognition ( is set to Error). + /// + const SPXSTRING ErrorDetails; + +protected: + + /*! \cond PROTECTED */ + + CancellationDetails(RecognitionResult* result) : + m_reason(GetCancellationReason(result)), + m_errorCode(GetCancellationErrorCode(result)), + Reason(m_reason), + ErrorCode(m_errorCode), + ErrorDetails(result->Properties.GetProperty(PropertyId::SpeechServiceResponse_JsonErrorDetails)) + { + } + + /*! 
\endcond */ + +private: + + DISABLE_DEFAULT_CTORS(CancellationDetails); + + Speech::CancellationReason GetCancellationReason(RecognitionResult* result) + { + Result_CancellationReason reason = CancellationReason_Error; + + SPXRESULTHANDLE hresult = (SPXRESULTHANDLE)(*result); + SPX_IFFAILED_THROW_HR(result_get_reason_canceled(hresult, &reason)); + + return (Speech::CancellationReason)reason; + } + + Speech::CancellationErrorCode GetCancellationErrorCode(RecognitionResult* result) + { + Result_CancellationErrorCode errorCode = CancellationErrorCode_NoError; + + SPXRESULTHANDLE hresult = (SPXRESULTHANDLE)(*result); + SPX_IFFAILED_THROW_HR(result_get_canceled_error_code(hresult, &errorCode)); + + return (Speech::CancellationErrorCode)errorCode; + } + +}; + + +/// +/// Contains detailed information for NoMatch recognition results. +/// +class NoMatchDetails +{ +private: + + NoMatchReason m_reason; + +public: + + /// + /// Creates an instance of NoMatchDetails object for NoMatch RecognitionResults. + /// + /// The recognition result that was not recognized. + /// A shared pointer to NoMatchDetails. + static std::shared_ptr FromResult(std::shared_ptr result) + { + // VSTS 1407221 + // SPX_THROW_HR_IF(SPXERR_INVALID_ARG, result->Reason != ResultReason::NoMatch); + auto ptr = new NoMatchDetails(result.get()); + auto noMatch = std::shared_ptr(ptr); + return noMatch; + } + + /// + /// The reason the result was not recognized. + /// + const NoMatchReason& Reason; + +protected: + + /*! \cond PROTECTED */ + + NoMatchDetails(RecognitionResult* result) : + m_reason(GetNoMatchReason(result)), + Reason(m_reason) + { + } + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(NoMatchDetails); + + Speech::NoMatchReason GetNoMatchReason(RecognitionResult* result) + { + Result_NoMatchReason reason = NoMatchReason_NotRecognized; + + SPXRESULTHANDLE hresult = (SPXRESULTHANDLE)(*result); + SPX_IFFAILED_THROW_HR(result_get_no_match_reason(hresult, &reason)); + + return (Speech::NoMatchReason)reason; + } + +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_recognizer.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_recognizer.h new file mode 100644 index 0000000..a63e451 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_recognizer.h @@ -0,0 +1,72 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_recognizer.h: Public API declarations for Recognizer C++ base class +// + +#pragma once +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Recognizer base class. +/// +class Recognizer : public std::enable_shared_from_this +{ + friend class Connection; + friend class PronunciationAssessmentConfig; +public: + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXRECOHANDLE() const { return m_hreco; } + +protected: + + /*! 
\cond PROTECTED */ + + explicit Recognizer(SPXRECOHANDLE hreco) : + m_hreco(hreco) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + } + + virtual ~Recognizer() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + TermRecognizer(); + } + + virtual void TermRecognizer() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + if (m_hreco != SPXHANDLE_INVALID) + { + ::recognizer_handle_release(m_hreco); + m_hreco = SPXHANDLE_INVALID; + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + } + } + + SPXRECOHANDLE m_hreco; + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(Recognizer); +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_session.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_session.h new file mode 100644 index 0000000..92c6c38 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_session.h @@ -0,0 +1,86 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_session.h: Public API declarations for Session C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/*! \cond PRIVATE */ + +class Session +{ +private: + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXSESSIONHANDLE hsession) : + PropertyCollection( + [=](){ + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + session_get_property_bag(hsession, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + +public: + + template + static std::shared_ptr FromRecognizer(std::shared_ptr recognizer) + { + SPX_INIT_HR(hr); + + SPXSESSIONHANDLE hsession = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(hr = ::session_from_recognizer(recognizer->m_hreco, &hsession)); + + return std::make_shared(hsession); + } + + explicit Session(SPXSESSIONHANDLE hsession) : + m_properties(hsession), + Properties(m_properties), + m_hsession(hsession) + { + SPX_DBG_TRACE_FUNCTION(); + } + + virtual ~Session() + { + SPX_DBG_TRACE_FUNCTION(); + + if (m_hsession != SPXHANDLE_INVALID) + { + ::session_handle_release(m_hsession); + m_hsession = SPXHANDLE_INVALID; + } + } + + PropertyCollection& Properties; + +private: + + DISABLE_COPY_AND_MOVE(Session); + + SPXSESSIONHANDLE m_hsession; +}; + +/*! \endcond */ + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_session_eventargs.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_session_eventargs.h new file mode 100644 index 0000000..117de20 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_session_eventargs.h @@ -0,0 +1,73 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
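Putting the result types above together: callers usually dispatch on RecognitionResult::Reason, with NoMatchDetails and CancellationDetails supplying the diagnostics for the non-success cases. A sketch (stream output purely for illustration):

#include <speechapi_cxx.h>
#include <iostream>
#include <memory>
using namespace Microsoft::CognitiveServices::Speech;

static void HandleResultSketch(std::shared_ptr<RecognitionResult> result)
{
    switch (result->Reason)
    {
    case ResultReason::RecognizedSpeech:
        // Duration()/Offset() are in 100-ns ticks; divide by 10000 for milliseconds.
        std::cout << result->Text << " (" << result->Duration() / 10000 << " ms)\n";
        break;
    case ResultReason::NoMatch:
        std::cout << "no match, reason "
                  << static_cast<int>(NoMatchDetails::FromResult(result)->Reason) << "\n";
        break;
    case ResultReason::Canceled:
    {
        auto details = CancellationDetails::FromResult(result);
        std::cout << "canceled: " << details->ErrorDetails << "\n";
        break;
    }
    default:
        break;
    }
}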
+// +// speechapi_cxx_session_eventargs.h: Public API declarations for SessionEventArgs C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Base class for session event arguments. +/// +class SessionEventArgs : public EventArgs +{ +public: + + /// + /// Constructor. + /// + /// Event handle + explicit SessionEventArgs(SPXEVENTHANDLE hevent) : + SessionId(m_sessionId), + m_sessionId(GetSessionId(hevent)) + { + }; + + /// + virtual ~SessionEventArgs() {} + + /// + /// Session identifier (a GUID in string format). + /// + const SPXSTRING& SessionId; + + +protected: + + /*! \cond PROTECTED */ + + /// + /// Extract session identifier from given event handle + /// + static const SPXSTRING GetSessionId(SPXEVENTHANDLE hevent) + { + static const auto cchMaxUUID = 36; + static const auto cchMaxSessionId = cchMaxUUID + 1; + char sessionId[cchMaxSessionId] = {}; + + SPX_THROW_ON_FAIL(recognizer_session_event_get_session_id(hevent, sessionId, cchMaxSessionId)); + return Utils::ToSPXString(sessionId); + }; + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(SessionEventArgs); + + SPXSTRING m_sessionId; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_smart_handle.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_smart_handle.h new file mode 100644 index 0000000..d4727b9 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_smart_handle.h @@ -0,0 +1,60 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_smart_handle.h: Public API declarations for SmartHandle class and related typedef +// + +#pragma once +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +typedef SPXHR(SPXAPI_CALLTYPE *SmartHandleCloseFunction)(SPXHANDLE); + + +/// +/// Smart handle class. +/// +template +class SmartHandle +{ +public: + + SmartHandle(T handle = SPXHANDLE_INVALID) : m_handle(handle) { }; + ~SmartHandle() { reset(); } + + explicit operator T&() const { return m_handle; } + + T get() const { return m_handle; } + operator T() const { return m_handle; } + + T* operator &() + { + SPX_THROW_HR_IF(SPXERR_ALREADY_INITIALIZED, !InvalidHandle(m_handle)); + return &m_handle; + } + + void reset() + { + if (!InvalidHandle(m_handle)) + { + closeFunction(m_handle); + m_handle = SPXHANDLE_INVALID; + } + } + +private: + + static bool InvalidHandle(T t) { return t == nullptr || t == SPXHANDLE_INVALID; } + + DISABLE_COPY_AND_MOVE(SmartHandle); + T m_handle; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_source_lang_config.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_source_lang_config.h new file mode 100644 index 0000000..a312525 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_source_lang_config.h @@ -0,0 +1,91 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
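SmartHandle above is a small RAII helper these headers use to tie a raw SPX handle to its C-API release function. The sketch below is hypothetical: it assumes the template takes the handle type plus a close function (its parameter list does not appear in this hunk), and reuses only C-API calls that already appear in the AsyncRecognizer internals above:

#include <speechapi_cxx.h>

// Assumed shape: template <typename T, SmartHandleCloseFunction closeFunction> class SmartHandle;
static void SmartHandleSketch(SPXRECOHANDLE hreco)
{
    SmartHandle<SPXASYNCHANDLE, &recognizer_async_handle_release> hasync;

    // operator&() only hands out the address while the handle is still invalid,
    // so the C API can fill it in; the handle is released automatically on scope exit.
    SPX_THROW_ON_FAIL(recognizer_start_continuous_recognition_async(hreco, &hasync));
    SPX_THROW_ON_FAIL(recognizer_start_continuous_recognition_async_wait_for(hasync, UINT32_MAX));
}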
+// + +#pragma once + +#include +#include +#include + +#include "speechapi_cxx_properties.h" +#include +#include +#include "speechapi_c_common.h" + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class that defines source language configuration, added in 1.8.0 +/// +class SourceLanguageConfig +{ +public: + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXSOURCELANGCONFIGHANDLE() const { return m_hconfig; } + + /// + /// Creates an instance of the SourceLanguageConfig with source language + /// + /// The source language + /// A shared pointer to the new SourceLanguageConfig instance. + static std::shared_ptr FromLanguage(const SPXSTRING& language) + { + SPXSOURCELANGCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(source_lang_config_from_language(&hconfig, language.c_str())); + auto ptr = new SourceLanguageConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of the SourceLanguageConfig with source language and custom endpoint id. A custom endpoint id corresponds to custom models. + /// + /// The source language + /// The custom endpoint id + /// A shared pointer to the new SourceLanguageConfig instance. + static std::shared_ptr FromLanguage(const SPXSTRING& language, const SPXSTRING& endpointId) + { + SPXSOURCELANGCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(source_lang_config_from_language_and_endpointId(&hconfig, language.c_str(), endpointId.c_str())); + auto ptr = new SourceLanguageConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Destructs the object. + /// + virtual ~SourceLanguageConfig() + { + source_lang_config_release(m_hconfig); + property_bag_release(m_propertybag); + } + +private: + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit SourceLanguageConfig(SPXSOURCELANGCONFIGHANDLE hconfig) + :m_hconfig(hconfig) + { + SPX_THROW_ON_FAIL(source_lang_config_get_property_bag(hconfig, &m_propertybag)); + } + + /// + /// Internal member variable that holds the config + /// + SPXSOURCELANGCONFIGHANDLE m_hconfig; + + /// + /// Internal member variable that holds the properties of the config + /// + SPXPROPERTYBAGHANDLE m_propertybag; + + DISABLE_COPY_AND_MOVE(SourceLanguageConfig); +}; +}}} diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_source_language_recognizer.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_source_language_recognizer.h new file mode 100644 index 0000000..36b851a --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_source_language_recognizer.h @@ -0,0 +1,173 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_source_language_recognizer.h: Public API declarations for SourceLanguageRecognizer C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +class Session; + +/// +/// Class for source language recognizers. +/// You can use this class for standalone language detection. 
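SourceLanguageConfig above pins the recognition language, optionally to a custom-model endpoint, and several of them can be bundled into the AutoDetectSourceLanguageConfig declared elsewhere in this wrapper for language identification. A sketch with a placeholder endpoint id; the SpeechRecognizer overload used here is assumed from the other headers in this include set:

#include <speechapi_cxx.h>
#include <memory>
using namespace Microsoft::CognitiveServices::Speech;

static void SourceLanguageSketch(std::shared_ptr<SpeechConfig> speechConfig)
{
    // Fixed language backed by a custom model.
    auto enUS = SourceLanguageConfig::FromLanguage("en-US", "<custom-endpoint-id>");

    // Or let the service choose among candidate languages.
    auto autoDetect = AutoDetectSourceLanguageConfig::FromSourceLanguageConfigs(
        { enUS, SourceLanguageConfig::FromLanguage("de-DE") });

    auto recognizer = SpeechRecognizer::FromConfig(speechConfig, autoDetect);
    (void)recognizer;
}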
+/// Added in version 1.17.0 +/// +class SourceLanguageRecognizer final : public AsyncRecognizer +{ +public: + + using BaseType = AsyncRecognizer; + + /// + /// Create a source language recognizer from a speech config, auto detection source language config and audio config + /// + /// Speech configuration + /// Auto detection source language config + /// Audio configuration + /// A smart pointer wrapped source language recognizer pointer. + static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr autoDetectSourceLangConfig, + std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_source_language_recognizer_from_auto_detect_source_lang_config( + &hreco, + HandleOrInvalid(speechconfig), + HandleOrInvalid(autoDetectSourceLangConfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Recognizer handle. + explicit SourceLanguageRecognizer(SPXRECOHANDLE hreco) : BaseType(hreco), Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /// + /// Destructor. + /// + ~SourceLanguageRecognizer() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + TermRecognizer(); + } + + /// + /// Starts speech recognition, and returns after a single utterance is recognized. The end of a + /// single utterance is determined by listening for silence at the end or until a maximum of about 30 + /// seconds of audio is processed. The task returns the recognition text as result. + /// Note: Since RecognizeOnceAsync() returns only a single utterance, it is suitable only for single + /// shot recognition like command or query. + /// For long-running multi-utterance recognition, use StartContinuousRecognitionAsync() instead. + /// + /// Future containing result value (a shared pointer to SpeechRecognitionResult) + /// of the asynchronous speech recognition. + /// + std::future> RecognizeOnceAsync() override + { + return BaseType::RecognizeOnceAsyncInternal(); + } + + /// + /// Asynchronously initiates continuous speech recognition operation. + /// + /// An empty future. + std::future StartContinuousRecognitionAsync() override + { + return BaseType::StartContinuousRecognitionAsyncInternal(); + } + + /// + /// Asynchronously terminates ongoing continuous speech recognition operation. + /// + /// An empty future. + std::future StopContinuousRecognitionAsync() override + { + return BaseType::StopContinuousRecognitionAsyncInternal(); + } + + /// + /// Asynchronously initiates keyword recognition operation. + /// + /// Specifies the keyword model to be used. + /// An empty future. + std::future StartKeywordRecognitionAsync(std::shared_ptr model) override + { + return BaseType::StartKeywordRecognitionAsyncInternal(model); + } + + /// + /// Asynchronously terminates keyword recognition operation. + /// + /// An empty future. + std::future StopKeywordRecognitionAsync() override + { + return BaseType::StopKeywordRecognitionAsyncInternal(); + } + + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; + + /// + /// Gets the endpoint ID of a customized speech model that is used for speech recognition. 
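For standalone language identification, the SourceLanguageRecognizer above is paired with the AutoDetectSourceLanguageConfig and AutoDetectSourceLanguageResult types assumed from the other headers in this wrapper. A sketch with a placeholder WAV path:

#include <speechapi_cxx.h>
#include <iostream>
#include <memory>
using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Audio;

static void DetectLanguageSketch(std::shared_ptr<SpeechConfig> speechConfig)
{
    auto candidates = AutoDetectSourceLanguageConfig::FromLanguages({ "en-US", "de-DE", "zh-CN" });
    auto audio      = AudioConfig::FromWavFileInput("sample.wav");
    auto recognizer = SourceLanguageRecognizer::FromConfig(speechConfig, candidates, audio);

    auto result   = recognizer->RecognizeOnceAsync().get();
    auto detected = AutoDetectSourceLanguageResult::FromResult(result);
    std::cout << "detected language: " << detected->Language << "\n";
}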
+ /// + /// the endpoint ID of a customized speech model that is used for speech recognition + SPXSTRING GetEndpointId() + { + return Properties.GetProperty(PropertyId::SpeechServiceConnection_EndpointId, SPXSTRING()); + } + + /// + /// Sets the authorization token that will be used for connecting to the service. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. + /// Otherwise, the recognizer will encounter errors during recognition. + /// + /// The authorization token. + void SetAuthorizationToken(const SPXSTRING& token) + { + Properties.SetProperty(PropertyId::SpeechServiceAuthorization_Token, token); + } + + /// + /// Gets the authorization token. + /// + /// Authorization token + SPXSTRING GetAuthorizationToken() + { + return Properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token, SPXSTRING()); + } + +private: + DISABLE_DEFAULT_CTORS(SourceLanguageRecognizer); + friend class Microsoft::CognitiveServices::Speech::Session; +}; +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speaker_identification_model.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speaker_identification_model.h new file mode 100644 index 0000000..786e3e7 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speaker_identification_model.h @@ -0,0 +1,77 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speaker_identification_model.h: Public API declarations for SpeakerIdentificationModel C++ class +// + +#pragma once +#include +#include +#include + +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Speaker { + +/// +/// Represents speaker identification model used with speaker recognition class. +/// Added in version 1.12.0 +/// +class SpeakerIdentificationModel : public std::enable_shared_from_this +{ +public: + + /// + /// Creates a speaker identification model using the voice profiles. + /// + /// a vector of voice profiles. + /// A shared pointer to speaker identification model. + static std::shared_ptr FromProfiles(const std::vector>& profiles) + { + SPXSIMODELHANDLE hsimodel = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speaker_identification_model_create(&hsimodel)); + for (auto& profile : profiles) + { + SPX_THROW_ON_FAIL(speaker_identification_model_add_profile(hsimodel, (SPXVOICEPROFILEHANDLE)(*profile))); + } + + return std::shared_ptr{ new SpeakerIdentificationModel(hsimodel) }; + } + + /// + /// Virtual destructor. + /// + virtual ~SpeakerIdentificationModel() { speaker_identification_model_release_handle(m_simodel); } + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. + explicit operator SPXSIMODELHANDLE() { return m_simodel; } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a speaker identification model using the provided handle. + /// + /// speaker identification handle. + explicit SpeakerIdentificationModel(SPXSIMODELHANDLE hsimodel = SPXHANDLE_INVALID) : m_simodel(hsimodel) { } + + + /*! 
\endcond */ + +private: + + DISABLE_COPY_AND_MOVE(SpeakerIdentificationModel); + + SPXSIMODELHANDLE m_simodel; +}; + +} } } } // Microsoft::CognitiveServices::Speech::Speaker diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speaker_recognition_result.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speaker_recognition_result.h new file mode 100644 index 0000000..d37e643 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speaker_recognition_result.h @@ -0,0 +1,236 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speaker_recognition_result.h: Public API declarations for SpeakerRecognitionResult C++ class +// + +#pragma once +#include +#include +#include + +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Speaker { + +/// +/// Represents speaker recognition result. +/// Added in 1.12.0 +/// +class SpeakerRecognitionResult +{ + +private: + + /// Internal member variable that holds the speakerRecognition result handle. + /// + SPXRESULTHANDLE m_hresult; + + /*! \cond PRIVATE */ + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRESULTHANDLE hresult) : + PropertyCollection( + [hresult]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + result_get_property_bag(hresult, &hpropbag); + return hpropbag; + }()) + { + } + }; + + /// + /// Internal member variable that holds the properties associating to the speaker recognition result. + /// + PrivatePropertyCollection m_properties; + + /*! \endcond */ + +public: + + /// + /// Creates a new instance using the provided handle. + /// + /// Result handle. + explicit SpeakerRecognitionResult(SPXRESULTHANDLE hresult) : + m_hresult(hresult), + m_properties(hresult), + ResultId(m_resultId), + Reason(m_reason), + ProfileId(m_profileId), + Properties(m_properties), + m_profileId(Properties.GetProperty("speakerrecognition.profileid","")), + m_score(std::stof(Properties.GetProperty("speakerrecognition.score", "0.0"))) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + PopulateResultFields(hresult, &m_resultId, &m_reason); + } + + /// + /// Explicit conversion operator. + /// + /// A handle. + explicit operator SPXRESULTHANDLE() { return m_hresult; } + + /// + /// Destructor. + /// + ~SpeakerRecognitionResult() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + recognizer_result_handle_release(m_hresult); + } + + /// + /// Unique result id. + /// + const SPXSTRING& ResultId; + + /// + /// Reason of the speaker recognition result. + /// + const ResultReason& Reason; + + /// + /// The profile id of the first verified/identified speaker. The rest of recognized speakers can be retrieved by parsing the json result string in the Properties. + /// + const SPXSTRING& ProfileId; + + /// + /// Collection of additional properties. + /// + const PropertyCollection& Properties; + + /// + /// Returns a similarity score. + /// + /// A float number indicating the similarity between input audio and targeted voice profile.This number is between 0 and 1. A higher number means higher similarity.< / returns> + double GetScore() const + { + return m_score; + } + +private: + + /*! 
\cond PRIVATE */ + + DISABLE_DEFAULT_CTORS(SpeakerRecognitionResult); + + void PopulateResultFields(SPXRESULTHANDLE hresult, SPXSTRING* resultId, Speech::ResultReason* reason) + { + SPX_INIT_HR(hr); + + const size_t maxCharCount = 2048; + char sz[maxCharCount + 1] = {}; + + if (resultId != nullptr) + { + SPX_THROW_ON_FAIL(hr = result_get_result_id(hresult, sz, maxCharCount)); + *resultId = Utils::ToSPXString(sz); + } + + if (reason != nullptr) + { + Result_Reason resultReason; + SPX_THROW_ON_FAIL(hr = result_get_reason(hresult, &resultReason)); + *reason = (Speech::ResultReason)resultReason; + } + } + + SPXSTRING m_resultId; + ResultReason m_reason; + SPXSTRING m_profileId; + float m_score; + + /*! \endcond */ +}; + +/// +/// Represents the details of a canceled speaker recognition result. +/// +class SpeakerRecognitionCancellationDetails +{ +private: + + CancellationReason m_reason; + CancellationErrorCode m_errorCode; + +public: + + /// + /// Creates an instance of SpeakerRecognitionCancellationDetails object for the canceled speaker recognition result. + /// + /// The result that was canceled. + /// A shared pointer to SpeakerRecognitionCancellationDetails. + static std::shared_ptr FromResult(std::shared_ptr result) + { + return std::shared_ptr { new SpeakerRecognitionCancellationDetails(result.get()) }; + } + + /// + /// The reason the result was canceled. + /// + const CancellationReason& Reason; + + /// + /// The error code in case of an unsuccessful speaker recognition ( is set to Error). + /// If Reason is not Error, ErrorCode is set to NoError. + /// + const CancellationErrorCode& ErrorCode; + + /// + /// The error message in case of an unsuccessful speaker recognition ( is set to Error). + /// + const SPXSTRING ErrorDetails; + +protected: + + /*! \cond PROTECTED */ + + SpeakerRecognitionCancellationDetails(SpeakerRecognitionResult* result) : + m_reason(GetCancellationReason(result)), + m_errorCode(GetCancellationErrorCode(result)), + Reason(m_reason), + ErrorCode(m_errorCode), + ErrorDetails(result->Properties.GetProperty(PropertyId::SpeechServiceResponse_JsonErrorDetails)) + { + } + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(SpeakerRecognitionCancellationDetails); + + + CancellationReason GetCancellationReason(SpeakerRecognitionResult* result) + { + Result_CancellationReason reason = CancellationReason_Error; + + SPXRESULTHANDLE hresult = (SPXRESULTHANDLE)(*result); + SPX_IFFAILED_THROW_HR(result_get_reason_canceled(hresult, &reason)); + + return static_cast(reason); + } + + CancellationErrorCode GetCancellationErrorCode(SpeakerRecognitionResult* result) + { + Result_CancellationErrorCode errorCode = CancellationErrorCode_NoError; + + SPXRESULTHANDLE hresult = (SPXRESULTHANDLE)(*result); + SPX_IFFAILED_THROW_HR(result_get_canceled_error_code(hresult, &errorCode)); + + return static_cast(errorCode); + } +}; + +} } } } // Microsoft::CognitiveServices::Speech::Speaker diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speaker_recognizer.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speaker_recognizer.h new file mode 100644 index 0000000..6922573 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speaker_recognizer.h @@ -0,0 +1,142 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
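// Usage sketch (illustrative, not part of the vendored header): inspecting a
// SpeakerRecognitionResult declared above and, when recognition was canceled,
// pulling the cancellation details. The result is assumed to come from one of
// the SpeakerRecognizer calls declared in the next header.
#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Speaker;

static void InspectSpeakerResultSketch(std::shared_ptr<SpeakerRecognitionResult> result)
{
    if (result->Reason == ResultReason::Canceled)
    {
        auto details = SpeakerRecognitionCancellationDetails::FromResult(result);
        // details->Reason, details->ErrorCode and details->ErrorDetails describe the failure.
        return;
    }

    // On success, ProfileId names the best match and GetScore() reports a
    // similarity between 0 and 1 (higher means more similar).
    const auto& matchedProfile = result->ProfileId;
    double score = result->GetScore();
    (void)matchedProfile; (void)score;
}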
+// +// speechapi_cxx_speaker_recognizer.h: Public API declarations for speaker recognizer C++ class +// + +#pragma once +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Speaker { + +/// +/// Perform speaker recognition. +/// Added in version 1.12.0 +/// +class SpeakerRecognizer : public std::enable_shared_from_this +{ +public: + + /// + /// Create a speaker recognizer from a speech config and audio config. + /// + /// A shared smart pointer of a speech config. + /// A shared smart pointer of a audio config. + /// A smart pointer wrapped speaker recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechconfig, std::shared_ptr audioInput) + { + SPXSPEAKERIDHANDLE hSpeakerRecognizerHandle; + SPX_THROW_ON_FAIL(::recognizer_create_speaker_recognizer_from_config( + &hSpeakerRecognizerHandle, + Utils::HandleOrInvalid(speechconfig), + Utils::HandleOrInvalid(audioInput))); + return std::shared_ptr{ new SpeakerRecognizer(hSpeakerRecognizerHandle) }; + } + + /// + /// Destructor. + /// + virtual ~SpeakerRecognizer() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + ::speaker_recognizer_release_handle(m_hSpeakerRecognizer); + m_hSpeakerRecognizer = SPXHANDLE_INVALID; + } + + /// + /// Verify the speaker in the verification model. + /// + /// A shared smart pointer of a speaker verficiation model. + /// A smart pointer wrapped speaker recognition result future. + std::future> RecognizeOnceAsync(std::shared_ptr model) + { + return RunAsync(speaker_recognizer_verify, model); + } + + /// + /// Identify the speakers in the Speaker Identification Model. + /// + /// A shared smart pointer of a speaker identification model. + /// A smart pointer wrapped speaker recognition result future. + std::future> RecognizeOnceAsync(std::shared_ptr model) + { + return RunAsync(speaker_recognizer_identify, model); + } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// A Speaker Recognizer handle. + explicit SpeakerRecognizer(SPXSPEAKERIDHANDLE hSpeakerRecognizer) : + m_hSpeakerRecognizer(hSpeakerRecognizer), + m_properties(hSpeakerRecognizer), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /*! \endcond */ + +private: + + /*! \cond PRIVATE */ + + SPXSPEAKERIDHANDLE m_hSpeakerRecognizer; + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXSPEAKERIDHANDLE hSpeakerRecognizer) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + speaker_recognizer_get_property_bag(hSpeakerRecognizer, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + template < class SpeakerModelPtrType, class SpeakerModelHandleType> + inline std::future> RunAsync(std::function func, std::shared_ptr model) + { + auto keepalive = this->shared_from_this(); + return std::async(std::launch::async, [keepalive, this, func, model]() + { + SPXRESULTHANDLE hResultHandle = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(func(m_hSpeakerRecognizer, (SpeakerModelHandleType)(*model), &hResultHandle)); + return std::shared_ptr { new SpeakerRecognitionResult{ hResultHandle } }; + }); + } + + /*! \endcond */ + +public: + + /// + /// A collection of properties and their values defined for this . 
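// Usage sketch (illustrative, not part of the vendored header): identifying a
// speaker with the SpeakerRecognizer declared above and the
// SpeakerIdentificationModel from the earlier header. The subscription key,
// region, audio file name and the already-enrolled voice profiles are
// placeholders supplied by the caller.
#include <speechapi_cxx.h>
#include <vector>
using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Speaker;

static void IdentifySpeakerSketch(const std::vector<std::shared_ptr<VoiceProfile>>& enrolledProfiles)
{
    auto config = SpeechConfig::FromSubscription("YourSubscriptionKey", "YourServiceRegion");
    auto audioInput = Audio::AudioConfig::FromWavFileInput("unknown_speaker.wav");
    auto recognizer = SpeakerRecognizer::FromConfig(config, audioInput);

    auto model = SpeakerIdentificationModel::FromProfiles(enrolledProfiles);
    auto result = recognizer->RecognizeOnceAsync(model).get();   // blocks on the async call

    if (result->Reason == ResultReason::RecognizedSpeakers)
    {
        // ProfileId is the top match; the full ranking is available as JSON in result->Properties.
        const auto& topMatch = result->ProfileId;
        (void)topMatch;
    }
}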
+ /// + PropertyCollection& Properties; +}; + +} } } } // Microsoft::CognitiveServices::Speech::Speaker diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speaker_verification_model.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speaker_verification_model.h new file mode 100644 index 0000000..14ffd59 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speaker_verification_model.h @@ -0,0 +1,71 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speaker_verification_model.h: Public API declarations for SpeakerVerificationModel C++ class +// + +#pragma once +#include +#include + +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Speaker { + +/// +/// Represents speaker verification model used with speaker recognition class. +/// Added in version 1.12.0 +/// +class SpeakerVerificationModel : public std::enable_shared_from_this +{ +public: + + /// + /// Creates a speaker verification model using the voice profile. + /// + /// The voice profile. + /// A shared pointer to speaker verification model. + static std::shared_ptr FromProfile(std::shared_ptr profile) + { + SPXSVMODELHANDLE hsvmodel = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speaker_verification_model_create(&hsvmodel, (SPXVOICEPROFILEHANDLE)(*profile))); + return std::shared_ptr{ new SpeakerVerificationModel(hsvmodel) }; + } + + /// + /// Virtual destructor. + /// + virtual ~SpeakerVerificationModel() { speaker_verification_model_release_handle(m_svmodel); } + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. + explicit operator SPXSVMODELHANDLE() { return m_svmodel; } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance of speaker verification model using the provided handle. + /// + /// speaker verification model handle. + explicit SpeakerVerificationModel(SPXSIMODELHANDLE hsvmodel = SPXHANDLE_INVALID) : m_svmodel(hsvmodel) { } + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(SpeakerVerificationModel); + + SPXSVMODELHANDLE m_svmodel; +}; + +} } } } // Microsoft::CognitiveServices::Speech::Speaker diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_config.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_config.h new file mode 100644 index 0000000..b2f9db7 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_config.h @@ -0,0 +1,491 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_config.h: Public API declarations for SpeechConfig C++ class +// +#pragma once + +#include + +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +namespace Dialog { class DialogServiceConfig; } +class EmbeddedSpeechConfig; +class HybridSpeechConfig; + +/// +/// Class that defines configurations for speech / intent recognition, or speech synthesis. 
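// Usage sketch (illustrative, not part of the vendored header): verifying a
// speaker against a single enrolled voice profile with the
// SpeakerVerificationModel declared above. Key, region, audio file name and the
// enrolled profile are placeholders.
#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Speaker;

static bool VerifySpeakerSketch(std::shared_ptr<VoiceProfile> enrolledProfile)
{
    auto config = SpeechConfig::FromSubscription("YourSubscriptionKey", "YourServiceRegion");
    auto audioInput = Audio::AudioConfig::FromWavFileInput("claimed_speaker.wav");
    auto recognizer = SpeakerRecognizer::FromConfig(config, audioInput);

    auto model = SpeakerVerificationModel::FromProfile(enrolledProfile);
    auto result = recognizer->RecognizeOnceAsync(model).get();

    // RecognizedSpeaker means the audio matched the enrolled profile;
    // result->GetScore() exposes the similarity behind that decision.
    return result->Reason == ResultReason::RecognizedSpeaker;
}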
+/// +class SpeechConfig +{ +public: + friend Dialog::DialogServiceConfig; + friend EmbeddedSpeechConfig; + friend HybridSpeechConfig; + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXSPEECHCONFIGHANDLE() const { return m_hconfig; } + + /// + /// Creates an instance of the speech config with specified subscription key and region. + /// + /// The subscription key. + /// The region name (see the region page). + /// A shared pointer to the new speech config instance. + static std::shared_ptr FromSubscription(const SPXSTRING& subscription, const SPXSTRING& region) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_config_from_subscription(&hconfig, Utils::ToUTF8(subscription).c_str(), Utils::ToUTF8(region).c_str())); + + auto ptr = new SpeechConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of the speech config with specified authorization token and region. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. + /// As configuration values are copied when creating a new recognizer, the new token value will not apply to recognizers that have already been created. + /// For recognizers that have been created before, you need to set authorization token of the corresponding recognizer + /// to refresh the token. Otherwise, the recognizers will encounter errors during recognition. + /// + /// The authorization token. + /// The region name (see the region page). + /// A shared pointer to the new speech config instance. + static std::shared_ptr FromAuthorizationToken(const SPXSTRING& authToken, const SPXSTRING& region) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_config_from_authorization_token(&hconfig, Utils::ToUTF8(authToken).c_str(), Utils::ToUTF8(region).c_str())); + + auto ptr = new SpeechConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of the speech config with specified endpoint and subscription. + /// This method is intended only for users who use a non-standard service endpoint. + /// Note: The query parameters specified in the endpoint URI are not changed, even if they are set by any other APIs. + /// For example, if the recognition language is defined in URI as query parameter "language=de-DE", and also set by SetSpeechRecognitionLanguage("en-US"), + /// the language setting in URI takes precedence, and the effective language is "de-DE". + /// Only the parameters that are not specified in the endpoint URI can be set by other APIs. + /// Note: To use an authorization token with FromEndpoint, use FromEndpoint(const SPXSTRING&), + /// and then call SetAuthorizationToken() on the created SpeechConfig instance. + /// + /// The service endpoint to connect to. + /// The subscription key. + /// A shared pointer to the new speech config instance. + static std::shared_ptr FromEndpoint(const SPXSTRING& endpoint, const SPXSTRING& subscription) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_config_from_endpoint(&hconfig, Utils::ToUTF8(endpoint).c_str(), Utils::ToUTF8(subscription).c_str())); + + auto ptr = new SpeechConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of SpeechConfig with specified endpoint. 
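// Usage sketch (illustrative, not part of the vendored header): the three most
// common ways to construct a SpeechConfig with the factories declared above.
// Key, region, token and endpoint values are placeholders.
#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech;

static void CreateSpeechConfigSketch()
{
    // Subscription key + region.
    auto byKey = SpeechConfig::FromSubscription("YourSubscriptionKey", "YourServiceRegion");

    // Short-lived authorization token + region; refresh the token on the
    // recognizer itself before it expires.
    auto byToken = SpeechConfig::FromAuthorizationToken("YourAuthToken", "YourServiceRegion");

    // Non-standard endpoint; query parameters already in the URI win over later setters.
    auto byEndpoint = SpeechConfig::FromEndpoint("wss://example.contoso.com/speech", "YourSubscriptionKey");

    (void)byKey; (void)byToken; (void)byEndpoint;
}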
+ /// This method is intended only for users who use a non-standard service endpoint. + /// Note: The query parameters specified in the endpoint URI are not changed, even if they are set by any other APIs. + /// Whether a specific query parameter is supported or not, depends on the endpoint and scenario. + /// For example, if the recognition language is defined in URI as query parameter "language=de-DE", and also set by SetSpeechRecognitionLanguage("en-US"), + /// the language setting in URI takes precedence, and the effective language is "de-DE". + /// The example only applies when the endpoint and scenario combination supports language as a query parameter. + /// Only the parameters that are not specified in the endpoint URI can be set by other APIs. + /// Note: If the endpoint requires a subscription key for authentication, use FromEndpoint(const SPXSTRING&, const SPXSTRING&) to pass + /// the subscription key as parameter. + /// To use an authorization token with FromEndpoint, use this method to create a SpeechConfig instance, and then + /// call SetAuthorizationToken() on the created SpeechConfig instance. + /// Note: Added in version 1.5.0. + /// + /// The service endpoint URI to connect to. + /// A shared pointer to the new speech config instance. + static std::shared_ptr FromEndpoint(const SPXSTRING& endpoint) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_config_from_endpoint(&hconfig, Utils::ToUTF8(endpoint).c_str(), nullptr)); + + auto ptr = new SpeechConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of the speech config with specified host and subscription. + /// This method is intended only for users who use a non-default service host. Standard resource path will be assumed. + /// For services with a non-standard resource path or no path at all, use FromEndpoint instead. + /// Note: Query parameters are not allowed in the host URI and must be set by other APIs. + /// Note: To use an authorization token with FromHost, use FromHost(const SPXSTRING&), + /// and then call SetAuthorizationToken() on the created SpeechConfig instance. + /// Note: Added in version 1.8.0. + /// + /// The service host to connect to. Format is "protocol://host:port" where ":port" is optional. + /// The subscription key. + /// A shared pointer to the new speech config instance. + static std::shared_ptr FromHost(const SPXSTRING& host, const SPXSTRING& subscription) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_config_from_host(&hconfig, Utils::ToUTF8(host).c_str(), Utils::ToUTF8(subscription).c_str())); + + auto ptr = new SpeechConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of SpeechConfig with specified host. + /// This method is intended only for users who use a non-default service host. Standard resource path will be assumed. + /// For services with a non-standard resource path or no path at all, use FromEndpoint instead. + /// Note: Query parameters are not allowed in the host URI and must be set by other APIs. + /// Note: If the host requires a subscription key for authentication, use FromHost(const SPXSTRING&, const SPXSTRING&) to pass + /// the subscription key as parameter. + /// To use an authorization token with FromHost, use this method to create a SpeechConfig instance, and then + /// call SetAuthorizationToken() on the created SpeechConfig instance. + /// Note: Added in version 1.8.0. + /// + /// The service host URI to connect to. 
Format is "protocol://host:port" where ":port" is optional. + /// A shared pointer to the new speech config instance. + static std::shared_ptr FromHost(const SPXSTRING& host) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_config_from_host(&hconfig, Utils::ToUTF8(host).c_str(), nullptr)); + + auto ptr = new SpeechConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Set the input language to the speech recognizer. + /// + /// Specifies the name of spoken language to be recognized in BCP-47 format. + void SetSpeechRecognitionLanguage(const SPXSTRING& lang) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_RecoLanguage), nullptr, Utils::ToUTF8(lang).c_str()); + } + + /// + /// Gets the input language to the speech recognition. + /// The language is specified in BCP-47 format. + /// + /// The speech recognition language. + SPXSTRING GetSpeechRecognitionLanguage() const + { + return GetProperty(PropertyId::SpeechServiceConnection_RecoLanguage); + } + + /// + /// Sets the language of the speech synthesizer. + /// Added in version 1.4.0 + /// + /// Specifies the name of language (e.g. en-US) + void SetSpeechSynthesisLanguage(const SPXSTRING& lang) + { + SPX_THROW_ON_FAIL(property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_SynthLanguage), nullptr, Utils::ToUTF8(lang).c_str())); + } + + /// + /// Gets the language of the speech synthesizer. + /// Added in version 1.4.0 + /// + /// The speech synthesis language. + SPXSTRING GetSpeechSynthesisLanguage() const + { + return GetProperty(PropertyId::SpeechServiceConnection_SynthLanguage); + } + + /// + /// Set the voice of the speech synthesizer. + /// Added in version 1.4.0 + /// + /// Specifies the name of voice + void SetSpeechSynthesisVoiceName(const SPXSTRING& voiceName) + { + SPX_THROW_ON_FAIL(property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_SynthVoice), nullptr, Utils::ToUTF8(voiceName).c_str())); + } + + /// + /// Gets the voice of the speech synthesizer. + /// Added in version 1.4.0 + /// + /// The speech synthesis voice name. + SPXSTRING GetSpeechSynthesisVoiceName() const + { + return GetProperty(PropertyId::SpeechServiceConnection_SynthVoice); + } + + /// + /// Sets the speech synthesis output format (e.g. Riff16Khz16BitMonoPcm). + /// Added in version 1.4.0 + /// + /// Specifies the output format ID + void SetSpeechSynthesisOutputFormat(SpeechSynthesisOutputFormat formatId) + { + SPX_THROW_ON_FAIL(speech_config_set_audio_output_format(m_hconfig, static_cast(formatId))); + } + + /// + /// Gets the speech synthesis output format. + /// Added in version 1.4.0 + /// + /// The speech synthesis output format. + SPXSTRING GetSpeechSynthesisOutputFormat() const + { + return GetProperty(PropertyId::SpeechServiceConnection_SynthOutputFormat); + } + + /// + /// Sets the endpoint ID of Custom Speech or Custom Voice. + /// + /// Endpoint ID. + void SetEndpointId(const SPXSTRING& endpointId) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_EndpointId), nullptr, Utils::ToUTF8(endpointId).c_str()); + } + + /// + /// Gets the endpoint ID of Custom Speech or Custom Voice. + /// + /// Endpoint ID. + SPXSTRING GetEndpointId() const + { + return GetProperty(PropertyId::SpeechServiceConnection_EndpointId); + } + + /// + /// Sets the authorization token to connect to the service. + /// Note: The caller needs to ensure that the authorization token is valid. 
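// Usage sketch (illustrative, not part of the vendored header): setting the
// recognition language plus the synthesis language, voice and audio format on
// an existing SpeechConfig. The voice name is a placeholder.
#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech;

static void ConfigureLanguagesSketch(std::shared_ptr<SpeechConfig> config)
{
    config->SetSpeechRecognitionLanguage("en-US");              // BCP-47 tag used for recognition
    config->SetSpeechSynthesisLanguage("en-US");                // default synthesis language
    config->SetSpeechSynthesisVoiceName("en-US-JennyNeural");   // placeholder voice name
    config->SetSpeechSynthesisOutputFormat(SpeechSynthesisOutputFormat::Riff16Khz16BitMonoPcm);
}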
Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. + /// As configuration values are copied when creating a new recognizer, the new token value will not apply to recognizers that have already been created. + /// For recognizers that have been created before, you need to set authorization token of the corresponding recognizer + /// to refresh the token. Otherwise, the recognizers will encounter errors during recognition. + /// + /// The authorization token. + void SetAuthorizationToken(const SPXSTRING& token) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceAuthorization_Token), nullptr, Utils::ToUTF8(token).c_str()); + } + + /// + /// Gets the authorization token to connect to the service. + /// + /// The authorization token. + SPXSTRING GetAuthorizationToken() const + { + return GetProperty(PropertyId::SpeechServiceAuthorization_Token); + } + + /// + /// Gets the subscription key that is used to create Speech Recognizer or Intent Recognizer or Translation Recognizer or Speech Synthesizer. + /// + /// The subscription key. + SPXSTRING GetSubscriptionKey() const + { + return GetProperty(PropertyId::SpeechServiceConnection_Key); + } + + /// + /// Gets the region key that used to create Speech Recognizer or Intent Recognizer or Translation Recognizer or speech Synthesizer. + /// + /// Region. + SPXSTRING GetRegion() const + { + return GetProperty(PropertyId::SpeechServiceConnection_Region); + } + + /// + /// Gets speech recognition output format (simple or detailed). + /// Note: This output format is for speech recognition result, use to get synthesized audio output format. + /// + /// Speech recognition output format. + OutputFormat GetOutputFormat() const + { + auto result = GetProperty(PropertyId::SpeechServiceResponse_RequestDetailedResultTrueFalse); + return result == Utils::ToSPXString(TrueString) ? OutputFormat::Detailed : OutputFormat::Simple; + } + + /// + /// Sets speech recognition output format (simple or detailed). + /// Note: This output format is for speech recognition result, use to set synthesized audio output format. + /// + /// Speech recognition output format + void SetOutputFormat(OutputFormat format) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceResponse_RequestDetailedResultTrueFalse), nullptr, + format == OutputFormat::Detailed ? Utils::ToUTF8(TrueString) : Utils::ToUTF8(FalseString)); + } + + /// + /// Sets profanity option. + /// Added in version 1.5.0. + /// + /// Profanity option value. + void SetProfanity(ProfanityOption profanity) + { + SPX_THROW_ON_FAIL(speech_config_set_profanity(m_hconfig, (SpeechConfig_ProfanityOption)profanity)); + } + + /// + /// Enables audio logging in service. + /// Added in version 1.5.0. + /// + /// + /// Audio and content logs are stored either in Microsoft-owned storage, or in your own storage account linked + /// to your Cognitive Services subscription (Bring Your Own Storage (BYOS) enabled Speech resource). + /// + void EnableAudioLogging() + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_EnableAudioLogging), nullptr, TrueString); + } + + /// + /// Includes word-level timestamps in response result. + /// Added in version 1.5.0. + /// + void RequestWordLevelTimestamps() + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceResponse_RequestWordLevelTimestamps), nullptr, TrueString); + } + + /// + /// Enables dictation mode. 
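// Usage sketch (illustrative, not part of the vendored header): requesting
// detailed recognition results with word-level timestamps, masking profanity
// and opting in to service-side audio logging, using the setters above.
#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech;

static void ConfigureRecognitionOutputSketch(std::shared_ptr<SpeechConfig> config)
{
    config->SetOutputFormat(OutputFormat::Detailed);   // n-best list instead of the top hypothesis only
    config->RequestWordLevelTimestamps();              // per-word offsets in the detailed result
    config->SetProfanity(ProfanityOption::Masked);     // mask profanity in transcripts
    config->EnableAudioLogging();                      // store audio/content logs on the service side
}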
Only supported in speech continuous recognition. + /// Added in version 1.5.0. + /// + void EnableDictation() + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_RecoMode), nullptr, "DICTATION"); + } + + /// + /// Sets proxy configuration + /// Added in version 1.1.0 + /// + /// Note: Proxy functionality is not available on macOS. This function will have no effect on this platform. + /// + /// The host name of the proxy server, without the protocol scheme (`http://`) + /// The port number of the proxy server + /// The user name of the proxy server + /// The password of the proxy server + void SetProxy(const SPXSTRING& proxyHostName, uint32_t proxyPort, const SPXSTRING& proxyUserName = SPXSTRING(), const SPXSTRING& proxyPassword = SPXSTRING()) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, proxyHostName.empty()); + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, proxyPort == 0); + + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_ProxyHostName), nullptr, + Utils::ToUTF8(proxyHostName).c_str()); + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_ProxyPort), nullptr, + std::to_string(proxyPort).c_str()); + if (!proxyUserName.empty()) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_ProxyUserName), nullptr, + Utils::ToUTF8(proxyUserName).c_str()); + } + if (!proxyPassword.empty()) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_ProxyPassword), nullptr, + Utils::ToUTF8(proxyPassword).c_str()); + } + } + + /// + /// Sets a property value by name. + /// + /// The property name. + /// The property value. + void SetProperty(const SPXSTRING& name, const SPXSTRING& value) + { + property_bag_set_string(m_propertybag, -1, Utils::ToUTF8(name).c_str(), Utils::ToUTF8(value).c_str()); + } + + /// + /// Gets a property value by name. + /// + /// The parameter name. + /// The property value. + SPXSTRING GetProperty(const SPXSTRING& name) const + { + const char* value = property_bag_get_string(m_propertybag, -1, Utils::ToUTF8(name).c_str(), ""); + return Utils::ToSPXString(Utils::CopyAndFreePropertyString(value)); + } + + /// + /// Gets a property value by ID. + /// + /// The parameter id. + /// The property value. + SPXSTRING GetProperty(PropertyId id) const + { + const char* value = property_bag_get_string(m_propertybag, static_cast(id), nullptr, ""); + return Utils::ToSPXString(Utils::CopyAndFreePropertyString(value)); + } + + /// + /// Sets a property value by ID. + /// + /// The property id. + /// The property value. + void SetProperty(PropertyId id, const SPXSTRING& value) + { + property_bag_set_string(m_propertybag, static_cast(id), nullptr, Utils::ToUTF8(value).c_str()); + } + + /// + /// Sets a property value that will be passed to service using the specified channel. + /// Added in version 1.5.0. + /// + /// The property name. + /// The property value. + /// The channel used to pass the specified property to service. + void SetServiceProperty(const SPXSTRING& name, const SPXSTRING& value, ServicePropertyChannel channel) + { + SPX_THROW_ON_FAIL(speech_config_set_service_property(m_hconfig, Utils::ToUTF8(name).c_str(), Utils::ToUTF8(value).c_str(), (SpeechConfig_ServicePropertyChannel)channel)); + } + + /// + /// Destructs the object. + /// + virtual ~SpeechConfig() + { + speech_config_release(m_hconfig); + property_bag_release(m_propertybag); + } + +protected: + + /*! 
\cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit SpeechConfig(SPXSPEECHCONFIGHANDLE hconfig) + :m_hconfig(hconfig) + { + SPX_THROW_ON_FAIL(speech_config_get_property_bag(hconfig, &m_propertybag)); + } + + /// + /// Internal member variable that holds the speech config + /// + SPXSPEECHCONFIGHANDLE m_hconfig; + + /// + /// Internal member variable that holds the properties of the speech config + /// + SPXPROPERTYBAGHANDLE m_propertybag; + + /*! \endcond */ + +private: + DISABLE_COPY_AND_MOVE(SpeechConfig); + + }; + +}}} diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_recognition_eventargs.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_recognition_eventargs.h new file mode 100644 index 0000000..f9106a6 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_recognition_eventargs.h @@ -0,0 +1,169 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_recognition_eventargs.h: Public API declarations for SpeechRecognitionEventArgs C++ class +// + +#pragma once +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Class for speech recognition event arguments. +/// +class SpeechRecognitionEventArgs : public RecognitionEventArgs +{ +private: + + SPXEVENTHANDLE m_hevent; + std::shared_ptr m_result; + +public: + + /// + /// Constructor. + /// + /// Event handle + explicit SpeechRecognitionEventArgs(SPXEVENTHANDLE hevent) : + RecognitionEventArgs(hevent), + m_hevent(hevent), + m_result(std::make_shared(ResultHandleFromEventHandle(hevent))), + Result(m_result) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + }; + + /// + virtual ~SpeechRecognitionEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + SPX_THROW_ON_FAIL(recognizer_event_handle_release(m_hevent)); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + /// + /// Speech recognition event result. + /// + std::shared_ptr Result; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +protected: +#endif + + /*! \cond PROTECTED */ + + /// + /// Speech recognition event result. + /// + std::shared_ptr GetResult() const { return m_result; } + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(SpeechRecognitionEventArgs); + + SPXRESULTHANDLE ResultHandleFromEventHandle(SPXEVENTHANDLE hevent) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(recognizer_recognition_event_get_result(hevent, &hresult)); + return hresult; + } +}; + + +/// +/// Class for speech recognition canceled event arguments. +/// +class SpeechRecognitionCanceledEventArgs final : public SpeechRecognitionEventArgs +{ +private: + + std::shared_ptr m_cancellation; + CancellationReason m_cancellationReason; + CancellationErrorCode m_errorCode; + +public: + + /// + /// Constructor. 
+ /// + /// Event handle + explicit SpeechRecognitionCanceledEventArgs(SPXEVENTHANDLE hevent) : + SpeechRecognitionEventArgs(hevent), + m_cancellation(CancellationDetails::FromResult(GetResult())), + m_cancellationReason(m_cancellation->Reason), + m_errorCode(m_cancellation->ErrorCode), + Reason(m_cancellationReason), + ErrorCode(m_errorCode), + ErrorDetails(m_cancellation->ErrorDetails) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + + /// + virtual ~SpeechRecognitionCanceledEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-private-field" +#endif + /// + /// The reason the result was canceled. + /// + const CancellationReason& Reason; + + /// + /// The error code in case of an unsuccessful recognition ( is set to Error). + /// If Reason is not Error, ErrorCode is set to NoError. + /// Added in version 1.1.0. + /// + const CancellationErrorCode& ErrorCode; + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + + /// + /// The error message in case of an unsuccessful recognition ( is set to Error). + /// + const SPXSTRING ErrorDetails; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +private: +#endif + /// + /// CancellationDetails. + /// + std::shared_ptr GetCancellationDetails() const { return m_cancellation; } + +private: + + DISABLE_DEFAULT_CTORS(SpeechRecognitionCanceledEventArgs); +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_recognition_model.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_recognition_model.h new file mode 100644 index 0000000..7f7bb70 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_recognition_model.h @@ -0,0 +1,108 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_recognition_model.h: Public API declarations for SpeechRecognitionModel C++ class +// + +#pragma once +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Speech recognition model information. +/// +class SpeechRecognitionModel +{ +private: + + /// + /// Internal member variable that holds the model handle. + /// + SPXSPEECHRECOMODELHANDLE m_hmodel; + +public: + + /// + /// Creates a new instance using the provided handle. + /// + /// Model handle. + explicit SpeechRecognitionModel(SPXSPEECHRECOMODELHANDLE hmodel) : + m_hmodel(hmodel), + Name(m_name), + Locales(m_locales), + Path(m_path), + Version(m_version) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + m_name = Utils::ToSPXString(Utils::CopyAndFreePropertyString(speech_recognition_model_get_name(m_hmodel))); + m_locales = Utils::Split(Utils::CopyAndFreePropertyString(speech_recognition_model_get_locales(m_hmodel)), '|'); + m_path = Utils::ToSPXString(Utils::CopyAndFreePropertyString(speech_recognition_model_get_path(m_hmodel))); + m_version = Utils::ToSPXString(Utils::CopyAndFreePropertyString(speech_recognition_model_get_version(m_hmodel))); + } + + /// + /// Explicit conversion operator. + /// + /// Model handle. + explicit operator SPXSPEECHRECOMODELHANDLE() { return m_hmodel; } + + /// + /// Destructor. 
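// Usage sketch (illustrative, not part of the vendored header): a handler for
// the canceled event args declared above. It is attached through the Canceled
// event signal that SpeechRecognizer inherits from AsyncRecognizer (declared
// outside this hunk); the recognizer instance is assumed to already exist.
#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech;

static void HookCanceledSketch(std::shared_ptr<SpeechRecognizer> recognizer)
{
    recognizer->Canceled += [](const SpeechRecognitionCanceledEventArgs& e)
    {
        if (e.Reason == CancellationReason::Error)
        {
            // e.ErrorCode narrows the failure down (auth, connection, throttling, ...);
            // e.ErrorDetails carries the service's error message.
        }
        else if (e.Reason == CancellationReason::EndOfStream)
        {
            // The input stream ran out of audio; not an error.
        }
    };
}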
+ /// + ~SpeechRecognitionModel() + { + speech_recognition_model_handle_release(m_hmodel); + } + + /// + /// Model name. + /// + const SPXSTRING& Name; + + /// + /// Locales of the model in BCP-47 format. + /// + const std::vector& Locales; + + /// + /// Model path (only valid for offline models). + /// + const SPXSTRING& Path; + + /// + /// Model version. + /// + const SPXSTRING& Version; + +private: + + DISABLE_DEFAULT_CTORS(SpeechRecognitionModel); + + /// + /// Internal member variable that holds the model name. + /// + SPXSTRING m_name; + + /// + /// Internal member variable that holds the model locales. + /// + std::vector m_locales; + + /// + /// Internal member variable that holds the model path. + /// + SPXSTRING m_path; + + /// + /// Internal member variable that holds the model version. + /// + SPXSTRING m_version; +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_recognition_result.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_recognition_result.h new file mode 100644 index 0000000..24f0f37 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_recognition_result.h @@ -0,0 +1,45 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_recognition_result.h: Public API declarations for SpeechRecognitionResult C++ class +// + +#pragma once +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Base class for speech recognition results. +/// +class SpeechRecognitionResult : public RecognitionResult +{ +public: + + explicit SpeechRecognitionResult(SPXRESULTHANDLE hresult) : + RecognitionResult(hresult) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p) -- resultid=%s; reason=0x%x; text=%s", __FUNCTION__, (void*)this, (void*)Handle, Utils::ToUTF8(ResultId).c_str(), Reason, Utils::ToUTF8(Text).c_str()); + } + + virtual ~SpeechRecognitionResult() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)Handle); + } + + +private: + DISABLE_DEFAULT_CTORS(SpeechRecognitionResult); +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_recognizer.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_recognizer.h new file mode 100644 index 0000000..1c4e122 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_recognizer.h @@ -0,0 +1,351 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_recognizer.h: Public API declarations for SpeechRecognizer C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +class Session; + +/// +/// Class for speech recognizers. 
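// Usage sketch (illustrative, not part of the vendored header): reading the
// metadata exposed by SpeechRecognitionModel above. How the model list is
// obtained (for example from an embedded speech configuration) is outside this
// hunk, so the vector is simply passed in.
#include <speechapi_cxx.h>
#include <vector>
using namespace Microsoft::CognitiveServices::Speech;

static void ListModelsSketch(const std::vector<std::shared_ptr<SpeechRecognitionModel>>& models)
{
    for (const auto& model : models)
    {
        const auto& name = model->Name;         // display name
        const auto& version = model->Version;   // model version string
        const auto& locales = model->Locales;   // parsed from the '|'-separated locale list
        const auto& path = model->Path;         // set only for offline (on-device) models
        (void)name; (void)version; (void)locales; (void)path;
    }
}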
+/// +class SpeechRecognizer final : public AsyncRecognizer +{ +public: + + using BaseType = AsyncRecognizer; + + /// + /// Create a speech recognizer from a speech config + /// + /// Speech configuration. + /// A smart pointer wrapped speech recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechconfig, std::nullptr_t) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_speech_recognizer_from_config( + &hreco, + HandleOrInvalid(speechconfig), + HandleOrInvalid(nullptr))); + return std::make_shared(hreco); + } + + /// + /// Create a speech recognizer from an embedded speech config. + /// Added in version 1.19.0 + /// + /// Embedded speech configuration. + /// A smart pointer wrapped speech recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechConfig, std::nullptr_t) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_speech_recognizer_from_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(nullptr))); + return std::make_shared(hreco); + } + + /// + /// Create a speech recognizer from a hybrid speech config. + /// + /// Hybrid speech configuration. + /// A smart pointer wrapped speech recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechConfig, std::nullptr_t) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_speech_recognizer_from_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(nullptr))); + return std::make_shared(hreco); + } + + /// + /// Create a speech recognizer from a speech config and audio config. + /// + /// Speech configuration. + /// Audio configuration. + /// A smart pointer wrapped speech recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechconfig, std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_speech_recognizer_from_config( + &hreco, + HandleOrInvalid(speechconfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Create a speech recognizer from an embedded speech config and audio config. + /// Added in version 1.19.0 + /// + /// Embedded speech configuration. + /// Audio configuration. + /// A smart pointer wrapped speech recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechConfig, std::shared_ptr audioConfig = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_speech_recognizer_from_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(audioConfig))); + return std::make_shared(hreco); + } + + /// + /// Create a speech recognizer from a hybrid speech config and audio config. + /// + /// Hybrid speech configuration. + /// Audio configuration. + /// A smart pointer wrapped speech recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechConfig, std::shared_ptr audioConfig = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_speech_recognizer_from_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(audioConfig))); + return std::make_shared(hreco); + } + + /// + /// Create a speech recognizer from a speech config, auto detection source language config and audio config + /// Added in 1.8.0 + /// + /// Speech configuration. + /// Auto detection source language config. + /// Audio configuration. + /// A smart pointer wrapped speech recognizer pointer. 
+ static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr autoDetectSourceLangConfig, + std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_speech_recognizer_from_auto_detect_source_lang_config( + &hreco, + HandleOrInvalid(speechconfig), + HandleOrInvalid(autoDetectSourceLangConfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Create a speech recognizer from an embedded speech config, auto detection source language config and audio config + /// Added in 1.20.0 + /// + /// Embedded speech configuration. + /// Auto detection source language config. + /// Audio configuration. + /// A smart pointer wrapped speech recognizer pointer. + static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr autoDetectSourceLangConfig, + std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_speech_recognizer_from_auto_detect_source_lang_config( + &hreco, + HandleOrInvalid(speechconfig), + HandleOrInvalid(autoDetectSourceLangConfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Create a speech recognizer from a hybrid speech config, auto detection source language config and audio config + /// + /// Hybrid speech configuration. + /// Auto detection source language config. + /// Audio configuration. + /// A smart pointer wrapped speech recognizer pointer. + static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr autoDetectSourceLangConfig, + std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_speech_recognizer_from_auto_detect_source_lang_config( + &hreco, + HandleOrInvalid(speechconfig), + HandleOrInvalid(autoDetectSourceLangConfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Create a speech recognizer from a speech config, source language config and audio config + /// Added in 1.8.0 + /// + /// Speech configuration. + /// Source language config. + /// Audio configuration. + /// A smart pointer wrapped speech recognizer pointer. + static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr sourceLanguageConfig, + std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_speech_recognizer_from_source_lang_config( + &hreco, + HandleOrInvalid(speechconfig), + HandleOrInvalid(sourceLanguageConfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Create a speech recognizer from a speech config, source language and audio config + /// Added in 1.8.0 + /// + /// Speech configuration. + /// Source language. + /// Audio configuration. + /// A smart pointer wrapped speech recognizer pointer. + static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + const SPXSTRING& sourceLanguage, + std::shared_ptr audioInput = nullptr) + { + return FromConfig(speechconfig, SourceLanguageConfig::FromLanguage(sourceLanguage), audioInput); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Recognizer handle. + explicit SpeechRecognizer(SPXRECOHANDLE hreco) : BaseType(hreco), Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /// + /// Destructor. 
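// Usage sketch (illustrative, not part of the vendored header): pinning the
// recognition language when creating a recognizer, using the source-language
// overload declared above. Key, region and audio file name are placeholders.
#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Audio;

static std::shared_ptr<SpeechRecognizer> CreateGermanRecognizerSketch()
{
    auto config = SpeechConfig::FromSubscription("YourSubscriptionKey", "YourServiceRegion");
    auto audio = AudioConfig::FromWavFileInput("german_audio.wav");

    // Equivalent to passing SourceLanguageConfig::FromLanguage("de-DE") explicitly;
    // the overloads above that take an AutoDetectSourceLanguageConfig cover the
    // case where the language is not known up front.
    return SpeechRecognizer::FromConfig(config, "de-DE", audio);
}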
+ /// + ~SpeechRecognizer() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + TermRecognizer(); + } + + /// + /// Starts speech recognition, and returns after a single utterance is recognized. The end of a + /// single utterance is determined by listening for silence at the end or until a maximum of about 30 + /// seconds of audio is processed. The task returns the recognition text as result. + /// Note: Since RecognizeOnceAsync() returns only a single utterance, it is suitable only for single + /// shot recognition like command or query. + /// For long-running multi-utterance recognition, use StartContinuousRecognitionAsync() instead. + /// + /// Future containing result value (a shared pointer to SpeechRecognitionResult) + /// of the asynchronous speech recognition. + /// + std::future> RecognizeOnceAsync() override + { + return BaseType::RecognizeOnceAsyncInternal(); + } + + /// + /// Asynchronously initiates continuous speech recognition operation. + /// + /// An empty future. + std::future StartContinuousRecognitionAsync() override + { + return BaseType::StartContinuousRecognitionAsyncInternal(); + } + + /// + /// Asynchronously terminates ongoing continuous speech recognition operation. + /// + /// An empty future. + std::future StopContinuousRecognitionAsync() override + { + return BaseType::StopContinuousRecognitionAsyncInternal(); + } + + /// + /// Asynchronously initiates keyword recognition operation. + /// + /// Specifies the keyword model to be used. + /// An empty future. + std::future StartKeywordRecognitionAsync(std::shared_ptr model) override + { + return BaseType::StartKeywordRecognitionAsyncInternal(model); + } + + /// + /// Asynchronously terminates keyword recognition operation. + /// + /// An empty future. + std::future StopKeywordRecognitionAsync() override + { + return BaseType::StopKeywordRecognitionAsyncInternal(); + } + + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; + + /// + /// Gets the endpoint ID of a customized speech model that is used for speech recognition. + /// + /// the endpoint ID of a customized speech model that is used for speech recognition + SPXSTRING GetEndpointId() + { + return Properties.GetProperty(PropertyId::SpeechServiceConnection_EndpointId, SPXSTRING()); + } + + /// + /// Sets the authorization token that will be used for connecting to the service. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. + /// Otherwise, the recognizer will encounter errors during recognition. + /// + /// The authorization token. + void SetAuthorizationToken(const SPXSTRING& token) + { + Properties.SetProperty(PropertyId::SpeechServiceAuthorization_Token, token); + } + + /// + /// Gets the authorization token. 
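// Usage sketch (illustrative, not part of the vendored header): single-shot
// recognition from the default microphone with RecognizeOnceAsync() declared
// above. Key and region are placeholders.
#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Audio;

static void RecognizeOnceSketch()
{
    auto config = SpeechConfig::FromSubscription("YourSubscriptionKey", "YourServiceRegion");
    auto audio = AudioConfig::FromDefaultMicrophoneInput();
    auto recognizer = SpeechRecognizer::FromConfig(config, audio);

    auto result = recognizer->RecognizeOnceAsync().get();   // blocks until one utterance completes
    if (result->Reason == ResultReason::RecognizedSpeech)
    {
        const auto& text = result->Text;   // recognized transcript
        (void)text;
    }
    // ResultReason::NoMatch and ResultReason::Canceled are the other outcomes to handle;
    // for long-running transcription prefer StartContinuousRecognitionAsync() as noted above.
}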
+ /// + /// Authorization token + SPXSTRING GetAuthorizationToken() + { + return Properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token, SPXSTRING()); + } + +private: + DISABLE_DEFAULT_CTORS(SpeechRecognizer); + friend class Microsoft::CognitiveServices::Speech::Session; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesis_bookmark_eventargs.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesis_bookmark_eventargs.h new file mode 100644 index 0000000..46535e8 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesis_bookmark_eventargs.h @@ -0,0 +1,81 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class for speech synthesis bookmark event arguments. +/// Added in version 1.16.0 +/// +class SpeechSynthesisBookmarkEventArgs : public EventArgs +{ +private: + + SPXEVENTHANDLE m_hEvent; + +public: + + /// + /// Constructor. + /// + /// Event handle + explicit SpeechSynthesisBookmarkEventArgs(SPXEVENTHANDLE hevent) : + m_hEvent(hevent), + ResultId(m_resultId), + Text(m_text) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hEvent); + synthesizer_bookmark_event_get_values(hevent, &m_audioOffset); + AudioOffset = m_audioOffset; + + m_text = Utils::ToSPXString(Utils::CopyAndFreePropertyString(synthesizer_event_get_text(hevent))); + + const size_t maxCharCount = 256; + char sz[maxCharCount + 1]; + SPX_THROW_ON_FAIL(synthesizer_event_get_result_id(hevent, sz, maxCharCount)); + m_resultId = Utils::ToSPXString(sz); + }; + + /// + virtual ~SpeechSynthesisBookmarkEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hEvent); + SPX_THROW_ON_FAIL(synthesizer_event_handle_release(m_hEvent)); + } + + /// + /// Unique result id. + /// Added in version 1.25.0 + /// + const SPXSTRING& ResultId; + + /// + /// Audio offset, in ticks (100 nanoseconds). + /// + uint64_t AudioOffset; + + /// + /// The bookmark text. + /// + const SPXSTRING& Text; + +private: + + DISABLE_DEFAULT_CTORS(SpeechSynthesisBookmarkEventArgs); + + SPXSTRING m_resultId; + uint64_t m_audioOffset{ 0 }; + SPXSTRING m_text; +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesis_eventargs.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesis_eventargs.h new file mode 100644 index 0000000..b6d8321 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesis_eventargs.h @@ -0,0 +1,70 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_synthesis_eventargs.h: Public API declarations for SpeechSynthesisEventArgs C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Class for speech synthesis event arguments. 
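// Usage sketch (illustrative, not part of the vendored header): consuming the
// bookmark event args declared above. The handler is attached to the
// synthesizer's BookmarkReached event signal (declared in the SpeechSynthesizer
// header, outside this hunk); bookmarks are produced by bookmark tags in the
// SSML being synthesized.
#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech;

static void HookBookmarksSketch(std::shared_ptr<SpeechSynthesizer> synthesizer)
{
    synthesizer->BookmarkReached += [](const SpeechSynthesisBookmarkEventArgs& e)
    {
        auto offsetMs = e.AudioOffset / 10000;   // AudioOffset is in 100-nanosecond ticks
        const auto& mark = e.Text;               // the bookmark's mark text
        (void)offsetMs; (void)mark;
    };
}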
+/// Added in version 1.4.0 +/// +class SpeechSynthesisEventArgs : public EventArgs +{ +private: + SPXEVENTHANDLE m_hevent; + std::shared_ptr m_result; + +public: + + /// + /// Constructor. + /// + /// Event handle + explicit SpeechSynthesisEventArgs(SPXEVENTHANDLE hevent) : + m_hevent(hevent), + m_result(std::make_shared(ResultHandleFromEventHandle(hevent))), + Result(m_result) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + }; + + /// + virtual ~SpeechSynthesisEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + SPX_THROW_ON_FAIL(synthesizer_event_handle_release(m_hevent)); + } + + /// + /// Speech synthesis event result. + /// + std::shared_ptr Result; + + +private: + + DISABLE_DEFAULT_CTORS(SpeechSynthesisEventArgs); + + SPXRESULTHANDLE ResultHandleFromEventHandle(SPXEVENTHANDLE hevent) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(synthesizer_synthesis_event_get_result(hevent, &hresult)); + return hresult; + } + +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesis_request.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesis_request.h new file mode 100644 index 0000000..3790906 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesis_request.h @@ -0,0 +1,239 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_config.h: Public API declarations for SpeechConfig C++ class +// +#pragma once + +#include + +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class that defines the speech synthesis request. +/// This class is in preview and is subject to change. +/// Added in version 1.37.0 +/// +class SpeechSynthesisRequest +{ +public: + + /// + /// Represents an input stream for speech synthesis request. + /// Note: This class is in preview and may be subject to change in future versions. + /// Added in version 1.37.0 + /// + class InputStream + { + public: + friend class SpeechSynthesisRequest; + /// + /// Send a piece of text to the speech synthesis service to be synthesized. + /// + /// The text piece to be synthesized. + void Write(const SPXSTRING &text) + { + m_parent.SendTextPiece(text); + } + + /// + /// Finish the text input. + /// + void Close() + { + m_parent.FinishInput(); + } + + private: + InputStream(SpeechSynthesisRequest& parent) + : m_parent(parent) + { + } + SpeechSynthesisRequest& m_parent; + DISABLE_COPY_AND_MOVE(InputStream); + }; + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXREQUESTHANDLE() const { return m_hrequest; } + + /// + /// Creates a speech synthesis request, with text streaming is enabled. + /// + /// A shared pointer to the new speech synthesis request instance. + static std::shared_ptr NewTextStreamingRequest() + { + SPXREQUESTHANDLE hrequest = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_synthesis_request_create(true, false, nullptr, 0, &hrequest)); + + auto ptr = new SpeechSynthesisRequest(hrequest); + return std::shared_ptr(ptr); + } + + /// + /// Gets the input stream for the speech synthesis request. 
+ /// + /// The input stream. + InputStream& GetInputStream() + { + return m_inputStream; + } + + /// + /// Sets the pitch of the synthesized speech. + /// + /// The pitch of the synthesized speech. + void SetPitch(const SPXSTRING& pitch) { + SetProperty(PropertyId::SpeechSynthesisRequest_Pitch, pitch); + } + + /// + /// Set the speaking rate. + /// + /// The speaking rate. + void SetRate(const SPXSTRING& rate) { + SetProperty(PropertyId::SpeechSynthesisRequest_Rate, rate); + } + + /// + /// Set the speaking volume. + /// + /// The speaking volume. + void SetVolume(const SPXSTRING& volume) { + SetProperty(PropertyId::SpeechSynthesisRequest_Volume, volume); + } + + /// + /// Destructs the object. + /// + virtual ~SpeechSynthesisRequest() + { + speech_synthesis_request_release(m_hrequest); + property_bag_release(m_propertybag); + } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit SpeechSynthesisRequest(SPXREQUESTHANDLE hrequest) + :m_hrequest(hrequest), + m_inputStream(*this) + { + SPX_THROW_ON_FAIL(speech_synthesis_request_get_property_bag(hrequest, &m_propertybag)); + } + + /// + /// Internal member variable that holds the speech synthesis request handle. + /// + SPXREQUESTHANDLE m_hrequest; + + /// + /// Internal member variable that holds the properties of the speech synthesis request. + /// + SPXPROPERTYBAGHANDLE m_propertybag; + + InputStream m_inputStream; + + /// + /// Send a piece of text to the speech synthesis service to be synthesized, used in text streaming mode. + /// + /// The text piece to be synthesized. + void SendTextPiece(const SPXSTRING& text) + { + auto u8text = Utils::ToUTF8(text); + SPX_THROW_ON_FAIL(speech_synthesis_request_send_text_piece(m_hrequest, u8text.c_str(), static_cast(u8text.length()))); + } + + /// + /// Finish the text input, used in text streaming mode. + /// + void FinishInput() + { + SPX_THROW_ON_FAIL(speech_synthesis_request_finish(m_hrequest)); + } + + /// + /// Sets a property value by ID. + /// + /// The property id. + /// The property value. + void SetProperty(PropertyId id, const SPXSTRING& value) + { + property_bag_set_string(m_propertybag, static_cast(id), nullptr, Utils::ToUTF8(value).c_str()); + } + + /*! \endcond */ + +private: + DISABLE_COPY_AND_MOVE(SpeechSynthesisRequest); + + + +}; + +/// +/// Class that defines the speech synthesis request for personal voice (aka.ms/azureai/personal-voice). +/// This class is in preview and is subject to change. +/// Added in version 1.39.0 +/// +class PersonalVoiceSynthesisRequest: public SpeechSynthesisRequest +{ +public: + + /// + /// Creates a personal voice speech synthesis request, with text streaming is enabled. + /// + /// The name of the personal voice to be used for synthesis. + /// The name of the model. E.g., DragonLatestNeural or PhoenixLatestNeural + /// A shared pointer to the new speech synthesis request instance. + static std::shared_ptr NewTextStreamingRequest(const std::string& personalVoiceName, const std::string& modelName) + { + SPXREQUESTHANDLE hrequest = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_synthesis_request_create(true, false, nullptr, 0, &hrequest)); + + SPX_THROW_ON_FAIL(speech_synthesis_request_set_voice(hrequest, nullptr, personalVoiceName.c_str(), modelName.c_str())); + + auto ptr = new PersonalVoiceSynthesisRequest(hrequest); + return std::shared_ptr(ptr); + } + + /// + /// Destructs the object. 
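// Usage sketch (illustrative, not part of the vendored header): feeding text
// incrementally through the streaming request declared above. Handing the
// request to a SpeechSynthesizer is done via the request-accepting speak
// overload declared in the synthesizer header (assumed here, not part of this
// hunk), so that step is only indicated in comments; chunk contents and
// prosody values are placeholders.
#include <speechapi_cxx.h>
using namespace Microsoft::CognitiveServices::Speech;

static void StreamTextSketch()
{
    auto request = SpeechSynthesisRequest::NewTextStreamingRequest();
    request->SetRate("1.2");     // optional prosody tweaks (placeholder values)
    request->SetVolume("80");

    // auto resultFuture = synthesizer->SpeakAsync(request);   // assumed overload, see note above

    auto& input = request->GetInputStream();
    input.Write("First chunk of text produced by the application, ");
    input.Write("for example tokens arriving from an LLM response.");
    input.Close();               // signals that no more text will follow

    // auto result = resultFuture.get();
}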
+ /// + virtual ~PersonalVoiceSynthesisRequest() + { + + } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit PersonalVoiceSynthesisRequest(SPXREQUESTHANDLE hrequest) + :SpeechSynthesisRequest(hrequest) + {} + + /*! \endcond */ + +private: + DISABLE_COPY_AND_MOVE(PersonalVoiceSynthesisRequest); + +}; + +}}} diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesis_result.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesis_result.h new file mode 100644 index 0000000..145e6ec --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesis_result.h @@ -0,0 +1,310 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_synthesis_result.h: Public API declarations for SpeechSynthesisResult C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Contains information about result from text-to-speech synthesis. +/// Added in version 1.4.0 +/// +class SpeechSynthesisResult +{ +private: + + /// + /// Internal member variable that holds the tts result handle. + /// + SPXRESULTHANDLE m_hresult; + + /*! \cond PRIVATE */ + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRESULTHANDLE hresult) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + synth_result_get_property_bag(hresult, &hpropbag); + return hpropbag; + }()) + { + } + }; + + /// + /// Internal member variable that holds the properties associating to the tts result. + /// + PrivatePropertyCollection m_properties; + + /*! \endcond */ + +public: + + /// + /// Creates a new instance using the provided handle. + /// + /// Result handle. + explicit SpeechSynthesisResult(SPXRESULTHANDLE hresult) : + m_hresult(hresult), + m_properties(hresult), + ResultId(m_resultId), + Reason(m_reason), + AudioDuration(m_audioDuration), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + const size_t maxCharCount = 1024; + char sz[maxCharCount + 1]; + + SPX_THROW_ON_FAIL(synth_result_get_result_id(hresult, sz, maxCharCount)); + m_resultId = Utils::ToSPXString(sz); + + Result_Reason resultReason; + SPX_THROW_ON_FAIL(synth_result_get_reason(hresult, &resultReason)); + m_reason = static_cast(resultReason); + + uint32_t audioLength = 0; + uint64_t audioDuration = 0; + SPX_THROW_ON_FAIL(synth_result_get_audio_length_duration(m_hresult, &audioLength, &audioDuration)); + m_audioDuration = std::chrono::milliseconds(audioDuration); + + m_audioData = std::make_shared>(audioLength); + + if (audioLength > 0) + { + uint32_t filledSize = 0; + SPX_THROW_ON_FAIL(synth_result_get_audio_data(m_hresult, m_audioData->data(), audioLength, &filledSize)); + } + } + + /// + /// Gets the size of synthesized audio in bytes. + /// + /// Length of synthesized audio + uint32_t GetAudioLength() + { + return static_cast(m_audioData->size()); + } + + /// + /// Gets the synthesized audio. + /// + /// Synthesized audio data + std::shared_ptr> GetAudioData() + { + return m_audioData; + } + + /// + /// Explicit conversion operator. 
+ /// + /// A handle. + explicit operator SPXRESULTHANDLE() { return m_hresult; } + + /// + /// Destructor. + /// + ~SpeechSynthesisResult() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + synthesizer_result_handle_release(m_hresult); + } + + /// + /// Unique result id. + /// + const SPXSTRING& ResultId; + + /// + /// Reason of the synthesis result. + /// + const ResultReason& Reason; + + /// + /// Time duration of the synthesized audio, only valid for completed synthsis. + /// Added in version 1.21.0 + /// + const std::chrono::milliseconds& AudioDuration; + + /// + /// Collection of additional SpeechSynthesisResult properties. + /// + const PropertyCollection& Properties; + +private: + + DISABLE_DEFAULT_CTORS(SpeechSynthesisResult); + + /// + /// Internal member variable that holds the result ID. + /// + SPXSTRING m_resultId; + + /// + /// Internal member variable that holds the result reason. + /// + ResultReason m_reason; + + /// + /// Internal member variable that holds the audio data + /// + std::shared_ptr> m_audioData; + + /// + /// Internal member variable that holds the audio duration + // + std::chrono::milliseconds m_audioDuration; +}; + + +/// +/// Contains detailed information about why a result was canceled. +/// Added in version 1.4.0 +/// +class SpeechSynthesisCancellationDetails +{ +private: + + CancellationReason m_reason; + CancellationErrorCode m_errorCode; + +public: + + /// + /// Creates an instance of SpeechSynthesisCancellationDetails object for the canceled SpeechSynthesisResult. + /// + /// The result that was canceled. + /// A shared pointer to CancellationDetails. + static std::shared_ptr FromResult(std::shared_ptr result) + { + auto ptr = new SpeechSynthesisCancellationDetails(result.get()); + auto cancellation = std::shared_ptr(ptr); + return cancellation; + } + + /// + /// Creates an instance of SpeechSynthesisCancellationDetails object for the canceled SpeechSynthesisResult. + /// + /// The audio data stream that was canceled. + /// A shared pointer to CancellationDetails. + static std::shared_ptr FromStream(std::shared_ptr stream) + { + auto ptr = new SpeechSynthesisCancellationDetails(stream.get()); + auto cancellation = std::shared_ptr(ptr); + return cancellation; + } + + /// + /// The reason the result was canceled. + /// + const CancellationReason& Reason; + + /// + /// The error code in case of an unsuccessful speech synthesis ( is set to Error). + /// If Reason is not Error, ErrorCode is set to NoError. + /// + const CancellationErrorCode& ErrorCode; + + /// + /// The error message in case of an unsuccessful speech synthesis ( is set to Error). 
+ /// + const SPXSTRING ErrorDetails; + +private: + + DISABLE_DEFAULT_CTORS(SpeechSynthesisCancellationDetails); + + SpeechSynthesisCancellationDetails(SpeechSynthesisResult* result) : + m_reason(GetCancellationReason(result)), + m_errorCode(GetCancellationErrorCode(result)), + Reason(m_reason), + ErrorCode(m_errorCode), + ErrorDetails(result->Properties.GetProperty(PropertyId::CancellationDetails_ReasonDetailedText)) + { + } + + SpeechSynthesisCancellationDetails(AudioDataStream* stream) : + m_reason(GetCancellationReason(stream)), + m_errorCode(GetCancellationErrorCode(stream)), + Reason(m_reason), + ErrorCode(m_errorCode), + ErrorDetails(stream->Properties.GetProperty(PropertyId::CancellationDetails_ReasonDetailedText)) + { + } + + Speech::CancellationReason GetCancellationReason(SpeechSynthesisResult* result) + { + Result_CancellationReason reason = CancellationReason_Error; + + SPXRESULTHANDLE hresult = (SPXRESULTHANDLE)(*result); + SPX_IFFAILED_THROW_HR(synth_result_get_reason_canceled(hresult, &reason)); + + return static_cast(reason); + } + + Speech::CancellationErrorCode GetCancellationErrorCode(SpeechSynthesisResult* result) + { + Result_CancellationErrorCode errorCode = CancellationErrorCode_NoError; + + SPXRESULTHANDLE hresult = (SPXRESULTHANDLE)(*result); + SPX_IFFAILED_THROW_HR(synth_result_get_canceled_error_code(hresult, &errorCode)); + + return static_cast(errorCode); + } + + Speech::CancellationReason GetCancellationReason(AudioDataStream* stream) + { + Result_CancellationReason reason = CancellationReason_Error; + + SPXAUDIOSTREAMHANDLE hstream = (SPXAUDIOSTREAMHANDLE)(*stream); + SPX_IFFAILED_THROW_HR(audio_data_stream_get_reason_canceled(hstream, &reason)); + + return static_cast(reason); + } + + Speech::CancellationErrorCode GetCancellationErrorCode(AudioDataStream* stream) + { + Result_CancellationErrorCode errorCode = CancellationErrorCode_NoError; + + SPXAUDIOSTREAMHANDLE hstream = (SPXAUDIOSTREAMHANDLE)(*stream); + SPX_IFFAILED_THROW_HR(audio_data_stream_get_canceled_error_code(hstream, &errorCode)); + + return static_cast(errorCode); + } +}; + +inline std::shared_ptr AudioDataStream::FromResult(std::shared_ptr result) +{ + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + if (result != nullptr) + { + hresult = (SPXRESULTHANDLE)(*result.get()); + } + + SPXAUDIOSTREAMHANDLE hstream = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_data_stream_create_from_result(&hstream, hresult)); + + auto stream = new AudioDataStream(hstream); + return std::shared_ptr(stream); +} + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesis_viseme_eventargs.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesis_viseme_eventargs.h new file mode 100644 index 0000000..726b95f --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesis_viseme_eventargs.h @@ -0,0 +1,88 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class for speech synthesis viseme event arguments. +/// Added in version 1.16.0 +/// +class SpeechSynthesisVisemeEventArgs : public EventArgs +{ +private: + + SPXEVENTHANDLE m_hEvent; + +public: + + /// + /// Constructor. 
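Editor's note: a minimal sketch of consuming `SpeechSynthesisResult` and `SpeechSynthesisCancellationDetails` as declared above. It is not part of the vendored headers and assumes a `synthesizer` created through one of the `SpeechSynthesizer::FromConfig` factories shown later in this patch.

```cpp
// Illustrative sketch only; assumes `synthesizer` was created via SpeechSynthesizer::FromConfig.
#include <speechapi_cxx.h>
#include <iostream>

using namespace Microsoft::CognitiveServices::Speech;

void SynthesizeAndInspect(const std::shared_ptr<SpeechSynthesizer>& synthesizer)
{
    auto result = synthesizer->SpeakTextAsync("Hello world").get();

    if (result->Reason == ResultReason::SynthesizingAudioCompleted)
    {
        auto audio = result->GetAudioData();                    // raw synthesized audio bytes
        std::cout << "Got " << result->GetAudioLength() << " bytes, "
                  << result->AudioDuration.count() << " ms of audio.\n";
    }
    else if (result->Reason == ResultReason::Canceled)
    {
        auto details = SpeechSynthesisCancellationDetails::FromResult(result);
        if (details->Reason == CancellationReason::Error)
        {
            std::cout << "Error " << static_cast<int>(details->ErrorCode)
                      << ": " << details->ErrorDetails << "\n";
        }
    }
}
```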
+ /// + /// Event handle + explicit SpeechSynthesisVisemeEventArgs(SPXEVENTHANDLE hevent) : + m_hEvent(hevent), + ResultId(m_resultId), + Animation(m_animation) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hEvent); + synthesizer_viseme_event_get_values(hevent, &m_audioOffset, &m_visemeId); + AudioOffset = m_audioOffset; + VisemeId = m_visemeId; + + m_animation = Utils::ToSPXString(Utils::CopyAndFreePropertyString(synthesizer_viseme_event_get_animation(hevent))); + + const size_t maxCharCount = 256; + char sz[maxCharCount + 1]; + SPX_THROW_ON_FAIL(synthesizer_event_get_result_id(hevent, sz, maxCharCount)); + m_resultId = Utils::ToSPXString(sz); + }; + + /// + virtual ~SpeechSynthesisVisemeEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hEvent); + SPX_THROW_ON_FAIL(synthesizer_event_handle_release(m_hEvent)); + } + + /// + /// Unique result id. + /// Added in version 1.25.0 + /// + const SPXSTRING& ResultId; + + /// + /// Audio offset, in ticks (100 nanoseconds). + /// + uint64_t AudioOffset; + + /// + /// Viseme ID. + /// + uint32_t VisemeId; + + /// + /// Animation, could be svg or other format. + /// + const SPXSTRING& Animation; + +private: + + DISABLE_DEFAULT_CTORS(SpeechSynthesisVisemeEventArgs); + + SPXSTRING m_resultId; + uint64_t m_audioOffset{ 0 }; + uint32_t m_visemeId { 0 }; + SPXSTRING m_animation; +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesis_word_boundary_eventargs.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesis_word_boundary_eventargs.h new file mode 100644 index 0000000..c44fe60 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesis_word_boundary_eventargs.h @@ -0,0 +1,120 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_synthesis_word_boundary_eventargs.h: Public API declarations for SpeechSynthesisWordBoundaryEventArgs C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Class for speech synthesis word boundary event arguments. +/// Added in version 1.7.0 +/// +class SpeechSynthesisWordBoundaryEventArgs : public EventArgs +{ +private: + + SPXEVENTHANDLE m_hEvent; + +public: + + /// + /// Constructor. 
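Editor's note: the viseme event args above are what the plugin would consume for lip-sync. The sketch below is an editorial example, not part of the headers; it assumes an already created `synthesizer` and simply logs each viseme.

```cpp
// Illustrative sketch only; handlers must be attached before synthesis starts.
#include <speechapi_cxx.h>
#include <iostream>

using namespace Microsoft::CognitiveServices::Speech;

void HookVisemes(const std::shared_ptr<SpeechSynthesizer>& synthesizer)
{
    synthesizer->VisemeReceived += [](const SpeechSynthesisVisemeEventArgs& e)
    {
        // AudioOffset is in 100-nanosecond ticks; divide by 10,000 for milliseconds.
        std::cout << "Viseme " << e.VisemeId
                  << " at " << e.AudioOffset / 10000 << " ms";
        if (!e.Animation.empty())
        {
            std::cout << " (animation payload attached)";
        }
        std::cout << "\n";
    };
}
```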
+ /// + /// Event handle + explicit SpeechSynthesisWordBoundaryEventArgs(SPXEVENTHANDLE hevent) : + m_hEvent(hevent), + ResultId(m_resultId), + Duration(m_duration), + Text(m_text), + BoundaryType(m_boundaryType) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hEvent); + uint64_t durationTicks; + SpeechSynthesis_BoundaryType boundaryType = SpeechSynthesis_BoundaryType_Word; + synthesizer_word_boundary_event_get_values(hevent, &m_audioOffset, &durationTicks, &m_textOffset, &m_wordLength, &boundaryType); + m_duration = std::chrono::milliseconds(durationTicks / static_cast(10000)); + m_boundaryType = static_cast(boundaryType); + AudioOffset = m_audioOffset; + TextOffset = m_textOffset; + WordLength = m_wordLength; + m_text = Utils::ToSPXString(Utils::CopyAndFreePropertyString(synthesizer_event_get_text(hevent))); + + const size_t maxCharCount = 256; + char sz[maxCharCount + 1]; + SPX_THROW_ON_FAIL(synthesizer_event_get_result_id(hevent, sz, maxCharCount)); + m_resultId = Utils::ToSPXString(sz); + }; + + /// + virtual ~SpeechSynthesisWordBoundaryEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hEvent); + SPX_THROW_ON_FAIL(synthesizer_event_handle_release(m_hEvent)); + } + + /// + /// Unique result id. + /// Added in version 1.25.0 + /// + const SPXSTRING& ResultId; + + /// + /// Word boundary audio offset. + /// + uint64_t AudioOffset; + + /// + /// Time duration of the audio. + /// Added in version 1.21.0 + /// + const std::chrono::milliseconds& Duration; + + /// + /// Word boundary text offset. + /// + uint32_t TextOffset; + + /// + /// Word boundary word length. + /// + uint32_t WordLength; + + /// + /// The text. + /// Added in version 1.21.0 + /// + const SPXSTRING& Text; + + /// + /// Word boundary type. + /// Added in version 1.21.0 + /// + const SpeechSynthesisBoundaryType& BoundaryType; + +private: + + DISABLE_DEFAULT_CTORS(SpeechSynthesisWordBoundaryEventArgs); + + SPXSTRING m_resultId; + uint64_t m_audioOffset{ 0 }; + std::chrono::milliseconds m_duration{ 0 }; + uint32_t m_textOffset{ 0 }; + uint32_t m_wordLength{ 0 }; + SPXSTRING m_text; + SpeechSynthesisBoundaryType m_boundaryType{ SpeechSynthesisBoundaryType::Word }; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesizer.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesizer.h new file mode 100644 index 0000000..c8b8ce8 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_synthesizer.h @@ -0,0 +1,793 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_synthesizer.h: Public API declarations for SpeechSynthesizer C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class for speech synthesizer. +/// Updated in version 1.14.0 +/// +class SpeechSynthesizer : public std::enable_shared_from_this +{ + friend class Connection; +private: + + /// + /// Internal member variable that holds the speech synthesizer handle. 
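Editor's note: a companion sketch for the word-boundary event args above; editorial only, assuming an existing `synthesizer`.

```cpp
// Illustrative sketch only; attach the handler before calling any Speak*/StartSpeaking* method.
#include <speechapi_cxx.h>
#include <iostream>

using namespace Microsoft::CognitiveServices::Speech;

void HookWordBoundaries(const std::shared_ptr<SpeechSynthesizer>& synthesizer)
{
    synthesizer->WordBoundary += [](const SpeechSynthesisWordBoundaryEventArgs& e)
    {
        const char* kind =
            e.BoundaryType == SpeechSynthesisBoundaryType::Punctuation ? "punctuation" :
            e.BoundaryType == SpeechSynthesisBoundaryType::Sentence    ? "sentence"    : "word";

        std::cout << kind << " \"" << e.Text << "\""
                  << ", text offset " << e.TextOffset
                  << ", length " << e.WordLength
                  << ", audio offset " << e.AudioOffset / 10000 << " ms"   // ticks -> ms
                  << ", duration " << e.Duration.count() << " ms\n";
    };
}
```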
+ /// + SPXSYNTHHANDLE m_hsynth; + + std::shared_ptr m_audioConfig; + + /*! \cond PRIVATE */ + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXSYNTHHANDLE hsynth) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + synthesizer_get_property_bag(hsynth, &hpropbag); + return hpropbag; + }()) + { + } + }; + + /// + /// Internal member variable that holds the properties of the speech synthesizer + /// + PrivatePropertyCollection m_properties; + + /*! \endcond */ + +public: + + /// + /// Create a speech synthesizer from a speech config. + /// + /// Speech configuration. + /// A smart pointer wrapped speech synthesizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechconfig, std::nullptr_t) + { + SPXSYNTHHANDLE hsynth = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(::synthesizer_create_speech_synthesizer_from_config( + &hsynth, + Utils::HandleOrInvalid(speechconfig), + SPXHANDLE_INVALID)); + + auto ptr = new SpeechSynthesizer(hsynth); + return std::shared_ptr(ptr); + } + + /// + /// Create a speech synthesizer from an embedded speech config. + /// Added in version 1.19.0 + /// + /// Embedded speech configuration. + /// A smart pointer wrapped speech synthesizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechconfig, std::nullptr_t) + { + SPXSYNTHHANDLE hsynth = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_create_speech_synthesizer_from_config( + &hsynth, + Utils::HandleOrInvalid(speechconfig), + SPXHANDLE_INVALID)); + auto ptr = new SpeechSynthesizer(hsynth); + return std::shared_ptr(ptr); + } + + /// + /// Create a speech synthesizer from a hybrid speech config. + /// + /// Hybrid speech configuration. + /// A smart pointer wrapped speech synthesizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechconfig, std::nullptr_t) + { + SPXSYNTHHANDLE hsynth = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_create_speech_synthesizer_from_config( + &hsynth, + Utils::HandleOrInvalid(speechconfig), + SPXHANDLE_INVALID)); + auto ptr = new SpeechSynthesizer(hsynth); + return std::shared_ptr(ptr); + } + + /// + /// Create a speech synthesizer from a speech config and audio config. + /// + /// Speech configuration. + /// Audio configuration. + /// A smart pointer wrapped speech synthesizer pointer. + static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr audioconfig = Audio::AudioConfig::FromDefaultSpeakerOutput()) + { + SPXSYNTHHANDLE hsynth = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(::synthesizer_create_speech_synthesizer_from_config( + &hsynth, + Utils::HandleOrInvalid(speechconfig), + Utils::HandleOrInvalid(audioconfig))); + + auto ptr = new SpeechSynthesizer(hsynth); + auto synthesizer = std::shared_ptr(ptr); + synthesizer->m_audioConfig = audioconfig; + return synthesizer; + } + + /// + /// Create a speech synthesizer from an embedded speech config and audio config. + /// Added in version 1.19.0 + /// + /// Embedded speech configuration. + /// Audio configuration. + /// A smart pointer wrapped speech synthesizer pointer. 
+ static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr audioconfig = Audio::AudioConfig::FromDefaultSpeakerOutput()) + { + SPXSYNTHHANDLE hsynth = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_create_speech_synthesizer_from_config( + &hsynth, + Utils::HandleOrInvalid(speechconfig), + Utils::HandleOrInvalid(audioconfig))); + auto ptr = new SpeechSynthesizer(hsynth); + auto synthesizer = std::shared_ptr(ptr); + synthesizer->m_audioConfig = audioconfig; + return synthesizer; + } + + /// + /// Create a speech synthesizer from a hybrid speech config and audio config. + /// + /// Hybrid speech configuration. + /// Audio configuration. + /// A smart pointer wrapped speech synthesizer pointer. + static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr audioconfig = Audio::AudioConfig::FromDefaultSpeakerOutput()) + { + SPXSYNTHHANDLE hsynth = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_create_speech_synthesizer_from_config( + &hsynth, + Utils::HandleOrInvalid(speechconfig), + Utils::HandleOrInvalid(audioconfig))); + auto ptr = new SpeechSynthesizer(hsynth); + auto synthesizer = std::shared_ptr(ptr); + synthesizer->m_audioConfig = audioconfig; + return synthesizer; + } + + /// + /// Create a speech synthesizer from a speech config, auto detection source language config and audio config + /// Added in 1.13.0 + /// + /// Speech configuration. + /// Auto detection source language config. + /// Audio configuration. + /// A smart pointer wrapped speech synthesizer pointer. + static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr autoDetectSourceLangConfig, + std::shared_ptr audioconfig = Audio::AudioConfig::FromDefaultSpeakerOutput()) + { + SPXSYNTHHANDLE hsynth; + + SPX_THROW_ON_FAIL(::synthesizer_create_speech_synthesizer_from_auto_detect_source_lang_config( + &hsynth, + Utils::HandleOrInvalid(speechconfig), + Utils::HandleOrInvalid(autoDetectSourceLangConfig), + Utils::HandleOrInvalid(audioconfig))); + + auto ptr = new SpeechSynthesizer(hsynth); + auto synthesizer = std::shared_ptr(ptr); + synthesizer->m_audioConfig = audioconfig; + return synthesizer; + } + + /// + /// Execute the speech synthesis on plain text, synchronously. + /// + /// The plain text for synthesis. + /// A smart pointer wrapping a speech synthesis result. + std::shared_ptr SpeakText(const std::string& text) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_speak_text(m_hsynth, text.data(), static_cast(text.length()), &hresult)); + + return std::make_shared(hresult); + } + + /// + /// Execute the speech synthesis on plain text, synchronously. + /// Added in 1.9.0 + /// + /// The plain text for synthesis. + /// A smart pointer wrapping a speech synthesis result. + std::shared_ptr SpeakText(const std::wstring& text) + { + return SpeakText(Utils::ToUTF8(text)); + } + + /// + /// Execute the speech synthesis on SSML, synchronously. + /// + /// The SSML for synthesis. + /// A smart pointer wrapping a speech synthesis result. + std::shared_ptr SpeakSsml(const std::string& ssml) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_speak_ssml(m_hsynth, ssml.data(), static_cast(ssml.length()), &hresult)); + + return std::make_shared(hresult); + } + + /// + /// Execute the speech synthesis on SSML, synchronously. + /// Added in version 1.9.0 + /// + /// The SSML for synthesis. + /// A smart pointer wrapping a speech synthesis result. 
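Editor's note: the `FromConfig` overloads above accept an optional audio config. A minimal editorial sketch of routing synthesis to a wav file instead of the default speaker follows; key, region, voice, and file name are placeholders.

```cpp
// Illustrative sketch only; key, region, voice, and output file name are placeholders.
#include <speechapi_cxx.h>

using namespace Microsoft::CognitiveServices::Speech;

int main()
{
    auto config = SpeechConfig::FromSubscription("<subscription-key>", "<region>");
    config->SetSpeechSynthesisVoiceName("en-US-JennyNeural");       // placeholder voice

    // Route output to a wav file instead of the default speaker.
    auto fileOutput = Audio::AudioConfig::FromWavFileOutput("greeting.wav");
    auto synthesizer = SpeechSynthesizer::FromConfig(config, fileOutput);

    synthesizer->SpeakTextAsync("Hello from the wav file sink.").get();
    return 0;
}
```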
+ std::shared_ptr SpeakSsml(const std::wstring& ssml) + { + return SpeakSsml(Utils::ToUTF8(ssml)); + } + + /// + /// Execute the speech synthesis on request, synchronously. + /// This API could be used to synthesize speech from an input text stream, to reduce latency for text generation scenarios. + /// Note: the feature is in preview and is subject to change. + /// Added in version 1.37.0 + /// + /// The synthesis request. + /// A smart pointer wrapping a speech synthesis result. + std::shared_ptr Speak(const std::shared_ptr& request) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_speak_request(m_hsynth, Utils::HandleOrInvalid(request), &hresult)); + + return std::make_shared(hresult); + } + + /// + /// Execute the speech synthesis on plain text, asynchronously. + /// + /// The plain text for synthesis. + /// An asynchronous operation representing the synthesis. It returns a value of as result. + std::future> SpeakTextAsync(const std::string& text) + { + auto keepAlive = this->shared_from_this(); + + auto future = std::async(std::launch::async, [keepAlive, this, text]() -> std::shared_ptr { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPXASYNCHANDLE hasync = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_speak_text_async(m_hsynth, text.data(), static_cast(text.length()), &hasync)); + SPX_EXITFN_ON_FAIL(::synthesizer_speak_async_wait_for(hasync, UINT32_MAX, &hresult)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = synthesizer_async_handle_release(hasync); + SPX_REPORT_ON_FAIL(releaseHr); + + return std::make_shared(hresult); + }); + + return future; + } + + /// + /// Execute the speech synthesis on plain text, asynchronously. + /// Added in version 1.9.0 + /// + /// The plain text for synthesis. + /// An asynchronous operation representing the synthesis. It returns a value of as result. + std::future> SpeakTextAsync(const std::wstring& text) + { + return SpeakTextAsync(Utils::ToUTF8(text)); + } + + /// + /// Execute the speech synthesis on SSML, asynchronously. + /// + /// The SSML for synthesis. + /// An asynchronous operation representing the synthesis. It returns a value of as result. + std::future> SpeakSsmlAsync(const std::string& ssml) + { + auto keepAlive = this->shared_from_this(); + + auto future = std::async(std::launch::async, [keepAlive, this, ssml]() -> std::shared_ptr { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPXASYNCHANDLE hasync = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_speak_ssml_async(m_hsynth, ssml.data(), static_cast(ssml.length()), &hasync)); + SPX_EXITFN_ON_FAIL(::synthesizer_speak_async_wait_for(hasync, UINT32_MAX, &hresult)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = synthesizer_async_handle_release(hasync); + SPX_REPORT_ON_FAIL(releaseHr); + + return std::make_shared(hresult); + }); + + return future; + } + + /// + /// Execute the speech synthesis on SSML, asynchronously. + /// Added in version 1.9.0 + /// + /// The SSML for synthesis. + /// An asynchronous operation representing the synthesis. It returns a value of as result. + std::future> SpeakSsmlAsync(const std::wstring& ssml) + { + return SpeakSsmlAsync(Utils::ToUTF8(ssml)); + } + + /// + /// Execute the speech synthesis on on request, synchronously. + /// This API could be used to synthesize speech from an input text stream, to reduce latency for text generation scenarios. + /// Note: the feature is in preview and is subject to change. + /// Added in version 1.37.0 + /// + /// The synthesis request. 
+ /// An asynchronous operation representing the synthesis. It returns a value of as result. + std::future> SpeakAsync(const std::shared_ptr& request) + { + auto keepAlive = this->shared_from_this(); + + auto future = std::async(std::launch::async, [keepAlive, this, request]() -> std::shared_ptr { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPXASYNCHANDLE hasync = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_speak_request_async(m_hsynth, Utils::HandleOrInvalid(request), &hasync)); + SPX_EXITFN_ON_FAIL(::synthesizer_speak_async_wait_for(hasync, UINT32_MAX, &hresult)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = synthesizer_async_handle_release(hasync); + SPX_REPORT_ON_FAIL(releaseHr); + + return std::make_shared(hresult); + }); + + return future; + } + + /// + /// Start the speech synthesis on plain text, synchronously. + /// + /// The plain text for synthesis. + /// A smart pointer wrapping a speech synthesis result. + std::shared_ptr StartSpeakingText(const std::string& text) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_start_speaking_text(m_hsynth, text.data(), static_cast(text.length()), &hresult)); + + return std::make_shared(hresult); + } + + /// + /// Start the speech synthesis on plain text, synchronously. + /// Added in version 1.9.0 + /// + /// The plain text for synthesis. + /// A smart pointer wrapping a speech synthesis result. + std::shared_ptr StartSpeakingText(const std::wstring& text) + { + return StartSpeakingText(Utils::ToUTF8(text)); + } + + /// + /// Start the speech synthesis on SSML, synchronously. + /// + /// The SSML for synthesis. + /// A smart pointer wrapping a speech synthesis result. + std::shared_ptr StartSpeakingSsml(const std::string& ssml) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_start_speaking_ssml(m_hsynth, ssml.data(), static_cast(ssml.length()), &hresult)); + + return std::make_shared(hresult); + } + + /// + /// Start the speech synthesis on SSML, synchronously. + /// Added in version 1.9.0 + /// + /// The SSML for synthesis. + /// A smart pointer wrapping a speech synthesis result. + std::shared_ptr StartSpeakingSsml(const std::wstring& ssml) + { + return StartSpeakingSsml(Utils::ToUTF8(ssml)); + } + + /// + /// Start the speech synthesis on on request, synchronously. + /// This API could be used to synthesize speech from an input text stream, to reduce latency for text generation scenarios. + /// Note: the feature is in preview and is subject to change. + /// Added in version 1.37.0 + /// + /// The synthesis request. + /// A smart pointer wrapping a speech synthesis result. + std::shared_ptr StartSpeaking(const std::shared_ptr& request) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_start_speaking_request(m_hsynth, Utils::HandleOrInvalid(request), &hresult)); + + return std::make_shared(hresult); + } + + /// + /// Start the speech synthesis on plain text, asynchronously. + /// + /// The plain text for synthesis. + /// An asynchronous operation representing the synthesis. It returns a value of as result. 
+ std::future> StartSpeakingTextAsync(const std::string& text) + { + auto keepAlive = this->shared_from_this(); + + auto future = std::async(std::launch::async, [keepAlive, this, text]() -> std::shared_ptr { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPXASYNCHANDLE hasync = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_start_speaking_text_async(m_hsynth, text.data(), static_cast(text.length()), &hasync)); + SPX_EXITFN_ON_FAIL(::synthesizer_speak_async_wait_for(hasync, UINT32_MAX, &hresult)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = synthesizer_async_handle_release(hasync); + SPX_REPORT_ON_FAIL(releaseHr); + + return std::make_shared(hresult); + }); + + return future; + } + + /// + /// Start the speech synthesis on plain text, asynchronously. + /// Added in version 1.9.0 + /// + /// The plain text for synthesis. + /// An asynchronous operation representing the synthesis. It returns a value of as result. + std::future> StartSpeakingTextAsync(const std::wstring& text) + { + return StartSpeakingTextAsync(Utils::ToUTF8(text)); + } + + /// + /// Start the speech synthesis on SSML, asynchronously. + /// + /// The SSML for synthesis. + /// An asynchronous operation representing the synthesis. It returns a value of as result. + std::future> StartSpeakingSsmlAsync(const std::string& ssml) + { + auto keepAlive = this->shared_from_this(); + + auto future = std::async(std::launch::async, [keepAlive, this, ssml]() -> std::shared_ptr { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPXASYNCHANDLE hasync = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_start_speaking_ssml_async(m_hsynth, ssml.data(), static_cast(ssml.length()), &hasync)); + SPX_EXITFN_ON_FAIL(::synthesizer_speak_async_wait_for(hasync, UINT32_MAX, &hresult)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = synthesizer_async_handle_release(hasync); + SPX_REPORT_ON_FAIL(releaseHr); + + return std::make_shared(hresult); + }); + + return future; + } + + /// + /// Start the speech synthesis on SSML, asynchronously. + /// Added in version 1.9.0 + /// + /// The SSML for synthesis. + /// An asynchronous operation representing the synthesis. It returns a value of as result. + std::future> StartSpeakingSsmlAsync(const std::wstring& ssml) + { + return StartSpeakingSsmlAsync(Utils::ToUTF8(ssml)); + } + + /// + /// Stop the speech synthesis, asynchronously. + /// Added in version 1.14.0 + /// + /// An empty future. + std::future StopSpeakingAsync() + { + auto keepAlive = this->shared_from_this(); + + auto future = std::async(std::launch::async, [keepAlive, this]() -> void { + SPXASYNCHANDLE hasyncStop = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_stop_speaking_async(m_hsynth, &hasyncStop)); + SPX_EXITFN_ON_FAIL(::synthesizer_stop_speaking_async_wait_for(hasyncStop, UINT32_MAX)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = synthesizer_async_handle_release(hasyncStop); + SPX_REPORT_ON_FAIL(releaseHr); + }); + + return future; + } + + /// + /// Get the available voices, asynchronously. + /// Added in version 1.16.0 + /// + /// Specify the locale of voices, in BCP-47 format; or leave it empty to get all available voices. + /// An asynchronous operation representing the voices list. It returns a value of as result. 
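Editor's note: `StartSpeakingTextAsync` resolves once streaming begins rather than when playback finishes, which is why `StopSpeakingAsync` exists for barge-in. An editorial sketch, assuming an existing `synthesizer`:

```cpp
// Illustrative sketch only; StartSpeakingTextAsync resolves when streaming starts, not when playback ends.
#include <speechapi_cxx.h>
#include <chrono>
#include <thread>

using namespace Microsoft::CognitiveServices::Speech;

void SpeakThenInterrupt(const std::shared_ptr<SpeechSynthesizer>& synthesizer)
{
    synthesizer->StartSpeakingTextAsync(
        "This is a fairly long sentence that we are going to cut off early.").get();

    std::this_thread::sleep_for(std::chrono::seconds(2));   // let some audio play
    synthesizer->StopSpeakingAsync().get();                 // discard whatever has not played yet
}
```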
+ std::future> GetVoicesAsync(const SPXSTRING& locale = SPXSTRING()) + { + const auto keepAlive = this->shared_from_this(); + + auto future = std::async(std::launch::async, [keepAlive, locale, this]() -> std::shared_ptr { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPXASYNCHANDLE hasync = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_get_voices_list_async(m_hsynth, Utils::ToUTF8(locale).c_str(), &hasync)); + SPX_EXITFN_ON_FAIL(::synthesizer_get_voices_list_async_wait_for(hasync, UINT32_MAX, &hresult)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = synthesizer_async_handle_release(hasync); + SPX_REPORT_ON_FAIL(releaseHr); + + return std::make_shared(hresult); + }); + + return future; + } + + /// + /// Sets the authorization token that will be used for connecting to the service. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. + /// Otherwise, the synthesizer will encounter errors while speech synthesis. + /// Added in version 1.7.0 + /// + /// The authorization token. + void SetAuthorizationToken(const SPXSTRING& token) + { + Properties.SetProperty(PropertyId::SpeechServiceAuthorization_Token, token); + } + + /// + /// Gets the authorization token. + /// Added in version 1.7.0 + /// + /// Authorization token + SPXSTRING GetAuthorizationToken() const + { + return Properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token, SPXSTRING()); + } + + /// + /// Destructor. + /// + ~SpeechSynthesizer() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + // Disconnect the event signals in reverse construction order + BookmarkReached.DisconnectAll(); + VisemeReceived.DisconnectAll(); + WordBoundary.DisconnectAll(); + SynthesisCanceled.DisconnectAll(); + SynthesisCompleted.DisconnectAll(); + Synthesizing.DisconnectAll(); + SynthesisStarted.DisconnectAll(); + + synthesizer_handle_release(m_hsynth); + } + + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; + + /// + /// The event signals that a speech synthesis result is received when the synthesis just started. + /// + EventSignal SynthesisStarted; + + /// + /// The event signals that a speech synthesis result is received while the synthesis is on going. + /// + EventSignal Synthesizing; + + /// + /// The event signals that a speech synthesis result is received when the synthesis completed. + /// + EventSignal SynthesisCompleted; + + /// + /// The event signals that a speech synthesis result is received when the synthesis is canceled. + /// + EventSignal SynthesisCanceled; + + /// + /// The event signals that a speech synthesis word boundary is received while the synthesis is on going. + /// Added in version 1.7.0 + /// + EventSignal WordBoundary; + + /// + /// The event signals that a speech synthesis viseme event is received while the synthesis is on going. + /// Added in version 1.16.0 + /// + EventSignal VisemeReceived; + + /// + /// The event signals that a speech synthesis bookmark is reached while the synthesis is on going. + /// Added in version 1.16.0 + /// + EventSignal BookmarkReached; + +private: + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Synthesizer handle. 
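Editor's note: a short editorial sketch of `GetVoicesAsync` and the `SynthesisVoicesResult` type declared later in this patch; not part of the headers, and it assumes an existing `synthesizer`.

```cpp
// Illustrative sketch only; pass an empty locale to list every available voice.
#include <speechapi_cxx.h>
#include <iostream>

using namespace Microsoft::CognitiveServices::Speech;

void ListVoices(const std::shared_ptr<SpeechSynthesizer>& synthesizer)
{
    auto voicesResult = synthesizer->GetVoicesAsync("en-US").get();
    if (voicesResult->Reason == ResultReason::VoicesListRetrieved)
    {
        for (const auto& voice : voicesResult->Voices)
        {
            std::cout << voice->ShortName << " (" << voice->Locale << ")\n";
        }
    }
    else
    {
        std::cout << "Voice listing failed: " << voicesResult->ErrorDetails << "\n";
    }
}
```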
+ explicit SpeechSynthesizer(SPXSYNTHHANDLE hsynth) : + m_hsynth(hsynth), + m_properties(hsynth), + Properties(m_properties), + SynthesisStarted(GetSpeechSynthesisEventConnectionsChangedCallback()), + Synthesizing(GetSpeechSynthesisEventConnectionsChangedCallback()), + SynthesisCompleted(GetSpeechSynthesisEventConnectionsChangedCallback()), + SynthesisCanceled(GetSpeechSynthesisEventConnectionsChangedCallback()), + WordBoundary(GetWordBoundaryEventConnectionsChangedCallback()), + VisemeReceived(GetVisemeEventConnectionsChangedCallback()), + BookmarkReached(GetBookmarkEventConnectionsChangedCallback()) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + std::function&)> GetSpeechSynthesisEventConnectionsChangedCallback() + { + return [=, this](const EventSignal& eventSignal) { + if (&eventSignal == &SynthesisStarted) + { + synthesizer_started_set_callback(m_hsynth, SynthesisStarted.IsConnected() ? FireEvent_SynthesisStarted : nullptr, this); + } + else if (&eventSignal == &Synthesizing) + { + synthesizer_synthesizing_set_callback(m_hsynth, Synthesizing.IsConnected() ? FireEvent_Synthesizing : nullptr, this); + } + else if (&eventSignal == &SynthesisCompleted) + { + synthesizer_completed_set_callback(m_hsynth, SynthesisCompleted.IsConnected() ? FireEvent_SynthesisCompleted : nullptr, this); + } + else if (&eventSignal == &SynthesisCanceled) + { + synthesizer_canceled_set_callback(m_hsynth, SynthesisCanceled.IsConnected() ? FireEvent_SynthesisCanceled : nullptr, this); + } + }; + } + + std::function&)> GetWordBoundaryEventConnectionsChangedCallback() + { + return [=, this](const EventSignal& eventSignal) { + if (&eventSignal == &WordBoundary) + { + synthesizer_word_boundary_set_callback(m_hsynth, WordBoundary.IsConnected() ? FireEvent_WordBoundary : nullptr, this); + } + }; + } + + std::function&)> GetVisemeEventConnectionsChangedCallback() + { + return [=, this](const EventSignal& eventSignal) { + if (&eventSignal == &VisemeReceived) + { + synthesizer_viseme_received_set_callback(m_hsynth, VisemeReceived.IsConnected() ? FireEvent_VisemeReceived : nullptr, this); + } + }; + } + + std::function&)> GetBookmarkEventConnectionsChangedCallback() + { + return [=, this](const EventSignal& eventSignal) { + if (&eventSignal == &BookmarkReached) + { + synthesizer_bookmark_reached_set_callback(m_hsynth, BookmarkReached.IsConnected() ? 
FireEvent_BookmarkReached : nullptr, this); + } + }; + } + + static void FireEvent_SynthesisStarted(SPXSYNTHHANDLE hsynth, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hsynth); + std::unique_ptr synthEvent{ new SpeechSynthesisEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SynthesisStarted.Signal(*synthEvent.get()); + } + + static void FireEvent_Synthesizing(SPXSYNTHHANDLE hsynth, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hsynth); + std::unique_ptr synthEvent{ new SpeechSynthesisEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Synthesizing.Signal(*synthEvent.get()); + } + + static void FireEvent_SynthesisCompleted(SPXSYNTHHANDLE hsynth, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hsynth); + std::unique_ptr synthEvent{ new SpeechSynthesisEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SynthesisCompleted.Signal(*synthEvent.get()); + } + + static void FireEvent_SynthesisCanceled(SPXSYNTHHANDLE hsynth, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hsynth); + std::unique_ptr synthEvent{ new SpeechSynthesisEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SynthesisCanceled.Signal(*synthEvent.get()); + } + + static void FireEvent_WordBoundary(SPXSYNTHHANDLE hsynth, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hsynth); + std::unique_ptr wordBoundaryEvent{ new SpeechSynthesisWordBoundaryEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->WordBoundary.Signal(*wordBoundaryEvent.get()); + } + + static void FireEvent_VisemeReceived(SPXSYNTHHANDLE hsynth, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hsynth); + std::unique_ptr visemeReceivedEvent{ new SpeechSynthesisVisemeEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->VisemeReceived.Signal(*visemeReceivedEvent.get()); + } + + static void FireEvent_BookmarkReached(SPXSYNTHHANDLE hsynth, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hsynth); + std::unique_ptr bookmarkReachedEvent{ new SpeechSynthesisBookmarkEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->BookmarkReached.Signal(*bookmarkReachedEvent.get()); + } +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_translation_config.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_translation_config.h new file mode 100644 index 0000000..1b7d785 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_translation_config.h @@ -0,0 +1,213 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once + +#include +#include + +#include "speechapi_c_common.h" +#include "speechapi_c_speech_config.h" +#include "speechapi_c_speech_translation_config.h" +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Translation { + +/// +/// Class that defines configurations for translation with speech input. 
+/// +class SpeechTranslationConfig final : public SpeechConfig +{ +public: + /// + /// Creates an instance of the speech translation config with specified subscription key and region. + /// + /// The subscription key. + /// The region name (see the region page). + /// Shared pointer to the speech translation config instance. + static std::shared_ptr FromSubscription(const SPXSTRING& subscription, const SPXSTRING& region) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_translation_config_from_subscription(&hconfig, Utils::ToUTF8(subscription).c_str(), Utils::ToUTF8(region).c_str())); + return std::shared_ptr(new SpeechTranslationConfig(hconfig)); + } + + /// + /// Creates an instance of the speech translation config with specified authorization token and region. + /// + /// The authorization token. + /// The region name (see the region page). + /// Shared pointer to the speech translation config instance. + static std::shared_ptr FromAuthorizationToken(const SPXSTRING& authToken, const SPXSTRING& region) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_translation_config_from_authorization_token(&hconfig, Utils::ToUTF8(authToken).c_str(), Utils::ToUTF8(region).c_str())); + return std::shared_ptr(new SpeechTranslationConfig(hconfig)); + } + + // + /// Creates an instance of the speech translation config with specified endpoint and subscription. + /// This method is intended only for users who use a non-standard service endpoint. + /// Note: The query parameters specified in the endpoint URI are not changed, even if they are set by any other APIs. + /// For example, if the recognition language is defined in URI as query parameter "language=de-DE", and also set by SetSpeechRecognitionLanguage("en-US"), + /// the language setting in URI takes precedence, and the effective language is "de-DE". + /// Only the parameters that are not specified in the endpoint URI can be set by other APIs. + /// Note: To use an authorization token with FromEndpoint, please use FromEndpoint(const SPXSTRING&), + /// and then call SetAuthorizationToken() on the created SpeechTranslationConfig instance. + /// + /// The service endpoint to connect to. + /// The subscription key. + /// Shared pointer to the new SpeechTranslationConfig instance. + static std::shared_ptr FromEndpoint(const SPXSTRING& endpoint, const SPXSTRING& subscription) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_translation_config_from_endpoint(&hconfig, Utils::ToUTF8(endpoint).c_str(), Utils::ToUTF8(subscription).c_str())); + return std::shared_ptr(new SpeechTranslationConfig(hconfig)); + } + + /// + /// Creates an instance of the speech translation config with specified endpoint. + /// This method is intended only for users who use a non-standard service endpoint. + /// Note: The query parameters specified in the endpoint URI are not changed, even if they are set by any other APIs. + /// For example, if the recognition language is defined in URI as query parameter "language=de-DE", and also set by SetSpeechRecognitionLanguage("en-US"), + /// the language setting in URI takes precedence, and the effective language is "de-DE". + /// Only the parameters that are not specified in the endpoint URI can be set by other APIs. + /// Note: if the endpoint requires a subscription key for authentication, please use FromEndpoint(const SPXSTRING&, const SPXSTRING&) to pass + /// the subscription key as parameter. 
+ /// To use an authorization token with FromEndpoint, use this method to create a SpeechTranslationConfig instance, and then + /// call SetAuthorizationToken() on the created SpeechTranslationConfig instance. + /// Note: Added in version 1.5.0. + /// + /// The service endpoint to connect to. + /// A shared pointer to the new SpeechTranslationConfig instance. + static std::shared_ptr FromEndpoint(const SPXSTRING& endpoint) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_translation_config_from_endpoint(&hconfig, Utils::ToUTF8(endpoint).c_str(), nullptr)); + return std::shared_ptr(new SpeechTranslationConfig(hconfig)); + } + + /// + /// Creates an instance of the speech translation config with specified host and subscription. + /// This method is intended only for users who use a non-default service host. Standard resource path will be assumed. + /// For services with a non-standard resource path or no path at all, use FromEndpoint instead. + /// Note: Query parameters are not allowed in the host URI and must be set by other APIs. + /// Note: To use an authorization token with FromHost, use FromHost(const SPXSTRING&), + /// and then call SetAuthorizationToken() on the created SpeechTranslationConfig instance. + /// Note: Added in version 1.8.0. + /// + /// The service host to connect to. Format is "protocol://host:port" where ":port" is optional. + /// The subscription key. + /// Shared pointer to the new SpeechTranslationConfig instance. + static std::shared_ptr FromHost(const SPXSTRING& host, const SPXSTRING& subscription) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_translation_config_from_host(&hconfig, Utils::ToUTF8(host).c_str(), Utils::ToUTF8(subscription).c_str())); + return std::shared_ptr(new SpeechTranslationConfig(hconfig)); + } + + /// + /// Creates an instance of the speech translation config with specified host. + /// This method is intended only for users who use a non-default service host. Standard resource path will be assumed. + /// For services with a non-standard resource path or no path at all, use FromEndpoint instead. + /// Note: Query parameters are not allowed in the host URI and must be set by other APIs. + /// Note: If the host requires a subscription key for authentication, use FromHost(const SPXSTRING&, const SPXSTRING&) to pass + /// the subscription key as parameter. + /// To use an authorization token with FromHost, use this method to create a SpeechTranslationConfig instance, and then + /// call SetAuthorizationToken() on the created SpeechTranslationConfig instance. + /// Note: Added in version 1.8.0. + /// + /// The service host to connect to. Format is "protocol://host:port" where ":port" is optional. + /// A shared pointer to the new SpeechTranslationConfig instance. + static std::shared_ptr FromHost(const SPXSTRING& host) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_translation_config_from_host(&hconfig, Utils::ToUTF8(host).c_str(), nullptr)); + return std::shared_ptr(new SpeechTranslationConfig(hconfig)); + } + + /// + /// Adds a target language for translation. + /// + /// Translation target language to add. + void AddTargetLanguage(const SPXSTRING& language) + { + SPX_THROW_ON_FAIL(speech_translation_config_add_target_language(m_hconfig, Utils::ToUTF8(language).c_str())); + } + + /// + /// Removes a target language for translation. + /// Added in release 1.7.0. + /// + /// Translation target language to remove. 
+ void RemoveTargetLanguage(const SPXSTRING& language) + { + SPX_THROW_ON_FAIL(speech_translation_config_remove_target_language(m_hconfig, Utils::ToUTF8(language).c_str())); + } + + /// + /// Sets a Category Id that will be passed to service. Category Id is used to find the custom model. + /// + /// Category Id to set. + void SetCustomModelCategoryId(const SPXSTRING& categoryId) + { + SPX_THROW_ON_FAIL(speech_translation_config_set_custom_model_category_id(m_hconfig, Utils::ToUTF8(categoryId).c_str())); + } + + /// + /// Gets target languages for translation. + /// + /// Vector of translation target languages. + std::vector GetTargetLanguages() const + { + std::vector result; + auto targetLanguages = Utils::ToUTF8(GetProperty(PropertyId::SpeechServiceConnection_TranslationToLanguages)); + if (targetLanguages.empty()) + return result; + + // Getting languages one by one. + std::stringstream languageStream(targetLanguages); + std::string token; + while (std::getline(languageStream, token, CommaDelim)) + { + result.push_back(Utils::ToSPXString(token)); + } + return result; + } + + /// + /// Sets output voice name. + /// + /// Voice name to set. + void SetVoiceName(const SPXSTRING& voice) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_TranslationVoice), nullptr, Utils::ToUTF8(voice).c_str()); + } + + /// + /// Gets output voice name. + /// + /// Output voice name. + SPXSTRING GetVoiceName() const + { + return GetProperty(PropertyId::SpeechServiceConnection_TranslationVoice); + } + +private: + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit SpeechTranslationConfig(SPXSPEECHCONFIGHANDLE hconfig) : SpeechConfig(hconfig) { } + + DISABLE_COPY_AND_MOVE(SpeechTranslationConfig); + +}; + +}}}} diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_translation_model.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_translation_model.h new file mode 100644 index 0000000..b94513d --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_speech_translation_model.h @@ -0,0 +1,120 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_translation_model.h: Public API declarations for SpeechTranslationModel C++ class +// + +#pragma once +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Speech translation model information. +/// +class SpeechTranslationModel +{ +private: + + /// + /// Internal member variable that holds the model handle. + /// + SPXSPEECHRECOMODELHANDLE m_hmodel; + +public: + + /// + /// Creates a new instance using the provided handle. + /// + /// Model handle. 
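Editor's note: a minimal editorial sketch of configuring `SpeechTranslationConfig` with the setters above; key, region, and voice name are placeholders, and the chosen translation voice is only illustrative.

```cpp
// Illustrative sketch only; key, region, and voice name are placeholders.
#include <speechapi_cxx.h>
#include <iostream>

using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Translation;

int main()
{
    auto config = SpeechTranslationConfig::FromSubscription("<subscription-key>", "<region>");
    config->SetSpeechRecognitionLanguage("en-US");   // language being spoken
    config->AddTargetLanguage("de");
    config->AddTargetLanguage("fr");
    config->SetVoiceName("de-DE-KatjaNeural");       // optional: also synthesize the translation

    for (const auto& lang : config->GetTargetLanguages())
    {
        std::cout << "translating to: " << lang << "\n";
    }
    return 0;
}
```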
+ explicit SpeechTranslationModel(SPXSPEECHRECOMODELHANDLE hmodel) : + m_hmodel(hmodel), + Name(m_name), + SourceLanguages(m_sourceLanguages), + TargetLanguages(m_targetLanguages), + Path(m_path), + Version(m_version) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + m_name = Utils::ToSPXString(Utils::CopyAndFreePropertyString(speech_translation_model_get_name(m_hmodel))); + m_sourceLanguages = Utils::Split(Utils::CopyAndFreePropertyString(speech_translation_model_get_source_languages(m_hmodel)), '|'); + m_targetLanguages = Utils::Split(Utils::CopyAndFreePropertyString(speech_translation_model_get_target_languages(m_hmodel)), '|'); + m_path = Utils::ToSPXString(Utils::CopyAndFreePropertyString(speech_translation_model_get_path(m_hmodel))); + m_version = Utils::ToSPXString(Utils::CopyAndFreePropertyString(speech_translation_model_get_version(m_hmodel))); + } + + /// + /// Explicit conversion operator. + /// + /// Model handle. + explicit operator SPXSPEECHRECOMODELHANDLE() { return m_hmodel; } + + /// + /// Destructor. + /// + ~SpeechTranslationModel() + { + speech_translation_model_handle_release(m_hmodel); + } + + /// + /// Model name. + /// + const SPXSTRING& Name; + + /// + /// Source languages that the model supports. + /// + const std::vector& SourceLanguages; + + /// + /// Target languages that the model supports. + /// + const std::vector& TargetLanguages; + + /// + /// Model path (only valid for offline models). + /// + const SPXSTRING& Path; + + /// + /// Model version. + /// + const SPXSTRING& Version; + +private: + + DISABLE_DEFAULT_CTORS(SpeechTranslationModel); + + /// + /// Internal member variable that holds the model name. + /// + SPXSTRING m_name; + + /// + /// Internal member variable that holds the model source languages. + /// + std::vector m_sourceLanguages; + + /// + /// Internal member variable that holds the model target languages. + /// + std::vector m_targetLanguages; + + /// + /// Internal member variable that holds the model path. + /// + SPXSTRING m_path; + + /// + /// Internal member variable that holds the model version. + /// + SPXSTRING m_version; +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_string_helpers.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_string_helpers.h new file mode 100644 index 0000000..4dcbb0e --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_string_helpers.h @@ -0,0 +1,137 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define SPXSTRING std::string +#define SPXSTRING_EMPTY std::string() + +namespace Microsoft{ +namespace CognitiveServices { +namespace Speech { +namespace Utils { + +namespace Details { + + inline std::string to_string(const std::wstring& value) + { + const auto size = pal_wstring_to_string(nullptr, value.c_str(), 0); + auto buffer = std::make_unique(size); + pal_wstring_to_string(buffer.get(), value.c_str(), size); + return std::string{ buffer.get() }; + } + + inline std::wstring to_string(const std::string& value) + { + const auto size = pal_string_to_wstring(nullptr, value.c_str(), 0); + auto buffer = std::make_unique(size); + pal_string_to_wstring(buffer.get(), value.c_str(), size); + return std::wstring{ buffer.get() }; + } +} + +inline std::string ToSPXString(const char* value) +{ + return value == nullptr ? "" : value; +} + +inline std::string ToSPXString(const std::string& value) +{ + return value; +} + +inline std::string ToUTF8(const std::wstring& value) +{ + return Details::to_string(value); +} + +inline std::string ToUTF8(const wchar_t* value) +{ + if (!value) + return ""; + return ToUTF8(std::wstring(value)); +} + +inline std::string ToUTF8(const std::string& value) +{ + return value; +} + +inline const char* ToUTF8(const char* value) +{ + return value; +} + +inline static std::string CopyAndFreePropertyString(const char* value) +{ + std::string copy = (value == nullptr) ? "" : value; + property_bag_free_string(value); + return copy; +} + +template +inline static size_t Find(const TCHAR* pStr, const size_t numChars, const TCHAR find, size_t startAt = 0) +{ + for (size_t i = startAt; i < numChars; i++) + { + TCHAR c = pStr[i]; + if (c == '\0') + { + break; + } + else if (c == find) + { + return i; + } + } + + return (std::numeric_limits::max)(); // weird syntax to avoid Windows min/max macros +} + +template +static std::vector> Split(const TCHAR* pStr, const size_t numChars, const TCHAR delim) +{ + std::vector> result; + if (pStr == nullptr) + { + return result; + } + + size_t start = 0; + size_t end = Find(pStr, numChars, delim, 0); + while (end != (std::numeric_limits::max)()) + { + result.push_back(std::basic_string(pStr + start, end - start)); + start = end + 1; + end = Find(pStr, numChars, delim, start); + } + + if (start < numChars) + { + result.push_back(std::basic_string(pStr + start, numChars - start)); + } + + return result; +} + +template +inline static std::vector> Split(const std::basic_string& str, const TCHAR delim) +{ + return Split(str.c_str(), str.size(), delim); +} + +}}}} diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_synthesis_voices_result.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_synthesis_voices_result.h new file mode 100644 index 0000000..cc287c3 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_synthesis_voices_result.h @@ -0,0 +1,165 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
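Editor's note: `Utils::Split` above is an internal helper (used, for example, to parse the `|`-separated language lists in the model headers). A tiny editorial illustration of its behavior:

```cpp
// Minimal illustration of the internal Utils::Split helper defined above (not public API).
#include <speechapi_cxx.h>
#include <cassert>

int main()
{
    using Microsoft::CognitiveServices::Speech::Utils::Split;
    auto parts = Split(std::string("de,fr,es"), ',');   // -> {"de", "fr", "es"}
    assert(parts.size() == 3 && parts[1] == "fr");
    return 0;
}
```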
+// +// speechapi_cxx_synthesis_voices_result.h: Public API declarations for SynthesisVoicesResult C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Contains information about result from voices list of speech synthesizers. +/// Added in version 1.16.0 +/// +class SynthesisVoicesResult +{ +private: + + /// + /// Internal member variable that holds the voices list result handle. + /// + SPXRESULTHANDLE m_hresult; + + /*! \cond PRIVATE */ + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRESULTHANDLE hresult) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + synthesis_voices_result_get_property_bag(hresult, &hpropbag); + return hpropbag; + }()) + { + } + }; + + /// + /// Internal member variable that holds the properties associating to the voices list result. + /// + PrivatePropertyCollection m_properties; + + /*! \endcond */ + +public: + + /// + /// Creates a new instance using the provided handle. + /// + /// Result handle. + explicit SynthesisVoicesResult(SPXRESULTHANDLE hresult) : + m_hresult(hresult), + m_properties(hresult), + Voices(m_voices), + ErrorDetails(m_errorDetails), + ResultId(m_resultId), + Reason(m_reason), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + uint32_t voiceNum; + SPX_THROW_ON_FAIL(::synthesis_voices_result_get_voice_num(hresult, &voiceNum)); + m_voices = std::vector>(voiceNum); + + for (uint32_t i = 0; i < voiceNum; ++i) + { + SPXRESULTHANDLE hVoice = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesis_voices_result_get_voice_info(m_hresult, i, &hVoice)); + m_voices[i] = std::make_shared(hVoice); + } + + const size_t maxCharCount = 1024; + char sz[maxCharCount + 1]; + SPX_THROW_ON_FAIL(synthesis_voices_result_get_result_id(hresult, sz, maxCharCount)); + m_resultId = Utils::ToSPXString(sz); + + Result_Reason resultReason = ResultReason_NoMatch; + SPX_THROW_ON_FAIL(synthesis_voices_result_get_reason(hresult, &resultReason)); + m_reason = static_cast(resultReason); + + m_errorDetails = m_properties.GetProperty(PropertyId::CancellationDetails_ReasonDetailedText); + } + + /// + /// Explicit conversion operator. + /// + /// A handle. + explicit operator SPXRESULTHANDLE() { return m_hresult; } + + /// + /// Destructor. + /// + ~SynthesisVoicesResult() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + synthesizer_result_handle_release(m_hresult); + } + + /// + /// Retrieved voices. + /// + const std::vector>& Voices; + + /// + /// Error details. + /// + const SPXSTRING& ErrorDetails; + + /// + /// Unique result id. + /// + const SPXSTRING& ResultId; + + /// + /// Reason of the voices list result. + /// + const ResultReason& Reason; + + /// + /// Collection of additional SynthesisVoicesResult properties. + /// + const PropertyCollection& Properties; + +private: + + DISABLE_DEFAULT_CTORS(SynthesisVoicesResult); + + /// + /// Internal member variable that holds the result ID. + /// + SPXSTRING m_resultId; + + /// + /// Internal member variable that holds the result reason. + /// + ResultReason m_reason; + + /// + /// Internal member variable that holds the voices list. + /// + std::vector> m_voices; + + /// + /// Internal member variable that holds the error details. 
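// Editor's note: illustrative usage sketch; not part of the vendored header.
// It shows how a SynthesisVoicesResult (declared above) is typically consumed.
// Assumptions: the result comes from SpeechSynthesizer::GetVoicesAsync() (declared
// elsewhere in the SDK, not in this hunk), ResultReason::VoicesListRetrieved is the
// success reason, and speechapi_cxx.h is the SDK umbrella header. VoiceInfo members
// such as ShortName and Locale are declared later in this diff.
#include <iostream>
#include <memory>
#include <speechapi_cxx.h>

using namespace Microsoft::CognitiveServices::Speech;

static void PrintAvailableVoices(const std::shared_ptr<SynthesisVoicesResult>& result)
{
    if (result->Reason != ResultReason::VoicesListRetrieved) // assumed enum value
    {
        std::cout << "Voices list request failed: " << result->ErrorDetails << std::endl;
        return;
    }

    for (const auto& voice : result->Voices)
    {
        std::cout << voice->ShortName << " (" << voice->Locale << ")" << std::endl;
    }
}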
+ /// + SPXSTRING m_errorDetails; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_translation_eventargs.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_translation_eventargs.h new file mode 100644 index 0000000..42f54ef --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_translation_eventargs.h @@ -0,0 +1,235 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Translation { + + +/// +/// Defines payload that is sent with the event or . +/// +class TranslationRecognitionEventArgs : public RecognitionEventArgs +{ +private: + + SPXEVENTHANDLE m_hevent; + std::shared_ptr m_result; + +public: + + /// + /// It is intended for internal use only. It creates an instance of . + /// + /// The handle returned by recognizer in C-API. + explicit TranslationRecognitionEventArgs(SPXEVENTHANDLE hevent) : + RecognitionEventArgs(hevent), + m_hevent(hevent), + m_result(std::make_shared(ResultHandleFromEventHandle(hevent))), + Result(m_result) + { + UNUSED(m_hevent); + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + }; + + /// + /// Destructs the instance. + /// + virtual ~TranslationRecognitionEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + recognizer_event_handle_release(m_hevent); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + /// + /// Contains the translation recognition result. + /// + std::shared_ptr Result; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +protected: +#endif + + /*! \cond PROTECTED */ + + /// + /// Contains the translation text result. + /// + std::shared_ptr GetResult() const { return m_result; } + + /*! \endcond */ + +private: + DISABLE_DEFAULT_CTORS(TranslationRecognitionEventArgs); + + SPXRESULTHANDLE ResultHandleFromEventHandle(SPXEVENTHANDLE hevent) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(recognizer_recognition_event_get_result(hevent, &hresult)); + return hresult; + } +}; + + +/// +/// Class for translation recognition canceled event arguments. +/// +class TranslationRecognitionCanceledEventArgs final : public TranslationRecognitionEventArgs +{ +private: + + std::shared_ptr m_cancellation; + CancellationReason m_cancellationReason; + CancellationErrorCode m_errorCode; + +public: + + /// + /// Constructor. 
+ /// + /// Event handle + explicit TranslationRecognitionCanceledEventArgs(SPXEVENTHANDLE hevent) : + TranslationRecognitionEventArgs(hevent), + m_cancellation(CancellationDetails::FromResult(GetResult())), + m_cancellationReason(m_cancellation->Reason), + m_errorCode(m_cancellation->ErrorCode), + Reason(m_cancellationReason), + ErrorCode(m_errorCode), + ErrorDetails(m_cancellation->ErrorDetails) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + + /// + virtual ~TranslationRecognitionCanceledEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-private-field" +#endif + /// + /// The reason the result was canceled. + /// + const CancellationReason& Reason; + + /// + /// The error code in case of an unsuccessful recognition ( is set to Error). + /// If Reason is not Error, ErrorCode is set to NoError. + /// Added in version 1.1.0. + /// + const CancellationErrorCode& ErrorCode; + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + + /// + /// The error message in case of an unsuccessful recognition ( is set to Error). + /// + const SPXSTRING ErrorDetails; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +private: +#endif + /// + /// CancellationDetails. + /// + std::shared_ptr GetCancellationDetails() const { return m_cancellation; } + +private: + + DISABLE_DEFAULT_CTORS(TranslationRecognitionCanceledEventArgs); +}; + + + +/// +/// Defines payload that is sent with the event . +/// +class TranslationSynthesisEventArgs final : public SessionEventArgs +{ +private: + + SPXEVENTHANDLE m_hevent; + std::shared_ptr m_result; + +public: + /// + /// It is intended for internal use only. It creates an instance of . + /// + /// The handle returned by recognizer in C-API. + explicit TranslationSynthesisEventArgs(SPXEVENTHANDLE hevent) : + SessionEventArgs(hevent), + m_hevent(hevent), + m_result(std::make_shared(SynthesisResultHandleFromEventHandle(hevent))), + Result(m_result) + { + UNUSED(m_hevent); + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + }; + + /// + /// Destructs the instance. + /// + virtual ~TranslationSynthesisEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + recognizer_event_handle_release(m_hevent); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + /// + /// Contains the translation synthesis result. + /// + std::shared_ptr Result; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +private: +#endif + /// + /// Contains the translation synthesis result. 
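// Editor's note: illustrative usage sketch; not part of the vendored header.
// It shows how the event payloads declared above are consumed. Assumptions: the
// Canceled signal is inherited from the AsyncRecognizer base class (declared in
// another header of this SDK drop), CancellationReason::Error comes from the SDK's
// enums header, and speechapi_cxx.h is the umbrella header.
#include <iostream>
#include <memory>
#include <speechapi_cxx.h>

using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Translation;

static void WireTranslationEvents(const std::shared_ptr<TranslationRecognizer>& recognizer)
{
    // Cancellation payload: reason, error code, and detailed error text.
    recognizer->Canceled.Connect([](const TranslationRecognitionCanceledEventArgs& e)
    {
        if (e.Reason == CancellationReason::Error)
        {
            std::cout << "Canceled, code=" << static_cast<int>(e.ErrorCode)
                      << ", details=" << e.ErrorDetails << std::endl;
        }
    });

    // Synthesis payload: each event carries a chunk of synthesized target-language audio.
    recognizer->Synthesizing.Connect([](const TranslationSynthesisEventArgs& e)
    {
        std::cout << "Synthesized " << e.Result->Audio.size() << " bytes" << std::endl;
    });
}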
+ /// + std::shared_ptr GetResult() const { return m_result; } + +private: + + DISABLE_DEFAULT_CTORS(TranslationSynthesisEventArgs); + + SPXRESULTHANDLE SynthesisResultHandleFromEventHandle(SPXEVENTHANDLE hevent) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(recognizer_recognition_event_get_result(hevent, &hresult)); + return hresult; + } +}; + +} } } } // Microsoft::CognitiveServices::Speech::Translation diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_translation_recognizer.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_translation_recognizer.h new file mode 100644 index 0000000..867487d --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_translation_recognizer.h @@ -0,0 +1,352 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_translation_recognizer.h: Public API declarations for translation recognizer in C++. +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Translation { + +/// +/// Performs translation on the speech input. +/// +class TranslationRecognizer final : public AsyncRecognizer +{ +public: + /// + /// Create a translation recognizer from a speech config + /// + /// Speech configuration. + /// A smart pointer wrapped speech recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechconfig, std::nullptr_t) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_translation_recognizer_from_config( + &hreco, + HandleOrInvalid(speechconfig), + HandleOrInvalid(nullptr))); + return std::make_shared(hreco); + } + + /// + /// Create a translation recognizer from an embedded speech config + /// + /// Embedded speech configuration. + /// A smart pointer wrapped translation recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechConfig, std::nullptr_t) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_translation_recognizer_from_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(nullptr))); + return std::make_shared(hreco); + } + + /// + /// Create a translation recognizer from a hybrid speech config + /// + /// Hybrid speech configuration. + /// A smart pointer wrapped translation recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechConfig, std::nullptr_t) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_translation_recognizer_from_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(nullptr))); + return std::make_shared(hreco); + } + + /// + /// Create a translation recognizer from a translation config and an audio config. + /// Users should use this function to create a translation recognizer. + /// + /// Speech translation config. + /// Audio config. + /// The shared smart pointer of the created translation recognizer. 
+ static std::shared_ptr FromConfig(std::shared_ptr speechconfig, std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco { SPXHANDLE_INVALID }; + SPX_THROW_ON_FAIL(::recognizer_create_translation_recognizer_from_config( + &hreco, + HandleOrInvalid(speechconfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Create a translation recognizer from an embedded speech config and audio config. + /// + /// Embedded speech config. + /// Audio config. + /// A smart pointer wrapped translation recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechConfig, std::shared_ptr audioConfig = nullptr) + { + SPXRECOHANDLE hreco{ SPXHANDLE_INVALID }; + SPX_THROW_ON_FAIL(::recognizer_create_translation_recognizer_from_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(audioConfig))); + return std::make_shared(hreco); + } + + /// + /// Create a translation recognizer from a hybrid speech config and audio config. + /// + /// Hybrid speech config. + /// Audio config. + /// A smart pointer wrapped translation recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechConfig, std::shared_ptr audioConfig = nullptr) + { + SPXRECOHANDLE hreco{ SPXHANDLE_INVALID }; + SPX_THROW_ON_FAIL(::recognizer_create_translation_recognizer_from_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(audioConfig))); + return std::make_shared(hreco); + } + + /// + /// Create a translation recognizer from a translation config, auto detection source language config and an audio config. + /// Users should use this function to create a translation recognizer. + /// + /// Speech translation config. + /// Auto detection source language config. + /// Audio config. + /// The shared smart pointer of the created translation recognizer. + static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr autoDetectSourceLangConfig, + std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco { SPXHANDLE_INVALID }; + SPX_THROW_ON_FAIL(::recognizer_create_translation_recognizer_from_auto_detect_source_lang_config( + &hreco, + HandleOrInvalid(speechconfig), + HandleOrInvalid(autoDetectSourceLangConfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Create a translation recognizer from an embedded speech config, auto detection source language config and audio config. + /// + /// Embedded speech config. + /// Auto detection source language config. + /// Audio config. + /// The shared smart pointer of the created translation recognizer. + static std::shared_ptr FromConfig( + std::shared_ptr speechConfig, + std::shared_ptr autoDetectSourceLangConfig, + std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco{ SPXHANDLE_INVALID }; + SPX_THROW_ON_FAIL(::recognizer_create_translation_recognizer_from_auto_detect_source_lang_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(autoDetectSourceLangConfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + // The AsyncRecognizer only deals with events for translation text result. The audio output event + // is managed by OnTranslationSynthesisResult. + using BaseType = AsyncRecognizer; + + /// + /// It is intended for internal use only. It creates an instance of . + /// + /// + /// It is recommended to use SpeechTranslationConfig to create an instance of . This method is mainly + /// used in case where a recognizer handle has been created by methods via C-API. 
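// Editor's note: illustrative usage sketch; not part of the vendored header.
// It shows the typical FromConfig path declared above. Assumptions: speechapi_cxx.h
// is the SDK umbrella header, and the subscription key, region, and languages are
// placeholders. SpeechTranslationConfig and AudioConfig come from other headers in
// this SDK drop.
#include <memory>
#include <speechapi_cxx.h>

using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Translation;
using namespace Microsoft::CognitiveServices::Speech::Audio;

static std::shared_ptr<TranslationRecognizer> CreateTranslationRecognizer()
{
    // Placeholder credentials; replace with real values.
    auto config = SpeechTranslationConfig::FromSubscription("<subscription-key>", "<region>");
    config->SetSpeechRecognitionLanguage("en-US");
    config->AddTargetLanguage("de");

    // Default microphone input; a WAV file or push stream can be used the same way.
    auto audio = AudioConfig::FromDefaultMicrophoneInput();
    return TranslationRecognizer::FromConfig(config, audio);
}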
+ /// + /// The handle of the recognizer that is returned by C-API. + explicit TranslationRecognizer(SPXRECOHANDLE hreco) : + BaseType(hreco), + Properties(m_properties), + Synthesizing(GetTranslationAudioEventConnectionsChangedCallback()) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /// + /// Deconstruct the instance. + /// + ~TranslationRecognizer() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + Synthesizing.DisconnectAll(); + TermRecognizer(); + } + + /// + /// Starts translation recognition, and returns after a single utterance is recognized. The end of a + /// single utterance is determined by listening for silence at the end or until a maximum of about 30 + /// seconds of audio is processed. The task returns the recognized text as well as the translation. + /// Note: Since RecognizeOnceAsync() returns only a single utterance, it is suitable only for single + /// shot recognition like command or query. + /// For long-running multi-utterance recognition, use StartContinuousRecognitionAsync() instead. + /// + /// An asynchronous operation representing the recognition. It returns a value of as result. + std::future> RecognizeOnceAsync() override + { + return BaseType::RecognizeOnceAsyncInternal(); + } + + /// + /// Starts translation on a continous audio stream, until StopContinuousRecognitionAsync() is called. + /// User must subscribe to events to receive recognition results. + /// + /// An asynchronous operation that starts the translation. + std::future StartContinuousRecognitionAsync() override + { + return BaseType::StartContinuousRecognitionAsyncInternal(); + } + + /// + /// Stops continuous translation. + /// + /// A task representing the asynchronous operation that stops the translation. + std::future StopContinuousRecognitionAsync() override { return BaseType::StopContinuousRecognitionAsyncInternal(); } + + /// + /// Starts keyword recognition on a continuous audio stream, until StopKeywordRecognitionAsync() is called. + /// + /// Specifies the keyword model to be used. + /// An asynchronous operation that starts the keyword recognition. + std::future StartKeywordRecognitionAsync(std::shared_ptr model) override + { + return BaseType::StartKeywordRecognitionAsyncInternal(model); + }; + + /// + /// Stops continuous keyword recognition. + /// + /// A task representing the asynchronous operation that stops the keyword recognition. + std::future StopKeywordRecognitionAsync() override + { + return BaseType::StopKeywordRecognitionAsyncInternal(); + }; + + /// + /// Sets the authorization token that will be used for connecting to the service. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. + /// Otherwise, the recognizer will encounter errors during recognition. + /// + /// A string that represents the endpoint id. + void SetAuthorizationToken(const SPXSTRING& token) + { + Properties.SetProperty(PropertyId::SpeechServiceAuthorization_Token, token); + } + + /// + /// Gets the authorization token. + /// + /// Authorization token + SPXSTRING GetAuthorizationToken() + { + return Properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token, SPXSTRING()); + } + + /// + /// Adds a target language for translation. + /// Added in version 1.7.0. + /// + /// Translation target language to add. 
+ void AddTargetLanguage(const SPXSTRING& language) + { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_hreco == SPXHANDLE_INVALID); + SPX_THROW_ON_FAIL(::translator_add_target_language(m_hreco, Utils::ToUTF8(language).c_str())); + } + + /// + /// Removes a target language for translation. + /// Added in version 1.7.0. + /// + /// Translation target language to remove. + void RemoveTargetLanguage(const SPXSTRING& language) + { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_hreco == SPXHANDLE_INVALID); + SPX_THROW_ON_FAIL(::translator_remove_target_language(m_hreco, Utils::ToUTF8(language).c_str())); + } + + /// + /// Gets target languages for translation. + /// Added in version 1.7.0. + /// + /// Vector of translation target languages. + std::vector GetTargetLanguages() const + { + std::vector result; + auto targetLanguages = Utils::ToUTF8(Properties.GetProperty(PropertyId::SpeechServiceConnection_TranslationToLanguages)); + if (targetLanguages.empty()) + return result; + + // Getting languages one by one. + std::stringstream languageStream(targetLanguages); + std::string token; + while (std::getline(languageStream, token, CommaDelim)) + { + result.push_back(Utils::ToSPXString(token)); + } + return result; + } + + /// + /// The collection of properties and their values defined for this . + /// + PropertyCollection& Properties; + + /// + /// The event signals that a translation synthesis result is received. + /// + EventSignal Synthesizing; + +private: + + DISABLE_DEFAULT_CTORS(TranslationRecognizer); + + friend class Microsoft::CognitiveServices::Speech::Session; + + std::function&)> GetTranslationAudioEventConnectionsChangedCallback() + { + return [=, this](const EventSignal& audioEvent) { + if (&audioEvent == &Synthesizing) + { + translator_synthesizing_audio_set_callback(m_hreco, Synthesizing.IsConnected() ? FireEvent_TranslationSynthesisResult : nullptr, this); + } + }; + } + + static void FireEvent_TranslationSynthesisResult(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new TranslationSynthesisEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Synthesizing.Signal(*recoEvent.get()); + } +}; +} } } } // Microsoft::CognitiveServices::Speech::Translation diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_translation_result.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_translation_result.h new file mode 100644 index 0000000..bbabcd6 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_translation_result.h @@ -0,0 +1,175 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_translation_result.h: Public API declarations for TranslationResult C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Translation { + +/// +/// Defines the translation text result. +/// +class TranslationRecognitionResult : public RecognitionResult +{ +private: + + std::map m_translations; + +public: + /// + /// It is intended for internal use only. It creates an instance of . + /// + /// The handle of the result returned by recognizer in C-API. 
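// Editor's note: illustrative usage sketch; not part of the vendored header.
// It shows continuous translation together with the runtime target-language helpers
// declared just above (AddTargetLanguage/RemoveTargetLanguage/GetTargetLanguages).
// Assumptions: the recognizer was created as in the FromConfig sketch earlier, and
// speechapi_cxx.h is the SDK umbrella header.
#include <iostream>
#include <memory>
#include <speechapi_cxx.h>

using namespace Microsoft::CognitiveServices::Speech::Translation;

static void RunContinuousTranslation(const std::shared_ptr<TranslationRecognizer>& recognizer)
{
    // Connect Recognized/Canceled/Synthesizing handlers before starting
    // (see the event-args sketch near the event classes above).
    recognizer->StartContinuousRecognitionAsync().get();

    // Target languages can be adjusted while the recognizer is running.
    recognizer->AddTargetLanguage("fr");
    recognizer->RemoveTargetLanguage("de");
    for (const auto& language : recognizer->GetTargetLanguages())
    {
        std::cout << "translating to: " << language << std::endl;
    }

    // ... later, once the session should end ...
    recognizer->StopContinuousRecognitionAsync().get();
}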
+ explicit TranslationRecognitionResult(SPXRESULTHANDLE resultHandle) : + RecognitionResult(resultHandle), + Translations(m_translations) + { + PopulateResultFields(resultHandle); + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p) -- resultid=%s.", __FUNCTION__, (void*)this, (void*)Handle, ResultId.c_str()); + }; + + /// + /// Destructs the instance. + /// + virtual ~TranslationRecognitionResult() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)Handle); + } + + /// + /// Presents the translation results. Each item in the map is a key value pair, where key is the language tag of the translated text, + /// and value is the translation text in that language. + /// + const std::map& Translations; + +private: + void PopulateResultFields(SPXRESULTHANDLE resultHandle) + { + SPX_INIT_HR(hr); + + size_t count = 0; + hr = translation_text_result_get_translation_count(resultHandle, &count); + SPX_THROW_ON_FAIL(hr); + + size_t maxLanguageSize = 0; + size_t maxTextSize = 0; + + for (size_t i = 0; i < count; i++) + { + size_t languageSize = 0; + size_t textSize = 0; + + hr = translation_text_result_get_translation(resultHandle, i, nullptr, nullptr, &languageSize, &textSize); + SPX_THROW_ON_FAIL(hr); + + maxLanguageSize = (std::max)(maxLanguageSize, languageSize); + maxTextSize = (std::max)(maxTextSize, textSize); + } + + auto targetLanguage = std::make_unique(maxLanguageSize); + auto translationText = std::make_unique(maxTextSize); + for (size_t i = 0; i < count; i++) + { + hr = translation_text_result_get_translation(resultHandle, i, targetLanguage.get(), translationText.get(), &maxLanguageSize, &maxTextSize); + SPX_THROW_ON_FAIL(hr); + m_translations[Utils::ToSPXString(targetLanguage.get())] = Utils::ToSPXString(translationText.get()); + } + + SPX_DBG_TRACE_VERBOSE("Translation phrases: numberentries: %d", (int)m_translations.size()); +#ifdef _DEBUG + for (const auto& cf : m_translations) + { + (void)(cf); // prevent warning for cf when compiling release builds + SPX_DBG_TRACE_VERBOSE(" phrase for %s: %s", cf.first.c_str(), cf.second.c_str()); + } +#endif + }; + + DISABLE_DEFAULT_CTORS(TranslationRecognitionResult); +}; + + +/// +/// Defines the translation synthesis result, i.e. the voice output of the translated text in the target language. +/// +class TranslationSynthesisResult +{ +private: + + ResultReason m_reason; + std::vector m_audioData; + +public: + /// + /// It is intended for internal use only. It creates an instance of + /// + /// The handle of the result returned by recognizer in C-API. + explicit TranslationSynthesisResult(SPXRESULTHANDLE resultHandle) : + Reason(m_reason), + Audio(m_audioData) + { + PopulateResultFields(resultHandle); + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p) reason=0x%x", __FUNCTION__, (void*)this, (void*)resultHandle, Reason); + }; + + /// + /// Destructs the instance. + /// + virtual ~TranslationSynthesisResult() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + + /// + /// Recognition reason. + /// + const ResultReason& Reason; + + /// + /// The voice output of the translated text in the target language. 
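// Editor's note: illustrative usage sketch; not part of the vendored header.
// It shows single-shot recognition and iteration of the Translations map declared
// above. Assumptions: ResultReason::TranslatedSpeech comes from the SDK's enums
// header (not in this hunk), and speechapi_cxx.h is the umbrella header.
#include <iostream>
#include <memory>
#include <speechapi_cxx.h>

using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Translation;

static void TranslateOnce(const std::shared_ptr<TranslationRecognizer>& recognizer)
{
    auto result = recognizer->RecognizeOnceAsync().get();
    if (result->Reason == ResultReason::TranslatedSpeech) // assumed enum value
    {
        std::cout << "Recognized: " << result->Text << std::endl;
        for (const auto& entry : result->Translations)
        {
            // Key is the target language tag, value is the translated text.
            std::cout << "  " << entry.first << " -> " << entry.second << std::endl;
        }
    }
}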
+ /// + const std::vector& Audio; + + +private: + + DISABLE_DEFAULT_CTORS(TranslationSynthesisResult); + + void PopulateResultFields(SPXRESULTHANDLE resultHandle) + { + SPX_INIT_HR(hr); + + Result_Reason resultReason = ResultReason_NoMatch; + SPX_THROW_ON_FAIL(hr = result_get_reason(resultHandle, &resultReason)); + m_reason = (ResultReason)resultReason; + + size_t bufLen = 0; + hr = translation_synthesis_result_get_audio_data(resultHandle, nullptr, &bufLen); + if (hr == SPXERR_BUFFER_TOO_SMALL) + { + m_audioData.resize(bufLen); + hr = translation_synthesis_result_get_audio_data(resultHandle, m_audioData.data(), &bufLen); + } + SPX_THROW_ON_FAIL(hr); + + SPX_DBG_TRACE_VERBOSE("Translation synthesis: audio length: %zu, vector size: %zu", bufLen, m_audioData.size()); + }; +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Translation diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_user.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_user.h new file mode 100644 index 0000000..0f0a597 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_user.h @@ -0,0 +1,77 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_user.h: Public API declarations for User C++ class +// + +#pragma once + +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + +constexpr size_t MAX_USER_ID_LEN = 1024; + +/// +/// Represents a user in a conversation. +/// Added in version 1.5.0. +/// +class User +{ +public: + + /// + /// Create a user with identification string. + /// + /// A user id. + /// A user object + static std::shared_ptr FromUserId(const SPXSTRING& userId) + { + SPXUSERHANDLE m_huser = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(user_create_from_id(Utils::ToUTF8(userId).c_str(), &m_huser)); + return std::make_shared(m_huser); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// A user handle. + explicit User(SPXUSERHANDLE huser = SPXHANDLE_INVALID) : m_huser(huser) { } + + /// + /// Virtual destructor. + /// + virtual ~User() { user_release_handle(m_huser); } + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. + explicit operator SPXUSERHANDLE() const { return m_huser; } + + /// + /// Get user's id. + /// + /// user's id. + SPXSTRING GetId() const + { + char user_id[MAX_USER_ID_LEN+1]; + std::memset(user_id, 0, MAX_USER_ID_LEN+1); + SPX_THROW_ON_FAIL(user_get_id(m_huser, user_id, MAX_USER_ID_LEN)); + + return user_id; + } + +private: + + DISABLE_COPY_AND_MOVE(User); + + SPXUSERHANDLE m_huser; + +}; + +}}}} diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_utils.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_utils.h new file mode 100644 index 0000000..21b4a98 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_utils.h @@ -0,0 +1,312 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_utils.h: General utility classes and functions. 
+// + +#pragma once + +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Utils { + +/// +/// Base class that disables the copy constructor +/// +struct NonCopyable +{ + /// + /// Default destructor. + /// + NonCopyable() = default; + + /// + /// Virtual destructor. + /// + virtual ~NonCopyable() = default; + + /// + /// Disable copy constructor. + /// + NonCopyable(const NonCopyable&) = delete; + + /// + /// Disable copy assignment operator. + /// + /// Reference to the object. + NonCopyable& operator=(const NonCopyable &) = delete; +}; + +/// +/// Base class that disables the move constructor +/// +struct NonMovable +{ + /// + /// Default destructor. + /// + NonMovable() = default; + + /// + /// Virtual destructor. + /// + virtual ~NonMovable() = default; + + /// + /// Disable move constructor. + /// + NonMovable(NonMovable &&) = delete; + + /// + /// Disable move assignment operator. + /// + /// Reference to the object. + NonMovable& operator=(NonMovable &&) = delete; +}; + +template +SPXHANDLE CallFactoryMethodRight(F method, Args&&... args) +{ + SPXHANDLE handle; + auto hr = method(std::forward(args)..., &handle); + SPX_THROW_ON_FAIL(hr); + return handle; +} + +template +SPXHANDLE CallFactoryMethodLeft(F method, Args&&... args) +{ + SPXHANDLE handle; + auto hr = method(&handle, std::forward(args)...); + SPX_THROW_ON_FAIL(hr); + return handle; +} + +/// +/// Helper class implementing the scope guard idiom. +/// (The given function will be executed on destruction) +/// +template +class ScopeGuard +{ +public: + ScopeGuard(ScopeGuard&&) = default; + ScopeGuard(const ScopeGuard&) = delete; + + explicit ScopeGuard(F f): m_fn{ f } + {} + + ~ScopeGuard() + { + m_fn(); + } + +private: + F m_fn; +}; + +/// +/// Creates a scope guard with the given function. 
+/// +template +ScopeGuard MakeScopeGuard(F fn) +{ + return ScopeGuard{ fn }; +} + +/// +/// A wrapper around ABI handles that simplifies resource cleanup on exit +/// +/// The type of the ABI handle +/// The default value to set the handle to when initialising or after destroying +/// The return type of the free function +/// The signature of the free function called to release the ABI handle +template< + typename THandle, + typename TRet = AZACHR, + typename TFreeFunc = TRet(AZAC_API_CALLTYPE*)(THandle)> +class AbiHandleWrapper : public NonCopyable +{ +private: + THandle m_handle; + TFreeFunc m_free; + bool m_isValid; + +public: + /// + /// The signature of the free function + /// + using FreeFunc = TFreeFunc; + + /// + /// Creates and ABI handle wrapper for SPXHANDLE types initializing the handle + /// to be SPXHANDLE_INVALID + /// + /// The function used to release the ABI handle + template< + typename IsHandle = THandle, + std::enable_if_t::value, bool> = true + > + AbiHandleWrapper(TFreeFunc freeFunc) : + m_handle{ SPXHANDLE_INVALID }, + m_free{ freeFunc }, + m_isValid{ false } + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, freeFunc == nullptr); + } + + /// + /// Creates an ABI handle wrapper + /// + /// The function used to release the ABI handle + template< + typename IsHandle = THandle, + std::enable_if_t::value, bool> = true + > + AbiHandleWrapper(TFreeFunc freeFunc) : + m_handle{ nullptr }, + m_free{ freeFunc }, + m_isValid{ false } + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, freeFunc == nullptr); + } + + /// + /// Creates an ABI handle wrapper + /// + /// The function used to release the ABI handle + /// The initial ABI handle value + AbiHandleWrapper(TFreeFunc freeFunc, THandle&& handle) : + m_handle{ std::move(handle) }, + m_free{ freeFunc }, + m_isValid{ true } + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, freeFunc == nullptr); + } + + /// + /// Destructor + /// + ~AbiHandleWrapper() { Destroy(); } + + /// + /// Move constructor + /// + /// The other item being moved + AbiHandleWrapper(AbiHandleWrapper&& other) : + m_handle{ other.m_handle }, + m_free{ other.m_free }, + m_isValid{ other.m_isValid } + { + other.m_handle = THandle{}; + other.m_free = nullptr; + other.m_isValid = false; + } + + /// + /// Move assignment operator + /// + /// The item being moved + /// Reference to ABI handle + AbiHandleWrapper& operator=(AbiHandleWrapper&& other) + { + if (this != &other) + { + Destroy(); + + m_handle = std::move(other.m_handle); + m_free = other.m_free; + m_isValid = other.m_isValid; + + other.m_free = nullptr; + other.m_isValid = false; + } + + return *this; + } + + /// + /// Helper to simplify assigning a new ABI handle value to this wrapper + /// + /// The handle to assign + /// Reference to assigned handle + THandle& operator=(const THandle& other) + { + Destroy(); + + m_handle = other; + return m_handle; + } + + /// + /// Gets the address of the ABI handle. This is useful when calling ABI functions that set the value + /// + THandle* operator&() { return &m_handle; } + + /// + /// Gets the ABI handle value + /// + operator THandle() const { return m_handle; } + +private: + void Destroy() + { + if (m_isValid) + { + m_isValid = false; + if (m_free != nullptr) + { + m_free(m_handle); + } + } + } +}; + +/// +/// A wrapper around ABI handles +/// +using AbiHandle = AbiHandleWrapper; + +/// +/// A wrapper around strings allocated in the ABI layer +/// +using AbiStringHandle = AbiHandleWrapper; + +/// +/// Function that converts a handle to its underlying type. +/// +/// Handle type. 
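// Editor's note: illustrative usage sketch; not part of the vendored header.
// It shows the ScopeGuard/MakeScopeGuard helpers declared above, mirroring how the
// SDK itself uses them to run cleanup on every exit path. The file handle here is
// just a stand-in resource; speechapi_cxx.h as the umbrella header is an assumption.
#include <cstdio>
#include <speechapi_cxx.h>

using namespace Microsoft::CognitiveServices::Speech;

static void ReadSomeFile(const char* path)
{
    std::FILE* file = std::fopen(path, "rb");
    if (file == nullptr)
    {
        return;
    }

    // The guard's lambda runs when it goes out of scope, including on exceptions.
    auto closeFileOnExit = Utils::MakeScopeGuard([&file]() {
        std::fclose(file);
    });

    // ... use the file; no explicit fclose needed on any return path ...
}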
+/// Object type. +/// Object from which to get the handle. +template +inline Handle HandleOrInvalid(std::shared_ptr obj) +{ + return obj == nullptr + ? static_cast(SPXHANDLE_INVALID) + : static_cast(*obj.get()); +} + + +template +struct TypeList {}; + +template class F, typename L> +struct TypeListIfAny; + +template class F> +struct TypeListIfAny> +{ + static constexpr bool value{ false }; +}; + +template class F, typename U, typename... Us> +struct TypeListIfAny> +{ + static constexpr bool value = F::value || Microsoft::CognitiveServices::Speech::Utils::TypeListIfAny>::value; +}; + +} } } } diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_voice_info.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_voice_info.h new file mode 100644 index 0000000..d220e4c --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_voice_info.h @@ -0,0 +1,196 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_voice_info.h: Public API declarations for VoiceInfo C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Contains information about synthesis voice info +/// Updated in version 1.17.0 +/// +class VoiceInfo +{ +private: + + /// + /// Internal member variable that holds the voice info handle. + /// + SPXRESULTHANDLE m_hresult; + + /*! \cond PRIVATE */ + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRESULTHANDLE hresult) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + voice_info_get_property_bag(hresult, &hpropbag); + return hpropbag; + }()) + { + } + }; + + /// + /// Internal member variable that holds the properties associating to the voice info. + /// + PrivatePropertyCollection m_properties; + + /*! \endcond */ + +public: + + /// + /// Creates a new instance using the provided handle. + /// + /// Result handle. + explicit VoiceInfo(SPXRESULTHANDLE hresult) : + m_hresult(hresult), + m_properties(hresult), + Name(m_name), + Locale(m_locale), + ShortName(m_shortName), + LocalName(m_localName), + Gender(m_gender), + VoiceType(m_voiceType), + StyleList(m_styleList), + VoicePath(m_voicePath), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + m_name = Utils::ToSPXString(Utils::CopyAndFreePropertyString(voice_info_get_name(m_hresult))); + m_locale = Utils::ToSPXString(Utils::CopyAndFreePropertyString(voice_info_get_locale(m_hresult))); + m_shortName = Utils::ToSPXString(Utils::CopyAndFreePropertyString(voice_info_get_short_name(m_hresult))); + m_localName = Utils::ToSPXString(Utils::CopyAndFreePropertyString(voice_info_get_local_name(m_hresult))); + m_styleList = Utils::Split(Utils::CopyAndFreePropertyString(voice_info_get_style_list(m_hresult)), '|'); + Synthesis_VoiceType voiceType; + SPX_THROW_ON_FAIL(voice_info_get_voice_type(hresult, &voiceType)); + m_voiceType = static_cast(voiceType); + m_voicePath = Utils::ToSPXString(Utils::CopyAndFreePropertyString(voice_info_get_voice_path(m_hresult))); + auto gender = Properties.GetProperty("Gender"); + m_gender = gender == "Female" ? SynthesisVoiceGender::Female : gender == "Male" ? 
SynthesisVoiceGender::Male : SynthesisVoiceGender::Unknown; + } + + /// + /// Explicit conversion operator. + /// + /// A handle. + explicit operator SPXRESULTHANDLE() { return m_hresult; } + + /// + /// Destructor. + /// + ~VoiceInfo() + { + voice_info_handle_release(m_hresult); + } + + /// + /// Voice name. + /// + const SPXSTRING& Name; + + /// + /// Locale of the voice. + /// + const SPXSTRING& Locale; + + /// + /// Short name. + /// + const SPXSTRING& ShortName; + + /// + /// Local name. + /// + const SPXSTRING& LocalName; + + /// + /// Gender. + /// Added in version 1.17.0 + /// + const SynthesisVoiceGender& Gender; + + /// + /// Local name. + /// + const SynthesisVoiceType& VoiceType; + + /// + /// Style list + /// + const std::vector& StyleList; + + /// + /// Voice path, only valid for offline voices. + /// + const SPXSTRING& VoicePath; + + /// + /// Collection of additional VoiceInfo properties. + /// + const PropertyCollection& Properties; + +private: + + DISABLE_DEFAULT_CTORS(VoiceInfo); + + /// + /// Internal member variable that holds the name. + /// + SPXSTRING m_name; + + /// + /// Internal member variable that holds the locale. + /// + SPXSTRING m_locale; + + /// + /// Internal member variable that holds the short name. + /// + SPXSTRING m_shortName; + + /// + /// Internal member variable that holds the local name. + /// + SPXSTRING m_localName; + + /// + /// Internal member variable that holds the gender. + /// + SynthesisVoiceGender m_gender; + + /// + /// Internal member variable that holds the voice type. + /// + SynthesisVoiceType m_voiceType; + + /// + /// Internal member variable that holds the style list. + /// + std::vector m_styleList; + + /// + /// Internal member variable that holds the voice path. + /// + SPXSTRING m_voicePath; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_voice_profile.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_voice_profile.h new file mode 100644 index 0000000..78c8c38 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_voice_profile.h @@ -0,0 +1,109 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_voice_profile.h: Public API declarations for VoiceProfile C++ class +// + +#pragma once +#include + +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Speaker { + +// Forward declaration for friends. +class VoiceProfileClient; + +/// +/// Class for VoiceProfile. +/// Added in version 1.12.0 +/// +class VoiceProfile : public std::enable_shared_from_this +{ +public: + + static std::shared_ptr FromId(const SPXSTRING& Id, VoiceProfileType voiceProfileType = VoiceProfileType::TextIndependentIdentification) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + SPXVOICEPROFILEHANDLE hVoiceProfile; + SPX_THROW_ON_FAIL(::create_voice_profile_from_id_and_type(&hVoiceProfile,Utils::ToUTF8(Id).c_str(), static_cast(voiceProfileType))); + return std::shared_ptr { new VoiceProfile(hVoiceProfile) }; + } + + /// + /// Destructor. + /// + virtual ~VoiceProfile() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + ::voice_profile_release_handle(m_hVoiceProfile); + m_hVoiceProfile = SPXHANDLE_INVALID; + } + + /// + /// Get a voice profile id. 
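// Editor's note: illustrative usage sketch; not part of the vendored header.
// It shows rehydrating a VoiceProfile from a persisted id via FromId above, then
// reading it back with GetId/GetType (declared just below in this class).
// Assumptions: VoiceProfileType comes from the SDK's enums header, the saved id is
// a placeholder, and speechapi_cxx.h is the umbrella header.
#include <iostream>
#include <string>
#include <speechapi_cxx.h>

using namespace Microsoft::CognitiveServices::Speech::Speaker;

static void RestoreProfile(const std::string& savedProfileId)
{
    auto profile = VoiceProfile::FromId(savedProfileId, VoiceProfileType::TextIndependentIdentification);
    std::cout << "profile " << profile->GetId()
              << ", type " << static_cast<int>(profile->GetType()) << std::endl;
}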
+ /// + /// the voice profile id. + const SPXSTRING GetId() const + { + // query the string length + uint32_t length = 0; + SPX_THROW_ON_FAIL(voice_profile_get_id(m_hVoiceProfile, nullptr, &length)); + + // retrieve the string + std::unique_ptr buffer(new char[length]); + SPX_THROW_ON_FAIL(voice_profile_get_id(m_hVoiceProfile, buffer.get(), &length)); + return Utils::ToSPXString(buffer.get()); + } + + /// + /// Get the VoiceProfileType from the VoiceProfile. + /// + /// + VoiceProfileType GetType() const + { + int type = -1; + SPX_THROW_ON_FAIL(voice_profile_get_type(m_hVoiceProfile, &type)); + return static_cast(type); + } + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. + explicit operator SPXVOICEPROFILEHANDLE() { return m_hVoiceProfile; } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Voice Profile handle. + explicit VoiceProfile(SPXVOICEPROFILEHANDLE hVoiceProfile) : + m_hVoiceProfile(hVoiceProfile) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /*! \endcond */ + +private: + + /*! \cond PRIVATE */ + friend Microsoft::CognitiveServices::Speech::Speaker::VoiceProfileClient; + DISABLE_DEFAULT_CTORS(VoiceProfile); + + SPXVOICEPROFILEHANDLE m_hVoiceProfile; + + /*! \endcond */ +}; + +} } } } // Microsoft::CognitiveServices::Speech::Speaker diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_voice_profile_client.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_voice_profile_client.h new file mode 100644 index 0000000..17b5dcd --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_voice_profile_client.h @@ -0,0 +1,262 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_voice_profile_client.h: Public API declarations for VoiceProfileClient C++ class +// + +#pragma once +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Speaker { + +/// +/// Class for VoiceProfileClient. +/// This class creates voice profile client for creating, doing enrollment, deleting and reseting a voice profile. +/// Added in version 1.12.0 +/// +class VoiceProfileClient : public std::enable_shared_from_this +{ +private: + + /*! \cond PRIVATE */ + + SPXVOICEPROFILECLIENTHANDLE m_hVoiceProfileClient; + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXVOICEPROFILECLIENTHANDLE hclient) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + voice_profile_client_get_property_bag(hclient, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + /*! \endcond */ + +public: + + /// + /// Create a Voice Profile Client from a speech config + /// + /// Speech configuration. + /// A smart pointer wrapped voice profile client pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechConfig) + { + SPXVOICEPROFILECLIENTHANDLE hVoiceProfileClient; + SPX_THROW_ON_FAIL(::create_voice_profile_client_from_config(&hVoiceProfileClient, Utils::HandleOrInvalid(speechConfig))); + return std::shared_ptr{ new VoiceProfileClient(hVoiceProfileClient)}; + } + + /// + /// Destructor. 
+ /// + virtual ~VoiceProfileClient() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + ::voice_profile_client_release_handle(m_hVoiceProfileClient); + m_hVoiceProfileClient = SPXHANDLE_INVALID; + } + + /// + /// Create a Voice Profile. + /// + /// a VoiceProfile type. + /// a locale, e.g "en-us" + /// A smart pointer wrapped voice profile client object. + std::future> CreateProfileAsync(VoiceProfileType profileType, const SPXSTRING& locale) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [profileType, locale, this, keepAlive]() -> std::shared_ptr { + SPXVOICEPROFILEHANDLE hVoiceProfileHandle; + SPX_THROW_ON_FAIL(::create_voice_profile(m_hVoiceProfileClient, static_cast(profileType), Utils::ToUTF8(locale).c_str(), &hVoiceProfileHandle)); + return std::shared_ptr { new VoiceProfile(hVoiceProfileHandle) }; + }); + + return future; + } + + /// + /// Enroll a Voice Profile. + /// + /// a voice profile object. + /// an audio Input. + /// A smart pointer wrapped voice profile enrollment result object. + std::future> EnrollProfileAsync(std::shared_ptr profile, std::shared_ptr audioInput = nullptr) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [profile, audioInput, this, keepAlive]() -> std::shared_ptr { + SPXRESULTHANDLE hresult; + SPX_THROW_ON_FAIL(::enroll_voice_profile(m_hVoiceProfileClient, + Utils::HandleOrInvalid(profile), + Utils::HandleOrInvalid(audioInput), + &hresult)); + return std::make_shared(hresult); + }); + return future; + } + + /// + /// Delete a Voice Profile. + /// + /// a voice profile object. + /// A smart pointer wrapped voice profile result object. + std::future> DeleteProfileAsync(std::shared_ptr profile) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [profile, this, keepAlive]() -> std::shared_ptr { + SPXRESULTHANDLE hResultHandle; + SPX_THROW_ON_FAIL(::delete_voice_profile(m_hVoiceProfileClient, + Utils::HandleOrInvalid(profile), + &hResultHandle)); + return std::make_shared(hResultHandle); + }); + return future; + } + + /// + /// Reset a Voice Profile. + /// + /// a voice profile object. + /// A smart pointer wrapped voice profile result object. + std::future> ResetProfileAsync(std::shared_ptr profile) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [profile, this, keepAlive]() -> std::shared_ptr { + SPXRESULTHANDLE hResultHandle; + SPX_THROW_ON_FAIL(::reset_voice_profile(m_hVoiceProfileClient, + Utils::HandleOrInvalid(profile), + &hResultHandle)); + return std::make_shared(hResultHandle); + }); + return future; + } + + /// + /// Retrieve an enrollment result given the id and type of the Voice Profile. + /// + /// The VoiceProfile Id. + /// The VoiceProfileType. + /// A future of the retrieved VoiceProfileEnrollmentResult. + std::future> RetrieveEnrollmentResultAsync(const SPXSTRING& voiceProfileId, VoiceProfileType voiceProfileType) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [voiceProfileId, voiceProfileType, this, keepAlive]() -> std::shared_ptr { + SPXRESULTHANDLE hResultHandle; + SPX_THROW_ON_FAIL(::retrieve_enrollment_result(m_hVoiceProfileClient, Utils::ToUTF8(voiceProfileId).c_str(), static_cast(voiceProfileType), &hResultHandle)); + return std::make_shared(hResultHandle); + }); + return future; + } + + /// + /// Retrieve an enrollment result given the Voice Profile. + /// + /// a voice profile object. 
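// Editor's note: illustrative usage sketch; not part of the vendored header.
// It shows the client methods declared around here: create a profile, feed it one
// enrollment from a WAV file, then delete it. Assumptions: speechapi_cxx.h is the
// umbrella header; the subscription key, region, and WAV path are placeholders;
// SpeechConfig and AudioConfig come from other headers in this SDK drop.
#include <memory>
#include <speechapi_cxx.h>

using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Audio;
using namespace Microsoft::CognitiveServices::Speech::Speaker;

static void EnrollOnceAndCleanUp()
{
    auto config = SpeechConfig::FromSubscription("<subscription-key>", "<region>"); // placeholders
    auto client = VoiceProfileClient::FromConfig(config);

    auto profile = client->CreateProfileAsync(VoiceProfileType::TextIndependentIdentification, "en-us").get();

    auto audio = AudioConfig::FromWavFileInput("<enrollment.wav>"); // placeholder
    auto enrollment = client->EnrollProfileAsync(profile, audio).get();
    (void)enrollment; // inspected in the enrollment-result sketch further down in this diff

    client->DeleteProfileAsync(profile).get();
}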
+ /// + std::future> RetrieveEnrollmentResultAsync(const VoiceProfile& voiceProfile) + { + return RetrieveEnrollmentResultAsync(voiceProfile.GetId(), voiceProfile.GetType()); + } + + /// + /// Get all profiles having the given type. + /// + /// The VoiceProfileType. + /// A future of a vector of extant VoiceProfiles. + std::future>> GetAllProfilesAsync(VoiceProfileType voiceProfileType) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [voiceProfileType, this, keepAlive]() -> std::vector> + { + std::vector> list; + + size_t numChars = 0; + char* json = nullptr; + auto deleteJsonOnEixt = Utils::MakeScopeGuard([&json]() { + ::property_bag_free_string(json); + }); + + SPX_THROW_ON_FAIL(::get_profiles_json(m_hVoiceProfileClient, static_cast(voiceProfileType), &json, &numChars)); + + auto profileList = Utils::Split(json, numChars, '|'); + for (auto& profile: profileList) + { + list.push_back(VoiceProfile::FromId(Utils::ToSPXString(profile), voiceProfileType)); + } + + return list; + }); + return future; + } + + std::future> GetActivationPhrasesAsync(VoiceProfileType voiceProfileType, const SPXSTRING& locale) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [voiceProfileType, locale, this, keepAlive]() -> std::shared_ptr { + SPXRESULTHANDLE hresult; + SPX_THROW_ON_FAIL(::get_activation_phrases(m_hVoiceProfileClient, + Utils::ToUTF8(locale).c_str(), + static_cast(voiceProfileType), + &hresult)); + return std::make_shared(hresult); + }); + return future; + } + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. + explicit operator SPXVOICEPROFILECLIENTHANDLE() { return m_hVoiceProfileClient; } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Recognizer handle. + explicit VoiceProfileClient(SPXVOICEPROFILECLIENTHANDLE hVoiceProfileClient) : + m_hVoiceProfileClient(hVoiceProfileClient), + m_properties(hVoiceProfileClient), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(VoiceProfileClient); +}; + +} } } } // Microsoft::CognitiveServices::Speech::Speaker diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_voice_profile_enrollment_result.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_voice_profile_enrollment_result.h new file mode 100644 index 0000000..22decb5 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_voice_profile_enrollment_result.h @@ -0,0 +1,242 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_voice_profile_enrollment_result.h: Public API declarations for VoiceProfileEnrollmentResult C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Speaker { + +/// +/// A enum that represents the timing information of an enrollment. +/// Added in version 1.12.0. +/// +enum class EnrollmentInfoType +{ + /// + /// Number of enrollment audios accepted for this profile. 
+ /// + EnrollmentsCount = 0, + + /// + /// Total length of enrollment audios accepted for this profile. + /// + EnrollmentsLength = 1, + + /// + /// Summation of pure speech(which is the amount of audio after removing silence and non - speech segments) across all profile enrollments. + /// + EnrollmentsSpeechLength = 2, + + /// + /// Amount of pure speech (which is the amount of audio after removing silence and non-speech segments) needed to complete profile enrollment. + /// + RemainingEnrollmentsSpeechLength = 3, + + /// + /// Number of enrollment audios needed to complete profile enrollment. + /// + RemainingEnrollmentsCount = 4, + + /// + /// This enrollment audio length in hundred nanoseconds. + /// + AudioLength = 5, + + /// + /// This enrollment audio pure speech(which is the amount of audio after removing silence and non - speech segments) length in hundred nanoseconds. + /// + AudioSpeechLength = 6 +}; + +/// +/// Represents the result of an enrollment. +/// Added in version 1.12.0. +/// +class VoiceProfileEnrollmentResult final : public RecognitionResult +{ +private: + + SPXSTRING m_profileId; + const int enrollmentsCount; + const uint64_t enrollmentsLength; + const uint64_t enrollmentsSpeechLength; + const int remainingEnrollmentsCount; + const uint64_t remainingEnrollmentsSpeechLength; + const uint64_t audioLength; + const uint64_t audioSpeechLength; + const SPXSTRING createdDateTime; + const SPXSTRING lastUpdatedDateTime; + +public: + + /// + /// Creates a new instance using the provided handle. + /// + /// Result handle. + explicit VoiceProfileEnrollmentResult(SPXRESULTHANDLE hresult) : + RecognitionResult(hresult), + m_profileId(Properties.GetProperty("enrollment.profileId", "")), + enrollmentsCount(std::stoi(Properties.GetProperty("enrollment.enrollmentsCount", "0"))), + enrollmentsLength(static_cast(std::stoll(Properties.GetProperty("enrollment.enrollmentsLengthInSec", "0")))), + enrollmentsSpeechLength(static_cast(std::stoll(Properties.GetProperty("enrollment.enrollmentsSpeechLengthInSec", "0")))), + remainingEnrollmentsCount(std::stoi(Properties.GetProperty("enrollment.remainingEnrollmentsCount", "0"))), + remainingEnrollmentsSpeechLength(std::stoll(Properties.GetProperty("enrollment.remainingEnrollmentsSpeechLengthInSec", "0"))), + audioLength(static_cast(std::stoll(Properties.GetProperty("enrollment.audioLengthInSec", "0")))), + audioSpeechLength(static_cast(std::stoll(Properties.GetProperty("enrollment.audioSpeechLengthInSec", "0")))), + createdDateTime(Properties.GetProperty("enrollment.createdDateTime", "")), + lastUpdatedDateTime(Properties.GetProperty("enrollment.lastUpdatedDateTime", "")), + ProfileId(m_profileId) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /// + /// Destructor. + /// + virtual ~VoiceProfileEnrollmentResult() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /// + /// The profile id of the speaker in the enrollment. + /// + const SPXSTRING& ProfileId; + + /// + /// Retrieve a textual representation of the created time of the voice profile. + /// + /// + const SPXSTRING& GetCreatedTime() const + { + return createdDateTime; + } + + /// + /// Retrieve a textual representation of the last updated time of the voice profile. + /// + /// + const SPXSTRING& GetLastUpdatedDateTime() const + { + return lastUpdatedDateTime; + } + + /// + /// Enrollment information in ticks. + /// A single tick represents one hundred nanoseconds or one ten-millionth of a second. + /// + /// an enum of EnrollmentInfoType. 
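// Editor's note: illustrative usage sketch; not part of the vendored header.
// It shows how the EnrollmentInfoType counters above are read through
// GetEnrollmentInfo (declared just below in VoiceProfileEnrollmentResult) to decide
// whether more enrollment audio is needed. speechapi_cxx.h as the umbrella header
// and the helper's name are assumptions.
#include <iostream>
#include <memory>
#include <speechapi_cxx.h>

using namespace Microsoft::CognitiveServices::Speech::Speaker;

static bool NeedsMoreEnrollmentAudio(const std::shared_ptr<VoiceProfileEnrollmentResult>& enrollment)
{
    std::cout << "profile " << enrollment->ProfileId
              << ", accepted enrollments: " << enrollment->GetEnrollmentInfo(EnrollmentInfoType::EnrollmentsCount)
              << ", remaining speech length: " << enrollment->GetEnrollmentInfo(EnrollmentInfoType::RemainingEnrollmentsSpeechLength)
              << std::endl;

    // Enrollment is complete once both remaining counters reach zero.
    return enrollment->GetEnrollmentInfo(EnrollmentInfoType::RemainingEnrollmentsCount) != 0
        || enrollment->GetEnrollmentInfo(EnrollmentInfoType::RemainingEnrollmentsSpeechLength) != 0;
}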
+ /// Duration of recognized speech in ticks. + uint64_t GetEnrollmentInfo(EnrollmentInfoType type) const + { + switch (type) + { + case EnrollmentInfoType::EnrollmentsCount: + return static_cast(enrollmentsCount); + + case EnrollmentInfoType::EnrollmentsLength: + return enrollmentsLength; + + case EnrollmentInfoType::EnrollmentsSpeechLength: + return enrollmentsSpeechLength; + + case EnrollmentInfoType::RemainingEnrollmentsCount: + return static_cast(remainingEnrollmentsCount); + + case EnrollmentInfoType::RemainingEnrollmentsSpeechLength: + return remainingEnrollmentsSpeechLength; + + case EnrollmentInfoType::AudioLength: + return audioLength; + + case EnrollmentInfoType::AudioSpeechLength: + return audioSpeechLength; + + default: + throw std::runtime_error("Invalid enrollmentInfoType!"); + } + } + +private: + + DISABLE_DEFAULT_CTORS(VoiceProfileEnrollmentResult); + +}; + +/// +/// Represents the cancellation details of a result of an enrollment. +/// Added in version 1.12.0. +/// +class VoiceProfileEnrollmentCancellationDetails +{ +private: + + CancellationErrorCode m_errorCode; + +public: + + /// + /// Create an object that represents the details of a canceled enrollment result. + /// + /// a voice profile enrollment result object. + /// a smart pointer of voice profile enrollment cancellation details object. + static std::shared_ptr FromResult(std::shared_ptr result) + { + return std::shared_ptr { new VoiceProfileEnrollmentCancellationDetails(result.get()) }; + } + + /// + /// The error code in case of an unsuccessful enrollment ( is set to Error). + /// + const CancellationErrorCode& ErrorCode; + + /// + /// The error message in case of an unsuccessful enrollment ( is set to Error). + /// + const SPXSTRING ErrorDetails; + +protected: + + /*! \cond PROTECTED */ + + VoiceProfileEnrollmentCancellationDetails(VoiceProfileEnrollmentResult* result) : + m_errorCode(GetCancellationErrorCode(result)), + ErrorCode(m_errorCode), + ErrorDetails(result->Properties.GetProperty(PropertyId::SpeechServiceResponse_JsonErrorDetails)) + { + } + + /*! \endcond */ + +private: + DISABLE_DEFAULT_CTORS(VoiceProfileEnrollmentCancellationDetails); + + /*! \cond PRIVATE */ + + CancellationErrorCode GetCancellationErrorCode(VoiceProfileEnrollmentResult* result) + { + Result_CancellationErrorCode errorCode = CancellationErrorCode_NoError; + + SPXRESULTHANDLE hresult = (SPXRESULTHANDLE)(*result); + SPX_IFFAILED_THROW_HR(result_get_canceled_error_code(hresult, &errorCode)); + + return static_cast(errorCode); + } + + /*! \endcond */ +}; + +} } } } // Microsoft::CognitiveServices::Speech::Speaker diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_voice_profile_phrase_result.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_voice_profile_phrase_result.h new file mode 100644 index 0000000..5695413 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_voice_profile_phrase_result.h @@ -0,0 +1,194 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_cxx_speech_voice_profile_phrase_result.h: Public API declarations for VoiceProfilePhraseResult C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include + +namespace Microsoft { + namespace CognitiveServices { + namespace Speech { + namespace Speaker { + + /// + /// Class for VoiceProfilePhraseResult. + /// This class represents the result of requesting valid activation phrases for speaker recognition. + /// Added in version 1.18.0 + /// + class VoiceProfilePhraseResult + { + private: + + /*! \cond PRIVATE */ + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRESULTHANDLE hresult) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + result_get_property_bag(hresult, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + /*! \endcond */ + + public: + explicit VoiceProfilePhraseResult(SPXRESULTHANDLE hresult) : + m_properties(hresult), + ResultId(m_resultId), + Reason(m_reason), + Properties(m_properties), + m_phrases(std::make_shared>(Utils::Split(m_properties.GetProperty("speakerrecognition.phrases", ""), '|'))), + m_hresult(hresult) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + PopulateResultFields(hresult, &m_resultId, &m_reason); + } + + virtual ~VoiceProfilePhraseResult() + { + ::recognizer_result_handle_release(m_hresult); + m_hresult = SPXHANDLE_INVALID; + } + + /// + /// Unique result id. + /// + const SPXSTRING& ResultId; + + /// + /// Voice profile result reason. + /// + const ResultReason& Reason; + + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; + + /// + /// Gets the activation phrases. + /// + /// Vector of phrases in string form + std::shared_ptr> GetPhrases() + { + return m_phrases; + } + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. + explicit operator SPXRESULTHANDLE() { return m_hresult; } + + private: + DISABLE_DEFAULT_CTORS(VoiceProfilePhraseResult); + + void PopulateResultFields(SPXRESULTHANDLE hresult, SPXSTRING* resultId, Speech::ResultReason* reason) + { + SPX_INIT_HR(hr); + + const size_t maxCharCount = 2048; + char sz[maxCharCount + 1] = {}; + + if (resultId != nullptr) + { + SPX_THROW_ON_FAIL(hr = result_get_result_id(hresult, sz, maxCharCount)); + *resultId = Utils::ToSPXString(sz); + } + + if (reason != nullptr) + { + Result_Reason resultReason; + SPX_THROW_ON_FAIL(hr = result_get_reason(hresult, &resultReason)); + *reason = (Speech::ResultReason)resultReason; + } + } + + ResultReason m_reason; + SPXSTRING m_resultId; + std::shared_ptr> m_phrases; + SPXRESULTHANDLE m_hresult; + }; + + /// + /// Class for VoiceProfilePhraseCancellationDetails. + /// This class represents error details of a voice profile result. + /// + class VoiceProfilePhraseCancellationDetails + { + private: + CancellationErrorCode m_errorCode; + + public: + + /// + /// Creates an instance of VoiceProfilePhraseCancellationDetails object for the canceled VoiceProfile. + /// + /// The result that was canceled. + /// A shared pointer to VoiceProfilePhraseCancellationDetails. + static std::shared_ptr FromResult(std::shared_ptr result) + { + return std::shared_ptr { new VoiceProfilePhraseCancellationDetails(result.get()) }; + } + + /// + /// The error code in case of an unsuccessful voice profile action( is set to Error). + /// If Reason is not Error, ErrorCode is set to NoError. 
+ /// + const CancellationErrorCode& ErrorCode; + + /// + /// The error message in case of an unsuccessful voice profile action( is set to Error). + /// + const SPXSTRING ErrorDetails; + + protected: + + /*! \cond PROTECTED */ + + VoiceProfilePhraseCancellationDetails(VoiceProfilePhraseResult* result) : + m_errorCode(GetCancellationErrorCode(result)), + ErrorCode(m_errorCode), + ErrorDetails(result->Properties.GetProperty(PropertyId::SpeechServiceResponse_JsonErrorDetails)) + { + } + + /*! \endcond */ + + private: + DISABLE_DEFAULT_CTORS(VoiceProfilePhraseCancellationDetails); + + + CancellationErrorCode GetCancellationErrorCode(VoiceProfilePhraseResult* result) + { + UNUSED(result); + Result_CancellationErrorCode errorCode = CancellationErrorCode_NoError; + + SPXRESULTHANDLE hresult = (SPXRESULTHANDLE)(*result); + SPX_IFFAILED_THROW_HR(result_get_canceled_error_code(hresult, &errorCode)); + + return (CancellationErrorCode)errorCode; + } + }; + + } + } + } +} diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_voice_profile_result.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_voice_profile_result.h new file mode 100644 index 0000000..83d0db1 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/include/cxx_api/speechapi_cxx_voice_profile_result.h @@ -0,0 +1,180 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_voice_profile_result.h: Public API declarations for VoiceProfileResult C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Speaker { + +/// +/// Class for VoiceProfileResult. +/// This class represents the result of processing voice profiles. +/// Added in version 1.12.0 +/// +class VoiceProfileResult +{ +private: + + /*! \cond PRIVATE */ + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRESULTHANDLE hresult) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + result_get_property_bag(hresult, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + /*! \endcond */ + +public: + explicit VoiceProfileResult(SPXRESULTHANDLE hresult) : + m_properties(hresult), + ResultId(m_resultId), + Reason(m_reason), + Properties(m_properties), + m_hresult(hresult) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + PopulateResultFields(hresult, &m_resultId, &m_reason); + } + + virtual ~VoiceProfileResult() + { + ::recognizer_result_handle_release(m_hresult); + m_hresult = SPXHANDLE_INVALID; + } + + /// + /// Unique result id. + /// + const SPXSTRING& ResultId; + + /// + /// Voice profile result reason. + /// + const ResultReason& Reason; + + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. 
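The VoiceProfileResult header resumes below. First, a sketch (illustrative only, not part of the patch) of reading activation phrases from the VoiceProfilePhraseResult added above. The element type returned by GetPhrases() is assumed to be std::string, since the template arguments were stripped from the header text in this rendering; PrintActivationPhrases is a hypothetical helper.

#include <iostream>
#include <memory>
#include <speechapi_cxx.h>  // assumption: the umbrella header includes the Speaker headers

using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Speaker;

// Hypothetical helper: list the valid activation phrases carried by a phrase result.
void PrintActivationPhrases(const std::shared_ptr<VoiceProfilePhraseResult>& result)
{
    if (result->Reason == ResultReason::Canceled)
    {
        auto details = VoiceProfilePhraseCancellationDetails::FromResult(result);
        std::cout << "Phrase request canceled: " << details->ErrorDetails << std::endl;
        return;
    }

    for (const auto& phrase : *result->GetPhrases())  // assumed std::vector<std::string>
    {
        std::cout << "Activation phrase: " << phrase << std::endl;
    }
}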
+ explicit operator SPXRESULTHANDLE() { return m_hresult; } + +private: + DISABLE_DEFAULT_CTORS(VoiceProfileResult); + + void PopulateResultFields(SPXRESULTHANDLE hresult, SPXSTRING* resultId, Speech::ResultReason* reason) + { + SPX_INIT_HR(hr); + + const size_t maxCharCount = 2048; + char sz[maxCharCount + 1] = {}; + + if (resultId != nullptr) + { + SPX_THROW_ON_FAIL(hr = result_get_result_id(hresult, sz, maxCharCount)); + *resultId = Utils::ToSPXString(sz); + } + + if (reason != nullptr) + { + Result_Reason resultReason; + SPX_THROW_ON_FAIL(hr = result_get_reason(hresult, &resultReason)); + *reason = (Speech::ResultReason)resultReason; + } + } + + ResultReason m_reason; + SPXSTRING m_resultId; + SPXRESULTHANDLE m_hresult; +}; + +/// +/// Class for VoiceProfileCancellationDetails. +/// This class represents error details of a voice profile result. +/// +class VoiceProfileCancellationDetails +{ +private: + CancellationErrorCode m_errorCode; + +public: + + /// + /// Creates an instance of VoiceProfileCancellationDetails object for the canceled VoiceProfile. + /// + /// The result that was canceled. + /// A shared pointer to VoiceProfileCancellationDetails. + static std::shared_ptr FromResult(std::shared_ptr result) + { + return std::shared_ptr { new VoiceProfileCancellationDetails(result.get()) }; + } + + /// + /// The error code in case of an unsuccessful voice profile action( is set to Error). + /// If Reason is not Error, ErrorCode is set to NoError. + /// + const CancellationErrorCode& ErrorCode; + + /// + /// The error message in case of an unsuccessful voice profile action( is set to Error). + /// + const SPXSTRING ErrorDetails; + +protected: + + /*! \cond PROTECTED */ + + VoiceProfileCancellationDetails(VoiceProfileResult* result) : + m_errorCode(GetCancellationErrorCode(result)), + ErrorCode(m_errorCode), + ErrorDetails(result->Properties.GetProperty(PropertyId::SpeechServiceResponse_JsonErrorDetails)) + { + } + + /*! 
\endcond */ + +private: + DISABLE_DEFAULT_CTORS(VoiceProfileCancellationDetails); + + + CancellationErrorCode GetCancellationErrorCode(VoiceProfileResult* result) + { + UNUSED(result); + Result_CancellationErrorCode errorCode = CancellationErrorCode_NoError; + + SPXRESULTHANDLE hresult = (SPXRESULTHANDLE)(*result); + SPX_IFFAILED_THROW_HR(result_get_canceled_error_code(hresult, &errorCode)); + + return (CancellationErrorCode)errorCode; + } +}; + +} } } } // Microsoft::CognitiveServices::Speech::Speaker diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Microsoft.CognitiveServices.Speech.core.lib b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Microsoft.CognitiveServices.Speech.core.lib new file mode 100644 index 0000000..4d5b5b8 Binary files /dev/null and b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Microsoft.CognitiveServices.Speech.core.lib differ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Runtime/Microsoft.CognitiveServices.Speech.core.dll b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Runtime/Microsoft.CognitiveServices.Speech.core.dll new file mode 100644 index 0000000..36ab3d9 Binary files /dev/null and b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Runtime/Microsoft.CognitiveServices.Speech.core.dll differ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Runtime/Microsoft.CognitiveServices.Speech.extension.audio.sys.dll b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Runtime/Microsoft.CognitiveServices.Speech.extension.audio.sys.dll new file mode 100644 index 0000000..0fa9ce8 Binary files /dev/null and b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Runtime/Microsoft.CognitiveServices.Speech.extension.audio.sys.dll differ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Runtime/Microsoft.CognitiveServices.Speech.extension.codec.dll b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Runtime/Microsoft.CognitiveServices.Speech.extension.codec.dll new file mode 100644 index 0000000..450cf1c Binary files /dev/null and b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Runtime/Microsoft.CognitiveServices.Speech.extension.codec.dll differ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Runtime/Microsoft.CognitiveServices.Speech.extension.kws.dll b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Runtime/Microsoft.CognitiveServices.Speech.extension.kws.dll new file mode 100644 index 0000000..866c7b9 Binary files /dev/null and b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Runtime/Microsoft.CognitiveServices.Speech.extension.kws.dll differ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Runtime/Microsoft.CognitiveServices.Speech.extension.kws.ort.dll b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Runtime/Microsoft.CognitiveServices.Speech.extension.kws.ort.dll new file mode 100644 index 0000000..769e306 Binary files /dev/null and b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Runtime/Microsoft.CognitiveServices.Speech.extension.kws.ort.dll differ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Runtime/Microsoft.CognitiveServices.Speech.extension.lu.dll 
b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Runtime/Microsoft.CognitiveServices.Speech.extension.lu.dll new file mode 100644 index 0000000..c8d8219 Binary files /dev/null and b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/AzureWrapper/libs/Runtime/Microsoft.CognitiveServices.Speech.extension.lu.dll differ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speex_buffer.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speex_buffer.h new file mode 100644 index 0000000..9e85cfc --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speex_buffer.h @@ -0,0 +1,68 @@ +/* Copyright (C) 2007 Jean-Marc Valin + + File: speex_buffer.h + This is a very simple ring buffer implementation. It is not thread-safe + so you need to do your own locking. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SPEEX_BUFFER_H +#define SPEEX_BUFFER_H + +#include "speexdsp_types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct SpeexBuffer_; +typedef struct SpeexBuffer_ SpeexBuffer; + +SpeexBuffer *speex_buffer_init(int size); + +void speex_buffer_destroy(SpeexBuffer *st); + +int speex_buffer_write(SpeexBuffer *st, void *data, int len); + +int speex_buffer_writezeros(SpeexBuffer *st, int len); + +int speex_buffer_read(SpeexBuffer *st, void *data, int len); + +int speex_buffer_get_available(SpeexBuffer *st); + +int speex_buffer_resize(SpeexBuffer *st, int len); + +#ifdef __cplusplus +} +#endif + +#endif + + + + diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speex_echo.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speex_echo.h new file mode 100644 index 0000000..4c1aa5a --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speex_echo.h @@ -0,0 +1,170 @@ +/* Copyright (C) Jean-Marc Valin */ +/** + @file speex_echo.h + @brief Echo cancellation +*/ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + 1. 
Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SPEEX_ECHO_H +#define SPEEX_ECHO_H +/** @defgroup SpeexEchoState SpeexEchoState: Acoustic echo canceller + * This is the acoustic echo canceller module. + * @{ + */ +#include "speexdsp_types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** Obtain frame size used by the AEC */ +#define SPEEX_ECHO_GET_FRAME_SIZE 3 + +/** Set sampling rate */ +#define SPEEX_ECHO_SET_SAMPLING_RATE 24 +/** Get sampling rate */ +#define SPEEX_ECHO_GET_SAMPLING_RATE 25 + +/* Can't set window sizes */ +/** Get size of impulse response (int32) */ +#define SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE 27 + +/* Can't set window content */ +/** Get impulse response (int32[]) */ +#define SPEEX_ECHO_GET_IMPULSE_RESPONSE 29 + +/** Internal echo canceller state. Should never be accessed directly. */ +struct SpeexEchoState_; + +/** @class SpeexEchoState + * This holds the state of the echo canceller. You need one per channel. +*/ + +/** Internal echo canceller state. Should never be accessed directly. 
*/ +typedef struct SpeexEchoState_ SpeexEchoState; + +/** Creates a new echo canceller state + * @param frame_size Number of samples to process at one time (should correspond to 10-20 ms) + * @param filter_length Number of samples of echo to cancel (should generally correspond to 100-500 ms) + * @return Newly-created echo canceller state + */ +SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length); + +/** Creates a new multi-channel echo canceller state + * @param frame_size Number of samples to process at one time (should correspond to 10-20 ms) + * @param filter_length Number of samples of echo to cancel (should generally correspond to 100-500 ms) + * @param nb_mic Number of microphone channels + * @param nb_speakers Number of speaker channels + * @return Newly-created echo canceller state + */ +SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers); + +/** Destroys an echo canceller state + * @param st Echo canceller state +*/ +void speex_echo_state_destroy(SpeexEchoState *st); + +/** Performs echo cancellation a frame, based on the audio sent to the speaker (no delay is added + * to playback in this form) + * + * @param st Echo canceller state + * @param rec Signal from the microphone (near end + far end echo) + * @param play Signal played to the speaker (received from far end) + * @param out Returns near-end signal with echo removed + */ +void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *rec, const spx_int16_t *play, spx_int16_t *out); + +/** Performs echo cancellation a frame (deprecated) */ +void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *rec, const spx_int16_t *play, spx_int16_t *out, spx_int32_t *Yout); + +/** Perform echo cancellation using internal playback buffer, which is delayed by two frames + * to account for the delay introduced by most soundcards (but it could be off!) 
+ * @param st Echo canceller state + * @param rec Signal from the microphone (near end + far end echo) + * @param out Returns near-end signal with echo removed +*/ +void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out); + +/** Let the echo canceller know that a frame was just queued to the soundcard + * @param st Echo canceller state + * @param play Signal played to the speaker (received from far end) +*/ +void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play); + +/** Reset the echo canceller to its original state + * @param st Echo canceller state + */ +void speex_echo_state_reset(SpeexEchoState *st); + +/** Used like the ioctl function to control the echo canceller parameters + * + * @param st Echo canceller state + * @param request ioctl-type request (one of the SPEEX_ECHO_* macros) + * @param ptr Data exchanged to-from function + * @return 0 if no error, -1 if request in unknown + */ +int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr); + + + +struct SpeexDecorrState_; + +typedef struct SpeexDecorrState_ SpeexDecorrState; + + +/** Create a state for the channel decorrelation algorithm + This is useful for multi-channel echo cancellation only + * @param rate Sampling rate + * @param channels Number of channels (it's a bit pointless if you don't have at least 2) + * @param frame_size Size of the frame to process at ones (counting samples *per* channel) +*/ +SpeexDecorrState *speex_decorrelate_new(int rate, int channels, int frame_size); + +/** Remove correlation between the channels by modifying the phase and possibly + adding noise in a way that is not (or little) perceptible. + * @param st Decorrelator state + * @param in Input audio in interleaved format + * @param out Result of the decorrelation (out *may* alias in) + * @param strength How much alteration of the audio to apply from 0 to 100. +*/ +void speex_decorrelate(SpeexDecorrState *st, const spx_int16_t *in, spx_int16_t *out, int strength); + +/** Destroy a Decorrelation state + * @param st State to destroy +*/ +void speex_decorrelate_destroy(SpeexDecorrState *st); + + +#ifdef __cplusplus +} +#endif + + +/** @}*/ +#endif diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speex_jitter.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speex_jitter.h new file mode 100644 index 0000000..8fc8d7e --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speex_jitter.h @@ -0,0 +1,197 @@ +/* Copyright (C) 2002 Jean-Marc Valin */ +/** + @file speex_jitter.h + @brief Adaptive jitter buffer for Speex +*/ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + - Neither the name of the Xiph.org Foundation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
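The speex_jitter.h license text continues below. Before it resumes, a sketch (illustrative only, not part of the patch) of the acoustic echo canceller API completed above, using assumed parameters of 16 kHz mono audio, 20 ms frames (320 samples) and roughly a 100 ms echo tail; the helper names are hypothetical and the audio I/O is left abstract.

#include "speex/speex_echo.h"

// Hypothetical helper: create an AEC state for 16 kHz capture/playback.
SpeexEchoState* CreateAec16k()
{
    const int frameSize = 320;      // 20 ms at 16 kHz
    const int filterLength = 1600;  // ~100 ms echo tail
    SpeexEchoState* aec = speex_echo_state_init(frameSize, filterLength);

    spx_int32_t rate = 16000;
    speex_echo_ctl(aec, SPEEX_ECHO_SET_SAMPLING_RATE, &rate);
    return aec;  // release later with speex_echo_state_destroy()
}

// Hypothetical helper: remove the far-end echo from one captured frame.
void CancelEchoFrame(SpeexEchoState* aec,
                     const spx_int16_t* mic,      // near end + far-end echo
                     const spx_int16_t* speaker,  // signal sent to the loudspeaker
                     spx_int16_t* cleaned)        // echo-removed output
{
    speex_echo_cancellation(aec, mic, speaker, cleaned);
}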
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef SPEEX_JITTER_H +#define SPEEX_JITTER_H +/** @defgroup JitterBuffer JitterBuffer: Adaptive jitter buffer + * This is the jitter buffer that reorders UDP/RTP packets and adjusts the buffer size + * to maintain good quality and low latency. + * @{ + */ + +#include "speexdsp_types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** Generic adaptive jitter buffer state */ +struct JitterBuffer_; + +/** Generic adaptive jitter buffer state */ +typedef struct JitterBuffer_ JitterBuffer; + +/** Definition of an incoming packet */ +typedef struct _JitterBufferPacket JitterBufferPacket; + +/** Definition of an incoming packet */ +struct _JitterBufferPacket { + char *data; /**< Data bytes contained in the packet */ + spx_uint32_t len; /**< Length of the packet in bytes */ + spx_uint32_t timestamp; /**< Timestamp for the packet */ + spx_uint32_t span; /**< Time covered by the packet (same units as timestamp) */ + spx_uint16_t sequence; /**< RTP Sequence number if available (0 otherwise) */ + spx_uint32_t user_data; /**< Put whatever data you like here (it's ignored by the jitter buffer) */ +}; + +/** Packet has been retrieved */ +#define JITTER_BUFFER_OK 0 +/** Packet is lost or is late */ +#define JITTER_BUFFER_MISSING 1 +/** A "fake" packet is meant to be inserted here to increase buffering */ +#define JITTER_BUFFER_INSERTION 2 +/** There was an error in the jitter buffer */ +#define JITTER_BUFFER_INTERNAL_ERROR -1 +/** Invalid argument */ +#define JITTER_BUFFER_BAD_ARGUMENT -2 + + +/** Set minimum amount of extra buffering required (margin) */ +#define JITTER_BUFFER_SET_MARGIN 0 +/** Get minimum amount of extra buffering required (margin) */ +#define JITTER_BUFFER_GET_MARGIN 1 +/* JITTER_BUFFER_SET_AVAILABLE_COUNT wouldn't make sense */ + +/** Get the amount of available packets currently buffered */ +#define JITTER_BUFFER_GET_AVAILABLE_COUNT 3 +/** Included because of an early misspelling (will remove in next release) */ +#define JITTER_BUFFER_GET_AVALIABLE_COUNT 3 + +/** Assign a function to destroy unused packet. When setting that, the jitter + buffer no longer copies packet data. */ +#define JITTER_BUFFER_SET_DESTROY_CALLBACK 4 +/** */ +#define JITTER_BUFFER_GET_DESTROY_CALLBACK 5 + +/** Tell the jitter buffer to only adjust the delay in multiples of the step parameter provided */ +#define JITTER_BUFFER_SET_DELAY_STEP 6 +/** */ +#define JITTER_BUFFER_GET_DELAY_STEP 7 + +/** Tell the jitter buffer to only do concealment in multiples of the size parameter provided */ +#define JITTER_BUFFER_SET_CONCEALMENT_SIZE 8 +#define JITTER_BUFFER_GET_CONCEALMENT_SIZE 9 + +/** Absolute max amount of loss that can be tolerated regardless of the delay. Typical loss + should be half of that or less. 
*/ +#define JITTER_BUFFER_SET_MAX_LATE_RATE 10 +#define JITTER_BUFFER_GET_MAX_LATE_RATE 11 + +/** Equivalent cost of one percent late packet in timestamp units */ +#define JITTER_BUFFER_SET_LATE_COST 12 +#define JITTER_BUFFER_GET_LATE_COST 13 + + +/** Initialises jitter buffer + * + * @param step_size Starting value for the size of concleanment packets and delay + adjustment steps. Can be changed at any time using JITTER_BUFFER_SET_DELAY_STEP + and JITTER_BUFFER_GET_CONCEALMENT_SIZE. + * @return Newly created jitter buffer state + */ +JitterBuffer *jitter_buffer_init(int step_size); + +/** Restores jitter buffer to its original state + * + * @param jitter Jitter buffer state + */ +void jitter_buffer_reset(JitterBuffer *jitter); + +/** Destroys jitter buffer + * + * @param jitter Jitter buffer state + */ +void jitter_buffer_destroy(JitterBuffer *jitter); + +/** Put one packet into the jitter buffer + * + * @param jitter Jitter buffer state + * @param packet Incoming packet +*/ +void jitter_buffer_put(JitterBuffer *jitter, const JitterBufferPacket *packet); + +/** Get one packet from the jitter buffer + * + * @param jitter Jitter buffer state + * @param packet Returned packet + * @param desired_span Number of samples (or units) we wish to get from the buffer (no guarantee) + * @param current_timestamp Timestamp for the returned packet +*/ +int jitter_buffer_get(JitterBuffer *jitter, JitterBufferPacket *packet, spx_int32_t desired_span, spx_int32_t *start_offset); + +/** Used right after jitter_buffer_get() to obtain another packet that would have the same timestamp. + * This is mainly useful for media where a single "frame" can be split into several packets. + * + * @param jitter Jitter buffer state + * @param packet Returned packet + */ +int jitter_buffer_get_another(JitterBuffer *jitter, JitterBufferPacket *packet); + +/** Get pointer timestamp of jitter buffer + * + * @param jitter Jitter buffer state +*/ +int jitter_buffer_get_pointer_timestamp(JitterBuffer *jitter); + +/** Advance by one tick + * + * @param jitter Jitter buffer state +*/ +void jitter_buffer_tick(JitterBuffer *jitter); + +/** Telling the jitter buffer about the remaining data in the application buffer + * @param jitter Jitter buffer state + * @param rem Amount of data buffered by the application (timestamp units) + */ +void jitter_buffer_remaining_span(JitterBuffer *jitter, spx_uint32_t rem); + +/** Used like the ioctl function to control the jitter buffer parameters + * + * @param jitter Jitter buffer state + * @param request ioctl-type request (one of the JITTER_BUFFER_* macros) + * @param ptr Data exchanged to-from function + * @return 0 if no error, -1 if request in unknown +*/ +int jitter_buffer_ctl(JitterBuffer *jitter, int request, void *ptr); + +int jitter_buffer_update_delay(JitterBuffer *jitter, JitterBufferPacket *packet, spx_int32_t *start_offset); + +/* @} */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speex_preprocess.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speex_preprocess.h new file mode 100644 index 0000000..a2e1210 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speex_preprocess.h @@ -0,0 +1,219 @@ +/* Copyright (C) 2003 Epic Games + Written by Jean-Marc Valin */ +/** + * @file speex_preprocess.h + * @brief Speex preprocessor. 
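The speex_preprocess.h file comment continues below. Before it does, a sketch (illustrative only, not part of the patch) of the adaptive jitter buffer declared just above, assuming 20 ms packets of 16 kHz audio (320 samples per tick); the helper names are hypothetical, and the caller is assumed to supply the destination buffer on get because no destroy callback is installed, so the buffer copies packet payloads.

#include "speex/speex_jitter.h"

// Hypothetical helper: queue one incoming packet (timestamps in samples).
void PushPacket(JitterBuffer* jb, char* payload, spx_uint32_t bytes,
                spx_uint32_t timestamp, spx_uint16_t sequence)
{
    JitterBufferPacket packet = {};
    packet.data = payload;
    packet.len = bytes;
    packet.timestamp = timestamp;
    packet.span = 320;  // 20 ms at 16 kHz, same units as timestamp
    packet.sequence = sequence;
    jitter_buffer_put(jb, &packet);
}

// Hypothetical helper: pull the next 320-sample span and advance the clock.
// 'out' should arrive with out->data pointing at a caller-owned buffer and
// out->len set to its capacity (assumption: default copy behaviour, no destroy callback).
int PullPacket(JitterBuffer* jb, JitterBufferPacket* out)
{
    spx_int32_t offset = 0;
    int status = jitter_buffer_get(jb, out, 320, &offset);  // JITTER_BUFFER_OK / _MISSING / ...
    jitter_buffer_tick(jb);
    return status;
}

A buffer for this configuration would typically be created with jitter_buffer_init(320) and released with jitter_buffer_destroy().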
The preprocess can do noise suppression, + * residual echo suppression (after using the echo canceller), automatic + * gain control (AGC) and voice activity detection (VAD). +*/ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SPEEX_PREPROCESS_H +#define SPEEX_PREPROCESS_H +/** @defgroup SpeexPreprocessState SpeexPreprocessState: The Speex preprocessor + * This is the Speex preprocessor. The preprocess can do noise suppression, + * residual echo suppression (after using the echo canceller), automatic + * gain control (AGC) and voice activity detection (VAD). + * @{ + */ + +#include "speexdsp_types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** State of the preprocessor (one per channel). Should never be accessed directly. */ +struct SpeexPreprocessState_; + +/** State of the preprocessor (one per channel). Should never be accessed directly. */ +typedef struct SpeexPreprocessState_ SpeexPreprocessState; + + +/** Creates a new preprocessing state. You MUST create one state per channel processed. + * @param frame_size Number of samples to process at one time (should correspond to 10-20 ms). Must be + * the same value as that used for the echo canceller for residual echo cancellation to work. + * @param sampling_rate Sampling rate used for the input. + * @return Newly created preprocessor state +*/ +SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_rate); + +/** Destroys a preprocessor state + * @param st Preprocessor state to destroy +*/ +void speex_preprocess_state_destroy(SpeexPreprocessState *st); + +/** Preprocess a frame + * @param st Preprocessor state + * @param x Audio sample vector (in and out). Must be same size as specified in speex_preprocess_state_init(). + * @return Bool value for voice activity (1 for speech, 0 for noise/silence), ONLY if VAD turned on. +*/ +int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x); + +/** Preprocess a frame (deprecated, use speex_preprocess_run() instead)*/ +int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo); + +/** Update preprocessor state, but do not compute the output + * @param st Preprocessor state + * @param x Audio sample vector (in only). 
Must be same size as specified in speex_preprocess_state_init(). +*/ +void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x); + +/** Used like the ioctl function to control the preprocessor parameters + * @param st Preprocessor state + * @param request ioctl-type request (one of the SPEEX_PREPROCESS_* macros) + * @param ptr Data exchanged to-from function + * @return 0 if no error, -1 if request in unknown +*/ +int speex_preprocess_ctl(SpeexPreprocessState *st, int request, void *ptr); + + + +/** Set preprocessor denoiser state */ +#define SPEEX_PREPROCESS_SET_DENOISE 0 +/** Get preprocessor denoiser state */ +#define SPEEX_PREPROCESS_GET_DENOISE 1 + +/** Set preprocessor Automatic Gain Control state */ +#define SPEEX_PREPROCESS_SET_AGC 2 +/** Get preprocessor Automatic Gain Control state */ +#define SPEEX_PREPROCESS_GET_AGC 3 + +/** Set preprocessor Voice Activity Detection state */ +#define SPEEX_PREPROCESS_SET_VAD 4 +/** Get preprocessor Voice Activity Detection state */ +#define SPEEX_PREPROCESS_GET_VAD 5 + +/** Set preprocessor Automatic Gain Control level (float) */ +#define SPEEX_PREPROCESS_SET_AGC_LEVEL 6 +/** Get preprocessor Automatic Gain Control level (float) */ +#define SPEEX_PREPROCESS_GET_AGC_LEVEL 7 + +/** Set preprocessor dereverb state */ +#define SPEEX_PREPROCESS_SET_DEREVERB 8 +/** Get preprocessor dereverb state */ +#define SPEEX_PREPROCESS_GET_DEREVERB 9 + +/** Set preprocessor dereverb level */ +#define SPEEX_PREPROCESS_SET_DEREVERB_LEVEL 10 +/** Get preprocessor dereverb level */ +#define SPEEX_PREPROCESS_GET_DEREVERB_LEVEL 11 + +/** Set preprocessor dereverb decay */ +#define SPEEX_PREPROCESS_SET_DEREVERB_DECAY 12 +/** Get preprocessor dereverb decay */ +#define SPEEX_PREPROCESS_GET_DEREVERB_DECAY 13 + +/** Set probability required for the VAD to go from silence to voice */ +#define SPEEX_PREPROCESS_SET_PROB_START 14 +/** Get probability required for the VAD to go from silence to voice */ +#define SPEEX_PREPROCESS_GET_PROB_START 15 + +/** Set probability required for the VAD to stay in the voice state (integer percent) */ +#define SPEEX_PREPROCESS_SET_PROB_CONTINUE 16 +/** Get probability required for the VAD to stay in the voice state (integer percent) */ +#define SPEEX_PREPROCESS_GET_PROB_CONTINUE 17 + +/** Set maximum attenuation of the noise in dB (negative number) */ +#define SPEEX_PREPROCESS_SET_NOISE_SUPPRESS 18 +/** Get maximum attenuation of the noise in dB (negative number) */ +#define SPEEX_PREPROCESS_GET_NOISE_SUPPRESS 19 + +/** Set maximum attenuation of the residual echo in dB (negative number) */ +#define SPEEX_PREPROCESS_SET_ECHO_SUPPRESS 20 +/** Get maximum attenuation of the residual echo in dB (negative number) */ +#define SPEEX_PREPROCESS_GET_ECHO_SUPPRESS 21 + +/** Set maximum attenuation of the residual echo in dB when near end is active (negative number) */ +#define SPEEX_PREPROCESS_SET_ECHO_SUPPRESS_ACTIVE 22 +/** Get maximum attenuation of the residual echo in dB when near end is active (negative number) */ +#define SPEEX_PREPROCESS_GET_ECHO_SUPPRESS_ACTIVE 23 + +/** Set the corresponding echo canceller state so that residual echo suppression can be performed (NULL for no residual echo suppression) */ +#define SPEEX_PREPROCESS_SET_ECHO_STATE 24 +/** Get the corresponding echo canceller state */ +#define SPEEX_PREPROCESS_GET_ECHO_STATE 25 + +/** Set maximal gain increase in dB/second (int32) */ +#define SPEEX_PREPROCESS_SET_AGC_INCREMENT 26 + +/** Get maximal gain increase in dB/second (int32) */ +#define 
SPEEX_PREPROCESS_GET_AGC_INCREMENT 27 + +/** Set maximal gain decrease in dB/second (int32) */ +#define SPEEX_PREPROCESS_SET_AGC_DECREMENT 28 + +/** Get maximal gain decrease in dB/second (int32) */ +#define SPEEX_PREPROCESS_GET_AGC_DECREMENT 29 + +/** Set maximal gain in dB (int32) */ +#define SPEEX_PREPROCESS_SET_AGC_MAX_GAIN 30 + +/** Get maximal gain in dB (int32) */ +#define SPEEX_PREPROCESS_GET_AGC_MAX_GAIN 31 + +/* Can't set loudness */ +/** Get loudness */ +#define SPEEX_PREPROCESS_GET_AGC_LOUDNESS 33 + +/* Can't set gain */ +/** Get current gain (int32 percent) */ +#define SPEEX_PREPROCESS_GET_AGC_GAIN 35 + +/* Can't set spectrum size */ +/** Get spectrum size for power spectrum (int32) */ +#define SPEEX_PREPROCESS_GET_PSD_SIZE 37 + +/* Can't set power spectrum */ +/** Get power spectrum (int32[] of squared values) */ +#define SPEEX_PREPROCESS_GET_PSD 39 + +/* Can't set noise size */ +/** Get spectrum size for noise estimate (int32) */ +#define SPEEX_PREPROCESS_GET_NOISE_PSD_SIZE 41 + +/* Can't set noise estimate */ +/** Get noise estimate (int32[] of squared values) */ +#define SPEEX_PREPROCESS_GET_NOISE_PSD 43 + +/* Can't set speech probability */ +/** Get speech probability in last frame (int32). */ +#define SPEEX_PREPROCESS_GET_PROB 45 + +/** Set preprocessor Automatic Gain Control level (int32) */ +#define SPEEX_PREPROCESS_SET_AGC_TARGET 46 +/** Get preprocessor Automatic Gain Control level (int32) */ +#define SPEEX_PREPROCESS_GET_AGC_TARGET 47 + +#ifdef __cplusplus +} +#endif + +/** @}*/ +#endif diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speex_resampler.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speex_resampler.h new file mode 100644 index 0000000..901de37 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speex_resampler.h @@ -0,0 +1,343 @@ +/* Copyright (C) 2007 Jean-Marc Valin + + File: speex_resampler.h + Resampling code + + The design goals of this code are: + - Very fast algorithm + - Low memory requirement + - Good *perceptual* quality (and not best SNR) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
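The speex_resampler.h header resumes below. Before it does, a sketch (illustrative only, not part of the patch) of wiring up the preprocessor controls listed above: denoising plus VAD on 20 ms, 16 kHz frames, optionally chained to an existing echo canceller for residual echo suppression. The helper names and the -25 dB suppression figure are assumptions.

#include "speex/speex_echo.h"
#include "speex/speex_preprocess.h"

// Hypothetical helper: build a preprocessor for 320-sample frames at 16 kHz.
SpeexPreprocessState* CreatePreprocessor16k(SpeexEchoState* aec)
{
    SpeexPreprocessState* pp = speex_preprocess_state_init(320, 16000);

    spx_int32_t on = 1;
    speex_preprocess_ctl(pp, SPEEX_PREPROCESS_SET_DENOISE, &on);
    speex_preprocess_ctl(pp, SPEEX_PREPROCESS_SET_VAD, &on);

    spx_int32_t noiseSuppressDb = -25;  // maximum noise attenuation in dB (negative)
    speex_preprocess_ctl(pp, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &noiseSuppressDb);

    if (aec != nullptr)
    {
        // Hand the echo canceller state over so residual echo suppression can run.
        speex_preprocess_ctl(pp, SPEEX_PREPROCESS_SET_ECHO_STATE, aec);
    }
    return pp;  // release later with speex_preprocess_state_destroy()
}

// Returns 1 when the frame is judged to contain speech (VAD was enabled above).
int ProcessCaptureFrame(SpeexPreprocessState* pp, spx_int16_t* frame320)
{
    return speex_preprocess_run(pp, frame320);  // modifies the frame in place
}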
+*/ + + +#ifndef SPEEX_RESAMPLER_H +#define SPEEX_RESAMPLER_H + +#ifdef OUTSIDE_SPEEX + +/********* WARNING: MENTAL SANITY ENDS HERE *************/ + +/* If the resampler is defined outside of Speex, we change the symbol names so that + there won't be any clash if linking with Speex later on. */ + +/* #define RANDOM_PREFIX your software name here */ +#ifndef RANDOM_PREFIX +#error "Please define RANDOM_PREFIX (above) to something specific to your project to prevent symbol name clashes" +#endif + +#define CAT_PREFIX2(a,b) a ## b +#define CAT_PREFIX(a,b) CAT_PREFIX2(a, b) + +#define speex_resampler_init CAT_PREFIX(RANDOM_PREFIX,_resampler_init) +#define speex_resampler_init_frac CAT_PREFIX(RANDOM_PREFIX,_resampler_init_frac) +#define speex_resampler_destroy CAT_PREFIX(RANDOM_PREFIX,_resampler_destroy) +#define speex_resampler_process_float CAT_PREFIX(RANDOM_PREFIX,_resampler_process_float) +#define speex_resampler_process_int CAT_PREFIX(RANDOM_PREFIX,_resampler_process_int) +#define speex_resampler_process_interleaved_float CAT_PREFIX(RANDOM_PREFIX,_resampler_process_interleaved_float) +#define speex_resampler_process_interleaved_int CAT_PREFIX(RANDOM_PREFIX,_resampler_process_interleaved_int) +#define speex_resampler_set_rate CAT_PREFIX(RANDOM_PREFIX,_resampler_set_rate) +#define speex_resampler_get_rate CAT_PREFIX(RANDOM_PREFIX,_resampler_get_rate) +#define speex_resampler_set_rate_frac CAT_PREFIX(RANDOM_PREFIX,_resampler_set_rate_frac) +#define speex_resampler_get_ratio CAT_PREFIX(RANDOM_PREFIX,_resampler_get_ratio) +#define speex_resampler_set_quality CAT_PREFIX(RANDOM_PREFIX,_resampler_set_quality) +#define speex_resampler_get_quality CAT_PREFIX(RANDOM_PREFIX,_resampler_get_quality) +#define speex_resampler_set_input_stride CAT_PREFIX(RANDOM_PREFIX,_resampler_set_input_stride) +#define speex_resampler_get_input_stride CAT_PREFIX(RANDOM_PREFIX,_resampler_get_input_stride) +#define speex_resampler_set_output_stride CAT_PREFIX(RANDOM_PREFIX,_resampler_set_output_stride) +#define speex_resampler_get_output_stride CAT_PREFIX(RANDOM_PREFIX,_resampler_get_output_stride) +#define speex_resampler_get_input_latency CAT_PREFIX(RANDOM_PREFIX,_resampler_get_input_latency) +#define speex_resampler_get_output_latency CAT_PREFIX(RANDOM_PREFIX,_resampler_get_output_latency) +#define speex_resampler_skip_zeros CAT_PREFIX(RANDOM_PREFIX,_resampler_skip_zeros) +#define speex_resampler_reset_mem CAT_PREFIX(RANDOM_PREFIX,_resampler_reset_mem) +#define speex_resampler_strerror CAT_PREFIX(RANDOM_PREFIX,_resampler_strerror) + +#define spx_int16_t short +#define spx_int32_t int +#define spx_uint16_t unsigned short +#define spx_uint32_t unsigned int + +#define speex_assert(cond) + +#else /* OUTSIDE_SPEEX */ + +#include "speexdsp_types.h" + +#endif /* OUTSIDE_SPEEX */ + +#ifdef __cplusplus +extern "C" { +#endif + +#define SPEEX_RESAMPLER_QUALITY_MAX 10 +#define SPEEX_RESAMPLER_QUALITY_MIN 0 +#define SPEEX_RESAMPLER_QUALITY_DEFAULT 4 +#define SPEEX_RESAMPLER_QUALITY_VOIP 3 +#define SPEEX_RESAMPLER_QUALITY_DESKTOP 5 + +enum { + RESAMPLER_ERR_SUCCESS = 0, + RESAMPLER_ERR_ALLOC_FAILED = 1, + RESAMPLER_ERR_BAD_STATE = 2, + RESAMPLER_ERR_INVALID_ARG = 3, + RESAMPLER_ERR_PTR_OVERLAP = 4, + RESAMPLER_ERR_OVERFLOW = 5, + + RESAMPLER_ERR_MAX_ERROR +}; + +struct SpeexResamplerState_; +typedef struct SpeexResamplerState_ SpeexResamplerState; + +/** Create a new resampler with integer input and output rates. + * @param nb_channels Number of channels to be processed + * @param in_rate Input sampling rate (integer number of Hz). 
+ * @param out_rate Output sampling rate (integer number of Hz). + * @param quality Resampling quality between 0 and 10, where 0 has poor quality + * and 10 has very high quality. + * @return Newly created resampler state + * @retval NULL Error: not enough memory + */ +SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels, + spx_uint32_t in_rate, + spx_uint32_t out_rate, + int quality, + int *err); + +/** Create a new resampler with fractional input/output rates. The sampling + * rate ratio is an arbitrary rational number with both the numerator and + * denominator being 32-bit integers. + * @param nb_channels Number of channels to be processed + * @param ratio_num Numerator of the sampling rate ratio + * @param ratio_den Denominator of the sampling rate ratio + * @param in_rate Input sampling rate rounded to the nearest integer (in Hz). + * @param out_rate Output sampling rate rounded to the nearest integer (in Hz). + * @param quality Resampling quality between 0 and 10, where 0 has poor quality + * and 10 has very high quality. + * @return Newly created resampler state + * @retval NULL Error: not enough memory + */ +SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels, + spx_uint32_t ratio_num, + spx_uint32_t ratio_den, + spx_uint32_t in_rate, + spx_uint32_t out_rate, + int quality, + int *err); + +/** Destroy a resampler state. + * @param st Resampler state + */ +void speex_resampler_destroy(SpeexResamplerState *st); + +/** Resample a float array. The input and output buffers must *not* overlap. + * @param st Resampler state + * @param channel_index Index of the channel to process for the multi-channel + * base (0 otherwise) + * @param in Input buffer + * @param in_len Number of input samples in the input buffer. Returns the + * number of samples processed + * @param out Output buffer + * @param out_len Size of the output buffer. Returns the number of samples written + */ +int speex_resampler_process_float(SpeexResamplerState *st, + spx_uint32_t channel_index, + const float *in, + spx_uint32_t *in_len, + float *out, + spx_uint32_t *out_len); + +/** Resample an int array. The input and output buffers must *not* overlap. + * @param st Resampler state + * @param channel_index Index of the channel to process for the multi-channel + * base (0 otherwise) + * @param in Input buffer + * @param in_len Number of input samples in the input buffer. Returns the number + * of samples processed + * @param out Output buffer + * @param out_len Size of the output buffer. Returns the number of samples written + */ +int speex_resampler_process_int(SpeexResamplerState *st, + spx_uint32_t channel_index, + const spx_int16_t *in, + spx_uint32_t *in_len, + spx_int16_t *out, + spx_uint32_t *out_len); + +/** Resample an interleaved float array. The input and output buffers must *not* overlap. + * @param st Resampler state + * @param in Input buffer + * @param in_len Number of input samples in the input buffer. Returns the number + * of samples processed. This is all per-channel. + * @param out Output buffer + * @param out_len Size of the output buffer. Returns the number of samples written. + * This is all per-channel. + */ +int speex_resampler_process_interleaved_float(SpeexResamplerState *st, + const float *in, + spx_uint32_t *in_len, + float *out, + spx_uint32_t *out_len); + +/** Resample an interleaved int array. The input and output buffers must *not* overlap. 
+ * @param st Resampler state + * @param in Input buffer + * @param in_len Number of input samples in the input buffer. Returns the number + * of samples processed. This is all per-channel. + * @param out Output buffer + * @param out_len Size of the output buffer. Returns the number of samples written. + * This is all per-channel. + */ +int speex_resampler_process_interleaved_int(SpeexResamplerState *st, + const spx_int16_t *in, + spx_uint32_t *in_len, + spx_int16_t *out, + spx_uint32_t *out_len); + +/** Set (change) the input/output sampling rates (integer value). + * @param st Resampler state + * @param in_rate Input sampling rate (integer number of Hz). + * @param out_rate Output sampling rate (integer number of Hz). + */ +int speex_resampler_set_rate(SpeexResamplerState *st, + spx_uint32_t in_rate, + spx_uint32_t out_rate); + +/** Get the current input/output sampling rates (integer value). + * @param st Resampler state + * @param in_rate Input sampling rate (integer number of Hz) copied. + * @param out_rate Output sampling rate (integer number of Hz) copied. + */ +void speex_resampler_get_rate(SpeexResamplerState *st, + spx_uint32_t *in_rate, + spx_uint32_t *out_rate); + +/** Set (change) the input/output sampling rates and resampling ratio + * (fractional values in Hz supported). + * @param st Resampler state + * @param ratio_num Numerator of the sampling rate ratio + * @param ratio_den Denominator of the sampling rate ratio + * @param in_rate Input sampling rate rounded to the nearest integer (in Hz). + * @param out_rate Output sampling rate rounded to the nearest integer (in Hz). + */ +int speex_resampler_set_rate_frac(SpeexResamplerState *st, + spx_uint32_t ratio_num, + spx_uint32_t ratio_den, + spx_uint32_t in_rate, + spx_uint32_t out_rate); + +/** Get the current resampling ratio. This will be reduced to the least + * common denominator. + * @param st Resampler state + * @param ratio_num Numerator of the sampling rate ratio copied + * @param ratio_den Denominator of the sampling rate ratio copied + */ +void speex_resampler_get_ratio(SpeexResamplerState *st, + spx_uint32_t *ratio_num, + spx_uint32_t *ratio_den); + +/** Set (change) the conversion quality. + * @param st Resampler state + * @param quality Resampling quality between 0 and 10, where 0 has poor + * quality and 10 has very high quality. + */ +int speex_resampler_set_quality(SpeexResamplerState *st, + int quality); + +/** Get the conversion quality. + * @param st Resampler state + * @param quality Resampling quality between 0 and 10, where 0 has poor + * quality and 10 has very high quality. + */ +void speex_resampler_get_quality(SpeexResamplerState *st, + int *quality); + +/** Set (change) the input stride. + * @param st Resampler state + * @param stride Input stride + */ +void speex_resampler_set_input_stride(SpeexResamplerState *st, + spx_uint32_t stride); + +/** Get the input stride. + * @param st Resampler state + * @param stride Input stride copied + */ +void speex_resampler_get_input_stride(SpeexResamplerState *st, + spx_uint32_t *stride); + +/** Set (change) the output stride. + * @param st Resampler state + * @param stride Output stride + */ +void speex_resampler_set_output_stride(SpeexResamplerState *st, + spx_uint32_t stride); + +/** Get the output stride. + * @param st Resampler state copied + * @param stride Output stride + */ +void speex_resampler_get_output_stride(SpeexResamplerState *st, + spx_uint32_t *stride); + +/** Get the latency introduced by the resampler measured in input samples. 
+ * @param st Resampler state + */ +int speex_resampler_get_input_latency(SpeexResamplerState *st); + +/** Get the latency introduced by the resampler measured in output samples. + * @param st Resampler state + */ +int speex_resampler_get_output_latency(SpeexResamplerState *st); + +/** Make sure that the first samples to go out of the resamplers don't have + * leading zeros. This is only useful before starting to use a newly created + * resampler. It is recommended to use that when resampling an audio file, as + * it will generate a file with the same length. For real-time processing, + * it is probably easier not to use this call (so that the output duration + * is the same for the first frame). + * @param st Resampler state + */ +int speex_resampler_skip_zeros(SpeexResamplerState *st); + +/** Reset a resampler so a new (unrelated) stream can be processed. + * @param st Resampler state + */ +int speex_resampler_reset_mem(SpeexResamplerState *st); + +/** Returns the English meaning for an error code + * @param err Error code + * @return English string + */ +const char *speex_resampler_strerror(int err); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speexdsp_config_types.h.in b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speexdsp_config_types.h.in new file mode 100644 index 0000000..5ea7b55 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speexdsp_config_types.h.in @@ -0,0 +1,12 @@ +#ifndef __SPEEX_TYPES_H__ +#define __SPEEX_TYPES_H__ + +@INCLUDE_STDINT@ + +typedef @SIZE16@ spx_int16_t; +typedef @USIZE16@ spx_uint16_t; +typedef @SIZE32@ spx_int32_t; +typedef @USIZE32@ spx_uint32_t; + +#endif + diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speexdsp_types.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speexdsp_types.h new file mode 100644 index 0000000..4b4a76a --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/include/speex/speexdsp_types.h @@ -0,0 +1,126 @@ +/* speexdsp_types.h taken from libogg */ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2002 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: #ifdef jail to whip a few platforms into the UNIX ideal. 
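The speexdsp_types.h banner continues below. Here, first, is a sketch (illustrative only, not part of the patch) of the resampler API completed above, downsampling mono 48 kHz int16 audio to 16 kHz at the "desktop" quality setting; the helper name and the output sizing margin are assumptions.

#include <vector>
#include "speex/speex_resampler.h"

// Hypothetical helper: one-shot 48 kHz -> 16 kHz conversion of a mono buffer.
std::vector<spx_int16_t> Downsample48kTo16k(const std::vector<spx_int16_t>& input)
{
    int err = 0;
    SpeexResamplerState* rs =
        speex_resampler_init(1, 48000, 16000, SPEEX_RESAMPLER_QUALITY_DESKTOP, &err);

    std::vector<spx_int16_t> output(input.size() / 3 + 16);  // 1/3 ratio plus a small margin
    spx_uint32_t inLen = static_cast<spx_uint32_t>(input.size());
    spx_uint32_t outLen = static_cast<spx_uint32_t>(output.size());

    // On return, inLen holds the samples consumed and outLen the samples written.
    speex_resampler_process_int(rs, 0, input.data(), &inLen, output.data(), &outLen);
    output.resize(outLen);

    speex_resampler_destroy(rs);
    return output;
}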
+ last mod: $Id: os_types.h 7524 2004-08-11 04:20:36Z conrad $ + + ********************************************************************/ +/** + @file speexdsp_types.h + @brief Speex types +*/ +#ifndef _SPEEX_TYPES_H +#define _SPEEX_TYPES_H + +#if defined(_WIN32) + +# if defined(__CYGWIN__) +# include <_G_config.h> + typedef _G_int32_t spx_int32_t; + typedef _G_uint32_t spx_uint32_t; + typedef _G_int16_t spx_int16_t; + typedef _G_uint16_t spx_uint16_t; +# elif defined(__MINGW32__) + typedef short spx_int16_t; + typedef unsigned short spx_uint16_t; + typedef int spx_int32_t; + typedef unsigned int spx_uint32_t; +# elif defined(__MWERKS__) + typedef int spx_int32_t; + typedef unsigned int spx_uint32_t; + typedef short spx_int16_t; + typedef unsigned short spx_uint16_t; +# else + /* MSVC/Borland */ + typedef __int32 spx_int32_t; + typedef unsigned __int32 spx_uint32_t; + typedef __int16 spx_int16_t; + typedef unsigned __int16 spx_uint16_t; +# endif + +#elif defined(__MACOS__) + +# include + typedef SInt16 spx_int16_t; + typedef UInt16 spx_uint16_t; + typedef SInt32 spx_int32_t; + typedef UInt32 spx_uint32_t; + +#elif (defined(__APPLE__) && defined(__MACH__)) /* MacOS X Framework build */ + +# include + typedef int16_t spx_int16_t; + typedef u_int16_t spx_uint16_t; + typedef int32_t spx_int32_t; + typedef u_int32_t spx_uint32_t; + +#elif defined(__BEOS__) + + /* Be */ +# include + typedef int16_t spx_int16_t; + typedef u_int16_t spx_uint16_t; + typedef int32_t spx_int32_t; + typedef u_int32_t spx_uint32_t; + +#elif defined (__EMX__) + + /* OS/2 GCC */ + typedef short spx_int16_t; + typedef unsigned short spx_uint16_t; + typedef int spx_int32_t; + typedef unsigned int spx_uint32_t; + +#elif defined (DJGPP) + + /* DJGPP */ + typedef short spx_int16_t; + typedef int spx_int32_t; + typedef unsigned int spx_uint32_t; + +#elif defined(R5900) + + /* PS2 EE */ + typedef int spx_int32_t; + typedef unsigned spx_uint32_t; + typedef short spx_int16_t; + +#elif defined(__SYMBIAN32__) + + /* Symbian GCC */ + typedef signed short spx_int16_t; + typedef unsigned short spx_uint16_t; + typedef signed int spx_int32_t; + typedef unsigned int spx_uint32_t; + +#elif defined(CONFIG_TI_C54X) || defined (CONFIG_TI_C55X) + + typedef short spx_int16_t; + typedef unsigned short spx_uint16_t; + typedef long spx_int32_t; + typedef unsigned long spx_uint32_t; + +#elif defined(CONFIG_TI_C6X) + + typedef short spx_int16_t; + typedef unsigned short spx_uint16_t; + typedef int spx_int32_t; + typedef unsigned int spx_uint32_t; + +#else + +#include "speexdsp_config_types.h" + +#endif + +#endif /* _SPEEX_TYPES_H */ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/lib/libspeexdsp-1.dll b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/lib/libspeexdsp-1.dll new file mode 100644 index 0000000..a17d2f1 Binary files /dev/null and b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/lib/libspeexdsp-1.dll differ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/lib/libspeexdsp.dll.a b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/lib/libspeexdsp.dll.a new file mode 100644 index 0000000..be51055 Binary files /dev/null and b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/SpeexDSP/lib/libspeexdsp.dll.a differ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/fvad/include/fvad.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/fvad/include/fvad.h new file mode 100644 index 0000000..d410d28 --- /dev/null +++ 
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ * Copyright (c) 2016 Daniel Pirch.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef FVAD_H_
+#define FVAD_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+
+/*
+ * Type for a VAD instance, an opaque object created using fvad_new().
+ */
+typedef struct Fvad Fvad;
+
+
+/*
+ * Creates and initializes a VAD instance.
+ *
+ * On success, returns a pointer to the new VAD instance, which should
+ * eventually be deleted using fvad_free().
+ *
+ * Returns NULL in case of a memory allocation error.
+ */
+Fvad *fvad_new(void);
+
+/*
+ * Frees the dynamic memory of a specified VAD instance.
+ */
+void fvad_free(Fvad *inst);
+
+
+/*
+ * Reinitializes a VAD instance, clearing all state and resetting mode and
+ * sample rate to defaults.
+ */
+void fvad_reset(Fvad *inst);
+
+
+/*
+ * Changes the VAD operating ("aggressiveness") mode of a VAD instance.
+ *
+ * A more aggressive (higher mode) VAD is more restrictive in reporting speech.
+ * Put in other words the probability of being speech when the VAD returns 1 is
+ * increased with increasing mode. As a consequence also the missed detection
+ * rate goes up.
+ *
+ * Valid modes are 0 ("quality"), 1 ("low bitrate"), 2 ("aggressive"), and 3
+ * ("very aggressive"). The default mode is 0.
+ *
+ * Returns 0 on success, or -1 if the specified mode is invalid.
+ */
+int fvad_set_mode(Fvad* inst, int mode);
+
+
+/*
+ * Sets the input sample rate in Hz for a VAD instance.
+ *
+ * Valid values are 8000, 16000, 32000 and 48000. The default is 8000. Note
+ * that internally all processing will be done 8000 Hz; input data in higher
+ * sample rates will just be downsampled first.
+ *
+ * Returns 0 on success, or -1 if the passed value is invalid.
+ */
+int fvad_set_sample_rate(Fvad* inst, int sample_rate);
+
+
+/*
+ * Calculates a VAD decision for an audio frame.
+ *
+ * `frame` is an array of `length` signed 16-bit samples. Only frames with a
+ * length of 10, 20 or 30 ms are supported, so for example at 8 kHz, `length`
+ * must be either 80, 160 or 240.
+ *
+ * Returns              : 1 - (active voice),
+ *                        0 - (non-active Voice),
+ *                       -1 - (invalid frame length).
+ */ +int fvad_process(Fvad* inst, const int16_t* frame, size_t length); + +#ifdef __cplusplus +} +#endif + +#endif // FVAD_H_ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/fvad/lib/fvad.lib b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/fvad/lib/fvad.lib new file mode 100644 index 0000000..150f694 Binary files /dev/null and b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/fvad/lib/fvad.lib differ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_jack.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_jack.h new file mode 100644 index 0000000..750d116 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_jack.h @@ -0,0 +1,77 @@ +#ifndef PA_JACK_H +#define PA_JACK_H + +/* + * $Id: + * PortAudio Portable Real-Time Audio Library + * JACK-specific extensions + * + * Copyright (c) 1999-2000 Ross Bencina and Phil Burk + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * The text above constitutes the entire PortAudio license; however, + * the PortAudio community also makes the following non-binding requests: + * + * Any person wishing to distribute modifications to the Software is + * requested to send the modifications to the original developer so that + * they can be incorporated into the canonical version. It is also + * requested that these non-binding requests be included along with the + * license above. + */ + +/** @file + * @ingroup public_header + * @brief JACK-specific PortAudio API extension header file. + */ + +#include "portaudio.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** Set the JACK client name. + * + * During Pa_Initialize, When PA JACK connects as a client of the JACK server, it requests a certain + * name, which is for instance prepended to port names. By default this name is "PortAudio". The + * JACK server may append a suffix to the client name, in order to avoid clashes among clients that + * try to connect with the same name (e.g., different PA JACK clients). + * + * This function must be called before Pa_Initialize, otherwise it won't have any effect. Note that + * the string is not copied, but instead referenced directly, so it must not be freed for as long as + * PA might need it. + * @sa PaJack_GetClientName + */ +PaError PaJack_SetClientName( const char* name ); + +/** Get the JACK client name used by PA JACK. + * + * The caller is responsible for freeing the returned pointer. 
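For reference, a minimal sketch of how the libfvad API declared in fvad.h above is typically driven, assuming 16 kHz mono int16 input and 20 ms frames (320 samples); the aggressiveness mode of 2 is an arbitrary example value:

    #include <stdint.h>
    #include "fvad.h"

    /* Classify one 20 ms frame (320 samples at 16 kHz) as speech or silence. */
    static int is_speech_frame(const int16_t *frame)
    {
        static Fvad *vad = NULL;
        if (vad == NULL) {
            vad = fvad_new();
            if (vad == NULL)
                return -1;                        /* allocation failure */
            fvad_set_sample_rate(vad, 16000);     /* valid: 8000/16000/32000/48000 */
            fvad_set_mode(vad, 2);                /* 0..3, higher = more restrictive */
        }
        return fvad_process(vad, frame, 320);     /* 1 speech, 0 non-speech, -1 bad length */
    }

Frame lengths other than 10, 20 or 30 ms at the configured rate make fvad_process() return -1, as documented above, and the instance should eventually be released with fvad_free().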
+ */ +PaError PaJack_GetClientName(const char** clientName); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_linux_alsa.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_linux_alsa.h new file mode 100644 index 0000000..c940615 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_linux_alsa.h @@ -0,0 +1,107 @@ +#ifndef PA_LINUX_ALSA_H +#define PA_LINUX_ALSA_H + +/* + * $Id$ + * PortAudio Portable Real-Time Audio Library + * ALSA-specific extensions + * + * Copyright (c) 1999-2000 Ross Bencina and Phil Burk + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * The text above constitutes the entire PortAudio license; however, + * the PortAudio community also makes the following non-binding requests: + * + * Any person wishing to distribute modifications to the Software is + * requested to send the modifications to the original developer so that + * they can be incorporated into the canonical version. It is also + * requested that these non-binding requests be included along with the + * license above. + */ + +/** @file + * @ingroup public_header + * @brief ALSA-specific PortAudio API extension header file. + */ + +#include "portaudio.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct PaAlsaStreamInfo +{ + unsigned long size; + PaHostApiTypeId hostApiType; + unsigned long version; + + const char *deviceString; +} +PaAlsaStreamInfo; + +/** Initialize host API specific structure, call this before setting relevant attributes. */ +void PaAlsa_InitializeStreamInfo( PaAlsaStreamInfo *info ); + +/** Instruct whether to enable real-time priority when starting the audio thread. + * + * If this is turned on by the stream is started, the audio callback thread will be created + * with the FIFO scheduling policy, which is suitable for realtime operation. + **/ +void PaAlsa_EnableRealtimeScheduling( PaStream *s, int enable ); + +#if 0 +void PaAlsa_EnableWatchdog( PaStream *s, int enable ); +#endif + +/** Get the ALSA-lib card index of this stream's input device. */ +PaError PaAlsa_GetStreamInputCard( PaStream *s, int *card ); + +/** Get the ALSA-lib card index of this stream's output device. */ +PaError PaAlsa_GetStreamOutputCard( PaStream *s, int *card ); + +/** Set the number of periods (buffer fragments) to configure devices with. 
+ *
+ * By default the number of periods is 4, this is the lowest number of periods that works well on
+ * the author's soundcard.
+ * @param numPeriods The number of periods.
+ */
+PaError PaAlsa_SetNumPeriods( int numPeriods );
+
+/** Set the maximum number of times to retry opening busy device (sleeping for a
+ * short interval inbetween).
+ */
+PaError PaAlsa_SetRetriesBusy( int retries );
+
+/** Set the path and name of ALSA library file if PortAudio is configured to load it dynamically (see
+ * PA_ALSA_DYNAMIC). This setting will overwrite the default name set by PA_ALSA_PATHNAME define.
+ * @param pathName Full path with filename. Only filename can be used, but dlopen() will lookup default
+ * searchable directories (/usr/lib;/usr/local/lib) then.
+ */
+void PaAlsa_SetLibraryPathName( const char *pathName );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_mac_core.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_mac_core.h
new file mode 100644
index 0000000..beb5396
--- /dev/null
+++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_mac_core.h
@@ -0,0 +1,191 @@
+#ifndef PA_MAC_CORE_H
+#define PA_MAC_CORE_H
+/*
+ * PortAudio Portable Real-Time Audio Library
+ * Macintosh Core Audio specific extensions
+ * portaudio.h should be included before this file.
+ *
+ * Copyright (c) 2005-2006 Bjorn Roche
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * The text above constitutes the entire PortAudio license; however,
+ * the PortAudio community also makes the following non-binding requests:
+ *
+ * Any person wishing to distribute modifications to the Software is
+ * requested to send the modifications to the original developer so that
+ * they can be incorporated into the canonical version. It is also
+ * requested that these non-binding requests be included along with the
+ * license above.
+ */
+
+/** @file
+ * @ingroup public_header
+ * @brief CoreAudio-specific PortAudio API extension header file.
+ */
+
+#include "portaudio.h"
+
+#include <AudioUnit/AudioUnit.h>
+#include <AudioToolbox/AudioToolbox.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/**
+ * A pointer to a paMacCoreStreamInfo may be passed as
+ * the hostApiSpecificStreamInfo in the PaStreamParameters struct
+ * when opening a stream or querying the format. Use NULL, for the
+ * defaults. Note that for duplex streams, flags for input and output
+ * should be the same or behaviour is undefined.
+ */ +typedef struct +{ + unsigned long size; /**size of whole structure including this header */ + PaHostApiTypeId hostApiType; /**host API for which this data is intended */ + unsigned long version; /**structure version */ + unsigned long flags; /** flags to modify behaviour */ + SInt32 const * channelMap; /** Channel map for HAL channel mapping , if not needed, use NULL;*/ + unsigned long channelMapSize; /** Channel map size for HAL channel mapping , if not needed, use 0;*/ +} PaMacCoreStreamInfo; + +/** + * Functions + */ + + +/** Use this function to initialize a paMacCoreStreamInfo struct + * using the requested flags. Note that channel mapping is turned + * off after a call to this function. + * @param data The datastructure to initialize + * @param flags The flags to initialize the datastructure with. +*/ +void PaMacCore_SetupStreamInfo( PaMacCoreStreamInfo *data, unsigned long flags ); + +/** call this after pa_SetupMacCoreStreamInfo to use channel mapping as described in notes.txt. + * @param data The stream info structure to assign a channel mapping to + * @param channelMap The channel map array, as described in notes.txt. This array pointer will be used directly (ie the underlying data will not be copied), so the caller should not free the array until after the stream has been opened. + * @param channelMapSize The size of the channel map array. + */ +void PaMacCore_SetupChannelMap( PaMacCoreStreamInfo *data, const SInt32 * const channelMap, unsigned long channelMapSize ); + +/** + * Retrieve the AudioDeviceID of the input device assigned to an open stream + * + * @param s The stream to query. + * + * @return A valid AudioDeviceID, or NULL if an error occurred. + */ +AudioDeviceID PaMacCore_GetStreamInputDevice( PaStream* s ); + +/** + * Retrieve the AudioDeviceID of the output device assigned to an open stream + * + * @param s The stream to query. + * + * @return A valid AudioDeviceID, or NULL if an error occurred. + */ +AudioDeviceID PaMacCore_GetStreamOutputDevice( PaStream* s ); + +/** + * Returns a statically allocated string with the device's name + * for the given channel. NULL will be returned on failure. + * + * This function's implementation is not complete! + * + * @param device The PortAudio device index. + * @param channel The channel number who's name is requested. + * @return a statically allocated string with the name of the device. + * Because this string is statically allocated, it must be + * copied if it is to be saved and used by the user after + * another call to this function. + * + */ +const char *PaMacCore_GetChannelName( int device, int channelIndex, bool input ); + + +/** Retrieve the range of legal native buffer sizes for the specified device, in sample frames. + + @param device The global index of the PortAudio device about which the query is being made. + @param minBufferSizeFrames A pointer to the location which will receive the minimum buffer size value. + @param maxBufferSizeFrames A pointer to the location which will receive the maximum buffer size value. + + @see kAudioDevicePropertyBufferFrameSizeRange in the CoreAudio SDK. + */ +PaError PaMacCore_GetBufferSizeRange( PaDeviceIndex device, + long *minBufferSizeFrames, long *maxBufferSizeFrames ); + + +/** + * Flags + */ + +/** + * The following flags alter the behaviour of PA on the mac platform. + * they can be ORed together. These should work both for opening and + * checking a device. 
+ */ + +/** Allows PortAudio to change things like the device's frame size, + * which allows for much lower latency, but might disrupt the device + * if other programs are using it, even when you are just Querying + * the device. */ +#define paMacCoreChangeDeviceParameters (0x01) + +/** In combination with the above flag, + * causes the stream opening to fail, unless the exact sample rates + * are supported by the device. */ +#define paMacCoreFailIfConversionRequired (0x02) + +/** These flags set the SR conversion quality, if required. The weird ordering + * allows Maximum Quality to be the default.*/ +#define paMacCoreConversionQualityMin (0x0100) +#define paMacCoreConversionQualityMedium (0x0200) +#define paMacCoreConversionQualityLow (0x0300) +#define paMacCoreConversionQualityHigh (0x0400) +#define paMacCoreConversionQualityMax (0x0000) + +/** + * Here are some "preset" combinations of flags (above) to get to some + * common configurations. THIS IS OVERKILL, but if more flags are added + * it won't be. + */ + +/**This is the default setting: do as much sample rate conversion as possible + * and as little mucking with the device as possible. */ +#define paMacCorePlayNice (0x00) +/**This setting is tuned for pro audio apps. It allows SR conversion on input + and output, but it tries to set the appropriate SR on the device.*/ +#define paMacCorePro (0x01) +/**This is a setting to minimize CPU usage and still play nice.*/ +#define paMacCoreMinimizeCPUButPlayNice (0x0100) +/**This is a setting to minimize CPU usage, even if that means interrupting the device. */ +#define paMacCoreMinimizeCPU (0x0101) + + +#ifdef __cplusplus +} +#endif /** __cplusplus */ + +#endif /** PA_MAC_CORE_H */ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_win_ds.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_win_ds.h new file mode 100644 index 0000000..8081abd --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_win_ds.h @@ -0,0 +1,95 @@ +#ifndef PA_WIN_DS_H +#define PA_WIN_DS_H +/* + * $Id: $ + * PortAudio Portable Real-Time Audio Library + * DirectSound specific extensions + * + * Copyright (c) 1999-2007 Ross Bencina and Phil Burk + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
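To make the hostApiSpecificStreamInfo hand-off described above concrete, a hedged sketch of opening a Core Audio input stream with the paMacCorePlayNice preset; the mono 16 kHz int16 format is an illustrative choice, Pa_Initialize() is assumed to have been called, and error handling is omitted:

    #include "portaudio.h"
    #include "pa_mac_core.h"

    /* Open a blocking-mode input stream that leaves device settings alone. */
    static PaStream *open_mac_input(void)
    {
        PaMacCoreStreamInfo macInfo;
        PaMacCore_SetupStreamInfo(&macInfo, paMacCorePlayNice);

        PaStreamParameters in = { 0 };
        in.device = Pa_GetDefaultInputDevice();
        in.channelCount = 1;
        in.sampleFormat = paInt16;
        in.suggestedLatency = Pa_GetDeviceInfo(in.device)->defaultLowInputLatency;
        in.hostApiSpecificStreamInfo = &macInfo;    /* Core Audio-specific part */

        PaStream *stream = NULL;
        Pa_OpenStream(&stream, &in, NULL /* no output */, 16000.0,
                      paFramesPerBufferUnspecified, paNoFlag,
                      NULL /* no callback: blocking read/write */, NULL);
        return stream;
    }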
+ */ + +/* + * The text above constitutes the entire PortAudio license; however, + * the PortAudio community also makes the following non-binding requests: + * + * Any person wishing to distribute modifications to the Software is + * requested to send the modifications to the original developer so that + * they can be incorporated into the canonical version. It is also + * requested that these non-binding requests be included along with the + * license above. + */ + +/** @file + @ingroup public_header + @brief DirectSound-specific PortAudio API extension header file. +*/ + +#include "portaudio.h" +#include "pa_win_waveformat.h" + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + + +#define paWinDirectSoundUseLowLevelLatencyParameters (0x01) +#define paWinDirectSoundUseChannelMask (0x04) + + +typedef struct PaWinDirectSoundStreamInfo{ + unsigned long size; /**< sizeof(PaWinDirectSoundStreamInfo) */ + PaHostApiTypeId hostApiType; /**< paDirectSound */ + unsigned long version; /**< 2 */ + + unsigned long flags; /**< enable other features of this struct */ + + /** + low-level latency setting support + Sets the size of the DirectSound host buffer. + When flags contains the paWinDirectSoundUseLowLevelLatencyParameters + this size will be used instead of interpreting the generic latency + parameters to Pa_OpenStream(). If the flag is not set this value is ignored. + + If the stream is a full duplex stream the implementation requires that + the values of framesPerBuffer for input and output match (if both are specified). + */ + unsigned long framesPerBuffer; + + /** + support for WAVEFORMATEXTENSIBLE channel masks. If flags contains + paWinDirectSoundUseChannelMask this allows you to specify which speakers + to address in a multichannel stream. Constants for channelMask + are specified in pa_win_waveformat.h + + */ + PaWinWaveFormatChannelMask channelMask; + +}PaWinDirectSoundStreamInfo; + + + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* PA_WIN_DS_H */ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_win_wasapi.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_win_wasapi.h new file mode 100644 index 0000000..c046afd --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_win_wasapi.h @@ -0,0 +1,729 @@ +#ifndef PA_WIN_WASAPI_H +#define PA_WIN_WASAPI_H +/* + * $Id: $ + * PortAudio Portable Real-Time Audio Library + * WASAPI specific extensions + * + * Copyright (c) 1999-2018 Ross Bencina and Phil Burk + * Copyright (c) 2006-2010 David Viens + * Copyright (c) 2010-2018 Dmitry Kostjuchenko + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * The text above constitutes the entire PortAudio license; however, + * the PortAudio community also makes the following non-binding requests: + * + * Any person wishing to distribute modifications to the Software is + * requested to send the modifications to the original developer so that + * they can be incorporated into the canonical version. It is also + * requested that these non-binding requests be included along with the + * license above. + */ + +/** @file + @ingroup public_header + @brief WASAPI-specific PortAudio API extension header file. +*/ + +#include "portaudio.h" +#include "pa_win_waveformat.h" + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + + +/* Stream setup flags. */ +typedef enum PaWasapiFlags +{ + /* put WASAPI into exclusive mode */ + paWinWasapiExclusive = (1 << 0), + + /* allow to skip internal PA processing completely */ + paWinWasapiRedirectHostProcessor = (1 << 1), + + /* assign custom channel mask */ + paWinWasapiUseChannelMask = (1 << 2), + + /* select non-Event driven method of data read/write + Note: WASAPI Event driven core is capable of 2ms latency!!!, but Polling + method can only provide 15-20ms latency. */ + paWinWasapiPolling = (1 << 3), + + /* force custom thread priority setting, must be used if PaWasapiStreamInfo::threadPriority + is set to a custom value */ + paWinWasapiThreadPriority = (1 << 4), + + /* force explicit sample format and do not allow PA to select suitable working format, API will + fail if provided sample format is not supported by audio hardware in Exclusive mode + or system mixer in Shared mode */ + paWinWasapiExplicitSampleFormat = (1 << 5), + + /* allow API to insert system-level channel matrix mixer and sample rate converter to allow + playback formats that do not match the current configured system settings. + this is in particular required for streams not matching the system mixer sample rate. + only applies in Shared mode. */ + paWinWasapiAutoConvert = (1 << 6) +} +PaWasapiFlags; +#define paWinWasapiExclusive (paWinWasapiExclusive) +#define paWinWasapiRedirectHostProcessor (paWinWasapiRedirectHostProcessor) +#define paWinWasapiUseChannelMask (paWinWasapiUseChannelMask) +#define paWinWasapiPolling (paWinWasapiPolling) +#define paWinWasapiThreadPriority (paWinWasapiThreadPriority) +#define paWinWasapiExplicitSampleFormat (paWinWasapiExplicitSampleFormat) +#define paWinWasapiAutoConvert (paWinWasapiAutoConvert) + + +/* Stream state. + + @note Multiple states can be united into a bitmask. + @see PaWasapiStreamStateCallback, PaWasapi_SetStreamStateHandler +*/ +typedef enum PaWasapiStreamState +{ + /* state change was caused by the error: + + Example: + 1) If thread execution stopped due to AUDCLNT_E_RESOURCES_INVALIDATED then state + value will contain paWasapiStreamStateError|paWasapiStreamStateThreadStop. 
+ */ + paWasapiStreamStateError = (1 << 0), + + /* processing thread is preparing to start execution */ + paWasapiStreamStateThreadPrepare = (1 << 1), + + /* processing thread started execution (enters its loop) */ + paWasapiStreamStateThreadStart = (1 << 2), + + /* processing thread stopped execution */ + paWasapiStreamStateThreadStop = (1 << 3) +} +PaWasapiStreamState; +#define paWasapiStreamStateError (paWasapiStreamStateError) +#define paWasapiStreamStateThreadPrepare (paWasapiStreamStateThreadPrepare) +#define paWasapiStreamStateThreadStart (paWasapiStreamStateThreadStart) +#define paWasapiStreamStateThreadStop (paWasapiStreamStateThreadStop) + + +/* Host processor. + + Allows to skip internal PA processing completely. paWinWasapiRedirectHostProcessor flag + must be set to the PaWasapiStreamInfo::flags member in order to have host processor + redirected to this callback. + + Use with caution! inputFrames and outputFrames depend solely on final device setup. + To query max values of inputFrames/outputFrames use PaWasapi_GetFramesPerHostBuffer. +*/ +typedef void (*PaWasapiHostProcessorCallback) (void *inputBuffer, long inputFrames, + void *outputBuffer, long outputFrames, void *userData); + + +/* Stream state handler. + + @param pStream Pointer to PaStream object. + @param stateFlags State flags, a collection of values from PaWasapiStreamState enum. + @param errorId Error id provided by system API (HRESULT). + @param userData Pointer to user data. + + @see PaWasapiStreamState +*/ +typedef void (*PaWasapiStreamStateCallback) (PaStream *pStream, unsigned int stateFlags, + unsigned int errorId, void *pUserData); + + +/* Device role. */ +typedef enum PaWasapiDeviceRole +{ + eRoleRemoteNetworkDevice = 0, + eRoleSpeakers, + eRoleLineLevel, + eRoleHeadphones, + eRoleMicrophone, + eRoleHeadset, + eRoleHandset, + eRoleUnknownDigitalPassthrough, + eRoleSPDIF, + eRoleHDMI, + eRoleUnknownFormFactor +} +PaWasapiDeviceRole; + + +/* Jack connection type. */ +typedef enum PaWasapiJackConnectionType +{ + eJackConnTypeUnknown, + eJackConnType3Point5mm, + eJackConnTypeQuarter, + eJackConnTypeAtapiInternal, + eJackConnTypeRCA, + eJackConnTypeOptical, + eJackConnTypeOtherDigital, + eJackConnTypeOtherAnalog, + eJackConnTypeMultichannelAnalogDIN, + eJackConnTypeXlrProfessional, + eJackConnTypeRJ11Modem, + eJackConnTypeCombination +} +PaWasapiJackConnectionType; + + +/* Jack geometric location. */ +typedef enum PaWasapiJackGeoLocation +{ + eJackGeoLocUnk = 0, + eJackGeoLocRear = 0x1, /* matches EPcxGeoLocation::eGeoLocRear */ + eJackGeoLocFront, + eJackGeoLocLeft, + eJackGeoLocRight, + eJackGeoLocTop, + eJackGeoLocBottom, + eJackGeoLocRearPanel, + eJackGeoLocRiser, + eJackGeoLocInsideMobileLid, + eJackGeoLocDrivebay, + eJackGeoLocHDMI, + eJackGeoLocOutsideMobileLid, + eJackGeoLocATAPI, + eJackGeoLocReserved5, + eJackGeoLocReserved6, +} +PaWasapiJackGeoLocation; + + +/* Jack general location. */ +typedef enum PaWasapiJackGenLocation +{ + eJackGenLocPrimaryBox = 0, + eJackGenLocInternal, + eJackGenLocSeparate, + eJackGenLocOther +} +PaWasapiJackGenLocation; + + +/* Jack's type of port. */ +typedef enum PaWasapiJackPortConnection +{ + eJackPortConnJack = 0, + eJackPortConnIntegratedDevice, + eJackPortConnBothIntegratedAndJack, + eJackPortConnUnknown +} +PaWasapiJackPortConnection; + + +/* Thread priority. */ +typedef enum PaWasapiThreadPriority +{ + eThreadPriorityNone = 0, + eThreadPriorityAudio, //!< Default for Shared mode. 
+ eThreadPriorityCapture, + eThreadPriorityDistribution, + eThreadPriorityGames, + eThreadPriorityPlayback, + eThreadPriorityProAudio, //!< Default for Exclusive mode. + eThreadPriorityWindowManager +} +PaWasapiThreadPriority; + + +/* Stream descriptor. */ +typedef struct PaWasapiJackDescription +{ + unsigned long channelMapping; + unsigned long color; /* derived from macro: #define RGB(r,g,b) ((COLORREF)(((BYTE)(r)|((WORD)((BYTE)(g))<<8))|(((DWORD)(BYTE)(b))<<16))) */ + PaWasapiJackConnectionType connectionType; + PaWasapiJackGeoLocation geoLocation; + PaWasapiJackGenLocation genLocation; + PaWasapiJackPortConnection portConnection; + unsigned int isConnected; +} +PaWasapiJackDescription; + + +/** Stream category. + Note: + - values are equal to WASAPI AUDIO_STREAM_CATEGORY enum + - supported since Windows 8.0, noop on earlier versions + - values 1,2 are deprecated on Windows 10 and not included into enumeration + + @version Available as of 19.6.0 +*/ +typedef enum PaWasapiStreamCategory +{ + eAudioCategoryOther = 0, + eAudioCategoryCommunications = 3, + eAudioCategoryAlerts = 4, + eAudioCategorySoundEffects = 5, + eAudioCategoryGameEffects = 6, + eAudioCategoryGameMedia = 7, + eAudioCategoryGameChat = 8, + eAudioCategorySpeech = 9, + eAudioCategoryMovie = 10, + eAudioCategoryMedia = 11 +} +PaWasapiStreamCategory; + + +/** Stream option. + Note: + - values are equal to WASAPI AUDCLNT_STREAMOPTIONS enum + - supported since Windows 8.1, noop on earlier versions + + @version Available as of 19.6.0 +*/ +typedef enum PaWasapiStreamOption +{ + eStreamOptionNone = 0, //!< default + eStreamOptionRaw = 1, //!< bypass WASAPI Audio Engine DSP effects, supported since Windows 8.1 + eStreamOptionMatchFormat = 2 //!< force WASAPI Audio Engine into a stream format, supported since Windows 10 +} +PaWasapiStreamOption; + + +/* Stream descriptor. */ +typedef struct PaWasapiStreamInfo +{ + unsigned long size; /**< sizeof(PaWasapiStreamInfo) */ + PaHostApiTypeId hostApiType; /**< paWASAPI */ + unsigned long version; /**< 1 */ + + unsigned long flags; /**< collection of PaWasapiFlags */ + + /** Support for WAVEFORMATEXTENSIBLE channel masks. If flags contains + paWinWasapiUseChannelMask this allows you to specify which speakers + to address in a multichannel stream. Constants for channelMask + are specified in pa_win_waveformat.h. Will be used only if + paWinWasapiUseChannelMask flag is specified. + */ + PaWinWaveFormatChannelMask channelMask; + + /** Delivers raw data to callback obtained from GetBuffer() methods skipping + internal PortAudio processing inventory completely. userData parameter will + be the same that was passed to Pa_OpenStream method. Will be used only if + paWinWasapiRedirectHostProcessor flag is specified. + */ + PaWasapiHostProcessorCallback hostProcessorOutput; + PaWasapiHostProcessorCallback hostProcessorInput; + + /** Specifies thread priority explicitly. Will be used only if paWinWasapiThreadPriority flag + is specified. + + Please note, if Input/Output streams are opened simultaneously (Full-Duplex mode) + you shall specify same value for threadPriority or othervise one of the values will be used + to setup thread priority. + */ + PaWasapiThreadPriority threadPriority; + + /** Stream category. + @see PaWasapiStreamCategory + @version Available as of 19.6.0 + */ + PaWasapiStreamCategory streamCategory; + + /** Stream option. 
+ @see PaWasapiStreamOption + @version Available as of 19.6.0 + */ + PaWasapiStreamOption streamOption; +} +PaWasapiStreamInfo; + + +/** Returns pointer to WASAPI's IAudioClient object of the stream. + + @param pStream Pointer to PaStream object. + @param pAudioClient Pointer to pointer of IAudioClient. + @param bOutput TRUE (1) for output stream, FALSE (0) for input stream. + + @return Error code indicating success or failure. +*/ +PaError PaWasapi_GetAudioClient( PaStream *pStream, void **pAudioClient, int bOutput ); + + +/** Update device list. + + This function is available if PA_WASAPI_MAX_CONST_DEVICE_COUNT is defined during compile time + with maximum constant WASAPI device count (recommended value - 32). + If PA_WASAPI_MAX_CONST_DEVICE_COUNT is set to 0 (or not defined) during compile time the implementation + will not define PaWasapi_UpdateDeviceList() and thus updating device list can only be possible by calling + Pa_Terminate() and then Pa_Initialize(). + + @return Error code indicating success or failure. +*/ +PaError PaWasapi_UpdateDeviceList(); + + +/** Get current audio format of the device assigned to the opened stream. + + Format is represented by PaWinWaveFormat or WAVEFORMATEXTENSIBLE structure. + Use this function to reconfirm format if PA's processor is overridden and + paWinWasapiRedirectHostProcessor flag is specified. + + @param pStream Pointer to PaStream object. + @param pFormat Pointer to PaWinWaveFormat or WAVEFORMATEXTENSIBLE structure. + @param formatSize Size of PaWinWaveFormat or WAVEFORMATEXTENSIBLE structure in bytes. + @param bOutput TRUE (1) for output stream, FALSE (0) for input stream. + + @return Non-negative value indicating the number of bytes copied into format descriptor + or, a PaErrorCode (which is always negative) if PortAudio is not initialized + or an error is encountered. +*/ +int PaWasapi_GetDeviceCurrentFormat( PaStream *pStream, void *pFormat, unsigned int formatSize, int bOutput ); + + +/** Get default audio format for the device in Shared Mode. + + Format is represented by PaWinWaveFormat or WAVEFORMATEXTENSIBLE structure and obtained + by getting the device property with a PKEY_AudioEngine_DeviceFormat key. + + @param pFormat Pointer to PaWinWaveFormat or WAVEFORMATEXTENSIBLE structure. + @param formatSize Size of PaWinWaveFormat or WAVEFORMATEXTENSIBLE structure in bytes. + @param device Device index. + + @return Non-negative value indicating the number of bytes copied into format descriptor + or, a PaErrorCode (which is always negative) if PortAudio is not initialized + or an error is encountered. +*/ +int PaWasapi_GetDeviceDefaultFormat( void *pFormat, unsigned int formatSize, PaDeviceIndex device ); + + +/** Get mix audio format for the device in Shared Mode. + + Format is represented by PaWinWaveFormat or WAVEFORMATEXTENSIBLE structureand obtained by + IAudioClient::GetMixFormat. + + @param pFormat Pointer to PaWinWaveFormat or WAVEFORMATEXTENSIBLE structure. + @param formatSize Size of PaWinWaveFormat or WAVEFORMATEXTENSIBLE structure in bytes. + @param device Device index. + + @return Non-negative value indicating the number of bytes copied into format descriptor + or, a PaErrorCode (which is always negative) if PortAudio is not initialized + or an error is encountered. +*/ +int PaWasapi_GetDeviceMixFormat( void *pFormat, unsigned int formatSize, PaDeviceIndex device ); + + +/** Get device role (PaWasapiDeviceRole enum). + + @param device Device index. 
+ + @return Non-negative value indicating device role or, a PaErrorCode (which is always negative) + if PortAudio is not initialized or an error is encountered. +*/ +int/*PaWasapiDeviceRole*/ PaWasapi_GetDeviceRole( PaDeviceIndex device ); + + +/** Get device IMMDevice pointer + + @param device Device index. + @param pAudioClient Pointer to pointer of IMMDevice. + + @return Error code indicating success or failure. +*/ +PaError PaWasapi_GetIMMDevice( PaDeviceIndex device, void **pIMMDevice ); + + +/** Boost thread priority of calling thread (MMCSS). + + Use it for Blocking Interface only inside the thread which makes calls to Pa_WriteStream/Pa_ReadStream. + + @param pTask Handle to pointer to priority task. Must be used with PaWasapi_RevertThreadPriority + method to revert thread priority to initial state. + + @param priorityClass Id of thread priority of PaWasapiThreadPriority type. Specifying + eThreadPriorityNone does nothing. + + @return Error code indicating success or failure. + @see PaWasapi_RevertThreadPriority +*/ +PaError PaWasapi_ThreadPriorityBoost( void **pTask, PaWasapiThreadPriority priorityClass ); + + +/** Boost thread priority of calling thread (MMCSS). + + Use it for Blocking Interface only inside the thread which makes calls to Pa_WriteStream/Pa_ReadStream. + + @param pTask Task handle obtained by PaWasapi_BoostThreadPriority method. + + @return Error code indicating success or failure. + @see PaWasapi_BoostThreadPriority +*/ +PaError PaWasapi_ThreadPriorityRevert( void *pTask ); + + +/** Get number of frames per host buffer. + + It is max value of frames of WASAPI buffer which can be locked for operations. + Use this method as helper to find out max values of inputFrames/outputFrames + of PaWasapiHostProcessorCallback. + + @param pStream Pointer to PaStream object. + @param pInput Pointer to variable to receive number of input frames. Can be NULL. + @param pOutput Pointer to variable to receive number of output frames. Can be NULL. + + @return Error code indicating success or failure. + @see PaWasapiHostProcessorCallback +*/ +PaError PaWasapi_GetFramesPerHostBuffer( PaStream *pStream, unsigned int *pInput, unsigned int *pOutput ); + + +/** Get number of jacks associated with a WASAPI device. + + Use this method to determine if there are any jacks associated with the provided WASAPI device. + Not all audio devices will support this capability. This is valid for both input and output devices. + + @note Not available on UWP platform. + + @param device Device index. + @param pJackCount Pointer to variable to receive number of jacks. + + @return Error code indicating success or failure. + @see PaWasapi_GetJackDescription + */ +PaError PaWasapi_GetJackCount( PaDeviceIndex device, int *pJackCount ); + + +/** Get the jack description associated with a WASAPI device and jack number. + + Before this function is called, use PaWasapi_GetJackCount to determine the + number of jacks associated with device. If jcount is greater than zero, then + each jack from 0 to jcount can be queried with this function to get the jack + description. + + @note Not available on UWP platform. + + @param device Device index. + @param jackIndex Jack index. + @param pJackDescription Pointer to PaWasapiJackDescription. + + @return Error code indicating success or failure. + @see PaWasapi_GetJackCount + */ +PaError PaWasapi_GetJackDescription( PaDeviceIndex device, int jackIndex, PaWasapiJackDescription *pJackDescription ); + + +/** Set stream state handler. + + @param pStream Pointer to PaStream object. 
+ @param fnStateHandler Pointer to state handling function. + @param pUserData Pointer to user data. + + @return Error code indicating success or failure. +*/ +PaError PaWasapi_SetStreamStateHandler( PaStream *pStream, PaWasapiStreamStateCallback fnStateHandler, void *pUserData ); + + +/** Set default device Id. + + By default implementation will use the DEVINTERFACE_AUDIO_RENDER and + DEVINTERFACE_AUDIO_CAPTURE Ids if device Id is not provided explicitly. These default Ids + will not allow to use Exclusive mode on UWP/WinRT platform and thus you must provide + device Id explicitly via this API before calling the Pa_OpenStream(). + + Device Ids on UWP platform are obtainable via: + Windows::Media::Devices::MediaDevice::GetDefaultAudioRenderId() or + Windows::Media::Devices::MediaDevice::GetDefaultAudioCaptureId() API. + + After the call completes, memory referenced by pointers can be freed, as implementation keeps its own copy. + + Call this function before calling Pa_IsFormatSupported() when Exclusive mode is requested. + + See an example in the IMPORTANT notes. + + @note UWP/WinRT platform only. + + @param pId Device Id, pointer to the 16-bit Unicode string (WCHAR). If NULL then device Id + will be reset to the default, e.g. DEVINTERFACE_AUDIO_RENDER or DEVINTERFACE_AUDIO_CAPTURE. + @param bOutput TRUE (1) for output (render), FALSE (0) for input (capture). + + @return Error code indicating success or failure. Will return paIncompatibleStreamHostApi if library is not compiled + for UWP/WinRT platform. If Id is longer than PA_WASAPI_DEVICE_ID_LEN characters paBufferTooBig will + be returned. +*/ +PaError PaWasapiWinrt_SetDefaultDeviceId( const unsigned short *pId, int bOutput ); + + +/** Populate the device list. + + By default the implementation will rely on DEVINTERFACE_AUDIO_RENDER and DEVINTERFACE_AUDIO_CAPTURE as + default devices. If device Id is provided by PaWasapiWinrt_SetDefaultDeviceId() then those + device Ids will be used as default and only devices for the device list. + + By populating the device list you can provide an additional available audio devices of the system to PA + which are obtainable by: + Windows::Devices::Enumeration::DeviceInformation::FindAllAsync(selector) where selector is obtainable by + Windows::Media::Devices::MediaDevice::GetAudioRenderSelector() or + Windows::Media::Devices::MediaDevice::GetAudioCaptureSelector() API. + + After the call completes, memory referenced by pointers can be freed, as implementation keeps its own copy. + + You must call PaWasapi_UpdateDeviceList() to update the internal device list of the implementation after + calling this function. + + See an example in the IMPORTANT notes. + + @note UWP/WinRT platform only. + + @param pId Array of device Ids, pointer to the array of pointers of 16-bit Unicode string (WCHAR). If NULL + and count is also 0 then device Ids will be reset to the default. Required. + @param pName Array of device Names, pointer to the array of pointers of 16-bit Unicode string (WCHAR). Optional. + @param pRole Array of device Roles, see PaWasapiDeviceRole and PaWasapi_GetDeviceRole() for more details. Optional. + @param count Number of devices, the number of array elements (pId, pName, pRole). Maximum count of devices + is limited by PA_WASAPI_DEVICE_MAX_COUNT. + @param bOutput TRUE (1) for output (render), FALSE (0) for input (capture). + + @return Error code indicating success or failure. Will return paIncompatibleStreamHostApi if library is not compiled + for UWP/WinRT platform. 
If Id is longer than PA_WASAPI_DEVICE_ID_LEN characters paBufferTooBig will + be returned. If Name is longer than PA_WASAPI_DEVICE_NAME_LEN characters paBufferTooBig will + be returned. +*/ +PaError PaWasapiWinrt_PopulateDeviceList( const unsigned short **pId, const unsigned short **pName, + const PaWasapiDeviceRole *pRole, unsigned int count, int bOutput ); + + +/* + IMPORTANT: + + WASAPI is implemented for Callback and Blocking interfaces. It supports Shared and Exclusive + share modes. + + Exclusive Mode: + + Exclusive mode allows to deliver audio data directly to hardware bypassing + software mixing. + Exclusive mode is specified by 'paWinWasapiExclusive' flag. + + Callback Interface: + + Provides best audio quality with low latency. Callback interface is implemented in + two versions: + + 1) Event-Driven: + This is the most powerful WASAPI implementation which provides glitch-free + audio at around 3ms latency in Exclusive mode. Lowest possible latency for this mode is + 3 ms for HD Audio class audio chips. For the Shared mode latency can not be + lower than 20 ms. + + 2) Poll-Driven: + Polling is another 2-nd method to operate with WASAPI. It is less efficient than Event-Driven + and provides latency at around 10-13ms. Polling must be used to overcome a system bug + under Windows Vista x64 when application is WOW64(32-bit) and Event-Driven method simply + times out (event handle is never signalled on buffer completion). Please note, such WOW64 bug + does not exist in Vista x86 or Windows 7. + Polling can be setup by specifying 'paWinWasapiPolling' flag. Our WASAPI implementation detects + WOW64 bug and sets 'paWinWasapiPolling' automatically. + + Thread priority: + + Normally thread priority is set automatically and does not require modification. Although + if user wants some tweaking thread priority can be modified by setting 'paWinWasapiThreadPriority' + flag and specifying 'PaWasapiStreamInfo::threadPriority' with value from PaWasapiThreadPriority + enum. + + Blocking Interface: + + Blocking interface is implemented but due to above described Poll-Driven method can not + deliver lowest possible latency. Specifying too low latency in Shared mode will result in + distorted audio although Exclusive mode adds stability. + + 8.24 format: + + If paCustomFormat is specified as sample format then the implementation will understand it + as valid 24-bits inside 32-bit container (e.g. wBitsPerSample = 32, Samples.wValidBitsPerSample = 24). + + By using paCustomFormat there will be small optimization when samples are be copied + with Copy_24_To_24 by PA processor instead of conversion from packed 3-byte (24-bit) data + with Int24_To_Int32. + + Pa_IsFormatSupported: + + To check format with correct Share Mode (Exclusive/Shared) you must supply PaWasapiStreamInfo + with flags paWinWasapiExclusive set through member of PaStreamParameters::hostApiSpecificStreamInfo + structure. + + If paWinWasapiExplicitSampleFormat flag is provided then implementation will not try to select + suitable close format and will return an error instead of paFormatIsSupported. By specifying + paWinWasapiExplicitSampleFormat flag it is possible to find out what sample formats are + supported by Exclusive or Shared modes. + + Pa_OpenStream: + + To set desired Share Mode (Exclusive/Shared) you must supply + PaWasapiStreamInfo with flags paWinWasapiExclusive set through member of + PaStreamParameters::hostApiSpecificStreamInfo structure. 
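As an illustration of the Pa_IsFormatSupported / Pa_OpenStream notes above, a sketch of requesting Exclusive mode through PaWasapiStreamInfo; the mono 16 kHz int16 format and the omitted Pa_Initialize()/error handling are simplifying assumptions:

    #include "portaudio.h"
    #include "pa_win_wasapi.h"

    /* Try to open a low-latency WASAPI input stream in Exclusive mode. */
    static PaStream *open_wasapi_exclusive_input(PaDeviceIndex device)
    {
        PaWasapiStreamInfo wasapiInfo = { 0 };
        wasapiInfo.size = sizeof(PaWasapiStreamInfo);
        wasapiInfo.hostApiType = paWASAPI;
        wasapiInfo.version = 1;
        wasapiInfo.flags = paWinWasapiExclusive;

        PaStreamParameters in = { 0 };
        in.device = device;
        in.channelCount = 1;
        in.sampleFormat = paInt16;
        in.suggestedLatency = Pa_GetDeviceInfo(device)->defaultLowInputLatency;
        in.hostApiSpecificStreamInfo = &wasapiInfo;

        /* Check the format in the requested share mode first, then open. */
        if (Pa_IsFormatSupported(&in, NULL, 16000.0) != paFormatIsSupported)
            return NULL;

        PaStream *stream = NULL;
        Pa_OpenStream(&stream, &in, NULL, 16000.0,
                      paFramesPerBufferUnspecified, paNoFlag, NULL, NULL);
        return stream;
    }

Leaving flags at 0 (or passing NULL as hostApiSpecificStreamInfo) falls back to Shared mode.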
+
+    Coding style for parameters and structure members of the public API:
+
+    1) bXXX - boolean, [1 (TRUE), 0 (FALSE)]
+    2) pXXX - pointer
+    3) fnXXX - pointer to function
+    4) structure members are never prefixed with a type distinguisher
+
+
+    UWP/WinRT:
+
+    This platform has number of limitations which do not allow to enumerate audio devices without
+    an additional external help. Enumeration is possible though from C++/CX, check the related API
+    Windows::Devices::Enumeration::DeviceInformation::FindAllAsync().
+
+    The main limitation is an absence of the device enumeration from inside the PA's implementation.
+    This problem can be solved by using the following functions:
+
+    PaWasapiWinrt_SetDefaultDeviceId() - to set default input/output device,
+    PaWasapiWinrt_PopulateDeviceList() - to populate device list with devices.
+
+    Here is an example of populating the device list which can also be updated dynamically depending on
+    whether device was removed from or added to the system:
+
+    ----------------
+
+    std::vector<const UINT16 *> ids, names;
+    std::vector<PaWasapiDeviceRole> role;
+
+    ids.resize(count);
+    names.resize(count);
+    role.resize(count);
+
+    for (UINT32 i = 0; i < count; ++i)
+    {
+        ids[i] = (const UINT16 *)device_ids[i].c_str();
+        names[i] = (const UINT16 *)device_names[i].c_str();
+        role[i] = eRoleUnknownFormFactor;
+    }
+
+    PaWasapiWinrt_SetDefaultDeviceId((const UINT16 *)default_device_id.c_str(), !capture);
+    PaWasapiWinrt_PopulateDeviceList(ids.data(), names.data(), role.data(), count, !capture);
+    PaWasapi_UpdateDeviceList();
+
+    ----------------
+*/
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* PA_WIN_WASAPI_H */
diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_win_waveformat.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_win_waveformat.h
new file mode 100644
index 0000000..251562d
--- /dev/null
+++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_win_waveformat.h
@@ -0,0 +1,199 @@
+#ifndef PA_WIN_WAVEFORMAT_H
+#define PA_WIN_WAVEFORMAT_H
+
+/*
+ * PortAudio Portable Real-Time Audio Library
+ * Windows WAVEFORMAT* data structure utilities
+ * portaudio.h should be included before this file.
+ *
+ * Copyright (c) 2007 Ross Bencina
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */ + +/* + * The text above constitutes the entire PortAudio license; however, + * the PortAudio community also makes the following non-binding requests: + * + * Any person wishing to distribute modifications to the Software is + * requested to send the modifications to the original developer so that + * they can be incorporated into the canonical version. It is also + * requested that these non-binding requests be included along with the + * license above. + */ + +/** @file + @ingroup public_header + @brief Windows specific PortAudio API extension and utilities header file. +*/ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + The following #defines for speaker channel masks are the same + as those in ksmedia.h, except with PAWIN_ prepended, KSAUDIO_ removed + in some cases, and casts to PaWinWaveFormatChannelMask added. +*/ + +typedef unsigned long PaWinWaveFormatChannelMask; + +/* Speaker Positions: */ +#define PAWIN_SPEAKER_FRONT_LEFT ((PaWinWaveFormatChannelMask)0x1) +#define PAWIN_SPEAKER_FRONT_RIGHT ((PaWinWaveFormatChannelMask)0x2) +#define PAWIN_SPEAKER_FRONT_CENTER ((PaWinWaveFormatChannelMask)0x4) +#define PAWIN_SPEAKER_LOW_FREQUENCY ((PaWinWaveFormatChannelMask)0x8) +#define PAWIN_SPEAKER_BACK_LEFT ((PaWinWaveFormatChannelMask)0x10) +#define PAWIN_SPEAKER_BACK_RIGHT ((PaWinWaveFormatChannelMask)0x20) +#define PAWIN_SPEAKER_FRONT_LEFT_OF_CENTER ((PaWinWaveFormatChannelMask)0x40) +#define PAWIN_SPEAKER_FRONT_RIGHT_OF_CENTER ((PaWinWaveFormatChannelMask)0x80) +#define PAWIN_SPEAKER_BACK_CENTER ((PaWinWaveFormatChannelMask)0x100) +#define PAWIN_SPEAKER_SIDE_LEFT ((PaWinWaveFormatChannelMask)0x200) +#define PAWIN_SPEAKER_SIDE_RIGHT ((PaWinWaveFormatChannelMask)0x400) +#define PAWIN_SPEAKER_TOP_CENTER ((PaWinWaveFormatChannelMask)0x800) +#define PAWIN_SPEAKER_TOP_FRONT_LEFT ((PaWinWaveFormatChannelMask)0x1000) +#define PAWIN_SPEAKER_TOP_FRONT_CENTER ((PaWinWaveFormatChannelMask)0x2000) +#define PAWIN_SPEAKER_TOP_FRONT_RIGHT ((PaWinWaveFormatChannelMask)0x4000) +#define PAWIN_SPEAKER_TOP_BACK_LEFT ((PaWinWaveFormatChannelMask)0x8000) +#define PAWIN_SPEAKER_TOP_BACK_CENTER ((PaWinWaveFormatChannelMask)0x10000) +#define PAWIN_SPEAKER_TOP_BACK_RIGHT ((PaWinWaveFormatChannelMask)0x20000) + +/* Bit mask locations reserved for future use */ +#define PAWIN_SPEAKER_RESERVED ((PaWinWaveFormatChannelMask)0x7FFC0000) + +/* Used to specify that any possible permutation of speaker configurations */ +#define PAWIN_SPEAKER_ALL ((PaWinWaveFormatChannelMask)0x80000000) + +/* DirectSound Speaker Config */ +#define PAWIN_SPEAKER_DIRECTOUT 0 +#define PAWIN_SPEAKER_MONO (PAWIN_SPEAKER_FRONT_CENTER) +#define PAWIN_SPEAKER_STEREO (PAWIN_SPEAKER_FRONT_LEFT | PAWIN_SPEAKER_FRONT_RIGHT) +#define PAWIN_SPEAKER_QUAD (PAWIN_SPEAKER_FRONT_LEFT | PAWIN_SPEAKER_FRONT_RIGHT | \ + PAWIN_SPEAKER_BACK_LEFT | PAWIN_SPEAKER_BACK_RIGHT) +#define PAWIN_SPEAKER_SURROUND (PAWIN_SPEAKER_FRONT_LEFT | PAWIN_SPEAKER_FRONT_RIGHT | \ + PAWIN_SPEAKER_FRONT_CENTER | PAWIN_SPEAKER_BACK_CENTER) +#define PAWIN_SPEAKER_5POINT1 (PAWIN_SPEAKER_FRONT_LEFT | PAWIN_SPEAKER_FRONT_RIGHT | \ + PAWIN_SPEAKER_FRONT_CENTER | PAWIN_SPEAKER_LOW_FREQUENCY | \ + PAWIN_SPEAKER_BACK_LEFT | PAWIN_SPEAKER_BACK_RIGHT) +#define PAWIN_SPEAKER_7POINT1 (PAWIN_SPEAKER_FRONT_LEFT | PAWIN_SPEAKER_FRONT_RIGHT | \ + PAWIN_SPEAKER_FRONT_CENTER | PAWIN_SPEAKER_LOW_FREQUENCY | \ + PAWIN_SPEAKER_BACK_LEFT | PAWIN_SPEAKER_BACK_RIGHT | \ + PAWIN_SPEAKER_FRONT_LEFT_OF_CENTER | PAWIN_SPEAKER_FRONT_RIGHT_OF_CENTER) +#define PAWIN_SPEAKER_5POINT1_SURROUND 
(PAWIN_SPEAKER_FRONT_LEFT | PAWIN_SPEAKER_FRONT_RIGHT | \ + PAWIN_SPEAKER_FRONT_CENTER | PAWIN_SPEAKER_LOW_FREQUENCY | \ + PAWIN_SPEAKER_SIDE_LEFT | PAWIN_SPEAKER_SIDE_RIGHT) +#define PAWIN_SPEAKER_7POINT1_SURROUND (PAWIN_SPEAKER_FRONT_LEFT | PAWIN_SPEAKER_FRONT_RIGHT | \ + PAWIN_SPEAKER_FRONT_CENTER | PAWIN_SPEAKER_LOW_FREQUENCY | \ + PAWIN_SPEAKER_BACK_LEFT | PAWIN_SPEAKER_BACK_RIGHT | \ + PAWIN_SPEAKER_SIDE_LEFT | PAWIN_SPEAKER_SIDE_RIGHT) +/* + According to the Microsoft documentation: + The following are obsolete 5.1 and 7.1 settings (they lack side speakers). Note this means + that the default 5.1 and 7.1 settings (KSAUDIO_SPEAKER_5POINT1 and KSAUDIO_SPEAKER_7POINT1 are + similarly obsolete but are unchanged for compatibility reasons). +*/ +#define PAWIN_SPEAKER_5POINT1_BACK PAWIN_SPEAKER_5POINT1 +#define PAWIN_SPEAKER_7POINT1_WIDE PAWIN_SPEAKER_7POINT1 + +/* DVD Speaker Positions */ +#define PAWIN_SPEAKER_GROUND_FRONT_LEFT PAWIN_SPEAKER_FRONT_LEFT +#define PAWIN_SPEAKER_GROUND_FRONT_CENTER PAWIN_SPEAKER_FRONT_CENTER +#define PAWIN_SPEAKER_GROUND_FRONT_RIGHT PAWIN_SPEAKER_FRONT_RIGHT +#define PAWIN_SPEAKER_GROUND_REAR_LEFT PAWIN_SPEAKER_BACK_LEFT +#define PAWIN_SPEAKER_GROUND_REAR_RIGHT PAWIN_SPEAKER_BACK_RIGHT +#define PAWIN_SPEAKER_TOP_MIDDLE PAWIN_SPEAKER_TOP_CENTER +#define PAWIN_SPEAKER_SUPER_WOOFER PAWIN_SPEAKER_LOW_FREQUENCY + + +/* + PaWinWaveFormat is defined here to provide compatibility with + compilation environments which don't have headers defining + WAVEFORMATEXTENSIBLE (e.g. older versions of MSVC, Borland C++ etc. + + The fields for WAVEFORMATEX and WAVEFORMATEXTENSIBLE are declared as an + unsigned char array here to avoid clients who include this file having + a dependency on windows.h and mmsystem.h, and also to to avoid having + to write separate packing pragmas for each compiler. +*/ +#define PAWIN_SIZEOF_WAVEFORMATEX 18 +#define PAWIN_SIZEOF_WAVEFORMATEXTENSIBLE (PAWIN_SIZEOF_WAVEFORMATEX + 22) + +typedef struct{ + unsigned char fields[ PAWIN_SIZEOF_WAVEFORMATEXTENSIBLE ]; + unsigned long extraLongForAlignment; /* ensure that compiler aligns struct to DWORD */ +} PaWinWaveFormat; + +/* + WAVEFORMATEXTENSIBLE fields: + + union { + WORD wValidBitsPerSample; + WORD wSamplesPerBlock; + WORD wReserved; + } Samples; + DWORD dwChannelMask; + GUID SubFormat; +*/ + +#define PAWIN_INDEXOF_WVALIDBITSPERSAMPLE (PAWIN_SIZEOF_WAVEFORMATEX+0) +#define PAWIN_INDEXOF_DWCHANNELMASK (PAWIN_SIZEOF_WAVEFORMATEX+2) +#define PAWIN_INDEXOF_SUBFORMAT (PAWIN_SIZEOF_WAVEFORMATEX+6) + + +/* + Valid values to pass for the waveFormatTag PaWin_InitializeWaveFormatEx and + PaWin_InitializeWaveFormatExtensible functions below. These must match + the standard Windows WAVE_FORMAT_* values. +*/ +#define PAWIN_WAVE_FORMAT_PCM (1) +#define PAWIN_WAVE_FORMAT_IEEE_FLOAT (3) +#define PAWIN_WAVE_FORMAT_DOLBY_AC3_SPDIF (0x0092) +#define PAWIN_WAVE_FORMAT_WMA_SPDIF (0x0164) + + +/* + returns PAWIN_WAVE_FORMAT_PCM or PAWIN_WAVE_FORMAT_IEEE_FLOAT + depending on the sampleFormat parameter. +*/ +int PaWin_SampleFormatToLinearWaveFormatTag( PaSampleFormat sampleFormat ); + +/* + Use the following two functions to initialize the waveformat structure. 
+*/ + +void PaWin_InitializeWaveFormatEx( PaWinWaveFormat *waveFormat, + int numChannels, PaSampleFormat sampleFormat, int waveFormatTag, double sampleRate ); + + +void PaWin_InitializeWaveFormatExtensible( PaWinWaveFormat *waveFormat, + int numChannels, PaSampleFormat sampleFormat, int waveFormatTag, double sampleRate, + PaWinWaveFormatChannelMask channelMask ); + + +/* Map a channel count to a speaker channel mask */ +PaWinWaveFormatChannelMask PaWin_DefaultChannelMask( int numChannels ); + + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* PA_WIN_WAVEFORMAT_H */ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_win_wdmks.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_win_wdmks.h new file mode 100644 index 0000000..bc2f689 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_win_wdmks.h @@ -0,0 +1,137 @@ +#ifndef PA_WIN_WDMKS_H +#define PA_WIN_WDMKS_H +/* + * $Id$ + * PortAudio Portable Real-Time Audio Library + * WDM/KS specific extensions + * + * Copyright (c) 1999-2007 Ross Bencina and Phil Burk + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * The text above constitutes the entire PortAudio license; however, + * the PortAudio community also makes the following non-binding requests: + * + * Any person wishing to distribute modifications to the Software is + * requested to send the modifications to the original developer so that + * they can be incorporated into the canonical version. It is also + * requested that these non-binding requests be included along with the + * license above. + */ + +/** @file + @ingroup public_header + @brief WDM Kernel Streaming-specific PortAudio API extension header file. +*/ + + +#include "portaudio.h" + +#include <windows.h> + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + + /** Flags to indicate valid fields in PaWinWDMKSInfo. + @see PaWinWDMKSInfo + @version Available as of 19.5.0. + */ + typedef enum PaWinWDMKSFlags + { + /** Makes WDMKS use the supplied latency figures instead of relying on the frame size reported + by the WaveCyclic device. Use at own risk! + */ + paWinWDMKSOverrideFramesize = (1 << 0), + + /** Makes WDMKS (output stream) use the given channelMask instead of the default. + @version Available as of 19.5.0.
+ */ + paWinWDMKSUseGivenChannelMask = (1 << 1), + + } PaWinWDMKSFlags; + + typedef struct PaWinWDMKSInfo{ + unsigned long size; /**< sizeof(PaWinWDMKSInfo) */ + PaHostApiTypeId hostApiType; /**< paWDMKS */ + unsigned long version; /**< 1 */ + + /** Flags indicate which fields are valid. + @see PaWinWDMKSFlags + @version Available as of 19.5.0. + */ + unsigned long flags; + + /** The number of packets to use for WaveCyclic devices, range is [2, 8]. Set to zero for default value of 2. */ + unsigned noOfPackets; + + /** If paWinWDMKSUseGivenChannelMask bit is set in flags, use this as channelMask instead of default. + @see PaWinWDMKSFlags + @version Available as of 19.5.0. + */ + unsigned channelMask; + } PaWinWDMKSInfo; + + typedef enum PaWDMKSType + { + Type_kNotUsed, + Type_kWaveCyclic, + Type_kWaveRT, + Type_kCnt, + } PaWDMKSType; + + typedef enum PaWDMKSSubType + { + SubType_kUnknown, + SubType_kNotification, + SubType_kPolled, + SubType_kCnt, + } PaWDMKSSubType; + + typedef struct PaWinWDMKSDeviceInfo { + wchar_t filterPath[MAX_PATH]; /**< KS filter path in Unicode! */ + wchar_t topologyPath[MAX_PATH]; /**< Topology filter path in Unicode! */ + PaWDMKSType streamingType; + GUID deviceProductGuid; /**< The product GUID of the device (if supported) */ + } PaWinWDMKSDeviceInfo; + + typedef struct PaWDMKSDirectionSpecificStreamInfo + { + PaDeviceIndex device; + unsigned channels; /**< No of channels the device is opened with */ + unsigned framesPerHostBuffer; /**< No of frames of the device buffer */ + int endpointPinId; /**< Endpoint pin ID (on topology filter if topologyName is not empty) */ + int muxNodeId; /**< Only valid for input */ + PaWDMKSSubType streamingSubType; /**< Not known until device is opened for streaming */ + } PaWDMKSDirectionSpecificStreamInfo; + + typedef struct PaWDMKSSpecificStreamInfo { + PaWDMKSDirectionSpecificStreamInfo input; + PaWDMKSDirectionSpecificStreamInfo output; + } PaWDMKSSpecificStreamInfo; + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* PA_WIN_DS_H */ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_win_wmme.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_win_wmme.h new file mode 100644 index 0000000..814022b --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/pa_win_wmme.h @@ -0,0 +1,185 @@ +#ifndef PA_WIN_WMME_H +#define PA_WIN_WMME_H +/* + * $Id$ + * PortAudio Portable Real-Time Audio Library + * MME specific extensions + * + * Copyright (c) 1999-2000 Ross Bencina and Phil Burk + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * The text above constitutes the entire PortAudio license; however, + * the PortAudio community also makes the following non-binding requests: + * + * Any person wishing to distribute modifications to the Software is + * requested to send the modifications to the original developer so that + * they can be incorporated into the canonical version. It is also + * requested that these non-binding requests be included along with the + * license above. + */ + +/** @file + @ingroup public_header + @brief WMME-specific PortAudio API extension header file. +*/ + +#include "portaudio.h" +#include "pa_win_waveformat.h" + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + + +/* The following are flags which can be set in + PaWinMmeStreamInfo's flags field. +*/ + +#define paWinMmeUseLowLevelLatencyParameters (0x01) +#define paWinMmeUseMultipleDevices (0x02) /* use mme specific multiple device feature */ +#define paWinMmeUseChannelMask (0x04) + +/* By default, the mme implementation drops the processing thread's priority + to THREAD_PRIORITY_NORMAL and sleeps the thread if the CPU load exceeds 100% + This flag disables any priority throttling. The processing thread will always + run at THREAD_PRIORITY_TIME_CRITICAL. +*/ +#define paWinMmeDontThrottleOverloadedProcessingThread (0x08) + +/* Flags for non-PCM spdif passthrough. +*/ +#define paWinMmeWaveFormatDolbyAc3Spdif (0x10) +#define paWinMmeWaveFormatWmaSpdif (0x20) + + +typedef struct PaWinMmeDeviceAndChannelCount{ + PaDeviceIndex device; + int channelCount; +}PaWinMmeDeviceAndChannelCount; + + +typedef struct PaWinMmeStreamInfo{ + unsigned long size; /**< sizeof(PaWinMmeStreamInfo) */ + PaHostApiTypeId hostApiType; /**< paMME */ + unsigned long version; /**< 1 */ + + unsigned long flags; + + /* low-level latency setting support + These settings control the number and size of host buffers in order + to set latency. They will be used instead of the generic parameters + to Pa_OpenStream() if flags contains the PaWinMmeUseLowLevelLatencyParameters + flag. + + If PaWinMmeStreamInfo structures with PaWinMmeUseLowLevelLatencyParameters + are supplied for both input and output in a full duplex stream, then the + input and output framesPerBuffer must be the same, or the larger of the + two must be a multiple of the smaller, otherwise a + paIncompatibleHostApiSpecificStreamInfo error will be returned from + Pa_OpenStream(). + */ + unsigned long framesPerBuffer; + unsigned long bufferCount; /* formerly numBuffers */ + + /* multiple devices per direction support + If flags contains the PaWinMmeUseMultipleDevices flag, + this functionality will be used, otherwise the device parameter to + Pa_OpenStream() will be used instead. + If devices are specified here, the corresponding device parameter + to Pa_OpenStream() should be set to paUseHostApiSpecificDeviceSpecification, + otherwise an paInvalidDevice error will result. + The total number of channels across all specified devices + must agree with the corresponding channelCount parameter to + Pa_OpenStream() otherwise a paInvalidChannelCount error will result. + */ + PaWinMmeDeviceAndChannelCount *devices; + unsigned long deviceCount; + + /* + support for WAVEFORMATEXTENSIBLE channel masks. 
If flags contains + paWinMmeUseChannelMask this allows you to specify which speakers + to address in a multichannel stream. Constants for channelMask + are specified in pa_win_waveformat.h + + */ + PaWinWaveFormatChannelMask channelMask; + +}PaWinMmeStreamInfo; + + +/** Retrieve the number of wave in handles used by a PortAudio WinMME stream. + Returns zero if the stream is output only. + + @return A non-negative value indicating the number of wave in handles + or, a PaErrorCode (which are always negative) if PortAudio is not initialized + or an error is encountered. + + @see PaWinMME_GetStreamInputHandle +*/ +int PaWinMME_GetStreamInputHandleCount( PaStream* stream ); + + +/** Retrieve a wave in handle used by a PortAudio WinMME stream. + + @param stream The stream to query. + @param handleIndex The zero based index of the wave in handle to retrieve. This + should be in the range [0, PaWinMME_GetStreamInputHandleCount(stream)-1]. + + @return A valid wave in handle, or NULL if an error occurred. + + @see PaWinMME_GetStreamInputHandle +*/ +HWAVEIN PaWinMME_GetStreamInputHandle( PaStream* stream, int handleIndex ); + + +/** Retrieve the number of wave out handles used by a PortAudio WinMME stream. + Returns zero if the stream is input only. + + @return A non-negative value indicating the number of wave out handles + or, a PaErrorCode (which are always negative) if PortAudio is not initialized + or an error is encountered. + + @see PaWinMME_GetStreamOutputHandle +*/ +int PaWinMME_GetStreamOutputHandleCount( PaStream* stream ); + + +/** Retrieve a wave out handle used by a PortAudio WinMME stream. + + @param stream The stream to query. + @param handleIndex The zero based index of the wave out handle to retrieve. + This should be in the range [0, PaWinMME_GetStreamOutputHandleCount(stream)-1]. + + @return A valid wave out handle, or NULL if an error occurred. + + @see PaWinMME_GetStreamOutputHandleCount +*/ +HWAVEOUT PaWinMME_GetStreamOutputHandle( PaStream* stream, int handleIndex ); + + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* PA_WIN_WMME_H */ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/portaudio.h b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/portaudio.h new file mode 100644 index 0000000..5d84731 --- /dev/null +++ b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/include/portaudio.h @@ -0,0 +1,1228 @@ +#ifndef PORTAUDIO_H +#define PORTAUDIO_H +/* + * $Id$ + * PortAudio Portable Real-Time Audio Library + * PortAudio API Header File + * Latest version available at: http://www.portaudio.com/ + * + * Copyright (c) 1999-2002 Ross Bencina and Phil Burk + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * The text above constitutes the entire PortAudio license; however, + * the PortAudio community also makes the following non-binding requests: + * + * Any person wishing to distribute modifications to the Software is + * requested to send the modifications to the original developer so that + * they can be incorporated into the canonical version. It is also + * requested that these non-binding requests be included along with the + * license above. + */ + +/** @file + @ingroup public_header + @brief The portable PortAudio API. +*/ + + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + +/** Retrieve the release number of the currently running PortAudio build. + For example, for version "19.5.1" this will return 0x00130501. + + @see paMakeVersionNumber +*/ +int Pa_GetVersion( void ); + +/** Retrieve a textual description of the current PortAudio build, + e.g. "PortAudio V19.5.0-devel, revision 1952M". + The format of the text may change in the future. Do not try to parse the + returned string. + + @deprecated As of 19.5.0, use Pa_GetVersionInfo()->versionText instead. +*/ +const char* Pa_GetVersionText( void ); + +/** + Generate a packed integer version number in the same format used + by Pa_GetVersion(). Use this to compare a specified version number with + the currently running version. For example: + + @code + if( Pa_GetVersion() < paMakeVersionNumber(19,5,1) ) {} + @endcode + + @see Pa_GetVersion, Pa_GetVersionInfo + @version Available as of 19.5.0. +*/ +#define paMakeVersionNumber(major, minor, subminor) \ + (((major)&0xFF)<<16 | ((minor)&0xFF)<<8 | ((subminor)&0xFF)) + + +/** + A structure containing PortAudio API version information. + @see Pa_GetVersionInfo, paMakeVersionNumber + @version Available as of 19.5.0. +*/ +typedef struct PaVersionInfo { + int versionMajor; + int versionMinor; + int versionSubMinor; + /** + This is currently the Git revision hash but may change in the future. + The versionControlRevision is updated by running a script before compiling the library. + If the update does not occur, this value may refer to an earlier revision. + */ + const char *versionControlRevision; + /** Version as a string, for example "PortAudio V19.5.0-devel, revision 1952M" */ + const char *versionText; +} PaVersionInfo; + +/** Retrieve version information for the currently running PortAudio build. + @return A pointer to an immutable PaVersionInfo structure. + + @note This function can be called at any time. It does not require PortAudio + to be initialized. The structure pointed to is statically allocated. Do not + attempt to free it or modify it. + + @see PaVersionInfo, paMakeVersionNumber + @version Available as of 19.5.0. +*/ +const PaVersionInfo* Pa_GetVersionInfo( void ); + + +/** Error codes returned by PortAudio functions. + Note that with the exception of paNoError, all PaErrorCodes are negative. 
+*/ + +typedef int PaError; +typedef enum PaErrorCode +{ + paNoError = 0, + + paNotInitialized = -10000, + paUnanticipatedHostError, + paInvalidChannelCount, + paInvalidSampleRate, + paInvalidDevice, + paInvalidFlag, + paSampleFormatNotSupported, + paBadIODeviceCombination, + paInsufficientMemory, + paBufferTooBig, + paBufferTooSmall, + paNullCallback, + paBadStreamPtr, + paTimedOut, + paInternalError, + paDeviceUnavailable, + paIncompatibleHostApiSpecificStreamInfo, + paStreamIsStopped, + paStreamIsNotStopped, + paInputOverflowed, + paOutputUnderflowed, + paHostApiNotFound, + paInvalidHostApi, + paCanNotReadFromACallbackStream, + paCanNotWriteToACallbackStream, + paCanNotReadFromAnOutputOnlyStream, + paCanNotWriteToAnInputOnlyStream, + paIncompatibleStreamHostApi, + paBadBufferPtr +} PaErrorCode; + + +/** Translate the supplied PortAudio error code into a human readable + message. +*/ +const char *Pa_GetErrorText( PaError errorCode ); + + +/** Library initialization function - call this before using PortAudio. + This function initializes internal data structures and prepares underlying + host APIs for use. With the exception of Pa_GetVersion(), Pa_GetVersionText(), + and Pa_GetErrorText(), this function MUST be called before using any other + PortAudio API functions. + + If Pa_Initialize() is called multiple times, each successful + call must be matched with a corresponding call to Pa_Terminate(). + Pairs of calls to Pa_Initialize()/Pa_Terminate() may overlap, and are not + required to be fully nested. + + Note that if Pa_Initialize() returns an error code, Pa_Terminate() should + NOT be called. + + @return paNoError if successful, otherwise an error code indicating the cause + of failure. + + @see Pa_Terminate +*/ +PaError Pa_Initialize( void ); + + +/** Library termination function - call this when finished using PortAudio. + This function deallocates all resources allocated by PortAudio since it was + initialized by a call to Pa_Initialize(). In cases where Pa_Initialise() has + been called multiple times, each call must be matched with a corresponding call + to Pa_Terminate(). The final matching call to Pa_Terminate() will automatically + close any PortAudio streams that are still open. + + Pa_Terminate() MUST be called before exiting a program which uses PortAudio. + Failure to do so may result in serious resource leaks, such as audio devices + not being available until the next reboot. + + @return paNoError if successful, otherwise an error code indicating the cause + of failure. + + @see Pa_Initialize +*/ +PaError Pa_Terminate( void ); + + + +/** The type used to refer to audio devices. Values of this type usually + range from 0 to (Pa_GetDeviceCount()-1), and may also take on the PaNoDevice + and paUseHostApiSpecificDeviceSpecification values. + + @see Pa_GetDeviceCount, paNoDevice, paUseHostApiSpecificDeviceSpecification +*/ +typedef int PaDeviceIndex; + + +/** A special PaDeviceIndex value indicating that no device is available, + or should be used. + + @see PaDeviceIndex +*/ +#define paNoDevice ((PaDeviceIndex)-1) + + +/** A special PaDeviceIndex value indicating that the device(s) to be used + are specified in the host api specific stream info structure. + + @see PaDeviceIndex +*/ +#define paUseHostApiSpecificDeviceSpecification ((PaDeviceIndex)-2) + + +/* Host API enumeration mechanism */ + +/** The type used to enumerate to host APIs at runtime. Values of this type + range from 0 to (Pa_GetHostApiCount()-1). 
+ + @see Pa_GetHostApiCount +*/ +typedef int PaHostApiIndex; + + +/** Retrieve the number of available host APIs. Even if a host API is + available it may have no devices available. + + @return A non-negative value indicating the number of available host APIs + or, a PaErrorCode (which are always negative) if PortAudio is not initialized + or an error is encountered. + + @see PaHostApiIndex +*/ +PaHostApiIndex Pa_GetHostApiCount( void ); + + +/** Retrieve the index of the default host API. The default host API will be + the lowest common denominator host API on the current platform and is + unlikely to provide the best performance. + + @return A non-negative value ranging from 0 to (Pa_GetHostApiCount()-1) + indicating the default host API index or, a PaErrorCode (which are always + negative) if PortAudio is not initialized or an error is encountered. +*/ +PaHostApiIndex Pa_GetDefaultHostApi( void ); + + +/** Unchanging unique identifiers for each supported host API. This type + is used in the PaHostApiInfo structure. The values are guaranteed to be + unique and to never change, thus allowing code to be written that + conditionally uses host API specific extensions. + + New type ids will be allocated when support for a host API reaches + "public alpha" status, prior to that developers should use the + paInDevelopment type id. + + @see PaHostApiInfo +*/ +typedef enum PaHostApiTypeId +{ + paInDevelopment=0, /* use while developing support for a new host API */ + paDirectSound=1, + paMME=2, + paASIO=3, + paSoundManager=4, + paCoreAudio=5, + paOSS=7, + paALSA=8, + paAL=9, + paBeOS=10, + paWDMKS=11, + paJACK=12, + paWASAPI=13, + paAudioScienceHPI=14 +} PaHostApiTypeId; + + +/** A structure containing information about a particular host API. */ + +typedef struct PaHostApiInfo +{ + /** this is struct version 1 */ + int structVersion; + /** The well known unique identifier of this host API @see PaHostApiTypeId */ + PaHostApiTypeId type; + /** A textual description of the host API for display on user interfaces. */ + const char *name; + + /** The number of devices belonging to this host API. This field may be + used in conjunction with Pa_HostApiDeviceIndexToDeviceIndex() to enumerate + all devices for this host API. + @see Pa_HostApiDeviceIndexToDeviceIndex + */ + int deviceCount; + + /** The default input device for this host API. The value will be a + device index ranging from 0 to (Pa_GetDeviceCount()-1), or paNoDevice + if no default input device is available. + */ + PaDeviceIndex defaultInputDevice; + + /** The default output device for this host API. The value will be a + device index ranging from 0 to (Pa_GetDeviceCount()-1), or paNoDevice + if no default output device is available. + */ + PaDeviceIndex defaultOutputDevice; + +} PaHostApiInfo; + + +/** Retrieve a pointer to a structure containing information about a specific + host Api. + + @param hostApi A valid host API index ranging from 0 to (Pa_GetHostApiCount()-1) + + @return A pointer to an immutable PaHostApiInfo structure describing + a specific host API. If the hostApi parameter is out of range or an error + is encountered, the function returns NULL. + + The returned structure is owned by the PortAudio implementation and must not + be manipulated or freed. The pointer is only guaranteed to be valid between + calls to Pa_Initialize() and Pa_Terminate(). +*/ +const PaHostApiInfo * Pa_GetHostApiInfo( PaHostApiIndex hostApi ); + + +/** Convert a static host API unique identifier, into a runtime + host API index. 
+ + @param type A unique host API identifier belonging to the PaHostApiTypeId + enumeration. + + @return A valid PaHostApiIndex ranging from 0 to (Pa_GetHostApiCount()-1) or, + a PaErrorCode (which are always negative) if PortAudio is not initialized + or an error is encountered. + + The paHostApiNotFound error code indicates that the host API specified by the + type parameter is not available. + + @see PaHostApiTypeId +*/ +PaHostApiIndex Pa_HostApiTypeIdToHostApiIndex( PaHostApiTypeId type ); + + +/** Convert a host-API-specific device index to standard PortAudio device index. + This function may be used in conjunction with the deviceCount field of + PaHostApiInfo to enumerate all devices for the specified host API. + + @param hostApi A valid host API index ranging from 0 to (Pa_GetHostApiCount()-1) + + @param hostApiDeviceIndex A valid per-host device index in the range + 0 to (Pa_GetHostApiInfo(hostApi)->deviceCount-1) + + @return A non-negative PaDeviceIndex ranging from 0 to (Pa_GetDeviceCount()-1) + or, a PaErrorCode (which are always negative) if PortAudio is not initialized + or an error is encountered. + + A paInvalidHostApi error code indicates that the host API index specified by + the hostApi parameter is out of range. + + A paInvalidDevice error code indicates that the hostApiDeviceIndex parameter + is out of range. + + @see PaHostApiInfo +*/ +PaDeviceIndex Pa_HostApiDeviceIndexToDeviceIndex( PaHostApiIndex hostApi, + int hostApiDeviceIndex ); + + + +/** Structure used to return information about a host error condition. +*/ +typedef struct PaHostErrorInfo{ + PaHostApiTypeId hostApiType; /**< the host API which returned the error code */ + long errorCode; /**< the error code returned */ + const char *errorText; /**< a textual description of the error if available, otherwise a zero-length string */ +}PaHostErrorInfo; + + +/** Return information about the last host error encountered. The error + information returned by Pa_GetLastHostErrorInfo() will never be modified + asynchronously by errors occurring in other PortAudio owned threads + (such as the thread that manages the stream callback.) + + This function is provided as a last resort, primarily to enhance debugging + by providing clients with access to all available error information. + + @return A pointer to an immutable structure constraining information about + the host error. The values in this structure will only be valid if a + PortAudio function has previously returned the paUnanticipatedHostError + error code. +*/ +const PaHostErrorInfo* Pa_GetLastHostErrorInfo( void ); + + + +/* Device enumeration and capabilities */ + +/** Retrieve the number of available devices. The number of available devices + may be zero. + + @return A non-negative value indicating the number of available devices or, + a PaErrorCode (which are always negative) if PortAudio is not initialized + or an error is encountered. +*/ +PaDeviceIndex Pa_GetDeviceCount( void ); + + +/** Retrieve the index of the default input device. The result can be + used in the inputDevice parameter to Pa_OpenStream(). + + @return The default input device index for the default host API, or paNoDevice + if no default input device is available or an error was encountered. +*/ +PaDeviceIndex Pa_GetDefaultInputDevice( void ); + + +/** Retrieve the index of the default output device. The result can be + used in the outputDevice parameter to Pa_OpenStream(). 
+ + @return The default output device index for the default host API, or paNoDevice + if no default output device is available or an error was encountered. + + @note + On the PC, the user can specify a default device by + setting an environment variable. For example, to use device #1. +<pre>
+ set PA_RECOMMENDED_OUTPUT_DEVICE=1
+</pre>
+ The user should first determine the available device ids by using + the supplied application "pa_devs". +*/ +PaDeviceIndex Pa_GetDefaultOutputDevice( void ); + + +/** The type used to represent monotonic time in seconds. PaTime is + used for the fields of the PaStreamCallbackTimeInfo argument to the + PaStreamCallback and as the result of Pa_GetStreamTime(). + + PaTime values have unspecified origin. + + @see PaStreamCallback, PaStreamCallbackTimeInfo, Pa_GetStreamTime +*/ +typedef double PaTime; + + +/** A type used to specify one or more sample formats. Each value indicates + a possible format for sound data passed to and from the stream callback, + Pa_ReadStream and Pa_WriteStream. + + The standard formats paFloat32, paInt16, paInt32, paInt24, paInt8 + and aUInt8 are usually implemented by all implementations. + + The floating point representation (paFloat32) uses +1.0 and -1.0 as the + maximum and minimum respectively. + + paUInt8 is an unsigned 8 bit format where 128 is considered "ground" + + The paNonInterleaved flag indicates that audio data is passed as an array + of pointers to separate buffers, one buffer for each channel. Usually, + when this flag is not used, audio data is passed as a single buffer with + all channels interleaved. + + @see Pa_OpenStream, Pa_OpenDefaultStream, PaDeviceInfo + @see paFloat32, paInt16, paInt32, paInt24, paInt8 + @see paUInt8, paCustomFormat, paNonInterleaved +*/ +typedef unsigned long PaSampleFormat; + + +#define paFloat32 ((PaSampleFormat) 0x00000001) /**< @see PaSampleFormat */ +#define paInt32 ((PaSampleFormat) 0x00000002) /**< @see PaSampleFormat */ +#define paInt24 ((PaSampleFormat) 0x00000004) /**< Packed 24 bit format. @see PaSampleFormat */ +#define paInt16 ((PaSampleFormat) 0x00000008) /**< @see PaSampleFormat */ +#define paInt8 ((PaSampleFormat) 0x00000010) /**< @see PaSampleFormat */ +#define paUInt8 ((PaSampleFormat) 0x00000020) /**< @see PaSampleFormat */ +#define paCustomFormat ((PaSampleFormat) 0x00010000) /**< @see PaSampleFormat */ + +#define paNonInterleaved ((PaSampleFormat) 0x80000000) /**< @see PaSampleFormat */ + +/** A structure providing information and capabilities of PortAudio devices. + Devices may support input, output or both input and output. +*/ +typedef struct PaDeviceInfo +{ + int structVersion; /* this is struct version 2 */ + const char *name; + PaHostApiIndex hostApi; /**< note this is a host API index, not a type id*/ + + int maxInputChannels; + int maxOutputChannels; + + /** Default latency values for interactive performance. */ + PaTime defaultLowInputLatency; + PaTime defaultLowOutputLatency; + /** Default latency values for robust non-interactive applications (eg. playing sound files). */ + PaTime defaultHighInputLatency; + PaTime defaultHighOutputLatency; + + double defaultSampleRate; +} PaDeviceInfo; + + +/** Retrieve a pointer to a PaDeviceInfo structure containing information + about the specified device. + @return A pointer to an immutable PaDeviceInfo structure. If the device + parameter is out of range the function returns NULL. + + @param device A valid device index in the range 0 to (Pa_GetDeviceCount()-1) + + @note PortAudio manages the memory referenced by the returned pointer, + the client must not manipulate or free the memory. The pointer is only + guaranteed to be valid between calls to Pa_Initialize() and Pa_Terminate(). 
+ + @see PaDeviceInfo, PaDeviceIndex +*/ +const PaDeviceInfo* Pa_GetDeviceInfo( PaDeviceIndex device ); + + +/** Parameters for one direction (input or output) of a stream. +*/ +typedef struct PaStreamParameters +{ + /** A valid device index in the range 0 to (Pa_GetDeviceCount()-1) + specifying the device to be used or the special constant + paUseHostApiSpecificDeviceSpecification which indicates that the actual + device(s) to use are specified in hostApiSpecificStreamInfo. + This field must not be set to paNoDevice. + */ + PaDeviceIndex device; + + /** The number of channels of sound to be delivered to the + stream callback or accessed by Pa_ReadStream() or Pa_WriteStream(). + It can range from 1 to the value of maxInputChannels in the + PaDeviceInfo record for the device specified by the device parameter. + */ + int channelCount; + + /** The sample format of the buffer provided to the stream callback, + a_ReadStream() or Pa_WriteStream(). It may be any of the formats described + by the PaSampleFormat enumeration. + */ + PaSampleFormat sampleFormat; + + /** The desired latency in seconds. Where practical, implementations should + configure their latency based on these parameters, otherwise they may + choose the closest viable latency instead. Unless the suggested latency + is greater than the absolute upper limit for the device implementations + should round the suggestedLatency up to the next practical value - ie to + provide an equal or higher latency than suggestedLatency wherever possible. + Actual latency values for an open stream may be retrieved using the + inputLatency and outputLatency fields of the PaStreamInfo structure + returned by Pa_GetStreamInfo(). + @see default*Latency in PaDeviceInfo, *Latency in PaStreamInfo + */ + PaTime suggestedLatency; + + /** An optional pointer to a host api specific data structure + containing additional information for device setup and/or stream processing. + hostApiSpecificStreamInfo is never required for correct operation, + if not used it should be set to NULL. + */ + void *hostApiSpecificStreamInfo; + +} PaStreamParameters; + + +/** Return code for Pa_IsFormatSupported indicating success. */ +#define paFormatIsSupported (0) + +/** Determine whether it would be possible to open a stream with the specified + parameters. + + @param inputParameters A structure that describes the input parameters used to + open a stream. The suggestedLatency field is ignored. See PaStreamParameters + for a description of these parameters. inputParameters must be NULL for + output-only streams. + + @param outputParameters A structure that describes the output parameters used + to open a stream. The suggestedLatency field is ignored. See PaStreamParameters + for a description of these parameters. outputParameters must be NULL for + input-only streams. + + @param sampleRate The required sampleRate. For full-duplex streams it is the + sample rate for both input and output + + @return Returns 0 if the format is supported, and an error code indicating why + the format is not supported otherwise. The constant paFormatIsSupported is + provided to compare with the return value for success. + + @see paFormatIsSupported, PaStreamParameters +*/ +PaError Pa_IsFormatSupported( const PaStreamParameters *inputParameters, + const PaStreamParameters *outputParameters, + double sampleRate ); + + + +/* Streaming types and functions */ + + +/** + A single PaStream can provide multiple channels of real-time + streaming audio input and output to a client application. 
A stream + provides access to audio hardware represented by one or more + PaDevices. Depending on the underlying Host API, it may be possible + to open multiple streams using the same device, however this behavior + is implementation defined. Portable applications should assume that + a PaDevice may be simultaneously used by at most one PaStream. + + Pointers to PaStream objects are passed between PortAudio functions that + operate on streams. + + @see Pa_OpenStream, Pa_OpenDefaultStream, Pa_OpenDefaultStream, Pa_CloseStream, + Pa_StartStream, Pa_StopStream, Pa_AbortStream, Pa_IsStreamActive, + Pa_GetStreamTime, Pa_GetStreamCpuLoad + +*/ +typedef void PaStream; + + +/** Can be passed as the framesPerBuffer parameter to Pa_OpenStream() + or Pa_OpenDefaultStream() to indicate that the stream callback will + accept buffers of any size. +*/ +#define paFramesPerBufferUnspecified (0) + + +/** Flags used to control the behavior of a stream. They are passed as + parameters to Pa_OpenStream or Pa_OpenDefaultStream. Multiple flags may be + ORed together. + + @see Pa_OpenStream, Pa_OpenDefaultStream + @see paNoFlag, paClipOff, paDitherOff, paNeverDropInput, + paPrimeOutputBuffersUsingStreamCallback, paPlatformSpecificFlags +*/ +typedef unsigned long PaStreamFlags; + +/** @see PaStreamFlags */ +#define paNoFlag ((PaStreamFlags) 0) + +/** Disable default clipping of out of range samples. + @see PaStreamFlags +*/ +#define paClipOff ((PaStreamFlags) 0x00000001) + +/** Disable default dithering. + @see PaStreamFlags +*/ +#define paDitherOff ((PaStreamFlags) 0x00000002) + +/** Flag requests that where possible a full duplex stream will not discard + overflowed input samples without calling the stream callback. This flag is + only valid for full duplex callback streams and only when used in combination + with the paFramesPerBufferUnspecified (0) framesPerBuffer parameter. Using + this flag incorrectly results in a paInvalidFlag error being returned from + Pa_OpenStream and Pa_OpenDefaultStream. + + @see PaStreamFlags, paFramesPerBufferUnspecified +*/ +#define paNeverDropInput ((PaStreamFlags) 0x00000004) + +/** Call the stream callback to fill initial output buffers, rather than the + default behavior of priming the buffers with zeros (silence). This flag has + no effect for input-only and blocking read/write streams. + + @see PaStreamFlags +*/ +#define paPrimeOutputBuffersUsingStreamCallback ((PaStreamFlags) 0x00000008) + +/** A mask specifying the platform specific bits. + @see PaStreamFlags +*/ +#define paPlatformSpecificFlags ((PaStreamFlags)0xFFFF0000) + +/** + Timing information for the buffers passed to the stream callback. + + Time values are expressed in seconds and are synchronised with the time base used by Pa_GetStreamTime() for the associated stream. + + @see PaStreamCallback, Pa_GetStreamTime +*/ +typedef struct PaStreamCallbackTimeInfo{ + PaTime inputBufferAdcTime; /**< The time when the first sample of the input buffer was captured at the ADC input */ + PaTime currentTime; /**< The time when the stream callback was invoked */ + PaTime outputBufferDacTime; /**< The time when the first sample of the output buffer will output the DAC */ +} PaStreamCallbackTimeInfo; + + +/** + Flag bit constants for the statusFlags to PaStreamCallback. 
+ + @see paInputUnderflow, paInputOverflow, paOutputUnderflow, paOutputOverflow, + paPrimingOutput +*/ +typedef unsigned long PaStreamCallbackFlags; + +/** In a stream opened with paFramesPerBufferUnspecified, indicates that + input data is all silence (zeros) because no real data is available. In a + stream opened without paFramesPerBufferUnspecified, it indicates that one or + more zero samples have been inserted into the input buffer to compensate + for an input underflow. + @see PaStreamCallbackFlags +*/ +#define paInputUnderflow ((PaStreamCallbackFlags) 0x00000001) + +/** In a stream opened with paFramesPerBufferUnspecified, indicates that data + prior to the first sample of the input buffer was discarded due to an + overflow, possibly because the stream callback is using too much CPU time. + Otherwise indicates that data prior to one or more samples in the + input buffer was discarded. + @see PaStreamCallbackFlags +*/ +#define paInputOverflow ((PaStreamCallbackFlags) 0x00000002) + +/** Indicates that output data (or a gap) was inserted, possibly because the + stream callback is using too much CPU time. + @see PaStreamCallbackFlags +*/ +#define paOutputUnderflow ((PaStreamCallbackFlags) 0x00000004) + +/** Indicates that output data will be discarded because no room is available. + @see PaStreamCallbackFlags +*/ +#define paOutputOverflow ((PaStreamCallbackFlags) 0x00000008) + +/** Some of all of the output data will be used to prime the stream, input + data may be zero. + @see PaStreamCallbackFlags +*/ +#define paPrimingOutput ((PaStreamCallbackFlags) 0x00000010) + +/** + Allowable return values for the PaStreamCallback. + @see PaStreamCallback +*/ +typedef enum PaStreamCallbackResult +{ + paContinue=0, /**< Signal that the stream should continue invoking the callback and processing audio. */ + paComplete=1, /**< Signal that the stream should stop invoking the callback and finish once all output samples have played. */ + paAbort=2 /**< Signal that the stream should stop invoking the callback and finish as soon as possible. */ +} PaStreamCallbackResult; + + +/** + Functions of type PaStreamCallback are implemented by PortAudio clients. + They consume, process or generate audio in response to requests from an + active PortAudio stream. + + When a stream is running, PortAudio calls the stream callback periodically. + The callback function is responsible for processing buffers of audio samples + passed via the input and output parameters. + + The PortAudio stream callback runs at very high or real-time priority. + It is required to consistently meet its time deadlines. Do not allocate + memory, access the file system, call library functions or call other functions + from the stream callback that may block or take an unpredictable amount of + time to complete. + + In order for a stream to maintain glitch-free operation the callback + must consume and return audio data faster than it is recorded and/or + played. PortAudio anticipates that each callback invocation may execute for + a duration approaching the duration of frameCount audio frames at the stream + sample rate. It is reasonable to expect to be able to utilise 70% or more of + the available CPU time in the PortAudio callback. However, due to buffer size + adaption and other factors, not all host APIs are able to guarantee audio + stability under heavy CPU load with arbitrary fixed callback buffer sizes. 
+ When high callback CPU utilisation is required the most robust behavior + can be achieved by using paFramesPerBufferUnspecified as the + Pa_OpenStream() framesPerBuffer parameter. + + @param input and @param output are either arrays of interleaved samples or; + if non-interleaved samples were requested using the paNonInterleaved sample + format flag, an array of buffer pointers, one non-interleaved buffer for + each channel. + + The format, packing and number of channels used by the buffers are + determined by parameters to Pa_OpenStream(). + + @param frameCount The number of sample frames to be processed by + the stream callback. + + @param timeInfo Timestamps indicating the ADC capture time of the first sample + in the input buffer, the DAC output time of the first sample in the output buffer + and the time the callback was invoked. + See PaStreamCallbackTimeInfo and Pa_GetStreamTime() + + @param statusFlags Flags indicating whether input and/or output buffers + have been inserted or will be dropped to overcome underflow or overflow + conditions. + + @param userData The value of a user supplied pointer passed to + Pa_OpenStream() intended for storing synthesis data etc. + + @return + The stream callback should return one of the values in the + ::PaStreamCallbackResult enumeration. To ensure that the callback continues + to be called, it should return paContinue (0). Either paComplete or paAbort + can be returned to finish stream processing, after either of these values is + returned the callback will not be called again. If paAbort is returned the + stream will finish as soon as possible. If paComplete is returned, the stream + will continue until all buffers generated by the callback have been played. + This may be useful in applications such as soundfile players where a specific + duration of output is required. However, it is not necessary to utilize this + mechanism as Pa_StopStream(), Pa_AbortStream() or Pa_CloseStream() can also + be used to stop the stream. The callback must always fill the entire output + buffer irrespective of its return value. + + @see Pa_OpenStream, Pa_OpenDefaultStream + + @note With the exception of Pa_GetStreamCpuLoad() it is not permissible to call + PortAudio API functions from within the stream callback. +*/ +typedef int PaStreamCallback( + const void *input, void *output, + unsigned long frameCount, + const PaStreamCallbackTimeInfo* timeInfo, + PaStreamCallbackFlags statusFlags, + void *userData ); + + +/** Opens a stream for either input, output or both. + + @param stream The address of a PaStream pointer which will receive + a pointer to the newly opened stream. + + @param inputParameters A structure that describes the input parameters used by + the opened stream. See PaStreamParameters for a description of these parameters. + inputParameters must be NULL for output-only streams. + + @param outputParameters A structure that describes the output parameters used by + the opened stream. See PaStreamParameters for a description of these parameters. + outputParameters must be NULL for input-only streams. + + @param sampleRate The desired sampleRate. For full-duplex streams it is the + sample rate for both input and output. Note that the actual sampleRate + may differ very slightly from the desired rate because of hardware limitations. + The exact rate can be queried using Pa_GetStreamInfo(). If nothing close + to the desired sampleRate is available then the open will fail and return an error. 
+ + @param framesPerBuffer The number of frames passed to the stream callback + function, or the preferred block granularity for a blocking read/write stream. + The special value paFramesPerBufferUnspecified (0) may be used to request that + the stream callback will receive an optimal (and possibly varying) number of + frames based on host requirements and the requested latency settings. + Note: With some host APIs, the use of non-zero framesPerBuffer for a callback + stream may introduce an additional layer of buffering which could introduce + additional latency. PortAudio guarantees that the additional latency + will be kept to the theoretical minimum however, it is strongly recommended + that a non-zero framesPerBuffer value only be used when your algorithm + requires a fixed number of frames per stream callback. + + @param streamFlags Flags which modify the behavior of the streaming process. + This parameter may contain a combination of flags ORed together. Some flags may + only be relevant to certain buffer formats. + + @param streamCallback A pointer to a client supplied function that is responsible + for processing and filling input and output buffers. If this parameter is NULL + the stream will be opened in 'blocking read/write' mode. In blocking mode, + the client can receive sample data using Pa_ReadStream and write sample data + using Pa_WriteStream, the number of samples that may be read or written + without blocking is returned by Pa_GetStreamReadAvailable and + Pa_GetStreamWriteAvailable respectively. + + @param userData A client supplied pointer which is passed to the stream callback + function. It could for example, contain a pointer to instance data necessary + for processing the audio buffers. This parameter is ignored if streamCallback + is NULL. + + @return + Upon success Pa_OpenStream() returns paNoError and places a pointer to a + valid PaStream in the stream argument. The stream is inactive (stopped). + If a call to Pa_OpenStream() fails, a non-zero error code is returned (see + PaError for possible error codes) and the value of stream is invalid. + + @see PaStreamParameters, PaStreamCallback, Pa_ReadStream, Pa_WriteStream, + Pa_GetStreamReadAvailable, Pa_GetStreamWriteAvailable +*/ +PaError Pa_OpenStream( PaStream** stream, + const PaStreamParameters *inputParameters, + const PaStreamParameters *outputParameters, + double sampleRate, + unsigned long framesPerBuffer, + PaStreamFlags streamFlags, + PaStreamCallback *streamCallback, + void *userData ); + + +/** A simplified version of Pa_OpenStream() that opens the default input + and/or output devices. + + @param stream The address of a PaStream pointer which will receive + a pointer to the newly opened stream. + + @param numInputChannels The number of channels of sound that will be supplied + to the stream callback or returned by Pa_ReadStream. It can range from 1 to + the value of maxInputChannels in the PaDeviceInfo record for the default input + device. If 0 the stream is opened as an output-only stream. + + @param numOutputChannels The number of channels of sound to be delivered to the + stream callback or passed to Pa_WriteStream. It can range from 1 to the value + of maxOutputChannels in the PaDeviceInfo record for the default output device. + If 0 the stream is opened as an output-only stream. + + @param sampleFormat The sample format of both the input and output buffers + provided to the callback or passed to and from Pa_ReadStream and Pa_WriteStream. 
+ sampleFormat may be any of the formats described by the PaSampleFormat + enumeration. + + @param sampleRate Same as Pa_OpenStream parameter of the same name. + @param framesPerBuffer Same as Pa_OpenStream parameter of the same name. + @param streamCallback Same as Pa_OpenStream parameter of the same name. + @param userData Same as Pa_OpenStream parameter of the same name. + + @return As for Pa_OpenStream + + @see Pa_OpenStream, PaStreamCallback +*/ +PaError Pa_OpenDefaultStream( PaStream** stream, + int numInputChannels, + int numOutputChannels, + PaSampleFormat sampleFormat, + double sampleRate, + unsigned long framesPerBuffer, + PaStreamCallback *streamCallback, + void *userData ); + + +/** Closes an audio stream. If the audio stream is active it + discards any pending buffers as if Pa_AbortStream() had been called. +*/ +PaError Pa_CloseStream( PaStream *stream ); + + +/** Functions of type PaStreamFinishedCallback are implemented by PortAudio + clients. They can be registered with a stream using the Pa_SetStreamFinishedCallback + function. Once registered they are called when the stream becomes inactive + (ie once a call to Pa_StopStream() will not block). + A stream will become inactive after the stream callback returns non-zero, + or when Pa_StopStream or Pa_AbortStream is called. For a stream providing audio + output, if the stream callback returns paComplete, or Pa_StopStream() is called, + the stream finished callback will not be called until all generated sample data + has been played. + + @param userData The userData parameter supplied to Pa_OpenStream() + + @see Pa_SetStreamFinishedCallback +*/ +typedef void PaStreamFinishedCallback( void *userData ); + + +/** Register a stream finished callback function which will be called when the + stream becomes inactive. See the description of PaStreamFinishedCallback for + further details about when the callback will be called. + + @param stream a pointer to a PaStream that is in the stopped state - if the + stream is not stopped, the stream's finished callback will remain unchanged + and an error code will be returned. + + @param streamFinishedCallback a pointer to a function with the same signature + as PaStreamFinishedCallback, that will be called when the stream becomes + inactive. Passing NULL for this parameter will un-register a previously + registered stream finished callback function. + + @return on success returns paNoError, otherwise an error code indicating the cause + of the error. + + @see PaStreamFinishedCallback +*/ +PaError Pa_SetStreamFinishedCallback( PaStream *stream, PaStreamFinishedCallback* streamFinishedCallback ); + + +/** Commences audio processing. +*/ +PaError Pa_StartStream( PaStream *stream ); + + +/** Terminates audio processing. It waits until all pending + audio buffers have been played before it returns. +*/ +PaError Pa_StopStream( PaStream *stream ); + + +/** Terminates audio processing immediately without waiting for pending + buffers to complete. +*/ +PaError Pa_AbortStream( PaStream *stream ); + + +/** Determine whether the stream is stopped. + A stream is considered to be stopped prior to a successful call to + Pa_StartStream and after a successful call to Pa_StopStream or Pa_AbortStream. + If a stream callback returns a value other than paContinue the stream is NOT + considered to be stopped. + + @return Returns one (1) when the stream is stopped, zero (0) when + the stream is running or, a PaErrorCode (which are always negative) if + PortAudio is not initialized or an error is encountered. 
+ + @see Pa_StopStream, Pa_AbortStream, Pa_IsStreamActive +*/ +PaError Pa_IsStreamStopped( PaStream *stream ); + + +/** Determine whether the stream is active. + A stream is active after a successful call to Pa_StartStream(), until it + becomes inactive either as a result of a call to Pa_StopStream() or + Pa_AbortStream(), or as a result of a return value other than paContinue from + the stream callback. In the latter case, the stream is considered inactive + after the last buffer has finished playing. + + @return Returns one (1) when the stream is active (ie playing or recording + audio), zero (0) when not playing or, a PaErrorCode (which are always negative) + if PortAudio is not initialized or an error is encountered. + + @see Pa_StopStream, Pa_AbortStream, Pa_IsStreamStopped +*/ +PaError Pa_IsStreamActive( PaStream *stream ); + + + +/** A structure containing unchanging information about an open stream. + @see Pa_GetStreamInfo +*/ + +typedef struct PaStreamInfo +{ + /** this is struct version 1 */ + int structVersion; + + /** The input latency of the stream in seconds. This value provides the most + accurate estimate of input latency available to the implementation. It may + differ significantly from the suggestedLatency value passed to Pa_OpenStream(). + The value of this field will be zero (0.) for output-only streams. + @see PaTime + */ + PaTime inputLatency; + + /** The output latency of the stream in seconds. This value provides the most + accurate estimate of output latency available to the implementation. It may + differ significantly from the suggestedLatency value passed to Pa_OpenStream(). + The value of this field will be zero (0.) for input-only streams. + @see PaTime + */ + PaTime outputLatency; + + /** The sample rate of the stream in Hertz (samples per second). In cases + where the hardware sample rate is inaccurate and PortAudio is aware of it, + the value of this field may be different from the sampleRate parameter + passed to Pa_OpenStream(). If information about the actual hardware sample + rate is not available, this field will have the same value as the sampleRate + parameter passed to Pa_OpenStream(). + */ + double sampleRate; + +} PaStreamInfo; + + +/** Retrieve a pointer to a PaStreamInfo structure containing information + about the specified stream. + @return A pointer to an immutable PaStreamInfo structure. If the stream + parameter is invalid, or an error is encountered, the function returns NULL. + + @param stream A pointer to an open stream previously created with Pa_OpenStream. + + @note PortAudio manages the memory referenced by the returned pointer, + the client must not manipulate or free the memory. The pointer is only + guaranteed to be valid until the specified stream is closed. + + @see PaStreamInfo +*/ +const PaStreamInfo* Pa_GetStreamInfo( PaStream *stream ); + + +/** Returns the current time in seconds for a stream according to the same clock used + to generate callback PaStreamCallbackTimeInfo timestamps. The time values are + monotonically increasing and have unspecified origin. + + Pa_GetStreamTime returns valid time values for the entire life of the stream, + from when the stream is opened until it is closed. Starting and stopping the stream + does not affect the passage of time returned by Pa_GetStreamTime. + + This time may be used for synchronizing other events to the audio stream, for + example synchronizing audio to MIDI. + + @return The stream's current time in seconds, or 0 if an error occurred. 
+ + @see PaTime, PaStreamCallback, PaStreamCallbackTimeInfo +*/ +PaTime Pa_GetStreamTime( PaStream *stream ); + + +/** Retrieve CPU usage information for the specified stream. + The "CPU Load" is a fraction of total CPU time consumed by a callback stream's + audio processing routines including, but not limited to the client supplied + stream callback. This function does not work with blocking read/write streams. + + This function may be called from the stream callback function or the + application. + + @return + A floating point value, typically between 0.0 and 1.0, where 1.0 indicates + that the stream callback is consuming the maximum number of CPU cycles possible + to maintain real-time operation. A value of 0.5 would imply that PortAudio and + the stream callback was consuming roughly 50% of the available CPU time. The + return value may exceed 1.0. A value of 0.0 will always be returned for a + blocking read/write stream, or if an error occurs. +*/ +double Pa_GetStreamCpuLoad( PaStream* stream ); + + +/** Read samples from an input stream. The function doesn't return until + the entire buffer has been filled - this may involve waiting for the operating + system to supply the data. + + @param stream A pointer to an open stream previously created with Pa_OpenStream. + + @param buffer A pointer to a buffer of sample frames. The buffer contains + samples in the format specified by the inputParameters->sampleFormat field + used to open the stream, and the number of channels specified by + inputParameters->numChannels. If non-interleaved samples were requested using + the paNonInterleaved sample format flag, buffer is a pointer to the first element + of an array of buffer pointers, one non-interleaved buffer for each channel. + + @param frames The number of frames to be read into buffer. This parameter + is not constrained to a specific range, however high performance applications + will want to match this parameter to the framesPerBuffer parameter used + when opening the stream. + + @return On success PaNoError will be returned, or PaInputOverflowed if input + data was discarded by PortAudio after the previous call and before this call. +*/ +PaError Pa_ReadStream( PaStream* stream, + void *buffer, + unsigned long frames ); + + +/** Write samples to an output stream. This function doesn't return until the + entire buffer has been written - this may involve waiting for the operating + system to consume the data. + + @param stream A pointer to an open stream previously created with Pa_OpenStream. + + @param buffer A pointer to a buffer of sample frames. The buffer contains + samples in the format specified by the outputParameters->sampleFormat field + used to open the stream, and the number of channels specified by + outputParameters->numChannels. If non-interleaved samples were requested using + the paNonInterleaved sample format flag, buffer is a pointer to the first element + of an array of buffer pointers, one non-interleaved buffer for each channel. + + @param frames The number of frames to be written from buffer. This parameter + is not constrained to a specific range, however high performance applications + will want to match this parameter to the framesPerBuffer parameter used + when opening the stream. + + @return On success PaNoError will be returned, or paOutputUnderflowed if + additional output data was inserted after the previous call and before this + call. 
+*/ +PaError Pa_WriteStream( PaStream* stream, + const void *buffer, + unsigned long frames ); + + +/** Retrieve the number of frames that can be read from the stream without + waiting. + + @return Returns a non-negative value representing the maximum number of frames + that can be read from the stream without blocking or busy waiting or, a + PaErrorCode (which are always negative) if PortAudio is not initialized or an + error is encountered. +*/ +signed long Pa_GetStreamReadAvailable( PaStream* stream ); + + +/** Retrieve the number of frames that can be written to the stream without + waiting. + + @return Returns a non-negative value representing the maximum number of frames + that can be written to the stream without blocking or busy waiting or, a + PaErrorCode (which are always negative) if PortAudio is not initialized or an + error is encountered. +*/ +signed long Pa_GetStreamWriteAvailable( PaStream* stream ); + + +/* Miscellaneous utilities */ + + +/** Retrieve the size of a given sample format in bytes. + + @return The size in bytes of a single sample in the specified format, + or paSampleFormatNotSupported if the format is not supported. +*/ +PaError Pa_GetSampleSize( PaSampleFormat format ); + + +/** Put the caller to sleep for at least 'msec' milliseconds. This function is + provided only as a convenience for authors of portable code (such as the tests + and examples in the PortAudio distribution.) + + The function may sleep longer than requested so don't rely on this for accurate + musical timing. +*/ +void Pa_Sleep( long msec ); + + + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* PORTAUDIO_H */ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/lib/portaudio_x64.dll b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/lib/portaudio_x64.dll new file mode 100644 index 0000000..db3a218 Binary files /dev/null and b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/lib/portaudio_x64.dll differ diff --git a/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/lib/portaudio_x64.lib b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/lib/portaudio_x64.lib new file mode 100644 index 0000000..eb72de1 Binary files /dev/null and b/Unreal/Plugins/AvatarCore_STT/Source/ThirdParty/portaudio/lib/portaudio_x64.lib differ
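For context on how the blocking read/write mode documented in portaudio.h above is typically used, here is a minimal capture sketch against the API vendored by this diff. It is not code from the plugin; the 16 kHz sample rate, 512-frame reads and the fixed loop bound are illustrative assumptions.

#include <stdio.h>
#include "portaudio.h"

#define SAMPLE_RATE     16000   /* assumed capture rate, typical for speech-to-text */
#define FRAMES_PER_READ 512     /* assumed block size */

int main(void)
{
    PaError err = Pa_Initialize();
    if (err != paNoError) {
        fprintf(stderr, "Pa_Initialize: %s\n", Pa_GetErrorText(err));
        return 1;
    }

    PaStream *stream = NULL;
    /* Mono 16-bit input from the default device, blocking mode (NULL callback). */
    err = Pa_OpenDefaultStream(&stream, 1, 0, paInt16, SAMPLE_RATE,
                               FRAMES_PER_READ, NULL, NULL);
    if (err == paNoError)
        err = Pa_StartStream(stream);

    if (err == paNoError) {
        short buffer[FRAMES_PER_READ];
        for (int i = 0; i < 100; ++i) {
            /* Blocks until FRAMES_PER_READ frames have been captured. */
            PaError readErr = Pa_ReadStream(stream, buffer, FRAMES_PER_READ);
            if (readErr != paNoError && readErr != paInputOverflowed)
                break;                 /* overflow is reported but not fatal */
            /* ... hand `buffer` to the speech-to-text pipeline here ... */
        }
        Pa_StopStream(stream);
    }

    if (stream != NULL)
        Pa_CloseStream(stream);
    Pa_Terminate();
    return err == paNoError ? 0 : 1;
}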
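The PaStreamCallback contract documented above (real-time priority, no blocking work, paContinue/paComplete return values) is easiest to see in a small callback-driven pass-through, sketched here under the assumption that Pa_Initialize() has already succeeded; the function names are illustrative, not part of the plugin.

#include <string.h>
#include "portaudio.h"

/* Copies captured input straight to the output buffer (mono paFloat32).
   Runs at real-time priority, so it does no allocation, locking or I/O. */
static int passThroughCallback(const void *input, void *output,
                               unsigned long frameCount,
                               const PaStreamCallbackTimeInfo *timeInfo,
                               PaStreamCallbackFlags statusFlags,
                               void *userData)
{
    (void)timeInfo; (void)statusFlags; (void)userData;
    if (input != NULL)
        memcpy(output, input, frameCount * sizeof(float));
    else
        memset(output, 0, frameCount * sizeof(float)); /* no input available */
    return paContinue; /* keep the stream running until explicitly stopped */
}

/* Assumes Pa_Initialize() has already been called successfully. */
static PaError runPassThrough(double sampleRate)
{
    PaStream *stream = NULL;
    PaError err = Pa_OpenDefaultStream(&stream, 1, 1, paFloat32, sampleRate,
                                       paFramesPerBufferUnspecified,
                                       passThroughCallback, NULL);
    if (err != paNoError)
        return err;

    err = Pa_StartStream(stream);
    if (err == paNoError) {
        Pa_Sleep(5000);            /* let the callback run for ~5 seconds */
        err = Pa_StopStream(stream);
    }
    Pa_CloseStream(stream);
    return err;
}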
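The host-API-specific extension headers added here (pa_win_waveformat.h, pa_win_wmme.h) hook into Pa_OpenStream through the hostApiSpecificStreamInfo field of PaStreamParameters. A sketch of that wiring, assuming a valid WMME output device index and an already-initialized library (the helper name is hypothetical):

#include <string.h>
#include <windows.h>            /* HWAVEIN/HWAVEOUT used by pa_win_wmme.h */
#include "portaudio.h"
#include "pa_win_wmme.h"        /* PaWinMmeStreamInfo, paWinMmeUseChannelMask */
#include "pa_win_waveformat.h"  /* PAWIN_SPEAKER_STEREO */

/* Opens a stereo WMME output stream in blocking-write mode, explicitly
   addressing the front-left/front-right speakers via a channel mask. */
static PaError openStereoWmmeOutput(PaDeviceIndex device, double sampleRate,
                                    PaStream **stream)
{
    PaWinMmeStreamInfo wmmeInfo;
    memset(&wmmeInfo, 0, sizeof(wmmeInfo));
    wmmeInfo.size        = sizeof(PaWinMmeStreamInfo);
    wmmeInfo.hostApiType = paMME;
    wmmeInfo.version     = 1;
    wmmeInfo.flags       = paWinMmeUseChannelMask;
    wmmeInfo.channelMask = PAWIN_SPEAKER_STEREO;

    PaStreamParameters out;
    memset(&out, 0, sizeof(out));
    out.device                    = device;
    out.channelCount              = 2;
    out.sampleFormat              = paFloat32;
    out.suggestedLatency          = Pa_GetDeviceInfo(device)->defaultLowOutputLatency;
    out.hostApiSpecificStreamInfo = &wmmeInfo;

    return Pa_OpenStream(stream, NULL /* no input */, &out, sampleRate,
                         paFramesPerBufferUnspecified, paNoFlag,
                         NULL /* blocking read/write mode */, NULL);
}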