diff --git a/Unreal/Content/Project/BP/Avatars/Avatar_Jasmin.uasset b/Unreal/Content/Project/BP/Avatars/Avatar_Jasmin.uasset new file mode 100644 index 0000000..8fdc287 --- /dev/null +++ b/Unreal/Content/Project/BP/Avatars/Avatar_Jasmin.uasset @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98b21f75efc4f17042d062e42a449b4096a97e76256f252b719449f73a458310 +size 51115 diff --git a/Unreal/Content/Project/BP/BP_Project_Manager.uasset b/Unreal/Content/Project/BP/BP_Project_Manager.uasset index 8f59cfd..5dc65d5 100644 --- a/Unreal/Content/Project/BP/BP_Project_Manager.uasset +++ b/Unreal/Content/Project/BP/BP_Project_Manager.uasset @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:da440e3f560df4fd2bac7c8ebfb51168292918a620c384e43e0a25c76e14bbcd -size 2144023 +oid sha256:6b132873035354562ba6179d95823c14ebe756090828bad929598a8f85d3380f +size 2165326 diff --git a/Unreal/Content/Project/BP/EnumsAndStructs/E_Avatars.uasset b/Unreal/Content/Project/BP/EnumsAndStructs/E_Avatars.uasset deleted file mode 100644 index 9818a32..0000000 --- a/Unreal/Content/Project/BP/EnumsAndStructs/E_Avatars.uasset +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d02d8b434bab892116c1c46ea0c9e336a1d6df2f402e87b2d361a02e035e623a -size 1848 diff --git a/Unreal/Content/Project/BP/EnumsAndStructs/E_Project_State.uasset b/Unreal/Content/Project/BP/EnumsAndStructs/E_Project_State.uasset deleted file mode 100644 index edc7e06..0000000 --- a/Unreal/Content/Project/BP/EnumsAndStructs/E_Project_State.uasset +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5781ca272992c4d739eb37805388b418d624a3c0f4368f0a91790e6a2bedbd30 -size 4104 diff --git a/Unreal/Content/Project/BP/EnumsAndStructs/S_ConfigSettings.uasset b/Unreal/Content/Project/BP/EnumsAndStructs/S_ConfigSettings.uasset index abefd9b..d20f00d 100644 --- a/Unreal/Content/Project/BP/EnumsAndStructs/S_ConfigSettings.uasset +++ b/Unreal/Content/Project/BP/EnumsAndStructs/S_ConfigSettings.uasset @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d5924df7516945d6defa5231ddae15f221c0ca8c2ea54b40710424b54dbd03d -size 58682 +oid sha256:fd8ed89d11c5faddf9b023195c0bbf8190c075d8baaa1d289967f028fb2333a2 +size 56048 diff --git a/Unreal/Content/Project/BP/Environments/BP_Environment_FogWithRing.uasset b/Unreal/Content/Project/BP/Environments/BP_Environment_FogWithRing.uasset index dc26590..617eb2f 100644 --- a/Unreal/Content/Project/BP/Environments/BP_Environment_FogWithRing.uasset +++ b/Unreal/Content/Project/BP/Environments/BP_Environment_FogWithRing.uasset @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7ed12d4e6ab2426f490d0881ac70db3ef65038c956dcd6acc03c36a86166998e -size 1251055 +oid sha256:bdbd928ea7039839f499039930495618b1ee1b7308a61162c028e52df2daf572 +size 1210649 diff --git a/Unreal/Content/Project/BP/Modes/Data/E_Mode.uasset b/Unreal/Content/Project/BP/Modes/Data/E_Mode.uasset deleted file mode 100644 index 4ed9eac..0000000 --- a/Unreal/Content/Project/BP/Modes/Data/E_Mode.uasset +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a39ae70a5a406bae60529620873c028675ce5263ca756dfb61b9f6b3246a9a61 -size 1636 diff --git a/Unreal/Content/Project/Widgets/W_Main.uasset b/Unreal/Content/Project/Widgets/W_Main.uasset index 4e061a2..f2739fa 100644 --- a/Unreal/Content/Project/Widgets/W_Main.uasset +++ b/Unreal/Content/Project/Widgets/W_Main.uasset @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:44515a4b7ccf4b7aa8397f9edf605e2f90a864e073b3cf422aab472313f1c4a8 -size 143782 +oid sha256:90099403e32d25b5460759e3023f706c1aa0b56e35c57829c391e92ffba01ad2 +size 134836 diff --git a/Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Private/AIBaseManager.cpp b/Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Private/AIBaseManager.cpp index 33e68a8..c1d3dcb 100644 --- a/Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Private/AIBaseManager.cpp +++ b/Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Private/AIBaseManager.cpp @@ -17,6 +17,8 @@ void UAIBaseManager::InitAIManager(UAIBaseConfig* AIConfig, bool DebugMode, AAct CurrentConfig = AIConfig; CurrentConfig->AddToRoot(); + UAIBaseManager::AddRepeatSystemInstruction(); + // Store the reference actor for world context WorldReferenceActor = InWorldReferenceActor; @@ -122,7 +124,7 @@ void UAIBaseManager::SetNewState(EAvatarCoreAIState NewState, bool ForceState) } } -void UAIBaseManager::SendResponse(const FString& Response, bool NotifyDelay = false, FString OverrideInstruction = "") +void UAIBaseManager::SendResponse(const FString& Response, bool NotifyDelay = false, bool TriggerResponse = true) { AnswerCache.Empty(); ResponseID++; @@ -144,10 +146,10 @@ void UAIBaseManager::RepeatText(FString TextToRepeat, bool DoRephrase) ResponseID++; FString Instruction; if (DoRephrase) - Instruction = "Repeat the text in your own words."; + Instruction = "Repeat the text in your own words: " + TextToRepeat; else - Instruction = "Repeat the text exactly word for word."; - SendResponse(TextToRepeat, false, Instruction); + Instruction = "[REPEAT] " + TextToRepeat; + SendResponse(Instruction, false, true); } void UAIBaseManager::ClearAI() @@ -378,36 +380,100 @@ void UAIBaseManager::OnAIResponse(const FString& Chunk, bool IsFinal) } } -void UAIBaseManager::AddSystemInstruction(const FName Name, const FString NewSystemInstruction) +void UAIBaseManager::AddSystemInstruction(const FName Name, const FString NewSystemInstruction, bool AddAsFirst = false) { UAIBaseManager::RemoveSystemInstruction(Name); FSystemInstruction tmpEntry; tmpEntry.Name = Name; tmpEntry.Instruction = NewSystemInstruction; - CurrentConfig->SystemPromps.Add(tmpEntry); + + if(AddAsFirst) + { + TArray tmpSystemPrompts; + tmpSystemPrompts.Add(tmpEntry); + tmpSystemPrompts.Append(CurrentConfig->SystemPrompts); + CurrentConfig->SystemPrompts = tmpSystemPrompts; + } + else + CurrentConfig->SystemPrompts.Add(tmpEntry); + BroadcastAILog(FString::Printf(TEXT("AI Manager added System Instruction %s"), *Name.ToString())); } void UAIBaseManager::ClearAllSystemInstructios() { - CurrentConfig->SystemPromps.Empty(); + CurrentConfig->SystemPrompts.Empty(); BroadcastAILog(FString::Printf(TEXT("AI Manager wiped all System Instructions"))); + UAIBaseManager::AddRepeatSystemInstruction(); +} + +void UAIBaseManager::AddRepeatSystemInstruction() +{ + UAIBaseManager::AddSystemInstruction(TEXT("Repeat Text"), TEXT("If the text starts with [REPEAT], repeat the text exactly word for word."), true); } void UAIBaseManager::RemoveSystemInstruction(const FName Name) { // Iterate in reverse to safely remove while iterating - for (int32 i = CurrentConfig->SystemPromps.Num() - 1; i >= 0; --i) + for (int32 i = CurrentConfig->SystemPrompts.Num() - 1; i >= 0; --i) { - if (CurrentConfig->SystemPromps[i].Name == Name) + if (CurrentConfig->SystemPrompts[i].Name == Name) { - CurrentConfig->SystemPromps.RemoveAt(i); + CurrentConfig->SystemPrompts.RemoveAt(i); BroadcastAILog(FString::Printf(TEXT("AI Manager removed System Instruction %s"), *Name.ToString())); } } } +FString UAIBaseManager::GetSystemInstructionPromptString(bool AsJsonString = false) +{ + FString prompt; + + if(AsJsonString) + { + TArray> JsonArray; + + for (const FSystemInstruction& Item : CurrentConfig->SystemPrompts) + { + if (!Item.Instruction.IsEmpty()) + { + // Each entry: { "Name": "Instruction" } + TSharedPtr Obj = MakeShared(); + Obj->SetStringField(Item.Name.ToString(), Item.Instruction); + JsonArray.Add(MakeShared(Obj)); + } + } + + // Add a fallback if no entries exist + if (JsonArray.Num() == 0) + { + TSharedPtr DefaultObj = MakeShared(); + DefaultObj->SetStringField(TEXT("Default"), TEXT("Just do exactly what the user wants you to do.")); + JsonArray.Add(MakeShared(DefaultObj)); + } + + // 2) Serialize array into a single JSON string + TSharedRef> Writer = TJsonWriterFactory<>::Create(&prompt); + FJsonSerializer::Serialize(JsonArray, Writer); + } + else + { + for (int32 Index = 0; Index < CurrentConfig->SystemPrompts.Num(); ++Index) + { + FSystemInstruction& Item = CurrentConfig->SystemPrompts[Index]; + if (!Item.Instruction.IsEmpty()) + { + prompt += "# " + Item.Name.ToString() + TEXT("\r\n"); + prompt += Item.Instruction; + if(Index < CurrentConfig->SystemPrompts.Num() - 1) + prompt += TEXT("\r\n\r\n"); + } + } + } + return prompt; +} + void UAIBaseManager::ResetRequestTimeout() { UAIBaseManager::ClearRequestTimeout(); diff --git a/Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Private/RealtimeAPI/AvatarCoreAIRealtime.cpp b/Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Private/RealtimeAPI/AvatarCoreAIRealtime.cpp index ad33890..c960702 100644 --- a/Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Private/RealtimeAPI/AvatarCoreAIRealtime.cpp +++ b/Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Private/RealtimeAPI/AvatarCoreAIRealtime.cpp @@ -37,15 +37,10 @@ void UAvatarCoreAIRealtime::DeactivateAI() UAIBaseManager::DeactivateAI(); } -void UAvatarCoreAIRealtime::SendResponse(const FString& Response, bool NotifyDelay, FString OverrideInstruction) +void UAvatarCoreAIRealtime::SendResponse(const FString& Response, bool NotifyDelay = false, bool TriggerResponse = true) { - UAIBaseManager::SendResponse(Response, NotifyDelay, OverrideInstruction); - FString RequestWithInstruction; - if (OverrideInstruction.IsEmpty()) - RequestWithInstruction = Response; - else - RequestWithInstruction = OverrideInstruction + ": " + Response; - UAvatarCoreAIRealtime::CreateConversationItem(RequestWithInstruction, EOpenAIRoleType::User, true); + UAIBaseManager::SendResponse(Response, NotifyDelay, TriggerResponse); + UAvatarCoreAIRealtime::CreateConversationItem(Response, EOpenAIRoleType::User, TriggerResponse); } void UAvatarCoreAIRealtime::ClearAI() @@ -73,35 +68,7 @@ void UAvatarCoreAIRealtime::UpdateSession() // Build session object TSharedPtr SessionObj = MakeShareable(new FJsonObject); - FString InstructionsJsonString; - - // 1) Build an array of JSON objects, each having one field: Name -> Instruction - { - TArray> JsonArray; - - for (const FSystemInstruction& Item : CurrentConfig->SystemPromps) - { - if (!Item.Instruction.IsEmpty()) - { - // Each entry: { "Name": "Instruction" } - TSharedPtr Obj = MakeShared(); - Obj->SetStringField(Item.Name.ToString(), Item.Instruction); - JsonArray.Add(MakeShared(Obj)); - } - } - - // Add a fallback if no entries exist - if (JsonArray.Num() == 0) - { - TSharedPtr DefaultObj = MakeShared(); - DefaultObj->SetStringField(TEXT("Default"), TEXT("Just do exactly what the user wants you to do.")); - JsonArray.Add(MakeShared(DefaultObj)); - } - - // 2) Serialize array into a single JSON string - TSharedRef> Writer = TJsonWriterFactory<>::Create(&InstructionsJsonString); - FJsonSerializer::Serialize(JsonArray, Writer); - } + FString InstructionsString = UAIBaseManager::GetSystemInstructionPromptString(false); if (RealtimeConfig->AIModelAudioOutput) Modalities.Add(TEXT("audio")); @@ -114,7 +81,7 @@ void UAvatarCoreAIRealtime::UpdateSession() }()); // 3) Store serialized JSON array as a *string* field in SessionObj - SessionObj->SetStringField(TEXT("instructions"), InstructionsJsonString); + SessionObj->SetStringField(TEXT("instructions"), InstructionsString); SessionObj->SetStringField(TEXT("type"), TEXT("realtime")); diff --git a/Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/AIBaseConfig.h b/Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/AIBaseConfig.h index eabcd2b..546d7de 100644 --- a/Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/AIBaseConfig.h +++ b/Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/AIBaseConfig.h @@ -26,7 +26,7 @@ public: // All those neat little system prompts that make our avatars sooo great UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true")) - TArray SystemPromps; + TArray SystemPrompts; UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|MCP", meta = (ExposeOnSpawn = "true")) bool bUseMCPServer = true; diff --git a/Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/AIBaseManager.h b/Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/AIBaseManager.h index 1115724..2bcba76 100644 --- a/Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/AIBaseManager.h +++ b/Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/AIBaseManager.h @@ -97,7 +97,7 @@ public: * Send Response/Question to the AI Model. If NotifyDelay is true call the DelayedAnswer Event when time defined in AIConfig has passed. */ UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI") - virtual void SendResponse(const FString& Response, bool NotifyDelay, FString OverrideInstruction); + virtual void SendResponse(const FString& Response, bool NotifyDelay, bool TriggerResponse); /** * Make the AI Model repeat the Text. @@ -132,21 +132,31 @@ public: /** * Add a new system instruction by name. */ - UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI") - void AddSystemInstruction(const FName Name, const FString NewSystemInstruction); + UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI|System Instruction") + void AddSystemInstruction(const FName Name, const FString NewSystemInstruction, bool AddAsFirst); /** * Remove a system instruction by name. */ - UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI") + UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI|System Instruction") void RemoveSystemInstruction(const FName Name); + /** + * Parse to System Prompt + */ + UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI|System Instruction") + FString GetSystemInstructionPromptString(bool AsJsonString); + /** * Clear all System Instruction. */ - UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI") + UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI|System Instruction") void ClearAllSystemInstructios(); + // Add the prompt that let the avatar repeat what we want it to say + UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI|System Instruction") + void AddRepeatSystemInstruction(); + /** * Timeout Handling */ diff --git a/Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/RealtimeAPI/AvatarCoreAIRealtime.h b/Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/RealtimeAPI/AvatarCoreAIRealtime.h index 235135e..c3946ae 100644 --- a/Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/RealtimeAPI/AvatarCoreAIRealtime.h +++ b/Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/RealtimeAPI/AvatarCoreAIRealtime.h @@ -156,7 +156,7 @@ public: void ActivateAI() override; void DeactivateAI() override; void UpdateSession() override; - void SendResponse(const FString& Response, bool NotifyDelay = false, FString OverrideInstruction = "") override; + void SendResponse(const FString& Response, bool NotifyDelay, bool TriggerResponse) override; void ClearAI() override; void ConnectToWebSocket(); diff --git a/Unreal/Plugins/AvatarCore_Manager/Content/AvatarCoreManager.uasset b/Unreal/Plugins/AvatarCore_Manager/Content/AvatarCoreManager.uasset index f0b2ca9..38da8b4 100644 --- a/Unreal/Plugins/AvatarCore_Manager/Content/AvatarCoreManager.uasset +++ b/Unreal/Plugins/AvatarCore_Manager/Content/AvatarCoreManager.uasset @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b99eab0b83063646cb668bd66f6d5dab77be92e8e5782511f0a101281fc245f5 -size 1626086 +oid sha256:70ebf66e06a334b073c9150f3caae380ac366b5cd027530881f4aaf339b273f1 +size 1628877 diff --git a/Unreal/Plugins/AvatarCore_Manager/Content/Widgets/Debug/Pages/W_DebugAvatarCoreAI.uasset b/Unreal/Plugins/AvatarCore_Manager/Content/Widgets/Debug/Pages/W_DebugAvatarCoreAI.uasset index 8a29a0e..e8f50a6 100644 --- a/Unreal/Plugins/AvatarCore_Manager/Content/Widgets/Debug/Pages/W_DebugAvatarCoreAI.uasset +++ b/Unreal/Plugins/AvatarCore_Manager/Content/Widgets/Debug/Pages/W_DebugAvatarCoreAI.uasset @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dc2193098aaa0e3c5bb8d00a711cefa678557da9aabfc1d22ae58bf6a8ddad0a -size 158885 +oid sha256:b5d7ed7cde4efecb4c39245638a0f971b4c8457dfa576f2b7f8e8a6071177ef5 +size 154135 diff --git a/Unreal/Plugins/AvatarCore_Manager/Content/Widgets/Debug/Pages/W_DebugAvatarCoreSTT.uasset b/Unreal/Plugins/AvatarCore_Manager/Content/Widgets/Debug/Pages/W_DebugAvatarCoreSTT.uasset index 3cf3fe9..b16fe4c 100644 --- a/Unreal/Plugins/AvatarCore_Manager/Content/Widgets/Debug/Pages/W_DebugAvatarCoreSTT.uasset +++ b/Unreal/Plugins/AvatarCore_Manager/Content/Widgets/Debug/Pages/W_DebugAvatarCoreSTT.uasset @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:92ffb24e377a5502803cf1223b5f3924758469a4129b5d5d674dcf43b25f6be9 -size 202187 +oid sha256:9b729898a8d727e8d5304169e98a085b89a3bf4c8e2be35fcac86ec991425963 +size 216800 diff --git a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/AzureRunnable.cpp b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/AzureRunnable.cpp index 982f83c..2c1f921 100644 --- a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/AzureRunnable.cpp +++ b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/AzureRunnable.cpp @@ -79,10 +79,25 @@ bool FAzureRunnable::Init() // Bind Recognized event for final results Recognizer->Recognized.Connect([WeakOwner](const auto& EventArgs) { FString RecognizedText = UTF8_TO_TCHAR(EventArgs.Result->Text.c_str()); - AsyncTask(ENamedThreads::GameThread, [WeakOwner, RecognizedText]() { + FString DetectedLangUE = TEXT("LANGUAGE_NOT_DETECTED"); + + try + { + auto LangResult = SpeechSDK::AutoDetectSourceLanguageResult::FromResult(EventArgs.Result); + if (LangResult && !LangResult->Language.empty()) + { + DetectedLangUE = UTF8_TO_TCHAR(LangResult->Language.c_str()); + } + } + catch (...) + { + // Keep default LANGUAGE_NOT_DETECTED + } + + AsyncTask(ENamedThreads::GameThread, [WeakOwner, RecognizedText, DetectedLangUE]() { if (WeakOwner.IsValid() && WeakOwner.Get() != nullptr) { - WeakOwner->OnRecognized(RecognizedText); + WeakOwner->OnRecognized(RecognizedText, DetectedLangUE); } }); }); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/STTProcessorAzure.cpp b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/STTProcessorAzure.cpp index 10231eb..3974866 100644 --- a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/STTProcessorAzure.cpp +++ b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/STTProcessorAzure.cpp @@ -148,11 +148,13 @@ void USTTProcessorAzure::OnRecognizing(const FString& RecognizedText) USTTProcessorBase::OnTranscriptionIntermediateResult(TranscriptionCounter, *intermediateResult); } -void USTTProcessorAzure::OnRecognized(const FString& RecognizedText) +void USTTProcessorAzure::OnRecognized(const FString& RecognizedText, const FString& Language) { if (IsValid(STTManager) && STTManager->IsBlocked()) return; + this->DetectedLanguage = Language; + if (!intermediateResult.IsEmpty()) intermediateResult += " " + RecognizedText; else @@ -161,7 +163,7 @@ void USTTProcessorAzure::OnRecognized(const FString& RecognizedText) USTTProcessorBase::OnTranscriptionIntermediateResult(TranscriptionCounter, *intermediateResult); } else { - USTTProcessorBase::OnTranscriptionResult(TranscriptionCounter, *intermediateResult); + USTTProcessorBase::OnTranscriptionResult(TranscriptionCounter, *intermediateResult, this->DetectedLanguage); intermediateResult.Empty(); } } @@ -179,7 +181,7 @@ void USTTProcessorAzure::OnRunnableEnded() bTranscriptionRunning = false; if (!intermediateResult.IsEmpty()) { - STTManager->OnTranscriptionReceived.Broadcast(TranscriptionCounter, *intermediateResult); + STTManager->OnTranscriptionReceived.Broadcast(TranscriptionCounter, *intermediateResult, this->DetectedLanguage); intermediateResult.Empty(); } diff --git a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/STTProcessorBase.cpp b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/STTProcessorBase.cpp index da01c66..4f46495 100644 --- a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/STTProcessorBase.cpp +++ b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/STTProcessorBase.cpp @@ -11,13 +11,13 @@ bool USTTProcessorBase::IsTranscriptionRunning() return bTranscriptionRunning; } -void USTTProcessorBase::OnTranscriptionResult(int32 TranscriptionID, FString TranscriptionResult) +void USTTProcessorBase::OnTranscriptionResult(int32 TranscriptionID, FString TranscriptionResult, FString Language) { bTranscriptionRunning = false; if (IsValid(STTManager)) { STTManager->UserSpeechStateChanged(ESTTTalkingState::SILENCE); - STTManager->SendTranscription(TranscriptionID, TranscriptionResult); + STTManager->SendTranscription(TranscriptionID, TranscriptionResult, Language); } } diff --git a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Whisper/STTProcessorWhisper.cpp b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Whisper/STTProcessorWhisper.cpp index 11775fe..5dbc608 100644 --- a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Whisper/STTProcessorWhisper.cpp +++ b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Whisper/STTProcessorWhisper.cpp @@ -269,10 +269,19 @@ void USTTProcessorWhisper::BuildMultipartBody(const TArray& WavData, cons AppendStringToBody(OutBody, TEXT("Content-Disposition: form-data; name=\"model\"\r\n\r\n")); AppendStringToBody(OutBody, TranscribeModelEnumToString(WhisperProcessorConfig->Model) + TEXT("\r\n")); + if (WhisperProcessorConfig->Model == EOpenAITranscriptionModel::Whisper1) + { + BoundaryLine = FString::Printf(TEXT("--%s\r\n"), *Boundary); + AppendStringToBody(OutBody, BoundaryLine); + AppendStringToBody(OutBody, TEXT("Content-Disposition: form-data; name=\"response_format\"\r\n\r\n")); + AppendStringToBody(OutBody, TEXT("verbose_json\r\n")); + } + if (!Prompt.IsEmpty()) { BoundaryLine = FString::Printf(TEXT("--%s\r\n"), *Boundary); AppendStringToBody(OutBody, BoundaryLine); + AppendStringToBody(OutBody, TEXT("Content-Disposition: form-data; name=\"prompt\"\r\n\r\n")); AppendStringToBody(OutBody, Prompt + TEXT("\r\n")); } @@ -432,7 +441,15 @@ void USTTProcessorWhisper::SendWhisperRequest(TArray&& WavData) return; } - Self->OnTranscriptionResult(TranscriptionId, Text); + UE_LOG(LogTemp, Warning, TEXT("OpenAI says: %s"), *JsonString); + + FString Language; + if (RootObject->TryGetStringField(TEXT("language"), Language) && !Language.IsEmpty()) + { + Self->DetectedLanguage = Language; + } + + Self->OnTranscriptionResult(TranscriptionId, Text, Self->DetectedLanguage); }); ActiveRequests.Add(RequestPtr); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/STTManagerBase.cpp b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/STTManagerBase.cpp index 607b147..47c371e 100644 --- a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/STTManagerBase.cpp +++ b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/STTManagerBase.cpp @@ -226,9 +226,9 @@ float USTTManagerBase::GetRemainingTalkingTimeFactor() return 1; } -void USTTManagerBase::DebugSTTInput(FString DebugSentence) +void USTTManagerBase::DebugSTTInput(FString DebugSentence, FString Language = "de") { - OnTranscriptionReceived.Broadcast(0, DebugSentence); + OnTranscriptionReceived.Broadcast(0, DebugSentence, Language); } void USTTManagerBase::PTTStateChanged(bool BtnPressed) @@ -325,9 +325,9 @@ void USTTManagerBase::SendTranscriptionChunk(uint32 TranscriptionID, FString con OnTranscriptionChunkReceived.Broadcast(TranscriptionID, USTTManagerBase::CheckForWordReplacement(content)); } -void USTTManagerBase::SendTranscription(uint32 TranscriptionID, FString content) +void USTTManagerBase::SendTranscription(uint32 TranscriptionID, FString content, FString Language) { - OnTranscriptionReceived.Broadcast(TranscriptionID, USTTManagerBase::CheckForWordReplacement(content)); + OnTranscriptionReceived.Broadcast(TranscriptionID, USTTManagerBase::CheckForWordReplacement(content), Language); } void USTTManagerBase::SendUIStateChanged() diff --git a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Azure/STTProcessorAzure.h b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Azure/STTProcessorAzure.h index 133df41..081bf1f 100644 --- a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Azure/STTProcessorAzure.h +++ b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Azure/STTProcessorAzure.h @@ -52,7 +52,7 @@ private: public: void OnRecognizing(const FString& RecognizedText); - void OnRecognized(const FString& RecognizedText); + void OnRecognized(const FString& RecognizedText, const FString& Language); UFUNCTION() void OnConnectionSuccess(); diff --git a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/STTProcessorBase.h b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/STTProcessorBase.h index a802b16..32377e9 100644 --- a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/STTProcessorBase.h +++ b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/STTProcessorBase.h @@ -33,7 +33,7 @@ public: bool IsTranscriptionRunning(); UFUNCTION() - void OnTranscriptionResult(int32 TranscriptionID, FString TranscriptionResult); + void OnTranscriptionResult(int32 TranscriptionID, FString TranscriptionResult, FString Language); UFUNCTION() void OnTranscriptionIntermediateResult(int32 TranscriptionID, FString TranscriptionIntermediateResult); @@ -52,4 +52,6 @@ protected: bool bTranscriptionRunning = false; int32 TranscriptionCounter = 0; + FString DetectedLanguage = "LANGUAGE_NOT_DETECTED"; + }; diff --git a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/STTManagerBase.h b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/STTManagerBase.h index b459f74..c8ec57d 100644 --- a/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/STTManagerBase.h +++ b/Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/STTManagerBase.h @@ -13,7 +13,7 @@ DECLARE_DYNAMIC_MULTICAST_DELEGATE_TwoParams(FMulticastDelegateTranscriptionChunkReceived, int32, TranscribeID, FString, Content); -DECLARE_DYNAMIC_MULTICAST_DELEGATE_TwoParams(FMulticastDelegateTranscriptionReceived, int32, TranscribeID, FString, Content); +DECLARE_DYNAMIC_MULTICAST_DELEGATE_ThreeParams(FMulticastDelegateTranscriptionReceived, int32, TranscribeID, FString, Content, FString, Language); DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FMulticastDelegateSpeechStateChanged, ESTTTalkingState, TalkingState); DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FMulticastDelegateSpeechStateChangedForUI, ESTTTalkingState, TalkingState); DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FMulticastDelegateSTTBlocked, bool, IsBlocked); @@ -119,7 +119,7 @@ public: float GetRemainingTalkingTimeFactor(); UFUNCTION(BlueprintCallable, Category = "AvatarCoreSTT") - void DebugSTTInput(FString DebugSentence); + void DebugSTTInput(FString DebugSentence, FString Language); UFUNCTION(BlueprintCallable, Category = "AvatarCoreSTT") void PTTStateChanged(bool BtnPressed); @@ -143,7 +143,7 @@ public: void SendTranscriptionChunk(uint32 TranscriptionID, FString content); UFUNCTION() - void SendTranscription(uint32 TranscriptionID, FString content); + void SendTranscription(uint32 TranscriptionID, FString content, FString Language); UFUNCTION() void SendUIStateChanged();