Browse Source

Matched with Base

master
Tillman Staffen 4 weeks ago
parent
commit
fb4503ea8e
  1. 1
      Unreal/Config/DefaultGame.ini
  2. BIN
      Unreal/Content/Project/AnimationTesting/BP_AnimationTesting_Manager.uasset
  3. BIN
      Unreal/Content/Project/AnimationTesting/Data/E_AnimationTesting_Avatars.uasset
  4. BIN
      Unreal/Content/Project/AnimationTesting/M_Animation_Testing.umap
  5. BIN
      Unreal/Content/Project/AnimationTesting/Materials/MI_ProcGrid_AnimMap.uasset
  6. BIN
      Unreal/Content/Project/BP/Avatars/Avatar_Ben_BREX.uasset
  7. BIN
      Unreal/Content/Project/BP/BP_Project_Manager.uasset
  8. BIN
      Unreal/Content/Project/BP/EnumsAndStructs/S_ConfigSettings.uasset
  9. BIN
      Unreal/Content/Project/Maps/M_Startup.umap
  10. BIN
      Unreal/Content/Project/Materials/2D_Environment/MM_2D_Environment.uasset
  11. 33
      Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/AvatarCore_AI.Build.cs
  12. 297
      Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Private/AIBaseManager.cpp
  13. 55
      Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Private/AvatarCore_AI.cpp
  14. 37
      Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Private/MCP/MCPUnrealCommand.cpp
  15. 570
      Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Private/OpenRouter/AvatarCoreAIOpenRouter.cpp
  16. 96
      Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Private/RealtimeAPI/AvatarCoreAIRealtime.cpp
  17. 77
      Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/AIBaseConfig.h
  18. 69
      Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/AIBaseManager.h
  19. 9
      Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/AvatarCoreAIEnumsAndStructs.h
  20. 5
      Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/AvatarCore_AI.h
  21. 2
      Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/MCP/FastMCP/FastMCPConfig.h
  22. 25
      Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/MCP/MCPUnrealCommand.h
  23. 74
      Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/OpenRouter/AvatarCoreAIOpenRouter.h
  24. 7
      Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/OpenRouter/OpenRouterConfig.cpp
  25. 47
      Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/OpenRouter/OpenRouterConfig.h
  26. 5
      Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/RealtimeAPI/AvatarCoreAIRealtime.h
  27. 43
      Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/RealtimeAPI/RealtimeAPIConfig.h
  28. 1
      Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/.gitignore
  29. 5
      Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/AddDocumentsToDatabase.bat
  30. 68
      Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/AddDocumentsToDatabase.py
  31. 142
      Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/FastMCPServer.bat
  32. 41
      Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/FastMCPServer.py
  33. 4
      Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/StartPythonVenv.bat
  34. 5
      Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/TestSearchDatabase.bat
  35. 64
      Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/TestSearchDatabase.py
  36. 9
      Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/WipeDatabase.bat
  37. 370
      Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/document_vectordb.py
  38. 32
      Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/requirements.txt
  39. BIN
      Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP_ForContentFolder.zip
  40. BIN
      Unreal/Plugins/AvatarCore_Manager/Content/AvatarCoreManager.uasset
  41. BIN
      Unreal/Plugins/AvatarCore_Manager/Content/StateManagement/States/BP_Configurable_QnA_State.uasset
  42. BIN
      Unreal/Plugins/AvatarCore_Manager/Content/Widgets/Debug/Modules/W_AvatarCoreModuleEntry.uasset
  43. BIN
      Unreal/Plugins/AvatarCore_Manager/Content/Widgets/Debug/Pages/W_DebugAvatarCoreSTT.uasset
  44. BIN
      Unreal/Plugins/AvatarCore_Manager/Content/Widgets/Debug/Pages/W_DebugAvatarCoreTTS.uasset
  45. BIN
      Unreal/Plugins/AvatarCore_Manager/Content/Widgets/StartupScreen/W_AvatarCoreStartupScreen.uasset
  46. 141
      Unreal/Plugins/AvatarCore_Manager/Source/AvatarCore_Manager/Private/FL_AvatarCoreManager.cpp
  47. 64
      Unreal/Plugins/AvatarCore_Manager/Source/AvatarCore_Manager/Public/AvatarCore_ManagerEnums.h
  48. 8
      Unreal/Plugins/AvatarCore_Manager/Source/AvatarCore_Manager/Public/FL_AvatarCoreManager.h
  49. BIN
      Unreal/Plugins/AvatarCore_MetaHuman/Content/Animation/AnimBPs/AvatarCore_AnimInst_BodyForRetarget.uasset
  50. BIN
      Unreal/Plugins/AvatarCore_MetaHuman/Content/Animation/AnimBPs/AvatarCore_AnimInst_Face.uasset
  51. BIN
      Unreal/Plugins/AvatarCore_MetaHuman/Content/BP/MetaHuman/BaseAvatar.uasset
  52. BIN
      Unreal/Plugins/AvatarCore_MetaHuman/Content/Materials/MI_GrayTexture_Body_Cascadeur.uasset
  53. BIN
      Unreal/Plugins/AvatarCore_MetaHuman/Content/Materials/MI_GrayTexture_Head.uasset
  54. BIN
      Unreal/Plugins/AvatarCore_MetaHuman/Content/Materials/MI_GrayTexture_Head_Cascadeur.uasset
  55. BIN
      Unreal/Plugins/AvatarCore_MetaHuman/Content/Materials/M_GrayTexture_Body.uasset
  56. BIN
      Unreal/Plugins/AvatarCore_MetaHuman/Content/Materials/M_GrayTexture_Head.uasset
  57. 147
      Unreal/Plugins/AvatarCore_STT/CLAUDE.md
  58. BIN
      Unreal/Plugins/AvatarCore_STT/Content/Preprocessor/STTPreprocessor250ms.uasset
  59. 8
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/AvatarCore_STT.Build.cs
  60. 62
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Preprocessor/STTPreprocessorBuffer.cpp
  61. 14
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Preprocessor/STTPreprocessorConverter.cpp
  62. 4
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Preprocessor/STTPreprocessorDebugger.cpp
  63. 26
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Preprocessor/STTPreprocessorPTT.cpp
  64. 12
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Preprocessor/STTPreprocessorSpeexDSP.cpp
  65. 39
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Preprocessor/STTPreprocessorVAD.cpp
  66. 9
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Preprocessor/STTPreprocessorWebRTC.cpp
  67. 11
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/AzureRunnable.cpp
  68. 151
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/STTProcessorAzure.cpp
  69. 103
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Parakeet/STTParakeetProcessorBase.cpp
  70. 5
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/RealtimeAPI/STTProcessorRealtimeAPI.cpp
  71. 7
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/STTProcessorDebugSaveWav.cpp
  72. 88
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Whisper/STTProcessorWhisper.cpp
  73. 2
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Recorder/STTRecorderAudioData.cpp
  74. 8
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Recorder/STTRecorderDebugFile.cpp
  75. 2
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Recorder/STTRecorderMicrophone.cpp
  76. 2
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Recorder/STTRecorderPrimaryMicrophone.cpp
  77. 2
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Recorder/STTRecorderUnrealMicrophone.cpp
  78. 12
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/STTManagerBase.cpp
  79. 4
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Preprocessor/STTPreprocessorBase.h
  80. 17
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Preprocessor/STTPreprocessorBuffer.h
  81. 2
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Preprocessor/STTPreprocessorConverter.h
  82. 2
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Preprocessor/STTPreprocessorDebugger.h
  83. 7
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Preprocessor/STTPreprocessorPTT.h
  84. 2
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Preprocessor/STTPreprocessorSpeexDSP.h
  85. 3
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Preprocessor/STTPreprocessorVAD.h
  86. 3
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Preprocessor/STTPreprocessorWebRTC.h
  87. 23
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Azure/STTAzureProcessorConfig.h
  88. 12
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Azure/STTProcessorAzure.h
  89. 7
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Parakeet/STTParakeetProcessorBase.h
  90. 3
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Parakeet/STTParakeetProcessorConfig.h
  91. 2
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/RealtimeAPI/STTProcessorRealtimeAPI.h
  92. 7
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/STTBaseProcessorConfig.h
  93. 2
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/STTProcessorBase.h
  94. 2
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/STTProcessorDebugSaveWav.h
  95. 7
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Whisper/STTProcessorWhisper.h
  96. 2
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Recorder/STTRecorderBase.h
  97. 7
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/STTManagerBase.h
  98. 62
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/STTStructs.h
  99. 70
      Unreal/Plugins/AvatarCore_TTS/Source/AvatarCore_TTS/Private/Cartesia/CartesiaTTSManager.cpp
  100. 4
      Unreal/Plugins/AvatarCore_TTS/Source/AvatarCore_TTS/Private/Cartesia/TTSCartesiaConfig.cpp

1
Unreal/Config/DefaultGame.ini

@ -118,6 +118,7 @@ bSkipMovies=False
+DirectoriesToAlwaysStageAsUFS=(Path="Schema")
+DirectoriesToAlwaysStageAsNonUFS=(Path="Schema")
+DirectoriesToAlwaysStageAsNonUFS=(Path="Certificates")
+DirectoriesToAlwaysStageAsNonUFS=(Path="DB")
bRetainStagedDirectory=False
CustomStageCopyHandler=

BIN
Unreal/Content/Project/AnimationTesting/BP_AnimationTesting_Manager.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Content/Project/AnimationTesting/Data/E_AnimationTesting_Avatars.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Content/Project/AnimationTesting/M_Animation_Testing.umap (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Content/Project/AnimationTesting/Materials/MI_ProcGrid_AnimMap.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Content/Project/BP/Avatars/Avatar_Ben_BREX.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Content/Project/BP/BP_Project_Manager.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Content/Project/BP/EnumsAndStructs/S_ConfigSettings.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Content/Project/Maps/M_Startup.umap (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Content/Project/Materials/2D_Environment/MM_2D_Environment.uasset (Stored with Git LFS)

Binary file not shown.

33
Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/AvatarCore_AI.Build.cs

@ -55,36 +55,17 @@ public class AvatarCore_AI : ModuleRules
// ... add private dependencies that you statically link with here ...
}
);
DynamicallyLoadedModuleNames.AddRange(
// Will only be added in Editor Build
if(Target.bBuildEditor)
PrivateDependencyModuleNames.Add("DeveloperToolSettings");
DynamicallyLoadedModuleNames.AddRange(
new string[]
{
// ... add any modules that your module loads dynamically here ...
}
);
// Package MCPServer folder with shipping builds
string MCPServerPath = Path.Combine(ModuleDirectory, "..", "ThirdParty", "MCPServer");
if (Directory.Exists(MCPServerPath))
{
// Add all files in the MCPServer directory recursively
foreach (string FilePath in Directory.GetFiles(MCPServerPath, "*", SearchOption.AllDirectories))
{
string RelativePath = Path.GetRelativePath(MCPServerPath, FilePath);
string TargetPath = "$(BinaryOutputDir)/MCPServer/" + RelativePath.Replace('\\', '/');
RuntimeDependencies.Add(TargetPath, FilePath, StagedFileType.NonUFS);
}
PublicDefinitions.Add("WITH_MCP_SERVER=1");
}
else
{
PublicDefinitions.Add("WITH_MCP_SERVER=0");
}
// Ensure ThirdParty/MCPServer is packaged in all builds (including shipping)
string MCPPath = System.IO.Path.Combine(ModuleDirectory, "..", "ThirdParty", "MCPServer");
RuntimeDependencies.Add(System.IO.Path.Combine(MCPPath, "*.*"));
RuntimeDependencies.Add(System.IO.Path.Combine(MCPPath, "**", "*.*"));
}
}

297
Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Private/AIBaseManager.cpp

@ -27,31 +27,24 @@ void UAIBaseManager::InitAIManager(UAIBaseConfig* AIConfig, bool DebugMode, AAct
World = WorldReferenceActor->GetWorld();
}
// Add all UnrealCommands to root
// Register command classes; instances are created on-demand when a command is invoked
for (TSubclassOf<UMCPUnrealCommand> CommandClass : CurrentConfig->UnrealCommands)
{
if (CommandClass != nullptr)
if (!CommandClass) continue;
UMCPUnrealCommand* CDO = CommandClass->GetDefaultObject<UMCPUnrealCommand>();
if (CDO->GetCommandName().IsEmpty())
{
UMCPUnrealCommand* Command = NewObject<UMCPUnrealCommand>(this, CommandClass);
if (Command->GetCommandName().IsEmpty())
{
BroadcastAIError(TEXT("Command has empty CommandName!"), EAvatarCoreAIError::InvalidConfig);
continue;
}
else
{
Command->AddToRoot();
Command->InitMCPCommand(World);
UnrealCommands.Add(Command);
UnrealCommandsToolInfos.Add(Command->GetToolInfo());
}
BroadcastAIError(TEXT("Command has empty CommandName!"), EAvatarCoreAIError::InvalidConfig);
continue;
}
UnrealCommandClasses.Add(CommandClass);
UnrealCommandsToolInfos.Add(CDO->GetToolInfo());
}
this->AddToRoot();
bIsRooted = true;
if(AIConfig->bUseMCPServer && AIConfig->MCPManagerClass != nullptr)
if(AIConfig->GlobalAISettings.bUseMCPServer && AIConfig->MCPManagerClass != nullptr)
{
BroadcastAILog(TEXT("Initializing MCP Server..."), true);
@ -61,8 +54,8 @@ void UAIBaseManager::InitAIManager(UAIBaseConfig* AIConfig, bool DebugMode, AAct
// Bind to MCP events for forwarding
MCPManager->OnMCPLog.AddDynamic(this, &UAIBaseManager::OnMCPLogReceived);
MCPManager->OnMCPCommandDone.AddDynamic(this, &UAIBaseManager::CommandFinished);
MCPManager->OnMCPCommandFailed.AddDynamic(this, &UAIBaseManager::CommandFailed);
MCPManager->OnMCPCommandDone.AddDynamic(this, &UAIBaseManager::MCPCommandFinished);
MCPManager->OnMCPCommandFailed.AddDynamic(this, &UAIBaseManager::MCPCommandFailed);
MCPManager->OnMCPManagerError.AddDynamic(this, &UAIBaseManager::OnMCPErrorReceived);
MCPManager->OnMCPManagerStateChanged.AddDynamic(this, &UAIBaseManager::OnMCPStateChanged);
@ -102,15 +95,11 @@ void UAIBaseManager::DeinitAIManager()
CurrentConfig->MCPConfig->RemoveFromRoot();
}
ActiveCommands.Empty();
UnrealCommandClasses.Empty();
UnrealCommandsToolInfos.Empty();
if(CurrentConfig && CurrentConfig->IsRooted()){
// Remove all UnrealCommands from root before clearing config
for (UMCPUnrealCommand* Command : UnrealCommands)
{
if (Command && Command->IsRooted())
{
Command->RemoveFromRoot();
}
}
CurrentConfig->RemoveFromRoot();
CurrentConfig = nullptr;
}
@ -130,7 +119,7 @@ void UAIBaseManager::OnAIReady()
{
FAIMessage QueuedPrompt;
ResponseQueue.Dequeue(QueuedPrompt);
UAIBaseManager::SendResponse(QueuedPrompt, false, true);
UAIBaseManager::SendResponse(QueuedPrompt, false);
}
}
@ -154,20 +143,16 @@ void UAIBaseManager::SetNewState(EAvatarCoreAIState NewState, bool ForceState)
}
}
void UAIBaseManager::SendResponse(FAIMessage Message, bool NotifyDelay, bool TriggerResponse)
void UAIBaseManager::SendResponse(FAIMessage Message, bool NotifyDelay)
{
if (CurrentAIState != EAvatarCoreAIState::Ready && TriggerResponse) {
FAIMessage tmpPrompt;
tmpPrompt.Message = Message.Message;
tmpPrompt.Role = Message.Role;
ResponseQueue.Enqueue(tmpPrompt);
if (CurrentAIState != EAvatarCoreAIState::Ready)
{
ResponseQueue.Enqueue(Message);
if(CurrentAIState == EAvatarCoreAIState::Disconnected)
ActivateAI();
ActivateAI();
return;
}
AddMessageToArray(Message);
AnswerCache.Empty();
ResponseID++;
LastRequest = Message.Message;
@ -176,11 +161,14 @@ void UAIBaseManager::SendResponse(FAIMessage Message, bool NotifyDelay, bool Tri
ActivateAI();
return;
}
UAIBaseManager::SetNewState(EAvatarCoreAIState::Processing);
if (Message.bTriggerResponse)
UAIBaseManager::SetNewState(EAvatarCoreAIState::Processing);
BroadcastAILog(FString::Printf(TEXT("AI Manager sent question/response: %s"), *Message.Message));
if (NotifyDelay)
UAIBaseManager::StartDelayedAnswerTimer();
SendResponseChild(Message, NotifyDelay, TriggerResponse);
SendResponseChild(Message, NotifyDelay);
if(Message.Role == EAvatarCoreAIPromptRole::User || Message.Role == EAvatarCoreAIPromptRole::Tool)
AddMessageToArray(Message);
}
void UAIBaseManager::RepeatText(FString TextToRepeat, bool DoRephrase)
@ -195,7 +183,7 @@ void UAIBaseManager::RepeatText(FString TextToRepeat, bool DoRephrase)
FAIMessage tmpPrompt;
tmpPrompt.Message = Instruction;
tmpPrompt.Role = EAvatarCoreAIPromptRole::System;
SendResponse(tmpPrompt, false, true);
SendResponse(tmpPrompt, false);
}
void UAIBaseManager::ClearAI()
@ -211,21 +199,22 @@ void UAIBaseManager::ClearAI()
//Extend in Child
}
void UAIBaseManager::BroadcastAILog(const FString& Message, bool ShowAlways)
void UAIBaseManager::BroadcastAILog(const FString& Message, bool ShowAlways, bool VeryVerbose)
{
if (!bDebugMode && !ShowAlways)
return;
if (IsInGameThread())
{
OnAILog.Broadcast(Message);
OnAILog.Broadcast(Message, VeryVerbose);
}
else
{
FString Copy = Message;
AsyncTask(ENamedThreads::GameThread, [this, Copy]()
bool CopyVerbosity = VeryVerbose;
AsyncTask(ENamedThreads::GameThread, [this, Copy, CopyVerbosity]()
{
OnAILog.Broadcast(Copy);
OnAILog.Broadcast(Copy, CopyVerbosity);
});
}
}
@ -249,36 +238,27 @@ void UAIBaseManager::BroadcastAIError(const FString& ErrorMessage, EAvatarCoreAI
// Thread-safe critical section for AnswerCache
static FCriticalSection AnswerCacheCriticalSection;
void UAIBaseManager::AddUnrealCommand(UMCPUnrealCommand* Command)
{;
if (Command->GetCommandName().IsEmpty())
void UAIBaseManager::AddUnrealCommand(TSubclassOf<UMCPUnrealCommand> CommandClass)
{
if (!CommandClass) return;
UMCPUnrealCommand* CDO = CommandClass->GetDefaultObject<UMCPUnrealCommand>();
if (CDO->GetCommandName().IsEmpty())
{
BroadcastAIError(TEXT("Command has empty CommandName!"), EAvatarCoreAIError::InvalidConfig);
return;
}
else
{
UnrealCommands.Add(Command);
UnrealCommandsToolInfos.Add(Command->GetToolInfo());
if (!Command->IsRooted())
{
Command->AddToRoot();
}
}
UnrealCommandClasses.Add(CommandClass);
UnrealCommandsToolInfos.Add(CDO->GetToolInfo());
}
void UAIBaseManager::RemoveUnrealCommand(const FString& CommandName)
{
for (int32 i = UnrealCommands.Num() - 1; i >= 0; --i)
for (int32 i = UnrealCommandClasses.Num() - 1; i >= 0; --i)
{
UMCPUnrealCommand* Command = UnrealCommands[i];
if (Command && Command->GetCommandName() == CommandName)
UMCPUnrealCommand* CDO = UnrealCommandClasses[i]->GetDefaultObject<UMCPUnrealCommand>();
if (CDO && CDO->GetCommandName().Equals(CommandName, ESearchCase::IgnoreCase))
{
if (Command->IsRooted())
{
Command->RemoveFromRoot();
}
UnrealCommands.RemoveAt(i);
UnrealCommandClasses.RemoveAt(i);
UnrealCommandsToolInfos.RemoveAt(i);
break;
}
@ -300,23 +280,27 @@ TArray<FMCPToolInfo> UAIBaseManager::GetAvailableCommands()
}
}
void UAIBaseManager::RunMCPCommand(FString CommandName, FString Payload)
void UAIBaseManager::RunMCPCommand(FString CommandName, FString Payload, FString ToolCallId)
{
ClearRequestTimeout();
if (!CurrentConfig) {
BroadcastAIError(TEXT("No config loaded for RunCommand"), EAvatarCoreAIError::InvalidConfig);
return;
}
UMCPUnrealCommand* FoundUnrealCommand = nullptr;
for (UMCPUnrealCommand* Command : UnrealCommands) {
if (Command && Command->GetCommandName().ToLower().Equals(CommandName.ToLower())) {
FoundUnrealCommand = Command;
// Find the registered class matching the command name
TSubclassOf<UMCPUnrealCommand> FoundClass = nullptr;
for (TSubclassOf<UMCPUnrealCommand> CmdClass : UnrealCommandClasses)
{
if (CmdClass && CmdClass->GetDefaultObject<UMCPUnrealCommand>()->GetCommandName().Equals(CommandName, ESearchCase::IgnoreCase))
{
FoundClass = CmdClass;
break;
}
}
if (!FoundUnrealCommand && !MCPManager || !FoundUnrealCommand && !MCPManager->HasCommand(CommandName)) {
if (!FoundClass && (!MCPManager || !MCPManager->HasCommand(CommandName))) {
BroadcastAIError(FString::Printf(TEXT("Command '%s' not found in Unreal Commands or MCP"), *CommandName), EAvatarCoreAIError::MCPError);
return;
}
@ -325,36 +309,45 @@ void UAIBaseManager::RunMCPCommand(FString CommandName, FString Payload)
SetNewState(EAvatarCoreAIState::GettingInfo);
functionCallRunning = true;
if (FoundUnrealCommand)
if (FoundClass)
{
UWorld* World = nullptr;
if (WorldReferenceActor.IsValid()) {
if (WorldReferenceActor.IsValid())
World = WorldReferenceActor->GetWorld();
}
// Remove all previous bindings
FoundUnrealCommand->OnCommandDone.Clear();
FoundUnrealCommand->OnCommandFailed.Clear();
// Bind events to this instance
FoundUnrealCommand->OnCommandDone.AddDynamic(this, &UAIBaseManager::CommandFinished);
FoundUnrealCommand->OnCommandFailed.AddDynamic(this, &UAIBaseManager::CommandFailed);
// Execute the command
FoundUnrealCommand->Execute(World, Payload);
// Create a fresh instance for this invocation; ActiveCommands (UPROPERTY) keeps it alive
UMCPUnrealCommand* Cmd = NewObject<UMCPUnrealCommand>(this, FoundClass);
Cmd->Id = ToolCallId;
Cmd->SetWorldContext(WorldReferenceActor.Get());
ActiveCommands.Add(Cmd);
Cmd->OnCommandDone.AddDynamic(this, &UAIBaseManager::CommandFinished);
Cmd->OnCommandFailed.AddDynamic(this, &UAIBaseManager::CommandFailed);
Cmd->InitMCPCommand(World);
Cmd->Execute(World, Payload);
return;
}
if(MCPManager && MCPManager->HasCommand(CommandName))
if (MCPManager && MCPManager->HasCommand(CommandName))
{
if (!ToolCallId.IsEmpty())
MCPToolCallIds.Add(CommandName, ToolCallId);
MCPManager->ExecuteCommand(CommandName, Payload);
return;
}
}
void UAIBaseManager::ClearMCPCommand()
{
for (UMCPUnrealCommand* Command : UnrealCommands) {
Command->OnCommandDone.Clear();
Command->OnCommandFailed.Clear();
for (UMCPUnrealCommand* Command : ActiveCommands)
{
if (Command)
{
Command->OnCommandDone.Clear();
Command->OnCommandFailed.Clear();
}
}
ActiveCommands.Empty();
}
FString UAIBaseManager::GetRoleAsString(EAvatarCoreAIPromptRole Role)
@ -370,29 +363,76 @@ FString UAIBaseManager::GetRoleAsString(EAvatarCoreAIPromptRole Role)
}
}
void UAIBaseManager::CommandFinished(const FString& Command, const FString& Payload)
void UAIBaseManager::CommandFinished(const FAIMessage& Message)
{
ActiveCommands.RemoveAll([&Message](UMCPUnrealCommand* Cmd)
{
return Cmd && (Message.Id.IsEmpty() || Cmd->Id.Equals(Message.Id));
});
SetNewState(EAvatarCoreAIState::Ready);
functionCallRunning = false;
if (bDebugMode)
BroadcastAILog(FString::Printf(TEXT("Command ran successfully. Answer: %s"), *Message.Message), true);
else
BroadcastAILog(TEXT("Command ran successfully."), true);
SendResponse(Message, false);
}
void UAIBaseManager::CommandFailed(const FAIMessage& Message)
{
ActiveCommands.RemoveAll([&Message](UMCPUnrealCommand* Cmd)
{
return Cmd && (Message.Id.IsEmpty() || Cmd->Id.Equals(Message.Id));
});
functionCallRunning = false;
SetNewState(EAvatarCoreAIState::Ready);
BroadcastAILog(FString::Printf(TEXT("Command failed. Sending: %s"), *Message.Message), true);
SendResponse(Message, false);
}
void UAIBaseManager::MCPCommandFinished(const FString& Command, const FString& Payload)
{
FString FoundId;
FString* MCPId = MCPToolCallIds.Find(Command);
if (MCPId)
{
FoundId = *MCPId;
MCPToolCallIds.Remove(Command);
}
SetNewState(EAvatarCoreAIState::Ready);
functionCallRunning = false;
if (bDebugMode)
BroadcastAILog(FString::Printf(TEXT("Command '%s' ran successfully. Answer: %s"), *Command, *Payload), true);
BroadcastAILog(FString::Printf(TEXT("MCP Command '%s' ran successfully. Answer: %s"), *Command, *Payload), true);
else
BroadcastAILog(FString::Printf(TEXT("Command '%s' ran successfully."), *Command), true);
BroadcastAILog(FString::Printf(TEXT("MCP Command '%s' ran successfully."), *Command), true);
FAIMessage FinishedCommandMessage;
FinishedCommandMessage.Role = EAvatarCoreAIPromptRole::System;
FinishedCommandMessage.Message = Payload;
SendResponse(FinishedCommandMessage, false, true);
if (!FoundId.IsEmpty())
{
FinishedCommandMessage.Role = EAvatarCoreAIPromptRole::Tool;
FinishedCommandMessage.Id = FoundId;
}
else
{
FinishedCommandMessage.Role = EAvatarCoreAIPromptRole::System;
}
SendResponse(FinishedCommandMessage, false);
}
void UAIBaseManager::CommandFailed(const FString& Command, const FString& Payload)
void UAIBaseManager::MCPCommandFailed(const FString& Command, const FString& Payload)
{
functionCallRunning = false;
SetNewState(EAvatarCoreAIState::Ready);
BroadcastAILog(FString::Printf(TEXT("Command '%s' failed. Sending: %s"), *Command, *Payload), true);
BroadcastAILog(FString::Printf(TEXT("MCP Command '%s' failed. Sending: %s"), *Command, *Payload), true);
FAIMessage FailedCommandMessage;
FailedCommandMessage.Role = EAvatarCoreAIPromptRole::System;
FailedCommandMessage.Message = Payload;
SendResponse(FailedCommandMessage, false, true);
SendResponse(FailedCommandMessage, false);
}
void UAIBaseManager::AddMessageToArray(FAIMessage NewMessage)
@ -400,7 +440,7 @@ void UAIBaseManager::AddMessageToArray(FAIMessage NewMessage)
PreviousMessages.Add(NewMessage);
// Remove oldest if over limit
if (CurrentConfig->MaxMessages > -1 && PreviousMessages.Num() > CurrentConfig->MaxMessages)
if (CurrentConfig->GlobalAISettings.MaxMessages > -1 && PreviousMessages.Num() > CurrentConfig->GlobalAISettings.MaxMessages)
{
PreviousMessages.RemoveAt(0); // removes oldest (first element)
}
@ -414,13 +454,13 @@ TArray<FAIMessage> UAIBaseManager::GetAllPreviousMessage()
void UAIBaseManager::StartDelayedAnswerTimer()
{
UAIBaseManager::ClearDelayedAnswerTimer();
if(CurrentConfig->DelayAnswerSeconds > 0.0f)
if(CurrentConfig->GlobalAISettings.DelayAnswerSeconds > 0.0f)
{
GetWorld()->GetTimerManager().SetTimer(
DelayedAnswerTimer,
this,
&UAIBaseManager::OnDelayedAnswer,
CurrentConfig->DelayAnswerSeconds,
CurrentConfig->GlobalAISettings.DelayAnswerSeconds,
false
);
}
@ -473,64 +513,85 @@ void UAIBaseManager::OnAIResponse(const FString& Chunk, bool IsFinal)
{
FAIMessage tmpAIAnswer;
tmpAIAnswer.Role = EAvatarCoreAIPromptRole::Assistant;
tmpAIAnswer.Message = Chunk;
tmpAIAnswer.Message = UpdatedAnswer;
AddMessageToArray(tmpAIAnswer);
}
}
void UAIBaseManager::AddSystemInstructions(const TArray<FSystemInstruction> SystemInstructions, bool WipeCurrent = true)
void UAIBaseManager::AddSystemInstructions(const TArray<FSystemInstruction> SystemInstructions, bool AutoSyncWithAI = false)
{
if (WipeCurrent)
ClearAllSystemInstructios();
CurrentConfig->SystemPrompts.Append(SystemInstructions);
for (const FSystemInstruction& Item : SystemInstructions)
{
AddSystemInstruction(Item, false, false);
}
if (AutoSyncWithAI)
UpdateSession();
}
void UAIBaseManager::AddSystemInstruction(const FName Name, const FString NewSystemInstruction, bool AddAsFirst = false)
void UAIBaseManager::AddSystemInstruction(const FSystemInstruction SystemInstruction, bool AddAsFirst = false, bool AutoSyncWithAI = false)
{
UAIBaseManager::RemoveSystemInstruction(Name);
FSystemInstruction tmpEntry;
tmpEntry.Name = Name;
tmpEntry.Instruction = NewSystemInstruction;
UAIBaseManager::RemoveSystemInstruction(SystemInstruction.Name, false);
if(AddAsFirst)
{
TArray<FSystemInstruction> tmpSystemPrompts;
tmpSystemPrompts.Add(tmpEntry);
tmpSystemPrompts.Add(SystemInstruction);
tmpSystemPrompts.Append(CurrentConfig->SystemPrompts);
CurrentConfig->SystemPrompts = tmpSystemPrompts;
}
else
CurrentConfig->SystemPrompts.Add(tmpEntry);
CurrentConfig->SystemPrompts.Add(SystemInstruction);
BroadcastAILog(FString::Printf(TEXT("AI Manager added System Instruction %s"), *Name.ToString()));
BroadcastAILog(FString::Printf(TEXT("AI Manager added System Instruction %s"), *SystemInstruction.Name.ToString()));
if (AutoSyncWithAI)
UpdateSession();
}
void UAIBaseManager::ClearAllSystemInstructios()
// Removes every system instruction stored in CurrentConfig->SystemPrompts,
// logs the wipe, and then re-adds the built-in instructions through
// AddRepeatSystemInstruction().
// AutoSyncWithAI: when true, UpdateSession() is called after the list has been rebuilt.
// NOTE(review): CurrentConfig is dereferenced without a null check - confirm callers
// only invoke this while a config is loaded.
void UAIBaseManager::ClearAllSystemInstructions(bool AutoSyncWithAI = false)
{
CurrentConfig->SystemPrompts.Empty();
BroadcastAILog(FString::Printf(TEXT("AI Manager wiped all System Instructions")));
// Restore the built-in instructions right after the wipe
UAIBaseManager::AddRepeatSystemInstruction();
if (AutoSyncWithAI)
UpdateSession();
}
void UAIBaseManager::AddRepeatSystemInstruction()
{
UAIBaseManager::AddSystemInstruction(TEXT("Repeat Text"), TEXT("If the text starts with [REPEAT], repeat the text exactly word for word."), true);
UAIBaseManager::AddSystemInstruction(TEXT("Rephrase Text"), TEXT("If the text starts with [REPHRASE], repeat the text in your own words without stating that you are rephrasing."), true);
FSystemInstruction repeatInstruction;
repeatInstruction.Name = "Repeat Text";
repeatInstruction.Instruction = "If the text starts with [REPEAT], repeat the text exactly word for word.";
FSystemInstruction rephraseInstruction;
rephraseInstruction.Name = "Rephrase Text";
rephraseInstruction.Instruction = "If the text starts with [REPHRASE], repeat the text in your own words without stating that you are rephrasing.";
UAIBaseManager::AddSystemInstruction(repeatInstruction);
UAIBaseManager::AddSystemInstruction(rephraseInstruction);
}
void UAIBaseManager::RemoveSystemInstruction(const FName Name)
void UAIBaseManager::RemoveSystemInstruction(FName SystemInstruction, bool AutoSyncWithAI = false)
{
// Iterate in reverse to safely remove while iterating
for (int32 i = CurrentConfig->SystemPrompts.Num() - 1; i >= 0; --i)
{
if (CurrentConfig->SystemPrompts[i].Name == Name)
if (CurrentConfig->SystemPrompts[i].Name == SystemInstruction)
{
CurrentConfig->SystemPrompts.RemoveAt(i);
BroadcastAILog(FString::Printf(TEXT("AI Manager removed System Instruction %s"), *Name.ToString()));
BroadcastAILog(FString::Printf(TEXT("AI Manager removed System Instruction %s"), *SystemInstruction.ToString()));
}
}
if (AutoSyncWithAI)
UpdateSession();
}
void UAIBaseManager::RemoveSystemInstructions(const TArray<FName> SystemInstructions, bool AutoSyncWithAI)
{
for (const FName& Item : SystemInstructions)
{
RemoveSystemInstruction(Item, false);
}
if (AutoSyncWithAI)
UpdateSession();
}
FString UAIBaseManager::GetSystemInstructionPromptString(bool AsJsonString = false)
@ -588,7 +649,7 @@ void UAIBaseManager::ResetRequestTimeout()
RequestTimeoutTimer, // handle to cancel timer at a later time
this, // the owning object
&UAIBaseManager::OnRequestTimeout, // function to call on elapsed
CurrentConfig->RequestTimeout, // float delay until elapsed slightly shorter than chunk length
CurrentConfig->GlobalAISettings.RequestTimeout, // float delay until elapsed slightly shorter than chunk length
false); // looping?
}

55
Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Private/AvatarCore_AI.cpp

@ -2,11 +2,19 @@
#include "AvatarCore_AI.h"
#if WITH_EDITOR
#include "Settings/ProjectPackagingSettings.h"
#endif
#define LOCTEXT_NAMESPACE "FAvatarCore_AIModule"
// Module startup hook. In editor builds it registers the DB directory in the
// project packaging settings; in other builds it does nothing.
void FAvatarCore_AIModule::StartupModule()
{
// This code will execute after your module is loaded into memory; the exact timing is specified in the .uplugin file per-module
#if WITH_EDITOR
// Add the DB directory to the packaging settings immediately when the module starts
FAvatarCore_AIModule::AddDBDirectoryToPackaging();
#endif
}
void FAvatarCore_AIModule::ShutdownModule()
@ -15,6 +23,53 @@ void FAvatarCore_AIModule::ShutdownModule()
// we call this function before unloading the module.
}
#if WITH_EDITOR
void FAvatarCore_AIModule::AddDBDirectoryToPackaging()
{
// Get the project packaging settings
UProjectPackagingSettings* PackagingSettings = GetMutableDefault<UProjectPackagingSettings>();
if (PackagingSettings)
{
// Define the DB directory path relative to Content
const FString DBDir = TEXT("DB");
// Check if the directory is already in the list
bool bAlreadyExists = false;
for (const FDirectoryPath& ExistingPath : PackagingSettings->DirectoriesToAlwaysStageAsNonUFS)
{
if (ExistingPath.Path == DBDir)
{
bAlreadyExists = true;
break;
}
}
// Add the directory if it doesn't exist
if (!bAlreadyExists)
{
FDirectoryPath NewPath;
NewPath.Path = DBDir;
PackagingSettings->DirectoriesToAlwaysStageAsNonUFS.Add(NewPath);
// Save the settings to disk
PackagingSettings->TryUpdateDefaultConfigFile();
UE_LOG(LogTemp, Log, TEXT("Database directory added to packaging settings"));
}
else
{
UE_LOG(LogTemp, Log, TEXT("Database directory already exists in packaging settings"));
}
}
else
{
UE_LOG(LogTemp, Warning, TEXT("Could not access Project Packaging Settings"));
}
}
#endif
#undef LOCTEXT_NAMESPACE
IMPLEMENT_MODULE(FAvatarCore_AIModule, AvatarCore_AI)

37
Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Private/MCP/MCPUnrealCommand.cpp

@ -52,24 +52,27 @@ void UMCPUnrealCommand::StartTimeout()
}
}
void UMCPUnrealCommand::FinishCommand(const FString& Payload)
void UMCPUnrealCommand::FinishCommand(const FString& Payload, bool bTriggerResponse)
{
OnCommandDone.Broadcast(GetCommandName(), Payload);
// Always clear timeout, even if already finished
FAIMessage Msg;
Msg.Role = EAvatarCoreAIPromptRole::Tool;
Msg.Message = Payload;
Msg.Id = Id;
Msg.bTriggerResponse = bTriggerResponse;
OnCommandDone.Broadcast(Msg);
if (GetWorld())
{
GetWorld()->GetTimerManager().ClearTimer(TimeoutHandle);
}
}
void UMCPUnrealCommand::FailCommand(const FString& Payload)
void UMCPUnrealCommand::FailCommand(const FString& Payload, bool bTriggerResponse)
{
OnCommandFailed.Broadcast(GetCommandName(), Payload);
// Always clear timeout, even if already finished
FAIMessage Msg;
Msg.Role = EAvatarCoreAIPromptRole::System;
Msg.Message = Payload;
Msg.bTriggerResponse = bTriggerResponse;
OnCommandFailed.Broadcast(Msg);
if (GetWorld())
{
GetWorld()->GetTimerManager().ClearTimer(TimeoutHandle);
}
}
void UMCPUnrealCommand::OnTimeout()
@ -89,11 +92,21 @@ AActor* UMCPUnrealCommand::GetActorOfClass(UWorld* World, TSubclassOf<AActor> Ac
return nullptr;
}
// Stores the object that GetWorld() consults first when resolving this command's world.
void UMCPUnrealCommand::SetWorldContext(UObject* NewWorldContext)
{
RequiredWorldContext = NewWorldContext;
}
// Returns the world context object previously stored via SetWorldContext (may be null).
UObject* UMCPUnrealCommand::GetWorldContextObject() const
{
return RequiredWorldContext;
}
// Resolves the world for this command.
// An explicitly assigned world context wins; otherwise the outer object's world is used.
// Returns nullptr when neither is available.
UWorld* UMCPUnrealCommand::GetWorld() const
{
	if (RequiredWorldContext)
	{
		return RequiredWorldContext->GetWorld();
	}

	const UObject* OuterObject = GetOuter();
	return OuterObject ? OuterObject->GetWorld() : nullptr;
}

570
Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Private/OpenRouter/AvatarCoreAIOpenRouter.cpp

@ -0,0 +1,570 @@
// Fill out your copyright notice in the Description page of Project Settings.
#include "OpenRouter/AvatarCoreAIOpenRouter.h"
#include "HttpModule.h"
#include "Interfaces/IHttpRequest.h"
#include "Interfaces/IHttpResponse.h"
#include "Dom/JsonObject.h"
#include "Dom/JsonValue.h"
#include "Serialization/JsonReader.h"
#include "Serialization/JsonWriter.h"
#include "Serialization/JsonSerializer.h"
// Appends incoming HTTP response bytes directly to a shared TArray.
// Called from the HTTP module's background thread; the lock guards against concurrent
// reads in OnResponseProgress (game thread). Both the manager and the archive hold
// shared refs to the buffer and lock, so neither is destroyed under a live callback.
class FSSEReceiveArchive final : public FArchive
{
public:
TSharedPtr<TArray<uint8>> Buffer;
TSharedPtr<FCriticalSection> Lock;
FSSEReceiveArchive(TSharedPtr<TArray<uint8>> InBuffer, TSharedPtr<FCriticalSection> InLock)
: Buffer(MoveTemp(InBuffer)), Lock(MoveTemp(InLock))
{
SetIsSaving(true);
}
void Serialize(void* V, int64 Length) override
{
FScopeLock ScopeLock(Lock.Get());
Buffer->Append(static_cast<uint8*>(V), static_cast<int32>(Length));
}
FString GetArchiveName() const override { return TEXT("FSSEReceiveArchive"); }
};
// ---------------------------------------------------------------------------
// Lifecycle
// ---------------------------------------------------------------------------
// Binds this manager to its OpenRouter-specific config and activates it.
// AIConfig must be a UOpenRouterConfig; any other type is reported as InvalidConfig
// and the manager is not activated. InWorldReferenceActor is not used here.
void UAvatarCoreAIOpenRouter::InitAIManagerChild(UAIBaseConfig* AIConfig, AActor* InWorldReferenceActor)
{
	OpenRouterConfig = Cast<UOpenRouterConfig>(AIConfig);
	if (OpenRouterConfig)
	{
		ActivateAI();
		return;
	}

	BroadcastAIError(TEXT("Cannot cast config to UOpenRouterConfig"), EAvatarCoreAIError::InvalidConfig);
}
// Marks the manager as Ready and then notifies via OnAIReady().
// No connection is opened here; HTTP requests are created per message in SendChatCompletionRequest.
void UAvatarCoreAIOpenRouter::ActivateAI()
{
SetNewState(EAvatarCoreAIState::Ready);
OnAIReady();
}
// Shuts the manager down: cancels any in-flight HTTP request, runs the base-class
// deactivation, and finally reports the Disconnected state.
void UAvatarCoreAIOpenRouter::DeactivateAI()
{
// Cancel first so no request is left running while the base class tears down
CancelActiveRequest();
UAIBaseManager::DeactivateAI();
SetNewState(EAvatarCoreAIState::Disconnected);
}
// Session update hook. Nothing is sent to the server here: with a valid config the
// manager simply signals readiness again via OnAIReady(); a missing config is reported
// as InvalidConfig.
void UAvatarCoreAIOpenRouter::UpdateSession()
{
	if (OpenRouterConfig)
	{
		OnAIReady();
		return;
	}

	BroadcastAIError(TEXT("OpenRouterConfig is null in UpdateSession"), EAvatarCoreAIError::InvalidConfig);
}
// Clears this manager: cancels any in-flight request, discards all SSE parsing and
// tool-call accumulation state, then delegates to the base-class ClearAI().
void UAvatarCoreAIOpenRouter::ClearAI()
{
CancelActiveRequest();
ResetSSEState();
UAIBaseManager::ClearAI();
}
// Cancels the in-flight HTTP request, if any, and drops our handle to it.
// The progress and completion delegates are unbound before cancelling: a deliberate
// cancel must not be routed into OnRequestComplete, where it would be reported as an
// HTTP error, switch the manager to the Error state, and could reset the handle of a
// request started right after this call.
void UAvatarCoreAIOpenRouter::CancelActiveRequest()
{
	if (!ActiveRequest.IsValid())
	{
		return;
	}

	BroadcastAILog(TEXT("OpenRouter: cancelling active HTTP request."));

	// Detach our callbacks first so nothing from the cancelled request reaches this object.
	ActiveRequest->OnRequestProgress64().Unbind();
	ActiveRequest->OnProcessRequestComplete().Unbind();

	ActiveRequest->CancelRequest();
	ActiveRequest.Reset();
}
// ---------------------------------------------------------------------------
// Send
// ---------------------------------------------------------------------------
// Sends Message to the model when it is flagged to trigger a response; messages without
// bTriggerResponse are ignored here. NotifyDelay is not used by this implementation.
void UAvatarCoreAIOpenRouter::SendResponseChild(FAIMessage Message, bool NotifyDelay)
{
	const bool bShouldRequest = Message.bTriggerResponse;
	if (bShouldRequest)
	{
		UAvatarCoreAIOpenRouter::SetNewState(EAvatarCoreAIState::Processing);

		// Safety net: drop any stale request and its parse state before starting a new one
		// (the queue should normally prevent overlapping requests).
		CancelActiveRequest();
		ResetSSEState();

		SendChatCompletionRequest(Message);
	}
}
// Returns all per-request streaming state to its initial values so the next request
// starts from a clean slate.
void UAvatarCoreAIOpenRouter::ResetSSEState()
{
	// Completion flag and read cursor into the receive buffer
	bResponseComplete = false;
	SSEByteOffset = 0;

	// Release this object's references to the shared receive buffer and its lock
	SSEStreamBufferPtr.Reset();
	SSEBufferLock.Reset();

	// Unparsed trailing bytes of the previous stream
	SSERawBuffer.Empty();

	// Per-index tool-call accumulation
	ToolCallIdMap.Empty();
	ToolCallNameMap.Empty();
	ToolCallArgsMap.Empty();
}
// Builds and dispatches a streaming chat-completion request for CurrentMessage.
//
// The JSON body carries the model id, stream=true, max_tokens, temperature, the message
// list from BuildMessagesArray and - when bSendTools is enabled and tools exist - the tool
// definitions with tool_choice "auto". The response body is streamed into a shared byte
// buffer through FSSEReceiveArchive and consumed incrementally in OnResponseProgress.
//
// On a missing config the error is broadcast and the manager is moved to the Error state,
// so it does not remain in the Processing state set by the caller.
void UAvatarCoreAIOpenRouter::SendChatCompletionRequest(FAIMessage CurrentMessage)
{
	if (!OpenRouterConfig)
	{
		BroadcastAIError(TEXT("OpenRouterConfig is null"), EAvatarCoreAIError::InvalidConfig);
		SetNewState(EAvatarCoreAIState::Error);
		return;
	}

	const auto& Settings = OpenRouterConfig->OpenRouterSettings;

	// Build request body
	TSharedPtr<FJsonObject> Body = MakeShared<FJsonObject>();
	Body->SetStringField(TEXT("model"), Settings.BaseAISettings.ModelID);
	Body->SetBoolField(TEXT("stream"), true);
	Body->SetNumberField(TEXT("max_tokens"), OpenRouterConfig->GlobalAISettings.MaxTokens);
	Body->SetNumberField(TEXT("temperature"), OpenRouterConfig->GlobalAISettings.Temperature);
	Body->SetArrayField(TEXT("messages"), BuildMessagesArray(CurrentMessage));

	TArray<TSharedPtr<FJsonValue>> Tools;
	if (Settings.bSendTools)
	{
		Tools = BuildToolsArray();
	}
	if (Tools.Num() > 0)
	{
		Body->SetArrayField(TEXT("tools"), Tools);
		Body->SetStringField(TEXT("tool_choice"), TEXT("auto"));
	}

	FString BodyString;
	TSharedRef<TJsonWriter<>> Writer = TJsonWriterFactory<>::Create(&BodyString);
	FJsonSerializer::Serialize(Body.ToSharedRef(), Writer);

	BroadcastAILog(FString::Printf(TEXT("OpenRouter request to %s/%s (tools: %d)"),
		*Settings.BaseURL, *Settings.BaseAISettings.ModelID, Tools.Num()));
	BroadcastAILog(BodyString, false, true);

	// Tolerate a configured base URL with a trailing slash; otherwise the endpoint
	// would contain "//chat/completions".
	FString EndpointBase = Settings.BaseURL;
	if (EndpointBase.EndsWith(TEXT("/")))
	{
		EndpointBase = EndpointBase.Left(EndpointBase.Len() - 1);
	}

	// Create HTTP request
	ActiveRequest = FHttpModule::Get().CreateRequest();
	ActiveRequest->SetURL(EndpointBase + TEXT("/chat/completions"));
	ActiveRequest->SetVerb(TEXT("POST"));
	ActiveRequest->SetHeader(TEXT("Content-Type"), TEXT("application/json"));
	ActiveRequest->SetHeader(TEXT("Authorization"), TEXT("Bearer ") + Settings.BaseAISettings.APIKey);
	// Optional attribution headers, sent only when configured
	if (!Settings.SiteURL.IsEmpty())
	{
		ActiveRequest->SetHeader(TEXT("HTTP-Referer"), Settings.SiteURL);
	}
	if (!Settings.AppName.IsEmpty())
	{
		ActiveRequest->SetHeader(TEXT("X-Title"), Settings.AppName);
	}
	ActiveRequest->SetContentAsString(BodyString);

	// Attach a receive-stream archive so the HTTP module writes bytes directly into our buffer.
	// This avoids calling GetContent() on an in-progress request, which logs "Payload is incomplete".
	// The lock guards against the HTTP thread (Serialize) racing the game thread (OnResponseProgress).
	SSEStreamBufferPtr = MakeShared<TArray<uint8>>();
	SSEBufferLock = MakeShared<FCriticalSection>();
	ActiveRequest->SetResponseBodyReceiveStream(MakeShareable(new FSSEReceiveArchive(SSEStreamBufferPtr, SSEBufferLock)));

	ActiveRequest->OnRequestProgress64().BindUObject(this, &UAvatarCoreAIOpenRouter::OnResponseProgress);
	ActiveRequest->OnProcessRequestComplete().BindUObject(this, &UAvatarCoreAIOpenRouter::OnRequestComplete);

	ResetRequestTimeout();
	SetNewState(EAvatarCoreAIState::Processing);
	ActiveRequest->ProcessRequest();
}
// ---------------------------------------------------------------------------
// Message / Tool array builders
// ---------------------------------------------------------------------------
// Builds the "messages" array for a chat-completion request.
//
// Order of the result: the current system prompt (if non-empty), the stored conversation
// history with orphaned tool-call entries removed, and CurrentMessage as the last entry.
//
// History pruning works on a local copy:
//  - an assistant entry carrying a tool-call id is dropped when no tool result with the
//    same id follows it (CurrentMessage counts as a result if it is a Tool message with
//    that id);
//  - a tool result is dropped when no earlier assistant entry carries the same id. A tool
//    result with an empty id is always dropped: it cannot pair with a tool call, and
//    without this check it would match any plain assistant message (whose id is empty too)
//    and be sent with an empty tool_call_id.
TArray<TSharedPtr<FJsonValue>> UAvatarCoreAIOpenRouter::BuildMessagesArray(FAIMessage CurrentMessage)
{
	TArray<TSharedPtr<FJsonValue>> Messages;

	// System instructions — always the most recent, sent as first message
	const FString SystemPrompt = GetSystemInstructionPromptString(false);
	if (!SystemPrompt.IsEmpty())
	{
		TSharedPtr<FJsonObject> SysMsg = MakeShared<FJsonObject>();
		SysMsg->SetStringField(TEXT("role"), TEXT("system"));
		SysMsg->SetStringField(TEXT("content"), SystemPrompt);
		Messages.Add(MakeShareable(new FJsonValueObject(SysMsg)));
	}

	// Validate history: remove any assistant tool_calls entry that has no matching tool result,
	// and any tool result that has no preceding assistant tool_calls. Either orphan causes a 400.
	// Iterating backwards keeps indices valid while removing.
	TArray<FAIMessage> History = GetAllPreviousMessage();
	for (int32 i = History.Num() - 1; i >= 0; --i)
	{
		const FAIMessage& Msg = History[i];
		if (Msg.Role == EAvatarCoreAIPromptRole::Assistant && !Msg.Id.IsEmpty())
		{
			bool bHasResult = false;
			for (int32 j = i + 1; j < History.Num(); ++j)
			{
				if (History[j].Role == EAvatarCoreAIPromptRole::Tool && History[j].Id == Msg.Id)
				{
					bHasResult = true;
					break;
				}
			}
			// CurrentMessage may be the tool result that hasn't been added to PreviousMessages yet
			if (!bHasResult && CurrentMessage.Role == EAvatarCoreAIPromptRole::Tool && CurrentMessage.Id == Msg.Id)
			{
				bHasResult = true;
			}
			if (!bHasResult)
			{
				BroadcastAILog(FString::Printf(TEXT("OpenRouter: dropping orphaned tool_calls entry (id=%s) from history"), *Msg.Id), true);
				History.RemoveAt(i);
			}
		}
		else if (Msg.Role == EAvatarCoreAIPromptRole::Tool)
		{
			bool bHasCall = false;
			// Only a non-empty id can identify the call this result belongs to.
			if (!Msg.Id.IsEmpty())
			{
				for (int32 j = 0; j < i; ++j)
				{
					if (History[j].Role == EAvatarCoreAIPromptRole::Assistant && History[j].Id == Msg.Id)
					{
						bHasCall = true;
						break;
					}
				}
			}
			if (!bHasCall)
			{
				BroadcastAILog(FString::Printf(TEXT("OpenRouter: dropping orphaned tool result (id=%s) from history"), *Msg.Id), true);
				History.RemoveAt(i);
			}
		}
	}

	// Converts one FAIMessage into its JSON form and appends it to Messages.
	auto AppendMessage = [&](const FAIMessage& Msg)
	{
		TSharedPtr<FJsonObject> MsgObj = MakeShared<FJsonObject>();
		if (Msg.Role == EAvatarCoreAIPromptRole::Tool)
		{
			MsgObj->SetStringField(TEXT("role"), TEXT("tool"));
			MsgObj->SetStringField(TEXT("tool_call_id"), Msg.Id);
			MsgObj->SetStringField(TEXT("content"), Msg.Message);
		}
		else if (Msg.Role == EAvatarCoreAIPromptRole::Assistant && !Msg.Id.IsEmpty())
		{
			// Assistant message that triggered a tool call; Message contains the tool_calls JSON array.
			// content must be null (not "") when tool_calls is present — OpenAI spec requirement.
			MsgObj->SetStringField(TEXT("role"), TEXT("assistant"));
			MsgObj->SetField(TEXT("content"), MakeShareable(new FJsonValueNull()));
			TArray<TSharedPtr<FJsonValue>> ToolCallsArray;
			TSharedRef<TJsonReader<>> TCReader = TJsonReaderFactory<>::Create(Msg.Message);
			if (FJsonSerializer::Deserialize(TCReader, ToolCallsArray))
			{
				MsgObj->SetArrayField(TEXT("tool_calls"), ToolCallsArray);
			}
			else
			{
				// Unparseable stored tool_calls: skip this entry entirely.
				BroadcastAILog(FString::Printf(TEXT("OpenRouter: failed to parse stored tool_calls JSON: %s"), *Msg.Message));
				return;
			}
		}
		else
		{
			FString RoleStr;
			switch (Msg.Role)
			{
			case EAvatarCoreAIPromptRole::User: RoleStr = TEXT("user"); break;
			case EAvatarCoreAIPromptRole::Assistant: RoleStr = TEXT("assistant"); break;
			case EAvatarCoreAIPromptRole::System: RoleStr = TEXT("system"); break;
			default: RoleStr = TEXT("user"); break;
			}
			MsgObj->SetStringField(TEXT("role"), RoleStr);
			MsgObj->SetStringField(TEXT("content"), Msg.Message);
		}
		Messages.Add(MakeShareable(new FJsonValueObject(MsgObj)));
	};

	// Conversation history (does not include CurrentMessage — it was not yet added to PreviousMessages)
	for (const FAIMessage& Msg : History)
	{
		AppendMessage(Msg);
	}

	// Current message appended last — ensures it is always the newest entry
	AppendMessage(CurrentMessage);
	return Messages;
}
// Converts the available MCP commands into the "tools" array of a chat-completion request.
// Each named command becomes a {"type":"function","function":{...}} entry; commands with an
// empty name are skipped. Names are cut to 64 characters. A non-empty InputScheme that is
// not valid JSON is reported as MCPError and the tool is still added, just without
// "parameters".
TArray<TSharedPtr<FJsonValue>> UAvatarCoreAIOpenRouter::BuildToolsArray()
{
	TArray<TSharedPtr<FJsonValue>> Result;

	for (const FMCPToolInfo& Tool : GetAvailableCommands())
	{
		if (!Tool.Name.IsEmpty())
		{
			// "function" payload: name, optional description, optional parameter schema
			TSharedPtr<FJsonObject> Function = MakeShareable(new FJsonObject);
			Function->SetStringField(TEXT("name"), Tool.Name.Left(64));

			if (!Tool.Description.IsEmpty())
			{
				Function->SetStringField(TEXT("description"), Tool.Description);
			}

			if (!Tool.InputScheme.IsEmpty())
			{
				TSharedPtr<FJsonObject> Schema;
				TSharedRef<TJsonReader<>> SchemaReader = TJsonReaderFactory<>::Create(Tool.InputScheme);
				const bool bSchemaOk = FJsonSerializer::Deserialize(SchemaReader, Schema) && Schema.IsValid();
				if (bSchemaOk)
				{
					Function->SetObjectField(TEXT("parameters"), Schema);
				}
				else
				{
					BroadcastAIError(FString::Printf(TEXT("InputScheme of '%s' is not valid JSON"), *Tool.Name), EAvatarCoreAIError::MCPError);
				}
			}

			// Wrapper object expected around every function definition
			TSharedPtr<FJsonObject> Wrapper = MakeShareable(new FJsonObject);
			Wrapper->SetStringField(TEXT("type"), TEXT("function"));
			Wrapper->SetObjectField(TEXT("function"), Function);
			Result.Add(MakeShareable(new FJsonValueObject(Wrapper)));
		}
	}

	return Result;
}
// ---------------------------------------------------------------------------
// SSE streaming
// ---------------------------------------------------------------------------
// Progress callback of the streaming request.
// Copies the bytes that arrived since the last call out of the shared receive buffer,
// appends them to SSERawBuffer, and feeds every complete line to ParseSSELine. An
// incomplete trailing line stays in SSERawBuffer until more data arrives.
// Request, BytesSent and BytesReceived are not used; the amount of new data is derived
// from the buffer size and SSEByteOffset.
void UAvatarCoreAIOpenRouter::OnResponseProgress(FHttpRequestPtr Request, uint64 BytesSent, uint64 BytesReceived)
{
// Take local shared-ptr copies first. If ResetSSEState fires inside ParseSSELine
// (same call stack, game thread) it will null the members, but our locals keep the
// objects alive and the lock valid for the duration of this callback.
TSharedPtr<TArray<uint8>> Buffer = SSEStreamBufferPtr;
TSharedPtr<FCriticalSection> Lock = SSEBufferLock;
if (!Buffer.IsValid() || !Lock.IsValid())
return;
// Lock only for the brief copy of newly-arrived bytes.
// Holding the lock any longer would block the HTTP thread unnecessarily.
TArray<uint8> NewBytes;
{
FScopeLock ScopeLock(Lock.Get());
const int32 Total = Buffer->Num();
const int32 ToRead = Total - SSEByteOffset;
// Nothing new since the previous callback
if (ToRead <= 0)
return;
NewBytes.Append(Buffer->GetData() + SSEByteOffset, ToRead);
// Remember how far the shared buffer has been consumed
SSEByteOffset = Total;
}
// Append only the newly arrived bytes as raw bytes — never convert to ANSI here,
// because UTF-8 multibyte sequences may straddle callback boundaries.
SSERawBuffer.Append(NewBytes);
// Scan for complete lines. '\n' (0x0A) is never a continuation byte in UTF-8,
// so this scan is safe on raw UTF-8 data.
// StartPos marks the first byte of the line currently being assembled.
int32 StartPos = 0;
const int32 BufferSize = SSERawBuffer.Num();
for (int32 i = 0; i < BufferSize; ++i)
{
if (SSERawBuffer[i] == '\n')
{
int32 End = i;
if (End > StartPos && SSERawBuffer[End - 1] == '\r')
--End; // strip \r
// Empty lines are skipped without calling the parser
if (End > StartPos)
{
// Null-terminate the slice and convert UTF-8 → FString
TArray<uint8> Slice(SSERawBuffer.GetData() + StartPos, End - StartPos);
Slice.Add(0);
ParseSSELine(UTF8_TO_TCHAR((ANSICHAR*)Slice.GetData()));
}
StartPos = i + 1;
// ParseSSELine may have triggered ResetSSEState (e.g. via OnAIReady → SendResponseChild).
// If the buffer was cleared, stop processing stale data.
if (SSERawBuffer.Num() == 0)
return;
}
}
// Discard all processed bytes; keep the partial trailing line in the buffer.
// Guard against the buffer being reset by a ParseSSELine side-effect.
if (StartPos > 0 && StartPos <= SSERawBuffer.Num())
SSERawBuffer.RemoveAt(0, StartPos);
}
void UAvatarCoreAIOpenRouter::ParseSSELine(const FString& Line)
{
// SSE lines look like: "data: {...}" or "data: [DONE]"
if (!Line.StartsWith(TEXT("data: ")))
return;
FString Data = Line.Mid(6); // strip "data: "
if (Data == TEXT("[DONE]"))
return;
TSharedPtr<FJsonObject> JsonObj;
TSharedRef<TJsonReader<>> Reader = TJsonReaderFactory<>::Create(Data);
if (!FJsonSerializer::Deserialize(Reader, JsonObj) || !JsonObj.IsValid())
{
BroadcastAILog(FString::Printf(TEXT("OpenRouter: failed to parse SSE chunk: %s"), *Data), false, true);
return;
}
const TArray<TSharedPtr<FJsonValue>>* Choices;
if (!JsonObj->TryGetArrayField(TEXT("choices"), Choices) || Choices->Num() == 0)
return;
TSharedPtr<FJsonObject> Choice = (*Choices)[0]->AsObject();
if (!Choice.IsValid())
return;
TSharedPtr<FJsonObject> Delta = Choice->GetObjectField(TEXT("delta"));
// Text content chunk
FString ContentChunk;
if (Delta.IsValid() && Delta->TryGetStringField(TEXT("content"), ContentChunk) && !ContentChunk.IsEmpty())
{
ClearRequestTimeout();
OnAIResponse(ContentChunk, false);
}
// Tool call argument accumulation
const TArray<TSharedPtr<FJsonValue>>* ToolCallsArr;
if (Delta.IsValid() && Delta->TryGetArrayField(TEXT("tool_calls"), ToolCallsArr))
{
for (const TSharedPtr<FJsonValue>& TCValue : *ToolCallsArr)
{
TSharedPtr<FJsonObject> TC = TCValue->AsObject();
if (!TC.IsValid()) continue;
int32 Idx = 0;
TC->TryGetNumberField(TEXT("index"), Idx);
// Name and id only arrive on the first delta for this index
FString TCName, TCId;
TSharedPtr<FJsonObject> FuncObj = TC->GetObjectField(TEXT("function"));
if (FuncObj.IsValid())
{
FuncObj->TryGetStringField(TEXT("name"), TCName);
if (!TCName.IsEmpty())
ToolCallNameMap.FindOrAdd(Idx) += TCName;
FString ArgsDelta;
if (FuncObj->TryGetStringField(TEXT("arguments"), ArgsDelta))
ToolCallArgsMap.FindOrAdd(Idx) += ArgsDelta;
}
if (TC->TryGetStringField(TEXT("id"), TCId) && !TCId.IsEmpty())
ToolCallIdMap.FindOrAdd(Idx) = TCId;
}
}
// Finish reason
FString FinishReason;
Choice->TryGetStringField(TEXT("finish_reason"), FinishReason);
if (FinishReason == TEXT("stop") && !bResponseComplete)
{
bResponseComplete = true;
ClearRequestTimeout();
ActiveRequest.Reset();
OnAIResponse(TEXT(""), true); // IsFinal — base adds assistant message to history
SetNewState(EAvatarCoreAIState::Ready);
OnAIReady(); // drains ResponseQueue
}
else if (FinishReason == TEXT("tool_calls") && !bResponseComplete)
{
bResponseComplete = true;
ClearRequestTimeout();
ActiveRequest.Reset();
HandleToolCallsDone();
}
}
void UAvatarCoreAIOpenRouter::HandleToolCallsDone()
{
// First version: handle the tool call at index 0 (same single-call behaviour as Realtime)
if (!ToolCallIdMap.Contains(0) || !ToolCallNameMap.Contains(0))
{
BroadcastAIError(TEXT("OpenRouter: tool_calls done but no accumulated call data at index 0"),
EAvatarCoreAIError::FunctionCallFailed);
SetNewState(EAvatarCoreAIState::Error);
return;
}
FString CallId = ToolCallIdMap[0];
FString CallName = ToolCallNameMap[0];
FString CallArgs = ToolCallArgsMap.FindRef(0);
BroadcastAILog(FString::Printf(TEXT("OpenRouter: tool call '%s' (id=%s) args=%s"), *CallName, *CallId, *CallArgs), true);
// Build the tool_calls JSON array to persist in message history so the next
// request has the correct assistant→tool context.
TSharedPtr<FJsonObject> FuncObj = MakeShareable(new FJsonObject);
FuncObj->SetStringField(TEXT("name"), CallName);
FuncObj->SetStringField(TEXT("arguments"), CallArgs);
TSharedPtr<FJsonObject> ToolCallObj = MakeShareable(new FJsonObject);
ToolCallObj->SetStringField(TEXT("id"), CallId);
ToolCallObj->SetStringField(TEXT("type"), TEXT("function"));
ToolCallObj->SetObjectField(TEXT("function"), FuncObj);
TArray<TSharedPtr<FJsonValue>> ToolCallsArray;
ToolCallsArray.Add(MakeShareable(new FJsonValueObject(ToolCallObj)));
FString ToolCallsJson;
TSharedRef<TJsonWriter<>> TCWriter = TJsonWriterFactory<>::Create(&ToolCallsJson);
FJsonSerializer::Serialize(ToolCallsArray, TCWriter);
// Add assistant message with tool_calls to history
FAIMessage AssistantToolCall;
AssistantToolCall.Role = EAvatarCoreAIPromptRole::Assistant;
AssistantToolCall.Id = CallId;
AssistantToolCall.Message = ToolCallsJson; // BuildMessagesArray reads this back
AddMessageToArray(AssistantToolCall);
// RunMCPCommand sets GettingInfo state; base propagates CallId → CommandFinished → Tool role result
RunMCPCommand(CallName, CallArgs, CallId);
}
// ---------------------------------------------------------------------------
// HTTP completion handler
// ---------------------------------------------------------------------------
// Completion callback of the HTTP request. Successful streams are fully handled by
// ParseSSELine, so this function only reports failures.
//
// Completions that do not belong to the live request are ignored: when a newer request is
// already active, the completing one was superseded and must neither reset ActiveRequest
// nor change the manager state. A failure reported after the stream already delivered its
// finish_reason is ignored as well, because the response was processed in full.
//
// For real failures the response body is recovered from the receive-stream buffer (the
// response object's own content is empty while a receive stream is attached), the error is
// broadcast as NetworkError, and the manager enters the Error state.
void UAvatarCoreAIOpenRouter::OnRequestComplete(FHttpRequestPtr Request, FHttpResponsePtr Response, bool bSuccess)
{
	// Completion of a superseded request: leave the live request and its state untouched.
	if (ActiveRequest.IsValid() && ActiveRequest != Request)
	{
		return;
	}
	ActiveRequest.Reset();

	// Successful streaming already handled by ParseSSELine.
	// This callback is for error cases only.
	if (bSuccess && Response.IsValid() && Response->GetResponseCode() < 300)
	{
		return;
	}

	// The stream already finished via its finish_reason; a late transport failure is irrelevant.
	if (bResponseComplete)
	{
		return;
	}

	ClearRequestTimeout();
	const int32 Code = Response.IsValid() ? Response->GetResponseCode() : 0;

	// GetContentAsString() is empty when SetResponseBodyReceiveStream is active —
	// all bytes (including error bodies) were written to SSEStreamBufferPtr.
	FString Body;
	if (Response.IsValid())
	{
		Body = Response->GetContentAsString();
		if (Body.IsEmpty() && SSEStreamBufferPtr.IsValid() && SSEStreamBufferPtr->Num() > 0)
		{
			// Copy and null-terminate before converting UTF-8 → FString
			TArray<uint8> BodyBytes = *SSEStreamBufferPtr;
			BodyBytes.Add(0);
			Body = UTF8_TO_TCHAR((ANSICHAR*)BodyBytes.GetData());
		}
	}

	// 422 with a tools-related body means this model doesn't support OpenAI-compatible
	// function calling. Disable bSendTools in the config for this model.
	if (Code == 422 && Body.Contains(TEXT("tools")))
	{
		// The config may no longer be available when the callback runs; do not dereference blindly.
		const FString ModelID = OpenRouterConfig
			? OpenRouterConfig->OpenRouterSettings.BaseAISettings.ModelID
			: FString(TEXT("<unknown>"));
		BroadcastAIError(
			FString::Printf(TEXT("OpenRouter 422: model '%s' rejected the tools format. "
				"Disable bSendTools in OpenRouterConfig for models that don't support "
				"OpenAI-compatible function calling. Raw: %s"), *ModelID, *Body),
			EAvatarCoreAIError::NetworkError);
	}
	else
	{
		BroadcastAIError(
			FString::Printf(TEXT("OpenRouter HTTP error %d: %s"), Code, *Body),
			EAvatarCoreAIError::NetworkError);
	}
	SetNewState(EAvatarCoreAIState::Error);
}

96
Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Private/RealtimeAPI/AvatarCoreAIRealtime.cpp

@ -37,9 +37,9 @@ void UAvatarCoreAIRealtime::DeactivateAI()
UAvatarCoreAIRealtime::DisconnectFromWebSocket();
}
void UAvatarCoreAIRealtime::SendResponseChild(FAIMessage Message, bool NotifyDelay, bool TriggerResponse)
void UAvatarCoreAIRealtime::SendResponseChild(FAIMessage Message, bool NotifyDelay)
{
UAvatarCoreAIRealtime::CreateConversationItem(Message, TriggerResponse);
UAvatarCoreAIRealtime::CreateConversationItem(Message);
}
void UAvatarCoreAIRealtime::ClearAI()
@ -51,7 +51,9 @@ void UAvatarCoreAIRealtime::ClearAI()
}
void UAvatarCoreAIRealtime::UpdateSession()
{
{
UAvatarCoreAIRealtime::SetNewState(EAvatarCoreAIState::Initializing);
if (!RealtimeConfig) {
BroadcastAIError("RealtimeConfig is null in UpdateSession", EAvatarCoreAIError::InvalidConfig);
return;
@ -61,7 +63,7 @@ void UAvatarCoreAIRealtime::UpdateSession()
TArray<FString> Modalities;
// Voice as string
FString VoiceStr = StaticEnum<EOpenAIRealtimeVoice>()->GetNameStringByValue((int64)RealtimeConfig->Voice).ToLower();
FString VoiceStr = StaticEnum<EOpenAIRealtimeVoice>()->GetNameStringByValue((int64)RealtimeConfig->RealtimeSettings.Voice).ToLower();
if (VoiceStr == TEXT("undefined")) VoiceStr = TEXT("");
// Build session object
@ -69,7 +71,7 @@ void UAvatarCoreAIRealtime::UpdateSession()
FString InstructionsString = UAIBaseManager::GetSystemInstructionPromptString(false);
if (RealtimeConfig->AIModelAudioOutput)
if (RealtimeConfig->GlobalAISettings.AIModelAudioOutput)
Modalities.Add(TEXT("audio"));
else
Modalities.Add(TEXT("text"));
@ -84,7 +86,7 @@ void UAvatarCoreAIRealtime::UpdateSession()
SessionObj->SetStringField(TEXT("type"), TEXT("realtime"));
SessionObj->SetNumberField(TEXT("max_output_tokens"), RealtimeConfig->MaxTokens);
SessionObj->SetNumberField(TEXT("max_output_tokens"), RealtimeConfig->GlobalAISettings.MaxTokens);
// Add available tools as functions to the session JSON
TArray<FMCPToolInfo> Commands = GetAvailableCommands();
@ -130,7 +132,7 @@ void UAvatarCoreAIRealtime::UpdateSession()
TSharedPtr<FJsonObject> AudioObj = MakeShareable(new FJsonObject);
SessionObj->SetObjectField(TEXT("audio"), AudioObj);
if (RealtimeConfig->InputAudioStreaming) {
if (RealtimeConfig->RealtimeSettings.InputAudioStreaming) {
TSharedPtr<FJsonObject> AudioInputObj = MakeShareable(new FJsonObject);
AudioObj->SetObjectField(TEXT("input"), AudioInputObj);
@ -158,7 +160,7 @@ void UAvatarCoreAIRealtime::UpdateSession()
}
if (RealtimeConfig->AIModelAudioOutput) {
if (RealtimeConfig->GlobalAISettings.AIModelAudioOutput) {
if (!VoiceStr.IsEmpty()) {
TSharedPtr<FJsonObject> AudioOutputObj = MakeShareable(new FJsonObject);
AudioObj->SetObjectField(TEXT("output"), AudioOutputObj);
@ -186,18 +188,18 @@ void UAvatarCoreAIRealtime::UpdateSession()
void UAvatarCoreAIRealtime::ConnectToWebSocket()
{
FString ServerURL = TEXT("wss://" + RealtimeConfig->BaseURL + "/v1/realtime?model=" + RealtimeConfig->Model);
FString ServerURL = TEXT("wss://" + RealtimeConfig->RealtimeSettings.BaseURL + "/v1/realtime?model=" + RealtimeConfig->RealtimeSettings.BaseAISettings.ModelID);
BroadcastAILog(FString::Printf(TEXT("OpenAI ServerURL: %s"), *ServerURL));
FString ServerProtocol = TEXT("");
// Set up headers for authentication
TMap<FString, FString> Headers;
if(RealtimeConfig->IsAzureOpenAI)
Headers.Add(TEXT("api-key"), *RealtimeConfig->APIKey);
if(RealtimeConfig->RealtimeSettings.IsAzureOpenAI)
Headers.Add(TEXT("api-key"), *RealtimeConfig->RealtimeSettings.BaseAISettings.APIKey);
else
{
Headers.Add(TEXT("Authorization"), TEXT("Bearer " + RealtimeConfig->APIKey));
Headers.Add(TEXT("Authorization"), TEXT("Bearer " + RealtimeConfig->RealtimeSettings.BaseAISettings.APIKey));
}
@ -254,7 +256,7 @@ void UAvatarCoreAIRealtime::WebSocketSendType(const FString& type)
// Serialize the JSON object
if (FJsonSerializer::Serialize(JsonObject.ToSharedRef(), Writer))
{
BroadcastAILog(FString::Printf(TEXT("Sending Type: %s"), *OutputString), false);
BroadcastAILog(FString::Printf(TEXT("Sending Type: %s"), *OutputString), false, true);
UAvatarCoreAIRealtime::WebSocketSend(OutputString);
}
else
@ -264,40 +266,62 @@ void UAvatarCoreAIRealtime::WebSocketSendType(const FString& type)
}
}
void UAvatarCoreAIRealtime::CreateConversationItem(FAIMessage Message, bool triggerResponse)
void UAvatarCoreAIRealtime::CreateConversationItem(FAIMessage Message)
{
TSharedPtr<FJsonObject> RootObject = MakeShareable(new FJsonObject);
RootObject->SetStringField("type", "conversation.item.create");
// Create the item object
TSharedPtr<FJsonObject> ItemObject = MakeShareable(new FJsonObject);
// Tool result — map to Realtime API's function_call_output item type
if (Message.Role == EAvatarCoreAIPromptRole::Tool)
{
ItemObject->SetStringField("type", "function_call_output");
ItemObject->SetStringField("call_id", Message.Id);
ItemObject->SetStringField("output", Message.Message);
RootObject->SetObjectField("item", ItemObject);
FString OutputString;
TSharedRef<TJsonWriter<TCHAR, TCondensedJsonPrintPolicy<TCHAR>>> Writer =
TJsonWriterFactory<TCHAR, TCondensedJsonPrintPolicy<TCHAR>>::Create(&OutputString);
FJsonSerializer::Serialize(RootObject.ToSharedRef(), Writer);
UAvatarCoreAIRealtime::WebSocketSend(OutputString);
if (Message.bTriggerResponse)
UAvatarCoreAIRealtime::CreateReseponse();
return;
}
ItemObject->SetStringField("type", "message");
ItemObject->SetStringField("role", UAvatarCoreAIRealtime::GetRoleAsString(Message.Role));
// Create the content array with an audio object inside
FString ContentType = "input_text";
if (Message.Role == EAvatarCoreAIPromptRole::Assistant)
{
ContentType = "output_text";
}
TArray<TSharedPtr<FJsonValue>> ContentArray;
TSharedPtr<FJsonObject> TextObject = MakeShareable(new FJsonObject);
TextObject->SetStringField("type", ContentType);
TextObject->SetStringField("text", Message.Message);
ContentArray.Add(MakeShareable(new FJsonValueObject(TextObject)));
TSharedPtr<FJsonObject> AudioObject = MakeShareable(new FJsonObject);
if(Message.Role == EAvatarCoreAIPromptRole::User || triggerResponse)
AudioObject->SetStringField("type", "input_text");
else
AudioObject->SetStringField("type", "output_text");
AudioObject->SetStringField("text", Message.Message);
ContentArray.Add(MakeShareable(new FJsonValueObject(AudioObject)));
ItemObject->SetArrayField("content", ContentArray);
// Add item to the root object
RootObject->SetObjectField("item", ItemObject);
// Convert the root JSON object to a string
// Optional but recommended when replaying history:
// RootObject->SetStringField("previous_item_id", LastKnownItemId);
FString OutputString;
TSharedRef<TJsonWriter<TCHAR, TCondensedJsonPrintPolicy<TCHAR>>> Writer =
TJsonWriterFactory<TCHAR, TCondensedJsonPrintPolicy<TCHAR>>::Create(&OutputString);
FJsonSerializer::Serialize(RootObject.ToSharedRef(), Writer);
UAvatarCoreAIRealtime::WebSocketSend(OutputString); //Send the Message
UAvatarCoreAIRealtime::WebSocketSend(OutputString);
if (triggerResponse) {
if (Message.bTriggerResponse)
{
UAvatarCoreAIRealtime::CreateReseponse();
}
}
@ -316,7 +340,7 @@ void UAvatarCoreAIRealtime::CreateReseponse()
// Create the content array with an audio object inside
TArray<TSharedPtr<FJsonValue>> ModalitiesArray; //There is an error if we submit both text and audio; nevertheless text is always included in audio mode
if(RealtimeConfig->AIModelAudioOutput)
if(RealtimeConfig->GlobalAISettings.AIModelAudioOutput)
ModalitiesArray.Add(MakeShareable(new FJsonValueString("audio")));
else
ModalitiesArray.Add(MakeShareable(new FJsonValueString("text")));
@ -504,7 +528,7 @@ void UAvatarCoreAIRealtime::OnWebSocketConnectionStringReceived(const FString& M
JsonObject->TryGetStringField(TEXT("delta"), DeltaResponse.delta);
float CurrentRequestDuration = (FDateTime::Now() - CurrentRequestStartTime).GetTotalSeconds();
BroadcastAILog(FString::Printf(TEXT("Response of type %s after %f seconds"), *TypeString, CurrentRequestDuration));
BroadcastAILog(FString::Printf(TEXT("Response of type %s after %f seconds"), *TypeString, CurrentRequestDuration), false, true);
if (DeltaResponse.response_id != CurrentRequestID) //Has been cancelled and cleared
{
@ -526,13 +550,13 @@ void UAvatarCoreAIRealtime::OnWebSocketConnectionStringReceived(const FString& M
}
//float CurrentRequestDuration = (FDateTime::Now() - CurrentRequestStartTime).GetTotalSeconds();
if (CurrentRequestDuration < 0.5f && !functionCallRunning)
if (CurrentRequestDuration < 0.25f && !functionCallRunning && !CurrentRequestID.IsEmpty())
{
CurrentRequestID.Empty();
if (CurrentRetries < MaxRetries)
{
CurrentRetries++;
BroadcastAILog(FString::Printf(TEXT("Response.done is way to fast. Something is fishy. Let's try again OpenAI! %s"), *TypeString), true);
BroadcastAILog(FString::Printf(TEXT("Response.done is way to fast. Something is fishy %s"), *TypeString), true);
CreateReseponse();
}
else {
@ -543,7 +567,7 @@ void UAvatarCoreAIRealtime::OnWebSocketConnectionStringReceived(const FString& M
}
}
if (RealtimeConfig->AIModelAudioOutput && ResponseAudioDone && ResponseTextDone || !RealtimeConfig->AIModelAudioOutput && ResponseTextDone)
if (RealtimeConfig->GlobalAISettings.AIModelAudioOutput && ResponseAudioDone && ResponseTextDone || !RealtimeConfig->GlobalAISettings.AIModelAudioOutput && ResponseTextDone)
{
ClearRequestTimeout();
RequestState = EOpenAIRequestState::done;
@ -592,7 +616,7 @@ void UAvatarCoreAIRealtime::OnWebSocketConnectionStringReceived(const FString& M
// Resize and directly copy raw PCM data into PCMData
PCMData.AddUninitialized(Size);
FBase64::Decode(DeltaResponse.delta, PCMData);
BroadcastAILog(FString::Printf(TEXT("Adding: %s"), *TypeString));
BroadcastAILog(FString::Printf(TEXT("Adding: %s"), *TypeString), false, true);
OnAudioChunk.Broadcast(PCMData, false);
}
}
@ -651,7 +675,7 @@ void UAvatarCoreAIRealtime::OnWebSocketConnectionDropped(int32 StatusCode, const
/**
 * Sets whether the AI model should produce audio output.
 *
 * The flag is stored in the realtime config's GlobalAISettings, which is the
 * field every other reader in this file checks.
 *
 * @param InAudioOutput New value for GlobalAISettings.AIModelAudioOutput.
 */
void UAvatarCoreAIRealtime::SetOpenAIAudioOutput(bool InAudioOutput)
{
	// RealtimeConfig is dereferenced below; skip the write when no config is set.
	if (!RealtimeConfig)
	{
		return;
	}
	// Only the GlobalAISettings copy is written; the old top-level field is no longer used.
	RealtimeConfig->GlobalAISettings.AIModelAudioOutput = InAudioOutput;
}
void UAvatarCoreAIRealtime::OnSTTAudioChunk(TArray<int16> AudioChunks)

77
Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/AIBaseConfig.h

@ -6,27 +6,28 @@
#include "MCP/MCPUnrealCommand.h"
#include "MCP/MCPBaseConfig.h"
#include "UObject/NoExportTypes.h"
#include "AvatarCoreAIEnumsAndStructs.h"
#include "AIBaseConfig.generated.h"
class UMCPBaseManager;
class UAIBaseManager;
/**
*
*/
UCLASS(Abstract, Blueprintable, BlueprintType)
class AVATARCORE_AI_API UAIBaseConfig : public UObject
//These are still base settings but separate so that the global settings can be applied for various AI providers
USTRUCT(BlueprintType)
struct FBaseAISettings
{
GENERATED_BODY()
public:
//Class of the Manager
UPROPERTY(BlueprintReadOnly, Category = "AvatarCoreAI|Base")
TSubclassOf<UAIBaseManager> AIManagerClass;
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
FString APIKey = "";
// All those neat little system prompts that make our avatars sooo great
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
TArray<FSystemInstruction> SystemPrompts;
FString ModelID = "";
};
USTRUCT(BlueprintType)
struct FGlobalAISettings
{
GENERATED_BODY()
// Check user transcription for inappropriate behaviour first (adds a delay!)
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|MCP", meta = (ExposeOnSpawn = "true"))
@ -36,22 +37,16 @@ public:
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|MCP", meta = (ExposeOnSpawn = "true"))
bool bUseMCPServer = true;
// Class of the MCP Server
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|MCP", meta = (ExposeOnSpawn = "true"))
TSubclassOf<UMCPBaseManager> MCPManagerClass;
// Config of the MCP Server
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|MCP", meta = (ExposeOnSpawn = "true"))
UMCPBaseConfig* MCPConfig;
// Array of Unreal command objects to be used by this config
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|MCP", meta = (ExposeOnSpawn = "true"))
TArray<TSubclassOf<UMCPUnrealCommand>> UnrealCommands;
// Does the AI model generate Audio Chunks that can be forwarded to the TTS Manager?
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
bool AIModelAudioOutput = false;
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
int32 MaxTokens = 1500;
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
float Temperature = 0.8f;
// Timeout for a single AI request (presumably in seconds - TODO confirm)
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
int RequestTimeout = 10.0f;
@ -64,3 +59,37 @@ public:
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
int MaxMessages = -1;
};
/**
* Abstract base class for AI provider configurations.
* Provider-specific configs (e.g. UOpenRouterConfig, URealtimeAPIConfig) derive from it
* and add their own settings struct next to the shared settings declared here.
*/
UCLASS(Abstract, Blueprintable, BlueprintType)
class AVATARCORE_AI_API UAIBaseConfig : public UObject
{
GENERATED_BODY()
public:
// Manager class that belongs to this config. Read-only for Blueprints;
// derived configs assign it in their constructor.
UPROPERTY(BlueprintReadOnly, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
TSubclassOf<UAIBaseManager> AIManagerClass;
// Settings shared by all AI providers (see FGlobalAISettings)
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
FGlobalAISettings GlobalAISettings;
// System prompts (system instructions) configured for this AI
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
TArray<FSystemInstruction> SystemPrompts;
// Unreal command classes to be used by this config
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|MCP", meta = (ExposeOnSpawn = "true"))
TArray<TSubclassOf<UMCPUnrealCommand>> UnrealCommands;
// Class of the MCP manager (MCP server integration)
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|MCP", meta = (ExposeOnSpawn = "true"))
TSubclassOf<UMCPBaseManager> MCPManagerClass;
// Config object for the MCP server
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|MCP", meta = (ExposeOnSpawn = "true"))
UMCPBaseConfig* MCPConfig;
};

69
Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/AIBaseManager.h

@ -4,15 +4,17 @@
#include "CoreMinimal.h"
#include "UObject/Object.h"
#include "AIBaseConfig.h"
#include "AIBaseConfig.h"
#include "AvatarCoreAIEnumsAndStructs.h"
#include "MCP/MCPBaseManager.h"
#include "AIBaseManager.generated.h"
class FJsonValue;
// Delegate/Event Declarations
DECLARE_DYNAMIC_MULTICAST_DELEGATE_FourParams(FOnAISubtitle, FString, Chunk, FString, Answer, int, responseID, bool, IsFinal);
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnAIStateChanged, EAvatarCoreAIState, NewState);
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnAILog, FString, Message);
DECLARE_DYNAMIC_MULTICAST_DELEGATE_TwoParams(FOnAILog, FString, Message, bool, VeryVerbose);
DECLARE_DYNAMIC_MULTICAST_DELEGATE_TwoParams(FOnAIError, FString, ErrorMessage, EAvatarCoreAIError, ErrorType);
DECLARE_DYNAMIC_MULTICAST_DELEGATE_TwoParams(FMulticastDelegateRealtimeAPIAudioChunk, const TArray<uint8>, PCMData, bool, IsFinal);
DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAIDelayedAnswer);
@ -111,10 +113,10 @@ public:
* Send Response/Question to the AI Model. If NotifyDelay is true call the DelayedAnswer Event when time defined in AIConfig has passed.
*/
UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI")
void SendResponse(FAIMessage Message, bool NotifyDelay, bool TriggerResponse);
void SendResponse(FAIMessage Message, bool NotifyDelay);
UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI")
virtual void SendResponseChild(FAIMessage Message, bool NotifyDelay, bool TriggerResponse) {};
virtual void SendResponseChild(FAIMessage Message, bool NotifyDelay) {};
/**
* Make the AI Model repeat the Text.
@ -132,7 +134,7 @@ public:
* Log a debug message and fire the OnAILog event on the game thread.
*/
UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI|Debug")
virtual void BroadcastAILog(const FString& Message, bool ShowAlways = false);
virtual void BroadcastAILog(const FString& Message, bool ShowAlways = false, bool VeryVerbose = false);
/**
* Broadcast an AI error and fire the OnAIError event on the game thread.
@ -150,19 +152,25 @@ public:
* Set system instruction by array.
*/
UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI|System Instruction")
void AddSystemInstructions(const TArray<FSystemInstruction> SystemInstructions, bool WipeCurrent);
void AddSystemInstructions(const TArray<FSystemInstruction> SystemInstructions, bool AutoSyncWithAI);
/**
* Add a new system instruction by name.
*/
UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI|System Instruction")
void AddSystemInstruction(const FName Name, const FString NewSystemInstruction, bool AddAsFirst);
void AddSystemInstruction(const FSystemInstruction SystemInstruction, bool AddAsFirst, bool AutoSyncWithAI);
/**
* Remove a system instruction by name.
*/
UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI|System Instruction")
void RemoveSystemInstruction(const FName SystemInstruction, bool AutoSyncWithAI);
/**
* Remove several system instructions by name.
*/
UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI|System Instruction")
void RemoveSystemInstruction(const FName Name);
void RemoveSystemInstructions(const TArray<FName> SystemInstructions, bool AutoSyncWithAI);
/**
* Parse to System Prompt
@ -174,7 +182,7 @@ public:
* Clear all System Instruction.
*/
UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI|System Instruction")
void ClearAllSystemInstructios();
void ClearAllSystemInstructions(bool AutoSyncWithAI);
// Add the prompt that let the avatar repeat what we want it to say
UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI|System Instruction")
@ -192,9 +200,9 @@ public:
UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI|MCP")
UMCPBaseManager* GetMCPManager();
// Add a command at runtime (handles AddToRoot)
// Register a command class at runtime; an instance is created only when the command is invoked
UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI|MCP Commands")
void AddUnrealCommand(UMCPUnrealCommand* Command);
void AddUnrealCommand(TSubclassOf<UMCPUnrealCommand> CommandClass);
// Remove a command by name at runtime (handles RemoveFromRoot)
UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI|Commands")
@ -206,8 +214,9 @@ public:
/**
* Runs a command by name with the given payload, rebinding completion/failure events.
* ToolCallId is stored on the command instance and propagated back through CommandFinished FAIMessage.Id.
*/
void RunMCPCommand(FString CommandName, FString Payload);
void RunMCPCommand(FString CommandName, FString Payload, FString ToolCallId = TEXT(""));
// Clear all running commands
UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI|MCP Commands")
@ -218,16 +227,21 @@ public:
protected:
/**
* Called when a command finishes successfully. Override in child classes to handle result.
*/
/** Bound to UMCPUnrealCommand::OnCommandDone — message already has Role=Tool and Id set. */
UFUNCTION()
void CommandFinished(const FString& Command, const FString& Payload);
/**
* Called when a command fails. Override in child classes to handle error.
*/
void CommandFinished(const FAIMessage& Message);
/** Bound to UMCPUnrealCommand::OnCommandFailed. */
UFUNCTION()
void CommandFailed(const FAIMessage& Message);
/** Bound to MCPManager::OnMCPCommandDone — constructs FAIMessage from raw strings. */
UFUNCTION()
void MCPCommandFinished(const FString& Command, const FString& Payload);
/** Bound to MCPManager::OnMCPCommandFailed. */
UFUNCTION()
void CommandFailed(const FString& Command, const FString& Payload);
void MCPCommandFailed(const FString& Command, const FString& Payload);
//Add System/User/Assistant Message to memory archive
void AddMessageToArray(FAIMessage NewMessage);
@ -235,6 +249,10 @@ protected:
//Returns the previously stored messages (see AddMessageToArray)
TArray<FAIMessage> GetAllPreviousMessage();
// Builds an OpenAI-compatible messages array: system prompt + history + CurrentMessage appended last.
// Reusable by any OpenAI-compatible provider implementation.
TArray<TSharedPtr<FJsonValue>> BuildOpenAIMessagesArray(const FAIMessage& CurrentMessage);
//MCP Log Event
UFUNCTION()
void OnMCPLogReceived(const FString& LogMessage);
@ -259,9 +277,15 @@ protected:
/** Actor used as world context for commands */
TWeakObjectPtr<AActor> WorldReferenceActor;
TArray<class UMCPUnrealCommand*> UnrealCommands;
TArray<TSubclassOf<UMCPUnrealCommand>> UnrealCommandClasses;
TArray<FMCPToolInfo> UnrealCommandsToolInfos;
// Maps MCP server command name → tool_call_id for propagation through MCPCommandFinished
TMap<FString, FString> MCPToolCallIds;
UPROPERTY()
TArray<UMCPUnrealCommand*> ActiveCommands;
/** MCP Manager for FastMCP server communication */
UPROPERTY()
UMCPBaseManager* MCPManager;
@ -284,9 +308,6 @@ protected:
//There is a function call in progress
bool functionCallRunning = false;
//System Instruction
FString SystemInstruction;
//Current State the AI Manager
EAvatarCoreAIState CurrentAIState = EAvatarCoreAIState::Disconnected;

9
Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/AvatarCoreAIEnumsAndStructs.h

@ -24,6 +24,7 @@ enum class EAvatarCoreAIPromptRole : uint8
User UMETA(DisplayName = "User"),
Assistant UMETA(DisplayName = "Assistant"),
System UMETA(DisplayName = "System"),
Tool UMETA(DisplayName = "Tool"),
};
@ -61,6 +62,13 @@ struct FAIMessage
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AI")
FString Message;
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AI")
bool bTriggerResponse = true;
// tool_call_id when Role==Tool; call_id reference when Role==Assistant with tool_calls
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AI")
FString Id;
};
USTRUCT(BlueprintType)
@ -100,3 +108,4 @@ struct FMCPToolInfo
OutputScheme = "";
}
};

5
Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/AvatarCore_AI.h

@ -12,4 +12,9 @@ public:
/** IModuleInterface implementation */
virtual void StartupModule() override;
virtual void ShutdownModule() override;
private:
#if WITH_EDITOR
void AddDBDirectoryToPackaging();
#endif
};

2
Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/MCP/FastMCP/FastMCPConfig.h

@ -18,7 +18,7 @@ public:
//Direction to the Script that start FastMCP
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|MCP", meta = (ExposeOnSpawn = "true"))
FString MCPExecutable = FPaths::ProjectPluginsDir() +"AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/FastMCPServer.bat";
FString MCPExecutable = FPaths::ProjectContentDir() +"DB/FastMCP/FastMCPServer.bat";
//Custom python environment - "python" will use the system default
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreTTS|CoquiTTS", meta = (ExposeOnSpawn = "true"))

25
Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/MCP/MCPUnrealCommand.h

@ -5,8 +5,8 @@
#include "AvatarCoreAIEnumsAndStructs.h"
#include "MCPUnrealCommand.generated.h"
DECLARE_DYNAMIC_MULTICAST_DELEGATE_TwoParams(FOnAICommandDone, const FString&, CommandName, const FString&, Payload);
DECLARE_DYNAMIC_MULTICAST_DELEGATE_TwoParams(FOnAICommandFailed, const FString&, CommandName, const FString&, Payload);
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnAICommandDone, const FAIMessage&, Message);
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnAICommandFailed, const FAIMessage&, Message);
/**
* Base class for MCP/AI commands.
@ -23,6 +23,19 @@ public:
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "Command")
float TimeoutSeconds = 10.0f;
// Set by AIBaseManager::RunMCPCommand; propagated back to FAIMessage.Id in CommandFinished
UPROPERTY(BlueprintReadWrite, Category = "Command")
FString Id;
UPROPERTY(BlueprintReadWrite, EditAnywhere, Category = "Context", meta = (ExposeOnSpawn = "true"))
TObjectPtr<UObject> RequiredWorldContext = nullptr;
UFUNCTION(BlueprintCallable, Category = "Context")
void SetWorldContext(UObject* NewWorldContext);
UFUNCTION(BlueprintCallable, Category = "Context")
UObject* GetWorldContextObject() const;
// Result event (success)
UPROPERTY(BlueprintAssignable, Category = "Command")
FOnAICommandDone OnCommandDone;
@ -61,13 +74,13 @@ public:
UFUNCTION(BlueprintCallable, Category = "Command")
FString GetCommandOutputScheme();
/** Call this when the command is finished successfully */
/** Call this when the command is finished successfully. Set bTriggerResponse=false to suppress the AI follow-up. */
UFUNCTION(BlueprintCallable, Category = "Command")
void FinishCommand(const FString& Payload);
void FinishCommand(const FString& Payload, bool bTriggerResponse = true);
/** Call this when the command fails (timeout or error) */
/** Call this when the command fails (timeout or error). */
UFUNCTION(BlueprintCallable, Category = "Command")
void FailCommand(const FString& Payload);
void FailCommand(const FString& Payload, bool bTriggerResponse = true);
protected:
FTimerHandle TimeoutHandle;

74
Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/OpenRouter/AvatarCoreAIOpenRouter.h

@ -0,0 +1,74 @@
// Fill out your copyright notice in the Description page of Project Settings.
#pragma once
#include "CoreMinimal.h"
#include "AIBaseManager.h"
#include "OpenRouter/OpenRouterConfig.h"
#include "Interfaces/IHttpRequest.h"
#include "Interfaces/IHttpResponse.h"
#include "AvatarCoreAIOpenRouter.generated.h"
/**
 * AI manager for OpenRouter (or a compatible chat-completion API).
 *
 * Sends chat-completion requests over HTTP and parses the streamed response
 * line by line as SSE, accumulating tool calls while the stream is running.
 */
UCLASS(Blueprintable, BlueprintType)
class AVATARCORE_AI_API UAvatarCoreAIOpenRouter : public UAIBaseManager
{
	GENERATED_BODY()

public:
	// UAIBaseManager overrides
	void InitAIManagerChild(UAIBaseConfig* AIConfig, AActor* InWorldReferenceActor) override;
	void ActivateAI() override;
	void DeactivateAI() override;
	void UpdateSession() override;
	void SendResponseChild(FAIMessage Message, bool NotifyDelay) override;
	void ClearAI() override;

	// Cancel the active HTTP request if one is in flight
	UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI|OpenRouter")
	void CancelActiveRequest();

private:
	// Typed pointer to the OpenRouter config. Marked UPROPERTY so the garbage
	// collector tracks the reference and the pointer cannot silently dangle.
	UPROPERTY()
	UOpenRouterConfig* OpenRouterConfig = nullptr;

	// Request currently in flight, if any
	TSharedPtr<IHttpRequest, ESPMode::ThreadSafe> ActiveRequest;

	// Per-request HTTP receive buffer. FSSEReceiveArchive appends bytes here directly,
	// so we never call GetContent() on a live request (avoids "Payload is incomplete" warnings).
	// A new TArray is created for each request so old and new request data are isolated.
	TSharedPtr<TArray<uint8>> SSEStreamBufferPtr;

	// Guards concurrent access: HTTP thread writes via FSSEReceiveArchive::Serialize,
	// game thread reads in OnResponseProgress. Both hold shared refs so neither
	// outlives the lock even if ResetSSEState fires mid-callback.
	TSharedPtr<FCriticalSection> SSEBufferLock;

	// Line-level byte buffer for SSE parsing; fed from SSEStreamBufferPtr.
	// UTF-8 multibyte sequences are never split here because lines are extracted whole.
	TArray<uint8> SSERawBuffer;
	int32 SSEByteOffset = 0;

	// Prevents processing finish_reason more than once if a provider sends duplicate done chunks
	bool bResponseComplete = false;

	// Tool call accumulation during streaming; keyed by tool_calls[index]
	TMap<int32, FString> ToolCallNameMap;
	TMap<int32, FString> ToolCallArgsMap;
	TMap<int32, FString> ToolCallIdMap;

	// Request lifecycle: send, progress callback, completion callback
	void SendChatCompletionRequest(FAIMessage CurrentMessage);
	void OnResponseProgress(FHttpRequestPtr Request, uint64 BytesSent, uint64 BytesReceived);
	void OnRequestComplete(FHttpRequestPtr Request, FHttpResponsePtr Response, bool bSuccess);

	// Handles one SSE line extracted from the stream
	void ParseSSELine(const FString& Line);
	void HandleToolCallsDone();

	// Builds the messages array from system instructions + history + current message appended last
	TArray<TSharedPtr<FJsonValue>> BuildMessagesArray(FAIMessage CurrentMessage);

	// Builds the tools array from GetAvailableCommands()
	TArray<TSharedPtr<FJsonValue>> BuildToolsArray();

	void ResetSSEState();
};

7
Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/OpenRouter/OpenRouterConfig.cpp

@ -0,0 +1,7 @@
#include "OpenRouter/OpenRouterConfig.h"
#include "OpenRouter/AvatarCoreAIOpenRouter.h"
/**
 * Constructs the OpenRouter config and selects UAvatarCoreAIOpenRouter as its manager class.
 *
 * @param ObjectInitializer Forwarded to the base class so it is constructed with the same initializer.
 */
UOpenRouterConfig::UOpenRouterConfig(const FObjectInitializer& ObjectInitializer)
	: Super(ObjectInitializer)
{
	AIManagerClass = UAvatarCoreAIOpenRouter::StaticClass();
}

47
Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/OpenRouter/OpenRouterConfig.h

@ -0,0 +1,47 @@
// Fill out your copyright notice in the Description page of Project Settings.
#pragma once
#include "CoreMinimal.h"
#include "AIBaseConfig.h"
#include "OpenRouterConfig.generated.h"
// Settings specific to the OpenRouter (or OpenAI-compatible) provider
USTRUCT(BlueprintType)
struct FOpenRouterAISettings
{
GENERATED_BODY()
// OpenRouter (or compatible) API base URL
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
FString BaseURL = TEXT("https://openrouter.ai/api/v1");
// Provider-independent base settings (see FBaseAISettings: API key, model ID)
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
FBaseAISettings BaseAISettings;
// Enable OpenAI-compatible function/tool calling. Disable for models that use a
// non-standard tool format (e.g. some Mistral variants via OpenRouter).
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
bool bSendTools = true;
// Optional: sent as HTTP-Referer header (appears in OpenRouter dashboard analytics)
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
FString SiteURL;
// Optional: sent as X-Title header
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
FString AppName;
};
// Config for the OpenRouter provider: the shared UAIBaseConfig settings plus FOpenRouterAISettings
UCLASS(Blueprintable, BlueprintType)
class AVATARCORE_AI_API UOpenRouterConfig : public UAIBaseConfig
{
GENERATED_BODY()
public:
// Defined in OpenRouterConfig.cpp; assigns AIManagerClass
UOpenRouterConfig(const FObjectInitializer& ObjectInitializer);
// OpenRouter-specific settings (base URL, tool calling, optional headers)
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
FOpenRouterAISettings OpenRouterSettings;
};

5
Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/RealtimeAPI/AvatarCoreAIRealtime.h

@ -149,7 +149,7 @@ public:
void ActivateAI() override;
void DeactivateAI() override;
void UpdateSession() override;
void SendResponseChild(FAIMessage Message, bool NotifyDelay, bool TriggerResponse) override;
void SendResponseChild(FAIMessage Message, bool NotifyDelay) override;
void ClearAI() override;
void ConnectToWebSocket();
@ -162,11 +162,12 @@ public:
void WebSocketSendType(const FString& type);
UFUNCTION(BlueprintCallable, Category = "AvatarCore AI|RealtimeAPI")
void CreateConversationItem(FAIMessage Message, bool triggerResponse = true);
void CreateConversationItem(FAIMessage Message);
UFUNCTION(BlueprintCallable, Category = "AvatarCore AI|RealtimeAPI")
void CreateReseponse();
UFUNCTION(BlueprintCallable, Category = "AvatarCore AI|RealtimeAPI")
FString GetCurrentRequestID();

43
Unreal/Plugins/AvatarCore_AI/Source/AvatarCore_AI/Public/RealtimeAPI/RealtimeAPIConfig.h

@ -21,17 +21,14 @@ enum class EOpenAIRealtimeVoice : uint8 {
verse UMETA(DisplayName = "verse")
};
/**
*
*/
UCLASS()
class AVATARCORE_AI_API URealtimeAPIConfig : public UAIBaseConfig
USTRUCT(BlueprintType)
struct FRealtimeAISettings
{
GENERATED_BODY()
public:
URealtimeAPIConfig(const FObjectInitializer& ObjectInitializer);
//Provider-independent base settings (see FBaseAISettings: API key, model ID)
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
FBaseAISettings BaseAISettings;
//Base URL - Change this to the correct Azure API URL
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
@ -41,27 +38,27 @@ public:
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
bool IsAzureOpenAI = false;
//The OpenAI API Key
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
FString APIKey;
//OpenAI Model
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
FString Model = "gpt-realtime";
//OpenAI RealtimeAPI Voice
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
EOpenAIRealtimeVoice Voice = EOpenAIRealtimeVoice::alloy;
//Max Token per Request
//Shall we forward all audio chunks directly to OpenAI - Does not work well, if we do not forward silence as well
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
int MaxTokens = 1500;
bool InputAudioStreaming = false;
};
//Temperature of the AI model
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
float Temperature = 0.8f;
/**
*
*/
UCLASS()
class AVATARCORE_AI_API URealtimeAPIConfig : public UAIBaseConfig
{
GENERATED_BODY()
public:
URealtimeAPIConfig(const FObjectInitializer& ObjectInitializer);
//Shall we forward all audio chunks directly to OpenAI - Does not work well, if we do not forward silence as well
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true"))
bool InputAudioStreaming = false;
FRealtimeAISettings RealtimeSettings;
};

1
Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/.gitignore

@ -1 +0,0 @@
/__pycache__/*

5
Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/AddDocumentsToDatabase.bat

@ -1,5 +0,0 @@
@echo off
REM Activates the FastMCP virtual environment and runs AddDocumentsToDatabase.py
REM from this script's directory.

REM Quoted so that a profile path containing spaces still resolves.
call "%localappdata%\AvatarCore\FastMCPVenv\Scripts\Activate.bat"
cd /d "%~dp0"
python AddDocumentsToDatabase.py
REM Start a new cmd session afterwards so the console stays open.
cmd

68
Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/AddDocumentsToDatabase.py

@ -1,68 +0,0 @@
# setup_db.py
import os
import re
from document_vectordb import DocumentVectorDB
from colorama import Fore
def _sanitize_table_name(name: str) -> str:
name = name.strip().lower()
name = re.sub(r'[^a-z0-9]+', '_', name)
name = name.strip('_')
if not name:
return "documents"
return name
def setup_database():
    """Import every ``.pdf``/``.txt`` file below ``documents_to_add`` into the vector DB.

    Files directly in ``documents_to_add`` go into the ``documents`` table.
    Files in sub-folders go into a table named after the first sub-folder
    (sanitized with ``_sanitize_table_name``). After a successful import the
    file is moved to the mirrored location under ``documents_added``.

    If ``documents_to_add`` does not exist yet, it is created and the function
    returns without importing anything.
    """
    print(Fore.GREEN + "Initializing database...")
    db = DocumentVectorDB()
    db.create_table()

    script_dir = os.path.dirname(__file__)
    # Folder scanned for new documents
    document_add_folder = script_dir + "/documents_to_add"
    # Folder that receives documents once they have been imported
    document_added_folder = script_dir + "/documents_added"

    if not os.path.exists(document_add_folder):
        os.makedirs(document_add_folder)
        print(Fore.GREEN + f"Created {document_add_folder} directory. Please add your PDF/text files there.")
        return

    os.makedirs(document_added_folder, exist_ok=True)

    files_added = 0
    for root, dirs, files in os.walk(document_add_folder):
        rel_root = os.path.relpath(root, document_add_folder)
        if rel_root == ".":
            current_table = "documents"
            target_root = document_added_folder
        else:
            # The first folder below documents_to_add selects the table.
            first_folder = rel_root.split(os.sep)[0]
            current_table = _sanitize_table_name(first_folder)
            target_root = os.path.join(document_added_folder, rel_root)

        for filename in files:
            if not filename.endswith(('.pdf', '.txt')):
                continue

            file_path = os.path.join(root, filename)
            os.makedirs(target_root, exist_ok=True)
            copy_file_path = os.path.join(target_root, filename)

            print(Fore.GREEN + f"Adding {file_path} to table {current_table}...")
            try:
                db.add_document(file_path, table_name=current_table)
            except Exception as e:
                print(Fore.RED + f"Error adding {file_path}: {e}")
                continue
            files_added += 1

            # os.replace overwrites an existing destination on every platform;
            # os.rename raises on Windows in that case, which would leave an
            # already-imported file in documents_to_add to be imported again.
            try:
                os.replace(file_path, copy_file_path)
            except OSError as e:
                print(Fore.RED + f"Added {file_path} but could not move it to {copy_file_path}: {e}")

    db.finalize_db()
    print(Fore.GREEN + f"Database setup complete! Added {files_added} documents.")

    # Show stats
    stats = db.get_stats()
    print(Fore.GREEN + f"Database stats: {stats}")


if __name__ == "__main__":
    setup_database()

142
Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/FastMCPServer.bat

@ -1,142 +0,0 @@
@echo off
REM Launcher for FastMCPServer.py:
REM   1. checks that CUDA 12.8 (nvcc) is available,
REM   2. checks that the requested Python matches REQUIRED_MAJOR.REQUIRED_MINOR,
REM   3. creates the virtual environment if missing and installs requirements.txt,
REM   4. runs FastMCPServer.py inside the venv and returns its exit code.
REM Optional first argument: path of the Python executable to use (default: python).
setlocal EnableExtensions EnableDelayedExpansion
title "FastMCP"

REM ====== Config ======
set "PY_DOWNLOAD_URL=https://www.python.org/ftp/python/3.10.11/python-3.10.11-amd64.exe"
set "CUDA_DOWNLOAD_URL=https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_571.96_windows.exe"

REM Set your required Python version here
set REQUIRED_MAJOR=3
set REQUIRED_MINOR=10

set "VENV_DIR=%LOCALAPPDATA%/AvatarCore/FastMCPVenv"
set "VENV_PY=%VENV_DIR%\Scripts\python.exe"
set "REQ_FILE=%~dp0requirements.txt"
set "TARGET_SCRIPT=%~dp0FastMCPServer.py"

REM Work from this script's directory
cd /d "%~dp0"

:checkcuda
echo === Checking for CUDA 12.8 ===

REM 1) Is nvcc in PATH?
where nvcc >nul
if errorlevel 1 goto installcuda

nvcc --version | findstr /i /r /c:"release *12\.8" >nul
if not errorlevel 1 (
    REM Optional: show the version line cleanly.
    REM NVCC_LINE is set inside this block, so it must be read with delayed expansion.
    for /f "delims=" %%L in ('nvcc --version ^| findstr /i /c:"release"') do set "NVCC_LINE=%%L"
    echo Found: !NVCC_LINE!
    echo CUDA 12.8 detected. All good.
    goto :checkpython
)

:installcuda
echo ERROR: - Install CUDA 12.8 first.
start "" "%CUDA_DOWNLOAD_URL%"
pause
REM Non-zero exit code, consistent with the other failure paths below.
exit /b 1

:checkpython
echo === Checking for Python %REQUIRED_MAJOR%.%REQUIRED_MINOR% ===

set "PY_PATH=%1"
IF [%1] == [] set "PY_PATH=python"

REM Check if python command exists
%PY_PATH% --version >nul 2>&1
if %errorlevel% neq 0 (
    echo ERROR: Python is not installed or not in PATH
    start "" "%PY_DOWNLOAD_URL%"
    pause
    exit /b 1
)

REM Get Python version
for /f "tokens=2" %%i in ('%PY_PATH% --version 2^>^&1') do set PYTHON_VERSION=%%i
echo Found Python version: %PYTHON_VERSION%

REM Parse version numbers
for /f "tokens=1,2 delims=." %%a in ("%PYTHON_VERSION%") do (
    set CURRENT_MAJOR=%%a
    set CURRENT_MINOR=%%b
)

REM Version comparison logic
set VERSION_OK=0
if %CURRENT_MAJOR% EQU %REQUIRED_MAJOR% (
    if %CURRENT_MINOR% EQU %REQUIRED_MINOR% (
        set VERSION_OK=1
    )
)

REM Display result
if %VERSION_OK% equ 1 (
    echo SUCCESS: Python version is compatible %REQUIRED_MAJOR%.%REQUIRED_MINOR%!
    goto :PythonReady
) else (
    echo ERROR: Python version does not match!
    start "" "%PY_DOWNLOAD_URL%"
    pause
    exit /b 1
)

:PythonReady
echo Using Python: %PY_PATH%

REM ====== Virtual environment ======
if exist "%VENV_PY%" (
    echo Found existing venv: "%VENV_DIR%"
) else (
    echo ERROR: No venv found. Creating venv at "%VENV_DIR%" ... 1>&2
    %PY_PATH% -m venv "%VENV_DIR%"
    REM !errorlevel! reads the result of the venv command above;
    REM %%errorlevel%% would be expanded before the block runs.
    if !errorlevel! neq 0 (
        echo Failed to create virtual environment.
        pause
        exit /b 1
    )
    set "FIRST_SETUP=1"
)

REM Always upgrade pip once in venv
echo Upgrading pip in venv... 1>&2
"%VENV_PY%" -m pip install --upgrade pip

REM Install requirements on every start if requirements.txt exists
REM (the extra message is only printed on first setup).
if exist "%REQ_FILE%" (
    if defined FIRST_SETUP (
        echo Installing requirements from "%REQ_FILE%" ... 1>&2
    )
    "%VENV_PY%" -m pip install -r "%REQ_FILE%"
    REM Delayed expansion so the pip result is checked, not a stale value.
    if !errorlevel! neq 0 (
        echo Pip install failed. Check your "requirements.txt". 1>&2
        pause
        exit /b 1
    )
) else (
    echo No requirements.txt found. Skipping dependency install.
)

REM ====== Run the target script ======
if not exist "%TARGET_SCRIPT%" (
    echo ERROR: "%TARGET_SCRIPT%" not found. 1>&2
    echo Make sure %TARGET_SCRIPT% is next to this script, or update TARGET_SCRIPT path.
    pause
    exit /b 1
)

echo Running %TARGET_SCRIPT% ...
start /B /WAIT "" "%VENV_PY%" "%TARGET_SCRIPT%" %*

set "RUN_EXIT=%ERRORLEVEL%"
echo.
echo %TARGET_SCRIPT% exited with code %RUN_EXIT%.
pause
exit /b %RUN_EXIT%

41
Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/FastMCPServer.py

@ -1,41 +0,0 @@
from fastmcp import FastMCP
from typing import List, Dict, Any
from document_vectordb import DocumentVectorDB
import json
import re
# FastMCP server instance; the @mcp.tool() functions below register on it
mcp = FastMCP("AvatarCoreMCP_1_0", stateless_http=True)
# Initialize the vector database once at import time; the tools share this instance
db = DocumentVectorDB()
db.create_table()
@mcp.tool()
def search_information(query: str) -> List[Dict[str, Any]]:
    """If you need more information about search this database."""
    # NOTE(review): the docstring is presumably exposed as the tool description,
    # so it is left untouched here - confirm before rewording it.
    try:
        # Remove the project name variants from the query, longest first
        for phrase in ('Green Hydrogen Hub Stuttgart', 'Green Hydrogen Hub', 'GHH'):
            query = re.sub(phrase, '', query)
        print(query)
        # The boolean argument enables CUDA based re-ranking
        hits = db.search(query, 3, "documents", True, 48, 12)
        return hits
    except Exception as e:
        return [{"error": f"Search failed: {str(e)}"}]
@mcp.tool()
def get_database_stats() -> Dict[str, Any]:
    """Get statistics about the document database"""
    try:
        # Row count of the current table plus the table's name
        return {
            "total_entries": db.table.count_rows(),
            "table_name": db.table_name
        }
    except Exception as e:
        return {"error": f"Failed to get stats: {str(e)}"}


# Serve the tools over HTTP on localhost when run as a script
if __name__ == "__main__":
    mcp.run(transport="http", host="127.0.0.1", port=8000, path="/mcp")

4
Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/StartPythonVenv.bat

@ -1,4 +0,0 @@
@echo off
REM Opens a console in this script's directory with the FastMCP virtual environment activated.

REM Quoted so that a profile path containing spaces still resolves.
call "%localappdata%\AvatarCore\FastMCPVenv\Scripts\Activate.bat"
cd /d "%~dp0"
cmd

5
Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/TestSearchDatabase.bat

@ -1,5 +0,0 @@
@echo off
REM Activates the FastMCP virtual environment and runs TestSearchDatabase.py
REM from this script's directory.

REM Quoted so that a profile path containing spaces still resolves.
call "%localappdata%\AvatarCore\FastMCPVenv\Scripts\Activate.bat"
cd /d "%~dp0"
python TestSearchDatabase.py
REM Start a new cmd session afterwards so the console stays open.
cmd
cmd

64
Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/TestSearchDatabase.py

@ -1,64 +0,0 @@
from document_vectordb import DocumentVectorDB
import traceback
import time
table = "documents"


def DoSearch():
    """Interactive search prompt for manual testing.

    Repeatedly asks for a table name (an empty answer keeps the previous one)
    and a query, runs a reranked search through the module-level ``db`` and
    prints the hits and the elapsed time. An empty query ends the session via
    ``exit()``.

    The prompt is a plain loop rather than a self-call at the end of the
    function, so a long session cannot exhaust the interpreter's recursion
    limit.
    """
    global table
    while True:
        print("-------------------------", flush=True)
        print(f'Which table to search? (empty for {table})', flush=True)
        table = input() or table
        print('What do you wanna search?', flush=True)
        query = input()
        if query == "":
            exit()
        try:
            start_time = time.time()
            print("Calling db.search...", flush=True)
            results = db.search(query, limit=3, table_name=table, rerank=True, candidates=48, batch_size=12)
            print(f"db.search returned list of length: {len(results) if results is not None else 'None'}", flush=True)
        except Exception as e:
            # A failed search is reported but must not end the session.
            print("Search raised an exception:", flush=True)
            print(e, flush=True)
            traceback.print_exc()
            results = []
        try:
            if results and len(results) > 0:
                for i, result in enumerate(results, 1):
                    print(f" {i} {result}", flush=True)
                    print("-------------------------", flush=True)
            else:
                print(" No results found", flush=True)
        except Exception as e:
            print(f"Error printing results: {e}", flush=True)
        print("Execution took: %s seconds" % (time.time() - start_time))
if __name__ == "__main__":
    import os
    try:
        print("Testing Document Search...", flush=True)
        # Open the database and bind the default table.
        db = DocumentVectorDB()
        db.create_table()
        # Show per-table statistics before entering the interactive prompt.
        print(f"Database stats: {db.get_stats()}", flush=True)
        DoSearch()
    except Exception as fatal:
        print("Fatal error in main:", flush=True)
        print(fatal, flush=True)
        traceback.print_exc()
    finally:
        # Give the user a chance to read the output; ignore a failing prompt.
        try:
            input("Press Enter to exit...")
        except Exception:
            pass

9
Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/WipeDatabase.bat

@ -1,9 +0,0 @@
@echo off
rem Resets the local vector database next to this script:
rem   1. moves everything from documents_added back into documents_to_add,
rem   2. deletes the lancedb folder,
rem   3. deletes the documents_added folder.
rem Abort if the script folder cannot be entered, so the recursive deletes
rem below can never run against some other working directory.
cd /d "%~dp0" || exit /b 1
robocopy documents_added documents_to_add /MOV /S
rmdir /S /Q lancedb
rmdir /S /Q documents_added
echo -----------------------------------------
echo Farewell, my old friend!
echo -----------------------------------------
cmd

370
Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/document_vectordb.py

@ -1,370 +0,0 @@
import lancedb
import pandas as pd
from sentence_transformers import SentenceTransformer
from sentence_transformers import CrossEncoder
from pdfminer.high_level import extract_text
from langchain_text_splitters import RecursiveCharacterTextSplitter
import pyarrow as pa
import os
import torch
import time
from colorama import Fore
from typing import List, Dict
import re
from hashlib import sha1
class DocumentVectorDB:
def __init__(self, db_path: str = "./lancedb"):
    """Connect to the LanceDB store and load the reranker.

    The default ``db_path`` is resolved to a ``lancedb`` folder next to this
    file; any other value is passed to ``lancedb.connect`` unchanged.
    The embedding model is not loaded here: it is selected later, once the
    table's vector dimension is known.
    """
    if db_path == "./lancedb":
        db_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "lancedb")
    self.db = lancedb.connect(db_path)
    # Timestamps used by DebugTimeIt
    self.start_time = self.end_time = None
    # Embedding model state, filled in by _load_model_for_dim
    self.model = self.model_dim = self.model_name = None
    self.table_name = "documents"
    self.reranker = None
    self._load_reranker()
def _load_reranker(self, model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"):
if self.reranker is None:
if(torch.cuda.is_available()):
self.reranker = CrossEncoder(model_name, device="cuda")
else:
self.reranker = CrossEncoder(model_name)
def _load_model_for_dim(self, dim: int):
if self.model is not None and self.model_dim == dim:
return
# Select a common model matching the vector dimension
if dim == 384:
self.model_name = 'all-MiniLM-L6-v2'
elif dim == 768:
self.model_name = 'all-mpnet-base-v2'
else:
# Fallback to a widely used 384-d model
self.model_name = 'all-MiniLM-L6-v2'
dim = 384
self.model = SentenceTransformer(self.model_name)
self.model_dim = dim
def _infer_table_vector_dim(self) -> int:
try:
# Prefer schema-based detection; vector is commonly a FixedSizeList in LanceDB
field = self.table.schema.field('vector')
t = field.type
if pa.types.is_fixed_size_list(t):
return t.list_size
except Exception:
pass
# Fallback: inspect a sample row
try:
df_sample = self.table.to_pandas(limit=1)
if df_sample is not None and not df_sample.empty:
vec = df_sample.iloc[0].get('vector', [])
if isinstance(vec, list):
return len(vec)
except Exception:
pass
# Default if undetectable
return 384
def _ensure_table(self, table_name: str):
if getattr(self, "table", None) is not None and self.table_name == table_name and self.model is not None:
return
self.table_name = table_name
if table_name not in self.db.table_names():
# Initialize a default 384-d model for a fresh table
self._load_model_for_dim(384)
emb_dim = self.model.get_sentence_embedding_dimension()
# Create with sample data first to enforce schema with correct vector dim
sample_data = pd.DataFrame([{
"id": "sample",
"content": "sample content",
"source": "sample.txt",
"vector": [0.0] * emb_dim,
"doc_id": "sample.txt",
"chunk_index": 0
}])
self.table = self.db.create_table(table_name, sample_data)
# Delete the sample data
self.table.delete("id = 'sample'")
else:
self.table = self.db.open_table(table_name)
# Infer vector dimension from existing table and load matching model
dim = self._infer_table_vector_dim()
self._load_model_for_dim(dim)
def create_table(self, table_name: str = "documents"):
    """Open ``table_name``, creating it first when it does not exist.

    This is a thin public wrapper around ``_ensure_table``, which also loads
    the embedding model for the table. The schema is defined there, so no
    schema is declared in this method.
    """
    self._ensure_table(table_name)
def _create_index_for_current_table(self):
try:
row_count = self.table.count_rows()
except Exception:
row_count = 0
if row_count < 100:
return
if row_count < 1000:
num_partitions = 1
num_sub_vectors = 8
elif row_count < 10000:
num_partitions = 8
num_sub_vectors = 16
elif row_count < 100000:
num_partitions = 32
num_sub_vectors = 64
else:
num_partitions = 90
num_sub_vectors = 96
try:
self.table.create_index(
vector_column_name="vector",
index_type="IVF_PQ",
metric="cosine",
num_partitions=num_partitions,
num_sub_vectors=num_sub_vectors
)
except Exception:
pass
def clean_extracted_text(self, text: str) -> str:
    """
    Normalizes text extracted from a PDF:
    1. Joins words that were hyphenated across a line break.
    2. Collapses every run of whitespace (newlines included) into one space
       and trims both ends.
    """
    dehyphenated = re.sub(r'([a-z])-(\n\s*)(\n?)', r'\1', text, flags=re.IGNORECASE)
    return re.sub(r'\s+', ' ', dehyphenated).strip()
def extract_text_from_pdf(self, pdf_path: str) -> str:
    """Return the cleaned text of a PDF, or "" when extraction fails.

    Extraction uses pdfminer.six, which is often more tolerant of broken PDF
    structure. A failure is printed and turned into an empty string rather
    than raised.
    """
    try:
        return self.clean_extracted_text(extract_text(pdf_path))
    except Exception as e:
        print(f"pdfminer.six failed on {pdf_path}: {e}")
    return ""  # Or try pypdf as a fallback here
def chunk_text(self, text: str, chunk_size: int = 512, overlap: int = 50) -> List[str]:
    """Split ``text`` into overlapping chunks measured in characters.

    Args:
        text: The text to split.
        chunk_size: Maximum chunk length in characters.
        overlap: Number of characters shared between neighbouring chunks.

    Returns:
        The list of chunks produced by the splitter.

    ``chunk_size`` and ``overlap`` are now honoured. Their defaults are the
    values that were previously hard-coded in the body (512 / 50), so callers
    that pass no arguments get the same chunks as before.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=overlap,
        length_function=len,  # Lengths are counted in characters
        separators=["\n\n", "\n", ". ", " ", ""]  # Hierarchical splitting
    )
    return splitter.split_text(text)
def add_document(self, file_path: str, doc_type: str = "auto", table_name: str = "documents"):
    """Read a document, chunk it, embed the chunks and store them in a table.

    Args:
        file_path: Path of the document to ingest.
        doc_type: "pdf", "txt" or "auto". "auto" picks "pdf" for a ``.pdf``
            extension (any letter case) and "txt" otherwise.
        table_name: Table to add the chunks to; created when missing.

    A document that yields no chunks (an empty file, or a PDF whose
    extraction failed) is reported and skipped instead of raising
    IndexError.
    """
    # Ensure table and model are initialized (add_document may be used without create_table())
    self._ensure_table(table_name)
    if doc_type == "auto":
        doc_type = "pdf" if file_path.lower().endswith('.pdf') else "txt"
    if doc_type == "pdf":
        text = self.extract_text_from_pdf(file_path)
    else:
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()
    chunks = self.chunk_text(text)
    if not chunks:
        print(f"No text chunks produced for {file_path}; nothing added.")
        return
    print(Fore.GREEN + f"{len(chunks)} chunks added.")
    print(Fore.BLUE + chunks[0])
    embeddings = self.model.encode(
        chunks,
        batch_size=64,
        convert_to_numpy=True,
        normalize_embeddings=True,
    )
    base = os.path.basename(file_path)
    data_to_add = []
    for i, (chunk, emb) in enumerate(zip(chunks, embeddings)):
        if len(chunk) < 1:
            continue
        # Chunk id derived from file name, chunk position and chunk length
        did = sha1(f"{base}|{i}|{len(chunk)}".encode("utf-8")).hexdigest()[:16]
        data_to_add.append({
            "id": did,
            "content": chunk,
            "source": file_path,
            "vector": emb.tolist(),
            "doc_id": base,
            "chunk_index": i,
        })
    # Add all chunks at once, then refresh the index for the grown table
    if data_to_add:
        self.table.add(pd.DataFrame(data_to_add))
        self._create_index_for_current_table()
def finalize_db(self):
    """Rebuild the vector index of the currently bound table."""
    self._create_index_for_current_table()
def DebugTimeIt(self, TimedLabel=""):
    """Debug stopwatch: print the time since the previous call, then restart.

    The first call only starts the timer and prints nothing.
    """
    previous_start = self.start_time
    if previous_start is not None:
        self.end_time = time.time()
        print(f"{TimedLabel}: Elapsed time {self.end_time - previous_start}")
    self.start_time = time.time()
def search(self, query: str, limit: int = 5, table_name: str = "documents", rerank: bool = False, candidates: int = 100, batch_size: int = 64) -> List[Dict]:
"""Search a table for the chunks most similar to ``query``.

Args:
query: Search text; embedded with the loaded sentence-embedding model.
limit: Maximum number of results returned.
table_name: Table to search; bound via ``_ensure_table`` first.
rerank: When True, fetch ``candidates`` rows by vector search and
re-order them with the cross-encoder before keeping ``limit`` rows.
candidates: Number of rows fetched for reranking (rerank path only).
batch_size: Batch size passed to the reranker (rerank path only).

Returns:
A list of dicts with the keys "content", "source" and "score".
With ``rerank=True`` the score is the reranker score (0.0 when
reranking failed). Otherwise it is the first non-null value among
the "_distance", "score" and "_similarity" columns, so the two
paths do not report the same kind of score.
"""
# Ensure model is initialized
self._ensure_table(table_name)
query_embedding = self.model.encode(query, normalize_embeddings=True).tolist()
if rerank:
print("Reranking...")
# Fetch a wider candidate set; it is narrowed to `limit` after reranking
raw = (
self.table
.search(query_embedding)
.nprobes(20)
.refine_factor(50)
.limit(candidates)
.to_pandas()
)
if raw is None or raw.empty:
return []
# The reranker scores each (query, chunk text) pair
pairs = [(query, c) for c in raw["content"].tolist()]
try:
self._load_reranker()
scores = self.reranker.predict(pairs, batch_size=batch_size)
raw["rerank_score"] = scores
reranked = raw.sort_values("rerank_score", ascending=False).head(limit)
except Exception:
reranked = raw.head(limit) # graceful fallback
results_list = []
for _, row in reranked.iterrows():
results_list.append({
"content": row.get("content", ""),
"source": row.get("source", ""),
"score": float(row.get("rerank_score", 0.0)),
})
return results_list
else:
# Perform vector search
print("Vector search...")
try:
results_df = (
self.table
.search(query_embedding)
#.metric("cosine")
.nprobes(20)
.refine_factor(50)
.limit(limit)
.to_pandas()
)
except Exception:
# Retry as a plain search without the nprobes/refine_factor tuning
print("Error")
results_df = self.table.search(query_embedding).limit(limit).to_pandas()
if results_df is None or results_df.empty:
return []
# Returns the first of `keys` that is present in the row and not NaN/None
def pick(row, keys, default_value=""):
for k in keys:
if k in row and pd.notna(row.get(k, None)):
return row.get(k)
return default_value
# Candidate column names, tried in order, for each output field
content_keys = ["content", "text", "chunk", "page_content", "body"]
source_keys = ["source", "path", "file_path", "document", "filename"]
score_keys = ["_distance", "score", "_similarity"]
results_list = []
for _, row in results_df.iterrows():
content = pick(row, content_keys, "")
source = pick(row, source_keys, "")
score = pick(row, score_keys, 0.0)
# Ensure numeric score
try:
score = float(score)
except Exception:
score = 0.0
results_list.append({
"content": content,
"source": source,
"score": score,
})
return results_list
def get_stats(self) -> Dict:
    """Return library versions and per-table statistics for the database.

    Returns:
        A dict with "lancedb_version", "pyarrow_version", "torch_version"
        and "tables". Each table entry holds "table_name", "total_chunks"
        and "vector_dim", or "table_name" and "error" when that table
        could not be inspected. On an unexpected failure the result is
        ``{"error": <message>}``.

    Each table is bound to ``self.table`` temporarily so that
    ``_infer_table_vector_dim`` can inspect it. The previous binding is
    always restored afterwards, including when no table was bound before
    and when an error occurs.
    """
    try:
        had_table = hasattr(self, "table")
        old_table = getattr(self, "table", None)
        old_table_name = getattr(self, "table_name", None)
        tables_info = []
        try:
            for name in list(self.db.table_names()):
                try:
                    tbl = self.db.open_table(name)
                    self.table = tbl
                    self.table_name = name
                    count = tbl.count_rows()
                    vector_dim = self._infer_table_vector_dim()
                    tables_info.append({
                        "table_name": name,
                        "total_chunks": count,
                        "vector_dim": vector_dim,
                    })
                except Exception as inner_e:
                    # One unreadable table must not hide the others
                    tables_info.append({
                        "table_name": name,
                        "error": str(inner_e),
                    })
        finally:
            # Put the instance back exactly as it was found
            if had_table:
                self.table = old_table
            elif hasattr(self, "table"):
                del self.table
            if old_table_name is not None:
                self.table_name = old_table_name
        return {
            "lancedb_version": lancedb.__version__,
            "pyarrow_version": pa.__version__,
            "torch_version": torch.__version__,
            "tables": tables_info,
        }
    except Exception as e:
        return {"error": str(e)}
def get_tables(self) -> List[str]:
    """Return the names of the tables in the database, or [] on any error."""
    try:
        names = self.db.table_names()
    except Exception:
        return []
    try:
        return list(names)
    except Exception:
        return []

32
Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP/requirements.txt

@ -1,32 +0,0 @@
--index-url https://pypi.org/simple
--extra-index-url https://download.pytorch.org/whl/cu128
--trusted-host download.pytorch.org
--prefer-binary
fastmcp==2.10.6
lancedb==0.25.3
pandas==2.3.1
sentence-transformers==5.1.0
pdfminer.six==20251107
torch==2.9.0+cu128
transformers==4.55.0
huggingface-hub==0.34.4
scikit-learn==1.7.1
scipy==1.15.3
tqdm==4.67.1
numpy==2.2.6
regex==2025.7.34
safetensors==0.6.1
pyarrow==21.0.0
python-dotenv==1.1.1
requests==2.32.4
uvicorn==0.35.0
starlette==0.47.2
sse-starlette==2.4.1
httpx==0.28.1
httpx-sse==0.4.1
pydantic==2.11.7
pydantic-settings==2.10.1
langchain-text-splitters==1.0.0
PyYAML==6.0.2

BIN
Unreal/Plugins/AvatarCore_AI/Source/ThirdParty/MCPServer/FastMCP_ForContentFolder.zip

Binary file not shown.

BIN
Unreal/Plugins/AvatarCore_Manager/Content/AvatarCoreManager.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Plugins/AvatarCore_Manager/Content/StateManagement/States/BP_Configurable_QnA_State.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Plugins/AvatarCore_Manager/Content/Widgets/Debug/Modules/W_AvatarCoreModuleEntry.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Plugins/AvatarCore_Manager/Content/Widgets/Debug/Pages/W_DebugAvatarCoreSTT.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Plugins/AvatarCore_Manager/Content/Widgets/Debug/Pages/W_DebugAvatarCoreTTS.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Plugins/AvatarCore_Manager/Content/Widgets/StartupScreen/W_AvatarCoreStartupScreen.uasset (Stored with Git LFS)

Binary file not shown.

141
Unreal/Plugins/AvatarCore_Manager/Source/AvatarCore_Manager/Private/FL_AvatarCoreManager.cpp

@ -17,3 +17,144 @@ void UFL_AvatarCoreManager::BindTTSToAIManager(UTTSManagerBase* TTSManager, UAIB
AIManager->OnAudioChunk.AddDynamic(TTSManager, &UTTSManagerBase::AddAudioChunk);
}
}
// Maps the manager-level ELanguage value onto the same-named ETTSLanguage value.
// ELanguage::NONE and every language without an explicit case below yield
// ETTSLanguage::NONE.
ETTSLanguage UFL_AvatarCoreManager::ConvertLanguageToTTS(ELanguage Language)
{
	switch (Language)
	{
	case ELanguage::ar: return ETTSLanguage::ar;
	case ELanguage::de: return ETTSLanguage::de;
	case ELanguage::en: return ETTSLanguage::en;
	case ELanguage::fr: return ETTSLanguage::fr;
	case ELanguage::el: return ETTSLanguage::el;
	case ELanguage::hi: return ETTSLanguage::hi;
	case ELanguage::it: return ETTSLanguage::it;
	case ELanguage::ja: return ETTSLanguage::ja;
	case ELanguage::ko: return ETTSLanguage::ko;
	case ELanguage::zh: return ETTSLanguage::zh;
	case ELanguage::nl: return ETTSLanguage::nl;
	case ELanguage::pl: return ETTSLanguage::pl;
	case ELanguage::pt: return ETTSLanguage::pt;
	case ELanguage::ro: return ETTSLanguage::ro;
	case ELanguage::ru: return ETTSLanguage::ru;
	case ELanguage::es: return ETTSLanguage::es;
	case ELanguage::cs: return ETTSLanguage::cs;
	case ELanguage::tr: return ETTSLanguage::tr;
	case ELanguage::uk: return ETTSLanguage::uk;
	case ELanguage::hu: return ETTSLanguage::hu;
	case ELanguage::NONE:
	default:
		return ETTSLanguage::NONE;
	}
}
// Maps the manager-level ELanguage value onto the same-named ESTTLanguage value.
// ELanguage::NONE and every language without an explicit case below yield
// ESTTLanguage::NONE.
ESTTLanguage UFL_AvatarCoreManager::ConvertLanguageToSTT(ELanguage Language)
{
	switch (Language)
	{
	case ELanguage::ar: return ESTTLanguage::ar;
	case ELanguage::de: return ESTTLanguage::de;
	case ELanguage::en: return ESTTLanguage::en;
	case ELanguage::fr: return ESTTLanguage::fr;
	case ELanguage::el: return ESTTLanguage::el;
	case ELanguage::hi: return ESTTLanguage::hi;
	case ELanguage::it: return ESTTLanguage::it;
	case ELanguage::ja: return ESTTLanguage::ja;
	case ELanguage::ko: return ESTTLanguage::ko;
	case ELanguage::zh: return ESTTLanguage::zh;
	case ELanguage::nl: return ESTTLanguage::nl;
	case ELanguage::pl: return ESTTLanguage::pl;
	case ELanguage::pt: return ESTTLanguage::pt;
	case ELanguage::ro: return ESTTLanguage::ro;
	case ELanguage::ru: return ESTTLanguage::ru;
	case ELanguage::es: return ESTTLanguage::es;
	case ELanguage::cs: return ESTTLanguage::cs;
	case ELanguage::tr: return ESTTLanguage::tr;
	case ELanguage::uk: return ESTTLanguage::uk;
	case ELanguage::hu: return ESTTLanguage::hu;
	case ELanguage::NONE:
	default:
		return ESTTLanguage::NONE;
	}
}

64
Unreal/Plugins/AvatarCore_Manager/Source/AvatarCore_Manager/Public/AvatarCore_ManagerEnums.h

@ -36,25 +36,47 @@ enum class EAvatarState : uint8 {
UENUM(BlueprintType)
enum class ELanguage : uint8 {
NONE UMETA(DisplayName = "No language is enforced"),
Arabic UMETA(DisplayName = "arabic"),
German UMETA(DisplayName = "german"),
English UMETA(DisplayName = "english"),
French UMETA(DisplayName = "french"),
Greek UMETA(DisplayName = "greek"),
Hindi UMETA(DisplayName = "hindi"),
Italian UMETA(DisplayName = "italian"),
Japanese UMETA(DisplayName = "japanese"),
Korean UMETA(DisplayName = "korean"),
MandarinChinese UMETA(DisplayName = "mandarin chinese"),
Dutch UMETA(DisplayName = "dutch"),
Polish UMETA(DisplayName = "polish"),
Portuguese UMETA(DisplayName = "Portugiesisch"),
Romanian UMETA(DisplayName = "romanian"),
Russian UMETA(DisplayName = "russian"),
Spanish UMETA(DisplayName = "spanish"),
Czech UMETA(DisplayName = "czech"),
Turkish UMETA(DisplayName = "turkish"),
Ukrainian UMETA(DisplayName = "ukrainian"),
Hungarian UMETA(DisplayName = "hungarian")
NONE UMETA(DisplayName = "Unset"),
en UMETA(DisplayName = "English"),
fr UMETA(DisplayName = "French"),
de UMETA(DisplayName = "German"),
es UMETA(DisplayName = "Spanish"),
pt UMETA(DisplayName = "Portuguese"),
zh UMETA(DisplayName = "Chinese"),
ja UMETA(DisplayName = "Japanese"),
hi UMETA(DisplayName = "Hindi"),
it UMETA(DisplayName = "Italian"),
ko UMETA(DisplayName = "Korean"),
nl UMETA(DisplayName = "Dutch"),
pl UMETA(DisplayName = "Polish"),
ru UMETA(DisplayName = "Russian"),
sv UMETA(DisplayName = "Swedish"),
tr UMETA(DisplayName = "Turkish"),
tl UMETA(DisplayName = "Filipino"),
bg UMETA(DisplayName = "Bulgarian"),
ro UMETA(DisplayName = "Romanian"),
ar UMETA(DisplayName = "Arabic"),
cs UMETA(DisplayName = "Czech"),
el UMETA(DisplayName = "Greek"),
fi UMETA(DisplayName = "Finnish"),
hr UMETA(DisplayName = "Croatian"),
ms UMETA(DisplayName = "Malay"),
sk UMETA(DisplayName = "Slovak"),
da UMETA(DisplayName = "Danish"),
ta UMETA(DisplayName = "Tamil"),
uk UMETA(DisplayName = "Ukrainian"),
hu UMETA(DisplayName = "Hungarian"),
no UMETA(DisplayName = "Norwegian"),
vi UMETA(DisplayName = "Vietnamese"),
bn UMETA(DisplayName = "Bengali"),
th UMETA(DisplayName = "Thai"),
he UMETA(DisplayName = "Hebrew"),
ka UMETA(DisplayName = "Georgian"),
id UMETA(DisplayName = "Indonesian"),
te UMETA(DisplayName = "Telugu"),
gu UMETA(DisplayName = "Gujarati"),
kn UMETA(DisplayName = "Kannada"),
ml UMETA(DisplayName = "Malayalam"),
mr UMETA(DisplayName = "Marathi"),
pa UMETA(DisplayName = "Punjabi"),
};

8
Unreal/Plugins/AvatarCore_Manager/Source/AvatarCore_Manager/Public/FL_AvatarCoreManager.h

@ -4,6 +4,8 @@
#include "CoreMinimal.h"
#include "Kismet/BlueprintFunctionLibrary.h"
#include "AvatarCore_ManagerEnums.h"
#include "STTStructs.h"
// Forward declarations
class UTTSManagerBase;
class UAvatarCoreAIRealtime;
@ -46,4 +48,10 @@ class AVATARCORE_MANAGER_API UFL_AvatarCoreManager : public UBlueprintFunctionLi
UFUNCTION(BlueprintCallable, Category = "AvatarCoreManager")
static void BindTTSToAIManager(UTTSManagerBase* TTSManager, UAIBaseManager* AIManager);
UFUNCTION(BlueprintCallable, Category = "AvatarCoreManager")
static ETTSLanguage ConvertLanguageToTTS(ELanguage Language);
UFUNCTION(BlueprintCallable, Category = "AvatarCoreManager")
static ESTTLanguage ConvertLanguageToSTT(ELanguage Language);
};

BIN
Unreal/Plugins/AvatarCore_MetaHuman/Content/Animation/AnimBPs/AvatarCore_AnimInst_BodyForRetarget.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Plugins/AvatarCore_MetaHuman/Content/Animation/AnimBPs/AvatarCore_AnimInst_Face.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Plugins/AvatarCore_MetaHuman/Content/BP/MetaHuman/BaseAvatar.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Plugins/AvatarCore_MetaHuman/Content/Materials/MI_GrayTexture_Body_Cascadeur.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Plugins/AvatarCore_MetaHuman/Content/Materials/MI_GrayTexture_Head.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Plugins/AvatarCore_MetaHuman/Content/Materials/MI_GrayTexture_Head_Cascadeur.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Plugins/AvatarCore_MetaHuman/Content/Materials/M_GrayTexture_Body.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Plugins/AvatarCore_MetaHuman/Content/Materials/M_GrayTexture_Head.uasset (Stored with Git LFS)

Binary file not shown.

147
Unreal/Plugins/AvatarCore_STT/CLAUDE.md

@ -0,0 +1,147 @@
# AvatarCore_STT Plugin
Speech-to-text plugin for Unreal Engine. Audio flows through a linear chain of modules:
```
STTRecorder → [STTPreprocessor, ...] → STTProcessor → transcription result
```
The chain is assembled in `STTManagerBase::InitSTTManager` using `BindUFunction` with the string name `"OnChunkReceived"`. The UE reflection system resolves the correct virtual override at bind time, so the manager code does not need to change when the `OnChunkReceived` signature changes — provided the base UFUNCTION and every override are updated together (see Common Pitfalls).
---
## ESTTChainState — Pipeline Signal
Every audio chunk carries an `ESTTChainState` (defined in `Public/STTStructs.h`):
| Value | Meaning |
|-------|---------|
| `Processing` | Normal audio — buffer, process, pass through |
| `Finalizing` | End of utterance — flush buffers and trigger transcription |
| `Discarding` | BLOCKED/abort — clear all buffers, cancel in-flight requests |
**Rules:**
- Recorders always emit `Processing` — they have no concept of "final".
- PTT preprocessor emits `Finalizing` when the button is released (→ SILENCE) and `Discarding` when the system becomes BLOCKED.
- VAD preprocessor emits `Finalizing` on the last postroll silence chunk, then calls `UserSpeechStateChanged(SILENCE)` for UI purposes only. It emits `Discarding` from `OnUserSpeechStateChanged` when BLOCKED.
- Pass-through preprocessors (Converter, Debugger, SpeexDSP, WebRTC) forward `ChainState` unchanged. They must pass `Finalizing`/`Discarding` through even when `PCMData` is empty.
- Buffer preprocessor reacts to `ChainState` in-band — no `OnSpeechStateChanged` subscription.
- Processors react to `ChainState` only — no `OnSpeechStateChanged` subscriptions.
**Processors still call `UserSpeechStateChanged`** after transcription completes (for UI state), but they do NOT subscribe to it.
---
## Delegate Types
```cpp
// STTRecorderBase.h
DECLARE_DELEGATE_ThreeParams(FDelegateUnprocessedChunkReceived, TArray<int16>, FAudioInformation, ESTTChainState);
// STTPreprocessorBase.h
DECLARE_DELEGATE_ThreeParams(FDelegateProcessedChunk, TArray<int16>, FAudioInformation, ESTTChainState);
```
---
## Module Types
### STTRecorder (`Public/Recorder/STTRecorderBase.h`)
Produces audio chunks from a source (microphone, file, pixel stream). Fires `OnChunkReceived` delegate with `ESTTChainState::Processing`. Has no knowledge of speech state.
Implementations: `STTRecorderMicrophone` (PortAudio), `STTRecorderPrimaryMicrophone`, `STTRecorderUnrealMicrophone`, `STTRecorderDebugFile`, `STTRecorderAudioData`.
### STTPreprocessor (`Public/Preprocessor/STTPreprocessorBase.h`)
Chained in sequence. Each receives `OnChunkReceived` and fires `OnChunkProcessed` to the next stage. Both delegates carry `ESTTChainState`.
| Class | Role |
|-------|------|
| `STTPreprocessorConverter` | Stereo→mono, resample to target rate |
| `STTPreprocessorWebRTC` | WebRTC APM (echo cancel, noise suppress, AGC) |
| `STTPreprocessorSpeexDSP` | Speex noise suppression / echo cancel |
| `STTPreprocessorPTT` | Gates audio by PTT button state; emits Finalizing/Discarding |
| `STTPreprocessorVAD` | Voice activity detection; emits Finalizing after postroll, Discarding on BLOCKED |
| `STTPreprocessorBuffer` | Accumulates chunks to a fixed buffer size before forwarding; `bDiscardWhenNotFilledFullyOnce` drops short utterances |
| `STTPreprocessorDebugger` | Writes audio passing through it to a WAV file |
**STTPreprocessorBuffer — `bDiscardWhenNotFilledFullyOnce`:**
When enabled, if a `Finalizing` signal arrives before the buffer has ever dispatched a full-size `Processing` chunk in the current utterance, it sends `Discarding` instead. This silently drops very short accidental utterances without sending them to the transcription service.
### STTProcessor (`Public/Processor/STTProcessorBase.h`)
Receives the final audio. On `Finalizing`: trigger transcription. On `Discarding`: cancel/clear everything.
| Class | Backend |
|-------|---------|
| `STTProcessorAzure` | Microsoft Azure Cognitive Services (streaming, continuous) |
| `STTProcessorWhisper` | OpenAI Whisper / GPT-4o Transcribe (batch HTTP) |
| `STTParakeetProcessorBase` | Local NVIDIA NeMo Parakeet via TCP (JSON protocol) |
| `STTProcessorRealtimeAPI` | OpenAI Realtime API (forwards audio directly) |
| `STTProcessorDebugSaveWav` | Saves all received audio to a WAV file |
---
## Configuration
All modules are configured via `USTTBaseProcessorConfig` (a UObject subclass per processor type). Base settings are in `FSTTBaseSettings` (`Public/STTStructs.h`):
- `bUsePTT` — Push-to-talk vs. freespeech (VAD) mode
- `bCanInterrupt` — Whether user speech can interrupt the avatar
- `FreespeechPostRollTime` — Seconds of silence after speech before `Finalizing` is emitted
- `PTTPostRollTime` — Seconds after PTT release before `Finalizing` (currently unused — PTT emits Finalizing immediately on release)
- `MaxTalkingTime` — Hard timeout on PTT press duration
- `VADSettings` — Mode, min speech time, min amplitude threshold, speech-while-blocked threshold
- `WebRTCSettings` — Echo cancellation, noise suppression, AGC flags
- `SpeexDSPSettings` — Speex processing entries
- `STTReplacements` — Word replacement pairs applied to final transcription
- `STTSpecialWords` — Hints passed to transcription service for uncommon words
---
## Key Files
```
Public/
STTStructs.h — ESTTChainState, ESTTTalkingState, FAudioInformation, FSTTBaseSettings
STTManagerBase.h — Pipeline assembly, state machine, delegate wiring (implementation in Private/STTManagerBase.cpp)
Recorder/STTRecorderBase.h — FDelegateUnprocessedChunkReceived
Preprocessor/STTPreprocessorBase.h — FDelegateProcessedChunk, virtual OnChunkReceived
Processor/STTProcessorBase.h — virtual OnChunkReceived, OnTranscriptionResult helpers
Private/
STTManagerBase.cpp — InitSTTManager (BindUFunction chain), UserSpeechStateChanged
Preprocessor/STTPreprocessorPTT.cpp — Finalizing on SILENCE, Discarding on BLOCKED
Preprocessor/STTPreprocessorVAD.cpp — Finalizing after postroll, Discarding on BLOCKED
Preprocessor/STTPreprocessorBuffer.cpp — ChainState-driven flush, bDiscardWhenNotFilledFullyOnce
Processor/Azure/STTProcessorAzure.cpp — Streaming Azure recognition
Processor/Parakeet/STTParakeetProcessorBase.cpp — TCP JSON protocol to Python server
Processor/Whisper/STTProcessorWhisper.cpp — Batch HTTP to OpenAI
```
---
## State Machine (ESTTTalkingState)
Used for UI and for VAD/PTT internal logic only. Processors do NOT subscribe to `OnSpeechStateChanged`.
```
SILENCE ──(VAD/PTT detects speech)──▶ TALKING
TALKING ──(VAD postroll / PTT release)──▶ SILENCE [Finalizing propagates through chain]
TALKING ──(SetBlocked)──▶ BLOCKED [Discarding propagates through chain]
BLOCKED ──(SetBlocked false / interrupt)──▶ SILENCE
ANY ──(transcription complete)──▶ SILENCE
```
`TRANSCRIBING` is a transitional state set by Whisper before sending an HTTP request; other processors do not use it.
---
## Common Pitfalls
- **Pass-through preprocessors must forward `Finalizing`/`Discarding` even on empty `PCMData`.** The Converter, SpeexDSP, and WebRTC all have early-return guards for empty/misaligned data — these guards check `ChainState != Processing` before returning so control signals are not swallowed.
- **PTT emits an empty `TArray<int16>` with `Finalizing`.** Processors must guard against transcribing zero-length audio (they already do via `BufferedPCMData.Num() == 0` checks).
- **Azure runs a background thread (`FAzureRunnable`).** `StopRecognition(false)` signals a graceful stop; the runnable delivers the final result via `OnRecognized`/`OnRunnableEnded` callbacks on the game thread. `StopRecognition(true)` is a forced abort (used on `Discarding`).
- **Parakeet communicates over TCP with a local Python server** (`ParakeetSTT.bat`). In editor (`bKeepAlive=true`) the Python process is kept alive between PIE sessions to avoid restart overhead.
- **`BindUFunction` matches by string name and delegate parameter types.** All `OnChunkReceived` overrides must have exactly the same signature as the base UFUNCTION or the bind will fail at runtime.

BIN
Unreal/Plugins/AvatarCore_STT/Content/Preprocessor/STTPreprocessor250ms.uasset (Stored with Git LFS)

Binary file not shown.

8
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/AvatarCore_STT.Build.cs

@ -39,10 +39,10 @@ public class AvatarCore_STT : ModuleRules
}
);
// Ensure ThirdParty/CoquiTTS is packaged in all builds (including shipping)
string CoquiTTSPath = System.IO.Path.Combine(ModuleDirectory, "..", "ThirdParty", "Parakeet");
RuntimeDependencies.Add(System.IO.Path.Combine(CoquiTTSPath, "*.*"));
RuntimeDependencies.Add(System.IO.Path.Combine(CoquiTTSPath, "**", "*.*"));
// Ensure ThirdParty/Parakeet is packaged in all builds (including shipping)
string ParakeetTTSPath = System.IO.Path.Combine(ModuleDirectory, "..", "ThirdParty", "Parakeet");
RuntimeDependencies.Add(System.IO.Path.Combine(ParakeetTTSPath, "*.*"));
RuntimeDependencies.Add(System.IO.Path.Combine(ParakeetTTSPath, "**", "*.*"));
PublicIncludePaths.Add(Path.Combine(ModuleDirectory, "..", "ThirdParty", "fvad", "include"));
PublicAdditionalLibraries.Add(Path.Combine(ModuleDirectory, "..", "ThirdParty", "fvad", "lib", "fvad.lib"));

62
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Preprocessor/STTPreprocessorBuffer.cpp

@ -7,42 +7,62 @@
void USTTPreprocessorBuffer::InitSTTPreprocessor(USTTManagerBase* BaseSTTManager, FSTTBaseSettings InSTTBaseSettings, bool InDebugMode)
{
USTTPreprocessorBase::InitSTTPreprocessor(BaseSTTManager, InSTTBaseSettings, InDebugMode);
if (FlushOnSilence)
BaseSTTManager->OnSpeechStateChanged.AddUniqueDynamic(this, &USTTPreprocessorBuffer::OnSpeechChanged);
}
void USTTPreprocessorBuffer::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation)
void USTTPreprocessorBuffer::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState)
{
if (ChainState == ESTTChainState::Discarding)
{
Buffer.Empty();
bHasFilledFully = false;
OnChunkProcessed.ExecuteIfBound({}, AudioInformation, ESTTChainState::Discarding);
return;
}
Buffer.Append(PCMData);
BufferAudioInformation = AudioInformation;
if (ChainState == ESTTChainState::Finalizing)
{
if (bDiscardWhenNotFilledFullyOnce && !bHasFilledFully)
{
Buffer.Empty();
OnChunkProcessed.ExecuteIfBound({}, BufferAudioInformation, ESTTChainState::Discarding);
}
else
{
if (Buffer.Num() > 0)
OnChunkProcessed.ExecuteIfBound(Buffer, BufferAudioInformation, ESTTChainState::Finalizing);
Buffer.Empty();
bHasFilledFully = false;
}
return;
}
const uint32 SampleCount = GetSampleCount(AudioInformation);
if (CanOverflow) {
if ((uint32)Buffer.Num() > GetSampleCount(AudioInformation)) {
OnChunkProcessed.ExecuteIfBound(Buffer, AudioInformation);
if ((uint32)Buffer.Num() > SampleCount) {
OnChunkProcessed.ExecuteIfBound(Buffer, AudioInformation, ESTTChainState::Processing);
Buffer.Empty();
bHasFilledFully = true;
}
}
else {
while ((uint32)Buffer.Num() > GetSampleCount(AudioInformation)) {
TArray<int16> TempData;
TempData.Append(&Buffer[0], GetSampleCount(AudioInformation));
OnChunkProcessed.ExecuteIfBound(TempData, AudioInformation);
TempData.Empty();
TempData.Append(&Buffer[GetSampleCount(AudioInformation)], Buffer.Num() - GetSampleCount(AudioInformation));
Buffer.Empty();
TArray<int16> TempData;
while ((uint32)Buffer.Num() > SampleCount) {
TempData.Reset();
TempData.Append(&Buffer[0], SampleCount);
OnChunkProcessed.ExecuteIfBound(TempData, AudioInformation, ESTTChainState::Processing);
bHasFilledFully = true;
TempData.Reset();
TempData.Append(&Buffer[SampleCount], Buffer.Num() - SampleCount);
Buffer.Reset();
Buffer.Append(TempData);
}
}
}
}
uint32 USTTPreprocessorBuffer::GetSampleCount(FAudioInformation AudioInformation)
{
return (uint32)((AudioInformation.SampleRate) / 1000 * (float)BufferSize);
}
// Forwards the buffered samples downstream and empties the buffer when the speech state
// becomes SILENCE or TRANSCRIBING; every other state is ignored.
void USTTPreprocessorBuffer::OnSpeechChanged(ESTTTalkingState NewSpeechState)
{
    const bool bShouldFlush =
        (NewSpeechState == ESTTTalkingState::SILENCE) ||
        (NewSpeechState == ESTTTalkingState::TRANSCRIBING);
    if (!bShouldFlush)
    {
        return;
    }

    // Hand over whatever has accumulated, then start again with an empty buffer.
    OnChunkProcessed.ExecuteIfBound(Buffer, BufferAudioInformation);
    Buffer.Empty();
}

14
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Preprocessor/STTPreprocessorConverter.cpp

@ -4,12 +4,14 @@
#include "Preprocessor/STTPreprocessorConverter.h"
#include "STTManagerBase.h"
void USTTPreprocessorConverter::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation)
{
// Check if input data is valid
void USTTPreprocessorConverter::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState)
{
if (PCMData.Num() == 0)
{
STTManager->OnSTTError.Broadcast(TEXT("STTPreprocessorConverter: Received empty PCM data."));
if (ChainState != ESTTChainState::Processing)
OnChunkProcessed.ExecuteIfBound({}, AudioInformation, ChainState);
else
STTManager->OnSTTError.Broadcast(TEXT("STTPreprocessorConverter: Received empty PCM data."));
return;
}
@ -83,11 +85,11 @@ void USTTPreprocessorConverter::OnChunkReceived(TArray<int16> PCMData, FAudioInf
return;
}
// Pass the resampled data to the next processor in the chain
OnChunkProcessed.ExecuteIfBound(ResampledPCMData, TargetAudioInformation);
OnChunkProcessed.ExecuteIfBound(ResampledPCMData, TargetAudioInformation, ChainState);
}
else {
// No resampling needed, pass the converted data as is
// Log some final sample values
OnChunkProcessed.ExecuteIfBound(ConvertedPCMData, TargetAudioInformation);
OnChunkProcessed.ExecuteIfBound(ConvertedPCMData, TargetAudioInformation, ChainState);
}
}

4
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Preprocessor/STTPreprocessorDebugger.cpp

@ -21,7 +21,7 @@ void USTTPreprocessorDebugger::InitSTTPreprocessor(USTTManagerBase* BaseSTTManag
DataBytesWritten = 0;
}
void USTTPreprocessorDebugger::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation)
void USTTPreprocessorDebugger::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState)
{
STTManager->OnSTTLog.Broadcast(FString::Printf(TEXT("Audio Chunks passing through the STTPreprocessorDebugger. %i PCM int16 Array entries."), PCMData.Num()));
@ -50,7 +50,7 @@ void USTTPreprocessorDebugger::OnChunkReceived(TArray<int16> PCMData, FAudioInfo
DataBytesWritten += static_cast<uint32>(NumBytes);
}
OnChunkProcessed.ExecuteIfBound(PCMData, AudioInformation);
OnChunkProcessed.ExecuteIfBound(PCMData, AudioInformation, ChainState);
}
void USTTPreprocessorDebugger::DestroySTTPreprocessor()

26
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Preprocessor/STTPreprocessorPTT.cpp

@ -6,6 +6,7 @@
// Initializes the push-to-talk preprocessor: runs the base-class initialization with the same
// arguments, then subscribes this object to the manager's button-state and speech-state delegates.
// NOTE(review): BaseSTTManager is dereferenced without a validity check - confirm callers never pass null.
void USTTPreprocessorPTT::InitSTTPreprocessor(USTTManagerBase* BaseSTTManager, FSTTBaseSettings InSTTBaseSettings, bool InDebugMode)
{
USTTPreprocessorBase::InitSTTPreprocessor(BaseSTTManager, InSTTBaseSettings, InDebugMode);
// Button presses/releases are delivered to OnPTTStateChanged.
BaseSTTManager->OnSTTButtonStateChanged.AddUniqueDynamic(this, &USTTPreprocessorPTT::OnPTTStateChanged);
// Speech-state transitions are delivered to OnUserSpeechStateChanged.
BaseSTTManager->OnSpeechStateChanged.AddUniqueDynamic(this, &USTTPreprocessorPTT::OnUserSpeechStateChanged);
}
@ -14,14 +15,29 @@ void USTTPreprocessorPTT::DestroySTTPreprocessor()
}
// Forwards audio downstream only while PTTPressed is set; chunks arriving otherwise are dropped.
// The incoming ChainState is not consulted: forwarded chunks are always tagged Processing.
void USTTPreprocessorPTT::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState)
{
    if (PTTPressed)
    {
        // Remember the format of the most recent forwarded chunk.
        LastAudioInformation = AudioInformation;
        OnChunkProcessed.ExecuteIfBound(PCMData, AudioInformation, ESTTChainState::Processing);
    }
}
void USTTPreprocessorPTT::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation)
void USTTPreprocessorPTT::OnPTTStateChanged(bool IsPressed)
{
if(NewSpeechState == ESTTTalkingState::TALKING)
OnChunkProcessed.ExecuteIfBound(PCMData, AudioInformation);
PTTPressed = IsPressed;
if (!IsPressed)
OnChunkProcessed.ExecuteIfBound({}, LastAudioInformation, ESTTChainState::Finalizing);
}
void USTTPreprocessorPTT::OnUserSpeechStateChanged(ESTTTalkingState InNewSpeechState)
void USTTPreprocessorPTT::OnUserSpeechStateChanged(ESTTTalkingState NewSpeechState)
{
NewSpeechState = InNewSpeechState;
if(NewSpeechState==ESTTTalkingState::BLOCKED)
{
PTTPressed = false;
OnChunkProcessed.ExecuteIfBound({}, LastAudioInformation, ESTTChainState::Discarding);
}
}

12
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Preprocessor/STTPreprocessorSpeexDSP.cpp

@ -39,17 +39,19 @@ void USTTPreprocessorSpeexDSP::PostInitProperties()
Super::PostInitProperties();
}
void USTTPreprocessorSpeexDSP::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation)
void USTTPreprocessorSpeexDSP::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState)
{
// Expect 16 kHz mono; chunk size may be 30 ms (480 samples). We process in 10 ms frames (160 samples)
if (PCMData.Num() % FrameSizeSamples != 0)
if (PCMData.Num() == 0 || PCMData.Num() % FrameSizeSamples != 0)
{
UE_LOG(LogTemp, Warning, TEXT("PCMData size (%d) is not a multiple of frame size (%d)!"), PCMData.Num(), FrameSizeSamples);
if (ChainState != ESTTChainState::Processing)
OnChunkProcessed.ExecuteIfBound(PCMData, AudioInformation, ChainState);
else
UE_LOG(LogTemp, Warning, TEXT("PCMData size (%d) is not a multiple of frame size (%d)!"), PCMData.Num(), FrameSizeSamples);
return;
}
int vadReturnvalue = 0;
if (vadReturnvalue > 1 || m_vad < 1) {
OnChunkProcessed.ExecuteIfBound(PCMData, AudioInformation);
OnChunkProcessed.ExecuteIfBound(PCMData, AudioInformation, ChainState);
}
}

39
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Preprocessor/STTPreprocessorVAD.cpp

@ -17,10 +17,15 @@ void USTTPreprocessorVAD::DestroySTTPreprocessor()
fvad = nullptr;
}
void USTTPreprocessorVAD::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation)
void USTTPreprocessorVAD::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState)
{
if(STTBaseSettings.bUsePTT)
OnChunkProcessed.ExecuteIfBound(PCMData, AudioInformation);
{
OnChunkProcessed.ExecuteIfBound(PCMData, AudioInformation, ChainState);
return;
}
LastAudioInformation = AudioInformation;
if (!fvad)
{
@ -61,8 +66,6 @@ void USTTPreprocessorVAD::OnChunkReceived(TArray<int16> PCMData, FAudioInformati
const float MeanSquares = static_cast<float>(SumSquares / static_cast<double>(NumFrames));
const float Rms = FMath::Sqrt(FMath::Max(MeanSquares, 1.e-12f));
const float Dbfs = 20.0f * FMath::LogX(10.0f, Rms);
//UE_LOG(LogTemp, Warning, TEXT("Dbfs %f"), Dbfs);
isLoadEnough = (Dbfs < static_cast<float>(STTBaseSettings.VADSettings.VAD_MinSpeechAmplitude));
isLoadEnough = (Dbfs > static_cast<float>(STTBaseSettings.VADSettings.VAD_MinSpeechAmplitude));
}
@ -83,31 +86,32 @@ void USTTPreprocessorVAD::OnChunkReceived(TArray<int16> PCMData, FAudioInformati
case -1:
STTManager->OnSTTError.Broadcast(FString::Printf(TEXT("Invalid frame length %i entries in buffer"), PCMData.Num()));
break;
case 0:
case 0:
if (talkingState == ESTTTalkingState::TALKING) {
if (timeInStateInSeconds > STTBaseSettings.FreespeechPostRollTime) {
STTManager->UserSpeechStateChanged(ESTTTalkingState::SILENCE);
if (timeInStateInSeconds > STTBaseSettings.FreespeechPostRollTime) {
OnChunkProcessed.ExecuteIfBound(PCMData, AudioInformation, ESTTChainState::Finalizing);
STTManager->UserSpeechStateChanged(ESTTTalkingState::SILENCE); // UI only
}
else {
OnChunkProcessed.ExecuteIfBound(PCMData, AudioInformation); //Even send silence if threshold is not met
}
OnChunkProcessed.ExecuteIfBound(PCMData, AudioInformation, ESTTChainState::Processing); //Even send silence if threshold is not met
}
}
break;
case 1:
if (talkingState == ESTTTalkingState::BLOCKED) {
if (talkingState == ESTTTalkingState::BLOCKED) {
if(timeInStateInSeconds > STTBaseSettings.VADSettings.VAD_SpeechWhileBlocked)
{
STTManager->OnSpeechDetectedWhileBlocked.Broadcast();
if (STTBaseSettings.bCanInterrupt)
if (STTBaseSettings.bCanInterrupt)
{
STTManager->SetBlocked(false);
if (Buffer.Num() > 0)
{
Buffer.Append(PCMData);
OnChunkProcessed.ExecuteIfBound(Buffer, AudioInformation);
OnChunkProcessed.ExecuteIfBound(Buffer, AudioInformation, ESTTChainState::Processing);
Buffer.Empty();
}
}
}
}
else
Buffer.Append(PCMData); //Buffer Data
@ -120,15 +124,15 @@ void USTTPreprocessorVAD::OnChunkReceived(TArray<int16> PCMData, FAudioInformati
if(Buffer.Num() > 0)
{
Buffer.Append(PCMData);
OnChunkProcessed.ExecuteIfBound(Buffer, AudioInformation);
OnChunkProcessed.ExecuteIfBound(Buffer, AudioInformation, ESTTChainState::Processing);
Buffer.Empty();
}
}
}
else
Buffer.Append(PCMData); //Buffer Data
}
else
OnChunkProcessed.ExecuteIfBound(PCMData, AudioInformation);
OnChunkProcessed.ExecuteIfBound(PCMData, AudioInformation, ESTTChainState::Processing);
break;
}
}
@ -140,5 +144,6 @@ void USTTPreprocessorVAD::OnUserSpeechStateChanged(ESTTTalkingState NewSpeechSta
if (NewSpeechState == ESTTTalkingState::BLOCKED)
{
Buffer.Empty();
}
OnChunkProcessed.ExecuteIfBound({}, LastAudioInformation, ESTTChainState::Discarding);
}
}

9
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Preprocessor/STTPreprocessorWebRTC.cpp

@ -144,10 +144,13 @@ void USTTPreprocessorWebRTC::PostInitProperties()
}
void USTTPreprocessorWebRTC::OnChunkReceived(TArray<int16> PCMData,
FAudioInformation AudioInformation)
FAudioInformation AudioInformation,
ESTTChainState ChainState)
{
if (PCMData.Num() <= 0)
if (PCMData.Num() == 0)
{
if (ChainState != ESTTChainState::Processing)
OnChunkProcessed.ExecuteIfBound({}, AudioInformation, ChainState);
return;
}
// Remember the *actual* capture format once
@ -184,7 +187,7 @@ void USTTPreprocessorWebRTC::OnChunkReceived(TArray<int16> PCMData,
{
// Forward as 48 kHz; downstream converter can resample to 16k or whatever is needed
AudioInformation.SampleRate = WebRTCChannel.GetSampleRate();
OnChunkProcessed.ExecuteIfBound(Processed, AudioInformation);
OnChunkProcessed.ExecuteIfBound(Processed, AudioInformation, ChainState);
}
}

11
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/AzureRunnable.cpp

@ -30,6 +30,9 @@ bool FAzureRunnable::Init()
SpeechInputStream = SpeechSDK::Audio::AudioInputStream::CreatePushStream();
AudioConfig = SpeechSDK::Audio::AudioConfig::FromStreamInput(SpeechInputStream);
// Add before Recognizer = SpeechSDK::SpeechRecognizer::FromConfig(...)
Config->SetProperty(SpeechSDK::PropertyId::Speech_SegmentationSilenceTimeoutMs, "700");
try {
Recognizer = SpeechSDK::SpeechRecognizer::FromConfig(Config, AudioConfig);
@ -102,6 +105,14 @@ bool FAzureRunnable::Init()
});
});
Recognizer->Canceled.Connect([WeakOwner, Self](const auto& EventArgs) {
FString Reason = UTF8_TO_TCHAR(EventArgs.ErrorDetails.c_str());
AsyncTask(ENamedThreads::GameThread, [WeakOwner, Reason, Self]() {
if (WeakOwner.IsValid())
WeakOwner->OnAzureError(FString::Printf(TEXT("Canceled: %s"), *Reason));
});
});
return Recognizer != nullptr;
}

151
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/STTProcessorAzure.cpp

@ -39,12 +39,12 @@ void USTTProcessorAzure::InitSTTProcessor(USTTManagerBase* BaseSTTManager, USTTB
// Set properties
FString tmpAzureLanguages = "";
for (int i = 0; i < AzureProcessorConfig->AzureLanguages.Num(); i++)
for (int i = 0; i < AzureProcessorConfig->BaseSettings.STTLanguages.Num(); i++)
{
if (i > 0) {
tmpAzureLanguages += ",";
}
tmpAzureLanguages += USTTProcessorAzure::AzureEnumToString(AzureProcessorConfig->AzureLanguages[i]);
}
tmpAzureLanguages += USTTProcessorAzure::AzureEnumToString(AzureProcessorConfig->BaseSettings.STTLanguages[i]);
}
std::string LanguageString = TCHAR_TO_UTF8(*tmpAzureLanguages);
config->SetProperty(SpeechSDK::PropertyId::SpeechServiceConnection_AutoDetectSourceLanguages, LanguageString);
@ -68,47 +68,34 @@ void USTTProcessorAzure::DestroySTTProcessor()
// Stops recognition.
}
void USTTProcessorAzure::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation)
void USTTProcessorAzure::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState)
{
if (IsValid(STTManager) && STTManager->IsBlocked())
return;
LastChainState = ChainState;
if (!AzureRunnable || !bTranscriptionRunning) //Runable not ready or previous session ended
if (ChainState == ESTTChainState::Discarding)
{
USTTProcessorAzure::StartRecognition();
StopRecognition(true);
return;
}
// Get the pointer to the raw PCM data
int16* rawPCMData = PCMData.GetData();
// Calculate the size of the data buffer in bytes
uint32_t bufferSize = PCMData.Num() * sizeof(int16);
// Write the raw PCM data to the PushAudioInputStream, preserving const qualifier
AzureRunnable->AddAudioChunk(PCMData);
if (PCMData.Num() > 0)
{
if (!AzureRunnable || !bTranscriptionRunning)
USTTProcessorAzure::StartRecognition();
AzureRunnable->AddAudioChunk(PCMData);
}
if (ChainState == ESTTChainState::Finalizing)
StopRecognition(false);
}
void USTTProcessorAzure::ChangeAzureLanguage(TArray<EAzureLanguages> InAzureLanguages)
void USTTProcessorAzure::ChangeAzureLanguage(TArray<ESTTLanguage> InLanguages)
{
AzureProcessorConfig->AzureLanguages = InAzureLanguages;
AzureProcessorConfig->BaseSettings.STTLanguages = InLanguages;
if (bDebugMode && IsValid(STTManager))
STTManager->OnSTTLog.Broadcast(TEXT("Azure languages changed."));
STTManager->OnSTTLog.Broadcast(TEXT("Azure languages changed."));
}
// Reacts to a change of the user's speech state.
//   BLOCKED                -> StopRecognition(true).
//   SILENCE / TRANSCRIBING -> StopRecognition(false) while a runnable exists; otherwise any
//                             accumulated intermediate text is published as the result and cleared.
// All other states are ignored.
void USTTProcessorAzure::OnSpeechStateChanged(ESTTTalkingState TalkingState)
{
    if (TalkingState == ESTTTalkingState::BLOCKED)
    {
        StopRecognition(true);
        return;
    }

    const bool bSpeechEnded =
        (TalkingState == ESTTTalkingState::SILENCE) ||
        (TalkingState == ESTTTalkingState::TRANSCRIBING);
    if (!bSpeechEnded)
    {
        return;
    }

    if (AzureRunnable)
    {
        // Only signal the stop here; the runnable delivers the final result via OnRecognized/OnRunnableEnded.
        StopRecognition(false);
        return;
    }

    if (!intermediateResult.IsEmpty())
    {
        // No runnable is pending, so the accumulated text is sent right away.
        USTTProcessorBase::OnTranscriptionResult(TranscriptionCounter, intermediateResult, DetectedLanguage);
        intermediateResult = "";
    }
}
void USTTProcessorAzure::StartRecognition()
{
@ -158,7 +145,7 @@ void USTTProcessorAzure::OnRecognized(const FString& RecognizedText, const FStri
}
if (!IsValid(STTManager))
return;
if (STTManager->IsBlocked())
if (STTManager->IsBlocked() || LastChainState == ESTTChainState::Discarding)
return;
this->DetectedLanguage = Language;
@ -167,7 +154,7 @@ void USTTProcessorAzure::OnRecognized(const FString& RecognizedText, const FStri
intermediateResult += " " + RecognizedText;
else
intermediateResult = RecognizedText;
if (STTManager->GetCurrentSpeechState() == ESTTTalkingState::TALKING) { //User still talking
if (LastChainState == ESTTChainState::Processing) { //User still talking
USTTProcessorBase::OnTranscriptionIntermediateResult(TranscriptionCounter, *intermediateResult);
}
else {
@ -185,8 +172,8 @@ void USTTProcessorAzure::OnConnectionSuccess()
// Connection test runnable returns from Run() before posting this callback,
// so Run() is already done — direct null is safe.
AzureRunnable = nullptr;
STTManager->OnSTTLog.Broadcast(TEXT("STTProcessor Azure Speech initialized successfully."));
STTManager->OnReady.Broadcast();
STTManager->OnSpeechStateChanged.AddUniqueDynamic(this, &USTTProcessorAzure::OnSpeechStateChanged);
}
void USTTProcessorAzure::OnRunnableEnded(FAzureRunnable* Caller)
@ -204,7 +191,7 @@ void USTTProcessorAzure::OnRunnableEnded(FAzureRunnable* Caller)
intermediateResult.Empty();
}
if (!STTManager->IsBlocked())
if (LastChainState == ESTTChainState::Finalizing && IsValid(STTManager) && !STTManager->IsBlocked())
STTManager->UserSpeechStateChanged(ESTTTalkingState::SILENCE);
}
}
@ -220,7 +207,7 @@ void USTTProcessorAzure::OnRunnableEnded(FAzureRunnable* Caller)
intermediateResult.Empty();
}
if (!STTManager->IsBlocked())
if (LastChainState == ESTTChainState::Finalizing && IsValid(STTManager) && !STTManager->IsBlocked())
STTManager->UserSpeechStateChanged(ESTTTalkingState::SILENCE);
}
}
@ -240,54 +227,58 @@ void USTTProcessorAzure::OnAzureError(FString Error)
if (IsValid(STTManager)) {
STTManager->OnSTTError.Broadcast(Error);
STTManager->UserSpeechStateChanged(ESTTTalkingState::SILENCE);
if(!STTManager->IsBlocked())
STTManager->UserSpeechStateChanged(ESTTTalkingState::SILENCE);
}
}
FString USTTProcessorAzure::AzureEnumToString(EAzureLanguages Language)
FString USTTProcessorAzure::AzureEnumToString(ESTTLanguage Language)
{
switch (Language)
{
case EAzureLanguages::German_Germany:
return "de-DE";
break;
case EAzureLanguages::English_UK:
return "en-GB";
break;
case EAzureLanguages::English_India:
return "en-IN";
break;
case EAzureLanguages::English_US:
return "en-US";
break;
case EAzureLanguages::Spanish_Spain:
return "es-ES";
break;
case EAzureLanguages::Spanish_Mexico:
return "es-MX";
break;
case EAzureLanguages::French_France:
return "fr-FR";
break;
case EAzureLanguages::Hindi_India:
return "hi-IN";
break;
case EAzureLanguages::Italian_Italy:
return "it-IT";
break;
case EAzureLanguages::Japanese_Japan:
return "ja-JP";
break;
case EAzureLanguages::Korean_Korea:
return "ko-KR";
break;
case EAzureLanguages::Portuguese_Brazil:
return "pt-BR";
break;
case EAzureLanguages::Chinese_Simplified:
return "zh-CN";
break;
case ESTTLanguage::en: return "en-US";
case ESTTLanguage::de: return "de-DE";
case ESTTLanguage::fr: return "fr-FR";
case ESTTLanguage::es: return "es-ES";
case ESTTLanguage::pt: return "pt-BR";
case ESTTLanguage::zh: return "zh-CN";
case ESTTLanguage::ja: return "ja-JP";
case ESTTLanguage::hi: return "hi-IN";
case ESTTLanguage::it: return "it-IT";
case ESTTLanguage::ko: return "ko-KR";
case ESTTLanguage::nl: return "nl-NL";
case ESTTLanguage::pl: return "pl-PL";
case ESTTLanguage::ru: return "ru-RU";
case ESTTLanguage::sv: return "sv-SE";
case ESTTLanguage::tr: return "tr-TR";
case ESTTLanguage::tl: return "fil-PH";
case ESTTLanguage::bg: return "bg-BG";
case ESTTLanguage::ro: return "ro-RO";
case ESTTLanguage::ar: return "ar-SA";
case ESTTLanguage::cs: return "cs-CZ";
case ESTTLanguage::el: return "el-GR";
case ESTTLanguage::fi: return "fi-FI";
case ESTTLanguage::hr: return "hr-HR";
case ESTTLanguage::ms: return "ms-MY";
case ESTTLanguage::sk: return "sk-SK";
case ESTTLanguage::da: return "da-DK";
case ESTTLanguage::ta: return "ta-IN";
case ESTTLanguage::uk: return "uk-UA";
case ESTTLanguage::hu: return "hu-HU";
case ESTTLanguage::no: return "nb-NO";
case ESTTLanguage::vi: return "vi-VN";
case ESTTLanguage::bn: return "bn-IN";
case ESTTLanguage::th: return "th-TH";
case ESTTLanguage::he: return "he-IL";
case ESTTLanguage::ka: return "ka-GE";
case ESTTLanguage::id: return "id-ID";
case ESTTLanguage::te: return "te-IN";
case ESTTLanguage::gu: return "gu-IN";
case ESTTLanguage::kn: return "kn-IN";
case ESTTLanguage::ml: return "ml-IN";
case ESTTLanguage::mr: return "mr-IN";
case ESTTLanguage::pa: return "pa-IN";
default: return "UNDEFINED";
}
return "UNDEFINED";
}

103
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Parakeet/STTParakeetProcessorBase.cpp

@ -36,9 +36,15 @@ void USTTParakeetProcessorBase::InitSTTProcessor(USTTManagerBase* BaseSTTManager
}
#if WITH_EDITOR
bIsEditor = GIsEditor;
if (ParakeetConfig->KeepAliveRule == ESTTKeepAliveRule::Never)
bKeepAlive = false;
else
bKeepAlive = true;
#else
bIsEditor = false;
if (ParakeetConfig->KeepAliveRule == ESTTKeepAliveRule::Always)
bKeepAlive = true;
else
bKeepAlive = false;
#endif
// Resolve batch file path
@ -82,7 +88,7 @@ void USTTParakeetProcessorBase::InitSTTProcessor(USTTManagerBase* BaseSTTManager
// Editor: try to connect to existing instance (with brief delay for server to be ready after previous disconnect)
// Non-editor: kill stale processes and launch fresh
if (!bIsEditor)
if (!bKeepAlive)
{
KillExistingParakeetProcesses();
if (UWorld* World = STTManager->GetWorld())
@ -127,7 +133,6 @@ void USTTParakeetProcessorBase::DestroySTTProcessor()
// Unbind delegate
if (IsValid(STTManager))
{
STTManager->OnSpeechStateChanged.RemoveDynamic(this, &USTTParakeetProcessorBase::OnSpeechStateChanged);
if (UWorld* World = STTManager->GetWorld())
{
@ -144,7 +149,7 @@ void USTTParakeetProcessorBase::DestroySTTProcessor()
}
// Kill local process if we spawned it (non-editor only)
if (!bIsEditor && bLaunchedLocalServer)
if (!bKeepAlive && bLaunchedLocalServer)
{
if (ParakeetProcHandle.IsValid())
{
@ -154,7 +159,7 @@ void USTTParakeetProcessorBase::DestroySTTProcessor()
bLaunchedLocalServer = false;
}
if (!bIsEditor)
if (!bKeepAlive)
{
KillExistingParakeetProcesses();
}
@ -171,12 +176,15 @@ void USTTParakeetProcessorBase::DestroySTTProcessor()
// Audio chunk handling
// ---------------------------------------------------------------------------
void USTTParakeetProcessorBase::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation)
void USTTParakeetProcessorBase::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState)
{
if (IsValid(STTManager) && STTManager->IsBlocked())
if (ChainState == ESTTChainState::Discarding)
{
if (bDebugMode && IsValid(STTManager))
STTManager->OnSTTLog.Broadcast(TEXT("Parakeet: OnChunkReceived skipped (blocked)"));
TSharedPtr<FJsonObject> Obj = MakeShared<FJsonObject>();
Obj->SetStringField(TEXT("type"), TEXT("clear"));
SendJsonMessage(Obj);
IntermediateResult.Empty();
bTranscriptionRunning = false;
return;
}
@ -187,22 +195,27 @@ void USTTParakeetProcessorBase::OnChunkReceived(TArray<int16> PCMData, FAudioInf
return;
}
// Encode PCM16 data as base64
TArray<uint8> RawBytes;
RawBytes.Append(reinterpret_cast<const uint8*>(PCMData.GetData()), PCMData.Num() * sizeof(int16));
FString B64 = FBase64::Encode(RawBytes);
if (PCMData.Num() > 0)
{
// Encode PCM16 data as base64
TArray<uint8> RawBytes;
RawBytes.Append(reinterpret_cast<const uint8*>(PCMData.GetData()), PCMData.Num() * sizeof(int16));
FString B64 = FBase64::Encode(RawBytes);
// Build JSON message
TSharedPtr<FJsonObject> Obj = MakeShared<FJsonObject>();
Obj->SetStringField(TEXT("type"), TEXT("audio"));
Obj->SetStringField(TEXT("data_b64"), B64);
TSharedPtr<FJsonObject> Obj = MakeShared<FJsonObject>();
Obj->SetStringField(TEXT("type"), TEXT("audio"));
Obj->SetStringField(TEXT("data_b64"), B64);
SendJsonMessage(Obj);
SendJsonMessage(Obj);
if (!bTranscriptionRunning)
USTTProcessorBase::OnTranscriptionStarted();
}
// Start transcription tracking if not already running
if (!bTranscriptionRunning)
if (ChainState == ESTTChainState::Finalizing)
{
USTTProcessorBase::OnTranscriptionStarted();
TSharedPtr<FJsonObject> Obj = MakeShared<FJsonObject>();
Obj->SetStringField(TEXT("type"), TEXT("finalize"));
SendJsonMessage(Obj);
}
}
@ -210,43 +223,6 @@ void USTTParakeetProcessorBase::OnChunkReceived(TArray<int16> PCMData, FAudioInf
// Speech state handling
// ---------------------------------------------------------------------------
// Reacts to a change of the user's speech state by telling the Parakeet server what to do.
//   BLOCKED                -> sends a "clear" message, drops the local intermediate text and
//                             marks the transcription as not running.
//   SILENCE / TRANSCRIBING -> sends "finalize" while a transcription is running; otherwise
//                             publishes any leftover intermediate text and clears it.
// Nothing happens while the manager is invalid or reports that STT is not fully initialized.
void USTTParakeetProcessorBase::OnSpeechStateChanged(ESTTTalkingState TalkingState)
{
    // The manager must be validated before it is dereferenced for the initialization query.
    if (!IsValid(STTManager) || !STTManager->IsSTTFullyInitialized())
        return;

    if (bDebugMode)
        STTManager->OnSTTLog.Broadcast(FString::Printf(TEXT("Parakeet: OnSpeechStateChanged -> %d"), (int32)TalkingState));

    if (TalkingState == ESTTTalkingState::BLOCKED)
    {
        // Discard everything
        TSharedPtr<FJsonObject> Obj = MakeShared<FJsonObject>();
        Obj->SetStringField(TEXT("type"), TEXT("clear"));
        SendJsonMessage(Obj);
        IntermediateResult.Empty();
        bTranscriptionRunning = false;
    }
    else if (TalkingState == ESTTTalkingState::SILENCE || TalkingState == ESTTTalkingState::TRANSCRIBING)
    {
        if (bTranscriptionRunning)
        {
            // Request final transcription
            TSharedPtr<FJsonObject> Obj = MakeShared<FJsonObject>();
            Obj->SetStringField(TEXT("type"), TEXT("finalize"));
            SendJsonMessage(Obj);
        }
        else if (!IntermediateResult.IsEmpty())
        {
            // Intermediate text was accumulated but the transcription has already ended.
            USTTProcessorBase::OnTranscriptionIntermediateResult(TranscriptionCounter, IntermediateResult);
            IntermediateResult.Empty();
        }
    }
}
// ---------------------------------------------------------------------------
// Callbacks from FParakeetRunnable (game thread)
@ -254,13 +230,12 @@ void USTTParakeetProcessorBase::OnSpeechStateChanged(ESTTTalkingState TalkingSta
void USTTParakeetProcessorBase::OnParakeetReady()
{
if (bDebugMode && IsValid(STTManager))
STTManager->OnSTTLog.Broadcast(TEXT("Parakeet server READY"));
if (IsValid(STTManager))
STTManager->OnSTTLog.Broadcast(TEXT("STTManager Parakeet initialized successfully. Server READY"));
if (IsValid(STTManager))
{
STTManager->OnSTTFullyInitialized();
STTManager->OnSpeechStateChanged.AddUniqueDynamic(this, &USTTParakeetProcessorBase::OnSpeechStateChanged);
}
}
@ -466,7 +441,7 @@ bool USTTParakeetProcessorBase::StartParakeetProcess()
// Python side when the UObject (and its pipe-reader thread) is destroyed
// at the end of a PIE session while the Python process is kept alive for
// reconnect, which stalls the transcription thread on subsequent connects.
if (bIsEditor)
if (bKeepAlive)
{
ParakeetProcHandle = FPlatformProcess::CreateProc(*ParakeetBatPath, *Args, false, !bDebugMode, !bDebugMode, &ProcId, 0, nullptr, nullptr, nullptr);
if (!ParakeetProcHandle.IsValid())
@ -493,7 +468,7 @@ bool USTTParakeetProcessorBase::StartParakeetProcess()
FPlatformProcess::CreatePipe(StdErrReadPipe, StdErrWritePipe);
//Removed the piping for DebugMode - especially in editor builds, pipes break after the first connect
ParakeetProcHandle = FPlatformProcess::CreateProc(*ParakeetBatPath, *Args, false, !(bDebugMode && bIsEditor), !(bDebugMode && bIsEditor), &ProcId, 0, nullptr, (bDebugMode && bIsEditor) ? nullptr : StdOutWritePipe, bDebugMode ? nullptr : StdErrWritePipe);
ParakeetProcHandle = FPlatformProcess::CreateProc(*ParakeetBatPath, *Args, false, !(bDebugMode && bKeepAlive), !(bDebugMode && bKeepAlive), &ProcId, 0, nullptr, (bDebugMode && bKeepAlive) ? nullptr : StdOutWritePipe, bDebugMode ? nullptr : StdErrWritePipe);
if (!ParakeetProcHandle.IsValid())
{
if (IsValid(STTManager))

5
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/RealtimeAPI/STTProcessorRealtimeAPI.cpp

@ -24,8 +24,11 @@ void USTTProcessorRealtimeAPI::ClearToRealtimeAPI()
AvatarCoreAIRealtime = nullptr;
}
void USTTProcessorRealtimeAPI::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation)
// Passes the received samples to the realtime API object, if one is set.
// Chunks tagged Discarding are dropped; AudioInformation is not used.
void USTTProcessorRealtimeAPI::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState)
{
    const bool bDiscarding = (ChainState == ESTTChainState::Discarding);
    if (!bDiscarding && AvatarCoreAIRealtime != nullptr)
    {
        AvatarCoreAIRealtime->OnSTTAudioChunk(PCMData);
    }
}

7
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/STTProcessorDebugSaveWav.cpp

@ -13,8 +13,13 @@ void USTTProcessorDebugSaveWav::DestroySTTProcessor()
SaveWave(FilePath);
}
void USTTProcessorDebugSaveWav::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation)
// Collects incoming samples in StoredPCMData together with the most recent audio format.
// A Discarding chunk clears the samples collected so far and leaves the stored format untouched.
void USTTProcessorDebugSaveWav::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState)
{
    if (ChainState != ESTTChainState::Discarding)
    {
        StoredAudioInformation = AudioInformation;
        StoredPCMData.Append(PCMData);
    }
    else
    {
        StoredPCMData.Empty();
    }
}

88
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Whisper/STTProcessorWhisper.cpp

@ -59,19 +59,15 @@ void USTTProcessorWhisper::InitSTTProcessor(USTTManagerBase* BaseSTTManager, UST
NormalizeWhisperURL();
if (IsValid(STTManager))
{
STTManager->OnSpeechStateChanged.AddUniqueDynamic(this, &USTTProcessorWhisper::OnSpeechStateChanged);
}
PerformHealthCheck();
STTManager->OnSTTLog.Broadcast(FString::Printf(TEXT("STTProcessor OpenAI %s initialized successfully."), *TranscribeModelEnumToString(WhisperProcessorConfig->Model)));
}
void USTTProcessorWhisper::ClearSTTProcessor()
{
USTTProcessorBase::ClearSTTProcessor();
BufferedPCMData.Empty();
bHasBufferedAudioInformation = false;
for (TSharedPtr<IHttpRequest, ESPMode::ThreadSafe>& Request : ActiveRequests)
{
@ -90,92 +86,28 @@ void USTTProcessorWhisper::DestroySTTProcessor()
STTManager = nullptr;
}
void USTTProcessorWhisper::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation)
void USTTProcessorWhisper::OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState)
{
if (CurrentTalkingState != ESTTTalkingState::TALKING)
return;
if (PCMData.Num() == 0)
return;
if (!bHasBufferedAudioInformation)
{
BufferedAudioInformation = AudioInformation;
bHasBufferedAudioInformation = true;
}
else if (BufferedAudioInformation.SampleRate != AudioInformation.SampleRate ||
BufferedAudioInformation.NumChannels != AudioInformation.NumChannels)
{
BufferedPCMData.Empty();
BufferedAudioInformation = AudioInformation;
}
const int64 BytesPerSample = sizeof(int16);
const int64 CurrentBytes = static_cast<int64>(BufferedPCMData.Num()) * BytesPerSample;
const int64 NewBytes = static_cast<int64>(PCMData.Num()) * BytesPerSample;
const int64 MaxUploadBytes = static_cast<int64>(25) * 1024 * 1024;
const int64 MaxAudioBytes = MaxUploadBytes - 1024;
if (CurrentBytes + NewBytes > MaxAudioBytes)
{
int64 ExcessBytes = CurrentBytes + NewBytes - MaxAudioBytes;
int64 SamplesToRemove = (ExcessBytes + BytesPerSample - 1) / BytesPerSample;
if (SamplesToRemove >= BufferedPCMData.Num())
{
BufferedPCMData.Empty();
if (NewBytes > MaxAudioBytes)
{
int64 NewSamplesAllowed = MaxAudioBytes / BytesPerSample;
if (NewSamplesAllowed > 0 && NewSamplesAllowed < PCMData.Num())
{
int32 StartIndex = PCMData.Num() - static_cast<int32>(NewSamplesAllowed);
BufferedPCMData.Append(&PCMData[StartIndex], static_cast<int32>(NewSamplesAllowed));
}
}
else
{
BufferedPCMData.Append(PCMData);
}
}
else
{
BufferedPCMData.RemoveAt(0, static_cast<int32>(SamplesToRemove), EAllowShrinking::No);
BufferedPCMData.Append(PCMData);
}
}
else
{
BufferedPCMData.Append(PCMData);
}
}
void USTTProcessorWhisper::OnSpeechStateChanged(ESTTTalkingState TalkingState)
{
CurrentTalkingState = TalkingState;
if (TalkingState == ESTTTalkingState::BLOCKED)
if (ChainState == ESTTChainState::Discarding)
{
ClearSTTProcessor();
return;
}
if (TalkingState == ESTTTalkingState::SILENCE || TalkingState == ESTTTalkingState::TRANSCRIBING)
{
BufferedPCMData.Append(PCMData);
if (ChainState == ESTTChainState::Finalizing)
StartTranscriptionFromBuffer();
}
}
void USTTProcessorWhisper::StartTranscriptionFromBuffer()
{
if (BufferedPCMData.Num() == 0 || !bHasBufferedAudioInformation)
if (BufferedPCMData.Num() == 0)
return;
TArray<int16> PCMDataCopy = BufferedPCMData;
FAudioInformation AudioInfoCopy = BufferedAudioInformation;
BufferedPCMData.Empty();
bHasBufferedAudioInformation = false;
// Require at least x seconds of audio before sending to Whisper
if (AudioInfoCopy.SampleRate > 0 && AudioInfoCopy.NumChannels > 0)
@ -205,7 +137,6 @@ void USTTProcessorWhisper::StartTranscriptionFromBuffer()
}
STTManager->UserSpeechStateChanged(ESTTTalkingState::TRANSCRIBING);
SendWhisperRequest(MoveTemp(WavData));
}
@ -342,7 +273,6 @@ void USTTProcessorWhisper::PerformHealthCheck()
Manager->OnSTTError.Broadcast(TEXT("Whisper initialization check failed: API key invalid (401)."));
return;
}
Manager->OnSTTFullyInitialized();
});
@ -444,8 +374,6 @@ void USTTProcessorWhisper::SendWhisperRequest(TArray<uint8>&& WavData)
return;
}
UE_LOG(LogTemp, Warning, TEXT("OpenAI says: %s"), *JsonString);
FString Language;
if (RootObject->TryGetStringField(TEXT("language"), Language) && !Language.IsEmpty())
{

2
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Recorder/STTRecorderAudioData.cpp

@ -25,5 +25,5 @@ void USTTRecorderAudioData::SendAudioDataToSTTModule(TArray<float> InputPCM, int
AudioInformation.NumChannels = NumChannels;
AudioInformation.SampleRate = InputSampleRate;
OnChunkReceived.ExecuteIfBound(PCMData, AudioInformation);
OnChunkReceived.ExecuteIfBound(PCMData, AudioInformation, ESTTChainState::Processing);
}

8
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Recorder/STTRecorderDebugFile.cpp

@ -154,7 +154,7 @@ void USTTRecorderDebugFile::DebugPlayAudioFile(FString FilePath)
GetWorld()->GetTimerManager().ClearTimer(PlaybackTimerHandle);
}
STTManager->OnSTTFakeButtonStateChanged.Broadcast(true);
STTManager->OnSTTButtonStateChanged.Broadcast(true);
GetWorld()->GetTimerManager().SetTimer(
PlaybackTimerHandle, // handle to cancel timer at a later time
this, // the owning object
@ -167,7 +167,7 @@ void USTTRecorderDebugFile::DebugPlayAudioFile(FString FilePath)
void USTTRecorderDebugFile::DebugClearAudioFile()
{
STTManager->OnSTTFakeButtonStateChanged.Broadcast(false);
STTManager->OnSTTButtonStateChanged.Broadcast(false);
AudioComponent->Stop();
GetWorld()->GetTimerManager().ClearTimer(PlaybackTimerHandle);
}
@ -180,13 +180,13 @@ void USTTRecorderDebugFile::SendChunk()
if (RemainingSamples <= 0)
{
GetWorld()->GetTimerManager().ClearTimer(PlaybackTimerHandle);
STTManager->OnSTTFakeButtonStateChanged.Broadcast(false);
STTManager->OnSTTButtonStateChanged.Broadcast(false);
STTManager->OnSTTLog.Broadcast(FString::Printf(TEXT("Finished sending %i audio chunks (SampleRate: %i Channels: %i)"), PCMData.Num(), AudioInformation.SampleRate, AudioInformation.NumChannels));
return;
}
TArray<int16> Chunk;
Chunk.Append(&PCMData[SentChunks], RemainingSamples);
OnChunkReceived.ExecuteIfBound(Chunk, AudioInformation);
OnChunkReceived.ExecuteIfBound(Chunk, AudioInformation, ESTTChainState::Processing);
SentChunks += RemainingSamples;
}

2
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Recorder/STTRecorderMicrophone.cpp

@ -297,7 +297,7 @@ void USTTRecorderMicrophone::ProcessChunk()
while (AudioQueue.Dequeue(PCMData))
{
HasReceivedAudioData = true;
OnChunkReceived.ExecuteIfBound(PCMData, CaptureAudioInformation);
OnChunkReceived.ExecuteIfBound(PCMData, CaptureAudioInformation, ESTTChainState::Processing);
ProcessedChunks++;
}
}

2
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Recorder/STTRecorderPrimaryMicrophone.cpp

@ -304,7 +304,7 @@ void USTTRecorderPrimaryMicrophone::ProcessChunk() //Game Thead Processing
HasReceivedAudioData = true;
// Write audio data immediately to the PushAudioInputStream
OnChunkReceived.ExecuteIfBound(PCMData, CaptureAudioInformation);
OnChunkReceived.ExecuteIfBound(PCMData, CaptureAudioInformation, ESTTChainState::Processing);
ProcessedChunks++;
}
}

2
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Recorder/STTRecorderUnrealMicrophone.cpp

@ -304,7 +304,7 @@ void USTTRecorderUnrealMicrophone::ProcessChunk() //Game Thead Processing
HasReceivedAudioData = true;
// Write audio data immediately to the PushAudioInputStream
OnChunkReceived.ExecuteIfBound(PCMData, CaptureAudioInformation);
OnChunkReceived.ExecuteIfBound(PCMData, CaptureAudioInformation, ESTTChainState::Processing);
ProcessedChunks++;
}
}

12
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/STTManagerBase.cpp

@ -148,7 +148,6 @@ void USTTManagerBase::ClearSTTManager()
void USTTManagerBase::SetBlocked(bool isBlocked)
{
if (!bIsInitialized)
return;
@ -176,6 +175,13 @@ bool USTTManagerBase::IsBlocked()
return (CurrentSpeechState==ESTTTalkingState::BLOCKED);
}
// Replaces the list of STT languages stored in the processor config's base settings.
// NewLanguages is taken by value and copied into BaseSettings.STTLanguages.
// When ProcessorConfig is not a valid object the call is a silent no-op.
// This function only writes the setting; it does not itself notify a processor.
void USTTManagerBase::SetLanguage(TArray<ESTTLanguage> NewLanguages)
{
	if (IsValid(ProcessorConfig))
	{
		ProcessorConfig->BaseSettings.STTLanguages = NewLanguages;
	}
}
void USTTManagerBase::AddSpecialWord(FString NewWord)
{
if (bDebugMode)
@ -261,7 +267,10 @@ void USTTManagerBase::PTTStateChanged(bool BtnPressed)
ClearPTTPostRollTimer();
if (BtnPressed)
{
OnSTTButtonStateChanged.Broadcast(true);
USTTManagerBase::UserSpeechStateChanged(ESTTTalkingState::TALKING);
}
else
{
if (PTTPostRollTime > 0) {
@ -279,6 +288,7 @@ void USTTManagerBase::PTTStateChanged(bool BtnPressed)
void USTTManagerBase::PTTRelease()
{
OnSTTButtonStateChanged.Broadcast(false);
if(STTProcessor->IsTranscriptionRunning())
USTTManagerBase::UserSpeechStateChanged(ESTTTalkingState::TRANSCRIBING);
else

4
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Preprocessor/STTPreprocessorBase.h

@ -10,7 +10,7 @@
class USTTManagerBase;
DECLARE_DELEGATE_TwoParams(FDelegateProcessedChunk, TArray<int16>, FAudioInformation);
DECLARE_DELEGATE_ThreeParams(FDelegateProcessedChunk, TArray<int16>, FAudioInformation, ESTTChainState);
/**
* This module processes the audio chunks for the final processor, for example by buffering the chunks or converting them to 16,000 Hz mono audio.
@ -31,7 +31,7 @@ public:
virtual void DestroySTTPreprocessor() {};
UFUNCTION()
virtual void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation) {};
virtual void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState) {};
protected:

17
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Preprocessor/STTPreprocessorBuffer.h

@ -7,7 +7,7 @@
#include "STTPreprocessorBuffer.generated.h"
/**
*
*
*/
UCLASS(Blueprintable, BlueprintType)
class AVATARCORE_STT_API USTTPreprocessorBuffer : public USTTPreprocessorBase
@ -18,20 +18,21 @@ class AVATARCORE_STT_API USTTPreprocessorBuffer : public USTTPreprocessorBase
int32 BufferSize = 10; //Buffersize in ms
UPROPERTY(EditAnywhere)
bool CanOverflow = false; //If false Buffersize will be exact otherwise just "send" data when Buffersize is reached.
UPROPERTY(EditAnywhere)
bool FlushOnSilence = false;
void InitSTTPreprocessor(USTTManagerBase* BaseSTTManager, FSTTBaseSettings InSTTBaseSettings, bool InDebugMode = false) override;
// When Finalizing arrives but no full-size Processing chunk was ever dispatched this utterance,
// send Discarding instead — prevents very short accidental utterances from reaching the processor.
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreSTT", meta = (AllowPrivateAccess = "true"))
bool bDiscardWhenNotFilledFullyOnce = false;
void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation) override;
void InitSTTPreprocessor(USTTManagerBase* BaseSTTManager, FSTTBaseSettings InSTTBaseSettings, bool InDebugMode = false) override;
UFUNCTION()
void OnSpeechChanged(ESTTTalkingState NewSpeechState);
void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState) override;
private:
uint32 GetSampleCount(FAudioInformation AudioInformation);
TArray<int16> Buffer;
FAudioInformation BufferAudioInformation;
bool bHasFilledFully = false;
};

2
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Preprocessor/STTPreprocessorConverter.h

@ -14,7 +14,7 @@ class AVATARCORE_STT_API USTTPreprocessorConverter : public USTTPreprocessorBase
{
GENERATED_BODY()
void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation) override;
void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState) override;
public:

2
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Preprocessor/STTPreprocessorDebugger.h

@ -19,7 +19,7 @@ class AVATARCORE_STT_API USTTPreprocessorDebugger : public USTTPreprocessorBase
void InitSTTPreprocessor(USTTManagerBase* BaseSTTManager, FSTTBaseSettings InSTTBaseSettings, bool InDebugMode = false) override;
void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation) override;
void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState) override;
void DestroySTTPreprocessor() override;

7
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Preprocessor/STTPreprocessorPTT.h

@ -18,13 +18,16 @@ class AVATARCORE_STT_API USTTPreprocessorPTT : public USTTPreprocessorBase
void DestroySTTPreprocessor() override;
void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation) override;
void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState) override;
UFUNCTION()
void OnUserSpeechStateChanged(ESTTTalkingState NewSpeechState);
UFUNCTION()
void OnPTTStateChanged(bool IsPressed);
private:
ESTTTalkingState NewSpeechState = ESTTTalkingState::BLOCKED;
bool PTTPressed = false;
FAudioInformation LastAudioInformation;
};

2
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Preprocessor/STTPreprocessorSpeexDSP.h

@ -22,7 +22,7 @@ class AVATARCORE_STT_API USTTPreprocessorSpeexDSP : public USTTPreprocessorBase
void PostInitProperties() override;
void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation) override;
void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState) override;
UFUNCTION(BlueprintCallable, Category = STTManager)
void UpdateSpeexDSPSettings(FSpeexDSPSettings InSpeexDSPSettings);

3
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Preprocessor/STTPreprocessorVAD.h

@ -29,7 +29,7 @@ class AVATARCORE_STT_API USTTPreprocessorVAD : public USTTPreprocessorBase // Vo
void DestroySTTPreprocessor() override;
void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation) override;
void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState) override;
public:
@ -44,4 +44,5 @@ private:
int32 lastVADState = -1;
float timeInStateInSeconds = 0;
FAudioInformation LastAudioInformation;
};

3
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Preprocessor/STTPreprocessorWebRTC.h

@ -24,7 +24,8 @@ public:
virtual void DestroySTTPreprocessor() override;
virtual void PostInitProperties() override;
virtual void OnChunkReceived(TArray<int16> PCMData,
FAudioInformation AudioInformation) override;
FAudioInformation AudioInformation,
ESTTChainState ChainState) override;
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "STT|WebRTC")
int32 WebRTCStreamDelayMs = 50;
UFUNCTION(BlueprintCallable, Category = "STT|WebRTC")

23
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Azure/STTAzureProcessorConfig.h

@ -6,27 +6,8 @@
#include "Processor/STTBaseProcessorConfig.h"
#include "STTAzureProcessorConfig.generated.h"
UENUM(BlueprintType)
enum class EAzureLanguages : uint8
{
German_Germany UMETA(DisplayName = "German (Germany)"),
English_UK UMETA(DisplayName = "English (United Kingdom)"),
English_India UMETA(DisplayName = "English (India)"),
English_US UMETA(DisplayName = "English (United States)"),
Spanish_Spain UMETA(DisplayName = "Spanish (Spain)"),
Spanish_Mexico UMETA(DisplayName = "Spanish (Mexico)"),
French_France UMETA(DisplayName = "French (France)"),
Hindi_India UMETA(DisplayName = "Hindi (India)"),
Italian_Italy UMETA(DisplayName = "Italian (Italy)"),
Japanese_Japan UMETA(DisplayName = "Japanese (Japan)"),
Korean_Korea UMETA(DisplayName = "Korean (Korea)"),
Portuguese_Brazil UMETA(DisplayName = "Portuguese (Brazil)"),
Chinese_Simplified UMETA(DisplayName = "Chinese (Mandarin, Simplified)"),
MAX UMETA(Hidden) // Helper for array size checks
};
/**
*
*
*/
UCLASS(Blueprintable, BlueprintType)
class AVATARCORE_STT_API USTTAzureProcessorConfig : public USTTBaseProcessorConfig
@ -41,6 +22,4 @@ public:
FString AzureAPIKey = "";
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreSTT|Azure", meta = (ExposeOnSpawn = "true"))
FString AzureRegion = "germanywestcentral";
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreSTT|Azure", meta = (ExposeOnSpawn = "true"))
TArray<EAzureLanguages> AzureLanguages = { EAzureLanguages::English_US, EAzureLanguages::German_Germany };
};

12
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Azure/STTProcessorAzure.h

@ -27,21 +27,19 @@ class AVATARCORE_STT_API USTTProcessorAzure : public USTTProcessorBase
void ClearSTTProcessor();
void DestroySTTProcessor() override;
void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation) override;
void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState) override;
public:
UFUNCTION(BlueprintCallable, Category = STTManager)
virtual void ChangeAzureLanguage(TArray< EAzureLanguages> InAzureLanguages);
UFUNCTION()
void OnSpeechStateChanged(ESTTTalkingState TalkingState);
virtual void ChangeAzureLanguage(TArray<ESTTLanguage> InLanguages);
private:
std::shared_ptr<SpeechSDK::SpeechConfig> config;
std::shared_ptr<SpeechSDK::Audio::AudioConfig> audioConfig;
ESTTChainState LastChainState = ESTTChainState::Discarding;
FString intermediateResult = "";
private:
@ -64,7 +62,7 @@ public:
void OnAzureError(FString Error);
UFUNCTION(BlueprintPure, Category = STTManager)
FString AzureEnumToString(EAzureLanguages Language);
FString AzureEnumToString(ESTTLanguage Language);
USTTAzureProcessorConfig* AzureProcessorConfig;
};

7
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Parakeet/STTParakeetProcessorBase.h

@ -27,7 +27,7 @@ public:
void ClearSTTProcessor() override;
void DestroySTTProcessor() override;
void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation) override;
void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState) override;
// Callbacks from FParakeetRunnable (called on GameThread via AsyncTask)
void OnParakeetReady();
@ -35,9 +35,6 @@ public:
void OnParakeetFinal(const FString& Text, const FString& Language);
void OnParakeetError(const FString& Error);
UFUNCTION()
void OnSpeechStateChanged(ESTTTalkingState TalkingState);
private:
USTTParakeetProcessorConfig* ParakeetConfig = nullptr;
@ -50,7 +47,7 @@ private:
FTimerHandle ConnectTimerHandle;
TSharedPtr<FInternetAddr> ParakeetAddr;
int32 ConnectAttempts = 0;
bool bIsEditor = false;
bool bKeepAlive = false;
// Process management
FString ParakeetBatPath;

3
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Parakeet/STTParakeetProcessorConfig.h

@ -30,6 +30,9 @@ public:
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreSTT|Parakeet", meta = (ExposeOnSpawn = "true"))
int32 Port = 40200;
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreSTT|Parakeet", meta = (ExposeOnSpawn = "true"))
ESTTKeepAliveRule KeepAliveRule = ESTTKeepAliveRule::EditorOnly;
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreSTT|Parakeet", meta = (ExposeOnSpawn = "true"))
FString Device = "cuda:0";

2
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/RealtimeAPI/STTProcessorRealtimeAPI.h

@ -24,7 +24,7 @@ class AVATARCORE_STT_API USTTProcessorRealtimeAPI : public USTTProcessorBase
UFUNCTION(BlueprintCallable, Category = STTManager)
void ClearToRealtimeAPI();
void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation) override;
void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState) override;
private:

7
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/STTBaseProcessorConfig.h

@ -7,6 +7,13 @@
#include "STTStructs.h"
#include "STTBaseProcessorConfig.generated.h"
/**
 * Policy describing when an STT module should be kept alive: only in the editor,
 * never, or in both editor and shipping mode (see the DisplayName of each entry).
 * This enum only names the policy; it carries no behavior of its own.
 */
UENUM(BlueprintType)
enum class ESTTKeepAliveRule : uint8 {
EditorOnly UMETA(DisplayName = "Only keep STT Module open in Editor Mode"),
Never UMETA(DisplayName = "Never keep STT Module alive."),
Always UMETA(DisplayName = "Keep STT Module alive in Editor and Shipping mode.")
};
class USTTProcessorBase;
/**

2
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/STTProcessorBase.h

@ -42,7 +42,7 @@ public:
void OnTranscriptionStarted();
UFUNCTION()
virtual void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation) {};
virtual void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState) {};
protected:

2
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/STTProcessorDebugSaveWav.h

@ -16,7 +16,7 @@ class AVATARCORE_STT_API USTTProcessorDebugSaveWav : public USTTProcessorBase
void DestroySTTProcessor() override;
void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation) override;
void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState) override;
private:

7
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Whisper/STTProcessorWhisper.h

@ -21,10 +21,7 @@ public:
virtual void ClearSTTProcessor() override;
virtual void DestroySTTProcessor() override;
virtual void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation) override;
UFUNCTION()
void OnSpeechStateChanged(ESTTTalkingState TalkingState);
virtual void OnChunkReceived(TArray<int16> PCMData, FAudioInformation AudioInformation, ESTTChainState ChainState) override;
private:
@ -41,7 +38,5 @@ private:
FString NormalizedWhisperURL;
TArray<int16> BufferedPCMData;
FAudioInformation BufferedAudioInformation;
bool bHasBufferedAudioInformation = false;
TArray<TSharedPtr<class IHttpRequest, ESPMode::ThreadSafe>> ActiveRequests;
ESTTTalkingState CurrentTalkingState = ESTTTalkingState::SILENCE;
};

2
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Recorder/STTRecorderBase.h

@ -9,7 +9,7 @@
class USTTManagerBase;
DECLARE_DELEGATE_TwoParams(FDelegateUnprocessedChunkReceived, TArray<int16>, FAudioInformation);
DECLARE_DELEGATE_ThreeParams(FDelegateUnprocessedChunkReceived, TArray<int16>, FAudioInformation, ESTTChainState);
/**
* This module is the producer of the audio chunks (for example a microphone, or a web stream in the case of Pixel Streaming).

7
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/STTManagerBase.h

@ -17,7 +17,7 @@ DECLARE_DYNAMIC_MULTICAST_DELEGATE_ThreeParams(FMulticastDelegateTranscriptionRe
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FMulticastDelegateSpeechStateChanged, ESTTTalkingState, TalkingState);
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FMulticastDelegateSpeechStateChangedForUI, ESTTTalkingState, TalkingState);
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FMulticastDelegateSTTBlocked, bool, IsBlocked);
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FMulticastDelegateFakeButtonStateChanged, bool, IsPressed);
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FMulticastDelegateButtonStateChanged, bool, IsPressed);
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FMulticastDelegateSTTLog, FString, LogContent);
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FMulticastDelegateSTTError, FString, LogError);
DECLARE_DYNAMIC_MULTICAST_DELEGATE(FMulticastDelegateSpeechDetectedWhileBlocked);
@ -44,7 +44,7 @@ public:
UPROPERTY(BlueprintAssignable, Category = "AvatarCoreSTT")
FMulticastDelegateTranscriptionReceived OnTranscriptionReceived;
UPROPERTY(BlueprintAssignable, Category = "AvatarCoreSTT")
FMulticastDelegateFakeButtonStateChanged OnSTTFakeButtonStateChanged;
FMulticastDelegateButtonStateChanged OnSTTButtonStateChanged;
UPROPERTY(BlueprintAssignable, Category = "AvatarCoreSTT")
FMulticastDelegateSTTLog OnSTTLog;
UPROPERTY(BlueprintAssignable, Category = "AvatarCoreSTT")
@ -98,6 +98,9 @@ public:
UFUNCTION(BlueprintPure, Category = "AvatarCoreSTT")
bool IsBlocked();
UFUNCTION(BlueprintCallable, Category = "AvatarCoreSTT")
void SetLanguage(TArray<ESTTLanguage> NewLanguages);
UFUNCTION(BlueprintCallable, Category = "AvatarCoreSTT")
void AddSpecialWord(FString NewWord);

62
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/STTStructs.h

@ -23,6 +23,14 @@ struct FAudioInformation
int32 SampleRate = 16000; // 16kHz
};
/**
 * Tag that travels with every audio chunk through the STT chain. It is the third
 * parameter of the chunk delegates and of the OnChunkReceived overrides, and tells
 * the receiving stage how to treat the chunk it was handed.
 */
UENUM(BlueprintType)
enum class ESTTChainState : uint8
{
Processing UMETA(DisplayName = "Processing"), // normal audio chunk
Finalizing UMETA(DisplayName = "Finalizing"), // end of utterance - trigger transcription
Discarding UMETA(DisplayName = "Discarding"), // BLOCKED/abort - clear everything
};
UENUM(BlueprintType)
enum ESTTTalkingState
{
@ -35,8 +43,8 @@ enum ESTTTalkingState
UENUM(BlueprintType)
enum class ESTTTranscriptionType : uint8
{
OpenAI = 0 UMETA(DisplayName = "OpenAI Transcription"),
Azure = 1 UMETA(DisplayName = "Mircosoft Azure Congnitive Speech Services"),
Azure = 0 UMETA(DisplayName = "Mircosoft Azure Congnitive Speech Services"),
OpenAI = 1 UMETA(DisplayName = "OpenAI Transcription"),
Parakeet = 2 UMETA(DisplayName = "nvidia NeMo Parakeet (local transcription)"),
};
@ -62,6 +70,54 @@ enum class ESpeexDSPState : uint8
SPEEXPREPROCESS_SET_AGC_TARGET = 46 UMETA(DisplayName = "preprocessor Automatic Gain Control level (int32)")
};
/**
 * Languages that can be selected for speech-to-text.
 * Each enumerator is named after a two-letter language code and carries the
 * human-readable language name as its DisplayName. NONE is the first entry
 * (numeric value 0) and is shown as "Unset".
 * Used as the element type of FSTTBaseSettings::STTLanguages.
 */
UENUM(BlueprintType)
enum class ESTTLanguage : uint8
{
NONE UMETA(DisplayName = "Unset"),
en UMETA(DisplayName = "English"),
fr UMETA(DisplayName = "French"),
de UMETA(DisplayName = "German"),
es UMETA(DisplayName = "Spanish"),
pt UMETA(DisplayName = "Portuguese"),
zh UMETA(DisplayName = "Chinese"),
ja UMETA(DisplayName = "Japanese"),
hi UMETA(DisplayName = "Hindi"),
it UMETA(DisplayName = "Italian"),
ko UMETA(DisplayName = "Korean"),
nl UMETA(DisplayName = "Dutch"),
pl UMETA(DisplayName = "Polish"),
ru UMETA(DisplayName = "Russian"),
sv UMETA(DisplayName = "Swedish"),
tr UMETA(DisplayName = "Turkish"),
tl UMETA(DisplayName = "Filipino"),
bg UMETA(DisplayName = "Bulgarian"),
ro UMETA(DisplayName = "Romanian"),
ar UMETA(DisplayName = "Arabic"),
cs UMETA(DisplayName = "Czech"),
el UMETA(DisplayName = "Greek"),
fi UMETA(DisplayName = "Finnish"),
hr UMETA(DisplayName = "Croatian"),
ms UMETA(DisplayName = "Malay"),
sk UMETA(DisplayName = "Slovak"),
da UMETA(DisplayName = "Danish"),
ta UMETA(DisplayName = "Tamil"),
uk UMETA(DisplayName = "Ukrainian"),
hu UMETA(DisplayName = "Hungarian"),
no UMETA(DisplayName = "Norwegian"),
vi UMETA(DisplayName = "Vietnamese"),
bn UMETA(DisplayName = "Bengali"),
th UMETA(DisplayName = "Thai"),
he UMETA(DisplayName = "Hebrew"),
ka UMETA(DisplayName = "Georgian"),
id UMETA(DisplayName = "Indonesian"),
te UMETA(DisplayName = "Telugu"),
gu UMETA(DisplayName = "Gujarati"),
kn UMETA(DisplayName = "Kannada"),
ml UMETA(DisplayName = "Malayalam"),
mr UMETA(DisplayName = "Marathi"),
pa UMETA(DisplayName = "Punjabi"),
};
USTRUCT(BlueprintType)
struct FWebRTCSettings
{
@ -205,6 +261,8 @@ struct FSTTBaseSettings
FVADSettings VADSettings;
UPROPERTY(EditAnywhere, BlueprintReadWrite, meta = (ToolTip = "Settings of the SpeexDSP Module", Category = "STT|Base"))
FSpeexDSPSettings SpeexDSPSettings;
UPROPERTY(EditAnywhere, BlueprintReadWrite, meta = (ToolTip = "All languages the STT module should understand simultaneously.", Category = "STT|Base"))
TArray<ESTTLanguage> STTLanguages = { ESTTLanguage::de, ESTTLanguage::en };
UPROPERTY(EditAnywhere, BlueprintReadWrite, meta = (ToolTip = "Transcriptions to always change to another word.", Category = "STT|Base"))
TArray<FSTTWordReplacement> STTReplacements;
UPROPERTY(EditAnywhere, BlueprintReadWrite, meta = (ToolTip = "Special words that the transcription service needs to know (e.g. b.ReX or Bruce-B).", Category = "STT|Base"))

70
Unreal/Plugins/AvatarCore_TTS/Source/AvatarCore_TTS/Private/Cartesia/CartesiaTTSManager.cpp

@ -23,11 +23,30 @@ namespace
FJsonSerializer::Serialize(Obj.ToSharedRef(), Writer);
return Out;
}
// Language code strings, looked up by the numeric value of an ETTSLanguage
// (see TTSLanguageToString). The table holds 42 entries.
// NOTE(review): the entry order has to match the declaration order of ETTSLanguage,
// which is not visible here. If that enum begins with an "unset" entry the way
// ESTTLanguage does, every lookup would be shifted by one - confirm against the enum.
static const FString TTSLanguageStrings[] = {
TEXT("en"), TEXT("fr"), TEXT("de"), TEXT("es"), TEXT("pt"), TEXT("zh"), TEXT("ja"),
TEXT("hi"), TEXT("it"), TEXT("ko"), TEXT("nl"), TEXT("pl"), TEXT("ru"), TEXT("sv"),
TEXT("tr"), TEXT("tl"), TEXT("bg"), TEXT("ro"), TEXT("ar"), TEXT("cs"), TEXT("el"),
TEXT("fi"), TEXT("hr"), TEXT("ms"), TEXT("sk"), TEXT("da"), TEXT("ta"), TEXT("uk"),
TEXT("hu"), TEXT("no"), TEXT("vi"), TEXT("bn"), TEXT("th"), TEXT("he"), TEXT("ka"),
TEXT("id"), TEXT("te"), TEXT("gu"), TEXT("kn"), TEXT("ml"), TEXT("mr"), TEXT("pa")
};
// Maps an ETTSLanguage value to its language code string by using the enum's
// numeric value as an index into TTSLanguageStrings.
// Values that fall outside the table yield the fallback code "de".
static FString TTSLanguageToString(ETTSLanguage Language)
{
	const int32 TableIndex = static_cast<int32>(Language);

	// Reject anything that does not address an existing table entry.
	if (TableIndex < 0 || TableIndex >= UE_ARRAY_COUNT(TTSLanguageStrings))
	{
		return TEXT("de");
	}

	return TTSLanguageStrings[TableIndex];
}
}
FString UCartesiaTTSManager::BuildWebSocketUrl() const
{
const FString Base = (CartesiaTTSConfig && !CartesiaTTSConfig->CartesiaBaseURI.IsEmpty()) ? CartesiaTTSConfig->CartesiaBaseURI : TEXT("api.cartesia.ai");
const FString Base = (CartesiaTTSConfig && !CartesiaTTSConfig->CartesiaTTSSettings.CartesiaBaseURI.IsEmpty()) ? CartesiaTTSConfig->CartesiaTTSSettings.CartesiaBaseURI : TEXT("api.cartesia.ai");
return FString::Printf(TEXT("wss://%s/tts/websocket"), *Base);
}
@ -59,7 +78,7 @@ void UCartesiaTTSManager::InitTTSManager(UTTSBaseConfig* InTSSConfig, bool Debug
return;
}
bSupportsStreamedInput = CartesiaTTSConfig->StreamInputText;
bSupportsStreamedInput = CartesiaTTSConfig->CartesiaTTSSettings.StreamInputText;
if (!FModuleManager::Get().IsModuleLoaded("WebSockets"))
{
@ -100,6 +119,7 @@ void UCartesiaTTSManager::ClearTTS()
void UCartesiaTTSManager::CloseAndRemoveSocket(int32 TaskID, bool bSendFinal)
{
TSharedPtr<IWebSocket> Socket;
bool bNoMoreSockets = false;
{
FScopeLock Lock(&SocketsCS);
if (TSharedPtr<IWebSocket>* Found = ActiveSockets.Find(TaskID))
@ -110,14 +130,19 @@ void UCartesiaTTSManager::CloseAndRemoveSocket(int32 TaskID, bool bSendFinal)
TaskContextIds.Remove(TaskID);
TasksFlushSent.Remove(TaskID);
TasksSentFirst.Remove(TaskID);
bNoMoreSockets = ActiveSockets.Num() == 0;
}
if (Socket.IsValid())
{
if (bSendFinal)
{
AsyncTask(ENamedThreads::GameThread, [this, TaskID]()
AsyncTask(ENamedThreads::GameThread, [this, TaskID, bNoMoreSockets]()
{
if (bNoMoreSockets)
{
bReceivedFinalInput = true;
}
OnGeneratedAudioChunkReceivedByID(TaskID, TArray<uint8>(), true);
});
}
@ -160,27 +185,31 @@ void UCartesiaTTSManager::SendTranscriptMessage(int32 TaskID, const FString& Tra
}
TSharedPtr<FJsonObject> Obj = MakeShared<FJsonObject>();
Obj->SetStringField(TEXT("model_id"), CartesiaTTSConfig->CartesiaModelId);
Obj->SetStringField(TEXT("model_id"), CartesiaTTSConfig->CartesiaTTSSettings.CartesiaModelId);
Obj->SetStringField(TEXT("transcript"), Transcript);
Obj->SetStringField(TEXT("language"), CartesiaTTSConfig->CartesiaLanguage);
Obj->SetStringField(TEXT("language"), TTSLanguageToString(CartesiaTTSConfig->GlobalTTSSettings.Language));
Obj->SetStringField(TEXT("context_id"), GetOrCreateContextId(TaskID));
Obj->SetBoolField(TEXT("continue"), bContinue);
TSharedPtr<FJsonObject> VoiceObj = MakeShared<FJsonObject>();
VoiceObj->SetStringField(TEXT("mode"), TEXT("id"));
VoiceObj->SetStringField(TEXT("id"), CartesiaTTSConfig->CartesiaVoiceId);
VoiceObj->SetStringField(TEXT("id"), CartesiaTTSConfig->CartesiaTTSSettings.CartesiaVoiceId);
Obj->SetObjectField(TEXT("voice"), VoiceObj);
TSharedPtr<FJsonObject> OutputObj = MakeShared<FJsonObject>();
OutputObj->SetStringField(TEXT("container"), CartesiaTTSConfig->CartesiaContainer);
OutputObj->SetStringField(TEXT("encoding"), CartesiaTTSConfig->CartesiaEncoding);
const int32 EffectiveSR = (TTSConfig && TTSConfig->ResampleToSampleRate > 0) ? TTSConfig->ResampleToSampleRate : (TTSConfig ? TTSConfig->AudioSampleRate : 24000);
OutputObj->SetNumberField(TEXT("sample_rate"), EffectiveSR);
OutputObj->SetStringField(TEXT("container"), CartesiaTTSConfig->CartesiaTTSSettings.CartesiaContainer);
OutputObj->SetStringField(TEXT("encoding"), CartesiaTTSConfig->CartesiaTTSSettings.CartesiaEncoding);
OutputObj->SetNumberField(TEXT("sample_rate"), (TTSConfig ? TTSConfig->GlobalTTSSettings.AudioSampleRate : 24000));
Obj->SetObjectField(TEXT("output_format"), OutputObj);
Obj->SetBoolField(TEXT("add_timestamps"), false);
SendJsonForTask(TaskID, Obj);
{
FScopeLock Lock(&SocketsCS);
TasksSentFirst.Add(TaskID);
}
}
void UCartesiaTTSManager::SendFlushMessage(int32 TaskID)
@ -205,23 +234,22 @@ void UCartesiaTTSManager::SendFlushMessage(int32 TaskID)
}
TSharedPtr<FJsonObject> Obj = MakeShared<FJsonObject>();
Obj->SetStringField(TEXT("model_id"), CartesiaTTSConfig->CartesiaModelId);
Obj->SetStringField(TEXT("model_id"), CartesiaTTSConfig->CartesiaTTSSettings.CartesiaModelId);
Obj->SetStringField(TEXT("transcript"), TEXT(""));
Obj->SetStringField(TEXT("language"), CartesiaTTSConfig->CartesiaLanguage);
Obj->SetStringField(TEXT("language"), TTSLanguageToString(CartesiaTTSConfig->GlobalTTSSettings.Language));
Obj->SetStringField(TEXT("context_id"), GetOrCreateContextId(TaskID));
Obj->SetBoolField(TEXT("continue"), true);
Obj->SetBoolField(TEXT("flush"), true);
TSharedPtr<FJsonObject> VoiceObj = MakeShared<FJsonObject>();
VoiceObj->SetStringField(TEXT("mode"), TEXT("id"));
VoiceObj->SetStringField(TEXT("id"), CartesiaTTSConfig->CartesiaVoiceId);
VoiceObj->SetStringField(TEXT("id"), CartesiaTTSConfig->CartesiaTTSSettings.CartesiaVoiceId);
Obj->SetObjectField(TEXT("voice"), VoiceObj);
TSharedPtr<FJsonObject> OutputObj = MakeShared<FJsonObject>();
OutputObj->SetStringField(TEXT("container"), CartesiaTTSConfig->CartesiaContainer);
OutputObj->SetStringField(TEXT("encoding"), CartesiaTTSConfig->CartesiaEncoding);
const int32 EffectiveSR = (TTSConfig && TTSConfig->ResampleToSampleRate > 0) ? TTSConfig->ResampleToSampleRate : (TTSConfig ? TTSConfig->AudioSampleRate : 24000);
OutputObj->SetNumberField(TEXT("sample_rate"), EffectiveSR);
OutputObj->SetStringField(TEXT("container"), CartesiaTTSConfig->CartesiaTTSSettings.CartesiaContainer);
OutputObj->SetStringField(TEXT("encoding"), CartesiaTTSConfig->CartesiaTTSSettings.CartesiaEncoding);
OutputObj->SetNumberField(TEXT("sample_rate"), (TTSConfig ? TTSConfig->GlobalTTSSettings.AudioSampleRate : 24000));
Obj->SetObjectField(TEXT("output_format"), OutputObj);
Obj->SetBoolField(TEXT("add_timestamps"), false);
@ -242,12 +270,12 @@ void UCartesiaTTSManager::StartStreamingGeneration(int32 TaskID, const FString&
TTSError(TEXT("Cartesia config not set"));
return;
}
if (CartesiaTTSConfig->CartesiaAPIKey.IsEmpty())
if (CartesiaTTSConfig->CartesiaTTSSettings.CartesiaAPIKey.IsEmpty())
{
TTSError(TEXT("Cartesia API key is empty"));
return;
}
if (CartesiaTTSConfig->CartesiaVoiceId.IsEmpty())
if (CartesiaTTSConfig->CartesiaTTSSettings.CartesiaVoiceId.IsEmpty())
{
TTSError(TEXT("Cartesia VoiceId is empty"));
return;
@ -256,8 +284,8 @@ void UCartesiaTTSManager::StartStreamingGeneration(int32 TaskID, const FString&
const FString Url = BuildWebSocketUrl();
TMap<FString, FString> Headers;
Headers.Add(TEXT("X-API-Key"), CartesiaTTSConfig->CartesiaAPIKey);
Headers.Add(TEXT("Cartesia-Version"), CartesiaTTSConfig->CartesiaVersion);
Headers.Add(TEXT("X-API-Key"), CartesiaTTSConfig->CartesiaTTSSettings.CartesiaAPIKey);
Headers.Add(TEXT("Cartesia-Version"), CartesiaTTSConfig->CartesiaTTSSettings.CartesiaVersion);
Headers.Add(TEXT("Accept"), TEXT("application/json"));
Headers.Add(TEXT("User-Agent"), TEXT("AvatarCoreTTS/1.0"));
Headers.Add(TEXT("Content-Type"), TEXT("application/json"));

4
Unreal/Plugins/AvatarCore_TTS/Source/AvatarCore_TTS/Private/Cartesia/TTSCartesiaConfig.cpp

@ -12,7 +12,7 @@ UTTSCartesiaConfig::UTTSCartesiaConfig(const FObjectInitializer& ObjectInitializ
FString UTTSCartesiaConfig::GetHashPrefix() const
{
const int32 EffectiveSR = (ResampleToSampleRate > 0) ? ResampleToSampleRate : AudioSampleRate;
return FString::Printf(TEXT("%d|%d|%s|%s|%s|%s"), AudioNumChannels, EffectiveSR, *CartesiaVoiceId, *CartesiaModelId, *CartesiaContainer, *CartesiaEncoding);
const int32 EffectiveSR = (GlobalTTSSettings.ResampleToSampleRate > 0) ? GlobalTTSSettings.ResampleToSampleRate : GlobalTTSSettings.AudioSampleRate;
return FString::Printf(TEXT("%d|%d|%s|%s|%s|%s"), GlobalTTSSettings.AudioNumChannels, EffectiveSR, *CartesiaTTSSettings.CartesiaVoiceId, *CartesiaTTSSettings.CartesiaModelId, *CartesiaTTSSettings.CartesiaContainer, *CartesiaTTSSettings.CartesiaEncoding);
}

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save