113 changed files with 2632 additions and 1675 deletions
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,570 @@ |
|||||
|
// Fill out your copyright notice in the Description page of Project Settings.
|
||||
|
|
||||
|
#include "OpenRouter/AvatarCoreAIOpenRouter.h" |
||||
|
#include "HttpModule.h" |
||||
|
#include "Interfaces/IHttpRequest.h" |
||||
|
#include "Interfaces/IHttpResponse.h" |
||||
|
#include "Dom/JsonObject.h" |
||||
|
#include "Dom/JsonValue.h" |
||||
|
#include "Serialization/JsonReader.h" |
||||
|
#include "Serialization/JsonWriter.h" |
||||
|
#include "Serialization/JsonSerializer.h" |
||||
|
|
||||
|
// Appends incoming HTTP response bytes directly to a shared TArray.
|
||||
|
// Called from the HTTP module's background thread; the lock guards against concurrent
|
||||
|
// reads in OnResponseProgress (game thread). Both the manager and the archive hold
|
||||
|
// shared refs to the buffer and lock, so neither is destroyed under a live callback.
|
||||
|
class FSSEReceiveArchive final : public FArchive |
||||
|
{ |
||||
|
public: |
||||
|
TSharedPtr<TArray<uint8>> Buffer; |
||||
|
TSharedPtr<FCriticalSection> Lock; |
||||
|
|
||||
|
FSSEReceiveArchive(TSharedPtr<TArray<uint8>> InBuffer, TSharedPtr<FCriticalSection> InLock) |
||||
|
: Buffer(MoveTemp(InBuffer)), Lock(MoveTemp(InLock)) |
||||
|
{ |
||||
|
SetIsSaving(true); |
||||
|
} |
||||
|
|
||||
|
void Serialize(void* V, int64 Length) override |
||||
|
{ |
||||
|
FScopeLock ScopeLock(Lock.Get()); |
||||
|
Buffer->Append(static_cast<uint8*>(V), static_cast<int32>(Length)); |
||||
|
} |
||||
|
|
||||
|
FString GetArchiveName() const override { return TEXT("FSSEReceiveArchive"); } |
||||
|
}; |
||||
|
|
||||
|
// ---------------------------------------------------------------------------
|
||||
|
// Lifecycle
|
||||
|
// ---------------------------------------------------------------------------
|
||||
|
|
||||
|
void UAvatarCoreAIOpenRouter::InitAIManagerChild(UAIBaseConfig* AIConfig, AActor* InWorldReferenceActor) |
||||
|
{ |
||||
|
OpenRouterConfig = Cast<UOpenRouterConfig>(AIConfig); |
||||
|
if (!OpenRouterConfig) |
||||
|
{ |
||||
|
BroadcastAIError(TEXT("Cannot cast config to UOpenRouterConfig"), EAvatarCoreAIError::InvalidConfig); |
||||
|
return; |
||||
|
} |
||||
|
ActivateAI(); |
||||
|
} |
||||
|
|
||||
|
void UAvatarCoreAIOpenRouter::ActivateAI() |
||||
|
{ |
||||
|
SetNewState(EAvatarCoreAIState::Ready); |
||||
|
OnAIReady(); |
||||
|
} |
||||
|
|
||||
|
void UAvatarCoreAIOpenRouter::DeactivateAI() |
||||
|
{ |
||||
|
CancelActiveRequest(); |
||||
|
UAIBaseManager::DeactivateAI(); |
||||
|
SetNewState(EAvatarCoreAIState::Disconnected); |
||||
|
} |
||||
|
|
||||
|
void UAvatarCoreAIOpenRouter::UpdateSession() |
||||
|
{ |
||||
|
if (!OpenRouterConfig) |
||||
|
{ |
||||
|
BroadcastAIError(TEXT("OpenRouterConfig is null in UpdateSession"), EAvatarCoreAIError::InvalidConfig); |
||||
|
return; |
||||
|
} |
||||
|
OnAIReady(); |
||||
|
} |
||||
|
|
||||
|
void UAvatarCoreAIOpenRouter::ClearAI() |
||||
|
{ |
||||
|
CancelActiveRequest(); |
||||
|
ResetSSEState(); |
||||
|
UAIBaseManager::ClearAI(); |
||||
|
} |
||||
|
|
||||
|
void UAvatarCoreAIOpenRouter::CancelActiveRequest() |
||||
|
{ |
||||
|
if (ActiveRequest.IsValid()) |
||||
|
{ |
||||
|
BroadcastAILog(TEXT("OpenRouter: cancelling active HTTP request.")); |
||||
|
ActiveRequest->CancelRequest(); |
||||
|
ActiveRequest.Reset(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ---------------------------------------------------------------------------
|
||||
|
// Send
|
||||
|
// ---------------------------------------------------------------------------
|
||||
|
|
||||
|
void UAvatarCoreAIOpenRouter::SendResponseChild(FAIMessage Message, bool NotifyDelay) |
||||
|
{ |
||||
|
if (!Message.bTriggerResponse) |
||||
|
return; |
||||
|
UAvatarCoreAIOpenRouter::SetNewState(EAvatarCoreAIState::Processing); |
||||
|
// Cancel any stale request (safety net; queue should normally prevent overlaps)
|
||||
|
CancelActiveRequest(); |
||||
|
ResetSSEState(); |
||||
|
SendChatCompletionRequest(Message); |
||||
|
} |
||||
|
|
||||
|
void UAvatarCoreAIOpenRouter::ResetSSEState() |
||||
|
{ |
||||
|
SSEStreamBufferPtr.Reset(); |
||||
|
SSEBufferLock.Reset(); |
||||
|
SSERawBuffer.Empty(); |
||||
|
SSEByteOffset = 0; |
||||
|
bResponseComplete = false; |
||||
|
ToolCallNameMap.Empty(); |
||||
|
ToolCallArgsMap.Empty(); |
||||
|
ToolCallIdMap.Empty(); |
||||
|
} |
||||
|
|
||||
|
void UAvatarCoreAIOpenRouter::SendChatCompletionRequest(FAIMessage CurrentMessage) |
||||
|
{ |
||||
|
if (!OpenRouterConfig) |
||||
|
{ |
||||
|
BroadcastAIError(TEXT("OpenRouterConfig is null"), EAvatarCoreAIError::InvalidConfig); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
// Build request body
|
||||
|
TSharedPtr<FJsonObject> Body = MakeShareable(new FJsonObject); |
||||
|
Body->SetStringField(TEXT("model"), OpenRouterConfig->OpenRouterSettings.BaseAISettings.ModelID); |
||||
|
Body->SetBoolField(TEXT("stream"), true); |
||||
|
Body->SetNumberField(TEXT("max_tokens"), OpenRouterConfig->GlobalAISettings.MaxTokens); |
||||
|
Body->SetNumberField(TEXT("temperature"), OpenRouterConfig->GlobalAISettings.Temperature); |
||||
|
|
||||
|
Body->SetArrayField(TEXT("messages"), BuildMessagesArray(CurrentMessage)); |
||||
|
|
||||
|
TArray<TSharedPtr<FJsonValue>> Tools; |
||||
|
if (OpenRouterConfig->OpenRouterSettings.bSendTools) |
||||
|
Tools = BuildToolsArray(); |
||||
|
if (Tools.Num() > 0) |
||||
|
{ |
||||
|
Body->SetArrayField(TEXT("tools"), Tools); |
||||
|
Body->SetStringField(TEXT("tool_choice"), TEXT("auto")); |
||||
|
} |
||||
|
|
||||
|
FString BodyString; |
||||
|
TSharedRef<TJsonWriter<>> Writer = TJsonWriterFactory<>::Create(&BodyString); |
||||
|
FJsonSerializer::Serialize(Body.ToSharedRef(), Writer); |
||||
|
|
||||
|
BroadcastAILog(FString::Printf(TEXT("OpenRouter request to %s/%s (tools: %d)"), |
||||
|
*OpenRouterConfig->OpenRouterSettings.BaseURL, *OpenRouterConfig->OpenRouterSettings.BaseAISettings.ModelID, Tools.Num())); |
||||
|
BroadcastAILog(BodyString, false, true); |
||||
|
|
||||
|
// Create HTTP request
|
||||
|
ActiveRequest = FHttpModule::Get().CreateRequest(); |
||||
|
ActiveRequest->SetURL(OpenRouterConfig->OpenRouterSettings.BaseURL + TEXT("/chat/completions")); |
||||
|
ActiveRequest->SetVerb(TEXT("POST")); |
||||
|
ActiveRequest->SetHeader(TEXT("Content-Type"), TEXT("application/json")); |
||||
|
ActiveRequest->SetHeader(TEXT("Authorization"), TEXT("Bearer ") + OpenRouterConfig->OpenRouterSettings.BaseAISettings.APIKey); |
||||
|
if (!OpenRouterConfig->OpenRouterSettings.SiteURL.IsEmpty()) |
||||
|
ActiveRequest->SetHeader(TEXT("HTTP-Referer"), OpenRouterConfig->OpenRouterSettings.SiteURL); |
||||
|
if (!OpenRouterConfig->OpenRouterSettings.AppName.IsEmpty()) |
||||
|
ActiveRequest->SetHeader(TEXT("X-Title"), OpenRouterConfig->OpenRouterSettings.AppName); |
||||
|
ActiveRequest->SetContentAsString(BodyString); |
||||
|
|
||||
|
// Attach a receive-stream archive so the HTTP module writes bytes directly into our buffer.
|
||||
|
// This avoids calling GetContent() on an in-progress request, which logs "Payload is incomplete".
|
||||
|
// The lock guards against the HTTP thread (Serialize) racing the game thread (OnResponseProgress).
|
||||
|
SSEStreamBufferPtr = MakeShared<TArray<uint8>>(); |
||||
|
SSEBufferLock = MakeShared<FCriticalSection>(); |
||||
|
ActiveRequest->SetResponseBodyReceiveStream(MakeShareable(new FSSEReceiveArchive(SSEStreamBufferPtr, SSEBufferLock))); |
||||
|
|
||||
|
ActiveRequest->OnRequestProgress64().BindUObject(this, &UAvatarCoreAIOpenRouter::OnResponseProgress); |
||||
|
ActiveRequest->OnProcessRequestComplete().BindUObject(this, &UAvatarCoreAIOpenRouter::OnRequestComplete); |
||||
|
|
||||
|
ResetRequestTimeout(); |
||||
|
SetNewState(EAvatarCoreAIState::Processing); |
||||
|
|
||||
|
ActiveRequest->ProcessRequest(); |
||||
|
} |
||||
|
|
||||
|
// ---------------------------------------------------------------------------
|
||||
|
// Message / Tool array builders
|
||||
|
// ---------------------------------------------------------------------------
|
||||
|
|
||||
|
TArray<TSharedPtr<FJsonValue>> UAvatarCoreAIOpenRouter::BuildMessagesArray(FAIMessage CurrentMessage) |
||||
|
{ |
||||
|
TArray<TSharedPtr<FJsonValue>> Messages; |
||||
|
|
||||
|
// System instructions — always the most recent, sent as first message
|
||||
|
FString SystemPrompt = GetSystemInstructionPromptString(false); |
||||
|
if (!SystemPrompt.IsEmpty()) |
||||
|
{ |
||||
|
TSharedPtr<FJsonObject> SysMsg = MakeShareable(new FJsonObject); |
||||
|
SysMsg->SetStringField(TEXT("role"), TEXT("system")); |
||||
|
SysMsg->SetStringField(TEXT("content"), SystemPrompt); |
||||
|
Messages.Add(MakeShareable(new FJsonValueObject(SysMsg))); |
||||
|
} |
||||
|
|
||||
|
// Validate history: remove any assistant tool_calls entry that has no matching tool result,
|
||||
|
// and any tool result that has no preceding assistant tool_calls. Either orphan causes a 400.
|
||||
|
TArray<FAIMessage> History = GetAllPreviousMessage(); |
||||
|
for (int32 i = History.Num() - 1; i >= 0; --i) |
||||
|
{ |
||||
|
const FAIMessage& Msg = History[i]; |
||||
|
if (Msg.Role == EAvatarCoreAIPromptRole::Assistant && !Msg.Id.IsEmpty()) |
||||
|
{ |
||||
|
bool bHasResult = false; |
||||
|
for (int32 j = i + 1; j < History.Num(); ++j) |
||||
|
if (History[j].Role == EAvatarCoreAIPromptRole::Tool && History[j].Id == Msg.Id) |
||||
|
{ bHasResult = true; break; } |
||||
|
// CurrentMessage may be the tool result that hasn't been added to PreviousMessages yet
|
||||
|
if (!bHasResult && CurrentMessage.Role == EAvatarCoreAIPromptRole::Tool && CurrentMessage.Id == Msg.Id) |
||||
|
bHasResult = true; |
||||
|
if (!bHasResult) |
||||
|
{ |
||||
|
BroadcastAILog(FString::Printf(TEXT("OpenRouter: dropping orphaned tool_calls entry (id=%s) from history"), *Msg.Id), true); |
||||
|
History.RemoveAt(i); |
||||
|
} |
||||
|
} |
||||
|
else if (Msg.Role == EAvatarCoreAIPromptRole::Tool) |
||||
|
{ |
||||
|
bool bHasCall = false; |
||||
|
for (int32 j = 0; j < i; ++j) |
||||
|
if (History[j].Role == EAvatarCoreAIPromptRole::Assistant && History[j].Id == Msg.Id) |
||||
|
{ bHasCall = true; break; } |
||||
|
if (!bHasCall) |
||||
|
{ |
||||
|
BroadcastAILog(FString::Printf(TEXT("OpenRouter: dropping orphaned tool result (id=%s) from history"), *Msg.Id), true); |
||||
|
History.RemoveAt(i); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// Conversation history (does not include CurrentMessage — it was not yet added to PreviousMessages)
|
||||
|
auto AppendMessage = [&](const FAIMessage& Msg) |
||||
|
{ |
||||
|
TSharedPtr<FJsonObject> MsgObj = MakeShareable(new FJsonObject); |
||||
|
|
||||
|
if (Msg.Role == EAvatarCoreAIPromptRole::Tool) |
||||
|
{ |
||||
|
MsgObj->SetStringField(TEXT("role"), TEXT("tool")); |
||||
|
MsgObj->SetStringField(TEXT("tool_call_id"), Msg.Id); |
||||
|
MsgObj->SetStringField(TEXT("content"), Msg.Message); |
||||
|
} |
||||
|
else if (Msg.Role == EAvatarCoreAIPromptRole::Assistant && !Msg.Id.IsEmpty()) |
||||
|
{ |
||||
|
// Assistant message that triggered a tool call; Message contains the tool_calls JSON array.
|
||||
|
// content must be null (not "") when tool_calls is present — OpenAI spec requirement.
|
||||
|
MsgObj->SetStringField(TEXT("role"), TEXT("assistant")); |
||||
|
MsgObj->SetField(TEXT("content"), MakeShareable(new FJsonValueNull())); |
||||
|
|
||||
|
TArray<TSharedPtr<FJsonValue>> ToolCallsArray; |
||||
|
TSharedRef<TJsonReader<>> TCReader = TJsonReaderFactory<>::Create(Msg.Message); |
||||
|
if (FJsonSerializer::Deserialize(TCReader, ToolCallsArray)) |
||||
|
{ |
||||
|
MsgObj->SetArrayField(TEXT("tool_calls"), ToolCallsArray); |
||||
|
} |
||||
|
else |
||||
|
{ |
||||
|
BroadcastAILog(FString::Printf(TEXT("OpenRouter: failed to parse stored tool_calls JSON: %s"), *Msg.Message)); |
||||
|
return; |
||||
|
} |
||||
|
} |
||||
|
else |
||||
|
{ |
||||
|
FString RoleStr; |
||||
|
switch (Msg.Role) |
||||
|
{ |
||||
|
case EAvatarCoreAIPromptRole::User: RoleStr = TEXT("user"); break; |
||||
|
case EAvatarCoreAIPromptRole::Assistant: RoleStr = TEXT("assistant"); break; |
||||
|
case EAvatarCoreAIPromptRole::System: RoleStr = TEXT("system"); break; |
||||
|
default: RoleStr = TEXT("user"); break; |
||||
|
} |
||||
|
MsgObj->SetStringField(TEXT("role"), RoleStr); |
||||
|
MsgObj->SetStringField(TEXT("content"), Msg.Message); |
||||
|
} |
||||
|
|
||||
|
Messages.Add(MakeShareable(new FJsonValueObject(MsgObj))); |
||||
|
}; |
||||
|
|
||||
|
for (const FAIMessage& Msg : History) |
||||
|
AppendMessage(Msg); |
||||
|
|
||||
|
// Current message appended last — ensures it is always the newest entry
|
||||
|
AppendMessage(CurrentMessage); |
||||
|
|
||||
|
return Messages; |
||||
|
} |
||||
|
|
||||
|
TArray<TSharedPtr<FJsonValue>> UAvatarCoreAIOpenRouter::BuildToolsArray() |
||||
|
{ |
||||
|
TArray<TSharedPtr<FJsonValue>> ToolsArray; |
||||
|
for (const FMCPToolInfo& Command : GetAvailableCommands()) |
||||
|
{ |
||||
|
if (Command.Name.IsEmpty()) |
||||
|
continue; |
||||
|
|
||||
|
TSharedPtr<FJsonObject> FunctionObj = MakeShareable(new FJsonObject); |
||||
|
FunctionObj->SetStringField(TEXT("name"), Command.Name.Left(64)); |
||||
|
if (!Command.Description.IsEmpty()) |
||||
|
FunctionObj->SetStringField(TEXT("description"), Command.Description); |
||||
|
|
||||
|
if (!Command.InputScheme.IsEmpty()) |
||||
|
{ |
||||
|
TSharedPtr<FJsonObject> ParamsObj; |
||||
|
TSharedRef<TJsonReader<>> Reader = TJsonReaderFactory<>::Create(Command.InputScheme); |
||||
|
if (FJsonSerializer::Deserialize(Reader, ParamsObj) && ParamsObj.IsValid()) |
||||
|
FunctionObj->SetObjectField(TEXT("parameters"), ParamsObj); |
||||
|
else |
||||
|
BroadcastAIError(FString::Printf(TEXT("InputScheme of '%s' is not valid JSON"), *Command.Name), EAvatarCoreAIError::MCPError); |
||||
|
} |
||||
|
|
||||
|
TSharedPtr<FJsonObject> ToolObj = MakeShareable(new FJsonObject); |
||||
|
ToolObj->SetStringField(TEXT("type"), TEXT("function")); |
||||
|
ToolObj->SetObjectField(TEXT("function"), FunctionObj); |
||||
|
|
||||
|
ToolsArray.Add(MakeShareable(new FJsonValueObject(ToolObj))); |
||||
|
} |
||||
|
return ToolsArray; |
||||
|
} |
||||
|
|
||||
|
// ---------------------------------------------------------------------------
|
||||
|
// SSE streaming
|
||||
|
// ---------------------------------------------------------------------------
|
||||
|
|
||||
|
void UAvatarCoreAIOpenRouter::OnResponseProgress(FHttpRequestPtr Request, uint64 BytesSent, uint64 BytesReceived) |
||||
|
{ |
||||
|
// Take local shared-ptr copies first. If ResetSSEState fires inside ParseSSELine
|
||||
|
// (same call stack, game thread) it will null the members, but our locals keep the
|
||||
|
// objects alive and the lock valid for the duration of this callback.
|
||||
|
TSharedPtr<TArray<uint8>> Buffer = SSEStreamBufferPtr; |
||||
|
TSharedPtr<FCriticalSection> Lock = SSEBufferLock; |
||||
|
if (!Buffer.IsValid() || !Lock.IsValid()) |
||||
|
return; |
||||
|
|
||||
|
// Lock only for the brief copy of newly-arrived bytes.
|
||||
|
// Holding the lock any longer would block the HTTP thread unnecessarily.
|
||||
|
TArray<uint8> NewBytes; |
||||
|
{ |
||||
|
FScopeLock ScopeLock(Lock.Get()); |
||||
|
const int32 Total = Buffer->Num(); |
||||
|
const int32 ToRead = Total - SSEByteOffset; |
||||
|
if (ToRead <= 0) |
||||
|
return; |
||||
|
NewBytes.Append(Buffer->GetData() + SSEByteOffset, ToRead); |
||||
|
SSEByteOffset = Total; |
||||
|
} |
||||
|
|
||||
|
// Append only the newly arrived bytes as raw bytes — never convert to ANSI here,
|
||||
|
// because UTF-8 multibyte sequences may straddle callback boundaries.
|
||||
|
SSERawBuffer.Append(NewBytes); |
||||
|
|
||||
|
// Scan for complete lines. '\n' (0x0A) is never a continuation byte in UTF-8,
|
||||
|
// so this scan is safe on raw UTF-8 data.
|
||||
|
int32 StartPos = 0; |
||||
|
const int32 BufferSize = SSERawBuffer.Num(); |
||||
|
for (int32 i = 0; i < BufferSize; ++i) |
||||
|
{ |
||||
|
if (SSERawBuffer[i] == '\n') |
||||
|
{ |
||||
|
int32 End = i; |
||||
|
if (End > StartPos && SSERawBuffer[End - 1] == '\r') |
||||
|
--End; // strip \r
|
||||
|
|
||||
|
if (End > StartPos) |
||||
|
{ |
||||
|
// Null-terminate the slice and convert UTF-8 → FString
|
||||
|
TArray<uint8> Slice(SSERawBuffer.GetData() + StartPos, End - StartPos); |
||||
|
Slice.Add(0); |
||||
|
ParseSSELine(UTF8_TO_TCHAR((ANSICHAR*)Slice.GetData())); |
||||
|
} |
||||
|
StartPos = i + 1; |
||||
|
|
||||
|
// ParseSSELine may have triggered ResetSSEState (e.g. via OnAIReady → SendResponseChild).
|
||||
|
// If the buffer was cleared, stop processing stale data.
|
||||
|
if (SSERawBuffer.Num() == 0) |
||||
|
return; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// Discard all processed bytes; keep the partial trailing line in the buffer.
|
||||
|
// Guard against the buffer being reset by a ParseSSELine side-effect.
|
||||
|
if (StartPos > 0 && StartPos <= SSERawBuffer.Num()) |
||||
|
SSERawBuffer.RemoveAt(0, StartPos); |
||||
|
} |
||||
|
|
||||
|
void UAvatarCoreAIOpenRouter::ParseSSELine(const FString& Line) |
||||
|
{ |
||||
|
// SSE lines look like: "data: {...}" or "data: [DONE]"
|
||||
|
if (!Line.StartsWith(TEXT("data: "))) |
||||
|
return; |
||||
|
|
||||
|
FString Data = Line.Mid(6); // strip "data: "
|
||||
|
|
||||
|
if (Data == TEXT("[DONE]")) |
||||
|
return; |
||||
|
|
||||
|
TSharedPtr<FJsonObject> JsonObj; |
||||
|
TSharedRef<TJsonReader<>> Reader = TJsonReaderFactory<>::Create(Data); |
||||
|
if (!FJsonSerializer::Deserialize(Reader, JsonObj) || !JsonObj.IsValid()) |
||||
|
{ |
||||
|
BroadcastAILog(FString::Printf(TEXT("OpenRouter: failed to parse SSE chunk: %s"), *Data), false, true); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
const TArray<TSharedPtr<FJsonValue>>* Choices; |
||||
|
if (!JsonObj->TryGetArrayField(TEXT("choices"), Choices) || Choices->Num() == 0) |
||||
|
return; |
||||
|
|
||||
|
TSharedPtr<FJsonObject> Choice = (*Choices)[0]->AsObject(); |
||||
|
if (!Choice.IsValid()) |
||||
|
return; |
||||
|
|
||||
|
TSharedPtr<FJsonObject> Delta = Choice->GetObjectField(TEXT("delta")); |
||||
|
|
||||
|
// Text content chunk
|
||||
|
FString ContentChunk; |
||||
|
if (Delta.IsValid() && Delta->TryGetStringField(TEXT("content"), ContentChunk) && !ContentChunk.IsEmpty()) |
||||
|
{ |
||||
|
ClearRequestTimeout(); |
||||
|
OnAIResponse(ContentChunk, false); |
||||
|
} |
||||
|
|
||||
|
// Tool call argument accumulation
|
||||
|
const TArray<TSharedPtr<FJsonValue>>* ToolCallsArr; |
||||
|
if (Delta.IsValid() && Delta->TryGetArrayField(TEXT("tool_calls"), ToolCallsArr)) |
||||
|
{ |
||||
|
for (const TSharedPtr<FJsonValue>& TCValue : *ToolCallsArr) |
||||
|
{ |
||||
|
TSharedPtr<FJsonObject> TC = TCValue->AsObject(); |
||||
|
if (!TC.IsValid()) continue; |
||||
|
|
||||
|
int32 Idx = 0; |
||||
|
TC->TryGetNumberField(TEXT("index"), Idx); |
||||
|
|
||||
|
// Name and id only arrive on the first delta for this index
|
||||
|
FString TCName, TCId; |
||||
|
TSharedPtr<FJsonObject> FuncObj = TC->GetObjectField(TEXT("function")); |
||||
|
if (FuncObj.IsValid()) |
||||
|
{ |
||||
|
FuncObj->TryGetStringField(TEXT("name"), TCName); |
||||
|
if (!TCName.IsEmpty()) |
||||
|
ToolCallNameMap.FindOrAdd(Idx) += TCName; |
||||
|
|
||||
|
FString ArgsDelta; |
||||
|
if (FuncObj->TryGetStringField(TEXT("arguments"), ArgsDelta)) |
||||
|
ToolCallArgsMap.FindOrAdd(Idx) += ArgsDelta; |
||||
|
} |
||||
|
if (TC->TryGetStringField(TEXT("id"), TCId) && !TCId.IsEmpty()) |
||||
|
ToolCallIdMap.FindOrAdd(Idx) = TCId; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// Finish reason
|
||||
|
FString FinishReason; |
||||
|
Choice->TryGetStringField(TEXT("finish_reason"), FinishReason); |
||||
|
|
||||
|
if (FinishReason == TEXT("stop") && !bResponseComplete) |
||||
|
{ |
||||
|
bResponseComplete = true; |
||||
|
ClearRequestTimeout(); |
||||
|
ActiveRequest.Reset(); |
||||
|
OnAIResponse(TEXT(""), true); // IsFinal — base adds assistant message to history
|
||||
|
SetNewState(EAvatarCoreAIState::Ready); |
||||
|
OnAIReady(); // drains ResponseQueue
|
||||
|
} |
||||
|
else if (FinishReason == TEXT("tool_calls") && !bResponseComplete) |
||||
|
{ |
||||
|
bResponseComplete = true; |
||||
|
ClearRequestTimeout(); |
||||
|
ActiveRequest.Reset(); |
||||
|
HandleToolCallsDone(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
void UAvatarCoreAIOpenRouter::HandleToolCallsDone() |
||||
|
{ |
||||
|
// First version: handle the tool call at index 0 (same single-call behaviour as Realtime)
|
||||
|
if (!ToolCallIdMap.Contains(0) || !ToolCallNameMap.Contains(0)) |
||||
|
{ |
||||
|
BroadcastAIError(TEXT("OpenRouter: tool_calls done but no accumulated call data at index 0"), |
||||
|
EAvatarCoreAIError::FunctionCallFailed); |
||||
|
SetNewState(EAvatarCoreAIState::Error); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
FString CallId = ToolCallIdMap[0]; |
||||
|
FString CallName = ToolCallNameMap[0]; |
||||
|
FString CallArgs = ToolCallArgsMap.FindRef(0); |
||||
|
|
||||
|
BroadcastAILog(FString::Printf(TEXT("OpenRouter: tool call '%s' (id=%s) args=%s"), *CallName, *CallId, *CallArgs), true); |
||||
|
|
||||
|
// Build the tool_calls JSON array to persist in message history so the next
|
||||
|
// request has the correct assistant→tool context.
|
||||
|
TSharedPtr<FJsonObject> FuncObj = MakeShareable(new FJsonObject); |
||||
|
FuncObj->SetStringField(TEXT("name"), CallName); |
||||
|
FuncObj->SetStringField(TEXT("arguments"), CallArgs); |
||||
|
|
||||
|
TSharedPtr<FJsonObject> ToolCallObj = MakeShareable(new FJsonObject); |
||||
|
ToolCallObj->SetStringField(TEXT("id"), CallId); |
||||
|
ToolCallObj->SetStringField(TEXT("type"), TEXT("function")); |
||||
|
ToolCallObj->SetObjectField(TEXT("function"), FuncObj); |
||||
|
|
||||
|
TArray<TSharedPtr<FJsonValue>> ToolCallsArray; |
||||
|
ToolCallsArray.Add(MakeShareable(new FJsonValueObject(ToolCallObj))); |
||||
|
|
||||
|
FString ToolCallsJson; |
||||
|
TSharedRef<TJsonWriter<>> TCWriter = TJsonWriterFactory<>::Create(&ToolCallsJson); |
||||
|
FJsonSerializer::Serialize(ToolCallsArray, TCWriter); |
||||
|
|
||||
|
// Add assistant message with tool_calls to history
|
||||
|
FAIMessage AssistantToolCall; |
||||
|
AssistantToolCall.Role = EAvatarCoreAIPromptRole::Assistant; |
||||
|
AssistantToolCall.Id = CallId; |
||||
|
AssistantToolCall.Message = ToolCallsJson; // BuildMessagesArray reads this back
|
||||
|
AddMessageToArray(AssistantToolCall); |
||||
|
|
||||
|
// RunMCPCommand sets GettingInfo state; base propagates CallId → CommandFinished → Tool role result
|
||||
|
RunMCPCommand(CallName, CallArgs, CallId); |
||||
|
} |
||||
|
|
||||
|
// ---------------------------------------------------------------------------
|
||||
|
// HTTP completion handler
|
||||
|
// ---------------------------------------------------------------------------
|
||||
|
|
||||
|
void UAvatarCoreAIOpenRouter::OnRequestComplete(FHttpRequestPtr Request, FHttpResponsePtr Response, bool bSuccess) |
||||
|
{ |
||||
|
ActiveRequest.Reset(); |
||||
|
|
||||
|
// Successful streaming already handled by ParseSSELine.
|
||||
|
// This callback is for error cases only.
|
||||
|
if (bSuccess && Response.IsValid() && Response->GetResponseCode() < 300) |
||||
|
return; |
||||
|
|
||||
|
ClearRequestTimeout(); |
||||
|
|
||||
|
int32 Code = Response.IsValid() ? Response->GetResponseCode() : 0; |
||||
|
// GetContentAsString() is empty when SetResponseBodyReceiveStream is active —
|
||||
|
// all bytes (including error bodies) were written to SSEStreamBufferPtr.
|
||||
|
FString Body; |
||||
|
if (Response.IsValid()) |
||||
|
{ |
||||
|
Body = Response->GetContentAsString(); |
||||
|
if (Body.IsEmpty() && SSEStreamBufferPtr.IsValid() && SSEStreamBufferPtr->Num() > 0) |
||||
|
{ |
||||
|
TArray<uint8> BodyBytes = *SSEStreamBufferPtr; |
||||
|
BodyBytes.Add(0); |
||||
|
Body = UTF8_TO_TCHAR((ANSICHAR*)BodyBytes.GetData()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 422 with a tools-related body means this model doesn't support OpenAI-compatible
|
||||
|
// function calling. Disable bSendTools in the config for this model.
|
||||
|
if (Code == 422 && Body.Contains(TEXT("tools"))) |
||||
|
{ |
||||
|
BroadcastAIError( |
||||
|
FString::Printf(TEXT("OpenRouter 422: model '%s' rejected the tools format. " |
||||
|
"Disable bSendTools in OpenRouterConfig for models that don't support " |
||||
|
"OpenAI-compatible function calling. Raw: %s"), *OpenRouterConfig->OpenRouterSettings.BaseAISettings.ModelID, *Body), |
||||
|
EAvatarCoreAIError::NetworkError); |
||||
|
} |
||||
|
else |
||||
|
{ |
||||
|
BroadcastAIError( |
||||
|
FString::Printf(TEXT("OpenRouter HTTP error %d: %s"), Code, *Body), |
||||
|
EAvatarCoreAIError::NetworkError); |
||||
|
} |
||||
|
SetNewState(EAvatarCoreAIState::Error); |
||||
|
} |
||||
@ -0,0 +1,74 @@ |
|||||
|
// Fill out your copyright notice in the Description page of Project Settings.
|
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include "CoreMinimal.h" |
||||
|
#include "AIBaseManager.h" |
||||
|
#include "OpenRouter/OpenRouterConfig.h" |
||||
|
#include "Interfaces/IHttpRequest.h" |
||||
|
#include "Interfaces/IHttpResponse.h" |
||||
|
#include "AvatarCoreAIOpenRouter.generated.h" |
||||
|
|
||||
|
UCLASS(Blueprintable, BlueprintType) |
||||
|
class AVATARCORE_AI_API UAvatarCoreAIOpenRouter : public UAIBaseManager |
||||
|
{ |
||||
|
GENERATED_BODY() |
||||
|
|
||||
|
public: |
||||
|
|
||||
|
// UAIBaseManager overrides
|
||||
|
void InitAIManagerChild(UAIBaseConfig* AIConfig, AActor* InWorldReferenceActor) override; |
||||
|
void ActivateAI() override; |
||||
|
void DeactivateAI() override; |
||||
|
void UpdateSession() override; |
||||
|
void SendResponseChild(FAIMessage Message, bool NotifyDelay) override; |
||||
|
void ClearAI() override; |
||||
|
|
||||
|
// Cancel the active HTTP request if one is in flight
|
||||
|
UFUNCTION(BlueprintCallable, Category = "AvatarCoreAI|OpenRouter") |
||||
|
void CancelActiveRequest(); |
||||
|
|
||||
|
private: |
||||
|
|
||||
|
UOpenRouterConfig* OpenRouterConfig = nullptr; |
||||
|
|
||||
|
TSharedPtr<IHttpRequest, ESPMode::ThreadSafe> ActiveRequest; |
||||
|
|
||||
|
// Per-request HTTP receive buffer. FSSEReceiveArchive appends bytes here directly,
|
||||
|
// so we never call GetContent() on a live request (avoids "Payload is incomplete" warnings).
|
||||
|
// A new TArray is created for each request so old and new request data are isolated.
|
||||
|
TSharedPtr<TArray<uint8>> SSEStreamBufferPtr; |
||||
|
|
||||
|
// Guards concurrent access: HTTP thread writes via FSSEReceiveArchive::Serialize,
|
||||
|
// game thread reads in OnResponseProgress. Both hold shared refs so neither
|
||||
|
// outlives the lock even if ResetSSEState fires mid-callback.
|
||||
|
TSharedPtr<FCriticalSection> SSEBufferLock; |
||||
|
|
||||
|
// Line-level byte buffer for SSE parsing; fed from SSEStreamBufferPtr.
|
||||
|
// UTF-8 multibyte sequences are never split here because lines are extracted whole.
|
||||
|
TArray<uint8> SSERawBuffer; |
||||
|
int32 SSEByteOffset = 0; |
||||
|
// Prevents processing finish_reason more than once if a provider sends duplicate done chunks
|
||||
|
bool bResponseComplete = false; |
||||
|
|
||||
|
// Tool call accumulation during streaming; keyed by tool_calls[index]
|
||||
|
TMap<int32, FString> ToolCallNameMap; |
||||
|
TMap<int32, FString> ToolCallArgsMap; |
||||
|
TMap<int32, FString> ToolCallIdMap; |
||||
|
|
||||
|
void SendChatCompletionRequest(FAIMessage CurrentMessage); |
||||
|
|
||||
|
void OnResponseProgress(FHttpRequestPtr Request, uint64 BytesSent, uint64 BytesReceived); |
||||
|
void OnRequestComplete(FHttpRequestPtr Request, FHttpResponsePtr Response, bool bSuccess); |
||||
|
|
||||
|
void ParseSSELine(const FString& Line); |
||||
|
void HandleToolCallsDone(); |
||||
|
|
||||
|
// Builds the messages array from system instructions + history + current message appended last
|
||||
|
TArray<TSharedPtr<FJsonValue>> BuildMessagesArray(FAIMessage CurrentMessage); |
||||
|
|
||||
|
// Builds the tools array from GetAvailableCommands()
|
||||
|
TArray<TSharedPtr<FJsonValue>> BuildToolsArray(); |
||||
|
|
||||
|
void ResetSSEState(); |
||||
|
}; |
||||
@ -0,0 +1,7 @@ |
|||||
|
#include "OpenRouter/OpenRouterConfig.h" |
||||
|
#include "OpenRouter/AvatarCoreAIOpenRouter.h" |
||||
|
|
||||
|
UOpenRouterConfig::UOpenRouterConfig(const FObjectInitializer& ObjectInitializer) |
||||
|
{ |
||||
|
AIManagerClass = UAvatarCoreAIOpenRouter::StaticClass(); |
||||
|
} |
||||
@ -0,0 +1,47 @@ |
|||||
|
// Fill out your copyright notice in the Description page of Project Settings.
|
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include "CoreMinimal.h" |
||||
|
#include "AIBaseConfig.h" |
||||
|
#include "OpenRouterConfig.generated.h" |
||||
|
|
||||
|
USTRUCT(BlueprintType) |
||||
|
struct FOpenRouterAISettings |
||||
|
{ |
||||
|
GENERATED_BODY() |
||||
|
|
||||
|
// OpenRouter (or compatible) API base URL
|
||||
|
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true")) |
||||
|
FString BaseURL = TEXT("https://openrouter.ai/api/v1"); |
||||
|
|
||||
|
//Base URL - Change this to the correct Azure API URL
|
||||
|
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true")) |
||||
|
FBaseAISettings BaseAISettings; |
||||
|
|
||||
|
// Enable OpenAI-compatible function/tool calling. Disable for models that use a
|
||||
|
// non-standard tool format (e.g. some Mistral variants via OpenRouter).
|
||||
|
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true")) |
||||
|
bool bSendTools = true; |
||||
|
|
||||
|
// Optional: sent as HTTP-Referer header (appears in OpenRouter dashboard analytics)
|
||||
|
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true")) |
||||
|
FString SiteURL; |
||||
|
|
||||
|
// Optional: sent as X-Title header
|
||||
|
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true")) |
||||
|
FString AppName; |
||||
|
}; |
||||
|
|
||||
|
UCLASS(Blueprintable, BlueprintType) |
||||
|
class AVATARCORE_AI_API UOpenRouterConfig : public UAIBaseConfig |
||||
|
{ |
||||
|
GENERATED_BODY() |
||||
|
|
||||
|
public: |
||||
|
|
||||
|
UOpenRouterConfig(const FObjectInitializer& ObjectInitializer); |
||||
|
|
||||
|
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "AvatarCoreAI|Settings", meta = (ExposeOnSpawn = "true")) |
||||
|
FOpenRouterAISettings OpenRouterSettings; |
||||
|
}; |
||||
@ -1 +0,0 @@ |
|||||
/__pycache__/* |
|
||||
@ -1,5 +0,0 @@ |
|||||
@echo off |
|
||||
call %localappdata%/AvatarCore/FastMCPVenv/Scripts/Activate.bat |
|
||||
cd /d "%~dp0" |
|
||||
python AddDocumentsToDatabase.py |
|
||||
cmd |
|
||||
@ -1,68 +0,0 @@ |
|||||
# setup_db.py |
|
||||
import os |
|
||||
import re |
|
||||
from document_vectordb import DocumentVectorDB |
|
||||
from colorama import Fore |
|
||||
|
|
||||
def _sanitize_table_name(name: str) -> str: |
|
||||
name = name.strip().lower() |
|
||||
name = re.sub(r'[^a-z0-9]+', '_', name) |
|
||||
name = name.strip('_') |
|
||||
if not name: |
|
||||
return "documents" |
|
||||
return name |
|
||||
|
|
||||
|
|
||||
def setup_database(): |
|
||||
print(Fore.GREEN + "Initializing database...") |
|
||||
db = DocumentVectorDB() |
|
||||
db.create_table() |
|
||||
|
|
||||
# Add sample documents (modify paths as needed) |
|
||||
document_add_folder = os.path.dirname(__file__) + "/documents_to_add" |
|
||||
# Add sample documents (modify paths as needed) |
|
||||
document_added_folder = os.path.dirname(__file__) + "/documents_added" |
|
||||
|
|
||||
if not os.path.exists(document_add_folder): |
|
||||
os.makedirs(document_add_folder) |
|
||||
print(Fore.GREEN + f"Created {document_add_folder} directory. Please add your PDF/text files there.") |
|
||||
return |
|
||||
if not os.path.exists(document_added_folder): |
|
||||
os.makedirs(document_added_folder) |
|
||||
|
|
||||
files_added = 0 |
|
||||
for root, dirs, files in os.walk(document_add_folder): |
|
||||
rel_root = os.path.relpath(root, document_add_folder) |
|
||||
if rel_root == ".": |
|
||||
current_table = "documents" |
|
||||
else: |
|
||||
first_folder = rel_root.split(os.sep)[0] |
|
||||
current_table = _sanitize_table_name(first_folder) |
|
||||
for filename in files: |
|
||||
if filename.endswith(('.pdf', '.txt')): |
|
||||
file_path = os.path.join(root, filename) |
|
||||
if rel_root == ".": |
|
||||
target_root = document_added_folder |
|
||||
else: |
|
||||
target_root = os.path.join(document_added_folder, rel_root) |
|
||||
if not os.path.exists(target_root): |
|
||||
os.makedirs(target_root) |
|
||||
copy_file_path = os.path.join(target_root, filename) |
|
||||
print(Fore.GREEN +f"Adding {file_path} to table {current_table}...") |
|
||||
try: |
|
||||
db.add_document(file_path, table_name=current_table) |
|
||||
files_added += 1 |
|
||||
os.rename(file_path, copy_file_path) |
|
||||
except Exception as e: |
|
||||
print(Fore.RED + f"Error adding {file_path}: {e}") |
|
||||
|
|
||||
db.finalize_db() |
|
||||
print(Fore.GREEN + f"Database setup complete! Added {files_added} documents.") |
|
||||
|
|
||||
# Show stats |
|
||||
stats = db.get_stats() |
|
||||
print(Fore.GREEN + f"Database stats: {stats}") |
|
||||
|
|
||||
|
|
||||
if __name__ == "__main__": |
|
||||
setup_database() |
|
||||
@ -1,142 +0,0 @@ |
|||||
@echo off |
|
||||
setlocal enabledelayedexpansion |
|
||||
title "FastMCP" |
|
||||
|
|
||||
REM ====== Config ====== |
|
||||
set "PY_DOWNLOAD_URL=https://www.python.org/ftp/python/3.10.11/python-3.10.11-amd64.exe" |
|
||||
set "CUDA_DOWNLOAD_URL=https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_571.96_windows.exe" |
|
||||
|
|
||||
REM ====== Config ====== |
|
||||
set "PY_DOWNLOAD_URL=https://www.python.org/ftp/python/3.10.11/python-3.10.11-amd64.exe" |
|
||||
:: Set your required Python version here |
|
||||
set REQUIRED_MAJOR=3 |
|
||||
set REQUIRED_MINOR=10 |
|
||||
|
|
||||
set "VENV_DIR=%LOCALAPPDATA%/AvatarCore/FastMCPVenv" |
|
||||
set "VENV_PY=%VENV_DIR%\Scripts\python.exe" |
|
||||
set "REQ_FILE=%~dp0requirements.txt" |
|
||||
set "TARGET_SCRIPT=%~dp0FastMCPServer.py" |
|
||||
|
|
||||
REM Work from this script�s directory |
|
||||
cd /d "%~dp0" |
|
||||
|
|
||||
setlocal EnableExtensions |
|
||||
|
|
||||
:checkcuda |
|
||||
echo === Checking for CUDA 12.8 === |
|
||||
|
|
||||
REM 1) Is nvcc in PATH? |
|
||||
where nvcc >nul |
|
||||
if errorlevel 1 goto installcuda |
|
||||
|
|
||||
nvcc --version | findstr /i /r /c:"release *12\.8" >nul |
|
||||
if not errorlevel 1 ( |
|
||||
REM Optional: show the version line cleanly |
|
||||
for /f "delims=" %%L in ('nvcc --version ^| findstr /i /c:"release"') do set "NVCC_LINE=%%L" |
|
||||
echo Found: %NVCC_LINE% |
|
||||
echo CUDA 12.8 detected. All good. |
|
||||
goto :checkpython |
|
||||
) |
|
||||
|
|
||||
:installcuda |
|
||||
echo ERROR: - Install CUDA 12.8 first. |
|
||||
start "" "%CUDA_DOWNLOAD_URL%" |
|
||||
pause |
|
||||
exit /b 0 |
|
||||
|
|
||||
:checkpython |
|
||||
echo === Checking for Python %REQUIRED_PY% === |
|
||||
set "PY_PATH=%1" |
|
||||
IF [%1] == [] set "PY_PATH=python" |
|
||||
|
|
||||
:: Check if python command exists |
|
||||
%PY_PATH% --version >nul 2>&1 |
|
||||
if %errorlevel% neq 0 ( |
|
||||
echo ERROR: Python is not installed or not in PATH |
|
||||
start "" "%PY_DOWNLOAD_URL%" |
|
||||
pause |
|
||||
exit /b 1 |
|
||||
) |
|
||||
|
|
||||
:: Get Python version |
|
||||
for /f "tokens=2" %%i in ('%PY_PATH% --version 2^>^&1') do set PYTHON_VERSION=%%i |
|
||||
|
|
||||
echo Found Python version: %PYTHON_VERSION% |
|
||||
|
|
||||
:: Parse version numbers |
|
||||
for /f "tokens=1,2 delims=." %%a in ("%PYTHON_VERSION%") do ( |
|
||||
set CURRENT_MAJOR=%%a |
|
||||
set CURRENT_MINOR=%%b |
|
||||
) |
|
||||
|
|
||||
:: Version comparison logic |
|
||||
set VERSION_OK=0 |
|
||||
if %CURRENT_MAJOR% EQU %REQUIRED_MAJOR% ( |
|
||||
if %CURRENT_MINOR% EQU %REQUIRED_MINOR% ( |
|
||||
set VERSION_OK=1 |
|
||||
) |
|
||||
) |
|
||||
|
|
||||
:: Display result |
|
||||
if %VERSION_OK% equ 1 ( |
|
||||
echo SUCCESS: Python version is compatible %REQUIRED_MAJOR%.%REQUIRED_MINOR%! |
|
||||
goto :PythonReady |
|
||||
) else ( |
|
||||
echo ERROR: Python version does not match! |
|
||||
start "" "%PY_DOWNLOAD_URL%" |
|
||||
pause |
|
||||
exit /b 1 |
|
||||
) |
|
||||
|
|
||||
|
|
||||
:PythonReady |
|
||||
echo Using Python: %PY_PATH% |
|
||||
|
|
||||
REM ====== Virtual environment ====== |
|
||||
if exist "%VENV_PY%" ( |
|
||||
echo Found existing venv: "%VENV_DIR%" |
|
||||
) else ( |
|
||||
echo ERROR: No venv found. Creating venv at "%VENV_DIR%" ... 1>&2 |
|
||||
%PY_PATH% -m venv "%VENV_DIR%" |
|
||||
if %errorlevel% neq 0 ( |
|
||||
echo Failed to create virtual environment. |
|
||||
pause |
|
||||
exit /b 1 |
|
||||
) |
|
||||
set "FIRST_SETUP=1" |
|
||||
) |
|
||||
|
|
||||
REM Always upgrade pip once in venv |
|
||||
echo Upgrading pip in venv... 1>&2 |
|
||||
"%VENV_PY%" -m pip install --upgrade pip |
|
||||
|
|
||||
REM Install requirements only on first setup (or if requirements.txt exists and user wants a refresh) |
|
||||
if exist "%REQ_FILE%" ( |
|
||||
if defined FIRST_SETUP ( |
|
||||
echo Installing requirements from "%REQ_FILE%" ... 1>&2 |
|
||||
) |
|
||||
"%VENV_PY%" -m pip install -r "%REQ_FILE%" |
|
||||
if %errorlevel% neq 0 ( |
|
||||
echo Pip install failed. Check your "requirements.txt". 1>&2 |
|
||||
pause |
|
||||
exit /b 1 |
|
||||
) |
|
||||
) else ( |
|
||||
echo No requirements.txt found. Skipping dependency install. |
|
||||
) |
|
||||
|
|
||||
REM ====== Run the target script ====== |
|
||||
if not exist "%TARGET_SCRIPT%" ( |
|
||||
echo ERROR: "%TARGET_SCRIPT%" not found. 1>&2 |
|
||||
echo Make sure %TARGET_SCRIPT% is next to this script, or update TARGET_SCRIPT path. |
|
||||
pause |
|
||||
exit /b 1 |
|
||||
) |
|
||||
|
|
||||
echo Running %TARGET_SCRIPT% ... |
|
||||
start /B /WAIT "" "%VENV_PY%" "%TARGET_SCRIPT%" %* |
|
||||
set "RUN_EXIT=%ERRORLEVEL%" |
|
||||
echo. |
|
||||
echo %TARGET_SCRIPT% exited with code %RUN_EXIT%. |
|
||||
pause |
|
||||
exit /b %RUN_EXIT% |
|
||||
@ -1,41 +0,0 @@ |
|||||
from fastmcp import FastMCP |
|
||||
from typing import List, Dict, Any |
|
||||
from document_vectordb import DocumentVectorDB |
|
||||
import json |
|
||||
import re |
|
||||
|
|
||||
|
|
||||
mcp = FastMCP("AvatarCoreMCP_1_0", stateless_http=True) |
|
||||
|
|
||||
# Initialize your vector database |
|
||||
db = DocumentVectorDB() |
|
||||
db.create_table() |
|
||||
|
|
||||
@mcp.tool() |
|
||||
def search_information(query: str) -> List[Dict[str, Any]]: |
|
||||
"""If you need more information about search this database.""" |
|
||||
try: |
|
||||
query = re.sub('Green Hydrogen Hub Stuttgart', '', query) |
|
||||
query = re.sub('Green Hydrogen Hub', '', query) |
|
||||
query = re.sub('GHH', '', query) |
|
||||
print(query) |
|
||||
results = db.search(query, 3,"documents", True, 48, 12) #Boolean for Cuda based ReRanking |
|
||||
return results |
|
||||
except Exception as e: |
|
||||
return [{"error": f"Search failed: {str(e)}"}] |
|
||||
|
|
||||
@mcp.tool() |
|
||||
def get_database_stats() -> Dict[str, Any]: |
|
||||
"""Get statistics about the document database""" |
|
||||
try: |
|
||||
table = db.table |
|
||||
count = table.count_rows() |
|
||||
return { |
|
||||
"total_entries": count, |
|
||||
"table_name": db.table_name |
|
||||
} |
|
||||
except Exception as e: |
|
||||
return {"error": f"Failed to get stats: {str(e)}"} |
|
||||
|
|
||||
if __name__ == "__main__": |
|
||||
mcp.run(transport="http", host="127.0.0.1", port=8000, path="/mcp") |
|
||||
@ -1,4 +0,0 @@ |
|||||
@echo off |
|
||||
call %localappdata%/AvatarCore/FastMCPVenv/Scripts/Activate.bat |
|
||||
cd /d "%~dp0" |
|
||||
cmd |
|
||||
@ -1,5 +0,0 @@ |
|||||
@echo off |
|
||||
call %localappdata%/AvatarCore/FastMCPVenv/Scripts/Activate.bat |
|
||||
cd /d "%~dp0" |
|
||||
python TestSearchDatabase.py |
|
||||
cmd |
|
||||
@ -1,64 +0,0 @@ |
|||||
from document_vectordb import DocumentVectorDB |
|
||||
import traceback |
|
||||
import time |
|
||||
|
|
||||
table = "documents" |
|
||||
|
|
||||
def DoSearch(): |
|
||||
global table |
|
||||
print("-------------------------", flush=True) |
|
||||
print(f'Which table to search? (empty for {table})', flush=True) |
|
||||
table = input() or table |
|
||||
print('What do you wanna search?', flush=True) |
|
||||
query = input() |
|
||||
|
|
||||
if query == "": |
|
||||
exit(); |
|
||||
|
|
||||
try: |
|
||||
start_time = time.time() |
|
||||
print("Calling db.search...", flush=True) |
|
||||
results = db.search(query, limit=3, table_name=table, rerank=True, candidates=48, batch_size=12) |
|
||||
print(f"db.search returned list of length: {len(results) if results is not None else 'None'}", flush=True) |
|
||||
except Exception as e: |
|
||||
print("Search raised an exception:", flush=True) |
|
||||
print(e, flush=True) |
|
||||
traceback.print_exc() |
|
||||
results = [] |
|
||||
try: |
|
||||
if results and len(results) > 0: |
|
||||
for i, result in enumerate(results, 1): |
|
||||
print(f" {i} {result}", flush=True) |
|
||||
print("-------------------------", flush=True) |
|
||||
else: |
|
||||
print(" No results found", flush=True) |
|
||||
except Exception as e: |
|
||||
print(f"Error printing results: {e}", flush=True) |
|
||||
|
|
||||
print("Execution took: %s seconds" % (time.time() - start_time)) |
|
||||
|
|
||||
DoSearch(); |
|
||||
|
|
||||
if __name__ == "__main__": |
|
||||
import os |
|
||||
try: |
|
||||
print("Testing Document Search...", flush=True) |
|
||||
|
|
||||
# Initialize database |
|
||||
db = DocumentVectorDB() |
|
||||
db.create_table() |
|
||||
|
|
||||
# Get stats |
|
||||
stats = db.get_stats() |
|
||||
print(f"Database stats: {stats}", flush=True) |
|
||||
|
|
||||
DoSearch() |
|
||||
except Exception as e: |
|
||||
print("Fatal error in main:", flush=True) |
|
||||
print(e, flush=True) |
|
||||
traceback.print_exc() |
|
||||
finally: |
|
||||
try: |
|
||||
input("Press Enter to exit...") |
|
||||
except Exception: |
|
||||
pass |
|
||||
@ -1,9 +0,0 @@ |
|||||
@echo off |
|
||||
cd /d "%~dp0" |
|
||||
robocopy documents_added documents_to_add /MOV /S |
|
||||
rmdir /S /Q lancedb |
|
||||
rmdir /S /Q documents_added |
|
||||
echo ----------------------------------------- |
|
||||
echo Farewall, my old friend! |
|
||||
echo ----------------------------------------- |
|
||||
cmd |
|
||||
@ -1,370 +0,0 @@ |
|||||
import lancedb |
|
||||
import pandas as pd |
|
||||
from sentence_transformers import SentenceTransformer |
|
||||
from sentence_transformers import CrossEncoder |
|
||||
from pdfminer.high_level import extract_text |
|
||||
from langchain_text_splitters import RecursiveCharacterTextSplitter |
|
||||
import pyarrow as pa |
|
||||
import os |
|
||||
import torch |
|
||||
import time |
|
||||
from colorama import Fore |
|
||||
from typing import List, Dict |
|
||||
import re |
|
||||
from hashlib import sha1 |
|
||||
|
|
||||
class DocumentVectorDB: |
|
||||
def __init__(self, db_path: str = "./lancedb"): |
|
||||
if db_path == "./lancedb": |
|
||||
base_dir = os.path.dirname(os.path.abspath(__file__)) |
|
||||
db_path = os.path.join(base_dir, "lancedb") |
|
||||
self.db = lancedb.connect(db_path) |
|
||||
# Model will be initialized in create_table() based on table vector dimension |
|
||||
|
|
||||
#Debug Variables |
|
||||
self.start_time = None |
|
||||
self.end_time = None |
|
||||
|
|
||||
self.model = None |
|
||||
self.model_dim = None |
|
||||
self.model_name = None |
|
||||
self.table_name = "documents" |
|
||||
self.reranker = None |
|
||||
self._load_reranker() |
|
||||
|
|
||||
def _load_reranker(self, model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"): |
|
||||
if self.reranker is None: |
|
||||
if(torch.cuda.is_available()): |
|
||||
self.reranker = CrossEncoder(model_name, device="cuda") |
|
||||
else: |
|
||||
self.reranker = CrossEncoder(model_name) |
|
||||
|
|
||||
def _load_model_for_dim(self, dim: int): |
|
||||
if self.model is not None and self.model_dim == dim: |
|
||||
return |
|
||||
# Select a common model matching the vector dimension |
|
||||
if dim == 384: |
|
||||
self.model_name = 'all-MiniLM-L6-v2' |
|
||||
elif dim == 768: |
|
||||
self.model_name = 'all-mpnet-base-v2' |
|
||||
else: |
|
||||
# Fallback to a widely used 384-d model |
|
||||
self.model_name = 'all-MiniLM-L6-v2' |
|
||||
dim = 384 |
|
||||
self.model = SentenceTransformer(self.model_name) |
|
||||
self.model_dim = dim |
|
||||
|
|
||||
def _infer_table_vector_dim(self) -> int: |
|
||||
try: |
|
||||
# Prefer schema-based detection; vector is commonly a FixedSizeList in LanceDB |
|
||||
field = self.table.schema.field('vector') |
|
||||
t = field.type |
|
||||
if pa.types.is_fixed_size_list(t): |
|
||||
return t.list_size |
|
||||
except Exception: |
|
||||
pass |
|
||||
# Fallback: inspect a sample row |
|
||||
try: |
|
||||
df_sample = self.table.to_pandas(limit=1) |
|
||||
if df_sample is not None and not df_sample.empty: |
|
||||
vec = df_sample.iloc[0].get('vector', []) |
|
||||
if isinstance(vec, list): |
|
||||
return len(vec) |
|
||||
except Exception: |
|
||||
pass |
|
||||
# Default if undetectable |
|
||||
return 384 |
|
||||
|
|
||||
def _ensure_table(self, table_name: str): |
|
||||
if getattr(self, "table", None) is not None and self.table_name == table_name and self.model is not None: |
|
||||
return |
|
||||
self.table_name = table_name |
|
||||
if table_name not in self.db.table_names(): |
|
||||
# Initialize a default 384-d model for a fresh table |
|
||||
self._load_model_for_dim(384) |
|
||||
emb_dim = self.model.get_sentence_embedding_dimension() |
|
||||
# Create with sample data first to enforce schema with correct vector dim |
|
||||
sample_data = pd.DataFrame([{ |
|
||||
"id": "sample", |
|
||||
"content": "sample content", |
|
||||
"source": "sample.txt", |
|
||||
"vector": [0.0] * emb_dim, |
|
||||
"doc_id": "sample.txt", |
|
||||
"chunk_index": 0 |
|
||||
}]) |
|
||||
self.table = self.db.create_table(table_name, sample_data) |
|
||||
# Delete the sample data |
|
||||
self.table.delete("id = 'sample'") |
|
||||
else: |
|
||||
self.table = self.db.open_table(table_name) |
|
||||
# Infer vector dimension from existing table and load matching model |
|
||||
dim = self._infer_table_vector_dim() |
|
||||
self._load_model_for_dim(dim) |
|
||||
|
|
||||
def create_table(self, table_name: str = "documents"): |
|
||||
# Create table schema |
|
||||
schema = { |
|
||||
"id": str, |
|
||||
"content": str, |
|
||||
"source": str, |
|
||||
"vector": list |
|
||||
} |
|
||||
self._ensure_table(table_name) |
|
||||
|
|
||||
def _create_index_for_current_table(self): |
|
||||
try: |
|
||||
row_count = self.table.count_rows() |
|
||||
except Exception: |
|
||||
row_count = 0 |
|
||||
|
|
||||
if row_count < 100: |
|
||||
return |
|
||||
|
|
||||
if row_count < 1000: |
|
||||
num_partitions = 1 |
|
||||
num_sub_vectors = 8 |
|
||||
elif row_count < 10000: |
|
||||
num_partitions = 8 |
|
||||
num_sub_vectors = 16 |
|
||||
elif row_count < 100000: |
|
||||
num_partitions = 32 |
|
||||
num_sub_vectors = 64 |
|
||||
else: |
|
||||
num_partitions = 90 |
|
||||
num_sub_vectors = 96 |
|
||||
|
|
||||
try: |
|
||||
self.table.create_index( |
|
||||
vector_column_name="vector", |
|
||||
index_type="IVF_PQ", |
|
||||
metric="cosine", |
|
||||
num_partitions=num_partitions, |
|
||||
num_sub_vectors=num_sub_vectors |
|
||||
) |
|
||||
except Exception: |
|
||||
pass |
|
||||
|
|
||||
def clean_extracted_text(self, text: str) -> str: |
|
||||
""" |
|
||||
Cleans up common PDF extraction artifacts: |
|
||||
1. Removes line-break hyphens. |
|
||||
2. Replaces excessive whitespace and newlines. |
|
||||
""" |
|
||||
|
|
||||
text = re.sub(r'([a-z])-(\n\s*)(\n?)', r'\1', text, flags=re.IGNORECASE) |
|
||||
text = re.sub(r'\s+', ' ', text).strip() |
|
||||
|
|
||||
return text |
|
||||
|
|
||||
def extract_text_from_pdf(self, pdf_path: str) -> str: |
|
||||
text = "" |
|
||||
# pdfminer.six is often more tolerant of broken PDF structure |
|
||||
try: |
|
||||
text = self.clean_extracted_text(extract_text(pdf_path)) |
|
||||
return text |
|
||||
except Exception as e: |
|
||||
print(f"pdfminer.six failed on {pdf_path}: {e}") |
|
||||
return "" # Or try pypdf as a fallback here |
|
||||
|
|
||||
def chunk_text(self, text: str, chunk_size: int = 128, overlap: int = 16) -> List[str]: |
|
||||
# Use characters (tokens) for chunking, not just words |
|
||||
splitter = RecursiveCharacterTextSplitter( |
|
||||
chunk_size=512, # Set chunk size to token/char count, not word count |
|
||||
chunk_overlap=50, |
|
||||
length_function=len, # Use character length |
|
||||
separators=["\n\n", "\n", ". ", " ", ""] # Hierarchical splitting |
|
||||
) |
|
||||
# The splitters are highly optimized and handle the logic efficiently |
|
||||
chunks = splitter.split_text(text) |
|
||||
return chunks |
|
||||
|
|
||||
def add_document(self, file_path: str, doc_type: str = "auto", table_name: str = "documents"): |
|
||||
# Ensure model is initialized (in case add_document is used without create_table()) |
|
||||
self._ensure_table(table_name) |
|
||||
|
|
||||
# Extract text based on file type |
|
||||
if doc_type == "auto": |
|
||||
doc_type = "pdf" if file_path.endswith('.pdf') else "txt" |
|
||||
|
|
||||
if doc_type == "pdf": |
|
||||
text = self.extract_text_from_pdf(file_path) |
|
||||
else: |
|
||||
with open(file_path, 'r', encoding='utf-8') as f: |
|
||||
text = f.read() |
|
||||
|
|
||||
# Chunk the text |
|
||||
chunks = self.chunk_text(text) |
|
||||
|
|
||||
print(Fore.GREEN + f"{len(chunks)} chunks added.") |
|
||||
print(Fore.BLUE + chunks[0]) |
|
||||
|
|
||||
# Create embeddings and store |
|
||||
data_to_add = [] |
|
||||
if chunks: |
|
||||
embeddings = self.model.encode( |
|
||||
chunks, |
|
||||
batch_size=64, |
|
||||
convert_to_numpy=True, |
|
||||
normalize_embeddings=True, |
|
||||
) |
|
||||
base = os.path.basename(file_path) |
|
||||
for i, (chunk, emb) in enumerate(zip(chunks, embeddings)): |
|
||||
if len(chunk) < 1: |
|
||||
continue |
|
||||
did = sha1(f"{base}|{i}|{len(chunk)}".encode("utf-8")).hexdigest()[:16] |
|
||||
doc_data = { |
|
||||
"id": did, |
|
||||
"content": chunk, |
|
||||
"source": file_path, |
|
||||
"vector": emb.tolist(), |
|
||||
"doc_id": base, |
|
||||
"chunk_index": i, |
|
||||
} |
|
||||
data_to_add.append(doc_data) |
|
||||
|
|
||||
# Add all chunks at once |
|
||||
if data_to_add: |
|
||||
df = pd.DataFrame(data_to_add) |
|
||||
self.table.add(df) |
|
||||
self._create_index_for_current_table() |
|
||||
|
|
||||
def finalize_db(self): |
|
||||
self._create_index_for_current_table() |
|
||||
|
|
||||
def DebugTimeIt(self, TimedLabel=""): |
|
||||
if self.start_time is not None: |
|
||||
self.end_time = time.time() |
|
||||
elapsed_time = self.end_time - self.start_time |
|
||||
print(f"{TimedLabel}: Elapsed time {elapsed_time}") |
|
||||
self.start_time = time.time() |
|
||||
|
|
||||
def search(self, query: str, limit: int = 5, table_name: str = "documents", rerank: bool = False, candidates: int = 100, batch_size: int = 64) -> List[Dict]: |
|
||||
# Ensure model is initialized |
|
||||
self._ensure_table(table_name) |
|
||||
query_embedding = self.model.encode(query, normalize_embeddings=True).tolist() |
|
||||
|
|
||||
if rerank: |
|
||||
print("Reranking...") |
|
||||
raw = ( |
|
||||
self.table |
|
||||
.search(query_embedding) |
|
||||
.nprobes(20) |
|
||||
.refine_factor(50) |
|
||||
.limit(candidates) |
|
||||
.to_pandas() |
|
||||
) |
|
||||
|
|
||||
if raw is None or raw.empty: |
|
||||
return [] |
|
||||
|
|
||||
pairs = [(query, c) for c in raw["content"].tolist()] |
|
||||
|
|
||||
try: |
|
||||
self._load_reranker() |
|
||||
scores = self.reranker.predict(pairs, batch_size=batch_size) |
|
||||
raw["rerank_score"] = scores |
|
||||
reranked = raw.sort_values("rerank_score", ascending=False).head(limit) |
|
||||
except Exception: |
|
||||
reranked = raw.head(limit) # graceful fallback |
|
||||
|
|
||||
results_list = [] |
|
||||
for _, row in reranked.iterrows(): |
|
||||
results_list.append({ |
|
||||
"content": row.get("content", ""), |
|
||||
"source": row.get("source", ""), |
|
||||
"score": float(row.get("rerank_score", 0.0)), |
|
||||
}) |
|
||||
return results_list |
|
||||
else: |
|
||||
# Perform vector search |
|
||||
print("Vector search...") |
|
||||
try: |
|
||||
results_df = ( |
|
||||
self.table |
|
||||
.search(query_embedding) |
|
||||
#.metric("cosine") |
|
||||
.nprobes(20) |
|
||||
.refine_factor(50) |
|
||||
.limit(limit) |
|
||||
.to_pandas() |
|
||||
) |
|
||||
except Exception: |
|
||||
print("Error") |
|
||||
results_df = self.table.search(query_embedding).limit(limit).to_pandas() |
|
||||
|
|
||||
if results_df is None or results_df.empty: |
|
||||
return [] |
|
||||
|
|
||||
def pick(row, keys, default_value=""): |
|
||||
for k in keys: |
|
||||
if k in row and pd.notna(row.get(k, None)): |
|
||||
return row.get(k) |
|
||||
return default_value |
|
||||
|
|
||||
content_keys = ["content", "text", "chunk", "page_content", "body"] |
|
||||
source_keys = ["source", "path", "file_path", "document", "filename"] |
|
||||
score_keys = ["_distance", "score", "_similarity"] |
|
||||
|
|
||||
results_list = [] |
|
||||
for _, row in results_df.iterrows(): |
|
||||
content = pick(row, content_keys, "") |
|
||||
source = pick(row, source_keys, "") |
|
||||
score = pick(row, score_keys, 0.0) |
|
||||
# Ensure numeric score |
|
||||
try: |
|
||||
score = float(score) |
|
||||
except Exception: |
|
||||
score = 0.0 |
|
||||
results_list.append({ |
|
||||
"content": content, |
|
||||
"source": source, |
|
||||
"score": score, |
|
||||
}) |
|
||||
|
|
||||
return results_list |
|
||||
|
|
||||
def get_stats(self) -> Dict: |
|
||||
try: |
|
||||
tables_info = [] |
|
||||
table_names = list(self.db.table_names()) |
|
||||
|
|
||||
old_table = getattr(self, "table", None) |
|
||||
old_table_name = getattr(self, "table_name", None) |
|
||||
|
|
||||
for name in table_names: |
|
||||
try: |
|
||||
tbl = self.db.open_table(name) |
|
||||
self.table = tbl |
|
||||
self.table_name = name |
|
||||
count = tbl.count_rows() |
|
||||
vector_dim = self._infer_table_vector_dim() |
|
||||
tables_info.append({ |
|
||||
"table_name": name, |
|
||||
"total_chunks": count, |
|
||||
"vector_dim": vector_dim, |
|
||||
}) |
|
||||
except Exception as inner_e: |
|
||||
tables_info.append({ |
|
||||
"table_name": name, |
|
||||
"error": str(inner_e), |
|
||||
}) |
|
||||
|
|
||||
if old_table is not None: |
|
||||
self.table = old_table |
|
||||
if old_table_name is not None: |
|
||||
self.table_name = old_table_name |
|
||||
|
|
||||
return { |
|
||||
"lancedb_version": lancedb.__version__, |
|
||||
"pyarrow_version": pa.__version__, |
|
||||
"torch_version": torch.__version__, |
|
||||
"tables": tables_info, |
|
||||
} |
|
||||
except Exception as e: |
|
||||
return {"error": str(e)} |
|
||||
|
|
||||
def get_tables(self) -> List[str]: |
|
||||
try: |
|
||||
return list(self.db.table_names()) |
|
||||
except Exception: |
|
||||
return [] |
|
||||
@ -1,32 +0,0 @@ |
|||||
--index-url https://pypi.org/simple |
|
||||
--extra-index-url https://download.pytorch.org/whl/cu128 |
|
||||
--trusted-host download.pytorch.org |
|
||||
--prefer-binary |
|
||||
|
|
||||
|
|
||||
fastmcp==2.10.6 |
|
||||
lancedb==0.25.3 |
|
||||
pandas==2.3.1 |
|
||||
sentence-transformers==5.1.0 |
|
||||
pdfminer.six==20251107 |
|
||||
torch==2.9.0+cu128 |
|
||||
transformers==4.55.0 |
|
||||
huggingface-hub==0.34.4 |
|
||||
scikit-learn==1.7.1 |
|
||||
scipy==1.15.3 |
|
||||
tqdm==4.67.1 |
|
||||
numpy==2.2.6 |
|
||||
regex==2025.7.34 |
|
||||
safetensors==0.6.1 |
|
||||
pyarrow==21.0.0 |
|
||||
python-dotenv==1.1.1 |
|
||||
requests==2.32.4 |
|
||||
uvicorn==0.35.0 |
|
||||
starlette==0.47.2 |
|
||||
sse-starlette==2.4.1 |
|
||||
httpx==0.28.1 |
|
||||
httpx-sse==0.4.1 |
|
||||
pydantic==2.11.7 |
|
||||
pydantic-settings==2.10.1 |
|
||||
langchain-text-splitters==1.0.0 |
|
||||
PyYAML==6.0.2 |
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,147 @@ |
|||||
|
# AvatarCore_STT Plugin |
||||
|
|
||||
|
Speech-to-text plugin for Unreal Engine. Audio flows through a linear chain of modules: |
||||
|
|
||||
|
``` |
||||
|
STTRecorder → [STTPreprocessor, ...] → STTProcessor → transcription result |
||||
|
``` |
||||
|
|
||||
|
The chain is assembled in `STTManagerBase::InitSTTManager` using `BindUFunction` with the string name `"OnChunkReceived"`. The UE reflection system resolves the correct virtual override at bind time, so the manager code does not need to change when signatures change. |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## ESTTChainState — Pipeline Signal |
||||
|
|
||||
|
Every audio chunk carries an `ESTTChainState` (defined in `Public/STTStructs.h`): |
||||
|
|
||||
|
| Value | Meaning | |
||||
|
|-------|---------| |
||||
|
| `Processing` | Normal audio — buffer, process, pass through | |
||||
|
| `Finalizing` | End of utterance — flush buffers and trigger transcription | |
||||
|
| `Discarding` | BLOCKED/abort — clear all buffers, cancel in-flight requests | |
||||
|
|
||||
|
**Rules:** |
||||
|
- Recorders always emit `Processing` — they have no concept of "final". |
||||
|
- PTT preprocessor emits `Finalizing` when the button is released (→ SILENCE) and `Discarding` when the system becomes BLOCKED. |
||||
|
- VAD preprocessor emits `Finalizing` on the last postroll silence chunk, then calls `UserSpeechStateChanged(SILENCE)` for UI purposes only. It emits `Discarding` from `OnUserSpeechStateChanged` when BLOCKED. |
||||
|
- Pass-through preprocessors (Converter, Debugger, SpeexDSP, WebRTC) forward `ChainState` unchanged. They must pass `Finalizing`/`Discarding` through even when `PCMData` is empty. |
||||
|
- Buffer preprocessor reacts to `ChainState` in-band — no `OnSpeechStateChanged` subscription. |
||||
|
- Processors react to `ChainState` only — no `OnSpeechStateChanged` subscriptions. |
||||
|
|
||||
|
**Processors still call `UserSpeechStateChanged`** after transcription completes (for UI state), but they do NOT subscribe to it. |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## Delegate Types |
||||
|
|
||||
|
```cpp |
||||
|
// STTRecorderBase.h |
||||
|
DECLARE_DELEGATE_ThreeParams(FDelegateUnprocessedChunkReceived, TArray<int16>, FAudioInformation, ESTTChainState); |
||||
|
|
||||
|
// STTPreprocessorBase.h |
||||
|
DECLARE_DELEGATE_ThreeParams(FDelegateProcessedChunk, TArray<int16>, FAudioInformation, ESTTChainState); |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## Module Types |
||||
|
|
||||
|
### STTRecorder (`Public/Recorder/STTRecorderBase.h`) |
||||
|
|
||||
|
Produces audio chunks from a source (microphone, file, pixel stream). Fires `OnChunkReceived` delegate with `ESTTChainState::Processing`. Has no knowledge of speech state. |
||||
|
|
||||
|
Implementations: `STTRecorderMicrophone` (PortAudio), `STTRecorderPrimaryMicrophone`, `STTRecorderUnrealMicrophone`, `STTRecorderDebugFile`, `STTRecorderAudioData`. |
||||
|
|
||||
|
### STTPreprocessor (`Public/Preprocessor/STTPreprocessorBase.h`) |
||||
|
|
||||
|
Chained in sequence. Each receives `OnChunkReceived` and fires `OnChunkProcessed` to the next stage. Both delegates carry `ESTTChainState`. |
||||
|
|
||||
|
| Class | Role | |
||||
|
|-------|------| |
||||
|
| `STTPreprocessorConverter` | Stereo→mono, resample to target rate | |
||||
|
| `STTPreprocessorWebRTC` | WebRTC APM (echo cancel, noise suppress, AGC) | |
||||
|
| `STTPreprocessorSpeexDSP` | Speex noise suppression / echo cancel | |
||||
|
| `STTPreprocessorPTT` | Gates audio by PTT button state; emits Finalizing/Discarding | |
||||
|
| `STTPreprocessorVAD` | Voice activity detection; emits Finalizing after postroll, Discarding on BLOCKED | |
||||
|
| `STTPreprocessorBuffer` | Accumulates chunks to a fixed buffer size before forwarding; `bDiscardWhenNotFilledFullyOnce` drops short utterances | |
||||
|
| `STTPreprocessorDebugger` | Writes audio passing through it to a WAV file | |
||||
|
|
||||
|
**STTPreprocessorBuffer — `bDiscardWhenNotFilledFullyOnce`:** |
||||
|
When enabled, if a `Finalizing` signal arrives before the buffer has ever dispatched a full-size `Processing` chunk in the current utterance, it sends `Discarding` instead. This silently drops very short accidental utterances without sending them to the transcription service. |
||||
|
|
||||
|
### STTProcessor (`Public/Processor/STTProcessorBase.h`) |
||||
|
|
||||
|
Receives the final audio. On `Finalizing`: trigger transcription. On `Discarding`: cancel/clear everything. |
||||
|
|
||||
|
| Class | Backend | |
||||
|
|-------|---------| |
||||
|
| `STTProcessorAzure` | Microsoft Azure Cognitive Services (streaming, continuous) | |
||||
|
| `STTProcessorWhisper` | OpenAI Whisper / GPT-4o Transcribe (batch HTTP) | |
||||
|
| `STTParakeetProcessorBase` | Local NVIDIA NeMo Parakeet via TCP (JSON protocol) | |
||||
|
| `STTProcessorRealtimeAPI` | OpenAI Realtime API (forwards audio directly) | |
||||
|
| `STTProcessorDebugSaveWav` | Saves all received audio to a WAV file | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## Configuration |
||||
|
|
||||
|
All modules are configured via `USTTBaseProcessorConfig` (a UObject subclass per processor type). Base settings are in `FSTTBaseSettings` (`Public/STTStructs.h`): |
||||
|
|
||||
|
- `bUsePTT` — Push-to-talk vs. freespeech (VAD) mode |
||||
|
- `bCanInterrupt` — Whether user speech can interrupt the avatar |
||||
|
- `FreespeechPostRollTime` — Seconds of silence after speech before `Finalizing` is emitted |
||||
|
- `PTTPostRollTime` — Seconds after PTT release before `Finalizing` (currently unused — PTT emits Finalizing immediately on release) |
||||
|
- `MaxTalkingTime` — Hard timeout on PTT press duration |
||||
|
- `VADSettings` — Mode, min speech time, min amplitude threshold, speech-while-blocked threshold |
||||
|
- `WebRTCSettings` — Echo cancellation, noise suppression, AGC flags |
||||
|
- `SpeexDSPSettings` — Speex processing entries |
||||
|
- `STTReplacements` — Word replacement pairs applied to final transcription |
||||
|
- `STTSpecialWords` — Hints passed to transcription service for uncommon words |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## Key Files |
||||
|
|
||||
|
``` |
||||
|
Public/ |
||||
|
STTStructs.h — ESTTChainState, ESTTTalkingState, FAudioInformation, FSTTBaseSettings |
||||
|
STTManagerBase.h/.cpp — Pipeline assembly, state machine, delegate wiring |
||||
|
Recorder/STTRecorderBase.h — FDelegateUnprocessedChunkReceived |
||||
|
Preprocessor/STTPreprocessorBase.h — FDelegateProcessedChunk, virtual OnChunkReceived |
||||
|
Processor/STTProcessorBase.h — virtual OnChunkReceived, OnTranscriptionResult helpers |
||||
|
|
||||
|
Private/ |
||||
|
STTManagerBase.cpp — InitSTTManager (BindUFunction chain), UserSpeechStateChanged |
||||
|
Preprocessor/STTPreprocessorPTT.cpp — Finalizing on SILENCE, Discarding on BLOCKED |
||||
|
Preprocessor/STTPreprocessorVAD.cpp — Finalizing after postroll, Discarding on BLOCKED |
||||
|
Preprocessor/STTPreprocessorBuffer.cpp — ChainState-driven flush, bDiscardWhenNotFilledFullyOnce |
||||
|
Processor/Azure/STTProcessorAzure.cpp — Streaming Azure recognition |
||||
|
Processor/Parakeet/STTParakeetProcessorBase.cpp — TCP JSON protocol to Python server |
||||
|
Processor/Whisper/STTProcessorWhisper.cpp — Batch HTTP to OpenAI |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## State Machine (ESTTTalkingState) |
||||
|
|
||||
|
Used for UI and for VAD/PTT internal logic only. Processors do NOT subscribe to `OnSpeechStateChanged`. |
||||
|
|
||||
|
``` |
||||
|
SILENCE ──(VAD/PTT detects speech)──▶ TALKING |
||||
|
TALKING ──(VAD postroll / PTT release)──▶ SILENCE [Finalizing propagates through chain] |
||||
|
TALKING ──(SetBlocked)──▶ BLOCKED [Discarding propagates through chain] |
||||
|
BLOCKED ──(SetBlocked false / interrupt)──▶ SILENCE |
||||
|
ANY ──(transcription complete)──▶ SILENCE |
||||
|
``` |
||||
|
|
||||
|
`TRANSCRIBING` is a transitional state set by Whisper before sending an HTTP request; other processors do not use it. |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## Common Pitfalls |
||||
|
|
||||
|
- **Pass-through preprocessors must forward `Finalizing`/`Discarding` even on empty `PCMData`.** The Converter, SpeexDSP, and WebRTC all have early-return guards for empty/misaligned data — these guards check `ChainState != Processing` before returning so control signals are not swallowed. |
||||
|
- **PTT emits an empty `TArray<int16>` with `Finalizing`.** Processors must guard against transcribing zero-length audio (they already do via `BufferedPCMData.Num() == 0` checks). |
||||
|
- **Azure runs a background thread (`FAzureRunnable`).** `StopRecognition(false)` signals a graceful stop; the runnable delivers the final result via `OnRecognized`/`OnRunnableEnded` callbacks on the game thread. `StopRecognition(true)` is a forced abort (used on `Discarding`). |
||||
|
- **Parakeet communicates over TCP with a local Python server** (`ParakeetSTT.bat`). In editor (`bKeepAlive=true`) the Python process is kept alive between PIE sessions to avoid restart overhead. |
||||
|
- **`BindUFunction` matches by string name and delegate parameter types.** All `OnChunkReceived` overrides must have exactly the same signature as the base UFUNCTION or the bind will fail at runtime. |
||||
Binary file not shown.
Some files were not shown because too many files changed in this diff
Loading…
Reference in new issue