Browse Source

Another attempt at improving the Azure Speech Transcription and making it less blocking

master
Tillman Staffen 1 month ago
parent
commit
62331f3ae1
  1. 2
      Unreal/Config/DefaultGame.ini
  2. BIN
      Unreal/Content/SPIE/BP/BP_SPIE_Manager_Child.uasset
  3. BIN
      Unreal/Content/SPIE/BP/Mode/DA_Mode_SPIE_SpieInnovationDay.uasset
  4. BIN
      Unreal/Content/SPIE/Maps/M_SPIE_Startup.umap
  5. BIN
      Unreal/Plugins/AvatarCore_Manager/Content/AvatarCoreManager.uasset
  6. BIN
      Unreal/Plugins/AvatarCore_Manager/Content/Widgets/Debug/Pages/W_DebugAvatarCoreSTT.uasset
  7. 22
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/AzureRunnable.cpp
  8. 68
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/STTProcessorAzure.cpp
  9. 8
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Azure/STTProcessorAzure.h

2
Unreal/Config/DefaultGame.ini

@ -6,7 +6,7 @@ CommonButtonAcceptKeyHandling=TriggerClick
[/Script/EngineSettings.GeneralProjectSettings] [/Script/EngineSettings.GeneralProjectSettings]
ProjectID=4B0928DF4291E6F7F4F0D2BD9F00EF29 ProjectID=4B0928DF4291E6F7F4F0D2BD9F00EF29
ProjectName=SPIE Avatar ProjectName=SPIE Avatar
ProjectVersion=0.1.5 ProjectVersion=0.1.6
[/Script/UnrealEd.ProjectPackagingSettings] [/Script/UnrealEd.ProjectPackagingSettings]
Build=IfProjectHasCode Build=IfProjectHasCode

BIN
Unreal/Content/SPIE/BP/BP_SPIE_Manager_Child.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Content/SPIE/BP/Mode/DA_Mode_SPIE_SpieInnovationDay.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Content/SPIE/Maps/M_SPIE_Startup.umap (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Plugins/AvatarCore_Manager/Content/AvatarCoreManager.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Plugins/AvatarCore_Manager/Content/Widgets/Debug/Pages/W_DebugAvatarCoreSTT.uasset (Stored with Git LFS)

Binary file not shown.

22
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/AzureRunnable.cpp

@ -20,10 +20,8 @@ FAzureRunnable::~FAzureRunnable()
{ {
if (Thread) if (Thread)
{ {
Owner = nullptr;
bIsRunning = false; // Unblock the Run() loop before killing the thread
Thread->Kill(true);
delete Thread; delete Thread;
Thread = nullptr;
} }
} }
@ -66,19 +64,20 @@ bool FAzureRunnable::Init()
} }
TWeakObjectPtr<USTTProcessorAzure> WeakOwner(Owner); TWeakObjectPtr<USTTProcessorAzure> WeakOwner(Owner);
FAzureRunnable* Self = this;
// Bind Recognizing event to forward data to the game thread // Bind Recognizing event to forward data to the game thread
Recognizer->Recognizing.Connect([WeakOwner](const auto& EventArgs) { Recognizer->Recognizing.Connect([WeakOwner, Self](const auto& EventArgs) {
FString RecognizedText = UTF8_TO_TCHAR(EventArgs.Result->Text.c_str()); FString RecognizedText = UTF8_TO_TCHAR(EventArgs.Result->Text.c_str());
AsyncTask(ENamedThreads::GameThread, [WeakOwner, RecognizedText]() { AsyncTask(ENamedThreads::GameThread, [WeakOwner, RecognizedText, Self]() {
if (WeakOwner.IsValid() && WeakOwner.Get() != nullptr) if (WeakOwner.IsValid() && WeakOwner.Get() != nullptr)
{ {
WeakOwner->OnRecognizing(RecognizedText); WeakOwner->OnRecognizing(RecognizedText, Self);
} }
}); });
}); });
// Bind Recognized event for final results // Bind Recognized event for final results
Recognizer->Recognized.Connect([WeakOwner](const auto& EventArgs) { Recognizer->Recognized.Connect([WeakOwner, Self](const auto& EventArgs) {
FString RecognizedText = UTF8_TO_TCHAR(EventArgs.Result->Text.c_str()); FString RecognizedText = UTF8_TO_TCHAR(EventArgs.Result->Text.c_str());
FString DetectedLangUE = TEXT("LANGUAGE_NOT_DETECTED"); FString DetectedLangUE = TEXT("LANGUAGE_NOT_DETECTED");
@ -95,10 +94,10 @@ bool FAzureRunnable::Init()
// Keep default LANGUAGE_NOT_DETECTED // Keep default LANGUAGE_NOT_DETECTED
} }
AsyncTask(ENamedThreads::GameThread, [WeakOwner, RecognizedText, DetectedLangUE]() { AsyncTask(ENamedThreads::GameThread, [WeakOwner, RecognizedText, DetectedLangUE, Self]() {
if (WeakOwner.IsValid() && WeakOwner.Get() != nullptr) if (WeakOwner.IsValid() && WeakOwner.Get() != nullptr)
{ {
WeakOwner->OnRecognized(RecognizedText, DetectedLangUE); WeakOwner->OnRecognized(RecognizedText, DetectedLangUE, Self);
} }
}); });
}); });
@ -155,10 +154,11 @@ uint32 FAzureRunnable::Run()
Recognizer->SessionStarted.DisconnectAll(); Recognizer->SessionStarted.DisconnectAll();
TWeakObjectPtr<USTTProcessorAzure> WeakOwner(Owner); TWeakObjectPtr<USTTProcessorAzure> WeakOwner(Owner);
AsyncTask(ENamedThreads::GameThread, [WeakOwner]() { FAzureRunnable* Self = this;
AsyncTask(ENamedThreads::GameThread, [WeakOwner, Self]() {
if (WeakOwner.IsValid() && WeakOwner.Get() != nullptr) if (WeakOwner.IsValid() && WeakOwner.Get() != nullptr)
{ {
WeakOwner->OnRunnableEnded(); WeakOwner->OnRunnableEnded(Self);
} }
}); });

68
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/STTProcessorAzure.cpp

@ -112,15 +112,15 @@ void USTTProcessorAzure::OnSpeechStateChanged(ESTTTalkingState TalkingState)
void USTTProcessorAzure::StartRecognition() void USTTProcessorAzure::StartRecognition()
{ {
StopRecognition(true); //In case there is something else running StopRecognition(false); // Moves any active runnable to StoppedRunnables
// Force-destroy any leftover runnable from a non-forced stop if(AzureRunnable)
if (AzureRunnable) { {
AzureRunnable->StopRecognition(true); StoppedRunnables.Add(MoveTemp(AzureRunnable)); // AzureRunnable is now null; object stays alive until Run() finishes
AzureRunnable = nullptr;
} }
intermediateResult = ""; intermediateResult = "";
USTTProcessorBase::OnTranscriptionStarted(); USTTProcessorBase::OnTranscriptionStarted();
AzureRunnable = MakeUnique<FAzureRunnable>(config, audioConfig, STTManager->GetSpecialWords(), this, false); AzureRunnable = MakeUnique<FAzureRunnable>(config, audioConfig, STTManager->GetSpecialWords(), this, false);
bTranscriptionRunning = true;
} }
void USTTProcessorAzure::StopRecognition(bool Forced) void USTTProcessorAzure::StopRecognition(bool Forced)
@ -129,19 +129,13 @@ void USTTProcessorAzure::StopRecognition(bool Forced)
if (AzureRunnable) if (AzureRunnable)
{ {
AzureRunnable->StopRecognition(Forced); AzureRunnable->StopRecognition(Forced);
if (Forced) {
AzureRunnable = nullptr; // Immediate cleanup, no result expected
}
// Non-forced: runnable finishes gracefully and delivers final result
if(bDebugMode && STTManager!=nullptr) if(bDebugMode && STTManager!=nullptr)
STTManager->OnSTTLog.Broadcast(TEXT("Recognition thread stopped.")); STTManager->OnSTTLog.Broadcast(TEXT("Recognition thread stopped."));
} }
} }
void USTTProcessorAzure::OnRecognizing(const FString& RecognizedText) void USTTProcessorAzure::OnRecognizing(const FString& RecognizedText, FAzureRunnable* Caller)
{ {
if (!bTranscriptionRunning)
return;
if (!IsValid(STTManager)) if (!IsValid(STTManager))
return; return;
if (STTManager->IsBlocked()) if (STTManager->IsBlocked())
@ -156,8 +150,12 @@ void USTTProcessorAzure::OnRecognizing(const FString& RecognizedText)
USTTProcessorBase::OnTranscriptionIntermediateResult(TranscriptionCounter, *intermediateResult); USTTProcessorBase::OnTranscriptionIntermediateResult(TranscriptionCounter, *intermediateResult);
} }
void USTTProcessorAzure::OnRecognized(const FString& RecognizedText, const FString& Language) void USTTProcessorAzure::OnRecognized(const FString& RecognizedText, const FString& Language, FAzureRunnable* Caller)
{ {
// Discard callbacks from stopped runnables
if (AzureRunnable == nullptr) {
return;
}
if (!IsValid(STTManager)) if (!IsValid(STTManager))
return; return;
if (STTManager->IsBlocked()) if (STTManager->IsBlocked())
@ -174,22 +172,30 @@ void USTTProcessorAzure::OnRecognized(const FString& RecognizedText, const FStri
} }
else { else {
USTTProcessorBase::OnTranscriptionResult(TranscriptionCounter, *intermediateResult, this->DetectedLanguage); USTTProcessorBase::OnTranscriptionResult(TranscriptionCounter, *intermediateResult, this->DetectedLanguage);
if (AzureRunnable)
{
StoppedRunnables.Add(MoveTemp(AzureRunnable)); // AzureRunnable is now null; object stays alive until Run() finishes
}
intermediateResult.Empty(); intermediateResult.Empty();
} }
} }
void USTTProcessorAzure::OnConnectionSuccess() void USTTProcessorAzure::OnConnectionSuccess()
{ {
// Connection test runnable returns from Run() before posting this callback,
// so Run() is already done — direct null is safe.
AzureRunnable = nullptr; AzureRunnable = nullptr;
STTManager->OnReady.Broadcast(); STTManager->OnReady.Broadcast();
STTManager->OnSpeechStateChanged.AddUniqueDynamic(this, &USTTProcessorAzure::OnSpeechStateChanged); STTManager->OnSpeechStateChanged.AddUniqueDynamic(this, &USTTProcessorAzure::OnSpeechStateChanged);
} }
void USTTProcessorAzure::OnRunnableEnded() void USTTProcessorAzure::OnRunnableEnded(FAzureRunnable* Caller)
{ {
// Check if it's the active runnable
if (AzureRunnable.Get() == Caller)
{
bTranscriptionRunning = false; bTranscriptionRunning = false;
AzureRunnable = nullptr; AzureRunnable = nullptr; // Safe: Run() has returned
if (IsValid(STTManager)) { if (IsValid(STTManager)) {
// Send any remaining intermediate result that wasn't finalized by OnRecognized // Send any remaining intermediate result that wasn't finalized by OnRecognized
@ -197,10 +203,24 @@ void USTTProcessorAzure::OnRunnableEnded()
USTTProcessorBase::OnTranscriptionResult(TranscriptionCounter, intermediateResult, DetectedLanguage); USTTProcessorBase::OnTranscriptionResult(TranscriptionCounter, intermediateResult, DetectedLanguage);
intermediateResult.Empty(); intermediateResult.Empty();
} }
else {
// Ensure we return to SILENCE even if no result was produced if (!STTManager->IsBlocked())
// (empty audio, network timeout, etc.) to prevent stuck TRANSCRIBING state STTManager->UserSpeechStateChanged(ESTTTalkingState::SILENCE);
if(!STTManager->IsBlocked()) }
}
else
{
// Caller was a previously stopped runnable — flush its result then remove
StoppedRunnables.RemoveAll([Caller](const TUniquePtr<FAzureRunnable>& R) {
return R.Get() == Caller;
});
if (IsValid(STTManager)) {
if (!intermediateResult.IsEmpty()) {
USTTProcessorBase::OnTranscriptionResult(TranscriptionCounter, intermediateResult, DetectedLanguage);
intermediateResult.Empty();
}
if (!STTManager->IsBlocked())
STTManager->UserSpeechStateChanged(ESTTTalkingState::SILENCE); STTManager->UserSpeechStateChanged(ESTTTalkingState::SILENCE);
} }
} }
@ -209,9 +229,15 @@ void USTTProcessorAzure::OnRunnableEnded()
void USTTProcessorAzure::OnAzureError(FString Error) void USTTProcessorAzure::OnAzureError(FString Error)
{ {
bTranscriptionRunning = false; bTranscriptionRunning = false;
AzureRunnable = nullptr;
intermediateResult.Empty(); intermediateResult.Empty();
// Remove caller from whichever array owns it
// (Error fires before Run() returns, so we move to StoppedRunnables to keep alive)
if (AzureRunnable)
{
StoppedRunnables.Add(MoveTemp(AzureRunnable));
}
if (IsValid(STTManager)) { if (IsValid(STTManager)) {
STTManager->OnSTTError.Broadcast(Error); STTManager->OnSTTError.Broadcast(Error);
STTManager->UserSpeechStateChanged(ESTTTalkingState::SILENCE); STTManager->UserSpeechStateChanged(ESTTTalkingState::SILENCE);

8
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Azure/STTProcessorAzure.h

@ -46,19 +46,19 @@ private:
private: private:
TUniquePtr<FAzureRunnable> AzureRunnable; TUniquePtr<FAzureRunnable> AzureRunnable;
TArray<TUniquePtr<FAzureRunnable>> StoppedRunnables;
void StartRecognition(); void StartRecognition();
void StopRecognition(bool Forced); void StopRecognition(bool Forced);
public: public:
void OnRecognizing(const FString& RecognizedText); void OnRecognizing(const FString& RecognizedText, FAzureRunnable* Caller);
void OnRecognized(const FString& RecognizedText, const FString& Language); void OnRecognized(const FString& RecognizedText, const FString& Language, FAzureRunnable* Caller);
UFUNCTION() UFUNCTION()
void OnConnectionSuccess(); void OnConnectionSuccess();
UFUNCTION() void OnRunnableEnded(FAzureRunnable* Caller);
void OnRunnableEnded();
UFUNCTION() UFUNCTION()
void OnAzureError(FString Error); void OnAzureError(FString Error);

Loading…
Cancel
Save