Browse Source

Another Attempt of improving the Azure Speech Transcription and make it less blocking

master
Tillman Staffen 1 month ago
parent
commit
62331f3ae1
  1. 2
      Unreal/Config/DefaultGame.ini
  2. BIN
      Unreal/Content/SPIE/BP/BP_SPIE_Manager_Child.uasset
  3. BIN
      Unreal/Content/SPIE/BP/Mode/DA_Mode_SPIE_SpieInnovationDay.uasset
  4. BIN
      Unreal/Content/SPIE/Maps/M_SPIE_Startup.umap
  5. BIN
      Unreal/Plugins/AvatarCore_Manager/Content/AvatarCoreManager.uasset
  6. BIN
      Unreal/Plugins/AvatarCore_Manager/Content/Widgets/Debug/Pages/W_DebugAvatarCoreSTT.uasset
  7. 22
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/AzureRunnable.cpp
  8. 66
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/STTProcessorAzure.cpp
  9. 8
      Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Azure/STTProcessorAzure.h

2
Unreal/Config/DefaultGame.ini

@ -6,7 +6,7 @@ CommonButtonAcceptKeyHandling=TriggerClick
[/Script/EngineSettings.GeneralProjectSettings]
ProjectID=4B0928DF4291E6F7F4F0D2BD9F00EF29
ProjectName=SPIE Avatar
ProjectVersion=0.1.5
ProjectVersion=0.1.6
[/Script/UnrealEd.ProjectPackagingSettings]
Build=IfProjectHasCode

BIN
Unreal/Content/SPIE/BP/BP_SPIE_Manager_Child.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Content/SPIE/BP/Mode/DA_Mode_SPIE_SpieInnovationDay.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Content/SPIE/Maps/M_SPIE_Startup.umap (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Plugins/AvatarCore_Manager/Content/AvatarCoreManager.uasset (Stored with Git LFS)

Binary file not shown.

BIN
Unreal/Plugins/AvatarCore_Manager/Content/Widgets/Debug/Pages/W_DebugAvatarCoreSTT.uasset (Stored with Git LFS)

Binary file not shown.

22
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/AzureRunnable.cpp

@ -20,10 +20,8 @@ FAzureRunnable::~FAzureRunnable()
{
if (Thread)
{
Owner = nullptr;
bIsRunning = false; // Unblock the Run() loop before killing the thread
Thread->Kill(true);
delete Thread;
Thread = nullptr;
}
}
@ -66,19 +64,20 @@ bool FAzureRunnable::Init()
}
TWeakObjectPtr<USTTProcessorAzure> WeakOwner(Owner);
FAzureRunnable* Self = this;
// Bind Recognizing event to forward data to the game thread
Recognizer->Recognizing.Connect([WeakOwner](const auto& EventArgs) {
Recognizer->Recognizing.Connect([WeakOwner, Self](const auto& EventArgs) {
FString RecognizedText = UTF8_TO_TCHAR(EventArgs.Result->Text.c_str());
AsyncTask(ENamedThreads::GameThread, [WeakOwner, RecognizedText]() {
AsyncTask(ENamedThreads::GameThread, [WeakOwner, RecognizedText, Self]() {
if (WeakOwner.IsValid() && WeakOwner.Get() != nullptr)
{
WeakOwner->OnRecognizing(RecognizedText);
WeakOwner->OnRecognizing(RecognizedText, Self);
}
});
});
// Bind Recognized event for final results
Recognizer->Recognized.Connect([WeakOwner](const auto& EventArgs) {
Recognizer->Recognized.Connect([WeakOwner, Self](const auto& EventArgs) {
FString RecognizedText = UTF8_TO_TCHAR(EventArgs.Result->Text.c_str());
FString DetectedLangUE = TEXT("LANGUAGE_NOT_DETECTED");
@ -95,10 +94,10 @@ bool FAzureRunnable::Init()
// Keep default LANGUAGE_NOT_DETECTED
}
AsyncTask(ENamedThreads::GameThread, [WeakOwner, RecognizedText, DetectedLangUE]() {
AsyncTask(ENamedThreads::GameThread, [WeakOwner, RecognizedText, DetectedLangUE, Self]() {
if (WeakOwner.IsValid() && WeakOwner.Get() != nullptr)
{
WeakOwner->OnRecognized(RecognizedText, DetectedLangUE);
WeakOwner->OnRecognized(RecognizedText, DetectedLangUE, Self);
}
});
});
@ -155,10 +154,11 @@ uint32 FAzureRunnable::Run()
Recognizer->SessionStarted.DisconnectAll();
TWeakObjectPtr<USTTProcessorAzure> WeakOwner(Owner);
AsyncTask(ENamedThreads::GameThread, [WeakOwner]() {
FAzureRunnable* Self = this;
AsyncTask(ENamedThreads::GameThread, [WeakOwner, Self]() {
if (WeakOwner.IsValid() && WeakOwner.Get() != nullptr)
{
WeakOwner->OnRunnableEnded();
WeakOwner->OnRunnableEnded(Self);
}
});

66
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Private/Processor/Azure/STTProcessorAzure.cpp

@ -112,15 +112,15 @@ void USTTProcessorAzure::OnSpeechStateChanged(ESTTTalkingState TalkingState)
void USTTProcessorAzure::StartRecognition()
{
StopRecognition(true); //In case there is something else running
// Force-destroy any leftover runnable from a non-forced stop
if (AzureRunnable) {
AzureRunnable->StopRecognition(true);
AzureRunnable = nullptr;
StopRecognition(false); // Moves any active runnable to StoppedRunnables
if(AzureRunnable)
{
StoppedRunnables.Add(MoveTemp(AzureRunnable)); // AzureRunnable is now null; object stays alive until Run() finishes
}
intermediateResult = "";
USTTProcessorBase::OnTranscriptionStarted();
AzureRunnable = MakeUnique<FAzureRunnable>(config, audioConfig, STTManager->GetSpecialWords(), this, false);
bTranscriptionRunning = true;
}
void USTTProcessorAzure::StopRecognition(bool Forced)
@ -129,19 +129,13 @@ void USTTProcessorAzure::StopRecognition(bool Forced)
if (AzureRunnable)
{
AzureRunnable->StopRecognition(Forced);
if (Forced) {
AzureRunnable = nullptr; // Immediate cleanup, no result expected
}
// Non-forced: runnable finishes gracefully and delivers final result
if(bDebugMode && STTManager!=nullptr)
STTManager->OnSTTLog.Broadcast(TEXT("Recognition thread stopped."));
}
}
void USTTProcessorAzure::OnRecognizing(const FString& RecognizedText)
void USTTProcessorAzure::OnRecognizing(const FString& RecognizedText, FAzureRunnable* Caller)
{
if (!bTranscriptionRunning)
return;
if (!IsValid(STTManager))
return;
if (STTManager->IsBlocked())
@ -156,8 +150,12 @@ void USTTProcessorAzure::OnRecognizing(const FString& RecognizedText)
USTTProcessorBase::OnTranscriptionIntermediateResult(TranscriptionCounter, *intermediateResult);
}
void USTTProcessorAzure::OnRecognized(const FString& RecognizedText, const FString& Language)
void USTTProcessorAzure::OnRecognized(const FString& RecognizedText, const FString& Language, FAzureRunnable* Caller)
{
// Discard callbacks from stopped runnables
if (AzureRunnable == nullptr) {
return;
}
if (!IsValid(STTManager))
return;
if (STTManager->IsBlocked())
@ -174,22 +172,30 @@ void USTTProcessorAzure::OnRecognized(const FString& RecognizedText, const FStri
}
else {
USTTProcessorBase::OnTranscriptionResult(TranscriptionCounter, *intermediateResult, this->DetectedLanguage);
if (AzureRunnable)
{
StoppedRunnables.Add(MoveTemp(AzureRunnable)); // AzureRunnable is now null; object stays alive until Run() finishes
}
intermediateResult.Empty();
}
}
void USTTProcessorAzure::OnConnectionSuccess()
{
// Connection test runnable returns from Run() before posting this callback,
// so Run() is already done — direct null is safe.
AzureRunnable = nullptr;
STTManager->OnReady.Broadcast();
STTManager->OnSpeechStateChanged.AddUniqueDynamic(this, &USTTProcessorAzure::OnSpeechStateChanged);
}
void USTTProcessorAzure::OnRunnableEnded()
void USTTProcessorAzure::OnRunnableEnded(FAzureRunnable* Caller)
{
// Check if it's the active runnable
if (AzureRunnable.Get() == Caller)
{
bTranscriptionRunning = false;
AzureRunnable = nullptr;
AzureRunnable = nullptr; // Safe: Run() has returned
if (IsValid(STTManager)) {
// Send any remaining intermediate result that wasn't finalized by OnRecognized
@ -197,9 +203,23 @@ void USTTProcessorAzure::OnRunnableEnded()
USTTProcessorBase::OnTranscriptionResult(TranscriptionCounter, intermediateResult, DetectedLanguage);
intermediateResult.Empty();
}
else {
// Ensure we return to SILENCE even if no result was produced
// (empty audio, network timeout, etc.) to prevent stuck TRANSCRIBING state
if (!STTManager->IsBlocked())
STTManager->UserSpeechStateChanged(ESTTTalkingState::SILENCE);
}
}
else
{
// Caller was a previously stopped runnable — flush its result then remove
StoppedRunnables.RemoveAll([Caller](const TUniquePtr<FAzureRunnable>& R) {
return R.Get() == Caller;
});
if (IsValid(STTManager)) {
if (!intermediateResult.IsEmpty()) {
USTTProcessorBase::OnTranscriptionResult(TranscriptionCounter, intermediateResult, DetectedLanguage);
intermediateResult.Empty();
}
if (!STTManager->IsBlocked())
STTManager->UserSpeechStateChanged(ESTTTalkingState::SILENCE);
}
@ -209,9 +229,15 @@ void USTTProcessorAzure::OnRunnableEnded()
void USTTProcessorAzure::OnAzureError(FString Error)
{
bTranscriptionRunning = false;
AzureRunnable = nullptr;
intermediateResult.Empty();
// Remove caller from whichever array owns it
// (Error fires before Run() returns, so we move to StoppedRunnables to keep alive)
if (AzureRunnable)
{
StoppedRunnables.Add(MoveTemp(AzureRunnable));
}
if (IsValid(STTManager)) {
STTManager->OnSTTError.Broadcast(Error);
STTManager->UserSpeechStateChanged(ESTTTalkingState::SILENCE);

8
Unreal/Plugins/AvatarCore_STT/Source/AvatarCore_STT/Public/Processor/Azure/STTProcessorAzure.h

@ -46,19 +46,19 @@ private:
private:
TUniquePtr<FAzureRunnable> AzureRunnable;
TArray<TUniquePtr<FAzureRunnable>> StoppedRunnables;
void StartRecognition();
void StopRecognition(bool Forced);
public:
void OnRecognizing(const FString& RecognizedText);
void OnRecognized(const FString& RecognizedText, const FString& Language);
void OnRecognizing(const FString& RecognizedText, FAzureRunnable* Caller);
void OnRecognized(const FString& RecognizedText, const FString& Language, FAzureRunnable* Caller);
UFUNCTION()
void OnConnectionSuccess();
UFUNCTION()
void OnRunnableEnded();
void OnRunnableEnded(FAzureRunnable* Caller);
UFUNCTION()
void OnAzureError(FString Error);

Loading…
Cancel
Save