Project for SPIE - Avatar for safety briefing / management event
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

102 lines
3.0 KiB

// Georgy Treshchev 2025.
#pragma once
#include "CoreMinimal.h"
#include "ILipSyncProcessor.h"
#include "DSP/FFTAlgorithm.h"
#include "UObject/Object.h"
#include "Tasks/Pipe.h"
#include "AudioResampler.h"
#include "SampleBuffer.h"
#include "RuntimeVisemeGenerator.generated.h"
/**
 * UObject-derived generator that turns audio data into viseme weights.
 *
 * Only declarations are visible in this header; every non-inline member
 * function is defined elsewhere.
 */
UCLASS(BlueprintType)
class RUNTIMEMETAHUMANLIPSYNC_API URuntimeVisemeGenerator : public UObject
{
	GENERATED_BODY()

public:
	/** Default constructor (defined outside this header). */
	URuntimeVisemeGenerator();

	/** Overrides the inherited BeginDestroy() (defined outside this header). */
	virtual void BeginDestroy() override;

	/**
	 * Factory for a new runtime viseme generator instance
	 *
	 * @return The newly created runtime viseme generator
	 */
	UFUNCTION(BlueprintCallable, Category = "Runtime Viseme Generator")
	static URuntimeVisemeGenerator* CreateRuntimeVisemeGenerator();

	/**
	 * Feeds raw PCM audio data into the generator so that viseme weights can be produced from it
	 *
	 * @param PCMData Raw PCM audio samples to process (taken by value)
	 * @param SampleRate Sample rate of the supplied audio data
	 * @param NumOfChannels Channel count of the supplied audio data
	 */
	UFUNCTION(BlueprintCallable, Category = "Runtime Viseme Generator")
	void ProcessAudioData(TArray<float> PCMData, int32 SampleRate, int32 NumOfChannels);

	/**
	 * Gets the current viseme weights
	 *
	 * @return A copy of the viseme weights describing the current state of the visemes
	 */
	UFUNCTION(BlueprintCallable, Category = "Runtime Viseme Generator")
	TArray<float> GetVisemeWeights() const;

	/**
	 * Gets the names of the visemes supported by the system
	 *
	 * @return An array of strings holding the viseme names
	 */
	UFUNCTION(BlueprintCallable, Category = "Runtime Viseme Generator")
	static TArray<FString> GetVisemeNames();

	/**
	 * Supplies a new set of viseme weights. Not exposed to Blueprints (no UFUNCTION).
	 * NOTE(review): the definition is not visible here; presumably it replaces Visemes - confirm in the .cpp.
	 *
	 * @param InWeights The viseme weights to apply (taken by value)
	 */
	void SetVisemeWeights(TArray<float> InWeights);

	/**
	 * Gets the stored laughter score. The member is read directly, without taking DataGuard.
	 *
	 * @return The current value of LaughterScore
	 */
	float GetLaughterScore() const
	{
		return LaughterScore;
	}

private:
	/**
	 * Remixes RAW audio data to a different channel count
	 *
	 * @param RAWData RAW data to remix
	 * @param SampleRate Sample rate of the RAW data
	 * @param SourceNumOfChannels Channel count of the incoming RAW data
	 * @param DestinationNumOfChannels Channel count requested for the remixed RAW data
	 * @param RemixedRAWData Receives the remixed RAW data
	 * @return True if the RAW data was successfully mixed
	 */
	static bool MixChannelsRAWData(Audio::FAlignedFloatBuffer& RAWData, int32 SampleRate, int32 SourceNumOfChannels, int32 DestinationNumOfChannels, Audio::FAlignedFloatBuffer& RemixedRAWData);

	/** Laughter score calculated from the audio data; starts at zero */
	float LaughterScore = 0.0f;

	/** Most recent viseme weights calculated from the audio data */
	TArray<float> Visemes;

	/** Data guard (mutex) for thread safety; mutable so that const member functions can use it */
	mutable TSharedPtr<FCriticalSection> DataGuard;

	/** Size of the buffer used for accumulating PCM data (defaults to 4096) */
	int32 BufferSize = 4096;

	/** PCM data accumulated for processing */
	TArray<float> AccumulatedPCMData;

	/** Task pipe used for the audio processing tasks */
	TUniquePtr<UE::Tasks::FPipe> AudioTaskPipe;

	/**
	 * Lip sync processor, null by default.
	 * NOTE(review): raw pointer - ownership and lifetime are not visible in this header; confirm in the .cpp.
	 */
	ILipSyncProcessor* LipSyncProcessor = nullptr;

	/**
	 * False by default.
	 * NOTE(review): presumably set once LipSyncProcessor has been initialized - confirm in the .cpp.
	 */
	bool LipSyncProcessorInitialized = false;
};