// Copyright Voulz 2021-2025. All Rights Reserved. #include "ArchVisMovieRenderOverlappedMask.h" #include "MovieRenderPipelineCoreModule.h" DECLARE_CYCLE_STAT(TEXT("MaskedAccumulator_AccumulateSinglePlane"), STAT_AccumulateSinglePlane, STATGROUP_MoviePipeline); DECLARE_CYCLE_STAT(TEXT("MaskedAccumulator_AccumulateMultiplePlanes"), STAT_AccumulateMultiplePlanes, STATGROUP_MoviePipeline); DECLARE_CYCLE_STAT(TEXT("MaskedAccumulator_InitMemory"), STAT_InitMemory, STATGROUP_MoviePipeline); DECLARE_CYCLE_STAT(TEXT("MaskedAccumulator_ZeroPlanes"), STAT_ZeroPlanes, STATGROUP_MoviePipeline); DECLARE_CYCLE_STAT(TEXT("MaskedAccumulator_AccumulatePixelData"), STAT_AccumulatePixelData, STATGROUP_MoviePipeline); DECLARE_CYCLE_STAT(TEXT("MaskedAccumulator_FetchPixelData32"), STAT_FetchFinalPixelData32bit, STATGROUP_MoviePipeline); namespace ArchVisMoviePipeline { void FMaskOverlappedAccumulator::InitMemory(FIntPoint InPlaneSize) { SCOPE_CYCLE_COUNTER(STAT_InitMemory); PlaneSize.X = InPlaneSize.X; PlaneSize.Y = InPlaneSize.Y; ImageData.SetNumUninitialized(InPlaneSize.X * InPlaneSize.Y); WeightPlane.Init(PlaneSize); } void FMaskOverlappedAccumulator::ZeroPlanes() { SCOPE_CYCLE_COUNTER(STAT_ZeroPlanes); for (int64 Index = 0; Index < ImageData.Num(); Index++) { for (int32 Plane = 0; Plane < 6; Plane++) { ImageData[Index].Id[Plane] = -1; ImageData[Index].Weight[Plane] = 0.f; ImageData[Index].ExtraDataOffset = -1; } } SparsePixelData.Reset(); WeightPlane.ZeroPlane(); } void FMaskOverlappedAccumulator::Reset() { PlaneSize.X = 0; PlaneSize.Y = 0; // Let the desctructor clean up ImageData.Reset(); SparsePixelData.Reset(); WeightPlane.Reset(); } void FMaskOverlappedAccumulator::AccumulatePixelData(const TArray& InLayerIds, const FColor* InPixelWeights, FIntPoint InPixelDataSize, FIntPoint InTileOffset, FVector2D InSubpixelOffset, const MoviePipeline::FTileWeight1D& WeightX, const MoviePipeline::FTileWeight1D& WeightY) { SCOPE_CYCLE_COUNTER(STAT_AccumulatePixelData); FIntPoint RawSize = InPixelDataSize; { TArray WeightDataX; TArray WeightDataY; WeightX.CalculateArrayWeight(WeightDataX, RawSize.X); WeightY.CalculateArrayWeight(WeightDataY, RawSize.Y); FIntPoint SubRectOffset(WeightX.X0, WeightY.X0); FIntPoint SubRectSize(WeightX.X3 - WeightX.X0, WeightY.X3 - WeightY.X0); // For a normal RGBA image we can add each channel in isolation and then divide by the total weight at the end. // Accumulation for masks are special, because we can't just blend each RGBA channel, as they're actually IDs and a blended ID changes which // object that pixel belongs to in the mask. So instead we need to store every influence per pixel, and the weight for that influence. This is // accomplished through a mix of dense data and sparse data storage. We store the first 6 unique influences per pixel, and then if there are // more than that we track those influences in a different array. AccumulateMultipleRanks(InLayerIds, InPixelWeights, RawSize, InTileOffset, InSubpixelOffset.X, InSubpixelOffset.Y, SubRectOffset, SubRectSize, WeightDataX, WeightDataY); // Accumulate weights as well. { TArray64 VecOne; VecOne.SetNum(RawSize.X * RawSize.Y); for (int32 Index = 0; Index < VecOne.Num(); Index++) { VecOne[Index] = 1.0f; } WeightPlane.AccumulateSinglePlane(VecOne, RawSize, InTileOffset, InSubpixelOffset.X, InSubpixelOffset.Y, SubRectOffset, SubRectSize, WeightDataX, WeightDataY); } } } void FMaskOverlappedAccumulator::AccumulatePixelData(float* InPixelData, FIntPoint InPixelDataSize, FIntPoint InTileOffset, FVector2D InSubpixelOffset, const MoviePipeline::FTileWeight1D& WeightX, const MoviePipeline::FTileWeight1D& WeightY) { SCOPE_CYCLE_COUNTER(STAT_AccumulatePixelData); FIntPoint RawSize = InPixelDataSize; { TArray WeightDataX; TArray WeightDataY; WeightX.CalculateArrayWeight(WeightDataX, RawSize.X); WeightY.CalculateArrayWeight(WeightDataY, RawSize.Y); FIntPoint SubRectOffset(WeightX.X0, WeightY.X0); FIntPoint SubRectSize(WeightX.X3 - WeightX.X0, WeightY.X3 - WeightY.X0); // For a normal RGBA image we can add each channel in isolation and then divide by the total weight at the end. // Accumulation for masks are special, because we can't just blend each RGBA channel, as they're actually IDs and a blended ID changes which // object that pixel belongs to in the mask. So instead we need to store every influence per pixel, and the weight for that influence. This is // accomplished through a mix of dense data and sparse data storage. We store the first 6 unique influences per pixel, and then if there are // more than that we track those influences in a different array. for (int32 RankIter = 0; RankIter < 1; RankIter += 2) { AccumulateSingleRank(InPixelData, RawSize, InTileOffset, InSubpixelOffset.X, InSubpixelOffset.Y, SubRectOffset, SubRectSize, WeightDataX, WeightDataY); } // Accumulate weights as well. { TArray64 VecOne; VecOne.SetNum(RawSize.X * RawSize.Y); for (int32 Index = 0; Index < VecOne.Num(); Index++) { VecOne[Index] = 1.0f; } WeightPlane.AccumulateSinglePlane(VecOne, RawSize, InTileOffset, InSubpixelOffset.X, InSubpixelOffset.Y, SubRectOffset, SubRectSize, WeightDataX, WeightDataY); } } } void FMaskOverlappedAccumulator::AccumulateMultipleRanks(const TArray& InRankIds, const FColor* InPixelWeights, FIntPoint InSize, FIntPoint InOffset, float SubpixelOffsetX, float SubpixelOffsetY, FIntPoint SubRectOffset, FIntPoint SubRectSize, const TArray& WeightDataX, const TArray& WeightDataY) { SCOPE_CYCLE_COUNTER(STAT_AccumulateMultiplePlanes); check(WeightDataX.Num() == InSize.X); check(WeightDataY.Num() == InSize.Y); check(SubpixelOffsetX >= 0.0f); check(SubpixelOffsetX <= 1.0f); check(SubpixelOffsetY >= 0.0f); check(SubpixelOffsetY <= 1.0f); check(SubRectSize.X >= 1); check(SubRectSize.Y >= 1); // if the subpixel offset is less than 0.5, go back one pixel int32 StartX = (SubpixelOffsetX >= 0.5 ? InOffset.X : InOffset.X - 1); int32 StartY = (SubpixelOffsetY >= 0.5 ? InOffset.Y : InOffset.Y - 1); // Build a weight table for each influence. The subpixel offset means that the new sample is added to four pixels, each with // a weight based on how far from the center on the x/y axis. float PixelWeight[2][2]; { // make sure that the equal sign is correct, if the subpixel offset is 0.5, the weightX is 0 and it starts on the center pixel float WeightX = FMath::Frac(SubpixelOffsetX + 0.5f); float WeightY = FMath::Frac(SubpixelOffsetY + 0.5f); // row, column PixelWeight[0][0] = (1.0f - WeightX) * (1.0f - WeightY); PixelWeight[0][1] = (WeightX) * (1.0f - WeightY); PixelWeight[1][0] = (1.0f - WeightX) * (WeightY); PixelWeight[1][1] = (WeightX) * (WeightY); } // This is the reversed reference. Instead of taking a tile and applying it to the accumulation, we start from the // accumulation point and figure out the source pixels that affect it. I.e. all the math is backwards, but it should // be faster. // // Given a position on the current tile (x,y), we apply the sum of the 4 points (x+0,y+0), (x+1,y+0), (x+0,y+1), (x+1,y+1) to the destination index. // So in reverse, given a destination position, our source sample positions are (x-1,y-1), (x+0,y-1), (x-1,y+0), (x+0,y+0). // That's why we make sure to start at a minimum of index 1, instead of 0. for (int32 CurrY = 0; CurrY < InSize.Y; CurrY++) { for (int32 CurrX = 0; CurrX < InSize.X; CurrX++) { int32 DstY = StartY + CurrY; int32 DstX = StartX + CurrX; if (DstX >= 0 && DstY >= 0 && DstX < PlaneSize.X && DstY < PlaneSize.Y) { for (int32 OffsetY = 0; OffsetY < 2; OffsetY++) { for (int32 OffsetX = 0; OffsetX < 2; OffsetX++) { int32 SrcX = FMath::Max(CurrX - 1 + OffsetX, 0); int32 SrcY = FMath::Max(CurrY - 1 + OffsetY, 0); for (int32 InRankIndex = 0; InRankIndex < InRankIds.Num(); InRankIndex++) { // The id value comes from the weight id table, float Val = InRankIds.GetData()[InRankIndex]; float WX = WeightDataX[SrcX]; float WY = WeightDataY[SrcY]; float BaseWeight = WX * WY; float SampleWeight = 0.f; switch (InRankIndex) { case 0: SampleWeight = InPixelWeights[SrcY * InSize.X + SrcX].R / 255.f; break; case 1: SampleWeight = InPixelWeights[SrcY * InSize.X + SrcX].G / 255.f; break; case 2: SampleWeight = InPixelWeights[SrcY * InSize.X + SrcX].B / 255.f; break; default: checkNoEntry(); } // To support falloff for overlapped images, two 1D masks are provided to this function which gives a [0,1] weight value for each pixel. Thus, // we add the full RGBA value to the total, and accumulate a weight plane as well which specifies how much influence each pixel actually recieves // as there are edge cases (such as the outer edges of the image) where they don't recieve a full weight due to no overlap. float Weight = BaseWeight * PixelWeight[1 - OffsetY][1 - OffsetX] * SampleWeight; if (Weight < SMALL_NUMBER) { continue; } int32 TargetIndex = DstY * PlaneSize.X + DstX; FMaskPixelSamples* CurrentSampleStack = &ImageData[TargetIndex]; bool bSuccess = false; while (!bSuccess) { for (int32 SampleIndex = 0; SampleIndex < 6; SampleIndex++) { if (CurrentSampleStack->Id[SampleIndex] == *(int32*)(&Val)) { // The ID has already been recorded on this pixel, so we just increase the weight. CurrentSampleStack->Weight[SampleIndex] += Weight; bSuccess = true; break; } else if (CurrentSampleStack->Id[SampleIndex] == -1) { // If this slot hasn't been used before, we can assume control. CurrentSampleStack->Weight[SampleIndex] = Weight; CurrentSampleStack->Id[SampleIndex] = *(int32*)(&Val); bSuccess = true; break; } } // If we didn't succeed in putting the pixel into the stack above then it is full. Now we need to search the sparse data. if (!bSuccess) { if (CurrentSampleStack->ExtraDataOffset < 0) { // Allocate a new sparse data block to hold onto the data for us. int32 NewIndex = SparsePixelData.AddDefaulted(); CurrentSampleStack->ExtraDataOffset = NewIndex; } // Repeat the for loop searching this new sparse block. CurrentSampleStack = SparsePixelData[CurrentSampleStack->ExtraDataOffset].Get(); } } } } } } } } } void FMaskOverlappedAccumulator::AccumulateSingleRank(const float* InRawData, FIntPoint InSize, FIntPoint InOffset, float SubpixelOffsetX, float SubpixelOffsetY, FIntPoint SubRectOffset, FIntPoint SubRectSize, const TArray& WeightDataX, const TArray& WeightDataY) { SCOPE_CYCLE_COUNTER(STAT_AccumulateSinglePlane); check(WeightDataX.Num() == InSize.X); check(WeightDataY.Num() == InSize.Y); check(SubpixelOffsetX >= 0.0f); check(SubpixelOffsetX <= 1.0f); check(SubpixelOffsetY >= 0.0f); check(SubpixelOffsetY <= 1.0f); check(SubRectSize.X >= 1); check(SubRectSize.Y >= 1); // if the subpixel offset is less than 0.5, go back one pixel int32 StartX = (SubpixelOffsetX >= 0.5 ? InOffset.X : InOffset.X - 1); int32 StartY = (SubpixelOffsetY >= 0.5 ? InOffset.Y : InOffset.Y - 1); // Build a weight table for each influence. The subpixel offset means that the new sample is added to four pixels, each with // a weight based on how far from the center on the x/y axis. float PixelWeight[2][2]; { // make sure that the equal sign is correct, if the subpixel offset is 0.5, the weightX is 0 and it starts on the center pixel float WeightX = FMath::Frac(SubpixelOffsetX + 0.5f); float WeightY = FMath::Frac(SubpixelOffsetY + 0.5f); // row, column PixelWeight[0][0] = (1.0f - WeightX) * (1.0f - WeightY); PixelWeight[0][1] = (WeightX) * (1.0f - WeightY); PixelWeight[1][0] = (1.0f - WeightX) * (WeightY); PixelWeight[1][1] = (WeightX) * (WeightY); } // For each pixel in the final image, we read the incoming image 4 times. This lets us do the accumulation in scanlines // because each destination pixel is only touched once. int32 ActualDstX0 = StartX + SubRectOffset.X; int32 ActualDstY0 = StartY + SubRectOffset.Y; int32 ActualDstX1 = StartX + SubRectOffset.X + SubRectSize.X; int32 ActualDstY1 = StartY + SubRectOffset.Y + SubRectSize.Y; ActualDstX0 = FMath::Clamp(ActualDstX0, 0, PlaneSize.X); ActualDstX1 = FMath::Clamp(ActualDstX1, 0, PlaneSize.X); ActualDstY0 = FMath::Clamp(ActualDstY0, 0, PlaneSize.Y); ActualDstY1 = FMath::Clamp(ActualDstY1, 0, PlaneSize.Y); const float PixelWeight00 = PixelWeight[0][0]; const float PixelWeight01 = PixelWeight[0][1]; const float PixelWeight10 = PixelWeight[1][0]; const float PixelWeight11 = PixelWeight[1][1]; // Mutex to prevent multiple threads from trying to allocate SparsePixelData at the same time. FCriticalSection SparsePixelDataCS; int32 NumScanlines = ActualDstY1 - ActualDstY0; ParallelFor(NumScanlines, [&](int32 InScanlineIndex = 0) { int32 DstY = InScanlineIndex + ActualDstY0; int32 CurrY = DstY - StartY; int32 SrcY0 = FMath::Max(0, CurrY + (0 - 1)); int32 SrcY1 = FMath::Max(0, CurrY + (1 - 1)); const float* SrcLineWeight = WeightDataX.GetData(); const float RowWeight0 = WeightDataY[SrcY0]; const float RowWeight1 = WeightDataY[SrcY1]; const float* SrcLineRaw0 = &InRawData[SrcY0 * InSize.X]; const float* SrcLineRaw1 = &InRawData[SrcY1 * InSize.X]; for (int32 DstX = ActualDstX0; DstX < ActualDstX1; DstX++) { // For each X pixel in the destination image, we need to get the four ids that will contribute to this pixel from our incoming image. struct FRank { float Id; float Weight; }; int32 CurrX = DstX - StartX; int32 X0 = FMath::Max(0, CurrX - 1 + 0); int32 X1 = FMath::Max(0, CurrX - 1 + 1); FRank NewData[4]; NewData[0].Id = SrcLineRaw0[X0]; NewData[0].Weight = RowWeight0 * SrcLineWeight[X0] * PixelWeight[1][1]; NewData[1].Id = SrcLineRaw0[X1]; NewData[1].Weight = RowWeight0 * SrcLineWeight[X1] * PixelWeight[1][0]; NewData[2].Id = SrcLineRaw1[X0]; NewData[2].Weight = RowWeight1 * SrcLineWeight[X0] * PixelWeight[0][1]; NewData[3].Id = SrcLineRaw1[X1]; NewData[3].Weight = RowWeight1 * SrcLineWeight[X1] * PixelWeight[0][0]; // Now that we have all 4, we can find their ids in our destination pixel and add them + the correct weight. for (int32 NewDataIndex = 0; NewDataIndex < UE_ARRAY_COUNT(NewData); NewDataIndex++) { if (NewData[NewDataIndex].Weight < SMALL_NUMBER) { continue; } const int32 TargetIndex = DstY * PlaneSize.X + DstX; FMaskPixelSamples* CurrentSampleStack = &ImageData[TargetIndex]; bool bSuccess = false; while (!bSuccess) { for (int32 SampleIndex = 0; SampleIndex < 6; SampleIndex++) { if (CurrentSampleStack->Id[SampleIndex] == *(int32*)(&NewData[NewDataIndex].Id)) { // The ID has already been recorded on this pixel, so we just increase the weight. CurrentSampleStack->Weight[SampleIndex] += NewData[NewDataIndex].Weight; bSuccess = true; break; } else if (CurrentSampleStack->Id[SampleIndex] == -1) { // If this slot hasn't been used before, we can assume control. CurrentSampleStack->Weight[SampleIndex] = NewData[NewDataIndex].Weight; CurrentSampleStack->Id[SampleIndex] = *(int32*)(&NewData[NewDataIndex].Id); bSuccess = true; break; } } // If we didn't succeed in putting the pixel into the stack above then it is full. Now we need to search the sparse data. if (!bSuccess) { // Don't allow another thread to allocate extra data while we're allocating data as we don't // want the indexes messed up. FScopeLock ScopeLock(&SparsePixelDataCS); if (CurrentSampleStack->ExtraDataOffset < 0) { // Allocate a new sparse data block to hold onto the data for us. TSharedPtr NewSampleData = MakeShared(); int32 NewIndex = SparsePixelData.Add(NewSampleData); CurrentSampleStack->ExtraDataOffset = NewIndex; } // Repeat the for loop searching this new sparse block. This is a pointer to the memory the TSharedPtr // owns, so it's okay if another thread causes a reallocation fo the SparsePixelData array, the destination // memory won't change. CurrentSampleStack = SparsePixelData[CurrentSampleStack->ExtraDataOffset].Get(); } } } } }); } void FMaskOverlappedAccumulator::FetchFinalPixelDataLinearColor(TArray>& OutPixelDataLayers) const { SCOPE_CYCLE_COUNTER(STAT_FetchFinalPixelData32bit); int32 FullSizeX = PlaneSize.X; int32 FullSizeY = PlaneSize.Y; for (TArray64& Layer : OutPixelDataLayers) { Layer.SetNumUninitialized(FullSizeX * FullSizeY); } struct FRank { FRank() : Id(0) , Weight(0.f) {} int32 Id; float Weight; }; // Process them in scanlines since we're reading shared data and outputting to distinct indexes ParallelFor(FullSizeY, [&](int32 FullY = 0) { TArray Ranks; for (int32 FullX = 0L; FullX < FullSizeX; FullX++) { Ranks.Reset(); // be careful with this index, make sure to use 64bit math, not 32bit int64 DstIndex = int64(FullY) * int64(FullSizeX) + int64(FullX); // We look at the linked list of samples per pixel, and then sort them by weight so we can take the top influences. FMaskPixelSamples const* CurrentSampleStack = &ImageData[FullY * PlaneSize.X + FullX]; while (CurrentSampleStack) { for (int32 Index = 0; Index < 6; Index++) { // They are inserted in order so the first invalid id means no more. if (CurrentSampleStack->Id[Index] == -1) { break; } FRank& Rank = Ranks.AddDefaulted_GetRef(); Rank.Id = CurrentSampleStack->Id[Index]; Rank.Weight = CurrentSampleStack->Weight[Index]; } if (CurrentSampleStack->ExtraDataOffset >= 0) { CurrentSampleStack = SparsePixelData[CurrentSampleStack->ExtraDataOffset].Get(); } else { CurrentSampleStack = nullptr; } } // We now have a list of ranks, we can sort them. Ranks.Sort([](const FRank& A, const FRank& B) { return B.Weight < A.Weight; }); // Pad this up to the number of outputs we have. for (int32 Index = Ranks.Num(); Index < (OutPixelDataLayers.Num() * 2); Index++) { Ranks.AddDefaulted(); } // Normalize the weights float TotalWeight = 0.0f; { for (int32 Index = 0; Index < Ranks.Num(); Index++) { TotalWeight += Ranks[Index].Weight; } } // Shouldn't get a zero weight unless these have been called out of order, but better odd behavior than crash. if(TotalWeight <= 0.f) { TotalWeight = KINDA_SMALL_NUMBER; } for (int32 LayerIndex = 0; LayerIndex < OutPixelDataLayers.Num(); LayerIndex++) { FLinearColor& Color = OutPixelDataLayers[LayerIndex][DstIndex]; for (int32 ChannelIndex = 0; ChannelIndex < 2; ChannelIndex++) { int32 RankIndex = (LayerIndex * 2) + ChannelIndex; // R, B Color.Component((ChannelIndex * 2) + 0) = *(float*)(&Ranks[RankIndex].Id); float RawWeight = WeightPlane.ChannelData[FullY * FullSizeX + FullX]; float Scale = 1.0f / (FMath::Max(RawWeight, 0.0001f) * TotalWeight); // G, A Color.Component((ChannelIndex * 2) + 1) = Ranks[RankIndex].Weight / TotalWeight; } } } }); } }