// LuckyWorld/Plugins/Marketplace/DLSS/Shaders/Private/VelocityCombine.usf

/*
* Copyright (c) 2020 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/

#include "/Engine/Private/Common.ush"
#include "/Engine/Private/FastMath.ush"
#include "/Engine/Private/ScreenPass.ush"

// Compute thread-group dimensions. Both default to 8 and can be overridden by
// defining them before this point (e.g. from the shader compile environment).
#ifndef THREADGROUP_SIZEX
#define THREADGROUP_SIZEX		8
#endif
#ifndef THREADGROUP_SIZEY
#define THREADGROUP_SIZEY		8
#endif
// Number of threads in one group (X * Y).
#define THREADGROUP_TOTALSIZE	(THREADGROUP_SIZEX * THREADGROUP_SIZEY)

// Compile-time switch selecting the code path in VelocityCombineMain:
//   0 (default) - one velocity/depth texel is read per output pixel.
//   1           - the velocity is taken from the nearest-depth pixel among the
//                 four diagonal neighbors (motion vector dilation).
#ifndef DILATE_MOTION_VECTORS
#define DILATE_MOTION_VECTORS 0
#endif

#if DILATE_MOTION_VECTORS
// Distance, in texels, of the diagonal depth taps from the center texel.
#define AA_CROSS 1
// Offset, in input pixels, added to the output pixel center when locating the
// nearest input pixel. Only declared for the dilation path.
float2 TemporalJitterPixels;
#else
#endif

// Encoded velocity input. A texel with x > 0 is treated as holding an encoded
// velocity and is decoded with DecodeVelocityFromTexture; otherwise the motion
// is reconstructed from depth and View.ClipToPrevClip.
Texture2D VelocityTexture;
SamplerState VelocityTextureSampler;
// Supplies the Velocity_* viewport values referenced by the entry point
// (Velocity_ViewportMin, Velocity_ViewportSize, Velocity_ExtentInverse).
SCREEN_PASS_TEXTURE_VIEWPORT(Velocity)

// Scene depth input; only the .x channel is read.
Texture2D DepthTexture;
SamplerState DepthTextureSampler;

// Optional second velocity source, read only when SUPPORT_ALTERNATE_MOTION_VECTOR
// is enabled. A texel with x > 0 overrides the velocity computed from the
// inputs above.
Texture2D<float2> AlternateMotionVectorsTexture;

// Output: the combined per-pixel motion vector written by VelocityCombineMain.
RWTexture2D<float2>	OutVelocityCombinedTexture;
// Supplies the CombinedVelocity_* viewport values referenced by the entry point
// (CombinedVelocity_ViewportMin, CombinedVelocity_ViewportSize,
// CombinedVelocity_ViewportSizeInverse).
SCREEN_PASS_TEXTURE_VIEWPORT(CombinedVelocity)

/**
 * Compute shader entry point that writes one combined motion vector per output
 * pixel into OutVelocityCombinedTexture.
 *
 * For each pixel the velocity comes from VelocityTexture when the texel holds an
 * encoded value (x > 0); otherwise it is reconstructed by reprojecting the pixel
 * through View.ClipToPrevClip using the depth buffer. The result is scaled by
 * CombinedVelocity_ViewportSize * (0.5, -0.5) before being written
 * (presumably converting a screen-space delta into output pixels with Y flipped -
 * confirm against the consumer of this texture).
 *
 * Two compile-time variants exist:
 *   DILATE_MOTION_VECTORS == 1 : velocity is fetched from the nearest-depth pixel
 *                                among the four diagonal neighbors.
 *   DILATE_MOTION_VECTORS == 0 : a 1:1 texel read, optionally overridden by
 *                                AlternateMotionVectorsTexture when
 *                                SUPPORT_ALTERNATE_MOTION_VECTOR is enabled.
 *
 * @param GroupId           Thread-group index (not used by the body).
 * @param DispatchThreadId  Global thread index; one thread per output pixel.
 * @param GroupThreadId     Thread index within the group (not used by the body).
 * @param GroupIndex        Flattened thread index within the group (not used by the body).
 */
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void VelocityCombineMain(
	uint2 GroupId : SV_GroupID,
	uint2 DispatchThreadId : SV_DispatchThreadID,
	uint2 GroupThreadId : SV_GroupThreadID,
	uint GroupIndex : SV_GroupIndex)
{
	// Threads that fall outside the output viewport write nothing.
	const bool bInsideViewport = all(DispatchThreadId.xy < CombinedVelocity_ViewportSize);

	BRANCH
	if (!bInsideViewport)
	{
		return;
	}

	// CombinedVelocity_ViewportMin is expected to be 0, but in case it is not
	uint2 OutputPixelPos = CombinedVelocity_ViewportMin + DispatchThreadId;

#if DILATE_MOTION_VECTORS // TODO: 2x2.

	// UV offset applied to the velocity fetch; stays zero unless a diagonal
	// neighbor is nearer than the center pixel.
	float2 VelocityOffset = float2(0.0, 0.0);

	const float2 ViewportUV = (float2(DispatchThreadId) + 0.5f) * CombinedVelocity_ViewportSizeInverse;

	// Pixel coordinate of the center of output pixel O in the input viewport.
	const float2 PPCo = ViewportUV * Velocity_ViewportSize + TemporalJitterPixels;

	// Pixel coordinate of the center of the nearest input pixel K.
	const float2 PPCk = floor(PPCo) + 0.5;

	const float2 NearestBufferUV = Velocity_ExtentInverse * (Velocity_ViewportMin + PPCk);

	// FIND MOTION OF PIXEL AND NEAREST IN NEIGHBORHOOD
	// ------------------------------------------------
	float3 PosN; // Position of this pixel, possibly later nearest pixel in neighborhood.
	PosN.xy = ViewportUVToScreenPos(ViewportUV);
	PosN.z = DepthTexture.SampleLevel(DepthTextureSampler, NearestBufferUV, 0).x;

	{
		// For motion vector, use camera/dynamic motion from min depth pixel in pattern around pixel.
		// This enables better quality outline on foreground against different motion background.
		// The taps form an "x" pattern at +/-AA_CROSS texels on each axis.
		float4 Depths;
		Depths.x = DepthTexture.SampleLevel(DepthTextureSampler, NearestBufferUV, 0, int2(-AA_CROSS, -AA_CROSS)).x;
		Depths.y = DepthTexture.SampleLevel(DepthTextureSampler, NearestBufferUV, 0, int2(AA_CROSS, -AA_CROSS)).x;
		Depths.z = DepthTexture.SampleLevel(DepthTextureSampler, NearestBufferUV, 0, int2(-AA_CROSS, AA_CROSS)).x;
		Depths.w = DepthTexture.SampleLevel(DepthTextureSampler, NearestBufferUV, 0, int2(AA_CROSS, AA_CROSS)).x;

		// DepthOffset starts at the bottom-right tap (+,+) and is narrowed down
		// to whichever of the four taps has the nearest depth.
		float2 DepthOffset = float2(AA_CROSS, AA_CROSS);
		// X offset of the nearer tap in the top row (y = -AA_CROSS).
		float DepthOffsetXx = float(AA_CROSS);
#if HAS_INVERTED_Z_BUFFER
		// Nearest depth is the largest depth (depth surface 0=far, 1=near).
		if (Depths.x > Depths.y)
		{
			DepthOffsetXx = -AA_CROSS;
		}
		if (Depths.z > Depths.w)
		{
			DepthOffset.x = -AA_CROSS;
		}
		float DepthsXY = max(Depths.x, Depths.y);
		float DepthsZW = max(Depths.z, Depths.w);
		if (DepthsXY > DepthsZW)
		{
			// The top row wins: take its Y and its already-selected X.
			DepthOffset.y = -AA_CROSS;
			DepthOffset.x = DepthOffsetXx;
		}
		float DepthsXYZW = max(DepthsXY, DepthsZW);
		if (DepthsXYZW > PosN.z)
		{
			// This is offset for reading from velocity texture.
			// This supports half or fractional resolution velocity textures.
			// With the assumption that UV position scales between velocity and color.
			VelocityOffset = DepthOffset * Velocity_ExtentInverse;
			// This is [0 to 1] flipped in Y.
			//PosN.xy = ScreenPos + DepthOffset * ViewportSize.zw * 2.0;
			PosN.z = DepthsXYZW;
		}
#else // !HAS_INVERTED_Z_BUFFER
#error Fix me!
#endif // !HAS_INVERTED_Z_BUFFER
	}

	// Camera motion for pixel or nearest pixel (in ScreenPos space).
	// NOTE(review): unlike the non-dilated path below, PrevClip.w is not checked
	// for > 0 before the divide - confirm whether that is intentional.
	float4 ThisClip = float4(PosN.xy, PosN.z, 1);
	float4 PrevClip = mul(ThisClip, View.ClipToPrevClip);
	float2 PrevScreen = PrevClip.xy / PrevClip.w;
	float2 BackN = PosN.xy - PrevScreen;

	// An encoded velocity (x > 0) at the selected pixel replaces the camera-only motion.
	float4 VelocityN = VelocityTexture.SampleLevel(VelocityTextureSampler, NearestBufferUV + VelocityOffset, 0);
	bool DynamicN = VelocityN.x > 0.0;
	if (DynamicN)
	{
		BackN = DecodeVelocityFromTexture(VelocityN).xy;
	}

	// Scale by the output viewport size and write with the sign negated.
	const float2 BackTemp = BackN * CombinedVelocity_ViewportSize;
	OutVelocityCombinedTexture[OutputPixelPos].xy = -BackTemp * float2(0.5, -0.5);
#else
	// 1:1 path: the input texel is addressed directly by thread index.
	const uint2 PixelPos = DispatchThreadId + Velocity_ViewportMin;
	float4 EncodedVelocity = VelocityTexture[PixelPos];
	float Depth = DepthTexture[PixelPos].x;

	float2 Velocity = 0.0f;

	if (EncodedVelocity.x > 0.0f)
	{
		// The texel carries an encoded velocity.
		Velocity = DecodeVelocityFromTexture(EncodedVelocity).xy;
	}
	else
	{
		// No encoded velocity: reproject the pixel through View.ClipToPrevClip.
		float4 ClipPos;
		ClipPos.xy = SvPositionToScreenPosition(float4(PixelPos.xy + 0.5f, 0.0f, 1.0f)).xy;
		ClipPos.z = Depth;
		ClipPos.w = 1;

		float4 PrevClipPos = mul(ClipPos, View.ClipToPrevClip);

		// Velocity stays zero when the previous-frame w is not positive,
		// which also avoids dividing by zero.
		if (PrevClipPos.w > 0)
		{
			float2 PrevScreen = PrevClipPos.xy / PrevClipPos.w;
			Velocity = ClipPos.xy - PrevScreen.xy;
		}
	}

	float2 OutVelocity = Velocity * float2(0.5, -0.5) * CombinedVelocity_ViewportSize;

#if SUPPORT_ALTERNATE_MOTION_VECTOR
	const float2 EncodedAltVelocity = AlternateMotionVectorsTexture[PixelPos];

	if (EncodedAltVelocity.x > 0.0f)
	{
		float2 DecodedVelocity = DecodeVelocityFromTexture(float4(EncodedAltVelocity, 0.0f, 0.0f)).xy;

		// we encode in the orientation DLSS expects, so the extra negate it to make them consistent with the ones
		// generated above
		OutVelocity = -1.0f * DecodedVelocity * CombinedVelocity_ViewportSize;
	}
#endif

	OutVelocityCombinedTexture[OutputPixelPos].xy = -OutVelocity;
#endif
}