new plugin sent

This commit is contained in:
gurkan01 2025-04-02 22:53:47 +03:00
parent 7c2bd7b4c3
commit 028015b4e5
1963 changed files with 934304 additions and 0 deletions

View File

@ -0,0 +1,11 @@
[FilterPlugin]
; This section lists additional files which will be packaged along with your plugin. Paths should be listed relative to the root plugin directory, and
; may include "...", "*", and "?" wildcards to match directories, files, and individual characters respectively.
;
; Examples:
; /README.txt
; /Extras/...
; /Binaries/ThirdParty/*.dll
/Config/...

View File

@ -0,0 +1,33 @@
{
"FileVersion": 3,
"Version": 5,
"VersionName": "4.0.0",
"FriendlyName": "NVIDIA Image Scaling (NIS)",
"Description": "NVIDIA Image Scaling boosts frame rates using GPU scaling and sharpening.",
"Category": "Rendering",
"CreatedBy": "NVIDIA",
"CreatedByURL": "https://developer.nvidia.com/image-scaling",
"DocsURL": "",
"MarketplaceURL": "https://www.unrealengine.com/marketplace/en-US/product/nvidia-dlss",
"SupportURL": "mailto:DLSS-Support@nvidia.com",
"EngineVersion": "5.5.0",
"CanContainContent": false,
"Installed": true,
"Modules": [
{
"Name": "NISCore",
"Type": "Runtime",
"LoadingPhase": "PostEngineInit"
},
{
"Name": "NISShaders",
"Type": "Runtime",
"LoadingPhase": "PostConfigInit"
},
{
"Name": "NISBlueprint",
"Type": "Runtime",
"LoadingPhase": "PostConfigInit"
}
]
}

View File

@ -0,0 +1,103 @@
// The MIT License(MIT)
//
// Copyright(c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files(the "Software"), to deal in
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and / or sell copies of
// the Software, and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE AUTHORS OR
// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "/Engine/Public/Platform.ush"
// ---------------------------------------------------------------------------------
// Build configuration for the shared NIS implementation included further below.
// This variant selects the HLSL code path and disables the GLSL one.
// ---------------------------------------------------------------------------------
#define NIS_HLSL 1
#define NIS_GLSL 0
// NOTE(review): UNROLL is not defined in this file; presumably it comes from Platform.ush - confirm.
#define NIS_UNROLL UNROLL
// NIS_SCALER chooses the entry point body in main(): 1 -> NVScaler, 0 -> NVSharpen.
// It defaults to 1 unless the compile environment already defined it.
#ifndef NIS_SCALER
#define NIS_SCALER 1
#endif
// NIS_DXC defaults to 0; when non-zero the binding macros expand to Vulkan attributes.
#ifndef NIS_DXC
#define NIS_DXC 0
#endif
#if NIS_DXC
#define NIS_PUSH_CONSTANT [[vk::push_constant]]
#define NIS_BINDING(bindingIndex) [[vk::binding(bindingIndex, 0)]]
#else
// Without NIS_DXC both macros expand to nothing, so no explicit binding is emitted.
#define NIS_PUSH_CONSTANT
#define NIS_BINDING(bindingIndex)
#endif
// The constant buffer wrapper of the stand-alone sample is commented out here, so the
// constants below are declared as loose globals.
// NOTE(review): presumably this lets the engine's shader parameter system bind them - confirm.
//NIS_BINDING(0) cbuffer cb : register(b0)
//{
float kDetectRatio;
float kDetectThres;
float kMinContrastRatio;
float kRatioNorm;
float kContrastBoost;
float kEps;
float kSharpStartY;
float kSharpScaleY;
float kSharpStrengthMin;
float kSharpStrengthScale;
float kSharpLimitMin;
float kSharpLimitScale;
float kScaleX;
float kScaleY;
float kDstNormX;
float kDstNormY;
float kSrcNormX;
float kSrcNormY;
uint kInputViewportOriginX;
uint kInputViewportOriginY;
uint kInputViewportWidth;
uint kInputViewportHeight;
uint kOutputViewportOriginX;
uint kOutputViewportOriginY;
uint kOutputViewportWidth;
uint kOutputViewportHeight;
float reserved0;
float reserved1;
//};
// Resources used by the scaler/sharpener. The explicit register assignments of the
// stand-alone sample are kept only as inline comments.
NIS_BINDING(1) SamplerState samplerLinearClamp/* : register(s0)*/;
NIS_BINDING(2) Texture2D in_texture /*: register(t0)*/;
NIS_BINDING(3) RWTexture2D<float4> out_texture /*: register(u0)*/;
// The two coefficient textures are only declared for the scaler variant.
#if NIS_SCALER
NIS_BINDING(4) Texture2D coef_scaler /*: register(t1)*/;
NIS_BINDING(5) Texture2D coef_usm /*: register(t2)*/;
#endif
#include "/ThirdParty/Plugin/NIS/NIS_Scaler.h"
// Compute entry point. Forwards the 2D thread-group id and the 1D index of the thread
// inside its group to either the sharpen-only or the scale+sharpen implementation,
// depending on the NIS_SCALER compile-time switch.
[numthreads(NIS_THREAD_GROUP_SIZE, 1, 1)]
void main(uint3 blockIdx : SV_GroupID, uint3 threadIdx : SV_GroupThreadID)
{
	const uint2 GroupXY = blockIdx.xy;
	const uint LocalX = threadIdx.x;
#if !NIS_SCALER
	NVSharpen(GroupXY, LocalX);
#else
	NVScaler(GroupXY, LocalX);
#endif
}

View File

@ -0,0 +1,99 @@
// The MIT License(MIT)
//
// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files(the "Software"), to deal in
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and / or sell copies of
// the Software, and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE AUTHORS OR
// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//---------------------------------------------------------------------------------
// NVIDIA Image Scaling SDK - v1.0.3
//---------------------------------------------------------------------------------
// GLSL main example
//---------------------------------------------------------------------------------
#version 450
#extension GL_ARB_separate_shader_objects : enable
#extension GL_ARB_shading_language_420pack : enable
#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_explicit_arithmetic_types : require
// Select the GLSL code path of the shared NIS implementation included below.
#define NIS_GLSL 1
// NIS_SCALER chooses the entry point body in main(): 1 -> NVScaler, 0 -> NVSharpen.
// It defaults to 1 unless the compile environment already defined it.
#ifndef NIS_SCALER
#define NIS_SCALER 1
#endif
// Uniform block with the NIS constants, at descriptor set 0, binding 0.
// NOTE(review): member order presumably has to match the host-side configuration
// structure that fills this buffer - confirm before reordering.
layout(set=0,binding=0) uniform const_buffer
{
float kDetectRatio;
float kDetectThres;
float kMinContrastRatio;
float kRatioNorm;
float kContrastBoost;
float kEps;
float kSharpStartY;
float kSharpScaleY;
float kSharpStrengthMin;
float kSharpStrengthScale;
float kSharpLimitMin;
float kSharpLimitScale;
float kScaleX;
float kScaleY;
float kDstNormX;
float kDstNormY;
float kSrcNormX;
float kSrcNormY;
uint kInputViewportOriginX;
uint kInputViewportOriginY;
uint kInputViewportWidth;
uint kInputViewportHeight;
uint kOutputViewportOriginX;
uint kOutputViewportOriginY;
uint kOutputViewportWidth;
uint kOutputViewportHeight;
float reserved0;
float reserved1;
};
// Sampler, input texture and write-only output image (set 0, bindings 1-3).
layout(set=0,binding=1) uniform sampler samplerLinearClamp;
layout(set=0,binding=2) uniform texture2D in_texture;
layout(set=0,binding=3) uniform writeonly image2D out_texture;
// The two coefficient textures (bindings 4-5) are only declared for the scaler variant.
#if NIS_SCALER
layout(set=0,binding=4) uniform texture2D coef_scaler;
layout(set=0,binding=5) uniform texture2D coef_usm;
#endif
#include "NIS_Scaler.h"
// One-dimensional work group; its size is provided by the NIS header included above.
layout(local_size_x=NIS_THREAD_GROUP_SIZE) in;

// Compute entry point. Forwards the 2D work-group id and the 1D local invocation index
// to either the sharpen-only or the scale+sharpen implementation, depending on the
// NIS_SCALER compile-time switch.
void main()
{
	uvec2 groupXY = gl_WorkGroupID.xy;
	uint localX = gl_LocalInvocationID.x;
#if !NIS_SCALER
	NVSharpen(groupXY, localX);
#else
	NVScaler(groupXY, localX);
#endif
}

View File

@ -0,0 +1,111 @@
// The MIT License(MIT)
//
// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files(the "Software"), to deal in
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and / or sell copies of
// the Software, and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE AUTHORS OR
// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//---------------------------------------------------------------------------------
// NVIDIA Image Scaling SDK - v1.0.3
//---------------------------------------------------------------------------------
// HLSL main example
//---------------------------------------------------------------------------------
// Select the HLSL code path of the shared NIS implementation included below.
#define NIS_HLSL 1
// NIS_SCALER chooses the entry point body in main(): 1 -> NVScaler, 0 -> NVSharpen.
// It defaults to 1 unless the compile environment already defined it.
#ifndef NIS_SCALER
#define NIS_SCALER 1
#endif
// NIS_DXC defaults to 0; when non-zero the binding macros expand to Vulkan attributes.
#ifndef NIS_DXC
#define NIS_DXC 0
#endif
#if NIS_DXC
#define NIS_PUSH_CONSTANT [[vk::push_constant]]
#define NIS_BINDING(bindingIndex) [[vk::binding(bindingIndex, 0)]]
#else
// Without NIS_DXC both macros expand to nothing and only the register() bindings apply.
#define NIS_PUSH_CONSTANT
#define NIS_BINDING(bindingIndex)
#endif
// Constant buffer with the NIS constants, at register b0 (binding 0 under NIS_DXC).
// NOTE(review): member order presumably has to match the host-side configuration
// structure that fills this buffer - confirm before reordering.
NIS_BINDING(0) cbuffer cb : register(b0)
{
float kDetectRatio;
float kDetectThres;
float kMinContrastRatio;
float kRatioNorm;
float kContrastBoost;
float kEps;
float kSharpStartY;
float kSharpScaleY;
float kSharpStrengthMin;
float kSharpStrengthScale;
float kSharpLimitMin;
float kSharpLimitScale;
float kScaleX;
float kScaleY;
float kDstNormX;
float kDstNormY;
float kSrcNormX;
float kSrcNormY;
uint kInputViewportOriginX;
uint kInputViewportOriginY;
uint kInputViewportWidth;
uint kInputViewportHeight;
uint kOutputViewportOriginX;
uint kOutputViewportOriginY;
uint kOutputViewportWidth;
uint kOutputViewportHeight;
float reserved0;
float reserved1;
};
NIS_BINDING(1) SamplerState samplerLinearClamp : register(s0);
// Input is either two separate planes (single-channel at t0, two-channel at t3) or one
// texture at t0. No default for NIS_NV12_SUPPORT is defined above, so it has to come
// from the compile environment (an undefined macro evaluates to 0 in #if).
// NOTE(review): both NV12 planes use NIS_BINDING(2); with NIS_DXC enabled that gives
// them the same vk::binding index - confirm this is intended.
#if NIS_NV12_SUPPORT
NIS_BINDING(2) Texture2D<float> in_texture_y : register(t0);
NIS_BINDING(2) Texture2D<float2> in_texture_uv : register(t3);
#else
NIS_BINDING(2) Texture2D in_texture : register(t0);
#endif
NIS_BINDING(3) RWTexture2D<float4> out_texture : register(u0);
// The two coefficient textures are only declared for the scaler variant.
#if NIS_SCALER
NIS_BINDING(4) Texture2D coef_scaler : register(t1);
NIS_BINDING(5) Texture2D coef_usm : register(t2);
#endif
#include "NIS_Scaler.h"
// Compute entry point. Forwards the 2D thread-group id and the 1D index of the thread
// inside its group to either the sharpen-only or the scale+sharpen implementation,
// depending on the NIS_SCALER compile-time switch.
[numthreads(NIS_THREAD_GROUP_SIZE, 1, 1)]
void main(uint3 blockIdx : SV_GroupID, uint3 threadIdx : SV_GroupThreadID)
{
	const uint2 GroupXY = blockIdx.xy;
	const uint LocalX = threadIdx.x;
#if !NIS_SCALER
	NVSharpen(GroupXY, LocalX);
#else
	NVScaler(GroupXY, LocalX);
#endif
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,57 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
using UnrealBuildTool;
using System.IO;
/// <summary>
/// UnrealBuildTool rules for the NISBlueprint module, which exposes the NIS settings
/// to Blueprints through UNISLibrary.
/// </summary>
public class NISBlueprint : ModuleRules
{
	public NISBlueprint(ReadOnlyTargetRules Target) : base(Target)
	{
		PCHUsage = ModuleRules.PCHUsageMode.UseExplicitOrSharedPCHs;

		// Modules that are only needed by the implementation files.
		PrivateDependencyModuleNames.AddRange(
			new string[]
			{
				"Core",
				"CoreUObject",
				"Engine",
				"RenderCore",
				"Renderer",
				"Projects",
			}
		);

		// Modules that are also exposed to dependents of this module.
		PublicDependencyModuleNames.AddRange(
			new string[]
			{
				"NISShaders",
				"RHI",
			}
		);

		// The implementation includes a header from the renderer's private folder.
		// Path.Combine is used (instead of string concatenation) so the path is built
		// the same way as in the NISCore module rules.
		PrivateIncludePaths.AddRange(
			new string[]
			{
				Path.Combine(EngineDirectory, "Source/Runtime/Renderer/Private"),
			}
		);
	}
}

View File

@ -0,0 +1,231 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "NISLibrary.h"
#include "NISShaders.h"
#include "Modules/ModuleManager.h"
#include "Interfaces/IPluginManager.h"
#include "ShaderCore.h"
#include "PostProcess/TemporalAA.h"
#include "Runtime/Launch/Resources/Version.h"
#define LOCTEXT_NAMESPACE "FNISBlueprintModule"
DEFINE_LOG_CATEGORY_STATIC(LogNISBlueprint, Log, All);
// Identifiers passed along with Kismet execution messages. Only the SetNISMode one is
// referenced in the visible part of this file.
static const FName SetNISModeInvalidEnumValueError= FName("SetNISModeInvalidEnumValueError");
static const FName IsNISModeSupportedInvalidEnumValueError = FName("IsNISModeSupportedInvalidEnumValueError");
// Definitions of the static members declared in UNISLibrary.
UNISSupport UNISLibrary::NISSupport = UNISSupport::Supported;
FNISUpscaler* UNISLibrary::NISUpscaler = nullptr;
// Screen percentage returned for UNISMode::Custom; updated by SetNISCustomScreenPercentage.
float UNISLibrary::SavedCustomScreenPercentage = 100.0f;
// True while the last valid mode passed to SetNISMode was UNISMode::Custom.
bool UNISLibrary::bIsCustomMode = false;
// When non-zero, SetNISMode also writes r.MipMapLODBias derived from the screen percentage.
static TAutoConsoleVariable<int32> CVarNISUpscalingAutomaticMipMapLODBias(
TEXT("r.NIS.Upscaling.AutomaticMipMapLODBias"),
1,
TEXT("Enable automatic setting of r.MipMapLODBias based on the effective NIS screen percentage (default=1)\n")
TEXT("NOTE: This is only applied when using the UNISLibrary::SetNISMode blueprint function."),
ECVF_RenderThreadSafe);
// Constant added to the automatically derived mip bias in SetNISMode.
// The help text states the real default (-0.3); it previously claimed "default=0".
static TAutoConsoleVariable<float> CVarNISUpscalingAutomaticMipMapLODBiasOffset(
	TEXT("r.NIS.Upscaling.AutomaticMipMapLODBias.Offset"),
	-0.3f,
	TEXT("Allows offsetting the automatic resolution dependent mip map LOD bias by this amount (default=-0.3)\n")
	TEXT("NOTE: This is only applied when using the UNISLibrary::SetNISMode blueprint function."),
	ECVF_RenderThreadSafe);
// Reports whether the given NIS mode can be used. Currently every mode is reported as
// supported; the argument is not inspected.
bool UNISLibrary::IsNISModeSupported(UNISMode NISMode)
{
return true;
}
// Writes the screen percentage range (in percent) to the two out parameters.
// Without NIS support the engine's temporal upsampling limits are reported instead
// of the NIS range of 50% - 100%.
void UNISLibrary::GetNISScreenPercentageRange(float& MinScreenPercentage, float& MaxScreenPercentage)
{
	if (!IsNISSupported())
	{
#if ENGINE_MAJOR_VERSION == 5 && ENGINE_MINOR_VERSION >= 3
		MinScreenPercentage = 100.0f * ISceneViewFamilyScreenPercentage::kMinTAAUpsampleResolutionFraction;
		MaxScreenPercentage = 100.0f * ISceneViewFamilyScreenPercentage::kMaxTAAUpsampleResolutionFraction;
#else
		MinScreenPercentage = 100.0f * ITemporalUpscaler::GetDefaultTemporalUpscaler()->GetMinUpsampleResolutionFraction();
		MaxScreenPercentage = 100.0f * ITemporalUpscaler::GetDefaultTemporalUpscaler()->GetMaxUpsampleResolutionFraction();
#endif
		return;
	}

	// NIS range expressed as resolution fractions 0.5 .. 1.0.
	MinScreenPercentage = 100.0f * 0.5f;
	MaxScreenPercentage = 100.0f * 1.0f;
}
// Collects every UNISMode value for which IsNISModeSupported() returns true, in the
// order the reflected enum lists them.
TArray<UNISMode> UNISLibrary::GetSupportedNISModes()
{
	TArray<UNISMode> Result;

	const UEnum* const ModeEnum = StaticEnum<UNISMode>();
	for (int32 Index = 0; Index < ModeEnum->NumEnums(); ++Index)
	{
		const int64 RawValue = ModeEnum->GetValueByIndex(Index);

		// Skip the entry whose value equals the enum's reported maximum
		// (presumably the auto-generated _MAX entry - confirm).
		if (RawValue == ModeEnum->GetMaxEnumValue())
		{
			continue;
		}

		const UNISMode Candidate = UNISMode(RawValue);
		if (IsNISModeSupported(Candidate))
		{
			Result.Add(Candidate);
		}
	}

	return Result;
}
bool UNISLibrary::IsNISSupported()
{
return GMaxRHIFeatureLevel >= GetNISMinRequiredFeatureLevel();
}
// Returns the recommended screen percentage, in percent, for the given NIS mode.
// - Off renders at native resolution, so 100 is returned. The previous value of 1.0
//   was on a different scale than every other mode (1% instead of 100%).
// - Custom returns the value last stored by SetNISCustomScreenPercentage.
// - Unknown values assert in builds with checks enabled and otherwise behave like Off.
float UNISLibrary::GetNISRecommendedScreenPercentage(UNISMode NISMode)
{
	switch (NISMode)
	{
		case UNISMode::Off:
			return 100.0f;
		case UNISMode::UltraQuality:
			return 77.0f;
		case UNISMode::Quality:
			return 66.667f;
		case UNISMode::Balanced:
			return 59.0f;
		case UNISMode::Performance:
			return 50.0f;
		case UNISMode::Custom:
			return SavedCustomScreenPercentage;
		default:
			checkf(false, TEXT("dear NIS plugin NVIDIA developer, please update this code to handle the new enum values"));
			return 100.0f;
	}
}
// Applies a NIS mode by writing console variables:
//  - r.NIS.Upscaling is set to 1 only if r.NIS.Enable is non-zero and the mode is not Off.
//  - When upscaling ends up enabled, r.TemporalAA.Upscaler and r.TemporalAA.Upsampling are
//    set to 0 (if they exist), r.ScreenPercentage is set to the mode's recommended value
//    and, if r.NIS.Upscaling.AutomaticMipMapLODBias is non-zero, r.MipMapLODBias is set to
//    log2(screen fraction) plus the configured offset.
// Invalid enum values are reported as a Kismet execution error (non-shipping builds only)
// and leave all state untouched.
void UNISLibrary::SetNISMode(UNISMode NISMode)
{
	const UEnum* Enum = StaticEnum<UNISMode>();
	const int64 ModeValue = static_cast<int64>(NISMode);

	// UEnums are strongly typed, but a raw byte can still be cast to one, so validate first.
	const bool bIsValidMode = Enum->IsValidEnumValue(ModeValue) && (Enum->GetMaxEnumValue() != ModeValue);
	if (!bIsValidMode)
	{
#if !UE_BUILD_SHIPPING
		// UNISMode is backed by uint8, so passing it as int32 matches the %d specifier.
		FFrame::KismetExecutionMessage(*FString::Printf(
			TEXT("SetNISMode should not be called with an invalid NISMode enum value (%d) \"%s\""),
			static_cast<int32>(NISMode), *Enum->GetDisplayNameTextByValue(ModeValue).ToString()),
			ELogVerbosity::Error, SetNISModeInvalidEnumValueError);
#endif
		return;
	}

	static auto CVarNISEnable = IConsoleManager::Get().FindConsoleVariable(TEXT("r.NIS.Enable"));
	static auto CVarNISUpscalingEnable = IConsoleManager::Get().FindConsoleVariable(TEXT("r.NIS.Upscaling"));

	// Remember whether we're in custom mode so a later change of the custom screen
	// percentage can re-apply the mode.
	bIsCustomMode = (UNISMode::Custom == NISMode);

	if (!ensure(CVarNISUpscalingEnable) || !ensure(CVarNISEnable))
	{
		return;
	}

	// r.NIS.Enable might be set to 0 via a hotfix, so force r.NIS.Upscaling to 0 as well
	// in that case (the requested mode might come from saved settings).
	const bool bNISEnabled = CVarNISEnable->GetInt() != 0;
	const bool bNISUpscalingEnabled = bNISEnabled && (NISMode != UNISMode::Off);
	CVarNISUpscalingEnable->Set(bNISUpscalingEnabled ? 1 : 0, ECVF_SetByCommandline);

	if (!bNISUpscalingEnabled)
	{
		return;
	}

	// Temporal upscalers such as DLSS might set these to 1, but NIS needs them to be 0.
	// The lookups are null-checked because these variables are owned by the engine and
	// are not guaranteed to be registered.
	const int32 DisabledValue = 0;
	static const auto CVarTemporalAAUpscaler = IConsoleManager::Get().FindConsoleVariable(TEXT("r.TemporalAA.Upscaler"));
	if (CVarTemporalAAUpscaler)
	{
		CVarTemporalAAUpscaler->SetWithCurrentPriority(DisabledValue);
	}
	static const auto CVarTemporalAAUpsampling = IConsoleManager::Get().FindConsoleVariable(TEXT("r.TemporalAA.Upsampling"));
	if (CVarTemporalAAUpsampling)
	{
		CVarTemporalAAUpsampling->SetWithCurrentPriority(DisabledValue);
	}

	static auto CVarScreenPercentage = IConsoleManager::Get().FindConsoleVariable(TEXT("r.ScreenPercentage"));
	if (!ensure(CVarScreenPercentage))
	{
		return;
	}

	const float ScreenPercentage = GetNISRecommendedScreenPercentage(NISMode);
	CVarScreenPercentage->SetWithCurrentPriority(ScreenPercentage);

	if (CVarNISUpscalingAutomaticMipMapLODBias.GetValueOnAnyThread())
	{
		static auto CVarMipMapLodBias = IConsoleManager::Get().FindConsoleVariable(TEXT("r.MipMapLODBias"));
		const float EffectivePrimaryResolutionFraction = ScreenPercentage * 0.01f;

		// A non-positive fraction (possible through the custom screen percentage) has no
		// finite logarithm, so the bias is left untouched in that case.
		if (ensure(CVarMipMapLodBias) && EffectivePrimaryResolutionFraction > 0.0f)
		{
			const float MipBias = FMath::Log2(EffectivePrimaryResolutionFraction) + CVarNISUpscalingAutomaticMipMapLODBiasOffset.GetValueOnAnyThread();
			CVarMipMapLodBias->SetWithCurrentPriority(MipBias);
		}
	}
}
// Stores the screen percentage used by UNISMode::Custom. If Custom is the active mode,
// the mode is applied again so the new value takes effect.
void UNISLibrary::SetNISCustomScreenPercentage(float CustomScreenPercentage)
{
	SavedCustomScreenPercentage = CustomScreenPercentage;

	if (!bIsCustomMode)
	{
		return;
	}

	// Re-run the mode logic so the changed percentage is picked up.
	SetNISMode(UNISMode::Custom);
}
void UNISLibrary::SetNISSharpness(float Sharpness)
{
static const auto CVarNISharpness = IConsoleManager::Get().FindConsoleVariable(TEXT("r.NIS.Sharpness"));
if (CVarNISharpness)
{
// Quantize here so we can have sharpness snap to 0, which downstream is used to turn off sharpening
// CVarNISharpness->Set(Sharpness, ECVF_SetByCommandline) internally uses Set(*FString::Printf(TEXT("%g"), InValue), SetBy);
CVarNISharpness->Set(*FString::Printf(TEXT("%2.2f"), Sharpness), ECVF_SetByCommandline);
}
}
// Default mode suggestion: Ultra Quality when NIS is supported, Off otherwise.
UNISMode UNISLibrary::GetDefaultNISMode()
{
	return UNISLibrary::IsNISSupported() ? UNISMode::UltraQuality : UNISMode::Off;
}
// Module startup hook; this module has nothing to initialize.
void FNISBlueprintModule::StartupModule()
{
}
// Module shutdown hook; this module has nothing to release.
void FNISBlueprintModule::ShutdownModule()
{
}
#undef LOCTEXT_NAMESPACE
IMPLEMENT_MODULE(FNISBlueprintModule, NISBlueprint)

View File

@ -0,0 +1,105 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "Modules/ModuleManager.h"
#include "UObject/ObjectMacros.h"
#include "UObject/Object.h"
#include "Kismet/BlueprintFunctionLibrary.h"
#include "Misc/CoreDelegates.h"
#include "NISLibrary.generated.h"
class FNISUpscaler;
class FDelegateHandle;
/** Result of the NIS support query as exposed to Blueprints. */
UENUM(BlueprintType)
enum class UNISSupport : uint8
{
Supported UMETA(DisplayName = "Supported"),
NotSupported UMETA(DisplayName = "Not Supported due to insufficient RHI Feature Level"),
};
/**
 * Quality presets selectable through UNISLibrary::SetNISMode. The recommended screen
 * percentage of each preset is returned by UNISLibrary::GetNISRecommendedScreenPercentage;
 * Custom uses the value set with UNISLibrary::SetNISCustomScreenPercentage.
 */
UENUM(BlueprintType)
enum class UNISMode : uint8
{
Off UMETA(DisplayName = "Off"),
UltraQuality UMETA(DisplayName = "Ultra Quality"),
Quality UMETA(DisplayName = "Quality"),
Balanced UMETA(DisplayName = "Balanced"),
Performance UMETA(DisplayName = "Performance"),
Custom UMETA(DisplayName = "Custom")
};
/** Blueprint function library that queries NIS support and drives NIS through console variables. */
UCLASS(MinimalAPI)
class UNISLibrary : public UBlueprintFunctionLibrary
{
friend class FNISBlueprintModule;
GENERATED_BODY()
public:
/** Checks whether NIS is supported, i.e. whether the maximum RHI feature level meets the minimum feature level required by NIS */
UFUNCTION(BlueprintPure, Category = "NIS", meta = (DisplayName = "Is NVIDIA NIS Supported"))
static NISBLUEPRINT_API bool IsNISSupported();
/** Checks whether a NIS mode is supported */
UFUNCTION(BlueprintPure, Category = "NIS", meta = (DisplayName = "Is NIS Mode Supported"))
static NISBLUEPRINT_API bool IsNISModeSupported(UNISMode NISMode);
/** Retrieves all supported NIS modes. Can be used to populate UI */
UFUNCTION(BlueprintPure, Category = "NIS", meta = (DisplayName = "Get Supported NIS Modes"))
static NISBLUEPRINT_API TArray<UNISMode> GetSupportedNISModes();
/** Returns the recommended screen percentage for a given NIS mode. Returns CustomScreenPercentage if NISMode is UNISMode::Custom */
UFUNCTION(BlueprintPure, Category = "NIS", meta = (DisplayName = "Get NIS Recommended Screen Percentage"))
static NISBLUEPRINT_API float GetNISRecommendedScreenPercentage(UNISMode NISMode);
/** The global screen percentage range that NIS supports. */
UFUNCTION(BlueprintPure, Category = "NIS", meta = (DisplayName = "Get NIS Screen Percentage Range"))
static NISBLUEPRINT_API void GetNISScreenPercentageRange(float& MinScreenPercentage, float& MaxScreenPercentage);
/** Sets the console variables to enable/disable NIS upscaling (writes r.NIS.Upscaling, r.ScreenPercentage, r.TemporalAA.Upsampling, r.TemporalAA.Upscaler and optionally r.MipMapLODBias; r.NIS.Enable is only read) */
UFUNCTION(BlueprintCallable, Category = "NIS", meta = (DisplayName = "Set NIS Mode"))
static NISBLUEPRINT_API void SetNISMode(UNISMode NISMode);
/** Set the screen percentage used for Custom mode (100% by default) */
UFUNCTION(BlueprintCallable, Category = "NIS", meta = (DisplayName = "Set NIS Custom Screen Percentage"))
static NISBLUEPRINT_API void SetNISCustomScreenPercentage(float CustomScreenPercentage = 100.0f);
/** Sets the console variable that controls additional NIS sharpening. Set to 0 to disable (r.NIS.Sharpness) */
UFUNCTION(BlueprintCallable, Category = "NIS", meta = (DisplayName = "Set NIS Sharpness"))
static NISBLUEPRINT_API void SetNISSharpness(float Sharpness);
/** Returns a default NIS mode: Ultra Quality when NIS is supported, Off otherwise */
UFUNCTION(BlueprintPure, Category = "NIS", meta = (DisplayName = "Get Default NIS Mode"))
static NISBLUEPRINT_API UNISMode GetDefaultNISMode();
private:
// Shared state of the library; all members are defined in NISLibrary.cpp.
static UNISSupport NISSupport;
static FNISUpscaler* NISUpscaler;
// Screen percentage used for UNISMode::Custom.
static float SavedCustomScreenPercentage;
// Whether the last valid mode passed to SetNISMode was UNISMode::Custom.
static bool bIsCustomMode;
};
class FNISBlueprintModule final : public IModuleInterface
{
public:
/** IModuleInterface implementation */
virtual void StartupModule() override;
virtual void ShutdownModule() override;
private:
};

View File

@ -0,0 +1,53 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
using UnrealBuildTool;
using System.IO;
/// <summary>
/// UnrealBuildTool rules for the NISCore module.
/// </summary>
public class NISCore : ModuleRules
{
	public NISCore(ReadOnlyTargetRules Target) : base(Target)
	{
		PCHUsage = ModuleRules.PCHUsageMode.UseExplicitOrSharedPCHs;

		// Dependencies that are visible to modules depending on NISCore.
		PublicDependencyModuleNames.AddRange(new[] { "Core", "RenderCore", "Renderer", "NISShaders" });

		// Dependencies that are only used by the implementation.
		PrivateDependencyModuleNames.AddRange(new[] { "Engine", "RHI", "Projects" });

		// No additional public include paths are registered (the original added an empty list).

		// The implementation includes headers from the renderer's private folder.
		string RendererPrivateDir = Path.Combine(EngineDirectory,"Source/Runtime/Renderer/Private");
		PrivateIncludePaths.Add(RendererPrivateDir);
	}
}

View File

@ -0,0 +1,67 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "NISCore.h"
#include "CoreMinimal.h"
#include "NISShaders.h"
#include "NISUpscaler.h"
#include "Modules/ModuleManager.h"
#include "Interfaces/IPluginManager.h"
#include "GeneralProjectSettings.h"
#include "SceneViewExtension.h"
#include "SceneView.h"
#include "Misc/MessageDialog.h"
// Localization namespace for this file; undefined again at the end of the file.
#define LOCTEXT_NAMESPACE "FNISModule"
// Definition of the LogNIS log category used by the module hooks below.
DEFINE_LOG_CATEGORY(LogNIS);
// Module startup: registers the on-screen message handler and creates the scene view
// extension. When this runs is determined by the module's loading phase in the .uplugin file.
void FNISCoreModule::StartupModule()
{
	UE_LOG(LogNIS, Log, TEXT("%s Enter"), ANSI_TO_TCHAR(__FUNCTION__));

	FNVImageUpscaler::RegisterOnScreenMessageHandler();

	// The module keeps the reference to the extension until shutdown.
	NISViewExtension = FSceneViewExtensions::NewExtension<FNISViewExtension>();

	UE_LOG(LogNIS, Log, TEXT("%s Leave"), ANSI_TO_TCHAR(__FUNCTION__));
}
// Module shutdown: undoes StartupModule in reverse order by dropping the module's
// reference to the view extension and then removing the on-screen message handler.
void FNISCoreModule::ShutdownModule()
{
	UE_LOG(LogNIS, Log, TEXT("%s Enter"), ANSI_TO_TCHAR(__FUNCTION__));

	NISViewExtension = nullptr;

	FNVImageUpscaler::RemoveOnScreenMessageHandler();

	UE_LOG(LogNIS, Log, TEXT("%s Leave"), ANSI_TO_TCHAR(__FUNCTION__));
}
#undef LOCTEXT_NAMESPACE
IMPLEMENT_MODULE(FNISCoreModule, NISCore)

View File

@ -0,0 +1,278 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "NISUpscaler.h"
#include "DynamicResolutionState.h"
#include "LegacyScreenPercentageDriver.h"
#include "Runtime/Launch/Resources/Version.h"
#include "NISShaders.h"
#define LOCTEXT_NAMESPACE "FNISModule"
// Master switch: when 0, BeginRenderViewFamily installs neither upscaling nor sharpening.
static TAutoConsoleVariable<int32> CVarNISEnable(
TEXT("r.NIS.Enable"),
1,
TEXT("Enable/disable NIS upscaling and/or sharpening"),
ECVF_RenderThreadSafe);
// Controls whether NIS may be installed as the primary spatial upscaler.
static TAutoConsoleVariable<int32> CVarNISUpscaling(
TEXT("r.NIS.Upscaling"),
1,
TEXT("Enable NIS Upscaling. Also requires r.TemporalAA.Upscaler 0"),
ECVF_RenderThreadSafe);
// Shared error state; written every frame by FNISViewExtension::BeginRenderViewFamily.
FNVImageUpscaler::FNISErrorState FNVImageUpscaler::ErrorState;
// Registers the extension and adds an "is active" functor that always answers true, so
// the decision whether NIS runs is taken per frame in BeginRenderViewFamily.
FNISViewExtension::FNISViewExtension(const FAutoRegister& AutoRegister)
	: FSceneViewExtensionBase(AutoRegister)
{
	FSceneViewExtensionIsActiveFunctor AlwaysActive;
	AlwaysActive.IsActiveFunction = [](const ISceneViewExtension*, const FSceneViewExtensionContext&)
	{
		return true;
	};

	IsActiveThisFrameFunctions.Add(AlwaysActive);
}
// View family setup hook; intentionally empty.
void FNISViewExtension::SetupViewFamily(FSceneViewFamily& InViewFamily)
{
}
// Per-view setup hook; intentionally empty.
void FNISViewExtension::SetupView(FSceneViewFamily& InViewFamily, FSceneView& InView)
{
}
// View point setup hook; intentionally empty.
void FNISViewExtension::SetupViewPoint(APlayerController* Player, FMinimalViewInfo& InViewInfo)
{
}
// Decides per view family whether NIS is installed as primary spatial upscaler, as
// secondary spatial upscaler (secondary upscaling or sharpening only), or not at all.
// Also refreshes FNVImageUpscaler::ErrorState, which records conflicts with other
// spatial upscalers.
void FNISViewExtension::BeginRenderViewFamily(FSceneViewFamily& InViewFamily)
{
	const bool bIsNISSupported = InViewFamily.GetFeatureLevel() >= GetNISMinRequiredFeatureLevel();
	const bool bIsNISEnabled = CVarNISEnable.GetValueOnAnyThread() != 0;
	const bool bIsNISUpscalingEnabled = CVarNISUpscaling.GetValueOnAnyThread() != 0;

	static const auto CVarNISSharpness = IConsoleManager::Get().FindConsoleVariable(TEXT("r.NIS.Sharpness"));
	const bool bIsNISSharpeningEnabled = (CVarNISSharpness ? CVarNISSharpness->GetFloat() : 0.0f) != 0.0f;

	if (!bIsNISSupported || !bIsNISEnabled || !(bIsNISUpscalingEnabled || bIsNISSharpeningEnabled))
	{
		return;
	}

	/*
		FViewFamily::SetPrimarySpatialUpscalerInterface asserts if a plugin tries to set a
		spatial upscaler interface when another plugin already set one.

		Ideally only one spatial upscaler plugin is active at run time, but NIS and other
		spatial upscaler plugins tend to be active by default, and the call order between
		them is not known.

		If NIS is not first, the assert is avoided by not activating NIS and showing an on
		screen error message instead (non shipping builds), telling the developer how to
		resolve the conflict, e.g. by turning off the other plugin via its cvars.

		Empirical testing showed that NIS might get called first, which would make at least
		one known 3rd party spatial upscaler plugin trigger the engine side assert. Thus we
		explicitly check whether any known spatial upscaler plugin is active for the current
		view family/frame and turn NIS off in that case.
	*/
	struct FConsoleVariableReference
	{
		const TCHAR* Name = nullptr;
		IConsoleVariable* CVar = nullptr;
		bool bInitialized = false;
	};

	// The lookup is cached in the static table so each name is resolved only once.
	static FConsoleVariableReference KnownUpscalerCVars[]
	{
		{TEXT("r.FidelityFX.FSR.Enabled")}
	};

	FNVImageUpscaler::ErrorState.IncompatibleUpscalerCVarNames = TEXT("");
	bool bAnyKnownUpscalerActive = false;
	for (auto& UpscalerCVar : KnownUpscalerCVars)
	{
		if (!UpscalerCVar.bInitialized)
		{
			UpscalerCVar.CVar = IConsoleManager::Get().FindConsoleVariable(UpscalerCVar.Name);
			UpscalerCVar.bInitialized = true;
		}

		if (UpscalerCVar.CVar && UpscalerCVar.CVar->GetInt() != 0)
		{
			// Separate multiple names so the message stays readable once the table grows.
			if (bAnyKnownUpscalerActive)
			{
				FNVImageUpscaler::ErrorState.IncompatibleUpscalerCVarNames.Append(TEXT(", "));
			}
			FNVImageUpscaler::ErrorState.IncompatibleUpscalerCVarNames.Append(UpscalerCVar.Name);
			bAnyKnownUpscalerActive = true;
		}
	}

	const bool bAnyOtherSpatialUpscalerActive = bAnyKnownUpscalerActive
		|| InViewFamily.GetPrimarySpatialUpscalerInterface() != nullptr
		|| InViewFamily.GetSecondarySpatialUpscalerInterface() != nullptr;
	FNVImageUpscaler::ErrorState.bOtherSpatialUpscalerActive = bAnyOtherSpatialUpscalerActive;

	// Find out which primary screen percentage methods the views of this family use.
	bool bIsSpatialPrimaryUpscaling = false;
	bool bIsTemporalPrimaryUpscaling = false;
	for (const auto& View : InViewFamily.Views)
	{
		if (View)
		{
			if (View->PrimaryScreenPercentageMethod == EPrimaryScreenPercentageMethod::SpatialUpscale)
			{
				bIsSpatialPrimaryUpscaling = true;
			}
			if (View->PrimaryScreenPercentageMethod == EPrimaryScreenPercentageMethod::TemporalUpscale)
			{
				bIsTemporalPrimaryUpscaling = true;
			}
		}
	}

	// Without a screen percentage interface there is nothing to query, so treat the
	// family as rendering at full resolution instead of dereferencing a null pointer.
	float PrimaryResolutionFraction = 1.0f;
#if ENGINE_MAJOR_VERSION == 5 && ENGINE_MINOR_VERSION >= 1
	if (const auto* ScreenPercentageInterface = InViewFamily.GetScreenPercentageInterface())
	{
		DynamicRenderScaling::TMap<float> UpperBounds = ScreenPercentageInterface->GetResolutionFractionsUpperBound();
		PrimaryResolutionFraction = UpperBounds[GDynamicPrimaryResolutionFraction];
	}
#else
	PrimaryResolutionFraction = InViewFamily.GetPrimaryResolutionFractionUpperBound();
#endif

	// NIS only upscales for resolution fractions in [0.5, 1.0).
	const float MAX_UPSCALE_FRACTION = 1.0f;
	const float MIN_UPSCALE_FRACTION = 0.5f;
	const bool bIsActuallyPrimaryUpscaling = PrimaryResolutionFraction < MAX_UPSCALE_FRACTION && PrimaryResolutionFraction >= MIN_UPSCALE_FRACTION;
	const bool bIsActuallySecondaryUpscaling = InViewFamily.SecondaryViewFraction < MAX_UPSCALE_FRACTION && InViewFamily.SecondaryViewFraction >= MIN_UPSCALE_FRACTION;

	FNVImageUpscaler::ErrorState.bPrimaryAndSecondarySpatialUpscaling = bIsSpatialPrimaryUpscaling && bIsActuallyPrimaryUpscaling && bIsActuallySecondaryUpscaling;

	if (bAnyOtherSpatialUpscalerActive)
	{
		return;
	}

	// The view family takes the raw pointer passed to the setters below.
	if (bIsNISUpscalingEnabled && bIsSpatialPrimaryUpscaling && bIsActuallyPrimaryUpscaling)
	{
		InViewFamily.SetPrimarySpatialUpscalerInterface(new FNVImageUpscaler());
	}
	// when running with DLSS (or TAAU/TSR) we do either secondary upscaling or just NIS sharpening
	else if (bIsNISSharpeningEnabled || (bIsTemporalPrimaryUpscaling && bIsActuallyPrimaryUpscaling && bIsActuallySecondaryUpscaling))
	{
		InViewFamily.SetSecondarySpatialUpscalerInterface(new FNVImageUpscaler());
	}
}
/** Nothing to set up: the class declares only static data members. */
FNVImageUpscaler::FNVImageUpscaler() = default;
/** Nothing to tear down; defined out of line to match the declaration in the header. */
FNVImageUpscaler::~FNVImageUpscaler() = default;
/** @return The fixed, human-readable name identifying this spatial upscaler. */
const TCHAR* FNVImageUpscaler::GetDebugName() const
{
    // String literals have static storage duration, so handing out the pointer is safe.
    const TCHAR* const DebugName = TEXT("NVIDIA Image Upscaler");
    return DebugName;
}
/**
 * Creates a fresh upscaler instance for a new view family.
 * Must be called on the game thread (asserted).
 *
 * @param ViewFamily Unused; every fork is an independent default-constructed upscaler.
 * @return A newly heap-allocated FNVImageUpscaler; the caller receives the raw pointer.
 */
ISpatialUpscaler* FNVImageUpscaler::Fork_GameThread(const FSceneViewFamily& ViewFamily) const
{
    check(IsInGameThread());
    ISpatialUpscaler* const ForkedUpscaler = new FNVImageUpscaler();
    return ForkedUpscaler;
}
/**
 * ISpatialUpscaler entry point: forwards all arguments unchanged to AddSharpenOrUpscalePass
 * and returns the texture that call produces.
 */
FScreenPassTexture FNVImageUpscaler::AddPasses(
    FRDGBuilder& GraphBuilder,
    const FViewInfo& View,
    const ISpatialUpscaler::FInputs& PassInputs) const
{
    return AddSharpenOrUpscalePass(GraphBuilder, View, PassInputs);
}
/**
 * Tells whether NIS debug on-screen messages should be shown.
 * Currently always true; there is no project setting backing this yet.
 */
static bool ShowNISDebugOnScreenMessages()
{
    // TODO do we need project settings for this?
    // Disabled sketch of a settings-driven implementation, kept for reference:
    //if (GetDefault<UDLSSOverrideSettings>()->ShowDLSSSDebugOnScreenMessages == EDLSSSettingOverride::UseProjectSettings)
    //{
    //    return GetDefault<UDLSSSettings>()->bShowDLSSSDebugOnScreenMessages;
    //}
    //else
    //{
    //    return GetDefault<UDLSSOverrideSettings>()->ShowDLSSSDebugOnScreenMessages == EDLSSSettingOverride::Enabled;
    //}
    return true;
}
#if !UE_BUILD_SHIPPING
// Handle of the FCoreDelegates::OnGetOnScreenMessages binding owned by this class.
FDelegateHandle FNVImageUpscaler::OnScreenMessagesDelegateHandle;

/**
 * Appends NIS diagnostics to the engine's on-screen message list, based on the flags recorded
 * in ErrorState. Must be called on the game thread (asserted).
 *
 * @param OutMessages Severity-to-text map that receives zero, one or two NIS messages.
 */
void FNVImageUpscaler::GetOnScreenMessages(TMultiMap<FCoreDelegates::EOnScreenMessageSeverity, FText>& OutMessages)
{
    check(IsInGameThread());
    if (!ShowNISDebugOnScreenMessages())
    {
        return;
    }

    if (ErrorState.bOtherSpatialUpscalerActive)
    {
        const FTextFormat Format(LOCTEXT("NISOtherUpscalerActive",
            "NIS Error: Disabling NVIDIA NIS as the spatial upscaler since another spatial upscaler plugin is already active for this view family.\n"
            " To enable NIS, please disable other primary spatial upscalers in the UI/application logic or via console variables {0} {1}. And vice versa"));
        // {0} is only filled in when there is at least one offending console variable to list in {1}.
        const FText Message = FText::Format(Format,
            FText::FromString(!ErrorState.IncompatibleUpscalerCVarNames.IsEmpty() ? TEXT("such as") : TEXT("")),
            FText::FromString(ErrorState.IncompatibleUpscalerCVarNames)
        );
        OutMessages.Add(FCoreDelegates::EOnScreenMessageSeverity::Error, Message);
    }

    if (ErrorState.bPrimaryAndSecondarySpatialUpscaling)
    {
        // This text needs its own localization key: reusing "NISOtherUpscalerActive" would register two
        // different source strings under one namespace/key pair. The pattern has no placeholders, so it is
        // emitted directly instead of going through FText::Format with unused arguments.
        const FText Message = LOCTEXT("NISPrimaryAndSecondarySpatialUpscaling",
            "NIS Warning: NIS is used as a primary spatial upscaler, followed by the engine built-in secondary spatial upscaler, which is not optimal.\n"
            " Consider disabling the secondary screen percentage (via r.SecondaryScreenPercentage.GameViewport or Editor.OverrideDPIBasedEditorViewportScaling) in order to have NIS upscale directly to the output resolution.");
        OutMessages.Add(FCoreDelegates::EOnScreenMessageSeverity::Warning, Message);
    }
}
#endif
/**
 * Binds GetOnScreenMessages to FCoreDelegates::OnGetOnScreenMessages and remembers the handle
 * so the binding can be removed later. Compiles to a no-op in shipping builds.
 */
void FNVImageUpscaler::RegisterOnScreenMessageHandler()
{
#if !UE_BUILD_SHIPPING
    OnScreenMessagesDelegateHandle =
        FCoreDelegates::OnGetOnScreenMessages.AddStatic(&FNVImageUpscaler::GetOnScreenMessages);
#endif
}
/**
 * Removes the on-screen message binding if one is currently registered and resets the stored
 * handle. Compiles to a no-op in shipping builds.
 */
void FNVImageUpscaler::RemoveOnScreenMessageHandler()
{
#if !UE_BUILD_SHIPPING
    // Nothing was registered (or it was already removed): leave everything untouched.
    if (!OnScreenMessagesDelegateHandle.IsValid())
    {
        return;
    }
    FCoreDelegates::OnGetOnScreenMessages.Remove(OnScreenMessagesDelegateHandle);
    OnScreenMessagesDelegateHandle.Reset();
#endif
}
#undef LOCTEXT_NAMESPACE

View File

@ -0,0 +1,74 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "CoreMinimal.h"
#include "Misc/CoreDelegates.h"
#include "RendererInterface.h"
#include "PostProcess/PostProcessUpscale.h"
#include "SceneViewExtension.h"
#include "NISShaders.h"
class FSceneTextureParameters;
class FRHITexture;
/**
 * Scene view extension declared by the NIS plugin.
 * The four Setup and Begin hooks are only declared here; their definitions are not part of this header.
 * The two render-thread hooks are intentionally empty.
 */
class FNISViewExtension final : public FSceneViewExtensionBase
{
public:
FNISViewExtension(const FAutoRegister& AutoRegister);
// Game-thread hooks, defined out of line.
virtual void SetupViewFamily(FSceneViewFamily& InViewFamily) override;
virtual void SetupView(FSceneViewFamily& InViewFamily, FSceneView& InView) override;
virtual void SetupViewPoint(APlayerController* Player, FMinimalViewInfo& InViewInfo) override;
virtual void BeginRenderViewFamily(FSceneViewFamily& InViewFamily) override;
// Render-thread hooks: deliberately no-ops.
virtual void PreRenderView_RenderThread(FRHICommandListImmediate& RHICmdList, FSceneView& InView) final override {}
virtual void PreRenderViewFamily_RenderThread(FRHICommandListImmediate& RHICmdList, FSceneViewFamily& InViewFamily) final override {}
};
/**
 * ISpatialUpscaler implementation for NVIDIA Image Scaling.
 * Instances hold no per-object data; the diagnostic state and the on-screen message
 * delegate handle are static and therefore shared by all instances.
 */
class NISCORE_API FNVImageUpscaler final : public ISpatialUpscaler
{
// NOTE(review): the module class declared for this plugin is named FNISCoreModule; confirm that
// a class called FNISModule actually exists, otherwise this friend declaration has no effect.
friend class FNISModule;
public:
FNVImageUpscaler();
virtual ~FNVImageUpscaler();
/** Returns a fixed, human-readable name for this upscaler. */
virtual const TCHAR* GetDebugName() const override;
/** Create a new ISpatialUpscaler interface for a new view family. */
virtual ISpatialUpscaler* Fork_GameThread(const class FSceneViewFamily& ViewFamily) const override;
// Inherited via ISpatialUpscaler
virtual FScreenPassTexture AddPasses(
FRDGBuilder& GraphBuilder,
const FViewInfo& View,
const FInputs& PassInputs) const override;
/** Diagnostic flags that GetOnScreenMessages turns into on-screen error/warning text. */
struct FNISErrorState
{
// True when another spatial upscaler was found to be active, so NIS was not installed.
bool bOtherSpatialUpscalerActive = false;
// Names of the console variables of conflicting upscalers that were found enabled.
FString IncompatibleUpscalerCVarNames;
// True when NIS primary spatial upscaling is followed by a secondary spatial upscale.
bool bPrimaryAndSecondarySpatialUpscaling = false;
};
#if !UE_BUILD_SHIPPING
/** Delegate target: appends the messages derived from ErrorState to OutMessages. */
static void GetOnScreenMessages(TMultiMap<FCoreDelegates::EOnScreenMessageSeverity, FText>& OutMessages);
/** Handle of the on-screen message delegate binding, valid while registered. */
static FDelegateHandle OnScreenMessagesDelegateHandle;
#endif
/** Registers / removes the on-screen message delegate; both are no-ops in shipping builds. */
static void RegisterOnScreenMessageHandler();
static void RemoveOnScreenMessageHandler();
/** Shared diagnostic state, read by GetOnScreenMessages. */
static FNISErrorState ErrorState;
};

View File

@ -0,0 +1,30 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "Modules/ModuleManager.h"
class FNISViewExtension;
class FNVImageUpscaler;
class FNISCoreModule final: public IModuleInterface
{
public:
/** IModuleInterface implementation */
virtual void StartupModule();
virtual void ShutdownModule();
private:
TSharedPtr< FNISViewExtension , ESPMode::ThreadSafe> NISViewExtension;
TUniquePtr<FNVImageUpscaler> NISUpscaler;
};

View File

@ -0,0 +1,53 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
using UnrealBuildTool;
using System.IO;
/// <summary>
/// UnrealBuildTool rules for the NISShaders module: PCH mode, include paths and module dependencies.
/// </summary>
public class NISShaders : ModuleRules
{
    public NISShaders(ReadOnlyTargetRules Target) : base(Target)
    {
        PCHUsage = ModuleRules.PCHUsageMode.UseExplicitOrSharedPCHs;

        // No extra public include paths are required.
        string[] ExtraPublicIncludePaths = new string[] { };
        PublicIncludePaths.AddRange(ExtraPublicIncludePaths);

        // The module includes private headers of the engine's Renderer module.
        string[] ExtraPrivateIncludePaths = new string[]
        {
            Path.Combine(EngineDirectory, "Source/Runtime/Renderer/Private"),
        };
        PrivateIncludePaths.AddRange(ExtraPrivateIncludePaths);

        string[] PublicModules = new string[]
        {
            "Core",
            "RenderCore",
            "Renderer",
        };
        PublicDependencyModuleNames.AddRange(PublicModules);

        string[] PrivateModules = new string[]
        {
            "Engine",
            "RHI",
            "Projects",
            "Renderer",
        };
        PrivateDependencyModuleNames.AddRange(PrivateModules);
    }
}

View File

@ -0,0 +1,687 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "NISShaders.h"
#include "CoreMinimal.h"
#include "Modules/ModuleManager.h"
#include "Interfaces/IPluginManager.h"
#include "RenderTargetPool.h"
#include "GeneralProjectSettings.h"
#include "SceneViewExtension.h"
#include "SceneView.h"
#include "ShaderCompilerCore.h"
#include "PostProcess/PostProcessTonemap.h"
#include "Runtime/Launch/Resources/Version.h"
#if ENGINE_MAJOR_VERSION == 5 && ENGINE_MINOR_VERSION >= 2
#include "DataDrivenShaderPlatformInfo.h"
#endif
#if ENGINE_MAJOR_VERSION == 5 && ENGINE_MINOR_VERSION >= 3
#include "SceneRendering.h"
#endif
// we don't pass NISConfigs as constant buffers into the shaders so we don't need the alignment
// however we also have static_asserts that make sure that FNISConfigParameters matches NISConfig
#define NIS_ALIGNED(x)
#include "NIS_Config.h"
#define LOCTEXT_NAMESPACE "FNISImageScalingShadersModule"
void FNISShadersModule::StartupModule()
{
// This code will execute after your module is loaded into memory; the exact timing is specified in the .uplugin file per-module
// Get the base directory of this plugin
FString PluginShaderDir = FPaths::Combine(IPluginManager::Get().FindPlugin(TEXT("NIS"))->GetBaseDir(), TEXT("Shaders"));
AddShaderSourceDirectoryMapping(TEXT("/Plugin/NIS"), PluginShaderDir);
FString ThirdPartyShaderDir = FPaths::Combine(IPluginManager::Get().FindPlugin(TEXT("NIS"))->GetBaseDir(), TEXT("Shaders"), TEXT("ThirdParty"));
AddShaderSourceDirectoryMapping(TEXT("/ThirdParty/Plugin/NIS"), ThirdPartyShaderDir);
}
/** Module shutdown hook; this module performs no work here. */
void FNISShadersModule::ShutdownModule()
{
    // Intentionally empty.
}
// Console variables controlling the NIS pass. All three are read on the render thread
// when the pass is set up.

// r.NIS.Sharpness: sharpening amount; the reader clamps it to [0, 1].
static TAutoConsoleVariable<float> CVarNISSharpness(
TEXT("r.NIS.Sharpness"),
0.0f,
TEXT("0.0 to 1.0: Sharpening to apply to either primary NIS pass or the secondary NIS pass. If 0.0 the secondary NIS sharpening pass will not be executed (default: 0.0f)"),
ECVF_RenderThreadSafe);
// r.NIS.HalfPrecision: selects the half precision shader permutation; -1 picks one automatically.
static TAutoConsoleVariable <int> CVarNISHalfPrecision(
TEXT("r.NIS.HalfPrecision"),
-1,
TEXT("Enable/disable half precision in the NIS shaders and selects which permutation is used (default:-1)\n")
TEXT("-1: automatic. Pick the appropriate FP16 permutation based on shader model and RHI\n")
TEXT(" 0: Float32, disable half precision\n")
TEXT(" 1: Min16Float, half precision, intended for UE4 DX11 SM5\n")
TEXT(" 2: Min16FloatDXC, half precision, intended for UE4 DX12 SM5\n")
TEXT(" 3: Float16DXC, half precision, intended for UE5 DX12 SM6\n"),
ECVF_RenderThreadSafe);
// r.NIS.HDRMode: overrides the HDR mode handed to NIS; -1 derives it from the tonemapper output device.
static TAutoConsoleVariable<int> CVarNISHDRMode(
TEXT("r.NIS.HDRMode"),
-1,
TEXT("-1: Automatic. Determines the NIS HDR mode based on ETonemapperOutputDevice\n")
TEXT("0: None\n")
TEXT("1: Linear\n")
TEXT("2: PQ\n"),
ECVF_RenderThreadSafe);
// Engine-side shader parameter mirror of the NIS SDK's NISConfig struct.
// this should match NISConfig
// The member order and types matter: the pass setup code copies a filled NISConfig into this
// struct with a raw memory copy, so any change here must be mirrored against NIS_Config.h.
BEGIN_SHADER_PARAMETER_STRUCT(FNISConfigParameters, )
SHADER_PARAMETER(float, kDetectRatio)
SHADER_PARAMETER(float, kDetectThres)
SHADER_PARAMETER(float, kMinContrastRatio)
SHADER_PARAMETER(float, kRatioNorm)
SHADER_PARAMETER(float, kContrastBoost)
SHADER_PARAMETER(float, kEps)
SHADER_PARAMETER(float, kSharpStartY)
SHADER_PARAMETER(float, kSharpScaleY)
SHADER_PARAMETER(float, kSharpStrengthMin)
SHADER_PARAMETER(float, kSharpStrengthScale)
SHADER_PARAMETER(float, kSharpLimitMin)
SHADER_PARAMETER(float, kSharpLimitScale)
SHADER_PARAMETER(float, kScaleX)
SHADER_PARAMETER(float, kScaleY)
SHADER_PARAMETER(float, kDstNormX)
SHADER_PARAMETER(float, kDstNormY)
SHADER_PARAMETER(float, kSrcNormX)
SHADER_PARAMETER(float, kSrcNormY)
SHADER_PARAMETER(uint32, kInputViewportOriginX)
SHADER_PARAMETER(uint32, kInputViewportOriginY)
SHADER_PARAMETER(uint32, kInputViewportWidth)
SHADER_PARAMETER(uint32, kInputViewportHeight)
SHADER_PARAMETER(uint32, kOutputViewportOriginX)
SHADER_PARAMETER(uint32, kOutputViewportOriginY)
SHADER_PARAMETER(uint32, kOutputViewportWidth)
SHADER_PARAMETER(uint32, kOutputViewportHeight)
SHADER_PARAMETER(float, reserved0)
SHADER_PARAMETER(float, reserved1)
END_SHADER_PARAMETER_STRUCT()
// not a complete guard against mismatches, but better than nothing
// (checks the total size and the offset of one late member only)
static_assert(sizeof(NISConfig) == sizeof(FNISConfigParameters), "mistmatch between engine & NIS SDK side struct");
static_assert(offsetof(NISConfig, kOutputViewportHeight) == offsetof(FNISConfigParameters, kOutputViewportHeight), "mistmatch between engine & NIS SDK side struct");
// Shader permutation dimensions of the NIS compute shader. Each one maps to the preprocessor
// define named in its string argument.

// Set from whether the pass changes resolution (upscale) or only sharpens.
class FNISScalerDim : SHADER_PERMUTATION_BOOL("NIS_SCALER");
// SHADER_PERMUTATION_SPARSE_ENUM needs a ::MAX member, so we can't use the NIS enum directly, at least not without making a UE flavored copy of the type
class FNISHdrModeDim : SHADER_PERMUTATION_SPARSE_INT("NIS_HDR_MODE", int32(NISHDRMode::None), int32(NISHDRMode::Linear), int32(NISHDRMode::PQ));
// those need to be updated if GetOptimalBlockWidth etc return new values
class FNISBlockWidthDim : SHADER_PERMUTATION_SPARSE_INT("NIS_BLOCK_WIDTH", 32);
class FNISBlockHeightDim : SHADER_PERMUTATION_SPARSE_INT("NIS_BLOCK_HEIGHT", 32, 24);
class FNISThreadGroupSizeDim : SHADER_PERMUTATION_SPARSE_INT("NIS_THREAD_GROUP_SIZE", 128, 256);
// Set when the source or destination rectangle does not cover its whole texture.
class FNISViewportSupportDim : SHADER_PERMUTATION_BOOL("NIS_VIEWPORT_SUPPORT");
// the shaders treat NIS_USE_HALF_PRECISION 1 and 2 as on so we can use this to have another permutation that we compile with DXC
// The enumerator order is significant: the values double as the NIS_USE_HALF_PRECISION define.
enum class ENISHalfPrecisionPermutation
{
Float32, // for everything else
Min16Float, // for UE4 DX11 SM5
Min16FloatDXC, // for UE4 DX12 SM5
Float16DXC, // for UE5 DX12 SM6
MAX
};
class FNISHalfPrecisionDim : SHADER_PERMUTATION_ENUM_CLASS("NIS_USE_HALF_PRECISION", ENISHalfPrecisionPermutation);
/** @return The lowest RHI feature level NIS requires, which is SM5. */
NISSHADERS_API ERHIFeatureLevel::Type GetNISMinRequiredFeatureLevel()
{
    const ERHIFeatureLevel::Type MinRequiredFeatureLevel = ERHIFeatureLevel::SM5;
    return MinRequiredFeatureLevel;
}
/**
 * Global compute shader wrapping the NIS upscale/sharpen kernel (NISUpscaler.usf, entry point "main").
 * Besides the shader parameter layout, the class exposes static helpers that query the NIS SDK
 * optimizer for tile and thread group sizes, and decides which permutations are compiled.
 */
class FNISUpscaleCS : public FGlobalShader
{
public:
/**
 * Maps the current RHI vendor to the NIS SDK GPU architecture enum.
 * Only the NVIDIA case distinguishes half precision; an unknown or zero vendor id falls back
 * to NVIDIA_Generic.
 */
static NISGPUArchitecture GetNISGPUArchitecture(const bool bHalfPrecision)
{
// those functions expect non-zero GRHIVendorId, but it's unclear how NDA platforms, such as consoles handle this...
if (GRHIVendorId && IsRHIDeviceAMD())
{
return NISGPUArchitecture::AMD_Generic;
}
else if (GRHIVendorId && IsRHIDeviceIntel())
{
return NISGPUArchitecture::Intel_Generic;
}
else if (GRHIVendorId && IsRHIDeviceNVIDIA())
{
return bHalfPrecision ? NISGPUArchitecture::NVIDIA_Generic_fp16 : NISGPUArchitecture::NVIDIA_Generic;
}
else
{
return NISGPUArchitecture::NVIDIA_Generic;
}
}
/** Returns the (block width, block height) the NIS optimizer reports for the given mode. */
static FIntPoint GetComputeTileSize(bool bIsUpscaling, const bool bHalfPrecision)
{
NISOptimizer Optimizer{ bIsUpscaling, GetNISGPUArchitecture(bHalfPrecision)} ;
return FIntPoint(Optimizer.GetOptimalBlockWidth(), Optimizer.GetOptimalBlockHeight());
}
/** Returns the thread group size the NIS optimizer reports for the given mode. */
static int32 GetThreadGroupSize(bool bIsUpscaling, const bool bHalfPrecision)
{
NISOptimizer Optimizer{ bIsUpscaling, GetNISGPUArchitecture(bHalfPrecision) };
return Optimizer.GetOptimalThreadGroupSize();
}
/**
 * True when the platform reports DXC support (UE5 only) or is a PC D3D platform.
 */
static bool DoesPlatformSupportDXC(const FStaticShaderPlatform Platform)
{
return
#if ENGINE_MAJOR_VERSION == 5
FDataDrivenShaderPlatformInfo::GetSupportsDxc(Platform) ||
#endif
(FDataDrivenShaderPlatformInfo::GetIsPC(Platform) && IsD3DPlatform(Platform));
}
/**
 * Permutation filter: Float16DXC is compiled only on UE5 for DXC-capable SM6 platforms;
 * every other permutation requires SM5 support.
 */
static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters)
{
FPermutationDomain PermutationVector(Parameters.PermutationId);
// UE4 doesn't support SM6 and float16t reliably....
if (PermutationVector.Get<FNISHalfPrecisionDim>() == ENISHalfPrecisionPermutation::Float16DXC)
{
#if ENGINE_MAJOR_VERSION != 5
return false;
#else
return DoesPlatformSupportDXC(Parameters.Platform) && IsFeatureLevelSupported(Parameters.Platform, ERHIFeatureLevel::SM6);
#endif
}
return IsFeatureLevelSupported(Parameters.Platform, ERHIFeatureLevel::SM5);
}
/**
 * Adds the compiler flags a permutation needs: typed UAV loads always, and DXC (plus real
 * 16 bit types and NIS_HLSL_6_2 for Float16DXC) for the DXC based half precision permutations.
 */
static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
{
FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
OutEnvironment.CompilerFlags.Add(CFLAG_AllowTypedUAVLoads);
// for DX12 we need to DXC to get min16float in the NIS shaders to have an effect
// NIS also supports SM6.2 explicit FP16, but in UE4 that's only supported for RT shaders.
FPermutationDomain PermutationVector(Parameters.PermutationId);
if(DoesPlatformSupportDXC(Parameters.Platform))
{
if (PermutationVector.Get<FNISHalfPrecisionDim>() == ENISHalfPrecisionPermutation::Min16FloatDXC)
{
OutEnvironment.CompilerFlags.Add(CFLAG_ForceDXC);
}
// UE5 supports SM6.6 so we can use the explicit FP16 NIS permutation
#if ENGINE_MAJOR_VERSION == 5
if (PermutationVector.Get<FNISHalfPrecisionDim>() == ENISHalfPrecisionPermutation::Float16DXC)
{
OutEnvironment.CompilerFlags.Add(CFLAG_ForceDXC);
OutEnvironment.CompilerFlags.Add(CFLAG_AllowRealTypes);
OutEnvironment.SetDefine(TEXT("NIS_HLSL_6_2"), 1);
}
#endif
}
}
// Full permutation space of the shader; one value per dimension must be set before lookup.
using FPermutationDomain = TShaderPermutationDomain<FNISScalerDim, FNISHdrModeDim, FNISHalfPrecisionDim,
FNISBlockWidthDim, FNISBlockHeightDim, FNISThreadGroupSizeDim, FNISViewportSupportDim>;
DECLARE_GLOBAL_SHADER(FNISUpscaleCS);
SHADER_USE_PARAMETER_STRUCT(FNISUpscaleCS, FGlobalShader);
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
// Input images
SHADER_PARAMETER_SAMPLER(SamplerState, samplerLinearClamp)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, in_texture)
// Output images
SHADER_PARAMETER_RDG_TEXTURE_UAV(RWTexture2D, out_texture)
// NIS configuration values, filled from a NISConfig by the pass setup code
SHADER_PARAMETER_STRUCT_INCLUDE(FNISConfigParameters, Config)
// Coefficient lookup textures (scaler and USM)
SHADER_PARAMETER_TEXTURE(Texture2D, coef_scaler)
SHADER_PARAMETER_TEXTURE(Texture2D, coef_usm)
SHADER_PARAMETER_STRUCT_REF(FViewUniformShaderParameters, View)
END_SHADER_PARAMETER_STRUCT()
};
IMPLEMENT_GLOBAL_SHADER(FNISUpscaleCS, "/Plugin/NIS/Private/NISUpscaler.usf", "main", SF_Compute);
struct FNISCoefficients : public FRenderResource
{
FTexture2DRHIRef ScalerRHI = nullptr;
FTexture2DRHIRef UsmRHI = nullptr;
FTexture2DRHIRef ScalerHalfPrecisionRHI = nullptr;
FTexture2DRHIRef UsmHalfPrecisionRHI = nullptr;
class FNISCoefficientsResourceBulkData : public FResourceBulkDataInterface
{
public:
FNISCoefficientsResourceBulkData(const void* InData, uint32_t InDataSize)
: Data(InData)
, DataSize(InDataSize)
{ }
public:
virtual const void* GetResourceBulkData() const
{
return Data;
}
virtual uint32 GetResourceBulkDataSize() const
{
return DataSize;
}
virtual void Discard()
{ }
private:
const void* Data;
uint32_t DataSize;
};
/**
* Initializes the RHI resources used by this resource.
* Called when entering the state where both the resource and the RHI have been initialized.
* This is only called by the rendering thread.
*/
#if ENGINE_MAJOR_VERSION == 5 && ENGINE_MINOR_VERSION >= 3
virtual void InitRHI(FRHICommandListBase& RHICmdList)
#else
virtual void InitRHI()
#endif
{
// FP32
{
const uint32 CoefficientStride = kFilterSize * 4;
const uint32 CoefficientSize = CoefficientStride * kPhaseCount;
FNISCoefficientsResourceBulkData BulkData(coef_scale, CoefficientSize);
#if ENGINE_MAJOR_VERSION == 5 && ENGINE_MINOR_VERSION >= 1
FRHITextureCreateDesc ScalerDesc = FRHITextureCreateDesc::Create2D(TEXT("FNISCoefficients::Scaler"))
.SetExtent(kFilterSize / 4, kPhaseCount)
.SetFormat(PF_A32B32G32R32F)
.SetNumMips(1)
.SetNumSamples(1)
.SetFlags(TexCreate_None)
.SetBulkData(&BulkData);
ScalerRHI = RHICreateTexture(ScalerDesc);
#else
FRHIResourceCreateInfo CreateInfo(TEXT("FNISCoefficients::Scaler"));
CreateInfo.BulkData = &BulkData;
ScalerRHI = RHICreateTexture2D(kFilterSize / 4, kPhaseCount, PF_A32B32G32R32F, 1, 1, TexCreate_None, CreateInfo);
#endif
}
{
const uint32 CoefficientStride = kFilterSize * 4;
const uint32 CoefficientSize = CoefficientStride * kPhaseCount;
FNISCoefficientsResourceBulkData BulkData(coef_usm, CoefficientSize);
#if ENGINE_MAJOR_VERSION == 5 && ENGINE_MINOR_VERSION >= 1
FRHITextureCreateDesc UsmDesc = FRHITextureCreateDesc::Create2D(TEXT("FNISCoefficients::Usm"))
.SetExtent(kFilterSize / 4, kPhaseCount)
.SetFormat(PF_A32B32G32R32F)
.SetNumMips(1)
.SetNumSamples(1)
.SetFlags(TexCreate_None)
.SetBulkData(&BulkData);
UsmRHI = RHICreateTexture(UsmDesc);
#else
FRHIResourceCreateInfo CreateInfo(TEXT("FNISCoefficients::Usm"));
CreateInfo.BulkData = &BulkData;
UsmRHI = RHICreateTexture2D(kFilterSize / 4, kPhaseCount, PF_A32B32G32R32F, 1, 1, TexCreate_None, CreateInfo);
#endif
}
// FP16
{
const uint32 CoefficientStride = kFilterSize * 2;
const uint32 CoefficientSize = CoefficientStride * kPhaseCount;
FNISCoefficientsResourceBulkData BulkData(coef_scale_fp16, CoefficientSize);
#if ENGINE_MAJOR_VERSION == 5 && ENGINE_MINOR_VERSION >= 1
FRHITextureCreateDesc ScalerHalfDesc = FRHITextureCreateDesc::Create2D(TEXT("FNISCoefficients::ScalerHalfPrecision"))
.SetExtent(kFilterSize / 4, kPhaseCount)
.SetFormat(PF_FloatRGBA)
.SetNumMips(1)
.SetNumSamples(1)
.SetFlags(TexCreate_None)
.SetBulkData(&BulkData);
ScalerHalfPrecisionRHI = RHICreateTexture(ScalerHalfDesc);
#else
FRHIResourceCreateInfo CreateInfo(TEXT("FNISCoefficients::ScalerHalfPrecision"));
CreateInfo.BulkData = &BulkData;
ScalerHalfPrecisionRHI = RHICreateTexture2D(kFilterSize / 4, kPhaseCount, PF_FloatRGBA, 1, 1, TexCreate_None, CreateInfo);
#endif
}
{
const uint32 CoefficientStride = kFilterSize * 2;
const uint32 CoefficientSize = CoefficientStride * kPhaseCount;
FNISCoefficientsResourceBulkData BulkData(coef_usm_fp16, CoefficientSize);
#if ENGINE_MAJOR_VERSION == 5 && ENGINE_MINOR_VERSION >= 1
FRHITextureCreateDesc UsmHalfDesc = FRHITextureCreateDesc::Create2D(TEXT("FNISCoefficients::UsmHalfPrecision"))
.SetExtent(kFilterSize / 4, kPhaseCount)
.SetFormat(PF_FloatRGBA)
.SetNumMips(1)
.SetNumSamples(1)
.SetFlags(TexCreate_None)
.SetBulkData(&BulkData);
UsmHalfPrecisionRHI = RHICreateTexture(UsmHalfDesc);
#else
FRHIResourceCreateInfo CreateInfo(TEXT("FNISCoefficients::UsmHalfPrecision"));
CreateInfo.BulkData = &BulkData;
UsmHalfPrecisionRHI = RHICreateTexture2D(kFilterSize / 4, kPhaseCount, PF_FloatRGBA, 1, 1, TexCreate_None, CreateInfo);
#endif
}
}
/**
* Releases the RHI resources used by this resource.
* Called when leaving the state where both the resource and the RHI have been initialized.
* This is only called by the rendering thread.
*/
virtual void ReleaseRHI()
{
ScalerRHI.SafeRelease();
UsmRHI.SafeRelease();
ScalerHalfPrecisionRHI.SafeRelease();
UsmHalfPrecisionRHI.SafeRelease();
}
};
static TGlobalResource<FNISCoefficients> GNISCoefficients;
/**
 * Picks the NIS HDR mode for a view family.
 * A value other than -1 in r.NIS.HDRMode wins and is clamped to the [None, PQ] range; with -1
 * the mode is derived from the tonemapper output device of the view family.
 *
 * @param InViewFamily View family whose tonemapper output device is inspected in automatic mode.
 * @return The NIS HDR mode to use (None, Linear or PQ).
 */
static NISHDRMode GetNISHDRModeFromEngineToneMapperOrCVar(const FSceneViewFamily& InViewFamily)
{
    const int RequestedHDRMode = CVarNISHDRMode.GetValueOnRenderThread();

    // Explicit override through the console variable.
    if (RequestedHDRMode != -1)
    {
        return NISHDRMode(FMath::Clamp<int32>(RequestedHDRMode, int32(NISHDRMode::None), int32(NISHDRMode::PQ)));
    }

    // Automatic: follow the engine's tonemapper output device.
    const FTonemapperOutputDeviceParameters OutputDeviceParameters = GetTonemapperOutputDeviceParameters(InViewFamily);
#if ENGINE_MAJOR_VERSION == 5 && ENGINE_MINOR_VERSION >= 1
    switch (EDisplayOutputFormat(OutputDeviceParameters.OutputDevice))
    {
        // SDR outputs
        case EDisplayOutputFormat::SDR_sRGB:
        case EDisplayOutputFormat::SDR_Rec709:
        case EDisplayOutputFormat::SDR_ExplicitGammaMapping:
            return NISHDRMode::None;

        // ST2084 encoded outputs
        case EDisplayOutputFormat::HDR_ACES_1000nit_ST2084:
        case EDisplayOutputFormat::HDR_ACES_2000nit_ST2084:
            return NISHDRMode::PQ;

        // scRGB and linear outputs
        case EDisplayOutputFormat::HDR_ACES_1000nit_ScRGB:
        case EDisplayOutputFormat::HDR_ACES_2000nit_ScRGB:
        case EDisplayOutputFormat::HDR_LinearEXR:
        case EDisplayOutputFormat::HDR_LinearNoToneCurve:
        case EDisplayOutputFormat::HDR_LinearWithToneCurve:
            return NISHDRMode::Linear;

        case EDisplayOutputFormat::MAX:
        default:
            checkf(false, TEXT("invalid EDisplayOutputFormat passed into GetNISHDRModeFromEngineToneMapper "));
            return NISHDRMode::None;
    }
#else
    switch (ETonemapperOutputDevice(OutputDeviceParameters.OutputDevice))
    {
        // SDR outputs
        case ETonemapperOutputDevice::sRGB:
        case ETonemapperOutputDevice::Rec709:
        case ETonemapperOutputDevice::ExplicitGammaMapping:
            return NISHDRMode::None;

        // ST2084 encoded outputs
        case ETonemapperOutputDevice::ACES1000nitST2084:
        case ETonemapperOutputDevice::ACES2000nitST2084:
            return NISHDRMode::PQ;

        // scRGB and linear outputs
        case ETonemapperOutputDevice::ACES1000nitScRGB:
        case ETonemapperOutputDevice::ACES2000nitScRGB:
        case ETonemapperOutputDevice::LinearEXR:
        case ETonemapperOutputDevice::LinearNoToneCurve:
        case ETonemapperOutputDevice::LinearWithToneCurve:
            return NISHDRMode::Linear;

        case ETonemapperOutputDevice::MAX:
        default:
            checkf(false, TEXT("invalid ETonemapperOutputDevice passed into GetNISHDRModeFromEngineToneMapper "));
            return NISHDRMode::None;
    }
#endif
}
/**
 * Adds the NIS compute pass (upscale or sharpen-only) for one view to the render graph.
 *
 * The pass reads Inputs.SceneColor and writes either into Inputs.OverrideOutput or, when that is
 * not valid, into a newly created texture. If the output texture cannot be bound as a UAV, the
 * shader writes into an intermediate UAV texture that is then copied into the output.
 *
 * @param GraphBuilder Render graph the textures and passes are added to.
 * @param View         View being rendered; provides rects, shader map, feature level and uniform buffer.
 * @param Inputs       Upscaler inputs: source scene color, stage and optional override output.
 * @return The output render target, converted to FScreenPassTexture.
 */
FScreenPassTexture AddSharpenOrUpscalePass(
FRDGBuilder& GraphBuilder,
const FViewInfo& View,
const ISpatialUpscaler::FInputs& Inputs
)
{
check(Inputs.SceneColor.IsValid());
check(Inputs.Stage != EUpscaleStage::MAX);
FScreenPassRenderTarget Output = Inputs.OverrideOutput;
// No caller-provided output: create one based on the scene color description.
if (!Output.IsValid())
{
FRDGTextureDesc OutputDesc = Inputs.SceneColor.Texture->Desc;
OutputDesc.Reset();
if (Inputs.Stage == EUpscaleStage::PrimaryToSecondary)
{
// Output covers the secondary view rect, placed at the texture origin.
const FIntPoint SecondaryViewRectSize = View.GetSecondaryViewRectSize();
QuantizeSceneBufferSize(SecondaryViewRectSize, OutputDesc.Extent);
Output.ViewRect.Min = FIntPoint::ZeroValue;
Output.ViewRect.Max = SecondaryViewRectSize;
}
else
{
// Any other stage writes to the unscaled (final) view rect.
OutputDesc.Extent = View.UnscaledViewRect.Max;
Output.ViewRect = View.UnscaledViewRect;
}
// We can't call OutputDesc.Flags |= GFastVRamConfig.Upscale this due to not being exported, so paraphrasing from SceneRendering.cpp:
static const auto CVarFastVRamUpscale = IConsoleManager::Get().FindTConsoleVariableDataInt(TEXT("r.FastVRam.Upscale"));
const int32 FastVRamUpscaleValue = CVarFastVRamUpscale ? CVarFastVRamUpscale->GetValueOnRenderThread() : 0;
if (FastVRamUpscaleValue == 1)
{
EnumAddFlags(OutputDesc.Flags, TexCreate_FastVRAM);
}
else if (FastVRamUpscaleValue == 2)
{
EnumAddFlags(OutputDesc.Flags, TexCreate_FastVRAM | TexCreate_FastVRAMPartialAlloc);
}
Output.Texture = GraphBuilder.CreateTexture(OutputDesc, TEXT("NISSharpen"));
Output.LoadAction = ERenderTargetLoadAction::EClear;
}
// Texture the compute shader actually writes to; replaced below when an intermediate is needed.
FRDGTextureRef OutputOrIntermediateTexture = Output.Texture;
const FIntRect SrcRect = Inputs.SceneColor.ViewRect;
FIntRect IntermediateDestRect = Output.ViewRect;
const FIntRect OutputDestRect = Output.ViewRect;
// The shader writes through a UAV, so an output without the UAV flag needs a detour.
const bool bNeedIntermediateOutput = !EnumHasAnyFlags(Output.Texture->Desc.Flags, TexCreate_UAV);
// Equal source and destination sizes mean the pass only sharpens.
const bool bIsUpscaling = SrcRect.Size() != IntermediateDestRect.Size();
// move the intermediate upscaled rect to the top left corner and allocate a smaller intermediate rendertarget
if (bNeedIntermediateOutput)
{
FRDGTextureDesc IntermediateOutputDesc = Output.Texture->Desc;
IntermediateOutputDesc.Reset();
EnumAddFlags(IntermediateOutputDesc.Flags, TexCreate_UAV);
EnumRemoveFlags(IntermediateOutputDesc.Flags, TexCreate_RenderTargetable | TexCreate_Presentable | TexCreate_ShaderResource);
const FIntPoint InterMediateViewRectSize = IntermediateDestRect.Size();
QuantizeSceneBufferSize(InterMediateViewRectSize, IntermediateOutputDesc.Extent);
IntermediateDestRect.Min = FIntPoint::ZeroValue;
IntermediateDestRect.Max = FIntPoint(InterMediateViewRectSize.X, InterMediateViewRectSize.Y);
OutputOrIntermediateTexture = GraphBuilder.CreateTexture(IntermediateOutputDesc, bIsUpscaling ? TEXT("NISUpscaleIntermediateUAV") : TEXT("NISSharpenIntermediateUAV"));
}
// Viewport support is needed as soon as either rect is not exactly the full texture.
const bool bNeedsViewportSupport = SrcRect != FIntRect(FIntPoint::ZeroValue, Inputs.SceneColor.Texture->Desc.Extent) ||
IntermediateDestRect != FIntRect(FIntPoint::ZeroValue, OutputOrIntermediateTexture->Desc.Extent);
FNISUpscaleCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FNISUpscaleCS::FParameters>();
const float Sharpness = FMath::Clamp(CVarNISSharpness.GetValueOnRenderThread(), 0.0f, 1.0f);
const NISHDRMode HdrMode = GetNISHDRModeFromEngineToneMapperOrCVar(*View.Family);
// Despite the 'b' prefix this is the integer value of r.NIS.HalfPrecision (-1 .. 3).
const int32 bHalfPrecisionMode = CVarNISHalfPrecision.GetValueOnRenderThread();
// Default for every value not handled below, including mode 3 when the view is not SM6.
ENISHalfPrecisionPermutation HalfPrecisionPermutation = ENISHalfPrecisionPermutation::Float32;
if (bHalfPrecisionMode == -1)
{
// Automatic selection based on the RHI name and the view's feature level.
#if PLATFORM_WINDOWS
static const bool bIsDx12 = FCString::Strcmp(GDynamicRHI->GetName(), TEXT("D3D12")) == 0;
#else
static const bool bIsDx12 = false;
#endif
if (bIsDx12)
{
#if ENGINE_MAJOR_VERSION == 5
if (View.GetFeatureLevel() == ERHIFeatureLevel::SM6)
{
HalfPrecisionPermutation = ENISHalfPrecisionPermutation::Float16DXC;
}
else
#endif
{
HalfPrecisionPermutation = ENISHalfPrecisionPermutation::Min16FloatDXC;
}
}
else
{
HalfPrecisionPermutation = ENISHalfPrecisionPermutation::Min16Float;
}
}
else if (bHalfPrecisionMode == 0)
{
HalfPrecisionPermutation = ENISHalfPrecisionPermutation::Float32;
}
else if (bHalfPrecisionMode == 1)
{
HalfPrecisionPermutation = ENISHalfPrecisionPermutation::Min16Float;
}
else if (bHalfPrecisionMode == 2)
{
HalfPrecisionPermutation = ENISHalfPrecisionPermutation::Min16FloatDXC;
}
#if ENGINE_MAJOR_VERSION == 5
// we can only compile this one for SM6
else if (bHalfPrecisionMode == 3 && View.GetFeatureLevel() == ERHIFeatureLevel::SM6)
{
HalfPrecisionPermutation = ENISHalfPrecisionPermutation::Float16DXC;
}
#endif
const bool bIsAnyHalfPrecisionPermutation = HalfPrecisionPermutation != ENISHalfPrecisionPermutation::Float32;
// Let the NIS SDK compute the shader constants for the source and destination rectangles.
NISConfig Config;
FMemory::Memzero(Config);
ensureMsgf(NVScalerUpdateConfig(
Config,
Sharpness,
SrcRect.Min.X, SrcRect.Min.Y,
SrcRect.Width(), SrcRect.Height(),
Inputs.SceneColor.Texture->Desc.Extent.X, Inputs.SceneColor.Texture->Desc.Extent.Y,
IntermediateDestRect.Min.X, IntermediateDestRect.Min.Y,
IntermediateDestRect.Width(), IntermediateDestRect.Height(),
OutputOrIntermediateTexture->Desc.Extent.X, OutputOrIntermediateTexture->Desc.Extent.Y,
HdrMode), TEXT("NVScalerUpdateConfig was called with invalid arguments. Please step into NVScalerUpdateConfig and put breakpoints on the return false statements to debug."));
// TODO make this less sketchy 🤐
// The raw copy below relies on NISConfig and FNISConfigParameters sharing one memory layout.
static_assert(sizeof(NISConfig) == sizeof(FNISConfigParameters), "mistmatch between engine & NIS SDK side struct");
static_assert(offsetof(NISConfig, kOutputViewportHeight) == offsetof(FNISConfigParameters, kOutputViewportHeight), "mistmatch between engine & NIS SDK side struct");
FMemory::Memcpy(&PassParameters->Config, &Config, sizeof(NISConfig));
// Half precision permutations read the 16 bit float coefficient textures.
PassParameters->coef_scaler = bIsAnyHalfPrecisionPermutation ? GNISCoefficients.ScalerHalfPrecisionRHI : GNISCoefficients.ScalerRHI;
PassParameters->coef_usm = bIsAnyHalfPrecisionPermutation ? GNISCoefficients.UsmHalfPrecisionRHI : GNISCoefficients.UsmRHI;
PassParameters->samplerLinearClamp = TStaticSamplerState<SF_Bilinear, AM_Clamp, AM_Clamp, AM_Clamp>::GetRHI();
PassParameters->in_texture = Inputs.SceneColor.Texture;
PassParameters->out_texture = GraphBuilder.CreateUAV(OutputOrIntermediateTexture);
PassParameters->View = View.ViewUniformBuffer;
// Select the shader permutation matching the decisions made above.
FNISUpscaleCS::FPermutationDomain PermutationVector;
PermutationVector.Set<FNISScalerDim>(bIsUpscaling);
PermutationVector.Set<FNISHdrModeDim>(int32(HdrMode));
PermutationVector.Set<FNISHalfPrecisionDim>(HalfPrecisionPermutation);
PermutationVector.Set<FNISBlockWidthDim>(FNISUpscaleCS::GetComputeTileSize(bIsUpscaling, bIsAnyHalfPrecisionPermutation).X);
PermutationVector.Set<FNISBlockHeightDim>(FNISUpscaleCS::GetComputeTileSize(bIsUpscaling, bIsAnyHalfPrecisionPermutation).Y);
PermutationVector.Set<FNISThreadGroupSizeDim>(FNISUpscaleCS::GetThreadGroupSize(bIsUpscaling, bIsAnyHalfPrecisionPermutation));
PermutationVector.Set<FNISViewportSupportDim>(bNeedsViewportSupport);
TShaderMapRef<FNISUpscaleCS> Shader(View.ShaderMap, PermutationVector);
// Stage name used only for the RDG event label; indexed by the EUpscaleStage value.
const TCHAR* const StageNames[] = { TEXT("PrimaryToSecondary"), TEXT("PrimaryToOutput"), TEXT("SecondaryToOutput") };
static_assert(UE_ARRAY_COUNT(StageNames) == static_cast<uint32>(EUpscaleStage::MAX), "StageNames does not match EUpscaleStage");
const TCHAR* StageName = StageNames[static_cast<uint32>(Inputs.Stage)];
// The intermediate rect may be moved to the origin but must keep the output size.
check(IntermediateDestRect.Size() == OutputDestRect.Size());
// One thread group per compute tile of the destination rectangle.
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("NIS %s %s %s (%s) (%dx%d -> %dx%d) = [%d,%d - %d,%d] -> [%d,%d - %d,%d]",
bIsUpscaling ? TEXT("Upscaler") : TEXT("Sharpen"),
bNeedIntermediateOutput ? TEXT("WithIntermediate ") : TEXT(""),
bNeedsViewportSupport ? TEXT(" Viewport") : TEXT(""),
StageName,
SrcRect.Width(), SrcRect.Height(),
IntermediateDestRect.Width(), IntermediateDestRect.Height(),
SrcRect.Min.X, SrcRect.Min.Y,
SrcRect.Max.X, SrcRect.Max.Y,
IntermediateDestRect.Min.X, IntermediateDestRect.Min.Y,
IntermediateDestRect.Max.X, IntermediateDestRect.Max.Y
),
Shader,
PassParameters,
FComputeShaderUtils::GetGroupCount(Output.ViewRect.Size(), Shader->GetComputeTileSize(bIsUpscaling, bIsAnyHalfPrecisionPermutation))
);
// Copy the intermediate result into its final position in the real output texture.
if (bNeedIntermediateOutput)
{
check(OutputOrIntermediateTexture != Output.Texture);
AddCopyTexturePass(GraphBuilder, OutputOrIntermediateTexture, Output.Texture,
IntermediateDestRect.Min,
OutputDestRect.Min,
IntermediateDestRect.Size());
}
return MoveTemp(Output);
}
#undef LOCTEXT_NAMESPACE
IMPLEMENT_MODULE(FNISShadersModule, NISShaders)

View File

@ -0,0 +1,541 @@
// The MIT License(MIT)
//
// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files(the "Software"), to deal in
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and / or sell copies of
// the Software, and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE AUTHORS OR
// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//---------------------------------------------------------------------------------
// NVIDIA Image Scaling SDK - v1.0.3
//---------------------------------------------------------------------------------
// Configuration
//---------------------------------------------------------------------------------
#pragma once
#include <algorithm>
#include <cmath>
#include <cstdint>
// NIS_ALIGNED(x): declares a type with an alignment of x bytes.
// May be pre-defined by the including project; otherwise the compiler-specific
// spelling is selected, falling back to the standard C++11 `alignas` so the
// macro is always defined (previously it stayed undefined on compilers that
// are neither MSVC nor GNU-compatible, breaking every use of the macro).
#ifndef NIS_ALIGNED
#if defined(_MSC_VER)
#define NIS_ALIGNED(x) __declspec(align(x))
#elif defined(__GNUC__)
#define NIS_ALIGNED(x) __attribute__ ((aligned(x)))
#else
#define NIS_ALIGNED(x) alignas(x)
#endif
#endif
// Constant block for the NIS scaler / sharpen pass. It is filled in by
// NVScalerUpdateConfig() (NVSharpenUpdateConfig() forwards to it).
// The engine side copies this struct byte-for-byte into its own parameter
// struct and static_asserts on the size and on the offset of
// kOutputViewportHeight, so field order and field types must stay as they are.
struct NIS_ALIGNED(256) NISConfig
{
// Detection / contrast constants. kRatioNorm is 1 / (max contrast ratio - kMinContrastRatio).
float kDetectRatio;
float kDetectThres;
float kMinContrastRatio;
float kRatioNorm;
// NVScalerUpdateConfig() always writes 1.0 to kContrastBoost and 1/255 to kEps.
float kContrastBoost;
float kEps;
// Sharpening range: kSharpScaleY is 1 / (kSharpEndY - kSharpStartY).
float kSharpStartY;
float kSharpScaleY;
// Each *Scale field holds (max - min) of the matching strength / limit pair.
float kSharpStrengthMin;
float kSharpStrengthScale;
float kSharpLimitMin;
float kSharpLimitScale;
// Input viewport size divided by output viewport size, per axis.
float kScaleX;
float kScaleY;
// Reciprocals of the output (Dst) and input (Src) texture dimensions.
float kDstNormX;
float kDstNormY;
float kSrcNormX;
float kSrcNormY;
// Input viewport as passed by the caller; a width/height of 0 is replaced by the input texture size.
uint32_t kInputViewportOriginX;
uint32_t kInputViewportOriginY;
uint32_t kInputViewportWidth;
uint32_t kInputViewportHeight;
// Output viewport as passed by the caller; a width/height of 0 is replaced by the output texture size.
uint32_t kOutputViewportOriginX;
uint32_t kOutputViewportOriginY;
uint32_t kOutputViewportWidth;
uint32_t kOutputViewportHeight;
// Not written by NVScalerUpdateConfig().
float reserved0;
float reserved1;
};
// Selects the parameter set used by NVScalerUpdateConfig(): None keeps the SDR
// defaults, while Linear and PQ both switch to the HDR values and differ from
// each other only in the kSharpStartY / kSharpEndY range they use.
enum class NISHDRMode : uint32_t
{
None = 0,
Linear = 1,
PQ = 2
};
// GPU families for which NISOptimizer carries dispatch-size entries.
enum class NISGPUArchitecture : uint32_t
{
    NVIDIA_Generic = 0,
    AMD_Generic = 1,
    Intel_Generic = 2,
    NVIDIA_Generic_fp16 = 3
};

// Compile-time lookup of the block width, block height and thread group size
// to use for a given pass type (upscaling vs. sharpening only) and GPU family.
// All getters are const-qualified so they can be called on const and constexpr
// optimizer objects (constexpr member functions are not implicitly const since C++14).
struct NISOptimizer
{
    bool isUpscaling;            // true: scaler pass, false: sharpen-only pass
    NISGPUArchitecture gpuArch;  // GPU family the values are looked up for

    constexpr NISOptimizer(bool isUpscaling = true, NISGPUArchitecture gpuArch = NISGPUArchitecture::NVIDIA_Generic)
        : isUpscaling(isUpscaling)
        , gpuArch(gpuArch)
    {}

    // Block width; currently 32 for every architecture.
    constexpr uint32_t GetOptimalBlockWidth() const
    {
        switch (gpuArch) {
        case NISGPUArchitecture::NVIDIA_Generic:
            return 32;
        case NISGPUArchitecture::NVIDIA_Generic_fp16:
            return 32;
        case NISGPUArchitecture::AMD_Generic:
            return 32;
        case NISGPUArchitecture::Intel_Generic:
            return 32;
        }
        // Unknown enum value: same as the generic entries.
        return 32;
    }

    // Block height; 32 when sharpening, and 24 when upscaling except for the
    // fp16 NVIDIA entry, which uses 32 in both cases.
    constexpr uint32_t GetOptimalBlockHeight() const
    {
        switch (gpuArch) {
        case NISGPUArchitecture::NVIDIA_Generic:
            return isUpscaling ? 24 : 32;
        case NISGPUArchitecture::NVIDIA_Generic_fp16:
            return 32;
        case NISGPUArchitecture::AMD_Generic:
            return isUpscaling ? 24 : 32;
        case NISGPUArchitecture::Intel_Generic:
            return isUpscaling ? 24 : 32;
        }
        // Unknown enum value: same as the generic entries.
        return isUpscaling ? 24 : 32;
    }

    // Thread group size; 128 for the NVIDIA entries, 256 otherwise.
    constexpr uint32_t GetOptimalThreadGroupSize() const
    {
        switch (gpuArch) {
        case NISGPUArchitecture::NVIDIA_Generic:
            return 128;
        case NISGPUArchitecture::NVIDIA_Generic_fp16:
            return 128;
        case NISGPUArchitecture::AMD_Generic:
            return 256;
        case NISGPUArchitecture::Intel_Generic:
            return 256;
        }
        // Unknown enum value: use the larger group size.
        return 256;
    }
};
// Fills `config` with the constants for a NIS pass.
//
// config               Receives the constants. It is written incrementally, so it
//                      may be partially updated when the function returns false.
// sharpness            Sharpness slider value; clamped to [0, 1].
// inputViewport*       Origin and size of the source region. A width/height of 0
//                      selects the full input texture width/height.
// inputTexture*        Dimensions of the input texture.
// outputViewport*      Origin and size of the destination region. A width/height
//                      of 0 selects the full output texture width/height.
// outputTexture*       Dimensions of the output texture.
// hdrMode              Selects the SDR (None) or HDR (Linear / PQ) parameter set.
//
// Returns false when any effective viewport dimension or any texture dimension
// is zero, or when the input/output viewport ratio on either axis is outside
// [0.5, 1]; returns true otherwise.
inline bool NVScalerUpdateConfig(NISConfig& config, float sharpness,
    uint32_t inputViewportOriginX, uint32_t inputViewportOriginY,
    uint32_t inputViewportWidth, uint32_t inputViewportHeight,
    uint32_t inputTextureWidth, uint32_t inputTextureHeight,
    uint32_t outputViewportOriginX, uint32_t outputViewportOriginY,
    uint32_t outputViewportWidth, uint32_t outputViewportHeight,
    uint32_t outputTextureWidth, uint32_t outputTextureHeight,
    NISHDRMode hdrMode = NISHDRMode::None)
{
    // adjust params based on value from sharpness slider
    sharpness = std::max<float>(std::min<float>(1.f, sharpness), 0.f);
    float sharpen_slider = sharpness - 0.5f;   // Map 0 to 1 to -0.5 to +0.5

    // Different range for 0 to 50% vs 50% to 100%
    // The idea is to make sure sharpness of 0% map to no-sharpening,
    // while also ensuring that sharpness of 100% doesn't cause too much over-sharpening.
    const float MaxScale = (sharpen_slider >= 0.0f) ? 1.25f : 1.75f;
    const float MinScale = (sharpen_slider >= 0.0f) ? 1.25f : 1.0f;
    const float LimitScale = (sharpen_slider >= 0.0f) ? 1.25f : 1.0f;

    float kDetectRatio = 2 * 1127.f / 1024.f;

    // Params for SDR
    float kDetectThres = 64.0f / 1024.0f;
    float kMinContrastRatio = 2.0f;
    float kMaxContrastRatio = 10.0f;

    float kSharpStartY = 0.45f;
    float kSharpEndY = 0.9f;
    float kSharpStrengthMin = std::max<float>(0.0f, 0.4f + sharpen_slider * MinScale * 1.2f);
    float kSharpStrengthMax = 1.6f + sharpen_slider * MaxScale * 1.8f;
    float kSharpLimitMin = std::max<float>(0.1f, 0.14f + sharpen_slider * LimitScale * 0.32f);
    float kSharpLimitMax = 0.5f + sharpen_slider * LimitScale * 0.6f;

    // Params for HDR (both Linear and PQ); only the sharpening Y range differs between the two.
    if (hdrMode == NISHDRMode::Linear || hdrMode == NISHDRMode::PQ)
    {
        kDetectThres = 32.0f / 1024.0f;

        kMinContrastRatio = 1.5f;
        kMaxContrastRatio = 5.0f;

        kSharpStrengthMin = std::max<float>(0.0f, 0.4f + sharpen_slider * MinScale * 1.1f);
        kSharpStrengthMax = 2.2f + sharpen_slider * MaxScale * 1.8f;
        kSharpLimitMin = std::max<float>(0.06f, 0.10f + sharpen_slider * LimitScale * 0.28f);
        kSharpLimitMax = 0.6f + sharpen_slider * LimitScale * 0.6f;

        if (hdrMode == NISHDRMode::PQ)
        {
            kSharpStartY = 0.35f;
            kSharpEndY = 0.55f;
        }
        else
        {
            kSharpStartY = 0.3f;
            kSharpEndY = 0.5f;
        }
    }

    // Pre-compute the reciprocals / ranges stored in the config.
    float kRatioNorm = 1.0f / (kMaxContrastRatio - kMinContrastRatio);
    float kSharpScaleY = 1.0f / (kSharpEndY - kSharpStartY);
    float kSharpStrengthScale = kSharpStrengthMax - kSharpStrengthMin;
    float kSharpLimitScale = kSharpLimitMax - kSharpLimitMin;

    // A viewport extent of 0 means "use the whole texture".
    config.kInputViewportWidth = inputViewportWidth == 0 ? inputTextureWidth : inputViewportWidth;
    config.kInputViewportHeight = inputViewportHeight == 0 ? inputTextureHeight : inputViewportHeight;
    config.kOutputViewportWidth = outputViewportWidth == 0 ? outputTextureWidth : outputViewportWidth;
    config.kOutputViewportHeight = outputViewportHeight == 0 ? outputTextureHeight : outputViewportHeight;
    if (config.kInputViewportWidth == 0 || config.kInputViewportHeight == 0 ||
        config.kOutputViewportWidth == 0 || config.kOutputViewportHeight == 0)
        return false;

    // An explicit viewport combined with a zero-sized texture would make the
    // reciprocal norms below infinite (division by zero); treat it as invalid.
    if (inputTextureWidth == 0 || inputTextureHeight == 0 ||
        outputTextureWidth == 0 || outputTextureHeight == 0)
        return false;

    config.kInputViewportOriginX = inputViewportOriginX;
    config.kInputViewportOriginY = inputViewportOriginY;
    config.kOutputViewportOriginX = outputViewportOriginX;
    config.kOutputViewportOriginY = outputViewportOriginY;

    config.kSrcNormX = 1.f / inputTextureWidth;
    config.kSrcNormY = 1.f / inputTextureHeight;
    config.kDstNormX = 1.f / outputTextureWidth;
    config.kDstNormY = 1.f / outputTextureHeight;
    config.kScaleX = config.kInputViewportWidth / float(config.kOutputViewportWidth);
    config.kScaleY = config.kInputViewportHeight / float(config.kOutputViewportHeight);
    config.kDetectRatio = kDetectRatio;
    config.kDetectThres = kDetectThres;
    config.kMinContrastRatio = kMinContrastRatio;
    config.kRatioNorm = kRatioNorm;
    config.kContrastBoost = 1.0f;
    config.kEps = 1.0f / 255.0f;
    config.kSharpStartY = kSharpStartY;
    config.kSharpScaleY = kSharpScaleY;
    config.kSharpStrengthMin = kSharpStrengthMin;
    config.kSharpStrengthScale = kSharpStrengthScale;
    config.kSharpLimitMin = kSharpLimitMin;
    config.kSharpLimitScale = kSharpLimitScale;

    // Only ratios between 0.5 (input is half the output size) and 1 (same size) are accepted.
    if (config.kScaleX < 0.5f || config.kScaleX > 1.f || config.kScaleY < 0.5f || config.kScaleY > 1.f)
        return false;
    return true;
}
// Sharpen-only variant of NVScalerUpdateConfig(): there is no separate output
// size, so the output viewport and texture dimensions are taken from the input
// ones. Returns whatever NVScalerUpdateConfig() returns for those arguments.
inline bool NVSharpenUpdateConfig(NISConfig& config, float sharpness,
    uint32_t inputViewportOriginX, uint32_t inputViewportOriginY,
    uint32_t inputViewportWidth, uint32_t inputViewportHeight,
    uint32_t inputTextureWidth, uint32_t inputTextureHeight,
    uint32_t outputViewportOriginX, uint32_t outputViewportOriginY,
    NISHDRMode hdrMode = NISHDRMode::None)
{
    // Output extents mirror the input extents for a sharpen-only pass.
    const uint32_t mirroredViewportWidth = inputViewportWidth;
    const uint32_t mirroredViewportHeight = inputViewportHeight;
    const uint32_t mirroredTextureWidth = inputTextureWidth;
    const uint32_t mirroredTextureHeight = inputTextureHeight;

    const bool bConfigIsValid = NVScalerUpdateConfig(
        config, sharpness,
        inputViewportOriginX, inputViewportOriginY,
        inputViewportWidth, inputViewportHeight,
        inputTextureWidth, inputTextureHeight,
        outputViewportOriginX, outputViewportOriginY,
        mirroredViewportWidth, mirroredViewportHeight,
        mirroredTextureWidth, mirroredTextureHeight,
        hdrMode);
    return bConfigIsValid;
}
namespace {
// Dimensions shared by the four coefficient tables in this namespace:
// each table has kPhaseCount rows of kFilterSize entries.
constexpr size_t kPhaseCount = 64;
constexpr size_t kFilterSize = 8;
constexpr float coef_scale[kPhaseCount][kFilterSize] = {
{0.0f, 0.0f, 1.0000f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
{0.0029f, -0.0127f, 1.0000f, 0.0132f, -0.0034f, 0.0f, 0.0f, 0.0f},
{0.0063f, -0.0249f, 0.9985f, 0.0269f, -0.0068f, 0.0f, 0.0f, 0.0f},
{0.0088f, -0.0361f, 0.9956f, 0.0415f, -0.0103f, 0.0005f, 0.0f, 0.0f},
{0.0117f, -0.0474f, 0.9932f, 0.0562f, -0.0142f, 0.0005f, 0.0f, 0.0f},
{0.0142f, -0.0576f, 0.9897f, 0.0713f, -0.0181f, 0.0005f, 0.0f, 0.0f},
{0.0166f, -0.0674f, 0.9844f, 0.0874f, -0.0220f, 0.0010f, 0.0f, 0.0f},
{0.0186f, -0.0762f, 0.9785f, 0.1040f, -0.0264f, 0.0015f, 0.0f, 0.0f},
{0.0205f, -0.0850f, 0.9727f, 0.1206f, -0.0308f, 0.0020f, 0.0f, 0.0f},
{0.0225f, -0.0928f, 0.9648f, 0.1382f, -0.0352f, 0.0024f, 0.0f, 0.0f},
{0.0239f, -0.1006f, 0.9575f, 0.1558f, -0.0396f, 0.0029f, 0.0f, 0.0f},
{0.0254f, -0.1074f, 0.9487f, 0.1738f, -0.0439f, 0.0034f, 0.0f, 0.0f},
{0.0264f, -0.1138f, 0.9390f, 0.1929f, -0.0488f, 0.0044f, 0.0f, 0.0f},
{0.0278f, -0.1191f, 0.9282f, 0.2119f, -0.0537f, 0.0049f, 0.0f, 0.0f},
{0.0288f, -0.1245f, 0.9170f, 0.2310f, -0.0581f, 0.0059f, 0.0f, 0.0f},
{0.0293f, -0.1294f, 0.9058f, 0.2510f, -0.0630f, 0.0063f, 0.0f, 0.0f},
{0.0303f, -0.1333f, 0.8926f, 0.2710f, -0.0679f, 0.0073f, 0.0f, 0.0f},
{0.0308f, -0.1367f, 0.8789f, 0.2915f, -0.0728f, 0.0083f, 0.0f, 0.0f},
{0.0308f, -0.1401f, 0.8657f, 0.3120f, -0.0776f, 0.0093f, 0.0f, 0.0f},
{0.0313f, -0.1426f, 0.8506f, 0.3330f, -0.0825f, 0.0103f, 0.0f, 0.0f},
{0.0313f, -0.1445f, 0.8354f, 0.3540f, -0.0874f, 0.0112f, 0.0f, 0.0f},
{0.0313f, -0.1460f, 0.8193f, 0.3755f, -0.0923f, 0.0122f, 0.0f, 0.0f},
{0.0313f, -0.1470f, 0.8022f, 0.3965f, -0.0967f, 0.0137f, 0.0f, 0.0f},
{0.0308f, -0.1479f, 0.7856f, 0.4185f, -0.1016f, 0.0146f, 0.0f, 0.0f},
{0.0303f, -0.1479f, 0.7681f, 0.4399f, -0.1060f, 0.0156f, 0.0f, 0.0f},
{0.0298f, -0.1479f, 0.7505f, 0.4614f, -0.1104f, 0.0166f, 0.0f, 0.0f},
{0.0293f, -0.1470f, 0.7314f, 0.4829f, -0.1147f, 0.0181f, 0.0f, 0.0f},
{0.0288f, -0.1460f, 0.7119f, 0.5049f, -0.1187f, 0.0190f, 0.0f, 0.0f},
{0.0278f, -0.1445f, 0.6929f, 0.5264f, -0.1226f, 0.0200f, 0.0f, 0.0f},
{0.0273f, -0.1431f, 0.6724f, 0.5479f, -0.1260f, 0.0215f, 0.0f, 0.0f},
{0.0264f, -0.1411f, 0.6528f, 0.5693f, -0.1299f, 0.0225f, 0.0f, 0.0f},
{0.0254f, -0.1387f, 0.6323f, 0.5903f, -0.1328f, 0.0234f, 0.0f, 0.0f},
{0.0244f, -0.1357f, 0.6113f, 0.6113f, -0.1357f, 0.0244f, 0.0f, 0.0f},
{0.0234f, -0.1328f, 0.5903f, 0.6323f, -0.1387f, 0.0254f, 0.0f, 0.0f},
{0.0225f, -0.1299f, 0.5693f, 0.6528f, -0.1411f, 0.0264f, 0.0f, 0.0f},
{0.0215f, -0.1260f, 0.5479f, 0.6724f, -0.1431f, 0.0273f, 0.0f, 0.0f},
{0.0200f, -0.1226f, 0.5264f, 0.6929f, -0.1445f, 0.0278f, 0.0f, 0.0f},
{0.0190f, -0.1187f, 0.5049f, 0.7119f, -0.1460f, 0.0288f, 0.0f, 0.0f},
{0.0181f, -0.1147f, 0.4829f, 0.7314f, -0.1470f, 0.0293f, 0.0f, 0.0f},
{0.0166f, -0.1104f, 0.4614f, 0.7505f, -0.1479f, 0.0298f, 0.0f, 0.0f},
{0.0156f, -0.1060f, 0.4399f, 0.7681f, -0.1479f, 0.0303f, 0.0f, 0.0f},
{0.0146f, -0.1016f, 0.4185f, 0.7856f, -0.1479f, 0.0308f, 0.0f, 0.0f},
{0.0137f, -0.0967f, 0.3965f, 0.8022f, -0.1470f, 0.0313f, 0.0f, 0.0f},
{0.0122f, -0.0923f, 0.3755f, 0.8193f, -0.1460f, 0.0313f, 0.0f, 0.0f},
{0.0112f, -0.0874f, 0.3540f, 0.8354f, -0.1445f, 0.0313f, 0.0f, 0.0f},
{0.0103f, -0.0825f, 0.3330f, 0.8506f, -0.1426f, 0.0313f, 0.0f, 0.0f},
{0.0093f, -0.0776f, 0.3120f, 0.8657f, -0.1401f, 0.0308f, 0.0f, 0.0f},
{0.0083f, -0.0728f, 0.2915f, 0.8789f, -0.1367f, 0.0308f, 0.0f, 0.0f},
{0.0073f, -0.0679f, 0.2710f, 0.8926f, -0.1333f, 0.0303f, 0.0f, 0.0f},
{0.0063f, -0.0630f, 0.2510f, 0.9058f, -0.1294f, 0.0293f, 0.0f, 0.0f},
{0.0059f, -0.0581f, 0.2310f, 0.9170f, -0.1245f, 0.0288f, 0.0f, 0.0f},
{0.0049f, -0.0537f, 0.2119f, 0.9282f, -0.1191f, 0.0278f, 0.0f, 0.0f},
{0.0044f, -0.0488f, 0.1929f, 0.9390f, -0.1138f, 0.0264f, 0.0f, 0.0f},
{0.0034f, -0.0439f, 0.1738f, 0.9487f, -0.1074f, 0.0254f, 0.0f, 0.0f},
{0.0029f, -0.0396f, 0.1558f, 0.9575f, -0.1006f, 0.0239f, 0.0f, 0.0f},
{0.0024f, -0.0352f, 0.1382f, 0.9648f, -0.0928f, 0.0225f, 0.0f, 0.0f},
{0.0020f, -0.0308f, 0.1206f, 0.9727f, -0.0850f, 0.0205f, 0.0f, 0.0f},
{0.0015f, -0.0264f, 0.1040f, 0.9785f, -0.0762f, 0.0186f, 0.0f, 0.0f},
{0.0010f, -0.0220f, 0.0874f, 0.9844f, -0.0674f, 0.0166f, 0.0f, 0.0f},
{0.0005f, -0.0181f, 0.0713f, 0.9897f, -0.0576f, 0.0142f, 0.0f, 0.0f},
{0.0005f, -0.0142f, 0.0562f, 0.9932f, -0.0474f, 0.0117f, 0.0f, 0.0f},
{0.0005f, -0.0103f, 0.0415f, 0.9956f, -0.0361f, 0.0088f, 0.0f, 0.0f},
{0.0f, -0.0068f, 0.0269f, 0.9985f, -0.0249f, 0.0063f, 0.0f, 0.0f},
{0.0f, -0.0034f, 0.0132f, 1.0000f, -0.0127f, 0.0029f, 0.0f, 0.0f}
};
constexpr float coef_usm[kPhaseCount][kFilterSize] = {
{0.0f, -0.6001f, 1.2002f, -0.6001f, 0.0f, 0.0f, 0.0f, 0.0f},
{0.0029f, -0.6084f, 1.1987f, -0.5903f, -0.0029f, 0.0f, 0.0f, 0.0f},
{0.0049f, -0.6147f, 1.1958f, -0.5791f, -0.0068f, 0.0005f, 0.0f, 0.0f},
{0.0073f, -0.6196f, 1.1890f, -0.5659f, -0.0103f, 0.0f, 0.0f, 0.0f},
{0.0093f, -0.6235f, 1.1802f, -0.5513f, -0.0151f, 0.0f, 0.0f, 0.0f},
{0.0112f, -0.6265f, 1.1699f, -0.5352f, -0.0195f, 0.0005f, 0.0f, 0.0f},
{0.0122f, -0.6270f, 1.1582f, -0.5181f, -0.0259f, 0.0005f, 0.0f, 0.0f},
{0.0142f, -0.6284f, 1.1455f, -0.5005f, -0.0317f, 0.0005f, 0.0f, 0.0f},
{0.0156f, -0.6265f, 1.1274f, -0.4790f, -0.0386f, 0.0005f, 0.0f, 0.0f},
{0.0166f, -0.6235f, 1.1089f, -0.4570f, -0.0454f, 0.0010f, 0.0f, 0.0f},
{0.0176f, -0.6187f, 1.0879f, -0.4346f, -0.0532f, 0.0010f, 0.0f, 0.0f},
{0.0181f, -0.6138f, 1.0659f, -0.4102f, -0.0615f, 0.0015f, 0.0f, 0.0f},
{0.0190f, -0.6069f, 1.0405f, -0.3843f, -0.0698f, 0.0015f, 0.0f, 0.0f},
{0.0195f, -0.6006f, 1.0161f, -0.3574f, -0.0796f, 0.0020f, 0.0f, 0.0f},
{0.0200f, -0.5928f, 0.9893f, -0.3286f, -0.0898f, 0.0024f, 0.0f, 0.0f},
{0.0200f, -0.5820f, 0.9580f, -0.2988f, -0.1001f, 0.0029f, 0.0f, 0.0f},
{0.0200f, -0.5728f, 0.9292f, -0.2690f, -0.1104f, 0.0034f, 0.0f, 0.0f},
{0.0200f, -0.5620f, 0.8975f, -0.2368f, -0.1226f, 0.0039f, 0.0f, 0.0f},
{0.0205f, -0.5498f, 0.8643f, -0.2046f, -0.1343f, 0.0044f, 0.0f, 0.0f},
{0.0200f, -0.5371f, 0.8301f, -0.1709f, -0.1465f, 0.0049f, 0.0f, 0.0f},
{0.0195f, -0.5239f, 0.7944f, -0.1367f, -0.1587f, 0.0054f, 0.0f, 0.0f},
{0.0195f, -0.5107f, 0.7598f, -0.1021f, -0.1724f, 0.0059f, 0.0f, 0.0f},
{0.0190f, -0.4966f, 0.7231f, -0.0649f, -0.1865f, 0.0063f, 0.0f, 0.0f},
{0.0186f, -0.4819f, 0.6846f, -0.0288f, -0.1997f, 0.0068f, 0.0f, 0.0f},
{0.0186f, -0.4668f, 0.6460f, 0.0093f, -0.2144f, 0.0073f, 0.0f, 0.0f},
{0.0176f, -0.4507f, 0.6055f, 0.0479f, -0.2290f, 0.0083f, 0.0f, 0.0f},
{0.0171f, -0.4370f, 0.5693f, 0.0859f, -0.2446f, 0.0088f, 0.0f, 0.0f},
{0.0161f, -0.4199f, 0.5283f, 0.1255f, -0.2598f, 0.0098f, 0.0f, 0.0f},
{0.0161f, -0.4048f, 0.4883f, 0.1655f, -0.2754f, 0.0103f, 0.0f, 0.0f},
{0.0151f, -0.3887f, 0.4497f, 0.2041f, -0.2910f, 0.0107f, 0.0f, 0.0f},
{0.0142f, -0.3711f, 0.4072f, 0.2446f, -0.3066f, 0.0117f, 0.0f, 0.0f},
{0.0137f, -0.3555f, 0.3672f, 0.2852f, -0.3228f, 0.0122f, 0.0f, 0.0f},
{0.0132f, -0.3394f, 0.3262f, 0.3262f, -0.3394f, 0.0132f, 0.0f, 0.0f},
{0.0122f, -0.3228f, 0.2852f, 0.3672f, -0.3555f, 0.0137f, 0.0f, 0.0f},
{0.0117f, -0.3066f, 0.2446f, 0.4072f, -0.3711f, 0.0142f, 0.0f, 0.0f},
{0.0107f, -0.2910f, 0.2041f, 0.4497f, -0.3887f, 0.0151f, 0.0f, 0.0f},
{0.0103f, -0.2754f, 0.1655f, 0.4883f, -0.4048f, 0.0161f, 0.0f, 0.0f},
{0.0098f, -0.2598f, 0.1255f, 0.5283f, -0.4199f, 0.0161f, 0.0f, 0.0f},
{0.0088f, -0.2446f, 0.0859f, 0.5693f, -0.4370f, 0.0171f, 0.0f, 0.0f},
{0.0083f, -0.2290f, 0.0479f, 0.6055f, -0.4507f, 0.0176f, 0.0f, 0.0f},
{0.0073f, -0.2144f, 0.0093f, 0.6460f, -0.4668f, 0.0186f, 0.0f, 0.0f},
{0.0068f, -0.1997f, -0.0288f, 0.6846f, -0.4819f, 0.0186f, 0.0f, 0.0f},
{0.0063f, -0.1865f, -0.0649f, 0.7231f, -0.4966f, 0.0190f, 0.0f, 0.0f},
{0.0059f, -0.1724f, -0.1021f, 0.7598f, -0.5107f, 0.0195f, 0.0f, 0.0f},
{0.0054f, -0.1587f, -0.1367f, 0.7944f, -0.5239f, 0.0195f, 0.0f, 0.0f},
{0.0049f, -0.1465f, -0.1709f, 0.8301f, -0.5371f, 0.0200f, 0.0f, 0.0f},
{0.0044f, -0.1343f, -0.2046f, 0.8643f, -0.5498f, 0.0205f, 0.0f, 0.0f},
{0.0039f, -0.1226f, -0.2368f, 0.8975f, -0.5620f, 0.0200f, 0.0f, 0.0f},
{0.0034f, -0.1104f, -0.2690f, 0.9292f, -0.5728f, 0.0200f, 0.0f, 0.0f},
{0.0029f, -0.1001f, -0.2988f, 0.9580f, -0.5820f, 0.0200f, 0.0f, 0.0f},
{0.0024f, -0.0898f, -0.3286f, 0.9893f, -0.5928f, 0.0200f, 0.0f, 0.0f},
{0.0020f, -0.0796f, -0.3574f, 1.0161f, -0.6006f, 0.0195f, 0.0f, 0.0f},
{0.0015f, -0.0698f, -0.3843f, 1.0405f, -0.6069f, 0.0190f, 0.0f, 0.0f},
{0.0015f, -0.0615f, -0.4102f, 1.0659f, -0.6138f, 0.0181f, 0.0f, 0.0f},
{0.0010f, -0.0532f, -0.4346f, 1.0879f, -0.6187f, 0.0176f, 0.0f, 0.0f},
{0.0010f, -0.0454f, -0.4570f, 1.1089f, -0.6235f, 0.0166f, 0.0f, 0.0f},
{0.0005f, -0.0386f, -0.4790f, 1.1274f, -0.6265f, 0.0156f, 0.0f, 0.0f},
{0.0005f, -0.0317f, -0.5005f, 1.1455f, -0.6284f, 0.0142f, 0.0f, 0.0f},
{0.0005f, -0.0259f, -0.5181f, 1.1582f, -0.6270f, 0.0122f, 0.0f, 0.0f},
{0.0005f, -0.0195f, -0.5352f, 1.1699f, -0.6265f, 0.0112f, 0.0f, 0.0f},
{0.0f, -0.0151f, -0.5513f, 1.1802f, -0.6235f, 0.0093f, 0.0f, 0.0f},
{0.0f, -0.0103f, -0.5659f, 1.1890f, -0.6196f, 0.0073f, 0.0f, 0.0f},
{0.0005f, -0.0068f, -0.5791f, 1.1958f, -0.6147f, 0.0049f, 0.0f, 0.0f},
{0.0f, -0.0029f, -0.5903f, 1.1987f, -0.6084f, 0.0029f, 0.0f, 0.0f}
};
constexpr uint16_t coef_scale_fp16[kPhaseCount][kFilterSize] = {
{ 0, 0, 15360, 0, 0, 0, 0, 0 },
{ 6640, 41601, 15360, 8898, 39671, 0, 0, 0 },
{ 7796, 42592, 15357, 9955, 40695, 0, 0, 0 },
{ 8321, 43167, 15351, 10576, 41286, 4121, 0, 0 },
{ 8702, 43537, 15346, 11058, 41797, 4121, 0, 0 },
{ 9029, 43871, 15339, 11408, 42146, 4121, 0, 0 },
{ 9280, 44112, 15328, 11672, 42402, 5145, 0, 0 },
{ 9411, 44256, 15316, 11944, 42690, 5669, 0, 0 },
{ 9535, 44401, 15304, 12216, 42979, 6169, 0, 0 },
{ 9667, 44528, 15288, 12396, 43137, 6378, 0, 0 },
{ 9758, 44656, 15273, 12540, 43282, 6640, 0, 0 },
{ 9857, 44768, 15255, 12688, 43423, 6903, 0, 0 },
{ 9922, 44872, 15235, 12844, 43583, 7297, 0, 0 },
{ 10014, 44959, 15213, 13000, 43744, 7429, 0, 0 },
{ 10079, 45048, 15190, 13156, 43888, 7691, 0, 0 },
{ 10112, 45092, 15167, 13316, 44040, 7796, 0, 0 },
{ 10178, 45124, 15140, 13398, 44120, 8058, 0, 0 },
{ 10211, 45152, 15112, 13482, 44201, 8256, 0, 0 },
{ 10211, 45180, 15085, 13566, 44279, 8387, 0, 0 },
{ 10242, 45200, 15054, 13652, 44360, 8518, 0, 0 },
{ 10242, 45216, 15023, 13738, 44440, 8636, 0, 0 },
{ 10242, 45228, 14990, 13826, 44520, 8767, 0, 0 },
{ 10242, 45236, 14955, 13912, 44592, 8964, 0, 0 },
{ 10211, 45244, 14921, 14002, 44673, 9082, 0, 0 },
{ 10178, 45244, 14885, 14090, 44745, 9213, 0, 0 },
{ 10145, 45244, 14849, 14178, 44817, 9280, 0, 0 },
{ 10112, 45236, 14810, 14266, 44887, 9378, 0, 0 },
{ 10079, 45228, 14770, 14346, 44953, 9437, 0, 0 },
{ 10014, 45216, 14731, 14390, 45017, 9503, 0, 0 },
{ 9981, 45204, 14689, 14434, 45064, 9601, 0, 0 },
{ 9922, 45188, 14649, 14478, 45096, 9667, 0, 0 },
{ 9857, 45168, 14607, 14521, 45120, 9726, 0, 0 },
{ 9791, 45144, 14564, 14564, 45144, 9791, 0, 0 },
{ 9726, 45120, 14521, 14607, 45168, 9857, 0, 0 },
{ 9667, 45096, 14478, 14649, 45188, 9922, 0, 0 },
{ 9601, 45064, 14434, 14689, 45204, 9981, 0, 0 },
{ 9503, 45017, 14390, 14731, 45216, 10014, 0, 0 },
{ 9437, 44953, 14346, 14770, 45228, 10079, 0, 0 },
{ 9378, 44887, 14266, 14810, 45236, 10112, 0, 0 },
{ 9280, 44817, 14178, 14849, 45244, 10145, 0, 0 },
{ 9213, 44745, 14090, 14885, 45244, 10178, 0, 0 },
{ 9082, 44673, 14002, 14921, 45244, 10211, 0, 0 },
{ 8964, 44592, 13912, 14955, 45236, 10242, 0, 0 },
{ 8767, 44520, 13826, 14990, 45228, 10242, 0, 0 },
{ 8636, 44440, 13738, 15023, 45216, 10242, 0, 0 },
{ 8518, 44360, 13652, 15054, 45200, 10242, 0, 0 },
{ 8387, 44279, 13566, 15085, 45180, 10211, 0, 0 },
{ 8256, 44201, 13482, 15112, 45152, 10211, 0, 0 },
{ 8058, 44120, 13398, 15140, 45124, 10178, 0, 0 },
{ 7796, 44040, 13316, 15167, 45092, 10112, 0, 0 },
{ 7691, 43888, 13156, 15190, 45048, 10079, 0, 0 },
{ 7429, 43744, 13000, 15213, 44959, 10014, 0, 0 },
{ 7297, 43583, 12844, 15235, 44872, 9922, 0, 0 },
{ 6903, 43423, 12688, 15255, 44768, 9857, 0, 0 },
{ 6640, 43282, 12540, 15273, 44656, 9758, 0, 0 },
{ 6378, 43137, 12396, 15288, 44528, 9667, 0, 0 },
{ 6169, 42979, 12216, 15304, 44401, 9535, 0, 0 },
{ 5669, 42690, 11944, 15316, 44256, 9411, 0, 0 },
{ 5145, 42402, 11672, 15328, 44112, 9280, 0, 0 },
{ 4121, 42146, 11408, 15339, 43871, 9029, 0, 0 },
{ 4121, 41797, 11058, 15346, 43537, 8702, 0, 0 },
{ 4121, 41286, 10576, 15351, 43167, 8321, 0, 0 },
{ 0, 40695, 9955, 15357, 42592, 7796, 0, 0 },
{ 0, 39671, 8898, 15360, 41601, 6640, 0, 0 },
};
constexpr uint16_t coef_usm_fp16[kPhaseCount][kFilterSize] = {
{ 0, 47309, 15565, 47309, 0, 0, 0, 0 },
{ 6640, 47326, 15563, 47289, 39408, 0, 0, 0 },
{ 7429, 47339, 15560, 47266, 40695, 4121, 0, 0 },
{ 8058, 47349, 15554, 47239, 41286, 0, 0, 0 },
{ 8387, 47357, 15545, 47209, 41915, 0, 0, 0 },
{ 8636, 47363, 15534, 47176, 42238, 4121, 0, 0 },
{ 8767, 47364, 15522, 47141, 42657, 4121, 0, 0 },
{ 9029, 47367, 15509, 47105, 43023, 4121, 0, 0 },
{ 9213, 47363, 15490, 47018, 43249, 4121, 0, 0 },
{ 9280, 47357, 15472, 46928, 43472, 5145, 0, 0 },
{ 9345, 47347, 15450, 46836, 43727, 5145, 0, 0 },
{ 9378, 47337, 15427, 46736, 43999, 5669, 0, 0 },
{ 9437, 47323, 15401, 46630, 44152, 5669, 0, 0 },
{ 9470, 47310, 15376, 46520, 44312, 6169, 0, 0 },
{ 9503, 47294, 15338, 46402, 44479, 6378, 0, 0 },
{ 9503, 47272, 15274, 46280, 44648, 6640, 0, 0 },
{ 9503, 47253, 15215, 46158, 44817, 6903, 0, 0 },
{ 9503, 47231, 15150, 45972, 45017, 7165, 0, 0 },
{ 9535, 47206, 15082, 45708, 45132, 7297, 0, 0 },
{ 9503, 47180, 15012, 45432, 45232, 7429, 0, 0 },
{ 9470, 47153, 14939, 45152, 45332, 7560, 0, 0 },
{ 9470, 47126, 14868, 44681, 45444, 7691, 0, 0 },
{ 9437, 47090, 14793, 44071, 45560, 7796, 0, 0 },
{ 9411, 47030, 14714, 42847, 45668, 7927, 0, 0 },
{ 9411, 46968, 14635, 8387, 45788, 8058, 0, 0 },
{ 9345, 46902, 14552, 10786, 45908, 8256, 0, 0 },
{ 9313, 46846, 14478, 11647, 46036, 8321, 0, 0 },
{ 9247, 46776, 14394, 12292, 46120, 8453, 0, 0 },
{ 9247, 46714, 14288, 12620, 46184, 8518, 0, 0 },
{ 9147, 46648, 14130, 12936, 46248, 8570, 0, 0 },
{ 9029, 46576, 13956, 13268, 46312, 8702, 0, 0 },
{ 8964, 46512, 13792, 13456, 46378, 8767, 0, 0 },
{ 8898, 46446, 13624, 13624, 46446, 8898, 0, 0 },
{ 8767, 46378, 13456, 13792, 46512, 8964, 0, 0 },
{ 8702, 46312, 13268, 13956, 46576, 9029, 0, 0 },
{ 8570, 46248, 12936, 14130, 46648, 9147, 0, 0 },
{ 8518, 46184, 12620, 14288, 46714, 9247, 0, 0 },
{ 8453, 46120, 12292, 14394, 46776, 9247, 0, 0 },
{ 8321, 46036, 11647, 14478, 46846, 9313, 0, 0 },
{ 8256, 45908, 10786, 14552, 46902, 9345, 0, 0 },
{ 8058, 45788, 8387, 14635, 46968, 9411, 0, 0 },
{ 7927, 45668, 42847, 14714, 47030, 9411, 0, 0 },
{ 7796, 45560, 44071, 14793, 47090, 9437, 0, 0 },
{ 7691, 45444, 44681, 14868, 47126, 9470, 0, 0 },
{ 7560, 45332, 45152, 14939, 47153, 9470, 0, 0 },
{ 7429, 45232, 45432, 15012, 47180, 9503, 0, 0 },
{ 7297, 45132, 45708, 15082, 47206, 9535, 0, 0 },
{ 7165, 45017, 45972, 15150, 47231, 9503, 0, 0 },
{ 6903, 44817, 46158, 15215, 47253, 9503, 0, 0 },
{ 6640, 44648, 46280, 15274, 47272, 9503, 0, 0 },
{ 6378, 44479, 46402, 15338, 47294, 9503, 0, 0 },
{ 6169, 44312, 46520, 15376, 47310, 9470, 0, 0 },
{ 5669, 44152, 46630, 15401, 47323, 9437, 0, 0 },
{ 5669, 43999, 46736, 15427, 47337, 9378, 0, 0 },
{ 5145, 43727, 46836, 15450, 47347, 9345, 0, 0 },
{ 5145, 43472, 46928, 15472, 47357, 9280, 0, 0 },
{ 4121, 43249, 47018, 15490, 47363, 9213, 0, 0 },
{ 4121, 43023, 47105, 15509, 47367, 9029, 0, 0 },
{ 4121, 42657, 47141, 15522, 47364, 8767, 0, 0 },
{ 4121, 42238, 47176, 15534, 47363, 8636, 0, 0 },
{ 0, 41915, 47209, 15545, 47357, 8387, 0, 0 },
{ 0, 41286, 47239, 15554, 47349, 8058, 0, 0 },
{ 4121, 40695, 47266, 15560, 47339, 7429, 0, 0 },
{ 0, 39408, 47289, 15563, 47326, 6640, 0, 0 },
};
}

View File

@ -0,0 +1,39 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "Modules/ModuleManager.h"
#include "PostProcess/PostProcessUpscale.h"
class FNISViewExtension;
DECLARE_LOG_CATEGORY_EXTERN(LogNIS, Log, All);
class FNISShadersModule final: public IModuleInterface
{
public:
/** IModuleInterface implementation */
virtual void StartupModule();
virtual void ShutdownModule();
private:
};
// Exported from the NISShaders module.
// NOTE(review): presumably the lowest RHI feature level on which the NIS shaders can be used;
// the definition is not visible from this header - confirm there.
NISSHADERS_API ERHIFeatureLevel::Type GetNISMinRequiredFeatureLevel();
// Exported from the NISShaders module. Takes the render graph builder, the view and the
// spatial upscaler inputs, and returns a screen pass texture.
// NOTE(review): expected to add the NIS sharpen or upscale pass to GraphBuilder and return
// its output - confirm against the definition.
NISSHADERS_API FScreenPassTexture AddSharpenOrUpscalePass(
FRDGBuilder& GraphBuilder,
const FViewInfo& View,
const ISpatialUpscaler::FInputs& Inputs
);

View File

@ -0,0 +1,34 @@
{
"FileVersion": 3,
"Version": 115,
"VersionName": "4.0.0-SL2.7.3",
"FriendlyName": "NVIDIA Streamline (deprecated)",
"Description": "For compatibility with projects using previous versions of the Streamline plugins",
"Category": "Rendering",
"CreatedBy": "NVIDIA",
"CreatedByURL": "https://developer.nvidia.com/rtx/streamline",
"DocsURL": "",
"MarketplaceURL": "https://www.unrealengine.com/marketplace/en-US/product/nvidia-dlss",
"SupportURL": "mailto:DLSS-Support@nvidia.com",
"EngineVersion": "5.5.0",
"CanContainContent": false,
"Installed": true,
"Plugins": [
{
"Name": "StreamlineCore",
"Enabled": true
},
{
"Name": "StreamlineDLSSG",
"Enabled": true
},
{
"Name": "StreamlineDeepDVC",
"Enabled": true
},
{
"Name": "StreamlineReflex",
"Enabled": true
}
]
}

View File

@ -0,0 +1,3 @@
# NVIDIA Streamline 1.3.3 Legacy Compatibility plugin
This NVIDIA Streamline 1.3.3 Legacy Compatibility plugin doesn't contain any code; it only enables a few plugins so that projects developed against the 1.x.x family of plugins continue to work. Please refer to the documentation of those plugins for details on upgrading existing projects.

View File

@ -0,0 +1,8 @@
[CoreRedirects]
+ClassRedirects=(OldName="/Script/StreamlineBlueprint.StreamlineLibraryDLSSG",NewName="/Script/StreamlineDLSSGBlueprint.StreamlineLibraryDLSSG")
+ClassRedirects=(OldName="/Script/StreamlineBlueprint.StreamlineLibraryReflex",NewName="/Script/StreamlineReflexBlueprint.StreamlineLibraryReflex")
+EnumRedirects=(OldName="UStreamlineFeature",NewName="EStreamlineFeature")
+EnumRedirects=(OldName="UStreamlineFeatureSupport",NewName="EStreamlineFeatureSupport")
+EnumRedirects=(OldName="UStreamlineFeatureRequirementsFlags",NewName="EStreamlineFeatureRequirementsFlags")

View File

@ -0,0 +1,14 @@
[FilterPlugin]
; This section lists additional files which will be packaged along with your plugin. Paths should be listed relative to the root plugin directory, and
; may include "...", "*", and "?" wildcards to match directories, files, and individual characters respectively.
;
; Examples:
; /README.txt
; /Extras/...
; /Binaries/ThirdParty/*.dll
/Config/...
/Docs/*.md
/Docs/media/*.png

View File

@ -0,0 +1,118 @@
# Improvements and API changes in SL 1.5 and SL 2.0
> **NOTE**:
> This document contains the high level breakdown of changes introduced in SL 1.5/2.0 and how they compare to previous SL versions. For more details please read the full [programming guide](./ProgrammingGuide.md)
## SL 1.5
### RESOURCE STATE TRACKING
* SL no longer tracks resource states, host is responsible for providing correct states when tagging resources.
### COMMAND LIST STATE TRACKING
* SL by default disables any command list (CL) tracking, host is responsible for restoring CL state after each `slEvaluateFeature` call.
### DISABLING OF THE INTERPOSER
* SL disables interposer (DXGI/D3D proxies) unless they are **explicitly requested by at least one supported plugin**
### VERIFYING SL DLLs
* SL now includes `sl_security.h` header as part of the main SDK (no longer needed to download the SL sample SDK)
### OS VERSION DETECTION
* Switched to obtaining the **correct** version from the `kernel32.dll` product description instead of using various MS APIs which return different/incorrect versions
* Flag `PreferenceFlags::eBypassOSVersionCheck` in `sl::Preferences` can be used to opt-out from the OS detection (off by default and **highly discouraged to use**)
### OTA OPT-IN
* Expanded `sl::Preferences` to include `PreferenceFlags::eAllowOTA`, which automatically opts in to the OTA program (SL and NGX). This flag is set by default.
### DLSS PRESETS
* Host can change DL networks (presets) for the DLSS by modifying `sl::DLSSOptions`.
## SL 2.0
Streamline 2.0 **includes all the above mentioned v1.5 changes with the following additions**:
### FEATURE ID
* No longer provided as enum but rather as constant expression unsigned integers.
* Core feature IDs declared and defined in `sl.h` while specific feature IDs are now located in their respective header files.
### ENUM NAMING
* All enums have been converted to `enum class Name::eValue` format.
### ERROR REPORTING
* All SL functions now return `sl::Result` (new header `sl_result.h`)
* Still required to monitor error logging callbacks to catch every single possible error at the right time but this improves handling of the most common errors.
* Introduced helper macro `SL_FAILED`.
* Helper method added to convert `sl::Result` to a string.
### VERSIONING AND OTA
* New API `slGetFeatureVersion` returns both SL and NGX (if any) versions.
### REQUIREMENTS REPORTING
* Removed `slGetFeatureConfiguration`, which was returning JSON, since JSON is not always convenient for everyone.
* New API `slGetFeatureRequirements` is added to provide info about OS, driver, HWS, rendering API, VK extensions and other requirements
* Added `getVkPhysicalDeviceVulkan12Features` and `getVkPhysicalDeviceVulkan13Features` helper functions in the new `sl_helpers_vk.h` header.
### CONSTANTS VS SETTINGS
* Generic `slSetFeatureConsts` and `slGetFeatureSettings` API have been removed and new API `slGetFeatureFunction` has been added.
* Each SL feature exports set of functions which are used to perform feature specific task.
* New helper macro `SL_FEATURE_FUN_IMPORT` and helper functions in the related `sl_$feature.h` headers.
* Helper functions added to each per-feature header to make importing easy.
### IS FEATURE SUPPORTED AND ADAPTER BIT-MASK
* `slIsFeatureSupported` is modified to use adapter LUID which is easily obtained from DXGIAdapterDesc or VK physical device.
* Engines are already enumerating adapters so this should fit in nicely with the existing code.
* When using VK host can provide `VkPhysicalDevice` instead of LUID if needed.
### ACTUAL FPS
* Removed `actualFrameTimeMs` and `timeBetweenPresentsMs` and replaced with an integer value `numFramesActuallyPresented`.
### IMPROVED VIEWPORT SUPPORT FOR DLSS-G
* Host can specify any viewport id when calling `slDLSSGSetOptions` rather than forcing viewport 0 all the time.
### "NOT RENDERING GAME FRAMES" FLAG
* Removed completely since it has become redundant.
* Host is now required to turn DLSS-G on/off using `slDLSSGSetOptions`
### BUFFER COPIES AND TAGGING
* `slSetTag` API has been expanded to include command list and resource life-cycle information.
* If needed resources are automatically copied internally by SL.
* `slEvaluateFeature` can be used to tag resources locally (tags only valid for the specific evaluate call, see programming guide for details)
### FRAME ID
* New API `slGetNewFrameToken` is added to allow host to obtain frame handle for the next frame.
* Same handle is then passed around to all SL calls.
* If host wants to fully control frame counting the frame index can be provided as input
### MULTIPLE DEVICES
* New API `slSetD3DDevice` and `slSetVulkanInfo` can be used to specify which device SL should be using.
### OBTAINING NATIVE INTERFACES
* When using 3rd party libraries (including NVAPI) it is not advisable to pass SL proxies as inputs.
* New API `slGetNativeInterface` added to allow access to native interfaces as needed.
### MANUAL HOOKING
* Removed `slGetHook*` API
* New API `slGetNativeInterface` in combination with `slUpgradeInterface` is now used to significantly simplify manual hooking.

View File

@ -0,0 +1,164 @@
# Debugging with JSON Configs
> **NOTE:**
> This document applies to non-production, development builds only. JSON configuration is disabled in production builds.
> Additionally, you will need to turn off any checks for signed libraries when loading Streamline libraries in order to be able to load the non-production libraries.
## JSON Config File(s)
### Location of the JSON
Note that the `sl.interposer.json` file is loaded by finding the first copy in the following ordered list of paths:
1. The directory containing the application's executable.
2. The application's current working directory at the point at which the app calls `slInit`.
### "Commenting-out" Lines
Note that the example configuration JSON files (located in `./scripts/`) include some tags that are disabled, but visible as a form of "comment"; this is done by prefixing the correct/expected tag name with underscore (_):
Functional:
```json
{
"enableInterposer": false,
}
```
Non-functional "comment":
```json
{
"_enableInterposer": false,
}
```
## How to toggle SL on/off
Place the `sl.interposer.json` file (located in `./scripts/`) in the game's working directory. Edit the following line(s):
```json
{
"enableInterposer": false,
}
```
When the game starts, if the flag is set to off, interposing will be completely disabled. This can be used to check for added CPU overhead in games.
## How to force use of proxies
Place the `sl.interposer.json` file (located in `./scripts/`) in the game's working directory. Edit the following line(s):
```json
{
"forceProxies": true
}
```
> NOTE:
> This effectively forces `slGetNativeInterface` to return proxies all the time. Useful for debugging and redirecting/changing behavior by intercepting all APIs for command queues, lists, devices etc.
## How to track engine D3D12 allocations
Place the `sl.interposer.json` file (located in `./scripts/`) in the game's working directory. Edit the following line(s):
```json
{
"trackEngineAllocations": true
}
```
> NOTE:
> This only works for D3D12 at the moment.
## How to override plugin location
Place the `sl.interposer.json` file (located in `./scripts/`) in the game's working directory. Edit the following line(s):
```json
{
"pathToPlugins": "N:/My/Plugin/Path"
}
```
By default SL looks for plugins next to the executable or in the paths provided by the host application (see sl::Preferences). If `pathToPlugins` is provided in JSON it overrides all these settings.
> **NOTE:**
> The `sl.interposer.dll` still needs to reside in the game's working directory in order to be found and loaded properly. All other SL plugin dlls should reside in the path referenced in the `pathToPlugins` setting.
## How to override logging settings
Place the `sl.interposer.json` file (located in `./scripts/`) in the game's working directory. Edit the following line(s):
```json
{
"showConsole": true,
"logLevel": 2,
"logPath": "N:/My/Log/Path"
}
```
To modify NGX logging, place `sl.common.json` file (located in `./scripts/`) in the game's working directory. Edit the following line(s):
```json
{
"logLevelNGX": 2,
}
```
Log levels are `off` (0), `on` (1) and `verbose` (2). Default values come from the `sl::Preferences` structure set by the app.
> **NOTE:**
>
> NGX logging gets redirected to SL so NGX log files will NOT be generated.
>
> Logging overrides set via this JSON configuration will override any Streamline registry or environment variable logging overrides that are currently set.
## How to override feature allow list
Place the `sl.interposer.json` file (located in `./scripts/`) in the game's working directory. Edit the following line(s):
```json
{
"loadAllFeatures": true,
"loadSpecificFeatures": [0,1],
}
```
> **NOTE:**
> This entry tells the interposer to load all features or a specific subset using the unique Ids from `sl.h`. `loadAllFeatures` supersedes `loadSpecificFeatures` if set to true.
## How to override existing or add new hot-keys
Place the `sl.common.json` file (located in `./scripts/`) in the game's working directory. Edit the following line(s):
```json
{
"keys": [
{
"alt": false,
"ctrl": true,
"shift": true,
"key": 36,
"id": "stats"
},
{
"alt": false,
"ctrl": true,
"shift": true,
"key": 45,
"id": "debug"
}
]
}
```
Note that `"key"` lines specify the *decimal* number of the Windows Virtual Key Code (`VK_*`) for the desired key.
## How to override DLSS-G settings
Place the `sl.dlss_g.json` file (located in `./scripts/`) in the game's working directory. Edit the following line(s):
```json
{
"_comment_compute" : "use compute queue or not, if game crashes set this back to false since some drivers might be buggy",
"useCompute": true,
"_comment_frames" : "optimal defaults - 1 @4K, 2 @1440p, 3 @1080p",
"numFramesToGenerate": 1,
"_comment_mode" : "possible modes cur, prev, auto - async flush current or previous frame or decide automatically",
"mode" : "auto",
"showDebugText" : true
}
```

View File

@ -0,0 +1,75 @@
# Debugging with Streamline ImGUI
> **NOTE 1:**
> This document applies to non-production, development builds only. `sl.imgui` won't load in production builds.
> Additionally, you will need to turn off any checks for signed libraries when loading Streamline libraries in order to be able to load the non-production libraries.
## What SL ImGUI Does
At a high level, the `sl.imgui` plugin uses [`imgui`](https://github.com/ocornut/imgui) to show certain metrics/information about specific SL plugins that can be useful for validating and debugging your app integration.
The `sl.imgui` plugin is a wrapper around `imgui`. On plugin load, `sl.imgui` creates its own context and exposes functions for other plugins to:
* Build their UI
* Render their UI (via callbacks, or directly by calling the `sl::imgui::render()` function)
## Using SL ImGUI to debug existing plugins
### Summary
When running a non-`Production` build of SL, you should see the `imgui` pop-ups on the app screen.
*Note 1: plugins may **NOT** load their UI if they are not engaged/turned on from the app-side.*
*Note 2: you can toggle the `imgui` pop-ups with `Ctrl + Shift + Home` hotkey. Hotkey mappings can change in the future. In general, refer to the hotkey shortcuts at the bottom of the screen, or next to the UI control, for ground-truth hotkeys.*
Plugin | Debug information | Reference Image
---|---|---
Overall Streamline | - Bottom of screen: `imgui` debug menu keyboard shortcuts and warnings <br> - Right side of screen: `imgui` debug menu (Each plugin that builds a UI will have its UI show up here) <br> *Note: some apps won't let the mouse interact with the `imgui` menus. For those apps, it's best to change the controls to be hotkey-controllable* | <blockquote><details><summary>Overall UI</summary><img width="100%" src="./media/sl_imgui_collapsed_view_captions.png"></details></blockquote>
`sl.interposer` | - SDK build date <br> - SL SDK version | <blockquote><details><summary>`sl.interposer` UI</summary><img max-width="100%" height="auto" src="./media/sl_imgui_interposer.png"></details></blockquote>
`sl.common` | - System (OS, driver, GPU, etc.) <br> - Graphics API <br> - VRAM usage | <blockquote><details><summary>`sl.common` UI</summary><img max-width="100%" height="auto" src="./media/sl_imgui_common.png"></details></blockquote>
`sl.reflex` | - Mode/FPS cap <br> - Marker usage <br> - Stats on sleep time | <blockquote><details><summary>`sl.reflex` UI</summary><img max-width="100%" height="auto" src="./media/sl_imgui_reflex.png"></details></blockquote>
`sl.dlss` | - Version <br> - Mode <br> - Performance stats | <blockquote><details><summary>`sl.dlss` UI</summary><img max-width="100%" height="auto" src="./media/sl_imgui_dlss.png"></details></blockquote>
`sl.dlss_g` | - Version <br> - Mode <br> - FPS boost stats (i.e., `Scaling`) <br> - VRAM consumption <br> - Constants passed in through `sl.common` | <blockquote><details><summary>`sl.dlss_g` UI</summary><img max-width="100%" height="auto" src="./media/sl_imgui_dlssg.png"></details></blockquote>
`sl.nis` | - Mode <br> - Viewport dimensions <br> - Execution time on GPU | <blockquote><details><summary>`sl.nis` UI</summary><img max-width="100%" height="auto" src="./media/sl_imgui_nis.png"></details></blockquote>
`reflex-sync` | - **Ignore, NVIDIA Internal Only** | <blockquote><details><summary>`reflex-sync` UI</summary><img max-width="100%" height="auto" src="./media/sl_imgui_reflex_sync.png"></details></blockquote>
### ImGUI Buffer Visualizer
For certain plugins, debugging some GPU buffers can be done through `sl.imgui`. **For now, only `sl.dlss_g` supports this feature.**
#### Debugging buffers for `sl.dlssg`
*Note: debug hotkey mappings can change in the future. In general, refer to the hotkey shortcuts at the bottom of the screen, or next to the UI control, for ground-truth hotkeys.*
1. Turn on `dlssg` from the app-side, and verify that the `sl.imgui` pop-up shows that `dlssg` is **On**
2. Use the visualizer:
* *Turn on visualizer*: `Ctrl + Shift + Insert`
* *Cycle views*: `Ctrl + Shift + End`.
* *Turn off visualizer*: `Ctrl + Shift + Insert`
In addition to the `sl.dlssg` input buffers (e.g. depth, motion vectors, etc.), the visualizer should help you view the debug buffers:
Buffer | What it means | Correctness Interpretation | Reference Image
---|---|---|---
Alignment | Visualizes the alignment of depth, motion vectors, and color buffers using a Sobel filter | - Image should be mostly blue (color data) <br> - You should see yellow/green edges around moving objects, including when the camera is moving (mvec data) <br> - You should see red edges everywhere else (depth data) | <img max-width="100%" height="auto" src="./media/sl_imgui_dlssg_buffer_alignment.png">
Dynamic Objects | Visualizes pixels that have non-zero motion vector values. This excludes motion caused by camera movement | - Only (parts of) dynamic objects should be colored red (not due to camera movement!) <br> - All other pixels should be black/zero | <img max-width="100%" height="auto" src="./media/sl_imgui_dlssg_dynamic_objs.png">
## Adding SL ImGUI to new plugins
The `sl.common` plugin's usage of `sl.imgui` is an easy-to-follow example of how to add a plugin's UI (in this case, the `sl.common` UI) and render it. Implementing something similar is advised.
```
#ifndef SL_PRODUCTION
// 1. Check for UI and register our callback
imgui::ImGUI* ui{};
param::getPointerParam(api::getContext()->parameters, param::imgui::kInterface, &ui);
if (ui)
{
// 2. Define the UI building callback
auto renderUI = [](imgui::ImGUI* ui, bool finalFrame)->void
{
// Use `ui` to build buttons/text/sliders/etc.
};
// 3. Register the callback so sl::imgui can render it
ui->registerRenderCallbacks(renderUI, nullptr);
}
#endif
```

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,336 @@
Streamline - DLSS
=======================
>The focus of this guide is on using Streamline to integrate DLSS into an application. For more information about DLSS itself, please visit the [NVIDIA Developer DLSS Page](https://developer.nvidia.com/rtx/dlss)
>For information on user interface considerations when using the DLSS plugin, please see the ["RTX UI Developer Guidelines.pdf"](<RTX UI Developer Guidelines.pdf>) document included with this SDK.
Version 2.7.3
=======
### 1.0 INITIALIZE AND SHUTDOWN
Call `slInit` as early as possible (before any dxgi/d3d11/d3d12 APIs are invoked)
```cpp
#include <sl.h>
#include <sl_consts.h>
#include <sl_dlss.h>
sl::Preferences pref{};
pref.showConsole = true; // for debugging, set to false in production
pref.logLevel = sl::eLogLevelDefault;
pref.pathsToPlugins = {}; // change this if Streamline plugins are not located next to the executable
pref.numPathsToPlugins = 0; // change this if Streamline plugins are not located next to the executable
pref.pathToLogsAndData = {}; // change this to enable logging to a file
pref.logMessageCallback = myLogMessageCallback; // highly recommended to track warning/error messages in your callback
pref.applicationId = myId; // Provided by NVDA, required if using NGX components (DLSS 2/3)
pref.engineType = myEngine; // If using UE or Unity
pref.engineVersion = myEngineVersion; // Optional version
pref.projectId = myProjectId; // Optional project id
if(SL_FAILED(res, slInit(pref)))
{
// Handle error, check the logs
if(res == sl::Result::eErrorDriverOutOfDate) { /* inform user */}
// and so on ...
}
```
For more details please see [preferences](ProgrammingGuide.md#222-preferences)
Call `slShutdown()` before destroying dxgi/d3d11/d3d12/vk instances, devices and other components in your engine.
```cpp
if(SL_FAILED(res, slShutdown()))
{
// Handle error, check the logs
}
```
#### 1.1 SET THE CORRECT DEVICE
Once the main device is created call `slSetD3DDevice` or `slSetVulkanInfo`:
```cpp
if(SL_FAILED(res, slSetD3DDevice(nativeD3DDevice)))
{
// Handle error, check the logs
}
```
### 2.0 CHECK IF DLSS IS SUPPORTED
As soon as SL is initialized, you can check if DLSS is available for the specific adapter you want to use:
```cpp
Microsoft::WRL::ComPtr<IDXGIFactory> factory;
if (SUCCEEDED(CreateDXGIFactory(__uuidof(IDXGIFactory), (void**)&factory)))
{
Microsoft::WRL::ComPtr<IDXGIAdapter> adapter{};
uint32_t i = 0;
while (factory->EnumAdapters(i, &adapter) != DXGI_ERROR_NOT_FOUND)
{
DXGI_ADAPTER_DESC desc{};
if (SUCCEEDED(adapter->GetDesc(&desc)))
{
sl::AdapterInfo adapterInfo{};
adapterInfo.deviceLUID = (uint8_t*)&desc.AdapterLuid;
adapterInfo.deviceLUIDSizeInBytes = sizeof(LUID);
if (SL_FAILED(result, slIsFeatureSupported(sl::kFeatureDLSS, adapterInfo)))
{
// Requested feature is not supported on the system, fallback to the default method
switch (result)
{
case sl::Result::eErrorOSOutOfDate: // inform user to update OS
case sl::Result::eErrorDriverOutOfDate: // inform user to update driver
case sl::Result::eErrorNoSupportedAdapter: // cannot use this adapter (older or non-NVDA GPU etc)
// and so on ...
};
}
else
{
// Feature is supported on this adapter!
}
}
i++;
}
}
```
### 3.0 CHECK DLSS SETTINGS AND SETUP VIEWPORT RENDERING SIZE
Next, we need to find out the rendering resolution and the optimal sharpness level based on DLSS settings:
```cpp
// Using helpers from sl_dlss.h
sl::DLSSOptimalSettings dlssSettings;
sl::DLSSOptions dlssOptions;
// These are populated based on user selection in the UI
dlssOptions.mode = myUI->getDLSSMode(); // e.g. sl::eDLSSModeBalanced;
dlssOptions.outputWidth = myUI->getOutputWidth(); // e.g 1920;
dlssOptions.outputHeight = myUI->getOutputHeight(); // e.g. 1080;
// Now let's check what should our rendering resolution be
if(SL_FAILED(result, slDLSSGetOptimalSettings(dlssOptions, dlssSettings)))
{
// Handle error here
}
// Setup rendering based on the provided values in the sl::DLSSOptimalSettings structure
myViewport->setSize(dlssSettings.renderWidth, dlssSettings.renderHeight);
```
Note that the structure `sl::DLSSOptimalSettings` will upon return from `slDLSSGetOptimalSettings` contain information pertinent to DLSS dynamic resolution min and max source image sizes (if dynamic resolution is supported).
### 4.0 TAG ALL REQUIRED RESOURCES
DLSS requires depth, motion vectors, render-res input color and final-res output color buffers.
```cpp
// IMPORTANT: Make sure to mark resources which can be deleted or reused for other purposes within a frame as volatile
// Prepare resources (assuming d3d11/d3d12 integration so leaving Vulkan view and device memory as null pointers)
sl::Resource colorIn = {sl::ResourceType::Tex2d, myTAAUInput, nullptr, nullptr, nullptr};
sl::Resource colorOut = {sl::ResourceType::Tex2d, myTAAUOutput, nullptr, nullptr, nullptr};
sl::Resource depth = {sl::ResourceType::Tex2d, myDepthBuffer, nullptr, nullptr, nullptr};
sl::Resource mvec = {sl::ResourceType::Tex2d, myMotionVectorsBuffer, nullptr, nullptr, nullptr};
sl::Resource exposure = {sl::ResourceType::Tex2d, myExposureBuffer, nullptr, nullptr, nullptr};
sl::ResourceTag colorInTag = sl::ResourceTag {&colorIn, sl::kBufferTypeScalingInputColor, sl::ResourceLifecycle::eOnlyValidNow, &myExtent };
sl::ResourceTag colorOutTag = sl::ResourceTag {&colorOut, sl::kBufferTypeScalingOutputColor, sl::ResourceLifecycle::eOnlyValidNow, &myExtent };
sl::ResourceTag depthTag = sl::ResourceTag {&depth, sl::kBufferTypeDepth, sl::ResourceLifecycle::eValidUntilPresent, &fullExtent };
sl::ResourceTag mvecTag = sl::ResourceTag {&mvec, sl::kBufferTypeMvec, sl::ResourceLifecycle::eOnlyValidNow, &fullExtent };
sl::ResourceTag exposureTag = sl::ResourceTag {&exposure, sl::kBufferTypeExposure, sl::ResourceLifecycle::eOnlyValidNow, &my1x1Extent};
// Tag in group
sl::ResourceTag inputs[] = {colorInTag, colorOutTag, depthTag, mvecTag};
slSetTag(viewport, inputs, _countof(inputs), cmdList);
```
> **NOTE:**
> If dynamic resolution is used then please specify the extent for each tagged resource. Please note that SL **manages resource states so there is no need to transition tagged resources**.
> **NOTE:**
> If `sl::kBufferTypeExposure` is NOT provided or `dlssOptions.useAutoExposure` is set to be true then DLSS will be in auto-exposure mode (`NVSDK_NGX_DLSS_Feature_Flags_AutoExposure` will be set automatically)
### 5.0 PROVIDE DLSS OPTIONS
DLSS options must be set so that the DLSS plugin can track any changes made by the user:
```cpp
sl::DLSSOptions dlssOptions = {};
// Set preferred Render Presets per Perf Quality Mode. These are typically set one time
// and established while evaluating DLSS SR Image Quality for your Application.
// It will be set to DLSSPreset::eDefault if unspecified.
// Please Refer to section 3.12 of the DLSS Programming Guide for details.
dlssOptions.dlaaPreset = sl::DLSSPreset::ePresetA;
dlssOptions.qualityPreset = sl::DLSSPreset::ePresetD;
dlssOptions.balancedPreset = sl::DLSSPreset::ePresetD;
dlssOptions.performancePreset = sl::DLSSPreset::ePresetD;
dlssOptions.ultraPerformancePreset = sl::DLSSPreset::ePresetA;
// These are populated based on user selection in the UI
dlssOptions.mode = myUI->getDLSSMode(); // e.g. sl::eDLSSModeBalanced;
dlssOptions.outputWidth = myUI->getOutputWidth(); // e.g 1920;
dlssOptions.outputHeight = myUI->getOutputHeight(); // e.g. 1080;
dlssOptions.sharpness = dlssSettings.sharpness; // optimal sharpness
dlssOptions.colorBuffersHDR = sl::Boolean::eTrue; // assuming HDR pipeline
dlssOptions.useAutoExposure = sl::Boolean::eFalse; // autoexposure is not to be used if a proper exposure texture is available
dlssOptions.alphaUpscalingEnabled = sl::Boolean::eFalse; // experimental alpha upscaling, enable to upscale alpha channel of color texture
if(SL_FAILED(result, slDLSSSetOptions(viewport, dlssOptions)))
{
// Handle error here, check the logs
}
```
> **NOTE:**
> To turn off DLSS set `sl::DLSSOptions.mode` to `sl::DLSSMode::eOff`, note that this does NOT release any resources, for that please use `slFreeResources`
> **NOTE:**
> Set the DLSSOptions.useAutoExposure boolean to be true only if you want DLSS to be in auto-exposure mode. Also, it is strongly advised to provide exposure if a proper exposure texture is available.
> **NOTE:**
> Alpha upscaling (`DLSSOptions::alphaUpscalingEnabled`) is experimental, and will impact performance. This feature should be used only if the alpha channel of the color texture needs to be upscaled (if `eFalse`, only RGB channels will be upscaled).
### 6.0 PROVIDE COMMON CONSTANTS
Various per frame camera related constants are required by all Streamline features and must be provided ***if any SL feature is active and as early in the frame as possible***. Please keep in mind the following:
* All SL matrices are row-major and should not contain any jitter offsets
* If motion vector values in your buffer are in {-1,1} range then motion vector scale factor in common constants should be {1,1}
* If motion vector values in your buffer are NOT in {-1,1} range then motion vector scale factor in common constants must be adjusted so that values end up in {-1,1} range
```cpp
sl::Constants consts = {};
// Set motion vector scaling based on your setup
consts.mvecScale = {1,1}; // Values in eMotionVectors are in [-1,1] range
consts.mvecScale = {1.0f / renderWidth,1.0f / renderHeight}; // Values in eMotionVectors are in pixel space
consts.mvecScale = myCustomScaling; // Custom scaling to ensure values end up in [-1,1] range
// Set all other constants here
if(SL_FAILED(result, slSetConstants(consts, *frameToken, myViewport))) // constants are changing per frame so frame index is required
{
// Handle error, check logs
}
```
For more details please see [common constants](ProgrammingGuide.md#2101-common-constants)
### 7.0 ADD DLSS TO THE RENDERING PIPELINE
On your rendering thread, call `slEvaluateFeature` at the appropriate location where up-scaling is happening. Please note that when using `slSetTag`, `slSetConstants` and `slDLSSSetOptions` the `frameToken` and `myViewport` used in `slEvaluateFeature` **must match across all API calls**.
```cpp
// Make sure DLSS is available and user selected this option in the UI
if(useDLSS)
{
// NOTE: We can provide all inputs here or separately using slSetTag, slSetConstants or slDLSSSetOptions
// Inform SL that DLSS should be injected at this point for the specific viewport
const sl::BaseStructure* inputs[] = {&myViewport};
if(SL_FAILED(result, slEvaluateFeature(sl::kFeatureDLSS, *frameToken, inputs, _countof(inputs), myCmdList)))
{
// Handle error
}
else
{
// IMPORTANT: Host is responsible for restoring state on the command list used
restoreState(myCmdList);
}
}
else
{
// Default up-scaling pass like for example TAAU goes here
}
```
> **IMPORTANT:**
> Please note that **host is responsible for restoring the command buffer(list) state** after calling `slEvaluateFeature`. For more details on which states are affected please see [restore pipeline section](./ProgrammingGuideManualHooking.md#70-restoring-command-listbuffer-state)
### 8.0 MULTIPLE VIEWPORTS
Here is a code snippet showing one way of handling two viewports with explicit resource allocation and de-allocation:
```cpp
// Viewport1
{
// We need to setup our constants first so sl.dlss plugin has enough information
sl::DLSSOptions dlssOptions = {};
dlssOptions.mode = viewport1->getDLSSMode(); // e.g. sl::eDLSSModeBalanced;
dlssOptions.outputWidth = viewport1->getOutputWidth(); // e.g 1920;
dlssOptions.outputHeight = viewport1->getOutputHeight(); // e.g. 1080;
// Note that we are passing viewport id 1
slDLSSSetOptions(viewport1->id, dlssOptions);
// Set our tags, note that we are passing viewport id
setTag(viewport1->id, &tags1, numTags1);
// and so on ...
// Now we can allocate our feature explicitly, again passing viewport id
slAllocateResources(sl::kFeatureDLSS, viewport1->id);
// Evaluate DLSS on viewport1, again passing viewport id so we can map tags, constants correctly
//
// NOTE: If slAllocateResources is not called DLSS resources would be initialized at this point
slEvaluateFeature(sl::kFeatureDLSS, myFrameIndex, viewport1->id, nullptr, 0, myCmdList);
// Assuming the above evaluate call is still pending on the CL, make sure to flush it before releasing resources
flush(myCmdList);
// When we no longer need this viewport
slFreeResources(sl::kFeatureDLSS, viewport1->id);
}
// Viewport2
{
// We need to setup our constants first so sl.dlss plugin has enough information
sl::DLSSOptions dlssOptions = {};
dlssOptions.mode = viewport2->getDLSSMode(); // e.g. sl::eDLSSModeBalanced;
dlssOptions.outputWidth = viewport2->getOutputWidth(); // e.g 1920;
dlssOptions.outputHeight = viewport2->getOutputHeight(); // e.g. 1080;
// Note that we are passing viewport id 2
slDLSSSetOptions(viewport2->id, dlssOptions);
// Set our tags, note that we are passing viewport id
setTag(viewport2->id, &tags2, numTags2);
// and so on ...
// Now we can allocate our feature explicitly, again passing viewport id
slAllocateResources(sl::kFeatureDLSS, viewport2->id);
// Evaluate DLSS on viewport2, again passing viewport id so we can map tags, constants correctly
//
// NOTE: If slAllocateResources is not called DLSS resources would be initialized at this point
slEvaluateFeature(sl::kFeatureDLSS, myFrameIndex, viewport2->id, nullptr, 0, myCmdList);
// Assuming the above evaluate call is still pending on the CL, make sure to flush it before releasing resources
flush(myCmdList);
// When we no longer need this viewport
slFreeResources(sl::kFeatureDLSS, viewport2->id);
}
```
### 9.0 CHECK STATE AND VRAM USAGE
To obtain current state for a given viewport the following API can be used:
```cpp
sl::DLSSState dlssState{};
if(SL_FAILED(result, slDLSSGetState(viewport, dlssState)))
{
// Handle error here
}
// Check how much memory DLSS is using for this viewport
dlssState.estimatedVRAMUsageInBytes
```
### 10.0 TROUBLESHOOTING
If the DLSS output does not look right please check the following:
* If your motion vectors are in pixel space then scaling factors `sl::Constants::mvecScale` should be {1 / render width, 1 / render height}
* If your motion vectors are in normalized -1,1 space then scaling factors `sl::Constants::mvecScale` should be {1, 1}
* Make sure that jitter offset values are in pixel space
* `NVSDK_NGX_Parameter_FreeMemOnRelease` is replaced with `slFreeResources`
* `NVSDK_NGX_DLSS_Feature_Flags_MVLowRes` is handled automatically based on tagged motion vector buffer's size and extent.

View File

@ -0,0 +1,920 @@
Streamline - DLSS-G
=======================
NVIDIA DLSS Frame Generation (“DLSS-FG” or “DLSS-G”) is an AI based technology that infers frames based on rendered frames coming from a game engine or rendering pipeline. This document explains how to integrate DLSS-G into a renderer.
Version 2.7.3
=======
### 0.0 Integration checklist
See Section 15.0 for further details on some of these items, in addition to the Sections noted in the table below.
Item | Reference | Confirmed
---|---|---
All the required inputs are passed to Streamline: depth buffers, motion vectors, HUD-less color buffers | [Section 5.0](#50-tag-all-required-resources) |
Common constants and frame index are provided for **each frame** using slSetConstants and slSetFeatureConstants methods | [Section 7.0](#70-provide-common-constants) |
All tagged buffers are valid at frame present time, and they are not re-used for other purposes | [Section 5.0](#50-tag-all-required-resources) |
Buffers to be tagged with unique id 0 | [Section 5.0](#50-tag-all-required-resources) |
Make sure that frame index provided with the common constants is matching the presented frame | [Section 8.0](#80-integrate-sl-reflex) |
Inputs passed into Streamline look correct, as well as camera matrices and dynamic objects | [SL ImGUI guide](<Debugging - SL ImGUI (Realtime Data Inspection).md>) |
Application checks the signature of sl.interposer.dll to make sure it is a genuine NVIDIA library | [Streamline programming guide, section 2.1.1](./ProgrammingGuide.md#211-security) |
Requirements for Dynamic Resolution are met (if the game supports Dynamic Resolution) | [Section 10.0](#100-dlss-g-and-dynamic-resolution) |
DLSS-G is turned off (by setting `sl::DLSSGOptions::mode` to `sl::DLSSGMode::eOff`) when the game is paused, loading, in menu and in general NOT rendering game frames and also when modifying resolution & full-screen vs windowed mode | [Section 12.0](#120-dlss-g-and-dxgi) |
Swap chain is recreated every time DLSS-G is turned on or off (by changing `sl::DLSSGOptions::mode`) to avoid unnecessary performance overhead when DLSS-G is switched off | [Section 18.0](#180-how-to-avoid-unnecessary-overhead-when-dlss-g-is-turned-off) |
Reduce the amount of motion blur; when DLSS-G is enabled, halve the distance/magnitude of motion blur | N/A |
Reflex is properly integrated (see checklist in Reflex Programming Guide) | [Section 8.0](#80-integrate-sl-reflex) |
In-game UI for enabling/disabling DLSS-G is implemented | [RTX UI Guidelines](<RTX UI Developer Guidelines.pdf>) |
Only full production non-watermarked libraries are packaged in the release build | N/A |
No errors or unexpected warnings in Streamline and DLSS-G log files while running the feature | N/A |
Ensure extent resolution or resource size, whichever is in use, for `Hudless` and `UI Color and Alpha` buffers exactly match that of backbuffer. | N/A |
### 1.0 REQUIREMENTS
**NOTE - DLSS-G requires the following Windows versions/settings to run. The DLSS-G feature will fail to be available if these are not met. Failing any of these will cause DLSS-G to be unavailable, and Streamline will log an error:**
* Minimum Windows OS version of Win10 20H1 (version 2004, build 19041 or higher)
* Display Hardware-accelerated GPU Scheduling (HWS) must be enabled via Settings : System : Display : Graphics : Change default graphics settings.
### 2.0 INITIALIZATION AND SHUTDOWN
Call `slInit` as early as possible (before any d3d12/vk APIs are invoked)
```cpp
#include <sl.h>
#include <sl_consts.h>
#include <sl_dlss_g.h>
sl::Preferences pref;
pref.showConsole = true; // for debugging, set to false in production
pref.logLevel = sl::eLogLevelDefault;
pref.pathsToPlugins = {}; // change this if Streamline plugins are not located next to the executable
pref.numPathsToPlugins = 0; // change this if Streamline plugins are not located next to the executable
pref.pathToLogsAndData = {}; // change this to enable logging to a file
pref.logMessageCallback = myLogMessageCallback; // highly recommended to track warning/error messages in your callback
pref.applicationId = myId; // Provided by NVDA, required if using NGX components (DLSS 2/3)
pref.engineType = myEngine; // If using UE or Unity
pref.engineVersion = myEngineVersion; // Optional version
pref.projectId = myProjectId; // Optional project id
if(SL_FAILED(res, slInit(pref)))
{
// Handle error, check the logs
if(res == sl::Result::eErrorDriverOutOfDate) { /* inform user */}
// and so on ...
}
```
For more details please see [preferences](ProgrammingGuide.md#222-preferences)
Call `slShutdown()` before destroying dxgi/d3d12/vk instances, devices and other components in your engine.
```cpp
if(SL_FAILED(res, slShutdown()))
{
// Handle error, check the logs
}
```
#### 2.1 SET THE CORRECT DEVICE
Once the main device is created call `slSetD3DDevice` or `slSetVulkanInfo`:
```cpp
if(SL_FAILED(res, slSetD3DDevice(nativeD3DDevice)))
{
// Handle error, check the logs
}
```
### 3.0 CHECK IF DLSS-G IS SUPPORTED
As soon as SL is initialized, you can check if DLSS-G is available for the specific adapter you want to use:
```cpp
Microsoft::WRL::ComPtr<IDXGIFactory> factory;
if (SUCCEEDED(CreateDXGIFactory(__uuidof(IDXGIFactory), (void**)&factory)))
{
Microsoft::WRL::ComPtr<IDXGIAdapter> adapter{};
uint32_t i = 0;
while (factory->EnumAdapters(i, &adapter) != DXGI_ERROR_NOT_FOUND)
{
DXGI_ADAPTER_DESC desc{};
if (SUCCEEDED(adapter->GetDesc(&desc)))
{
sl::AdapterInfo adapterInfo{};
adapterInfo.deviceLUID = (uint8_t*)&desc.AdapterLuid;
adapterInfo.deviceLUIDSizeInBytes = sizeof(LUID);
if (SL_FAILED(result, slIsFeatureSupported(sl::kFeatureDLSS_G, adapterInfo)))
{
// Requested feature is not supported on the system, fallback to the default method
switch (result)
{
case sl::Result::eErrorOSOutOfDate: // inform user to update OS
case sl::Result::eErrorDriverOutOfDate: // inform user to update driver
case sl::Result::eErrorNoSupportedAdapter: // cannot use this adapter (older or non-NVDA GPU etc)
// and so on ...
};
}
else
{
// Feature is supported on this adapter!
}
}
i++;
}
}
```
#### 3.1 CHECKING DLSS-G'S CONFIGURATION AND SPECIAL REQUIREMENTS
In order for DLSS-G to work correctly certain requirements regarding the OS, driver and other settings on user's machine must be met. To obtain DLSS-G configuration and check if all requirements are met you can use the following code snippet:
```cpp
sl::FeatureRequirements requirements{};
if (SL_FAILED(result, slGetFeatureRequirements(sl::kFeatureDLSS_G, requirements)))
{
// Feature is not requested on slInit or failed to load, check logs, handle error
}
else
{
// Feature is loaded, we can check the requirements
requirements.flags & FeatureRequirementFlags::eD3D12Supported
requirements.flags & FeatureRequirementFlags::eVulkanSupported
requirements.maxNumViewports
// and so on ...
}
```
> **NOTE:**
> DLSS-G runs optical flow in interop mode in Vulkan by default. In order to leverage potential performance benefit of running optical flow natively in Vulkan, client must meet the minimum requirements of Nvidia driver version being 527.64 on Windows and 525.72 on Linux and VK_API_VERSION_1_1 (recommended version - VK_API_VERSION_1_3).
> In manual hooking mode, it must meet additional requirements as described in section 5.2.1 of ProgrammingGuideManualHooking.md.
### 4.0 HANDLE MULTIPLE SWAP-CHAINS
DLSS-G will automatically attach to any swap-chain created by the application **unless manual hooking is used**. In the editor mode there could be multiple swap-chains but DLSS-G should attach only to the main one where frame interpolation is used.
Here is how DLSS-G could be enabled only on a single swap-chain:
```cpp
// This is just one example, swap-chains can be created at any point in time and in any order.
// SL features also can be loaded/unloaded at any point in time and in any order.
// Unload DLSS-G (this can be done at any point in time and as many times as needed)
slSetFeatureLoaded(sl::kFeatureDLSS_G, false);
// Create swap chains for which DLSS-G is NOT required
IDXGISwapChain1* swapChain{};
factory->CreateSwapChainForHwnd(device, hWnd, desc, nullptr, nullptr, &swapChain);
// and so on
// Load DLSS-G (this can be done at any point in time and as many times as needed)
slSetFeatureLoaded(sl::kFeatureDLSS_G, true);
// Create main swap chains for which DLSS-G is required
IDXGISwapChain1* mainSwapChain{};
factory->CreateSwapChainForHwnd(device, hWnd, desc, nullptr, nullptr, &mainSwapChain);
// From this point onward DLSS-G will automatically manage only mainSwapChain, other swap-chains use standard DXGI implementation
```
### 5.0 TAG ALL REQUIRED RESOURCES
#### **Buffers to tag**
DLSS-G requires `depth` and `motion vectors` buffers.
If DLSS-G needs to run only on a subregion of the final color buffer, hereafter referred to as the backbuffer subrect, then the backbuffer must be tagged. This tag is needed only to pass in the backbuffer subrect info; passing in the backbuffer resource pointer is optional. Refer to [Tagging Recommendations section](#tagging-recommendations) below for details.
Additionally, for maximal image quality, it is **critical** to integrate `UI Color and Alpha` or `Hudless` buffers:
* `UI Color and Alpha` buffer provides significant image quality improvements on UI elements like name plates and on-screen hud. If your application/game has this available, we strongly recommend you integrate this buffer.
* If `UI Color and Alpha` is not available, `Hudless` integration can also significantly improve image quality on UI elements.
* Extent resolution or resource size, whichever is in use, for `Hudless` and `UI Color and Alpha` buffers should exactly match that of backbuffer.
Input | Requirements/Recommendations | Reference Image
---|---|---
Final Color | - *No requirements, this is intercepted automatically via SL's SwapChain API* | ![dlssg_final_color](./media/dlssg_docs_final_color.png "DLSSG Input Example: Final Color")
Final Color Subrect | - Subregion of the final color buffer to run frame-generation on. <br> - Subrect-external backbuffer region is copied as is to the generated frame. <br> - Tag backbuffer optionally, only to pass in backbuffer subrect info. <br> - Extent resolution or resource size, whichever is in use, for `Hudless` and `UI Color and Alpha` buffers should exactly match that of backbuffer. <br> - Refer to [Tagging Recommendations section](#tagging-recommendations) below for details. | ![dlssg_final_color_subrect](./media/dlssg_docs_final_color_subrect.png "DLSSG Input Example: Final Color Subrect")
Depth | - Same depth data used to generate motion vector data <br> - `sl::Constants` depth-related data (e.g. `depthInverted`) should be set accordingly<br> - *Note: this is the same set of requirements as DLSS-SR, and the same depth can be used for both* | ![dlssg_depth](./media/dlssg_docs_depth.png "DLSSG Input Example: Depth")
Motion Vectors | - Dense motion vector field (i.e. includes camera motion, and motion of dynamic objects) <br> - *Note: this is the same set of requirements as DLSS-SR, and the same motion vectors can be used for both* | ![dlssg_mvec](./media/dlssg_docs_mvec.png "DLSSG Input Example: Motion Vectors")
Hudless | - Should contain the full viewable scene, **without any HUD/UI elements in it**. If some HUD/UI elements are unavoidably included, expect some image quality degradation on those elements <br> - Same color space and post-processing effects (e.g tonemapping, blur etc.) as color backbuffer <br> - When appropriate buffer extents are *not* provided, needs to have the same dimensions as the color backbuffer <br> | ![dlssg_hudless](./media/dlssg_docs_hudless.png "DLSSG Input Example: Hudless")
UI Color and Alpha | - Should **only** contain pixels that denote the UI/HUD, along with appropriate alpha values (described below) <br> - Alpha is *zero* on all pixels that do *not* have UI on them <br> - Alpha is *non-zero* on all pixels that do have UI on them <br> - RGB is premultiplied by alpha, and is as close as possible to respecting the following blending formula: `Final_Color.RGB = UI.RGB + (1 - UI.Alpha) x Hudless.RGB` <br> - When appropriate buffer extents are *not* provided, needs to have the same dimensions as the color backbuffer <br> | ![dlssg_ui_color_and_alpha](./media/dlssg_docs_ui_color_and_alpha.png "DLSSG Input Example: UI Color and Alpha")
Bidirectional Distortion Field | - Optional buffer, **only needed when strong distortion effects are applied as post-processing filters** <br> - Refer to [pseudo-code below](#bidirectional-distortion-field-buffer-generation-code-sample) for an example on how to generate this optional buffer <br> - When this buffer is tagged, Mvec and Depth need to be **undistorted** <br> - When this buffer is tagged, the FinalColor should be **distorted** <br> - When this buffer is tagged, Hudless and UIColorAndAlpha need to be such that `Blend(Hudless, UIColorAndAlpha) = FinalColor`. This may mean that Hudless needs to be equally distorted, and in rare cases that UIColorAndAlpha is also equally distorted <br> - **Resolution**: we recommend using half of the FinalColor's resolution's width and height <br> - **Channel count**: 4 channels <br> - **RG channels**: UV coordinates of the corresponding **undistorted** pixel, as an offset relative to the source UV coordinate <br> - **BA channels**: UV coordinates of the corresponding **distorted** pixel, as an offset relative to the source UV coordinate <br> - **Units**: the buffer values should be in normalized pixel space `[0,1]`. These should be the same scale as the input MVecs <br> - **Channel precision and format:** Signed format, equal bit-count per channel (i.e. R10G10B10A2 is NOT allowed). We recommend a minimum of 8 bits per channel, with precision scale and bias (`PrecisionInfo`) passed in as part of the `ResourceTag` | <center>**Barrel distortion, RGB channels** ![dlssg_bidirectional_distortion_field](./media/dlssg_docs_bidirectional_distortion_field.png "DLSSG Input Example: Bidirectional Distortion Field") <br><br> <center>**Barrel distortion, absolute value of RG channels** ![dlssg_docs_bidirectional_distortion_field_rg_abs](./media/dlssg_docs_bidirectional_distortion_field_rg_abs.png "DLSSG Input Example: Bidirectional Distortion Field, RG channels, Absolute value")
#### **Tagging recommendations**
**For all buffers**: tagged buffers are used during the `Swapchain::Present` call. **If the tagged buffers are going to be reused, destroyed or changed in any way before the frame is presented, their life-cycle needs to be specified correctly**.
It is important to emphasize that **the overuse of `sl::ResourceLifecycle::eOnlyValidNow` and `sl::ResourceLifecycle::eValidUntilEvaluate` can result in wasted VRAM**. Therefore please do the following:
* First tag all of the DLSS-G inputs as `sl::ResourceLifecycle::eValidUntilPresent` then test and see if DLSS-G is working correctly.
* Only if you notice that one or more of the inputs (depth, mvec, hud-less, ui etc.) has incorrect content at the `present frame` time, should you proceed and flag them as `sl::ResourceLifecycle::eOnlyValidNow` or `sl::ResourceLifecycle::eValidUntilEvaluate` as appropriate.
In order to run DLSS-G on final color subrect region:
* It is required to tag backbuffer to pass-in subrect data.
* Only buffer type - `kBufferTypeBackbuffer` and backbuffer extent data are required to be passed in when setting the tag for backbuffer; the rest of the other inputs to sl::ResourceTag are optional. This implies passing in NULL backbuffer resource pointer is valid because SL already has knowledge about the backbuffer being presented.
* If a valid backbuffer resource pointer is passed in when tagging:
* SL will hold a reference to it until a null tag is set.
* SL will warn if it doesn't match the SL-provided backbuffer resource being presented.
> NOTE:
> SL will hold a reference to all `sl::ResourceLifecycle::eValidUntilPresent` resources until a null tag is set, therefore the application will not crash if host releases tagged resource before `present frame` event is reached. This does not apply to Vulkan.
```cpp
// IMPORTANT:
//
// Resource state for the immutable resources needs to be correct when tagged resource is used by SL - during the Present call
// Resource state for the volatile resources needs to be correct for the command list used to tag the resource - SL will make a copy which is later on used by DLSS-G during the Present call
//
// GPU payload that generates content for any volatile resource MUST be either already submitted to the provided command list or some other command list which is guaranteed to be executed BEFORE.
// Prepare resources (assuming d3d12 integration so leaving Vulkan view and device memory as null pointers)
//
// NOTE: As an example we are tagging depth as immutable and mvec as volatile, this needs to be adjusted based on how your engine works
sl::Resource depth = {sl::ResourceType::Tex2d, myDepthBuffer, nullptr, nullptr, depthState, nullptr};
sl::Resource mvec = {sl::ResourceType::Tex2d, myMotionVectorsBuffer, nullptr, mvecState, nullptr, nullptr};
sl::ResourceTag depthTag = sl::ResourceTag {&depth, sl::kBufferTypeDepth, sl::ResourceLifecycle::eValidUntilPresent, &fullExtent }; // valid all the time
sl::ResourceTag mvecTag = sl::ResourceTag {&mvec, sl::kBufferTypeMvec, sl::ResourceLifecycle::eOnlyValidNow, &fullExtent }; // reused for something else later on
// Normally depth and mvec are available at a similar point in the pipeline so tagging them together
// If this is not the case simply tag them separately when they are available
sl::Resource inputs[] = {depthTag, mvecTag};
slSetTag(viewport, inputs, _countof(inputs), cmdList);
// Tag backbuffer only to pass in backbuffer subrect info
sl::Extent backBufferSubrectInfo {128, 128, 512, 512}; // backbuffer subrect info to run FG on.
sl::ResourceTag backbufferTag = sl::ResourceTag {nullptr, sl::kBufferTypeBackbuffer, sl::ResourceLifecycle{}, &backBufferSubrectInfo };
sl::Resource inputs[] = {backbufferTag};
slSetTag(viewport, inputs, _countof(inputs), cmdList);
// After post-processing pass but before UI/HUD is added tag the hud-less buffer
//
sl::Resource hudLess = {sl::ResourceType::Tex2d, myHUDLessBuffer, nullptr, nullptr, hudlessState, nullptr};
sl::ResourceTag hudLessTag = sl::ResourceTag {&hudLess, sl::kBufferTypeHUDLessColor, sl::ResourceLifecycle::eValidUntilPresent, &fullExtent }; // valid all the time
sl::Resource inputs[] = {hudLessTag};
slSetTag(viewport, inputs, _countof(inputs), cmdList);
// UI buffer with color and alpha channel
//
sl::Resource ui = {sl::ResourceType::Tex2d, myUIBuffer, nullptr, nullptr, uiTextureState, nullptr};
sl::ResourceTag uiTag = sl::ResourceTag {&ui, sl::kBufferTypeUIColorAndAlpha, sl::ResourceLifecycle::eValidUntilPresent, &fullExtent }; // valid all the time
sl::Resource inputs[] = {uiTag};
slSetTag(viewport, inputs, _countof(inputs), cmdList);
// OPTIONAL! Only need the Bidirectional distortion field when strong distortion effects are applied during post-processing
//
sl::Resource bidirectionalDistortionField = {sl::ResourceType::Tex2d, myBidirectionalDistortionBuffer, nullptr, nullptr, bidirectionalDistortionState};
sl::ResourceTag bidirectionalDistortionTag = sl::ResourceTag {&bidirectionalDistortionField, sl::kBufferTypeBidirectionalDistortionField, sl::ResourceLifecycle::eValidUntilPresent, &fullExtent }; // valid all the time
sl::Resource inputs[] = {bidirectionalDistortionTag};
slSetTag(viewport, inputs, _countof(inputs), cmdList);
```
> **NOTE:**
> If dynamic resolution is used then please specify the extent for each tagged resource. Please note that SL **manages resource states so there is no need to transition tagged resources**.
> **IMPORTANT:**
> If validity of tagged resources cannot be guaranteed (for example game is loading, paused, in menu, playing a video cut scene etc.) **all tags should be set to null pointers to avoid stability or IQ issues**.
#### **Multiple viewports**
DLSS-G supports multiple viewports. Resources for each viewport must be tagged independently. Our SL Sample ( https://github.com/NVIDIAGameWorks/Streamline_Sample ) supports multiple viewports. Check the sample for recommended best practices on how to do it. The idea is that resource tags for different resources are independent
from each other. For instance - if you have two viewports, there must be two slSetTag() calls. Input resource for one viewport may be different from the input resource
for another viewport. However - all viewports do write into the same backbuffer.
Note that DLSS-G doesn't support multiple swap chains at the moment. So all viewports must write into the same backbuffer.
#### **Bidirectional Distortion Field buffer generation code sample**
The following HLSL code snippet demonstrates the generation of the bidirectional distortion field buffer. The example distortion illustrated is barrel distortion.
```cpp
const float distortionAlpha = -0.5f;
float2 barrelDistortion(float2 UV)
{
// Barrel distortion assumes UVs relative to center (0,0), so we transform
// to [-1, 1]
float2 UV11 = (UV * 2.0f) - 1.0f;
// Squared norm of distorted distance to center
float r2 = UV11.x * UV11.x + UV11.y * UV11.y;
// Reference: http://www.cs.ait.ac.th/~mdailey/papers/Bukhari-RadialDistortion.pdf
float x = UV11.x / (1.0f + distortionAlpha * r2);
float y = UV11.y / (1.0f + distortionAlpha * r2);
// Transform back to [0, 1]
float2 outUV = float2(x, y);
return (outUV + 1.0f) / 2.0f;
}
float2 inverseBarrelDistortion(float2 UV)
{
// Barrel distortion assumes UVs relative to center (0,0), so we transform
// to [-1, 1]
float2 UV11 = (UV * 2.0f) - 1.0f;
// Squared norm of undistorted distance to center
float ru2 = UV11.x * UV11.x + UV11.y * UV11.y;
// Solve for distorted distance to center, using quadratic formula
float num = sqrt(1.0f - 4.0f * distortionAlpha * ru2) - 1.0f;
float denom = 2.0f * distortionAlpha * sqrt(ru2);
float rd = -num / denom;
// Reference: http://www.cs.ait.ac.th/~mdailey/papers/Bukhari-RadialDistortion.pdf
float x = UV11.x * (rd / sqrt(ru2));
float y = UV11.y * (rd / sqrt(ru2));
// Transform back to [0, 1]
float2 outUV = float2(x, y);
return (outUV + 1.0f) / 2.0f;
}
float2 generateBidirectionalDistortionField(Texture2D output, float2 UV)
{
// Assume UV is in [0, 1]
float2 rg = barrelDistortion(UV) - UV;
float2 ba = inverseBarrelDistortion(UV) - UV;
// rg and ba needs to be in the same canonical format as the motion vectors
// i.e. a displacement of rg or ba needs to to be in the same scale as (Mvec.x, Mvec.y)
// The output can be outside of the [0, 1] range
Texture2D[UV] = float4(rg, ba); // needs to be signed
}
```
This HLSL code snippet uses an iterative Newton-Raphson method to solve the inverse distortion problem. It is designed to be used directly in shader code, especially when an analytical solution is not available. While the method is effective, it does not guarantee convergence for all distortion functions, so users should verify its suitability for their specific use case.
```cpp
float2 myDistortion(float2 xy)
{
// The distortion function
}
float loss(float2 Pxy, float2 ab)
{
float2 Pab = myDistortion(ab);
float2 delta = Pxy - Pab;
return dot(delta, delta);
}
float2 iterativeInverseDistortion(float2 UV)
{
const float kTolerance = 1e-6f;
const float kGradDelta = 1e-6f; // The delta used for gradient estimation
const int kMaxIterations = 5; // Select a low number of iterations which minimizes the loss
const int kImprovedInitialGuess = 1; // Assume a locally uniform distortion field
float2 ab = UV; // initial guess
if (kImprovedInitialGuess)
{
ab = UV - (myDistortion(UV) - UV);
}
for (int i = 0; i < kMaxIterations; ++i)
{
float F = loss(UV, ab);
// Central difference
const float Fabx1 = loss(UV, ab + float2(kGradDelta * 0.5f, 0));
const float Fabx0 = loss(UV, ab - float2(kGradDelta * 0.5f, 0));
const float Faby1 = loss(UV, ab + float2(0, kGradDelta * 0.5f));
const float Faby0 = loss(UV, ab - float2(0, kGradDelta * 0.5f));
float2 grad;
grad.x = (Fabx1 - Fabx0) / kGradDelta;
grad.y = (Faby1 - Faby0) / kGradDelta;
const float norm = grad.x * grad.x + grad.y * grad.y;
if (abs(norm) < kTolerance) {
break;
}
float delta_x = F * grad.x / norm;
float delta_y = F * grad.y / norm;
ab.x = ab.x - delta_x;
ab.y = ab.y - delta_y;
}
return ab;
}
```
### 6.0 SET DLSS-G OPTIONS
`slDLSSGSetOptions()` takes effect in the DXGI / VK Present call that follows it. As such, it should not be considered thread safe with respect to that Present call, i.e. the application is expected to add any necessary synchronization logic to ensure that all `slDLSSGSetOptions()` and `Present()` calls are received by Streamline in the correct order.
#### 6.1 ENABLING MULTI-FRAME GENERATION
**NOTE: Not all devices support multi-frame generation. To check whether multi-frame generation is supported, use `slDLSSGGetState`. See [Section 14.0](#140-how-to-check-dlss-g-status-at-runtime) for more details.**
By default, the DLSS-G plugin generates a single frame per `Present()` call.
Setting `numFramesToGenerate` to a value greater than one causes DLSS-G to
generate that many frames per `Present()`. For example, if `numFramesToGenerate`
is 3, then Streamline will present four frames each time `Present()` is called:
three generated frames, plus the frame presented by the host.
Note that the value of `numFramesToGenerate` must be between 1 and the maximum
number of generated frames, as reported in
`sl::DLSSGState::numFramesToGenerateMax` (inclusive). Attempting to set a value
outside this range will result in `Result::eErrorInvalidState`.
#### 6.2 TURNING DLSS-G ON/OFF/AUTO
**NOTE: By default DLSS-G interpolation is off, even if the feature is loaded and the required items tagged. DLSS-G must be explicitly turned on by the application using the DLSS-G-specific constants function.**
DLSS-G options must be set so that the DLSS-G plugin can track any changes made by the user, and to enable DLSS-G interpolation. To enable interpolation, be sure to set `mode` to `sl::DLSSGMode::eOn` or `sl::DLSSGMode::eAuto` if using [Dynamic Frame Generation](#220-dynamic-frame-generation). While DLSS-G can be turned on/off/auto in development builds via a hotkey, it is best for the application not to rely on this, even during development.
```cpp
// Using helpers from sl_dlss_g.h
sl::DLSSGOptions options{};
// These are populated based on user selection in the UI
options.mode = myUI->getDLSSGMode(); // e.g. sl::DLSSGMode::eOn;
// IMPORTANT: Note that we are using IDENTICAL viewport as when tagging our resources
if(SL_FAILED(result, slDLSSGSetOptions(viewport, options)))
{
// Handle error here, check the logs
}
```
**When to disable DLSS-G**
- Temporary Events (may retain resources, see below):
- A fullscreen game menu is entered
- A translucent UI element is overlaid over the majority of the screen (ex: game leaderboard)
- Persistent Events (must not retain resources):
- A user has turned off DLSS-G via a settings menu
- A console command has been used to turn off DLSS-G
#### 6.3 RETAINING RESOURCES WHEN DLSS-G IS OFF
Setting `sl::DLSSGOptions.mode` to `sl::DLSSGMode::eOff` releases all resources
allocated by DLSS-G. These resources will be reallocated when the mode is
changed back to `sl::DLSSGMode::eOn`, which may result in small stutter.
Applications should use the `sl::DLSSGFlags::eRetainResourcesWhenOff` flag to
instruct DLSS-G to not release resources when turned off. Note that to release
DLSS-G resources when this flag is set, `slFreeResources()` must be called. This
must be done whenever DLSS-G is explicitly disabled (for example, via a settings
menu or console command).
**Note:** DLSS-G will continue to automatically allocate/free resources on
events like resolution changes. The `sl::DLSSGFlags::eRetainResourcesWhenOff`
flag has no effect on these implicit events.
#### 6.4 AUTOMATICALLY DISABLING DLSS-G IN MENUS
If `kBufferTypeUIColorAndAlpha` is provided, DLSS-G can automatically detect
fullscreen menus and turn off automatically. To enable automatic fullscreen menu
detection, set the `sl::DLSSGFlags::eEnableFullscreenMenuDetection` flag.
This flag may be changed on a per-frame basis to disable detection on specific
scenes, for example.
Since this approach may not detect menus in all cases, it is still preferred to
disable DLSS-G manually, by setting the mode to `sl::DLSSGMode::eOff`.
**Note:** when DLSS-G is disabled by fullscreen menu detection, its resources
will _always_ be retained, regardless of the value of the
`sl::DLSSGFlags::eRetainResourcesWhenOff` flag.
#### 6.5 HOW TO SETUP A CALLBACK TO RECEIVE API ERRORS (OPTIONAL)
DLSS-G intercepts `IDXGISwapChain::Present` calls (and, when using Vulkan, `vkQueuePresentKHR` and `vkAcquireNextImageKHR` calls) and executes them asynchronously. When calling these methods from the host side, SL will return the "last known error", but in order to obtain the per-call API error you must provide an API error callback. Here is how this can be done:
```cpp
// Triggered immediately upon return from the API call but ONLY if return code != 0
void myAPIErrorCallback(const sl::APIError& e)
{
// Handle error, use e.hres with DirectX and e.vkRes on Vulkan
// IMPORTANT: STORE ERROR AND RETURN IMMEDIATELY TO AVOID STALLING PRESENT THREAD
};
sl::DLSSGOptions options{};
// Constants are populated based on user selection in the UI
options.mode = myUI->getDLSSGMode(); // e.g. sl::eDLSSGModeOn;
options.onErrorCallback = myAPIErrorCallback;
if(SL_FAILED(result, slDLSSGSetOptions(viewport, options)))
{
// Handle error here, check the logs
}
```
> **NOTE:**
> API error callbacks are triggered from the Present thread and **must not be blocked** for a prolonged period of time.
> **IMPORTANT:**
> THIS IS OPTIONAL AND ONLY NEEDED IF YOU ARE ENCOUNTERING ISSUES AND NEED TO PROCESS SPECIFIC ERRORS RETURNED BY THE VULKAN OR DXGI API
### 7.0 PROVIDE COMMON CONSTANTS
Various per frame camera related constants are required by all Streamline features and must be provided ***if any SL feature is active and as early in the frame as possible***. Please keep in mind the following:
* All SL matrices are row-major and should not contain any jitter offsets
* If motion vector values in your buffer are in the [-1,1] range then the motion vector scale factor in common constants should be {1,1}
* If motion vector values in your buffer are NOT in the [-1,1] range then the motion vector scale factor in common constants must be adjusted so that values end up in the [-1,1] range
```cpp
sl::Constants consts = {};
// Set motion vector scaling based on your setup
consts.mvecScale = {1,1}; // Values in eMotionVectors are in [-1,1] range
consts.mvecScale = {1.0f / renderWidth,1.0f / renderHeight}; // Values in eMotionVectors are in pixel space
consts.mvecScale = myCustomScaling; // Custom scaling to ensure values end up in [-1,1] range
sl::Constants consts = {};
// Set all constants here
//
// Constants are changing per frame tracking handle must be provided
if(!setConstants(consts, *frameToken, viewport))
{
// Handle error, check logs
}
```
For more details please see [common constants](ProgrammingGuide.md#2101-common-constants)
### 8.0 INTEGRATE SL REFLEX
**It is required** for sl.reflex to be integrated in the host application. **Please note that any existing regular Reflex SDK integration (not using Streamline) cannot be used by DLSS-G**. Special attention should be paid to the markers `eReflexMarkerPresentStart` and `eReflexMarkerPresentEnd`, which must provide the correct frame index so that it can be matched to the one provided in [section 7](#70-provide-common-constants).
For more details please see [Reflex guide](ProgrammingGuideReflex.md)
> **IMPORTANT:**
> If you see a warning in the SL log stating that `common constants cannot be found for frame N` that indicates that sl.reflex markers `eReflexMarkerPresentStart` and `eReflexMarkerPresentEnd` are out of sync with the actual frame being presented.
### 9.0 DLSS-G DEVELOPMENT HOTKEYS
When using non-production (development) builds of `sl.dlss_g.dll`, there are numerous hotkeys available, all of which can be remapped using the remapping methods described in [debugging](<Debugging - JSON Configs (Plugin Configs).md>)
* `"dlssg-sync"` (default `VK_END`)
* Toggle delaying the presentation of the next frame to experiment with minimizing latency
* `"vsync"` (default `Shift-Ctrl-'1'`)
* Toggle vsync on output swapchain
* `"debug"` (default `Shift-Ctrl-VK_INSERT`)
* Toggle debugging view
* `"stats"` (default `Shift-Ctrl-VK_HOME`)
* Toggle performance stats
* `"dlssg-toggle"` (default `VK_OEM_2` `/?` for US)
* Toggle DLSS-G on/off/auto (override app setting)
* `"write-stats"` (default `Ctrl-Alt-'O'`)
* Write performance stats to file
### 10.0 DLSS-G AND DYNAMIC RESOLUTION
DLSS-G supports dynamic resolution of the MVec and Depth buffer extents. Dynamic resolution may be done via DLSS or an app-specific method. Since DLSS-G uses the final color buffer with all post-processing complete, the color buffer, or its subrect if in use, must be a fixed size -- it cannot resize per-frame. When DLSS-G dynamic resolution mode is enabled, the application can pass in a differently-sized extent for the MVec and Depth buffers on a per-frame basis. This allows the application to dynamically change its rendering load smoothly.
There are a few requirements when using dynamic resolution with DLSS-G:
* The application must set the flag `sl::DLSSGFlags::eDynamicResolutionEnabled` in `sl::DLSSGOptions::flags` when dynamic resolution is active. It should clear the flag when/if dynamic resolution is disabled. *DO NOT* leave the dynamic resolution flag set when using fixed-ratio DLSS, as it may decrease performance or image quality.
* The application should specify `sl::DLSSGOptions::dynamicResWidth` and `sl::DLSSGOptions::dynamicResHeight` to a target resolution in the range of the dynamic MVec and Depth buffer sizes.
* This is the fixed resolution at which DLSS-G will process the MVec and Depth buffers.
* This value must not change dynamically per-frame. Changing it outside of the application UI can lead to a frame rate glitch.
* Set it to a reasonable "middle-range" value and do not change it until/unless the DLSS or other dynamic-range settings change.
* For example, if the application has a final, upscaled color resolution of 3840x2160 pixels, with a rendering resolution that can vary between 1920x1080 and 3840x2160 pixels, the `dynamicResWidth` and `Height` could be set to 2880x1620 or 1920x1080.
* This ratio between the min and max resolutions can be tuned for performance and quality.
* If the application passes 0 for these values when DLSS-G dynamic resolution is enabled, then DLSS-G will default to half of the resolution of the final color target or its subrect, if in use.
```cpp
// Using helpers from sl_dlss_g.h
sl::DLSSGOptions options{};
// These are populated based on user selection in the UI
options.mode = myUI->getDLSSGMode(); // e.g. sl::eDLSSGModeOn;
options.flags = sl::DLSSGFlags::eDynamicResolutionEnabled;
options.dynamicResWidth = appSelectedInternalWidth;
options.dynamicResHeight = appSelectedInternalHeight;
if(SL_FAILED(result, slDLSSGSetOptions(viewport, options)))
{
// Handle error here, check the logs
}
```
Additionally, in development (i.e. non-production) builds of sl.dlss_g.dll, it is possible to enable DLSS-G dynamic res mode globally for debugging purposes via sl.dlss_g.json. The supported options are:
* `"forceDynamicRes": true,` force-enables DLSS-G dynamic mode, equivalent to passing the flag `eDynamicResolutionEnabled` to `slDLSSGSetOptions` on every frame.
* `"forceDynamicResScaling": 0.5` sets the desired `dynamicResWidth` and `dynamicResHeight` indirectly, as a fraction of the color output buffer size. In the case shown, the fraction is 0.5, so with a color buffer that is 3840x2160, the internal resolution used by DLSS-G for dynamic resolution MVec and Depth buffers will be 1920x1080. If this value is not set, it defaults to 0.5.
### 11.0 DLSS-G AND HDR
If your game supports HDR please make sure to use **UINT10/RGB10 pixel format and HDR10/BT.2100 color space**. For more details please see <https://docs.microsoft.com/en-us/windows/win32/direct3darticles/high-dynamic-range#option-2-use-uint10rgb10-pixel-format-and-hdr10bt2100-color-space>
When tagging `eUIColorAndAlpha` please make sure that alpha channel has enough precision (for example do NOT use formats like R10G10B10A2)
> **IMPORTANT:**
> DLSS-G currently does NOT support FP16 pixel format and scRGB color space because it is too expensive in terms of compute and bandwidth cost.
### 12.0 DLSS-G AND DXGI
DLSS-G takes over frame presenting so it is important for the host application to turn on/off DLSS-G as needed to avoid potential problems and deadlocks.
As a general rule, **when host is modifying resolution, full-screen vs windowed mode or performing any other operation that could cause SwapChain::Present call to generate a deadlock, DLSS-G must be turned off by the host using the `sl::DLSSGOptions::mode` field.** When turned off DLSS-G will call SwapChain::Present on the same thread as the host application which is not the case when DLSS-G is turned on. For more details please see <https://docs.microsoft.com/en-us/windows/win32/direct3darticles/dxgi-best-practices#multithreading-and-dxgi>
> **IMPORTANT:**
> Turning DLSS-G on and off using `sl::DLSSGOptions::mode` should not be confused with enabling/disabling the DLSS-G feature using `slSetFeatureLoaded`; the latter would completely unload and unhook the sl.dlss_g plugin, hence completely disabling `sl::kFeatureDLSS_G` (it cannot be turned on/off or used in any way).
### 13.0 HOW TO OBTAIN THE ACTUAL FRAME TIMES AND NUMBER OF FRAMES PRESENTED
Since DLSS-G when turned on presents additional frames the actual frame time can be obtained using the following sample code:
```cpp
// Using helpers from sl_dlss_g.h
// Not passing flags or special options here, no need since we just want the frame stats
sl::DLSSGState state{};
if(SL_FAILED(result, slDLSSGGetState(viewport, state)))
{
// Handle error here, check the logs
}
```
> **IMPORTANT:**
> When querying only frame times or status, do not specify the `DLSSGFlags::eRequestVRAMEstimate`; setting that flag and passing a non-null `sl::DLSSGOptions` will cause DLSS-G to compute and return the estimated VRAM required. This is needless and too expensive to do per frame.
Once we have obtained DLSS-G state we can estimate the actual FPS like this:
```cpp
//! IMPORTANT: Returned value represents number of frames presented since
//! we last called slDLSSGGetState so make sure to account for that.
//!
//! If calling 'slDLSSGGetState' after each present then the actual FPS
//! can be computed like this:
auto actualFPS = myFPS * state.numFramesActuallyPresented;
```
The `numFramesActuallyPresented` is equal to the number of presented frames per one application frame. For example, if DLSS-G plugin is inserting one generated frame after each application frame, that variable will contain '2'.
> **IMPORTANT**
Please note that DLSS-G will **always present real frame generated by the host but the interpolated frame can be dropped** if presents go out of sync (interpolated frame is too close to the last real one). In addition, if the host is CPU bottlenecked it is **possible for the reported FPS to be more than 2x when DLSS-G is on** because the call to `Swapchain::Present` is no longer a blocking call for the host and can be up to 1ms faster which then translates to faster base frame times. Here is an example:
* Host is CPU bound and producing frames every 10ms
* Up to 1ms is spent blocked by the `Swapchain::Present` call
* SL present hook will take around 0.2ms instead since `Swapchain::Present` is now an async event handled by the SL pacer
* Host is now delivering frames at 10ms - 0.8ms = 9.2ms
* This results in 109fps getting bumped to 218fps when DLSS-G is active so 2.18x scaling instead of the expected 2x
### 14.0 HOW TO CHECK DLSS-G STATUS AT RUNTIME
#### 14.1 HOW TO CHECK FOR MULTIFRAME SUPPORT
Multi-frame support is reported via `sl::DLSSGState::numFramesToGenerateMax`.
Before enabling multi-frame, check for device support by calling
`slDLSSGGetState` and checking `numFramesToGenerateMax`. If the value is 1,
multi-frame is not supported. Otherwise, multi-frame is supported, up to the
number of frames specified.
#### 14.2 HOW TO CHECK FOR RUNTIME ERRORS
Even if DLSS-G feature is supported and loaded it can still end up in an invalid state at run-time due to various reasons. The following code snippet shows how to check the run-time status:
```cpp
sl::DLSSGState state{};
if(SL_FAILED(result, slDLSSGGetState(viewport, state)))
{
// Handle error here, check the logs
}
// Run-time status
if(state.status != sl::eDLSSGStatusOk)
{
// Turn off DLSS-G
sl::DLSSGOptions options{};
options.mode = sl::DLSSGMode::eOff;
slDLSSGSetOptions(viewport, options);
// Check status and errors in the log and fix your integration if applicable
}
```
For more details please see `enum DLSSGStatus` in sl_dlss_g.h
> **IMPORTANT:**
> When in an invalid state and turned on, DLSS-G will add a pink overlay to the final color image. A warning message will be shown on screen in the NDA development build and an error describing the issue will be logged.
> **IMPORTANT:**
> When querying only frame times or status, do not specify the `DLSSGFlags::eRequestVRAMEstimate`; setting that flag and passing a non-null `sl::DLSSGOptions::ext` will cause DLSS-G to compute and return the estimated VRAM required. This is needless and too expensive to do per frame.
### 15.0 HOW TO GET AN ESTIMATE OF VRAM REQUIRED BY DLSS-G
SL can return a general estimate of the GPU memory required by DLSS-G via `slDLSSGGetState`. This can be queried before DLSS-G is enabled, and can be queried for resolutions and formats other than those currently active. To receive an estimate of GPU memory required, the application must:
* Set the `sl::DLSSGOptions::flags` flag, `DLSSGFlags::eRequestVRAMEstimate`
* Provide the values in the `sl::DLSSGOptions` structure, including the intended resolutions of the MVecs, Depth buffer, final color buffer (UI buffers are assumed to be the same size as the color buffer), as well as the 3D API-specific format enums for each buffer. Finally, the expected number of backbuffers in the swapchain must be specified. See the `sl::DLSSGOptions` struct for details.
If the flag and structure are provided, `slDLSSGGetState` should return a nonzero value in `sl::DLSSGState::estimatedVRAMUsageInBytes`. Note that this value is a very rough estimate/guideline and should be used for general allocation. The actual amount used may differ from this value.
> **IMPORTANT:**
> When querying only frame times or status, do not specify the `DLSSGFlags::eRequestVRAMEstimate`; setting that flag and passing a non-null `sl::DLSSGOptions` will cause DLSS-G to compute and return the estimated VRAM required. This is needless and too expensive to do per frame.
#### 15.1 HOW TO SYNCHRONIZE THE HOST APP DLSS-G INPUTS AND STREAMLINE IF REQUIRED
```cpp
//! SL client must wait on SL DLSS-G plugin-internal fence and associated value, before it can modify or destroy the tagged resources input
//! to DLSS-G enabled for the corresponding previously presented frame on a non-presenting queue.
//! If modified on client's presenting queue, then it's recommended but not required.
//! However, if DLSSGQueueParallelismMode::eBlockNoClientQueues is set, then it's always required for VK.
//! It must call slDLSSGGetState on the present thread to retrieve the fence value for the inputs consumed by FG, on which client would
//! wait in the frame it would modify those inputs.
void* inputsProcessingCompletionFence{};
uint64_t lastPresentInputsProcessingCompletionFenceValue{};
```
### 16.0 HOW TO SYNCHRONIZE THE HOST APP AND STREAMLINE WHEN USING VULKAN
SL DLSS-G implements the following logic when intercepting `vkQueuePresentKHR` and `vkAcquireNextImageKHR`:
* sl.dlssg will wait for the binary semaphore provided in the `VkPresentInfoKHR` before proceeding with adding workload(s) to the GPU
* sl.dlssg will signal binary semaphore provided in `vkAcquireNextImageKHR` call when DLSS-G workloads are submitted to the GPU
Based on this the host application MUST:
* Signal the `present` binary semaphore provided in `VkPresentInfoKHR` when submitting final workload at the end of the frame
* Wait for the signal on the `acquire` binary semaphore provided with `vkAcquireNextImageKHR` call before starting the new frame
Here is some pseudo-code:
```cpp
createBinarySemaphore(acquireSemaphore);
createBinarySemaphore(presentSemaphore);
// SL will signal the 'acquireSemaphore' when ready to continue next frame
vkAcquireNextImageKHR(acquireSemaphore, &index);
// Frame start
waitOnGPU(acquireSemaphore);
// Render frame using render target with given index
renderFrame(index);
// Finish frame
signalOnGPU(presentSemaphore);
// Present the frame (SL will wait for the 'presentSemaphore' on the GPU)
vkQueuePresent(presentSemaphore, index);
```
### 17.0 DLSS-G INTEGRATION CHECKLIST DETAILS
* Provide either correct application ID or engine type (Unity, UE etc.) when calling `slInit`
* In final (production) builds validate the public key for the NVIDIA custom digital certificate on `sl.interposer.dll` if using the binaries provided by NVIDIA. See [security section](ProgrammingGuide.md#211-security) for more details.
* Tag `eDepth`, `eMotionVectors`, `eHUDLessColor` and `eUIColorAndAlpha` buffers
* When values of depth and mvec could be invalid make sure to set all tags to null pointers (level loading, playing video cut-scenes, paused, in menu etc.)
* Tagged buffers must be marked as volatile if they are not going to be valid when SwapChain::Present call is made
* Tag backbuffer, only if DLSS-G needs to run on a subregion of the final color buffer. If tagged, ensure to set the tag to null pointer, if it could be invalid.
* Provide correct common constants and frame index using `slSetConstants` method.
* When game is rendering game frames make sure to set `sl::Constants::renderingGameFrames` correctly
* Make sure that frame index provided with the common constants is matching the presented frame (i.e. frame index provided with Reflex markers `ReflexMarker::ePresentStart` and `ReflexMarker::ePresentEnd`)
* **Do NOT set common constants (camera matrices etc) multiple times per single frame** - this causes ambiguity which can result in IQ issues.
* Use sl.imgui plugin to validate that inputs (camera matrices, depth, mvec, color etc.) are correct
* Turn DLSS-G off (by setting `sl::DLSSGOptions::mode` to `DLSSGMode::eOff`) before any window manipulation (resize, maximize/minimize, full-screen transition etc.) to avoid potential deadlocks or instability
* Reduce the amount of motion blur when DLSS-G is active
* Call `slDLSSGGetState` to obtain `sl::DLSSGState` and check the following:
* Make sure that `sl::DLSSGStatus` is set to `eDLSSGStatusOk`, if not disable DLSS-G and fix integration as needed (please see the logs for errors)
* If swap-chain back buffer size is lower than `sl::DLSSGState::minWidthOrHeight` DLSS-G must be disabled
* If VRAM stats and other extra information is not needed pass `nullptr` for constants for lowest overhead.
* Call `slGetFeatureRequirements` to obtain requirements for DLSS-G (see [programming guide](./ProgrammingGuide.md#23-checking-features-requirements)) and check the following:
* If any of the items in the `sl::FeatureRequirements` structure like OS, driver etc. are NOT supported inform user accordingly.
* To avoid an additional overhead when presenting frames while DLSS-G is off **always make sure to re-create the swap-chain when DLSS-G is turned off**. For details please see [section 18](#180-how-to-avoid-unnecessary-overhead-when-dlss-g-is-turned-off)
* In Vulkan, to exploit command queue parallelism, setting `DLSSGOptions::queueParallelismMode` to `DLSSGQueueParallelismMode::eBlockNoClientQueues` might offer extra performance gains depending on the workload.
* The same `DLSSGQueueParallelismMode` mode must be set for all the viewports.
* When using this mode, the client should wait on `DLSSGState::inputsProcessingCompletionFence` and associated value, before client can modify or destroy the tagged resources input to DLSS-G enabled for the corresponding previously presented frame on any of its queues.
* For synchronization details, please refer to [section 15.1](#151-how-to-synchronize-the-host-app-dlss-g-inputs-and-streamline-if-required).
* Typical scenario in which gains might be more apparent is in GPU-limited applications having workload types employing multiple queues for submissions, especially if the presenting queue is the only one accessing FG inputs. Workloads from other application queues can happen in parallel with the DLSS-G workload; if those workloads underutilize GPU SM resources, the DLSS-G workload may better fill out SM utilization, improving overall performance. On the other hand, highly CPU-limited applications could see relatively smaller gains due to lower parallelism.
#### 17.1 Game setup for the testing DLSS Frame Generation
1. Set up a machine with an Ada board and drivers recommended by NVIDIA team.
1. Turn on Hardware GPU Scheduling: Windows Display Settings (scroll down) -> Graphics Settings -> Hardware-accelerated GPU Scheduling: ON. Restart your PC.
1. Check that Vertical Sync is set to “Use the 3D application setting” in the NVIDIA Control Panel (“Manage 3D Settings”).
1. Get the game build that has Streamline, DLSS-G and Reflex integrated and install on the machine.
1. Once the game has loaded, go into the game settings and turn DLSS-G on.
1. Once DLSS-G is on, you should be able to see it by:
* observing FPS boost in any external FPS measurement tool; and
* if the build includes Streamline and DLSS-G development libraries, seeing a debug overlay at the bottom of the screen (can be set in sl.dlss-g.json).
If the steps above fail, set up logging in sl.interposer.json, check for easy-to-fix issues & errors in the log, and contact NVIDIA team.
### 18.0 HOW TO AVOID UNNECESSARY OVERHEAD WHEN DLSS-G IS TURNED OFF
When DLSS-G is loaded it will create an extra graphics command queue used to present frames asynchronously and in addition it will force the host application to render off-screen (host has no access to the swap-chain buffers directly). In scenarios when DLSS-G is switched off by the user
this results in unnecessary overhead coming from the extra copy from the off-screen buffer to the back buffer and synchronization between the game's graphics queue and the DLSS-G's queue. To avoid this, swap-chain must be torn down and re-created every time DLSS-G is switched on or off.
Here is some pseudo code showing how this can be done:
```cpp
void onDLSSGModeChange(sl::DLSSGMode mode)
{
if(mode == sl::DLSSGMode::eOn || mode == sl::DLSSGMode::eAuto)
{
// DLSS-G was off, now we are turning it on or set the mode to auto
// Make sure no work is pending on GPU
waitForIdle();
// Destroy swap-chain back buffers
releaseBackBuffers();
// Release swap-chain
releaseSwapChain();
// Make sure DLSS-G is loaded
slSetFeatureLoaded(sl::kFeatureDLSS_G, true);
// Re-create our swap-chain using the same parameters as before
// Note that DLSS-G is loaded so SL will return a proxy (assuming host is linking SL and using SL proxy DXGI factory)
auto swapChainProxy = createSwapChain();
// Obtain native swap-chain if using manual hooking
slGetNativeInterface(swapChainProxy,&swapChainNative);
// Obtain new back buffers from the swap-chain proxy (rendering off-screen)
getBackBuffers(swapChainProxy)
}
else if(mode == sl::DLSSGMode::eOff)
{
// DLSS-G was on, now we are turning it off
// Make sure no work is pending on GPU
waitForIdle();
// Destroy swap-chain back buffers
releaseBackBuffers();
// Release swap-chain
releaseSwapChain();
// Make sure DLSS-G is un-loaded
slSetFeatureLoaded(sl::kFeatureDLSS_G, false);
// Re-create our swap-chain using the same parameters as before
// Note that DLSS-G is unloaded so there is no proxy here, SL will return native swap-chain interface
auto swapChainNative = createSwapChain();
// Obtain new back buffers from the swap-chain (rendering directly to back buffers)
getBackBuffers(swapChainNative)
}
}
```
For the additional implementation details please check out the Streamline sample, especially the `void DeviceManagerOverride_DX12::BeginFrame()` function.
> NOTE:
> When DLSS-G is turned on the overhead from rendering to an off-screen target is negligible considering the overall frame rate boost provided by the feature.
### 19.0 DLSS-FG INDICATOR TEXT
DLSS-FG can render on-screen indicator text when the feature is enabled. Developers may find this helpful for confirming DLSS-FG is executing.
The indicator supports all build variants, including production.
The indicator is configured via the Windows Registry and contains 3 levels: `{0, 1, 2}` for `{off, minimal, detailed}`.
**Example .reg file setting the level to detailed:**
```
[HKEY_LOCAL_MACHINE\SOFTWARE\NVIDIA Corporation\Global\NGXCore]
"DLSSG_IndicatorText"=dword:00000002
```
### 20.0 AUTO SCENE CHANGE DETECTION
Auto Scene Change Detection (ASCD) intelligently annotates the reset flag during input frame pair sequences.
ASCD is enabled in all DLSS-FG build variants, executes on every frame pair, and supports all graphics platforms.
#### 20.1 INPUT DATA
ASCD uses the camera forward, right, and up vectors passed into Streamline via `sl_consts.h`. These are stitched into a 3x3 camera rotation matrix such that:
```
[ cameraRight[0] cameraUp[0] cameraForward[0] ]
[ cameraRight[1] cameraUp[1] cameraForward[1] ]
[ cameraRight[2] cameraUp[2] cameraForward[2] ]
```
It is important that this matrix is orthonormal, i.e. the transpose of the matrix should equal the inverse. ASCD will only run if the orthonormal property is true. If the orthonormal check fails, ASCD is entirely disabled. Logs for DLSS-FG will show additional detail to debug incorrect input data.
#### 20.2 VIEWING STATUS
In all variants the detector status can be visualized with the detailed DLSS-FG indicator text.
The mode will be
* Enabled
* Disabled
* Disabled (Invalid Input Data)
In developer builds, ASCD can be toggled with `Shift+F9`. In developer builds, an additional `ignore_reset_flag` option, toggled with `Shift+F10`, simulates pure dependence on ASCD.
In cases where input camera data is incorrect, ASCD will report failure to the logs every frame. Log messages can be resolved by updating the camera inputs or disabling ASCD temporarily with the keybind.
#### 20.3 DEVELOPER HINTS
In developer DLSS-FG variants ASCD displays on-screen hints for:
1. Scene change detected without the reset flag.
2. Scene change detected with the reset flag.
3. No scene change detected with the reset flag.
The hints are presented as text blurbs in the center of the screen, as messages in the DLSS-FG log file and, in scenario 1, as a goldenrod-yellow screen tint.
### 21.0 DYNAMIC FRAME GENERATION
Dynamic Frame Generation leverages stochastic control to automatically trigger DLSS-G. This adaptive monitoring mechanism activates frame generation only when it boosts performance beyond the native framerate production of the game. Otherwise, DLSS-G remains disabled to ensure optimal framerate performance.
#### 21.1 DLSS-G AUTO MODE
Dynamic Frame Generation is enabled when DLSS-G is in auto mode. To activate Dynamic Frame Generation, set `mode` to `sl::DLSSGMode::eAuto`.
When using non-production (development) builds of `sl.dlss_g.dll`, the status of Dynamic Frame Generation and the current state of DLSS-G is displayed on the DLSS-G status window.

View File

@ -0,0 +1,176 @@
Streamline - DeepDVC
=======================
>The focus of this guide is on using Streamline to integrate RTX Dynamic Vibrance ("DeepDVC") into an application.
>For information on user interface considerations when using the DeepDVC plugin, please see the ["RTX UI Developer Guidelines.pdf"](<RTX UI Developer Guidelines.pdf>) document included with this SDK.
RTX Dynamic Vibrance ("DeepDVC") uses AI to enhance digital vibrance in real-time, improving visual clarity and adjusting color saturation adaptively to the specific game.
The filter is controlled by two parameters:
| Parameter | What it does | What you'll notice | Range |
|--------------|--------------|-----------|------------|
| Intensity | Controls how strong or subtle the filter effect will be on an image. | A low intensity will keep the images closer to the original, while a high intensity will make the filter effect more pronounced. A zero value will result in the original image. | [0, 1] |
| Saturation Boost | Enhances the colors in your image, making them more vibrant and eye-catching. | This setting will only be active if you've turned up the Intensity. Once active, you'll see colors pop up more, making the image look more lively. | [0, 1] |
Version 2.7.3
=======
### 1.0 CHECK IF DEEPDVC IS SUPPORTED
As soon as SL is initialized, you can check if DeepDVC is available for the specific adapter you want to use:
```cpp
DXGI_ADAPTER_DESC adapterDesc; // output from DXGIAdapter::GetDesc() for adapter to query
sl::AdapterInfo adapterInfo{};
...
result = slIsFeatureSupported(sl::kFeatureDeepDVC, slAdapterInfo);
```
### 2.0 TAG ALL REQUIRED RESOURCES
DeepDVC only requires final-res output color buffers.
```cpp
// IMPORTANT: Make sure to mark resources which can be deleted or reused for other purposes within a frame as volatile
// Prepare resources (assuming d3d11/d3d12 integration so leaving Vulkan view and device memory as null pointers)
sl::Resource colorOut = {sl::ResourceType::eTex2d, myTAAUOutput, nullptr, nullptr, nullptr};
sl::ResourceTag colorOutTag = sl::ResourceTag{&colorOut, sl::kBufferTypeScalingOutputColor, sl::ResourceLifecycle::eOnlyValidNow, &myExtent};
// Tag in group
sl::Resource inputs[] = {colorOutTag};
slSetTag(viewport, inputs, _countof(inputs), cmdList);
```
### 3.0 PROVIDE DEEPDVC OPTIONS
DeepDVC options must be set so that the DeepDVC plugin can track any changes made by the user:
```cpp
sl::DeepDVCOptions deepDVCOptions = {};
// These are populated based on user selection in the UI
deepDVCOptions.mode = myUI->getDeepDVCMode(); // e.g. sl::DeepDVCMode::eOn;
deepDVCOptions.intensity = myUI->getDeepDVCIntensity(); // e.g. 0.5
deepDVCOptions.saturationBoost = myUI->getDeepDVCSaturationBoost(); // e.g. 0.75
if(SL_FAILED(result, slDeepDVCSetOptions(viewport, deepDVCOptions)))
{
// Handle error here, check the logs
}
```
> **NOTE:**
> To turn off DeepDVC set `sl::DeepDVCOptions::mode` to `sl::DeepDVCMode::eOff`. Note that this does NOT release any resources, for that please use `slFreeResources`.
### 4.0 ADD DEEPDVC TO THE RENDERING PIPELINE
The call to evaluate the DeepDVC feature must occur during the post-processing phase after tone-mapping. Applying DeepDVC in linear HDR in-game color-space may result in undesirable color effects. Since DeepDVC can enhance noisy or grainy regions, it is recommended that certain effects such as film grain should occur after DeepDVC.
On your rendering thread, call `slEvaluateFeature` at the appropriate location. Please note that when using `slSetTag` and `slDeepDVCSetOptions` the `frameToken` and `myViewport` used in `slEvaluateFeature` **must match across all API calls**.
```cpp
// Make sure DeepDVC is available and user selected this option in the UI
if(useDeepDVC)
{
// NOTE: We can provide all inputs here or separately using slSetTag, slSetConstants or slDeepDVCSetOptions
// Inform SL that DeepDVC should be injected at this point for the specific viewport
const sl::BaseStructure* inputs[] = {&myViewport};
if(SL_FAILED(result, slEvaluateFeature(sl::kFeatureDeepDVC, *frameToken, inputs, _countof(inputs), myCmdList)))
{
// Handle error
}
else
{
// IMPORTANT: Host is responsible for restoring state on the command list used
restoreState(myCmdList);
}
}
```
> **IMPORTANT:**
> Please note that **host is responsible for restoring the command buffer(list) state** after calling `slEvaluateFeature`. For more details on which states are affected please see [restore pipeline section](./ProgrammingGuideManualHooking.md#70-restoring-command-listbuffer-state)
### 5.0 MULTIPLE VIEWPORTS
Here is a code snippet showing one way of handling two viewports with explicit resource allocation and de-allocation:
```cpp
// Viewport1
{
// We need to setup our constants first so sl.deepdvc plugin has enough information
sl::DeepDVCOptions deepDVCOptions = {};
deepDVCOptions.mode = viewport1->getDeepDVCMode(); // e.g. sl::DeepDVCMode::eOn;
deepDVCOptions.intensity = viewport1->getDeepDVCIntensity(); // e.g. 0.5
deepDVCOptions.saturationBoost = viewport1->getDeepDVCSaturationBoost(); // e.g. 0.75
// Note that we are passing viewport id 1
slDeepDVCSetOptions(viewport1->id, deepDVCOptions);
// Set our tags, note that we are passing viewport id
setTag(viewport1->id, &tags2, numTags2);
// and so on ...
// Now we can allocate our feature explicitly, again passing viewport id
slAllocateResources(sl::kFeatureDeepDVC, viewport1->id);
// Evaluate DeepDVC on viewport1, again passing viewport id so we can map tags, constants correctly
//
// NOTE: If slAllocateResources is not called DeepDVC resources would be initialized at this point
slEvaluateFeature(sl::kFeatureDeepDVC, myFrameIndex, viewport1->id, nullptr, 0, myCmdList);
// Assuming the above evaluate call is still pending on the CL, make sure to flush it before releasing resources
flush(myCmdList);
// When we no longer need this viewport
slFreeResources(sl::kFeatureDeepDVC, viewport1->id);
}
// Viewport2
{
// We need to setup our constants first so sl.deepdvc plugin has enough information
sl::DeepDVCOptions deepDVCOptions = {};
deepDVCOptions.mode = viewport2->getDeepDVCMode(); // e.g. sl::DeepDVCMode::eOn;
deepDVCOptions.intensity = viewport2->getDeepDVCIntensity(); // e.g. 0.5
deepDVCOptions.saturationBoost = viewport2->getDeepDVCSaturationBoost(); // e.g. 0.75
// Note that we are passing viewport id 2
slDeepDVCSetOptions(viewport2->id, deepDVCOptions);
// Set our tags, note that we are passing viewport id
setTag(viewport2->id, &tags2, numTags2);
// and so on ...
// Now we can allocate our feature explicitly, again passing viewport id
slAllocateResources(sl::kFeatureDeepDVC, viewport2->id);
// Evaluate DeepDVC on viewport2, again passing viewport id so we can map tags, constants correctly
//
// NOTE: If slAllocateResources is not called DeepDVC resources would be initialized at this point
slEvaluateFeature(sl::kFeatureDeepDVC, myFrameIndex, viewport2->id, nullptr, 0, myCmdList);
// Assuming the above evaluate call is still pending on the CL, make sure to flush it before releasing resources
flush(myCmdList);
// When we no longer need this viewport
slFreeResources(sl::kFeatureDeepDVC, viewport2->id);
}
```
### 6.0 CHECK STATE AND VRAM USAGE
To obtain current state for a given viewport the following API can be used:
```cpp
sl::DeepDVCState deepDVCState{};
if(SL_FAILED(result, slDeepDVCGetState(viewport, deepDVCState))
{
// Handle error here
}
// Check how much memory DeepDVC is using for this viewport
deepDVCState.estimatedVRAMUsageInBytes
```
### 7.0 LIMITATIONS
The current DeepDVC implementation supports SDR inputs in display-referred color space after tone mapping. Applying DeepDVC on HDR images may introduce undesirable color artifacts.

View File

@ -0,0 +1,596 @@
Streamline - Manual Hooking
=======================
Version 2.7.3
=======
The automated global hooking is a great way to quickly enable SL features in any application. However, this can lead to unnecessary overhead caused by the entire API redirection through SL proxies and problems with tools and 3rd party libraries which do not expect to receive SL proxies as inputs.
To address this, SL provides "manual" hooking, which is a slightly more involved style of integration leveraging the `slGetNativeInterface` and `slUpgradeInterface` APIs.
> **IMPORTANT:**
> Please read the general [ProgrammingGuide.md](ProgrammingGuide.md) before proceeding with this advanced method of integration.
### 1.0 LINKING
#### 1.1 DirectX
When using D3D11 or D3D12 one can choose between statically linking `sl.interposer.lib` or continue linking `dxgi.lib`, `d3d12.lib`, `d3d11.lib` as usual and get only `sl*` methods from the `sl.interposer.dll` after loading it dynamically (please see [secure load](ProgrammingGuide.md#211-security)).
> **IMPORTANT**
> For DirectX integrations statically linking `sl.interposer.lib` is the preferred and easiest method since it allows access to all helper methods from `sl_$feature.h` whilst still achieving minimal CPU overhead. When using dynamic linking helpers cannot be used directly due to missing `slGetFeatureFunction` API at link time.
#### 1.2 Vulkan
When using Vulkan linking the `sl.interposer.lib` would result in additional CPU overhead so the best approach is to dynamically load `sl.interposer.dll` instead of `vulkan-1.dll` and use `vkGetDeviceProcAddr` and `vkGetInstanceProcAddr` provided by the SL.
> **IMPORTANT**
> SL `vkGetDeviceProcAddr` and `vkGetInstanceProcAddr` will return addresses from `vulkan-1.dll` for the entire Vulkan API **except for the few functions intercepted by SL**. For more details please continue reading.
### 2.0 MANUAL HOOKING API
All required definitions and declarations can be found in the `sl_hooks.h` header. Here is the list of all hooks used in this SDK:
```cpp
//! NOTE: Adding new hooks require sl.interposer to be recompiled
//!
//! IMPORTANT: Since SL interposer proxies supports many different versions of various D3D/DXGI interfaces
//! we use only base interface names for our hooks.
//!
//! For example if API was added in IDXGISwapChain5::FUNCTION it is still named eIDXGISwapChain_FUNCTION (there is no 5 in the name)
//!
enum class FunctionHookID : uint32_t
{
//! Mandatory - IDXGIFactory*
eIDXGIFactory_CreateSwapChain,
eIDXGIFactory_CreateSwapChainForHwnd,
eIDXGIFactory_CreateSwapChainForCoreWindow,
//! Mandatory - IDXGISwapChain*
eIDXGISwapChain_Present,
eIDXGISwapChain_Present1,
eIDXGISwapChain_GetBuffer,
eIDXGISwapChain_ResizeBuffers,
eIDXGISwapChain_ResizeBuffers1,
eIDXGISwapChain_GetCurrentBackBufferIndex,
eIDXGISwapChain_SetFullscreenState,
//! Internal - please ignore when doing manual hooking
eIDXGISwapChain_Destroyed,
//! Mandatory - ID3D12Device*
eID3D12Device_CreateCommandQueue,
//! Mandatory - Vulkan
eVulkan_Present,
eVulkan_CreateSwapchainKHR,
eVulkan_DestroySwapchainKHR,
eVulkan_GetSwapchainImagesKHR,
eVulkan_AcquireNextImageKHR,
eVulkan_DeviceWaitIdle,
eVulkan_CreateWin32SurfaceKHR,
eVulkan_DestroySurfaceKHR,
eMaxNum
};
```
### 3.0 INITIALIZATION AND SHUTDOWN
Call `slInit` **before any of the hooks mentioned in section [2.0](#20-manual-hooking-api) could be triggered** (like for example, DXGI calls to create swap-chain) and make sure to specify the special flag `PreferenceFlag::eUseManualHooking` as shown in the snippet below:
```cpp
#include <sl.h>
#include <sl_consts.h>
#include <sl_hooks.h>
sl::Preferences pref{};
// Inform SL that we are doing advanced integration
pref.flags |= PreferenceFlag::eUseManualHooking;
// Set other preferences, request features etc.
if(SL_FAILED(result, slInit(pref)))
{
// Handle error, check the logs
}
```
> **NOTE:**
> Unlike regular SL integrations, the D3D device can be created before or after `slInit` is called. When using Vulkan however, device still must be created **after** the `slInit` call, for more details please continue reading.
When shutting down, nothing much changes from the regular SL integration. Simply call `slShutdown()` **before destroying** dxgi/d3d11/d3d12/vk instances, devices and other components in your engine.
```cpp
if(SL_FAILED(result, slShutdown()))
{
// Handle error, check the logs
}
```
### 4.0 ADDING HOOKS TO YOUR ENGINE
#### 4.1 DirectX
There are two scenarios depending on whether `sl.interposer.lib` is linked directly or not with the host application:
```cpp
//! SL LIB LINKED WITH THE GAME, SL PROXIES ARE PROVIDED TO THE HOST
if(!sl::security::verifyEmbeddedSignature(PATH_TO_SL_IN_YOUR_BUILD + "/sl.interposer.dll"))
{
// SL module not signed, disable SL
}
else
{
// SL digitally signed, OK to use it!
// D3D11
//
// IMPORTANT: Note that for D3D11 there is NO proxy for a device in any scenario
ID3D11Device* nativeDevice;
[nativeDevice, proxySwapchain] = D3D11CreateDeviceAndSwapChain();
// D3D12
proxyDevice = D3D12CreateDevice();
ID3D12Device* nativeDevice{};
if(SL_FAILED(result, slGetNativeInterface(proxyDevice, &nativeDevice))
{
// Handle error, check logs
}
// Normally this is not done for D3D11 (see above)
auto proxyFactory = DXGICreateFactory();
auto proxySwapChain = proxyFactory->CreateSwapChain();
// DXGI
//
// This part is identical for D3D11 and D3D12
IDXGISwapChain* nativeSwapchain{};
if(SL_FAILED(result, slGetNativeInterface(proxySwapChain, &nativeSwapchain))
{
// Handle error, check logs
}
}
```
```cpp
//! SL LIB NOT LINKED WITH THE GAME, NATIVE INTERFACES ARE PROVIDED TO THE HOST
if(!sl::security::verifyEmbeddedSignature(PATH_TO_SL_IN_YOUR_BUILD + "/sl.interposer.dll"))
{
// SL module not signed, disable SL
}
else
{
// SL digitally signed, we can load and use it!
auto mod = LoadLibrary(PATH_TO_SL_IN_YOUR_BUILD + "/sl.interposer.dll");
// Declare all SL functions here, showing only the one we are using
SL_FUN_DECL(slUpgradeInterface);
SL_FUN_DECL(slGetNativeInterface);
// Get all SL functions here, showing only the one we are using
auto slUpgradeInterface = reinterpret_cast<PFunSlGetUpgradeInterface>(GetProcAddress(mod, "slUpgradeInterface"));
auto slGetNativeInterface = reinterpret_cast<PFunSlGetNativeInterface>(GetProcAddress(mod, "slGetNativeInterface"));
// D3D11
//
// IMPORTANT: Note that for D3D11 there is NO proxy for a device in any scenario
ID3D11Device* nativeDevice;
[nativeDevice, nativeSwapchain] = D3D11CreateDeviceAndSwapChain();
// D3D12
auto nativeDevice = D3D12CreateDevice();
auto nativeFactory = DXGICreateFactory();
ID3D12Device* proxyDevice = nativeDevice;
if(SL_FAILED(result, slUpgradeInterface(&proxyDevice))
{
// Handle error, check logs
}
//! IMPORTANT: Any create swap chain API must be intercepted as specified by `enum class FunctionHookID` in sl_hooks.h
//!
//! Therefore here we are using proxy factory and not a native one!
IDXGIFactory* proxyFactory = nativeFactory
if(SL_FAILED(result, slUpgradeInterface(&proxyFactory)))
{
// Handle error, check logs
}
//! Now we can obtain our proxy swap-chain
proxySwapChain = proxyFactory->CreateSwapChain();
//! Next we get our proxy device
ID3D12Device* proxyDevice = nativeDevice;
if(SL_FAILED(result, slUpgradeInterface(&proxyDevice)))
{
// Handle error, check logs
}
//! Native swap-chain so we can call non-intercepted API directly
IDXGISwapChain* nativeSwapchain{};
if(SL_FAILED(result, slGetNativeInterface(proxySwapChain, &nativeSwapchain)))
{
// Handle error, check logs
}
}
```
> **IMPORTANT**
> One should use native interfaces EVERYWHERE in the host application EXCEPT for the APIs which are hooked by SL (listed as enums in sl_hooks.h).
For example, here is how to call NVAPI functions from the host side:
```cpp
//! IMPORTANT: When using 3rd party libs use native interfaces NOT the SL proxies
NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS state{};
NvAPI_D3D12_SetCreatePipelineStateOptions(nativeDevice, &state);
```
Here is how one would add SL hooks to the swap-chain:
```cpp
//! FULL LIST OF SL API HOOKS IN sl_hooks.h
//!
//! eIDXGISwapChain_Present,
//! eIDXGISwapChain_Present1,
//! eIDXGISwapChain_GetBuffer,
//! eIDXGISwapChain_ResizeBuffers,
//! eIDXGISwapChain_ResizeBuffers1,
//! eIDXGISwapChain_GetCurrentBackBufferIndex,
//! eIDXGISwapChain_SetFullscreenState,
//! eID3D12Device_CreateCommandQueue
//! IMPORTANT: NOT INTERCEPTED BY STREAMLINE
//!
HRESULT myrhi::SwapChain::GetFullscreenDesc(DXGI_SWAP_CHAIN_FULLSCREEN_DESC *pDesc)
{
    // EXISTING ENGINE CODE
    // Not hooked by SL, so the call goes straight to the native swap-chain
    const HRESULT hr = nativeSwapchain->GetFullscreenDesc(pDesc);
    return hr;
}
//! IMPORTANT: INTERCEPTED BY STREAMLINE (eIDXGISwapChain_ResizeBuffers) - USING PROXY AS NEEDED
//!
HRESULT myrhi::SwapChain::ResizeBuffers(UINT BufferCount, UINT Width, UINT Height, DXGI_FORMAT NewFormat, UINT SwapChainFlags)
{
    if(!g_slEnabled)
    {
        // EXISTING ENGINE CODE
        // SL is disabled, resize through the native swap-chain
        return nativeSwapchain->ResizeBuffers(BufferCount, Width, Height, NewFormat, SwapChainFlags);
    }
    // NEW CODE
    // SL is enabled, this API is hooked so the call must go through the proxy
    return proxySwapchain->ResizeBuffers(BufferCount, Width, Height, NewFormat, SwapChainFlags);
}
// and so on, calls to proxies must be added for all hooks listed in sl_hooks.h
```
Here is an example for ID3D12Device:
```cpp
//! IMPORTANT: NOT INTERCEPTED BY STREAMLINE
//!
HRESULT myrhi::D3D12Device::CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE type, REFIID riid, void** ppCommandAllocator)
{
    // EXISTING ENGINE CODE
    // Not hooked by SL, so the call goes straight to the native device
    const HRESULT hr = nativeDevice->CreateCommandAllocator(type, riid, ppCommandAllocator);
    return hr;
}
//! IMPORTANT: INTERCEPTED BY STREAMLINE (eID3D12Device_CreateCommandQueue) - USING PROXY AS NEEDED
//!
HRESULT myrhi::D3D12Device::CreateCommandQueue(const D3D12_COMMAND_QUEUE_DESC* pDesc, REFIID riid, void** ppCommandQueue)
{
    if(!g_slEnabled)
    {
        // EXISTING ENGINE CODE
        // SL is disabled, create the queue on the native device
        return nativeDevice->CreateCommandQueue(pDesc, riid, ppCommandQueue);
    }
    // NEW CODE
    // SL is enabled, this API is hooked so the call must go through the proxy
    return proxyDevice->CreateCommandQueue(pDesc, riid, ppCommandQueue);
}
// and so on, calls to proxies must be added for all hooks listed in sl_hooks.h
```
Skip Vulkan information and go to section [5.0 Informing SL about the device to use](#50-informing-sl-about-the-device-to-use)
#### 4.2 Vulkan
SL hooking in Vulkan is also rather simple. All VK functions are obtained from `vulkan-1.dll` as usual but in addition for ones listed in the `sl_hooks.h` we need to obtain proxies from `sl.interposer.dll`. Here is some sample code:
```cpp
// VK export functions from the SL interposer, we use them to get our proxies
PFN_vkGetDeviceProcAddr vkGetDeviceProcAddrProxy{};
PFN_vkGetInstanceProcAddr vkGetInstanceProcAddrProxy{};
// Always secure load SL modules
if(!sl::security::verifyEmbeddedSignature(PATH_TO_SL_IN_YOUR_BUILD + "/sl.interposer.dll"))
{
    // SL module not signed, disable SL
}
else
{
    auto mod = LoadLibrary(PATH_TO_SL_IN_YOUR_BUILD + "/sl.interposer.dll");
    //! GetProcAddr proxies
    //!
    //! IMPORTANT: These proxies return functions from `vulkan-1.dll` except for those intercepted by SL and listed in sl_hooks.h
    //!
    //! Assign to the pointers declared above - re-declaring them here with `auto` would shadow them and leave the outer ones null
    vkGetDeviceProcAddrProxy = reinterpret_cast<PFN_vkGetDeviceProcAddr>(GetProcAddress(mod, "vkGetDeviceProcAddr"));
    vkGetInstanceProcAddrProxy = reinterpret_cast<PFN_vkGetInstanceProcAddr>(GetProcAddress(mod, "vkGetInstanceProcAddr"));
    // Get SL proxies for ALL mandatory APIs listed in the sl_hooks.h
    PFN_vkCreateSwapchainKHR vkCreateSwapchainKHRProxy = reinterpret_cast<PFN_vkCreateSwapchainKHR>(vkGetDeviceProcAddrProxy(device,"vkCreateSwapchainKHR"));
    PFN_vkDestroySwapchainKHR vkDestroySwapchainKHRProxy = reinterpret_cast<PFN_vkDestroySwapchainKHR>(vkGetDeviceProcAddrProxy(device,"vkDestroySwapchainKHR"));
    PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHRProxy = reinterpret_cast<PFN_vkGetSwapchainImagesKHR>(vkGetDeviceProcAddrProxy(device,"vkGetSwapchainImagesKHR"));
    PFN_vkAcquireNextImageKHR vkAcquireNextImageKHRProxy = reinterpret_cast<PFN_vkAcquireNextImageKHR>(vkGetDeviceProcAddrProxy(device,"vkAcquireNextImageKHR"));
    PFN_vkQueuePresentKHR vkQueuePresentKHRProxy = reinterpret_cast<PFN_vkQueuePresentKHR>(vkGetDeviceProcAddrProxy(device,"vkQueuePresentKHR"));
    // Optional but it makes integrations much easier since SL will take care of adding required extensions, enabling features and any extra command queues
    PFN_vkCreateDevice vkCreateDeviceProxy = reinterpret_cast<PFN_vkCreateDevice>(vkGetInstanceProcAddrProxy(instance,"vkCreateDevice"));
    PFN_vkCreateInstance vkCreateInstanceProxy = reinterpret_cast<PFN_vkCreateInstance>(vkGetInstanceProcAddrProxy(instance,"vkCreateInstance"));
}
```
If a Vulkan function is NOT listed in `sl_hooks.h` then it is not intercepted; continue to call the base Vulkan function as normal. If a specific Vulkan function is intercepted, please modify your code as shown below:
```cpp
//! IMPORTANT: INTERCEPTED BY SL BUT OPTIONAL
//!
//! If not using the proxy then host is responsible for setting up all extensions, features, command queues and calling slSetVulkanInfo
VkResult myrhi::vkCreateInstance(const VkInstanceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkInstance* pInstance)
{
    //! IMPORTANT: Only call proxy for the instance that should be used by SL, skip proxy for any test instance
    // NEW CODE
    if(g_slEnabled)
    {
        // Proxy obtained from SL, takes care of all SL internal requirements when creating the instance
        return vkCreateInstanceProxy(pCreateInfo, pAllocator, pInstance);
    }
    // EXISTING ENGINE CODE
    // Explicitly call the global Vulkan entry point; an unqualified call would resolve to this wrapper itself
    return ::vkCreateInstance(pCreateInfo, pAllocator, pInstance);
}
//! IMPORTANT: INTERCEPTED BY SL BUT OPTIONAL
//!
//! If not using the proxy then host is responsible for setting up all extensions, features, command queues and calling slSetVulkanInfo
VkResult myrhi::vkCreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDevice* pDevice)
{
    //! IMPORTANT: Only call proxy for the device that should be used by SL, skip proxy for any test device
    // NEW CODE
    if(g_slEnabled)
    {
        // Proxy obtained from SL, takes care of all SL internal requirements when creating the device
        return vkCreateDeviceProxy(physicalDevice, pCreateInfo, pAllocator, pDevice);
    }
    // EXISTING ENGINE CODE
    // Explicitly call the global Vulkan entry point; an unqualified call would resolve to this wrapper itself
    return ::vkCreateDevice(physicalDevice, pCreateInfo, pAllocator, pDevice);
}
//! IMPORTANT: NOT INTERCEPTED BY SL
//!
void myrhi::vkCmdExecuteCommands(VkCommandBuffer CommandBuffer, uint32_t CommandBufferCount, const VkCommandBuffer* pCommandBuffers)
{
    // EXISTING ENGINE CODE
    // Explicitly call the global Vulkan entry point; an unqualified call would resolve to this wrapper itself
    ::vkCmdExecuteCommands(CommandBuffer, CommandBufferCount, pCommandBuffers);
}
//! IMPORTANT: INTERCEPTED BY SL
//!
VkResult myrhi::vkQueuePresentKHR(VkQueue Queue, const VkPresentInfoKHR* PresentInfo)
{
    // NEW CODE
    if(g_slEnabled)
    {
        // Proxy obtained from SL
        return vkQueuePresentKHRProxy(Queue, PresentInfo);
    }
    // EXISTING ENGINE CODE
    // Explicitly call the global Vulkan entry point; an unqualified call would resolve to this wrapper itself
    return ::vkQueuePresentKHR(Queue, PresentInfo);
}
```
### 5.0 INFORMING SL ABOUT THE DEVICE TO USE
Since SL is no longer intercepting all D3D/DXGI/Vulkan calls, the host needs to provide the device which is going to be used to initialize all SL features.
#### 5.1 DirectX
When using D3D simply create your device and call `slSetD3DDevice`. The following code snippet demonstrates how to do this:
```cpp
// Inform SL about the device we created
if(SL_FAILED(result, slSetD3DDevice(nativeDevice)))
{
    // Handle error, check the logs
}
```
Skip Vulkan information and go to section [6.0 Tagging resources](#60-tagging-resources)
#### 5.2 Vulkan
**Important: If using `vkCreateDeviceProxy` and `vkCreateInstanceProxy` provided by SL you can skip this section.**
##### 5.2.1 INSTANCE AND DEVICE ADDITIONS
SL features can request special extensions, device features or even modifications to the number of command queues which need to be generated. Therefore before creating VK instance and device you must call `slGetFeatureRequirements` **for each enabled feature** to
get this information. Here is an example on how to obtain extensions, device features and additional queues needed by sl.dlss_g:
```cpp
sl::FeatureRequirements reqs{};
if (SL_FAILED(result, slGetFeatureRequirements(sl::kFeatureDLSS_G, reqs)))
{
// Feature is not requested on slInit or failed to load, check logs, handle error
}
else
{
// Feature is loaded, we can check the requirements
// Add extra queues (if any)
myConfig.extraGraphicsQueues += reqs.numGraphicsQueuesRequired;
myConfig.extraComputeQueues += reqs.numComputeQueuesRequired;
myConfig.extraOpticalFlowQueues += reqs.numOpticalFlowQueuesRequired;
// Add extra features or extensions (if any)
for (uint i = 0; i < reqs.numInstanceExtensions; i++)
{
myConfig.pluginInstanceExtensions.push_back(reqs.instanceExtensions[i]);
}
for (uint i = 0; i < reqs.numDeviceExtensions; i++)
{
myConfig.pluginDeviceExtensions.push_back(reqs.deviceExtensions[i]);
}
// Use helpers from sl_helpers_vk.h
VkPhysicalDeviceVulkan12Features features12 = sl::getVkPhysicalDeviceVulkan12Features(reqs.numFeatures12, reqs.features12);
VkPhysicalDeviceVulkan13Features features13 = sl::getVkPhysicalDeviceVulkan13Features(reqs.numFeatures13, reqs.features13);
}
```
Now that you have the information about the additional extensions, features and queues required by SL feature(s) you can proceed to create Vulkan instance and device. For more details please check out the [implementation on GitHub](https://github.com/NVIDIAGameWorks/Streamline/blob/main/source/core/sl.interposer/vulkan/wrapper.cpp#L234).
Note that Vulkan supports optical flow feature extension from Nvidia natively, as required by DLSS-G, starting with VK_API_VERSION_1_1 (recommended version is VK_API_VERSION_1_3) and minimum Nvidia driver version 527.64 on Windows and 525.72 on Linux. Vulkan SDK version 1.3.231.0 supports validation layer for this extension.
Native optical flow feature in Vulkan requires its own optical flow queue whose family is exclusive to graphics, compute and copy queue families and whose info is required to be passed during Vulkan device creation and has certain requirements for its use in DLSS-G:
1. Native optical flow queue family cannot be the same as that of any of the other queues of its client.
2. Its queue should be the very first one of the very first native optical flow-capable family, so the required queue index is 0.
For more details, please check out the [helper getOpticalFlowQueueInfo on GitHub](https://github.com/NVIDIAGameWorks/Streamline/blob/main/source/platforms/sl.chi/vulkan.cpp) to retrieve the same as well as the implementation link referred above.
In the absence of this setup in manual hooking mode, DLSS-G runs optical flow in an interop mode.
##### 5.2.2 PROVIDING INSTANCE, DEVICE AND OTHER INFORMATION TO SL
Once that is done you need to inform SL about the devices, instance and queues using the following code:
```cpp
sl::VulkanInfo info{};
info.device = myVKDevice;
info.instance = myVKInstance;
info.physicalDevice = myVKPhysicalDevice;
info.computeQueueIndex = computeQueueIndexStartForSL; // Where first SL queue starts after host's queues
info.computeQueueFamily = myComputeQueueFamily;
info.graphicsQueueIndex = graphicsQueueIndexStartForSL; // Where first SL queue starts after host's queues
info.graphicsQueueFamily = myGraphicsQueueFamily;
// Inform SL about the VK devices, instances etc
if(SL_FAILED(result, slSetVulkanInfo(info)))
{
// Handle error, check the logs
}
```
### 6.0 TAGGING RESOURCES
Since SL is no longer able to track resource creation and their states when using manual hooking mechanism, it is **mandatory for the host to provide resource information** when tagging them.
#### 6.1 DirectX
When using D3D tagging is similar to the regular SL integrations with the exception of having to provide the correct resource state. Here is an example:
```cpp
// Host providing native D3D12 resource state
//
// IMPORTANT: State needs to be correct when tagged resource is used by SL and not necessarily at this point when it is tagged.
//
sl::Resource mvec = { sl::ResourceType::eTex2d, mvecResource, nullptr, nullptr, D3D12_RESOURCE_STATE_RENDER_TARGET, nullptr };
sl::ResourceTag mvecTag = sl::ResourceTag {&mvec, sl::kBufferTypeMvec, sl::ResourceLifecycle::eOnlyValidNow, &mvecExtent };
slSetTag(viewport, &mvecTag, 1, cmdList);
```
#### 6.2 Vulkan
When using Vulkan, tagging is a bit more involved since the host needs to provide additional information about each tagged resource. Here is an example:
```cpp
// Host providing native resource state and additional information about the resource
//
// IMPORTANT: Image layout needs to be correct when tagged resource is used by SL and not necessarily at this point when it is tagged.
//
// Vulkan does not provide a way to obtain VkImage description so host must provide one.
sl::Resource mvec = { sl::ResourceType::eTex2d, mvecImage, nullptr, mvecImageView, VK_IMAGE_LAYOUT_GENERAL , 1920, 1080, VK_FORMAT_R32G32_SFLOAT, 1, 1, 0, 0, VK_IMAGE_USAGE_STORAGE_BIT };
sl::ResourceTag mvecTag = sl::ResourceTag {&mvec, sl::kBufferTypeMvec, sl::ResourceLifecycle::eOnlyValidNow, &mvecExtent };
slSetTag(viewport, &mvecTag, 1, cmdList);
```
> **IMPORTANT:**
> Failure to provide correct resource state will cause your application to generate D3D/Vulkan validation errors and potential bugs when resources are used by SL features.
### 7.0 RESTORING COMMAND LIST(BUFFER) STATE
When manual hooking is used the host application is no longer using an SL proxy for the command lists (CL), hence it is not possible for SL to restore the CL state after each `slEvaluateFeature` call.
#### 7.1 DirectX
Here is the code snippet showing what command list states are restored by SL in the regular integration mode, host application should do the same:
```cpp
void restorePipeline(sl::interposer::D3D12GraphicsCommandList* cmdList)
{
    // Re-bind the descriptor heaps recorded on the command list (if any)
    if (cmdList->m_numHeaps > 0)
    {
        cmdList->SetDescriptorHeaps(cmdList->m_numHeaps, cmdList->m_heaps);
    }

    // Re-apply the compute root signature followed by every root argument recorded for it
    if (cmdList->m_rootSignature)
    {
        cmdList->SetComputeRootSignature(cmdList->m_rootSignature);

        // Descriptor tables
        for (auto& table : cmdList->m_mapHandles)
        {
            cmdList->SetComputeRootDescriptorTable(table.first, table.second);
        }
        // Root constant buffer views
        for (auto& cbv : cmdList->m_mapCBV)
        {
            cmdList->SetComputeRootConstantBufferView(cbv.first, cbv.second);
        }
        // Root shader resource views
        for (auto& srv : cmdList->m_mapSRV)
        {
            cmdList->SetComputeRootShaderResourceView(srv.first, srv.second);
        }
        // Root unordered access views
        for (auto& uav : cmdList->m_mapUAV)
        {
            cmdList->SetComputeRootUnorderedAccessView(uav.first, uav.second);
        }
        // Root 32-bit constants
        for (auto& entry : cmdList->m_mapConstants)
        {
            auto& constants = entry.second;
            cmdList->SetComputeRoot32BitConstants(entry.first, constants.Num32BitValuesToSet, constants.SrcData, constants.DestOffsetIn32BitValues);
        }
    }

    // Pipeline state object
    if (cmdList->m_pso)
    {
        cmdList->SetPipelineState(cmdList->m_pso);
    }

    // State object, set through the ID3D12GraphicsCommandList4 interface
    if (cmdList->m_so)
    {
        static_cast<ID3D12GraphicsCommandList4*>(cmdList)->SetPipelineState1(cmdList->m_so);
    }
}
```
#### 7.2 Vulkan
Here is the code snippet showing what command buffer states are restored by SL in the regular integration mode, host application should do the same:
```cpp
void restorePipeline(VkCommandBuffer cmdBuffer)
{
    // Bindings to restore are read from the current thread's context
    VulkanThreadContext* thread = (VulkanThreadContext*)m_getThreadContext();
    // NOTE(review): VK_PIPELINE_BIND_POINT_MAX_ENUM looks like the "nothing recorded" sentinel - confirm
    if (thread->PipelineBindPoint != VK_PIPELINE_BIND_POINT_MAX_ENUM)
    {
        vkCmdBindPipeline(cmdBuffer, thread->PipelineBindPoint, thread->Pipeline);
    }
    if (thread->PipelineBindPointDesc != VK_PIPELINE_BIND_POINT_MAX_ENUM)
    {
        vkCmdBindDescriptorSets(cmdBuffer, thread->PipelineBindPointDesc, thread->Layout, thread->FirstSet, thread->DescriptorCount, thread->DescriptorSets, thread->DynamicOffsetCount, thread->DynamicOffsets);
    }
    // Function is declared void, so there is no status value to return
}
```
> **IMPORTANT:**
> Failure to restore command list(buffer) state correctly will cause your application to crash or misbehave in some other form.

View File

@ -0,0 +1,198 @@
Streamline - NIS
=======================
>The focus of this guide is on using Streamline to integrate the NVIDIA Image Scaling (NIS) SDK into an application. For more information about NIS itself, please visit the [NVIDIA Image Scaling SDK Github Page](https://github.com/NVIDIAGameWorks/NVIDIAImageScaling)
>For information on user interface considerations when using the NIS plugin, please see the ["RTX UI Developer Guidelines.pdf"](<RTX UI Developer Guidelines.pdf>) document included with this SDK.
Version 2.7.3
=======
### Introduction
The NVIDIA Image Scaling SDK (NIS) provides a single spatial scaling and sharpening algorithm for cross-platform support. The scaling algorithm uses a 6-tap scaling filter combined with 4 directional scaling and adaptive sharpening filters, which creates nice smooth images and sharp edges. In addition, the SDK provides a state-of-the-art adaptive directional sharpening algorithm for use in applications where no scaling is required. By integrating both NVIDIA Image Scaling and NVIDIA DLSS, developers can get the best of both worlds: NVIDIA DLSS for the best image quality, and NVIDIA Image Scaling for cross-platform support.
The directional scaling and sharpening algorithms are combined in NVScaler, while NVSharpen only implements the adaptive-directional-sharpening algorithm. Both algorithms are provided as compute shaders and developers are free to integrate them in their applications. Note that if you integrate NVScaler, you should NOT also integrate NVSharpen, as NVScaler already includes a sharpening pass.
For more information on the NVIDIA Image scaling SDK visit https://github.com/NVIDIAGameWorks/NVIDIAImageScaling
### 1.0 INITIALIZE AND SHUTDOWN
Call `slInit` as early as possible (before any dxgi/d3d11/d3d12 APIs are invoked)
```cpp
#include <sl.h>
#include <sl_consts.h>
#include <sl_nis.h>
sl::Preferences pref{};
pref.showConsole = true; // for debugging, set to false in production
pref.logLevel = sl::LogLevel::eDefault;
pref.pathsToPlugins = {}; // change this if Streamline plugins are not located next to the executable
pref.numPathsToPlugins = 0; // change this if Streamline plugins are not located next to the executable
pref.pathToLogsAndData = {}; // change this to enable logging to a file
pref.logMessageCallback = myLogMessageCallback; // highly recommended to track warning/error messages in your callback
pref.applicationId = myId; // Provided by NVDA, required if using NGX components (DLSS 2/3)
pref.engineType = myEngine; // If using UE or Unity
pref.engineVersion = myEngineVersion; // Optional version
pref.projectId = myProjectId; // Optional project id
if(SL_FAILED(res, slInit(pref)))
{
// Handle error, check the logs
if(res == sl::Result::eErrorDriverOutOfDate) { /* inform user */}
// and so on ...
}
```
For more details please see [preferences](ProgrammingGuide.md#222-preferences)
Call `slShutdown()` before destroying dxgi/d3d11/d3d12/vk instances, devices and other components in your engine.
```cpp
if(SL_FAILED(res, slShutdown()))
{
// Handle error, check the logs
}
```
#### 1.1 SET THE CORRECT DEVICE
Once the main device is created call `slSetD3DDevice` or `slSetVulkanInfo`:
```cpp
if(SL_FAILED(res, slSetD3DDevice(nativeD3DDevice)))
{
// Handle error, check the logs
}
```
### 2.0 CHECK IF NIS IS SUPPORTED
As soon as SL is initialized, you can check if NIS is available for the specific adapter you want to use:
```cpp
Microsoft::WRL::ComPtr<IDXGIFactory> factory;
if (SUCCEEDED(CreateDXGIFactory(__uuidof(IDXGIFactory), (void**)&factory)))
{
Microsoft::WRL::ComPtr<IDXGIAdapter> adapter{};
uint32_t i = 0;
while (factory->EnumAdapters(i, &adapter) != DXGI_ERROR_NOT_FOUND)
{
DXGI_ADAPTER_DESC desc{};
if (SUCCEEDED(adapter->GetDesc(&desc)))
{
sl::AdapterInfo adapterInfo{};
adapterInfo.deviceLUID = (uint8_t*)&desc.AdapterLuid;
adapterInfo.deviceLUIDSizeInBytes = sizeof(LUID);
if (SL_FAILED(result, slIsFeatureSupported(sl::kFeatureNIS, adapterInfo)))
{
// Requested feature is not supported on the system, fallback to the default method
switch (result)
{
case sl::Result::eErrorOSOutOfDate: // inform user to update OS
case sl::Result::eErrorDriverOutOfDate: // inform user to update driver
case sl::Result::eErrorNoSupportedAdapter: // cannot use this adapter (older or non-NVDA GPU etc)
// and so on ...
};
}
else
{
// Feature is supported on this adapter!
}
}
i++;
}
}
```
### 3.0 TAG ALL REQUIRED RESOURCES
NIS requires render-res input color after TAA and final-res output color buffers. We can tag resources like this:
```cpp
// Showing two scenarios, depending if resources are immutable or volatile
// IMPORTANT: Make sure to mark resources which can be deleted or reused for other purposes within a frame as volatile
// FIRST SCENARIO
sl::Resource colorIn = sl::Resource{ sl::ResourceType::eTex2d, myNativeObject, nullptr, nullptr, myInitialState};
sl::Resource colorOut = sl::Resource{ sl::ResourceType::eTex2d, myNativeObject, nullptr, nullptr, myInitialState};
// Marked both resources as volatile since they can change
sl::ResourceTag colorInTag = sl::ResourceTag {&colorIn, sl::kBufferTypeScalingInputColor, sl::ResourceLifecycle::eOnlyValidNow, &myExtent };
sl::ResourceTag colorOutTag = sl::ResourceTag {&colorOut, sl::kBufferTypeScalingOutputColor, sl::ResourceLifecycle::eOnlyValidNow, &myExtent };
// Resources must be valid at this point and valid command list must be provided since resources are volatile
// NOTE: slSetTag consumes resource TAGS, so the array element type is sl::ResourceTag
sl::ResourceTag inputs[] = {colorInTag, colorOutTag};
slSetTag(viewport, inputs, _countof(inputs), cmdList);
// SECOND SCENARIO
// Marked both resources as immutable
sl::ResourceTag colorInTag = sl::ResourceTag {&colorIn, sl::kBufferTypeScalingInputColor, sl::ResourceLifecycle::eValidUntilPresent, &myExtent };
sl::ResourceTag colorOutTag = sl::ResourceTag {&colorOut, sl::kBufferTypeScalingOutputColor, sl::ResourceLifecycle::eValidUntilPresent, &myExtent };
// Resources are immutable so they are valid all the time, no need to provide command list since no copies need to be made
// NOTE: _countof only works on C arrays, for a std::vector pass data() and size()
std::vector<sl::ResourceTag> inputs = {colorInTag, colorOutTag};
slSetTag(viewport, inputs.data(), static_cast<uint32_t>(inputs.size()), cmdList);
```
> **IMPORTANT**
> When using Vulkan additional information about the resource must be provided (width, height, format, image view etc). See `sl::Resource` for details.
### 4.0 PROVIDE NIS OPTIONS
NIS options must be set so that the NIS plugin can track any changes made by the user:
```cpp
// Using helpers from sl_nis.h
sl::NISOptions nisOptions{};
nisOptions.mode = sl::NISMode::eScaler; // use upscaling algorithm or use sl::NISMode::eSharpen for sharpening only
nisOptions.hdrMode = sl::NISHDR::eNone; // No HDR mode;
// These can be populated based on user selection in the UI
nisOptions.sharpness = myUI->getSharpness();
if(SL_FAILED(result, slNISSetOptions(viewport, nisOptions)))
{
    // Handle error here, check the logs
}
```
> **NOTE:**
> To use NIS sharpening only mode (with no up-scaling) set `sl::NISOptions.mode` to `sl::NISMode::eSharpen`
> **NOTE:**
> To turn off NIS set `sl::NISOptions.mode` to `sl::NISMode::eOff` or simply stop calling `slEvaluateFeature`. Note that this does NOT release any resources; to release them, use `slFreeResources`.
### 5.0 ADD NIS TO THE RENDERING PIPELINE
On your rendering thread, call `slEvaluateFeature` at the appropriate location where up-scaling is happening. Please note that `myViewport` used in `slEvaluateFeature` must match the one used when setting NIS options and tags (unless options and tags are provided as part of evaluate inputs)
```cpp
// Make sure NIS is available and user selected this option in the UI
if(useNIS)
{
// NOTE: We can provide all inputs here or separately using slSetTag or slNISSetOptions
// Inform SL that NIS should be injected at this point for the specific viewport
const sl::BaseStructure* inputs[] = {&myViewport};
if(SL_FAILED(result, slEvaluateFeature(sl::kFeatureNIS, *frameToken, inputs, _countof(inputs), myCmdList)))
{
// Handle error
}
else
{
// IMPORTANT: Host is responsible for restoring state on the command list used
restoreState(myCmdList);
}
}
else
{
// Default up-scaling pass like for example TAAU goes here
}
```
> **IMPORTANT:**
> Please note that **host is responsible for restoring the command buffer(list) state** after calling `slEvaluateFeature`. For more details on which states are affected please see [restore pipeline section](./ProgrammingGuideManualHooking.md#70-restoring-command-listbuffer-state)

View File

@ -0,0 +1,191 @@
Streamline - PCL Stats
=======================
>The focus of this guide is on using Streamline to integrate PCL (PC Latency) Stats into an application.
Version 2.7.3
=======
The application should not explicitly check for GPU HW, vendor, and driver version.
PCL Stats enables real time measurement of per-frame PC latency (PCL) during gameplay. E2E system latency = PCL + peripheral latency + display latency. And PCL = input sampling latency + simulation start to present + present to displayed.
Windows messages with ID == `sl::PclState::statsWindowMessage` are sent to the application periodically. The time this ping message is sent is recorded and is used to measure the latency between the simulated input and the application picking up the input. This is the input sampling latency.
Typically, the application's message pump would process keyboard and mouse input messages into a queue to be read in the upcoming frame. On seeing the ping message, the application must either call `slPclSetMarker` with `sl::PclMarker::ePCLatencyPing` to send the ping marker right away, or put it in the queue, or set a flag for the next simulation to send the ping marker.
> **NOTE:**
> The exact timing of the ping marker does not matter. It is the frame index parameter that identifies which frame picks up the simulated input. For example, since the frame index is usually incremented at simulation start, if the ping marker is sent from the message pump before simulation start, use the current frame index + 1.
### 1.0 INITIALIZE AND SHUTDOWN
Call `slInit` as early as possible (before any dxgi/d3d11/d3d12 APIs are invoked). Similar to sl.common, PCL is loaded by default and doesn't need to be explicitly requested via `sl::Preferences::featuresToLoad` (as required for other plugins).
```cpp
#include <sl.h>
#include <sl_pcl.h>
sl::Preferences pref{};
pref.showConsole = true; // for debugging, set to false in production
pref.logLevel = sl::LogLevel::eDefault;
pref.pathsToPlugins = {}; // change this if Streamline plugins are not located next to the executable
pref.numPathsToPlugins = 0; // change this if Streamline plugins are not located next to the executable
pref.pathToLogsAndData = {}; // change this to enable logging to a file
pref.logMessageCallback = myLogMessageCallback; // highly recommended to track warning/error messages in your callback
pref.applicationId = myId; // Provided by NVDA, required if using NGX components (DLSS 2/3)
pref.engineType = myEngine; // If using UE or Unity
pref.engineVersion = myEngineVersion; // Optional version
pref.projectId = myProjectId; // Optional project id
if(SL_FAILED(res, slInit(pref)))
{
// Handle error, check the logs
if(res == sl::Result::eErrorDriverOutOfDate) { /* inform user */}
// and so on ...
}
```
For more details please see [preferences](ProgrammingGuide.md#222-preferences)
Call `slShutdown()` before destroying dxgi/d3d11/d3d12/vk instances, devices and other components in your engine.
```cpp
if(SL_FAILED(res, slShutdown()))
{
// Handle error, check the logs
}
```
#### 1.1 SET THE CORRECT DEVICE
Once the main device is created call `slSetD3DDevice` or `slSetVulkanInfo`:
```cpp
if(SL_FAILED(res, slSetD3DDevice(nativeD3DDevice)))
{
// Handle error, check the logs
}
```
### 2.0 CHECK IF PCL STATS IS SUPPORTED
As soon as SL is initialized, you can check if PCL Stats is available for the specific adapter you want to use:
```cpp
Microsoft::WRL::ComPtr<IDXGIFactory> factory;
if (SUCCEEDED(CreateDXGIFactory(__uuidof(IDXGIFactory), (void**)&factory)))
{
Microsoft::WRL::ComPtr<IDXGIAdapter> adapter{};
uint32_t i = 0;
while (factory->EnumAdapters(i, &adapter) != DXGI_ERROR_NOT_FOUND)
{
DXGI_ADAPTER_DESC desc{};
if (SUCCEEDED(adapter->GetDesc(&desc)))
{
sl::AdapterInfo adapterInfo{};
adapterInfo.deviceLUID = (uint8_t*)&desc.AdapterLuid;
adapterInfo.deviceLUIDSizeInBytes = sizeof(LUID);
if (SL_FAILED(result, slIsFeatureSupported(sl::kFeaturePCL, adapterInfo)))
{
// Requested feature is not supported on the system, fallback to the default method
switch (result)
{
case sl::Result::eErrorOSOutOfDate: // inform user to update OS
case sl::Result::eErrorDriverOutOfDate: // inform user to update driver
case sl::Result::eErrorNoSupportedAdapter: // cannot use this adapter (older or non-NVDA GPU etc)
// and so on ...
};
}
else
{
// Feature is supported on this adapter!
}
}
i++;
}
}
```
> **IMPORTANT:**
> **PCL Stats is supported on all GPU hardware, vendors, and driver versions**. As long as `sl::kFeaturePCL` is supported, the application should always make the same `slPCLSetMarker` calls without any explicit checks for user enablement, or GPU hardware/vendor/driver version.
### 3.0 ADD SL PCL STATS TO THE RENDERING PIPELINE
Call `slPclSetMarker` at the appropriate locations where markers need to be injected:
```cpp
// slIsFeatureSupported reports success/failure through sl::Result, so check it with SL_FAILED
// like every other SL call instead of converting the result to bool
if (SL_FAILED(res, slIsFeatureSupported(sl::kFeaturePCL, adapterInfo)))
{
    // PCL is not available, nothing to mark
    return;
}
// Using helpers from sl_pcl.h
// Mark the section where specific activity is happening.
//
// Here for example we will mark the simulation code.
//
// Starting new frame, grab handle from SL
sl::FrameToken* currentFrame{};
if(SL_FAILED(res, slGetNewFrameToken(&currentFrame)))
{
    // Handle error
}
// Simulation start
if(SL_FAILED(res, slPclSetMarker(sl::PclMarker::eSimulationStart, *currentFrame)))
{
    // Handle error
}
// Simulation code goes here
// Simulation end
if(SL_FAILED(res, slPclSetMarker(sl::PclMarker::eSimulationEnd, *currentFrame)))
{
    // Handle error
}
// When checking for custom low latency messages inside the Windows message loop
//
if(pclState.statsWindowMessage == msgId)
{
    // PCL ping based on custom message
    // First scenario, using current frame
    if(SL_FAILED(res, slPclSetMarker(sl::PclMarker::ePCLatencyPing, *currentFrame)))
    {
        // Handle error
    }
    // Second scenario, sending ping BEFORE simulation started, need to advance to the next frame
    sl::FrameToken* nextFrame{};
    auto nextIndex = *currentFrame + 1;
    if(SL_FAILED(res, slGetNewFrameToken(&nextFrame, &nextIndex)))
    {
        // Handle error
    }
    if(SL_FAILED(res, slPclSetMarker(sl::PclMarker::ePCLatencyPing, *nextFrame)))
    {
        // Handle error
    }
}
```
### 4.0 MIGRATING FROM SL REFLEX
PCL markers were part of SL Reflex in earlier versions of SL. If migrating from one of those releases, the following changes will be required:
- Make sure you copy the new `sl.pcl.dll` to your build (similar to `sl.common.dll`)
- PCL has its own `slIsFeatureSupported(sl::kFeaturePCL, ...)` distinct from `sl::kFeatureReflex` (which is still required if using SL Reflex)
- `slReflexSetMarker()` helper in `sl_reflex.h` is replaced with `slPCLSetMarker()` in sl_pcl.h
- Markers in `sl::ReflexMarker` enum are now in `sl::PCLMarker` enum *class*
- Markers are no longer in top-level `sl::` namespace, use `sl::PCLMarker::`
- If you were using the implicit cast to `uint32_t`, it will now need to be explicit
- `eInputSample` marker was removed, this was already deprecated and removed in the native Reflex SDK but was never propagated to SL

View File

@ -0,0 +1,383 @@
Streamline - Reflex
=======================
>The focus of this guide is on using Streamline to integrate Reflex into an application. For more information about Reflex itself, please visit the [NVIDIA Developer Reflex Page][2].
>For information on user interface considerations when using this plugin, please see the ["RTX UI Developer Guidelines.pdf"][1] document included with this SDK.
Version 2.7.3
=======
Here is an overview list of sub-features in the Reflex plugin:
| Feature | GPU Vendor | GPU HW | Driver Version | Support Check | Key Setting/Marker |
| ------ | ------ | ------ | ------ | ------ | ------ |
| **Reflex Low Latency** | NVDA only | GeForce 900 Series and newer | 456.38+ | `sl::ReflexState::lowLatencyAvailable` | `sl::ReflexOptions::mode` |
| **Frame Rate Limiter** | NVDA only | All | All | Always | `sl::ReflexOptions::frameLimitUs` |
Additionally, [PCL Stats](ProgrammingGuidePCL.md):
| Feature | GPU Vendor | GPU HW | Driver Version | Support Check | Key Setting/Marker |
| ------ | ------ | ------ | ------ | ------ | ------ |
| **PC Latency Stats** | All | All | All | Always | `sl::PclMarker::ePCLatencyPing` |
> **NOTE:**
> The sub-features are distinct from each other and have no cross-dependencies. Everything is abstracted within the plugin and is transparent to the application. The application should not explicitly check for GPU HW, vendor, and driver version. The application should do everything the same regardless of sub-feature support and enablement. The only exception is Reflex UI; the application must disable Reflex UI based on `sl::ReflexState::lowLatencyAvailable`.
### 1.0 INITIALIZE AND SHUTDOWN
Call `slInit` as early as possible (before any dxgi/d3d11/d3d12 APIs are invoked)
```cpp
sl::Preferences pref{};
pref.showConsole = true; // for debugging, set to false in production
pref.logLevel = sl::LogLevel::eDefault;
pref.pathsToPlugins = {}; // change this if Streamline plugins are not located next to the executable
pref.numPathsToPlugins = 0; // change this if Streamline plugins are not located next to the executable
pref.pathToLogsAndData = {}; // change this to enable logging to a file
pref.logMessageCallback = myLogMessageCallback; // highly recommended to track warning/error messages in your callback
pref.applicationId = myId; // Provided by NVDA, required if using NGX components (DLSS 2/3)
pref.engine = myEngine; // If using UE or Unity
pref.engineVersion = myEngineVersion; // Optional version
pref.projectId = myProjectId; // Optional project id
if(SL_FAILED(res, slInit(pref)))
{
// Handle error, check the logs
if(res == sl::Result::eErrorDriverOutOfDate) { /* inform user */}
// and so on ...
}
```
For more details please see [preferences](ProgrammingGuide.md#222-preferences)
Call `slShutdown()` before destroying dxgi/d3d11/d3d12/vk instances, devices and other components in your engine.
```cpp
if(SL_FAILED(res, slShutdown()))
{
// Handle error, check the logs
}
```
#### 1.1 SET THE CORRECT DEVICE
Once the main device is created call `slSetD3DDevice` or `slSetVulkanInfo`:
```cpp
if(SL_FAILED(res, slSetD3DDevice(nativeD3DDevice)))
{
// Handle error, check the logs
}
```
### 2.0 CHECK IF REFLEX IS SUPPORTED
As soon as SL is initialized, you can check if Reflex is available for the specific adapter you want to use:
```cpp
Microsoft::WRL::ComPtr<IDXGIFactory> factory;
if (SUCCEEDED(CreateDXGIFactory(__uuidof(IDXGIFactory), (void**)&factory)))
{
Microsoft::WRL::ComPtr<IDXGIAdapter> adapter{};
uint32_t i = 0;
while (factory->EnumAdapters(i, &adapter) != DXGI_ERROR_NOT_FOUND)
{
DXGI_ADAPTER_DESC desc{};
if (SUCCEEDED(adapter->GetDesc(&desc)))
{
sl::AdapterInfo adapterInfo{};
adapterInfo.deviceLUID = (uint8_t*)&desc.AdapterLuid;
adapterInfo.deviceLUIDSizeInBytes = sizeof(LUID);
if (SL_FAILED(result, slIsFeatureSupported(sl::kFeatureReflex, adapterInfo)))
{
// Requested feature is not supported on the system, fallback to the default method
switch (result)
{
case sl::Result::eErrorOSOutOfDate: // inform user to update OS
case sl::Result::eErrorDriverOutOfDate: // inform user to update driver
case sl::Result::eErrorNoSupportedAdapter: // cannot use this adapter (older or non-NVDA GPU etc)
// and so on ...
};
}
else
{
// Feature is supported on this adapter!
}
}
i++;
}
}
```
> **IMPORTANT:**
> Reflex involves two distinct plugins: Reflex Low Latency and PC Latency (PCL) Stats. While the support for Reflex Low Latency is dependent on GPU hardware, vendor, and driver version, **PCL Stats is supported on all GPU hardware, vendors, and driver versions**. Both plugins handle all such abstractions; as long as `sl::kFeatureReflex` and `sl::kFeaturePCL` are supported, the application should always call `slReflexSleep` and `slPCLSetMarker`, respectively (without any explicit checks for user enablement, or GPU hardware/vendor/driver version).
### 3.0 CHECK REFLEX STATE AND CAPABILITIES
To find out what latency modes are available, currently active or to obtain the latest latency stats you can do the following:
```cpp
// Using helpers from sl_reflex.h
sl::ReflexState state{};
if(SL_FAILED(res, slReflexGetState(state)))
{
// Handle error here, check the logs
}
if(state.lowLatencyAvailable)
{
//
// Reflex Low Latency is available, on NVDA hardware this would be done through Reflex.
//
// The application can show the Reflex Low Latency UI. (Otherwise hide/disable the UI.)
// This is for UI only. Do everything else the same, even when this is false.
//
}
if(state.flashIndicatorDriverControlled)
{
//
// Reflex Flash Indicator (RFI) is controlled by the driver. This means
// the application should always check for left mouse button clicks and
// send the trigger flash markers accordingly. The driver will decide
// whether to show the RFI on screen based on user preference.
//
}
```
### 4.0 SET REFLEX OPTIONS
To configure Reflex please do the following:
```cpp
// Using helpers from sl_reflex.h
sl::ReflexOptions reflexOptions = {};
reflexOptions.mode = sl::ReflexMode::eLowLatency; // enable Reflex Low Latency mode, or sl::ReflexMode::eOff to disable, sl::ReflexMode::eLowLatencyWithBoost for "On + Boost"
reflexOptions.frameLimitUs = myFrameLimit; // See docs for ReflexOptions
if(SL_FAILED(res, slReflexSetOptions(reflexOptions)))
{
// Handle error here, check the logs
}
```
> **NOTE:**
> To turn off Reflex set `sl::ReflexOptions.mode` to `sl::ReflexMode::eOff`. `slReflexSleep` and `slPCLSetMarker` must always be called even when Reflex Low Latency mode is Off. PCL markers are also used by PCL Stats to measure latency.
> **NOTE:**
> For Reflex "On + Boost" set `sl::ReflexOptions.mode` to `sl::ReflexMode::eLowLatencyWithBoost`.
> **NOTE:**
> `slReflexSetOptions` needs to be called at least once, even when Reflex Low Latency is Off and there is no Reflex UI. If options do not change there is no need to call this method every frame.
### 5.0 ADD SL REFLEX TO THE RENDERING PIPELINE
Call `slReflexSleep` at the appropriate location where your application should sleep.
Here is some pseudo code:
```cpp
bool isReflexSupported = (slIsFeatureSupported(sl::kFeatureReflex, adapterInfo) == sl::Result::eOk);
if (!isReflexSupported)
{
return;
}
// Starting new frame, grab handle from SL
sl::FrameToken* currentFrame{};
if(SL_FAILED(res, slGetNewFrameToken(&currentFrame)))
{
// Handle error
}
// Using helpers from sl_reflex.h
// When your application should sleep to achieve optimal low-latency mode
//
if(SL_FAILED(res, slReflexSleep(*currentFrame)))
{
// Handle error
}
```
> **NOTE:**
> See [Reflex SDK Integration Guide][2] for details on `slReflexSleep` placement
### 6.0 PCL STATS
Reflex requires you also integrate [PCL Stats](ProgrammingGuidePCL.md)
### 7.0 HOW TO TRANSITION FROM NVAPI REFLEX TO SL REFLEX
Existing Reflex integrations can be easily converted to use SL Reflex by following these steps:
* Remove NVAPI from your application
* Remove `reflexstats.h` from your application
* There is no longer any need to provide a native D3D/VK device when making Reflex calls - SL takes care of that, hence making the integrations easier
* `NvAPI_D3D_SetSleepMode` is replaced with [set reflex options](#40-set-reflex-options)
* `NvAPI_D3D_GetSleepStatus` is replaced with [get reflex state](#30-check-reflex-state-and-capabilities) - see `sl::ReflexState::lowLatencyAvailable`
* `NvAPI_D3D_GetLatency` is replaced with [get reflex state](#30-check-reflex-state-and-capabilities) - see `sl::ReflexReport`
* `NvAPI_D3D_SetLatencyMarker` is replaced with `slPCLSetMarker`
* `NvAPI_D3D_Sleep` is replaced with `slReflexSleep`
* `NVSTATS*` calls are handled automatically by SL Reflex plugin and are GPU agnostic
* `NVSTATS_IS_PING_MSG_ID` is replaced with [get reflex state](#30-check-reflex-state-and-capabilities) - see `sl::ReflexOptions::statsWindowMessage`
### 8.0 NVIDIA REFLEX QA CHECKLIST
**Checklist**
Please use this checklist to confirm that your Reflex integration has been completed successfully. While this list is not comprehensive and does not replace rigorous testing, it should help to identify obvious issues.
Checklist Item | Pass/Fail
---|---
Reflex Low Latency's default state is On |
All 3 Reflex modes (Off, On, On + Boost) function correctly |
PC Latency (PCL) in the Reflex Test Utility is not 0.0 |
Reflex Test Utility Report passed without warning with DLSS Frame Generation |
Reflex Test Utility Report passed without warning with Reflex On |
Reflex does not significantly impact FPS (more than 4%) when Reflex is On <br> (On + Boost is expected to have some FPS hit for lowest latency) |
Reflex Test Utility Report passed without warning with Reflex On + Boost |
PCL Markers are always sent regardless of Reflex Low Latency mode state |
`slReflexSleep` is called regardless of Reflex Low Latency mode state |
Reflex Flash Indicator appears when left mouse button is pressed |
Reflex UI settings are following the [RTX UI Guidelines][1] |
Keybinding menus work properly (no F13) |
PC Latency (PCL) is not 0.0 on other IHV Hardware |
Reflex UI settings are disabled or not available on other IHV Hardware |
**Steps**
1. Locate Reflex Verification tools in `utils/reflex/`
2. Install FrameView SDK
* Double click the FrameView SDK Installer (`FVSDKSetup.exe`)
* Restart the system
3. Run `ReflexTestSetup.bat` from an administrator mode command prompt
* This will force the Reflex Flash Indicator to enable, enable the Verification HUD, set up the Reflex Test framework, and start ReflexTest.exe.
4. Check Reflex Low Latency modes
1. Run game
* Make sure game is running in fullscreen exclusive
* Make sure VSYNC is disabled
* Make sure MSHybrid mode is not enabled
2. Check Reflex Low Latency's default state is On in UI and in the Verification HUD
* Use the Reset / Default button in UI if it exists
3. Cycle through the three (3) Reflex modes in UI ("Off", "On", and "On + Boost") and check that it matches with “Reflex Mode” in the Verification HUD
5. Running Reflex Tests
* For titles with DLSS Frame Generation (FG), turn FG to On
1. Press `Alt + t` in game to start the test (2 beeps)
2. Analyze results after the test is done (3 beeps)
* Test should take approximately 5 minutes
* Check for warnings in the ReflexTest.exe output
* Turn DLSS FG to Off, Reflex to On
1. Press `Alt + t` in game to start the test (2 beeps)
2. Analyze results after the test is done (3 beeps)
* Test should take approximately 5 minutes
* Check for warnings in the ReflexTest.exe output
* Turn Reflex to On + Boost
1. Press `Alt + t` in game to start the test (2 beeps)
2. Analyze results after the test is done (3 beeps)
* Test should take approximately 5 minutes
* Check for warnings in the ReflexTest.exe output
* Press `Ctrl + c` in the command prompt to exit ReflexTest.exe
6. Test that Reflex/PCL calls are made even when Reflex is Off
1. Turn Reflex to **On**
2. Look at the Verification HUD to ensure:
* `App_Called_Sleep = 1`
3. Turn Reflex to **On + Boost**
4. Look at the Verification HUD to ensure:
* `App_Called_Sleep = 1`
5. Turn Reflex to **Off**
6. Look at the Verification HUD to ensure:
* `App_Called_Sleep = 1` (it should remain `1`)
* Markers/timestamps are still updating
7. Test the Reflex Flash Indicator
1. Verify the Reflex Flash Indicator is showing
* Notice the gray square that flashes when the left mouse button is pressed
* Verify that Flash Indicator in the Verification HUD increments by 1 when the left mouse button is pressed
8. Check UI
1. Verify UI follows the [RTX UI Guidelines][1]
9. Check keybinding
1. Run `capturePclEtw.bat` in administrator mode command prompt
2. Go back to the game. Start game play
3. Check the Keybinding menu to make sure F13 is not being automatically applied when selecting a key
4. Go back to the command prompt and press any key to exit the bat
10. Run `ReflexTestCleanUp.bat` in administrator mode command prompt
* This disables the Reflex Flash Indicator and the Reflex Test framework
11. Test on other IHV (if available)
1. Install other IHV hardware
2. Install FrameView SDK and restart the system
3. Run `PrintPCL.exe` in administrator mode command prompt
4. Run game
5. Press `Alt + t` in game
* Look at the PCL value in the command prompt. If the value is not 0.0, then PCL is working
6. Press `Ctrl + c` in the command prompt to exit PrintPCL.exe
7. Check to make sure Reflex UI is not available
12. Send NVIDIA Reflex Test report and Checklist results
* Send to NVIDIA alias: reflex-sdk-support@nvidia.com
**ReflexTestResults.txt**
The Reflex Test Utility iterates through (up to) 10 FPS points, measuring PCL. Here is an example of the report summary:
```
*** Summary [647e5c14] - Reflex ON, Frame Gen x2, I>S S>Q Q>R R>FG FG>D
252.5 FPS: PCL 23.4 ms is PASS at 5.91 FT! -0.4% FPS impact. 34.0% latency reduction. {0.96 1.42 1.95 0.43 0.66}
236.4 FPS: PCL 24.1 ms is PASS at 5.71 FT! -0.5% FPS impact. 36.0% latency reduction. {0.91 1.33 1.93 0.42 0.62}
213.3 FPS: PCL 26.0 ms is PASS at 5.54 FT! 0.1% FPS impact. 39.7% latency reduction. {1.00 1.16 1.90 0.42 0.57}
197.2 FPS: PCL 27.3 ms is PASS at 5.39 FT! -0.4% FPS impact. 40.3% latency reduction. {0.99 0.99 1.91 0.41 0.58}
177.4 FPS: PCL 28.9 ms is PASS at 5.13 FT! -0.9% FPS impact. 42.8% latency reduction. {0.96 0.85 1.89 0.41 0.52}
160.5 FPS: PCL 32.6 ms is PASS at 5.24 FT! 0.2% FPS impact. 42.8% latency reduction. {1.11 0.74 1.95 0.41 0.53}
145.0 FPS: PCL 33.5 ms is PASS at 4.85 FT! -0.1% FPS impact. 46.7% latency reduction. {0.97 0.70 1.84 0.41 0.43}
121.3 FPS: PCL 38.8 ms is PASS at 4.70 FT! -1.2% FPS impact. 47.5% latency reduction. {1.10 0.47 1.87 0.40 0.36}
59.6 FPS: PCL 68.0 ms is PASS at 4.06 FT! -0.4% FPS impact. 55.1% latency reduction. {0.93 0.21 1.78 0.40 0.23}
44.1 FPS: PCL 94.9 ms is PASS at 4.18 FT! -1.3% FPS impact. 52.6% latency reduction. {1.29 0.16 1.73 0.40 0.16}
```
Field | Description
-|-
FPS | Average frames per second when Reflex On (+ Boost).
PCL | Average PC latency when Reflex On (+ Boost).
PASS/OKAY/WARN | This is a sanity check on PCL. WARN indicates a possible issue with the integration.
FT | PCL expressed in average frame times. (E.g., 23.4 / 1000 * 252.5 = 5.91 FT)
FPS impact | Average FPS difference between Reflex On (+ Boost) vs Off. <br/>A negative value means Reflex lowers/worsens FPS.
Latency reduction | Average PCL difference between Reflex On (+ Boost) vs Off. <br/>A positive value means Reflex lowers/improves latency.
I>S | Input to simulation start latency in average frame times.
S>Q | Simulation start to queue start latency in average frame times.
Q>R | Queue start to GPU end (or DLSS FG start) latency in average frame times.
R>FG | DLSS FG start to GPU end latency in average frame times.
R>D/FG>D | GPU end to displayed latency in average frame times.
**Reflex Verification HUD**
The Reflex Verification HUD can be used to help validate your integration for correctness.
- ReflexTestEnable.exe 1 [xpos ypos]
* Display the Reflex Verification HUD in your game. Optional arguments xpos and ypos specify the on-screen position of the HUD.
- ReflexTestEnable.exe 0
* Hide the Reflex Verification HUD.
For the commands to take effect, a restart of the game is required.
> **NOTE:**
> Reflex Verification HUD requires 531.18 driver or newer.
> If the Verification HUD does not appear, you will need to add the game to the app profile: NVIDIA Control Panel -> 3D Settings -> Program Settings -> Select Program to Customize -> Add -> Select a program -> Add Selected Program -> Apply.
Fields are as follows:
Field | Value | Details
------|-------|--------|
Reflex Mode | Off, enabled,<br/> enabled + boost |
App Called Sleep | 0/1 | Is the game calling `slReflexSleep()`? <br/> **If `slIsFeatureSupported(kFeatureReflex) == true`, this should be 1 (even when Reflex is Off)**
Flash Indicator | Counter | Number of flash trigger marks seen
Total Render Time | Duration | Time spent by GPU rendering
Simulation Interval | Duration | Simulation start time of frame X - simulation start time of <br/> frame X-1 <br/> All other timestamps are relative to simulation start time
Simulation End Time | Timestamp | Simulation end marker time
Render Start/End Time | Timestamp | Render submit start/end marker time
Present Start/End Time | Timestamp | Present start/end marker time
Driver Start/End Time | Timestamp | Start = first buffer submission <br/> End = last present submission
OS Queue Start/End Time | Timestamp | Start = first significant buffer submission <br/> End = GPU end time
GPU Start/End Time | Timestamp | Start = GPU rendering starts; End = GPU rendering ends
> **NOTE:**
> All durations and timestamps should be non-zero. Start timestamps must be less than end values (i.e., start must come before end).
[1]: <RTX UI Developer Guidelines.pdf>
[2]: https://developer.nvidia.com/performance-rendering-tools/reflex

View File

@ -0,0 +1,60 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "/Engine/Private/Common.ush"
#include "/Engine/Private/FastMath.ush"
#include "/Engine/Private/ScreenPass.ush"
// Thread group dimensions used by the [numthreads] attribute of the entry point.
// Both default to 8 unless the including build already defined them.
#ifndef THREADGROUP_SIZEX
#define THREADGROUP_SIZEX 8
#endif
#ifndef THREADGROUP_SIZEY
#define THREADGROUP_SIZEY 8
#endif
// Number of threads in one group (X * Y).
#define THREADGROUP_TOTALSIZE (THREADGROUP_SIZEX * THREADGROUP_SIZEY)
// Texels whose alpha is strictly greater than this value are copied to the output; all others become zero.
float AlphaThreshold;
// Input texture whose RGBA texels are tested against AlphaThreshold.
Texture2D Backbuffer;
//SamplerState VelocityTextureSampler;
//SCREEN_PASS_TEXTURE_VIEWPORT(Velocity)
//Texture2D DepthTexture;
//SamplerState DepthTextureSampler;
// Output UAV receiving either the unmodified input texel or float4(0, 0, 0, 0).
RWTexture2D<float4> OutUIHintTexture;
//SCREEN_PASS_TEXTURE_VIEWPORT(CombinedVelocity)
// Compute entry point. For every dispatched thread it reads one texel from
// Backbuffer and writes it unchanged to OutUIHintTexture when its alpha is
// strictly greater than AlphaThreshold; otherwise it writes all zeros.
// The dispatch thread id is used directly as both the read and the write
// coordinate.
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void UIHintExtractionMain(
	uint2 GroupId : SV_GroupID,
	uint2 DispatchThreadId : SV_DispatchThreadID,
	uint2 GroupThreadId : SV_GroupThreadID,
	uint GroupIndex : SV_GroupIndex)
{
	// TODO viewrects
	const uint2 TexelCoord = DispatchThreadId;

	const float4 SourceTexel = Backbuffer[TexelCoord];

	// Start from "no UI here" and only keep the texel when it passes the alpha test.
	float4 HintTexel = float4(0.0, 0.0, 0.0, 0.0);
	if (SourceTexel.a > AlphaThreshold)
	{
		HintTexel = SourceTexel;
	}

	OutUIHintTexture[TexelCoord] = HintTexel;
}

View File

@ -0,0 +1,202 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "/Engine/Private/Common.ush"
#include "/Engine/Private/FastMath.ush"
#include "/Engine/Private/ScreenPass.ush"
// Thread group dimensions used by the [numthreads] attribute of the entry point.
// Both default to 8 unless the including build already defined them.
#ifndef THREADGROUP_SIZEX
#define THREADGROUP_SIZEX 8
#endif
#ifndef THREADGROUP_SIZEY
#define THREADGROUP_SIZEY 8
#endif
// Number of threads in one group (X * Y).
#define THREADGROUP_TOTALSIZE (THREADGROUP_SIZEX * THREADGROUP_SIZEY)
// Selects which of the two bodies of VelocityCombineMain is compiled; off by default.
#ifndef DILATE_MOTION_VECTORS
#define DILATE_MOTION_VECTORS 0
#endif
#if DILATE_MOTION_VECTORS
// Texel offset of the four diagonal depth taps taken around the current pixel.
#define AA_CROSS 1
// Added to the output pixel centre when it is mapped into the input viewport.
float2 TemporalJitterPixels;
#else
#endif
// Encoded velocity input; decoded with DecodeVelocityFromTexture where a velocity is present.
Texture2D VelocityTexture;
SamplerState VelocityTextureSampler;
// Declares the Velocity_* viewport parameters read by the entry point.
SCREEN_PASS_TEXTURE_VIEWPORT(Velocity)
// Scene depth; only the .x channel is read.
Texture2D DepthTexture;
SamplerState DepthTextureSampler;
// Optional second velocity source, only read when SUPPORT_ALTERNATE_MOTION_VECTOR is set.
Texture2D<float2> AlternateMotionVectorsTexture;
// Output UAV receiving the final two-component motion vector per pixel.
RWTexture2D<float2> OutVelocityCombinedTexture;
// Declares the CombinedVelocity_* viewport parameters read by the entry point.
SCREEN_PASS_TEXTURE_VIEWPORT(CombinedVelocity)
// Compute entry point that writes one two-component motion vector per thread into
// OutVelocityCombinedTexture.
//
// For each pixel the vector comes from VelocityTexture when that texture holds a
// velocity for the pixel, and is otherwise derived from depth by reprojecting the
// pixel through View.ClipToPrevClip. Two variants are selected at compile time:
//  - DILATE_MOTION_VECTORS != 0: the velocity is fetched at the nearest-depth texel
//    among four diagonal neighbours (offset AA_CROSS) when that neighbour is nearer
//    than the centre texel.
//  - DILATE_MOTION_VECTORS == 0: a direct per-texel lookup, optionally overridden by
//    AlternateMotionVectorsTexture when SUPPORT_ALTERNATE_MOTION_VECTOR is set.
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void VelocityCombineMain(
uint2 GroupId : SV_GroupID,
uint2 DispatchThreadId : SV_DispatchThreadID,
uint2 GroupThreadId : SV_GroupThreadID,
uint GroupIndex : SV_GroupIndex)
{
// Input texel for this thread, clamped to the last texel of the velocity viewport.
uint2 PixelPos = min(DispatchThreadId + Velocity_ViewportMin, Velocity_ViewportMax - 1);
// CombinedVelocity_ViewportMin is expected to be 0, but in case it is not
uint2 OutputPixelPos = CombinedVelocity_ViewportMin + DispatchThreadId;
// NOTE(review): PixelPos was clamped to Velocity_ViewportMax - 1 just above, so this
// test looks like it can never fail; threads past the viewport edge would then still
// write their (clamped) result to OutputPixelPos. Confirm whether that is intended.
const bool bInsideViewport = all(PixelPos.xy < Velocity_ViewportMax);
BRANCH
if (!bInsideViewport)
return;
#if DILATE_MOTION_VECTORS // TODO: 2x2.
// Screen position of minimum depth.
float2 VelocityOffset = float2(0.0, 0.0);
// This first value is replaced below once the jittered input texel PPCk is known.
float2 NearestBufferUV = (PixelPos + 0.5f) * Velocity_ViewportSizeInverse;
//float2 ViewportUV = NearestBufferUV;
float2 ViewportUV = (float2(DispatchThreadId) + 0.5f) * CombinedVelocity_ViewportSizeInverse;
// Pixel coordinate of the center of output pixel O in the input viewport.
float2 PPCo = ViewportUV * Velocity_ViewportSize + TemporalJitterPixels;
// Pixel coordinate of the center of the nearest input pixel K.
float2 PPCk = floor(PPCo) + 0.5;
// Pixel coordinate of the center of the nearest top left input pixel T.
// NOTE(review): PPCt is not read anywhere below.
float2 PPCt = floor(PPCo - 0.5) + 0.5;
NearestBufferUV = Velocity_ExtentInverse * (Velocity_ViewportMin + PPCk);
// FIND MOTION OF PIXEL AND NEAREST IN NEIGHBORHOOD
// ------------------------------------------------
float3 PosN; // Position of this pixel, possibly later nearest pixel in neighborhood.
PosN.xy = ViewportUVToScreenPos(ViewportUV);
PosN.z = DepthTexture.SampleLevel(DepthTextureSampler, NearestBufferUV, 0).x;
{
// For motion vector, use camera/dynamic motion from min depth pixel in pattern around pixel.
// This enables better quality outline on foreground against different motion background.
// Larger 2 pixel distance "x" works best (because AA dilates surface).
// Depths.xyzw = top-left, top-right, bottom-left, bottom-right diagonal taps.
float4 Depths;
Depths.x = DepthTexture.SampleLevel(DepthTextureSampler, NearestBufferUV, 0, int2(-AA_CROSS, -AA_CROSS)).x;
Depths.y = DepthTexture.SampleLevel(DepthTextureSampler, NearestBufferUV, 0, int2(AA_CROSS, -AA_CROSS)).x;
Depths.z = DepthTexture.SampleLevel(DepthTextureSampler, NearestBufferUV, 0, int2(-AA_CROSS, AA_CROSS)).x;
Depths.w = DepthTexture.SampleLevel(DepthTextureSampler, NearestBufferUV, 0, int2(AA_CROSS, AA_CROSS)).x;
// DepthOffset starts at the bottom-right tap; DepthOffsetXx remembers the X sign of
// the nearer tap in the top row so it can be applied if the top row wins.
float2 DepthOffset = float2(AA_CROSS, AA_CROSS);
float DepthOffsetXx = float(AA_CROSS);
#if HAS_INVERTED_Z_BUFFER
// Nearest depth is the largest depth (depth surface 0=far, 1=near).
if (Depths.x > Depths.y)
{
DepthOffsetXx = -AA_CROSS;
}
if (Depths.z > Depths.w)
{
DepthOffset.x = -AA_CROSS;
}
float DepthsXY = max(Depths.x, Depths.y);
float DepthsZW = max(Depths.z, Depths.w);
// Top row holds the nearest tap: switch Y to the top row and take its X sign.
if (DepthsXY > DepthsZW)
{
DepthOffset.y = -AA_CROSS;
DepthOffset.x = DepthOffsetXx;
}
float DepthsXYZW = max(DepthsXY, DepthsZW);
// Only move away from the centre texel when a neighbour is strictly nearer.
if (DepthsXYZW > PosN.z)
{
// This is offset for reading from velocity texture.
// This supports half or fractional resolution velocity textures.
// With the assumption that UV position scales between velocity and color.
VelocityOffset = DepthOffset * Velocity_ExtentInverse;
// This is [0 to 1] flipped in Y.
//PosN.xy = ScreenPos + DepthOffset * ViewportSize.zw * 2.0;
PosN.z = DepthsXYZW;
}
#else // !HAS_INVERTED_Z_BUFFER
#error Fix me!
#endif // !HAS_INVERTED_Z_BUFFER
}
// Camera motion for pixel or nearest pixel (in ScreenPos space).
// NOTE(review): OffScreen, Velocity and HistoryBlur are declared but never read in this branch.
bool OffScreen = false;
float Velocity = 0;
float HistoryBlur = 0;
// Reproject the (possibly dilated) position into the previous frame to get camera-only motion.
float4 ThisClip = float4(PosN.xy, PosN.z, 1);
float4 PrevClip = mul(ThisClip, View.ClipToPrevClip);
float2 PrevScreen = PrevClip.xy / PrevClip.w;
float2 BackN = PosN.xy - PrevScreen;
// NOTE(review): this value of BackTemp is overwritten below before it is read.
float2 BackTemp = BackN * Velocity_ViewportSize;
float4 VelocityN = VelocityTexture.SampleLevel(VelocityTextureSampler, NearestBufferUV + VelocityOffset, 0);
// A positive X channel is treated as "a velocity was written for this texel".
bool DynamicN = VelocityN.x > 0.0;
if (DynamicN)
{
BackN = DecodeVelocityFromTexture(VelocityN).xy;
}
// Scale by the output viewport size, apply the (0.5, -0.5) factor and negate for the final value.
BackTemp = BackN * CombinedVelocity_ViewportSize;
OutVelocityCombinedTexture[OutputPixelPos].xy = -BackTemp * float2(0.5, -0.5);
#else
// Non-dilated path: direct per-texel loads, no neighbourhood search.
float4 EncodedVelocity = VelocityTexture[PixelPos];
float Depth = DepthTexture[PixelPos].x;
float2 Velocity;
// Both X and Y must be positive for the texel to be decoded as a stored velocity.
if (all(EncodedVelocity.xy > 0))
{
Velocity = DecodeVelocityFromTexture(EncodedVelocity).xy;
}
else
{
// No stored velocity: derive motion by reprojecting this texel's depth into the previous frame.
float4 ClipPos;
ClipPos.xy = SvPositionToScreenPosition(float4(PixelPos.xy, 0, 1)).xy;
ClipPos.z = Depth;
ClipPos.w = 1;
float4 PrevClipPos = mul(ClipPos, View.ClipToPrevClip);
// Skip the perspective divide when w is not positive.
if (PrevClipPos.w > 0)
{
float2 PrevScreen = PrevClipPos.xy / PrevClipPos.w;
Velocity = ClipPos.xy - PrevScreen.xy;
}
else
{
// Fallback: pass the raw (undecoded) texture value through.
Velocity = EncodedVelocity.xy;
}
}
// Apply the (0.5, -0.5) factor and scale by the view size from View.ViewSizeAndInvSize.xy.
float2 OutVelocity = Velocity * float2(0.5, -0.5) * View.ViewSizeAndInvSize.xy;
#if SUPPORT_ALTERNATE_MOTION_VECTOR
const float2 EncodedAltVelocity = AlternateMotionVectorsTexture[PixelPos];
// A positive X channel means the alternate texture has a velocity for this texel; it replaces the one above.
if (EncodedAltVelocity.x > 0.0f)
{
float2 DecodedVelocity = DecodeVelocityFromTexture(float4(EncodedAltVelocity, 0.0f, 0.0f)).xy;
// we encode in the orientation DLSS expects, so the extra negate it to make them consistent with the ones
// generated above
OutVelocity = -1.0f * DecodedVelocity * CombinedVelocity_ViewportSize;
}
#endif
OutVelocityCombinedTexture[OutputPixelPos].xy = -OutVelocity;
#endif
}

View File

@ -0,0 +1,257 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "StreamlineLibrary.h"
#include "StreamlineLibraryPrivate.h"
#include "HAL/IConsoleManager.h"
#if WITH_STREAMLINE
#include "StreamlineCore.h"
#include "StreamlineRHI.h"
#include "StreamlineAPI.h"
#include "sl.h"
#include "sl_dlss_g.h"
#include "sl_reflex.h"
#include "sl_deepdvc.h"
#endif
#define LOCTEXT_NAMESPACE "FStreamlineBlueprintModule"
DEFINE_LOG_CATEGORY(LogStreamlineBlueprint);
// One requirements/support record per EStreamlineFeature value; indexed through ValidateAndConvertToIndex().
TStaticArray<FStreamlineFeatureRequirements, static_cast<uint8>(EStreamlineFeature::Count)> UStreamlineLibrary::Features;
// Set by TryInitStreamlineLibrary() on its first call and checked there to skip repeated initialization.
bool UStreamlineLibrary::bStreamlineLibraryInitialized = false;
#if WITH_STREAMLINE
// Maps the support status reported by the Streamline core module onto the
// Blueprint-facing EStreamlineFeatureSupport enum. Values without a dedicated
// mapping are reported as NotSupported.
EStreamlineFeatureSupport ToUStreamlineFeatureSupport(Streamline::EStreamlineFeatureSupport Support)
{
	static_assert(int32(Streamline::EStreamlineFeatureSupport::NumValues) == 7, "dear NVIDIA plugin developer, please update this code to handle the new enum values ");

	using ECoreSupport = Streamline::EStreamlineFeatureSupport;

	switch (Support)
	{
		case ECoreSupport::Supported:
			return EStreamlineFeatureSupport::Supported;

		case ECoreSupport::NotSupportedIncompatibleHardware:
			return EStreamlineFeatureSupport::NotSupportedIncompatibleHardware;

		case ECoreSupport::NotSupportedDriverOutOfDate:
			return EStreamlineFeatureSupport::NotSupportedDriverOutOfDate;

		case ECoreSupport::NotSupportedOperatingSystemOutOfDate:
			return EStreamlineFeatureSupport::NotSupportedOperatingSystemOutOfDate;

		case ECoreSupport::NotSupportedHardwareSchedulingDisabled:
			return EStreamlineFeatureSupport::NotSupportedHardewareSchedulingDisabled;

		case ECoreSupport::NotSupportedIncompatibleRHI:
			return EStreamlineFeatureSupport::NotSupportedByRHI;

		// Explicit NotSupported and anything unexpected share the same result.
		case ECoreSupport::NotSupported:
		default:
			return EStreamlineFeatureSupport::NotSupported;
	}
}
namespace
{
	// Copies the major/minor/build fields of an sl::Version into the Blueprint-facing version struct.
	FStreamlineVersion FromStreamlineVersion(const sl::Version& SLVersion)
	{
		const int32 Major = static_cast<int32>(SLVersion.major);
		const int32 Minor = static_cast<int32>(SLVersion.minor);
		const int32 Build = static_cast<int32>(SLVersion.build);
		return FStreamlineVersion{ Major, Minor, Build };
	}

	// Returns the Streamline SDK feature id for a Blueprint feature enum value,
	// or 0 when the value has no corresponding SDK feature.
	uint32 FromUStreamlineFeature(EStreamlineFeature InFeature)
	{
		static_assert(int32(EStreamlineFeature::Count) == 4, "dear NVIDIA plugin developer, please update this code to handle the new enum values ");

		uint32 SLFeatureId = 0;
		switch (InFeature)
		{
			case EStreamlineFeature::DLSSG:
				SLFeatureId = sl::kFeatureDLSS_G;
				break;
			case EStreamlineFeature::Latewarp:
				SLFeatureId = sl::kFeatureLatewarp;
				break;
			case EStreamlineFeature::Reflex:
				SLFeatureId = sl::kFeatureReflex;
				break;
			case EStreamlineFeature::DeepDVC:
				SLFeatureId = sl::kFeatureDeepDVC;
				break;
			default:
				break;
		}
		return SLFeatureId;
	}
}
#endif
// Converts a feature enum value into an index into Features.
// Values at or beyond the array size map to index 0.
int32 UStreamlineLibrary::ValidateAndConvertToIndex(EStreamlineFeature Feature)
{
	const int32 Index = static_cast<int32>(Feature);
	const bool bWithinArray = Index < UStreamlineLibrary::Features.Num();
	return bWithinArray ? Index : 0;
}
// Splits a requirements bit mask into one boolean per flag.
//
// Requirements                - bit mask to decompose.
// D3D11Supported .. HardwareSchedulingRequired
//                             - receive true when the corresponding flag is set.
//
// All outputs are reset to false before validation, so a caller never reads an
// unassigned value when ValidateEnumBitFlags rejects the mask.
void UStreamlineLibrary::BreakStreamlineFeatureRequirements(EStreamlineFeatureRequirementsFlags Requirements, bool& D3D11Supported, bool& D3D12Supported, bool& VulkanSupported, bool& VSyncOffRequired, bool& HardwareSchedulingRequired)
{
	D3D11Supported = false;
	D3D12Supported = false;
	VulkanSupported = false;
	VSyncOffRequired = false;
	HardwareSchedulingRequired = false;

	const bool bFlagsValid = ValidateEnumBitFlags(Requirements, __FUNCTION__);
	if (!bFlagsValid)
	{
		// Leave every output at false for a mask that fails validation.
		return;
	}

	D3D11Supported = EnumHasAllFlags(Requirements, EStreamlineFeatureRequirementsFlags::D3D11Supported);
	D3D12Supported = EnumHasAllFlags(Requirements, EStreamlineFeatureRequirementsFlags::D3D12Supported);
	VulkanSupported = EnumHasAllFlags(Requirements, EStreamlineFeatureRequirementsFlags::VulkanSupported);
	VSyncOffRequired = EnumHasAllFlags(Requirements, EStreamlineFeatureRequirementsFlags::VSyncOffRequired);
	HardwareSchedulingRequired = EnumHasAllFlags(Requirements, EStreamlineFeatureRequirementsFlags::HardwareSchedulingRequired);
}
// Returns the stored requirements record for a feature, or a default-constructed
// record when the enum value fails validation.
FStreamlineFeatureRequirements UStreamlineLibrary::GetStreamlineFeatureInformation(EStreamlineFeature Feature)
{
	const bool bKnownFeature = ValidateEnumValue(Feature, __FUNCTION__);
	if (!bKnownFeature)
	{
		return FStreamlineFeatureRequirements();
	}

	const int32 FeatureIndex = ValidateAndConvertToIndex(Feature);
	return Features[FeatureIndex];
}
// True only when the feature value passes validation and its stored support
// status is exactly Supported.
bool UStreamlineLibrary::IsStreamlineFeatureSupported(EStreamlineFeature Feature)
{
	TRY_INIT_STREAMLINE_LIBRARY_AND_RETURN(false)

	const bool bKnownFeature = ValidateEnumValue(Feature, __FUNCTION__);
	// Short-circuit keeps the support query from running for an invalid value.
	return bKnownFeature && (QueryStreamlineFeatureSupport(Feature) == EStreamlineFeatureSupport::Supported);
}
// Returns the stored support status for a feature; NotSupported when the enum
// value fails validation.
EStreamlineFeatureSupport UStreamlineLibrary::QueryStreamlineFeatureSupport(EStreamlineFeature Feature)
{
	TRY_INIT_STREAMLINE_LIBRARY_AND_RETURN(EStreamlineFeatureSupport::NotSupported)

	EStreamlineFeatureSupport SupportStatus = EStreamlineFeatureSupport::NotSupported;
	if (ValidateEnumValue(Feature, __FUNCTION__))
	{
		const int32 FeatureIndex = ValidateAndConvertToIndex(Feature);
		SupportStatus = Features[FeatureIndex].Support;
	}
	return SupportStatus;
}
// Library start-up hook: attempts the library initialization when Streamline is
// compiled in, otherwise only logs that the library is stubbed out.
void UStreamlineLibrary::Startup()
{
#if !WITH_STREAMLINE
	UE_LOG(LogStreamlineBlueprint, Log, TEXT("Streamline is not supported on this platform at build time. The Streamline Blueprint library however is supported and stubbed out to ignore any calls to enable Streamline features and will always return UStreamlineFeatureSupport::NotSupportedByPlatformAtBuildTime, regardless of the underlying hardware. This can be used to e.g. to turn off related UI elements."));
#else
	// Expected to fail unless this module loads in PostEngineInit; that is acceptable
	// because initialization is retried lazily by the query functions.
	TryInitStreamlineLibrary();
#endif
}
// Counterpart to Startup(). There is currently nothing to tear down; the original
// kept an empty non-shipping Streamline section here as a placeholder.
void UStreamlineLibrary::Shutdown()
{
}
// Fills the Features record for InFeature with the driver/OS versions and
// requirement flags reported by Streamline, and stores InSupport as its support
// status. Does nothing when Streamline is not compiled in or not supported at runtime.
void UStreamlineLibrary::RegisterFeatureSupport(EStreamlineFeature InFeature, EStreamlineFeatureSupport InSupport)
{
#if WITH_STREAMLINE
	sl::Feature FeatureId = FromUStreamlineFeature(InFeature);
	FStreamlineFeatureRequirements& Record = Features[ValidateAndConvertToIndex(InFeature)];

	if (!IsStreamlineSupported())
	{
		return;
	}

	// The flag values are copied across with a plain cast further down, so the two
	// enums must agree bit for bit.
	// static_assert and static_cast are best friends
#define UE_SL_ENUM_CHECK(A,B) static_assert(uint32(sl::FeatureRequirementFlags::A) == uint32(EStreamlineFeatureRequirementsFlags::B), "sl::FeatureRequirementFlags vs UStreamlineFeatureRequirementsFlags enum mismatch");
	UE_SL_ENUM_CHECK(eD3D11Supported, D3D11Supported)
	UE_SL_ENUM_CHECK(eD3D12Supported, D3D12Supported)
	UE_SL_ENUM_CHECK(eVulkanSupported, VulkanSupported)
	UE_SL_ENUM_CHECK(eVSyncOffRequired, VSyncOffRequired)
	UE_SL_ENUM_CHECK(eHardwareSchedulingRequired, HardwareSchedulingRequired)
#undef UE_SL_ENUM_CHECK

	sl::FeatureRequirements Queried;
	SLgetFeatureRequirements(FeatureId, Queried);

	Record.RequiredDriverVersion = FromStreamlineVersion(Queried.driverVersionRequired);
	Record.DetectedDriverVersion = FromStreamlineVersion(Queried.driverVersionDetected);
	Record.RequiredOperatingSystemVersion = FromStreamlineVersion(Queried.osVersionRequired);
	Record.DetectedOperatingSystemVersion = FromStreamlineVersion(Queried.osVersionDetected);

	// Keep only the API-support bits for RHIs this plugin implements, and pass every
	// non-API bit through untouched.
	const sl::FeatureRequirementFlags ImplementedAPIFlags = PlatformGetAllImplementedStreamlineRHIs();
	const sl::FeatureRequirementFlags AllAPIFlags = sl::FeatureRequirementFlags::eD3D11Supported | sl::FeatureRequirementFlags::eD3D12Supported | sl::FeatureRequirementFlags::eVulkanSupported;
	const auto ImplementedAPIBits = SLBitwiseAnd(Queried.flags, ImplementedAPIFlags);
	const auto NonAPIBits = SLBitwiseAnd(Queried.flags, ~AllAPIFlags);
	const sl::FeatureRequirementFlags FilteredFlags = sl::FeatureRequirementFlags(ImplementedAPIBits | NonAPIBits);

	Record.Requirements = static_cast<EStreamlineFeatureRequirementsFlags>(FilteredFlags);
	Record.Support = InSupport;
#endif
}
#if WITH_STREAMLINE
// Delayed initialization, which allows this module to be available early so blueprints can be loaded before DLSS is available in PostEngineInit
// Marks the Blueprint library as initialized on the first call.
// Always returns true in the current implementation, on first and on repeated calls.
bool UStreamlineLibrary::TryInitStreamlineLibrary()
{
	// TODO: nothing extra happens yet when the library is already initialized.
	//
	// Disabled sketch, kept for reference: register on-screen messages before bailing out
	// so error messages can be shown.
	//#if !UE_BUILD_SHIPPING
	//	if (!DLSSOnScreenMessagesDelegateHandle.IsValid())
	//	{
	//		DLSSOnScreenMessagesDelegateHandle = FCoreDelegates::OnGetOnScreenMessages.AddStatic(&GetDLSSOnScreenMessages);
	//	}
	//#endif
	if (!bStreamlineLibraryInitialized)
	{
		bStreamlineLibraryInitialized = true;
	}
	return true;
}
#endif // WITH_STREAMLINE
// Starts the Blueprint library unless the r.Streamline.InitializePlugin console variable
// exists and is set to false, in which case only a log line is emitted.
void FStreamlineBlueprintModule::StartupModule()
{
	auto InitializePluginCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("r.Streamline.InitializePlugin"));

	// A missing console variable counts as "enabled".
	const bool bInitializationDisabled = InitializePluginCVar && !InitializePluginCVar->GetBool();
	if (bInitializationDisabled)
	{
		UE_LOG(LogStreamlineBlueprint, Log, TEXT("Initialization of StreamlineBlueprint is disabled."));
		return;
	}

	UStreamlineLibrary::Startup();
}
// Shuts the Blueprint library down, using the same r.Streamline.InitializePlugin check as
// startup: nothing happens when the console variable exists and is false.
void FStreamlineBlueprintModule::ShutdownModule()
{
	auto InitializePluginCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("r.Streamline.InitializePlugin"));

	// A missing console variable counts as "enabled".
	const bool bPluginEnabled = !InitializePluginCVar || InitializePluginCVar->GetBool();
	if (bPluginEnabled)
	{
		UStreamlineLibrary::Shutdown();
	}
}
#undef LOCTEXT_NAMESPACE
IMPLEMENT_MODULE(FStreamlineBlueprintModule, StreamlineBlueprint)

View File

@ -0,0 +1,31 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "CoreMinimal.h"
#include "StreamlineLibrary.h"
DECLARE_LOG_CATEGORY_EXTERN(LogStreamlineBlueprint, Verbose, All);
// TRY_INIT_STREAMLINE_LIBRARY_AND_RETURN(Value)
// Guard placed at the top of Blueprint library functions.
// With Streamline compiled in: calls TryInitStreamlineLibrary(); on failure logs an error naming
// the enclosing function and returns Value from it (Value may be left empty).
// Without Streamline: expands to nothing.
// The expansion is a bare `if` block rather than do/while(0), and call sites invoke it without a
// trailing semicolon, so it must be used as a standalone statement (never directly before an `else`).
#if WITH_STREAMLINE
#define TRY_INIT_STREAMLINE_LIBRARY_AND_RETURN(ReturnValueOrEmptyOrVoidPreFiveThree) \
if (!TryInitStreamlineLibrary()) \
{ \
UE_LOG(LogStreamlineBlueprint, Error, TEXT("%s should not be called before PostEngineInit"), ANSI_TO_TCHAR(__FUNCTION__)); \
return ReturnValueOrEmptyOrVoidPreFiveThree; \
}
#else
#define TRY_INIT_STREAMLINE_LIBRARY_AND_RETURN(ReturnValueWhichCanBeEmpty)
#endif

View File

@ -0,0 +1,226 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "CoreMinimal.h"
#include "Containers/StaticArray.h"
#include "Modules/ModuleManager.h"
#include "Kismet/BlueprintFunctionLibrary.h"
#include "UObject/Class.h"
#include "Misc/EngineVersionComparison.h"
#if UE_VERSION_NEWER_THAN(5,4,0)
#include "Blueprint/BlueprintExceptionInfo.h"
#endif
#include "StreamlineLibrary.generated.h"
//namespace sl
//{
// struct Version;
//}
// This should be updated whenever new Blueprint libraries are added for new features.
#define STREAMLINE_LIBARY_KEYWORDS "DLSS-G, Reflex, DeepDVC, Latewarp, Streamline"
// Streamline features exposed to Blueprint. Count is a hidden sentinel equal to the number of
// real features; it sizes the UStreamlineLibrary::Features table and is not a valid feature itself.
UENUM(BlueprintType)
enum class EStreamlineFeature : uint8
{
DLSSG UMETA(DisplayName = "DLSS Frame Generation"),
Latewarp UMETA(DisplayName = "Latewarp"),
Reflex UMETA(DisplayName = "Reflex"),
DeepDVC UMETA(DisplayName = "DeepDVC"),
Count UMETA(Hidden)
};
// Blueprint-facing result of a feature support query: either Supported, or the reason why the
// feature is unavailable. Enumerator names and order are unchanged; only the tooltips of
// NotSupportedByRHI and NotSupportedByPlatformAtBuildTime were corrected, because they contained
// a double negative ("doesn't not support") that stated the opposite of the intended meaning.
UENUM(BlueprintType)
enum class EStreamlineFeatureSupport : uint8
{
	Supported UMETA(DisplayName = "Supported"),
	NotSupported UMETA(DisplayName = "Not Supported"),
	NotSupportedIncompatibleHardware UMETA(DisplayName = "Incompatible Hardware", ToolTip = "This feature requires an NVIDIA RTX GPU"),
	NotSupportedDriverOutOfDate UMETA(DisplayName = "Driver Out of Date", ToolTip = "The driver is outdated. Also see GetStreamlineFeatureGMinimumDriverVersion"),
	NotSupportedOperatingSystemOutOfDate UMETA(DisplayName = "Operating System Out of Date", ToolTip = "The Operating System is outdated. Also see GetStreamlineFeatureMinimumOperatingSystemVersion"),
	NotSupportedHardewareSchedulingDisabled UMETA(DisplayName = "Hardware Scheduling Disabled", ToolTip = "This feature requires Windows Hardware Scheduling to be Enabled"),
	NotSupportedByRHI UMETA(DisplayName = "Not supported by RHI", ToolTip = "This RHI doesn't support this feature at run time."),
	NotSupportedByPlatformAtBuildTime UMETA(DisplayName = "Platform Not Supported At Build Time", ToolTip = "This platform doesn't support this feature at build time. Currently this feature is only supported on Windows 64"),
	NotSupportedIncompatibleAPICaptureToolActive UMETA(DisplayName = "Incompatible API Capture Tool Active", ToolTip = "This feature is not compatible with an active API capture tool such as RenderDoc.")
};
// Bit flags describing which rendering APIs a feature supports and which system settings it requires.
// UStreamlineLibrary::RegisterFeatureSupport static_asserts that several of these values match
// sl::FeatureRequirementFlags and then static_casts between the two, so bit positions must not change.
UENUM(BlueprintType, meta = (Bitflags))
enum class EStreamlineFeatureRequirementsFlags : uint8
{
None = 0,
D3D11Supported = 1 << 0,
D3D12Supported = 1 << 1,
VulkanSupported = 1 << 2,
VSyncOffRequired = 1 << 3,
HardwareSchedulingRequired = 1 << 4
};
ENUM_CLASS_FLAGS(EStreamlineFeatureRequirementsFlags)
// Three-part version number (major.minor.build) exposed to Blueprint; every part defaults to 0.
USTRUCT(BlueprintType)
struct FStreamlineVersion
{
GENERATED_BODY()
public:
UPROPERTY(BlueprintReadWrite, Category = "Streamline")
int32 Major = 0;
UPROPERTY(BlueprintReadWrite, Category = "Streamline")
int32 Minor = 0;
UPROPERTY(BlueprintReadWrite, Category = "Streamline")
int32 Build = 0;
};
// Compile-time reminder: fails when a feature is added to EStreamlineFeature, so that the
// Keywords strings on the UFUNCTIONs below get revisited.
static_assert(uint8(EStreamlineFeature::Count) == 4u, "dear NVIDIA plugin developer, please update the Keywords below handle the new enum values");
// Per-feature record stored in UStreamlineLibrary::Features: the support state plus the
// requirement flags and the required/detected OS and driver versions.
// A default-constructed record reports NotSupportedByPlatformAtBuildTime, no requirement flags,
// and all-zero versions.
USTRUCT(BlueprintType)
struct FStreamlineFeatureRequirements
{
GENERATED_BODY()
public:
UPROPERTY(BlueprintReadWrite, Category = "Streamline")
EStreamlineFeatureSupport Support = EStreamlineFeatureSupport::NotSupportedByPlatformAtBuildTime;
UPROPERTY(BlueprintReadWrite, Category = "Streamline")
EStreamlineFeatureRequirementsFlags Requirements = EStreamlineFeatureRequirementsFlags::None;
UPROPERTY(BlueprintReadWrite, Category = "Streamline")
FStreamlineVersion RequiredOperatingSystemVersion;
UPROPERTY(BlueprintReadWrite, Category = "Streamline")
FStreamlineVersion DetectedOperatingSystemVersion;
UPROPERTY(BlueprintReadWrite, Category = "Streamline")
FStreamlineVersion RequiredDriverVersion;
UPROPERTY(BlueprintReadWrite, Category = "Streamline")
FStreamlineVersion DetectedDriverVersion;
};
UCLASS(MinimalAPI)
class UStreamlineLibrary : public UBlueprintFunctionLibrary
{
friend class FStreamlineBlueprintModule;
GENERATED_BODY()
public:
/** Returns the requirements record (support state, requirement flags, required and detected OS/driver versions) for a Streamline feature. */
UFUNCTION(BlueprintPure, Category = "Streamline", meta = (DisplayName = "Get NVIDIA Streamline Feature information", Keywords = "Reflex, DLSS-G, Latewarp, DeepDVC"))
static STREAMLINEBLUEPRINT_API FStreamlineFeatureRequirements GetStreamlineFeatureInformation(EStreamlineFeature Feature);
/** Splits a requirements bit mask into one boolean output per flag. */
UFUNCTION(BlueprintPure, Category = "Streamline", meta = (/*DisplayName = "Get Streamline Feature Requirements", */Keywords = "Reflex, DLSS-G, Latewarp, DeepDVC"))
static STREAMLINEBLUEPRINT_API void BreakStreamlineFeatureRequirements(EStreamlineFeatureRequirementsFlags Requirements, bool& D3D11Supported, bool& D3D12Supported, bool& VulkanSupported, bool& VSyncOffRequired, bool& HardwareSchedulingRequired);
/** Checks whether a Streamline feature is supported, i.e. whether QueryStreamlineFeatureSupport returns Supported. Further details can be retrieved via QueryStreamlineFeatureSupport */
UFUNCTION(BlueprintPure, Category = "Streamline", meta = (DisplayName = "Is NVIDIA Streamline Feature Supported", Keywords = "Reflex, DLSS-G, Latewarp, DeepDVC" ))
static STREAMLINEBLUEPRINT_API bool IsStreamlineFeatureSupported(EStreamlineFeature Feature);
/** Returns the detailed support state of a Streamline feature, or NotSupported for an invalid feature value. */
UFUNCTION(BlueprintPure, Category = "Streamline", meta = (DisplayName = "Query NVIDIA Streamline Feature Support", Keywords = "Reflex, DLSS-G, Latewarp, DeepDVC"))
static STREAMLINEBLUEPRINT_API EStreamlineFeatureSupport QueryStreamlineFeatureSupport(EStreamlineFeature Feature);
// C++-only entry point (no UFUNCTION): stores the support state for a feature in the Features table.
static STREAMLINEBLUEPRINT_API void RegisterFeatureSupport(EStreamlineFeature Feature, EStreamlineFeatureSupport Support);
protected:
// Called by FStreamlineBlueprintModule (a friend) on module startup and shutdown.
static void Startup();
static void Shutdown();
private:
// One record per feature, indexed by the value returned from ValidateAndConvertToIndex.
static TStaticArray<FStreamlineFeatureRequirements, static_cast<uint8>(EStreamlineFeature::Count)> Features;
static int32 ValidateAndConvertToIndex(EStreamlineFeature Feature);
// Set once TryInitStreamlineLibrary has run.
static bool bStreamlineLibraryInitialized;
static bool TryInitStreamlineLibrary();
};
/**
 * Checks that Value is a declared enumerator of the UEnum type UE and is not the enum's maximum value.
 *
 * In non-shipping builds an invalid value is reported through FFrame::KismetExecutionMessage and,
 * on engine versions newer than 5.4.0, additionally raised as a Blueprint breakpoint exception
 * when a script stack frame is active on the calling thread.
 *
 * @param Value     Enum value to validate (may have been produced by casting an arbitrary byte).
 * @param CallSite  Name of the calling function, used only in the diagnostic message.
 * @return true when the value is valid, false otherwise.
 *
 * NOTE(review): for enums ending in an explicit sentinel such as `Count`, whether that sentinel is
 * rejected depends on what UEnum::GetMaxEnumValue() returns for the reflected enum - confirm.
 */
template <typename UE>
bool ValidateEnumValue(UE Value, const char* CallSite)
{
	// UEnums are strongly typed, but then one can also cast a byte to an UEnum ...
	const UEnum* Enum = StaticEnum<UE>();
	const bool bIsValid = Enum->IsValidEnumValue(int64(Value)) && (Enum->GetMaxEnumValue() != int64(Value));
#if !UE_BUILD_SHIPPING
	if (!bIsValid)
	{
		// The value is passed as int64, so it has to be formatted with %lld rather than %d.
		const FString ValidationMessage = FString::Printf(TEXT("%s should not be called with an invalid enum value (%lld) \"%s\""),
			ANSI_TO_TCHAR(CallSite), int64(Value), *Enum->GetDisplayNameTextByValue(int64(Value)).ToString());
		FFrame::KismetExecutionMessage(*ValidationMessage, ELogVerbosity::Error);
#if UE_VERSION_NEWER_THAN(5,4,0)
		// These library functions are exported and can be called from native code, where no
		// Blueprint frame may exist; only raise the script exception when there is one.
		if (FFrame* TopStackFrame = FFrame::GetThreadLocalTopStackFrame())
		{
			const FText ValidationMessageAsText = FText::FromString(ValidationMessage);
			const FBlueprintExceptionInfo ExceptionInfo(EBlueprintExceptionType::Breakpoint, ValidationMessageAsText);
			FBlueprintCoreDelegates::ThrowScriptException(TopStackFrame->Object, *TopStackFrame, ExceptionInfo);
		}
#endif
	}
#endif
	return bIsValid;
}
/**
 * Checks that Value, interpreted as a bit mask, only contains bits that belong to declared
 * enumerators of the UEnum type UE.
 *
 * In non-shipping builds an invalid mask is reported through FFrame::KismetExecutionMessage.
 *
 * @param Value     Bit mask to validate.
 * @param CallSite  Name of the calling function, used only in the diagnostic message.
 * @return true when no unknown bits are set, false otherwise.
 */
template <typename UE>
bool ValidateEnumBitFlags(UE Value, const char* CallSite)
{
	// UEnums are strongly typed, but then one can also cast a byte to an UEnum ...
	const UEnum* Enum = StaticEnum<UE>();

	// Clear every bit that a declared enumerator accounts for; anything left over is unknown.
	int64 RemainingBits = int64(Value);
	for (int32 EnumIndex = 0; EnumIndex < Enum->NumEnums() - 1 /* avoid _MAX */; ++EnumIndex)
	{
		const int64 KnownSingleBit = Enum->GetValueByIndex(EnumIndex);
		RemainingBits &= ~KnownSingleBit;
	}
	const bool bIsValid = 0 == RemainingBits;
#if !UE_BUILD_SHIPPING
	if (!bIsValid)
	{
		// The value is passed as int64, so it has to be formatted with %lld rather than %d.
		FFrame::KismetExecutionMessage(*FString::Printf(
			TEXT("%s should not be called with an invalid enum bitflags (%lld) \"%s\""),
			ANSI_TO_TCHAR(CallSite), int64(Value), *Enum->GetDisplayNameTextByValue(int64(Value)).ToString()),
			ELogVerbosity::Error);
	}
#endif
	return bIsValid;
}
// TODO maybe move inter SL plugin stuff into a separate header?
// Only available when Streamline is compiled in: converts the StreamlineCore support enum
// (Streamline::EStreamlineFeatureSupport) to the Blueprint-facing EStreamlineFeatureSupport.
#if WITH_STREAMLINE
#include "StreamlineCore.h"
STREAMLINEBLUEPRINT_API EStreamlineFeatureSupport ToUStreamlineFeatureSupport(Streamline::EStreamlineFeatureSupport Support);
#endif
// Module entry point for StreamlineBlueprint; its StartupModule/ShutdownModule overrides
// drive UStreamlineLibrary::Startup and UStreamlineLibrary::Shutdown.
class FStreamlineBlueprintModule final : public IModuleInterface
{
public:
/** IModuleInterface implementation */
virtual void StartupModule() override;
virtual void ShutdownModule() override;
private:
};

View File

@ -0,0 +1,79 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
using UnrealBuildTool;
using System.IO;
/// <summary>
/// UnrealBuildTool rules for the StreamlineBlueprint module. Always defines WITH_STREAMLINE
/// (1 or 0) and only links against the Streamline modules on supported platforms.
/// </summary>
public class StreamlineBlueprint : ModuleRules
{
	/// <summary>Streamline is compiled in for every platform of the Windows platform group.</summary>
	protected virtual bool IsSupportedPlatform(ReadOnlyTargetRules Target)
	{
		return Target.Platform.IsInGroup(UnrealPlatformGroup.Windows);
	}

	public StreamlineBlueprint(ReadOnlyTargetRules Target) : base(Target)
	{
		PCHUsage = ModuleRules.PCHUsageMode.UseExplicitOrSharedPCHs;

		// Engine modules needed on every platform.
		string[] CommonPrivateModules =
		{
			"Core",
			"CoreUObject",
			"Engine",
			"RenderCore",
			"Renderer",
			"Projects",
		};
		PrivateDependencyModuleNames.AddRange(CommonPrivateModules);

		// The original rules also passed empty arrays to the include-path lists; those calls
		// added nothing and are omitted here.
		bool bWithStreamline = IsSupportedPlatform(Target);
		PublicDefinitions.Add(bWithStreamline ? "WITH_STREAMLINE=1" : "WITH_STREAMLINE=0");

		if (!bWithStreamline)
		{
			return;
		}

		// Streamline-specific modules, only on supported platforms.
		PublicDependencyModuleNames.AddRange(new string[] { "StreamlineCore" });
		PrivateDependencyModuleNames.AddRange(new string[] { "StreamlineRHI", "Streamline" });
	}
}

View File

@ -0,0 +1,214 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "StreamlineCore.h"
#include "StreamlineCorePrivate.h"
#include "CoreMinimal.h"
#include "StreamlineSettings.h"
#include "StreamlineViewExtension.h"
#include "StreamlineReflex.h"
#include "StreamlineDLSSG.h"
#include "StreamlineLatewarp.h"
#include "StreamlineDeepDVC.h"
#include "StreamlineRHI.h"
#include "sl_helpers.h"
#include "Modules/ModuleManager.h"
#include "Interfaces/IPluginManager.h"
#include "GeneralProjectSettings.h"
#if WITH_EDITOR
#include "ISettingsModule.h"
#endif
#include "SceneViewExtension.h"
#include "SceneView.h"
#include "Misc/MessageDialog.h"
#define LOCTEXT_NAMESPACE "FStreamlineModule"
DEFINE_LOG_CATEGORY(LogStreamline);
// Epic requested a CVar to control whether the plugin will perform initialization or not.
// This allows the plugin to be included in a project and active but allows for it to not do anything
// at runtime.
// Registered read-only (ECVF_ReadOnly) with a default of true. The module's StartupModule and
// ShutdownModule both return early when it is false.
static TAutoConsoleVariable<bool> CVarStreamlineInitializePlugin(
TEXT("r.Streamline.InitializePlugin"),
true,
TEXT("Enable/disable initializing the Streamline plugin (default = true)"),
ECVF_ReadOnly);
// Maps a Streamline SDK result code to the plugin's feature-support enum.
// Both adapter-related errors map to NotSupportedIncompatibleHardware; every result code that
// is not listed explicitly maps to the generic NotSupported.
Streamline::EStreamlineFeatureSupport TranslateStreamlineResult(sl::Result Result)
{
	using ESupport = Streamline::EStreamlineFeatureSupport;

	switch (Result)
	{
	case sl::Result::eOk:
		return ESupport::Supported;

	case sl::Result::eErrorOSDisabledHWS:
		return ESupport::NotSupportedHardwareSchedulingDisabled;

	case sl::Result::eErrorOSOutOfDate:
		return ESupport::NotSupportedOperatingSystemOutOfDate;

	case sl::Result::eErrorDriverOutOfDate:
		return ESupport::NotSupportedDriverOutOfDate;

	case sl::Result::eErrorNoSupportedAdapterFound:
	case sl::Result::eErrorAdapterNotSupported:
		return ESupport::NotSupportedIncompatibleHardware;

	case sl::Result::eErrorMissingOrInvalidAPI:
		return ESupport::NotSupportedIncompatibleRHI;

	default:
		break;
	}

	// Anything not handled above is reported without a specific reason.
	return ESupport::NotSupported;
}
// Module startup for StreamlineCore.
// Returns immediately when r.Streamline.InitializePlugin is false. Otherwise, when the platform
// reports Streamline as supported, it optionally creates the scene view extension and registers
// the Reflex, DLSS-G and Latewarp hooks. In editor builds it also registers the plugin's
// project settings pages.
void FStreamlineCoreModule::StartupModule()
{
if (!CVarStreamlineInitializePlugin.GetValueOnAnyThread())
{
UE_LOG(LogStreamline, Log, TEXT("Initialization of StreamlineCore is disabled."));
return;
}
// This code will execute after your module is loaded into memory; the exact timing is specified in the .uplugin file per-module
UE_LOG(LogStreamline, Log, TEXT("%s Enter"), ANSI_TO_TCHAR(__FUNCTION__));
if (GetPlatformStreamlineSupport() == EStreamlineSupport::Supported)
{
// set the view family extension that's gonna call into SL in the postprocessing pass
// Default: create it when any of DLSS-G, Latewarp or DeepDVC is supported.
bool bShouldCreateViewExtension = IsStreamlineDLSSGSupported() || IsStreamlineLatewarpSupported() || IsStreamlineDeepDVCSupported();
// Command-line overrides; -slnoviewextension is evaluated last and therefore wins when both are given.
if (FParse::Param(FCommandLine::Get(), TEXT("slviewextension")))
{
bShouldCreateViewExtension = true;
}
if (FParse::Param(FCommandLine::Get(), TEXT("slnoviewextension")))
{
bShouldCreateViewExtension = false;
}
if (bShouldCreateViewExtension)
{
StreamlineViewExtension = FSceneViewExtensions::NewExtension<FStreamlineViewExtension>(GetStreamlineRHI());
}
else
{
StreamlineViewExtension = nullptr;
}
// Reflex hooks are registered unconditionally once the platform supports Streamline.
RegisterStreamlineReflexHooks();
// DLSS-G and Latewarp hooks are registered when the feature is supported or tagging is forced.
if (ForceTagStreamlineBuffers() || IsStreamlineDLSSGSupported())
{
RegisterStreamlineDLSSGHooks(GetStreamlineRHI());
}
if (ForceTagStreamlineBuffers() || IsStreamlineLatewarpSupported())
{
RegisterStreamlineLatewarpHooks(GetStreamlineRHI());
}
LogStreamlineFeatureSupport(sl::kFeatureImGUI, *GetStreamlineRHI()->GetAdapterInfo());
}
// Logs 1 or 0 depending on whether Streamline as a whole is supported.
UE_LOG(LogStreamline, Log, TEXT("NVIDIA Streamline supported %u"), QueryStreamlineSupport() == EStreamlineSupport::Supported);
#if WITH_EDITOR
// Settings registration is skipped when the Settings module is not loaded (GetModulePtr returns null).
ISettingsModule* SettingsModule = FModuleManager::GetModulePtr<ISettingsModule>("Settings");
if (SettingsModule != nullptr)
{
UStreamlineSettings* Settings = GetMutableDefault<UStreamlineSettings>();
SettingsModule->RegisterSettings("Project", "Plugins", "Streamline",
LOCTEXT("StreamlineSettingsName", "NVIDIA Streamline"),
LOCTEXT("StreamlineSettingsDecription", "Configure the NVIDIA Streamline plugins"),
Settings
);
UStreamlineOverrideSettings* OverrideSettings = GetMutableDefault<UStreamlineOverrideSettings>();
SettingsModule->RegisterSettings("Project", "Plugins", "StreamlineOverride",
LOCTEXT("StreamlineOverrideSettingsName", "NVIDIA Streamline Overrides (Local)"),
LOCTEXT("StreamlineOverrideSettingsDescription", "Configure the local settings for the NVIDIA Streamline plugins"),
OverrideSettings);
}
#endif
UE_LOG(LogStreamline, Log, TEXT("%s Leave"), ANSI_TO_TCHAR(__FUNCTION__));
}
// Module shutdown for StreamlineCore.
// Returns immediately when r.Streamline.InitializePlugin is false. Otherwise it releases the
// view extension, unregisters the feature hooks and, in editor builds, the settings pages.
void FStreamlineCoreModule::ShutdownModule()
{
if (!CVarStreamlineInitializePlugin.GetValueOnAnyThread())
{
return;
}
UE_LOG(LogStreamline, Log, TEXT("%s Enter"), ANSI_TO_TCHAR(__FUNCTION__));
{
StreamlineViewExtension = nullptr;
}
if (GetPlatformStreamlineSupport() == EStreamlineSupport::Supported)
{
// Hooks are unregistered in the reverse order of their registration in StartupModule.
// NOTE(review): StartupModule registers the Latewarp and DLSS-G hooks when
// `ForceTagStreamlineBuffers() || Is...Supported()`, but the checks below only test
// `Is...Supported()`. With forced tagging on an unsupported feature the hooks appear to
// stay registered - confirm whether that is intended.
if (IsStreamlineLatewarpSupported())
{
UnregisterStreamlineLatewarpHooks();
}
if (IsStreamlineDLSSGSupported())
{
UnregisterStreamlineDLSSGHooks();
}
UnregisterStreamlineReflexHooks();
}
#if WITH_EDITOR
ISettingsModule* SettingsModule = FModuleManager::GetModulePtr<ISettingsModule>("Settings");
if (SettingsModule != nullptr)
{
SettingsModule->UnregisterSettings("Project", "Plugins", "Streamline");
SettingsModule->UnregisterSettings("Project", "Plugins", "StreamlineOverride");
}
#endif
UE_LOG(LogStreamline, Log, TEXT("%s Leave"), ANSI_TO_TCHAR(__FUNCTION__));
}
// Returns the platform-level Streamline support state by forwarding to GetPlatformStreamlineSupport().
EStreamlineSupport FStreamlineCoreModule::QueryStreamlineSupport() const
{
return GetPlatformStreamlineSupport();
}
// Returns the DLSS-G support state by forwarding to QueryStreamlineDLSSGSupport().
Streamline::EStreamlineFeatureSupport FStreamlineCoreModule::QueryDLSSGSupport() const
{
return QueryStreamlineDLSSGSupport();
}
// Returns the Latewarp support state by forwarding to QueryStreamlineLatewarpSupport().
Streamline::EStreamlineFeatureSupport FStreamlineCoreModule::QueryLatewarpSupport() const
{
return QueryStreamlineLatewarpSupport();
}
// Returns the DeepDVC support state by forwarding to QueryStreamlineDeepDVCSupport().
Streamline::EStreamlineFeatureSupport FStreamlineCoreModule::QueryDeepDVCSupport() const
{
return QueryStreamlineDeepDVCSupport();
}
// Returns the Reflex support state by forwarding to QueryStreamlineReflexSupport().
Streamline::EStreamlineFeatureSupport FStreamlineCoreModule::QueryReflexSupport() const
{
return QueryStreamlineReflexSupport();
}
// Returns the platform's Streamline RHI object by forwarding to the global GetPlatformStreamlineRHI().
FStreamlineRHI* FStreamlineCoreModule::GetStreamlineRHI()
{
return ::GetPlatformStreamlineRHI();
}
#undef LOCTEXT_NAMESPACE
IMPLEMENT_MODULE(FStreamlineCoreModule, StreamlineCore)

View File

@ -0,0 +1,59 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "CoreMinimal.h"
#include "Slate/SceneViewport.h"
#include "Framework/Application/SlateApplication.h"
#include "RenderGraphBuilder.h"
// Log category shared by the StreamlineCore sources.
DECLARE_LOG_CATEGORY_EXTERN(LogStreamline, Verbose, All);
// Helpers shared between the StreamlineCore sources that decide whether resources get tagged for Streamline.
bool ShouldTagStreamlineBuffers();
bool ForceTagStreamlineBuffers();
bool NeedStreamlineViewIdOverride();
// Forward declarations so this header does not need the Streamline SDK or StreamlineCore headers.
// NOTE(review): an opaque `enum class` declaration without an explicit base uses `int`; this must
// match the real definitions - confirm against the SDK headers.
namespace sl
{
enum class Result;
}
namespace Streamline
{
enum class EStreamlineFeatureSupport;
}
// Maps a Streamline SDK result code to the plugin's feature-support enum.
Streamline::EStreamlineFeatureSupport TranslateStreamlineResult(sl::Result Result);
// Empty parameter struct used by AddStreamlineStateRenderPass; the pass declares no RDG resources.
BEGIN_SHADER_PARAMETER_STRUCT(FSLSetStateShaderParameters, )
END_SHADER_PARAMETER_STRUCT()
/**
 * Adds a never-culled RDG pass that evaluates feature state when the pass runs and then forwards
 * it to the RHI thread.
 *
 * When the pass executes, StateOnRenderThread(ViewID, SecondaryViewRect) is called and its result
 * ("Options") is handed to OnRHIThread(Cmd, ViewID, SecondaryViewRect, Options) through
 * FRHICommandListImmediate::EnqueueLambda.
 *
 * Both callables are copied into the pass lambda (and OnRHIThread again into the enqueued lambda).
 * Previously they were captured by reference; because the lambdas run after this function has
 * returned, those references dangled whenever a caller passed a temporary lambda. As a consequence
 * both callables must be copy-constructible.
 *
 * @param FeatureName          Feature name used in the RDG event name.
 * @param GraphBuilder         Graph builder the pass is added to.
 * @param ViewID               View identifier passed through to both callables.
 * @param SecondaryViewRect    View rectangle passed through to both callables (captured by value).
 * @param StateOnRenderThread  Callable producing the options when the pass executes.
 * @param OnRHIThread          Callable consuming the options from the enqueued RHI lambda.
 */
template<typename StateOnRenderThreadLambda, typename RHIThreadLambda >
void AddStreamlineStateRenderPass(const TCHAR* FeatureName, FRDGBuilder& GraphBuilder, uint32 ViewID, const FIntRect& SecondaryViewRect, StateOnRenderThreadLambda&& StateOnRenderThread, RHIThreadLambda&& OnRHIThread)
{
	FSLSetStateShaderParameters* PassParameters = GraphBuilder.AllocParameters<FSLSetStateShaderParameters>();

	// NOTE(review): "% u" in the event name looks like a typo for "%u"; left untouched here so the
	// emitted event name does not change.
	GraphBuilder.AddPass(
		RDG_EVENT_NAME("Streamline %s State ViewID = % u", FeatureName, ViewID),
		PassParameters,
		ERDGPassFlags::Compute | ERDGPassFlags::Raster | ERDGPassFlags::SkipRenderPass | ERDGPassFlags::NeverCull,
		// Capture the callables BY VALUE: this lambda outlives the current call.
		[PassParameters, ViewID, SecondaryViewRect, OnRHIThread, StateOnRenderThread](FRHICommandListImmediate& RHICmdList) mutable
		{
			auto Options = StateOnRenderThread(ViewID, SecondaryViewRect);
			RHICmdList.EnqueueLambda(
				// Also by value: the enqueued lambda may run after the pass lambda has finished.
				[ViewID, SecondaryViewRect, Options, OnRHIThread](FRHICommandListImmediate& Cmd) mutable
				{
					OnRHIThread(Cmd, ViewID, SecondaryViewRect, Options);
				});
		});
}

View File

@ -0,0 +1,700 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "StreamlineDLSSG.h"
#include "StreamlineLatewarp.h"
#include "StreamlineCore.h"
#include "StreamlineShaders.h"
#include "StreamlineCorePrivate.h"
#include "StreamlineAPI.h"
#include "StreamlineRHI.h"
#include "StreamlineViewExtension.h"
#include "sl_helpers.h"
#include "sl_dlss_g.h"
#include "UIHintExtractionPass.h"
#include "CoreMinimal.h"
#include "Framework/Application/SlateApplication.h"
#include "RenderGraphBuilder.h"
#include "Runtime/Launch/Resources/Version.h"
#include "ScenePrivate.h"
#include "SystemTextures.h"
#include "HAL/PlatformApplicationMisc.h"
// Handles for the delegates this file subscribes to, kept so the subscriptions can be removed later.
static FDelegateHandle OnPreRHIViewportCreateHandle;
static FDelegateHandle OnPostRHIViewportCreateHandle;
static FDelegateHandle OnSlateWindowDestroyedHandle;
static FDelegateHandle OnPreResizeWindowBackBufferHandle;
static FDelegateHandle OnPostResizeWindowBackBufferHandle;
static FDelegateHandle OnBackBufferReadyToPresentHandle;
// DLSS Frame Generation mode selector (off / always on / auto).
static TAutoConsoleVariable<int32> CVarStreamlineDLSSGEnable(
TEXT("r.Streamline.DLSSG.Enable"),
0,
TEXT("DLSS-FG mode (default = 0)\n")
TEXT("0: off\n")
TEXT("1: always on\n")
TEXT("2: auto mode (on only when it helps)\n"),
ECVF_Default);
// Motion blur timescale compensation while frames are being generated.
static TAutoConsoleVariable<int32> CVarStreamlineDLSSGAdjustMotionBlurTimeScale(
TEXT("r.Streamline.DLSSG.AdjustMotionBlurTimeScale"), 2,
TEXT("When DLSS-G is active, adjust the motion blur timescale based on the generated frames\n")
TEXT("0: disabled\n")
TEXT("1: enabled, not supporting auto mode\n")
TEXT("2: enabled, supporting auto mode by using last frame's actually presented frames (default)\n"),
ECVF_Default);
// Resource tagging switches, read on the render thread.
static TAutoConsoleVariable<bool> CVarStreamlineTagUIColorAlpha(
TEXT("r.Streamline.TagUIColorAlpha"),
true,
TEXT("Pass UI color and alpha into Streamline (default = true)\n"),
ECVF_RenderThreadSafe);
static TAutoConsoleVariable<bool> CVarStreamlineTagBackbuffer(
TEXT("r.Streamline.TagBackbuffer"),
true,
TEXT("Pass backbuffer extent into Streamline (default = true)\n"),
ECVF_RenderThreadSafe);
static TAutoConsoleVariable<float> CVarStreamlineTagUIColorAlphaThreshold(
TEXT("r.Streamline.TagUIColorAlphaThreshold"),
0.0,
TEXT("UI extraction pass alpha threshold value(default = 0.0) \n"),
ECVF_RenderThreadSafe);
// Editor counterpart of r.Streamline.TagUIColorAlpha (used instead of it when GIsEditor is set).
static TAutoConsoleVariable<bool> CVarStreamlineEditorTagUIColorAlpha(
TEXT("r.Streamline.Editor.TagUIColorAlpha"),
false,
TEXT("Experimental: Pass UI color and alpha into Streamline in Editor PIE windows (default = false)\n"),
ECVF_RenderThreadSafe);
static TAutoConsoleVariable<bool> CVarStreamlineDLSSGCheckStatusPerFrame(
TEXT("r.Streamline.DLSSG.CheckStatusPerFrame"),
true,
TEXT("Check the DLSSG status at runtime and assert if it's failing somehow (default = true)\n"),
ECVF_RenderThreadSafe);
// Runtime switch read by ForceTagStreamlineBuffers(), in addition to the -slforcetagging command-line flag.
static TAutoConsoleVariable<bool> CVarStreamlineForceTagging(
TEXT("r.Streamline.ForceTagging"),
false,
TEXT("Force tagging Streamline resources even if they are not required based on active Streamline features (default = false)\n"),
ECVF_RenderThreadSafe);
static TAutoConsoleVariable<bool> CVarStreamlineFullScreenMenuDetection(
TEXT("r.Streamline.DLSSG.FullScreenMenuDetection"),
false,
TEXT("Automatically disable DLSS-FG if full screen menus are detected (default = false)\n"),
ECVF_RenderThreadSafe);
static TAutoConsoleVariable<int32> CVarStreamlineDLSSGDynamicResolutionMode(
TEXT("r.Streamline.DLSSG.DynamicResolutionMode"),
0,
TEXT("Experimental: Pass in sl::DLSSGFlags::eDynamicResolutionEnabled (default = 0)\n")
TEXT("0: off\n")
TEXT("1: on\n"),
ECVF_RenderThreadSafe);
static TAutoConsoleVariable<int32> CVarStreamlineDLSSGFramesToGenerate(
TEXT("r.Streamline.DLSSG.FramesToGenerate"),
1,
TEXT("Number of frames to generate (default = 1)\n")
TEXT("1..3: \n"),
ECVF_Default);
// File-local counter, initialised to zero; presumably tracks live DLSS-G instances - TODO confirm against its users.
static int32 NumDLSSGInstances = 0;
bool ForceTagStreamlineBuffers()
{
static bool bStreamlineForceTagging = FParse::Param(FCommandLine::Get(), TEXT("slforcetagging"));
return bStreamlineForceTagging || CVarStreamlineForceTagging.GetValueOnAnyThread();
}
// Returns true when Streamline resources should be tagged: tagging is forced, or DLSS-G or
// Latewarp is currently active. The checks run in that order and stop at the first hit.
bool ShouldTagStreamlineBuffers()
{
	if (ForceTagStreamlineBuffers())
	{
		return true;
	}
	if (IsDLSSGActive())
	{
		return true;
	}
	return IsLatewarpActive();
}
// Error callback for DLSS-G: forwards the reported API error to the Streamline RHI's handler.
static void DLSSGAPIErrorCallBack(const sl::APIError& lastError)
{
	auto* const StreamlineRHI = FStreamlineCoreModule::GetStreamlineRHI();
	StreamlineRHI->APIErrorHandler(lastError);
}
// TODO template shenanigans to infer from TSharedPtr mode, to allow modifed UE4 with threadsafe shared pointers work automatically
// Compile-time answer to "can Slate shared pointers be used from this thread?":
// false when building against engine major version 4, true for every other version.
constexpr bool AreSlateSharedPointersThreadSafe()
{
#if ENGINE_MAJOR_VERSION != 4
	return true;
#else
	return false;
#endif
}
// Computes the rectangle of the window's viewport.
// Falls back to a rectangle at the origin with the size reported by SWindow::GetViewportSize()
// when the window has no viewport or the viewport has no widget (e.g. during app shutdown).
static FIntRect GetViewportRect(SWindow& InWindow)
{
	// During app shutdown, the window might not have a viewport anymore, so using SWindow::GetViewportSize() that handles that transparently.
	FIntRect Result = FIntRect(FIntPoint::ZeroValue, InWindow.GetViewportSize().IntPoint());

	if (!AreSlateSharedPointersThreadSafe())
	{
		// this is off by a bit in UE5 due to additional borders and editor UI scaling that's not present in UE4
		// but we expect to run this only in UE4, if at all
		const FSlateRect ScreenSpaceClientRect = InWindow.GetClientRectInScreen();
		const FSlateRect WindowSpaceClientRect = ScreenSpaceClientRect.OffsetBy(-InWindow.GetPositionInScreen());
		Result = FIntRect(WindowSpaceClientRect.Left, WindowSpaceClientRect.Top, WindowSpaceClientRect.Right, WindowSpaceClientRect.Bottom);
		return Result;
	}

	TSharedPtr<ISlateViewport> SlateViewport = InWindow.GetViewport();
	if (!SlateViewport.IsValid())
	{
		return Result;
	}

	TSharedPtr<SWidget> ViewportWidget = SlateViewport->GetWidget().Pin();
	if (!ViewportWidget.IsValid())
	{
		return Result;
	}

	// Use the widget's paint-space geometry: top-left is the absolute position,
	// bottom-right is position plus absolute size, both truncated to integers.
	FGeometry WidgetGeometry = ViewportWidget->GetPaintSpaceGeometry();
	FIntPoint TopLeft = { int32(WidgetGeometry.GetAbsolutePosition().X),int32(WidgetGeometry.GetAbsolutePosition().Y) };
	FIntPoint BottomRight = { int32((WidgetGeometry.GetAbsolutePosition() + WidgetGeometry.GetAbsoluteSize()).X),
		int32((WidgetGeometry.GetAbsolutePosition() + WidgetGeometry.GetAbsoluteSize()).Y) };
	Result = FIntRect(TopLeft.X, TopLeft.Y, BottomRight.X, BottomRight.Y);
	return Result;
}
static void DLSSGOnBackBufferReadyToPresent(SWindow& InWindow, const FTextureRHIRef& InBackBuffer)
{
check(IsInRenderingThread());
const bool bIsGameWindow = InWindow.GetType() == EWindowType::GameWindow;
#if WITH_EDITOR
const bool bIsPIEWindow = GIsEditor && (InWindow.GetTitle().ToString().Contains(TEXT("Preview [NetMode:")));
#else
const bool bIsPIEWindow = false;
#endif
if (!(bIsGameWindow || bIsPIEWindow))
{
return;
}
// we need to "consume" the views for this backbuffer, even if we don't tag them
#if DEBUG_STREAMLINE_VIEW_TRACKING
FStreamlineViewExtension::LogTrackedViews(*FString::Printf(TEXT("%s Entry %s Backbuffer=%p"), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName(), InBackBuffer->GetTexture2D()));
#endif
TArray<FTrackedView>& TrackedViews = FStreamlineViewExtension::GetTrackedViews();
// the sceneview extension (via viewfamily) knows the texture it is getting rendered into.
// in game mode, this is the actual backbuffer (same as the argument to this callback)
// in the editor, this is a different, intermediate rendertarget (BufferedRT)
// so we need to handle either case to associate views to this backbuffer
FRHITexture* RealOrBufferedBackBuffer = InBackBuffer->GetTexture2D();
if (AreSlateSharedPointersThreadSafe())
{
if (TSharedPtr<ISlateViewport> Viewport = InWindow.GetViewport())
{
FSceneViewport* SceneViewport = static_cast<FSceneViewport*> (Viewport.Get());
const FTextureRHIRef& SceneViewPortRenderTarget = SceneViewport->GetRenderTargetTexture();
if (SceneViewPortRenderTarget.IsValid())
{
#if ENGINE_MAJOR_VERSION == 4
check(GIsEditor);
#else
// TODO: the following check asserts when taking a screenshot in game with F9.
// We should fix this properly, but it's Friday afternoon so I'm changing it to a non-fatal ensure for now
ensure(GIsEditor || (FRDGBuilder::IsDumpingFrame() && (InBackBuffer == SceneViewPortRenderTarget->GetTexture2D())));
#endif
RealOrBufferedBackBuffer = SceneViewPortRenderTarget->GetTexture2D();
}
}
else
{
check(!GIsEditor);
}
}
else
{
// this is not trivial/impossible to implement without getting the window/ rendertarget information from the gamethread
// this is OK in UE5 since by default we can talk to the gamethread from the renderthread here in a thread safe way
// but not in UE4
}
// Note: we cannot empty the array after we found the views for the current backbufffer since we get multiple present callbacks in case when we have multiple
// swapchains / windows so selectively removing those only for the current backbuffer still keeps those around for the next time we get the present callback for a different swapchain.
// This can happen in PIE mode with multiple active PIE windows
TArray<FTrackedView> ViewsInThisBackBuffer;
int32 ViewRectIndex = 0;
while (ViewRectIndex < TrackedViews.Num())
{
if (TrackedViews[ViewRectIndex].Texture->GetTexture2D() == RealOrBufferedBackBuffer)
{
ViewsInThisBackBuffer.Add(TrackedViews[ViewRectIndex]);
TrackedViews.RemoveAtSwap(ViewRectIndex);
}
else
{
++ViewRectIndex;
}
}
const static auto CVarStreamlineViewIndexToTag = IConsoleManager::Get().FindConsoleVariable(TEXT("r.Streamline.ViewIndexToTag"));
if (CVarStreamlineViewIndexToTag )
{
if (const int32 ViewIndexToTag = CVarStreamlineViewIndexToTag->GetInt() != -1)
{
for (int32 ViewIndex = 0; ViewIndex < ViewsInThisBackBuffer.Num(); ++ViewIndex)
{
if (ViewIndex == ViewIndexToTag)
{
const FTrackedView ViewToTrack = ViewsInThisBackBuffer[ViewIndex];
ViewsInThisBackBuffer.Empty();
ViewsInThisBackBuffer.Add(ViewToTrack);
break;
}
}
}
}
#if DEBUG_STREAMLINE_VIEW_TRACKING
if (FStreamlineViewExtension::DebugViewTracking())
{
const FString ViewRectString = FString::JoinBy(ViewsInThisBackBuffer, TEXT(", "), [](const FTrackedView& State)
{
return FString::FromInt(State.ViewKey);
}
);
UE_LOG(LogStreamline, Log, TEXT(" ViewsInThisBackBuffer=%s"), *ViewRectString);
FStreamlineViewExtension::LogTrackedViews(*FString::Printf(TEXT("%s Exit %s Backbuffer=%p "), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName(), InBackBuffer->GetTexture2D()));
}
#endif
if (!ShouldTagStreamlineBuffers())
{
return;
}
if (!ViewsInThisBackBuffer.Num())
{
return;
}
const bool bTagUIColorAlpha = ForceTagStreamlineBuffers() ||(GIsEditor ? CVarStreamlineEditorTagUIColorAlpha.GetValueOnRenderThread() : CVarStreamlineTagUIColorAlpha.GetValueOnRenderThread());
const bool bTagBackbuffer = ForceTagStreamlineBuffers() || (CVarStreamlineTagBackbuffer.GetValueOnRenderThread());
// TODO maybe add a helper function to add the RDG pass to tag a resource and use that everywhere
FRHICommandListImmediate& RHICmdList = FRHICommandListExecutor::GetImmediateCommandList();
FRDGBuilder GraphBuilder(RHICmdList);
FSLUIHintTagShaderParameters* PassParameters = GraphBuilder.AllocParameters<FSLUIHintTagShaderParameters>();
FStreamlineRHI* RHIExtensions = FStreamlineCoreModule::GetStreamlineRHI();
FIntPoint BackBufferDimension = { int32(InBackBuffer->GetTexture2D()->GetSizeX()), int32(InBackBuffer->GetTexture2D()->GetSizeY()) };
const FIntRect WindowClientAreaRect = GetViewportRect(InWindow);
// in PIE windows, the actual client area the scene gets rendered into is offset to make space
// for the window title bar and such.
// game mode (via -game or client configs) should have this to be 0
const FIntPoint ViewportOffsetInWindow = WindowClientAreaRect.Min;
// For multi view, we need to tag all off those. And be careful about lifetime of the UI buffer since that's only alive inside the RDG pass when we tag
// backbuffer is alive through present 🤞
for(FTrackedView& View : ViewsInThisBackBuffer)
{
// this is a bit weird, but we might end up having multiple view families of different number of views, but since we have only one cvar
// we need to be careful
View.UnscaledViewRect += ViewportOffsetInWindow;
}
#if DEBUG_STREAMLINE_VIEW_TRACKING
if (FStreamlineViewExtension::DebugViewTracking())
{
ensure(!WindowClientAreaRect.IsEmpty());
ensure(WindowClientAreaRect.Width() <= BackBufferDimension.X);
ensure(WindowClientAreaRect.Height() <= BackBufferDimension.Y);
ensure(WindowClientAreaRect.Min.X >= 0);
ensure(WindowClientAreaRect.Min.Y >= 0);
}
#endif
if(bTagBackbuffer)
{
PassParameters->BackBuffer = InBackBuffer;
}
if (bTagUIColorAlpha)
{
const float AlphaThreshold = CVarStreamlineTagUIColorAlphaThreshold.GetValueOnRenderThread();
FRDGTextureRef UIHintTexture = AddStreamlineUIHintExtractionPass(GraphBuilder, AlphaThreshold, InBackBuffer);
PassParameters->UIColorAndAlpha = UIHintTexture;
}
else
{
PassParameters->UIColorAndAlpha = nullptr;
}
AddStreamlineUIHintTagPass(GraphBuilder, bTagBackbuffer, bTagUIColorAlpha, BackBufferDimension, PassParameters, 0, RHIExtensions, ViewsInThisBackBuffer, WindowClientAreaRect, NeedStreamlineViewIdOverride());
}
// Hooks DLSSGOnBackBufferReadyToPresent into the Slate renderer's OnBackBufferReadyToPresent
// delegate and installs an FSlateApplication::OnPreShutdown lambda that removes it again.
// InStreamlineRHI is not used by this function.
void RegisterStreamlineDLSSGHooks(FStreamlineRHI* InStreamlineRHI)
{
	UE_LOG(LogStreamline, Log, TEXT("%s Enter"), ANSI_TO_TCHAR(__FUNCTION__));

	// asserts that buffer tagging is enabled or DLSS-G is supported, and that Slate is up
	check(ShouldTagStreamlineBuffers() || IsStreamlineDLSSGSupported());
	check(FSlateApplication::IsInitialized());

	FSlateApplication& SlateApp = FSlateApplication::Get();
	FSlateRenderer* Renderer = SlateApp.GetRenderer();
	OnBackBufferReadyToPresentHandle = Renderer->OnBackBufferReadyToPresent().AddStatic(&DLSSGOnBackBufferReadyToPresent);

	// ShutdownModule is too late for removing the callback, so it is removed in OnPreShutdown instead
	SlateApp.OnPreShutdown().AddLambda([]()
	{
		UE_LOG(LogStreamline, Log, TEXT("Unregistering of OnBackBufferReadyToPresent callback during FSlateApplication::OnPreShutdown"));
		FSlateRenderer* RendererAtShutdown = FSlateApplication::Get().GetRenderer();
		check(RendererAtShutdown);
		RendererAtShutdown->OnBackBufferReadyToPresent().Remove(OnBackBufferReadyToPresentHandle);
	});

	UE_LOG(LogStreamline, Log, TEXT("%s Leave"), ANSI_TO_TCHAR(__FUNCTION__));
}
// Intentionally empty: the OnBackBufferReadyToPresent callback is removed by the
// FSlateApplication::OnPreShutdown lambda that RegisterStreamlineDLSSGHooks installs.
void UnregisterStreamlineDLSSGHooks()
{
// see FSlateApplication::OnPreShutdown lambda in RegisterStreamlineDLSSGHooks
}
// Cached DLSS-G support level; written by QueryStreamlineDLSSGSupport on its first call.
static Streamline::EStreamlineFeatureSupport GStreamlineDLSSGSupport = Streamline::EStreamlineFeatureSupport::NotSupported;
// The DLSS-FG VRAM estimate is currently unreliable, so the code using it is compiled out.
#define WITH_DLSS_FG_VRAM_ESTIMATE 0
// File-local state that GetDLSSGStatusFromStreamline refreshes.
namespace
{
// GAverageFPS multiplied by the number of frames actually presented
float GLastDLSSGFrameRate = 0.0f;
// sl::DLSSGState::numFramesActuallyPresented from the last state query
int32 GLastDLSSGFramesPresented = 0;
#if WITH_DLSS_FG_VRAM_ESTIMATE
// sl::DLSSGState::estimatedVRAMUsageInBytes converted to MiB
float GLastDLSSGVRAMEstimate = 0;
#endif
// sl::DLSSGState::minWidthOrHeight; only updated when the once-per-app-lifetime values are queried
int32 GDLSSGMinWidthOrHeight = 0;
// Range used to clamp r.Streamline.DLSSG frames-to-generate; only updated together with GDLSSGMinWidthOrHeight
int32 GDLSSGMinGeneratedFrames = 0;
int32 GDLSSGMaxGeneratedFrames = 0;
}
// Returns the DLSS-G support level. The level is determined on the first call, stored in
// GStreamlineDLSSGSupport and returned unchanged by every later call.
STREAMLINECORE_API Streamline::EStreamlineFeatureSupport QueryStreamlineDLSSGSupport()
{
	static bool bSupportAlreadyQueried = false;
	if (bSupportAlreadyQueried)
	{
		return GStreamlineDLSSGSupport;
	}

	if (!FApp::CanEverRender())
	{
		GStreamlineDLSSGSupport = Streamline::EStreamlineFeatureSupport::NotSupported;
	}
	else if (!IsRHIDeviceNVIDIA())
	{
		GStreamlineDLSSGSupport = Streamline::EStreamlineFeatureSupport::NotSupportedIncompatibleHardware;
	}
	else if (!IsStreamlineSupported())
	{
		GStreamlineDLSSGSupport = Streamline::EStreamlineFeatureSupport::NotSupported;
	}
	else
	{
		FStreamlineRHI* PlatformRHI = GetPlatformStreamlineRHI();
		if (!PlatformRHI->IsDLSSGSupportedByRHI())
		{
			GStreamlineDLSSGSupport = Streamline::EStreamlineFeatureSupport::NotSupportedIncompatibleRHI;
		}
		else
		{
			// ask Streamline itself, log the answer and translate it into the plugin's enum
			const sl::Feature DLSSGFeature = sl::kFeatureDLSS_G;
			sl::Result FeatureResult = SLisFeatureSupported(DLSSGFeature, *PlatformRHI->GetAdapterInfo());
			LogStreamlineFeatureSupport(DLSSGFeature, *PlatformRHI->GetAdapterInfo());
			GStreamlineDLSSGSupport = TranslateStreamlineResult(FeatureResult);
		}
	}

	// The flag has to be set before GetDLSSGStatusFromStreamline runs, since that function
	// calls back into this one and would otherwise recurse.
	bSupportAlreadyQueried = true;

	if (GStreamlineDLSSGSupport == Streamline::EStreamlineFeatureSupport::Supported)
	{
		// fetches the minimum supported width/height and the generated frames range
		GetDLSSGStatusFromStreamline(true);
	}
	return GStreamlineDLSSGSupport;
}
bool IsStreamlineDLSSGSupported()
{
return Streamline::EStreamlineFeatureSupport::Supported == QueryStreamlineDLSSGSupport();
}
// Maps the r.Streamline.DLSSG.Enable cvar value (0, 1, 2) to sl::DLSSGMode (eOff, eOn, eAuto).
// Any other value is logged as an error and treated as eOff.
static sl::DLSSGMode SLDLSSGModeFromCvar()
{
	static_assert(uint32_t(sl::DLSSGMode::eCount) == 3U, "sl::DLSSGMode enum value mismatch. Dear NVIDIA Streamline plugin developer, please update this code!");

	const int32 CvarValue = CVarStreamlineDLSSGEnable.GetValueOnAnyThread();
	if (CvarValue == 0)
	{
		return sl::DLSSGMode::eOff;
	}
	if (CvarValue == 1)
	{
		return sl::DLSSGMode::eOn;
	}
	if (CvarValue == 2)
	{
		return sl::DLSSGMode::eAuto;
	}

	UE_LOG(LogStreamline, Error, TEXT("Invalid r.Streamline.DLSSG.Enable value %d"), CvarValue);
	return sl::DLSSGMode::eOff;
}
// DLSS-G counts as active when it is supported and the cvar-selected mode is anything but eOff.
// The cvar is only consulted when the feature is supported.
bool IsDLSSGActive()
{
	return IsStreamlineDLSSGSupported() && (SLDLSSGModeFromCvar() != sl::DLSSGMode::eOff);
}
// Returns the frames-to-generate cvar value clamped to the
// [GDLSSGMinGeneratedFrames, GDLSSGMaxGeneratedFrames] range.
int32 GetStreamlineDLSSGNumFramesToGenerate()
{
	const auto RequestedFrames = CVarStreamlineDLSSGFramesToGenerate.GetValueOnAnyThread();
	return FMath::Clamp(RequestedFrames, GDLSSGMinGeneratedFrames, GDLSSGMaxGeneratedFrames);
}
// Copies the cached generated-frames range into the two output parameters.
// Both values are 0 until GetDLSSGStatusFromStreamline has queried the once-per-app-lifetime values.
void GetStreamlineDLSSGMinMaxGeneratedFrames(int32& MinGeneratedFrames, int32& MaxGeneratedFrames)
{
MinGeneratedFrames = GDLSSGMinGeneratedFrames;
MaxGeneratedFrames = GDLSSGMaxGeneratedFrames;
}
// Stats group and counters for DLSS-G; the counters are set in GetDLSSGStatusFromStreamline.
DECLARE_STATS_GROUP(TEXT("DLSS-G"), STATGROUP_DLSSG, STATCAT_Advanced);
DECLARE_DWORD_COUNTER_STAT(TEXT("DLSS-G: Frames Presented"), STAT_DLSSGFramesPresented, STATGROUP_DLSSG);
DECLARE_FLOAT_COUNTER_STAT(TEXT("DLSS-G: Average FPS"), STAT_DLSSGAverageFPS, STATGROUP_DLSSG);
// the VRAM estimate stat only exists when the estimate is compiled in
#if WITH_DLSS_FG_VRAM_ESTIMATE
DECLARE_FLOAT_COUNTER_STAT(TEXT("DLSS-G: VRAM Estimate (MiB)"), STAT_DLSSGVRAMEstimate, STATGROUP_DLSSG);
#endif
DECLARE_DWORD_COUNTER_STAT(TEXT("DLSS-G: Minimum Width or Height "), STAT_DLSSGMinWidthOrHeight, STATGROUP_DLSSG);
DECLARE_DWORD_COUNTER_STAT(TEXT("DLSS-G: Minimum Number of Generated Frames "), STAT_DLSSGMinGeneratedFrames, STATGROUP_DLSSG);
DECLARE_DWORD_COUNTER_STAT(TEXT("DLSS-G: Maximum Number of Generated Frames "), STAT_DLSSGMaxGeneratedFrames, STATGROUP_DLSSG);
// Helper added to the sl namespace: string form of an sl::DLSSGStatus value, used in the checkf
// message of GetDLSSGStatusFromStreamline.
namespace sl
{
// Returns a string for each status listed below and "Unknown" for any other value.
// NOTE(review): SL_CASE_STR is defined outside this file; presumably it expands to a case label
// returning the stringified enumerator - confirm against sl_helpers.h.
inline const char* getDLSSGStatusAsStr(DLSSGStatus v)
{
switch (v)
{
SL_CASE_STR(DLSSGStatus::eOk);
SL_CASE_STR(DLSSGStatus::eFailResolutionTooLow);
SL_CASE_STR(DLSSGStatus::eFailReflexNotDetectedAtRuntime);
SL_CASE_STR(DLSSGStatus::eFailHDRFormatNotSupported);
SL_CASE_STR(DLSSGStatus::eFailCommonConstantsInvalid);
SL_CASE_STR(DLSSGStatus::eFailGetCurrentBackBufferIndexNotCalled);
};
return "Unknown";
}
}
// Refreshes the cached DLSS-G runtime state (frames presented, effective frame rate and, when
// compiled in, the VRAM estimate) and the matching stats from slDLSSGGetState.
// bQueryOncePerAppLifetimeValues: when true, the minimum width/height and the generated-frames
// range are reset to 0 and, if DLSS-G is supported, refilled from the queried state.
// When DLSS-G is not supported only the defaults assigned at the top remain in effect.
void GetDLSSGStatusFromStreamline(bool bQueryOncePerAppLifetimeValues)
{
extern ENGINE_API float GAverageFPS;
// defaults used when DLSS-G is not supported: engine frame rate, one presented frame
GLastDLSSGFrameRate = GAverageFPS;
GLastDLSSGFramesPresented = 1;
#if WITH_DLSS_FG_VRAM_ESTIMATE
GLastDLSSGVRAMEstimate = 0;
#endif
if (bQueryOncePerAppLifetimeValues)
{
GDLSSGMinGeneratedFrames = 0;
GDLSSGMaxGeneratedFrames = 0;
GDLSSGMinWidthOrHeight = 0;
}
if (IsStreamlineDLSSGSupported())
{
// INSERT AWKWARD MUPPET FACE HERE
// below we disable FG if we are using actual view ids, see SetStreamlineDLSSGState
//checkf(CVarStreamlineViewIdOverride && CVarStreamlineViewIdOverride->GetInt() != 0, TEXT("r.Streamline.ViewIdOverride must be set to 1 since DLSS-G only supports a single viewport."));
// the state is always queried for viewport 0
sl::ViewportHandle Viewport(0);
sl::DLSSGState State;
sl::DLSSGOptions StreamlineConstantsDLSSG;
#if WITH_DLSS_FG_VRAM_ESTIMATE
StreamlineConstantsDLSSG.flags = sl::DLSSGFlags::eRequestVRAMEstimate;
#endif
// NOTE(review): the comment above says FG is disabled when actual view ids are used, while this
// expression selects eOff when NeedStreamlineViewIdOverride() returns true - confirm which is intended
StreamlineConstantsDLSSG.mode = (!NeedStreamlineViewIdOverride()) ? SLDLSSGModeFromCvar() : sl::DLSSGMode::eOff;
// TODO incorporate the checks (foreground, viewport large enough) from SetStreamlineDLSSGState
// when bQueryOncePerAppLifetimeValues is true the clamp range was just reset to [0, 0], so this evaluates to 0
StreamlineConstantsDLSSG.numFramesToGenerate = GetStreamlineDLSSGNumFramesToGenerate();
CALL_SL_FEATURE_FN(sl::kFeatureDLSS_G, slDLSSGGetState, Viewport, State, &StreamlineConstantsDLSSG);
GLastDLSSGFramesPresented = State.numFramesActuallyPresented;
SET_DWORD_STAT(STAT_DLSSGFramesPresented, GLastDLSSGFramesPresented);
// effective frame rate = engine average FPS times the number of frames presented per engine frame
GLastDLSSGFrameRate = GAverageFPS * GLastDLSSGFramesPresented;
SET_FLOAT_STAT(STAT_DLSSGAverageFPS, GLastDLSSGFrameRate);
#if WITH_DLSS_FG_VRAM_ESTIMATE
// bytes -> MiB
GLastDLSSGVRAMEstimate = float(State.estimatedVRAMUsageInBytes) / (1024 * 1024);
SET_FLOAT_STAT(STAT_DLSSGVRAMEstimate, GLastDLSSGVRAMEstimate);
#endif
if (bQueryOncePerAppLifetimeValues)
{
GDLSSGMinWidthOrHeight = State.minWidthOrHeight;
SET_DWORD_STAT(STAT_DLSSGMinWidthOrHeight, GDLSSGMinWidthOrHeight);
GDLSSGMinGeneratedFrames = 1;/* That is if FG is supported we generate 1 frame*/
SET_DWORD_STAT(STAT_DLSSGMinGeneratedFrames, GDLSSGMinGeneratedFrames);
GDLSSGMaxGeneratedFrames = State.numFramesToGenerateMax; /* State.maxNumGeneratedFrames this needs an SDK Update*/;
SET_DWORD_STAT(STAT_DLSSGMaxGeneratedFrames, GDLSSGMaxGeneratedFrames);
}
#if DO_CHECK
// optional hard failure when Streamline reports a DLSS-FG runtime error, controlled by a cvar
if (CVarStreamlineDLSSGCheckStatusPerFrame.GetValueOnAnyThread())
{
checkf(State.status == sl::DLSSGStatus::eOk, TEXT("DLSS-FG failed at runtime with %s (%d). This runtime check can be disabled with the r.Streamline.DLSSG.CheckStatusPerFrame console variable"),
ANSI_TO_TCHAR(sl::getDLSSGStatusAsStr(State.status)), State.status);
}
#endif
}
}
// Returns the values cached by the last GetDLSSGStatusFromStreamline call:
// FrameRateInHertz receives GLastDLSSGFrameRate, FramesPresented receives GLastDLSSGFramesPresented.
STREAMLINECORE_API void GetStreamlineDLSSGFrameTiming(float& FrameRateInHertz, int32& FramesPresented)
{
FrameRateInHertz = GLastDLSSGFrameRate;
FramesPresented = GLastDLSSGFramesPresented;
}
// Adds the "DLSS-G" state pass via AddStreamlineStateRenderPass. The first lambda builds the
// sl::DLSSGOptions for the view, the second hands those options to slDLSSGSetOptions.
void AddStreamlineDLSSGStateRenderPass(FRDGBuilder& GraphBuilder, uint32 ViewID, const FIntRect& SecondaryViewRect)
{
AddStreamlineStateRenderPass (TEXT("DLSS-G"), GraphBuilder, ViewID, SecondaryViewRect,
// this lambda computes the SL options struct based on cvars and other state
[] (uint32 ViewID, const FIntRect & SecondaryViewRect) ->sl::DLSSGOptions
{
// the call site is expected not to call this when DLSS-G is unsupported, so we assert instead of bailing out
check(IsStreamlineDLSSGSupported());
check(IsInRenderingThread());
sl::DLSSGOptions SLConstants;
SLConstants.onErrorCallback = DLSSGAPIErrorCallBack;
#if (ENGINE_MAJOR_VERSION == 4)
const bool bIsForeground = FApp::HasVRFocus() || FApp::IsBenchmarking() || FPlatformApplicationMisc::IsThisApplicationForeground();
#else
const bool bIsForeground = FApp::HasFocus();
#endif
// frame generation is forced off when the smaller view dimension is below the queried minimum
const bool bIsLargeEnough = FMath::Min(SecondaryViewRect.Width(), SecondaryViewRect.Height()) >= GDLSSGMinWidthOrHeight;
SLConstants.mode = (bIsForeground && bIsLargeEnough) ? SLDLSSGModeFromCvar() : sl::DLSSGMode::eOff;
if (CVarStreamlineFullScreenMenuDetection.GetValueOnRenderThread() != 0)
{
EnumAddFlags(SLConstants.flags, sl::DLSSGFlags::eEnableFullscreenMenuDetection);
}
if (CVarStreamlineDLSSGDynamicResolutionMode.GetValueOnRenderThread() != 0)
{
EnumAddFlags(SLConstants.flags, sl::DLSSGFlags::eDynamicResolutionEnabled);
}
SLConstants.numFramesToGenerate = GetStreamlineDLSSGNumFramesToGenerate();
return SLConstants;
},
// this lambda is only here since templating the function pointer, function name and such below is inconvenient
[](FRHICommandListImmediate& RHICmdList, uint32 ViewID, const FIntRect& SecondaryViewRect, const sl::DLSSGOptions& Options)
{
CALL_SL_FEATURE_FN(sl::kFeatureDLSS_G, slDLSSGSetOptions, sl::ViewportHandle(ViewID), Options);
}
);
}
// Scales the motion blur time scale and target delta time of every view state in the family by
// 1 / (presented frames) while DLSS-G is active and the adjust-motion-blur cvar is non-zero.
// With the cvar set to 2 the last reported number of presented frames (at least 1) is used;
// any other non-zero value uses 1 + the number of frames to generate.
void BeginRenderViewFamilyDLSSG(FSceneViewFamily& InViewFamily)
{
	if (!IsDLSSGActive())
	{
		return;
	}

	const auto AdjustMode = CVarStreamlineDLSSGAdjustMotionBlurTimeScale.GetValueOnAnyThread();
	if (!AdjustMode || !InViewFamily.Views.Num())
	{
		return;
	}

	// this is 1 when FG is off (or auto mode turns it off)
	int32 PresentedFrames;
	if (AdjustMode == 2)
	{
		PresentedFrames = FMath::Max(1, GLastDLSSGFramesPresented);
	}
	else
	{
		PresentedFrames = 1 + GetStreamlineDLSSGNumFramesToGenerate();
	}
	const float TimeScaleCorrection = 1.0f / float(PresentedFrames);

	for (const auto& View : InViewFamily.Views)
	{
		FSceneViewStateInterface* ViewStateInterface = View->State;
		if (!ViewStateInterface)
		{
			continue;
		}

		// The view state is reached through a downcast so that no engine changes are needed
		FSceneViewState* ViewState = static_cast<FSceneViewState*>(ViewStateInterface);
		ViewState->MotionBlurTimeScale *= TimeScaleCorrection;
		ViewState->MotionBlurTargetDeltaTime *= TimeScaleCorrection;
	}
}

View File

@ -0,0 +1,244 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "StreamlineDeepDVC.h"
#include "StreamlineCore.h"
#include "StreamlineCorePrivate.h"
#include "StreamlineAPI.h"
#include "StreamlineRHI.h"
#include "sl_helpers.h"
#include "sl_deepdvc.h"
#include "UIHintExtractionPass.h"
#include "CoreMinimal.h"
#include "Framework/Application/SlateApplication.h"
#include "RenderGraphBuilder.h"
#include "Runtime/Launch/Resources/Version.h"
#include "ScenePrivate.h"
#include "SystemTextures.h"
#include "HAL/PlatformApplicationMisc.h"
// r.Streamline.DeepDVC.Enable: translated to sl::DeepDVCMode by SLDeepDVCModeFromCvar.
static TAutoConsoleVariable<int32> CVarStreamlineDeepDVCEnable(
TEXT("r.Streamline.DeepDVC.Enable"),
0,
TEXT("DeepDVC mode (default = 0)\n")
TEXT("0: off\n")
TEXT("1: always on\n"),
ECVF_Default);
// r.Streamline.DeepDVC.Intensity: copied into sl::DeepDVCOptions::intensity by the DeepDVC state pass.
static TAutoConsoleVariable<float> CVarStreamlineDeepDVCIntensity(
TEXT("r.Streamline.DeepDVC.Intensity"),
0.5,
TEXT("DeepDVC Intensity (default = 0.5, range [0..1])\n")
TEXT("Controls how strong or subtle the filter effect will be on an image.\n")
TEXT("A low intensity will keep the images closer to the original, while a high intensity will make the filter effect more pronounced.\n")
TEXT("Note: '0' disables DeepDVC implicitely\n"),
ECVF_Default);
// r.Streamline.DeepDVC.SaturationBoost: copied into sl::DeepDVCOptions::saturationBoost by the DeepDVC state pass.
static TAutoConsoleVariable<float> CVarStreamlineDeepDVCSaturationBoost(
TEXT("r.Streamline.DeepDVC.SaturationBoost"),
0.5,
TEXT("DeepDVC SaturationBoost(default = 0.5) [0..1]\n")
TEXT("Enhances the colors in them image, making them more vibrant and eye-catching.\n")
TEXT("This setting will only be active if r.Streamline.DeepDVC.Intensity is relatively high. Once active, colors pop up more, making the image look more lively.\n")
TEXT("Note: Applied only when r.Streamline.DeepDVC.Intensity > 0\n"),
ECVF_Default);
// Cached DeepDVC support level; written by QueryStreamlineDeepDVCSupport on its first call.
static Streamline::EStreamlineFeatureSupport GStreamlineDeepDVCSupport = Streamline::EStreamlineFeatureSupport::NotSupported;
namespace
{
// Last VRAM estimate reported by slDeepDVCGetState, in MiB; refreshed by GetDeepDVCStatusFromStreamline.
float GLastDeepDVCVRAMEstimate = 0;
}
// Returns the DeepDVC support level. The level is determined on the first call, stored in
// GStreamlineDeepDVCSupport and returned unchanged by every later call.
STREAMLINECORE_API Streamline::EStreamlineFeatureSupport QueryStreamlineDeepDVCSupport()
{
	static bool bSupportAlreadyQueried = false;
	if (bSupportAlreadyQueried)
	{
		return GStreamlineDeepDVCSupport;
	}

	if (!FApp::CanEverRender())
	{
		GStreamlineDeepDVCSupport = Streamline::EStreamlineFeatureSupport::NotSupported;
	}
	else if (!IsRHIDeviceNVIDIA())
	{
		GStreamlineDeepDVCSupport = Streamline::EStreamlineFeatureSupport::NotSupportedIncompatibleHardware;
	}
	else if (!IsStreamlineSupported())
	{
		GStreamlineDeepDVCSupport = Streamline::EStreamlineFeatureSupport::NotSupported;
	}
	else
	{
		FStreamlineRHI* PlatformRHI = GetPlatformStreamlineRHI();
		if (!PlatformRHI->IsDeepDVCSupportedByRHI())
		{
			GStreamlineDeepDVCSupport = Streamline::EStreamlineFeatureSupport::NotSupportedIncompatibleRHI;
		}
		else
		{
			// ask Streamline itself, log the answer and translate it into the plugin's enum
			const sl::Feature DeepDVCFeature = sl::kFeatureDeepDVC;
			sl::Result FeatureResult = SLisFeatureSupported(DeepDVCFeature, *PlatformRHI->GetAdapterInfo());
			LogStreamlineFeatureSupport(DeepDVCFeature, *PlatformRHI->GetAdapterInfo());
			GStreamlineDeepDVCSupport = TranslateStreamlineResult(FeatureResult);
		}
	}

	// The flag has to be set before GetDeepDVCStatusFromStreamline runs, since that function
	// calls back into this one and would otherwise recurse.
	bSupportAlreadyQueried = true;

	if (GStreamlineDeepDVCSupport == Streamline::EStreamlineFeatureSupport::Supported)
	{
		// primes the cached DeepDVC state (VRAM estimate)
		GetDeepDVCStatusFromStreamline();
	}
	return GStreamlineDeepDVCSupport;
}
bool IsStreamlineDeepDVCSupported()
{
return Streamline::EStreamlineFeatureSupport::Supported == QueryStreamlineDeepDVCSupport();
}
// Maps the r.Streamline.DeepDVC.Enable cvar value (0, 1) to sl::DeepDVCMode (eOff, eOn).
// Any other value is logged as an error and treated as eOff.
static sl::DeepDVCMode SLDeepDVCModeFromCvar()
{
	const int32 CvarValue = CVarStreamlineDeepDVCEnable.GetValueOnAnyThread();
	if (CvarValue == 0)
	{
		return sl::DeepDVCMode::eOff;
	}
	if (CvarValue == 1)
	{
		return sl::DeepDVCMode::eOn;
	}

	UE_LOG(LogStreamline, Error, TEXT("Invalid r.Streamline.DeepDVC.Enable value %d"), CvarValue);
	return sl::DeepDVCMode::eOff;
}
// DeepDVC counts as active when it is supported and the cvar-selected mode is not eOff.
// The cvar is only consulted when the feature is supported.
bool IsDeepDVCActive()
{
	return IsStreamlineDeepDVCSupported() && (SLDeepDVCModeFromCvar() != sl::DeepDVCMode::eOff);
}
// Stats group and VRAM counter for DeepDVC; the counter is set in GetDeepDVCStatusFromStreamline.
DECLARE_STATS_GROUP(TEXT("DeepDVC"), STATGROUP_DeepDVC, STATCAT_Advanced);
DECLARE_FLOAT_COUNTER_STAT(TEXT("DeepDVC: VRAM Estimate (MiB)"), STAT_DeepDVCVRAMEstimate, STATGROUP_DeepDVC);
// Refreshes the cached DeepDVC VRAM estimate (in MiB) and the matching stat from slDeepDVCGetState.
// The estimate is reset to 0 first and stays 0 when DeepDVC is not supported.
void GetDeepDVCStatusFromStreamline()
{
	GLastDeepDVCVRAMEstimate = 0;
	if (!IsStreamlineDeepDVCSupported())
	{
		return;
	}

	// INSERT AWKWARD MUPPET FACE HERE
	// static const auto CVarStreamlineViewIdOverride = IConsoleManager::Get().FindConsoleVariable(TEXT("r.Streamline.ViewIdOverride"));
	//checkf(CVarStreamlineViewIdOverride && CVarStreamlineViewIdOverride->GetInt() != 0, TEXT("r.Streamline.ViewIdOverride must be set to 1 since DeepDVC only supports a single viewport."));

	// The state is always queried for viewport 0. slDeepDVCGetState takes no options argument,
	// so no sl::DeepDVCOptions needs to be built here.
	sl::ViewportHandle Viewport(0);
	sl::DeepDVCState State;
	CALL_SL_FEATURE_FN(sl::kFeatureDeepDVC, slDeepDVCGetState, Viewport, State);

	// bytes -> MiB
	GLastDeepDVCVRAMEstimate = float(State.estimatedVRAMUsageInBytes) / (1024 * 1024);
	SET_FLOAT_STAT(STAT_DeepDVCVRAMEstimate, GLastDeepDVCVRAMEstimate);
}
// File-local RDG pass parameter struct used by AddStreamlineDeepDVCEvaluateRenderPass.
namespace
{
BEGIN_SHADER_PARAMETER_STRUCT(FSLDeepDVCShaderParameters, )
// the texture handed to the DeepDVC evaluate call
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, SceneColorWithoutHUD)
// Fake output to trigger pass execution
#if (ENGINE_MAJOR_VERSION == 4) && (ENGINE_MINOR_VERSION == 25)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, SceneColorAfterTonemap)
#else
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, RenderPassTriggerDummy)
#endif
END_SHADER_PARAMETER_STRUCT()
}
// Adds the "DeepDVC" state pass via AddStreamlineStateRenderPass. The first lambda builds the
// sl::DeepDVCOptions from the DeepDVC cvars, the second hands those options to slDeepDVCSetOptions.
void AddStreamlineDeepDVCStateRenderPass(FRDGBuilder& GraphBuilder, uint32 ViewID, const FIntRect& SecondaryViewRect)
{
AddStreamlineStateRenderPass(TEXT("DeepDVC"), GraphBuilder, ViewID, SecondaryViewRect,
// this lambda computes the SL options struct based on cvars and other state
[](uint32 ViewID, const FIntRect& SecondaryViewRect) ->sl::DeepDVCOptions
{
// the call site is expected not to call this when DeepDVC is unsupported, so we assert instead of bailing out
check(IsStreamlineDeepDVCSupported());
check(IsInRenderingThread());
sl::DeepDVCOptions SLConstants;
SLConstants.mode = SLDeepDVCModeFromCvar();
SLConstants.intensity = CVarStreamlineDeepDVCIntensity.GetValueOnRenderThread();
SLConstants.saturationBoost = CVarStreamlineDeepDVCSaturationBoost.GetValueOnRenderThread();
return SLConstants;
},
// this lambda is only here since templating the function pointer, function name and such below is inconvenient
[](FRHICommandListImmediate& RHICmdList, uint32 ViewID, const FIntRect& SecondaryViewRect, const sl::DeepDVCOptions& Options)
{
CALL_SL_FEATURE_FN(sl::kFeatureDeepDVC, slDeepDVCSetOptions, sl::ViewportHandle(ViewID), Options);
}
);
}
// Adds an RDG pass that evaluates DeepDVC on SLSceneColorWithoutHUD for the given view.
// The pass lambda resolves the RHI texture and enqueues a lambda on the RHI command list that
// fetches the frame token for GFrameCounter and calls StreamlineEvaluateDeepDVC on the
// SecondaryViewRect region of that texture.
void AddStreamlineDeepDVCEvaluateRenderPass(FStreamlineRHI* StreamlineRHIExtensions, FRDGBuilder& GraphBuilder, uint32 ViewID, const FIntRect& SecondaryViewRect, FRDGTextureRef SLSceneColorWithoutHUD)
{
	FSLDeepDVCShaderParameters* EvaluateParameters = GraphBuilder.AllocParameters<FSLDeepDVCShaderParameters>();
	EvaluateParameters->SceneColorWithoutHUD = SLSceneColorWithoutHUD;

#if (ENGINE_MAJOR_VERSION == 4) && (ENGINE_MINOR_VERSION == 25)
	const ERDGPassFlags EvaluatePassFlags = ERDGPassFlags::Compute;
#else
	const ERDGPassFlags EvaluatePassFlags = ERDGPassFlags::Compute | ERDGPassFlags::Raster | ERDGPassFlags::SkipRenderPass | ERDGPassFlags::NeverCull;
#endif

	GraphBuilder.AddPass(
		RDG_EVENT_NAME("Streamline DeepDVC Evaluate ViewID=%u", ViewID),
		EvaluateParameters,
		EvaluatePassFlags,
		[StreamlineRHIExtensions, EvaluateParameters, ViewID, SecondaryViewRect](FRHICommandListImmediate& RHICmdList) mutable
		{
			check(EvaluateParameters->SceneColorWithoutHUD);
			EvaluateParameters->SceneColorWithoutHUD->MarkResourceAsUsed();
			// DeepDVC reads from and writes to the same texture
			FRHITexture* InOutTexture = EvaluateParameters->SceneColorWithoutHUD->GetRHI();

			RHICmdList.EnqueueLambda(
				[StreamlineRHIExtensions, InOutTexture, ViewID, SecondaryViewRect](FRHICommandListImmediate& Cmd) mutable
				{
					sl::FrameToken* FrameToken = FStreamlineCoreModule::GetStreamlineRHI()->GetFrameToken(GFrameCounter);
					FRHIStreamlineResource DeepDVCResource{ InOutTexture, SecondaryViewRect, EStreamlineResource::ScalingOutputColor };
					StreamlineRHIExtensions->StreamlineEvaluateDeepDVC(Cmd, DeepDVCResource, FrameToken, ViewID);
				});
		});
}

View File

@ -0,0 +1,284 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "StreamlineLatewarp.h"
#include "StreamlineCore.h"
#include "StreamlineCorePrivate.h"
#include "StreamlineShaders.h"
#include "StreamlineViewExtension.h"
#include "StreamlineAPI.h"
#include "StreamlineRHI.h"
#include "UIHintExtractionPass.h"
#include "CoreMinimal.h"
#include "RenderGraphBuilder.h"
#include "Runtime/Launch/Resources/Version.h"
#include "SystemTextures.h"
#include "SceneTextureParameters.h"
#include "PostProcess/PostProcessMaterial.h"
#include "sl_helpers.h"
#include "sl_latewarp.h"
// NOTE(review): NumLatewarpInstances is not referenced in the visible part of this file - confirm it is still needed.
static int32 NumLatewarpInstances = 0;
// Handle of the OnBackBufferReadyToPresent registration made by RegisterStreamlineLatewarpHooks.
static FDelegateHandle LatewarpOnBackBufferReadyToPresentHandle;
// r.Streamline.Latewarp.Enable: any non-zero value enables Latewarp (see IsLatewarpActive).
// The help text now states the default that is actually registered (0).
static TAutoConsoleVariable<int32> CVarLatewarpEnable(
	TEXT("r.Streamline.Latewarp.Enable"), 0,
	TEXT("Enable/disable Latewarp (default = 0)\n"),
	ECVF_Default);
DEFINE_LOG_CATEGORY_STATIC(LogStreamlineLatewarp, Log, All);
DECLARE_GPU_STAT(Latewarp)
// Returns the rectangle of the window's viewport widget in paint space, with coordinates
// truncated to integers. When the window has no viewport or the viewport has no live widget,
// the result is (0,0)-(viewport size) instead.
static FIntRect LatewarpGetViewportRect(SWindow& InWindow)
{
	// During app shutdown, the window might not have a viewport anymore; SWindow::GetViewportSize()
	// handles that transparently, so it provides the fallback rectangle.
	const FIntRect FallbackRect(FIntPoint::ZeroValue, InWindow.GetViewportSize().IntPoint());

	TSharedPtr<ISlateViewport> SlateViewport = InWindow.GetViewport();
	if (!SlateViewport.IsValid())
	{
		return FallbackRect;
	}

	TSharedPtr<SWidget> ViewportWidget = SlateViewport->GetWidget().Pin();
	if (!ViewportWidget.IsValid())
	{
		return FallbackRect;
	}

	const FGeometry PaintGeometry = ViewportWidget->GetPaintSpaceGeometry();
	const auto TopLeft = PaintGeometry.GetAbsolutePosition();
	const auto BottomRight = PaintGeometry.GetAbsolutePosition() + PaintGeometry.GetAbsoluteSize();
	return FIntRect(int32(TopLeft.X), int32(TopLeft.Y), int32(BottomRight.X), int32(BottomRight.Y));
}
// OnBackBufferReadyToPresent callback for Latewarp (render thread only).
// For game windows (and PIE windows in editor builds) it extracts the UI hint texture from the
// back buffer, collects the tracked views that rendered into this back buffer and tags back
// buffer and UI hint for them via AddStreamlineUIHintTagPass. Other windows are ignored.
static void LatewarpOnBackBufferReadyToPresent(SWindow& InWindow, const FTexture2DRHIRef& InBackBuffer)
{
check(IsInRenderingThread());
const bool bIsGameWindow = InWindow.GetType() == EWindowType::GameWindow;
#if WITH_EDITOR
// PIE windows are recognized by their window title
const bool bIsPIEWindow = GIsEditor && (InWindow.GetTitle().ToString().Contains(TEXT("Preview [NetMode:")));
#else
const bool bIsPIEWindow = false;
#endif
if (!(bIsGameWindow || bIsPIEWindow))
{
return;
}
// TODO maybe add a helper function to add the RDG pass to tag a resource and use that everywhere
// NOTE(review): GraphBuilder.Execute() is not called in this function; presumably
// AddStreamlineUIHintTagPass executes the graph - confirm
FRHICommandListImmediate& RHICmdList = FRHICommandListExecutor::GetImmediateCommandList();
FRDGBuilder GraphBuilder(RHICmdList);
FSLUIHintTagShaderParameters* PassParameters = GraphBuilder.AllocParameters<FSLUIHintTagShaderParameters>();
const float AlphaThreshold= 0.0f;
FRDGTextureRef UIHintTexture = AddStreamlineUIHintExtractionPass(GraphBuilder, AlphaThreshold, InBackBuffer);
PassParameters->UIColorAndAlpha = UIHintTexture;
PassParameters->BackBuffer = InBackBuffer;
FIntPoint BackBufferDimension = { int32(InBackBuffer->GetTexture2D()->GetSizeX()), int32(InBackBuffer->GetTexture2D()->GetSizeY()) };
FStreamlineRHI* RHIExtensions = FStreamlineCoreModule::GetStreamlineRHI();
TArray<FTrackedView>& TrackedViews = FStreamlineViewExtension::GetTrackedViews();
TArray<FTrackedView> ViewsInThisBackBuffer;
int32 ViewRectIndex = 0;
FRHITexture2D* RealOrBufferedBackBuffer = InBackBuffer->GetTexture2D();
// Move every tracked view whose texture is this back buffer into ViewsInThisBackBuffer.
// RemoveAtSwap puts another element at ViewRectIndex, so the index only advances when nothing was removed.
while (ViewRectIndex < TrackedViews.Num())
{
if (TrackedViews[ViewRectIndex].Texture->GetTexture2D() == RealOrBufferedBackBuffer)
{
ViewsInThisBackBuffer.Add(TrackedViews[ViewRectIndex]);
TrackedViews.RemoveAtSwap(ViewRectIndex);
}
else
{
++ViewRectIndex;
}
}
const FIntRect WindowClientAreaRect = LatewarpGetViewportRect(InWindow);
// NOTE(review): the tag pass is added even when ViewsInThisBackBuffer is empty - confirm this is intended
AddStreamlineUIHintTagPass(GraphBuilder, true, true, BackBufferDimension, PassParameters, 0, RHIExtensions, ViewsInThisBackBuffer, WindowClientAreaRect, true);
}
// Hooks LatewarpOnBackBufferReadyToPresent into the Slate renderer's OnBackBufferReadyToPresent
// delegate and installs an FSlateApplication::OnPreShutdown lambda that removes it again.
// InStreamlineRHI is not used by this function.
void RegisterStreamlineLatewarpHooks(FStreamlineRHI* InStreamlineRHI)
{
	UE_LOG(LogStreamline, Log, TEXT("%s Enter"), ANSI_TO_TCHAR(__FUNCTION__));

	// asserts that buffer tagging is enabled or Latewarp is supported, and that Slate is up
	check(ShouldTagStreamlineBuffers() || IsStreamlineLatewarpSupported());
	check(FSlateApplication::IsInitialized());

	FSlateApplication& SlateApp = FSlateApplication::Get();
	FSlateRenderer* Renderer = SlateApp.GetRenderer();
	LatewarpOnBackBufferReadyToPresentHandle = Renderer->OnBackBufferReadyToPresent().AddStatic(&LatewarpOnBackBufferReadyToPresent);

	// ShutdownModule is too late for removing the callback, so it is removed in OnPreShutdown instead
	SlateApp.OnPreShutdown().AddLambda([]()
	{
		UE_LOG(LogStreamline, Log, TEXT("Unregistering of OnBackBufferReadyToPresent callback during FSlateApplication::OnPreShutdown"));
		FSlateRenderer* RendererAtShutdown = FSlateApplication::Get().GetRenderer();
		check(RendererAtShutdown);
		RendererAtShutdown->OnBackBufferReadyToPresent().Remove(LatewarpOnBackBufferReadyToPresentHandle);
	});

	UE_LOG(LogStreamline, Log, TEXT("%s Leave"), ANSI_TO_TCHAR(__FUNCTION__));
}
// Intentionally empty: the OnBackBufferReadyToPresent callback is removed by the
// FSlateApplication::OnPreShutdown lambda that RegisterStreamlineLatewarpHooks installs.
void UnregisterStreamlineLatewarpHooks()
{
// see FSlateApplication::OnPreShutdown lambda in RegisterStreamlineLatewarpHooks
}
// Asks Streamline whether the Latewarp feature is supported on the current adapter and caches
// the answer for later calls.
// This helper is reachable through IsLatewarpActive() without the CanEverRender / NVIDIA /
// IsStreamlineSupported checks that IsStreamlineLatewarpSupported() performs first, so the
// platform RHI pointer is validated before it is dereferenced. A missing RHI is reported as
// "not supported" without caching, so a later call can still succeed.
// NOTE(review): assumes GetPlatformStreamlineRHI() returns nullptr when no Streamline RHI exists - confirm.
static bool IsStreamlineLatewarpSupportedInternal()
{
	static bool bStreamlineLatewarpSupportedInitialized = false;
	static bool bStreamlineLatewarpSupported = false;
	if (!bStreamlineLatewarpSupportedInitialized)
	{
		FStreamlineRHI* StreamlineRHI = GetPlatformStreamlineRHI();
		if (StreamlineRHI == nullptr)
		{
			return false;
		}
		sl::Result Result = SLisFeatureSupported(sl::kFeatureLatewarp, *StreamlineRHI->GetAdapterInfo());
		bStreamlineLatewarpSupported = (Result == sl::Result::eOk);
		bStreamlineLatewarpSupportedInitialized = true;
	}
	return bStreamlineLatewarpSupported;
}
// Cached Latewarp support level; written by QueryStreamlineLatewarpSupport on its first call.
static Streamline::EStreamlineFeatureSupport GStreamlineLatewarpSupport = Streamline::EStreamlineFeatureSupport::NotSupported;
// Returns the Latewarp support level. The level is determined on the first call, stored in
// GStreamlineLatewarpSupport and returned unchanged by every later call.
STREAMLINECORE_API Streamline::EStreamlineFeatureSupport QueryStreamlineLatewarpSupport()
{
	static bool bSupportAlreadyQueried = false;
	if (bSupportAlreadyQueried)
	{
		return GStreamlineLatewarpSupport;
	}

	if (!FApp::CanEverRender())
	{
		GStreamlineLatewarpSupport = Streamline::EStreamlineFeatureSupport::NotSupported;
	}
	else if (!IsRHIDeviceNVIDIA())
	{
		GStreamlineLatewarpSupport = Streamline::EStreamlineFeatureSupport::NotSupportedIncompatibleHardware;
	}
	else if (!IsStreamlineSupported())
	{
		GStreamlineLatewarpSupport = Streamline::EStreamlineFeatureSupport::NotSupported;
	}
	else
	{
		FStreamlineRHI* PlatformRHI = GetPlatformStreamlineRHI();
		if (!PlatformRHI->IsLatewarpSupportedByRHI())
		{
			GStreamlineLatewarpSupport = Streamline::EStreamlineFeatureSupport::NotSupportedIncompatibleRHI;
		}
		else
		{
			// ask Streamline itself, log the answer and translate it into the plugin's enum
			const sl::Feature LatewarpFeature = sl::kFeatureLatewarp;
			sl::Result FeatureResult = SLisFeatureSupported(LatewarpFeature, *PlatformRHI->GetAdapterInfo());
			LogStreamlineFeatureSupport(LatewarpFeature, *PlatformRHI->GetAdapterInfo());
			GStreamlineLatewarpSupport = TranslateStreamlineResult(FeatureResult);
		}
	}

	// remember that the query ran so the checks above are not repeated
	bSupportAlreadyQueried = true;
	return GStreamlineLatewarpSupport;
}
// Latewarp is reported as supported only when the app can render, the RHI device is NVIDIA,
// Streamline is supported, the process is not the editor and Streamline reports the feature as
// supported. The checks run in that order and stop at the first one that fails.
bool IsStreamlineLatewarpSupported()
{
	const bool bEnvironmentAllowsLatewarp =
		FApp::CanEverRender()
		&& IsRHIDeviceNVIDIA()
		&& IsStreamlineSupported()
		&& !GIsEditor;

	return bEnvironmentAllowsLatewarp && IsStreamlineLatewarpSupportedInternal();
}
// Latewarp counts as active when Streamline reports the feature as supported and
// r.Streamline.Latewarp.Enable is non-zero. The cvar is only read when the feature is supported.
bool IsLatewarpActive()
{
	return IsStreamlineLatewarpSupportedInternal() && (CVarLatewarpEnable.GetValueOnAnyThread() != 0);
}
// Adds the "Latewarp" state pass via AddStreamlineStateRenderPass. The first lambda builds the
// sl::LatewarpOptions for the view, the second hands those options to slLatewarpSetOptions.
void AddStreamlineLatewarpStateRenderPass(FRDGBuilder& GraphBuilder, uint32 ViewID, const FIntRect& SecondaryViewRect)
{
	AddStreamlineStateRenderPass(TEXT("Latewarp"), GraphBuilder, ViewID, SecondaryViewRect,
		// this lambda computes the SL options struct based on cvars and other state
		[](uint32 ViewID, const FIntRect& SecondaryViewRect) ->sl::LatewarpOptions
		{
			// the call site is expected not to call this when Latewarp is unsupported, so we assert instead of bailing out
			check(IsStreamlineLatewarpSupported());
			check(IsInRenderingThread());

			sl::LatewarpOptions SLConstants;
			// TODO: implement when we have an SDK RC that has that implemented
			//SLConstants.onErrorCallback = DLSSGAPIErrorCallBack;

			// TODO hook up foreground / minimum view size gating (as the DLSS-G state pass does).
			// The previous expression was short-circuited with a literal `true`, so the gating never
			// had any effect; Latewarp simply follows IsLatewarpActive().
			SLConstants.latewarpActive = IsLatewarpActive();
			return SLConstants;
		},
		// this lambda is only here since templating the function pointer, function name and such below is inconvenient
		[](FRHICommandListImmediate& RHICmdList, uint32 ViewID, const FIntRect& SecondaryViewRect, const sl::LatewarpOptions& Options)
		{
			CALL_SL_FEATURE_FN(sl::kFeatureLatewarp, slLatewarpSetOptions, sl::ViewportHandle(ViewID), Options);
		}
	);
}

View File

@ -0,0 +1,742 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "StreamlineReflex.h"
#include "Framework/Application/SlateApplication.h"
#include "HAL/IConsoleManager.h"
#include "Interfaces/IPluginManager.h"
#include "Modules/ModuleManager.h"
#include "RHI.h"
#include "Runtime/Launch/Resources/Version.h"
#if ENGINE_MAJOR_VERSION == 5 && ENGINE_MINOR_VERSION >= 3
#include "Null/NullPlatformApplicationMisc.h"
#endif
#include "sl_helpers.h"
#include "sl_reflex.h"
#include "sl_pcl.h"
#include "StreamlineAPI.h"
#include "StreamlineCore.h"
#include "StreamlineCorePrivate.h"
#include "StreamlineDLSSG.h"
#include "StreamlineLatewarp.h"
#include "StreamlineRHI.h"
// Read-only switch consulted by RegisterStreamlineReflexHooks to decide whether the stock
// "Reflex" plugin's modular features get unregistered.
static TAutoConsoleVariable<bool> CVarStreamlineUnregisterReflexPlugin(
TEXT("r.Streamline.UnregisterReflexPlugin"),
true,
TEXT("The existing NVAPI based UE Reflex plugin is incompatible with the DLSS Frame Generation based implementation. This cvar controls whether the Reflex plugin should be unregistered from the engine or not.\n")
TEXT("0: keep Reflex plugin modular features registered\n")
TEXT("1: unregister Reflex plugin modular features. The Reflex blueprint library should work with the DLSS Frame Generation plugin modular features 🤞\n"),
ECVF_ReadOnly);
// Explicit Reflex on/off switch; read by FStreamlineMaxTickRateHandler::GetEnabled and
// written by FStreamlineMaxTickRateHandler::SetEnabled / Initialize.
static TAutoConsoleVariable<int32> CVarStreamlineReflexEnable(
TEXT("t.Streamline.Reflex.Enable"),
0,
TEXT("Enable Streamline Reflex extension. (default = 0)\n")
TEXT("0: Disabled\n")
TEXT("1: Enabled)\n"),
ECVF_RenderThreadSafe);
// PC Latency marker on/off switch; read and written by FStreamlineLatencyMarkers::GetEnabled / SetEnabled.
static TAutoConsoleVariable<int32> CVarStreamlineReflexEnableLatencyMarkers(
TEXT("t.Streamline.Reflex.EnableLatencyMarkers"),
1,
TEXT("Enable Streamline PC Latency metrics. (default = 1)\n")
TEXT("0: Disabled\n")
TEXT("1: Enabled)\n"),
ECVF_RenderThreadSafe);
// When non-zero, Reflex is treated as enabled whenever DoActiveStreamlineFeaturesRequireReflex() is true
// (see GetEnabled / GetFlags below).
static TAutoConsoleVariable<int32> CVarStreamlineReflexAuto(
TEXT("t.Streamline.Reflex.Auto"),
1,
TEXT("Enable Streamline Reflex extension when other SL features need it. (default = 1)\n")
TEXT("0: Disabled\n")
TEXT("1: Enabled)\n"),
ECVF_ReadOnly);
// Gates the Reflex / PCL support queries and GetEnabled when GIsEditor is set (WITH_EDITOR builds only).
static TAutoConsoleVariable<bool> CVarStreamlineReflexEnableInEditor(
TEXT("t.Streamline.Reflex.EnableInEditor"),
true,
TEXT("Enable Streamline Reflex and PC Latency in the editor. (default = 1)\n")
TEXT("0: Disabled\n")
TEXT("1: Enabled)\n"),
ECVF_RenderThreadSafe);
// Reflex mode; written by SetFlags, read by GetFlags and clamped to the sl::ReflexMode range in HandleMaxTickRate.
static TAutoConsoleVariable<int32> CVarStreamlineReflexMode(
TEXT("t.Streamline.Reflex.Mode"),
1,
TEXT("Streamline Reflex mode (default = 1)\n")
TEXT("0: off \n")
TEXT("1: low latency\n")
TEXT("2: low latency with boost\n"),
ECVF_RenderThreadSafe);
// When false, HandleMaxTickRate sets frameLimitUs to 0 and reports the frame rate as not handled.
static TAutoConsoleVariable<bool> CVarStreamlineReflexHandleMaxTickRate(
TEXT("t.Streamline.Reflex.HandleMaxTickRate"),
true,
TEXT("Controls whether Streamline Reflex handles frame rate limiting instead of the engine (default = true)"),
ECVF_Default);
// Modular feature instances; created in RegisterStreamlineReflexHooks and reset in UnregisterStreamlineReflexHooks.
static TUniquePtr<FStreamlineMaxTickRateHandler> StreamlineMaxTickRateHandler;
static TUniquePtr<FStreamlineLatencyMarkers> StreamlineLatencyMarker;
bool FStreamlineLatencyBase::bStreamlineReflexSupported = false;
bool FStreamlineLatencyBase::IsStreamlineReflexSupported()
{
if (!FApp::CanEverRender())
{
return false;
}
if (!IsStreamlineSupported())
{
return false;
}
#if WITH_EDITOR
if (GIsEditor && !CVarStreamlineReflexEnableInEditor.GetValueOnAnyThread())
{
return false;
}
#endif
static bool bStreamlineReflexSupportedInitialized = false;
if (!bStreamlineReflexSupportedInitialized)
{
const sl::AdapterInfo* AdapterInfo = FStreamlineCoreModule::GetStreamlineRHI()->GetAdapterInfo();
sl::Result Result = SLisFeatureSupported(sl::kFeatureReflex, *AdapterInfo);
bStreamlineReflexSupported = Result == sl::Result::eOk;
LogStreamlineFeatureSupport(sl::kFeatureReflex, *AdapterInfo);
bStreamlineReflexSupportedInitialized = true;
}
return bStreamlineReflexSupported;
}
bool FStreamlineLatencyBase::bStreamlinePCLSupported = false;
bool FStreamlineLatencyBase::IsStreamlinePCLSupported()
{
if (!FApp::CanEverRender())
{
return false;
}
if (!IsStreamlineSupported())
{
return false;
}
#if WITH_EDITOR
if (GIsEditor && !CVarStreamlineReflexEnableInEditor.GetValueOnAnyThread())
{
return false;
}
#endif
static bool bStreamlinePCLSupportedInitialized = false;
if (!bStreamlinePCLSupportedInitialized)
{
const sl::AdapterInfo* AdapterInfo = FStreamlineCoreModule::GetStreamlineRHI()->GetAdapterInfo();
sl::Result Result = SLisFeatureSupported(sl::kFeaturePCL, *AdapterInfo);
bStreamlinePCLSupported = (Result == sl::Result::eOk);
LogStreamlineFeatureSupport(sl::kFeaturePCL, *AdapterInfo);
bStreamlinePCLSupportedInitialized = true;
}
return bStreamlinePCLSupported;
}
void FStreamlineMaxTickRateHandler::Initialize()
{
if (IsStreamlineReflexSupported())
{
sl::ReflexState ReflexState{};
sl::Result Result = CALL_SL_FEATURE_FN(sl::kFeatureReflex, slReflexGetState, ReflexState);
checkf(Result == sl::Result::eOk, TEXT("slReflexGetState failed (%s)"), ANSI_TO_TCHAR(sl::getResultAsStr(Result)));
UE_LOG(LogStreamline, Log, TEXT("%s sl::ReflexState::lowLatencyAvailable=%u"), ANSI_TO_TCHAR(__FUNCTION__), ReflexState.lowLatencyAvailable);
UE_LOG(LogStreamline, Log, TEXT("%s sl::ReflexState::latencyReportAvailable=%u"), ANSI_TO_TCHAR(__FUNCTION__), ReflexState.latencyReportAvailable);
if (!ReflexState.lowLatencyAvailable)
{
CVarStreamlineReflexEnable->Set(false, ECVF_SetByCommandline);
}
const sl::ReflexOptions MakeSureStreamlineReflexCallsNVSTATS_INITAtLeastOnce;
CALL_SL_FEATURE_FN(sl::kFeatureReflex, slReflexSetOptions, MakeSureStreamlineReflexCallsNVSTATS_INITAtLeastOnce);
}
}
void FStreamlineMaxTickRateHandler::SetEnabled(bool bInEnabled)
{
if (!GetAvailable())
{
bInEnabled = false;
UE_LOG(LogStreamline, Log, TEXT("%s Tried to set SL Reflex Low Latency state but SL Reflex is not available"), ANSI_TO_TCHAR(__FUNCTION__));
}
CVarStreamlineReflexEnable->Set(bInEnabled, ECVF_SetByCommandline);
}
// True when DLSS Frame Generation or Latewarp is currently active.
// TODO base on eventual FStreamlineRHI queries
bool DoActiveStreamlineFeaturesRequireReflex()
{
	if (IsDLSSGActive())
	{
		return true;
	}

	return IsLatewarpActive();
}
bool FStreamlineMaxTickRateHandler::GetEnabled()
{
if (!GetAvailable())
{
return false;
}
#if WITH_EDITOR
if (GIsEditor && !CVarStreamlineReflexEnableInEditor.GetValueOnAnyThread())
{
return false;
}
#endif
int32 CVarReflex = CVarStreamlineReflexEnable.GetValueOnAnyThread();
int32 CVarReflexAuto = CVarStreamlineReflexAuto.GetValueOnAnyThread();
if ((CVarReflexAuto) != 0 && DoActiveStreamlineFeaturesRequireReflex())
{
return true;
}
else
{
return CVarReflex != 0;
}
}
bool FStreamlineMaxTickRateHandler::GetAvailable()
{
if (!IsStreamlineReflexSupported())
{
return false;
}
sl::ReflexState ReflexState{};
sl::Result Result = CALL_SL_FEATURE_FN(sl::kFeatureReflex, slReflexGetState, ReflexState);
checkf(Result == sl::Result::eOk, TEXT("slReflexGetState failed (%s)"), ANSI_TO_TCHAR(sl::getResultAsStr(Result)));
return ReflexState.lowLatencyAvailable;
}
// Translates the engine's flag bits into a Reflex mode and stores it in the mode cvar.
//   bit 0 (value 1): low latency
//   bit 1 (value 2): boost (only honoured together with bit 0)
// Any combination without bit 0 selects sl::ReflexMode::eOff.
void FStreamlineMaxTickRateHandler::SetFlags(uint32 Flags)
{
	const bool bLowLatencyMode = (Flags & 1) > 0;
	const bool bBoost = (Flags & 2) > 0;

	sl::ReflexMode SelectedMode = sl::ReflexMode::eOff;
	if (bLowLatencyMode)
	{
		SelectedMode = bBoost ? sl::ReflexMode::eLowLatencyWithBoost : sl::ReflexMode::eLowLatency;
	}

	CVarStreamlineReflexMode->Set(SelectedMode, ECVF_SetByCommandline);
}
// Returns the Reflex mode from the mode cvar, except that an "off" setting is promoted to
// sl::eLowLatency when the auto cvar is set and an active Streamline feature requires Reflex.
uint32 FStreamlineMaxTickRateHandler::GetFlags()
{
	const int32 ConfiguredMode = CVarStreamlineReflexMode.GetValueOnAnyThread();
	const bool bAutoEnableAllowed = CVarStreamlineReflexAuto.GetValueOnAnyThread() != 0;

	const bool bPromoteOffToLowLatency =
		bAutoEnableAllowed
		&& DoActiveStreamlineFeaturesRequireReflex()
		&& ConfiguredMode == sl::eOff;

	if (bPromoteOffToLowLatency)
	{
		return sl::eLowLatency;
	}

	return ConfiguredMode;
}
// Give "stat threading" visibility into the max tick rate handler, since it has no visibility by default.
// This is the equivalent of STAT_GameTickWaitTime for our handler; it is scoped around the
// enabled path of FStreamlineMaxTickRateHandler::HandleMaxTickRate.
DECLARE_CYCLE_STAT(TEXT("Game thread wait time (Reflex)"), STAT_GameTickReflexWaitTime, STATGROUP_Threading);
// Converts a desired max tick rate into the minimum frame interval in microseconds.
//
// DesiredMaxTickRate      - requested tick rate in Hz; values <= 0 mean "no limit" (interval 0).
// DeltaRealTimeMinusSleep - seconds the previous frame took, excluding the time spent sleeping.
//
// Returns 0 when the previous frame already took longer than the desired interval,
// otherwise the desired interval scaled to microseconds.
static float CalculateDesiredMinimumIntervalUs(float DesiredMaxTickRate, float DeltaRealTimeMinusSleep)
{
	float MinimumIntervalSeconds = 0.0f;
	if (DesiredMaxTickRate > 0)
	{
		MinimumIntervalSeconds = 1.0f / DesiredMaxTickRate;
	}

	// Attempt to approximate effect of engine's calculation of WaitTime when a max tick rate handler doesn't handle sleeping.
	// See this line in UEngine::UpdateTimeAndHandleMaxTickRate():
	//     WaitTime = FMath::Max( 1.f / MaxTickRate - DeltaRealTime, 0.f );
	// where DeltaRealTime does NOT include the time that the previous frame spent sleeping in UpdateTimeAndHandleMaxTickRate.
	//
	// This WaitTime behavior may seem counter-intuitive. After all, it permits a tick rate higher than the requested rate. But some
	// questionable engine math in areas like frame rate smoothing necessitates it, or otherwise the frame rate will tend to keep
	// getting lower.
	const bool bPreviousFrameAlreadyLongEnough = MinimumIntervalSeconds < DeltaRealTimeMinusSleep;

	return bPreviousFrameAlreadyLongEnough ? 0.0f : 1.0E6f * MinimumIntervalSeconds;
}
// Field-wise comparison of the Reflex option members this plugin cares about:
// mode, frameLimitUs, useMarkersToOptimize, virtualKey and idThread.
static bool AreReflexOptionsEquivalent(const sl::ReflexOptions& Opt1, const sl::ReflexOptions& Opt2)
{
	const bool bSameMode = (Opt1.mode == Opt2.mode);
	const bool bSameFrameLimit = (Opt1.frameLimitUs == Opt2.frameLimitUs);
	const bool bSameMarkerUsage = (Opt1.useMarkersToOptimize == Opt2.useMarkersToOptimize);
	const bool bSameVirtualKey = (Opt1.virtualKey == Opt2.virtualKey);
	const bool bSameThreadId = (Opt1.idThread == Opt2.idThread);

	return bSameMode && bSameFrameLimit && bSameMarkerUsage && bSameVirtualKey && bSameThreadId;
}
// Submits ReflexOptions through slReflexSetOptions and remembers them.
// The call is skipped only when redundant-call filtering is on AND the options are
// equivalent to the ones submitted last time.
static void UpdateReflexOptionsIfChanged(const sl::ReflexOptions& ReflexOptions)
{
	static sl::ReflexOptions PreviouslySubmittedOptions{};

	const bool bCanSkipSubmission =
		StreamlineFilterRedundantSetOptionsCalls()
		&& AreReflexOptionsEquivalent(PreviouslySubmittedOptions, ReflexOptions);
	if (bCanSkipSubmission)
	{
		return;
	}

	sl::Result Result = CALL_SL_FEATURE_FN(sl::kFeatureReflex, slReflexSetOptions, ReflexOptions);
	checkf(Result == sl::Result::eOk, TEXT("slReflexSetOptions failed (%s)"), ANSI_TO_TCHAR(sl::getResultAsStr(Result)));
	PreviouslySubmittedOptions = ReflexOptions;
}
// Applies the current Reflex options and, when Reflex is enabled, performs the Reflex sleep for this frame.
//
// DesiredMaxTickRate - tick rate (Hz) the engine would like to cap at.
//
// Returns true when the frame limit was handed to Reflex (frameLimitUs derived from DesiredMaxTickRate),
// false when the engine should keep handling the max tick rate itself. When Reflex is not enabled,
// the options are updated to sl::ReflexMode::eOff and false is returned.
bool FStreamlineMaxTickRateHandler::HandleMaxTickRate(float DesiredMaxTickRate)
{
	bool bFrameRateHandled = false;
	if (GetEnabled())
	{
		SCOPE_CYCLE_COUNTER(STAT_GameTickReflexWaitTime);

		const double CurrentRealTime = FPlatformTime::Seconds();
		// Function-local static: initialised on the first enabled call only, then updated after every sleep.
		static double LastRealTimeAfterSleep = CurrentRealTime - 0.0001;
		const float DeltaRealTimeMinusSleep = static_cast<float>(CurrentRealTime - LastRealTimeAfterSleep);

		sl::ReflexOptions ReflexOptions = {};
		ReflexOptions.mode = static_cast<sl::ReflexMode>(FMath::Clamp(GetFlags(), uint32(sl::ReflexMode::eOff), uint32(sl::ReflexMode::eLowLatencyWithBoost)));

		if (!CVarStreamlineReflexHandleMaxTickRate.GetValueOnAnyThread() || GIsEditor
			// Only engines older than 5.2 take this path. The minor version is compared for major version 5 only,
			// so a later major version with a small minor number is not mistaken for an old engine.
#if (ENGINE_MAJOR_VERSION < 5) || ((ENGINE_MAJOR_VERSION == 5) && (ENGINE_MINOR_VERSION < 2))
			// Timing logic in older engines is a little different, we don't handle this yet
			|| GEngine->IsAllowedFramerateSmoothing()
#endif
			)
		{
			// Note: Currently force it to let the engine handle max tick rate in PIE because the editor can get in a state where
			// the engine requests a DesiredMaxTickRate of 3.0, and then it never recovers from that.
			// Issue seen in UE 5.2, not well tested in older engine versions
			ReflexOptions.frameLimitUs = 0;
		}
		else
		{
			const float DesiredMinimumIntervalUs = CalculateDesiredMinimumIntervalUs(DesiredMaxTickRate, DeltaRealTimeMinusSleep);
#if ENGINE_MAJOR_VERSION > 4
			ReflexOptions.frameLimitUs = FMath::TruncToInt32(DesiredMinimumIntervalUs);
#else
			ReflexOptions.frameLimitUs = FMath::TruncToInt(DesiredMinimumIntervalUs);
#endif
			bFrameRateHandled = true;
		}

		ReflexOptions.useMarkersToOptimize = true;
		UpdateReflexOptionsIfChanged(ReflexOptions);

		sl::FrameToken* FrameToken = FStreamlineCoreModule::GetStreamlineRHI()->GetFrameToken(GFrameCounter);
		sl::Result Result = CALL_SL_FEATURE_FN(sl::kFeatureReflex, slReflexSleep, *FrameToken);
		checkf(Result == sl::Result::eOk, TEXT("slReflexSleep failed (%s)"), ANSI_TO_TCHAR(sl::getResultAsStr(Result)));

		// Remember when the sleep finished so the next call can exclude the sleep time from its delta.
		LastRealTimeAfterSleep = FPlatformTime::Seconds();
	}
	else
	{
		sl::ReflexOptions ReflexOptions{};
		ReflexOptions.mode = sl::ReflexMode::eOff;
		UpdateReflexOptionsIfChanged(ReflexOptions);
	}
	return bFrameRateHandled;
}
void FStreamlineLatencyMarkers::Initialize()
{
if (IsStreamlineReflexSupported())
{
sl::ReflexState ReflexState{};
sl::Result Result = CALL_SL_FEATURE_FN(sl::kFeatureReflex, slReflexGetState, ReflexState);
checkf(Result == sl::Result::eOk, TEXT("slReflexGetState failed (%s)"), ANSI_TO_TCHAR(sl::getResultAsStr(Result)));
UE_LOG(LogStreamline, Log, TEXT("%s sl::ReflexState::flashIndicatorDriverControlled=%u"), ANSI_TO_TCHAR(__FUNCTION__), ReflexState.flashIndicatorDriverControlled);
bFlashIndicatorDriverControlled = ReflexState.flashIndicatorDriverControlled;
}
}
// Per-frame update of the cached latency statistics.
//
// When Reflex is supported and latency markers are available, the latest completed frame report is
// folded into the running averages (75% previous value, 25% new sample) and the per-stage offsets
// relative to simulation start are refreshed. Otherwise every statistic is reset to 0.
//
// DeltaTime - unused.
void FStreamlineLatencyMarkers::Tick(float DeltaTime)
{
	// NOTE(review): the reset comment below mentions the cvar being used to disable latency, but this
	// condition checks GetAvailable(), not GetEnabled() — confirm which one is intended.
	if (IsStreamlineReflexSupported() && GetAvailable())
	{
		sl::ReflexState ReflexState{};
		sl::Result Result = CALL_SL_FEATURE_FN(sl::kFeatureReflex, slReflexGetState, ReflexState);
		checkf(Result == sl::Result::eOk, TEXT("slReflexGetState failed (%s)"), ANSI_TO_TCHAR(sl::getResultAsStr(Result)));

		if (ReflexState.latencyReportAvailable)
		{
			// frameReport[63] contains the latest completed frameReport
			const auto& Latest = ReflexState.frameReport[63];

			const uint64_t TotalLatencyUs = Latest.gpuRenderEndTime - Latest.simStartTime;
			if (TotalLatencyUs != 0)
			{
				// frameReport results available, get latest completed frame latency data
				// A 3/4, 1/4 split gets close to a simple 10 frame moving average
				AverageTotalLatencyMs = AverageTotalLatencyMs * 0.75f + TotalLatencyUs / 1000.0f * 0.25f;
				AverageGameLatencyMs = AverageGameLatencyMs * 0.75f + (Latest.driverEndTime - Latest.simStartTime) / 1000.0f * 0.25f;
				AverageRenderLatencyMs = AverageRenderLatencyMs * 0.75f + (Latest.gpuRenderEndTime - Latest.osRenderQueueStartTime) / 1000.0f * 0.25f;

				AverageSimulationLatencyMs = AverageSimulationLatencyMs * 0.75f + (Latest.simEndTime - Latest.simStartTime) / 1000.0f * 0.25f;
				AverageRenderSubmitLatencyMs = AverageRenderSubmitLatencyMs * 0.75f + (Latest.renderSubmitEndTime - Latest.renderSubmitStartTime) / 1000.0f * 0.25f;
				AveragePresentLatencyMs = AveragePresentLatencyMs * 0.75f + (Latest.presentEndTime - Latest.presentStartTime) / 1000.0f * 0.25f;
				AverageDriverLatencyMs = AverageDriverLatencyMs * 0.75f + (Latest.driverEndTime - Latest.driverStartTime) / 1000.0f * 0.25f;
				AverageOSRenderQueueLatencyMs = AverageOSRenderQueueLatencyMs * 0.75f + (Latest.osRenderQueueEndTime - Latest.osRenderQueueStartTime) / 1000.0f * 0.25f;
				AverageGPURenderLatencyMs = AverageGPURenderLatencyMs * 0.75f + (Latest.gpuRenderEndTime - Latest.gpuRenderStartTime) / 1000.0f * 0.25f;

				// Offsets are not averaged: they are the latest frame's stage start times relative to simulation start.
				RenderSubmitOffsetMs = (Latest.renderSubmitStartTime - Latest.simStartTime) / 1000.0f;
				PresentOffsetMs = (Latest.presentStartTime - Latest.simStartTime) / 1000.0f;
				DriverOffsetMs = (Latest.driverStartTime - Latest.simStartTime) / 1000.0f;
				OSRenderQueueOffsetMs = (Latest.osRenderQueueStartTime - Latest.simStartTime) / 1000.0f;
				GPURenderOffsetMs = (Latest.gpuRenderStartTime - Latest.simStartTime) / 1000.0f;

				UE_LOG(LogStreamline, VeryVerbose, TEXT("AverageTotalLatencyMs: %f"), AverageTotalLatencyMs);
				UE_LOG(LogStreamline, VeryVerbose, TEXT("AverageGameLatencyMs: %f"), AverageGameLatencyMs);
				UE_LOG(LogStreamline, VeryVerbose, TEXT("AverageRenderLatencyMs: %f"), AverageRenderLatencyMs);
				UE_LOG(LogStreamline, VeryVerbose, TEXT("AverageSimulationLatencyMs: %f"), AverageSimulationLatencyMs);
				UE_LOG(LogStreamline, VeryVerbose, TEXT("AverageRenderSubmitLatencyMs: %f"), AverageRenderSubmitLatencyMs);
				UE_LOG(LogStreamline, VeryVerbose, TEXT("AveragePresentLatencyMs: %f"), AveragePresentLatencyMs);
				UE_LOG(LogStreamline, VeryVerbose, TEXT("AverageDriverLatencyMs: %f"), AverageDriverLatencyMs);
				UE_LOG(LogStreamline, VeryVerbose, TEXT("AverageOSRenderQueueLatencyMs: %f"), AverageOSRenderQueueLatencyMs);
				UE_LOG(LogStreamline, VeryVerbose, TEXT("AverageGPURenderLatencyMs: %f"), AverageGPURenderLatencyMs);
			}
		}
	}
	else
	{
		// Reset module back to default values in case re-enabled in the same session
		// doing this here in case the cvar gets used to disable latency (vs SetEnabled)
		AverageTotalLatencyMs = 0.0f;
		AverageGameLatencyMs = 0.0f;
		AverageRenderLatencyMs = 0.0f;

		AverageSimulationLatencyMs = 0.0f;
		AverageRenderSubmitLatencyMs = 0.0f;
		AveragePresentLatencyMs = 0.0f;
		AverageDriverLatencyMs = 0.0f;
		AverageOSRenderQueueLatencyMs = 0.0f;
		AverageGPURenderLatencyMs = 0.0f;

		RenderSubmitOffsetMs = 0.0f;
		PresentOffsetMs = 0.0f;
		DriverOffsetMs = 0.0f;
		OSRenderQueueOffsetMs = 0.0f;
		GPURenderOffsetMs = 0.0f;
	}
}
// Intentionally a no-op: the frame number argument is ignored and no marker is sent.
void FStreamlineLatencyMarkers::SetInputSampleLatencyMarker(uint64)
{
//The engine calls this every frame, so making the log less chatty
//UE_LOG(LogStreamline, Warning, TEXT("FStreamlineLatencyMarkers::SetInputSampleLatencyMarker is no longer supported"));
}
// Sends the simulation-start PCL marker for FrameNumber via SetCustomLatencyMarker.
void FStreamlineLatencyMarkers::SetSimulationLatencyMarkerStart(uint64 FrameNumber)
{
	const uint32 MarkerId = static_cast<uint32>(sl::PCLMarker::eSimulationStart);
	SetCustomLatencyMarker(MarkerId, FrameNumber);
}
// Sends the simulation-end PCL marker for FrameNumber via SetCustomLatencyMarker.
void FStreamlineLatencyMarkers::SetSimulationLatencyMarkerEnd(uint64 FrameNumber)
{
	const uint32 MarkerId = static_cast<uint32>(sl::PCLMarker::eSimulationEnd);
	SetCustomLatencyMarker(MarkerId, FrameNumber);
}
// Sends the render-submit-start PCL marker for FrameNumber via SetCustomLatencyMarker.
void FStreamlineLatencyMarkers::SetRenderSubmitLatencyMarkerStart(uint64 FrameNumber)
{
	const uint32 MarkerId = static_cast<uint32>(sl::PCLMarker::eRenderSubmitStart);
	SetCustomLatencyMarker(MarkerId, FrameNumber);
}
// Sends the render-submit-end PCL marker for FrameNumber via SetCustomLatencyMarker.
void FStreamlineLatencyMarkers::SetRenderSubmitLatencyMarkerEnd(uint64 FrameNumber)
{
	const uint32 MarkerId = static_cast<uint32>(sl::PCLMarker::eRenderSubmitEnd);
	SetCustomLatencyMarker(MarkerId, FrameNumber);
}
// Sends the present-start PCL marker for FrameNumber via SetCustomLatencyMarker.
void FStreamlineLatencyMarkers::SetPresentLatencyMarkerStart(uint64 FrameNumber)
{
	const uint32 MarkerId = static_cast<uint32>(sl::PCLMarker::ePresentStart);
	SetCustomLatencyMarker(MarkerId, FrameNumber);
}
// Sends the present-end PCL marker for FrameNumber, then refreshes the DLSS-G status.
void FStreamlineLatencyMarkers::SetPresentLatencyMarkerEnd(uint64 FrameNumber)
{
	const uint32 MarkerId = static_cast<uint32>(sl::PCLMarker::ePresentEnd);
	SetCustomLatencyMarker(MarkerId, FrameNumber);

	// The status query is issued from here because this point is right after present.
	GetDLSSGStatusFromStreamline();
}
// Sends the trigger-flash PCL marker for FrameNumber, but only while the flash indicator is enabled.
void FStreamlineLatencyMarkers::SetFlashIndicatorLatencyMarker(uint64 FrameNumber)
{
	if (!GetFlashIndicatorEnabled())
	{
		return;
	}

	const uint32 MarkerId = static_cast<uint32>(sl::PCLMarker::eTriggerFlash);
	SetCustomLatencyMarker(MarkerId, FrameNumber);
}
// Forwards a PCL marker to Streamline for the frame token that belongs to FrameNumber.
//
// MarkerId    - numeric value of an sl::PCLMarker.
// FrameNumber - frame the marker is attributed to.
//
// Silently does nothing when PCL is unsupported or latency markers are disabled.
void FStreamlineLatencyMarkers::SetCustomLatencyMarker(uint32 MarkerId, uint64 FrameNumber)
{
	if (!IsStreamlinePCLSupported() || !GetEnabled())
	{
		return;
	}

	const sl::PCLMarker Marker = static_cast<sl::PCLMarker>(MarkerId);
	sl::FrameToken* TokenForFrame = FStreamlineCoreModule::GetStreamlineRHI()->GetFrameToken(FrameNumber);

	sl::Result Result = CALL_SL_FEATURE_FN(sl::kFeaturePCL, slPCLSetMarker, Marker, *TokenForFrame);
	checkf(Result == sl::Result::eOk, TEXT("slPCLSetMarker failed MarkerId=%s (%s)"),
		ANSI_TO_TCHAR(sl::getPCLMarkerAsStr(Marker)), ANSI_TO_TCHAR(sl::getResultAsStr(Result)));
}
// Windows message hook: when the incoming message id equals the PCL stats window message,
// a latency-ping marker is sent for the current frame and the message is reported as handled.
//
// Returns true only in that case; OutResult is never written.
bool FStreamlineLatencyMarkers::ProcessMessage(HWND hwnd, uint32 msg, WPARAM wParam, LPARAM lParam, int32& OutResult)
{
	if (!IsStreamlinePCLSupported() || !GetEnabled())
	{
		return false;
	}

	sl::PCLState PCLState{};
	sl::Result Result = CALL_SL_FEATURE_FN(sl::kFeaturePCL, slPCLGetState, PCLState);
	checkf(Result == sl::Result::eOk, TEXT("slPCLGetState failed (%s)"), ANSI_TO_TCHAR(sl::getResultAsStr(Result)));

	if (PCLState.statsWindowMessage != msg)
	{
		return false;
	}

	// Latency ping based on custom message
	sl::FrameToken* CurrentFrameToken = FStreamlineCoreModule::GetStreamlineRHI()->GetFrameToken(GFrameCounter);
	Result = CALL_SL_FEATURE_FN(sl::kFeaturePCL, slPCLSetMarker, sl::PCLMarker::ePCLatencyPing, *CurrentFrameToken);
	checkf(Result == sl::Result::eOk, TEXT("slPCLSetMarker ePCLatencyPing failed (%s)"), ANSI_TO_TCHAR(sl::getResultAsStr(Result)));
	return true;
}
// Stores the requested state in the latency-marker enable cvar; availability is not checked here
// (GetEnabled applies that check when reading).
void FStreamlineLatencyMarkers::SetEnabled(bool bInEnabled)
{
CVarStreamlineReflexEnableLatencyMarkers->Set(bInEnabled, ECVF_SetByCommandline);
}
bool FStreamlineLatencyMarkers::GetEnabled()
{
if (!GetAvailable())
{
return false;
}
int32 CVarReflex = CVarStreamlineReflexEnableLatencyMarkers.GetValueOnAnyThread();
return CVarReflex != 0;
}
// Latency markers are available exactly when the Streamline PCL feature is supported.
bool FStreamlineLatencyMarkers::GetAvailable()
{
return IsStreamlinePCLSupported();
}
// Obsolete setter: bInEnabled is ignored and no state changes; the call only logs that it is non-functional.
void FStreamlineLatencyMarkers::SetFlashIndicatorEnabled(bool bInEnabled)
{
UE_LOG(LogStreamline, Log, TEXT("FStreamlineLatencyMarkers::SetFlashIndicatorEnabled is obsolete and non-functional. The Reflex Flash Indicator is configured by the NVIDIA GeForce Experience overlay"));
}
// The flash indicator is reported as enabled when latency markers are enabled and the
// Reflex state captured in Initialize() said the indicator is driver controlled.
bool FStreamlineLatencyMarkers::GetFlashIndicatorEnabled()
{
	if (!GetEnabled())
	{
		return false;
	}

	return bFlashIndicatorDriverControlled;
}
static Streamline::EStreamlineFeatureSupport GStreamlineReflexSupport = Streamline::EStreamlineFeatureSupport::NotSupported;

// Determines, once, how well Streamline Reflex is supported and caches the answer in GStreamlineReflexSupport.
// Subsequent calls return the cached value.
Streamline::EStreamlineFeatureSupport QueryStreamlineReflexSupport()
{
	static bool bReflexSupportQueried = false;
	if (bReflexSupportQueried)
	{
		return GStreamlineReflexSupport;
	}

	// Walks the preconditions from cheapest to most specific and reports the first reason for non-support.
	const auto DetermineSupport = []() -> Streamline::EStreamlineFeatureSupport
	{
		if (!FApp::CanEverRender())
		{
			return Streamline::EStreamlineFeatureSupport::NotSupported;
		}
		if (!IsRHIDeviceNVIDIA())
		{
			return Streamline::EStreamlineFeatureSupport::NotSupportedIncompatibleHardware;
		}
		if (!IsStreamlineSupported())
		{
			return Streamline::EStreamlineFeatureSupport::NotSupported;
		}

		FStreamlineRHI* PlatformRHI = GetPlatformStreamlineRHI();
		if (!PlatformRHI->IsReflexSupportedByRHI())
		{
			return Streamline::EStreamlineFeatureSupport::NotSupportedIncompatibleRHI;
		}

		const sl::Feature Feature = sl::kFeatureReflex;
		sl::Result SupportedResult = SLisFeatureSupported(Feature, *PlatformRHI->GetAdapterInfo());
		LogStreamlineFeatureSupport(Feature, *PlatformRHI->GetAdapterInfo());
		return TranslateStreamlineResult(SupportedResult);
	};

	GStreamlineReflexSupport = DetermineSupport();

	// Mark as initialized before calling GetDLSSGStatusFromStreamline so we don't recurse if it calls back into us.
	bReflexSupportQueried = true;

	if (Streamline::EStreamlineFeatureSupport::Supported == GStreamlineReflexSupport)
	{
		// to get the min supported width/height as well as generated frames range
		// NOTE(review): this is a DLSS-G status query inside the Reflex support query — confirm it is intended here.
		GetDLSSGStatusFromStreamline(true);
	}

	return GStreamlineReflexSupport;
}
bool IsStreamlineReflexSupported()
{
return Streamline::EStreamlineFeatureSupport::Supported == QueryStreamlineReflexSupport();
}
void RegisterStreamlineReflexHooks()
{
UE_LOG(LogStreamline, Log, TEXT("%s Enter"), ANSI_TO_TCHAR(__FUNCTION__));
TSharedPtr<IPlugin> ReflexPlugin = IPluginManager::Get().FindPlugin(TEXT("Reflex"));
const bool bIsReflexPluginEnabled = ReflexPlugin && (ReflexPlugin->IsEnabled() || ReflexPlugin->IsEnabledByDefault(false));
if (bIsReflexPluginEnabled)
{
UE_LOG(LogStreamline, Log, TEXT("Reflex plugin enabled, which is incompatible with the Reflex implementation provided by this Streamline UE plugin"));
if (CVarStreamlineUnregisterReflexPlugin.GetValueOnAnyThread() != 0)
{
UE_LOG(LogStreamline, Log, TEXT("Unregistering the Reflex plugin related modular features. The Reflex plugin Blueprint library is expected to continue to work 🤞."));
auto UnregisterFeatures = [](const FName& FeatureName)
{
TArray< IModularFeature*> Features = IModularFeatures::Get().GetModularFeatureImplementations< IModularFeature>(FeatureName);
for (IModularFeature* Feature : Features)
{
UE_LOG(LogStreamline, Log, TEXT("Unregistering %s %p "), *FeatureName.ToString(), Feature);
IModularFeatures::Get().UnregisterModularFeature(FeatureName, Feature);
}
};
UnregisterFeatures(IMaxTickRateHandlerModule::GetModularFeatureName());
UnregisterFeatures(ILatencyMarkerModule::GetModularFeatureName());
}
else
{
UE_LOG(LogStreamline, Log, TEXT("It is recommended to either disable the Reflex plugin or set r.Streamline.UnregisterReflexPlugin to disable the incompatible parts of the Reflex plugin."));
}
}
// register the modular features for the engine to call into the SL latency APIs at various places across frame & threads
StreamlineMaxTickRateHandler = MakeUnique<FStreamlineMaxTickRateHandler>();
StreamlineMaxTickRateHandler->Initialize();
IModularFeatures::Get().RegisterModularFeature(StreamlineMaxTickRateHandler->GetModularFeatureName(), StreamlineMaxTickRateHandler.Get());
#if ENGINE_MAJOR_VERSION == 5 && ENGINE_MINOR_VERSION >= 3
if (FNullPlatformApplicationMisc::IsUsingNullApplication())
{
UE_LOG(LogStreamline, Log, TEXT("Reflex latency markers unsupported with -RenderOffScreen"));
}
else
#endif
{
StreamlineLatencyMarker = MakeUnique<FStreamlineLatencyMarkers>();
StreamlineLatencyMarker->Initialize();
IModularFeatures::Get().RegisterModularFeature(StreamlineLatencyMarker->GetModularFeatureName(), StreamlineLatencyMarker.Get());
// this one needs to be registered so ProcessWindowMessage gets called
check(FSlateApplication::IsInitialized());
FWindowsApplication* WindowsApplication = (FWindowsApplication*)FSlateApplication::Get().GetPlatformApplication().Get();
check(WindowsApplication);
WindowsApplication->AddMessageHandler(*StreamlineLatencyMarker);
FSlateApplication::Get().OnPreShutdown().AddLambda(
[]()
{
FWindowsApplication* WindowsApplication = (FWindowsApplication*)FSlateApplication::Get().GetPlatformApplication().Get();
check(WindowsApplication);
WindowsApplication->RemoveMessageHandler(*StreamlineLatencyMarker);
}
);
}
UE_LOG(LogStreamline, Log, TEXT("%s Leave"), ANSI_TO_TCHAR(__FUNCTION__));
}
// Unregisters and destroys the modular feature instances created by RegisterStreamlineReflexHooks.
//
// Each instance is handled only if it exists: the latency marker module is never created when the
// null platform application is in use (-RenderOffScreen), and neither instance exists if registration
// never ran, so both pointers may legitimately be null here.
void UnregisterStreamlineReflexHooks()
{
	UE_LOG(LogStreamline, Log, TEXT("%s Enter"), ANSI_TO_TCHAR(__FUNCTION__));

	if (StreamlineMaxTickRateHandler.IsValid())
	{
		IModularFeatures::Get().UnregisterModularFeature(StreamlineMaxTickRateHandler->GetModularFeatureName(), StreamlineMaxTickRateHandler.Get());
		StreamlineMaxTickRateHandler.Reset();
	}

	if (StreamlineLatencyMarker.IsValid())
	{
		IModularFeatures::Get().UnregisterModularFeature(StreamlineLatencyMarker->GetModularFeatureName(), StreamlineLatencyMarker.Get());
		StreamlineLatencyMarker.Reset();
	}

	UE_LOG(LogStreamline, Log, TEXT("%s Leave"), ANSI_TO_TCHAR(__FUNCTION__));
}

View File

@ -0,0 +1,660 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "StreamlineReflexCamera.h"
#include "StreamlineReflex.h"
#include "Framework/Application/SlateApplication.h"
#include "HAL/IConsoleManager.h"
#include "Interfaces/IPluginManager.h"
#include "Modules/ModuleManager.h"
#include "RHI.h"
#include "Runtime/Engine/Classes/GameFramework/PlayerController.h"
#include "Runtime/Engine/Classes/Components/PrimitiveComponent.h"
#include "Runtime/Engine/Classes/GameFramework/Pawn.h"
#include "Runtime/Engine/Classes/Engine/World.h"
#include "Runtime/Launch/Resources/Version.h"
#include "ScenePrivate.h"
#include "CollisionQueryParams.h"
#if ENGINE_MAJOR_VERSION == 5 && ENGINE_MINOR_VERSION >= 3
#include "Null/NullPlatformApplicationMisc.h"
#endif
// that is right now defined for some custom engine branches
#ifndef WITH_LATE_UPDATE_MATRIX
#pragma message( "NoWarp mask and predictive rendering support disabled (WITH_LATE_UPDATE_MATRIX not defined)" )
#define WITH_LATE_UPDATE_MATRIX 0
#else
#if WITH_LATE_UPDATE_MATRIX
#pragma message( "NoWarp mask and predictive rendering support enabled (WITH_LATE_UPDATE_MATRIX is 1)" )
#else
#pragma message( "NoWarp mask and predictive rendering support DISABLED (WITH_LATE_UPDATE_MATRIX is 0)" )
#endif
#endif
#include "StreamlineAPI.h"
#include "StreamlineConversions.h"
#include "StreamlineCore.h"
#include "StreamlineCorePrivate.h"
#include "StreamlineDLSSG.h"
#include "StreamlineLatewarp.h"
#include "StreamlineRHI.h"
// Selects between the two camera predictors described in the help text (false = first person, true = third person).
static TAutoConsoleVariable<bool> CVarStreamlineReflexCameraPredictor(
TEXT("r.Streamline.Reflex.CameraPredictor"),
0,
TEXT("Which predictive rendering camera predictor to use (default = 0)\n")
TEXT("0: Use the first person predictor\n")
TEXT("1: Use the third person predictor\n"),
ECVF_RenderThreadSafe);
// Predictive rendering toggle; its default and help text depend on WITH_LATE_UPDATE_MATRIX
// (1 when the engine provides the late update matrix, 0 otherwise).
static TAutoConsoleVariable<bool> CVarStreamlineReflexPredictiveRendering(
TEXT("r.Streamline.Reflex.PredictiveRendering"),
#if WITH_LATE_UPDATE_MATRIX
1,
TEXT("Whether predictive rendering is enabled or not. (default = 1) since custom engine \n"),
#else
0,
TEXT("Whether predictive rendering is enabled or not. (default = 0), since stock engine\n"),
#endif
ECVF_RenderThreadSafe);
// Clip correction toggle (off by default).
static TAutoConsoleVariable<bool> CVarStreamlineReflexClipCorrection(
TEXT("r.Streamline.Reflex.ClipCorrection"), 0,
TEXT("Whether clip correction is enabled or not. (default = 0)\n"),
ECVF_RenderThreadSafe);
// Collision radius used for camera extrapolation clipping, per the help text.
static TAutoConsoleVariable<float> CVarStreamlineReflexClipRadius(
TEXT("r.Streamline.Reflex.ClipRadius"), 10.f,
TEXT("Collision radius for camera extrapolation clipping. (default = 10.f)\n"),
ECVF_RenderThreadSafe);
// Non-zero enables the actor debug messages collected by DumpActor and shown by GetReflexCameraOnScreenMessages.
static TAutoConsoleVariable<int32> CVarStreamlineReflexActorDebug(
TEXT("r.Streamline.Reflex.ActorDebug"), 0,
TEXT("Whether Actor debug messages are shown. (default = 0)\n"),
ECVF_RenderThreadSafe);
// Selects how the late update matrix is applied; the meaning of individual values is not visible in this part of the file.
static TAutoConsoleVariable<int32> CVarStreamlineReflexPredictiveRenderingLateUpdateMode(
TEXT("r.Streamline.Reflex.PredictiveRendering.LateUpdateMode"), 1,
TEXT("Select how the late update matrix is applied. (default = 1)\n"),
ECVF_RenderThreadSafe);
#if !(UE_BUILD_SHIPPING || UE_BUILD_TEST)
// Lock taken by GetReflexCameraOnScreenMessages while it reads GameThreadDebugMessages.
FCriticalSection GameThreadDebugMessagesCS;
// Debug strings appended by DumpActor when it runs on the game thread.
TArray<FString> GameThreadDebugMessages;
// Lock taken by GetReflexCameraOnScreenMessages while it reads RenderThreadDebugMessages.
FCriticalSection RenderThreadDebugMessagesCS;
// Debug strings appended by DumpActor when it runs on any thread other than the game thread.
TArray<FString> RenderThreadDebugMessages;
// NOTE(review): presumably the handle of the on-screen message delegate bound to
// GetReflexCameraOnScreenMessages; the registration is not visible here — confirm.
FDelegateHandle ReflexCameraOnScreenMessagesDelegateHandle;
// Appends the collected Reflex camera debug strings to OutMessages as Info-severity on-screen messages.
//
// OutMessages - multimap the messages are added to; existing entries are left untouched.
//
// Must be called on the game thread. Nothing is added unless the actor debug cvar is non-zero.
// Game-thread messages are emitted first, then render-thread messages, each under its own lock.
void GetReflexCameraOnScreenMessages(TMultiMap<FCoreDelegates::EOnScreenMessageSeverity, FText>& OutMessages)
{
	check(IsInGameThread());

	if (CVarStreamlineReflexActorDebug.GetValueOnGameThread())
	{
		{
			FScopeLock Lock(&GameThreadDebugMessagesCS);
			// Iterate by const reference: FText::FromString only reads the string, so no per-message copy is needed.
			for (const FString& Message : GameThreadDebugMessages)
			{
				OutMessages.Add(FCoreDelegates::EOnScreenMessageSeverity::Info, FText::FromString(Message));
			}
		}

		{
			FScopeLock Lock(&RenderThreadDebugMessagesCS);
			for (const FString& Message : RenderThreadDebugMessages)
			{
				OutMessages.Add(FCoreDelegates::EOnScreenMessageSeverity::Info, FText::FromString(Message));
			}
		}
	}
}
void DumpActor(const TCHAR* Stage, const AActor* Actor)
{
const int32 ActorDebug = CVarStreamlineReflexActorDebug.GetValueOnAnyThread();
if (Actor && ActorDebug != 0)
{
TArray<FString>& DebugMessages = IsInGameThread() ? GameThreadDebugMessages : RenderThreadDebugMessages;
DebugMessages.Add(FString::Printf(TEXT("<%s,%s> %s (A%s)"), Stage,*CurrentThreadName(), *Actor->GetName(), *Actor->GetClass()->GetName()));
if (const AActor* ParentActor = Actor->GetParentActor())
{
DebugMessages.Add(FString::Printf(TEXT("ParentActor %s (A%s)"), *ParentActor->GetName(), *ParentActor->GetClass()->GetName()));
}
if (const AActor* ParentComponent = Actor->GetParentActor())
{
DebugMessages.Add(FString::Printf(TEXT("ParentComponent %s(U%s))"), *ParentComponent->GetName(), *ParentComponent->GetClass()->GetName()));
}
TArray<FString> OtherComponentNames;
TArray<FString> PrimitiveComponentNames;
for (auto& Component : Actor->GetComponents())
{
bool bIsPrimitiveComponent = false;
FString OwnerName;
if(Component->GetOwner() && Component->GetOwner()!= Actor)
{
OwnerName = FString::Printf(TEXT(" Owner: %s"), *Component->GetOwner()->GetName());
}
FString PrimitiveExtra;
UPrimitiveComponent* PrimitiveComponent = nullptr;
FPrimitiveSceneProxy* SceneProxy = nullptr;
if (Component->GetClass()->IsChildOf(UPrimitiveComponent::StaticClass()))
{
bIsPrimitiveComponent = true;
PrimitiveComponent = dynamic_cast<UPrimitiveComponent*>(Component);
SceneProxy = PrimitiveComponent->SceneProxy;
if (SceneProxy)
{
FPrimitiveSceneInfo* SceneInfo = SceneProxy->GetPrimitiveSceneInfo();
int32 NumPrimitives = 0;
FMatrix LateUpdateMatrix = FMatrix(EForceInit::ForceInitToZero);
#if WITH_LATE_UPDATE_MATRIX
LateUpdateMatrix = SceneProxy->GetLateUpdateMatrix();
#endif
FMatrix LocalToWorld=SceneProxy->GetLocalToWorld();
PrimitiveExtra = FString::Printf(TEXT(" %s I=%u, PI=%u L2W %s LU %s "), *SceneProxy->GetResourceName().ToString(),
SceneInfo->GetIndex(), SceneInfo->GetPersistentIndex().Index,
*LocalToWorld.ToString(), *LateUpdateMatrix.ToString()
);
if (!SceneInfo->bDrawInGame)
{
bIsPrimitiveComponent = false;
}
}
else
{
bIsPrimitiveComponent = false;
}
}
TArray<FString>& ComponentNames = bIsPrimitiveComponent ? PrimitiveComponentNames : OtherComponentNames;
if (ActorDebug >= 2)
{
FString ComponentClassName = Component->GetClass()->GetName();
ComponentNames.Add(FString::Printf(TEXT("%s%s(U%s)%s"), *Component->GetName(), *PrimitiveExtra, *ComponentClassName, *OwnerName));
}
else if (ActorDebug >= 1)
{
ComponentNames.Add(FString::Printf(TEXT("%s"), *Component->GetName()));
}
}
if(PrimitiveComponentNames.Num())
{
if (ActorDebug < 2)
{
const FString PrimitiveComponentList = FString::Join(PrimitiveComponentNames, TEXT(", "));
DebugMessages.Add(FString::Printf(TEXT(" Primitives[%u] %s"), PrimitiveComponentNames.Num(), *PrimitiveComponentList));
}
else
{
for (int32 Index = 0; Index < PrimitiveComponentNames.Num(); ++Index)
{
DebugMessages.Add(FString::Printf(TEXT(" Primitives[%u] %s"), Index, *PrimitiveComponentNames[Index]));
}
}
}
if(OtherComponentNames.Num())
{
const FString OtherComponentList = FString::Join(OtherComponentNames, TEXT(", "));
DebugMessages.Add(FString::Printf(TEXT(" Components[%u] %s"), OtherComponentNames.Num(), *OtherComponentList));
}
TArray<AActor*> Children;
Actor->GetAllChildActors(Children);
if (Children.Num())
{
DebugMessages.Add(FString::Printf(TEXT(" ChildActors[%u]"), Children.Num()));
}
}
}
#endif
/**
 * Game thread half of the late update: records which primitives attached to the player's pawn
 * (or spectator) belong to the given frame.
 *
 * In non-shipping, non-test builds it also registers the on-screen message delegate once and,
 * when r.Streamline.Reflex.ActorDebug is non-zero, rebuilds the game thread debug message list.
 *
 * @param Player   Player controller whose pawn or spectator is inspected; may be null.
 * @param FrameID  Frame counter; selects the slot in UpdateStates (FrameID % FramesInFlight).
 */
void FStreamlineCameraManager::LateUpdate_GameThread(APlayerController* Player, uint64 FrameID)
{
	check(IsInGameThread());
#if !(UE_BUILD_SHIPPING || UE_BUILD_TEST)
	if (!ReflexCameraOnScreenMessagesDelegateHandle.IsValid())
	{
		ReflexCameraOnScreenMessagesDelegateHandle = FCoreDelegates::OnGetOnScreenMessages.AddStatic(&GetReflexCameraOnScreenMessages);
	}
	if(CVarStreamlineReflexActorDebug.GetValueOnAnyThread())
	{
		FScopeLock Lock(&GameThreadDebugMessagesCS);
		GameThreadDebugMessages.Empty();
		if (Player)
		{
			DumpActor(TEXT("SetupViewPoint"), Player);
			if (Player->GetPawnOrSpectator())
			{
				DumpActor(TEXT("SetupViewPoint"), Player->GetPawnOrSpectator());
			}
		}
		//DumpActor(TEXT("SetupViewPoint"), Player->PlayerCameraManager);
		// Empty entry appended after the game thread lines.
		GameThreadDebugMessages.AddDefaulted();
	}
#endif
	if (!DoesFeatureUseCameraData())
	{
		return;
	}
	if (!Player || !Player->GetPawnOrSpectator())
	{
		return;
	}

	USceneComponent* Component = Player->GetPawnOrSpectator()->GetRootComponent();
	if (!Component)
	{
		// An actor without a root component has nothing to gather; bail out like the other
		// early returns, before the per-frame state is touched, instead of dereferencing null.
		return;
	}

	FLateUpdateState& LateUpdateData = UpdateStates[FrameID % FramesInFlight];
	LateUpdateData.FrameID = FrameID;
	LateUpdateData.Primitives.Reset();
	LateUpdateData.World = Component->GetWorld();
	LateUpdateData.CollisionParams = FCollisionQueryParams(SCENE_QUERY_STAT(CameraPen), false, Player);
	GatherLateUpdatePrimitives(FrameID, Component, LateUpdateData.CollisionParams);
}
/**
 * Registers one component with the late update state of the given frame.
 *
 * A primitive component with a scene proxy is added to the ignore list of CollisionParams. If its
 * scene info has a valid index, the component is additionally switched to render custom depth with
 * stencil value 1 and the (scene info, index) pair is stored in the frame's primitive map.
 *
 * @param FrameID          Frame counter; selects the slot in UpdateStates.
 * @param Component        Component to inspect; must not be null.
 * @param CollisionParams  Query parameters that receive the ignored component.
 */
void FStreamlineCameraManager::CacheSceneInfo(int64 FrameID, USceneComponent* Component, FCollisionQueryParams& CollisionParams)
{
	ensureMsgf(!Component->IsUsingAbsoluteLocation() && !Component->IsUsingAbsoluteRotation(), TEXT("SceneComponents that use absolute location or rotation are not supported by the LateUpdateManager"));

	// Only primitive components that currently own a scene proxy are of interest.
	UPrimitiveComponent* const Primitive = dynamic_cast<UPrimitiveComponent*>(Component);
	if (Primitive == nullptr || Primitive->SceneProxy == nullptr)
	{
		return;
	}
	CollisionParams.AddIgnoredComponent(Primitive);

	FPrimitiveSceneInfo* const SceneInfo = Primitive->SceneProxy->GetPrimitiveSceneInfo();
	if (SceneInfo == nullptr || !SceneInfo->IsIndexValid())
	{
		return;
	}

	Primitive->SetRenderCustomDepth(true);
	Primitive->SetCustomDepthStencilValue(1);

	FLateUpdateState& FrameState = UpdateStates[FrameID % FramesInFlight];
	FrameState.Primitives.Emplace(SceneInfo, SceneInfo->GetIndex());
}
/**
 * Runs CacheSceneInfo on ParentComponent and on every non-null component returned by
 * GetChildrenComponents (called with its first argument set to true).
 *
 * @param FrameID          Frame counter forwarded to CacheSceneInfo.
 * @param ParentComponent  Root of the component hierarchy to walk; must not be null.
 * @param CollisionParams  Query parameters forwarded to CacheSceneInfo.
 */
void FStreamlineCameraManager::GatherLateUpdatePrimitives(int64 FrameID, USceneComponent* ParentComponent, FCollisionQueryParams& CollisionParams)
{
	// The parent itself first, then everything attached below it.
	CacheSceneInfo(FrameID, ParentComponent, CollisionParams);

	TArray<USceneComponent*> Descendants;
	ParentComponent->GetChildrenComponents(true, Descendants);

	for (int32 ChildIndex = 0; ChildIndex < Descendants.Num(); ++ChildIndex)
	{
		USceneComponent* const Child = Descendants[ChildIndex];
		if (!Child)
		{
			continue;
		}
		CacheSceneInfo(FrameID, Child, CollisionParams);
	}
}
// Render thread: derives the late update matrix for the frame from the first view of the family and
// hands it to LateUpdate_RenderThread.
//
// - Predictive path (no camera cut, FrameID > 0, predictive rendering CVar set): if the cached state
//   belongs to FrameID, the matrix is CurrentTransform * inverse(PredictedTransform). If the cached
//   state belongs to another frame, nothing is applied at all.
// - Otherwise the identity matrix is applied.
//
// InViewFamily: family being rendered; Views[0] is read without a size check.
// FrameID:      frame counter; selects the slot in UpdateStates.
void FStreamlineCameraManager::PreRenderViewFamily_RenderThread(FSceneViewFamily& InViewFamily, uint64 FrameID)
{
check(IsInRenderingThread());
if (!DoesFeatureUseCameraData())
{
return;
}
const FSceneView* MainView = InViewFamily.Views[0];
check(MainView);
if (!MainView->bCameraCut && FrameID > 0 && CVarStreamlineReflexPredictiveRendering.GetValueOnRenderThread())
{
const FLateUpdateState& LateUpdateData = UpdateStates[FrameID % FramesInFlight];
// Only use the cached prediction if it was written for this very frame.
if (LateUpdateData.FrameID == FrameID)
{
// Rebuild the current view matrix from the view's location and rotation; the constant matrix permutes the axes.
FVector CurrentViewOrigin = MainView->ViewLocation;
FMatrix CurrentViewRotation = FInverseRotationMatrix(MainView->ViewRotation)
* FMatrix(
FPlane(0, 0, 1, 0),
FPlane(1, 0, 0, 0),
FPlane(0, 1, 0, 0),
FPlane(0, 0, 0, 1));
FMatrix CurrentViewMatrix = FTranslationMatrix(-CurrentViewOrigin) * CurrentViewRotation;
// The stored origin is used without negation here, unlike CurrentViewOrigin above
// (SetCameraData fills that row from the pre-view translation or the predicted position).
FVector PredictedViewOrigin = LateUpdateData.UpdatedWorldToView.GetOrigin();
FMatrix PredictedViewRotation = LateUpdateData.UpdatedWorldToView.RemoveTranslation();
FMatrix PredictedViewMatrix = FTranslationMatrix(PredictedViewOrigin) * PredictedViewRotation;
const FTransform CurrentTransform = FTransform(CurrentViewMatrix);
const FTransform PredictedTransform = FTransform(PredictedViewMatrix);
FMatrix LateUpdateMatrix = (CurrentTransform * PredictedTransform.Inverse()).ToMatrixWithScale();
LateUpdate_RenderThread(InViewFamily.Scene, FrameID, LateUpdateMatrix);
}
}
else
{
// Camera cut, first frame or prediction disabled: apply the identity matrix.
LateUpdate_RenderThread(InViewFamily.Scene, FrameID, FMatrix::Identity);
}
}
/**
 * Render thread half of the late update: pushes LateUpdateTransform to the scene proxies that were
 * cached for the frame on the game thread.
 *
 * The proxy call itself is only compiled in when WITH_LATE_UPDATE_MATRIX is set, and only executed
 * when r.Streamline.Reflex.PredictiveRendering.LateUpdateMode equals 1.
 *
 * @param Scene                Scene used to look primitives up by index.
 * @param FrameID              Frame counter; selects the slot in UpdateStates.
 * @param LateUpdateTransform  Matrix handed to each proxy.
 */
void FStreamlineCameraManager::LateUpdate_RenderThread(FSceneInterface* Scene, uint64 FrameID, const FMatrix& LateUpdateTransform)
{
	check(IsInRenderingThread());
	if (!DoesFeatureUseCameraData())
	{
		return;
	}

	FLateUpdateState& FrameState = UpdateStates[FrameID % FramesInFlight];
	if (FrameState.Primitives.Num() == 0)
	{
		return;
	}

	// Fast path: the scene still holds every cached scene info at its cached index.
	bool bSceneChanged = false;
	for (auto& CachedEntry : FrameState.Primitives)
	{
		FPrimitiveSceneInfo* const SceneInfoAtIndex = Scene->GetPrimitiveSceneInfo(CachedEntry.Value);
		FPrimitiveSceneInfo* const CachedInfo = CachedEntry.Key;
		if (CachedInfo != SceneInfoAtIndex)
		{
			// The scene was modified since the indices were cached; stop and fall back to the full scan below.
			bSceneChanged = true;
			break;
		}
		if (CachedInfo->Proxy)
		{
#if WITH_LATE_UPDATE_MATRIX
			if (CVarStreamlineReflexPredictiveRenderingLateUpdateMode.GetValueOnRenderThread() == 1)
			{
				// TODO: ApplyLateUpdateTransform gets overridden. Needs to be a callback from RendererScene
				CachedInfo->Proxy->SetLateUpdateTransform(LateUpdateTransform);
			}
#endif
			// A cached index of -1 marks the primitive as already processed.
			CachedEntry.Value = -1;
		}
	}
	if (!bSceneChanged)
	{
		return;
	}

	// Slow path: walk the scene by index until the lookup returns null and update every primitive
	// that is still in the cache and has not been processed by the fast path.
	for (int32 SceneIndex = 0; ; ++SceneIndex)
	{
		FPrimitiveSceneInfo* const SceneInfo = Scene->GetPrimitiveSceneInfo(SceneIndex);
		if (!SceneInfo)
		{
			break;
		}
		const auto* CachedIndex = FrameState.Primitives.Find(SceneInfo);
		if (SceneInfo->Proxy && CachedIndex && *CachedIndex >= 0)
		{
#if WITH_LATE_UPDATE_MATRIX
			if (CVarStreamlineReflexPredictiveRenderingLateUpdateMode.GetValueOnRenderThread() == 1)
			{
				// TODO: ApplyLateUpdateTransform gets overridden. Needs to be a callback from RendererScene
				SceneInfo->Proxy->SetLateUpdateTransform(LateUpdateTransform);
			}
#endif
		}
	}
}
/**
 * Render thread: overwrites the projection, location and rotation of InView with the predicted
 * camera stored for FrameID.
 *
 * Nothing happens on a camera cut, for FrameID 0, when predictive rendering is disabled, or when
 * the cached state was written for a different frame.
 *
 * @param InView   View to modify.
 * @param FrameID  Frame counter; selects the slot in UpdateStates.
 */
void FStreamlineCameraManager::PreRenderView_RenderThread(FSceneView& InView, uint64 FrameID)
{
	check(IsInRenderingThread());
	if (!DoesFeatureUseCameraData())
	{
		return;
	}

	const bool bUsePrediction = !InView.bCameraCut && FrameID > 0 && CVarStreamlineReflexPredictiveRendering.GetValueOnRenderThread();
	if (!bUsePrediction)
	{
		return;
	}

	const FLateUpdateState& FrameState = UpdateStates[FrameID % FramesInFlight];
	if (FrameState.FrameID != FrameID)
	{
		return;
	}

	InView.UpdateProjectionMatrix(FrameState.UpdatedViewToClip);

	// Since we can't set the view matrix directly, set it indirectly, accounting for UE's change in coordinate system
	const auto& PredictedWorldToView = FrameState.UpdatedWorldToView;
	InView.ViewLocation = -PredictedWorldToView.GetOrigin();

	// Columns 2, 0, 1 of the rotation part, in that order, form the rows of the matrix below.
	const FMatrix PermutedRotation(
		FPlane(PredictedWorldToView.M[0][2], PredictedWorldToView.M[0][0], PredictedWorldToView.M[0][1], 0.f),
		FPlane(PredictedWorldToView.M[1][2], PredictedWorldToView.M[1][0], PredictedWorldToView.M[1][1], 0.f),
		FPlane(PredictedWorldToView.M[2][2], PredictedWorldToView.M[2][0], PredictedWorldToView.M[2][1], 0.f),
		FPlane(0.f, 0.f, 0.f, 1.f));
	InView.ViewRotation = PermutedRotation.GetTransposed().Rotator();

	InView.UpdateViewMatrix();
}
/**
 * Debug hook: in non-shipping, non-test builds with r.Streamline.Reflex.ActorDebug set, replaces
 * the render thread debug message list with a dump of the view actor. Does nothing otherwise.
 *
 * @param InView   View whose ViewActor is dumped.
 * @param FrameID  Unused.
 */
void FStreamlineCameraManager::PostRenderView_RenderThread(FSceneView& InView, uint64 FrameID)
{
#if !(UE_BUILD_SHIPPING || UE_BUILD_TEST)
	const bool bActorDebugEnabled = CVarStreamlineReflexActorDebug.GetValueOnRenderThread() != 0;
	if (bActorDebugEnabled)
	{
		// The list is rebuilt from scratch on every call, under its critical section.
		FScopeLock Lock(&RenderThreadDebugMessagesCS);
		RenderThreadDebugMessages.Empty();
		DumpActor(TEXT("PostRenderView"), InView.ViewActor);
	}
#endif
}
/**
 * Game thread: sends the current and the previously rendered camera matrices to Streamline Reflex
 * and, when predictive rendering is active, extrapolates the camera for the late update.
 *
 * Only the first call per FrameID has an effect. Prediction requires no camera cut, the predictive
 * rendering CVar, and samples from the two directly preceding frames. It extrapolates rotation
 * (quaternion deltas), translation (velocity and acceleration) and, for perspective views, the
 * horizontal FOV. The result is stored in UpdateStates[FrameID % FramesInFlight] and remembered
 * as the "previously rendered" camera for the next call.
 *
 * @param InView   View whose matrices, projection and camera-cut flag are sampled.
 * @param FrameID  Frame counter; used for the Streamline frame token and the per-frame state slot.
 */
void FStreamlineCameraManager::SetCameraData(FSceneView& InView, uint64 FrameID)
{
	check(IsInGameThread());
#if !(UE_BUILD_SHIPPING || UE_BUILD_TEST)
	// Disabled debug dump of the view actor.
	if(false)
	{
		FScopeLock Lock(&GameThreadDebugMessagesCS);
		DumpActor(TEXT("SetupView"), InView.ViewActor);
	}
#endif
	if (!DoesFeatureUseCameraData())
	{
		return;
	}
	// Ignore multiple sets per frame
	if (ViewPredictionData[0].FrameID == FrameID)
	{
		return;
	}

	const FViewMatrices& CurrentViewMatrices = FViewMatrices(InView.ViewMatrices);
	const FVector CurrentTranslation = CurrentViewMatrices.GetPreViewTranslation();
	const FMatrix ViewMatrix = CurrentViewMatrices.GetViewMatrix();
	// Upper three rows of the view matrix plus the pre-view translation as the translation row.
	// Each row keeps its own fourth column (rows 1 and 2 previously copied row 0's value).
	const FMatrix WorldToView = FMatrix(
		FPlane(ViewMatrix.M[0][0], ViewMatrix.M[0][1], ViewMatrix.M[0][2], ViewMatrix.M[0][3]),
		FPlane(ViewMatrix.M[1][0], ViewMatrix.M[1][1], ViewMatrix.M[1][2], ViewMatrix.M[1][3]),
		FPlane(ViewMatrix.M[2][0], ViewMatrix.M[2][1], ViewMatrix.M[2][2], ViewMatrix.M[2][3]),
		FPlane(CurrentTranslation.X, CurrentTranslation.Y, CurrentTranslation.Z, 1.f));
	const FMatrix ProjectionMatrix = CurrentViewMatrices.ComputeProjectionNoAAMatrix();

	sl::FrameToken* FrameToken = FStreamlineCoreModule::GetStreamlineRHI()->GetFrameToken(FrameID);
	sl::ReflexCameraData cameraData{};
	cameraData.prevRenderedWorldToViewMatrix = ToSL(FRHIStreamlineArguments::FMatrix44f(PrevRenderedWorldToView));
	cameraData.prevRenderedViewToClipMatrix = ToSL(FRHIStreamlineArguments::FMatrix44f(PrevRenderedViewToClip));
	cameraData.worldToViewMatrix = ToSL(FRHIStreamlineArguments::FMatrix44f(WorldToView));
	cameraData.viewToClipMatrix = ToSL(FRHIStreamlineArguments::FMatrix44f(ProjectionMatrix));
	CALL_SL_FEATURE_FN(sl::kFeatureReflex, slReflexSetCameraData, sl::ViewportHandle(0), *FrameToken, cameraData);

	// Predictive Rendering
	FViewPredictionData FrameData;
	FrameData.FrameID = FrameID;
	FrameData.DeltaTime = FApp::GetDeltaTime();
	FrameData.Rotation = WorldToView.RemoveTranslation().ToQuat();
	FrameData.Translation = CurrentTranslation;
#if (ENGINE_MAJOR_VERSION == 4) || ((ENGINE_MAJOR_VERSION == 5) && (ENGINE_MINOR_VERSION < 3))
	float tanHalfFov = InView.ViewMatrices.GetInvProjectionMatrix().M[0][0];
#else
	float tanHalfFov = InView.ViewMatrices.GetTanHalfFov().X;
#endif
	FrameData.HFov = atan(tanHalfFov);

	if (!InView.bCameraCut && CVarStreamlineReflexPredictiveRendering.GetValueOnGameThread() &&
		(ViewPredictionData[1].FrameID + 1 == ViewPredictionData[0].FrameID) &&
		(ViewPredictionData[0].FrameID + 1 == FrameData.FrameID))
	{
		FLateUpdateState& LateUpdateData = UpdateStates[FrameID % FramesInFlight];
		// Start from the unpredicted camera; the prediction below overwrites it when possible.
		LateUpdateData.UpdatedWorldToView = WorldToView;
		LateUpdateData.UpdatedViewToClip = ProjectionMatrix;

		const float dtnm1 = ViewPredictionData[0].DeltaTime;
		const float dt = FrameData.DeltaTime;
		const float dtnp1 = FrameData.DeltaTime; // TODO: Proper estimate of next frame's deltaTime

		// All finite differences below divide by dt or dtnm1. With a zero time step they would yield
		// NaN/Inf matrices, so the prediction is skipped and the unpredicted camera is kept.
		if (dtnm1 > 0.f && dt > 0.f)
		{
			// Rotation prediction
			FQuat nm2r = ViewPredictionData[1].Rotation;
			FQuat nm1r = ViewPredictionData[0].Rotation;
			FQuat nr = FrameData.Rotation;
			nm1r.EnforceShortestArcWith(nm2r);
			nr.EnforceShortestArcWith(nm1r);
			FQuat deltaQ1 = nm1r * nm2r.Inverse();
			FQuat deltaQ2 = nr * nm1r.Inverse();

			// Approximate angular velocity
			FVector omegaFuture;
			if (CVarStreamlineReflexCameraPredictor.GetValueOnGameThread() == 0)
			{
				// Predictor 0: extrapolate the angular velocity with the angular acceleration.
				FVector omega1 = 2 / dtnm1 * FVector(deltaQ1.X, deltaQ1.Y, deltaQ1.Z);
				FVector omega2 = 2 / dt * FVector(deltaQ2.X, deltaQ2.Y, deltaQ2.Z);
				FVector alpha = (omega2 - omega1) / dt;
				omegaFuture = omega2 + alpha * dtnp1;
			}
			else
			{
				// Any other predictor: keep the latest angular velocity.
				FVector omega2 = 2 / dt * FVector(deltaQ2.X, deltaQ2.Y, deltaQ2.Z);
				omegaFuture = omega2;
			}

			// Turn the angular velocity into a rotation over the next time step.
			FQuat deltaQFuture = FQuat::Identity;
			const float omega_mag = FMath::Sqrt(omegaFuture.X * omegaFuture.X + omegaFuture.Y * omegaFuture.Y + omegaFuture.Z * omegaFuture.Z);
			if (omega_mag > 0.f)
			{
				float half_theta = omega_mag * dtnp1 / 2.0f;
				float s, c;
				FMath::SinCos(&s, &c, half_theta);
				deltaQFuture = FQuat(omegaFuture.X * s / omega_mag, omegaFuture.Y * s / omega_mag, omegaFuture.Z * s / omega_mag, c);
			}
			FQuat qFuture = deltaQFuture * nr;
			qFuture.Normalize();
			FMatrix predictedRotationMatrix = qFuture.ToMatrix();

			// Translation prediction
			FVector nm2p = ViewPredictionData[1].Translation;
			FVector nm1p = ViewPredictionData[0].Translation;
			FVector np = FrameData.Translation;
			FVector vnm1 = (nm1p - nm2p) / dtnm1;
			FVector v = (np - nm1p) / dt;
			FVector a = (v - vnm1) / dt;
			FVector predictedPos = np + dtnp1 * (v + 0.5f * a * dtnp1);

			LateUpdateData.UpdatedWorldToView = FMatrix(
				FPlane(predictedRotationMatrix.M[0][0], predictedRotationMatrix.M[0][1], predictedRotationMatrix.M[0][2], 0),
				FPlane(predictedRotationMatrix.M[1][0], predictedRotationMatrix.M[1][1], predictedRotationMatrix.M[1][2], 0),
				FPlane(predictedRotationMatrix.M[2][0], predictedRotationMatrix.M[2][1], predictedRotationMatrix.M[2][2], 0),
				FPlane(predictedPos.X, predictedPos.Y, predictedPos.Z, 1.f));

			// Optional clip correction: sweep a sphere from the current to the predicted camera position
			// and stop at the first hit. Skipped when no world has been recorded for this slot.
			if (CVarStreamlineReflexClipCorrection.GetValueOnGameThread() && LateUpdateData.World != nullptr)
			{
				const FVector CameraStart = -CurrentTranslation;
				const FVector CameraEnd = -LateUpdateData.UpdatedWorldToView.GetOrigin();
				FHitResult Hit;
				FCollisionShape SphereShape = FCollisionShape::MakeSphere(CVarStreamlineReflexClipRadius.GetValueOnGameThread());
				const bool bHit = LateUpdateData.World->SweepSingleByChannel(Hit, CameraStart, CameraEnd, FQuat::Identity, ECC_Camera, SphereShape, LateUpdateData.CollisionParams);
				if (bHit)
				{
					LateUpdateData.UpdatedWorldToView.SetOrigin(-Hit.Location);
				}
			}

			if (InView.IsPerspectiveProjection())
			{
				// Predict FOV
				float nm1f = ViewPredictionData[0].HFov;
				float nf = FrameData.HFov;
				float vf = (nf - nm1f) / dt;
				float predictedHFov = nf + dtnp1 * vf;
				float invTanHFov = 1.f / tan(predictedHFov);
				// TODO: Predict aspect ratio
				float invar = LateUpdateData.UpdatedViewToClip.M[1][1] / LateUpdateData.UpdatedViewToClip.M[0][0];
				LateUpdateData.UpdatedViewToClip.M[0][0] = invTanHFov;
				LateUpdateData.UpdatedViewToClip.M[1][1] = invar * invTanHFov;
			}
		}

		PrevRenderedWorldToView = LateUpdateData.UpdatedWorldToView;
		PrevRenderedViewToClip = LateUpdateData.UpdatedViewToClip;
	}
	else
	{
		PrevRenderedWorldToView = WorldToView;
		PrevRenderedViewToClip = ProjectionMatrix;
	}

	// Shift the sample history: [0] is the latest frame, [1] the one before.
	ViewPredictionData[1] = ViewPredictionData[0];
	ViewPredictionData[0] = FrameData;
}
/** Returns true when Streamline buffer tagging is forced or Latewarp is active. */
bool DoesFeatureUseCameraData()
{
	if (ForceTagStreamlineBuffers())
	{
		return true;
	}
	return IsLatewarpActive();
}

View File

@ -0,0 +1,141 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "CoreMinimal.h"
#include "Misc/CoreDelegates.h"
#include "RendererInterface.h"
#include "RHIResources.h"
#include "ScreenPass.h"
#include "PostProcess/PostProcessMaterial.h"
#include "SceneViewExtension.h"
#include "StreamlineReflex.h"
#include "StreamlineReflexCamera.h"
#include "Runtime/Launch/Resources/Version.h"
#include "Misc/EngineVersionComparison.h"
#include "StreamlineShaders.h"
// Defaults to 1 in every build except Test and Shipping; can be predefined by the build to override.
#ifndef DEBUG_STREAMLINE_VIEW_TRACKING
#define DEBUG_STREAMLINE_VIEW_TRACKING (!(UE_BUILD_TEST || UE_BUILD_SHIPPING))
#endif
class FSceneTextureParameters;
class FRHITexture;
class FStreamlineRHI;
class SWindow;
// Record kept per tracked view (see FStreamlineViewExtension::AddTrackedView / GetTrackedViews).
// NOTE(review): the rect members presumably mirror the identically named FSceneView rects and
// Texture the render target the view draws into - confirm against AddTrackedView.
struct FTrackedView
{
FIntRect ViewRect;
FIntRect UnscaledViewRect;
FIntRect UnconstrainedViewRect;
FTextureRHIRef Texture;
// Identifier of the view; 0 until assigned.
uint32_t ViewKey = 0;
};
// Pass parameters for AddStreamlineUIHintTagPass: the back buffer as a plain RHI texture and the
// UI color/alpha texture as an RDG texture.
BEGIN_SHADER_PARAMETER_STRUCT(FSLUIHintTagShaderParameters, )
SHADER_PARAMETER_TEXTURE(Texture2D, BackBuffer)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, UIColorAndAlpha)
END_SHADER_PARAMETER_STRUCT()
// Adds the render graph pass that tags the UI hint resources for Streamline (defined elsewhere).
// NOTE(review): parameter semantics below are inferred from the names only - confirm against the definition.
//   bTagBackbuffer / bTagUIColorAlpha: select which of the two textures in PassParameters get tagged.
//   ViewsInThisBackBuffer:             tracked views that render into the back buffer being tagged.
//   HasViewIdOverride:                 presumably true when ViewId was forced rather than derived from a view.
extern void AddStreamlineUIHintTagPass(
FRDGBuilder& GraphBuilder,
bool bTagBackbuffer,
bool bTagUIColorAlpha,
const FIntPoint &BackBufferDimension,
FSLUIHintTagShaderParameters* PassParameters,
uint32 ViewId,
FStreamlineRHI* RHIExtensions,
TArray<FTrackedView>& ViewsInThisBackBuffer,
const FIntRect &WindowClientAreaRect,
bool HasViewIdOverride
);
// Scene view extension of the Streamline plugin. It owns the FStreamlineCameraManager, keeps a
// process-wide list of tracked views and declares the view/view-family callbacks of the engine.
class FStreamlineViewExtension final : public FSceneViewExtensionBase
{
public:
FStreamlineViewExtension(const FAutoRegister& AutoRegister, FStreamlineRHI* InStreamlineRHI);
~FStreamlineViewExtension();
virtual void SetupViewFamily(FSceneViewFamily& InViewFamily) override;
virtual void SetupView(FSceneViewFamily& InViewFamily, FSceneView& InView) override;
virtual void SetupViewPoint(APlayerController* Player, FMinimalViewInfo& InViewInfo) override;
virtual void BeginRenderViewFamily(FSceneViewFamily& InViewFamily) override;
// The render thread callbacks receive an immediate command list on engine major version 4 and a render graph builder otherwise.
#if ENGINE_MAJOR_VERSION == 4
typedef FRHICommandListImmediate FGraphBuilderOrCmdList;
#else
typedef FRDGBuilder FGraphBuilderOrCmdList;
#endif
// NOTE(review): these four are declared `final` without `override`, so a signature mismatch with the
// base class would silently declare a new virtual - confirm this is intended for cross-version builds.
virtual void PreRenderView_RenderThread(FGraphBuilderOrCmdList&, FSceneView& InView) final;
virtual void PreRenderViewFamily_RenderThread(FGraphBuilderOrCmdList&, FSceneViewFamily& InViewFamily) final;
virtual void PostRenderViewFamily_RenderThread(FGraphBuilderOrCmdList&, FSceneViewFamily& InViewFamily) final;
virtual void PostRenderView_RenderThread(FGraphBuilderOrCmdList&, FSceneView& InView) final;
// From engine 5.5 on the callback additionally receives the view.
#if UE_VERSION_OLDER_THAN(5,5,0)
virtual void SubscribeToPostProcessingPass(EPostProcessingPass Pass, FAfterPassCallbackDelegateArray& InOutPassCallbacks, bool bIsPassEnabled) override;
#else
virtual void SubscribeToPostProcessingPass(EPostProcessingPass Pass, const FSceneView& InView, FAfterPassCallbackDelegateArray& InOutPassCallbacks, bool bIsPassEnabled) override;
#endif
public:
static void AddTrackedView(const FSceneView& InView);
// TODO: the tracked view list is a single static array; it might need to be indexed by view family.
private: static TArray<FTrackedView> TrackedViews;
public:
static bool DebugViewTracking();
static void LogTrackedViews(const TCHAR* CallSite);
// Mutable access to the static list of tracked views.
static TArray<FTrackedView>& GetTrackedViews()
{
return TrackedViews;
}
void UntrackViewsForBackbuffer(void *InViewport);
// Returns the position of InView inside its family's Views array.
// InView must belong to a family; this is enforced with check(), so only in builds where checks are enabled.
static int32 GetViewIndex(const FSceneView* InView)
{
check(InView->Family);
check(InView->Family->Views.Contains(InView));
const TArray<const FSceneView*>& Views = InView->Family->Views;
int32 ViewIndex = 0;
// Linear search; ViewIndex ends up equal to Views.Num() if the view is not found.
for (; ViewIndex < Views.Num(); ++ ViewIndex)
{
if (Views[ViewIndex] == InView)
{
break;
}
}
check(ViewIndex < InView->Family->Views.Num());
return ViewIndex;
}
private:
FScreenPassTexture PostProcessPassAtEnd_RenderThread(FRDGBuilder& GraphBuilder, const FSceneView& View, const FPostProcessMaterialInputs& InOutInputs);
FStreamlineRHI* StreamlineRHIExtensions;
FStreamlineCameraManager StreamlineCameraManager;
// Frame id, view id
TArray< TTuple<uint64, uint32> > FramesWhereStreamlineConstantsWereSet;
static FDelegateHandle OnPreResizeWindowBackBufferHandle;
static FDelegateHandle OnSlateWindowDestroyedHandle;
};

View File

@ -0,0 +1,79 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "Modules/ModuleManager.h"
class FStreamlineViewExtension;
class FStreamlineMaxTickRateHandler;
class FStreamlineLatencyMarkers;
enum class EStreamlineSupport : uint8;
class FStreamlineRHI;
namespace Streamline
{
// Streamline features known to the plugin. NumValues is the number of features, not a feature.
enum class EStreamlineFeature
{
DLSSG,
Reflex,
DeepDVC,
Latewarp, // TODO see where that is used
NumValues
};
// Result of a per-feature support query. Every value other than Supported names a reason
// why the feature is unavailable; NumValues is the number of values, not a result.
enum class EStreamlineFeatureSupport
{
Supported,
NotSupported,
NotSupportedIncompatibleHardware,
NotSupportedHardwareSchedulingDisabled,
NotSupportedOperatingSystemOutOfDate,
NotSupportedDriverOutOfDate,
NotSupportedIncompatibleRHI,
NumValues
};
};
// Public interface of the Streamline core module: one query for overall Streamline support and
// one support query per feature (DLSS-G, Latewarp, DeepDVC, Reflex).
class IStreamlineModuleInterface : public IModuleInterface
{
public:
virtual EStreamlineSupport QueryStreamlineSupport() const = 0;
virtual Streamline::EStreamlineFeatureSupport QueryDLSSGSupport() const = 0;
virtual Streamline::EStreamlineFeatureSupport QueryLatewarpSupport() const = 0;
virtual Streamline::EStreamlineFeatureSupport QueryDeepDVCSupport() const = 0;
virtual Streamline::EStreamlineFeatureSupport QueryReflexSupport() const = 0;
};
class FStreamlineCoreModule final: public IStreamlineModuleInterface
{
public:
/** IModuleInterface implementation */
virtual void StartupModule();
virtual void ShutdownModule();
virtual EStreamlineSupport QueryStreamlineSupport() const override;
virtual Streamline::EStreamlineFeatureSupport QueryDLSSGSupport() const override;
virtual Streamline::EStreamlineFeatureSupport QueryLatewarpSupport() const override;
virtual Streamline::EStreamlineFeatureSupport QueryDeepDVCSupport() const override;
virtual Streamline::EStreamlineFeatureSupport QueryReflexSupport() const override;
static FStreamlineRHI* GetStreamlineRHI();
private:
TSharedPtr< FStreamlineViewExtension, ESPMode::ThreadSafe> StreamlineViewExtension;
};

View File

@ -0,0 +1,37 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "CoreMinimal.h"
#include "StreamlineCore.h"
class FStreamlineRHI;

// Hook registration for DLSS Frame Generation.
void RegisterStreamlineDLSSGHooks(FStreamlineRHI* InStreamlineRHI);
void UnregisterStreamlineDLSSGHooks();
bool IsDLSSGActive();

// Streamline::EStreamlineFeatureSupport is fully defined by StreamlineCore.h, which this header
// includes; a qualified opaque enum declaration is not valid C++ and is not needed here.
extern STREAMLINECORE_API Streamline::EStreamlineFeatureSupport QueryStreamlineDLSSGSupport();
extern STREAMLINECORE_API bool IsStreamlineDLSSGSupported();
extern STREAMLINECORE_API int32 GetStreamlineDLSSGNumFramesToGenerate();
extern STREAMLINECORE_API void GetStreamlineDLSSGMinMaxGeneratedFrames(int32& MinGeneratedFrames, int32& MaxGeneratedFrames);
extern STREAMLINECORE_API void GetStreamlineDLSSGFrameTiming(float& FrameRateInHertz, int32& FramesPresented);

class FRHICommandListImmediate;
struct FRHIStreamlineArguments;
class FSceneViewFamily;
class FRDGBuilder;

// Render graph and per-frame entry points for DLSS Frame Generation.
void AddStreamlineDLSSGStateRenderPass(FRDGBuilder& GraphBuilder, uint32 ViewID, const FIntRect& SecondaryViewRect);
void BeginRenderViewFamilyDLSSG(FSceneViewFamily& InViewFamily);
void GetDLSSGStatusFromStreamline(bool bQueryOncePerAppLifetimeValues = false);

View File

@ -0,0 +1,33 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "CoreMinimal.h"
#include "RenderGraphDefinitions.h"
#include "StreamlineCore.h"
class FStreamlineRHI;

bool IsDeepDVCActive();

// Streamline::EStreamlineFeatureSupport is fully defined by StreamlineCore.h, which this header
// includes; a qualified opaque enum declaration is not valid C++ and is not needed here.
extern STREAMLINECORE_API Streamline::EStreamlineFeatureSupport QueryStreamlineDeepDVCSupport();
extern STREAMLINECORE_API bool IsStreamlineDeepDVCSupported();

class FRHICommandListImmediate;
struct FRHIStreamlineArguments;
class FSceneViewFamily;
class FRDGBuilder;

// Render graph and per-frame entry points for DeepDVC.
void AddStreamlineDeepDVCStateRenderPass(FRDGBuilder& GraphBuilder, uint32 ViewID, const FIntRect& SecondaryViewRect);
void AddStreamlineDeepDVCEvaluateRenderPass(FStreamlineRHI* StreamlineRHIExtensions, FRDGBuilder& GraphBuilder, uint32 ViewID, const FIntRect& SecondaryViewRect, FRDGTextureRef SLSceneColorWithoutHUD);
void BeginRenderViewFamilyDeepDVC(FSceneViewFamily& InViewFamily);
void GetDeepDVCStatusFromStreamline();

View File

@ -0,0 +1,32 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "CoreMinimal.h"
#include "StreamlineCore.h"
class FStreamlineRHI;

bool IsLatewarpActive();

class FRHICommandListImmediate;
struct FRHIStreamlineArguments;
class FSceneViewFamily;
class FRDGBuilder;

// Streamline::EStreamlineFeatureSupport is fully defined by StreamlineCore.h, which this header
// includes; a qualified opaque enum declaration is not valid C++ and is not needed here.
extern STREAMLINECORE_API Streamline::EStreamlineFeatureSupport QueryStreamlineLatewarpSupport();
extern STREAMLINECORE_API bool IsStreamlineLatewarpSupported();

// Hook registration and render graph entry point for Latewarp.
void RegisterStreamlineLatewarpHooks(FStreamlineRHI* InStreamlineRHI);
void UnregisterStreamlineLatewarpHooks();
void AddStreamlineLatewarpStateRenderPass(FRDGBuilder& GraphBuilder, uint32 ViewID, const FIntRect& SecondaryViewRect);

View File

@ -0,0 +1,138 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "CoreMinimal.h"
#include "Misc/CoreMisc.h"
#include "Tickable.h"
#include "StreamlineCore.h"
#include "Windows/WindowsApplication.h"
#include "Performance/MaxTickRateHandlerModule.h"
#include "Performance/LatencyMarkerModule.h"
class FStreamlineRHI;
// Common base of the Streamline latency handlers: holds the Reflex and PCL support flags and
// exposes protected accessors for them to derived classes.
class FStreamlineLatencyBase
{
protected:
static bool bStreamlineReflexSupported;
static bool bStreamlinePCLSupported;
// NOTE(review): shares its name with the exported free function IsStreamlineReflexSupported()
// declared at the end of this header; inside derived classes the member hides the free function.
static bool IsStreamlineReflexSupported();
static bool IsStreamlinePCLSupported();
};
// Streamline implementation of the engine's max tick rate handler interface.
// The class declares no state of its own; the trailing private section is empty.
class FStreamlineMaxTickRateHandler : public IMaxTickRateHandlerModule, public FStreamlineLatencyBase
{
public:
virtual ~FStreamlineMaxTickRateHandler() {}
virtual void Initialize() override;
virtual void SetEnabled(bool bInEnabled) override;
virtual bool GetEnabled() override;
virtual bool GetAvailable() override;
virtual void SetFlags(uint32 Flags) override;
virtual bool HandleMaxTickRate(float DesiredMaxTickRate) override;
// Inherited via IMaxTickRateHandlerModule
virtual uint32 GetFlags() override;
private:
};
// Streamline implementation of the engine's latency marker interface. It is also a Windows message
// handler and a tickable object that ticks in game, in the editor and while paused.
// The latency getters return the cached values below, all in milliseconds and all starting at 0.
class FStreamlineLatencyMarkers : public ILatencyMarkerModule, public IWindowsMessageHandler, public FStreamlineLatencyBase, public FTickableGameObject
{
// Cached average latencies returned by the Get*LatencyInMs accessors.
float AverageTotalLatencyMs = 0.0f;
float AverageGameLatencyMs = 0.0f;
float AverageRenderLatencyMs = 0.0f;
float AverageSimulationLatencyMs = 0.0f;
float AverageRenderSubmitLatencyMs = 0.0f;
float AveragePresentLatencyMs = 0.0f;
float AverageDriverLatencyMs = 0.0f;
float AverageOSRenderQueueLatencyMs = 0.0f;
float AverageGPURenderLatencyMs = 0.0f;
// Cached offsets returned by the Get*OffsetFromFrameStartInMs accessors.
float RenderSubmitOffsetMs = 0.0f;
float PresentOffsetMs = 0.0f;
float DriverOffsetMs = 0.0f;
float OSRenderQueueOffsetMs = 0.0f;
float GPURenderOffsetMs = 0.0f;
bool bFlashIndicatorDriverControlled = false;
public:
virtual ~FStreamlineLatencyMarkers() {}
virtual void Initialize() override;
virtual void SetEnabled(bool bInEnabled) override;
virtual bool GetEnabled() override;
virtual bool GetAvailable() override;
virtual void SetFlashIndicatorEnabled(bool bInEnabled) override;
virtual bool GetFlashIndicatorEnabled() override;
// FTickableGameObject: always tickable, including in the editor and while paused.
virtual void Tick(float DeltaTime) override;
virtual bool IsTickable() const override { return true; }
virtual bool IsTickableInEditor() const override { return true; }
virtual bool IsTickableWhenPaused() const override { return true; }
virtual TStatId GetStatId(void) const override { RETURN_QUICK_DECLARE_CYCLE_STAT(FLatencyMarkers, STATGROUP_Tickables); }
// Latency markers, one per pipeline stage, each tagged with the frame number.
virtual void SetInputSampleLatencyMarker(uint64 FrameNumber) override;
virtual void SetSimulationLatencyMarkerStart(uint64 FrameNumber) override;
virtual void SetSimulationLatencyMarkerEnd(uint64 FrameNumber) override;
virtual void SetRenderSubmitLatencyMarkerStart(uint64 FrameNumber) override;
virtual void SetRenderSubmitLatencyMarkerEnd(uint64 FrameNumber) override;
virtual void SetPresentLatencyMarkerStart(uint64 FrameNumber) override;
virtual void SetPresentLatencyMarkerEnd(uint64 FrameNumber) override;
virtual void SetFlashIndicatorLatencyMarker(uint64 FrameNumber) override;
virtual void SetCustomLatencyMarker(uint32 MarkerId, uint64 FrameNumber) override;
virtual float GetTotalLatencyInMs() override { return AverageTotalLatencyMs; }
virtual float GetGameLatencyInMs() override { return AverageGameLatencyMs; } // This is defined as "Game simulation start to driver submission end"
virtual float GetRenderLatencyInMs() override { return AverageRenderLatencyMs; } // This is defined as "OS render queue start to GPU render end"
virtual float GetSimulationLatencyInMs() override { return AverageSimulationLatencyMs; }
virtual float GetRenderSubmitLatencyInMs() override { return AverageRenderSubmitLatencyMs; }
virtual float GetPresentLatencyInMs() override { return AveragePresentLatencyMs; }
virtual float GetDriverLatencyInMs() override { return AverageDriverLatencyMs; }
virtual float GetOSRenderQueueLatencyInMs() override { return AverageOSRenderQueueLatencyMs; }
virtual float GetGPURenderLatencyInMs() override { return AverageGPURenderLatencyMs; }
virtual float GetRenderSubmitOffsetFromFrameStartInMs() override { return RenderSubmitOffsetMs; }
virtual float GetPresentOffsetFromFrameStartInMs() override { return PresentOffsetMs; }
virtual float GetDriverOffsetFromFrameStartInMs() override { return DriverOffsetMs; }
virtual float GetOSRenderQueueOffsetFromFrameStartInMs() override { return OSRenderQueueOffsetMs; }
virtual float GetGPURenderOffsetFromFrameStartInMs() override { return GPURenderOffsetMs; }
// Inherited via IWindowsMessageHandler
virtual bool ProcessMessage(HWND hwnd, uint32 msg, WPARAM wParam, LPARAM lParam, int32& OutResult) override;
};
// Hook (un)registration entry points for the Streamline Reflex integration.
// Only declared here; the definitions live outside this header.
void RegisterStreamlineReflexHooks();
void UnregisterStreamlineReflexHooks();
// Opaque enum declaration so the query below can name its return type.
// NOTE(review): a qualified opaque-enum declaration requires the enum to have been declared
// in namespace Streamline already - confirm the defining header is always included first.
enum class Streamline::EStreamlineFeatureSupport;
// Reflex support queries exported from the StreamlineCore module (STREAMLINECORE_API):
// one returns the detailed support enum, the other a plain yes/no.
extern STREAMLINECORE_API Streamline::EStreamlineFeatureSupport QueryStreamlineReflexSupport();
extern STREAMLINECORE_API bool IsStreamlineReflexSupported();

View File

@ -0,0 +1,71 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "CoreMinimal.h"
#include "SceneView.h"
#include "Misc/CoreMisc.h"
#include "Tickable.h"
#include "CollisionQueryParams.h"
#include "Windows/WindowsApplication.h"
#include "Performance/MaxTickRateHandlerModule.h"
#include "Performance/LatencyMarkerModule.h"
#include "sl_helpers.h"
#include "sl_reflex.h"
class FStreamlineRHI;
/**
 * Holds camera/view state shared between the game thread and render thread entry points below.
 *
 * The *_GameThread / *_RenderThread suffixes follow the engine naming convention for thread
 * affinity; the implementations are not part of this header.
 *
 * NOTE(review): the public entry points take uint64 frame IDs while the private helpers and the
 * stored state use int64 - confirm the signed/unsigned conversion is intentional.
 */
class FStreamlineCameraManager
{
public:
	FStreamlineCameraManager() : PrevRenderedWorldToView(FMatrix::Identity), PrevRenderedViewToClip(FMatrix::Identity) {}

	void SetCameraData(FSceneView& InView, uint64 FrameID);
	void LateUpdate_GameThread(APlayerController* Player, uint64 FrameID);
	void PreRenderViewFamily_RenderThread(FSceneViewFamily& InViewFamily, uint64 FrameID);
	void PreRenderView_RenderThread(FSceneView& InView, uint64 FrameID);
	void PostRenderView_RenderThread(FSceneView& InView, uint64 FrameID);

private:
	void CacheSceneInfo(int64 FrameID, USceneComponent* Component, FCollisionQueryParams& CollisionParams);
	void GatherLateUpdatePrimitives(int64 FrameID, USceneComponent* ParentComponent, FCollisionQueryParams& CollisionParams);
	void LateUpdate_RenderThread(FSceneInterface* Scene, uint64 FrameID, const FMatrix& LateUpdateTransform);

	/**
	 * Per-frame late update bookkeeping.
	 * Every trivially-typed member has a default initializer so a freshly constructed slot never
	 * exposes an indeterminate pointer, frame ID or matrix.
	 */
	struct FLateUpdateState
	{
		/** Frame ID for tracking; -1 means no frame has been recorded in this slot yet */
		int64 FrameID = -1;
		/** Primitives that need late update before rendering */
		TMap<FPrimitiveSceneInfo*, int32> Primitives;
		/** Collision parameters for late update clip prevention */
		FCollisionQueryParams CollisionParams;
		/** World associated with this slot; null until assigned */
		UWorld* World = nullptr;
		/** Matrix data for predictive rendering */
		FMatrix UpdatedWorldToView = FMatrix::Identity;
		FMatrix UpdatedViewToClip = FMatrix::Identity;
	};

	/** Snapshot of the view parameters recorded for one frame. */
	struct FViewPredictionData
	{
		/** Frame ID for tracking; -1 means no frame has been recorded in this slot yet */
		int64 FrameID = -1;
		float DeltaTime = 0.0f;
		FQuat Rotation = FQuat::Identity;
		FVector Translation = FVector::ZeroVector;
		float HFov = 0.0f;
	};

	FMatrix PrevRenderedWorldToView, PrevRenderedViewToClip;

	/** Number of FLateUpdateState slots kept alive at once */
	const static size_t FramesInFlight = 3;
	FLateUpdateState UpdateStates[FramesInFlight];
	FViewPredictionData ViewPredictionData[2];
};
// Free function declared next to FStreamlineCameraManager; defined outside this header.
// Presumably reports whether any enabled Streamline feature consumes the camera data - TODO confirm against the definition.
bool DoesFeatureUseCameraData();

View File

@ -0,0 +1,90 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
using System.IO;
using UnrealBuildTool;
/// <summary>
/// UnrealBuildTool rules for the StreamlineCore module: private include paths,
/// private module dependencies and the editor-only "Settings" dependency.
/// </summary>
public class StreamlineCore : ModuleRules
{
	public StreamlineCore(ReadOnlyTargetRules Target) : base(Target)
	{
		// UE 4.2x compatibility: raise the language standard to at least C++17.
#if !UE_5_0_OR_LATER
		if (CppStandard < CppStandardVersion.Cpp17)
		{
			CppStandard = CppStandardVersion.Cpp17;
		}
#endif

		// DEBUG_STREAMLINE_VIEW_TRACKING is intentionally not defined here anymore; it gets defined in
		// StreamlineViewExtension.h based on build config, r.Streamline.LogTrackedViews and -sllogviewtracking.
		//PrivateDefinitions.Add("DEBUG_STREAMLINE_VIEW_TRACKING=1");

		PCHUsage = PCHUsageMode.UseExplicitOrSharedPCHs;

		// No public include paths and no public module dependencies are required.
		PrivateIncludePaths.Add(EngineDirectory + "/Source/Runtime/Renderer/Private");
		PrivateIncludePaths.Add(Path.Combine(ModuleDirectory, "ThirdParty"));

		string[] PrivateModules =
		{
			"Core",
			"CoreUObject",
			"EngineSettings",
			"Engine",
			"RenderCore",
			"Renderer",
			"RHI",
			"Projects",
			"SlateCore",
			"Slate",
			"Streamline",
			"StreamlineRHI",
			"StreamlineShaders",
			"ApplicationCore",
		};
		PrivateDependencyModuleNames.AddRange(PrivateModules);

		// The "Settings" module is only pulled in for editor builds.
		if (Target.bBuildEditor)
		{
			PrivateDependencyModuleNames.Add("Settings");
		}
	}
}

View File

@ -0,0 +1,156 @@
/** Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.*
* NVIDIA CORPORATION and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA CORPORATION is strictly prohibited.*/
#include <windows.h>
#include <TraceLoggingProvider.h>
#include <evntrace.h>
#include <stdlib.h>
#pragma comment(lib, "advapi32.lib")
#pragma comment(lib, "user32.lib")
/* Marker identifiers written as the "Marker" field of the ReflexStats trace events.
 * The numeric values are spelled out explicitly because they are part of the emitted event payload. */
typedef enum _NVSTATS_LATENCY_MARKER_TYPE
{
    NVSTATS_SIMULATION_START   = 0x0,
    NVSTATS_SIMULATION_END     = 0x1,
    NVSTATS_RENDERSUBMIT_START = 0x2,
    NVSTATS_RENDERSUBMIT_END   = 0x3,
    NVSTATS_PRESENT_START      = 0x4,
    NVSTATS_PRESENT_END        = 0x5,
    NVSTATS_INPUT_SAMPLE       = 0x6,
    NVSTATS_TRIGGER_FLASH      = 0x7,
    NVSTATS_PC_LATENCY_PING    = 0x8,
} NVSTATS_LATENCY_MARKER_TYPE;

/* Bit flags stored in g_ReflexStatsFlags and reported with the "Flags" event field. */
typedef enum _NVSTATS_FLAGS
{
    NVSTATS_NO_PRESENT_MARKERS = 0x1,
} NVSTATS_FLAGS;
// Forward declaration of the TraceLogging provider handle used by every NVSTATS_* macro below.
TRACELOGGING_DECLARE_PROVIDER(g_hReflexStatsComponentProvider);
// NVSTATS_DEFINE() expands to the *definitions* backing this header:
//  - the "ReflexStatsTraceLoggingProvider" TraceLogging provider and its GUID,
//  - the g_ReflexStats* global state variables,
//  - ReflexStatsPingThreadProc, the ping thread body,
//  - ReflexStatsComponentProviderCb, the provider enable/disable/capture-state callback.
// Because it defines non-static globals and functions it has to be expanded in exactly one translation unit.
//
// Ping thread: waits on g_ReflexStatsQuitEvent with a timeout of 100 ms plus rand() % 200 ms. On every timeout,
// while the provider is enabled and the foreground window belongs to this process, it writes a
// "ReflexStatsInput" event and posts either a WM_KEYDOWN/WM_KEYUP pair (virtual key F13, F14 or F15)
// or the registered window message to that window. The loop ends when the wait returns anything other
// than WAIT_TIMEOUT, or via the 'break' when neither a supported virtual key nor a window message is configured.
//
// Callback: toggles g_ReflexStatsEnable on enable/disable and writes a "ReflexStatsFlags" event with the
// current flags on a capture-state request.
//
// NOTE(review): ReflexStatsPingThreadProc is declared without WINAPI yet NVSTATS_INIT casts it to
// LPTHREAD_START_ROUTINE; that is a calling-convention mismatch on 32-bit x86 - confirm only x64 is targeted.
// NOTE(review): g_ReflexStatsEnable is a plain bool written by the callback and read by the ping thread
// without synchronization - confirm this is acceptable.
#define NVSTATS_DEFINE() \
TRACELOGGING_DEFINE_PROVIDER( \
g_hReflexStatsComponentProvider, \
"ReflexStatsTraceLoggingProvider", \
(0x0d216f06, 0x82a6, 0x4d49, 0xbc, 0x4f, 0x8f, 0x38, 0xae, 0x56, 0xef, 0xab)); \
UINT g_ReflexStatsWindowMessage = 0; \
WORD g_ReflexStatsVirtualKey = 0; \
HANDLE g_ReflexStatsQuitEvent = NULL; \
HANDLE g_ReflexStatsPingThread = NULL; \
bool g_ReflexStatsEnable = false; \
UINT g_ReflexStatsFlags = 0; \
DWORD ReflexStatsPingThreadProc(LPVOID lpThreadParameter) \
{ \
DWORD minPingInterval = 100 /*ms*/; \
DWORD maxPingInterval = 300 /*ms*/; \
while (WAIT_TIMEOUT == WaitForSingleObject(g_ReflexStatsQuitEvent, minPingInterval + (rand() % (maxPingInterval - minPingInterval)))) \
{ \
if (!g_ReflexStatsEnable) \
{ \
continue; \
} \
HWND hWnd = GetForegroundWindow(); \
if (hWnd) \
{ \
DWORD dwProcessId = 0; \
(void)GetWindowThreadProcessId(hWnd, &dwProcessId); \
if (GetCurrentProcessId() == dwProcessId) \
{ \
if ((g_ReflexStatsVirtualKey == VK_F13) || \
(g_ReflexStatsVirtualKey == VK_F14) || \
(g_ReflexStatsVirtualKey == VK_F15)) \
{ \
TraceLoggingWrite(g_hReflexStatsComponentProvider, "ReflexStatsInput"); \
PostMessageW(hWnd, WM_KEYDOWN, g_ReflexStatsVirtualKey, 0x00000001); \
PostMessageW(hWnd, WM_KEYUP, g_ReflexStatsVirtualKey, 0xC0000001); \
} \
else if (g_ReflexStatsWindowMessage) \
{ \
TraceLoggingWrite(g_hReflexStatsComponentProvider, "ReflexStatsInput"); \
PostMessageW(hWnd, g_ReflexStatsWindowMessage, 0, 0); \
} \
else \
{ \
break; \
} \
} \
} \
} \
return S_OK; \
} \
void WINAPI ReflexStatsComponentProviderCb(LPCGUID, ULONG ControlCode, UCHAR, ULONGLONG, ULONGLONG, PEVENT_FILTER_DESCRIPTOR, PVOID) \
{ \
switch (ControlCode) \
{ \
case EVENT_CONTROL_CODE_ENABLE_PROVIDER: \
g_ReflexStatsEnable = true; \
break; \
case EVENT_CONTROL_CODE_DISABLE_PROVIDER: \
g_ReflexStatsEnable = false; \
break; \
case EVENT_CONTROL_CODE_CAPTURE_STATE: \
TraceLoggingWrite(g_hReflexStatsComponentProvider, "ReflexStatsFlags", TraceLoggingUInt32(g_ReflexStatsFlags, "Flags")); \
break; \
default: \
break; \
} \
}
// NVSTATS_INIT(vk, flags): starts the stats machinery.
//  vk    - virtual key stored in g_ReflexStatsVirtualKey; when it is 0 and no window message has been
//          registered yet, the "NVIDIA_Reflex_PC_Latency_Ping" window message is registered instead.
//  flags - value stored in g_ReflexStatsFlags (see NVSTATS_FLAGS).
// Creates the manual-reset quit event on first use and, if the event exists, registers the TraceLogging
// provider with ReflexStatsComponentProviderCb, writes a "ReflexStatsInit" event and starts the ping thread
// unless one is already recorded in g_ReflexStatsPingThread.
// The expansion is a sequence of statements that is NOT wrapped in do { } while (0): only use it where a
// full statement list is valid (e.g. inside braces), never as the unbraced body of an if/else.
// NOTE(review): TraceLoggingRegisterEx runs on every expansion, so invoking this twice without an
// intervening NVSTATS_SHUTDOWN() registers the provider again - confirm callers initialize only once.
#define NVSTATS_INIT(vk, flags) \
if (((vk) == 0) && (g_ReflexStatsWindowMessage == 0)) \
{ \
g_ReflexStatsWindowMessage = RegisterWindowMessageW(L"NVIDIA_Reflex_PC_Latency_Ping"); \
} \
g_ReflexStatsVirtualKey = (vk); \
g_ReflexStatsFlags = (flags); \
if (!g_ReflexStatsQuitEvent) \
{ \
g_ReflexStatsQuitEvent = CreateEventW(NULL, 1, 0, NULL); \
} \
if (g_ReflexStatsQuitEvent) \
{ \
TraceLoggingRegisterEx(g_hReflexStatsComponentProvider, ReflexStatsComponentProviderCb, NULL); \
TraceLoggingWrite(g_hReflexStatsComponentProvider, "ReflexStatsInit"); \
if (!g_ReflexStatsPingThread) \
{ \
g_ReflexStatsPingThread = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)ReflexStatsPingThreadProc, NULL, 0, NULL); \
} \
}
// NVSTATS_MARKER(mrk, frid): writes a "ReflexStatsEvent" trace event carrying the marker id (UInt32 "Marker",
// see NVSTATS_LATENCY_MARKER_TYPE) and the frame id (UInt64 "FrameID").
#define NVSTATS_MARKER(mrk,frid) TraceLoggingWrite(g_hReflexStatsComponentProvider, "ReflexStatsEvent", TraceLoggingUInt32((mrk), "Marker"), TraceLoggingUInt64((frid), "FrameID"))
// NVSTATS_MARKER_V2(mrk, frid): same payload written as "ReflexStatsEventV2", plus the current
// g_ReflexStatsFlags value as a UInt32 "Flags" field.
#define NVSTATS_MARKER_V2(mrk,frid) TraceLoggingWrite(g_hReflexStatsComponentProvider, "ReflexStatsEventV2", TraceLoggingUInt32((mrk), "Marker"), TraceLoggingUInt64((frid), "FrameID"), TraceLoggingUInt32(g_ReflexStatsFlags, "Flags"))
// NVSTATS_SHUTDOWN(): tears down what NVSTATS_INIT() set up.
//  1. Signals the quit event and waits up to 1000 ms for the ping thread to leave its loop, then closes the
//     thread handle. Closing the handle does not terminate the thread; it only releases this process'
//     reference, which was previously leaked on every init/shutdown cycle.
//  2. Writes a "ReflexStatsShutdown" event and unregisters the TraceLogging provider.
//  3. Closes and clears the quit event.
// Like NVSTATS_INIT, the expansion is a plain statement sequence (no do { } while (0) wrapper): use it only
// where a full statement list is valid.
#define NVSTATS_SHUTDOWN() \
    if (g_ReflexStatsPingThread) \
    { \
        if (g_ReflexStatsQuitEvent) \
        { \
            SetEvent(g_ReflexStatsQuitEvent); \
        } \
        (void)WaitForSingleObject(g_ReflexStatsPingThread, 1000); \
        CloseHandle(g_ReflexStatsPingThread); \
        g_ReflexStatsPingThread = NULL; \
    } \
    TraceLoggingWrite(g_hReflexStatsComponentProvider, "ReflexStatsShutdown"); \
    TraceLoggingUnregister(g_hReflexStatsComponentProvider); \
    if (g_ReflexStatsQuitEvent) \
    { \
        CloseHandle(g_ReflexStatsQuitEvent); \
        g_ReflexStatsQuitEvent = NULL; \
    }
// True when msgId equals the window message id stored in g_ReflexStatsWindowMessage
// (0 until NVSTATS_INIT registers it).
#define NVSTATS_IS_PING_MSG_ID(msgId) ((msgId) == g_ReflexStatsWindowMessage)
// Declarations of the state and functions that NVSTATS_DEFINE() defines, so any translation unit that
// includes this header can use the NVSTATS_* macros.
// NOTE(review): the variables are declared extern "C" here while NVSTATS_DEFINE() defines them without a
// linkage specification - this only links if the defining translation unit has seen these declarations first.
extern "C" UINT g_ReflexStatsWindowMessage;
extern "C" WORD g_ReflexStatsVirtualKey;
extern "C" HANDLE g_ReflexStatsQuitEvent;
extern "C" HANDLE g_ReflexStatsPingThread;
extern "C" bool g_ReflexStatsEnable;
extern "C" UINT g_ReflexStatsFlags;
DWORD ReflexStatsPingThreadProc(LPVOID lpThreadParameter);
void WINAPI ReflexStatsComponentProviderCb(LPCGUID, ULONG ControlCode, UCHAR, ULONGLONG, ULONGLONG, PEVENT_FILTER_DESCRIPTOR, PVOID);

View File

@ -0,0 +1,371 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "StreamlineD3D11RHI.h"
#include "Features/IModularFeatures.h"
#include "GenericPlatform/GenericPlatformFile.h"
#if ENGINE_PROVIDES_ID3D11DYNAMICRHI
#include "ID3D11DynamicRHI.h"
#include "Windows/D3D11ThirdParty.h" // for dxgi1_6.h
#else
#include "D3D11RHIPrivate.h"
THIRD_PARTY_INCLUDES_START
#include "dxgi1_6.h"
THIRD_PARTY_INCLUDES_END
#endif
#include "D3D11Util.h"
#include "HAL/IConsoleManager.h"
#include "Misc/Paths.h"
#include "Modules/ModuleManager.h"
#include "Windows/IDXGISwapchainProvider.h"
#include "StreamlineAPI.h"
#include "StreamlineConversions.h"
#include "StreamlineRHI.h"
#include "sl.h"
#include "sl_dlss_g.h"
// The UE module
DEFINE_LOG_CATEGORY_STATIC(LogStreamlineD3D11RHI, Log, All);
#define LOCTEXT_NAMESPACE "StreamlineD3D11RHI"
/**
 * IDXGISwapchainProvider used with the D3D11 RHI.
 * Swapchains are created through the Streamline-upgraded DXGI factory when the owning FStreamlineRHI
 * allows swapchain hooking, and through the factory passed in by the caller otherwise. Either way the
 * owning FStreamlineRHI is told about the resulting swapchain pointer.
 */
class FStreamlineD3D11DXGISwapchainProvider : public IDXGISwapchainProvider
{
public:
	FStreamlineD3D11DXGISwapchainProvider(const FStreamlineRHI* InRHI) : StreamlineRHI(InRHI) {}
	virtual ~FStreamlineD3D11DXGISwapchainProvider() = default;

#if ENGINE_MAJOR_VERSION == 5 && ENGINE_MINOR_VERSION >= 1
	bool SupportsRHI(ERHIInterfaceType RHIType) const override final { return RHIType == ERHIInterfaceType::D3D11; }
#else
	bool SupportsRHI(const TCHAR* RHIName) const override final { return FString(RHIName) == FString("D3D11"); }
#endif

#if ENGINE_MAJOR_VERSION == 5 && ENGINE_MINOR_VERSION >= 3
	const TCHAR* GetProviderName() const override final { return TEXT("FStreamlineD3D11DXGISwapchainProvider"); }
#else
	TCHAR* GetName() const override final
	{
		// The engine interface hands out a mutable pointer, so the name lives in a static buffer.
		static TCHAR ProviderName[] = TEXT("FStreamlineD3D11DXGISwapchainProvider");
		return ProviderName;
	}
#endif

	HRESULT CreateSwapChainForHwnd(IDXGIFactory2* pFactory, IUnknown* pDevice, HWND hWnd, const DXGI_SWAP_CHAIN_DESC1* pDesc, const DXGI_SWAP_CHAIN_FULLSCREEN_DESC* pFullScreenDesc, IDXGIOutput* pRestrictToOutput, IDXGISwapChain1** ppSwapChain) override final
	{
		// Start from the caller's factory and swap in the Streamline one only when hooking is allowed.
		IDXGIFactory2* FactoryToUse = pFactory;
		if (StreamlineRHI->IsSwapchainHookingAllowed())
		{
			// TODO: what happens if a second swapchain is created while PIE is active?
			sl::Result SLResult = SLUpgradeInterface(reinterpret_cast<void**>(&FactoryToUse));
			checkf(SLResult == sl::Result::eOk, TEXT("%s: error upgrading IDXGIFactory (%s)"), ANSI_TO_TCHAR(__FUNCTION__), ANSI_TO_TCHAR(sl::getResultAsStr(SLResult)));
		}

		const HRESULT DXGIResult = FactoryToUse->CreateSwapChainForHwnd(pDevice, hWnd, pDesc, pFullScreenDesc, pRestrictToOutput, ppSwapChain);
		StreamlineRHI->OnSwapchainCreated(*ppSwapChain);
		return DXGIResult;
	}

	HRESULT CreateSwapChain(IDXGIFactory* pFactory, IUnknown* pDevice, DXGI_SWAP_CHAIN_DESC* pDesc, IDXGISwapChain** ppSwapChain) override final
	{
		// Same flow as CreateSwapChainForHwnd, for the legacy IDXGIFactory entry point.
		IDXGIFactory* FactoryToUse = pFactory;
		if (StreamlineRHI->IsSwapchainHookingAllowed())
		{
			// TODO: what happens if a second swapchain is created while PIE is active?
			sl::Result SLResult = SLUpgradeInterface(reinterpret_cast<void**>(&FactoryToUse));
			checkf(SLResult == sl::Result::eOk, TEXT("%s: error upgrading IDXGIFactory (%s)"), ANSI_TO_TCHAR(__FUNCTION__), ANSI_TO_TCHAR(sl::getResultAsStr(SLResult)));
		}

		const HRESULT DXGIResult = FactoryToUse->CreateSwapChain(pDevice, pDesc, ppSwapChain);
		StreamlineRHI->OnSwapchainCreated(*ppSwapChain);
		return DXGIResult;
	}

private:
	/** Non-owning pointer to the RHI that created this provider. */
	const FStreamlineRHI* StreamlineRHI;
};
// D3D11 implementation of FStreamlineRHI.
// Depending on ENGINE_PROVIDES_ID3D11DYNAMICRHI it talks to the engine either through the public
// ID3D11DynamicRHI interface or through the private FD3D11DynamicRHI class.
class STREAMLINED3D11RHI_API FStreamlineD3D11RHI : public FStreamlineRHI
{
public:
// Captures the adapter LUID for Streamline and, when Streamline is supported and
// IsSwapChainProviderRequired() says so, registers FStreamlineD3D11DXGISwapchainProvider as a modular feature.
FStreamlineD3D11RHI(const FStreamlineRHICreateArguments& Arguments)
: FStreamlineRHI(Arguments)
#if ENGINE_PROVIDES_ID3D11DYNAMICRHI
, D3D11RHI(CastDynamicRHI<ID3D11DynamicRHI>(Arguments.DynamicRHI))
#else
, D3D11RHI(static_cast<FD3D11DynamicRHI*>(Arguments.DynamicRHI))
#endif
{
UE_LOG(LogStreamlineD3D11RHI, Log, TEXT("%s Enter"), ANSI_TO_TCHAR(__FUNCTION__));
check(D3D11RHI != nullptr);
// Three engine-version dependent ways of obtaining the DXGI adapter description.
#if ENGINE_PROVIDES_ID3D11DYNAMICRHI
DXGI_ADAPTER_DESC DXGIAdapterDesc;
D3D11RHI->RHIGetAdapter()->GetDesc(&DXGIAdapterDesc);
#elif ENGINE_MAJOR_VERSION > 4
DXGI_ADAPTER_DESC DXGIAdapterDesc = D3D11RHI->GetAdapter().DXGIAdapterDesc;
#else
// Oldest path: walk native device -> IDXGIDevice -> IDXGIAdapter to reach the description.
ID3D11Device* NativeD3D11Device = static_cast<ID3D11Device*>(D3D11RHI->RHIGetNativeDevice());
check(NativeD3D11Device != nullptr);
TRefCountPtr<IDXGIDevice> DXGIDevice;
NativeD3D11Device->QueryInterface(__uuidof(IDXGIDevice), (void**)DXGIDevice.GetInitReference());
check(DXGIDevice.IsValid());
TRefCountPtr<IDXGIAdapter> DXGIAdapter;
DXGIDevice->GetAdapter(DXGIAdapter.GetInitReference());
check(DXGIAdapter.IsValid());
DXGI_ADAPTER_DESC DXGIAdapterDesc;
DXGIAdapter->GetDesc(&DXGIAdapterDesc);
#endif
// SLAdapterInfo only stores a pointer to the LUID, so the LUID is kept in a member that lives as long as this object.
AdapterLuid = DXGIAdapterDesc.AdapterLuid;
SLAdapterInfo.deviceLUID = reinterpret_cast<uint8_t*>(&AdapterLuid);
SLAdapterInfo.deviceLUIDSizeInBytes = sizeof(AdapterLuid);
SLAdapterInfo.vkPhysicalDevice = nullptr;
if (IsStreamlineSupported())
{
// Tuple layout: <0> whether a provider is required, <1> the reason text used in the log lines below.
TTuple<bool, FString> bSwapchainProvider = IsSwapChainProviderRequired(SLAdapterInfo);
if (bSwapchainProvider.Get<0>())
{
UE_LOG(LogStreamlineD3D11RHI, Log, TEXT("Registering FStreamlineD3D11DXGISwapchainProvider as IDXGISwapchainProvider, due to %s"), *bSwapchainProvider.Get<1>());
CustomSwapchainProvider = MakeUnique<FStreamlineD3D11DXGISwapchainProvider>(this);
IModularFeatures::Get().RegisterModularFeature(IDXGISwapchainProvider::GetModularFeatureName(), CustomSwapchainProvider.Get());
bIsSwapchainProviderInstalled = true;
}
else
{
UE_LOG(LogStreamlineD3D11RHI, Log, TEXT("Skip registering IDXGISwapchainProvider, due to %s"), *bSwapchainProvider.Get<1>());
bIsSwapchainProviderInstalled = false;
}
}
UE_LOG(LogStreamlineD3D11RHI, Log, TEXT("%s Leave"), ANSI_TO_TCHAR(__FUNCTION__));
}
// Unregisters and destroys the custom swapchain provider if the constructor installed one.
virtual ~FStreamlineD3D11RHI()
{
UE_LOG(LogStreamlineD3D11RHI, Log, TEXT("%s Enter"), ANSI_TO_TCHAR(__FUNCTION__));
if (CustomSwapchainProvider.IsValid())
{
UE_LOG(LogStreamlineD3D11RHI, Log, TEXT("Unregistering FStreamlineD3D11DXGISwapchainProvider as IDXGISwapchainProvider"));
IModularFeatures::Get().UnregisterModularFeature(IDXGISwapchainProvider::GetModularFeatureName(), CustomSwapchainProvider.Get());
CustomSwapchainProvider.Reset();
}
UE_LOG(LogStreamlineD3D11RHI, Log, TEXT("%s Leave"), ANSI_TO_TCHAR(__FUNCTION__));
}
// Tags every entry of InResources for the viewport InViewID, one SLsetTag call per resource.
// The D3D11 device context is passed to Streamline as the command buffer; CmdList is not used here.
virtual void TagTextures(FRHICommandList& CmdList, uint32 InViewID, const TArrayView<const FRHIStreamlineResource> InResources) final
{
#if ENGINE_PROVIDES_ID3D11DYNAMICRHI
void* NativeCmdBuffer = D3D11RHI->RHIGetDeviceContext();
#else
void* NativeCmdBuffer = D3D11RHI->GetDeviceContext();
#endif
for (const FRHIStreamlineResource& Resource : InResources)
{
sl::Resource SLResource;
FMemory::Memzero(SLResource);
// A missing or invalid texture is still tagged, with the zeroed (null) native pointer.
if (Resource.Texture && Resource.Texture->IsValid())
{
SLResource.native = Resource.Texture->GetNativeResource();
}
SLResource.type = sl::ResourceType::eTex2d;
// no resource state in d3d11
SLResource.state = 0;
sl::ResourceTag Tag;
Tag.resource = &SLResource;
Tag.type = ToSL(Resource.StreamlineTag);
// TODO: sl::ResourceLifecycle::eValidUntilPresent would be more efficient, are there any textures where it's applicable?
Tag.lifecycle = sl::ResourceLifecycle::eOnlyValidNow;
Tag.extent = ToSL(Resource.ViewRect);
SLsetTag(sl::ViewportHandle(InViewID), &Tag, 1, NativeCmdBuffer);
}
}
// Returns the D3D11 device context; neither argument is used.
virtual void* GetCommandBuffer(FRHICommandList& CmdList, FRHITexture* Texture) override final
{
#if ENGINE_PROVIDES_ID3D11DYNAMICRHI
return D3D11RHI->RHIGetDeviceContext();
#else
return D3D11RHI->GetDeviceContext();
#endif
}
// Intentionally empty for D3D11.
virtual void PostStreamlineFeatureEvaluation(FRHICommandList& CmdList, FRHITexture* Texture) final
{
}
// Returns the adapter info filled in by the constructor; the pointer stays valid for the lifetime of this object.
virtual const sl::AdapterInfo* GetAdapterInfo() override final
{
return &SLAdapterInfo;
}
// The D3D11 backend reports every feature below as supported at the RHI level.
virtual bool IsDLSSGSupportedByRHI() const override final
{
return true;
}
virtual bool IsDeepDVCSupportedByRHI() const override final
{
return true;
}
virtual bool IsLatewarpSupportedByRHI() const override final
{
return true;
}
virtual bool IsReflexSupportedByRHI() const override final
{
return true;
}
// Logs the HRESULT carried by LastError and forwards it to the engine's D3D11 result verification,
// unless IsDXGIStatus() classifies it as a status code rather than an error.
virtual void APIErrorHandler(const sl::APIError& LastError) final
{
// Not all DXGI return codes are errors, e.g. DXGI_STATUS_OCCLUDED
if (IsDXGIStatus(LastError.hres))
{
return;
}
UE_LOG(LogStreamlineD3D11RHI, Log, TEXT("DLSSG D3D11/DXGI Error %d"), LastError.hres);
#if ENGINE_MAJOR_VERSION == 5 && ENGINE_MINOR_VERSION >= 3
D3D11RHI->RHIVerifyResult(D3D11RHI->RHIGetDevice(), LastError.hres, "Streamline/DLSSG present", __FILE__, __LINE__);
#else
VerifyD3D11Result(LastError.hres, "Streamline/DLSSG present", __FILE__, __LINE__, static_cast<ID3D11Device*>(GDynamicRHI->RHIGetNativeDevice()));
#endif
}
// Returns true when SLgetNativeInterface succeeds and yields an interface different from NativeSwapchain.
// Returns false (after logging) when the query fails.
virtual bool IsStreamlineSwapchainProxy(void* NativeSwapchain) const override final
{
TRefCountPtr<IUnknown> NativeInterface;
const sl::Result Result = SLgetNativeInterface(NativeSwapchain, IID_PPV_ARGS_Helper(NativeInterface.GetInitReference()));
if (Result == sl::Result::eOk)
{
const bool bIsProxy = NativeInterface != NativeSwapchain;
//UE_LOG(LogStreamlineD3D11RHI, Log, TEXT("%s %s NativeInterface=%p NativeSwapchain=%p isProxy=%u "), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName(), NativeSwapchain, NativeInterface.GetReference(), bIsProxy);
return bIsProxy;
}
else
{
UE_LOG(LogStreamlineD3D11RHI, Log, TEXT("SLgetNativeInterface(%p) failed (%d, %s)"), NativeSwapchain, Result, ANSI_TO_TCHAR(sl::getResultAsStr(Result)));
}
return false;
}
protected:
private:
// Engine D3D11 RHI this object wraps; checked non-null in the constructor.
#if ENGINE_PROVIDES_ID3D11DYNAMICRHI
ID3D11DynamicRHI* D3D11RHI = nullptr;
#else
FD3D11DynamicRHI* D3D11RHI = nullptr;
#endif
// Backing storage for SLAdapterInfo.deviceLUID.
LUID AdapterLuid;
sl::AdapterInfo SLAdapterInfo;
// Only valid when the constructor decided a custom swapchain provider is required.
TUniquePtr<FStreamlineD3D11DXGISwapchainProvider> CustomSwapchainProvider;
};
/** IModuleInterface implementation */
void FStreamlineD3D11RHIModule::StartupModule()
{
auto CVarInitializePlugin = IConsoleManager::Get().FindConsoleVariable(TEXT("r.Streamline.InitializePlugin"));
if (CVarInitializePlugin && !CVarInitializePlugin->GetBool() || (FParse::Param(FCommandLine::Get(), TEXT("slno"))))
{
UE_LOG(LogStreamlineD3D11RHI, Log, TEXT("Initialization of StreamlineD3D11RHI is disabled."));
return;
}
UE_LOG(LogStreamlineD3D11RHI, Log, TEXT("%s Enter"), ANSI_TO_TCHAR(__FUNCTION__));
if(FApp::CanEverRender())
{
if ((GDynamicRHI != nullptr) && (GDynamicRHI->GetName() == FString("D3D11")))
{
FStreamlineRHIModule& StreamlineRHIModule = FModuleManager::LoadModuleChecked<FStreamlineRHIModule>(TEXT("StreamlineRHI"));
if (AreStreamlineFunctionsLoaded())
{
StreamlineRHIModule.InitializeStreamline();
if (IsStreamlineSupported())
{
sl::Result Result = SLsetD3DDevice(GDynamicRHI->RHIGetNativeDevice());
checkf(Result == sl::Result::eOk, TEXT("%s: SLsetD3DDevice failed (%s)"), ANSI_TO_TCHAR(__FUNCTION__), ANSI_TO_TCHAR(sl::getResultAsStr(Result)));
}
}
}
else
{
UE_LOG(LogStreamlineD3D11RHI, Log, TEXT("D3D11RHI is not the active DynamicRHI; skipping of setting up the custom swapchain factory"));
}
}
else
{
UE_LOG(LogStreamlineD3D11RHI, Log, TEXT("This UE instance does not render, skipping initalizing of Streamline and registering of custom DXGI and D3D11 functions"));
}
UE_LOG(LogStreamlineD3D11RHI, Log, TEXT("%s Leave"), ANSI_TO_TCHAR(__FUNCTION__));
}
void FStreamlineD3D11RHIModule::ShutdownModule()
{
auto CVarInitializePlugin = IConsoleManager::Get().FindConsoleVariable(TEXT("r.Streamline.InitializePlugin"));
if (CVarInitializePlugin && !CVarInitializePlugin->GetBool())
{
return;
}
UE_LOG(LogStreamlineD3D11RHI, Log, TEXT("%s Enter"), ANSI_TO_TCHAR(__FUNCTION__));
UE_LOG(LogStreamlineD3D11RHI, Log, TEXT("%s Leave"), ANSI_TO_TCHAR(__FUNCTION__));
}
/** Creates the D3D11 flavour of FStreamlineRHI and hands ownership to the caller. */
TUniquePtr<FStreamlineRHI> FStreamlineD3D11RHIModule::CreateStreamlineRHI(const FStreamlineRHICreateArguments& Arguments)
{
	// The derived-type TUniquePtr converts to the base-type TUniquePtr on return.
	return MakeUnique<FStreamlineD3D11RHI>(Arguments);
}
IMPLEMENT_MODULE(FStreamlineD3D11RHIModule, StreamlineD3D11RHI )
#undef LOCTEXT_NAMESPACE

View File

@ -0,0 +1,25 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "Modules/ModuleManager.h"
#include "CoreMinimal.h"
#include "StreamlineRHI.h"
class FStreamlineD3D11RHIModule final : public IStreamlineRHIModule
{
public:
virtual TUniquePtr<FStreamlineRHI> CreateStreamlineRHI(const FStreamlineRHICreateArguments& Arguments) override;
/** IModuleInterface implementation */
virtual void StartupModule();
virtual void ShutdownModule();
};

View File

@ -0,0 +1,79 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
using UnrealBuildTool;
using System.IO;
/// <summary>
/// UnrealBuildTool rules for the StreamlineD3D11RHI module. Engine versions from 5.1 on use the public
/// ID3D11DynamicRHI interface; older engines need the private D3D11RHI headers and their third-party dependencies.
/// </summary>
public class StreamlineD3D11RHI : ModuleRules
{
	public StreamlineD3D11RHI(ReadOnlyTargetRules Target) : base(Target)
	{
		PCHUsage = PCHUsageMode.UseExplicitOrSharedPCHs;

		// No additional public or private include paths are needed by default.
		PublicDependencyModuleNames.Add("StreamlineRHI");

		string[] PrivateModules =
		{
			"Core",
			"D3D11RHI",
			"Engine",
			"RenderCore",
			"RHI",
			"Streamline",
			"StreamlineRHI",
		};
		PrivateDependencyModuleNames.AddRange(PrivateModules);

		AddEngineThirdPartyPrivateStaticDependencies(Target, "DX11");

		var EngineVersion = ReadOnlyBuildVersion.Current;
		bool bIsUE5 = EngineVersion.MajorVersion == 5;
		bool bHasPublicD3D11Interface = bIsUE5 && EngineVersion.MinorVersion >= 1;

		if (bIsUE5)
		{
			PrivateDependencyModuleNames.Add("RHICore");
		}

		if (bHasPublicD3D11Interface)
		{
			PrivateDefinitions.Add("ENGINE_PROVIDES_ID3D11DYNAMICRHI=1");
		}
		else
		{
			PrivateDefinitions.Add("ENGINE_PROVIDES_ID3D11DYNAMICRHI=0");

			PrivateIncludePaths.Add(Path.Combine(EngineDirectory, "Source/Runtime/Windows/D3D11RHI/Private"));
			PrivateIncludePaths.Add(Path.Combine(EngineDirectory, "Source/Runtime/Windows/D3D11RHI/Private/Windows"));

			// required by D3D11RHI private headers to compile before 5.1
			string[] LegacyThirdPartyModules =
			{
				"IntelMetricsDiscovery",
				"IntelExtensionsFramework",
				"NVAftermath",
				"HLMediaLibrary",
			};
			foreach (string ThirdPartyModule in LegacyThirdPartyModules)
			{
				AddEngineThirdPartyPrivateStaticDependencies(Target, ThirdPartyModule);
			}
		}
	}
}

View File

@ -0,0 +1,567 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "StreamlineD3D12RHI.h"
#include "Features/IModularFeatures.h"
#include "GenericPlatform/GenericPlatformFile.h"
#include "HAL/PlatformMisc.h"
#include "Runtime/Launch/Resources/Version.h"
#if ENGINE_PROVIDES_ID3D12DYNAMICRHI
#include "ID3D12DynamicRHI.h"
#if (ENGINE_MAJOR_VERSION == 5) && (ENGINE_MINOR_VERSION >= 3)
#include "Windows/WindowsD3D12ThirdParty.h" // for dxgi1_6.h
#else
#include "Windows/D3D12ThirdParty.h" // for dxgi1_6.h
#endif
#else
#include "D3D12RHIPrivate.h"
THIRD_PARTY_INCLUDES_START
#include "dxgi1_6.h"
THIRD_PARTY_INCLUDES_END
#endif
#include "HAL/IConsoleManager.h"
class FD3D12Device;
#ifndef DX_MAX_MSAA_COUNT
#define DX_MAX_MSAA_COUNT 8
#endif
#if !defined(D3D12_RHI_RAYTRACING)
#define D3D12_RHI_RAYTRACING (RHI_RAYTRACING)
#endif
struct FShaderCodePackedResourceCounts;
#if ENGINE_MAJOR_VERSION < 5 || ENGINE_MINOR_VERSION < 3
#include "D3D12Util.h"
#endif
#if ENGINE_PROVIDES_ID3D12DYNAMICRHI && ENGINE_ID3D12DYNAMICRHI_NEEDS_CMDLIST
#define RHICMDLIST_ARG_PASSTHROUGH CmdList,
#else
#define RHICMDLIST_ARG_PASSTHROUGH
#endif
#include "Misc/Paths.h"
#include "Modules/ModuleManager.h"
#include "Windows/IDXGISwapchainProvider.h"
#include "StreamlineAPI.h"
#include "StreamlineConversions.h"
#include "StreamlineRHI.h"
#include "sl.h"
#include "sl_dlss_g.h"
// The UE module
DEFINE_LOG_CATEGORY_STATIC(LogStreamlineD3D12RHI, Log, All);
#define LOCTEXT_NAMESPACE "StreamlineD3D12RHI"
/**
 * IDXGISwapchainProvider used with the D3D12 RHI.
 * Swapchains are created through the Streamline-upgraded DXGI factory when the owning FStreamlineRHI
 * allows swapchain hooking, and through the factory passed in by the caller otherwise. Either way the
 * owning FStreamlineRHI is told about the resulting swapchain pointer.
 */
class FStreamlineD3D12DXGISwapchainProvider : public IDXGISwapchainProvider
{
public:
	FStreamlineD3D12DXGISwapchainProvider(const FStreamlineRHI* InRHI) : StreamlineRHI(InRHI) {}
	virtual ~FStreamlineD3D12DXGISwapchainProvider() = default;

#if ENGINE_MAJOR_VERSION == 5 && ENGINE_MINOR_VERSION >= 1
	bool SupportsRHI(ERHIInterfaceType RHIType) const override final { return RHIType == ERHIInterfaceType::D3D12; }
#else
	bool SupportsRHI(const TCHAR* RHIName) const override final { return FString(RHIName) == FString("D3D12"); }
#endif

#if ENGINE_MAJOR_VERSION == 5 && ENGINE_MINOR_VERSION >= 3
	const TCHAR* GetProviderName() const override final { return TEXT("FStreamlineD3D12DXGISwapchainProvider"); }
#else
	TCHAR* GetName() const override final
	{
		// The engine interface hands out a mutable pointer, so the name lives in a static buffer.
		static TCHAR ProviderName[] = TEXT("FStreamlineD3D12DXGISwapchainProvider");
		return ProviderName;
	}
#endif

	HRESULT CreateSwapChainForHwnd(IDXGIFactory2* pFactory, IUnknown* pDevice, HWND hWnd, const DXGI_SWAP_CHAIN_DESC1* pDesc, const DXGI_SWAP_CHAIN_FULLSCREEN_DESC* pFullScreenDesc, IDXGIOutput* pRestrictToOutput, IDXGISwapChain1** ppSwapChain) override final
	{
		// Start from the caller's factory and swap in the Streamline one only when hooking is allowed.
		IDXGIFactory2* FactoryToUse = pFactory;
		if (StreamlineRHI->IsSwapchainHookingAllowed())
		{
			// TODO: what happens if a second swapchain is created while PIE is active?
			sl::Result SLResult = SLUpgradeInterface(reinterpret_cast<void**>(&FactoryToUse));
			checkf(SLResult == sl::Result::eOk, TEXT("%s: error upgrading IDXGIFactory (%s)"), ANSI_TO_TCHAR(__FUNCTION__), ANSI_TO_TCHAR(sl::getResultAsStr(SLResult)));
		}

		const HRESULT DXGIResult = FactoryToUse->CreateSwapChainForHwnd(pDevice, hWnd, pDesc, pFullScreenDesc, pRestrictToOutput, ppSwapChain);
		StreamlineRHI->OnSwapchainCreated(*ppSwapChain);
		return DXGIResult;
	}

	HRESULT CreateSwapChain(IDXGIFactory* pFactory, IUnknown* pDevice, DXGI_SWAP_CHAIN_DESC* pDesc, IDXGISwapChain** ppSwapChain) override final
	{
		// Same flow as CreateSwapChainForHwnd, for the legacy IDXGIFactory entry point.
		IDXGIFactory* FactoryToUse = pFactory;
		if (StreamlineRHI->IsSwapchainHookingAllowed())
		{
			// TODO: what happens if a second swapchain is created while PIE is active?
			sl::Result SLResult = SLUpgradeInterface(reinterpret_cast<void**>(&FactoryToUse));
			checkf(SLResult == sl::Result::eOk, TEXT("%s: error upgrading IDXGIFactory (%s)"), ANSI_TO_TCHAR(__FUNCTION__), ANSI_TO_TCHAR(sl::getResultAsStr(SLResult)));
		}

		const HRESULT DXGIResult = FactoryToUse->CreateSwapChain(pDevice, pDesc, ppSwapChain);
		StreamlineRHI->OnSwapchainCreated(*ppSwapChain);
		return DXGIResult;
	}

private:
	/** Non-owning pointer to the RHI that created this provider. */
	const FStreamlineRHI* StreamlineRHI;
};
class STREAMLINED3D12RHI_API FStreamlineD3D12RHI : public FStreamlineRHI
{
public:
FStreamlineD3D12RHI(const FStreamlineRHICreateArguments& Arguments)
: FStreamlineRHI(Arguments)
#if ENGINE_PROVIDES_ID3D12DYNAMICRHI
, D3D12RHI(CastDynamicRHI<ID3D12DynamicRHI>(Arguments.DynamicRHI))
#else
, D3D12RHI(static_cast<FD3D12DynamicRHI*>(Arguments.DynamicRHI))
#endif
{
UE_LOG(LogStreamlineD3D12RHI, Log, TEXT("%s Enter"), ANSI_TO_TCHAR(__FUNCTION__));
check(D3D12RHI != nullptr);
#if ENGINE_PROVIDES_ID3D12DYNAMICRHI
TArray<FD3D12MinimalAdapterDesc> AdapterDescs = D3D12RHI->RHIGetAdapterDescs();
check(AdapterDescs.Num() > 0);
if (AdapterDescs.Num() > 1)
{
UE_LOG(LogStreamlineD3D12RHI, Warning, TEXT("%s: found %d adapters, using first one found to query feature availability"), ANSI_TO_TCHAR(__FUNCTION__), AdapterDescs.Num());
}
const DXGI_ADAPTER_DESC& DXGIAdapterDesc = AdapterDescs[0].Desc;
#else
const DXGI_ADAPTER_DESC& DXGIAdapterDesc = D3D12RHI->GetAdapter().GetD3DAdapterDesc();
#endif
AdapterLuid = DXGIAdapterDesc.AdapterLuid;
SLAdapterInfo.deviceLUID = reinterpret_cast<uint8_t*>(&AdapterLuid);
SLAdapterInfo.deviceLUIDSizeInBytes = sizeof(AdapterLuid);
SLAdapterInfo.vkPhysicalDevice = nullptr;
if (IsStreamlineSupported())
{
TTuple<bool, FString> bSwapchainProvider = IsSwapChainProviderRequired(SLAdapterInfo);
if (bSwapchainProvider.Get<0>())
{
UE_LOG(LogStreamlineD3D12RHI, Log, TEXT("Registering FStreamlineD3D12DXGISwapchainProvider as IDXGISwapchainProvider, due to %s"), *bSwapchainProvider.Get<1>());
CustomSwapchainProvider = MakeUnique<FStreamlineD3D12DXGISwapchainProvider>(this);
IModularFeatures::Get().RegisterModularFeature(IDXGISwapchainProvider::GetModularFeatureName(), CustomSwapchainProvider.Get());
bIsSwapchainProviderInstalled = true;
}
else
{
UE_LOG(LogStreamlineD3D12RHI, Log, TEXT("Skip registering IDXGISwapchainProvider, due to %s"), *bSwapchainProvider.Get<1>());
bIsSwapchainProviderInstalled = false;
}
}
UE_LOG(LogStreamlineD3D12RHI, Log, TEXT("%s Leave"), ANSI_TO_TCHAR(__FUNCTION__));
}
virtual ~FStreamlineD3D12RHI()
{
UE_LOG(LogStreamlineD3D12RHI, Log, TEXT("%s Enter"), ANSI_TO_TCHAR(__FUNCTION__));
if (CustomSwapchainProvider.IsValid())
{
UE_LOG(LogStreamlineD3D12RHI, Log, TEXT("Unregistering FStreamlineD3D12DXGISwapchainProvider as IDXGISwapchainProvider"));
IModularFeatures::Get().UnregisterModularFeature(IDXGISwapchainProvider::GetModularFeatureName(), CustomSwapchainProvider.Get());
CustomSwapchainProvider.Reset();
}
UE_LOG(LogStreamlineD3D12RHI, Log, TEXT("%s Leave"), ANSI_TO_TCHAR(__FUNCTION__));
}
// Tags the given engine resources with Streamline for the viewport InViewID.
//  - CmdList:     RHI command list used for the state transitions / native command list lookup
//  - InViewID:    wrapped into an sl::ViewportHandle for SLsetTag
//  - InResources: one entry per Streamline tag; entries with a null or invalid texture are
//                 passed on with a null native resource ("nulltag")
// Resources whose subresources are in different D3D12 states (depth) are wrapped in a
// pre-tag / post-tag transition pair.
virtual void TagTextures(FRHICommandList& CmdList, uint32 InViewID, const TArrayView<const FRHIStreamlineResource> InResources) final
{
    if (!InResources.Num()) // IsEmpty is only 5.1+
    {
        return;
    }

#if ENGINE_PROVIDES_ID3D12DYNAMICRHI
    ID3D12GraphicsCommandList* NativeCmdList = nullptr;
#else
    ID3D12CommandList* NativeCmdList = nullptr;
    FD3D12Device* D3D12Device = nullptr;
#endif

    // Pick the native command list (and device) from the first non-null texture.
    for (const FRHIStreamlineResource& Resource : InResources)
    {
        if (Resource.Texture)
        {
            // that's inconsistent with below, but...
            check(Resource.Texture->IsValid());
#if ENGINE_PROVIDES_ID3D12DYNAMICRHI
            NativeCmdList = D3D12RHI->RHIGetGraphicsCommandList(RHICMDLIST_ARG_PASSTHROUGH D3D12RHI->RHIGetResourceDeviceIndex(Resource.Texture));
#else
            FD3D12TextureBase* DeviceQueryD3D12Texture = GetD3D12TextureFromRHITexture(Resource.Texture);
            D3D12Device = DeviceQueryD3D12Texture->GetParentDevice();
            NativeCmdList = D3D12Device->GetDefaultCommandContext().CommandListHandle.CommandList();
#endif
            // TODO check that all resources have the same device index. So if that ever changes we might need to split the calls into slTag into per command list/per device index calls.
            // for now we take any commandlist
            break;
        }
    }

    struct FStreamlineD3D12Transition
    {
        FRHITexture* Texture;
        D3D12_RESOURCE_STATES State;
        uint32 SubresouceIndex;
    };

    // Applies a single subresource transition through whichever D3D12 RHI interface this engine version provides.
    auto TransitionResource = [&](const FStreamlineD3D12Transition& Transition)
    {
#if ENGINE_PROVIDES_ID3D12DYNAMICRHI
        D3D12RHI->RHITransitionResource(CmdList, Transition.Texture, Transition.State, Transition.SubresouceIndex);
#else
        const FD3D12TextureBase* D3D12Texture = GetD3D12TextureFromRHITexture(Transition.Texture);
#if ENGINE_MAJOR_VERSION == 5
        D3D12RHI->TransitionResource(D3D12Device->GetDefaultCommandContext().CommandListHandle, D3D12Texture->GetResource(), D3D12_RESOURCE_STATE_TBD, Transition.State, Transition.SubresouceIndex, FD3D12DynamicRHI::ETransitionMode::Apply);
#else
        D3D12RHI->TransitionResource(D3D12Device->GetDefaultCommandContext().CommandListHandle, D3D12Texture->GetResource(), Transition.State, Transition.SubresouceIndex);
#endif
#endif
    };

    // adding + 1 to get to the count
    constexpr uint32 AllocatorNum = uint32(EStreamlineResource::Last) + 1;

    // if all input resources are nullptr, those arrays stay empty below
    TArray<FStreamlineD3D12Transition, TInlineAllocator<AllocatorNum>> PreTagTransitions;
    TArray<FStreamlineD3D12Transition, TInlineAllocator<AllocatorNum>> PostTagTransitions;

    // those get filled in also for null input resource so we can "Streamline nulltag" them
    TArray<sl::Resource, TInlineAllocator<AllocatorNum>> SLResources;
    TArray<sl::ResourceTag, TInlineAllocator<AllocatorNum>> SLTags;

    // Each SLTag stores a pointer into SLResources, so SLResources must never reallocate while
    // we fill it. Reserving up front guarantees that even if a caller passes more entries than
    // the inline capacity.
    SLResources.Reserve(InResources.Num());
    SLTags.Reserve(InResources.Num());

    for (const FRHIStreamlineResource& Resource : InResources)
    {
        sl::Resource SLResource;
        FMemory::Memzero(SLResource);
        SLResource.type = sl::ResourceType::eCount;

        sl::ResourceTag SLTag;
        SLTag.type = ToSL(Resource.StreamlineTag);
        // TODO: sl::ResourceLifecycle::eValidUntilPresent would be more efficient, are there any textures where it's applicable?
        SLTag.lifecycle = sl::ResourceLifecycle::eOnlyValidNow;

        if (Resource.Texture && Resource.Texture->IsValid())
        {
            SLResource.native = Resource.Texture->GetNativeResource();
            SLResource.type = sl::ResourceType::eTex2d;

            switch (Resource.StreamlineTag)
            {
            case EStreamlineResource::Depth:
                // note: subresources are in different states, so we add a transition sandwich
                // subresource 0 is D3D12_RESOURCE_STATE_DEPTH_READ|D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE
                // subresource 1 is D3D12_RESOURCE_STATE_DEPTH_WRITE
                PreTagTransitions.Add({ Resource.Texture, D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, 1 });
                SLResource.state = PreTagTransitions.Last().State;
                PostTagTransitions.Add({ Resource.Texture, D3D12_RESOURCE_STATE_DEPTH_WRITE, 1 });
                break;
            case EStreamlineResource::MotionVectors:
                SLResource.state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
                break;
            case EStreamlineResource::HUDLessColor:
                SLResource.state = D3D12_RESOURCE_STATE_COPY_DEST;
                break;
            case EStreamlineResource::UIColorAndAlpha:
                SLResource.state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
                break;
            case EStreamlineResource::Backbuffer:
                // state 0 (the numeric value of D3D12_RESOURCE_STATE_COMMON / _PRESENT).
                // The break was missing, which silently overwrote this with the UAV state of the next case.
                SLResource.state = 0;
                break;
            case EStreamlineResource::ScalingOutputColor:
                SLResource.state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
                break;
            case EStreamlineResource::NoWarpMask:
                SLResource.state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
                break;
            default:
                checkf(false, TEXT("Unimplemented tag type (streamline plugin developer should fix)"));
                SLResource.state = D3D12_RESOURCE_STATE_COMMON;
                break;
            }

            SLTag.extent = ToSL(Resource.ViewRect);
        } // if resource is valid
        else
        {
            // explicitly nulltagging so SL removes it from its internal book keeping
            SLResource.native = nullptr;
        }

        // order matters here so we first put the resource into our array and then point the sltag at the resource in the array
        // (the Reserve above guarantees this Add cannot move previously stored elements)
        SLResources.Add(SLResource);
        SLTag.resource = &SLResources.Last();
        SLTags.Add(SLTag);
    }

    // transition any resources before
    for (FStreamlineD3D12Transition& Transition : PreTagTransitions)
    {
        TransitionResource(Transition);
    }

    // flush transitions
    // if we nulltag D3D12Device is nullptr and PreTagTransitions is empty
    if (PreTagTransitions.Num())
    {
#if ENGINE_PROVIDES_ID3D12DYNAMICRHI
        // TODO 5.1+ support
#else
        D3D12Device->GetDefaultCommandContext().CommandListHandle.FlushResourceBarriers();
#endif
    }

    // tag all the things
    // note that NativeCmdList might be null if we only have resources to "Streamline nulltag"
    SLsetTag(sl::ViewportHandle(InViewID), SLTags.GetData(), SLTags.Num(), NativeCmdList);

    // then transition back to what was before
    for (FStreamlineD3D12Transition& Transition : PostTagTransitions)
    {
        TransitionResource(Transition);
    }
    // TODO flush transitions again?
}
// Returns the native D3D12 command list associated with the device that owns Texture,
// type-erased to void* for the API-agnostic Streamline RHI interface.
virtual void* GetCommandBuffer(FRHICommandList& CmdList, FRHITexture* Texture) override final
{
#if ENGINE_PROVIDES_ID3D12DYNAMICRHI
    ID3D12GraphicsCommandList* const CommandListForTexture =
        D3D12RHI->RHIGetGraphicsCommandList(RHICMDLIST_ARG_PASSTHROUGH D3D12RHI->RHIGetResourceDeviceIndex(Texture));
#else
    // older engines: go through the private D3D12 texture -> parent device -> default context
    FD3D12Device* const OwningDevice = GetD3D12TextureFromRHITexture(Texture)->GetParentDevice();
    ID3D12CommandList* const CommandListForTexture = OwningDevice->GetDefaultCommandContext().CommandListHandle.CommandList();
#endif
    return static_cast<void*>(CommandListForTexture);
}
// Called after a Streamline feature was evaluated on the native command list so the engine's
// D3D12 RHI can resynchronise with work that was recorded outside of its control.
void PostStreamlineFeatureEvaluation(FRHICommandList& CmdList, FRHITexture* Texture) final
{
#if ENGINE_PROVIDES_ID3D12DYNAMICRHI
    const uint32 TextureDeviceIndex = D3D12RHI->RHIGetResourceDeviceIndex(Texture);
    ID3D12GraphicsCommandList* const ExternalWorkCmdList = D3D12RHI->RHIGetGraphicsCommandList(RHICMDLIST_ARG_PASSTHROUGH TextureDeviceIndex);
    D3D12RHI->RHIFinishExternalComputeWork(RHICMDLIST_ARG_PASSTHROUGH TextureDeviceIndex, ExternalWorkCmdList);
#else
    // older engines: re-set the compute root signature and point the descriptor cache at the current command list
    FD3D12Device* const MaskedDevice = D3D12RHI->GetAdapter().GetDevice(CmdList.GetGPUMask().ToIndex());
    MaskedDevice->GetCommandContext().StateCache.ForceSetComputeRootSignature();
    MaskedDevice->GetCommandContext().StateCache.GetDescriptorCache()->SetCurrentCommandList(MaskedDevice->GetCommandContext().CommandListHandle);
#endif
}
// Returns a pointer to the SLAdapterInfo member of this object.
// The returned pointer is owned by this object and is only valid while it is alive.
virtual const sl::AdapterInfo* GetAdapterInfo() override final
{
return &SLAdapterInfo;
}
// RHI-level capability query for DLSS-G: this D3D12 implementation always answers true.
virtual bool IsDLSSGSupportedByRHI() const override final
{
return true;
}
// RHI-level capability query for DeepDVC: this D3D12 implementation always answers true.
virtual bool IsDeepDVCSupportedByRHI() const override final
{
return true;
}
// RHI-level capability query for Latewarp: this D3D12 implementation always answers true.
virtual bool IsLatewarpSupportedByRHI() const override final
{
return true;
}
// RHI-level capability query for Reflex: this D3D12 implementation always answers true.
virtual bool IsReflexSupportedByRHI() const override final
{
return true;
}
// Handles an API error reported by Streamline (LastError.hres is the failing HRESULT).
// DXGI status codes are ignored; everything else is logged and then handed to the engine's
// D3D12 result verification so it goes through the regular D3D12RHI error handling.
// How that hand-off happens depends on the engine version / build configuration:
//  - UE 5.3+: ID3D12DynamicRHI::RHIVerifyResult
//  - monolithic builds or engines exporting VerifyD3D12Result: direct call
//  - otherwise: VerifyD3D12Result is looked up by its mangled name in the D3D12RHI module binary
virtual void APIErrorHandler(const sl::APIError& LastError) final
{
    // Not all DXGI return codes are errors, e.g. DXGI_STATUS_OCCLUDED
    if (IsDXGIStatus(LastError.hres))
    {
        return;
    }

    TCHAR ErrorMessage[1024];
    FPlatformMisc::GetSystemErrorMessage(ErrorMessage, 1024, LastError.hres);
    UE_LOG(LogStreamlineD3D12RHI, Log, TEXT("DLSSG D3D12/DXGI Error 0x%x (%s)"), LastError.hres, ErrorMessage);

#if ENGINE_MAJOR_VERSION == 5 && ENGINE_MINOR_VERSION >= 3
    D3D12RHI->RHIVerifyResult(static_cast<ID3D12Device*>(D3D12RHI->RHIGetNativeDevice()), LastError.hres, "Streamline/DLSSG present", __FILE__, __LINE__);
#else
    // that should be set in the 5.1 to 4.27 backport branches that have D3D12RHI_API for VerifyD3D12Result
    // and optionally a 5.2 NVRTX branch
    // Note: the macro must expand to a plain 0/1. Having it expand to an expression containing
    // `defined(...)` is undefined behaviour once the macro is used inside #if.
#if !defined(HAS_VERIFYD3D12_DLL_EXPORT)
#if defined(ENGINE_STREAMLINE_VERSION) && ENGINE_STREAMLINE_VERSION >= 3
#define HAS_VERIFYD3D12_DLL_EXPORT 1
#else
#define HAS_VERIFYD3D12_DLL_EXPORT 0
#endif
#endif

#if IS_MONOLITHIC || HAS_VERIFYD3D12_DLL_EXPORT
    VerifyD3D12Result(LastError.hres, "Streamline/DLSSG present", __FILE__, __LINE__,static_cast<ID3D12Device*>(GDynamicRHI->RHIGetNativeDevice()));
#else
    // VerifyD3D12Result is not exported in this configuration, so try to resolve it by its mangled symbol name
    using VerifyD3D12ResultPtrType = void (HRESULT, const ANSICHAR* , const ANSICHAR* , uint32 , ID3D12Device*, FString );
    VerifyD3D12ResultPtrType* VerifyD3D12ResultPtr = nullptr;
    const TCHAR* VerifyD3D12ResultDemangledName = TEXT("?VerifyD3D12Result@D3D12RHI@@YAXJPEBD0IPEAUID3D12Device@@VFString@@@Z");
    const FString D3D12RHIBinaryPath = FModuleManager::Get().GetModuleFilename(FName(TEXT("D3D12RHI")));
    void* D3D12BinaryDLL = FPlatformProcess::GetDllHandle(*D3D12RHIBinaryPath);
    VerifyD3D12ResultPtr = (VerifyD3D12ResultPtrType*)(FWindowsPlatformProcess::GetDllExport(D3D12BinaryDLL, VerifyD3D12ResultDemangledName));
    UE_LOG(LogStreamlineD3D12RHI, Log, TEXT("%s = %p"), VerifyD3D12ResultDemangledName, VerifyD3D12ResultPtr);
    if (VerifyD3D12ResultPtr)
    {
        VerifyD3D12ResultPtr(LastError.hres, "Streamline/DLSSG present", __FILE__, __LINE__, static_cast<ID3D12Device*>(GDynamicRHI->RHIGetNativeDevice()), FString());
    }
    else
    {
        UE_LOG(LogStreamlineD3D12RHI, Log, TEXT("Please add a D3D12RHI_API to the declaration of VerifyD3D12Result in D3D12Util.h to allow non monolithic builds to pipe handling of this error into the D3D12RHI DX/DXGI error handling system"));
    }
#endif
#endif
}
virtual bool IsStreamlineSwapchainProxy(void* NativeSwapchain) const override final
{
TRefCountPtr<IUnknown> NativeInterface;
const sl::Result Result = SLgetNativeInterface(NativeSwapchain, IID_PPV_ARGS_Helper(NativeInterface.GetInitReference()));
if (Result == sl::Result::eOk)
{
const bool bIsProxy = NativeInterface != NativeSwapchain;
//UE_LOG(LogStreamlineD3D12RHI, Log, TEXT("%s %s NativeInterface=%p NativeSwapchain=%p isProxy=%u "), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName(), NativeSwapchain, NativeInterface.GetReference(), bIsProxy);
return bIsProxy;
}
else
{
UE_LOG(LogStreamlineD3D12RHI, Log, TEXT("SLgetNativeInterface(%p) failed (%d, %s)"), NativeSwapchain, Result, ANSI_TO_TCHAR(sl::getResultAsStr(Result)));
}
return false;
}
protected:
private:
// Engine D3D12 RHI used for all native calls; the public ID3D12DynamicRHI interface where the
// engine provides it, otherwise the private FD3D12DynamicRHI class.
#if ENGINE_PROVIDES_ID3D12DYNAMICRHI
ID3D12DynamicRHI* D3D12RHI = nullptr;
#else
FD3D12DynamicRHI* D3D12RHI = nullptr;
#endif
// LUID of the adapter in use; SLAdapterInfo.deviceLUID points at this member, so both must share the same lifetime.
LUID AdapterLuid;
// Adapter description handed to Streamline (see GetAdapterInfo).
sl::AdapterInfo SLAdapterInfo;
// Only valid when the custom swapchain provider was registered; unregistered and released in the destructor.
TUniquePtr<FStreamlineD3D12DXGISwapchainProvider> CustomSwapchainProvider;
};
/** IModuleInterface implementation */
// Module startup: unless disabled via r.Streamline.InitializePlugin or the -slno command line
// switch, initializes Streamline through the StreamlineRHI module and hands it the native
// D3D12 device. Only does work when this process can render and D3D12 is the active RHI.
void FStreamlineD3D12RHIModule::StartupModule()
{
    auto* const InitializePluginCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("r.Streamline.InitializePlugin"));
    const bool bDisabledByCVar = InitializePluginCVar && !InitializePluginCVar->GetBool();
    if (bDisabledByCVar || FParse::Param(FCommandLine::Get(), TEXT("slno")))
    {
        UE_LOG(LogStreamlineD3D12RHI, Log, TEXT("Initialization of StreamlineD3D12RHI is disabled."));
        return;
    }

    UE_LOG(LogStreamlineD3D12RHI, Log, TEXT("%s Enter"), ANSI_TO_TCHAR(__FUNCTION__));

    if (!FApp::CanEverRender())
    {
        UE_LOG(LogStreamlineD3D12RHI, Log, TEXT("This UE instance does not render, skipping initalizing of Streamline and registering of custom DXGI and D3D12 functions"));
    }
    else if ((GDynamicRHI == nullptr) || !(GDynamicRHI->GetName() == FString("D3D12")))
    {
        UE_LOG(LogStreamlineD3D12RHI, Log, TEXT("D3D12RHI is not the active DynamicRHI; skipping of setting up the custom swapchain factory"));
    }
    else
    {
        FStreamlineRHIModule& StreamlineRHIModule = FModuleManager::LoadModuleChecked<FStreamlineRHIModule>(TEXT("StreamlineRHI"));
        if (AreStreamlineFunctionsLoaded())
        {
            StreamlineRHIModule.InitializeStreamline();
            if (IsStreamlineSupported())
            {
                // Streamline needs the native device before any feature can be used
                sl::Result Result = SLsetD3DDevice(GDynamicRHI->RHIGetNativeDevice());
                checkf(Result == sl::Result::eOk, TEXT("%s: SLsetD3DDevice failed (%s)"), ANSI_TO_TCHAR(__FUNCTION__), ANSI_TO_TCHAR(sl::getResultAsStr(Result)));
            }
        }
    }

    UE_LOG(LogStreamlineD3D12RHI, Log, TEXT("%s Leave"), ANSI_TO_TCHAR(__FUNCTION__));
}
// Module shutdown: nothing to release here; only logs Enter/Leave unless the plugin was
// disabled through r.Streamline.InitializePlugin.
void FStreamlineD3D12RHIModule::ShutdownModule()
{
    auto* const InitializePluginCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("r.Streamline.InitializePlugin"));
    const bool bPluginEnabled = !InitializePluginCVar || InitializePluginCVar->GetBool();
    if (bPluginEnabled)
    {
        UE_LOG(LogStreamlineD3D12RHI, Log, TEXT("%s Enter"), ANSI_TO_TCHAR(__FUNCTION__));
        UE_LOG(LogStreamlineD3D12RHI, Log, TEXT("%s Leave"), ANSI_TO_TCHAR(__FUNCTION__));
    }
}
// Factory: creates the D3D12 implementation of FStreamlineRHI from Arguments and transfers
// ownership to the caller.
TUniquePtr<FStreamlineRHI> FStreamlineD3D12RHIModule::CreateStreamlineRHI(const FStreamlineRHICreateArguments& Arguments)
{
    return TUniquePtr<FStreamlineRHI>(new FStreamlineD3D12RHI(Arguments));
}
IMPLEMENT_MODULE(FStreamlineD3D12RHIModule, StreamlineD3D12RHI )
#undef LOCTEXT_NAMESPACE

View File

@ -0,0 +1,25 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "Modules/ModuleManager.h"
#include "CoreMinimal.h"
#include "StreamlineRHI.h"
class FStreamlineD3D12RHIModule final : public IStreamlineRHIModule
{
public:
virtual TUniquePtr<FStreamlineRHI> CreateStreamlineRHI(const FStreamlineRHICreateArguments& Arguments) override;
/** IModuleInterface implementation */
virtual void StartupModule();
virtual void ShutdownModule();
};

View File

@ -0,0 +1,78 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
using UnrealBuildTool;
using System.IO;
// UnrealBuildTool rules for the StreamlineD3D12RHI module.
// Besides the module dependencies this sets the ENGINE_PROVIDES_ID3D12DYNAMICRHI and
// ENGINE_ID3D12DYNAMICRHI_NEEDS_CMDLIST definitions from the engine version being built against.
public class StreamlineD3D12RHI : ModuleRules
{
    public StreamlineD3D12RHI(ReadOnlyTargetRules Target) : base(Target)
    {
        PCHUsage = ModuleRules.PCHUsageMode.UseExplicitOrSharedPCHs;

        PublicDependencyModuleNames.Add("StreamlineRHI");

        PrivateDependencyModuleNames.AddRange(
            new string[]
            {
                "Core",
                "D3D12RHI",
                "Engine",
                "RenderCore",
                "RHI",
                "Streamline",
                "StreamlineRHI",
            }
        );

        AddEngineThirdPartyPrivateStaticDependencies(Target, "DX12");

        int EngineMajor = ReadOnlyBuildVersion.Current.MajorVersion;
        int EngineMinor = ReadOnlyBuildVersion.Current.MinorVersion;
        bool bIsUE5 = (EngineMajor == 5);

        if (bIsUE5 && EngineMinor >= 1)
        {
            // 5.1+ exposes the public ID3D12DynamicRHI interface
            PrivateDefinitions.Add("ENGINE_PROVIDES_ID3D12DYNAMICRHI=1");

            // from 5.5 on the interface functions additionally take the RHI command list
            PrivateDefinitions.Add(EngineMinor >= 5
                ? "ENGINE_ID3D12DYNAMICRHI_NEEDS_CMDLIST=1"
                : "ENGINE_ID3D12DYNAMICRHI_NEEDS_CMDLIST=0");
        }
        else
        {
            // older engines: fall back to the private D3D12RHI headers
            PrivateDefinitions.Add("ENGINE_PROVIDES_ID3D12DYNAMICRHI=0");
            PrivateIncludePaths.Add(Path.Combine(EngineDirectory, "Source/Runtime/D3D12RHI/Private"));
        }

        if (bIsUE5)
        {
            PrivateDependencyModuleNames.Add("RHICore");
        }
    }
}

View File

@ -0,0 +1,712 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "StreamlineAPI.h"
#include "StreamlineRHI.h"
#include "StreamlineRHIPrivate.h"
#include "HAL/IConsoleManager.h"
#include "HAL/PlatformProcess.h"
#include "HAL/ThreadManager.h"
#include "Runtime/Launch/Resources/Version.h"
#include "sl.h"
#include "sl_helpers.h"
#define LOCTEXT_NAMESPACE "FStreamlineRHIModule"
#ifndef LOG_SL_FUNCTIONS
#define LOG_SL_FUNCTIONS (!(UE_BUILD_TEST || UE_BUILD_SHIPPING))
#endif
// Backing storage for r.Streamline.LogFunctions (see LogStreamlineFunctions / SetStreamlineAPILoggingEnabled).
static bool bLogStreamlineLogFunctions = false;
static FAutoConsoleVariableRef CVarStreamlineLogFunctions(
    TEXT("r.Streamline.LogFunctions"),
    bLogStreamlineLogFunctions,
    TEXT("Enable/disable whether streamline functions calls and thread are written to the log. Is also set to true with -slloglevel=3 \n"),
    ECVF_Default);

// Backing storage for r.Streamline.LogFunctions.LogLevel; selects which calls get logged once logging is enabled.
static int32 LogStreamlineLogLevel = 0;
static FAutoConsoleVariableRef CVarStreamlieeLogLevel(
    TEXT("r.Streamline.LogFunctions.LogLevel"),
    LogStreamlineLogLevel,
    TEXT("Determines which functions are logged and noe\n")
    TEXT("0: Log subset of features, don't log Get Functions\n")
    TEXT("1: Log all features, don't log Get functions\n")
    TEXT("2: Log all features, log Get functions\n"),
    ECVF_Default);
// Whether Streamline API calls should be written to the log.
// Always false when LOG_SL_FUNCTIONS is compiled out; otherwise the value of r.Streamline.LogFunctions.
bool LogStreamlineFunctions()
{
#if !LOG_SL_FUNCTIONS
    return false;
#else
    return bLogStreamlineLogFunctions;
#endif
}
// Programmatic switch for Streamline API call logging: writes bEnabled into the variable
// backing the r.Streamline.LogFunctions console variable.
void SetStreamlineAPILoggingEnabled(bool bEnabled)
{
bLogStreamlineLogFunctions = bEnabled;
}
namespace sl
{
    // Name of a single FeatureRequirementFlags bit, or "Unknown" for anything else.
    inline const char* getFeatureRequirementsSingleBitFlagsAsStr(FeatureRequirementFlags f)
    {
        switch (f)
        {
            SL_CASE_STR(FeatureRequirementFlags::eD3D11Supported);
            SL_CASE_STR(FeatureRequirementFlags::eD3D12Supported);
            SL_CASE_STR(FeatureRequirementFlags::eVulkanSupported);
            SL_CASE_STR(FeatureRequirementFlags::eVSyncOffRequired);
            SL_CASE_STR(FeatureRequirementFlags::eHardwareSchedulingRequired);
        }
        return "Unknown";
    }

    // Renders a combination of FeatureRequirementFlags as "FlagA|FlagB|...".
    // Bits are visited from eHardwareSchedulingRequired downwards.
    inline FString getFeatureRequirementsFlagsAsStr(FeatureRequirementFlags f)
    {
        FString JoinedNames;

        uint32 CandidateBit = uint32(sl::FeatureRequirementFlags::eHardwareSchedulingRequired);
        while (CandidateBit != 0)
        {
            const sl::FeatureRequirementFlags CandidateFlag = sl::FeatureRequirementFlags(CandidateBit);
            if (SLBitwiseAnd(f, CandidateFlag) == CandidateFlag)
            {
                JoinedNames.Append(getFeatureRequirementsSingleBitFlagsAsStr(CandidateFlag));
                JoinedNames.AppendChar(TCHAR('|'));
            }
            CandidateBit >>= 1;
        }

        // drop the trailing separator, if any flag was appended
        JoinedNames.RemoveFromEnd(TEXT("|"));
        return JoinedNames;
    }
}
// Logs whether Feature is supported on Adapter and, unless the feature is missing altogether,
// also its version and its requirements.
STREAMLINERHI_API void LogStreamlineFeatureSupport(sl::Feature Feature, const sl::AdapterInfo& Adapter)
{
    const char* const FeatureName = sl::getFeatureAsStr(Feature);

    sl::Result SupportedResult = SLisFeatureSupported(Feature,Adapter);
    UE_LOG(LogStreamlineRHI, Log, TEXT("SLisFeatureSupported(%s) -> (%d, %s)"), ANSI_TO_TCHAR(FeatureName), SupportedResult, ANSI_TO_TCHAR(sl::getResultAsStr(SupportedResult)));

    // putting this here since the alternative of having FStreamlineRHI compute & store that was annoying since it would mean to have sl.h be a public include
    if (SupportedResult == sl::Result::eErrorFeatureMissing)
    {
        return;
    }

    sl::FeatureVersion Version;
    sl::Result VersionResult = SLgetFeatureVersion(Feature, Version);
    UE_LOG(LogStreamlineRHI, Log, TEXT("SLgetFeatureVersion(%s) versionSL = %s, versionNGX = %s -> (%d, %s)"),
        ANSI_TO_TCHAR(FeatureName), ANSI_TO_TCHAR(Version.versionSL.toStr().c_str()), ANSI_TO_TCHAR(Version.versionNGX.toStr().c_str()), VersionResult, ANSI_TO_TCHAR(sl::getResultAsStr(VersionResult)));

    sl::FeatureRequirements Requirements;
    sl::Result RequirementsResult = SLgetFeatureRequirements(Feature, Requirements);
    UE_LOG(LogStreamlineRHI, Log, TEXT("SLgetFeatureRequirements(%s) -> (%d, %s)"), ANSI_TO_TCHAR(FeatureName), RequirementsResult, ANSI_TO_TCHAR(sl::getResultAsStr(RequirementsResult)));

    LogStreamlineFeatureRequirements(Feature, Requirements);
}
// Writes the contents of Requirements for Feature to the log: the requirement flags, CPU thread
// and viewport limits, detected vs. required OS and driver versions and the list of required tags.
void LogStreamlineFeatureRequirements(sl::Feature Feature, const sl::FeatureRequirements& Requirements)
{
UE_LOG(LogStreamlineRHI, Log, TEXT("FeatureRequirements %s: flags %s"), ANSI_TO_TCHAR(sl::getFeatureAsStr(Feature)), *sl::getFeatureRequirementsFlagsAsStr(Requirements.flags));
UE_LOG(LogStreamlineRHI, Log, TEXT("maxNumCPUThreads : %u"),Requirements.maxNumCPUThreads);
UE_LOG(LogStreamlineRHI, Log, TEXT("maxNumViewports : %u"), Requirements.maxNumViewports);
UE_LOG(LogStreamlineRHI, Log, TEXT("osVersion detected: %s, required: %s"), ANSI_TO_TCHAR(Requirements.osVersionDetected.toStr().c_str()), ANSI_TO_TCHAR(Requirements.osVersionRequired.toStr().c_str()));
UE_LOG(LogStreamlineRHI, Log, TEXT("driverVersion detected: %s, required: %s"), ANSI_TO_TCHAR(Requirements.driverVersionDetected.toStr().c_str()), ANSI_TO_TCHAR(Requirements.driverVersionRequired.toStr().c_str()));
// each required tag is printed as "<buffer type name> (<numeric value>)", comma separated
UE_LOG(LogStreamlineRHI, Log, TEXT("requiredTags (%u): {%s}"),
Requirements.numRequiredTags
,*FString::JoinBy(MakeArrayView( Requirements.requiredTags, Requirements.numRequiredTags), TEXT(", "), [](const sl::BufferType& Buffer) { return FString::Printf(TEXT("%s (%u)"), ANSI_TO_TCHAR(sl::getBufferTypeAsStr(Buffer)), Buffer); })
);
}
// File-local state: the handle of the Streamline interposer DLL and one function pointer per
// core Streamline entry point. The SL* wrapper functions in this file call through these
// pointers; all of them start out null.
namespace
{
// handle of the loaded Streamline interposer DLL
void* SLInterPoserDLL = nullptr;
PFun_slInit* Ptr_init = nullptr;
PFun_slShutdown* Ptr_shutdown = nullptr;
PFun_slIsFeatureSupported* Ptr_isFeatureSupported = nullptr;
PFun_slIsFeatureLoaded* Ptr_isFeatureLoaded = nullptr;
PFun_slSetFeatureLoaded* Ptr_setFeatureLoaded = nullptr;
PFun_slEvaluateFeature* Ptr_evaluateFeature = nullptr;
PFun_slAllocateResources* Ptr_allocateResources = nullptr;
PFun_slFreeResources* Ptr_freeResources = nullptr;
PFun_slSetTag* Ptr_setTag = nullptr;
PFun_slGetFeatureRequirements* Ptr_getFeatureRequirements = nullptr;
PFun_slGetFeatureVersion* Ptr_getFeatureVersion = nullptr;
PFun_slUpgradeInterface* Ptr_upgradeInterface = nullptr;
PFun_slSetConstants* Ptr_setConstants = nullptr;
PFun_slGetNativeInterface* Ptr_getNativeInterface = nullptr;
PFun_slGetFeatureFunction* Ptr_getFeatureFunction = nullptr;
PFun_slGetNewFrameToken* Ptr_getNewFrameToken = nullptr;
PFun_slSetD3DDevice* Ptr_setD3DDevice = nullptr;
// value reported by AreStreamlineFunctionsLoaded()
bool bIsStreamlineFunctionPointersLoaded = false;
}
// Describes the calling thread as "<thread name> (tid=<thread id>)" for log output.
FString CurrentThreadName()
{
    const uint32 CallingThreadId = FPlatformTLS::GetCurrentThreadId();
    return FString::Printf(TEXT("%s (tid=%u)"), *FThreadManager::GetThreadName(CallingThreadId), CallingThreadId);
}
// Logs a Streamline feature function call (name, calling thread, arguments) if function logging
// is enabled. r.Streamline.LogFunctions.LogLevel filters what gets through:
//  < 1: calls for the PCL and Reflex features are dropped
//  < 2: calls whose function name contains "Get" are dropped
void LogStreamlineFunctionCall(sl::Feature Feature, const FString& Function, const FString& Arguments)
{
#if LOG_SL_FUNCTIONS
    if (!LogStreamlineFunctions())
    {
        return;
    }

    const bool bFilteredFeature = (Feature == sl::kFeaturePCL) || (Feature == sl::kFeatureReflex);
    if ((LogStreamlineLogLevel < 1) && bFilteredFeature)
    {
        return;
    }

    if ((LogStreamlineLogLevel < 2) && Function.Contains(TEXT("Get")))
    {
        return;
    }

    UE_LOG(LogStreamlineRHI, Log, TEXT("%s %s %s"), *Function, *CurrentThreadName(), *Arguments);
#endif
}
// Returns the file-local bIsStreamlineFunctionPointersLoaded flag.
STREAMLINERHI_API bool AreStreamlineFunctionsLoaded()
{
return bIsStreamlineFunctionPointersLoaded;
}
// Wrapper around the slInit entry point of the Streamline interposer.
// Asserts that the function pointers are loaded, optionally logs the call (sdkVersion in hex)
// and forwards pref/sdkVersion to Ptr_init, returning its result.
sl::Result SLinit(const sl::Preferences& pref, uint64_t sdkVersion)
{
// we cannot call IsStreamlineSupported since that checks whether bIsStreamlineInitialized is set to true, which it will with the result of this call
check(AreStreamlineFunctionsLoaded());
check(SLInterPoserDLL);
check(Ptr_init != nullptr);
#if LOG_SL_FUNCTIONS
if(LogStreamlineFunctions())
{
UE_LOG(LogStreamlineRHI, Log, TEXT("%s %s sdkVersion=%llx"), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName(), sdkVersion);
}
#endif
return Ptr_init(pref, sdkVersion);
}
// Wrapper around Ptr_shutdown.
// Asserts Streamline is supported and the function pointer is set, optionally logs the call,
// then returns the result of Ptr_shutdown().
sl::Result SLshutdown()
{
check(IsStreamlineSupported());
check(SLInterPoserDLL);
check(Ptr_shutdown != nullptr);
#if LOG_SL_FUNCTIONS
if (LogStreamlineFunctions())
{
UE_LOG(LogStreamlineRHI, Log, TEXT("%s %s"), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName());
}
#endif
return Ptr_shutdown();
}
// Wrapper around Ptr_isFeatureSupported.
// Asserts Streamline is supported and the function pointer is set, optionally logs the feature,
// then forwards feature/adapterInfo and returns the result.
sl::Result SLisFeatureSupported(sl::Feature feature, const sl::AdapterInfo& adapterInfo)
{
check(IsStreamlineSupported());
check(SLInterPoserDLL);
check(Ptr_isFeatureSupported != nullptr);
#if LOG_SL_FUNCTIONS
if (LogStreamlineFunctions())
{
UE_LOG(LogStreamlineRHI, Log, TEXT("%s %s feature=%s (%u)"), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName(),
ANSI_TO_TCHAR(sl::getFeatureAsStr(feature)), feature );
}
#endif
return Ptr_isFeatureSupported(feature, adapterInfo);
}
// Wrapper around Ptr_isFeatureLoaded.
// Asserts Streamline is supported and the function pointer is set, optionally logs the feature,
// then forwards the call; `loaded` is passed through by reference as the output of the callee.
sl::Result SLisFeatureLoaded(sl::Feature feature, bool& loaded)
{
check(IsStreamlineSupported());
check(SLInterPoserDLL);
check(Ptr_isFeatureLoaded != nullptr);
#if LOG_SL_FUNCTIONS
if (LogStreamlineFunctions())
{
UE_LOG(LogStreamlineRHI, Log, TEXT("%s %s feature=%s (%u)"), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName(),
ANSI_TO_TCHAR(sl::getFeatureAsStr(feature)), feature );
}
#endif
return Ptr_isFeatureLoaded(feature, loaded);
}
// Wrapper around Ptr_setFeatureLoaded.
// Asserts Streamline is supported and the function pointer is set, optionally logs feature and
// the requested loaded state, then forwards the call and returns the result.
sl::Result SLsetFeatureLoaded(sl::Feature feature, bool loaded)
{
check(IsStreamlineSupported());
check(SLInterPoserDLL);
check(Ptr_setFeatureLoaded != nullptr);
#if LOG_SL_FUNCTIONS
if (LogStreamlineFunctions())
{
UE_LOG(LogStreamlineRHI, Log, TEXT("%s %s feature=%s (%u), loaded=%u"), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName(),
ANSI_TO_TCHAR(sl::getFeatureAsStr(feature)), feature, loaded);
}
#endif
return Ptr_setFeatureLoaded(feature, loaded);
}
// Backfill of the findStruct helpers, only compiled for Streamline SDK 2.x versions below 2.5.
// All helpers walk the singly linked `next` chain of sl::BaseStructure and match on structType.
#if (SL_VERSION_MAJOR == 2) && (SL_VERSION_MINOR < 5)
namespace sl
{
    //! Find the first struct of type T in the chain starting at ptr, or nullptr if there is none
    template<typename T>
    T* findStruct(const void* ptr)
    {
        auto base = static_cast<const BaseStructure*>(ptr);
        while (base && base->structType != T::s_structType)
        {
            base = base->next;
        }
        return (T*)base;
    }

    //! Same as above for a non-const chain head
    template<typename T>
    T* findStruct(void* ptr)
    {
        auto base = static_cast<const BaseStructure*>(ptr);
        while (base && base->structType != T::s_structType)
        {
            base = base->next;
        }
        return (T*)base;
    }

    //! Find a struct of type T, but stop the search if we find a struct of type S
    template<typename T, typename S>
    T* findStruct(void* ptr)
    {
        auto base = static_cast<const BaseStructure*>(ptr);
        while (base && base->structType != T::s_structType)
        {
            base = base->next;
            // If we find a struct of type S, we know should stop the search.
            // base is null once the end of the chain is reached, so it must be checked before it is dereferenced.
            if (base && base->structType == S::s_structType)
            {
                return nullptr;
            }
        }
        return (T*)base;
    }

    //! Find the first struct of type T in any of the `count` chains whose heads are stored in ptr
    template<typename T>
    T* findStruct(const void** ptr, uint32_t count)
    {
        const BaseStructure* base{};
        for (uint32_t i = 0; base == nullptr && i < count; i++)
        {
            base = static_cast<const BaseStructure*>(ptr[i]);
            while (base && base->structType != T::s_structType)
            {
                base = base->next;
            }
        }
        return (T*)base;
    }

    //! Collect all structs of type T from the `count` chains in ptr (appended to structs).
    //! Returns true if structs is non-empty afterwards.
    template<typename T>
    bool findStructs(const void** ptr, uint32_t count, std::vector<T*>& structs)
    {
        for (uint32_t i = 0; i < count; i++)
        {
            auto base = static_cast<const BaseStructure*>(ptr[i]);
            while (base)
            {
                if (base->structType == T::s_structType)
                {
                    structs.push_back((T*)base);
                }
                base = base->next;
            }
        }
        return structs.size() > 0;
    }
}
#endif
// Wrapper around Ptr_evaluateFeature.
//  - feature:   feature to evaluate
//  - frame:     frame token the evaluation belongs to
//  - inputs:    array of numInputs pointers, each the head of a chain of sl structures
//  - cmdBuffer: native command buffer passed through to Streamline
// Asserts Streamline is supported and the function pointer is set, optionally logs the call
// (including the viewport handle found in the inputs, if any) and returns the callee's result.
sl::Result SLevaluateFeature(sl::Feature feature, const sl::FrameToken& frame, const sl::BaseStructure** inputs, uint32_t numInputs, sl::CommandBuffer* cmdBuffer)
{
    check(IsStreamlineSupported());
    check(SLInterPoserDLL);
    check(Ptr_evaluateFeature != nullptr);
#if LOG_SL_FUNCTIONS
    if (LogStreamlineFunctions())
    {
        FString ViewportHandle;
        // inputs is an array of chain heads, not a chain head itself, so it has to be searched with
        // the (array, count) overload. The single-pointer overload would reinterpret the pointer
        // array as a BaseStructure and read past it for small numInputs.
        auto viewport = sl::findStruct<sl::ViewportHandle>(reinterpret_cast<const void**>(inputs), numInputs);
        if (viewport)
        {
            ViewportHandle = FString::FromInt(viewport->operator unsigned int());
        }
        UE_LOG(LogStreamlineRHI, Log, TEXT("%s %s feature=%s (%u) frame=%u, numInputs=%u, {viewport=%s} "), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName(),
            ANSI_TO_TCHAR(sl::getFeatureAsStr(feature)), feature, static_cast<uint32_t>(frame), numInputs, *ViewportHandle);
    }
#endif
    return Ptr_evaluateFeature(feature, frame, inputs, numInputs, cmdBuffer);
}
// Wrapper around Ptr_allocateResources.
// Asserts Streamline is supported and the function pointer is set, optionally logs feature and
// viewport, then forwards cmdBuffer/feature/viewport and returns the result.
sl::Result SLAllocateResources(sl::CommandBuffer* cmdBuffer, sl::Feature feature, const sl::ViewportHandle& viewport)
{
check(IsStreamlineSupported());
check(SLInterPoserDLL);
check(Ptr_allocateResources != nullptr);
#if LOG_SL_FUNCTIONS
if (LogStreamlineFunctions())
{
UE_LOG(LogStreamlineRHI, Log, TEXT("%s %s feature=%s (%u), viewport=%u"), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName(),
ANSI_TO_TCHAR(sl::getFeatureAsStr(feature)), feature, static_cast<uint32_t>(viewport));
}
#endif
return Ptr_allocateResources(cmdBuffer, feature, viewport);
}
// Wrapper around Ptr_freeResources.
// Asserts Streamline is supported and the function pointer is set, optionally logs feature and
// viewport, then forwards feature/viewport and returns the result.
sl::Result SLFreeResources(sl::Feature feature, const sl::ViewportHandle& viewport)
{
check(IsStreamlineSupported());
check(SLInterPoserDLL);
check(Ptr_freeResources != nullptr);
#if LOG_SL_FUNCTIONS
if (LogStreamlineFunctions())
{
UE_LOG(LogStreamlineRHI, Log, TEXT("%s %s feature=%s (%u), viewport=%u"), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName(),
ANSI_TO_TCHAR(sl::getFeatureAsStr(feature)), feature, static_cast<uint32_t>(viewport));
}
#endif
return Ptr_freeResources(feature, viewport);
}
// Wrapper around Ptr_setTag.
// Asserts Streamline is supported and the function pointer is set. When logging is enabled and
// numTags > 0, logs every tag as "<buffer type>(<value>) [left, top, width, height]" together
// with the viewport. Then forwards viewport/tags/numTags/cmdBuffer and returns the result.
sl::Result SLsetTag(const sl::ViewportHandle& viewport, const sl::ResourceTag* tags, uint32_t numTags, sl::CommandBuffer* cmdBuffer)
{
check(IsStreamlineSupported());
check(SLInterPoserDLL);
check(Ptr_setTag != nullptr);
#if LOG_SL_FUNCTIONS
if (LogStreamlineFunctions())
{
if (numTags > 0)
{
const FString Tags = FString::JoinBy(MakeArrayView(tags, numTags), TEXT(", "), [](const sl::ResourceTag Tag)
{ return FString::Printf(TEXT("%s(%u) [left=%u, top=%u, width=%u, height=%u] "), ANSI_TO_TCHAR(sl::getBufferTypeAsStr(Tag.type)), Tag.type,
Tag.extent.left, Tag.extent.top, Tag.extent.width, Tag.extent.height
);
}
);
UE_LOG(LogStreamlineRHI, Log, TEXT("%s %s tags=%s (%u), viewport=%u"), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName(),
*Tags, numTags, static_cast<uint32_t>(viewport));
}
}
#endif
return Ptr_setTag(viewport, tags, numTags, cmdBuffer);
}
// Wrapper around Ptr_getFeatureRequirements.
// Asserts Streamline is supported and the function pointer is set, optionally logs the feature,
// then forwards the call; `requirements` is passed through by reference as the callee's output.
sl::Result SLgetFeatureRequirements(sl::Feature feature, sl::FeatureRequirements& requirements)
{
check(IsStreamlineSupported());
check(SLInterPoserDLL);
check(Ptr_getFeatureRequirements != nullptr);
#if LOG_SL_FUNCTIONS
if (LogStreamlineFunctions())
{
UE_LOG(LogStreamlineRHI, Log, TEXT("%s %s feature=%s (%u)"), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName(),
ANSI_TO_TCHAR(sl::getFeatureAsStr(feature)), feature);
}
#endif
return Ptr_getFeatureRequirements(feature, requirements);
}
// Wrapper around Ptr_getFeatureVersion.
// Asserts Streamline is supported and the function pointer is set, optionally logs the feature,
// then forwards the call; `version` is passed through by reference as the callee's output.
sl::Result SLgetFeatureVersion(sl::Feature feature, sl::FeatureVersion& version)
{
check(IsStreamlineSupported());
check(SLInterPoserDLL);
check(Ptr_getFeatureVersion != nullptr);
#if LOG_SL_FUNCTIONS
if (LogStreamlineFunctions())
{
UE_LOG(LogStreamlineRHI, Log, TEXT("%s %s feature=%s (%u)"), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName(),
ANSI_TO_TCHAR(sl::getFeatureAsStr(feature)), feature);
}
#endif
return Ptr_getFeatureVersion(feature, version);
}
// Wrapper around Ptr_upgradeInterface.
// Asserts Streamline is supported and the function pointer is set, optionally logs the call,
// then forwards baseInterface and returns the result.
sl::Result SLUpgradeInterface(void** baseInterface)
{
check(IsStreamlineSupported());
check(SLInterPoserDLL);
check(Ptr_upgradeInterface != nullptr);
#if LOG_SL_FUNCTIONS
if (LogStreamlineFunctions())
{
UE_LOG(LogStreamlineRHI, Log, TEXT("%s %s"), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName());
}
#endif
return Ptr_upgradeInterface(baseInterface);
}
// Wrapper around Ptr_setConstants.
// Asserts Streamline is supported and the function pointer is set, optionally logs the frame,
// the camera aspect ratio, the reciprocal of mvecScale and the viewport, then forwards
// values/frame/viewport and returns the result.
sl::Result SLsetConstants(const sl::Constants& values, const sl::FrameToken& frame, const sl::ViewportHandle& viewport)
{
check(IsStreamlineSupported());
check(SLInterPoserDLL);
check(Ptr_setConstants != nullptr);
#if LOG_SL_FUNCTIONS
if (LogStreamlineFunctions())
{
// cameraAspectRatio and mvecScale (derived from 1/ ViewRect size) are typically different for each view thus useful to debug "same view different constants per frame errors"
UE_LOG(LogStreamlineRHI, Log, TEXT("%s %s frame=%u, values.cameraAspectRatio=%0.2f 1/mvecScale=%0.0f x %0.0f viewport=%u"), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName(), static_cast<uint32_t>(frame),
values.cameraAspectRatio, 1.0f/values.mvecScale.x, 1.0f / values.mvecScale.y,
static_cast<uint32_t>(viewport));
}
#endif
return Ptr_setConstants(values, frame, viewport);
}
// Wrapper around Ptr_getNativeInterface.
// Asserts Streamline is supported and the function pointer is set, optionally logs the
// proxyInterface pointer, then forwards proxyInterface/baseInterface and returns the result.
sl::Result SLgetNativeInterface(void* proxyInterface, void** baseInterface)
{
check(IsStreamlineSupported());
check(SLInterPoserDLL);
check(Ptr_getNativeInterface != nullptr);
#if LOG_SL_FUNCTIONS
if (LogStreamlineFunctions())
{
UE_LOG(LogStreamlineRHI, Log, TEXT("%s %s proxyInterface=%p"), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName(), proxyInterface);
}
#endif
return Ptr_getNativeInterface(proxyInterface, baseInterface);
}
// Wrapper around Ptr_getFeatureFunction.
// Asserts Streamline is supported and the function pointer is set, optionally logs feature and
// functionName, then forwards the call; `function` is passed through by reference as the
// callee's output.
sl::Result SLgetFeatureFunction(sl::Feature feature, const char* functionName, void*& function)
{
check(IsStreamlineSupported());
check(SLInterPoserDLL);
check(Ptr_getFeatureFunction != nullptr);
#if LOG_SL_FUNCTIONS
if (LogStreamlineFunctions())
{
UE_LOG(LogStreamlineRHI, Log, TEXT("%s %s feature=%s (%u), functionName=%s"), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName(),
ANSI_TO_TCHAR(sl::getFeatureAsStr(feature)), feature, ANSI_TO_TCHAR(functionName));
}
#endif
return Ptr_getFeatureFunction(feature, functionName, function);
}
// Forwards to the interposer's slGetNewFrameToken export.
// token and the optional frameIndex are handed to Streamline unchanged; Streamline's result code is returned as-is.
sl::Result SLgetNewFrameToken(sl::FrameToken*& token, uint32_t* frameIndex)
{
	// Streamline must be supported, the interposer loaded and the export resolved before forwarding
	check(IsStreamlineSupported());
	check(SLInterPoserDLL);
	check(Ptr_getNewFrameToken != nullptr);

#if LOG_SL_FUNCTIONS
	// This call is only logged at log level 2 or above
	if (LogStreamlineFunctions() && LogStreamlineLogLevel >= 2)
	{
		if (frameIndex == nullptr)
		{
			UE_LOG(LogStreamlineRHI, Log, TEXT("%s %s frameIndex=nullptr"), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName());
		}
		else
		{
			UE_LOG(LogStreamlineRHI, Log, TEXT("%s %s frameIndex=%u"), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName(), *frameIndex);
		}
	}
#endif

	const sl::Result CallResult = Ptr_getNewFrameToken(token, frameIndex);
	return CallResult;
}
// Forwards to the interposer's slSetD3DDevice export.
// d3dDevice is handed to Streamline unchanged; Streamline's result code is returned as-is.
sl::Result SLsetD3DDevice(void* d3dDevice)
{
	// Streamline must be supported, the interposer loaded and the export resolved before forwarding
	check(IsStreamlineSupported());
	check(SLInterPoserDLL);
	check(Ptr_setD3DDevice != nullptr);

#if LOG_SL_FUNCTIONS
	const bool bLogThisCall = LogStreamlineFunctions();
	if (bLogThisCall)
	{
		UE_LOG(LogStreamlineRHI, Log, TEXT("%s %s"), ANSI_TO_TCHAR(__FUNCTION__), *CurrentThreadName());
	}
#endif

	const sl::Result CallResult = Ptr_setD3DDevice(d3dDevice);
	return CallResult;
}
bool LoadStreamlineFunctionPointers(const FString& InterposerBinaryPath)
{
if (!bIsStreamlineFunctionPointersLoaded)
{
UE_LOG(LogStreamlineRHI, Log, TEXT("loading core Streamline functions from Streamline interposer at %s"), *InterposerBinaryPath);
const bool bInterposerBinarySigned = slVerifyEmbeddedSignature(InterposerBinaryPath);
#if UE_BUILD_SHIPPING
if (bInterposerBinarySigned)
#endif
{
SLInterPoserDLL = FPlatformProcess::GetDllHandle(*InterposerBinaryPath);
if (SLInterPoserDLL != nullptr)
{
UE_LOG(LogStreamlineRHI, Log, TEXT("SLInterPoserLibrary = %p"), SLInterPoserDLL);
}
else
{
UE_LOG(LogStreamlineRHI, Error, TEXT("Unable to load SLInterPoserLibrary from %s"), *InterposerBinaryPath);
return false;
}
Ptr_init = (PFun_slInit*)(FWindowsPlatformProcess::GetDllExport(SLInterPoserDLL, TEXT("slInit")));
UE_LOG(LogStreamlineRHI, Log, TEXT("slInit = %p"), Ptr_init);
check(Ptr_init);
Ptr_shutdown = (PFun_slShutdown*)(FWindowsPlatformProcess::GetDllExport(SLInterPoserDLL, TEXT("slShutdown")));
UE_LOG(LogStreamlineRHI, Log, TEXT("slShutdown = %p"), Ptr_shutdown);
check(Ptr_shutdown);
Ptr_isFeatureSupported = (PFun_slIsFeatureSupported*)(FWindowsPlatformProcess::GetDllExport(SLInterPoserDLL, TEXT("slIsFeatureSupported")));
UE_LOG(LogStreamlineRHI, Log, TEXT("slIsFeatureSupported = %p"), Ptr_isFeatureSupported);
check(Ptr_isFeatureSupported);
Ptr_isFeatureLoaded = (PFun_slIsFeatureLoaded*)(FWindowsPlatformProcess::GetDllExport(SLInterPoserDLL, TEXT("slIsFeatureLoaded")));
UE_LOG(LogStreamlineRHI, Log, TEXT("slIsFeatureLoaded = %p"), Ptr_isFeatureLoaded);
check(Ptr_isFeatureLoaded);
Ptr_setFeatureLoaded = (PFun_slSetFeatureLoaded*)(FWindowsPlatformProcess::GetDllExport(SLInterPoserDLL, TEXT("slSetFeatureLoaded")));
UE_LOG(LogStreamlineRHI, Log, TEXT("slSetFeatureLoaded = %p"), Ptr_setFeatureLoaded);
check(Ptr_setFeatureLoaded);
Ptr_evaluateFeature = (PFun_slEvaluateFeature*)(FWindowsPlatformProcess::GetDllExport(SLInterPoserDLL, TEXT("slEvaluateFeature")));
UE_LOG(LogStreamlineRHI, Log, TEXT("slEvaluateFeature = %p"), Ptr_evaluateFeature);
check(Ptr_evaluateFeature);
Ptr_allocateResources = (PFun_slAllocateResources*)(FWindowsPlatformProcess::GetDllExport(SLInterPoserDLL, TEXT("slAllocateResources")));
UE_LOG(LogStreamlineRHI, Log, TEXT("slAllocateResources = %p"), Ptr_allocateResources);
check(Ptr_allocateResources);
Ptr_freeResources = (PFun_slFreeResources*)(FWindowsPlatformProcess::GetDllExport(SLInterPoserDLL, TEXT("slFreeResources")));
UE_LOG(LogStreamlineRHI, Log, TEXT("slFreeResources = %p"), Ptr_freeResources);
check(Ptr_freeResources);
Ptr_setTag = (PFun_slSetTag*)(FWindowsPlatformProcess::GetDllExport(SLInterPoserDLL, TEXT("slSetTag")));
UE_LOG(LogStreamlineRHI, Log, TEXT("slSetTag = %p"), Ptr_setTag);
check(Ptr_setTag);
Ptr_getFeatureRequirements = (PFun_slGetFeatureRequirements*)(FWindowsPlatformProcess::GetDllExport(SLInterPoserDLL, TEXT("slGetFeatureRequirements")));
UE_LOG(LogStreamlineRHI, Log, TEXT("slGetFeatureRequirements = %p"), Ptr_getFeatureRequirements);
check(Ptr_getFeatureRequirements);
Ptr_getFeatureVersion = (PFun_slGetFeatureVersion*)(FWindowsPlatformProcess::GetDllExport(SLInterPoserDLL, TEXT("slGetFeatureVersion")));
UE_LOG(LogStreamlineRHI, Log, TEXT("slGetFeatureVersion = %p"), Ptr_getFeatureVersion);
check(Ptr_getFeatureVersion);
Ptr_upgradeInterface = (PFun_slUpgradeInterface*)(FWindowsPlatformProcess::GetDllExport(SLInterPoserDLL, TEXT("slUpgradeInterface")));
UE_LOG(LogStreamlineRHI, Log, TEXT("slUpgradeInterface = %p"), Ptr_upgradeInterface);
check(Ptr_upgradeInterface);
Ptr_setConstants = (PFun_slSetConstants*)(FWindowsPlatformProcess::GetDllExport(SLInterPoserDLL, TEXT("slSetConstants")));
UE_LOG(LogStreamlineRHI, Log, TEXT("slSetConstants = %p"), Ptr_setConstants);
check(Ptr_setConstants);
Ptr_getNativeInterface = (PFun_slGetNativeInterface*)(FWindowsPlatformProcess::GetDllExport(SLInterPoserDLL, TEXT("slGetNativeInterface")));
UE_LOG(LogStreamlineRHI, Log, TEXT("slGetNativeInterface = %p"), Ptr_getNativeInterface);
check(Ptr_getNativeInterface);
Ptr_getFeatureFunction = (PFun_slGetFeatureFunction*)(FWindowsPlatformProcess::GetDllExport(SLInterPoserDLL, TEXT("slGetFeatureFunction")));
UE_LOG(LogStreamlineRHI, Log, TEXT("slGetFeatureFunction = %p"), Ptr_getFeatureFunction);
check(Ptr_getFeatureFunction);
Ptr_getNewFrameToken = (PFun_slGetNewFrameToken*)(FWindowsPlatformProcess::GetDllExport(SLInterPoserDLL, TEXT("slGetNewFrameToken")));
UE_LOG(LogStreamlineRHI, Log, TEXT("slGetNewFrameToken = %p"), Ptr_getNewFrameToken);
check(Ptr_getNewFrameToken);
Ptr_setD3DDevice = (PFun_slSetD3DDevice*)(FWindowsPlatformProcess::GetDllExport(SLInterPoserDLL, TEXT("slSetD3DDevice")));
UE_LOG(LogStreamlineRHI, Log, TEXT("slSetD3DDevice = %p"), Ptr_setD3DDevice);
check(Ptr_setD3DDevice);
bIsStreamlineFunctionPointersLoaded = true;
}
}
return bIsStreamlineFunctionPointersLoaded;
}
#undef LOCTEXT_NAMESPACE

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,21 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "CoreMinimal.h"
// Log category used by the StreamlineRHI module.
DECLARE_LOG_CATEGORY_EXTERN(LogStreamlineRHI, Log, All);
// Verifies the embedded code signatures of the binary at PathToBinary.
// Returns true only when the primary signature is accepted by WinVerifyTrust, exactly one secondary
// signature is present and that secondary signature matches the NVIDIA Streamline public key.
bool slVerifyEmbeddedSignature(const FString& PathToBinary);
// Loads the Streamline interposer DLL at InterposerBinaryPath and resolves the core Streamline entry points.
// Returns true once the function pointers are loaded.
bool LoadStreamlineFunctionPointers(const FString& InterposerBinaryPath);
// NOTE(review): definition is not in this header; presumably toggles logging of Streamline API calls - confirm.
void SetStreamlineAPILoggingEnabled(bool bEnabled);

View File

@ -0,0 +1,479 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "StreamlineRHIPrivate.h"
#include "Misc/Paths.h"
#define _UNICODE 1
#define UNICODE 1
#include <tchar.h>
//#include <stdio.h>
//#include <stdlib.h>
#include "Windows/AllowWindowsPlatformTypes.h"
THIRD_PARTY_INCLUDES_START
#pragma push_macro("NTDDI_VERSION")
#if (NTDDI_VERSION < NTDDI_WIN8)
#undef NTDDI_VERSION
#define NTDDI_VERSION NTDDI_WIN8
//this header cannot be included directly because the current _WIN32_WINNT is less than 0x0602 (the value assigned in UEBuildWindows.cs:139)
//raising NTDDI_VERSION only adds a couple of interface declarations; it does not affect any imported function
#include <shobjidl.h>
#else
#include <shobjidl.h>
#endif
#include <windows.h>
#include <Softpub.h>
#include <wincrypt.h>
#include <wintrust.h>
#include <inttypes.h>
#pragma pop_macro("NTDDI_VERSION")
THIRD_PARTY_INCLUDES_END
// Resolves procName from hModule into proc, unless proc is already non-NULL.
// Evaluates to TRUE when proc is non-NULL afterwards and to FALSE when GetProcAddress returned NULL.
#define GetProc(hModule, procName, proc) (((NULL == proc) && (NULL == (*((FARPROC*)&proc) = GetProcAddress(hModule, procName)))) ? FALSE : TRUE)
// Function pointer types and lazily resolved pointers for the crypt32.dll / wintrust.dll entry points used below.
// Each pointer starts out NULL and is filled in via GetProc the first time it is needed.
typedef BOOL(WINAPI* PfnCryptMsgClose)(IN HCRYPTMSG hCryptMsg);
static PfnCryptMsgClose pfnCryptMsgClose = NULL;
typedef BOOL(WINAPI* PfnCertCloseStore)(IN HCERTSTORE hCertStore, DWORD dwFlags);
static PfnCertCloseStore pfnCertCloseStore = NULL;
typedef HCERTSTORE(WINAPI* PfnCertOpenStore)(
IN LPCSTR lpszStoreProvider,
IN DWORD dwEncodingType,
IN HCRYPTPROV_LEGACY hCryptProv,
IN DWORD dwFlags,
IN const void* pvPara
);
static PfnCertOpenStore pfnCertOpenStore = NULL;
typedef BOOL(WINAPI* PfnCertFreeCertificateContext)(IN PCCERT_CONTEXT pCertContext);
static PfnCertFreeCertificateContext pfnCertFreeCertificateContext = NULL;
typedef PCCERT_CONTEXT(WINAPI* PfnCertFindCertificateInStore)(
IN HCERTSTORE hCertStore,
IN DWORD dwCertEncodingType,
IN DWORD dwFindFlags,
IN DWORD dwFindType,
IN const void* pvFindPara,
IN PCCERT_CONTEXT pPrevCertContext
);
static PfnCertFindCertificateInStore pfnCertFindCertificateInStore = NULL;
typedef BOOL(WINAPI* PfnCryptMsgGetParam)(
IN HCRYPTMSG hCryptMsg,
IN DWORD dwParamType,
IN DWORD dwIndex,
OUT void* pvData,
IN OUT DWORD* pcbData
);
static PfnCryptMsgGetParam pfnCryptMsgGetParam = NULL;
typedef HCRYPTMSG(WINAPI* PfnCryptMsgOpenToDecode)(
IN DWORD dwMsgEncodingType,
IN DWORD dwFlags,
IN DWORD dwMsgType,
IN HCRYPTPROV_LEGACY hCryptProv,
IN PCERT_INFO pRecipientInfo,
IN PCMSG_STREAM_INFO pStreamInfo
);
static PfnCryptMsgOpenToDecode pfnCryptMsgOpenToDecode = NULL;
typedef BOOL(WINAPI* PfnCryptMsgUpdate)(
IN HCRYPTMSG hCryptMsg,
IN const BYTE* pbData,
IN DWORD cbData,
IN BOOL fFinal
);
static PfnCryptMsgUpdate pfnCryptMsgUpdate = NULL;
typedef BOOL(WINAPI* PfnCryptQueryObject)(
DWORD dwObjectType,
const void* pvObject,
DWORD dwExpectedContentTypeFlags,
DWORD dwExpectedFormatTypeFlags,
DWORD dwFlags,
DWORD* pdwMsgAndCertEncodingType,
DWORD* pdwContentType,
DWORD* pdwFormatType,
HCERTSTORE* phCertStore,
HCRYPTMSG* phMsg,
const void** ppvContext
);
static PfnCryptQueryObject pfnCryptQueryObject = NULL;
typedef BOOL(WINAPI* PfnCryptDecodeObjectEx)(
IN DWORD dwCertEncodingType,
IN LPCSTR lpszStructType,
IN const BYTE* pbEncoded,
IN DWORD cbEncoded,
IN DWORD dwFlags,
IN PCRYPT_DECODE_PARA pDecodePara,
OUT void* pvStructInfo,
IN OUT DWORD* pcbStructInfo
);
static PfnCryptDecodeObjectEx pfnCryptDecodeObjectEx = NULL;
typedef LONG(WINAPI* PfnWinVerifyTrust)(
IN HWND hwnd,
IN GUID* pgActionID,
IN LPVOID pWVTData
);
static PfnWinVerifyTrust pfnWinVerifyTrust = NULL;
bool isSignedByNVIDIA(const wchar_t* pathToFile)
{
bool valid = false;
// Now let's make sure this is actually signed by NVIDIA
DWORD dwEncoding, dwContentType, dwFormatType;
HCERTSTORE hStore = NULL;
HCRYPTMSG hMsg = NULL;
PCMSG_SIGNER_INFO pSignerInfo = NULL;
DWORD dwSignerInfo;
if (!pfnCertOpenStore)
{
// We only support Win10+ so we can search for module in system32 directly
auto hModCrypt32 = LoadLibraryExW(L"crypt32.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
if (!hModCrypt32 ||
!GetProc(hModCrypt32, "CryptMsgClose", pfnCryptMsgClose) ||
!GetProc(hModCrypt32, "CertOpenStore", pfnCertOpenStore) ||
!GetProc(hModCrypt32, "CertCloseStore", pfnCertCloseStore) ||
!GetProc(hModCrypt32, "CertFreeCertificateContext", pfnCertFreeCertificateContext) ||
!GetProc(hModCrypt32, "CertFindCertificateInStore", pfnCertFindCertificateInStore) ||
!GetProc(hModCrypt32, "CryptMsgGetParam", pfnCryptMsgGetParam) ||
!GetProc(hModCrypt32, "CryptMsgUpdate", pfnCryptMsgUpdate) ||
!GetProc(hModCrypt32, "CryptMsgOpenToDecode", pfnCryptMsgOpenToDecode) ||
!GetProc(hModCrypt32, "CryptQueryObject", pfnCryptQueryObject) ||
!GetProc(hModCrypt32, "CryptDecodeObjectEx", pfnCryptDecodeObjectEx))
{
UE_LOG(LogStreamlineRHI, Log, TEXT("Unable to obtain crypt32.dll functionality - cannot validate digital signature on SL plugins."));
return false;
}
}
// Get message handle and store handle from the signed file.
auto bResult = pfnCryptQueryObject(CERT_QUERY_OBJECT_FILE,
pathToFile,
CERT_QUERY_CONTENT_FLAG_PKCS7_SIGNED_EMBED,
CERT_QUERY_FORMAT_FLAG_BINARY,
0,
&dwEncoding,
&dwContentType,
&dwFormatType,
&hStore,
&hMsg,
NULL);
if (!bResult)
{
return false;
}
// Get signer information size.
bResult = pfnCryptMsgGetParam(hMsg,
CMSG_SIGNER_INFO_PARAM,
0,
NULL,
&dwSignerInfo);
if (!bResult)
{
return false;
}
// Allocate memory for signer information.
pSignerInfo = (PCMSG_SIGNER_INFO)LocalAlloc(LPTR, dwSignerInfo);
if (!pSignerInfo)
{
return false;
}
// Get Signer Information.
bResult = pfnCryptMsgGetParam(hMsg,
CMSG_SIGNER_INFO_PARAM,
0,
(PVOID)pSignerInfo,
&dwSignerInfo);
if (!bResult)
{
LocalFree(pSignerInfo);
return false;
}
// Look for nested signature
constexpr const char* kOID_NESTED_SIGNATURE = "1.3.6.1.4.1.311.2.4.1";
for (DWORD i = 0; i < pSignerInfo->UnauthAttrs.cAttr; i++)
{
if (strcmp(kOID_NESTED_SIGNATURE, pSignerInfo->UnauthAttrs.rgAttr[i].pszObjId) == 0)
{
HCRYPTMSG hMsg2 = pfnCryptMsgOpenToDecode(X509_ASN_ENCODING | PKCS_7_ASN_ENCODING, 0, 0, NULL, NULL, NULL);
if (hMsg2)
{
if (pfnCryptMsgUpdate(hMsg2, pSignerInfo->UnauthAttrs.rgAttr[i].rgValue->pbData, pSignerInfo->UnauthAttrs.rgAttr[i].rgValue->cbData,TRUE))
{
/*DWORD*/ dwSignerInfo = 0;
pfnCryptMsgGetParam(hMsg2, CMSG_SIGNER_INFO_PARAM, 0, NULL, &dwSignerInfo);
if (dwSignerInfo != 0)
{
PCMSG_SIGNER_INFO pSignerInfo2 = (PCMSG_SIGNER_INFO)LocalAlloc(LPTR, dwSignerInfo);
if (pSignerInfo2)
{
if (pfnCryptMsgGetParam(hMsg2, CMSG_SIGNER_INFO_PARAM, 0, (PVOID)pSignerInfo2, &dwSignerInfo))
{
CRYPT_DATA_BLOB c7Data;
c7Data.pbData = pSignerInfo->UnauthAttrs.rgAttr[i].rgValue->pbData;
c7Data.cbData = pSignerInfo->UnauthAttrs.rgAttr[i].rgValue->cbData;
auto hStore2 = pfnCertOpenStore(CERT_STORE_PROV_PKCS7, X509_ASN_ENCODING | PKCS_7_ASN_ENCODING, NULL, 0, &c7Data);
if (!hStore2)
{
LocalFree(pSignerInfo2);
return false;
}
CERT_INFO CertInfo{};
PCCERT_CONTEXT pCertContext = NULL;
// Search for the signer certificate in the temporary certificate store.
CertInfo.Issuer = pSignerInfo2->Issuer;
CertInfo.SerialNumber = pSignerInfo2->SerialNumber;
pCertContext = pfnCertFindCertificateInStore(hStore2,
(X509_ASN_ENCODING | PKCS_7_ASN_ENCODING),
0,
CERT_FIND_SUBJECT_CERT,
(PVOID)&CertInfo,
NULL);
if (!pCertContext)
{
LocalFree(pSignerInfo2);
pfnCertCloseStore(hStore2, CERT_CLOSE_STORE_FORCE_FLAG);
return false;
}
void* decodedPublicKey{};
DWORD decodedPublicLength{};
if (pfnCryptDecodeObjectEx((PKCS_7_ASN_ENCODING | X509_ASN_ENCODING),
CNG_RSA_PUBLIC_KEY_BLOB,
pCertContext->pCertInfo->SubjectPublicKeyInfo.PublicKey.pbData,
pCertContext->pCertInfo->SubjectPublicKeyInfo.PublicKey.cbData,
CRYPT_ENCODE_ALLOC_FLAG,
NULL,
&decodedPublicKey,
&decodedPublicLength))
{
static uint8_t s_rsaStreamlinePublicKey[] =
{
0x52, 0x53, 0x41, 0x31, 0x00, 0x0c, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0xc1, 0x8e, 0x40, 0xc3, 0xf5,
0xa7, 0x01, 0x9a, 0x37, 0x6b, 0x47, 0xa8, 0x58, 0xe8, 0xbe, 0xe3, 0x55, 0x0a, 0xee, 0x0f, 0x0d,
0x32, 0xaa, 0x12, 0xf9, 0x56, 0x7f, 0x5d, 0xfd, 0x82, 0x09, 0x33, 0x21, 0x42, 0xf2, 0xe8, 0x74,
0x98, 0x51, 0xb3, 0x88, 0x74, 0xcd, 0x00, 0x6e, 0xb1, 0x08, 0x10, 0x4b, 0xf1, 0xda, 0xd6, 0x97,
0x87, 0xd4, 0x9c, 0xb1, 0x13, 0xa8, 0xa2, 0x86, 0x15, 0x0e, 0xc1, 0xa5, 0x9c, 0xe5, 0x90, 0x9b,
0xbe, 0x69, 0xdc, 0x6a, 0x82, 0xbe, 0xb4, 0x4b, 0x4b, 0xfa, 0x95, 0x8e, 0xc1, 0xfc, 0x2b, 0x61,
0x95, 0xd1, 0x91, 0xed, 0xeb, 0x87, 0xe7, 0x09, 0x84, 0x05, 0x41, 0x03, 0xb0, 0x2d, 0xd4, 0x39,
0x7f, 0x62, 0x06, 0x56, 0x33, 0x93, 0x7e, 0x77, 0x54, 0x06, 0x77, 0x2b, 0x75, 0x05, 0xbc, 0xeb,
0x98, 0xea, 0xc0, 0xa2, 0xca, 0x98, 0x86, 0x0f, 0x10, 0x65, 0xde, 0x19, 0x2c, 0xa6, 0x1e, 0x93,
0xb0, 0x92, 0x5d, 0x5f, 0x5b, 0x6f, 0x79, 0x6d, 0x2c, 0x76, 0xa6, 0x67, 0x50, 0xaa, 0x8f, 0xc2,
0x4c, 0xf1, 0x08, 0xf7, 0xc0, 0x27, 0x29, 0xf0, 0x68, 0xf4, 0x64, 0x00, 0x1c, 0xb6, 0x28, 0x1e,
0x25, 0xb8, 0xf3, 0x8a, 0xd1, 0x6e, 0x65, 0xa3, 0x61, 0x9d, 0xf8, 0xca, 0x4a, 0x41, 0x60, 0x80,
0x62, 0xdf, 0x41, 0xa4, 0x8b, 0xdc, 0x97, 0xee, 0xeb, 0x64, 0x6f, 0xe4, 0x8f, 0x4b, 0xdf, 0x24,
0x01, 0x80, 0xd9, 0xb4, 0x0a, 0xec, 0x0d, 0x3e, 0xb7, 0x76, 0xba, 0xe9, 0xe7, 0xde, 0x07, 0xdd,
0x30, 0xc8, 0x4a, 0x14, 0x79, 0xec, 0x15, 0xed, 0x5c, 0xc6, 0xcc, 0xd4, 0xe6, 0x06, 0x3c, 0x42,
0x92, 0x10, 0xf7, 0x7c, 0x80, 0x1e, 0x78, 0xd3, 0xb4, 0x9f, 0xc2, 0x3b, 0xa8, 0x7b, 0xa0, 0xe3,
0x0c, 0xd9, 0xad, 0x2e, 0x09, 0x72, 0xe2, 0x8f, 0x54, 0x28, 0x87, 0x3c, 0xba, 0x7c, 0x97, 0x80,
0xdc, 0x09, 0xb5, 0x12, 0x34, 0x78, 0x9a, 0x26, 0xd0, 0xa3, 0xa7, 0xa7, 0x1b, 0x25, 0x19, 0xe5,
0x6e, 0xbe, 0xd7, 0x5a, 0x91, 0x32, 0xc4, 0xa9, 0x2f, 0xcc, 0xd5, 0x82, 0x4b, 0x5b, 0x9f, 0xad,
0xf3, 0x2f, 0xed, 0x4f, 0x33, 0xe1, 0x50, 0x33, 0xd6, 0x90, 0x79, 0x22, 0xe5, 0x1c, 0xc7, 0x35,
0xe7, 0x58, 0xe6, 0xb4, 0x8b, 0xc4, 0x28, 0x20, 0xec, 0xca, 0x70, 0xbb, 0x02, 0x1b, 0x48, 0xd8,
0x84, 0x51, 0x24, 0x33, 0x2a, 0x08, 0xb1, 0x15, 0x4e, 0xbc, 0x88, 0xa5, 0xe1, 0x37, 0x76, 0x70,
0xe6, 0xdf, 0x3f, 0x73, 0xfd, 0x0d, 0x8a, 0xd9, 0x0d, 0xa5, 0x35, 0xb2, 0xb4, 0x01, 0x42, 0x96,
0xc4, 0xaa, 0x1c, 0xeb, 0x68, 0x62, 0x36, 0xbf, 0xef, 0x5e, 0x2a, 0x3d, 0x18, 0x91, 0x8b, 0x92,
0x0a, 0x1e, 0xce, 0x98, 0x5b, 0x7b, 0x64, 0x42, 0x09, 0xb0, 0x1d
};
valid = decodedPublicLength == sizeof(s_rsaStreamlinePublicKey) && memcmp(s_rsaStreamlinePublicKey, decodedPublicKey, decodedPublicLength) == 0;
LocalFree(decodedPublicKey);
}
pfnCertFreeCertificateContext(pCertContext);
pfnCertCloseStore(hStore2, CERT_CLOSE_STORE_FORCE_FLAG);
}
LocalFree(pSignerInfo2);
}
}
}
pfnCryptMsgClose(hMsg2);
}
break;
}
}
LocalFree(pSignerInfo);
pfnCryptMsgClose(hMsg);
pfnCertCloseStore(hStore, CERT_CLOSE_STORE_FORCE_FLAG);
return valid;
}
//! See https://docs.microsoft.com/en-us/windows/win32/seccrypto/example-c-program--verifying-the-signature-of-a-pe-file
bool slVerifyEmbeddedSignature(const FString& InPathToBinary)
{
FString PathToBinary = InPathToBinary;
FPaths::ConvertRelativePathToFull(PathToBinary);
FPaths::MakePlatformFilename(PathToBinary);
FTCHARToWChar WPathToBinary = StringCast<WIDECHAR>(*PathToBinary);
const wchar_t* pathToFile = WPathToBinary.Get();
bool valid = true;
LONG lStatus = {};
// Initialize the WINTRUST_FILE_INFO structure.
WINTRUST_FILE_INFO FileData;
memset(&FileData, 0, sizeof(FileData));
FileData.cbStruct = sizeof(WINTRUST_FILE_INFO);
FileData.pcwszFilePath = pathToFile;
FileData.hFile = NULL;
FileData.pgKnownSubject = NULL;
if (!pfnWinVerifyTrust)
{
// We only support Win10+ so we can search for module in system32 directly
auto hModWintrust = LoadLibraryExW(L"wintrust.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
if (!hModWintrust || !GetProc(hModWintrust, "WinVerifyTrust", pfnWinVerifyTrust))
{
//printf("Unable to obtain wintrust.dll functionality - cannot validate digital signature on SL plugins.");
UE_LOG(LogStreamlineRHI, Log, TEXT("Unable to obtain wintrust.dll functionality - cannot validate digital signature on SL plugins."));
return false;
}
}
/*
WVTPolicyGUID specifies the policy to apply on the file
WINTRUST_ACTION_GENERIC_VERIFY_V2 policy checks:
1) The certificate used to sign the file chains up to a root
certificate located in the trusted root certificate store. This
implies that the identity of the publisher has been verified by
a certification authority.
2) In cases where user interface is displayed (which this example
does not do), WinVerifyTrust will check for whether the
end entity certificate is stored in the trusted publisher store,
implying that the user trusts content from this publisher.
3) The end entity certificate has sufficient permission to sign
code, as indicated by the presence of a code signing EKU or no
EKU.
*/
GUID WVTPolicyGUID = WINTRUST_ACTION_GENERIC_VERIFY_V2;
WINTRUST_DATA WinTrustData;
// Initialize the WinVerifyTrust input data structure.
// Default all fields to 0.
memset(&WinTrustData, 0, sizeof(WinTrustData));
WinTrustData.cbStruct = sizeof(WinTrustData);
// Use default code signing EKU.
WinTrustData.pPolicyCallbackData = NULL;
// No data to pass to SIP.
WinTrustData.pSIPClientData = NULL;
// Disable WVT UI.
WinTrustData.dwUIChoice = WTD_UI_NONE;
// No revocation checking.
WinTrustData.fdwRevocationChecks = WTD_REVOKE_NONE;
// Verify an embedded signature on a file.
WinTrustData.dwUnionChoice = WTD_CHOICE_FILE;
// Verify action.
WinTrustData.dwStateAction = WTD_STATEACTION_VERIFY;
// Verification sets this value.
WinTrustData.hWVTStateData = NULL;
// Not used.
WinTrustData.pwszURLReference = NULL;
// This is not applicable if there is no UI because it changes
// the UI to accommodate running applications instead of
// installing applications.
WinTrustData.dwUIContext = 0;
// Set pFile.
WinTrustData.pFile = &FileData;
// First verify the primary signature (index 0) to determine how many secondary signatures
// are present. We use WSS_VERIFY_SPECIFIC and dwIndex to do this, also setting
// WSS_GET_SECONDARY_SIG_COUNT to have the number of secondary signatures returned.
WINTRUST_SIGNATURE_SETTINGS SignatureSettings = {};
CERT_STRONG_SIGN_PARA StrongSigPolicy = {};
SignatureSettings.cbStruct = sizeof(WINTRUST_SIGNATURE_SETTINGS);
SignatureSettings.dwFlags = WSS_GET_SECONDARY_SIG_COUNT | WSS_VERIFY_SPECIFIC;
SignatureSettings.dwIndex = 0;
WinTrustData.pSignatureSettings = &SignatureSettings;
StrongSigPolicy.cbSize = sizeof(CERT_STRONG_SIGN_PARA);
StrongSigPolicy.dwInfoChoice = CERT_STRONG_SIGN_OID_INFO_CHOICE;
static char OID[] = szOID_CERT_STRONG_SIGN_OS_CURRENT;
StrongSigPolicy.pszOID = OID;
WinTrustData.pSignatureSettings->pCryptoPolicy = &StrongSigPolicy;
// WinVerifyTrust verifies signatures as specified by the GUID and Wintrust_Data.
lStatus = pfnWinVerifyTrust(NULL, &WVTPolicyGUID, &WinTrustData);
// First signature must be validated by the OS
valid = lStatus == ERROR_SUCCESS;
if (!valid)
{
//printf("File '%S' is NOT correctly signed - Streamline will not load unsecured modules", pathToFile);
UE_LOG(LogStreamlineRHI, Log, TEXT("File '%s' is NOT correctly signed - Streamline will not load unsecured modules for UE_BUILD_SHIPPING configurations."), WCHAR_TO_TCHAR(pathToFile));
}
else
{
// Now there has to be a secondary one
valid &= WinTrustData.pSignatureSettings->cSecondarySigs == 1;
if (!valid)
{
UE_LOG(LogStreamlineRHI, Log, TEXT("File '%s' does not have the secondary NVIDIA signature - Streamline will not load unsecured modules for UE_BUILD_SHIPPING configurations."), WCHAR_TO_TCHAR(pathToFile));
}
else
{
// The secondary signature must be from NVIDIA
valid &= isSignedByNVIDIA(pathToFile);
if (valid)
{
UE_LOG(LogStreamlineRHI, Log, TEXT("File '%s' is signed by NVIDIA and the signature was verified."), WCHAR_TO_TCHAR(pathToFile));
}
else
{
UE_LOG(LogStreamlineRHI, Log, TEXT("File '%s' is NOT correctly signed - Streamline will not load unsecured modules for UE_BUILD_SHIPPING configurations."), WCHAR_TO_TCHAR(pathToFile));
}
}
}
// Any hWVTStateData must be released by a call with close.
WinTrustData.dwStateAction = WTD_STATEACTION_CLOSE;
lStatus = pfnWinVerifyTrust(NULL, &WVTPolicyGUID, &WinTrustData);
return valid;
}
#include "Windows/HideWindowsPlatformTypes.h"

View File

@ -0,0 +1,191 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "CoreMinimal.h"
#include "sl.h"
#include "sl_helpers.h"
#include "sl_dlss_g.h"
#include "sl_latewarp.h"
#include "sl_deepdvc.h"
// These are the actual Streamline API calls, exported from the StreamlineRHI module.
// NOTE(review): the wrapper implementations visible in the .cpp assert that Streamline is supported and the
// interposer is loaded, optionally log the call, then forward to the matching sl* interposer export;
// presumably all wrappers below follow that pattern - confirm for the ones you rely on.
extern STREAMLINERHI_API sl::Result SLinit(const sl::Preferences& pref, uint64_t sdkVersion = sl::kSDKVersion);
extern STREAMLINERHI_API sl::Result SLshutdown();
extern STREAMLINERHI_API sl::Result SLisFeatureSupported(sl::Feature feature, const sl::AdapterInfo& adapterInfo);
extern STREAMLINERHI_API sl::Result SLisFeatureLoaded(sl::Feature feature, bool& loaded);
extern STREAMLINERHI_API sl::Result SLsetFeatureLoaded(sl::Feature feature, bool loaded);
extern STREAMLINERHI_API sl::Result SLevaluateFeature(sl::Feature feature, const sl::FrameToken& frame, const sl::BaseStructure** inputs, uint32_t numInputs, sl::CommandBuffer* cmdBuffer);
extern STREAMLINERHI_API sl::Result SLAllocateResources(sl::CommandBuffer* cmdBuffer, sl::Feature feature, const sl::ViewportHandle& viewport);
extern STREAMLINERHI_API sl::Result SLFreeResources(sl::Feature feature, const sl::ViewportHandle& viewport);
extern STREAMLINERHI_API sl::Result SLsetTag(const sl::ViewportHandle& viewport, const sl::ResourceTag* tags, uint32_t numTags, sl::CommandBuffer* cmdBuffer);
extern STREAMLINERHI_API sl::Result SLgetFeatureRequirements(sl::Feature feature, sl::FeatureRequirements& requirements);
extern STREAMLINERHI_API sl::Result SLgetFeatureVersion(sl::Feature feature, sl::FeatureVersion& version);
extern STREAMLINERHI_API sl::Result SLUpgradeInterface(void** baseInterface);
extern STREAMLINERHI_API sl::Result SLsetConstants(const sl::Constants& values, const sl::FrameToken& frame, const sl::ViewportHandle& viewport);
extern STREAMLINERHI_API sl::Result SLgetNativeInterface(void* proxyInterface, void** baseInterface);
extern STREAMLINERHI_API sl::Result SLgetFeatureFunction(sl::Feature feature, const char* functionName, void*& function);
// frameIndex is optional; the implementation logs "frameIndex=nullptr" when it is omitted
extern STREAMLINERHI_API sl::Result SLgetNewFrameToken(sl::FrameToken*& token, uint32_t* frameIndex = nullptr);
extern STREAMLINERHI_API sl::Result SLsetD3DDevice(void* d3dDevice);
// Logging helpers used by the wrappers and by CallSLFeatureFn
extern STREAMLINERHI_API void LogStreamlineFunctionCall(sl::Feature Feature, const FString& Function, const FString& Arguments);
extern STREAMLINERHI_API bool LogStreamlineFunctions();
// Workaround for the bad operator& definition in the sl_consts.h macro, which returns bool, until the Streamline header is fixed.
// If you get a compile error for the SLBitwiseAnd specialization, it's likely that Streamline has been fixed,
// so you can use & directly and remove this function template.
using SLBitwiseAndOperatorReturnType = decltype(sl::operator&(sl::FeatureRequirementFlags::eD3D11Supported, sl::FeatureRequirementFlags::eD3D11Supported));

// Returns the bitwise AND of two values of enum type E, as a value of type E.
// std::underlying_type_t avoids naming the dependent `::type` member, which would need `typename` before C++20.
template<typename E, std::enable_if_t<!std::is_same_v<SLBitwiseAndOperatorReturnType, sl::FeatureRequirementFlags>, bool> = true>
constexpr E SLBitwiseAnd(E x1, E x2)
{
	using UnderlyingType = std::underlying_type_t<E>;
	return static_cast<E>(static_cast<UnderlyingType>(x1) & static_cast<UnderlyingType>(x2));
}
// Function object that turns each argument it is invoked with into a short string for logging.
// Arguments without a dedicated overload are recorded as "argN", where N is the number of arguments seen so far.
struct StringifySLArgument
{
	// One entry per visited argument, in visiting order
	TArray<FString> ArgStrings;

	// Returns all collected argument strings joined with ", "
	FString GetJoinedArgString() const
	{
		const TCHAR* const Separator = TEXT(", ");
		return FString::Join(ArgStrings, Separator);
	}

	// Fallback for argument types that have no dedicated overload
	template <typename ArgumentType>
	void operator()(const ArgumentType&)
	{
		const int32 ArgumentIndex = ArgStrings.Num();
		ArgStrings.Add(FString::Printf(TEXT("arg%d"), ArgumentIndex));
	}

	void operator()(const sl::PCLMarker& In)
	{
		FString MarkerName(ANSI_TO_TCHAR(sl::getPCLMarkerAsStr(In)));
		ArgStrings.Add(MarkerName);
	}

	void operator()(const sl::FrameToken& In)
	{
		const unsigned int FrameValue = static_cast<unsigned int>(In);
		ArgStrings.Add(FString::Printf(TEXT("frame=%u"), FrameValue));
	}

	void operator()(const sl::ViewportHandle& In)
	{
		const unsigned int ViewportValue = static_cast<unsigned int>(In);
		ArgStrings.Add(FString::Printf(TEXT("viewport=%u"), ViewportValue));
	}

	// sl_helpers.h, where art thou?
	// Returns the name of a DLSS-G mode, or "Unknown" for values not listed here
	inline const char* getDLSSGModeAsStr(sl::DLSSGMode mode)
	{
		static_assert (uint32(sl::DLSSGMode::eCount) == 3U,"sl::DLSSGMode got a new enum value and needs to be addressed here");
		switch (mode)
		{
			SL_CASE_STR(sl::DLSSGMode::eOff);
			SL_CASE_STR(sl::DLSSGMode::eOn);
			SL_CASE_STR(sl::DLSSGMode::eAuto);
			SL_CASE_STR(sl::DLSSGMode::eCount);
		};
		return "Unknown";
	}

	// Returns the name of a single DLSS-G flag bit, or "Unknown" for values not listed here
	inline const char* getDLSSGFlagsSingleBitFlagsAsStr(sl::DLSSGFlags f)
	{
		using namespace sl;
		switch (f)
		{
			SL_CASE_STR(DLSSGFlags::eShowOnlyInterpolatedFrame);
			SL_CASE_STR(DLSSGFlags::eDynamicResolutionEnabled);
			SL_CASE_STR(DLSSGFlags::eRequestVRAMEstimate);
			SL_CASE_STR(DLSSGFlags::eRetainResourcesWhenOff);
			SL_CASE_STR(DLSSGFlags::eEnableFullscreenMenuDetection);
		}
		return "Unknown";
	}

	// Returns the names of all flag bits set in f, separated by '|'.
	// Bits are visited from eEnableFullscreenMenuDetection downwards.
	inline FString getDLSSGFlagsAsStr(sl::DLSSGFlags f)
	{
		FString Result;
		uint32 SingleBit = uint32(sl::DLSSGFlags::eEnableFullscreenMenuDetection);
		while (SingleBit != 0)
		{
			const sl::DLSSGFlags Flag = sl::DLSSGFlags(SingleBit);
			if (Flag == SLBitwiseAnd(f, Flag))
			{
				Result.Append(getDLSSGFlagsSingleBitFlagsAsStr(Flag));
				Result.AppendChar(TCHAR('|'));
			}
			SingleBit >>= 1;
		}
		// Drop the separator left behind by the last appended flag
		Result.RemoveFromEnd(TEXT("|"));
		return Result;
	}

	void operator()(const sl::DLSSGOptions& In)
	{
		FString OptionsString = FString::Printf(TEXT("%s numFramesToGenerate=%u"), ANSI_TO_TCHAR(getDLSSGModeAsStr(In.mode)), In.numFramesToGenerate);
		ArgStrings.Add(OptionsString);
	}

	void operator()(const sl::DeepDVCOptions& In)
	{
		FString OptionsString = FString::Printf(TEXT("%s intensity=%.3f saturationBoost=%.3f"), ANSI_TO_TCHAR(getDeepDVCModeAsStr(In.mode)), In.intensity, In.saturationBoost);
		ArgStrings.Add(OptionsString);
	}

	void operator()(const sl::LatewarpOptions& In)
	{
		FString OptionsString = FString::Printf(TEXT("latewarpActive=%u"), In.latewarpActive);
		ArgStrings.Add(OptionsString);
	}

	void operator()(const sl::ReflexOptions& In)
	{
		FString OptionsString = FString::Printf(TEXT("mode=%s"), ANSI_TO_TCHAR(getReflexModeAsStr(In.mode)));
		ArgStrings.Add(OptionsString);
	}

	// The matrices are not printed; only their names are recorded
	void operator()(const sl::ReflexCameraData& In)
	{
		FString CameraDataString = FString::Printf(TEXT("worldToViewMatrix=... viewToClipMatrix=... prevRenderedWorldToViewMatrix=..., prevRenderedViewToClipMatrix=..."));
		ArgStrings.Add(CameraDataString);
	}
};
// Convenience function template and macro for using SLgetFeatureFunction
//
// Example use:
// sl::Result Result = CALL_SL_FEATURE_FN(sl::kFeatureReflex, slReflexSleep, *FrameToken);
//
// F is the function pointer type of the feature function, FunctionName its export name and args its arguments.
// The pointer is looked up through SLgetFeatureFunction on first use and cached in a function-local static.
// The call is reported via LogStreamlineFunctionCall and the feature function's result is returned.
// If the lookup fails, its error result is returned without calling anything.
//
// NOTE(review): the cache is one static per <F, Ts...> instantiation and is not keyed by FunctionName, so two
// feature functions with identical signatures called with identical argument types would share it - confirm
// that no such pair is routed through here.
template<typename F, typename... Ts> static sl::Result CallSLFeatureFn(sl::Feature Feature, const char* FunctionName, Ts&&... args)
{
	static F PtrFn = nullptr;
	if (PtrFn == nullptr)
	{
		const sl::Result Result = SLgetFeatureFunction(Feature, FunctionName, reinterpret_cast<void*&>(PtrFn));
		checkf(Result == sl::Result::eOk, TEXT("%s: unable to map function %s (%s)"), ANSI_TO_TCHAR(__FUNCTION__), ANSI_TO_TCHAR(FunctionName), ANSI_TO_TCHAR(sl::getResultAsStr(Result)));
		// When checks are compiled out, report the failure to the caller instead of calling through a null pointer
		if (Result != sl::Result::eOk)
		{
			return Result;
		}
	}

	// Stringify the arguments for the function call log
	const TTuple<Ts...> Quarrel(args...);
	StringifySLArgument Stringifier;
	VisitTupleElements(Stringifier, Quarrel);
	LogStreamlineFunctionCall(Feature,FString::Printf(TEXT("%s"), ANSI_TO_TCHAR(FunctionName)), Stringifier.GetJoinedArgString());

	return PtrFn(std::forward<Ts>(args)...);
}
#define CALL_SL_FEATURE_FN(Feature, FunctionName, ...) CallSLFeatureFn<decltype(&FunctionName)>(Feature, #FunctionName, __VA_ARGS__)

View File

@ -0,0 +1,79 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "StreamlineRHI.h"
#include "sl.h"
#include "sl_helpers.h"
// Converts a 4x4 float matrix to sl::float4x4 by copying it row by row.
// bIsOrthographicProjection is currently only referenced by the disabled assertion below.
inline sl::float4x4 ToSL(const FRHIStreamlineArguments::FMatrix44f& InMatrix, bool bIsOrthographicProjection = false)
{
	// This assertion can still fire when switching between ortho/perspective, which we don't support
	// Some interesting PrevClipToClip have been seen for example
	//check(bIsOrthographicProjection || !FMath::IsNearlyZero(InMatrix.Determinant()));
	sl::float4x4 Converted;
	for (int Row = 0; Row < 4; ++Row)
	{
		const auto& SourceRow = InMatrix.M[Row];
		Converted.setRow(Row, { SourceRow[0], SourceRow[1], SourceRow[2], SourceRow[3] });
	}
	return Converted;
}
// Converts a four component vector into an sl::float4 (X, Y, Z, W order).
inline sl::float4 ToSL(const FRHIStreamlineArguments::FVector4f& InVector)
{
    return sl::float4{ InVector.X, InVector.Y, InVector.Z, InVector.W };
}
// Converts a three component vector into an sl::float3 (X, Y, Z order).
inline sl::float3 ToSL(const FRHIStreamlineArguments::FVector3f& InVector)
{
    return sl::float3{ InVector.X, InVector.Y, InVector.Z };
}
// Converts a two component vector into an sl::float2 (X, Y order).
inline sl::float2 ToSL(const FRHIStreamlineArguments::FVector2f& InVector)
{
    return sl::float2{ InVector.X, InVector.Y };
}
// Maps a C++ bool onto the Streamline boolean enumerators: true -> sl::eTrue, false -> sl::eFalse.
inline sl::Boolean ToSL(bool b)
{
    if (b)
    {
        return sl::eTrue;
    }
    return sl::eFalse;
}
// Converts an integer rectangle into an sl::Extent. The rectangle must have a non-negative origin and size.
inline sl::Extent ToSL(const FIntRect& InRect)
{
    check(InRect.Min.X >= 0);
    check(InRect.Min.Y >= 0);
    check(InRect.Width() >= 0);
    check(InRect.Height() >= 0);

    // sl::Extent has ^top^ first in the struct definition and ^left^ second, i.e. YX for the offset,
    // so Y and X are swapped intentionally when building the result.
    const uint32_t Top = uint32_t(InRect.Min.Y);
    const uint32_t Left = uint32_t(InRect.Min.X);
    const uint32_t Width = uint32_t(InRect.Width());
    const uint32_t Height = uint32_t(InRect.Height());
    return { Top, Left, Width, Height };
}
// Maps a plugin resource tag onto the matching Streamline buffer type constant.
// Unexpected values trigger the checkf; if execution continues past it they map to the depth buffer type.
inline sl::BufferType ToSL(EStreamlineResource InResourceTag)
{
    switch (InResourceTag)
    {
        case EStreamlineResource::Depth:
            return sl::kBufferTypeDepth;
        case EStreamlineResource::MotionVectors:
            return sl::kBufferTypeMotionVectors;
        case EStreamlineResource::NoWarpMask:
            return sl::kBufferTypeNoWarpMask;
        case EStreamlineResource::HUDLessColor:
            return sl::kBufferTypeHUDLessColor;
        case EStreamlineResource::UIColorAndAlpha:
            return sl::kBufferTypeUIColorAndAlpha;
        case EStreamlineResource::Backbuffer:
            return sl::kBufferTypeBackbuffer;
        case EStreamlineResource::ScalingOutputColor:
            return sl::kBufferTypeScalingOutputColor;
        default:
            checkf(false, TEXT("unexpected EStreamlineResource enum value %u. This is a UE Streamline plugin developer bug"), InResourceTag);
            return sl::kBufferTypeDepth;
    }
}

View File

@ -0,0 +1,326 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "Modules/ModuleManager.h"
#include "CoreMinimal.h"
#include "RendererInterface.h"
#include "RenderGraphResources.h"
#include "Runtime/Launch/Resources/Version.h"
// Forward declarations of the Streamline SDK types that are referenced by the declarations in this header.
// NOTE(review): the Feature alias and the FeatureRequirementFlags underlying type presumably have to stay in sync with
// the declarations in the Streamline SDK headers - confirm when updating the SDK.
namespace sl
{
struct AdapterInfo;
struct FrameToken;
struct APIError;
struct FeatureRequirements;
using Feature = uint32_t;
enum class FeatureRequirementFlags : uint32_t;
}
class FSLFrameTokenProvider;
// Support status of Streamline on the current platform, as returned by GetPlatformStreamlineSupport().
enum class EStreamlineSupport : uint8
{
Supported,
NotSupported,
NotSupportedIncompatibleRHI,
// NOTE(review): presumably a count sentinel rather than a real status - confirm against the users of this enum
NumValues
};
// Identifies which kind of resource an FRHIStreamlineResource carries (see FRHIStreamlineResource::StreamlineTag).
enum class EStreamlineResource
{
Depth,
MotionVectors,
NoWarpMask,
HUDLessColor,
UIColorAndAlpha,
Backbuffer,
ScalingOutputColor,
// we use this to size some arrays statically somewhere, but we also don't want to have a real new enum value so we don't have to handle it in switch statements ...
// Keep it aliased to the last real enumerator when adding new values.
Last = ScalingOutputColor
};
struct FRHIStreamlineResource
{
FRHITexture* Texture = nullptr;
FIntRect ViewRect = FIntRect(FIntPoint::ZeroValue, FIntPoint::ZeroValue);
EStreamlineResource StreamlineTag;
static FRHIStreamlineResource NullResource(EStreamlineResource InTag)
{
return { nullptr, FIntRect(FIntPoint::ZeroValue, FIntPoint::ZeroValue), InTag };
}
static FRHIStreamlineResource FromRDGTexture(FRDGTexture* InRDGResource, EStreamlineResource InTag)
{
return { InRDGResource ? InRDGResource->GetRHI() : nullptr,
FIntRect(FIntPoint::ZeroValue,InRDGResource ? InRDGResource->Desc.Extent :FIntPoint::ZeroValue),
InTag};
}
static FRHIStreamlineResource FromRDGTexture(FRDGTexture* InRDGResource, FIntRect InRect, EStreamlineResource InTag)
{
return { InRDGResource ? InRDGResource->GetRHI() : nullptr, InRect, InTag };
}
};
// TODO STREAMLINE rename variables
// Plain data bundle of per-view values that is passed to FStreamlineRHI::SetStreamlineData().
// None of the members has a default initializer, so a default constructed instance holds indeterminate values.
// NOTE(review): callers are presumably expected to fill in every field - confirm at the call sites.
struct STREAMLINERHI_API FRHIStreamlineArguments
{
#if ENGINE_MAJOR_VERSION < 5
// New type names with dims and precision, required in SHADER_PARAMETER declarations and useful everywhere
using FMatrix44f = FMatrix;
using FVector2f = FVector2D;
using FVector3f = FVector;
using FVector4f = FVector4;
#else
// On newer engines the nested names simply alias the global types of the same name.
using FMatrix44f = ::FMatrix44f;
using FVector3f = ::FVector3f;
using FVector2f = ::FVector2f;
using FVector4f = ::FVector4f;
#endif
// View ID across all active views
uint32 ViewId;
uint32 FrameId;
//! Specifies if previous frame has no connection to the current one (motion vectors are invalid)
bool bReset;
//! Specifies if depth values are inverted (value closer to the camera is higher) or not.
bool bIsDepthInverted;
//! Specifies clip space jitter offset
FVector2f JitterOffset;
//! Specifies scale factors used to normalize motion vectors (so the values are in [-1,1] range)
FVector2f MotionVectorScale;
//! Specifies if motion vectors are already dilated or not.
bool bAreMotionVectorsDilated;
//! Specifies if orthographic projection is used or not.
bool bIsOrthographicProjection;
//! Specifies matrix transformation from the camera view to the clip space.
FMatrix44f CameraViewToClip;
//! Specifies matrix transformation from the clip space to the camera view space.
FMatrix44f ClipToCameraView;
//! Specifies matrix transformation describing lens distortion in clip space.
FMatrix44f ClipToLenseClip;
//! Specifies matrix transformation from the current clip to the previous clip space.
FMatrix44f ClipToPrevClip;
//! Specifies matrix transformation from the previous clip to the current clip space.
FMatrix44f PrevClipToClip;
//! Specifies camera position in world space.
FVector3f CameraOrigin;
//! Specifies camera up vector in world space.
FVector3f CameraUp;
//! Specifies camera right vector in world space.
FVector3f CameraRight;
//! Specifies camera forward vector in world space.
FVector3f CameraForward;
//! Specifies camera near view plane distance.
float CameraNear;
//! Specifies camera far view plane distance.
float CameraFar;
//! Specifies camera field of view in radians.
float CameraFOV;
//! Specifies camera aspect ratio defined as view space width divided by height.
float CameraAspectRatio;
//! Specifies camera pinhole offset.
FVector2f CameraPinholeOffset;
};
// Arguments handed to the FStreamlineRHI constructor and to IStreamlineRHIModule::CreateStreamlineRHI().
struct FStreamlineRHICreateArguments
{
// NOTE(review): presumably the base directory of the plugin on disk - confirm where it is filled in
FString PluginBaseDir;
// RHI the Streamline RHI is created for; nullptr until set by the caller
FDynamicRHI* DynamicRHI = nullptr;
};
// Hands out the sl::FrameToken to use for a given frame counter value.
// The implementation is not part of this header; the members below suggest that the last token is cached and that
// access is guarded by a critical section - confirm in the .cpp file.
class FSLFrameTokenProvider
{
public:
FSLFrameTokenProvider();
// Returns the frame token for the given frame counter.
sl::FrameToken* GetTokenForFrame(uint64 FrameCounter);
private:
FCriticalSection Section;
sl::FrameToken* FrameToken;
// NOTE(review): this is 32 bits wide while GetTokenForFrame() takes a 64 bit counter - confirm the truncation is intended
uint32_t LastFrameCounter;
};
class FStreamlineRHIModule;
// Base class of the API specific Streamline RHI implementations. It declares the pure virtual hooks that the API
// specific subclasses implement, plus shared non-virtual helpers whose implementations live outside this header.
// The constructor is protected; FStreamlineRHIModule is a friend of this class.
class STREAMLINERHI_API FStreamlineRHI
{
friend class FStreamlineRHIModule;
public:
virtual ~FStreamlineRHI();
virtual void SetStreamlineData(FRHICommandList& CmdList, const FRHIStreamlineArguments& InArguments);
void StreamlineEvaluateDeepDVC(FRHICommandList& CmdList, const FRHIStreamlineResource& InputOutput, sl::FrameToken* FrameToken, uint32 ViewID);
// Convenience overload: forwards a braced list of resources to the TArrayView overload below.
void TagTextures(FRHICommandList& CmdList, uint32 InViewID, std::initializer_list< FRHIStreamlineResource> InResources)
{
TagTextures(CmdList, InViewID, MakeArrayView(InResources));
}
// Convenience overload: tags a single resource by wrapping it into a one element array view.
void TagTexture(FRHICommandList& CmdList, uint32 InViewID, const FRHIStreamlineResource& InResource)
{
TagTextures(CmdList, InViewID, MakeArrayView<const FRHIStreamlineResource>(&InResource, 1));
}
// Implemented by API specific subclasses
//
public:
virtual void TagTextures(FRHICommandList& CmdList, uint32 InViewID, const TArrayView<const FRHIStreamlineResource> InResources) = 0;
virtual const sl::AdapterInfo* GetAdapterInfo() = 0;
virtual void APIErrorHandler(const sl::APIError& LastError) = 0;
protected:
virtual void* GetCommandBuffer(FRHICommandList& CmdList, FRHITexture* Texture) = 0;
virtual void PostStreamlineFeatureEvaluation(FRHICommandList& CmdList, FRHITexture* Texture) = 0;
TTuple<bool, FString> IsSwapChainProviderRequired(const sl::AdapterInfo& AdapterInfo) const;
public:
// Per feature support queries. The base class reports every feature as unsupported (false);
// subclasses override the ones their RHI supports.
virtual bool IsDLSSGSupportedByRHI() const
{
return false;
}
virtual bool IsDeepDVCSupportedByRHI() const
{
return false;
}
virtual bool IsLatewarpSupportedByRHI() const
{
return false;
}
virtual bool IsReflexSupportedByRHI() const
{
return false;
}
bool IsStreamlineAvailable() const;
// Returns the static flag bIsIncompatibleAPICaptureToolActive.
static bool IsIncompatibleAPICaptureToolActive()
{
return bIsIncompatibleAPICaptureToolActive;
}
sl::FrameToken* GetFrameToken(uint64 FrameCounter);
bool IsSwapchainHookingAllowed() const;
bool IsSwapchainProviderInstalled() const;
void ReleaseStreamlineResourcesForAllFeatures(uint32 ViewID);
// Second initialization step, separate from the constructor:
// it needs to call some virtual methods that we can't call in the ctor. Just C++ things
void PostPlatformRHICreateInit();
void OnSwapchainDestroyed(void* InNativeSwapchain) const;
void OnSwapchainCreated(void* InNativeSwapchain) const;
protected:
FStreamlineRHI(const FStreamlineRHICreateArguments& Arguments);
#if WITH_EDITOR
// Play In Editor (PIE) begin/end callbacks and the delegate handles used to register them (editor builds only).
void OnBeginPIE(const bool bIsSimulating);
void OnEndPIE(const bool bIsSimulating);
bool bIsPIEActive = false;
FDelegateHandle BeginPIEHandle;
FDelegateHandle EndPIEHandle;
#endif
// mutable so that it can be modified from const member functions
mutable int32 NumActiveSwapchainProxies = 0;
virtual bool IsStreamlineSwapchainProxy(void* NativeSwapchain) const = 0;
int32 GetMaxNumSwapchainProxies() const;
void ValidateNumSwapchainProxies(const char* CallSite) const;
#if PLATFORM_WINDOWS
// whether an HRESULT is a DXGI_STATUS_*
bool IsDXGIStatus(const HRESULT HR);
#endif
FDynamicRHI* DynamicRHI = nullptr;
TUniquePtr<FSLFrameTokenProvider> FrameTokenProvider = nullptr;
static bool bIsIncompatibleAPICaptureToolActive;
bool bIsSwapchainProviderInstalled = false;
static TArray<sl::Feature> FeaturesRequestedAtSLInitTime;
TArray<sl::Feature> LoadedFeatures;
TArray<sl::Feature> SupportedFeatures;
};
// Module interface with a factory method that creates an FStreamlineRHI from the given creation arguments.
// NOTE(review): presumably implemented by the API specific Streamline RHI modules - confirm.
class IStreamlineRHIModule : public IModuleInterface
{
public:
// Creates the Streamline RHI; ownership of the returned object is transferred to the caller via TUniquePtr.
virtual TUniquePtr<FStreamlineRHI> CreateStreamlineRHI(const FStreamlineRHICreateArguments& Arguments) = 0;
};
class FStreamlineRHIModule final : public IModuleInterface
{
public:
STREAMLINERHI_API void InitializeStreamline();
STREAMLINERHI_API void ShutdownStreamline();
/** IModuleInterface implementation */
virtual void StartupModule();
virtual void ShutdownModule();
private:
FString StreamlineBinaryDirectory;
};
// Free functions exported by the StreamlineRHI module. Their implementations are not part of this header.
// Creation of and access to the platform Streamline RHI instance.
STREAMLINERHI_API void PlatformCreateStreamlineRHI();
STREAMLINERHI_API FStreamlineRHI* GetPlatformStreamlineRHI();
// Support and availability queries.
STREAMLINERHI_API EStreamlineSupport GetPlatformStreamlineSupport();
STREAMLINERHI_API bool IsStreamlineSupported();
STREAMLINERHI_API bool AreStreamlineFunctionsLoaded();
STREAMLINERHI_API sl::FeatureRequirementFlags PlatformGetAllImplementedStreamlineRHIs();
// Intentionally or accidentally empty: this namespace block declares nothing.
namespace sl
{
};
STREAMLINERHI_API bool StreamlineFilterRedundantSetOptionsCalls();
// Logging helpers.
STREAMLINERHI_API void LogStreamlineFeatureSupport(sl::Feature Feature, const sl::AdapterInfo& Adapter);
STREAMLINERHI_API void LogStreamlineFeatureRequirements(sl::Feature Feature, const sl::FeatureRequirements& Requirements);
STREAMLINERHI_API FString CurrentThreadName();

View File

@ -0,0 +1,89 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "Engine/DeveloperSettings.h"
#include "UObject/Object.h"
#include "UObject/ObjectMacros.h"
#include "StreamlineSettings.generated.h"
// Tri-state used by the local override settings: force a setting on or off, or defer to the project settings.
UENUM()
enum class EStreamlineSettingOverride : uint8
{
Enabled UMETA(DisplayName = "True"),
Disabled UMETA(DisplayName = "False"),
UseProjectSettings UMETA(DisplayName = "Use project settings"),
};
// Local (per user) overrides for settings that also exist project wide in UStreamlineSettings.
// Every property defaults to EStreamlineSettingOverride::UseProjectSettings.
UCLASS(Config = Engine, ProjectUserConfig)
class STREAMLINERHI_API UStreamlineOverrideSettings : public UObject
{
GENERATED_BODY()
public:
/**
* Enable DLSS Frame Generation in New Editor Window Play In Editor mode.
* Saved to local user config only.
* Note: DLSS Frame Generation is not supported in editor viewports
*/
UPROPERTY(Config, EditAnywhere, Category = "Level Editor - Viewport (Local)", DisplayName = "Enable DLSS-FG in New Editor Window (PIE) mode")
EStreamlineSettingOverride EnableDLSSFGInPlayInEditorViewportsOverride = EStreamlineSettingOverride::UseProjectSettings;
/**
* Load the Streamline debug overlay in non-Shipping configurations. Note that the overlay requires DLSS Frame Generation to be available.
* Modifying this setting requires an editor restart to take effect. Saved to local user config only
*/
UPROPERTY(Config, EditAnywhere, Category = "General Settings", DisplayName = "Load Debug Overlay")
EStreamlineSettingOverride LoadDebugOverlayOverride = EStreamlineSettingOverride::UseProjectSettings;
};
// Project wide Streamline plugin settings. Some of them can be overridden locally through UStreamlineOverrideSettings.
// The two platform switches default to PLATFORM_WINDOWS, i.e. they are enabled by default on Windows only.
UCLASS(Config = Engine, DefaultConfig)
class STREAMLINERHI_API UStreamlineSettings: public UObject
{
GENERATED_BODY()
public:
/** Enable plugin features for D3D12, if the driver supports it at runtime */
UPROPERTY(Config, EditAnywhere, Category = "Platforms", DisplayName = "Enable plugin features for the D3D12RHI")
bool bEnableStreamlineD3D12 = PLATFORM_WINDOWS;
/** Enable plugin features for D3D11, if the driver supports it at runtime */
UPROPERTY(Config, EditAnywhere, Category = "Platforms", DisplayName = "Enable plugin features for the D3D11RHI (Reflex only)")
bool bEnableStreamlineD3D11 = PLATFORM_WINDOWS;
/**
* Enable DLSS Frame Generation in New Editor Window Play In Editor mode.
* This project wide setting can be locally overridden in the NVIDIA DLSS Frame Generation (Local) settings.
* Note: DLSS Frame Generation is not supported in editor viewports
*/
UPROPERTY(Config, EditAnywhere, Category = "Level Editor - Viewport", DisplayName = "Enable DLSS-FG in New Editor Window (PIE) mode")
bool bEnableDLSSFGInPlayInEditorViewports = true;
/**
* Load the Streamline debug overlay in non-Shipping configurations. Note that the overlay requires DLSS Frame Generation to be available.
* This project wide setting can be locally overridden in the NVIDIA DLSS Frame Generation (Local) settings.
* Modifying this setting requires an editor restart to take effect
*/
UPROPERTY(Config, EditAnywhere, Category = "General Settings", DisplayName = "Load Debug Overlay")
bool bLoadDebugOverlay = false;
/** Allow OTA updates of Streamline features */
UPROPERTY(Config, EditAnywhere, Category = "General Settings", DisplayName = "Allow OTA update")
bool bAllowOTAUpdate = true;
/** By default the DLSS Frame Generation plugin uses the UE Project ID to initialize Streamline. In some cases NVIDIA might provide a separate NVIDIA Application ID, which should be put here. */
UPROPERTY(Config, EditAnywhere, Category = "General Settings", DisplayName = "NVIDIA NGX Application ID", AdvancedDisplay)
int32 NVIDIANGXApplicationId = 0;
};

View File

@ -0,0 +1,66 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
using UnrealBuildTool;
using System.IO;
// Build rules of the StreamlineRHI module.
public class StreamlineRHI : ModuleRules
{
    public StreamlineRHI(ReadOnlyTargetRules Target) : base(Target)
    {
        PCHUsage = ModuleRules.PCHUsageMode.UseExplicitOrSharedPCHs;

        // No extra include paths and no public dependencies; the empty lists are kept as explicit placeholders.
        PublicIncludePaths.AddRange(new string[] { });
        PrivateIncludePaths.AddRange(new string[] { });
        PublicDependencyModuleNames.AddRange(new string[] { });

        string[] PrivateDependencies =
        {
            "Core",
            "CoreUObject",
            "Engine",
            "Projects",
            "RenderCore",
            "RHI",
            "Streamline",
        };
        PrivateDependencyModuleNames.AddRange(PrivateDependencies);

        // API specific modules that are registered as dynamically loaded.
        string[] DynamicallyLoadedModules =
        {
            "StreamlineD3D11RHI",
            "StreamlineD3D12RHI",
        };
        DynamicallyLoadedModuleNames.AddRange(DynamicallyLoadedModules);

        // UnrealEd is only a dependency when building the editor.
        if (Target.bBuildEditor)
        {
            PrivateDependencyModuleNames.Add("UnrealEd");
        }
    }
}

View File

@ -0,0 +1,39 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "StreamlineShaders.h"
#include "Modules/ModuleManager.h"
#include "Interfaces/IPluginManager.h"
#include "ShaderCore.h"
#define LOCTEXT_NAMESPACE "FStreamlineShadersModule"
DEFINE_LOG_CATEGORY_STATIC(LogStreamlineShaders, Log, All);
void FStreamlineShadersModule::StartupModule()
{
// write the plugin version to the log
// we use the StreamlineShaders module to write this information because it is the first plugin module loaded on supported platforms
TSharedPtr<IPlugin> ThisPlugin = IPluginManager::Get().FindPlugin(TEXT("StreamlineCore"));
UE_LOG(LogStreamlineShaders, Log, TEXT("Loaded Streamline plugin version %s"), *ThisPlugin->GetDescriptor().VersionName);
FString PluginShaderDir = FPaths::Combine(ThisPlugin->GetBaseDir(), TEXT("Shaders"));
AddShaderSourceDirectoryMapping(TEXT("/Plugin/StreamlineCore"), PluginShaderDir);
}
// Intentionally empty: this module performs no work on shutdown.
void FStreamlineShadersModule::ShutdownModule()
{
}
#undef LOCTEXT_NAMESPACE
IMPLEMENT_MODULE(FStreamlineShadersModule, StreamlineShaders)

View File

@ -0,0 +1,141 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "UIHintExtractionPass.h"
#include "Runtime/Launch/Resources/Version.h"
#if (ENGINE_MAJOR_VERSION == 5) && (ENGINE_MINOR_VERSION >= 2)
#include "DataDrivenShaderPlatformInfo.h"
#endif
// Thread group size of the UI hint extraction compute shader; passed to the shader as THREADGROUP_SIZEX / THREADGROUP_SIZEY.
static const int32 kUIHintExtractionComputeTileSizeX = FComputeShaderUtils::kGolden2DGroupSize;
static const int32 kUIHintExtractionComputeTileSizeY = FComputeShaderUtils::kGolden2DGroupSize;
// Global compute shader class of the UI hint extraction pass. It reads the back buffer and an alpha threshold and
// writes to the OutUIHintTexture UAV (see FParameters below).
class FStreamlineUIHintExtractionCS : public FGlobalShader
{
public:
// Restricts compilation to SM5 capable PC D3D platforms.
// Note that the preprocessor branches below complete the expression, including the closing parenthesis and semicolon.
static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters)
{
// Only cook for the platforms/RHIs where DLSS-FG is supported, which is DX11,DX12 [on Win64]
return IsFeatureLevelSupported(Parameters.Platform, ERHIFeatureLevel::SM5) &&
IsPCPlatform(Parameters.Platform) && (
#if (ENGINE_MAJOR_VERSION == 4) && (ENGINE_MINOR_VERSION < 27)
IsD3DPlatform(Parameters.Platform, false));
#else
IsD3DPlatform(Parameters.Platform));
#endif
}
// Passes the thread group size constants to the shader as THREADGROUP_SIZEX / THREADGROUP_SIZEY defines.
static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
{
FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
OutEnvironment.SetDefine(TEXT("THREADGROUP_SIZEX"), kUIHintExtractionComputeTileSizeX);
OutEnvironment.SetDefine(TEXT("THREADGROUP_SIZEY"), kUIHintExtractionComputeTileSizeY);
}
DECLARE_GLOBAL_SHADER(FStreamlineUIHintExtractionCS);
SHADER_USE_PARAMETER_STRUCT(FStreamlineUIHintExtractionCS, FGlobalShader);
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
// Input images
SHADER_PARAMETER(float, AlphaThreshold)
SHADER_PARAMETER_TEXTURE(Texture2D, BackBuffer)
// The commented out parameters below are disabled; they mirror parameters of the velocity combine shader.
// SHADER_PARAMETER_SAMPLER(SamplerState, VelocityTextureSampler)
// SHADER_PARAMETER_STRUCT(FScreenPassTextureViewportParameters, Velocity)
// SHADER_PARAMETER_RDG_TEXTURE(Texture2D, DepthTexture)
// SHADER_PARAMETER_SAMPLER(SamplerState, DepthTextureSampler)
// SHADER_PARAMETER_STRUCT_REF(FViewUniformShaderParameters, View)
// Output images
SHADER_PARAMETER_RDG_TEXTURE_UAV(RWTexture2D, OutUIHintTexture)
// SHADER_PARAMETER_STRUCT(FScreenPassTextureViewportParameters, CombinedVelocity)
END_SHADER_PARAMETER_STRUCT()
};
IMPLEMENT_GLOBAL_SHADER(FStreamlineUIHintExtractionCS, "/Plugin/StreamlineCore/Private/UIHintExtraction.usf", "UIHintExtractionMain", SF_Compute);
/**
 * Adds a compute pass that extracts the UI hint texture ("Streamline.UIColorAndAlpha") from the back buffer.
 *
 * @param GraphBuilder      Render graph the output texture and the compute pass are added to.
 * @param InAlphaThreshold  Alpha threshold passed to the shader; clamped to [0, 1] before use.
 * @param InBackBuffer      Back buffer that is read by the shader; its size also determines the output size.
 * @return The newly created PF_B8G8R8A8 render graph texture the pass writes to; it has the size of the back buffer.
 */
FRDGTextureRef AddStreamlineUIHintExtractionPass(
    FRDGBuilder& GraphBuilder,
//  const FViewInfo& View,
    const float InAlphaThreshold,
    const FTextureRHIRef& InBackBuffer
//  FRDGTextureRef InVelocityTexture
)
{
    // The output covers the whole back buffer.
    auto* BackBufferTexture = InBackBuffer->GetTexture2D();
    const FIntPoint BackBufferDimension = { int32(BackBufferTexture->GetSizeX()), int32(BackBufferTexture->GetSizeY()) };
    const FIntRect OutputViewRect = { FIntPoint::ZeroValue, BackBufferDimension };

    const FRDGTextureDesc UIHintTextureDesc =
        FRDGTextureDesc::Create2D(
            OutputViewRect.Size(),
            PF_B8G8R8A8,
            FClearValueBinding::Black,
            TexCreate_ShaderResource | TexCreate_UAV);
    const TCHAR* OutputName = TEXT("Streamline.UIColorAndAlpha");
    FRDGTextureRef UIHintTexture = GraphBuilder.CreateTexture(
        UIHintTextureDesc,
        OutputName);

    FStreamlineUIHintExtractionCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FStreamlineUIHintExtractionCS::FParameters>();

    // inputs: alpha threshold and back buffer
    PassParameters->AlphaThreshold = FMath::Clamp(InAlphaThreshold, 0.0f, 1.0f);
    PassParameters->BackBuffer = InBackBuffer;

    // output: UI hint texture
    PassParameters->OutUIHintTexture = GraphBuilder.CreateUAV(UIHintTexture);

    FStreamlineUIHintExtractionCS::FPermutationDomain PermutationVector;
    TShaderMapRef<FStreamlineUIHintExtractionCS> ComputeShader(GetGlobalShaderMap(GMaxRHIFeatureLevel), PermutationVector);

    // Dispatch with the same tile size the shader was compiled with (THREADGROUP_SIZEX / THREADGROUP_SIZEY),
    // so the two cannot drift apart when the constants are changed.
    const FIntPoint ComputeTileSize(kUIHintExtractionComputeTileSizeX, kUIHintExtractionComputeTileSizeY);
    FComputeShaderUtils::AddPass(
        GraphBuilder,
        RDG_EVENT_NAME("Streamline UI Hint extraction (%dx%d) [%d,%d -> %d,%d]",
            OutputViewRect.Width(), OutputViewRect.Height(),
            OutputViewRect.Min.X, OutputViewRect.Min.Y,
            OutputViewRect.Max.X, OutputViewRect.Max.Y
        ),
        ComputeShader,
        PassParameters,
        FComputeShaderUtils::GetGroupCount(OutputViewRect.Size(), ComputeTileSize));

    return UIHintTexture;
}

View File

@ -0,0 +1,171 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "VelocityCombinePass.h"
#include "Runtime/Launch/Resources/Version.h"
#if (ENGINE_MAJOR_VERSION == 5) && (ENGINE_MINOR_VERSION >= 2)
#include "DataDrivenShaderPlatformInfo.h"
#endif
#include "SceneRendering.h"
#include "ShaderPermutation.h"
#include "ScenePrivate.h"
// Thread group size of the velocity combine compute shader; used both for the THREADGROUP_SIZEX / THREADGROUP_SIZEY
// shader defines and for computing the dispatch group count.
const FIntPoint kVelocityCombineComputeTileSize ( FComputeShaderUtils::kGolden2DGroupSize, FComputeShaderUtils::kGolden2DGroupSize);
// Global compute shader class of the velocity combine pass. It has two boolean permutation dimensions:
// DILATE_MOTION_VECTORS and SUPPORT_ALTERNATE_MOTION_VECTOR.
class FStreamlineVelocityCombineCS : public FGlobalShader
{
public:
class FDilateMotionVectorsDim : SHADER_PERMUTATION_BOOL("DILATE_MOTION_VECTORS");
class FSupportAlternateMotionVectorDim : SHADER_PERMUTATION_BOOL("SUPPORT_ALTERNATE_MOTION_VECTOR");
using FPermutationDomain = TShaderPermutationDomain<FDilateMotionVectorsDim, FSupportAlternateMotionVectorDim>;
// Restricts compilation to SM5 capable PC D3D platforms; the decision does not depend on the permutation.
// Note that the preprocessor branches below complete the return statement.
static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters)
{
// Only cook for the platforms/RHIs where DLSS-FG is supported, which is DX11,DX12 [on Win64]
return IsFeatureLevelSupported(Parameters.Platform, ERHIFeatureLevel::SM5) &&
IsPCPlatform(Parameters.Platform) &&
#if (ENGINE_MAJOR_VERSION == 4) && (ENGINE_MINOR_VERSION <= 26)
IsD3DPlatform(Parameters.Platform, false);
#else
IsD3DPlatform(Parameters.Platform);
#endif
}
// Passes the thread group size to the shader as THREADGROUP_SIZEX / THREADGROUP_SIZEY defines.
static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
{
FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
OutEnvironment.SetDefine(TEXT("THREADGROUP_SIZEX"), kVelocityCombineComputeTileSize.X);
OutEnvironment.SetDefine(TEXT("THREADGROUP_SIZEY"), kVelocityCombineComputeTileSize.Y);
}
DECLARE_GLOBAL_SHADER(FStreamlineVelocityCombineCS);
SHADER_USE_PARAMETER_STRUCT(FStreamlineVelocityCombineCS, FGlobalShader);
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
// Input images
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, VelocityTexture)
SHADER_PARAMETER_SAMPLER(SamplerState, VelocityTextureSampler)
SHADER_PARAMETER_STRUCT(FScreenPassTextureViewportParameters, Velocity)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, DepthTexture)
SHADER_PARAMETER_SAMPLER(SamplerState, DepthTextureSampler)
// The vector type of the jitter parameter depends on the engine version (selected via DLSS_ENGINE_USES_FVECTOR2D).
#if DLSS_ENGINE_USES_FVECTOR2D
SHADER_PARAMETER(FVector2D, TemporalJitterPixels)
#else
SHADER_PARAMETER(FVector2f, TemporalJitterPixels)
#endif
SHADER_PARAMETER_STRUCT_REF(FViewUniformShaderParameters, View)
// Output images
SHADER_PARAMETER_RDG_TEXTURE_UAV(RWTexture2D, OutVelocityCombinedTexture)
SHADER_PARAMETER_STRUCT(FScreenPassTextureViewportParameters, CombinedVelocity)
// motion vectors to consider instead of the standard ones from the engine
SHADER_PARAMETER_RDG_TEXTURE(Texture2D<float2>, AlternateMotionVectorsTexture)
END_SHADER_PARAMETER_STRUCT()
};
IMPLEMENT_GLOBAL_SHADER(FStreamlineVelocityCombineCS, "/Plugin/StreamlineCore/Private/VelocityCombine.usf", "VelocityCombineMain", SF_Compute);
/**
 * Adds the compute pass that produces the "Streamline.CombinedVelocity" texture from the scene depth and velocity textures.
 *
 * @param GraphBuilder                  Render graph the output texture and the compute pass are added to.
 * @param View                          View providing the view rectangle, temporal jitter, uniform buffer and shader map.
 * @param InSceneDepthTexture           Scene depth; also used to describe the input viewport.
 * @param InVelocityTexture             Scene velocity; must be 1x1 or have the same extent as the depth texture.
 * @param AlternateMotionVectorTexture  Optional replacement motion vectors; may be nullptr, which selects the
 *                                      shader permutation without alternate motion vector support.
 * @param bDilateMotionVectors          Selects the dilating permutation; the output then has the secondary view rect size
 *                                      instead of the view rect size.
 * @return The newly created PF_G16R16F render graph texture the pass writes to.
 */
FRDGTextureRef AddStreamlineVelocityCombinePass(
    FRDGBuilder& GraphBuilder,
    const FViewInfo& View,
    FRDGTextureRef InSceneDepthTexture,
    FRDGTextureRef InVelocityTexture,
    FRDGTextureRef AlternateMotionVectorTexture,
    bool bDilateMotionVectors
)
{
    const FIntRect InputViewRect = View.ViewRect;
    const FIntRect OutputViewRect = FIntRect(FIntPoint::ZeroValue, bDilateMotionVectors ? View.GetSecondaryViewRectSize() : View.ViewRect.Size());

    const FRDGTextureDesc CombinedVelocityDesc =
        FRDGTextureDesc::Create2D(
            OutputViewRect.Size(),
            PF_G16R16F,
            FClearValueBinding::Black,
            TexCreate_ShaderResource | TexCreate_UAV);
    const TCHAR* OutputName = TEXT("Streamline.CombinedVelocity");
    FRDGTextureRef CombinedVelocityTexture = GraphBuilder.CreateTexture(
        CombinedVelocityDesc,
        OutputName);

    FStreamlineVelocityCombineCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FStreamlineVelocityCombineCS::FParameters>();
    const bool bHasAlternateMotionVectors = AlternateMotionVectorTexture != nullptr;

    // input velocity
    {
        PassParameters->VelocityTexture = InVelocityTexture;
        PassParameters->VelocityTextureSampler = TStaticSamplerState<SF_Point>::GetRHI();
        // we use InSceneDepthTexture here and not InVelocityTexture since the latter can be a 1x1 black texture
        check(InVelocityTexture->Desc.Extent == FIntPoint(1, 1) || InVelocityTexture->Desc.Extent == InSceneDepthTexture->Desc.Extent);
        FScreenPassTextureViewport velocityViewport(InSceneDepthTexture, InputViewRect);
        FScreenPassTextureViewportParameters velocityViewportParameters = GetScreenPassTextureViewportParameters(velocityViewport);
        PassParameters->Velocity = velocityViewportParameters;
    }
    // input depth
    {
        PassParameters->DepthTexture = InSceneDepthTexture;
        PassParameters->DepthTextureSampler = TStaticSamplerState<SF_Point>::GetRHI();
    }
    // replacement motion vectors for items like reflections that DLSS might prefer to track
    {
        PassParameters->AlternateMotionVectorsTexture = AlternateMotionVectorTexture;
    }
    // output combined velocity
    {
        PassParameters->OutVelocityCombinedTexture = GraphBuilder.CreateUAV(CombinedVelocityTexture);
        FScreenPassTextureViewport CombinedVelocityViewport(CombinedVelocityTexture, OutputViewRect);
        FScreenPassTextureViewportParameters CombinedVelocityViewportParameters = GetScreenPassTextureViewportParameters(CombinedVelocityViewport);
        PassParameters->CombinedVelocity = CombinedVelocityViewportParameters;
    }
    // various state
    {
        // Use the same switch as the TemporalJitterPixels declaration in FParameters, so that the assigned type
        // always matches the declared parameter type.
#if DLSS_ENGINE_USES_FVECTOR2D
        PassParameters->TemporalJitterPixels = View.TemporalJitterPixels;
#else
        PassParameters->TemporalJitterPixels = FVector2f(View.TemporalJitterPixels); // LWC_TODO: Precision loss
#endif
        PassParameters->View = View.ViewUniformBuffer;
    }

    FStreamlineVelocityCombineCS::FPermutationDomain PermutationVector;
    PermutationVector.Set<FStreamlineVelocityCombineCS::FDilateMotionVectorsDim>(bDilateMotionVectors);
    PermutationVector.Set<FStreamlineVelocityCombineCS::FSupportAlternateMotionVectorDim>(bHasAlternateMotionVectors);
    TShaderMapRef<FStreamlineVelocityCombineCS> ComputeShader(View.ShaderMap, PermutationVector);

    // Both optional name fragments carry their own leading space so that the event name reads
    // "Velocity Combine[ Dilate] <source> (...)" in every combination.
    FComputeShaderUtils::AddPass(
        GraphBuilder,
        RDG_EVENT_NAME("Velocity Combine%s%s (%dx%d -> %dx%d)",
            bDilateMotionVectors ? TEXT(" Dilate") : TEXT(""),
            bHasAlternateMotionVectors ? TEXT(" AlternateMotionVectors") : TEXT(" SceneMotionVectors"),
            InputViewRect.Width(), InputViewRect.Height(),
            OutputViewRect.Width(), OutputViewRect.Height()
        ),
        ComputeShader,
        PassParameters,
        FComputeShaderUtils::GetGroupCount(OutputViewRect.Size(), kVelocityCombineComputeTileSize));

    return CombinedVelocityTexture;
}

View File

@ -0,0 +1,25 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "Modules/ModuleManager.h"
class FStreamlineShadersModule final : public IModuleInterface
{
public:
/** IModuleInterface implementation */
virtual void StartupModule() override;
virtual void ShutdownModule() override;
private:
};

View File

@ -0,0 +1,28 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "CoreMinimal.h"
#include "RendererInterface.h"
#include "ScreenPass.h"
#include "Runtime/Launch/Resources/Version.h"
// Compatibility shim for engines before 5.1 (all 4.x versions and 5.0): the declaration below then uses FTexture2DRHIRef.
// && binds tighter than ||, so the condition reads (MAJOR == 4) || (MAJOR == 5 && MINOR < 1).
// NOTE(review): this is a macro, so it presumably also replaces FTextureRHIRef in every file that includes this
// header - confirm that this is intended.
#if ENGINE_MAJOR_VERSION == 4 || ENGINE_MAJOR_VERSION == 5 && ENGINE_MINOR_VERSION < 1
#define FTextureRHIRef FTexture2DRHIRef
#endif
// Adds the UI hint extraction pass to the render graph and returns the render graph texture it produces.
// Takes the alpha threshold value and the back buffer to read from; the commented out parameters are disabled.
extern STREAMLINESHADERS_API FRDGTextureRef AddStreamlineUIHintExtractionPass(
FRDGBuilder& GraphBuilder,
// const FViewInfo& View,
const float InAlphaThresholdValue,
const FTextureRHIRef& InBackBuffer
// FRDGTextureRef InVelocityTexture
);

View File

@ -0,0 +1,25 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "CoreMinimal.h"
#include "RendererInterface.h"
#include "ScreenPass.h"
/**
 * Declares the velocity combine pass exported from the StreamlineShaders module.
 *
 * @param GraphBuilder                 Render graph builder passed to the pass.
 * @param View                         View the pass is run for.
 * @param InSceneDepthTexture          Scene depth texture input.
 * @param InVelocityTexture            Velocity texture input.
 * @param AlternateMotionVectorTexture Additional motion vector texture input (NOTE(review): when it is used instead of InVelocityTexture is not visible here - confirm in the implementation).
 * @param bDilateMotionVectors         Flag passed to the pass; presumably enables motion vector dilation - confirm.
 * @return A render graph texture reference produced by the pass.
 */
extern STREAMLINESHADERS_API FRDGTextureRef AddStreamlineVelocityCombinePass(
FRDGBuilder& GraphBuilder,
const FViewInfo& View,
FRDGTextureRef InSceneDepthTexture,
FRDGTextureRef InVelocityTexture,
FRDGTextureRef AlternateMotionVectorTexture,
bool bDilateMotionVectors
);

View File

@ -0,0 +1,56 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
using UnrealBuildTool;
using System.IO;
/// <summary>
/// UnrealBuildTool rules for the StreamlineShaders module: PCH mode, include paths,
/// module dependencies and the DLSS_ENGINE_USES_FVECTOR2D private definition.
/// </summary>
public class StreamlineShaders : ModuleRules
{
	public StreamlineShaders(ReadOnlyTargetRules Target) : base(Target)
	{
		PCHUsage = ModuleRules.PCHUsageMode.UseExplicitOrSharedPCHs;

		// No additional public include paths are registered.

		// The module includes private Renderer headers from the engine source tree.
		string RendererPrivateDir = Path.Combine(EngineDirectory,"Source/Runtime/Renderer/Private");
		PrivateIncludePaths.Add(RendererPrivateDir);

		string[] PublicModules = { "Core", "RenderCore", "Renderer" };
		PublicDependencyModuleNames.AddRange(PublicModules);

		string[] PrivateModules = { "Engine", "RHI", "Projects" };
		PrivateDependencyModuleNames.AddRange(PrivateModules);

		// 4.x and early access 5.0 engines used FVector2D type instead of FVector2f type for shader parameters
		string FVector2DFlag =
			((Target.Version.MajorVersion == 4) || (Target.Version.BranchName == "++UE5+Release-5.0-EarlyAccess"))
				? "1"
				: "0";
		PrivateDefinitions.Add(string.Format("DLSS_ENGINE_USES_FVECTOR2D={0}", FVector2DFlag));
	}
}

View File

@ -0,0 +1,128 @@
/*
* Copyright (c) 2022 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#if UE_5_0_OR_LATER
using EpicGames.Core;
#else
using Tools.DotNETCommon;
#endif
using System.Collections.Generic;
using UnrealBuildTool;
using System.IO;
/// <summary>
/// UnrealBuildTool rules for the external (prebuilt) Streamline SDK module.
/// On supported Windows platforms it exposes the SDK include directory, publishes the interposer
/// binary name as a definition, and registers the Streamline binaries as runtime dependencies.
/// </summary>
public class Streamline : ModuleRules
{
	/// <summary>Returns true when the target platform is part of the Windows platform group.</summary>
	protected virtual bool IsSupportedWindowsPlatform(ReadOnlyTargetRules Target)
	{
		return Target.Platform.IsInGroup(UnrealPlatformGroup.Windows);
	}

	/// <summary>
	/// Registers a symbol (.pdb) file as a DebugNonUFS runtime dependency, but only if it is present on disk.
	/// Symbol files are treated as optional here so that a binary drop without them does not add
	/// dependencies on files that do not exist.
	/// </summary>
	private void AddSymbolFileIfPresent(string SymbolFilePath)
	{
		if (File.Exists(SymbolFilePath))
		{
			RuntimeDependencies.Add(SymbolFilePath, StagedFileType.DebugNonUFS);
		}
	}

	public Streamline (ReadOnlyTargetRules Target) : base(Target)
	{
		Type = ModuleType.External;

		if (IsSupportedWindowsPlatform(Target))
		{
			string StreamlinePath = ModuleDirectory + "/";
			PublicIncludePaths.Add(StreamlinePath + "include/");

			string SLProductionBinariesPath = PluginDirectory + @"\Binaries\ThirdParty\Win64\";
			string SLDevelopmentBinariesPath = SLProductionBinariesPath + @"Development\";
			string SLDebugBinariesPath = SLProductionBinariesPath + @"Debug\";

			// those are loaded at runtime by the SL & NGX plugin loader which accepts our path
			List<string> StreamlineDlls = new List<string>
			{
				"sl.interposer.dll",
				"sl.common.dll",
				//"sl.latewarp.dll",
				//"nvngx_latewarp.dll",
				"sl.reflex.dll",
				"sl.pcl.dll",
				"nvngx_dlssg.dll",
				"sl.dlss_g.dll",
				"nvngx_deepdvc.dll",
				"sl.deepdvc.dll",
			};

			// Path.ChangeExtension returns a new string, so the renamed values have to be collected.
			// The previous List.ForEach call discarded them, which left ".dll" names in this list.
			List<string> StreamlinePdbs = StreamlineDlls.ConvertAll(DllFile => Path.ChangeExtension(DllFile, ".pdb"));

			// The first entry of the DLL list is the interposer.
			PublicDefinitions.Add("STREAMLINE_INTERPOSER_BINARY_NAME=TEXT(\"" + StreamlineDlls[0] + "\")");
			PublicDefinitions.Add("SL_BUILD_DEEPDVC=1");

			bool bHasProductionBinaries = Directory.Exists(SLProductionBinariesPath);
			bool bHasDevelopmentBinaries = Directory.Exists(SLDevelopmentBinariesPath);
			bool bHasDebugBinaries = Directory.Exists(SLDebugBinariesPath);
			bool bIsShipping = Target.Configuration == UnrealTargetConfiguration.Shipping;

			foreach (string StreamlineDll in StreamlineDlls)
			{
				// Production binaries are always registered; development/debug variants only for non-shipping targets.
				RuntimeDependencies.Add(SLProductionBinariesPath + StreamlineDll, StagedFileType.NonUFS);
				if (!bIsShipping)
				{
					if (bHasDevelopmentBinaries)
					{
						RuntimeDependencies.Add(SLDevelopmentBinariesPath + StreamlineDll, StagedFileType.NonUFS);
					}
					if (bHasDebugBinaries)
					{
						RuntimeDependencies.Add(SLDebugBinariesPath + StreamlineDll, StagedFileType.NonUFS);
					}
				}
			}

			if (!bIsShipping)
			{
				// include symbols in non-shipping builds
				foreach (string StreamlinePdb in StreamlinePdbs)
				{
					if (bHasProductionBinaries)
					{
						AddSymbolFileIfPresent(SLProductionBinariesPath + StreamlinePdb);
					}
					if (bHasDevelopmentBinaries)
					{
						AddSymbolFileIfPresent(SLDevelopmentBinariesPath + StreamlinePdb);
					}
					if (bHasDebugBinaries)
					{
						AddSymbolFileIfPresent(SLDebugBinariesPath + StreamlinePdb);
					}
				}

				// useful to have debug overlay during testing, but we don't want to ship with that
				List<string> StreamlineOverlayBinaries = new List<string>
				{
					"sl.imgui.dll",
					"sl.imgui.pdb",
				};
				foreach (string StreamlineOverlayBinary in StreamlineOverlayBinaries)
				{
					StagedFileType FileType = StreamlineOverlayBinary.EndsWith("pdb") ? StagedFileType.DebugNonUFS : StagedFileType.NonUFS;
					if (bHasDevelopmentBinaries)
					{
						RuntimeDependencies.Add(SLDevelopmentBinariesPath + StreamlineOverlayBinary, FileType);
					}
					if (bHasDebugBinaries)
					{
						RuntimeDependencies.Add(SLDebugBinariesPath + StreamlineOverlayBinary, FileType);
					}
				}
			}
		}
	}
}

View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<TpsData xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<Name>NVStreamline</Name>
<Location>/Engine/Plugins/Runtime/Nvidia/Streamline/Source/ThirdParty/Streamline</Location>
<Function>NVStreamline</Function>
<Justification>SDK for NVIDIA Rendering Technologies</Justification>
</TpsData>

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2022-2023 NVIDIA CORPORATION. All rights reserved
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include <limits.h>
#include "sl_struct.h"
#include "sl_consts.h"
#include "sl_version.h"
#include "sl_result.h"
#include "sl_appidentity.h"
#include "sl_device_wrappers.h"
#include "sl_core_api.h"
#include "sl_core_types.h"
//! Declares a value-initialized (null) pointer called `name` to the function type `PFun_<name>`.
#define SL_FUN_DECL(name) PFun_##name* name{}

//! IMPORTANT: Macros which use `slGetFeatureFunction` can only be used AFTER device is set by calling either slSetD3DDevice or slSetVulkanInfo.

//! Looks up feature function `func` by its name and stores it in the existing pointer variable `func`.
//! Evaluates to the value returned by slGetFeatureFunction.
//! No `##` is used in front of `func`: pasting `)` with an identifier does not form a valid token.
#define SL_FEATURE_FUN_IMPORT(feature, func) slGetFeatureFunction(feature, #func, (void*&) func)

//! Declares a function-local static pointer `s_<func>` and fills it on first use.
//! If the lookup fails, the enclosing function returns the failing sl::Result, so this macro
//! can only be used inside functions that return sl::Result.
#define SL_FEATURE_FUN_IMPORT_STATIC(feature, func)                                \
    static PFun_##func* s_##func{};                                                \
    if (!s_##func) {                                                               \
        sl::Result res = slGetFeatureFunction(feature, #func, (void*&) s_##func);  \
        if (res != sl::Result::eOk) return res;                                    \
    }

View File

@ -0,0 +1,21 @@
#pragma once
#include <cstdint>
#include "sl_struct.h"
namespace sl
{

//! Engine types
//!
//! Enumerators are numbered consecutively from zero; eCount comes after the last
//! engine entry, so its value equals the number of engine entries.
enum class EngineType : uint32_t
{
    eCustom = 0,
    eUnreal = 1,
    eUnity = 2,
    eCount = 3
};

} // namespace sl

View File

@ -0,0 +1,254 @@
/*
* Copyright (c) 2022-2023 NVIDIA CORPORATION. All rights reserved
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include <stdint.h>
#include <assert.h>
#include <string>
#include "sl_struct.h"
//! Defines inline bitwise operators for enum type T, computed through casts to uint64_t.
//!
//! Generated operators:
//!   a & b   -> bool, true when the two values share at least one set bit (NOT a masked T value)
//!   a &= b  -> stores the masked value in `a` and returns a reference to it
//!   a | b   -> T holding the union of the bits
//!   a |= b  -> stores the union in `lhs` and returns a reference to it
//!   ~a      -> T holding the bitwise complement of the uint64_t value
//!
//! The macro expands to function definitions, so it has to be used where free functions may be defined.
#define SL_ENUM_OPERATORS_64(T) \
inline bool operator&(T a, T b) \
{ \
return ((uint64_t)a & (uint64_t)b) != 0; \
} \
\
inline T& operator&=(T& a, T b) \
{ \
a = (T)((uint64_t)a & (uint64_t)b); \
return a; \
} \
\
inline T operator|(T a, T b) \
{ \
return (T)((uint64_t)a | (uint64_t)b); \
} \
\
inline T& operator |= (T& lhs, T rhs) \
{ \
lhs = (T)((uint64_t)lhs | (uint64_t)rhs); \
return lhs; \
} \
\
inline T operator~(T a) \
{ \
return (T)~((uint64_t)a); \
}
//! Defines inline bitwise operators for enum type T, computed through casts to uint32_t.
//!
//! Generated operators:
//!   a & b   -> bool, true when the two values share at least one set bit (NOT a masked T value)
//!   a &= b  -> stores the masked value in `a` and returns a reference to it
//!   a | b   -> T holding the union of the bits
//!   a |= b  -> stores the union in `lhs` and returns a reference to it
//!   ~a      -> T holding the bitwise complement of the uint32_t value
//!
//! The macro expands to function definitions, so it has to be used where free functions may be defined.
#define SL_ENUM_OPERATORS_32(T) \
inline bool operator&(T a, T b) \
{ \
return ((uint32_t)a & (uint32_t)b) != 0; \
} \
\
inline T& operator&=(T& a, T b) \
{ \
a = (T)((uint32_t)a & (uint32_t)b); \
return a; \
} \
\
inline T operator|(T a, T b) \
{ \
return (T)((uint32_t)a | (uint32_t)b); \
} \
\
inline T& operator |= (T& lhs, T rhs) \
{ \
lhs = (T)((uint32_t)lhs | (uint32_t)rhs); \
return lhs; \
} \
\
inline T operator~(T a) \
{ \
return (T)~((uint32_t)a); \
}
namespace sl
{
//! For cases when value has to be provided and we don't have good default
constexpr float INVALID_FLOAT = 3.40282346638528859811704183484516925440e38f;
constexpr uint32_t INVALID_UINT = 0xffffffff;

//! Normally host would work with no more than 2 frames at the same time but sl.reflex sometimes
//! needs to send markers for previous and next frame so the total number of in-flight frames can be higher
constexpr uint32_t MAX_FRAMES_IN_FLIGHT = 6;

//! Three unsigned 32-bit components. Has no constructors, so members are not initialized by default.
struct uint3
{
    uint32_t x;
    uint32_t y;
    uint32_t z;
};

//! Two float components; default construction sets every component to INVALID_FLOAT.
struct float2
{
    float2() : x(INVALID_FLOAT), y(INVALID_FLOAT) {}
    float2(float _x, float _y) : x(_x), y(_y) {}
    float x, y;
};

//! Three float components; default construction sets every component to INVALID_FLOAT.
struct float3
{
    float3() : x(INVALID_FLOAT), y(INVALID_FLOAT), z(INVALID_FLOAT) {}
    float3(float _x, float _y, float _z) : x(_x), y(_y), z(_z) {}
    float x, y, z;
};

//! Four float components; default construction sets every component to INVALID_FLOAT.
struct float4
{
    float4() : x(INVALID_FLOAT), y(INVALID_FLOAT), z(INVALID_FLOAT), w(INVALID_FLOAT) {}
    float4(float _x, float _y, float _z, float _w) : x(_x), y(_y), z(_z), w(_w) {}
    float x, y, z, w;
};

//! 4x4 matrix stored as four float4 rows.
//! Row indices are not range checked; callers must pass i in [0, 3].
struct float4x4
{
    //! All access points take row index as a parameter
    inline float4& operator[](uint32_t i) { return row[i]; }
    inline const float4& operator[](uint32_t i) const { return row[i]; }
    inline void setRow(uint32_t i, const float4& v) { row[i] = v; }
    //! Read-only row access. Const-qualified so it also works on const matrices
    //! (it only ever handed out a const reference).
    inline const float4& getRow(uint32_t i) const { return row[i]; }

    //! Row major matrix
    float4 row[4];
};
//! Rectangle given by its top/left origin and its width/height; all members default to zero.
struct Extent
{
    uint32_t top{};
    uint32_t left{};
    uint32_t width{};
    uint32_t height{};

    //! True only when neither dimension is zero.
    inline operator bool() const
    {
        if (width == 0)
        {
            return false;
        }
        return height != 0;
    }

    //! Compares the size only and ignores the origin.
    inline bool isSameRes(const Extent& rhs) const
    {
        return !(width != rhs.width || height != rhs.height);
    }

    //! Full comparison: origin and size.
    inline bool operator==(const Extent& rhs) const
    {
        const bool sameOrigin = (top == rhs.top) && (left == rhs.left);
        const bool sameSize = (width == rhs.width) && (height == rhs.height);
        return sameOrigin && sameSize;
    }

    inline bool operator!=(const Extent& rhs) const
    {
        return !(*this == rhs);
    }

#if defined(_WINDEF_)
    // Cast helper for sl::Extent->RECT when windef.h has been included
    inline operator RECT() const
    {
        // RECT member order: left, top, right, bottom
        return RECT { (LONG)left, (LONG)top, (LONG)(left + width), (LONG)(top + height) };
    }
#endif
};
//! Three-state flag for cases when value has to be provided and we don't have good default:
//! eInvalid marks a flag that has not been set to eFalse or eTrue.
enum Boolean : char
{
    eFalse = 0,
    eTrue = 1,
    eInvalid = 2
};
//! Common constants, all parameters must be provided unless they are marked as optional
//!
//! Members whose type default-constructs to INVALID_FLOAT (float2/float3/float4 based members) or that are
//! explicitly initialized to INVALID_FLOAT / Boolean::eInvalid start out in the "not provided" state.
//! Member order is part of the struct layout: do not reorder, only append (see the note at the end).
//!
//! {DCD35AD7-4E4A-4BAD-A90C-E0C49EB23AFE}
SL_STRUCT_BEGIN(Constants, StructType({ 0xdcd35ad7, 0x4e4a, 0x4bad, { 0xa9, 0xc, 0xe0, 0xc4, 0x9e, 0xb2, 0x3a, 0xfe } }), kStructVersion2)
//! IMPORTANT: All matrices are row major (see float4x4 definition) and
//! must NOT contain temporal AA jitter offset (if any). Any jitter offset
//! should be provided as the additional parameter Constants::jitterOffset (see below)
//! Specifies matrix transformation from the camera view to the clip space.
float4x4 cameraViewToClip;
//! Specifies matrix transformation from the clip space to the camera view space.
float4x4 clipToCameraView;
//! Optional - Specifies matrix transformation describing lens distortion in clip space.
float4x4 clipToLensClip;
//! Specifies matrix transformation from the current clip to the previous clip space.
//! clipToPrevClip = clipToView * viewToViewPrev * viewToClipPrev
//! Sample code can be found in sl_matrix_helpers.h
float4x4 clipToPrevClip;
//! Specifies matrix transformation from the previous clip to the current clip space.
//! prevClipToClip = clipToPrevClip.inverse()
float4x4 prevClipToClip;
//! Specifies pixel space jitter offset
float2 jitterOffset;
//! Specifies scale factors used to normalize motion vectors (so the values are in [-1,1] range)
float2 mvecScale;
//! Optional - Specifies camera pinhole offset if used.
float2 cameraPinholeOffset;
//! Specifies camera position in world space.
float3 cameraPos;
//! Specifies camera up vector in world space.
float3 cameraUp;
//! Specifies camera right vector in world space.
float3 cameraRight;
//! Specifies camera forward vector in world space.
float3 cameraFwd;
//! Specifies camera near view plane distance.
float cameraNear = INVALID_FLOAT;
//! Specifies camera far view plane distance.
float cameraFar = INVALID_FLOAT;
//! Specifies camera field of view in radians.
float cameraFOV = INVALID_FLOAT;
//! Specifies camera aspect ratio defined as view space width divided by height.
float cameraAspectRatio = INVALID_FLOAT;
//! Specifies which value represents an invalid (un-initialized) value in the motion vectors buffer
//! NOTE: This is only required if `cameraMotionIncluded` is set to false and SL needs to compute it.
float motionVectorsInvalidValue = INVALID_FLOAT;
//! Specifies if depth values are inverted (value closer to the camera is higher) or not.
Boolean depthInverted = Boolean::eInvalid;
//! Specifies if camera motion is included in the MVec buffer.
Boolean cameraMotionIncluded = Boolean::eInvalid;
//! Specifies if motion vectors are 3D or not.
Boolean motionVectors3D = Boolean::eInvalid;
//! Specifies if previous frame has no connection to the current one (i.e. motion vectors are invalid)
Boolean reset = Boolean::eInvalid;
//! The three flags below default to eFalse (unlike the flags above, which default to eInvalid).
//! Specifies if orthographic projection is used or not.
Boolean orthographicProjection = Boolean::eFalse;
//! Specifies if motion vectors are already dilated or not.
Boolean motionVectorsDilated = Boolean::eFalse;
//! Specifies if motion vectors are jittered or not.
Boolean motionVectorsJittered = Boolean::eFalse;
//! Version 2 members:
//!
//! Optional heuristic that specifies the minimum depth difference between two objects in screen-space.
//! The units of the value are in linear depth units.
//! Linear depth is computed as:
//! if depthInverted is false: `lin_depth = 1 / (1 - depth)`
//! if depthInverted is true: `lin_depth = 1 / depth`
//!
//! Although unlikely to need to be modified, smaller thresholds are useful when depth units are
//! unusually compressed into a small dynamic range near 1.
//!
//! If not specified, the default value is 40.0f.
float minRelativeLinearDepthObjectSeparation = 40.0f;
//! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
}

View File

@ -0,0 +1,298 @@
/*
* Copyright (c) 2022-2024 NVIDIA CORPORATION. All rights reserved
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include <limits.h>
#include "sl_struct.h"
#include "sl_consts.h"
#include "sl_version.h"
#include "sl_result.h"
#include "sl_appidentity.h"
#include "sl_device_wrappers.h"
#include "sl_core_types.h"
// SL_API is the linkage prefix put in front of every core API declaration in this header.
// - SL_INTERPOSER defined and _WIN32 defined: C linkage plus __declspec(dllexport).
// - SL_INTERPOSER defined on any other platform: compilation stops with an #error.
// - SL_INTERPOSER not defined: C linkage only.
#if defined(SL_INTERPOSER)
#if defined(_WIN32)
#define SL_API extern "C" __declspec(dllexport)
#else
#error Unsupported Platform!
#endif
#else
#define SL_API extern "C"
#endif
#pragma region SL_API
//! Streamline core API functions (check feature specific headers for additional APIs)
//!
//! Each alias below names the function TYPE (not a pointer type) of the SL_API function with the
//! same name minus the `PFun_` prefix; write `PFun_slInit*` when a function pointer is needed.
//! The parameter lists mirror the SL_API declarations further down, without their default arguments.
using PFun_slInit = sl::Result(const sl::Preferences& pref, uint64_t sdkVersion);
using PFun_slShutdown = sl::Result();
using PFun_slIsFeatureSupported = sl::Result(sl::Feature feature, const sl::AdapterInfo& adapterInfo);
using PFun_slIsFeatureLoaded = sl::Result(sl::Feature feature, bool& loaded);
using PFun_slSetFeatureLoaded = sl::Result(sl::Feature feature, bool loaded);
using PFun_slEvaluateFeature = sl::Result(sl::Feature feature, const sl::FrameToken& frame, const sl::BaseStructure** inputs, uint32_t numInputs, sl::CommandBuffer* cmdBuffer);
using PFun_slAllocateResources = sl::Result(sl::CommandBuffer* cmdBuffer, sl::Feature feature, const sl::ViewportHandle& viewport);
using PFun_slFreeResources = sl::Result(sl::Feature feature, const sl::ViewportHandle& viewport);
using PFun_slSetTag = sl::Result(const sl::ViewportHandle& viewport, const sl::ResourceTag* tags, uint32_t numTags, sl::CommandBuffer* cmdBuffer);
using PFun_slGetFeatureRequirements = sl::Result(sl::Feature feature, sl::FeatureRequirements& requirements);
using PFun_slGetFeatureVersion = sl::Result(sl::Feature feature, sl::FeatureVersion& version);
using PFun_slUpgradeInterface = sl::Result(void** baseInterface);
using PFun_slSetConstants = sl::Result(const sl::Constants& values, const sl::FrameToken& frame, const sl::ViewportHandle& viewport);
using PFun_slGetNativeInterface = sl::Result(void* proxyInterface, void** baseInterface);
using PFun_slGetFeatureFunction = sl::Result(sl::Feature feature, const char* functionName, void*& function);
using PFun_slGetNewFrameToken = sl::Result(sl::FrameToken*& token, const uint32_t* frameIndex);
using PFun_slSetD3DDevice = sl::Result(void* d3dDevice);
//! Initializes the SL module
//!
//! Call this method when the game is initializing.
//!
//! @param pref Specifies preferred behavior for the SL library (SL will keep a copy)
//! @param sdkVersion Current SDK version (defaults to sl::kSDKVersion)
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe.
SL_API sl::Result slInit(const sl::Preferences &pref, uint64_t sdkVersion = sl::kSDKVersion);
//! Shuts down the SL module
//!
//! Call this method when the game is shutting down.
//!
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe.
SL_API sl::Result slShutdown();
//! Checks if a specific feature is supported or not.
//!
//! Call this method to check if a certain feature (sl::Feature value) is available.
//!
//! @param feature Specifies which feature to check
//! @param adapterInfo Adapter to check (optional)
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! NOTE: If adapter info is null SL will return general feature compatibility with the OS,
//! installed drivers or any other requirements not directly related to the adapter.
//! NOTE(review): adapterInfo is passed by reference, so "null" presumably means an AdapterInfo
//! with no adapter data filled in - confirm against the sl::AdapterInfo definition.
//!
//! This method is NOT thread safe.
SL_API sl::Result slIsFeatureSupported(sl::Feature feature, const sl::AdapterInfo& adapterInfo);
//! Checks if specified feature is loaded or not.
//!
//! Call this method to check if feature is loaded.
//! All requested features are loaded by default and have to be unloaded explicitly if needed.
//!
//! @param feature Specifies which feature to check
//! @param loaded Output: receives whether the feature is loaded or unloaded.
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe and requires DX/VK device to be created before calling it.
SL_API sl::Result slIsFeatureLoaded(sl::Feature feature, bool& loaded);
//! Sets the specified feature to either loaded or unloaded state.
//!
//! Call this method to load or unload a certain feature.
//!
//! NOTE: All requested features are loaded by default and have to be unloaded explicitly if needed.
//!
//! @param feature Specifies which feature to load or unload
//! @param loaded Value specifying if feature should be loaded or unloaded.
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! NOTE: When this method is called no other DXGI/D3D/Vulkan APIs should be invoked in parallel so
//! make sure to flush your pipeline before calling this method.
//!
//! This method is NOT thread safe and requires DX/VK device to be created before calling it.
SL_API sl::Result slSetFeatureLoaded(sl::Feature feature, bool loaded);
//! Tags resource globally
//!
//! Call this method to tag the appropriate buffers in global scope.
//!
//! @param viewport Specifies viewport this tag applies to
//! @param tags Pointer to resource tags, set to null to remove the specified tag
//! @param numTags Number of resource tags in the provided list
//! @param cmdBuffer Command buffer to use (optional and can be null if ALL tags are null or have eValidUntilPresent life-cycle)
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! IMPORTANT: GPU payload that generates content for the provided tag(s) MUST be either already submitted to the provided command buffer
//! or some other command buffer which is guaranteed, by the host application, to be executed BEFORE the provided command buffer.
//!
//! This method is thread safe and requires DX/VK device to be created before calling it.
SL_API sl::Result slSetTag(const sl::ViewportHandle& viewport, const sl::ResourceTag* tags, uint32_t numTags, sl::CommandBuffer* cmdBuffer);
//! Sets common constants.
//!
//! Call this method to provide the required data (SL will keep a copy).
//!
//! @param values Common constants required by SL plugins (SL will keep a copy)
//! @param frame Frame token identifying the current frame (see slGetNewFrameToken)
//! @param viewport Unique id (can be viewport id | instance id etc.)
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is thread safe and requires DX/VK device to be created before calling it.
SL_API sl::Result slSetConstants(const sl::Constants& values, const sl::FrameToken& frame, const sl::ViewportHandle& viewport);
//! Returns feature's requirements
//!
//! Call this method to check what is required to run a certain feature (sl::Feature value).
//! This method must be called after init otherwise it will always return an error.
//!
//! @param feature Specifies which feature to check
//! @param requirements Output: data structure that receives the feature's requirements
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe.
SL_API sl::Result slGetFeatureRequirements(sl::Feature feature, sl::FeatureRequirements& requirements);
//! Returns feature's version
//!
//! Call this method to check version for a certain feature (sl::Feature value).
//! This method must be called after init otherwise it will always return an error.
//!
//! @param feature Specifies which feature to check
//! @param version Output: data structure that receives the feature's version
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is thread safe.
SL_API sl::Result slGetFeatureVersion(sl::Feature feature, sl::FeatureVersion& version);
//! Allocates resources for the specified feature.
//!
//! Call this method to explicitly allocate resources
//! for an instance of the specified feature.
//!
//! @param cmdBuffer Command buffer to use (must be created on device where feature is supported but can be null if not needed)
//! @param feature Feature we are working with
//! @param viewport Unique id (viewport handle)
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe and requires DX/VK device to be created before calling it.
SL_API sl::Result slAllocateResources(sl::CommandBuffer* cmdBuffer, sl::Feature feature, const sl::ViewportHandle& viewport);
//! Frees resources for the specified feature.
//!
//! Call this method to explicitly free resources
//! for an instance of the specified feature.
//!
//! @param feature Feature we are working with
//! @param viewport Unique id (viewport handle)
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! IMPORTANT: If slEvaluateFeature is pending on a command list, that command list must be flushed
//! before calling this method to prevent invalid resource access on the GPU.
//!
//! This method is NOT thread safe and requires DX/VK device to be created before calling it.
SL_API sl::Result slFreeResources(sl::Feature feature, const sl::ViewportHandle& viewport);
//! Evaluates feature
//!
//! Use this method to mark the section in your rendering pipeline
//! where specific feature should be injected.
//!
//! @param feature Feature we are working with
//! @param frame Current frame handle obtained from SL
//! @param inputs The chained structures providing the input data (viewport, tags, constants etc)
//! @param numInputs Number of inputs
//! @param cmdBuffer Command buffer to use (must be created on device where feature is supported)
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! IMPORTANT: Frame and viewport must match whatever is used to set common and/or feature options and constants (if any)
//!
//! NOTE: It is allowed to pass in buffer tags as inputs, they are considered to be "local" tags and do NOT interact with
//! same tags sent in the global scope using slSetTag API.
//!
//! This method is NOT thread safe and requires DX/VK device to be created before calling it.
SL_API sl::Result slEvaluateFeature(sl::Feature feature, const sl::FrameToken& frame, const sl::BaseStructure** inputs, uint32_t numInputs, sl::CommandBuffer* cmdBuffer);
//! Upgrade interface
//!
//! Use this method to upgrade basic D3D or DXGI interface to an SL proxy.
//!
//! @param baseInterface In/out: pointer to a pointer to the base interface (for example ID3D12Device etc.) to be replaced in place.
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! IMPORTANT: This method should ONLY be used to support 3rd party SDKs like AMD AGS
//! which bypass SL or when using manual hooking.
//!
//! This method is NOT thread safe and should be called IMMEDIATELY after base interface is created.
SL_API sl::Result slUpgradeInterface(void** baseInterface);
//! Obtain native interface
//!
//! Use this method to obtain underlying D3D or DXGI interface from an SL proxy.
//!
//! IMPORTANT: When calling NVAPI or other 3rd party SDKs from your application
//! it is recommended to provide native interfaces instead of SL proxies.
//!
//! @param proxyInterface Pointer to the SL proxy (D3D device, swap-chain etc)
//! @param baseInterface Output: pointer to a pointer that receives the base interface.
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe
SL_API sl::Result slGetNativeInterface(void* proxyInterface, void** baseInterface);
//! Gets specific feature's function
//!
//! Call this method to obtain various functions for the specified feature. See sl_$feature.h for details.
//!
//! @param feature Feature we are working with
//! @param functionName The name of the API to obtain (declared in sl_[$feature].h)
//! @param function Output: reference to the pointer that receives the function
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! IMPORTANT: Must be called AFTER device is set by calling either slSetD3DDevice or slSetVulkanInfo.
//!
//! This method is thread safe.
SL_API sl::Result slGetFeatureFunction(sl::Feature feature, const char* functionName, void*& function);
//! Gets unique frame token
//!
//! Call this method to obtain token for the unique frame identification.
//!
//! @param token Output: reference to the pointer that receives the frame token
//! @param frameIndex Frame index (optional, defaults to nullptr; if not provided SL internal frame counting is used)
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! NOTE: Normally SL would not expect more than 3 frames in flight due to added latency.
//!
//! This method is thread safe.
SL_API sl::Result slGetNewFrameToken(sl::FrameToken*& token, const uint32_t* frameIndex = nullptr);
//! Set D3D device to use
//!
//! Use this method to specify which D3D device should be used.
//!
//! @param d3dDevice D3D device to use (passed as an untyped pointer)
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe and should be called IMMEDIATELY after main device is created.
SL_API sl::Result slSetD3DDevice(void* d3dDevice);
#pragma endregion SL_API

View File

@ -0,0 +1,669 @@
/*
* Copyright (c) 2022-2024 NVIDIA CORPORATION. All rights reserved
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include <limits.h>
#include "sl_struct.h"
#include "sl_consts.h"
#include "sl_version.h"
#include "sl_result.h"
#include "sl_appidentity.h"
#include "sl_device_wrappers.h"
//! Forward declarations of the D3D resource interfaces; in this header they are only used as pointer types
typedef struct ID3D11Resource ID3D11Resource;
typedef struct ID3D11Buffer ID3D11Buffer;
typedef struct ID3D11Texture2D ID3D11Texture2D;
typedef struct ID3D12Resource ID3D12Resource;
// Forward declarations matching MS and VK specs
//
// NOTE(review): `enum VkResult;` declares an unscoped enum without a fixed underlying type.
// ISO C++ only allows opaque enum declarations when the underlying type is fixed, so this
// line relies on a compiler extension - confirm against the set of supported compilers.
enum VkResult;
using HRESULT = long;
namespace sl {
//! Command buffers and devices are handled as untyped pointers (CommandBuffer* and Device* are void*)
using CommandBuffer = void;
using Device = void;
//! Buffer types used for tagging
//!
//! IMPORTANT: Each tag must use a unique id
//!
using BufferType = uint32_t;
//! Depth buffer - IMPORTANT - Must be suitable to use with clipToPrevClip transformation (see Constants below)
constexpr BufferType kBufferTypeDepth = 0;
//! Object and optional camera motion vectors (see Constants below)
constexpr BufferType kBufferTypeMotionVectors = 1;
//! Color buffer with all post-processing effects applied but without any UI/HUD elements
constexpr BufferType kBufferTypeHUDLessColor = 2;
//! Color buffer containing jittered input data for the image scaling pass
constexpr BufferType kBufferTypeScalingInputColor = 3;
//! Color buffer containing results from the image scaling pass
constexpr BufferType kBufferTypeScalingOutputColor = 4;
//! Normals
constexpr BufferType kBufferTypeNormals = 5;
//! Roughness
constexpr BufferType kBufferTypeRoughness = 6;
//! Albedo
constexpr BufferType kBufferTypeAlbedo = 7;
//! Specular Albedo
constexpr BufferType kBufferTypeSpecularAlbedo = 8;
//! Indirect Albedo
constexpr BufferType kBufferTypeIndirectAlbedo = 9;
//! Specular Motion Vectors
constexpr BufferType kBufferTypeSpecularMotionVectors = 10;
//! Disocclusion Mask
constexpr BufferType kBufferTypeDisocclusionMask = 11;
//! Emissive
constexpr BufferType kBufferTypeEmissive = 12;
//! Exposure
constexpr BufferType kBufferTypeExposure = 13;
//! Buffer with normal and roughness in alpha channel
constexpr BufferType kBufferTypeNormalRoughness = 14;
//! Diffuse and camera ray length
constexpr BufferType kBufferTypeDiffuseHitNoisy = 15;
//! Diffuse denoised
constexpr BufferType kBufferTypeDiffuseHitDenoised = 16;
//! Specular and reflected ray length
constexpr BufferType kBufferTypeSpecularHitNoisy = 17;
//! Specular denoised
constexpr BufferType kBufferTypeSpecularHitDenoised = 18;
//! Shadow noisy
constexpr BufferType kBufferTypeShadowNoisy = 19;
//! Shadow denoised
constexpr BufferType kBufferTypeShadowDenoised = 20;
//! AO noisy
constexpr BufferType kBufferTypeAmbientOcclusionNoisy = 21;
//! AO denoised
constexpr BufferType kBufferTypeAmbientOcclusionDenoised = 22;
//! Optional - UI/HUD color and alpha
//! IMPORTANT: Please make sure that alpha channel has enough precision (for example do NOT use formats like R10G10B10A2)
constexpr BufferType kBufferTypeUIColorAndAlpha = 23;
//! Optional - Shadow pixels hint (set to 1 if a pixel belongs to the shadow area, 0 otherwise)
constexpr BufferType kBufferTypeShadowHint = 24;
//! Optional - Reflection pixels hint (set to 1 if a pixel belongs to the reflection area, 0 otherwise)
constexpr BufferType kBufferTypeReflectionHint = 25;
//! Optional - Particle pixels hint (set to 1 if a pixel represents a particle, 0 otherwise)
constexpr BufferType kBufferTypeParticleHint = 26;
//! Optional - Transparency pixels hint (set to 1 if a pixel belongs to the transparent area, 0 otherwise)
constexpr BufferType kBufferTypeTransparencyHint = 27;
//! Optional - Animated texture pixels hint (set to 1 if a pixel belongs to the animated texture area, 0 otherwise)
constexpr BufferType kBufferTypeAnimatedTextureHint = 28;
//! Optional - Bias for current color vs history hint - lerp(history, current, bias) (set to 1 to completely reject history)
constexpr BufferType kBufferTypeBiasCurrentColorHint = 29;
//! Optional - Ray-tracing distance (camera ray length)
constexpr BufferType kBufferTypeRaytracingDistance = 30;
//! Optional - Motion vectors for reflections
constexpr BufferType kBufferTypeReflectionMotionVectors = 31;
//! Optional - Position, in same space as kBufferTypeNormals
constexpr BufferType kBufferTypePosition = 32;
//! Optional - Indicates (via non-zero value) which pixels have motion/depth values that do not match the final color content at that pixel (e.g. overlaid, opaque Picture-in-Picture)
constexpr BufferType kBufferTypeInvalidDepthMotionHint = 33;
//! Alpha
constexpr BufferType kBufferTypeAlpha = 34;
//! Color buffer containing only opaque geometry
constexpr BufferType kBufferTypeOpaqueColor = 35;
//! Optional - Reduce reliance on history instead using current frame hint (0 if a pixel is not at all reactive and default composition should be used, 1 if fully reactive)
constexpr BufferType kBufferTypeReactiveMaskHint = 36;
//! Optional - Pixel lock adjustment hint (set to 1 if pixel lock should be completely removed, 0 otherwise)
constexpr BufferType kBufferTypeTransparencyAndCompositionMaskHint = 37;
//! Optional - Albedo of the reflection ray hit point. For multibounce reflections, this should be the albedo of the first non-specular bounce.
constexpr BufferType kBufferTypeReflectedAlbedo = 38;
//! Optional - Color buffer before particles are drawn.
constexpr BufferType kBufferTypeColorBeforeParticles = 39;
//! Optional - Color buffer before transparent objects are drawn.
constexpr BufferType kBufferTypeColorBeforeTransparency = 40;
//! Optional - Color buffer before fog is drawn.
constexpr BufferType kBufferTypeColorBeforeFog = 41;
//! Optional - Buffer containing the hit distance of a specular ray.
constexpr BufferType kBufferTypeSpecularHitDistance = 42;
//! Optional - Buffer that contains 3 components of a specular ray direction, and 1 component of specular hit distance.
constexpr BufferType kBufferTypeSpecularRayDirectionHitDistance = 43;
//! Optional - Buffer containing normalized direction of a specular ray.
constexpr BufferType kBufferTypeSpecularRayDirection = 44;
//! Optional - Buffer containing the hit distance of a diffuse ray.
constexpr BufferType kBufferTypeDiffuseHitDistance = 45;
//! Optional - Buffer that contains 3 components of a diffuse ray direction, and 1 component of diffuse hit distance.
constexpr BufferType kBufferTypeDiffuseRayDirectionHitDistance = 46;
//! Optional - Buffer containing normalized direction of a diffuse ray.
constexpr BufferType kBufferTypeDiffuseRayDirection = 47;
//! Optional - Buffer containing display resolution depth.
constexpr BufferType kBufferTypeHiResDepth = 48;
//! Required either this or kBufferTypeDepth - Buffer containing linear depth.
constexpr BufferType kBufferTypeLinearDepth = 49;
//! Optional - Bidirectional distortion field. 4 channels in normalized [0,1] pixel space. RG = distorted pixel to undistorted pixel displacement. BA = undistorted pixel to distorted pixel displacement.
constexpr BufferType kBufferTypeBidirectionalDistortionField = 50;
//! Optional - Buffer containing particles or other similar transparent effects rendered into it instead of passing it as part of the input color
constexpr BufferType kBufferTypeTransparencyLayer = 51;
//! Optional - Buffer to be used in addition to TransparencyLayer which allows 3-channels of Opacity versus 1-channel.
//! In this case, TransparencyLayer represents Color (RcGcBc), TransparencyLayerOpacity represents alpha (RaGaBa)
constexpr BufferType kBufferTypeTransparencyLayerOpacity = 52;
//! Optional - Swapchain buffer to be presented
constexpr BufferType kBufferTypeBackbuffer = 53;
//! Optional - Mask for pixels to skip warping
constexpr BufferType kBufferTypeNoWarpMask = 54;
//! Optional - Color buffer after particles are drawn (for research purposes)
constexpr BufferType kBufferTypeColorAfterParticles = 55;
//! Optional - Color buffer after transparent objects are drawn (for research purposes)
constexpr BufferType kBufferTypeColorAfterTransparency = 56;
//! Optional - Color buffer after fog is drawn (for research purposes)
constexpr BufferType kBufferTypeColorAfterFog = 57;
//! Optional - Subsurface scattering guide buffer
constexpr BufferType kBufferTypeScreenSpaceSubsurfaceScatteringGuide = 58;
//! Optional - Color buffer before subsurface scattering (for research purposes)
constexpr BufferType kBufferTypeColorBeforeScreenSpaceSubsurfaceScattering = 59;
//! Optional - Color buffer after subsurface scattering (for research purposes)
constexpr BufferType kBufferTypeColorAfterScreenSpaceSubsurfaceScattering = 60;
//! Optional - Refraction guide buffer (for research purposes)
constexpr BufferType kBufferTypeScreenSpaceRefractionGuide = 61;
//! Optional - Color buffer before refraction (for research purposes)
constexpr BufferType kBufferTypeColorBeforeScreenSpaceRefraction = 62;
//! Optional - Color buffer after refraction (for research purposes)
constexpr BufferType kBufferTypeColorAfterScreenSpaceRefraction = 63;
//! Optional - Depth of Field Buffer (for research purposes)
constexpr BufferType kBufferTypeDepthOfFieldGuide = 64;
//! Optional - Color buffer before Depth of Field (for research purposes)
constexpr BufferType kBufferTypeColorBeforeDepthOfField = 65;
//! Optional - Color buffer after Depth of Field (for research purposes)
constexpr BufferType kBufferTypeColorAfterDepthOfField = 66;
//! Features supported with this SDK
//!
//! IMPORTANT: Each feature must use a unique id
//!
using Feature = uint32_t;
//! Deep Learning Super Sampling
constexpr Feature kFeatureDLSS = 0;
//! Real-Time Denoiser (removed)
constexpr Feature kFeatureNRD_INVALID = 1;
//! NVIDIA Image Scaling
constexpr Feature kFeatureNIS = 2;
//! Reflex
constexpr Feature kFeatureReflex = 3;
//! PC Latency
constexpr Feature kFeaturePCL = 4;
//! DeepDVC
constexpr Feature kFeatureDeepDVC = 5;
//! Latewarp
constexpr Feature kFeatureLatewarp = 6;
//! DLSS Frame Generation
constexpr Feature kFeatureDLSS_G = 1000;
//! DLSS Ray Reconstruction
constexpr Feature kFeatureDLSS_RR = 1001;
//! NvPerf
constexpr Feature kFeatureNvPerf = 1002;
//! DirectSR
constexpr Feature kFeatureDirectSR = 1003;
//! ImGUI
constexpr Feature kFeatureImGUI = 9999;
//! Common feature, NOT intended to be used directly (uses the largest unsigned id, UINT_MAX)
constexpr Feature kFeatureCommon = UINT_MAX;
//! Logging verbosity levels
//!
//! Values are spelled out explicitly; eCount follows the last level and therefore equals the number of levels.
enum class LogLevel : uint32_t
{
    eOff = 0,     //!< No logging
    eDefault = 1, //!< Default logging
    eVerbose = 2, //!< Verbose logging
    eCount        //!< Total count
};
//! Kinds of native resources which can be described to SL
//!
//! Stored in a single char; values are spelled out explicitly and eCount follows the last kind.
enum class ResourceType : char
{
    eTex2d = 0,
    eBuffer = 1,
    eCommandQueue = 2,
    eCommandBuffer = 3,
    eCommandPool = 4,
    eFence = 5,
    eSwapchain = 6,
    eHostFence = 7,
    //! This type means that the only thing we know for sure about this resource is that it's castable to IUnknown
    eUnknown = 8,
    //! Total count
    eCount
};
//! Resource allocation information
//!
//! {00BB57E5-49A2-4C23-A519-AB9286E74014}
SL_STRUCT_BEGIN(ResourceAllocationDesc, StructType({ 0xbb57e5, 0x49a2, 0x4c23, { 0xa5, 0x19, 0xab, 0x92, 0x86, 0xe7, 0x40, 0x14 } }), kStructVersion1)
    //! Builds a fully specified allocation description
    //!
    //! @param _type Type of resource to allocate
    //! @param _desc Native resource description (see 'desc')
    //! @param _state Initial state (see 'state')
    //! @param _heap Heap properties or nullptr (see 'heap')
    ResourceAllocationDesc(ResourceType _type, void* _desc, uint32_t _state, void* _heap)
        : BaseStructure(ResourceAllocationDesc::s_structType, kStructVersion1)
        , type(_type)
        , desc(_desc)
        , state(_state)
        , heap(_heap)
    {
    }
    //! Indicates the type of resource
    ResourceType type{ ResourceType::eTex2d };
    //! D3D12_RESOURCE_DESC/VkImageCreateInfo/VkBufferCreateInfo
    void* desc{};
    //! Initial state as D3D12_RESOURCE_STATES or VkMemoryPropertyFlags
    uint32_t state{ 0 };
    //! CD3DX12_HEAP_PROPERTIES or nullptr
    void* heap{};
    //! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
//! Subresource range information, for Vulkan resources
//!
//! Every member is zero-initialized, like the members of the other structures in this header,
//! so an instance never carries indeterminate values before the host fills it in.
//!
//! {8D4C316C-D402-4524-89A7-14E79E638E3A}
SL_STRUCT_BEGIN(SubresourceRange, StructType({ 0x8d4c316c, 0xd402, 0x4524, { 0x89, 0xa7, 0x14, 0xe7, 0x9e, 0x63, 0x8e, 0x3a } }), kStructVersion1)
    //! Vulkan subresource aspectMask
    uint32_t aspectMask{};
    //! Vulkan subresource baseMipLevel
    uint32_t baseMipLevel{};
    //! Vulkan subresource levelCount
    uint32_t levelCount{};
    //! Vulkan subresource baseArrayLayer
    uint32_t baseArrayLayer{};
    //! Vulkan subresource layerCount
    uint32_t layerCount{};
SL_STRUCT_END()
//! Native resource
//!
//! Describes a native D3D/Vulkan object together with the optional metadata listed below.
//!
//! {3A9D70CF-2418-4B72-8391-13F8721C7261}
SL_STRUCT_BEGIN(Resource, StructType({ 0x3a9d70cf, 0x2418, 0x4b72, { 0x83, 0x91, 0x13, 0xf8, 0x72, 0x1c, 0x72, 0x61 } }), kStructVersion1)
    //! Constructors
    //!
    //! Resource type, native pointer are MANDATORY always
    //! Resource state is MANDATORY unless using D3D11
    //! Resource view, description etc. are MANDATORY only when using Vulkan
    //!
    //! Members which are not set by a constructor keep their default member initializers.
    //!
    Resource(ResourceType _type, void* _native, void* _mem, void* _view, uint32_t _state = UINT_MAX) : BaseStructure(Resource::s_structType, kStructVersion1), type(_type), native(_native), memory(_mem), view(_view), state(_state) {}
    Resource(ResourceType _type, void* _native, uint32_t _state = UINT_MAX) : BaseStructure(Resource::s_structType, kStructVersion1), type(_type), native(_native), state(_state) {}
    //! Conversion helpers for D3D
    //!
    //! Each helper reinterprets 'native' as the requested interface pointer; no check against 'type' is made.
    inline operator ID3D12Resource* () { return reinterpret_cast<ID3D12Resource*>(native); }
    inline operator ID3D11Resource* () { return reinterpret_cast<ID3D11Resource*>(native); }
    inline operator ID3D11Buffer* () { return reinterpret_cast<ID3D11Buffer*>(native); }
    inline operator ID3D11Texture2D* () { return reinterpret_cast<ID3D11Texture2D*>(native); }
    //! Indicates the type of resource
    ResourceType type = ResourceType::eTex2d;
    //! ID3D11Resource/ID3D12Resource/VkBuffer/VkImage
    void* native{};
    //! vkDeviceMemory or nullptr
    void* memory{};
    //! VkImageView/VkBufferView or nullptr
    void* view{};
    //! State as D3D12_RESOURCE_STATES or VkImageLayout
    //!
    //! IMPORTANT: State is MANDATORY and needs to be correct when tagged resources are actually used.
    //!
    uint32_t state = UINT_MAX;
    //! Width in pixels
    uint32_t width{};
    //! Height in pixels
    uint32_t height{};
    //! Native format
    uint32_t nativeFormat{};
    //! Number of mip-map levels
    uint32_t mipLevels{};
    //! Number of arrays
    uint32_t arrayLayers{};
    //! Virtual address on GPU (if applicable)
    uint64_t gpuVirtualAddress{};
    //! VkImageCreateFlags
    //!
    //! Zero-initialized like the other members; no constructor assigns it, so without an
    //! initializer it would hold an indeterminate value.
    uint32_t flags{};
    //! VkImageUsageFlags
    uint32_t usage{};
    //! Reserved for internal use
    uint32_t reserved{};
    //! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
//! Life-cycle of a tagged resource, i.e. for how long SL may rely on its contents
//!
//! IMPORTANT: Use 'eOnlyValidNow' and 'eValidUntilEvaluate' ONLY when really needed since it can result in wasting VRAM if SL ends up making unnecessary copies.
//!
//! If integrating features, like for example DLSS-G, which require tags to be 'eValidUntilPresent' please try to tag everything as 'eValidUntilPresent' first
//! and only make modifications if upon visual inspection you notice that tags are corrupted when used during the Present frame call.
enum ResourceLifecycle
{
    //! Resource can change, get destroyed or be reused for other purposes after it is provided to SL
    eOnlyValidNow = 0,
    //! Resource does NOT change and is NOT destroyed or reused for other purposes from the moment it is provided to SL until the frame is presented
    eValidUntilPresent = 1,
    //! Resource does NOT change and is NOT destroyed or reused for other purposes from the moment it is provided to SL until after the slEvaluateFeature call has returned.
    eValidUntilEvaluate = 2
};
//! Tagged resource
//!
//! Associates a resource with a buffer type, a life-cycle and an optional sub-area.
//!
//! {4C6A5AAD-B445-496C-87FF-1AF3845BE653}
//! Extensions as part of the `next` ptr:
//! PrecisionInfo
SL_STRUCT_BEGIN(ResourceTag, StructType({ 0x4c6a5aad, 0xb445, 0x496c, { 0x87, 0xff, 0x1a, 0xf3, 0x84, 0x5b, 0xe6, 0x53 } }), kStructVersion1)
    //! @param r Resource description
    //! @param t Type of the tagged buffer
    //! @param l Life-cycle for the tag
    //! @param e Optional area of the resource to use; when null the default-constructed extent is kept
    ResourceTag(Resource* r, BufferType t, ResourceLifecycle l, const Extent* e = nullptr)
        : BaseStructure(ResourceTag::s_structType, kStructVersion1)
        , resource(r)
        , type(t)
        , lifecycle(l)
    {
        if (e != nullptr)
        {
            extent = *e;
        }
    }
    //! Resource description
    Resource* resource{};
    //! Type of the tagged buffer
    BufferType type{};
    //! The life-cycle for the tag, if resource is volatile a valid command buffer must be specified
    ResourceLifecycle lifecycle{};
    //! The area of the tagged resource to use (if using the entire resource leave as null)
    Extent extent{};
    //! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
//!
//! Precision info, optional extension for ResourceTag.
//!
//! {98F6E9BA-8D16-4831-A802-4D3B52FF26BF}
//! Extensions as part of the `next` ptr:
//! ResourceTag
SL_STRUCT_BEGIN(PrecisionInfo, StructType({ 0x98f6e9ba, 0x8d16, 0x4831, { 0xa8, 0x2, 0x4d, 0x3b, 0x52, 0xff, 0x26, 0xbf } }), kStructVersion1)
    //! Formula used to convert the low-precision data to high-precision
    enum PrecisionFormula : uint32_t
    {
        //! hi = lo, essentially no conversion is done
        eNoTransform = 0,
        //! hi = lo * scale + bias
        eLinearTransform,
    };

    //! @param formula Conversion formula to apply
    //! @param bias Bias term of the linear transform
    //! @param scale Scale factor of the linear transform
    PrecisionInfo(PrecisionInfo::PrecisionFormula formula, float bias, float scale)
        : BaseStructure(PrecisionInfo::s_structType, kStructVersion1)
        , conversionFormula(formula)
        , bias(bias)
        , scale(scale)
    {
    }

    //! Returns the enumerator name for the given formula.
    //!
    //! Any value other than the two known formulas triggers an assert and yields "Unknown".
    static std::string getPrecisionFormulaAsStr(PrecisionFormula formula)
    {
        if (formula == eNoTransform)
        {
            return "eNoTransform";
        }
        if (formula == eLinearTransform)
        {
            return "eLinearTransform";
        }
        assert("Invalid PrecisionFormula" && false);
        return "Unknown";
    }

    PrecisionFormula conversionFormula{ eNoTransform };
    float bias{ 0.0f };
    float scale{ 1.0f };

    //! True when a conversion is required, i.e. the formula is anything but eNoTransform
    inline operator bool() const { return eNoTransform != conversionFormula; }

    //! Member-wise equality (exact floating point comparison of bias and scale)
    inline bool operator==(const PrecisionInfo& rhs) const
    {
        if (conversionFormula != rhs.conversionFormula)
        {
            return false;
        }
        return bias == rhs.bias && scale == rhs.scale;
    }
    inline bool operator!=(const PrecisionInfo& rhs) const
    {
        return !operator==(rhs);
    }
SL_STRUCT_END()
//! Resource allocation/deallocation callbacks
//!
//! Use these callbacks to gain full control over
//! resource life cycle and memory allocation tracking.
//!
//! @param desc - Description of the resource to allocate (allocate callback only)
//! @param resource - Resource to release (release callback only)
//! @param device - Device to be used (vkDevice or ID3D11Device or ID3D12Device)
//!
//! The allocate callback returns the created resource by value.
//!
//! IMPORTANT: Textures must have the pixel shader resource
//! and the unordered access view flags set
using PFun_ResourceAllocateCallback = Resource(const ResourceAllocationDesc* desc, void* device);
using PFun_ResourceReleaseCallback = void(Resource* resource, void* device);
//! Category of a log message
enum class LogType : uint32_t
{
    //! Controlled by LogLevel, SL can show more information in eLogLevelVerbose mode
    eInfo = 0,
    //! Always shown regardless of LogLevel
    eWarn = 1,
    eError = 2,
    //! Total count
    eCount
};
//! Logging callback
//!
//! Use this callback to track messages posted in the log.
//! If any of the SL methods returns an error use messages of type
//! LogType::eError to track down what went wrong and why.
//!
//! @param type Type of the message (see LogType)
//! @param msg Message text
using PFun_LogMessageCallback = void(LogType type, const char* msg);
//! Error code reported by the underlying graphics API
//!
//! Both members share the same storage (anonymous union); which one is meaningful
//! depends on the API which produced the error.
struct APIError
{
union
{
HRESULT hres;
VkResult vkRes;
};
};
//! Returns an error returned by DXGI or Vulkan API calls 'vkQueuePresentKHR' and 'vkAcquireNextImageKHR'
using PFunOnAPIErrorCallback = void(const APIError& lastError);
//! Optional preference flags (bit mask, one bit per flag)
//!
//! Shifts are performed on a 64-bit literal to match the underlying type.
enum class PreferenceFlags : uint64_t
{
    //! Set by default - Disables command list state tracking - Host application is responsible for restoring CL state correctly after each 'slEvaluateFeature' call
    eDisableCLStateTracking = 1ull << 0,
    //! Optional - Disables debug text on screen in development builds
    eDisableDebugText = 1ull << 1,
    //! Optional - IMPORTANT: Only to be used in the advanced integration mode, see the 'manual hooking' programming guide for more details
    eUseManualHooking = 1ull << 2,
    //! Optional - Enables downloading of Over The Air (OTA) updates for SL and NGX
    //! This will invoke the OTA updater to look for new updates. A separate
    //! flag below is used to control whether or not OTA-downloaded SL Plugins are
    //! loaded.
    eAllowOTA = 1ull << 3,
    //! Do not check OS version when deciding if feature is supported or not
    //!
    //! IMPORTANT: ONLY SET THIS FLAG IF YOU KNOW WHAT YOU ARE DOING.
    //!
    //! VARIOUS WIN APIs INCLUDING BUT NOT LIMITED TO `IsWindowsXXX`, `GetVersionX`, `rtlGetVersion` ARE KNOWN FOR RETURNING INCORRECT RESULTS.
    eBypassOSVersionCheck = 1ull << 4,
    //! Optional - If specified SL will create DXGI factory proxy rather than modifying the v-table for the base interface.
    //!
    //! This can help with 3rd party overlays which are NOT integrated with the host application but rather operate via injection.
    eUseDXGIFactoryProxy = 1ull << 5,
    //! Optional - Enables loading of plugins downloaded Over The Air (OTA), to
    //! be used in conjunction with the eAllowOTA flag.
    eLoadDownloadedPlugins = 1ull << 6,
};
SL_ENUM_OPERATORS_64(PreferenceFlags)
//! Application preferences
//!
//! Every member has a default value, so the host only needs to set the members it cares about.
//!
//! {1CA10965-BF8E-432B-8DA1-6716D879FB14}
SL_STRUCT_BEGIN(Preferences, StructType({ 0x1ca10965, 0xbf8e, 0x432b, { 0x8d, 0xa1, 0x67, 0x16, 0xd8, 0x79, 0xfb, 0x14 } }), kStructVersion1)
//! Optional - In non-production builds it is useful to enable debugging console window
bool showConsole = false;
//! Optional - Various logging levels
LogLevel logLevel = LogLevel::eDefault;
//! Optional - Absolute paths to locations where to look for plugins, first path in the list has the highest priority
const wchar_t** pathsToPlugins{};
//! Optional - Number of paths to search
uint32_t numPathsToPlugins = 0;
//! Optional - Absolute path to location where logs and other data should be stored
//!
//! NOTE: Set this to nullptr in order to disable logging to a file
const wchar_t* pathToLogsAndData{};
//! Optional - Allows resource allocation tracking on the host side
PFun_ResourceAllocateCallback* allocateCallback{};
//! Optional - Allows resource deallocation tracking on the host side
PFun_ResourceReleaseCallback* releaseCallback{};
//! Optional - Allows log message tracking including critical errors if they occur
PFun_LogMessageCallback* logMessageCallback{};
//! Optional - Flags used to enable or disable advanced options
//!
//! NOTE: The default enables eDisableCLStateTracking, eAllowOTA and eLoadDownloadedPlugins
PreferenceFlags flags = PreferenceFlags::eDisableCLStateTracking | PreferenceFlags::eAllowOTA | PreferenceFlags::eLoadDownloadedPlugins;
//! Required - Features to load (assuming appropriate plugins are found), if not specified NO features will be loaded by default
const Feature* featuresToLoad{};
//! Required - Number of features to load, only used when list is not a null pointer
uint32_t numFeaturesToLoad{};
//! Optional - Id provided by NVIDIA, if not specified then engine type and version are required
uint32_t applicationId{};
//! Optional - Type of the rendering engine used, if not specified then applicationId is required
EngineType engine = EngineType::eCustom;
//! Optional - Version of the rendering engine used
const char* engineVersion{};
//! Optional - GUID (like for example 'a0f57b54-1daf-4934-90ae-c4035c19df04')
const char* projectId{};
//! Optional - Which rendering API host is planning to use
//!
//! NOTE: To ensure correct `slGetFeatureRequirements` behavior please specify if planning to use Vulkan.
RenderAPI renderAPI = RenderAPI::eD3D12;
//! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
//! Frame tracking handle
//!
//! IMPORTANT: Use slGetNewFrameToken to obtain unique instance
//!
//! The conversion operator below is pure virtual, so this type cannot be instantiated directly.
//!
//! {830A0F35-DB84-4171-A804-59B206499B18}
SL_STRUCT_PROTECTED_BEGIN(FrameToken, StructType({ 0x830a0f35, 0xdb84, 0x4171, { 0xa8, 0x4, 0x59, 0xb2, 0x6, 0x49, 0x9b, 0x18 } }), kStructVersion1)
//! Helper operator to obtain current frame index
virtual operator uint32_t() const = 0;
SL_STRUCT_END()
//! Handle for the unique viewport
//!
//! Thin wrapper around a 32-bit id which converts implicitly from and to an integer.
//!
//! {171B6435-9B3C-4FC8-9994-FBE52569AAA4}
SL_STRUCT_BEGIN(ViewportHandle, StructType({ 0x171b6435, 0x9b3c, 0x4fc8, { 0x99, 0x94, 0xfb, 0xe5, 0x25, 0x69, 0xaa, 0xa4 } }), kStructVersion1)
    //! Creates a handle from an unsigned id
    ViewportHandle(uint32_t v)
        : BaseStructure(ViewportHandle::s_structType, kStructVersion1)
        , value(v)
    {
    }
    //! Creates a handle from a signed id; the value is converted to uint32_t
    ViewportHandle(int32_t v)
        : BaseStructure(ViewportHandle::s_structType, kStructVersion1)
        , value(static_cast<uint32_t>(v))
    {
    }
    //! Returns the stored id
    operator uint32_t() const
    {
        return value;
    }
private:
    //! UINT_MAX until a constructor taking an id assigns a value
    uint32_t value{ UINT_MAX };
SL_STRUCT_END()
//! Feature requirement flags (bit mask, one bit per flag)
//!
enum class FeatureRequirementFlags : uint32_t
{
    //! Rendering APIs
    eD3D11Supported = 1u << 0,
    eD3D12Supported = 1u << 1,
    eVulkanSupported = 1u << 2,
    //! If set V-Sync must be disabled when feature is active
    eVSyncOffRequired = 1u << 3,
    //! If set GPU hardware scheduling OS feature must be turned on
    eHardwareSchedulingRequired = 1u << 4
};
SL_ENUM_OPERATORS_32(FeatureRequirementFlags);
//! Specifies feature requirements
//!
//! All members are value-initialized (zero / nullptr) by default. Array members come in
//! pairs: a count followed by a pointer to that many entries.
//!
//! {66714097-AC6D-4BC6-8915-1E0F55A6B61F}
SL_STRUCT_BEGIN(FeatureRequirements, StructType({ 0x66714097, 0xac6d, 0x4bc6, { 0x89, 0x15, 0x1e, 0xf, 0x55, 0xa6, 0xb6, 0x1f } }), kStructVersion2)
//! Various Flags
FeatureRequirementFlags flags {};
//! Feature will create this many CPU threads
uint32_t maxNumCPUThreads{};
//! Feature supports only this many viewports
uint32_t maxNumViewports{};
//! Required buffer tags
uint32_t numRequiredTags{};
const BufferType* requiredTags{};
//! OS and Driver versions
Version osVersionDetected{};
Version osVersionRequired{};
Version driverVersionDetected{};
Version driverVersionRequired{};
//! Vulkan specific bits
//! Command queues
uint32_t vkNumComputeQueuesRequired{};
uint32_t vkNumGraphicsQueuesRequired{};
//! Device extensions
uint32_t vkNumDeviceExtensions{};
const char** vkDeviceExtensions{};
//! Instance extensions
uint32_t vkNumInstanceExtensions{};
const char** vkInstanceExtensions{};
//! 1.2 features
//!
//! NOTE: Use getVkPhysicalDeviceVulkan12Features from sl_helpers_vk.h
uint32_t vkNumFeatures12{};
const char** vkFeatures12{};
//! 1.3 features
//!
//! NOTE: Use getVkPhysicalDeviceVulkan13Features from sl_helpers_vk.h
uint32_t vkNumFeatures13{};
const char** vkFeatures13{};
//! Vulkan optical flow feature
uint32_t vkNumOpticalFlowQueuesRequired{};
//! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
//! Specifies feature's version
//!
//! {6D5B51F0-076B-486D-9995-5A561043F5C1}
SL_STRUCT_BEGIN(FeatureVersion, StructType({ 0x6d5b51f0, 0x76b, 0x486d, { 0x99, 0x95, 0x5a, 0x56, 0x10, 0x43, 0xf5, 0xc1 } }), kStructVersion1)
//! SL version
Version versionSL{};
//! NGX version (if feature is using NGX, null otherwise)
//! NOTE(review): 'null' presumably means the value-initialized Version - confirm against sl_version.h
Version versionNGX{};
//! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
//! Specifies either DXGI adapter or VK physical device
//!
//! All members default to null / zero.
//!
//! {0677315F-A746-4492-9F42-CB6142C9C3D4}
SL_STRUCT_BEGIN(AdapterInfo, StructType({ 0x677315f, 0xa746, 0x4492, { 0x9f, 0x42, 0xcb, 0x61, 0x42, 0xc9, 0xc3, 0xd4 } }), kStructVersion1)
//! Locally unique identifier (pointer to the raw LUID bytes)
uint8_t* deviceLUID {};
//! Size of the LUID in bytes
uint32_t deviceLUIDSizeInBytes{};
//! Vulkan Specific, if specified LUID will be ignored
void* vkPhysicalDevice{};
//! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
}

View File

@ -0,0 +1,103 @@
/*
* Copyright (c) 2022-2023 NVIDIA CORPORATION. All rights reserved
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include "sl.h"
#include "sl_helpers.h"
namespace sl
{
//! DeepDVC operating modes
enum class DeepDVCMode : uint32_t
{
    eOff = 0,
    eOn = 1,
    //! Total count
    eCount
};
//! DeepDVC options
//!
//! {23288AAD-7E7E-BE2A-9167-27DA30A3046B}
//! NOTE(review): This comment previously read "...-916F-..." while the bytes below are 0x91, 0x67.
//! The comment now mirrors the code, since the code defines the actual struct id - confirm which one was intended.
SL_STRUCT_BEGIN(DeepDVCOptions, StructType({ 0x23288aad, 0x7e7e, 0xbe2a, { 0x91, 0x67, 0x27, 0xda, 0x30, 0xa3, 0x04, 0x6b } }), kStructVersion1)
//! Specifies which mode should be used
DeepDVCMode mode = DeepDVCMode::eOff;
//! Specifies intensity level in range [0,1]. Default 0.5
float intensity = 0.5f;
//! Specifies saturation boost in range [0,1]. Default 0.25
float saturationBoost = 0.25f;
SL_STRUCT_END()
//! Returned by the DeepDVC plugin
//!
//! {934FD3D3-B34C-70A7-A139-F19FE04D91D3}
SL_STRUCT_BEGIN(DeepDVCState, StructType({ 0x934fd3d3, 0xb34c, 0x70a7, { 0xa1, 0x39, 0xf1, 0x9f, 0xe0, 0x4d, 0x91, 0xd3 } }), kStructVersion1)
//! Specifies the amount of memory expected to be used (in bytes)
uint64_t estimatedVRAMUsageInBytes {};
SL_STRUCT_END()
}
//! Sets DeepDVC options
//!
//! Call this method to turn DeepDVC on/off, change mode etc.
//!
//! @param viewport Specifies the viewport we are working with
//! @param options Specifies DeepDVC options to use
//! @return sl::ResultCode::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe.
using PFun_slDeepDVCSetOptions = sl::Result(const sl::ViewportHandle& viewport, const sl::DeepDVCOptions& options);
//! Provides DeepDVC state for the given viewport
//!
//! Call this method to obtain VRAM usage and other information.
//!
//! @param viewport Specifies the viewport we are working with
//! @param state Reference to a structure where state is to be returned
//! @return sl::ResultCode::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe.
using PFun_slDeepDVCGetState = sl::Result(const sl::ViewportHandle& viewport, sl::DeepDVCState& state);
//! HELPERS
//!
//! Inline wrapper which forwards to the DeepDVC 'set options' function (signature PFun_slDeepDVCSetOptions).
//!
//! The function is imported for sl::kFeatureDeepDVC via SL_FEATURE_FUN_IMPORT_STATIC, which provides
//! the 's_slDeepDVCSetOptions' callable used below.
//! NOTE(review): the macro is not defined in this header; presumably it resolves the function once and caches it in a static - confirm.
//!
//! @param viewport Specifies the viewport we are working with
//! @param options Specifies DeepDVC options to use
//! @return Result of the imported function
inline sl::Result slDeepDVCSetOptions(const sl::ViewportHandle& viewport, const sl::DeepDVCOptions& options)
{
SL_FEATURE_FUN_IMPORT_STATIC(sl::kFeatureDeepDVC, slDeepDVCSetOptions);
return s_slDeepDVCSetOptions(viewport, options);
}
//! Inline wrapper which forwards to the DeepDVC 'get state' function (signature PFun_slDeepDVCGetState).
//!
//! The function is imported for sl::kFeatureDeepDVC via SL_FEATURE_FUN_IMPORT_STATIC, which provides
//! the 's_slDeepDVCGetState' callable used below.
//! NOTE(review): the macro is not defined in this header; presumably it resolves the function once and caches it in a static - confirm.
//!
//! @param viewport Specifies the viewport we are working with
//! @param state Reference to a structure where state is to be returned
//! @return Result of the imported function
inline sl::Result slDeepDVCGetState(const sl::ViewportHandle& viewport, sl::DeepDVCState& state)
{
SL_FEATURE_FUN_IMPORT_STATIC(sl::kFeatureDeepDVC, slDeepDVCGetState);
return s_slDeepDVCGetState(viewport, state);
}
//#define SL_CASE_STR(a) case a : return #a;
//! Returns a string describing the given DeepDVC mode
//!
//! Values without a case label below (for example sl::DeepDVCMode::eCount) yield "Unknown".
//! NOTE(review): SL_CASE_STR is not defined in this header; the commented-out definition above
//! suggests that it expands to a case label returning the stringified enumerator - confirm.
//!
//! @param v Mode to convert
//! @return Pointer to a string for the mode, or "Unknown"
inline const char* getDeepDVCModeAsStr(sl::DeepDVCMode v)
{
switch (v)
{
SL_CASE_STR(sl::DeepDVCMode::eOff);
SL_CASE_STR(sl::DeepDVCMode::eOn);
};
return "Unknown";
}

View File

@ -0,0 +1,23 @@
#pragma once
#include <cstdint>
#include "sl_struct.h"
namespace sl
{

//! Rendering API used by the host application
//!
enum class RenderAPI : uint32_t
{
    eD3D11 = 0,
    eD3D12 = 1,
    eVulkan = 2,
    //! Number of rendering APIs listed above
    eCount
};

} // namespace sl

View File

@ -0,0 +1,180 @@
/*
* Copyright (c) 2022-2023 NVIDIA CORPORATION. All rights reserved
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
namespace sl
{
//! DLSS operating modes
enum class DLSSMode : uint32_t
{
    eOff = 0,
    eMaxPerformance = 1,
    eBalanced = 2,
    eMaxQuality = 3,
    eUltraPerformance = 4,
    eUltraQuality = 5,
    eDLAA = 6,
    //! Total count
    eCount,
};
//! DLSS model presets
enum class DLSSPreset : uint32_t
{
    //! Default behavior, may or may not change after an OTA
    eDefault = 0,
    //! Fixed DL models
    ePresetA = 1,
    ePresetB = 2,
    ePresetC = 3,
    ePresetD = 4,
    ePresetE = 5,
    ePresetF = 6,
    //! Reverts to default, not recommended to use
    ePresetG = 7,
    //! Reverts to default, not recommended to use
    ePresetH = 8,
    //! Reverts to default, not recommended to use
    ePresetI = 9,
    ePresetJ = 10,
};
//! DLSS options, consumed by slDLSSGetOptimalSettings and slDLSSSetOptions.
//!
//! Declared through SL_STRUCT_BEGIN with the GUID below as its struct type and kStructVersion3.
//!
// {6AC826E4-4C61-4101-A92D-638D421057B8}
SL_STRUCT_BEGIN(DLSSOptions, StructType({ 0x6ac826e4, 0x4c61, 0x4101, { 0xa9, 0x2d, 0x63, 0x8d, 0x42, 0x10, 0x57, 0xb8 } }), kStructVersion3)
//! Specifies which mode should be used
DLSSMode mode = DLSSMode::eOff;
//! Specifies output (final) target width
uint32_t outputWidth = INVALID_UINT;
//! Specifies output (final) target height
uint32_t outputHeight = INVALID_UINT;
//! Specifies sharpening level in range [0,1]
float sharpness = 0.0f;
//! Specifies pre-exposure value
float preExposure = 1.0f;
//! Specifies exposure scale value
float exposureScale = 1.0f;
//! Specifies if tagged color buffers are full HDR or not (DLSS in HDR pipeline or not)
Boolean colorBuffersHDR = Boolean::eTrue;
//! Specifies if indicator on screen should invert the X axis
Boolean indicatorInvertAxisX = Boolean::eFalse;
//! Specifies if indicator on screen should invert the Y axis
Boolean indicatorInvertAxisY = Boolean::eFalse;
//! Presets, one member per DLSS mode; every member defaults to DLSSPreset::eDefault
//! NOTE(review): the member-to-DLSSMode pairing is inferred from the member names - confirm
DLSSPreset dlaaPreset = DLSSPreset::eDefault;
DLSSPreset qualityPreset = DLSSPreset::eDefault;
DLSSPreset balancedPreset = DLSSPreset::eDefault;
DLSSPreset performancePreset = DLSSPreset::eDefault;
DLSSPreset ultraPerformancePreset = DLSSPreset::eDefault;
DLSSPreset ultraQualityPreset = DLSSPreset::eDefault;
//! Specifies if the setting for AutoExposure is used
Boolean useAutoExposure = Boolean::eFalse;
//! Whether or not the alpha channel should be upscaled (if false, only RGB is upscaled)
//! Enabling alpha upscaling may impact performance
Boolean alphaUpscalingEnabled = Boolean::eFalse;
//! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
//! Returned by DLSS plugin
//!
//! Output structure of slDLSSGetOptimalSettings. Every member is value-initialized to zero.
//!
//! {EF1D0957-FD58-4DF7-B504-8B69D8AA6B76}
SL_STRUCT_BEGIN(DLSSOptimalSettings, StructType({ 0xef1d0957, 0xfd58, 0x4df7, { 0xb5, 0x4, 0x8b, 0x69, 0xd8, 0xaa, 0x6b, 0x76 } }), kStructVersion1)
//! Specifies render area width
uint32_t optimalRenderWidth{};
//! Specifies render area height
uint32_t optimalRenderHeight{};
//! Specifies the optimal sharpness value
float optimalSharpness{};
//! Specifies minimal render area width
uint32_t renderWidthMin{};
//! Specifies minimal render area height
uint32_t renderHeightMin{};
//! Specifies maximal render area width
uint32_t renderWidthMax{};
//! Specifies maximal render area height
uint32_t renderHeightMax{};
//! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
//! Returned by DLSS plugin
//!
//! Output structure of slDLSSGetState.
//!
//! {9366B056-8C01-463C-BB91-E68782636CE9}
SL_STRUCT_BEGIN(DLSSState, StructType({ 0x9366b056, 0x8c01, 0x463c, { 0xbb, 0x91, 0xe6, 0x87, 0x82, 0x63, 0x6c, 0xe9 } }), kStructVersion1)
//! Specifies the amount of memory expected to be used
uint64_t estimatedVRAMUsageInBytes{};
//! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
}
//! Provides optimal DLSS settings
//!
//! Call this method to obtain optimal render target size and other DLSS related settings.
//!
//! @param options Specifies DLSS options to use
//! @param settings Reference to a structure where settings are returned
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe.
//!
//! Note that this alias names a function type, not a pointer-to-function type.
using PFun_slDLSSGetOptimalSettings = sl::Result(const sl::DLSSOptions & options, sl::DLSSOptimalSettings & settings);
//! Provides DLSS state for the given viewport
//!
//! Call this method to obtain the current DLSS state (see sl::DLSSState, e.g. the estimated VRAM usage).
//!
//! @param viewport Specifies the viewport we are working with
//! @param state Reference to a structure where state is to be returned
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe.
//!
//! Note that this alias names a function type, not a pointer-to-function type.
using PFun_slDLSSGetState = sl::Result(const sl::ViewportHandle & viewport, sl::DLSSState & state);
//! Sets DLSS options
//!
//! Call this method to turn DLSS on/off, change mode etc.
//!
//! @param viewport Specifies the viewport we are working with
//! @param options Specifies DLSS options to use
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe.
//!
//! Note that this alias names a function type, not a pointer-to-function type.
using PFun_slDLSSSetOptions = sl::Result(const sl::ViewportHandle& viewport, const sl::DLSSOptions& options);
//! HELPERS
//!
//! Convenience wrapper with the PFun_slDLSSGetOptimalSettings signature.
//!
//! Invokes SL_FEATURE_FUN_IMPORT_STATIC for sl::kFeatureDLSS / slDLSSGetOptimalSettings and then
//! forwards both arguments to s_slDLSSGetOptimalSettings, returning its result unchanged.
//! NOTE(review): s_slDLSSGetOptimalSettings is not declared in this function, so it presumably comes
//! from the macro expansion - confirm against the macro definition.
inline sl::Result slDLSSGetOptimalSettings(const sl::DLSSOptions& options, sl::DLSSOptimalSettings& settings)
{
SL_FEATURE_FUN_IMPORT_STATIC(sl::kFeatureDLSS, slDLSSGetOptimalSettings);
return s_slDLSSGetOptimalSettings(options, settings);
}
//! Convenience wrapper with the PFun_slDLSSGetState signature.
//!
//! Invokes SL_FEATURE_FUN_IMPORT_STATIC for sl::kFeatureDLSS / slDLSSGetState and then forwards
//! both arguments to s_slDLSSGetState, returning its result unchanged.
//! NOTE(review): s_slDLSSGetState is not declared in this function, so it presumably comes from the
//! macro expansion - confirm against the macro definition.
inline sl::Result slDLSSGetState(const sl::ViewportHandle& viewport, sl::DLSSState& state)
{
SL_FEATURE_FUN_IMPORT_STATIC(sl::kFeatureDLSS, slDLSSGetState);
return s_slDLSSGetState(viewport, state);
}
//! Convenience wrapper with the PFun_slDLSSSetOptions signature.
//!
//! Invokes SL_FEATURE_FUN_IMPORT_STATIC for sl::kFeatureDLSS / slDLSSSetOptions and then forwards
//! both arguments to s_slDLSSSetOptions, returning its result unchanged.
//! NOTE(review): s_slDLSSSetOptions is not declared in this function, so it presumably comes from the
//! macro expansion - confirm against the macro definition.
inline sl::Result slDLSSSetOptions(const sl::ViewportHandle& viewport, const sl::DLSSOptions& options)
{
SL_FEATURE_FUN_IMPORT_STATIC(sl::kFeatureDLSS, slDLSSSetOptions);
return s_slDLSSSetOptions(viewport, options);
}

View File

@ -0,0 +1,200 @@
/*
* Copyright (c) 2022-2023 NVIDIA CORPORATION. All rights reserved
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include "sl.h"
#include "sl_consts.h"
#include "sl_core_types.h"
#include <vector>
namespace sl
{
//! DLSS-G modes selectable through DLSSGOptions::mode.
//!
//! Enumerators use default numbering (eOff = 0, eOn = 1, eAuto = 2), so their order defines
//! their numeric value. eCount is listed last and therefore equals the number of modes
//! declared before it.
enum class DLSSGMode : uint32_t
{
eOff,
eOn,
eAuto,
eCount
};
//! Optional DLSS-G flags stored in DLSSGOptions::flags.
//!
//! Each enumerator occupies its own bit (1 << 0 ... 1 << 4).
enum class DLSSGFlags : uint32_t
{
eShowOnlyInterpolatedFrame = 1 << 0,
eDynamicResolutionEnabled = 1 << 1,
eRequestVRAMEstimate = 1 << 2,
eRetainResourcesWhenOff = 1 << 3,
eEnableFullscreenMenuDetection = 1 << 4,
};
//! Level of client and DLSS-G queue parallelism, stored in DLSSGOptions::queueParallelismMode.
//!
//! Enumerators use default numbering, so eBlockPresentingClientQueue = 0 is also the value
//! a value-initialized DLSSGQueueParallelismMode takes.
enum class DLSSGQueueParallelismMode : uint32_t
{
//! Default mode in which client's presenting queue is blocked until DLSSG workload execution completes.
eBlockPresentingClientQueue,
//! This mode is only supported on Vulkan presently. Even if set by any D3D client, it would default to
//! eBlockPresentingClientQueue as before. eBlockNoClientQueues mode helps achieve maximum performance benefit
//! from queue-level parallelism in Vulkan during DLSS-G processing. In this mode, client must wait on
//! DLSSGState::inputsProcessingCompletionFence and associated value, before it can modify or destroy the tagged
//! resources input to DLSS-G enabled for the corresponding previously presented frame on any client queue.
eBlockNoClientQueues,
eCount
};
// Adds various useful operators for our enum
SL_ENUM_OPERATORS_32(DLSSGFlags)
//! DLSS-G options, consumed by slDLSSGSetOptions and (optionally) slDLSSGGetState.
//!
//! Declared through SL_STRUCT_BEGIN with the GUID below as its struct type and kStructVersion3.
//! The "kStructVersionN" markers inside the body show at which version the following members were added.
//!
// {FAC5F1CB-2DFD-4F36-A1E6-3A9E865256C5}
SL_STRUCT_BEGIN(DLSSGOptions, StructType({ 0xfac5f1cb, 0x2dfd, 0x4f36, { 0xa1, 0xe6, 0x3a, 0x9e, 0x86, 0x52, 0x56, 0xc5 } }), kStructVersion3)
//! Specifies which mode should be used.
DLSSGMode mode = DLSSGMode::eOff;
//! Number of frames to generate in between fully rendered frames. Cannot exceed DLSSGState::numFramesToGenerateMax.
//! For 2x frame multiplier, numFramesToGenerate is 1.
//! For 3x frame multiplier, numFramesToGenerate is 2.
//! For 4x frame multiplier, numFramesToGenerate is 3.
uint32_t numFramesToGenerate = 1;
//! Optional - Flags used to enable or disable certain functionality
DLSSGFlags flags{};
//! Optional - Dynamic resolution optimal width (used only if eDynamicResolutionEnabled is set)
uint32_t dynamicResWidth{};
//! Optional - Dynamic resolution optimal height (used only if eDynamicResolutionEnabled is set)
uint32_t dynamicResHeight{};
//! Optional - Expected number of buffers in the swap-chain
uint32_t numBackBuffers{};
//! Optional - Expected width of the input render targets (depth, motion-vector buffers etc)
uint32_t mvecDepthWidth{};
//! Optional - Expected height of the input render targets (depth, motion-vector buffers etc)
uint32_t mvecDepthHeight{};
//! Optional - Expected width of the back buffers in the swap-chain
uint32_t colorWidth{};
//! Optional - Expected height of the back buffers in the swap-chain
uint32_t colorHeight{};
//! Optional - Indicates native format used for the swap-chain back buffers
uint32_t colorBufferFormat{};
//! Optional - Indicates native format used for eMotionVectors
uint32_t mvecBufferFormat{};
//! Optional - Indicates native format used for eDepth
uint32_t depthBufferFormat{};
//! Optional - Indicates native format used for eHUDLessColor
uint32_t hudLessBufferFormat{};
//! Optional - Indicates native format used for eUIColorAndAlpha
uint32_t uiBufferFormat{};
//! Optional - if specified DLSSG will return any errors which occur when calling underlying API (DXGI or Vulkan)
PFunOnAPIErrorCallback* onErrorCallback{};
// kStructVersion2
//! Reserved member, defaults to eInvalid
Boolean bReserved15 = eInvalid;
// kStructVersion3
//! Optional - determines the level of client and DLSSG queue parallelism to use for performance gain - must be same for all viewports.
DLSSGQueueParallelismMode queueParallelismMode{};
//! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
//! Status reported in DLSSGState::status.
//!
//! eOk is zero; every other enumerator occupies its own bit (1 << 0 ... 1 << 5).
enum class DLSSGStatus : uint32_t
{
//! Everything is working as expected
eOk = 0,
//! Output resolution (size of the back buffers in the swap-chain) is too low
eFailResolutionTooLow = 1 << 0,
//! Reflex is not active while DLSS-G is running, Reflex must be turned on when DLSS-G is on
eFailReflexNotDetectedAtRuntime = 1 << 1,
//! HDR format not supported, see DLSS-G programming guide for more details
eFailHDRFormatNotSupported = 1 << 2,
//! Some constants are invalid, see programming guide for more details
eFailCommonConstantsInvalid = 1 << 3,
//! D3D integrations must use SwapChain::GetCurrentBackBufferIndex API
eFailGetCurrentBackBufferIndexNotCalled = 1 << 4,
//! Reserved bit
eReserved5 = 1 << 5
};
// Adds various useful operators for our enum
SL_ENUM_OPERATORS_32(DLSSGStatus)
//! DLSS-G state, filled in by slDLSSGGetState. Every member is value-initialized to zero/null.
//!
//! Declared through SL_STRUCT_BEGIN with the GUID below as its struct type and kStructVersion3.
//!
// {CC8AC8E1-A179-44F5-97FA-E74112F9BC61}
SL_STRUCT_BEGIN(DLSSGState, StructType({ 0xcc8ac8e1, 0xa179, 0x44f5, { 0x97, 0xfa, 0xe7, 0x41, 0x12, 0xf9, 0xbc, 0x61 } }), kStructVersion3)
//! Specifies the amount of memory expected to be used
uint64_t estimatedVRAMUsageInBytes{};
//! Specifies current status of DLSS-G
DLSSGStatus status{};
//! Specifies minimum supported dimension
uint32_t minWidthOrHeight{};
//! Number of frames presented since the last 'slDLSSGGetState' call
uint32_t numFramesActuallyPresented{};
// kStructVersion2
//! Maximum number of frames possible to generate on this gpu architecture.
//! For 2x only supporting devices, numFramesToGenerateMax is 1.
//! For 3x and 4x supporting devices, numFramesToGenerateMax is 3.
uint32_t numFramesToGenerateMax{};
//! Reserved member
sl::Boolean bReserved4{};
//! Hint to the application to display VSync support in the user interface
sl::Boolean bIsVsyncSupportAvailable{};
//! SL client must wait on SL DLSS-G plugin-internal fence and associated value, before it can modify or destroy the tagged resources input
//! to DLSS-G enabled for the corresponding previously presented frame on a non-presenting queue.
//! If modified on client's presenting queue, then it's recommended but not required.
//! However, if DLSSGQueueParallelismMode::eBlockNoClientQueues is set, then it's always required.
//! It must call slDLSSGGetState on the present thread to retrieve the fence value for the inputs consumed by FG, on which client would
//! wait in the frame it would modify those inputs.
void* inputsProcessingCompletionFence{};
//! Fence value associated with inputsProcessingCompletionFence (see the description above)
uint64_t lastPresentInputsProcessingCompletionFenceValue{};
//! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
}
//! Provides DLSS-G state
//!
//! Call this method to obtain current state of DLSS-G
//!
//! @param viewport Specifies the viewport we are working with
//! @param state Reference to a structure where state is returned
//! @param options Specifies DLSS-G options to use (can be null if not needed)
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe.
//!
//! Note that this alias names a function type, not a pointer-to-function type.
using PFun_slDLSSGGetState = sl::Result(const sl::ViewportHandle& viewport, sl::DLSSGState& state, const sl::DLSSGOptions* options);
//! Sets DLSS-G options
//!
//! Call this method to turn DLSS-G on/off, change modes etc.
//!
//! @param viewport Specifies the viewport we are working with
//! @param options Specifies DLSS-G options to use
//! @return sl::Result::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe.
//!
//! Note that this alias names a function type, not a pointer-to-function type.
using PFun_slDLSSGSetOptions = sl::Result(const sl::ViewportHandle& viewport, const sl::DLSSGOptions& options);
//! HELPERS
//!
//! Convenience wrapper with the PFun_slDLSSGGetState signature.
//!
//! Invokes SL_FEATURE_FUN_IMPORT_STATIC for sl::kFeatureDLSS_G / slDLSSGGetState and then forwards
//! all arguments to s_slDLSSGGetState, returning its result unchanged.
//! NOTE(review): s_slDLSSGGetState is not declared in this function, so it presumably comes from the
//! macro expansion - confirm against the macro definition.
inline sl::Result slDLSSGGetState(const sl::ViewportHandle& viewport, sl::DLSSGState& state, const sl::DLSSGOptions* options)
{
SL_FEATURE_FUN_IMPORT_STATIC(sl::kFeatureDLSS_G, slDLSSGGetState);
return s_slDLSSGGetState(viewport, state, options);
}
//! Convenience wrapper with the PFun_slDLSSGSetOptions signature.
//!
//! Invokes SL_FEATURE_FUN_IMPORT_STATIC for sl::kFeatureDLSS_G / slDLSSGSetOptions and then forwards
//! both arguments to s_slDLSSGSetOptions, returning its result unchanged.
//! NOTE(review): s_slDLSSGSetOptions is not declared in this function, so it presumably comes from the
//! macro expansion - confirm against the macro definition.
inline sl::Result slDLSSGSetOptions(const sl::ViewportHandle& viewport, const sl::DLSSGOptions& options)
{
SL_FEATURE_FUN_IMPORT_STATIC(sl::kFeatureDLSS_G, slDLSSGSetOptions);
return s_slDLSSGSetOptions(viewport, options);
}

View File

@ -0,0 +1,417 @@
/*
* Copyright (c) 2022-2023 NVIDIA CORPORATION. All rights reserved
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include <string.h>
#include <vector>
// Combines a feature id (shifted left by 16 bits) with a feature-specific number using bitwise OR.
// NOTE(review): neither the parameters nor the replacement list are parenthesized, so compound arguments
// (e.g. `a | b`) or use inside a larger expression (e.g. `2 * FEATURE_SPECIFIC_BUFFER_TYPE_ID(f, n)`) bind
// differently than a function call would. Wrap invocations in parentheses at the call site. Before
// parenthesizing the macro itself, check that no other header defines it with this exact spelling, since a
// differing redefinition is diagnosed by the compiler.
#define FEATURE_SPECIFIC_BUFFER_TYPE_ID(feature, number) feature << 16 | number
#include "sl.h"
#include "sl_consts.h"
#include "sl_reflex.h"
#include "sl_pcl.h"
#include "sl_dlss.h"
#include "sl_nis.h"
#include "sl_dlss_g.h"
namespace sl
{
//! Returns the transpose of a 4x4 matrix.
//!
//! Element r[i] of the result gathers the i-th component (x, y, z, w) of m[0]..m[3],
//! i.e. the rows of the result are the columns of the input.
//!
//! @param m Matrix to transpose (left unmodified)
//! @return Transposed copy of m
inline float4x4 transpose(const float4x4& m)
{
    float4x4 r;
    r[0] = { m[0].x, m[1].x, m[2].x, m[3].x };
    r[1] = { m[0].y, m[1].y, m[2].y, m[3].y };
    r[2] = { m[0].z, m[1].z, m[2].z, m[3].z };
    r[3] = { m[0].w, m[1].w, m[2].w, m[3].w };
    return r;
}
// Expands to a switch case for 'a' that returns the argument stringified exactly as it is spelled at the
// call site (including any namespace/enum qualification). The expansion already ends with ';'.
#define SL_CASE_STR(a) case a : return #a;
// Check for c++17 features
// MSVC keeps __cplusplus at 199711L unless /Zc:__cplusplus is passed, so _MSVC_LANG (which always
// reflects the selected language standard) is consulted as well.
#if (__cplusplus >= 201703L) || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L))
#define SL_FALLTHROUGH [[fallthrough]];
#else
#define SL_FALLTHROUGH
#endif
//! Converts a Result value to a string literal.
//!
//! Each case is produced by SL_CASE_STR, so the returned text is the enumerator exactly as spelled
//! below (e.g. "Result::eOk").
//!
//! @param v Result to convert
//! @return Spelling of the matching enumerator, "Unknown" if v matches none of the listed cases
inline const char* getResultAsStr(Result v)
{
switch (v)
{
SL_CASE_STR(Result::eOk);
SL_CASE_STR(Result::eErrorIO);
SL_CASE_STR(Result::eErrorDriverOutOfDate);
SL_CASE_STR(Result::eErrorOSOutOfDate);
SL_CASE_STR(Result::eErrorOSDisabledHWS);
SL_CASE_STR(Result::eErrorDeviceNotCreated);
SL_CASE_STR(Result::eErrorNoSupportedAdapterFound);
SL_CASE_STR(Result::eErrorAdapterNotSupported);
SL_CASE_STR(Result::eErrorNoPlugins);
SL_CASE_STR(Result::eErrorVulkanAPI);
SL_CASE_STR(Result::eErrorDXGIAPI);
SL_CASE_STR(Result::eErrorD3DAPI);
SL_CASE_STR(Result::eErrorNRDAPI);
SL_CASE_STR(Result::eErrorNVAPI);
SL_CASE_STR(Result::eErrorReflexAPI);
SL_CASE_STR(Result::eErrorNGXFailed);
SL_CASE_STR(Result::eErrorJSONParsing);
SL_CASE_STR(Result::eErrorMissingProxy);
SL_CASE_STR(Result::eErrorMissingResourceState);
SL_CASE_STR(Result::eErrorInvalidIntegration);
SL_CASE_STR(Result::eErrorMissingInputParameter);
SL_CASE_STR(Result::eErrorNotInitialized);
SL_CASE_STR(Result::eErrorComputeFailed);
SL_CASE_STR(Result::eErrorInitNotCalled);
SL_CASE_STR(Result::eErrorExceptionHandler);
SL_CASE_STR(Result::eErrorInvalidParameter);
SL_CASE_STR(Result::eErrorMissingConstants);
SL_CASE_STR(Result::eErrorDuplicatedConstants);
SL_CASE_STR(Result::eErrorMissingOrInvalidAPI);
SL_CASE_STR(Result::eErrorCommonConstantsMissing);
SL_CASE_STR(Result::eErrorUnsupportedInterface);
SL_CASE_STR(Result::eErrorFeatureMissing);
SL_CASE_STR(Result::eErrorFeatureNotSupported);
SL_CASE_STR(Result::eErrorFeatureMissingHooks);
SL_CASE_STR(Result::eErrorFeatureFailedToLoad);
SL_CASE_STR(Result::eErrorFeatureWrongPriority);
SL_CASE_STR(Result::eErrorFeatureMissingDependency);
SL_CASE_STR(Result::eErrorFeatureManagerInvalidState);
SL_CASE_STR(Result::eErrorInvalidState);
SL_CASE_STR(Result::eWarnOutOfVRAM);
};
// No case matched
return "Unknown";
}
//! Converts a NIS mode to the spelling of its enumerator.
//!
//! @param v Mode to convert
//! @return "NISMode::eOff", "NISMode::eScaler" or "NISMode::eSharpen"; "Unknown" for any other value
inline const char* getNISModeAsStr(NISMode v)
{
    if (v == NISMode::eOff)
    {
        return "NISMode::eOff";
    }
    if (v == NISMode::eScaler)
    {
        return "NISMode::eScaler";
    }
    if (v == NISMode::eSharpen)
    {
        return "NISMode::eSharpen";
    }
    return "Unknown";
}
//! Converts a NIS HDR setting to the spelling of its enumerator.
//!
//! @param v Value to convert
//! @return "NISHDR::eNone", "NISHDR::eLinear" or "NISHDR::ePQ"; "Unknown" for any other value
inline const char* getNISHDRAsStr(NISHDR v)
{
    if (v == NISHDR::eNone)
    {
        return "NISHDR::eNone";
    }
    if (v == NISHDR::eLinear)
    {
        return "NISHDR::eLinear";
    }
    if (v == NISHDR::ePQ)
    {
        return "NISHDR::ePQ";
    }
    return "Unknown";
}
//! Converts a Reflex mode to the spelling of its enumerator.
//!
//! @param mode Mode to convert
//! @return "ReflexMode::eOff", "ReflexMode::eLowLatency" or "ReflexMode::eLowLatencyWithBoost";
//!         "Unknown" for any other value
inline const char* getReflexModeAsStr(ReflexMode mode)
{
    const char* text = "Unknown";
    switch (mode)
    {
        case ReflexMode::eOff:
            text = "ReflexMode::eOff";
            break;
        case ReflexMode::eLowLatency:
            text = "ReflexMode::eLowLatency";
            break;
        case ReflexMode::eLowLatencyWithBoost:
            text = "ReflexMode::eLowLatencyWithBoost";
            break;
    }
    return text;
}
//! Converts a PCL marker to the spelling of its enumerator.
//!
//! @param marker Marker to convert
//! @return Enumerator spelled as "PCLMarker::<name>"; "Unknown" if marker matches none of the listed cases
inline const char* getPCLMarkerAsStr(PCLMarker marker)
{
    switch (marker)
    {
        case PCLMarker::eSimulationStart:            return "PCLMarker::eSimulationStart";
        case PCLMarker::eSimulationEnd:              return "PCLMarker::eSimulationEnd";
        case PCLMarker::eRenderSubmitStart:          return "PCLMarker::eRenderSubmitStart";
        case PCLMarker::eRenderSubmitEnd:            return "PCLMarker::eRenderSubmitEnd";
        case PCLMarker::ePresentStart:               return "PCLMarker::ePresentStart";
        case PCLMarker::ePresentEnd:                 return "PCLMarker::ePresentEnd";
        case PCLMarker::eTriggerFlash:               return "PCLMarker::eTriggerFlash";
        case PCLMarker::ePCLatencyPing:              return "PCLMarker::ePCLatencyPing";
        case PCLMarker::eOutOfBandRenderSubmitStart: return "PCLMarker::eOutOfBandRenderSubmitStart";
        case PCLMarker::eOutOfBandRenderSubmitEnd:   return "PCLMarker::eOutOfBandRenderSubmitEnd";
        case PCLMarker::eOutOfBandPresentStart:      return "PCLMarker::eOutOfBandPresentStart";
        case PCLMarker::eOutOfBandPresentEnd:        return "PCLMarker::eOutOfBandPresentEnd";
    }
    return "Unknown";
}
//! Converts a DLSS mode to the spelling of its enumerator.
//!
//! @param mode Mode to convert
//! @return Enumerator spelled as "DLSSMode::<name>"; "Unknown" if mode matches none of the listed cases
inline const char* getDLSSModeAsStr(DLSSMode mode)
{
    switch (mode)
    {
        case DLSSMode::eOff:              return "DLSSMode::eOff";
        case DLSSMode::eDLAA:             return "DLSSMode::eDLAA";
        case DLSSMode::eMaxPerformance:   return "DLSSMode::eMaxPerformance";
        case DLSSMode::eBalanced:         return "DLSSMode::eBalanced";
        case DLSSMode::eMaxQuality:       return "DLSSMode::eMaxQuality";
        case DLSSMode::eUltraPerformance: return "DLSSMode::eUltraPerformance";
        case DLSSMode::eUltraQuality:     return "DLSSMode::eUltraQuality";
    }
    return "Unknown";
}
//! Converts a DLSS-G mode to the fully qualified spelling of its enumerator.
//!
//! @param mode Mode to convert
//! @return "sl::DLSSGMode::eOff", "sl::DLSSGMode::eOn" or "sl::DLSSGMode::eAuto"; "Unknown" for any other value
inline const char* getDLSSGModeAsStr(DLSSGMode mode)
{
    if (mode == sl::DLSSGMode::eOff)
    {
        return "sl::DLSSGMode::eOff";
    }
    if (mode == sl::DLSSGMode::eOn)
    {
        return "sl::DLSSGMode::eOn";
    }
    if (mode == sl::DLSSGMode::eAuto)
    {
        return "sl::DLSSGMode::eAuto";
    }
    return "Unknown";
}
//! Converts a buffer type to a string literal.
//!
//! Each case is produced by SL_CASE_STR, so the returned text is the constant's name exactly as
//! spelled below (e.g. "kBufferTypeDepth").
//!
//! @param buf Buffer type to convert
//! @return Name of the matching constant, "Unknown" if buf matches none of the listed cases
inline const char* getBufferTypeAsStr(BufferType buf)
{
switch (buf)
{
SL_CASE_STR(kBufferTypeDepth);
SL_CASE_STR(kBufferTypeMotionVectors);
SL_CASE_STR(kBufferTypeHUDLessColor);
SL_CASE_STR(kBufferTypeScalingInputColor);
SL_CASE_STR(kBufferTypeScalingOutputColor);
SL_CASE_STR(kBufferTypeNormals);
SL_CASE_STR(kBufferTypeRoughness);
SL_CASE_STR(kBufferTypeAlbedo);
SL_CASE_STR(kBufferTypeSpecularAlbedo);
SL_CASE_STR(kBufferTypeIndirectAlbedo);
SL_CASE_STR(kBufferTypeSpecularMotionVectors);
SL_CASE_STR(kBufferTypeDisocclusionMask);
SL_CASE_STR(kBufferTypeEmissive);
SL_CASE_STR(kBufferTypeExposure);
SL_CASE_STR(kBufferTypeNormalRoughness);
SL_CASE_STR(kBufferTypeDiffuseHitNoisy);
SL_CASE_STR(kBufferTypeDiffuseHitDenoised);
SL_CASE_STR(kBufferTypeSpecularHitNoisy);
SL_CASE_STR(kBufferTypeSpecularHitDenoised);
SL_CASE_STR(kBufferTypeShadowNoisy);
SL_CASE_STR(kBufferTypeShadowDenoised);
SL_CASE_STR(kBufferTypeAmbientOcclusionNoisy);
SL_CASE_STR(kBufferTypeAmbientOcclusionDenoised);
SL_CASE_STR(kBufferTypeUIColorAndAlpha);
SL_CASE_STR(kBufferTypeShadowHint);
SL_CASE_STR(kBufferTypeReflectionHint);
SL_CASE_STR(kBufferTypeParticleHint);
SL_CASE_STR(kBufferTypeTransparencyHint);
SL_CASE_STR(kBufferTypeAnimatedTextureHint);
SL_CASE_STR(kBufferTypeBiasCurrentColorHint);
SL_CASE_STR(kBufferTypeRaytracingDistance);
SL_CASE_STR(kBufferTypeReflectionMotionVectors);
SL_CASE_STR(kBufferTypePosition);
SL_CASE_STR(kBufferTypeInvalidDepthMotionHint);
SL_CASE_STR(kBufferTypeAlpha);
SL_CASE_STR(kBufferTypeOpaqueColor);
SL_CASE_STR(kBufferTypeReactiveMaskHint);
SL_CASE_STR(kBufferTypeTransparencyAndCompositionMaskHint);
SL_CASE_STR(kBufferTypeReflectedAlbedo);
SL_CASE_STR(kBufferTypeColorBeforeParticles);
SL_CASE_STR(kBufferTypeColorBeforeTransparency);
SL_CASE_STR(kBufferTypeColorBeforeFog);
SL_CASE_STR(kBufferTypeSpecularHitDistance);
SL_CASE_STR(kBufferTypeSpecularRayDirectionHitDistance);
SL_CASE_STR(kBufferTypeSpecularRayDirection);
SL_CASE_STR(kBufferTypeDiffuseHitDistance);
SL_CASE_STR(kBufferTypeDiffuseRayDirectionHitDistance);
SL_CASE_STR(kBufferTypeDiffuseRayDirection);
SL_CASE_STR(kBufferTypeHiResDepth);
SL_CASE_STR(kBufferTypeLinearDepth);
SL_CASE_STR(kBufferTypeColorAfterParticles);
SL_CASE_STR(kBufferTypeColorAfterTransparency);
SL_CASE_STR(kBufferTypeColorAfterFog);
SL_CASE_STR(kBufferTypeScreenSpaceSubsurfaceScatteringGuide);
SL_CASE_STR(kBufferTypeColorBeforeScreenSpaceSubsurfaceScattering);
SL_CASE_STR(kBufferTypeColorAfterScreenSpaceSubsurfaceScattering);
SL_CASE_STR(kBufferTypeScreenSpaceRefractionGuide);
SL_CASE_STR(kBufferTypeColorBeforeScreenSpaceRefraction);
SL_CASE_STR(kBufferTypeColorAfterScreenSpaceRefraction);
SL_CASE_STR(kBufferTypeDepthOfFieldGuide);
SL_CASE_STR(kBufferTypeColorBeforeDepthOfField);
SL_CASE_STR(kBufferTypeColorAfterDepthOfField);
SL_CASE_STR(kBufferTypeBidirectionalDistortionField);
SL_CASE_STR(kBufferTypeTransparencyLayer);
SL_CASE_STR(kBufferTypeTransparencyLayerOpacity);
SL_CASE_STR(kBufferTypeBackbuffer);
SL_CASE_STR(kBufferTypeNoWarpMask);
};
// No case matched
return "Unknown";
}
//! Converts a feature id to the name of its constant.
//!
//! @param f Feature to convert
//! @return Name of the matching constant (e.g. "kFeatureDLSS"); "Unknown" for removed features
//!         (kFeatureNRD_INVALID) and for any id not listed here
inline const char* getFeatureAsStr(Feature f)
{
    switch (f)
    {
        case kFeatureDLSS:      return "kFeatureDLSS";
        case kFeatureNIS:       return "kFeatureNIS";
        case kFeatureReflex:    return "kFeatureReflex";
        case kFeaturePCL:       return "kFeaturePCL";
        case kFeatureDLSS_G:    return "kFeatureDLSS_G";
        case kFeatureNvPerf:    return "kFeatureNvPerf";
        case kFeatureImGUI:     return "kFeatureImGUI";
        case kFeatureCommon:    return "kFeatureCommon";
        case kFeatureDLSS_RR:   return "kFeatureDLSS_RR";
        case kFeatureDeepDVC:   return "kFeatureDeepDVC";
        case kFeatureDirectSR:  return "kFeatureDirectSR";
        case kFeatureLatewarp:  return "kFeatureLatewarp";
        // Removed features share the result of unlisted ids
        case kFeatureNRD_INVALID:
        default:
            return "Unknown";
    }
}
// Maps a feature id to the feature part of its plugin file name, without the "sl." prefix
// and the file extension. Example: kFeatureDLSS, whose plugin is sl.dlss.dll, yields "dlss".
// Removed features (kFeatureNRD_INVALID) and ids not listed here yield "Unknown".
inline const char* getFeatureFilenameAsStrNoSL(Feature f)
{
    const char* stem = "Unknown";
    switch (f)
    {
        case kFeatureDLSS:
            stem = "dlss";
            break;
        case kFeatureNIS:
            stem = "nis";
            break;
        case kFeatureReflex:
            stem = "reflex";
            break;
        case kFeaturePCL:
            stem = "pcl";
            break;
        case kFeatureDLSS_G:
            stem = "dlss_g";
            break;
        case kFeatureNvPerf:
            stem = "nvperf";
            break;
        case kFeatureDeepDVC:
            stem = "deepdvc";
            break;
        case kFeatureImGUI:
            stem = "imgui";
            break;
        case kFeatureCommon:
            stem = "common";
            break;
        case kFeatureDLSS_RR:
            stem = "dlss_d";
            break;
        case kFeatureDirectSR:
            stem = "directsr";
            break;
        case kFeatureLatewarp:
            stem = "latewarp";
            break;
        case kFeatureNRD_INVALID:
        default:
            break;
    }
    return stem;
}
//! Converts a log level to the spelling of its enumerator.
//!
//! @param v Log level to convert
//! @return "LogLevel::eOff", "LogLevel::eDefault" or "LogLevel::eVerbose"; "Unknown" for any other value
inline const char* getLogLevelAsStr(LogLevel v)
{
    if (v == LogLevel::eOff)
    {
        return "LogLevel::eOff";
    }
    if (v == LogLevel::eDefault)
    {
        return "LogLevel::eDefault";
    }
    if (v == LogLevel::eVerbose)
    {
        return "LogLevel::eVerbose";
    }
    return "Unknown";
}
//! Converts a resource type to the spelling of its enumerator.
//!
//! @param v Resource type to convert
//! @return Enumerator spelled as "ResourceType::<name>"; "Unknown" if v matches none of the listed cases
inline const char* getResourceTypeAsStr(ResourceType v)
{
    switch (v)
    {
        case ResourceType::eTex2d:         return "ResourceType::eTex2d";
        case ResourceType::eBuffer:        return "ResourceType::eBuffer";
        case ResourceType::eCommandQueue:  return "ResourceType::eCommandQueue";
        case ResourceType::eCommandBuffer: return "ResourceType::eCommandBuffer";
        case ResourceType::eCommandPool:   return "ResourceType::eCommandPool";
        case ResourceType::eFence:         return "ResourceType::eFence";
        case ResourceType::eSwapchain:     return "ResourceType::eSwapchain";
        case ResourceType::eHostFence:     return "ResourceType::eHostFence";
    }
    return "Unknown";
}
//! Converts a resource lifecycle to the spelling of its enumerator.
//!
//! @param v Lifecycle to convert
//! @return "ResourceLifecycle::eOnlyValidNow", "ResourceLifecycle::eValidUntilPresent" or
//!         "ResourceLifecycle::eValidUntilEvaluate"; "Unknown" for any other value
inline const char* getResourceLifecycleAsStr(ResourceLifecycle v)
{
    const char* text = "Unknown";
    switch (v)
    {
        case ResourceLifecycle::eOnlyValidNow:
            text = "ResourceLifecycle::eOnlyValidNow";
            break;
        case ResourceLifecycle::eValidUntilPresent:
            text = "ResourceLifecycle::eValidUntilPresent";
            break;
        case ResourceLifecycle::eValidUntilEvaluate:
            text = "ResourceLifecycle::eValidUntilEvaluate";
            break;
    }
    return text;
}
// Advanced/internal functions that are not useful or necessary in the vast majority of integrations
// and would just pollute the namespace and/or cause distractions.
// But, may be useful in e.g. intermediary game engine integrations, etc.
#ifndef __INTELLISENSE__
//! Find a struct of type T
//!
//! Walks the chain that starts at 'ptr' by following BaseStructure::next and stops at the
//! first node whose structType equals T::s_structType.
//!
//! @param ptr Head of the chain, interpreted as a BaseStructure (may be null)
//! @return The matching node cast to T*, nullptr when the chain holds no such node
template<typename T>
T* findStruct(const void* ptr)
{
    for (auto node = static_cast<const BaseStructure*>(ptr); node != nullptr; node = node->next)
    {
        if (node->structType == T::s_structType)
        {
            return (T*)node;
        }
    }
    return nullptr;
}
//! Find a struct of type T, but stop the search if we find a struct of type S
//!
//! Walks the chain that starts at 'ptr' by following BaseStructure::next. The search ends
//! successfully at the first node whose structType equals T::s_structType and is abandoned as
//! soon as a node reached through 'next' has structType S::s_structType. The head node itself is
//! only compared against T, never against S.
//!
//! @param ptr Head of the chain, interpreted as a BaseStructure (may be null)
//! @return The matching node cast to T*; nullptr if an S node is reached first or the chain ends
template<typename T, typename S>
T* findStruct(const void* ptr)
{
    auto base = static_cast<const BaseStructure*>(ptr);
    while (base && base->structType != T::s_structType)
    {
        base = base->next;
        // 'next' is null at the end of the chain, so it has to be checked before it is inspected;
        // if we find a struct of type S, we know we should stop the search
        if (base && base->structType == S::s_structType)
        {
            return nullptr;
        }
    }
    return (T*)base;
}
//! Find a struct of type T in an array of chains
//!
//! Chains are searched in array order, each by following BaseStructure::next, and the first
//! node whose structType equals T::s_structType ends the search.
//!
//! @param ptr Array of chain heads, each interpreted as a BaseStructure (entries may be null)
//! @param count Number of entries in 'ptr'
//! @return The matching node cast to T*, nullptr when no chain holds such a node
template<typename T>
T* findStruct(const void** ptr, uint32_t count)
{
    for (uint32_t chain = 0; chain < count; ++chain)
    {
        for (auto node = static_cast<const BaseStructure*>(ptr[chain]); node != nullptr; node = node->next)
        {
            if (node->structType == T::s_structType)
            {
                return (T*)node;
            }
        }
    }
    return nullptr;
}
//! Collect every struct of type T from an array of chains
//!
//! Each chain is walked to its end by following BaseStructure::next and every node whose
//! structType equals T::s_structType is appended to 'structs'. The vector is not cleared first.
//!
//! @param ptr Array of chain heads, each interpreted as a BaseStructure (entries may be null)
//! @param count Number of entries in 'ptr'
//! @param structs Receives the matching nodes, in the order they are encountered
//! @return true if 'structs' is non-empty afterwards (elements it already held count as well)
template<typename T>
bool findStructs(const void** ptr, uint32_t count, std::vector<T*>& structs)
{
    for (uint32_t chain = 0; chain != count; ++chain)
    {
        for (auto node = static_cast<const BaseStructure*>(ptr[chain]); node != nullptr; node = node->next)
        {
            if (node->structType == T::s_structType)
            {
                structs.push_back((T*)node);
            }
        }
    }
    return !structs.empty();
}
#endif // __INTELLISENSE__
} // namespace sl

View File

@ -0,0 +1,256 @@
/*
* Copyright (c) 2022-2023 NVIDIA CORPORATION. All rights reserved
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include "sl.h"
#include <string.h>
namespace sl
{
// Sets features.n to VK_TRUE when featureNames[i] is equal to the member name 'n' (compared with strcmp).
// Relies on 'featureNames', 'i' and 'features' being in scope at the point of use.
// NOTE(review): expands to an unbraced 'if' statement that already ends with ';', so an invocation must not
// be followed by 'else'.
#define SL_VK_FEATURE(n) if(strcmp(featureNames[i], #n) == 0) features.n = VK_TRUE;
//! Builds a VkPhysicalDeviceVulkan12Features structure from a list of feature names.
//!
//! The structure starts with sType set and every other member zero-initialized. Each name is then
//! compared (via SL_VK_FEATURE, i.e. strcmp) against the member names listed below and the matching
//! member is set to VK_TRUE. Names that match no member are ignored.
//!
//! @param featureCount Number of entries in featureNames
//! @param featureNames Array of null-terminated feature names; every entry is passed to strcmp
//! @return Structure with the named features enabled
inline VkPhysicalDeviceVulkan12Features getVkPhysicalDeviceVulkan12Features(uint32_t featureCount, const char** featureNames)
{
VkPhysicalDeviceVulkan12Features features{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES };
for (uint32_t i = 0; i < featureCount; i++)
{
SL_VK_FEATURE(samplerMirrorClampToEdge);
SL_VK_FEATURE(drawIndirectCount);
SL_VK_FEATURE(storageBuffer8BitAccess);
SL_VK_FEATURE(uniformAndStorageBuffer8BitAccess);
SL_VK_FEATURE(storagePushConstant8);
SL_VK_FEATURE(shaderBufferInt64Atomics);
SL_VK_FEATURE(shaderSharedInt64Atomics);
SL_VK_FEATURE(shaderFloat16);
SL_VK_FEATURE(shaderInt8);
SL_VK_FEATURE(descriptorIndexing);
SL_VK_FEATURE(shaderInputAttachmentArrayDynamicIndexing);
SL_VK_FEATURE(shaderUniformTexelBufferArrayDynamicIndexing);
SL_VK_FEATURE(shaderStorageTexelBufferArrayDynamicIndexing);
SL_VK_FEATURE(shaderUniformBufferArrayNonUniformIndexing);
SL_VK_FEATURE(shaderSampledImageArrayNonUniformIndexing);
SL_VK_FEATURE(shaderStorageBufferArrayNonUniformIndexing);
SL_VK_FEATURE(shaderStorageImageArrayNonUniformIndexing);
SL_VK_FEATURE(shaderInputAttachmentArrayNonUniformIndexing);
SL_VK_FEATURE(shaderUniformTexelBufferArrayNonUniformIndexing);
SL_VK_FEATURE(shaderStorageTexelBufferArrayNonUniformIndexing);
SL_VK_FEATURE(descriptorBindingUniformBufferUpdateAfterBind);
SL_VK_FEATURE(descriptorBindingSampledImageUpdateAfterBind);
SL_VK_FEATURE(descriptorBindingStorageImageUpdateAfterBind);
SL_VK_FEATURE(descriptorBindingStorageBufferUpdateAfterBind);
SL_VK_FEATURE(descriptorBindingUniformTexelBufferUpdateAfterBind);
SL_VK_FEATURE(descriptorBindingStorageTexelBufferUpdateAfterBind);
SL_VK_FEATURE(descriptorBindingUpdateUnusedWhilePending);
SL_VK_FEATURE(descriptorBindingPartiallyBound);
SL_VK_FEATURE(descriptorBindingVariableDescriptorCount);
SL_VK_FEATURE(runtimeDescriptorArray);
SL_VK_FEATURE(samplerFilterMinmax);
SL_VK_FEATURE(scalarBlockLayout);
SL_VK_FEATURE(imagelessFramebuffer);
SL_VK_FEATURE(uniformBufferStandardLayout);
SL_VK_FEATURE(shaderSubgroupExtendedTypes);
SL_VK_FEATURE(separateDepthStencilLayouts);
SL_VK_FEATURE(hostQueryReset);
SL_VK_FEATURE(timelineSemaphore);
SL_VK_FEATURE(bufferDeviceAddress);
SL_VK_FEATURE(bufferDeviceAddressCaptureReplay);
SL_VK_FEATURE(bufferDeviceAddressMultiDevice);
SL_VK_FEATURE(vulkanMemoryModel);
SL_VK_FEATURE(vulkanMemoryModelDeviceScope);
SL_VK_FEATURE(vulkanMemoryModelAvailabilityVisibilityChains);
SL_VK_FEATURE(shaderOutputViewportIndex);
SL_VK_FEATURE(shaderOutputLayer);
SL_VK_FEATURE(subgroupBroadcastDynamicId);
}
return features;
}
//! Builds a VkPhysicalDeviceVulkan13Features structure from a list of feature names.
//!
//! The structure starts with sType set and every other member zero-initialized. Each name is then
//! compared (via SL_VK_FEATURE, i.e. strcmp) against the member names listed below and the matching
//! member is set to VK_TRUE. Names that match no member are ignored.
//!
//! @param featureCount Number of entries in featureNames
//! @param featureNames Array of null-terminated feature names; every entry is passed to strcmp
//! @return Structure with the named features enabled
inline VkPhysicalDeviceVulkan13Features getVkPhysicalDeviceVulkan13Features(uint32_t featureCount, const char** featureNames)
{
    VkPhysicalDeviceVulkan13Features features{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES };
    for (uint32_t i = 0; i < featureCount; i++)
    {
        // Each member is listed exactly once
        SL_VK_FEATURE(robustImageAccess);
        SL_VK_FEATURE(inlineUniformBlock);
        SL_VK_FEATURE(descriptorBindingInlineUniformBlockUpdateAfterBind);
        SL_VK_FEATURE(pipelineCreationCacheControl);
        SL_VK_FEATURE(privateData);
        SL_VK_FEATURE(shaderDemoteToHelperInvocation);
        SL_VK_FEATURE(shaderTerminateInvocation);
        SL_VK_FEATURE(subgroupSizeControl);
        SL_VK_FEATURE(computeFullSubgroups);
        SL_VK_FEATURE(synchronization2);
        SL_VK_FEATURE(textureCompressionASTC_HDR);
        SL_VK_FEATURE(shaderZeroInitializeWorkgroupMemory);
        SL_VK_FEATURE(dynamicRendering);
        SL_VK_FEATURE(shaderIntegerDotProduct);
        SL_VK_FEATURE(maintenance4);
    }
    return features;
}
//! Builds a VkPhysicalDeviceOpticalFlowFeaturesNV structure from a list of feature names.
//!
//! The structure starts with sType set and every other member zero-initialized; opticalFlow is
//! set to VK_TRUE when any of the names equals "opticalFlow". All other names are ignored.
//!
//! @param featureCount Number of entries in featureNames
//! @param featureNames Array of null-terminated feature names; every entry is passed to strcmp
//! @return Structure with the named feature enabled
inline VkPhysicalDeviceOpticalFlowFeaturesNV getVkPhysicalDeviceOpticalFlowNVFeatures(uint32_t featureCount, const char** featureNames)
{
    VkPhysicalDeviceOpticalFlowFeaturesNV result{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_FEATURES_NV };
    for (uint32_t idx = 0; idx != featureCount; ++idx)
    {
        const bool isOpticalFlow = (strcmp(featureNames[idx], "opticalFlow") == 0);
        if (isOpticalFlow)
        {
            result.opticalFlow = VK_TRUE;
        }
    }
    return result;
}
// Writes member 'n' of pPhysicalDeviceFeatures (viewed as T) as: 'feature' AND member 'n' of
// pSupportedFeatures (viewed as T). Relies on both pointers being in scope at the point of use.
#define SL_VK_FEATURE_SUPPORT(T, feature, n) ((T*)pPhysicalDeviceFeatures)->n = ((feature) && (((T*)pSupportedFeatures)->n))
// Applies SL_VK_FEATURE_SUPPORT to member 'n', first OR-ing in the same member of pFeaturesToMerge when
// that pointer is not NULL. Relies on pFeaturesToMerge being in scope as well.
// NOTE(review): the expansion is an unparenthesized conditional expression - use it as a standalone statement only.
#define SL_VK_FEATURE_MERGE_SUPPORT(T, n) (pFeaturesToMerge == NULL) ? \
SL_VK_FEATURE_SUPPORT(T, ((T*)pPhysicalDeviceFeatures)->n, n) : SL_VK_FEATURE_SUPPORT(T, ((((T*)pPhysicalDeviceFeatures)->n) || (((T*)pFeaturesToMerge)->n)), n)
//! Merges requested Vulkan feature flags and limits them to what the device supports.
//!
//! For every member of the feature structure identified by pPhysicalDeviceFeatures->sType:
//!   member = (member || pFeaturesToMerge->member) && pSupportedFeatures->member
//! When pFeaturesToMerge is NULL the "|| pFeaturesToMerge->member" term is omitted.
//!
//! Only VkPhysicalDeviceVulkan12Features and VkPhysicalDeviceVulkan13Features are handled;
//! any other sType leaves pPhysicalDeviceFeatures untouched.
//!
//! @param pPhysicalDeviceFeatures In/out - requested features, updated in place. No-op if NULL.
//! @param pFeaturesToMerge Optional - additional requested features of the same sType, may be NULL
//! @param pSupportedFeatures Features supported by the device, same sType. No-op if NULL.
//!
//! The sType agreement between the structures is only checked with assert(), i.e. not in builds
//! where assertions are compiled out.
inline void getMergedSupportedVkPhysicalDeviceVulkanFeatures(VkBaseOutStructure* pPhysicalDeviceFeatures, const VkBaseOutStructure* pFeaturesToMerge, const VkBaseOutStructure* pSupportedFeatures)
{
    if (pPhysicalDeviceFeatures == NULL || pSupportedFeatures == NULL)
    {
        return;
    }
    if (pFeaturesToMerge != NULL)
    {
        assert(pFeaturesToMerge->sType == pPhysicalDeviceFeatures->sType);
    }
    assert(pSupportedFeatures->sType == pPhysicalDeviceFeatures->sType);
    switch (pPhysicalDeviceFeatures->sType)
    {
    case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES:
        // One entry per member of VkPhysicalDeviceVulkan12Features (each listed exactly once)
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, samplerMirrorClampToEdge);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, drawIndirectCount);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, storageBuffer8BitAccess);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, uniformAndStorageBuffer8BitAccess);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, storagePushConstant8);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, shaderBufferInt64Atomics);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, shaderSharedInt64Atomics);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, shaderFloat16);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, shaderInt8);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, descriptorIndexing);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, shaderInputAttachmentArrayDynamicIndexing);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, shaderUniformTexelBufferArrayDynamicIndexing);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, shaderStorageTexelBufferArrayDynamicIndexing);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, shaderUniformBufferArrayNonUniformIndexing);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, shaderSampledImageArrayNonUniformIndexing);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, shaderStorageBufferArrayNonUniformIndexing);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, shaderStorageImageArrayNonUniformIndexing);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, shaderInputAttachmentArrayNonUniformIndexing);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, shaderUniformTexelBufferArrayNonUniformIndexing);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, shaderStorageTexelBufferArrayNonUniformIndexing);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, descriptorBindingUniformBufferUpdateAfterBind);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, descriptorBindingSampledImageUpdateAfterBind);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, descriptorBindingStorageImageUpdateAfterBind);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, descriptorBindingStorageBufferUpdateAfterBind);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, descriptorBindingUniformTexelBufferUpdateAfterBind);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, descriptorBindingStorageTexelBufferUpdateAfterBind);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, descriptorBindingUpdateUnusedWhilePending);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, descriptorBindingPartiallyBound);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, descriptorBindingVariableDescriptorCount);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, runtimeDescriptorArray);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, samplerFilterMinmax);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, scalarBlockLayout);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, imagelessFramebuffer);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, uniformBufferStandardLayout);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, shaderSubgroupExtendedTypes);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, separateDepthStencilLayouts);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, hostQueryReset);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, timelineSemaphore);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, bufferDeviceAddress);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, bufferDeviceAddressCaptureReplay);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, bufferDeviceAddressMultiDevice);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, vulkanMemoryModel);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, vulkanMemoryModelDeviceScope);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, vulkanMemoryModelAvailabilityVisibilityChains);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, shaderOutputViewportIndex);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, shaderOutputLayer);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan12Features, subgroupBroadcastDynamicId);
        break;
    case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES:
        // One entry per member of VkPhysicalDeviceVulkan13Features (each listed exactly once)
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan13Features, robustImageAccess);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan13Features, inlineUniformBlock);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan13Features, descriptorBindingInlineUniformBlockUpdateAfterBind);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan13Features, pipelineCreationCacheControl);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan13Features, privateData);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan13Features, shaderDemoteToHelperInvocation);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan13Features, shaderTerminateInvocation);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan13Features, subgroupSizeControl);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan13Features, computeFullSubgroups);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan13Features, synchronization2);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan13Features, textureCompressionASTC_HDR);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan13Features, shaderZeroInitializeWorkgroupMemory);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan13Features, dynamicRendering);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan13Features, shaderIntegerDotProduct);
        SL_VK_FEATURE_MERGE_SUPPORT(VkPhysicalDeviceVulkan13Features, maintenance4);
        break;
    default:
        // Unhandled structure type - nothing to merge
        break;
    }
}
//! Interface to provide to slSetVulkanInfo when manually hooking Vulkan API and NOT
//! leveraging vkCreateDevice and vkCreateInstance proxies provided by SL.
//!
//! All members are value-initialized by default (null handles, zero indices/flags, false).
//!
//! {0EED6FD5-82CD-43A9-BDB5-47A5BA2F45D6}
SL_STRUCT_BEGIN(VulkanInfo, StructType({ 0xeed6fd5, 0x82cd, 0x43a9, { 0xbd, 0xb5, 0x47, 0xa5, 0xba, 0x2f, 0x45, 0xd6 } }), kStructVersion3)
//! Vulkan device, instance and physical device handles supplied by the caller
VkDevice device {};
VkInstance instance{};
VkPhysicalDevice physicalDevice{};
//! IMPORTANT:
//!
//! SL features can request additional graphics or compute queues.
//! The below values provide information about the queue families and
//! starting index at which SL queues are created.
uint32_t computeQueueIndex{};
uint32_t computeQueueFamily{};
uint32_t graphicsQueueIndex{};
uint32_t graphicsQueueFamily{};
//! Same index/family pair for the optical flow queue
uint32_t opticalFlowQueueIndex{};
uint32_t opticalFlowQueueFamily{};
//! NOTE(review): presumably selects the native Vulkan optical flow path - confirm against the SL documentation
bool useNativeOpticalFlowMode = false;
//! NOTE(review): presumably the queue create flags used for the respective queues - confirm against the SL documentation
uint32_t computeQueueCreateFlags{};
uint32_t graphicsQueueCreateFlags{};
uint32_t opticalFlowQueueCreateFlags{};
SL_STRUCT_END()
}
//! Function type with the same signature as slSetVulkanInfo
using PFun_slSetVulkanInfo = sl::Result(const sl::VulkanInfo& info);
//! Specify Vulkan specific information
//!
//! Use this method to provide Vulkan device, instance information to SL.
//!
//! IMPORTANT: Only call this API if NOT using vkCreateDevice and vkCreateInstance proxies provided by SL.
//!
//! @param info Reference to the structure providing the information
//!
//! This method is NOT thread safe and should be called IMMEDIATELY after base interface is created.
SL_API sl::Result slSetVulkanInfo(const sl::VulkanInfo& info);

View File

@ -0,0 +1,113 @@
/*
* Copyright (c) 2022-2023 NVIDIA CORPORATION. All rights reserved
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include "sl.h"
// Forward declarations of the opaque Vulkan handle structs and the matching pointer aliases.
// NOTE(review): presumably declared here so this header does not need to include the Vulkan
// headers; the aliases must stay identical to the ones in vulkan_core.h - confirm.
struct VkPhysicalDevice_T;
struct VkDevice_T;
struct VkInstance_T;
using VkPhysicalDevice = VkPhysicalDevice_T*;
using VkDevice = VkDevice_T*;
using VkInstance = VkInstance_T*;
namespace sl
{
//! NOTE: Adding new hooks require sl.interposer to be recompiled
//!
//! IMPORTANT: Since SL interposer proxies supports many different versions of various D3D/DXGI interfaces
//! we use only base interface names for our hooks.
//!
//! For example if API was added in IDXGISwapChain5::FUNCTION it is still named eIDXGISwapChain_FUNCTION (there is no 5 in the name)
//!
enum class FunctionHookID : uint32_t
{
    //! Mandatory - IDXGIFactory*
    eIDXGIFactory_CreateSwapChain,
    eIDXGIFactory_CreateSwapChainForHwnd,
    eIDXGIFactory_CreateSwapChainForCoreWindow,

    //! Mandatory - IDXGISwapChain*
    eIDXGISwapChain_Present,
    eIDXGISwapChain_Present1,
    eIDXGISwapChain_GetBuffer,
    eIDXGISwapChain_GetDesc,
    eIDXGISwapChain_ResizeBuffers,
    eIDXGISwapChain_ResizeBuffers1,
    eIDXGISwapChain_GetCurrentBackBufferIndex,
    eIDXGISwapChain_SetFullscreenState,
    //! Internal - please ignore when doing manual hooking
    eIDXGISwapChain_Destroyed,

    //! Mandatory - ID3D12Device*
    eID3D12Device_CreateCommandQueue,

    //! Mandatory - Vulkan
    eVulkan_Present,
    eVulkan_CreateSwapchainKHR,
    eVulkan_DestroySwapchainKHR,
    eVulkan_GetSwapchainImagesKHR,
    eVulkan_AcquireNextImageKHR,
    eVulkan_DeviceWaitIdle,
    eVulkan_CreateWin32SurfaceKHR,
    eVulkan_DestroySurfaceKHR,

    //! Number of hook identifiers above, not a hook itself
    eMaxNum
};

//! Expands to a 'case' label that returns the stringified enumerator
#ifndef SL_CASE_STR
#define SL_CASE_STR(a) case a : return #a;
#endif

//! Converts a hook identifier to its name
//!
//! @param v Hook identifier
//! @return The qualified enumerator name (for example "FunctionHookID::eVulkan_Present"),
//!         or "Unknown" for eMaxNum and any value that is not a named hook.
//!         The returned pointer refers to a string literal.
inline const char* getFunctionHookIDAsStr(FunctionHookID v)
{
    switch (v)
    {
        SL_CASE_STR(FunctionHookID::eIDXGIFactory_CreateSwapChain);
        SL_CASE_STR(FunctionHookID::eIDXGIFactory_CreateSwapChainForHwnd);
        SL_CASE_STR(FunctionHookID::eIDXGIFactory_CreateSwapChainForCoreWindow);
        SL_CASE_STR(FunctionHookID::eIDXGISwapChain_Present);
        SL_CASE_STR(FunctionHookID::eIDXGISwapChain_Present1);
        SL_CASE_STR(FunctionHookID::eIDXGISwapChain_GetBuffer);
        SL_CASE_STR(FunctionHookID::eIDXGISwapChain_GetDesc);
        SL_CASE_STR(FunctionHookID::eIDXGISwapChain_ResizeBuffers);
        SL_CASE_STR(FunctionHookID::eIDXGISwapChain_ResizeBuffers1);
        SL_CASE_STR(FunctionHookID::eIDXGISwapChain_GetCurrentBackBufferIndex);
        SL_CASE_STR(FunctionHookID::eIDXGISwapChain_SetFullscreenState);
        SL_CASE_STR(FunctionHookID::eIDXGISwapChain_Destroyed);
        SL_CASE_STR(FunctionHookID::eID3D12Device_CreateCommandQueue);
        SL_CASE_STR(FunctionHookID::eVulkan_Present);
        SL_CASE_STR(FunctionHookID::eVulkan_CreateSwapchainKHR);
        SL_CASE_STR(FunctionHookID::eVulkan_DestroySwapchainKHR);
        SL_CASE_STR(FunctionHookID::eVulkan_GetSwapchainImagesKHR);
        SL_CASE_STR(FunctionHookID::eVulkan_AcquireNextImageKHR);
        SL_CASE_STR(FunctionHookID::eVulkan_DeviceWaitIdle);
        SL_CASE_STR(FunctionHookID::eVulkan_CreateWin32SurfaceKHR);
        SL_CASE_STR(FunctionHookID::eVulkan_DestroySurfaceKHR);
        // Listed explicitly (instead of a 'default') so the compiler can still
        // warn when a newly added enumerator is missing from this switch.
        case FunctionHookID::eMaxNum:
            break;
    }
    return "Unknown";
}
} // namespace sl

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2022-2024 NVIDIA CORPORATION. All rights reserved
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include "sl_core_types.h"
namespace sl
{
//! Options passed to slLatewarpSetOptions
//!
// {f9147248-3ebc-4c44-9be9-8dee5e6c05f1}
SL_STRUCT_BEGIN(LatewarpOptions, StructType({ 0xf9147248, 0x3ebc, 0x4c44, { 0x9b, 0xe9, 0x8d, 0xee, 0x5e, 0x6c, 0x05, 0xf1 } }), kStructVersion2)
//! Reserved, defaults to 0
unsigned int reserved0 = 0u;
//! Defaults to false. NOTE(review): presumably turns Latewarp on when true - confirm against the SL documentation
bool latewarpActive = false;
//! Optional - if specified Latewarp will return any errors which occur when calling underlying API (DXGI or Vulkan or NGX)
PFunOnAPIErrorCallback* onErrorCallback{};
//! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
}
//! Sets Latewarp options
//!
//! Call this method to turn Latewarp on/off, change mode etc.
//!
//! @param viewport Specifies the viewport the options apply to
//! @param options Specifies options to use
//! @return sl::ResultCode::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe.
using PFun_slLatewarpSetOptions = sl::Result(const sl::ViewportHandle& viewport, const sl::LatewarpOptions& options);
//! Helper which forwards to the slLatewarpSetOptions function of the sl::kFeatureLatewarp feature.
//! NOTE(review): SL_FEATURE_FUN_IMPORT_STATIC is defined elsewhere; it presumably resolves the
//! function into the static s_slLatewarpSetOptions used below - confirm against its definition.
inline sl::Result slLatewarpSetOptions(const sl::ViewportHandle& viewport, const sl::LatewarpOptions& options)
{
SL_FEATURE_FUN_IMPORT_STATIC(sl::kFeatureLatewarp, slLatewarpSetOptions);
return s_slLatewarpSetOptions(viewport, options);
}

View File

@ -0,0 +1,221 @@
/*
* Copyright (c) 2022-2023 NVIDIA CORPORATION. All rights reserved
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include "sl.h"
#include "sl_consts.h"
namespace sl
{
// Computes result = a * b for two 4x4 matrices.
//
// Both inputs are read as 16 consecutive floats starting at row 0, element x
// (element index = row * 4 + column).
//
// The product is accumulated in a local buffer and copied to 'result' at the end,
// so 'result' may safely be the same object as 'a' and/or 'b'.
inline void matrixMul(float4x4& result, const float4x4& a, const float4x4& b)
{
    // Alias raw pointers over the input matrices
    const float* pA = &a[0].x;
    const float* pB = &b[0].x;

    float product[16];
    for (int row = 0; row < 4; ++row)
    {
        const float* pRow = pA + (row * 4);
        for (int col = 0; col < 4; ++col)
        {
            // Dot product of row 'row' of a with column 'col' of b
            product[(row * 4) + col] = (pRow[0] * pB[col]) + (pRow[1] * pB[4 + col]) + (pRow[2] * pB[8 + col]) + (pRow[3] * pB[12 + col]);
        }
    }

    // Publish only after every input element has been read
    float* pResult = &result[0].x;
    for (int i = 0; i < 16; ++i)
    {
        pResult[i] = product[i];
    }
}
// General 4x4 matrix inverse: result = inverse(mat).
//
// The input is copied to a local buffer before anything is written, so 'result'
// may safely be the same object as 'mat'.
//
// If the determinant is exactly zero no division is performed and 'result' is left
// holding the unscaled intermediate values - callers that may pass a singular matrix
// must not treat the output as an inverse in that case.
inline void matrixFullInvert(float4x4& result, const float4x4& mat)
{
    // Matrix inversion code from https://stackoverflow.com/questions/1148309/inverting-a-4x4-matrix

    // Snapshot the input so that writes to 'result' can never change what is read below
    float source[16];
    {
        const float* pInput = &mat[0].x;
        for (int i = 0; i < 16; ++i)
        {
            source[i] = pInput[i];
        }
    }

    // Alias raw pointers over the (copied) input matrix and the result
    const float* pMat = source;
    float* pResult = &result[0].x;

    pResult[0] = pMat[5] * pMat[10] * pMat[15] - pMat[5] * pMat[11] * pMat[14] - pMat[9] * pMat[6] * pMat[15] + pMat[9] * pMat[7] * pMat[14] + pMat[13] * pMat[6] * pMat[11] - pMat[13] * pMat[7] * pMat[10];
    pResult[4] = -pMat[4] * pMat[10] * pMat[15] + pMat[4] * pMat[11] * pMat[14] + pMat[8] * pMat[6] * pMat[15] - pMat[8] * pMat[7] * pMat[14] - pMat[12] * pMat[6] * pMat[11] + pMat[12] * pMat[7] * pMat[10];
    pResult[8] = pMat[4] * pMat[9] * pMat[15] - pMat[4] * pMat[11] * pMat[13] - pMat[8] * pMat[5] * pMat[15] + pMat[8] * pMat[7] * pMat[13] + pMat[12] * pMat[5] * pMat[11] - pMat[12] * pMat[7] * pMat[9];
    pResult[12] = -pMat[4] * pMat[9] * pMat[14] + pMat[4] * pMat[10] * pMat[13] + pMat[8] * pMat[5] * pMat[14] - pMat[8] * pMat[6] * pMat[13] - pMat[12] * pMat[5] * pMat[10] + pMat[12] * pMat[6] * pMat[9];
    pResult[1] = -pMat[1] * pMat[10] * pMat[15] + pMat[1] * pMat[11] * pMat[14] + pMat[9] * pMat[2] * pMat[15] - pMat[9] * pMat[3] * pMat[14] - pMat[13] * pMat[2] * pMat[11] + pMat[13] * pMat[3] * pMat[10];
    pResult[5] = pMat[0] * pMat[10] * pMat[15] - pMat[0] * pMat[11] * pMat[14] - pMat[8] * pMat[2] * pMat[15] + pMat[8] * pMat[3] * pMat[14] + pMat[12] * pMat[2] * pMat[11] - pMat[12] * pMat[3] * pMat[10];
    pResult[9] = -pMat[0] * pMat[9] * pMat[15] + pMat[0] * pMat[11] * pMat[13] + pMat[8] * pMat[1] * pMat[15] - pMat[8] * pMat[3] * pMat[13] - pMat[12] * pMat[1] * pMat[11] + pMat[12] * pMat[3] * pMat[9];
    pResult[13] = pMat[0] * pMat[9] * pMat[14] - pMat[0] * pMat[10] * pMat[13] - pMat[8] * pMat[1] * pMat[14] + pMat[8] * pMat[2] * pMat[13] + pMat[12] * pMat[1] * pMat[10] - pMat[12] * pMat[2] * pMat[9];
    pResult[2] = pMat[1] * pMat[6] * pMat[15] - pMat[1] * pMat[7] * pMat[14] - pMat[5] * pMat[2] * pMat[15] + pMat[5] * pMat[3] * pMat[14] + pMat[13] * pMat[2] * pMat[7] - pMat[13] * pMat[3] * pMat[6];
    pResult[6] = -pMat[0] * pMat[6] * pMat[15] + pMat[0] * pMat[7] * pMat[14] + pMat[4] * pMat[2] * pMat[15] - pMat[4] * pMat[3] * pMat[14] - pMat[12] * pMat[2] * pMat[7] + pMat[12] * pMat[3] * pMat[6];
    pResult[10] = pMat[0] * pMat[5] * pMat[15] - pMat[0] * pMat[7] * pMat[13] - pMat[4] * pMat[1] * pMat[15] + pMat[4] * pMat[3] * pMat[13] + pMat[12] * pMat[1] * pMat[7] - pMat[12] * pMat[3] * pMat[5];
    pResult[14] = -pMat[0] * pMat[5] * pMat[14] + pMat[0] * pMat[6] * pMat[13] + pMat[4] * pMat[1] * pMat[14] - pMat[4] * pMat[2] * pMat[13] - pMat[12] * pMat[1] * pMat[6] + pMat[12] * pMat[2] * pMat[5];
    pResult[3] = -pMat[1] * pMat[6] * pMat[11] + pMat[1] * pMat[7] * pMat[10] + pMat[5] * pMat[2] * pMat[11] - pMat[5] * pMat[3] * pMat[10] - pMat[9] * pMat[2] * pMat[7] + pMat[9] * pMat[3] * pMat[6];
    pResult[7] = pMat[0] * pMat[6] * pMat[11] - pMat[0] * pMat[7] * pMat[10] - pMat[4] * pMat[2] * pMat[11] + pMat[4] * pMat[3] * pMat[10] + pMat[8] * pMat[2] * pMat[7] - pMat[8] * pMat[3] * pMat[6];
    pResult[11] = -pMat[0] * pMat[5] * pMat[11] + pMat[0] * pMat[7] * pMat[9] + pMat[4] * pMat[1] * pMat[11] - pMat[4] * pMat[3] * pMat[9] - pMat[8] * pMat[1] * pMat[7] + pMat[8] * pMat[3] * pMat[5];
    pResult[15] = pMat[0] * pMat[5] * pMat[10] - pMat[0] * pMat[6] * pMat[9] - pMat[4] * pMat[1] * pMat[10] + pMat[4] * pMat[2] * pMat[9] + pMat[8] * pMat[1] * pMat[6] - pMat[8] * pMat[2] * pMat[5];

    // Determinant from the first input row and the first result column
    float det = pMat[0] * pResult[0] + pMat[1] * pResult[4] + pMat[2] * pResult[8] + pMat[3] * pResult[12];
    if (det != 0.f)
    {
        det = 1.0f / det;
        for (int i = 0; i < 16; ++i)
        {
            pResult[i] *= det;
        }
    }
}
// Specialised lightweight matrix invert when the matrix is known to be orthonormal
//
// Treats 'mat' as a rotation in the upper-left 3x3 block plus a translation in row 3:
// the 3x3 block is transposed, the translation becomes minus its dot product with each
// input row, and the w column is set to (0, 0, 0, 1). The w components of 'mat' are not read.
//
// The input is copied first, so 'result' may safely be the same object as 'mat'.
inline void matrixOrthoNormalInvert(float4x4& result, const float4x4& mat)
{
    // Snapshot the input so that writes to 'result' can never change what is read below
    const float4x4 src = mat;

    // Transpose the first 3x3
    result[0].x = src[0].x;
    result[0].y = src[1].x;
    result[0].z = src[2].x;
    result[1].x = src[0].y;
    result[1].y = src[1].y;
    result[1].z = src[2].y;
    result[2].x = src[0].z;
    result[2].y = src[1].z;
    result[2].z = src[2].z;

    // Invert the translation
    result[3].x = -((src[3].x * src[0].x) + (src[3].y * src[0].y) + (src[3].z * src[0].z));
    result[3].y = -((src[3].x * src[1].x) + (src[3].y * src[1].y) + (src[3].z * src[1].z));
    result[3].z = -((src[3].x * src[2].x) + (src[3].y * src[2].y) + (src[3].z * src[2].z));

    // Fill in the remaining constants
    result[0].w = 0.0f;
    result[1].w = 0.0f;
    result[2].w = 0.0f;
    result[3].w = 1.0f;
}
// Scales 'v' in place to unit length.
//
// A vector whose squared length is exactly zero has no direction and is left unchanged
// (dividing by its length would otherwise fill it with NaN).
inline void vectorNormalize(float3& v)
{
    const float lengthSquared = (v.x * v.x) + (v.y * v.y) + (v.z * v.z);
    if (lengthSquared == 0.f)
    {
        return;
    }
    const float k = 1.f / sqrtf(lengthSquared);
    v.x *= k;
    v.y *= k;
    v.z *= k;
}
// Computes result = a x b (3D cross product).
//
// All three components are computed before 'result' is written, so 'result' may safely
// be the same object as 'a' or 'b'.
inline void vectorCrossProduct(float3& result, const float3& a, const float3& b)
{
    const float crossX = a.y * b.z - a.z * b.y;
    const float crossY = a.z * b.x - a.x * b.z;
    const float crossZ = a.x * b.y - a.y * b.x;
    result.x = crossX;
    result.y = crossY;
    result.z = crossZ;
}
// Builds the matrix taking the current camera's space to the previous camera's space
// from the two camera-to-world matrices, arranged so that fp32 precision is preserved.
//
// The textbook formulation would be
//
// worldToCameraPrev = invert(cameraToWorldPrev)
// cameraToPrevCamera = cameraToWorld * worldToCameraPrev
//
// but once the camera sits any reasonable distance from the origin the large translations
// force small numbers to be added to large ones, and fp32 precision is lost.
//
// The absolute world position of the camera is irrelevant to the result - only the motion
// between the two frames matters. Offsetting the translations of both matrices by the same
// amount therefore leaves the result unchanged. Here the current camera's world-space
// translation is subtracted from both, which turns a potentially very large translation
// into a very small one and sidesteps the precision problem.
inline void calcCameraToPrevCamera(float4x4& outCameraToPrevCamera, const float4x4& cameraToWorld, const float4x4& cameraToWorldPrev)
{
    // CC == 'Camera-Centred': world space shifted so the current camera is at the origin.

    // Previous camera, with the current camera's translation subtracted from its own
    float4x4 prevCameraToCcWorld = cameraToWorldPrev;
    const auto& currentTranslation = cameraToWorld[3];
    auto& prevTranslation = prevCameraToCcWorld[3];
    prevTranslation.x -= currentTranslation.x;
    prevTranslation.y -= currentTranslation.y;
    prevTranslation.z -= currentTranslation.z;

    // Current camera, with its translation row replaced by (0, 0, 0, 1)
    float4x4 currentCameraToCcWorld = cameraToWorld;
    currentCameraToCcWorld[3] = float4(0, 0, 0, 1);

    // The cheap invert is valid as long as the camera matrix is assumed to be orthonormal
    float4x4 ccWorldToPrevCamera;
    matrixOrthoNormalInvert(ccWorldToPrevCamera, prevCameraToCcWorld);

    matrixMul(outCameraToPrevCamera, currentCameraToCcWorld, ccWorldToPrevCamera);
}
// Recomputes a subset of the matrix fields in Constants from the camera fields.
// Intended for validating what the app provides, or for teasing out precision issues.
// The matrices that are overwritten are...
// - clipToCameraView
// - clipToPrevClip
// - prevClipToClip
// In addition cameraRight and cameraFwd are normalised in place and cameraUp is replaced
// by the normalised cross product of cameraFwd and cameraRight.
//
// WARNING: the previous frame's matrices live in function-local statics. They start out as
// identity, are shared by every caller regardless of view, and are updated without any
// synchronisation. ********* DO NOT USE THIS IN ANYTHING PROPER *********
inline void recalculateCameraMatrices(Constants& values)
{
    // Rebuild the camera basis vectors
    vectorNormalize(values.cameraRight);
    vectorNormalize(values.cameraFwd);
    vectorCrossProduct(values.cameraUp, values.cameraFwd, values.cameraRight);
    vectorNormalize(values.cameraUp);

    // Assemble camera-to-world: rows are right, up, forward and position
    const auto& right = values.cameraRight;
    const auto& up = values.cameraUp;
    const auto& forward = values.cameraFwd;
    const auto& position = values.cameraPos;
    float4x4 viewToWorld;
    viewToWorld[0] = float4(right.x, right.y, right.z, 0.f);
    viewToWorld[1] = float4(up.x, up.y, up.z, 0.f);
    viewToWorld[2] = float4(forward.x, forward.y, forward.z, 0.f);
    viewToWorld[3] = float4(position.x, position.y, position.z, 1.f);

    // Makeshift storage for the previous frame's cameraViewToWorld and cameraViewToClip.
    // These should really be provided by the app, or stored by association with the view index.
    static float4x4 s_prevViewToWorld = {
        float4(1, 0, 0, 0),
        float4(0, 1, 0, 0),
        float4(0, 0, 1, 0),
        float4(0, 0, 0, 1),
    };
    static float4x4 s_prevViewToClip = {
        float4(1, 0, 0, 0),
        float4(0, 1, 0, 0),
        float4(0, 0, 1, 0),
        float4(0, 0, 0, 1),
    };

    matrixFullInvert(values.clipToCameraView, values.cameraViewToClip);

    // clip -> current view -> previous view -> previous clip
    float4x4 viewToPrevView;
    calcCameraToPrevCamera(viewToPrevView, viewToWorld, s_prevViewToWorld);
    float4x4 clipToPrevView;
    matrixMul(clipToPrevView, values.clipToCameraView, viewToPrevView);
    matrixMul(values.clipToPrevClip, clipToPrevView, s_prevViewToClip);
    matrixFullInvert(values.prevClipToClip, values.clipToPrevClip);

    // Remember this frame's matrices for the next call
    s_prevViewToWorld = viewToWorld;
    s_prevViewToClip = values.cameraViewToClip;
}
}

View File

@ -0,0 +1,102 @@
/*
* Copyright (c) 2022-2023 NVIDIA CORPORATION. All rights reserved
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
namespace sl
{
//! Operating mode selected through NISOptions::mode
enum class NISMode : uint32_t
{
    eOff = 0,
    eScaler = 1,
    eSharpen = 2,
    //! Number of modes above, not a mode itself
    eCount = 3
};
//! HDR mode selected through NISOptions::hdrMode
enum class NISHDR : uint32_t
{
    eNone = 0,
    eLinear = 1,
    ePQ = 2,
    //! Number of HDR modes above, not a mode itself
    eCount = 3
};
//! Options passed to slNISSetOptions
//!
// {676610E5-9674-4D3A-9C8A-F495D01B36F3}
SL_STRUCT_BEGIN(NISOptions, StructType({ 0x676610e5, 0x9674, 0x4d3a, { 0x9c, 0x8a, 0xf4, 0x95, 0xd0, 0x1b, 0x36, 0xf3 } }), kStructVersion1)
//! Specifies which mode should be used (defaults to NISMode::eScaler)
NISMode mode = NISMode::eScaler;
//! Specifies which hdr mode should be used (defaults to NISHDR::eNone)
NISHDR hdrMode = NISHDR::eNone;
//! Specifies sharpening level in range [0,1] (defaults to 0)
float sharpness = 0.0f;
//! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
//! Returned by the NIS plugin (see slNISGetState)
//!
// {71AB4FD0-D959-4C2A-AF69-ED4850BD4E3D}
SL_STRUCT_BEGIN(NISState, StructType({ 0x71ab4fd0, 0xd959, 0x4c2a, { 0xaf, 0x69, 0xed, 0x48, 0x50, 0xbd, 0x4e, 0x3d } }), kStructVersion1)
//! Specifies the amount of memory expected to be used, in bytes (defaults to 0)
uint64_t estimatedVRAMUsageInBytes {};
//! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
}
//! Sets NIS options
//!
//! Call this method to turn NIS on/off, change mode etc.
//!
//! @param viewport Specifies the viewport we are working with
//! @param options Specifies NIS options to use
//! @return sl::ResultCode::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe.
using PFun_slNISSetOptions = sl::Result(const sl::ViewportHandle& viewport, const sl::NISOptions& options);
//! Provides NIS state for the given viewport
//!
//! Call this method to obtain VRAM usage and other information.
//!
//! @param viewport Specifies the viewport we are working with
//! @param state Reference to a structure where state is to be returned
//! @return sl::ResultCode::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe.
using PFun_slNISGetState = sl::Result(const sl::ViewportHandle& viewport, sl::NISState& state);
//! HELPERS
//!
//! Forwards to the slNISSetOptions function of the sl::kFeatureNIS feature
//! (parameters and return value as documented on PFun_slNISSetOptions).
//! NOTE(review): SL_FEATURE_FUN_IMPORT_STATIC is defined elsewhere; it presumably resolves the
//! function into the static s_slNISSetOptions used below - confirm against its definition.
inline sl::Result slNISSetOptions(const sl::ViewportHandle& viewport, const sl::NISOptions& options)
{
SL_FEATURE_FUN_IMPORT_STATIC(sl::kFeatureNIS, slNISSetOptions);
return s_slNISSetOptions(viewport, options);
}
//! Forwards to the slNISGetState function of the sl::kFeatureNIS feature
//! (parameters and return value as documented on PFun_slNISGetState).
//! NOTE(review): SL_FEATURE_FUN_IMPORT_STATIC is defined elsewhere; it presumably resolves the
//! function into the static s_slNISGetState used below - confirm against its definition.
inline sl::Result slNISGetState(const sl::ViewportHandle& viewport, sl::NISState& state)
{
SL_FEATURE_FUN_IMPORT_STATIC(sl::kFeatureNIS, slNISGetState);
return s_slNISGetState(viewport, state);
}

View File

@ -0,0 +1,68 @@
/*
* Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* This software and the information contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
* of a form of NVIDIA software license agreement.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*
* Any use of this source code in individual and commercial software must
* include, in the user documentation and internal comments to the code,
* the above Disclaimer and U.S. Government End Users Notice.
*/
#pragma once
namespace sl
{
//! If your plugin does not have any constants then the code below can be removed
//!
//! Two-state mode selector used as the type of NvPerfConstants::mode.
//! Enumerator values are spelled out explicitly (eOff == 0, eOn == 1).
enum class NvPerfMode : uint32_t
{
    eOff = 0,
    eOn  = 1
};
//! IMPORTANT: Each structure must have a unique GUID assigned, change this as needed
//!
// {29DF7FE0-273A-4D72-B481-2DC823D5B1AD}
//! NvPerfConstants: structure declared through SL_STRUCT_BEGIN/SL_STRUCT_END, tagged with the
//! GUID above and kStructVersion1. The macros are defined elsewhere (presumably sl_struct.h).
SL_STRUCT_BEGIN(NvPerfConstants, StructType({ 0x29df7fe0, 0x273a, 0x4d72, { 0xb4, 0x81, 0x2d, 0xc8, 0x23, 0xd5, 0xb1, 0xad } }), kStructVersion1)
//! Requested mode; defaults to NvPerfMode::eOff
NvPerfMode mode = NvPerfMode::eOff;
//! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
//! IMPORTANT: Each structure must have a unique GUID assigned, change this as needed
//!
// {39DF7FE0-283A-4D72-B481-2DC823D5B1AD}
//! NvPerfSettings: structure declared through SL_STRUCT_BEGIN/SL_STRUCT_END, tagged with the
//! GUID above and kStructVersion1. It currently declares no members of its own.
SL_STRUCT_BEGIN(NvPerfSettings, StructType({ 0x39df7fe0, 0x283a, 0x4d72, { 0xb4, 0x81, 0x2d, 0xc8, 0x23, 0xd5, 0xb1, 0xad } }), kStructVersion1)
//! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
}

View File

@ -0,0 +1,158 @@
/*
* Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include <cassert>
#include <cstdint>
#include <type_traits>
#include <utility>
namespace sl
{
//! Hot-key which should be used instead of custom message for PC latency marker.
//! eUsePingMessage (0) selects the message-based path; the remaining enumerators carry
//! the codes 0x7C..0x7E and are named after VK_F13..VK_F15.
enum class PCLHotKey : int16_t
{
    eUsePingMessage = 0x00,
    eVK_F13         = 0x7C,
    eVK_F14         = 0x7D,
    eVK_F15         = 0x7E,
};
// {cfa32f9b-023c-420e-9056-6832b74f89b4}
//! PCLOptions: options structure passed to slPCLSetOptions (see PFun_slPCLSetOptions).
//! Both members have defaults, so a default-constructed instance selects
//! PCLHotKey::eUsePingMessage and thread id 0.
SL_STRUCT_BEGIN(PCLOptions, StructType({ 0xcfa32f9b, 0x023c, 0x420e, { 0x90, 0x56, 0x68, 0x32, 0xb7, 0x4f, 0x89, 0xb4 } }), kStructVersion1)
//! Specifies the hot-key which should be used instead of custom message for PC latency marker
//! Possible values: VK_F13, VK_F14, VK_F15
PCLHotKey virtualKey = PCLHotKey::eUsePingMessage;
//! ThreadID for PCL messages
uint32_t idThread = 0;
//! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
// {cfa32f9b-023c-420e-9056-6832b74f89b5}
//! PCLState: state structure filled in by slPCLGetState (see PFun_slPCLGetState).
SL_STRUCT_BEGIN(PCLState, StructType({ 0xcfa32f9b, 0x023c, 0x420e, { 0x90, 0x56, 0x68, 0x32, 0xb7, 0x4f, 0x89, 0xb5 } }), kStructVersion1)
    //! Specifies PCL Windows message id (if PCLOptions::virtualKey is 0)
    //! Defaults to 0 so that a freshly constructed state never exposes an indeterminate
    //! value (e.g. when the query fails before the field is written).
    uint32_t statsWindowMessage = 0;
    //! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
//! Marker identifiers accepted by slPCLSetMarker (see PFun_slPCLSetMarker).
//! Value 6 is intentionally unused (deprecated eInputSample); eMaximum follows the last
//! real marker and therefore evaluates to one past it.
enum class PCLMarker : uint32_t
{
    eSimulationStart            = 0,
    eSimulationEnd              = 1,
    eRenderSubmitStart          = 2,
    eRenderSubmitEnd            = 3,
    ePresentStart               = 4,
    ePresentEnd                 = 5,
    //eInputSample              = 6, // Deprecated
    eTriggerFlash               = 7,
    ePCLatencyPing              = 8,
    eOutOfBandRenderSubmitStart = 9,
    eOutOfBandRenderSubmitEnd   = 10,
    eOutOfBandPresentStart      = 11,
    eOutOfBandPresentEnd        = 12,
    eControllerInputSample      = 13,
    eDeltaTCalculation          = 14,
    eLateWarpPresentStart       = 15,
    eLateWarpPresentEnd         = 16,
    eCameraConstructed          = 17,
    eLateWarpRenderSubmitStart  = 18,
    eLateWarpRenderSubmitEnd    = 19,
    eMaximum
};
// C++23 ships std::to_underlying. Reuse it whenever the standard library advertises it through
// its feature-test macro: this is more reliable than comparing __cplusplus against one exact
// value, and a function must be imported with a using-declaration (an alias-declaration such
// as `using f = std::f;` is only valid for types).
#if defined(__cpp_lib_to_underlying) && (__cpp_lib_to_underlying >= 202102L)
using std::to_underlying;
#else
//! Return an `enum class` member as a value of its underlying type. Same as:
//!     static_cast<std::underlying_type_t<decltype(value)>>(value);
//! See C++23's std::to_underlying().
//!
//! @param value Enumerator to convert
//! @return `value` converted to std::underlying_type_t<T>
template<class T>
constexpr auto to_underlying(T value) noexcept
{
    return static_cast<std::underlying_type_t<T>>(value);
}
#endif
// {cfa32f9b-023c-420e-9056-6832b74f89b6}
//! PCLHelper: small structure that carries a single PCLMarker value.
SL_STRUCT_BEGIN(PCLHelper, StructType({ 0xcfa32f9b, 0x023c, 0x420e, { 0x90, 0x56, 0x68, 0x32, 0xb7, 0x4f, 0x89, 0xb6 } }), kStructVersion1)
    //! Stores marker `m`; the base is initialized with this structure's type and version.
    PCLHelper(PCLMarker m)
        : BaseStructure(PCLHelper::s_structType, kStructVersion1)
        , marker(m)
    {
    }
    //! Returns the marker supplied at construction.
    PCLMarker get() const
    {
        return marker;
    }
private:
    PCLMarker marker;
SL_STRUCT_END()
}
//! NOTE: the PFun_* aliases below name function *types* (not pointer types); each one has the
//! same signature as the inline helper of the same name defined further down in this header.
//!
//! Provides PCL settings
//!
//! Call this method to get stats etc.
//!
//! @param state Reference to a structure where states are returned
//! @return sl::ResultCode::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe.
using PFun_slPCLGetState = sl::Result(sl::PCLState& state);
//! Sets PCL marker
//!
//! Call this method to set specific PCL marker
//!
//! @param marker Specifies which marker to use
//! @param frame Specifies current frame
//! @return sl::ResultCode::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is thread safe.
using PFun_slPCLSetMarker = sl::Result(sl::PCLMarker marker, const sl::FrameToken& frame);
//! Sets PCL options
//!
//! Call this method to set PCL options.
//!
//! @param options Specifies options to use
//! @return sl::ResultCode::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe.
using PFun_slPCLSetOptions = sl::Result(const sl::PCLOptions& options);
//! HELPERS
//!
//! Inline convenience wrapper with the PFun_slPCLGetState signature.
//! SL_FEATURE_FUN_IMPORT_STATIC is defined outside this file; judging by its use here it makes
//! `s_slPCLGetState` available for sl::kFeaturePCL (presumably resolved once and cached, and it
//! may return early on failure - confirm against the macro definition).
//! @param state Forwarded unchanged to s_slPCLGetState
//! @return The result of s_slPCLGetState
inline sl::Result slPCLGetState(sl::PCLState& state)
{
SL_FEATURE_FUN_IMPORT_STATIC(sl::kFeaturePCL, slPCLGetState);
return s_slPCLGetState(state);
}
//! Inline convenience wrapper with the PFun_slPCLSetMarker signature.
//! SL_FEATURE_FUN_IMPORT_STATIC is defined outside this file; judging by its use here it makes
//! `s_slPCLSetMarker` available for sl::kFeaturePCL (presumably resolved once and cached, and it
//! may return early on failure - confirm against the macro definition).
//! @param marker Forwarded unchanged to s_slPCLSetMarker
//! @param frame Forwarded unchanged to s_slPCLSetMarker
//! @return The result of s_slPCLSetMarker
inline sl::Result slPCLSetMarker(sl::PCLMarker marker, const sl::FrameToken& frame)
{
SL_FEATURE_FUN_IMPORT_STATIC(sl::kFeaturePCL, slPCLSetMarker);
return s_slPCLSetMarker(marker, frame);
}
//! Inline convenience wrapper with the PFun_slPCLSetOptions signature.
//! SL_FEATURE_FUN_IMPORT_STATIC is defined outside this file; judging by its use here it makes
//! `s_slPCLSetOptions` available for sl::kFeaturePCL (presumably resolved once and cached, and it
//! may return early on failure - confirm against the macro definition).
//! @param options Forwarded unchanged to s_slPCLSetOptions
//! @return The result of s_slPCLSetOptions
inline sl::Result slPCLSetOptions(const sl::PCLOptions& options)
{
SL_FEATURE_FUN_IMPORT_STATIC(sl::kFeaturePCL, slPCLSetOptions);
return s_slPCLSetOptions(options);
}

View File

@ -0,0 +1,220 @@
/*
* Copyright (c) 2022-2023 NVIDIA CORPORATION. All rights reserved
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include "sl_pcl.h"
namespace sl
{
//! Reflex operating mode, used as the type of ReflexOptions::mode.
//! This is an unscoped enum, so the enumerators are also visible directly in the
//! enclosing namespace; values are spelled out explicitly.
enum ReflexMode
{
    eOff                 = 0,
    eLowLatency          = 1,
    eLowLatencyWithBoost = 2,
    // ReflexMode is a C-enum (rather than enum class) so we can't add an eCount value
    // without polluting the global namespace (and conflicts with SMSCGMode::eCount in sl.dlss_g/defines.h)
    ReflexMode_eCount    = 3
};
// {F03AF81A-6D0B-4902-A651-C4965E215434}
//! ReflexOptions: options structure passed to slReflexSetOptions (see PFun_slReflexSetOptions).
//! Every member has a default, so a default-constructed instance means: mode off, no frame
//! limit, no marker-based optimization, no hot-key and thread id 0.
SL_STRUCT_BEGIN(ReflexOptions, StructType({ 0xf03af81a, 0x6d0b, 0x4902, { 0xa6, 0x51, 0xc4, 0x96, 0x5e, 0x21, 0x54, 0x34 } }), kStructVersion1)
//! Specifies which mode should be used
ReflexMode mode = ReflexMode::eOff;
//! Specifies if frame limiting (FPS cap) is enabled (0 to disable, microseconds otherwise).
//! One benefit of using Reflex's FPS cap over other implementations is the driver would be aware and can provide better optimizations.
//! This setting is independent of ReflexOptions::mode; it can even be used with mode == ReflexMode::eOff.
//! The value is used each time you call slReflexSetOptions/slSetData, make sure to initialize when changing one of the other Reflex options during frame limiting.
//! It is overridden (ignored) by frameLimitUs if set in sl.reflex.json in non-production builds.
uint32_t frameLimitUs = 0;
//! This should only be enabled in specific scenarios with subtle caveats.
//! Most integrations should leave it unset unless advised otherwise by the Reflex team
bool useMarkersToOptimize = false;
//! Specifies the hot-key which should be used instead of custom message for PC latency marker
//! Possible values: VK_F13, VK_F14, VK_F15
//! NOTE(review): stored as uint16_t here, whereas sl::PCLHotKey (sl_pcl.h) uses int16_t as its underlying type.
uint16_t virtualKey = 0;
//! ThreadID for PCL Stats messages
uint32_t idThread = 0;
//! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
// {0D569B37-A1C8-4453-BE4D-40F4DE57952B}
//! ReflexReport: per-frame latency record; ReflexState::frameReport holds an array of these.
//! All members are value-initialized (zero) by default.
//! NOTE(review): the time base/units of the 64-bit *Time fields are not stated in this header;
//! the "Us" suffix on the two 32-bit fields suggests microseconds - confirm against the SDK docs.
SL_STRUCT_BEGIN(ReflexReport, StructType({ 0xd569b37, 0xa1c8, 0x4453, { 0xbe, 0x4d, 0x40, 0xf4, 0xde, 0x57, 0x95, 0x2b } }), kStructVersion1)
//! Various latency related stats
uint64_t frameID{};
uint64_t inputSampleTime{};
uint64_t simStartTime{};
uint64_t simEndTime{};
uint64_t renderSubmitStartTime{};
uint64_t renderSubmitEndTime{};
uint64_t presentStartTime{};
uint64_t presentEndTime{};
uint64_t driverStartTime{};
uint64_t driverEndTime{};
uint64_t osRenderQueueStartTime{};
uint64_t osRenderQueueEndTime{};
uint64_t gpuRenderStartTime{};
uint64_t gpuRenderEndTime{};
uint32_t gpuActiveRenderTimeUs{};
uint32_t gpuFrameTimeUs{};
//! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
// {F0BB5985-DAF9-4728-B2FD-AE80A2BD7989}
//! ReflexState: state structure filled in by slReflexGetState (see PFun_slReflexGetState).
SL_STRUCT_BEGIN(ReflexState, StructType({ 0xf0bb5985, 0xdaf9, 0x4728, { 0xb2, 0xfd, 0xae, 0x80, 0xa2, 0xbd, 0x79, 0x89 } }), kStructVersion1)
    //! Specifies if low-latency mode is available or not
    bool lowLatencyAvailable = false;
    //! Specifies if the frameReport below contains valid data or not
    bool latencyReportAvailable = false;
    //! Specifies low latency Windows message id (if ReflexOptions::virtualKey is 0)
    //! Defaults to 0, like every other member here has a default, so a freshly constructed
    //! state never exposes an indeterminate value.
    uint32_t statsWindowMessage = 0;
    //! Reflex report per frame
    ReflexReport frameReport[64];
    //! Specifies ownership of flash indicator toggle (true = driver, false = application)
    bool flashIndicatorDriverControlled = false;
    //! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
// {c83cbb02-b4e2-4260-9ca2-d0c3de3a9684}
//! ReflexCameraData: camera matrices handed to slReflexSetCameraData (see PFun_slReflexSetCameraData).
//! The members carry no default initializers in this header; whether float4x4 zero-initializes
//! itself is not visible here, so callers should presumably fill in all four matrices - confirm.
SL_STRUCT_BEGIN(ReflexCameraData, StructType({ 0xc83cbb02, 0xb4e2, 0x4260, { 0x9c, 0xa2, 0xd0, 0xc3, 0xde, 0x3a, 0x96, 0x84 } }), kStructVersion1)
//! World-to-view and view-to-clip matrices (current)
float4x4 worldToViewMatrix;
float4x4 viewToClipMatrix;
//! Same pair for the previously rendered frame, going by the member names
float4x4 prevRenderedWorldToViewMatrix;
float4x4 prevRenderedViewToClipMatrix;
//! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
// {8b960090-a807-4c85-b02f-1069950d066c}
//! ReflexPredictedCameraData: output structure of slReflexGetPredictedCameraData
//! (see PFun_slReflexGetPredictedCameraData); holds the predicted matrix pair.
SL_STRUCT_BEGIN(ReflexPredictedCameraData, StructType({ 0x8b960090, 0xa807, 0x4c85, { 0xb0, 0x2f, 0x10, 0x69, 0x95, 0x0d, 0x06, 0x6c } }), kStructVersion1)
float4x4 predictedWorldToViewMatrix;
float4x4 predictedViewToClipMatrix;
//! IMPORTANT: New members go here or if optional can be chained in a new struct, see sl_struct.h for details
SL_STRUCT_END()
//! Integer type underlying sl::PCLMarker.
using MarkerUnderlying = std::underlying_type_t<PCLMarker>;
// {E268B3DC-F963-4C37-9776-AF048E132621}
//! ReflexHelper: small structure that carries one marker value as a raw integer; it can be
//! built from either the raw value or a PCLMarker and converts back to the raw value.
SL_STRUCT_BEGIN(ReflexHelper, StructType({ 0xe268b3dc, 0xf963, 0x4c37, { 0x97, 0x76, 0xaf, 0x4, 0x8e, 0x13, 0x26, 0x21 } }), kStructVersion1)
    //! Stores the raw marker value as given.
    ReflexHelper(MarkerUnderlying m)
        : BaseStructure(ReflexHelper::s_structType, kStructVersion1)
        , marker(m)
    {
    }
    //! Stores the underlying integer value of the PCLMarker enumerator.
    ReflexHelper(PCLMarker m)
        : BaseStructure(ReflexHelper::s_structType, kStructVersion1)
        , marker(static_cast<MarkerUnderlying>(m))
    {
    }
    //! Implicit conversion back to the stored raw marker value.
    operator MarkerUnderlying() const
    {
        return marker;
    }
private:
    // May be kReflexMarkerSleep which is not a valid PCLMarker value
    MarkerUnderlying marker;
SL_STRUCT_END()
}
//! NOTE: the PFun_* aliases below name function *types* (not pointer types); each one has the
//! same signature as the inline helper of the same name defined further down in this header.
//!
//! Provides Reflex settings
//!
//! Call this method to check if Reflex is on, get stats etc.
//!
//! @param state Reference to a structure where states are returned
//! @return sl::ResultCode::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe.
using PFun_slReflexGetState = sl::Result(sl::ReflexState& state);
//! Tells reflex to sleep the app
//!
//! Call this method to invoke Reflex sleep in your application.
//!
//! @param frame Specifies current frame
//! @return sl::ResultCode::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is thread safe.
using PFun_slReflexSleep = sl::Result(const sl::FrameToken& frame);
//! Sets Reflex options
//!
//! Call this method to turn Reflex on/off, change mode etc.
//!
//! @param options Specifies options to use
//! @return sl::ResultCode::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is NOT thread safe.
using PFun_slReflexSetOptions = sl::Result(const sl::ReflexOptions& options);
//! Sets Reflex camera data
//!
//! Call this method to inform Reflex of upcoming camera data
//!
//! @param viewport The viewport the camera corresponds to
//! @param frame The frame to set camera data for
//! @param inCameraData Camera data for an upcoming render frame
//! @return sl::ResultCode::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is thread safe.
using PFun_slReflexSetCameraData = sl::Result(const sl::ViewportHandle& viewport, const sl::FrameToken& frame, const sl::ReflexCameraData& inCameraData);
//! Gets predicted Reflex camera data
//!
//! Call this method to get a prediction of upcoming camera data
//!
//! @param viewport The viewport the camera corresponds to
//! @param frame The frame to get camera data for (if available)
//! @param outCameraData Predicted Camera data for an upcoming render frame
//! @return sl::ResultCode::eOk if successful, error code otherwise (see sl_result.h for details)
//!
//! This method is thread safe.
using PFun_slReflexGetPredictedCameraData = sl::Result(const sl::ViewportHandle& viewport, const sl::FrameToken& frame, sl::ReflexPredictedCameraData& outCameraData);
//! HELPERS
//!
//! Inline convenience wrapper with the PFun_slReflexGetState signature.
//! SL_FEATURE_FUN_IMPORT_STATIC is defined outside this file; judging by its use here it makes
//! `s_slReflexGetState` available for sl::kFeatureReflex (presumably resolved once and cached,
//! and it may return early on failure - confirm against the macro definition).
//! @param state Forwarded unchanged to s_slReflexGetState
//! @return The result of s_slReflexGetState
inline sl::Result slReflexGetState(sl::ReflexState& state)
{
SL_FEATURE_FUN_IMPORT_STATIC(sl::kFeatureReflex, slReflexGetState);
return s_slReflexGetState(state);
}
//! Inline convenience wrapper with the PFun_slReflexSleep signature.
//! SL_FEATURE_FUN_IMPORT_STATIC is defined outside this file; judging by its use here it makes
//! `s_slReflexSleep` available for sl::kFeatureReflex (presumably resolved once and cached,
//! and it may return early on failure - confirm against the macro definition).
//! @param frame Forwarded unchanged to s_slReflexSleep
//! @return The result of s_slReflexSleep
inline sl::Result slReflexSleep(const sl::FrameToken& frame)
{
SL_FEATURE_FUN_IMPORT_STATIC(sl::kFeatureReflex, slReflexSleep);
return s_slReflexSleep(frame);
}
//! Inline convenience wrapper with the PFun_slReflexSetOptions signature.
//! SL_FEATURE_FUN_IMPORT_STATIC is defined outside this file; judging by its use here it makes
//! `s_slReflexSetOptions` available for sl::kFeatureReflex (presumably resolved once and cached,
//! and it may return early on failure - confirm against the macro definition).
//! @param options Forwarded unchanged to s_slReflexSetOptions
//! @return The result of s_slReflexSetOptions
inline sl::Result slReflexSetOptions(const sl::ReflexOptions& options)
{
SL_FEATURE_FUN_IMPORT_STATIC(sl::kFeatureReflex, slReflexSetOptions);
return s_slReflexSetOptions(options);
}
//! Inline convenience wrapper with the PFun_slReflexSetCameraData signature.
//! SL_FEATURE_FUN_IMPORT_STATIC is defined outside this file; judging by its use here it makes
//! `s_slReflexSetCameraData` available for sl::kFeatureReflex (presumably resolved once and
//! cached, and it may return early on failure - confirm against the macro definition).
//! @param viewport Forwarded unchanged to s_slReflexSetCameraData
//! @param frame Forwarded unchanged to s_slReflexSetCameraData
//! @param inCameraData Forwarded unchanged to s_slReflexSetCameraData
//! @return The result of s_slReflexSetCameraData
inline sl::Result slReflexSetCameraData(const sl::ViewportHandle& viewport, const sl::FrameToken& frame, const sl::ReflexCameraData& inCameraData)
{
SL_FEATURE_FUN_IMPORT_STATIC(sl::kFeatureReflex, slReflexSetCameraData);
return s_slReflexSetCameraData(viewport, frame, inCameraData);
}
//! Inline convenience wrapper with the PFun_slReflexGetPredictedCameraData signature.
//! SL_FEATURE_FUN_IMPORT_STATIC is defined outside this file; judging by its use here it makes
//! `s_slReflexGetPredictedCameraData` available for sl::kFeatureReflex (presumably resolved once
//! and cached, and it may return early on failure - confirm against the macro definition).
//! @param viewport Forwarded unchanged to s_slReflexGetPredictedCameraData
//! @param frame Forwarded unchanged to s_slReflexGetPredictedCameraData
//! @param outCameraData Forwarded unchanged to s_slReflexGetPredictedCameraData (output)
//! @return The result of s_slReflexGetPredictedCameraData
inline sl::Result slReflexGetPredictedCameraData(const sl::ViewportHandle& viewport, const sl::FrameToken& frame, sl::ReflexPredictedCameraData& outCameraData)
{
SL_FEATURE_FUN_IMPORT_STATIC(sl::kFeatureReflex, slReflexGetPredictedCameraData);
return s_slReflexGetPredictedCameraData(viewport, frame, outCameraData);
}

View File

@ -0,0 +1,77 @@
/*
* Copyright (c) 2022-2024 NVIDIA CORPORATION. All rights reserved
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
//! Evaluates `f` once and, when the result is not sl::Result::eOk, returns that result from the
//! enclosing function. Expands to a plain braced block (not do/while), so a trailing `;` after
//! the macro forms an additional empty statement, and `f` is substituted without parentheses.
#define SL_CHECK(f) {auto _r = f; if(_r != sl::Result::eOk) return _r;}
//! Expands to `sl::Result r = f; r != sl::Result::eOk`, i.e. a declaration followed by a
//! condition. That shape fits an if-with-initializer, e.g. `if (SL_FAILED(res, call())) {...}`;
//! `r` is the name of the variable being declared and stays visible inside that statement.
#define SL_FAILED(r, f) sl::Result r = f; r != sl::Result::eOk
//! Counterpart of SL_FAILED with `==`: the condition holds when `f` yields sl::Result::eOk.
#define SL_SUCCEEDED(r, f) sl::Result r = f; r == sl::Result::eOk
namespace sl
{

//! Result codes returned by the Streamline entry points declared in the sl_*.h headers.
//! eOk (0) is the only value that SL_CHECK / SL_FAILED / SL_SUCCEEDED treat as success.
//! Numeric values are spelled out explicitly; they match the implicit numbering.
enum class Result
{
    eOk                              = 0,
    eErrorIO                         = 1,
    eErrorDriverOutOfDate            = 2,
    eErrorOSOutOfDate                = 3,
    eErrorOSDisabledHWS              = 4,
    eErrorDeviceNotCreated           = 5,
    eErrorNoSupportedAdapterFound    = 6,
    eErrorAdapterNotSupported        = 7,
    eErrorNoPlugins                  = 8,
    eErrorVulkanAPI                  = 9,
    eErrorDXGIAPI                    = 10,
    eErrorD3DAPI                     = 11,
    // NRD was removed
    eErrorNRDAPI                     = 12,
    eErrorNVAPI                      = 13,
    eErrorReflexAPI                  = 14,
    eErrorNGXFailed                  = 15,
    eErrorJSONParsing                = 16,
    eErrorMissingProxy               = 17,
    eErrorMissingResourceState       = 18,
    eErrorInvalidIntegration         = 19,
    eErrorMissingInputParameter      = 20,
    eErrorNotInitialized             = 21,
    eErrorComputeFailed              = 22,
    eErrorInitNotCalled              = 23,
    eErrorExceptionHandler           = 24,
    eErrorInvalidParameter           = 25,
    eErrorMissingConstants           = 26,
    eErrorDuplicatedConstants        = 27,
    eErrorMissingOrInvalidAPI        = 28,
    eErrorCommonConstantsMissing     = 29,
    eErrorUnsupportedInterface       = 30,
    eErrorFeatureMissing             = 31,
    eErrorFeatureNotSupported        = 32,
    eErrorFeatureMissingHooks        = 33,
    eErrorFeatureFailedToLoad        = 34,
    eErrorFeatureWrongPriority       = 35,
    eErrorFeatureMissingDependency   = 36,
    eErrorFeatureManagerInvalidState = 37,
    eErrorInvalidState               = 38,
    eWarnOutOfVRAM                   = 39,
};

}

View File

@ -0,0 +1,463 @@
/*
* Copyright (c) 2022-2023 NVIDIA CORPORATION. All rights reserved
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#define _UNICODE 1
#define UNICODE 1
#include <tchar.h>
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
#include <Softpub.h>
#include <wincrypt.h>
#include <wintrust.h>
#include <inttypes.h>
// Resolves `procName` from `hModule` into the function pointer `proc`, unless `proc` is already
// non-NULL (in which case GetProcAddress is not called). Evaluates to TRUE when `proc` is set
// afterwards and to FALSE when GetProcAddress returned NULL.
#define GetProc(hModule, procName, proc) (((NULL == proc) && (NULL == (*((FARPROC*)&proc) = GetProcAddress(hModule, procName)))) ? FALSE : TRUE)
// The typedef/pointer pairs below mirror crypt32.dll exports that are looked up at run time via
// GetProc (see sl::security::isSignedByNVIDIA). Each pointer is `static`, i.e. every translation
// unit including this header gets its own copy, initially NULL.
typedef BOOL(WINAPI* PfnCryptMsgClose)(IN HCRYPTMSG hCryptMsg);
static PfnCryptMsgClose pfnCryptMsgClose = NULL;
typedef BOOL(WINAPI* PfnCertCloseStore)(IN HCERTSTORE hCertStore, DWORD dwFlags);
static PfnCertCloseStore pfnCertCloseStore = NULL;
typedef HCERTSTORE (WINAPI* PfnCertOpenStore)(
_In_ LPCSTR lpszStoreProvider,
_In_ DWORD dwEncodingType,
_In_opt_ HCRYPTPROV_LEGACY hCryptProv,
_In_ DWORD dwFlags,
_In_opt_ const void* pvPara
);
static PfnCertOpenStore pfnCertOpenStore = NULL;
typedef BOOL(WINAPI* PfnCertFreeCertificateContext)(IN PCCERT_CONTEXT pCertContext);
static PfnCertFreeCertificateContext pfnCertFreeCertificateContext = NULL;
typedef PCCERT_CONTEXT(WINAPI* PfnCertFindCertificateInStore)(
IN HCERTSTORE hCertStore,
IN DWORD dwCertEncodingType,
IN DWORD dwFindFlags,
IN DWORD dwFindType,
IN const void* pvFindPara,
IN PCCERT_CONTEXT pPrevCertContext
);
static PfnCertFindCertificateInStore pfnCertFindCertificateInStore = NULL;
typedef BOOL(WINAPI* PfnCryptMsgGetParam)(
IN HCRYPTMSG hCryptMsg,
IN DWORD dwParamType,
IN DWORD dwIndex,
OUT void* pvData,
IN OUT DWORD* pcbData
);
static PfnCryptMsgGetParam pfnCryptMsgGetParam = NULL;
// Signature of the crypt32.dll export CryptMsgOpenToDecode, resolved at run time via GetProc.
typedef HCRYPTMSG (WINAPI* PfnCryptMsgOpenToDecode)(
    _In_ DWORD dwMsgEncodingType,
    _In_ DWORD dwFlags,
    _In_ DWORD dwMsgType,
    _In_opt_ HCRYPTPROV_LEGACY hCryptProv,
    _Reserved_ PCERT_INFO pRecipientInfo,
    _In_opt_ PCMSG_STREAM_INFO pStreamInfo
);
// `static` like every other pfn* pointer in this header: without internal linkage, including
// the header from more than one translation unit defines the same global twice.
static PfnCryptMsgOpenToDecode pfnCryptMsgOpenToDecode = NULL;
// Signature of the crypt32.dll export CryptMsgUpdate, resolved at run time via GetProc.
typedef BOOL (WINAPI* PfnCryptMsgUpdate)(
    _In_ HCRYPTMSG hCryptMsg,
    _In_reads_bytes_opt_(cbData) const BYTE* pbData,
    _In_ DWORD cbData,
    _In_ BOOL fFinal
);
static PfnCryptMsgUpdate pfnCryptMsgUpdate = NULL;
// Signature of the crypt32.dll export CryptQueryObject, resolved at run time via GetProc.
typedef BOOL(WINAPI* PfnCryptQueryObject)(
DWORD dwObjectType,
const void* pvObject,
DWORD dwExpectedContentTypeFlags,
DWORD dwExpectedFormatTypeFlags,
DWORD dwFlags,
DWORD* pdwMsgAndCertEncodingType,
DWORD* pdwContentType,
DWORD* pdwFormatType,
HCERTSTORE* phCertStore,
HCRYPTMSG* phMsg,
const void** ppvContext
);
static PfnCryptQueryObject pfnCryptQueryObject = NULL;
// Signature of the crypt32.dll export CryptDecodeObjectEx. It is the last entry resolved by
// the GetProc chain in sl::security::isSignedByNVIDIA.
typedef BOOL(WINAPI* PfnCryptDecodeObjectEx)(
IN DWORD dwCertEncodingType,
IN LPCSTR lpszStructType,
IN const BYTE* pbEncoded,
IN DWORD cbEncoded,
IN DWORD dwFlags,
IN PCRYPT_DECODE_PARA pDecodePara,
OUT void* pvStructInfo,
IN OUT DWORD* pcbStructInfo
);
static PfnCryptDecodeObjectEx pfnCryptDecodeObjectEx = NULL;
// Signature of the wintrust.dll export WinVerifyTrust, resolved lazily by
// sl::security::verifyEmbeddedSignature.
typedef LONG(WINAPI* PfnWinVerifyTrust)(
IN HWND hwnd,
IN GUID* pgActionID,
IN LPVOID pWVTData
);
static PfnWinVerifyTrust pfnWinVerifyTrust = NULL;
namespace sl
{
namespace security
{
bool isSignedByNVIDIA(const wchar_t* pathToFile)
{
bool valid = false;
// Now let's make sure this is actually signed by NVIDIA
DWORD dwEncoding, dwContentType, dwFormatType;
HCERTSTORE hStore = NULL;
HCRYPTMSG hMsg = NULL;
PCMSG_SIGNER_INFO pSignerInfo = NULL;
DWORD dwSignerInfo;
if (!pfnCertOpenStore)
{
// We only support Win10+ so we can search for module in system32 directly
auto hModCrypt32 = LoadLibraryExW(L"crypt32.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
if (!hModCrypt32 ||
!GetProc(hModCrypt32, "CryptMsgClose", pfnCryptMsgClose) ||
!GetProc(hModCrypt32, "CertOpenStore", pfnCertOpenStore) ||
!GetProc(hModCrypt32, "CertCloseStore", pfnCertCloseStore) ||
!GetProc(hModCrypt32, "CertFreeCertificateContext", pfnCertFreeCertificateContext) ||
!GetProc(hModCrypt32, "CertFindCertificateInStore", pfnCertFindCertificateInStore) ||
!GetProc(hModCrypt32, "CryptMsgGetParam", pfnCryptMsgGetParam) ||
!GetProc(hModCrypt32, "CryptMsgUpdate", pfnCryptMsgUpdate) ||
!GetProc(hModCrypt32, "CryptMsgOpenToDecode", pfnCryptMsgOpenToDecode) ||
!GetProc(hModCrypt32, "CryptQueryObject", pfnCryptQueryObject) ||
!GetProc(hModCrypt32, "CryptDecodeObjectEx", pfnCryptDecodeObjectEx))
{
return false;
}
}
// Get message handle and store handle from the signed file.
auto bResult = pfnCryptQueryObject(CERT_QUERY_OBJECT_FILE,
pathToFile,
CERT_QUERY_CONTENT_FLAG_PKCS7_SIGNED_EMBED,
CERT_QUERY_FORMAT_FLAG_BINARY,
0,
&dwEncoding,
&dwContentType,
&dwFormatType,
&hStore,
&hMsg,
NULL);
if (!bResult)
{
return false;
}
// Get signer information size.
bResult = pfnCryptMsgGetParam(hMsg,
CMSG_SIGNER_INFO_PARAM,
0,
NULL,
&dwSignerInfo);
if (!bResult)
{
return false;
}
// Allocate memory for signer information.
pSignerInfo = (PCMSG_SIGNER_INFO)LocalAlloc(LPTR, dwSignerInfo);
if (!pSignerInfo)
{
return false;
}
// Get Signer Information.
bResult = pfnCryptMsgGetParam(hMsg,
CMSG_SIGNER_INFO_PARAM,
0,
(PVOID)pSignerInfo,
&dwSignerInfo);
if (!bResult)
{
LocalFree(pSignerInfo);
return false;
}
// Look for nested signature
constexpr const char* kOID_NESTED_SIGNATURE = "1.3.6.1.4.1.311.2.4.1";
for (DWORD i = 0; i < pSignerInfo->UnauthAttrs.cAttr; i++)
{
if (strcmp(kOID_NESTED_SIGNATURE, pSignerInfo->UnauthAttrs.rgAttr[i].pszObjId) == 0)
{
HCRYPTMSG hMsg2 = pfnCryptMsgOpenToDecode(X509_ASN_ENCODING | PKCS_7_ASN_ENCODING, 0, 0, NULL, NULL, NULL);
if (hMsg2)
{
if (pfnCryptMsgUpdate(hMsg2,pSignerInfo->UnauthAttrs.rgAttr[i].rgValue->pbData,pSignerInfo->UnauthAttrs.rgAttr[i].rgValue->cbData,TRUE))
{
dwSignerInfo = 0;
pfnCryptMsgGetParam(hMsg2, CMSG_SIGNER_INFO_PARAM, 0, NULL, &dwSignerInfo);
if (dwSignerInfo != 0)
{
PCMSG_SIGNER_INFO pSignerInfo2 = (PCMSG_SIGNER_INFO)LocalAlloc(LPTR, dwSignerInfo);
if (pSignerInfo2)
{
if (pfnCryptMsgGetParam(hMsg2, CMSG_SIGNER_INFO_PARAM, 0, (PVOID)pSignerInfo2, &dwSignerInfo))
{
CRYPT_DATA_BLOB c7Data;
c7Data.pbData = pSignerInfo->UnauthAttrs.rgAttr[i].rgValue->pbData;
c7Data.cbData = pSignerInfo->UnauthAttrs.rgAttr[i].rgValue->cbData;
auto hStore2 = pfnCertOpenStore(CERT_STORE_PROV_PKCS7, X509_ASN_ENCODING | PKCS_7_ASN_ENCODING, NULL, 0, &c7Data);
if (!hStore2)
{
LocalFree(pSignerInfo2);
return false;
}
CERT_INFO CertInfo{};
PCCERT_CONTEXT pCertContext = NULL;
// Search for the signer certificate in the temporary certificate store.
CertInfo.Issuer = pSignerInfo2->Issuer;
CertInfo.SerialNumber = pSignerInfo2->SerialNumber;
pCertContext = pfnCertFindCertificateInStore(hStore2,
(X509_ASN_ENCODING | PKCS_7_ASN_ENCODING),
0,
CERT_FIND_SUBJECT_CERT,
(PVOID)&CertInfo,
NULL);
if (!pCertContext)
{
LocalFree(pSignerInfo2);
pfnCertCloseStore(hStore2, CERT_CLOSE_STORE_FORCE_FLAG);
return false;
}
void* decodedPublicKey{};
DWORD decodedPublicLength{};
if (pfnCryptDecodeObjectEx((PKCS_7_ASN_ENCODING | X509_ASN_ENCODING),
CNG_RSA_PUBLIC_KEY_BLOB,
pCertContext->pCertInfo->SubjectPublicKeyInfo.PublicKey.pbData,
pCertContext->pCertInfo->SubjectPublicKeyInfo.PublicKey.cbData,
CRYPT_ENCODE_ALLOC_FLAG,
NULL,
&decodedPublicKey,
&decodedPublicLength))
{
static uint8_t s_rsaStreamlinePublicKey[] =
{
0x52, 0x53, 0x41, 0x31, 0x00, 0x0c, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0xc1, 0x8e, 0x40, 0xc3, 0xf5,
0xa7, 0x01, 0x9a, 0x37, 0x6b, 0x47, 0xa8, 0x58, 0xe8, 0xbe, 0xe3, 0x55, 0x0a, 0xee, 0x0f, 0x0d,
0x32, 0xaa, 0x12, 0xf9, 0x56, 0x7f, 0x5d, 0xfd, 0x82, 0x09, 0x33, 0x21, 0x42, 0xf2, 0xe8, 0x74,
0x98, 0x51, 0xb3, 0x88, 0x74, 0xcd, 0x00, 0x6e, 0xb1, 0x08, 0x10, 0x4b, 0xf1, 0xda, 0xd6, 0x97,
0x87, 0xd4, 0x9c, 0xb1, 0x13, 0xa8, 0xa2, 0x86, 0x15, 0x0e, 0xc1, 0xa5, 0x9c, 0xe5, 0x90, 0x9b,
0xbe, 0x69, 0xdc, 0x6a, 0x82, 0xbe, 0xb4, 0x4b, 0x4b, 0xfa, 0x95, 0x8e, 0xc1, 0xfc, 0x2b, 0x61,
0x95, 0xd1, 0x91, 0xed, 0xeb, 0x87, 0xe7, 0x09, 0x84, 0x05, 0x41, 0x03, 0xb0, 0x2d, 0xd4, 0x39,
0x7f, 0x62, 0x06, 0x56, 0x33, 0x93, 0x7e, 0x77, 0x54, 0x06, 0x77, 0x2b, 0x75, 0x05, 0xbc, 0xeb,
0x98, 0xea, 0xc0, 0xa2, 0xca, 0x98, 0x86, 0x0f, 0x10, 0x65, 0xde, 0x19, 0x2c, 0xa6, 0x1e, 0x93,
0xb0, 0x92, 0x5d, 0x5f, 0x5b, 0x6f, 0x79, 0x6d, 0x2c, 0x76, 0xa6, 0x67, 0x50, 0xaa, 0x8f, 0xc2,
0x4c, 0xf1, 0x08, 0xf7, 0xc0, 0x27, 0x29, 0xf0, 0x68, 0xf4, 0x64, 0x00, 0x1c, 0xb6, 0x28, 0x1e,
0x25, 0xb8, 0xf3, 0x8a, 0xd1, 0x6e, 0x65, 0xa3, 0x61, 0x9d, 0xf8, 0xca, 0x4a, 0x41, 0x60, 0x80,
0x62, 0xdf, 0x41, 0xa4, 0x8b, 0xdc, 0x97, 0xee, 0xeb, 0x64, 0x6f, 0xe4, 0x8f, 0x4b, 0xdf, 0x24,
0x01, 0x80, 0xd9, 0xb4, 0x0a, 0xec, 0x0d, 0x3e, 0xb7, 0x76, 0xba, 0xe9, 0xe7, 0xde, 0x07, 0xdd,
0x30, 0xc8, 0x4a, 0x14, 0x79, 0xec, 0x15, 0xed, 0x5c, 0xc6, 0xcc, 0xd4, 0xe6, 0x06, 0x3c, 0x42,
0x92, 0x10, 0xf7, 0x7c, 0x80, 0x1e, 0x78, 0xd3, 0xb4, 0x9f, 0xc2, 0x3b, 0xa8, 0x7b, 0xa0, 0xe3,
0x0c, 0xd9, 0xad, 0x2e, 0x09, 0x72, 0xe2, 0x8f, 0x54, 0x28, 0x87, 0x3c, 0xba, 0x7c, 0x97, 0x80,
0xdc, 0x09, 0xb5, 0x12, 0x34, 0x78, 0x9a, 0x26, 0xd0, 0xa3, 0xa7, 0xa7, 0x1b, 0x25, 0x19, 0xe5,
0x6e, 0xbe, 0xd7, 0x5a, 0x91, 0x32, 0xc4, 0xa9, 0x2f, 0xcc, 0xd5, 0x82, 0x4b, 0x5b, 0x9f, 0xad,
0xf3, 0x2f, 0xed, 0x4f, 0x33, 0xe1, 0x50, 0x33, 0xd6, 0x90, 0x79, 0x22, 0xe5, 0x1c, 0xc7, 0x35,
0xe7, 0x58, 0xe6, 0xb4, 0x8b, 0xc4, 0x28, 0x20, 0xec, 0xca, 0x70, 0xbb, 0x02, 0x1b, 0x48, 0xd8,
0x84, 0x51, 0x24, 0x33, 0x2a, 0x08, 0xb1, 0x15, 0x4e, 0xbc, 0x88, 0xa5, 0xe1, 0x37, 0x76, 0x70,
0xe6, 0xdf, 0x3f, 0x73, 0xfd, 0x0d, 0x8a, 0xd9, 0x0d, 0xa5, 0x35, 0xb2, 0xb4, 0x01, 0x42, 0x96,
0xc4, 0xaa, 0x1c, 0xeb, 0x68, 0x62, 0x36, 0xbf, 0xef, 0x5e, 0x2a, 0x3d, 0x18, 0x91, 0x8b, 0x92,
0x0a, 0x1e, 0xce, 0x98, 0x5b, 0x7b, 0x64, 0x42, 0x09, 0xb0, 0x1d
};
valid = decodedPublicLength == sizeof(s_rsaStreamlinePublicKey) && memcmp(s_rsaStreamlinePublicKey, decodedPublicKey, decodedPublicLength) == 0;
LocalFree(decodedPublicKey);
}
pfnCertFreeCertificateContext(pCertContext);
pfnCertCloseStore(hStore2, CERT_CLOSE_STORE_FORCE_FLAG);
}
LocalFree(pSignerInfo2);
}
}
}
pfnCryptMsgClose(hMsg2);
}
break;
}
}
LocalFree(pSignerInfo);
pfnCryptMsgClose(hMsg);
pfnCertCloseStore(hStore, CERT_CLOSE_STORE_FORCE_FLAG);
return valid;
}
//! See https://docs.microsoft.com/en-us/windows/win32/seccrypto/example-c-program--verifying-the-signature-of-a-pe-file
//!
//! IMPORTANT: Always pass in the FULL PATH to the file, relative paths are NOT allowed!
bool verifyEmbeddedSignature(const wchar_t* pathToFile)
{
bool valid = true;
LONG lStatus = {};
// Initialize the WINTRUST_FILE_INFO structure.
WINTRUST_FILE_INFO FileData;
memset(&FileData, 0, sizeof(FileData));
FileData.cbStruct = sizeof(WINTRUST_FILE_INFO);
FileData.pcwszFilePath = pathToFile;
FileData.hFile = NULL;
FileData.pgKnownSubject = NULL;
if (!pfnWinVerifyTrust)
{
// We only support Win10+ so we can search for module in system32 directly
auto hModWintrust = LoadLibraryExW(L"wintrust.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
if (!hModWintrust || !GetProc(hModWintrust, "WinVerifyTrust", pfnWinVerifyTrust))
{
return false;
}
}
/*
WVTPolicyGUID specifies the policy to apply on the file
WINTRUST_ACTION_GENERIC_VERIFY_V2 policy checks:
1) The certificate used to sign the file chains up to a root
certificate located in the trusted root certificate store. This
implies that the identity of the publisher has been verified by
a certification authority.
2) In cases where user interface is displayed (which this example
does not do), WinVerifyTrust will check for whether the
end entity certificate is stored in the trusted publisher store,
implying that the user trusts content from this publisher.
3) The end entity certificate has sufficient permission to sign
code, as indicated by the presence of a code signing EKU or no
EKU.
*/
GUID WVTPolicyGUID = WINTRUST_ACTION_GENERIC_VERIFY_V2;
WINTRUST_DATA WinTrustData;
// Initialize the WinVerifyTrust input data structure.
// Default all fields to 0.
memset(&WinTrustData, 0, sizeof(WinTrustData));
WinTrustData.cbStruct = sizeof(WinTrustData);
// Use default code signing EKU.
WinTrustData.pPolicyCallbackData = NULL;
// No data to pass to SIP.
WinTrustData.pSIPClientData = NULL;
// Disable WVT UI.
WinTrustData.dwUIChoice = WTD_UI_NONE;
// No revocation checking.
WinTrustData.fdwRevocationChecks = WTD_REVOKE_NONE;
// Verify an embedded signature on a file.
WinTrustData.dwUnionChoice = WTD_CHOICE_FILE;
// Verify action.
WinTrustData.dwStateAction = WTD_STATEACTION_VERIFY;
// Verification sets this value.
WinTrustData.hWVTStateData = NULL;
// Not used.
WinTrustData.pwszURLReference = NULL;
// This is not applicable if there is no UI because it changes
// the UI to accommodate running applications instead of
// installing applications.
WinTrustData.dwUIContext = 0;
// Set pFile.
WinTrustData.pFile = &FileData;
// First verify the primary signature (index 0) to determine how many secondary signatures
// are present. We use WSS_VERIFY_SPECIFIC and dwIndex to do this, also setting
// WSS_GET_SECONDARY_SIG_COUNT to have the number of secondary signatures returned.
WINTRUST_SIGNATURE_SETTINGS SignatureSettings = {};
CERT_STRONG_SIGN_PARA StrongSigPolicy = {};
SignatureSettings.cbStruct = sizeof(WINTRUST_SIGNATURE_SETTINGS);
SignatureSettings.dwFlags = WSS_GET_SECONDARY_SIG_COUNT | WSS_VERIFY_SPECIFIC;
SignatureSettings.dwIndex = 0;
WinTrustData.pSignatureSettings = &SignatureSettings;
StrongSigPolicy.cbSize = sizeof(CERT_STRONG_SIGN_PARA);
StrongSigPolicy.dwInfoChoice = CERT_STRONG_SIGN_OID_INFO_CHOICE;
StrongSigPolicy.pszOID = (LPSTR)szOID_CERT_STRONG_SIGN_OS_CURRENT;
WinTrustData.pSignatureSettings->pCryptoPolicy = &StrongSigPolicy;
// WinVerifyTrust verifies signatures as specified by the GUID and Wintrust_Data.
lStatus = pfnWinVerifyTrust(NULL, &WVTPolicyGUID, &WinTrustData);
// First signature must be validated by the OS
valid = lStatus == ERROR_SUCCESS;
if (!valid)
{
printf("File '%S' is NOT correctly signed - Streamline will not load unsecured modules\n", pathToFile);
}
else
{
// Now there has to be a secondary one
valid &= WinTrustData.pSignatureSettings->cSecondarySigs == 1;
if (!valid)
{
printf("File '%S' does not have the secondary NVIDIA signature - Streamline will not load unsecured modules\n", pathToFile);
}
else
{
// The secondary signature must be from NVIDIA
valid &= isSignedByNVIDIA(pathToFile);
if (valid)
{
printf("File '%S' is signed by NVIDIA and the signature was verified.\n", pathToFile);
}
else
{
printf("File '%S' is NOT correctly signed - Streamline will not load unsecured modules\n", pathToFile);
}
}
}
// Any hWVTStateData must be released by a call with close.
WinTrustData.dwStateAction = WTD_STATEACTION_CLOSE;
lStatus = pfnWinVerifyTrust(NULL, &WVTPolicyGUID, &WinTrustData);
return valid;
}
}
}

Some files were not shown because too many files have changed in this diff Show More