Skip to content

Commit e256a0d

Browse files
committed
[shadows] improved performance by precalculating vogel samples and improved penumbra quality
1 parent eb18db4 commit e256a0d

1 file changed

Lines changed: 153 additions & 55 deletions

File tree

data/shaders/shadow_mapping.hlsl

Lines changed: 153 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -23,68 +23,128 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2323
#include "common.hlsl"
2424
//====================
2525

26-
// shadow mapping constants
27-
// note: golden angle (2.399963 radians) maximizes surface coverage
28-
static const uint g_shadow_sample_count = 4; // number of samples for shadow filtering
29-
static const float g_shadow_filter_size = 1.0f; // base filter size (gets x4 for directional lights)
26+
// shadow mapping constants (golden angle 2.399963 radians maximizes surface coverage)
27+
static const uint g_shadow_sample_count = 4; // shadow filtering samples
28+
static const float g_shadow_filter_size = 1.0f; // base filter size (x4 for directional lights)
3029
static const float g_shadow_cascade_blend_threshold = 0.8f; // cascade transition threshold
31-
static const uint g_penumbra_sample_count = 8; // samples for penumbra estimation
30+
static const uint g_penumbra_sample_count = 8; // penumbra estimation samples
3231
static const float g_penumbra_filter_size = 128.0f; // penumbra search radius
3332
static const float g_shadow_sample_reciprocal = 1.0f / (float)g_shadow_sample_count;
33+
static const float g_minimum_penumbra_size = 0.5f; // minimum penumbra to prevent hard shadows
34+
static const float g_maximum_penumbra_size = 64.0f; // maximum penumbra size
35+
static const float g_contact_hardening_factor = 0.5f; // contact hardening strength (0=soft, 1=hard)
36+
static const float g_slope_bias_scale = 2.0f; // slope-scaled bias multiplier
37+
static const float g_constant_bias_scale = 0.0001f; // constant bias scale
38+
39+
// pre-computed vogel disk samples (golden angle spiral, rotated at runtime for temporal jitter)
40+
static const float2 g_vogel_samples_shadow[g_shadow_sample_count] =
41+
{
42+
float2(0.353553f, 0.000000f), // sample 0
43+
float2(-0.451544f, 0.413652f), // sample 1
44+
float2(0.069116f, -0.787542f), // sample 2
45+
float2(0.569143f, 0.742345f) // sample 3
46+
};
3447

35-
// generate vogel disk sample for poisson disk sampling
36-
// uses golden angle spiral for optimal distribution
37-
float2 vogel_disk_sample(uint sample_index, uint sample_count, float angle)
48+
static const float2 g_vogel_samples_penumbra[g_penumbra_sample_count] =
49+
{
50+
float2(0.250000f, 0.000000f), // sample 0
51+
float2(-0.319290f, 0.292496f), // sample 1
52+
float2(0.048872f, -0.556877f), // sample 2
53+
float2(0.402445f, 0.524917f), // sample 3
54+
float2(-0.738535f, -0.130636f), // sample 4
55+
float2(0.699604f, -0.445032f), // sample 5
56+
float2(-0.234003f, 0.870484f), // sample 6
57+
float2(-0.446273f, -0.859268f) // sample 7
58+
};
59+
60+
// rotate 2d vector by angle (radians) using rotation matrix
61+
float2 rotate_2d(float2 v, float angle)
3862
{
39-
const float golden_angle = 2.399963f; // radians (137.508 degrees)
40-
float radius = sqrt(sample_index + 0.5f) / sqrt(sample_count);
41-
float theta = sample_index * golden_angle + angle;
4263
float sine, cosine;
43-
sincos(theta, sine, cosine);
64+
sincos(angle, sine, cosine);
65+
return float2(
66+
v.x * cosine - v.y * sine,
67+
v.x * sine + v.y * cosine
68+
);
69+
}
70+
71+
// get vogel disk sample from pre-computed lookup table with rotation
72+
float2 vogel_disk_sample(uint sample_index, uint sample_count, float angle)
73+
{
74+
float2 sample;
75+
76+
// lookup pre-computed sample based on sample count
77+
if (sample_count == g_shadow_sample_count)
78+
{
79+
sample = g_vogel_samples_shadow[sample_index];
80+
}
81+
else if (sample_count == g_penumbra_sample_count)
82+
{
83+
sample = g_vogel_samples_penumbra[sample_index];
84+
}
85+
else
86+
{
87+
// fallback for other sample counts (shouldn't happen in practice)
88+
const float golden_angle = 2.399963f;
89+
float radius = sqrt(sample_index + 0.5f) / sqrt(sample_count);
90+
float theta = sample_index * golden_angle + angle;
91+
float sine, cosine;
92+
sincos(theta, sine, cosine);
93+
return float2(cosine, sine) * radius;
94+
}
4495

45-
return float2(cosine, sine) * radius;
96+
// apply rotation for temporal jitter
97+
return rotate_2d(sample, angle);
4698
}
4799

48-
// estimate penumbra size using blocker search (percentage closer soft shadows)
49-
// larger depth difference between receiver and blocker = larger penumbra
50-
float compute_penumbra(Light light, float rotation_angle, float3 sample_coords, float receiver_depth)
100+
// estimate penumbra size using pcss (percentage closer soft shadows) with improved blocker search
101+
float compute_penumbra(Light light, float rotation_angle, float3 sample_coords, float receiver_depth, float light_distance)
51102
{
52-
float penumbra = 1.0f;
103+
float penumbra = g_minimum_penumbra_size;
53104
float blocker_depth_sum = 0.0f;
54105
uint blocker_count = 0;
106+
float search_radius = g_penumbra_filter_size * light.atlas_texel_size[0].x;
55107

56-
// search for blockers in neighborhood
108+
// search for blockers in neighborhood with adaptive search radius
57109
for(uint i = 0; i < g_penumbra_sample_count; i++)
58110
{
59-
float2 offset = vogel_disk_sample(i, g_penumbra_sample_count, rotation_angle) * light.atlas_texel_size[0] * g_penumbra_filter_size;
111+
float2 offset = vogel_disk_sample(i, g_penumbra_sample_count, rotation_angle) * search_radius;
60112
float depth = light.sample_depth(sample_coords + float3(offset, 0.0f));
61113

62-
// accumulate blockers (depth > receiver means blocker is closer to light)
63-
if(depth > receiver_depth)
114+
// accumulate blockers (depth > receiver means blocker is closer to light, threshold avoids noise)
115+
if(depth > receiver_depth + 0.0001f)
64116
{
65117
blocker_depth_sum += depth;
66118
blocker_count++;
67119
}
68120
}
69121

70-
// compute penumbra size based on average blocker depth
122+
// compute penumbra size based on average blocker depth (pcss formula)
71123
if (blocker_count > 0)
72124
{
73125
float blocker_depth_avg = blocker_depth_sum / float(blocker_count);
74126

75-
// depth difference determines penumbra size (larger gap = softer shadow)
76-
float depth_difference = abs(receiver_depth - blocker_depth_avg);
127+
// pcss formula: penumbra = (receiver_depth - blocker_depth) * light_size / blocker_depth (accounts for light distance)
128+
float depth_difference = receiver_depth - blocker_depth_avg;
77129

78-
// normalize by blocker depth to get relative penumbra size
79-
penumbra = depth_difference / (blocker_depth_avg + FLT_MIN);
130+
// avoid division by zero and negative penumbra
131+
if (depth_difference > 0.0f && blocker_depth_avg > 0.0f)
132+
{
133+
// scale by light distance for realistic penumbra (closer blockers create larger penumbra)
134+
float light_size_factor = saturate(light_distance * 0.1f);
135+
penumbra = (depth_difference / blocker_depth_avg) * light_size_factor * search_radius;
136+
137+
// apply contact hardening: reduce penumbra near contact points (shadows harder when blocker close to receiver)
138+
float contact_factor = saturate(depth_difference * 1000.0f);
139+
penumbra = lerp(penumbra * g_contact_hardening_factor, penumbra, contact_factor);
140+
}
80141
}
81142

82-
// scale and clamp penumbra to reasonable range
83-
return clamp(penumbra * 16.0f, 1.0f, 1024.0f);
143+
// clamp penumbra to reasonable range
144+
return clamp(penumbra, g_minimum_penumbra_size, g_maximum_penumbra_size);
84145
}
85146

86-
// compute shadow factor using vogel disk sampling with adaptive penumbra
87-
// uses percentage closer filtering with temporal jitter for noise reduction
147+
// compute shadow factor using vogel disk sampling with adaptive penumbra and improved edge handling
88148
float vogel_depth(Light light, Surface surface, float3 sample_coords, float receiver_depth, float filter_size_multiplier = 1.0f)
89149
{
90150
float shadow_factor = 0.0f;
@@ -93,52 +153,79 @@ float vogel_depth(Light light, Surface surface, float3 sample_coords, float rece
93153
float temporal_offset = noise_interleaved_gradient(surface.pos);
94154
float temporal_angle = temporal_offset * PI2;
95155

156+
// compute light distance for penumbra calculation
157+
float light_distance = light.is_directional() ? 1000.0f : length(surface.position - light.position);
158+
96159
// estimate penumbra size for adaptive filtering
97-
float penumbra = compute_penumbra(light, temporal_angle, sample_coords, receiver_depth);
160+
float penumbra = compute_penumbra(light, temporal_angle, sample_coords, receiver_depth, light_distance);
98161

99162
// sample shadow map using vogel disk pattern
163+
float valid_sample_count = 0.0f;
164+
100165
for (uint i = 0; i < g_shadow_sample_count; i++)
101166
{
102167
// compute filter size with penumbra adaptation
103168
float2 filter_size = light.atlas_texel_size[0] * g_shadow_filter_size * filter_size_multiplier * penumbra;
104169
float2 offset = vogel_disk_sample(i, g_shadow_sample_count, temporal_angle) * filter_size;
105170
float2 sample_uv = sample_coords.xy + offset;
106171

107-
// check if sample is within shadow map bounds
172+
// check if sample is within shadow map bounds with smooth fade (fade over 10% of texture)
173+
float2 uv_clamped = clamp(sample_uv, 0.0f, 1.0f);
174+
float2 fade_dist = min(sample_uv, 1.0f - sample_uv) * 10.0f;
175+
float fade_factor = saturate(min(fade_dist.x, fade_dist.y));
108176
float is_valid = step(0.0f, sample_uv.x) * step(sample_uv.x, 1.0f) *
109177
step(0.0f, sample_uv.y) * step(sample_uv.y, 1.0f);
110178

111179
// compare depths: 1.0 = lit, 0.0 = shadowed
112-
// treat out-of-bounds samples as lit to avoid hard edges
113-
float depth_sample = light.compare_depth(float3(sample_uv, sample_coords.z), receiver_depth);
114-
shadow_factor += depth_sample + (1.0f - depth_sample) * (1.0f - is_valid);
180+
float depth_sample = light.compare_depth(float3(uv_clamped, sample_coords.z), receiver_depth);
181+
182+
// apply smooth fade at boundaries to avoid hard edges
183+
depth_sample = lerp(1.0f, depth_sample, fade_factor * is_valid + (1.0f - is_valid));
184+
185+
shadow_factor += depth_sample;
186+
valid_sample_count += is_valid;
115187
}
116188

117-
// average samples to get final shadow factor
118-
return shadow_factor * g_shadow_sample_reciprocal;
189+
// normalize by valid samples (better than fixed count for edge cases)
190+
float sample_count = max(valid_sample_count, 1.0f);
191+
return shadow_factor / sample_count;
119192
}
120193

121-
// compute normal offset to reduce shadow acne (too large an offset causes peter panning)
122-
// offset surface along normal to avoid self-shadowing artifacts
194+
// compute improved depth bias using slope-scaled bias technique (combines constant and slope-scaled bias)
123195
float3 compute_normal_offset(Surface surface, Light light, uint cascade_index)
124196
{
197+
// get light direction in world space
198+
float3 light_dir = light.is_directional() ? normalize(-light.forward.xyz) :
199+
normalize(surface.position - light.position);
200+
201+
// compute surface slope (steeper slopes need more bias, sin of angle between normal and light)
202+
float n_dot_l = dot(surface.normal, light_dir);
203+
float slope = sqrt(1.0f - n_dot_l * n_dot_l);
204+
125205
// base bias per cascade: larger bias for far cascades (coarser resolution)
126206
float base_bias = (cascade_index == 0) ? 60.0f : 600.0f;
127-
207+
128208
// get texel size in world space units
129-
float texel_size = light.atlas_texel_size[cascade_index].x; // assuming square texels
130-
131-
// offset along surface normal proportional to texel size
132-
float3 normal_offset = surface.normal * base_bias * texel_size;
133-
134-
// clamp to prevent excessive offset that could cause light leaking
135-
normal_offset = clamp(normal_offset, -0.5f * base_bias, 0.5f * base_bias);
136-
209+
float texel_size = light.atlas_texel_size[cascade_index].x;
210+
211+
// slope-scaled bias: steeper slopes get more bias (prevents shadow acne on surfaces nearly perpendicular to light)
212+
float slope_bias = slope * g_slope_bias_scale * texel_size;
213+
214+
// constant bias: minimum offset to prevent self-shadowing
215+
float constant_bias = g_constant_bias_scale * base_bias * texel_size;
216+
217+
// combine biases and apply along surface normal
218+
float total_bias = constant_bias + slope_bias;
219+
float3 normal_offset = surface.normal * total_bias;
220+
221+
// clamp to prevent excessive offset that could cause light leaking (conservative clamp based on texel size)
222+
float max_offset = texel_size * base_bias * 0.5f;
223+
normal_offset = clamp(normal_offset, -max_offset, max_offset);
224+
137225
return normal_offset;
138226
}
139227

140-
// compute shadow factor for a surface point from a light source
141-
// handles point lights (cube maps), directional lights (cascades), and spot lights
228+
// compute shadow factor for a surface point from a light source (handles point, directional, and spot lights)
142229
float compute_shadow(Surface surface, Light light)
143230
{
144231
float3 to_light = light.position - surface.position;
@@ -190,7 +277,7 @@ float compute_shadow(Surface surface, Light light)
190277
float shadow_near = vogel_depth(light, surface, float3(uv_near, near_cascade), ndc_near.z, 4.0f);
191278
shadow = shadow_near;
192279

193-
// directional lights: blend between near and far cascades
280+
// directional lights: blend between near and far cascades with improved transition
194281
if (light.is_directional())
195282
{
196283
const uint far_cascade = 1;
@@ -202,10 +289,21 @@ float compute_shadow(Surface surface, Light light)
202289
float2 uv_far = ndc_to_uv(ndc_far.xy);
203290
float shadow_far = vogel_depth(light, surface, float3(uv_far, far_cascade), ndc_far.z, 4.0f);
204291

205-
// blend cascades based on distance from cascade edge
206-
float edge_dist = max(abs(ndc_near.x), abs(ndc_near.y));
207-
float blend_factor = smoothstep(0.7f, 1.0f, edge_dist);
208-
shadow = lerp(shadow_near, shadow_far, blend_factor);
292+
// improved cascade blending: use distance from center and edge (smoother transitions, reduces popping)
293+
float2 ndc_dist_from_center = abs(ndc_near.xy);
294+
float max_dist = max(ndc_dist_from_center.x, ndc_dist_from_center.y);
295+
296+
// blend based on distance from cascade center (start at 60%, finish at 95% from center)
297+
float blend_start = 0.6f;
298+
float blend_end = 0.95f;
299+
float blend_factor = smoothstep(blend_start, blend_end, max_dist);
300+
301+
// consider depth difference for better quality (prefer near cascade when both valid, reduce blend when depths similar)
302+
float depth_diff = abs(ndc_near.z - ndc_far.z);
303+
float depth_factor = saturate(depth_diff * 10.0f);
304+
blend_factor *= depth_factor;
305+
306+
shadow = lerp(shadow_near, shadow_far, blend_factor);
209307
}
210308

211309
// apply range fade for spot lights

0 commit comments

Comments
 (0)