@@ -23,68 +23,128 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2323#include "common.hlsl"
2424//====================
2525
// shadow mapping tuning constants
// note: the vogel disk samples below are spaced by the golden angle (2.399963 radians)

// filtering
static const uint  g_shadow_sample_count            = 4;       // taps used when filtering the shadow map
static const float g_shadow_filter_size             = 1.0f;    // base filter size (x4 for directional lights)
static const float g_shadow_cascade_blend_threshold = 0.8f;    // cascade transition threshold
static const float g_shadow_sample_reciprocal       = 1.0f / (float)g_shadow_sample_count;

// penumbra estimation
static const uint  g_penumbra_sample_count          = 8;       // taps used by the blocker search
static const float g_penumbra_filter_size           = 128.0f;  // penumbra search radius
static const float g_minimum_penumbra_size          = 0.5f;    // lower clamp, prevents fully hard shadows
static const float g_maximum_penumbra_size          = 64.0f;   // upper clamp
static const float g_contact_hardening_factor       = 0.5f;    // contact hardening strength (0=soft, 1=hard)

// depth bias
static const float g_slope_bias_scale               = 2.0f;    // slope-scaled bias multiplier
static const float g_constant_bias_scale            = 0.0001f; // constant bias scale
38+
// vogel disk lookup table for shadow filtering (g_shadow_sample_count entries)
// entry i = sqrt(i + 0.5) / sqrt(count) * (cos(i * 2.399963), sin(i * 2.399963))
// the table is unrotated, the per-pixel rotation is applied when a sample is fetched
static const float2 g_vogel_samples_shadow[g_shadow_sample_count] =
{
    float2( 0.353553f,  0.000000f), // i = 0
    float2(-0.451544f,  0.413652f), // i = 1
    float2( 0.069116f, -0.787542f), // i = 2
    float2( 0.569143f,  0.742345f)  // i = 3
};
3447
// vogel disk lookup table for the penumbra blocker search (g_penumbra_sample_count entries)
// entry i = sqrt(i + 0.5) / sqrt(count) * (cos(i * 2.399963), sin(i * 2.399963))
// the table is unrotated, the per-pixel rotation is applied when a sample is fetched
static const float2 g_vogel_samples_penumbra[g_penumbra_sample_count] =
{
    float2( 0.250000f,  0.000000f), // i = 0
    float2(-0.319290f,  0.292496f), // i = 1
    float2( 0.048872f, -0.556877f), // i = 2
    float2( 0.402445f,  0.524917f), // i = 3
    float2(-0.738535f, -0.130636f), // i = 4
    float2( 0.699604f, -0.445032f), // i = 5
    float2(-0.234003f,  0.870484f), // i = 6
    float2(-0.446273f, -0.859268f)  // i = 7
};
59+
// applies the standard 2d rotation matrix for the given angle (radians) to v
// | cos -sin |
// | sin  cos |
float2 rotate_2d(float2 v, float angle)
{
    float s, c;
    sincos(angle, s, c);

    float2 rotated;
    rotated.x = v.x * c - v.y * s;
    rotated.y = v.x * s + v.y * c;

    return rotated;
}
70+
// returns point sample_index of a vogel (golden angle spiral) disk made of sample_count points,
// rotated by angle (radians)
// - sample_count equal to g_shadow_sample_count or g_penumbra_sample_count reads the matching
//   pre-computed table and rotates the entry
// - any other sample_count evaluates the spiral directly, with angle folded into theta
// sample_index is expected to be smaller than sample_count (the table reads are not range checked)
float2 vogel_disk_sample(uint sample_index, uint sample_count, float angle)
{
    // note: this local must not be named "sample", that is an hlsl keyword (interpolation modifier)
    float2 disk_point;

    if (sample_count == g_shadow_sample_count)
    {
        disk_point = g_vogel_samples_shadow[sample_index];
    }
    else if (sample_count == g_penumbra_sample_count)
    {
        disk_point = g_vogel_samples_penumbra[sample_index];
    }
    else
    {
        // fallback for sample counts without a table
        const float golden_angle = 2.399963f; // radians

        // max() keeps a sample_count of 0 from dividing by zero
        float radius = sqrt(sample_index + 0.5f) / sqrt((float)max(sample_count, 1u));
        float theta  = sample_index * golden_angle + angle;

        float sine, cosine;
        sincos(theta, sine, cosine);

        // the rotation is already part of theta, so return directly
        return float2(cosine, sine) * radius;
    }

    // table entries are stored unrotated
    return rotate_2d(disk_point, angle);
}
4799
// estimates a penumbra scale for adaptive shadow filtering using a pcss-style blocker search
// light          : light whose shadow map is sampled (sample_depth) and whose atlas_texel_size[0].x sizes the search
// rotation_angle : rotation (radians) applied to the vogel disk pattern
// sample_coords  : shadow map coordinates of the receiver, xy is offset per tap, z is passed through
// receiver_depth : depth of the receiver, a tap counts as a blocker when its depth is greater (plus a small threshold)
// light_distance : distance to the light, saturate(light_distance * 0.1) scales the result
// returns        : penumbra clamped to [g_minimum_penumbra_size, g_maximum_penumbra_size]
//                  vogel_depth multiplies this by the atlas texel size, so the value is in texels
float compute_penumbra(Light light, float rotation_angle, float3 sample_coords, float receiver_depth, float light_distance)
{
    float penumbra          = g_minimum_penumbra_size; // returned as is when no blocker is found
    float blocker_depth_sum = 0.0f;
    uint  blocker_count     = 0;
    float search_radius     = g_penumbra_filter_size * light.atlas_texel_size[0].x; // uv units

    // search for blockers in the neighborhood of the receiver
    for (uint i = 0; i < g_penumbra_sample_count; i++)
    {
        float2 offset = vogel_disk_sample(i, g_penumbra_sample_count, rotation_angle) * search_radius;
        float depth   = light.sample_depth(sample_coords + float3(offset, 0.0f));

        // accumulate blockers (depth > receiver means blocker is closer to light, threshold avoids noise)
        if (depth > receiver_depth + 0.0001f)
        {
            blocker_depth_sum += depth;
            blocker_count++;
        }
    }

    if (blocker_count > 0)
    {
        float blocker_depth_avg = blocker_depth_sum / float(blocker_count);

        // every accumulated blocker has depth > receiver_depth, so the gap is blocker - receiver
        // (receiver - blocker is always negative here and would make the branch below unreachable)
        float depth_difference = blocker_depth_avg - receiver_depth;

        // avoid division by zero and negative penumbra
        if (depth_difference > 0.0f && blocker_depth_avg > 0.0f)
        {
            // relative gap, scaled by the light distance factor and expressed in texels:
            // the clamp range below and the caller both treat the result as a texel multiplier,
            // so the texel-space search size is used here rather than the uv-space search_radius
            float light_size_factor = saturate(light_distance * 0.1f);
            penumbra                = (depth_difference / blocker_depth_avg) * light_size_factor * g_penumbra_filter_size;

            // contact hardening: shrink the penumbra when the blocker is very close to the receiver
            float contact_factor = saturate(depth_difference * 1000.0f);
            penumbra             = lerp(penumbra * g_contact_hardening_factor, penumbra, contact_factor);
        }
    }

    // clamp penumbra to the supported range
    return clamp(penumbra, g_minimum_penumbra_size, g_maximum_penumbra_size);
}
85146
// computes a filtered shadow factor (1.0 = lit, 0.0 = shadowed) with vogel disk sampling
// light                  : light providing compare_depth, atlas_texel_size, is_directional and position
// surface                : receiver, surface.pos seeds the per-pixel rotation, surface.position is used for the light distance
// sample_coords          : shadow map coordinates of the receiver, xy is offset per tap, z is passed through
// receiver_depth         : depth compared against the shadow map
// filter_size_multiplier : extra scale applied to the filter size
// returns                : average of g_shadow_sample_count taps, always within [0, 1]
float vogel_depth(Light light, Surface surface, float3 sample_coords, float receiver_depth, float filter_size_multiplier = 1.0f)
{
    float shadow_factor = 0.0f;

    // per-pixel rotation of the sample pattern, derived from interleaved gradient noise
    float temporal_offset = noise_interleaved_gradient(surface.pos);
    float temporal_angle  = temporal_offset * PI2;

    // light distance for the penumbra estimate (directional lights use a fixed 1000.0)
    float light_distance = light.is_directional() ? 1000.0f : length(surface.position - light.position);

    // estimate penumbra size for adaptive filtering
    float penumbra = compute_penumbra(light, temporal_angle, sample_coords, receiver_depth, light_distance);

    // the filter size does not depend on the tap index, so compute it once
    float2 filter_size = light.atlas_texel_size[0] * g_shadow_filter_size * filter_size_multiplier * penumbra;

    for (uint i = 0; i < g_shadow_sample_count; i++)
    {
        float2 offset    = vogel_disk_sample(i, g_shadow_sample_count, temporal_angle) * filter_size;
        float2 sample_uv = sample_coords.xy + offset;

        // fade towards lit over the outer 10% of the shadow map
        // fade_factor is 0 on and outside the border, so out-of-bounds taps count as fully lit
        // and the result stays continuous across the edge
        float2 uv_clamped = clamp(sample_uv, 0.0f, 1.0f);
        float2 fade_dist  = min(sample_uv, 1.0f - sample_uv) * 10.0f;
        float fade_factor = saturate(min(fade_dist.x, fade_dist.y));

        // compare depths: 1.0 = lit, 0.0 = shadowed (clamped uv keeps the lookup inside the map)
        float depth_sample = light.compare_depth(float3(uv_clamped, sample_coords.z), receiver_depth);

        shadow_factor += lerp(1.0f, depth_sample, fade_factor);
    }

    // every tap contributes a value in [0, 1], so average over the fixed tap count
    // (dividing by the number of in-bounds taps only could return values above 1.0)
    return shadow_factor * g_shadow_sample_reciprocal;
}
120193
// computes an offset along the surface normal that combines a constant and a slope-scaled bias
// surface       : receiver, uses surface.normal and surface.position
// light         : uses is_directional, forward, position and atlas_texel_size[cascade_index].x
// cascade_index : selects the texel size, index 0 uses a base bias of 60, any other index 600
// returns       : surface.normal * (constant bias + slope bias), clamped per component to
//                 +/- 0.5 * base bias * texel size
float3 compute_normal_offset(Surface surface, Light light, uint cascade_index)
{
    // light direction (only the squared cosine is used below, so its sign does not matter)
    float3 light_dir = light.is_directional() ? normalize(-light.forward.xyz) :
                                                normalize(surface.position - light.position);

    // surface slope: sine of the angle between normal and light direction
    // saturate guards the sqrt, rounding can push n_dot_l^2 slightly above 1 and sqrt of a negative value is nan
    float n_dot_l = dot(surface.normal, light_dir);
    float slope   = sqrt(saturate(1.0f - n_dot_l * n_dot_l));

    // base bias per cascade: larger bias for far cascades (coarser resolution)
    float base_bias = (cascade_index == 0) ? 60.0f : 600.0f;

    // texel size of the selected cascade (x component only)
    float texel_size = light.atlas_texel_size[cascade_index].x;

    // slope-scaled bias: grows with the sine computed above
    float slope_bias = slope * g_slope_bias_scale * texel_size;

    // constant bias: minimum offset applied regardless of slope
    float constant_bias = g_constant_bias_scale * base_bias * texel_size;

    // combine biases and apply along surface normal
    float total_bias     = constant_bias + slope_bias;
    float3 normal_offset = surface.normal * total_bias;

    // clamp to prevent excessive offset that could cause light leaking
    float max_offset = texel_size * base_bias * 0.5f;
    normal_offset    = clamp(normal_offset, -max_offset, max_offset);

    return normal_offset;
}
139227
140- // compute shadow factor for a surface point from a light source
141- // handles point lights (cube maps), directional lights (cascades), and spot lights
228+ // compute shadow factor for a surface point from a light source (handles point, directional, and spot lights)
142229float compute_shadow (Surface surface, Light light)
143230{
144231 float3 to_light = light.position - surface.position;
@@ -190,7 +277,7 @@ float compute_shadow(Surface surface, Light light)
190277 float shadow_near = vogel_depth (light, surface, float3 (uv_near, near_cascade), ndc_near.z, 4.0f );
191278 shadow = shadow_near;
192279
193- // directional lights: blend between near and far cascades
280+ // directional lights: blend between near and far cascades with improved transition
194281 if (light.is_directional ())
195282 {
196283 const uint far_cascade = 1 ;
@@ -202,10 +289,21 @@ float compute_shadow(Surface surface, Light light)
202289 float2 uv_far = ndc_to_uv (ndc_far.xy);
203290 float shadow_far = vogel_depth (light, surface, float3 (uv_far, far_cascade), ndc_far.z, 4.0f );
204291
205- // blend cascades based on distance from cascade edge
206- float edge_dist = max (abs (ndc_near.x), abs (ndc_near.y));
207- float blend_factor = smoothstep (0.7f , 1.0f , edge_dist);
208- shadow = lerp (shadow_near, shadow_far, blend_factor );
292+ // improved cascade blending: use distance from center and edge (smoother transitions, reduces popping)
293+ float2 ndc_dist_from_center = abs (ndc_near.xy);
294+ float max_dist = max (ndc_dist_from_center.x, ndc_dist_from_center.y);
295+
296+ // blend based on distance from cascade center (start at 60%, finish at 95% from center)
297+ float blend_start = 0.6f ;
298+ float blend_end = 0.95f ;
299+ float blend_factor = smoothstep (blend_start, blend_end, max_dist);
300+
301+ // consider depth difference for better quality (prefer near cascade when both valid, reduce blend when depths similar)
302+ float depth_diff = abs (ndc_near.z - ndc_far.z);
303+ float depth_factor = saturate (depth_diff * 10.0f );
304+ blend_factor *= depth_factor;
305+
306+ shadow = lerp (shadow_near, shadow_far, blend_factor );
209307 }
210308
211309 // apply range fade for spot lights
0 commit comments