From a9e372607f4c136f28450c9a11a51668e73c82ee Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Thu, 12 Mar 2026 19:06:22 -0500 Subject: [PATCH 1/5] refactor: replace cross-product ray-triangle test with Moller-Trumbore barycentric algorithm The previous f_intersects_triangle used three cross products and sign checks against the triangle normal, which is winding-order dependent. If vertices are loaded in the wrong order, the sign flips and intersections are missed. Replace with the Moller-Trumbore algorithm, which computes barycentric coordinates (u, v) directly from the vertices. This is vertex winding-order independent, uses two cross products instead of three, and no longer depends on triangle%n. Also remove the now-dead tri%n copy in f_model_is_inside_flat. Co-Authored-By: Claude Opus 4.6 --- src/common/m_model.fpp | 59 ++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 36 deletions(-) diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index 642d3b9522..8265375979 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -619,7 +619,6 @@ contains nHits = 0 do q = 1, ntrs tri%v(:, :) = gpu_trs_v(:, :, q, pid) - tri%n(1:3) = gpu_trs_n(1:3, q, pid) nHits = nHits + f_intersects_triangle(ray, tri) end do ! if the ray intersected an odd number of times, we must be inside @@ -638,11 +637,13 @@ contains end function f_model_is_inside_flat - ! From https://www.scratchapixel.com/lessons/3e-basic-rendering/ray-tracing-rendering-a-triangle/ray-triangle-intersection-geometric-solution.html - !> This procedure checks if a ray intersects a triangle. + !> This procedure checks if a ray intersects a triangle using the + !! Moller-Trumbore algorithm (barycentric coordinates). Unlike the + !! previous cross-product sign test, this is vertex winding-order + !! independent. !! @param ray Ray. !! @param triangle Triangle. - !! @return True if the ray intersects the triangle, false otherwise. + !! @return 1 if the ray intersects the triangle, 0 otherwise. function f_intersects_triangle(ray, triangle) result(intersects) $:GPU_ROUTINE(parallelism='[seq]') @@ -652,46 +653,32 @@ contains integer :: intersects - real(wp) :: N(3), P(3), C(3), edge(3), vp(3) - real(wp) :: d, t, NdotRayDirection + real(wp) :: edge1(3), edge2(3), h(3), s(3), q(3) + real(wp) :: a, f, u, v, t intersects = 0 - N(1:3) = triangle%n(1:3) - NdotRayDirection = dot_product(N(1:3), ray%d(1:3)) - if (abs(NdotRayDirection) < 0.0000001_wp) then - return - end if + edge1 = triangle%v(2, :) - triangle%v(1, :) + edge2 = triangle%v(3, :) - triangle%v(1, :) + h = f_cross(ray%d, edge2) + a = dot_product(edge1, h) - d = -sum(N(:)*triangle%v(1, :)) - t = -(sum(N(:)*ray%o(:)) + d)/NdotRayDirection - if (t < 0) then - return - end if + if (abs(a) < 1e-7_wp) return - P = ray%o + t*ray%d - edge = triangle%v(2, :) - triangle%v(1, :) - vp = P - triangle%v(1, :) - C = f_cross(edge, vp) - if (sum(N(:)*C(:)) < 0) then - return - end if + f = 1.0_wp/a + s = ray%o - triangle%v(1, :) + u = f*dot_product(s, h) - edge = triangle%v(3, :) - triangle%v(2, :) - vp = P - triangle%v(2, :) - C = f_cross(edge, vp) - if (sum(N(:)*C(:)) < 0) then - return - end if + if (u < 0.0_wp .or. u > 1.0_wp) return - edge = triangle%v(1, :) - triangle%v(3, :) - vp = P - triangle%v(3, :) - C = f_cross(edge, vp) - if (sum(N(:)*C(:)) < 0) then - return - end if + q = f_cross(s, edge1) + v = f*dot_product(ray%d, q) + + if (v < 0.0_wp .or. u + v > 1.0_wp) return + + t = f*dot_product(edge2, q) - intersects = 1 + if (t > 0.0_wp) intersects = 1 end function f_intersects_triangle From a1d3d9e8ea2b5a90657e241bfdcd9d5931e77f78 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Thu, 12 Mar 2026 19:27:56 -0500 Subject: [PATCH 2/5] refactor: replace ray casting with generalized winding number for inside/outside test Replace the 26-ray casting approach in f_model_is_inside_flat with the generalized winding number (Jacobson et al., SIGGRAPH 2013). The winding number sums the solid angle subtended by each triangle at the query point using the Van Oosterom-Strackee formula. This is robust to small triangles: a tiny triangle contributes a proportionally small solid angle rather than being missed entirely by rays. It is also winding-order independent and branch-free in the inner loop, which is GPU-friendly. Co-Authored-By: Claude Opus 4.6 --- src/common/m_model.fpp | 82 ++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index 8265375979..e6849cb402 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -583,11 +583,16 @@ contains end function f_model_is_inside - !> This procedure, given a cell center will determine if a point exists instide a surface - !! @param ntrs Number of triangles in the model - !! @param pid Patch ID od this model + !> This procedure determines if a point is inside a surface using + !! the generalized winding number (Jacobson et al., SIGGRAPH 2013). + !! The winding number is the sum of signed solid angles subtended + !! by each triangle, normalized by 4*pi. Returns ~1.0 inside, + !! ~0.0 outside. Unlike ray casting, this is robust to small + !! triangles and vertex winding order. + !! @param ntrs Number of triangles in the model. + !! @param pid Patch ID of this model. !! @param point Point to test. - !! @return fraction The perfentage of candidate rays cast indicate that we are inside the model + !! @return fraction Winding number (~1.0 inside, ~0.0 outside). function f_model_is_inside_flat(ntrs, pid, point) result(fraction) $:GPU_ROUTINE(parallelism='[seq]') @@ -597,43 +602,44 @@ contains real(wp), dimension(1:3), intent(in) :: point real(wp) :: fraction - type(t_ray) :: ray - type(t_triangle) :: tri - integer :: i, j, k, q, nInOrOut, nHits - ! cast 26 rays from the point and count the number at leave the boundary - nInOrOut = 0 - do i = -1, 1 - do j = -1, 1 - do k = -1, 1 - if (i /= 0 .or. j /= 0 .or. k /= 0) then - ! We cannot get inersections if the ray is exactly in line with triangle plane - if (p == 0 .and. k == 0) cycle - - ! generate the ray - ray%o = point - ray%d(:) = [real(i, wp), real(j, wp), real(k, wp)] - ray%d = ray%d/sqrt(real(abs(i) + abs(j) + abs(k), wp)) - - ! count the number of intersections - nHits = 0 - do q = 1, ntrs - tri%v(:, :) = gpu_trs_v(:, :, q, pid) - nHits = nHits + f_intersects_triangle(ray, tri) - end do - ! if the ray intersected an odd number of times, we must be inside - nInOrOut = nInOrOut + mod(nHits, 2) - end if - end do - end do + real(wp) :: r1(3), r2(3), r3(3) + real(wp) :: r1_mag, r2_mag, r3_mag + real(wp) :: numerator, denominator + integer :: q + + fraction = 0.0_wp + + do q = 1, ntrs + r1 = gpu_trs_v(1, :, q, pid) - point + r2 = gpu_trs_v(2, :, q, pid) - point + r3 = gpu_trs_v(3, :, q, pid) - point + + r1_mag = sqrt(dot_product(r1, r1)) + r2_mag = sqrt(dot_product(r2, r2)) + r3_mag = sqrt(dot_product(r3, r3)) + + ! Van Oosterom-Strackee formula: + ! tan(Omega/2) = numerator / denominator + ! numerator = scalar triple product r1 . (r2 x r3) + numerator = r1(1)*(r2(2)*r3(3) - r2(3)*r3(2)) & + + r1(2)*(r2(3)*r3(1) - r2(1)*r3(3)) & + + r1(3)*(r2(1)*r3(2) - r2(2)*r3(1)) + + denominator = r1_mag*r2_mag*r3_mag & + + dot_product(r1, r2)*r3_mag & + + dot_product(r2, r3)*r1_mag & + + dot_product(r3, r1)*r2_mag + + ! Solid angle = 2 * atan2(num, den). + ! atan2(0,0) = 0 per IEEE 754, so degenerate triangles + ! contribute nothing without special casing. + fraction = fraction + atan2(numerator, denominator) end do - if (p == 0) then - ! in 2D, we skipped 8 rays - fraction = real(nInOrOut)/18._wp - else - fraction = real(nInOrOut)/26._wp - end if + ! Winding number = total solid angle / (4 * pi) + ! Each triangle contributes 2*atan2, so sum / (2*pi) + fraction = fraction/(2.0_wp*acos(-1.0_wp)) end function f_model_is_inside_flat From 28d8d500ffa8a0249db6ba61bb3023c6a3d55356 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Thu, 12 Mar 2026 19:39:08 -0500 Subject: [PATCH 3/5] fix: add 2D winding number branch for p==0 simulations The 3D solid angle (Van Oosterom-Strackee) degenerates when all triangles are coplanar (z=0), producing near-zero winding numbers everywhere. For 2D (p==0), use the 2D winding number instead: sum the signed angle subtended by each boundary edge via atan2(cross,dot). This uses the existing gpu_boundary_v and gpu_boundary_edge_count arrays that are already computed and uploaded for 2D STL models. Co-Authored-By: Claude Opus 4.6 --- src/common/m_model.fpp | 85 ++++++++++++++++++++++++++---------------- 1 file changed, 53 insertions(+), 32 deletions(-) diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index e6849cb402..e84ae93f02 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -585,10 +585,11 @@ contains !> This procedure determines if a point is inside a surface using !! the generalized winding number (Jacobson et al., SIGGRAPH 2013). - !! The winding number is the sum of signed solid angles subtended - !! by each triangle, normalized by 4*pi. Returns ~1.0 inside, + !! In 3D, sums the solid angle subtended by each triangle (Van + !! Oosterom-Strackee formula). In 2D (p==0), sums the signed + !! angle subtended by each boundary edge. Returns ~1.0 inside, !! ~0.0 outside. Unlike ray casting, this is robust to small - !! triangles and vertex winding order. + !! triangles/edges and vertex winding order. !! @param ntrs Number of triangles in the model. !! @param pid Patch ID of this model. !! @param point Point to test. @@ -606,40 +607,60 @@ contains real(wp) :: r1(3), r2(3), r3(3) real(wp) :: r1_mag, r2_mag, r3_mag real(wp) :: numerator, denominator + real(wp) :: d1(2), d2(2) integer :: q fraction = 0.0_wp - do q = 1, ntrs - r1 = gpu_trs_v(1, :, q, pid) - point - r2 = gpu_trs_v(2, :, q, pid) - point - r3 = gpu_trs_v(3, :, q, pid) - point - - r1_mag = sqrt(dot_product(r1, r1)) - r2_mag = sqrt(dot_product(r2, r2)) - r3_mag = sqrt(dot_product(r3, r3)) - - ! Van Oosterom-Strackee formula: - ! tan(Omega/2) = numerator / denominator - ! numerator = scalar triple product r1 . (r2 x r3) - numerator = r1(1)*(r2(2)*r3(3) - r2(3)*r3(2)) & - + r1(2)*(r2(3)*r3(1) - r2(1)*r3(3)) & - + r1(3)*(r2(1)*r3(2) - r2(2)*r3(1)) - - denominator = r1_mag*r2_mag*r3_mag & - + dot_product(r1, r2)*r3_mag & - + dot_product(r2, r3)*r1_mag & - + dot_product(r3, r1)*r2_mag - - ! Solid angle = 2 * atan2(num, den). - ! atan2(0,0) = 0 per IEEE 754, so degenerate triangles - ! contribute nothing without special casing. - fraction = fraction + atan2(numerator, denominator) - end do + if (p == 0) then + ! 2D winding number: sum signed angles subtended by + ! each boundary edge at the query point. + do q = 1, gpu_boundary_edge_count(pid) + d1(1) = gpu_boundary_v(q, 1, 1, pid) - point(1) + d1(2) = gpu_boundary_v(q, 1, 2, pid) - point(2) + d2(1) = gpu_boundary_v(q, 2, 1, pid) - point(1) + d2(2) = gpu_boundary_v(q, 2, 2, pid) - point(2) + + ! Signed angle = atan2(d1 x d2, d1 . d2) + fraction = fraction + atan2( & + d1(1)*d2(2) - d1(2)*d2(1), & + d1(1)*d2(1) + d1(2)*d2(2)) + end do + + ! 2D winding number = total angle / (2*pi) + fraction = fraction/(2.0_wp*acos(-1.0_wp)) + else + ! 3D winding number: sum solid angles via Van + ! Oosterom-Strackee formula. + do q = 1, ntrs + r1 = gpu_trs_v(1, :, q, pid) - point + r2 = gpu_trs_v(2, :, q, pid) - point + r3 = gpu_trs_v(3, :, q, pid) - point + + r1_mag = sqrt(dot_product(r1, r1)) + r2_mag = sqrt(dot_product(r2, r2)) + r3_mag = sqrt(dot_product(r3, r3)) + + ! tan(Omega/2) = numerator / denominator + ! numerator = scalar triple product r1 . (r2 x r3) + numerator = r1(1)*(r2(2)*r3(3) - r2(3)*r3(2)) & + + r1(2)*(r2(3)*r3(1) - r2(1)*r3(3)) & + + r1(3)*(r2(1)*r3(2) - r2(2)*r3(1)) + + denominator = r1_mag*r2_mag*r3_mag & + + dot_product(r1, r2)*r3_mag & + + dot_product(r2, r3)*r1_mag & + + dot_product(r3, r1)*r2_mag + + ! atan2(0,0) = 0 per IEEE 754, so degenerate + ! triangles contribute nothing. + fraction = fraction + atan2(numerator, denominator) + end do - ! Winding number = total solid angle / (4 * pi) - ! Each triangle contributes 2*atan2, so sum / (2*pi) - fraction = fraction/(2.0_wp*acos(-1.0_wp)) + ! Winding number = total solid angle / (4*pi) + ! Each triangle contributes 2*atan2, so sum / (2*pi) + fraction = fraction/(2.0_wp*acos(-1.0_wp)) + end if end function f_model_is_inside_flat From a819dd9fab4a61e11687094ac72f8507c67ff508 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Thu, 12 Mar 2026 19:48:47 -0500 Subject: [PATCH 4/5] fix: guard degenerate triangles explicitly and fix lint Add explicit tiny() guard for degenerate triangles (zero-area or query point on vertex) instead of relying on atan2(0,0) behavior, which is compiler-dependent. Add comment explaining the Moller-Trumbore epsilon threshold. Co-Authored-By: Claude Opus 4.6 --- src/common/m_model.fpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index e84ae93f02..655cddadad 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -641,6 +641,10 @@ contains r2_mag = sqrt(dot_product(r2, r2)) r3_mag = sqrt(dot_product(r3, r3)) + ! Skip degenerate triangles (zero-area or query + ! point coincident with a vertex). + if (r1_mag*r2_mag*r3_mag < tiny(1.0_wp)) cycle + ! tan(Omega/2) = numerator / denominator ! numerator = scalar triple product r1 . (r2 x r3) numerator = r1(1)*(r2(2)*r3(3) - r2(3)*r3(2)) & @@ -652,8 +656,6 @@ contains + dot_product(r2, r3)*r1_mag & + dot_product(r3, r1)*r2_mag - ! atan2(0,0) = 0 per IEEE 754, so degenerate - ! triangles contribute nothing. fraction = fraction + atan2(numerator, denominator) end do @@ -690,6 +692,8 @@ contains h = f_cross(ray%d, edge2) a = dot_product(edge1, h) + ! Ray nearly parallel to triangle plane; threshold matches + ! the original implementation and is safe for wp precision. if (abs(a) < 1e-7_wp) return f = 1.0_wp/a From 58273b10ed6c8fb586b5dfa42de799cdcec0c1c3 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Thu, 12 Mar 2026 19:57:16 -0500 Subject: [PATCH 5/5] fix: precision-safe epsilon, accurate comments per review - Use max(1e-7, 10*epsilon) for Moller-Trumbore parallel-ray threshold so single-precision builds get a sensible floor above machine epsilon. - Fix comment on degenerate guard: only fires for vertex-coincident query points (subnormal/zero magnitudes), not general degeneracy. - Fix winding number normalization comment: each atan2 returns Omega/2, so dividing by 2*pi gives sum(Omega)/(4*pi). Co-Authored-By: Claude Opus 4.6 --- src/common/m_model.fpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index 655cddadad..f655130662 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -641,8 +641,8 @@ contains r2_mag = sqrt(dot_product(r2, r2)) r3_mag = sqrt(dot_product(r3, r3)) - ! Skip degenerate triangles (zero-area or query - ! point coincident with a vertex). + ! Skip if query point is coincident with a vertex + ! (magnitudes are zero/subnormal). if (r1_mag*r2_mag*r3_mag < tiny(1.0_wp)) cycle ! tan(Omega/2) = numerator / denominator @@ -659,8 +659,8 @@ contains fraction = fraction + atan2(numerator, denominator) end do - ! Winding number = total solid angle / (4*pi) - ! Each triangle contributes 2*atan2, so sum / (2*pi) + ! Each atan2 returns Omega/2 per triangle; divide + ! by 2*pi to get winding number = sum(Omega)/(4*pi). fraction = fraction/(2.0_wp*acos(-1.0_wp)) end if @@ -692,9 +692,9 @@ contains h = f_cross(ray%d, edge2) a = dot_product(edge1, h) - ! Ray nearly parallel to triangle plane; threshold matches - ! the original implementation and is safe for wp precision. - if (abs(a) < 1e-7_wp) return + ! Ray nearly parallel to triangle plane. In single precision + ! builds epsilon(1.0) ~ 1.2e-7, so use 10*epsilon as a floor. + if (abs(a) < max(1e-7_wp, 10.0_wp*epsilon(1.0_wp))) return f = 1.0_wp/a s = ray%o - triangle%v(1, :)