Improve mipmapping accuracy (#12)

Lilaa3 · web-flow · commit 21436dec1116 · 2025-11-28T18:48:20.000Z
* change texture shader location

* quantize lod

* first iteration of the parallel rdp code (surprisingly less branch-happy than rt64)

* fix naming

* first round (working)

* clamp const bool

* bye bye more branches

* mipBase

* pass in more arguments

* we only need this later

* one less branch

* move comment

* fix default tex size

* Update textures.glsl

* minLod

* update from main
diff --git a/common.py b/common.py
@@ -69,6 +69,7 @@ def get_scene_render_state(scene: bpy.types.Scene):
         convert=quantize_tuple(f64render_rs.default_convert, 9.0, -1.0, 1.0),
         cc=SOLID_CC,
         tex_confs=([get_tile_conf(getattr(f64render_rs, f"default_tex{i}")) for i in range(0, 8)]),
+        tex_size=(32, 32),
     )
     state.lights[0] = F64Light(
         quantize_srgb(fast64_rs.light0Color, force_alpha=True), quantize_direction(fast64_rs.light0Direction)
diff --git a/material/parser.py b/material/parser.py
@@ -394,7 +394,7 @@ def f64_material_parse(f3d_mat: "F3DMaterialProperty", always_set: bool, set_lig
         state.cc = get_cc_settings(f3d_mat)
     if always_set or (f3d_mat.set_prim and cc_uses["Primitive"]):
         state.prim_color = quantize_srgb(f3d_mat.prim_color)
-        state.prim_lod = (f3d_mat.prim_lod_frac, f3d_mat.prim_lod_min)
+        state.prim_lod = quantize_tuple((f3d_mat.prim_lod_frac, f3d_mat.prim_lod_min), 8)
     if always_set or (f3d_mat.set_env and cc_uses["Environment"]):
         state.env_color = quantize_srgb(f3d_mat.env_color)
     if always_set or (f3d_mat.set_key and cc_uses["Key"]):  # extra 0 for alignment
diff --git a/renderer.py b/renderer.py
@@ -1,3 +1,4 @@
+from io import StringIO
 import math
 import pathlib
 import time
@@ -97,24 +98,24 @@ def init_shader(self, scene: bpy.types.Scene):
         print("Compiling shader")
 
         shaderPath = (pathlib.Path(__file__).parent / "shader").resolve()
-        shaderVert = ""
-        shaderFrag = ""
-
-        with open(shaderPath / "utils.glsl", "r", encoding="utf-8") as f:
-            shaderUtils = f.read()
-            shaderVert += shaderUtils
-            shaderFrag += shaderUtils
-
-        with open(shaderPath / "defines.glsl", "r", encoding="utf-8") as f:
-            shaderDef = f.read()
-            shaderVert += shaderDef
-            shaderFrag += shaderDef
-
-        with open(shaderPath / "main3d.vert.glsl", "r", encoding="utf-8") as f:
-            shaderVert += f.read()
+        shaderVert = StringIO()
+        shaderFrag = StringIO()
+
+        general_shaders = ("utils.glsl", "defines.glsl")
+        vertex_shaders = ("main3d.vert.glsl",)
+        frag_shaders = (
+            "textures.glsl",
+            "main3d.frag.glsl",
+        )
 
-        with open(shaderPath / "main3d.frag.glsl", "r", encoding="utf-8") as f:
-            shaderFrag += f.read()
+        for shader in general_shaders + vertex_shaders:
+            with open(shaderPath / shader, "r", encoding="utf-8") as f:
+                shaderVert.write(f.read())
+                shaderVert.write("\n")
+        for shader in general_shaders + frag_shaders:
+            with open(shaderPath / shader, "r", encoding="utf-8") as f:
+                shaderFrag.write(f.read())
+                shaderFrag.write("\n")
 
         shader_info = gpu.types.GPUShaderCreateInfo()
 
@@ -158,8 +159,8 @@ def init_shader(self, scene: bpy.types.Scene):
         else:
             shader_info.fragment_out(0, "VEC4", "FragColor")
 
-        shader_info.vertex_source(shaderVert)
-        shader_info.fragment_source(shaderFrag)
+        shader_info.vertex_source(shaderVert.getvalue())
+        shader_info.fragment_source(shaderFrag.getvalue())
 
         self.shader = gpu.shader.create_from_info(shader_info)
         self.shader_fallback = gpu.shader.from_builtin(
diff --git a/shader/main3d.frag.glsl b/shader/main3d.frag.glsl
@@ -3,129 +3,9 @@
   #extension GL_ARB_fragment_shader_interlock : enable
   layout(pixel_interlock_unordered) in;
 #endif
-#ifdef GL_ARB_derivative_control
-  #extension GL_ARB_derivative_control : enable
-#endif
 
 #define DECAL_DEPTH_DELTA 100
 
-vec4 quantize3Bit(in vec4 color) {
-  return vec4(round(color.rgb * 8.0) / 8.0, step(0.5, color.a));
-}
-
-vec4 quantize4Bit(in vec4 color) {
-  return round(color * 16.0) / 16.0; // (16 seems more accurate than 15)
-}
-
-vec4 quantizeTexture(uint flags, vec4 color) {
-  vec4 colorQuant = flagSelect(flags, TEX_FLAG_4BIT, color, quantize4Bit(color));
-  colorQuant = flagSelect(flags, TEX_FLAG_3BIT, colorQuant, quantize3Bit(colorQuant));
-  colorQuant.rgb = linearToGamma(colorQuant.rgb);
-  return flagSelect(flags, TEX_FLAG_MONO, colorQuant.rgba, colorQuant.rrrr);
-}
-
-vec4 sampleSampler(in const sampler2D tex, in const TileConf tileConf, in vec2 uvCoord, in const uint texFilter) {
-  // https://github.com/rt64/rt64/blob/61aa08f517cd16c1dbee4e097768b08e2a060307/src/shaders/TextureSampler.hlsli#L156-L276
-  const ivec2 texSize = textureSize(tex, 0);
-
-  uvCoord *= tileConf.shift;
-
-#ifdef SIMULATE_LOW_PRECISION
-  // Simulates the lower precision of the hardware's coordinate interpolation.
-  uvCoord = round(uvCoord * LOW_PRECISION) / LOW_PRECISION;
-#endif
-
-  uvCoord -= tileConf.low;
-
-  const vec2 isClamp      = step(tileConf.mask, vec2(1.0)); // if mask is negated, clamp
-  const vec2 isMirror     = step(tileConf.high, vec2(0.0)); // if high is negated, mirror
-  const vec2 mask         = abs(tileConf.mask);
-  const vec2 highMinusLow = abs(tileConf.high) - abs(tileConf.low);
-
-  if (texFilter != G_TF_POINT) {
-    uvCoord -= 0.5 * tileConf.shift;
-    const vec2 texelBaseInt = floor(uvCoord);
-    const vec4 sample00 = wrappedMirrorSample(tex, texelBaseInt,              mask, highMinusLow, isClamp, isMirror);
-    const vec4 sample01 = wrappedMirrorSample(tex, texelBaseInt + vec2(0, 1), mask, highMinusLow, isClamp, isMirror);
-    const vec4 sample10 = wrappedMirrorSample(tex, texelBaseInt + vec2(1, 0), mask, highMinusLow, isClamp, isMirror);
-    const vec4 sample11 = wrappedMirrorSample(tex, texelBaseInt + vec2(1, 1), mask, highMinusLow, isClamp, isMirror);
-    const vec2 fracPart = uvCoord - texelBaseInt;
-#ifdef USE_LINEAR_FILTER
-    return quantizeTexture(tileConf.flags, mix(mix(sample00, sample10, fracPart.x), mix(sample01, sample11, fracPart.x), fracPart.y));
-#else
-    if (texFilter == G_TF_AVERAGE && all(lessThanEqual(vec2(1 / LOW_PRECISION), abs(fracPart - 0.5)))) {
-        return quantizeTexture(tileConf.flags, (sample00 + sample01 + sample10 + sample11) / 4.0f);
-    }
-    else {
-      // Originally written by ArthurCarvalho
-      // Sourced from https://www.emutalk.net/threads/emulating-nintendo-64-3-sample-bilinear-filtering-using-shaders.54215/
-      vec4 tri0 = mix(sample00, sample10, fracPart.x) + (sample01 - sample00) * fracPart.y;
-      vec4 tri1 = mix(sample11, sample01, 1.0 - fracPart.x) + (sample10 - sample11) * (1.0 - fracPart.y);
-      return quantizeTexture(tileConf.flags, mix(tri0, tri1, step(1.0, fracPart.x + fracPart.y)));
-    }
-#endif
-  }
-  else {
-    return quantizeTexture(tileConf.flags, wrappedMirrorSample(tex, ivec2(floor(uvCoord)), mask, highMinusLow, isClamp, isMirror));
-  }
-}
-
-vec4 sampleIndex(in const uint textureIndex, in const vec2 uvCoord, in const uint texFilter) {
-  TileConf tileConf = material.texConfs[textureIndex];
-  switch (textureIndex) {
-    default: return sampleSampler(tex0, tileConf, uvCoord, texFilter);
-    case 1: return sampleSampler(tex1, tileConf, uvCoord, texFilter);
-    case 2: return sampleSampler(tex2, tileConf, uvCoord, texFilter);
-    case 3: return sampleSampler(tex3, tileConf, uvCoord, texFilter);
-    case 4: return sampleSampler(tex4, tileConf, uvCoord, texFilter);
-    case 5: return sampleSampler(tex5, tileConf, uvCoord, texFilter);
-    case 6: return sampleSampler(tex6, tileConf, uvCoord, texFilter);
-    case 7: return sampleSampler(tex7, tileConf, uvCoord, texFilter);
-  }
-}
-
-float computeLOD(inout uint tileIndex0, inout uint tileIndex1) {
-  // https://github.com/rt64/rt64/blob/0ca92eeb6c2f58ce3581c65f87f7261b8ac0fea0/src/shaders/TextureSampler.hlsli#L18
-  if (textLOD() == G_TL_TILE)
-    return 1.0f;
-  const uint texDetail = textDetail();
-  const bool lodSharpen = texDetail == G_TD_SHARPEN;
-  const bool lodDetail = texDetail == G_TD_DETAIL;
-  const bool lodSharpDetail = lodSharpen || lodDetail;
-
-#ifdef GL_ARB_derivative_control
-  const vec2 dfd = abs(vec2(dFdxCoarse(inputUV.x), dFdyCoarse(inputUV.y)));
-#else
-  const vec2 dfd = abs(vec2(dFdx(inputUV.x), dFdy(inputUV.y)));
-#endif
-  float maxDst = max(dfd.x, dfd.y);
-
-  if (lodSharpDetail) 
-    maxDst = max(maxDst, material.primLod.y);
-
-  int tileBase = int(floor(log2(maxDst)));
-  float lodFraction = maxDst / pow(2, max(tileBase, 0)) - 1.0;
-
-  if (lodSharpen && maxDst < 1.0)
-    lodFraction = maxDst - 1.0;
-
-  if (lodDetail) {
-    if (lodFraction < 0.0)
-      lodFraction = maxDst;
-    tileBase += 1;
-  } else if (tileBase >= material.mipCount)
-    lodFraction = 1.0;
-
-  if (lodSharpDetail) 
-    tileBase = max(tileBase, 0);
-  else 
-    lodFraction = max(lodFraction, 0.0);
-
-  tileIndex0 = clamp(tileBase, 0, material.mipCount);
-  tileIndex1 = clamp(tileBase + 1, 0, material.mipCount);
-  return lodFraction;
-}
-
 vec3 cc_fetchColor(in int val, in vec4 shade, in vec4 comb, in float lodFraction, in vec4 texData0, in vec4 texData1)
 {
        if(val == CC_C_COMB       ) return comb.rgb;
@@ -264,9 +144,18 @@ void main()
 
   vec4 ccShade = geoModeSelect(G_SHADE_SMOOTH, cc_shade_flat, cc_shade);
 
+#ifdef GL_ARB_derivative_control
+  const vec2 dx = abs(vec2(dFdxCoarse(inputUV.x), dFdyCoarse(inputUV.x)));
+  const vec2 dy = abs(vec2(dFdxCoarse(inputUV.y), dFdyCoarse(inputUV.y)));
+#else
+  const vec2 dx = abs(vec2(dFdx(inputUV.x), dFdy(inputUV.x)));
+  const vec2 dy = abs(vec2(dFdx(inputUV.y), dFdy(inputUV.y)));
+#endif
+
   uint tex0Index = 0;
   uint tex1Index = 1;
-  const float lodFraction = computeLOD(tex0Index, tex1Index);
+  float lodFraction = 0.0;
+  computeLOD(tex0Index, tex1Index, textLOD(), textDetail(), material.primLod.y, dx, dy, false, lodFraction);
 
   vec4 texData0 = sampleIndex(tex0Index, inputUV, texFilter);
   vec4 texData1 = sampleIndex(tex1Index, inputUV, texFilter);
diff --git a/shader/textures.glsl b/shader/textures.glsl
@@ -0,0 +1,150 @@
+#ifdef GL_ARB_derivative_control
+  #extension GL_ARB_derivative_control : enable
+#endif
+
+vec4 quantize3Bit(in vec4 color) { // Snaps RGB to steps of 1/8 and reduces alpha to either 0.0 or 1.0.
+  return vec4(round(color.rgb * 8.0) / 8.0, step(0.5, color.a)); // step(0.5, a) is 1.0 when a >= 0.5, otherwise 0.0
+}
+
+vec4 quantize4Bit(in vec4 color) { // Snaps all four channels (alpha included) to steps of 1/16.
+  return round(color * 16.0) / 16.0; // (16 seems more accurate than 15)
+}
+
+vec4 quantizeTexture(uint flags, vec4 color) { // Post-processes a sampled texel according to the tile flags: optional 4-bit / 3-bit quantization, gamma conversion, optional mono.
+  vec4 colorQuant = flagSelect(flags, TEX_FLAG_4BIT, color, quantize4Bit(color)); // presumably yields the 4-bit result when TEX_FLAG_4BIT is set (flagSelect is defined outside this file; confirm argument order)
+  colorQuant = flagSelect(flags, TEX_FLAG_3BIT, colorQuant, quantize3Bit(colorQuant)); // same selection for TEX_FLAG_3BIT, applied on top of the previous step
+  colorQuant.rgb = linearToGamma(colorQuant.rgb); // applied unconditionally; alpha is left untouched
+  return flagSelect(flags, TEX_FLAG_MONO, colorQuant.rgba, colorQuant.rrrr); // the mono candidate broadcasts the red channel into all four components
+}
+
+vec2 mirrorUV(const vec2 uvIn, const vec2 uvBound) // Mirror-repeats uvIn per component: values repeat with period 2 * uvBound + 1 and are reflected in the upper part of each period.
+{
+    vec2 uvMod2 = mod(uvIn, uvBound * 2.0 + 1.0); // position inside one period
+    return mix(uvMod2, (uvBound * 2.0) - uvMod2, step(uvBound, uvMod2)); // step() is 0.0 or 1.0, so mix() acts as a select: reflected value once uvMod2 >= uvBound, unchanged otherwise
+}
+
+vec4 wrappedMirrorSample(const sampler2D tex, vec2 uv, const vec2 mask, const vec2 highMinusLow, const vec2 isClamp, const vec2 isMirror) // Fetches a single texel from mip level 0 of tex after an optional clamp, an optional mirror and a final wrap of uv.
+{
+  const ivec2 texSize = textureSize(tex, 0); // dimensions of mip level 0, in texels
+
+  // first apply clamping if enabled (clamp S/T, low S/T -> high S/T)
+  const vec2 uvClamp = clamp(uv, vec2(0.0), highMinusLow);
+  uv = mix(uv, uvClamp, isClamp); // isClamp is a per-axis blend weight; sampleSampler passes step() results, i.e. 0.0 or 1.0
+
+  // then mirror the result if needed (mirror S/T)
+  const vec2 uvMirror = mirrorUV(uv, mask - 0.5);
+  uv = mix(uv, uvMirror, isMirror); // same per-axis select pattern as the clamp above
+  
+  // finally wrap with mod (mask S/T), limited to the texture size; this is also done to avoid OOB texture access
+  uv = mod(uv, min(texSize, mask));
+
+  return texelFetch(tex, ivec2(floor(uv)), 0); // unfiltered fetch of one texel from mip level 0
+}
+
+vec4 sampleSampler(in const sampler2D tex, in const TileConf tileConf, in vec2 uvCoord, in const uint texFilter) { // Samples tex at uvCoord using the tile's shift/low/high/mask settings and the requested filter, then passes the result through quantizeTexture.
+  // https://github.com/rt64/rt64/blob/61aa08f517cd16c1dbee4e097768b08e2a060307/src/shaders/TextureSampler.hlsli#L156-L276
+  const ivec2 texSize = textureSize(tex, 0); // NOTE(review): not referenced again inside this function
+
+  uvCoord *= tileConf.shift;
+
+#ifdef SIMULATE_LOW_PRECISION
+  // Simulates the lower precision of the hardware's coordinate interpolation.
+  uvCoord = round(uvCoord * LOW_PRECISION) / LOW_PRECISION;
+#endif
+
+  uvCoord -= tileConf.low; // make the coordinate relative to the tile's low S/T
+
+  const vec2 isClamp      = step(tileConf.mask, vec2(1.0)); // if mask is negated, clamp (step yields 1.0 per axis where mask <= 1.0)
+  const vec2 isMirror     = step(tileConf.high, vec2(0.0)); // if high is negated, mirror (step yields 1.0 per axis where high <= 0.0)
+  const vec2 mask         = abs(tileConf.mask); // strip the sign that carried the clamp flag
+  const vec2 highMinusLow = abs(tileConf.high) - abs(tileConf.low); // upper bound handed to the clamp in wrappedMirrorSample
+
+  if (texFilter != G_TF_POINT) { // filtered path: gather the 2x2 texel neighbourhood
+    uvCoord -= 0.5 * tileConf.shift; // half-texel offset, scaled by shift like uvCoord itself
+    const vec2 texelBaseInt = floor(uvCoord);
+    const vec4 sample00 = wrappedMirrorSample(tex, texelBaseInt,              mask, highMinusLow, isClamp, isMirror);
+    const vec4 sample01 = wrappedMirrorSample(tex, texelBaseInt + vec2(0, 1), mask, highMinusLow, isClamp, isMirror);
+    const vec4 sample10 = wrappedMirrorSample(tex, texelBaseInt + vec2(1, 0), mask, highMinusLow, isClamp, isMirror);
+    const vec4 sample11 = wrappedMirrorSample(tex, texelBaseInt + vec2(1, 1), mask, highMinusLow, isClamp, isMirror);
+    const vec2 fracPart = uvCoord - texelBaseInt; // position inside the 2x2 cell, each component in [0, 1)
+#ifdef USE_LINEAR_FILTER
+    return quantizeTexture(tileConf.flags, mix(mix(sample00, sample10, fracPart.x), mix(sample01, sample11, fracPart.x), fracPart.y)); // standard four-sample bilinear blend
+#else
+    if (texFilter == G_TF_AVERAGE && all(lessThanEqual(vec2(1 / LOW_PRECISION), abs(fracPart - 0.5)))) { // NOTE(review): LOW_PRECISION is needed here even without SIMULATE_LOW_PRECISION; if it is an integer macro, 1 / LOW_PRECISION is integer division - confirm it is a float
+        return quantizeTexture(tileConf.flags, (sample00 + sample01 + sample10 + sample11) / 4.0f); // plain average of the four texels
+    }
+    else {
+      // Originally written by ArthurCarvalho
+      // Sourced from https://www.emutalk.net/threads/emulating-nintendo-64-3-sample-bilinear-filtering-using-shaders.54215/
+      vec4 tri0 = mix(sample00, sample10, fracPart.x) + (sample01 - sample00) * fracPart.y; // blend that omits sample11
+      vec4 tri1 = mix(sample11, sample01, 1.0 - fracPart.x) + (sample10 - sample11) * (1.0 - fracPart.y); // blend that omits sample00
+      return quantizeTexture(tileConf.flags, mix(tri0, tri1, step(1.0, fracPart.x + fracPart.y))); // tri1 once fracPart.x + fracPart.y >= 1.0, tri0 otherwise
+    }
+#endif
+  }
+  else {
+    return quantizeTexture(tileConf.flags, wrappedMirrorSample(tex, ivec2(floor(uvCoord)), mask, highMinusLow, isClamp, isMirror)); // point sampling: a single texel at the floored coordinate
+  }
+}
+
+vec4 sampleIndex(in const uint textureIndex, in const vec2 uvCoord, in const uint texFilter) { // Samples the texture slot selected by textureIndex (tex0..tex7) with that slot's tile configuration.
+  TileConf tileConf = material.texConfs[textureIndex]; // NOTE(review): indexed before the switch, so textureIndex is assumed to be a valid texConfs index - confirm against callers
+  switch (textureIndex) { // each sampler is named explicitly per case rather than indexed by textureIndex
+    default: return sampleSampler(tex0, tileConf, uvCoord, texFilter); // index 0 and any value outside 1..7 use tex0
+    case 1: return sampleSampler(tex1, tileConf, uvCoord, texFilter);
+    case 2: return sampleSampler(tex2, tileConf, uvCoord, texFilter);
+    case 3: return sampleSampler(tex3, tileConf, uvCoord, texFilter);
+    case 4: return sampleSampler(tex4, tileConf, uvCoord, texFilter);
+    case 5: return sampleSampler(tex5, tileConf, uvCoord, texFilter);
+    case 6: return sampleSampler(tex6, tileConf, uvCoord, texFilter);
+    case 7: return sampleSampler(tex7, tileConf, uvCoord, texFilter);
+  }
+}
+
+void computeLOD( // Derives the LOD fraction and, when textLOD is set, the two tile indices to sample, from the screen-space UV derivatives.
+    inout uint tileIndex0, // in: base tile index; only modified when textLOD is true
+    inout uint tileIndex1, // rewritten from tileIndex0 when textLOD is true, otherwise left as passed in
+    const bool textLOD, // NOTE(review): declared bool, while the call in main3d.frag.glsl passes textLOD() directly and the removed code compared it to G_TL_TILE - confirm the helper returns bool
+    const uint textDetail, // compared against G_TD_SHARPEN / G_TD_DETAIL / G_TD_CLAMP below
+    const float minLod, // lower bound applied to the computed fractions
+    const vec2 dx, // the caller in main3d.frag.glsl passes |dU/dx|, |dU/dy|
+    const vec2 dy, // the caller in main3d.frag.glsl passes |dV/dx|, |dV/dy|
+    const bool perspectiveOverflow, // this should be possible from what I've read in parallel-rdp, can always be removed
+    out float lodFrac // always written, regardless of textLOD
+) {
+    const bool sharpen = textDetail == G_TD_SHARPEN;
+    const bool detail = textDetail == G_TD_DETAIL;
+    const bool clam = textDetail == G_TD_CLAMP;
+
+    const vec2 dfd = max(dx, dy); // component-wise maximum of the two derivative pairs
+    // TODO: should this value be scaled by clipping planes?
+    const float maxDist = max(dfd.x, dfd.y); // largest of the four derivative magnitudes
+
+    const uint mipBase = uint(floor(log2(maxDist))); // NOTE(review): log2(maxDist) is negative for maxDist < 1.0 (-inf at 0.0) and GLSL leaves negative-float-to-uint conversion undefined - confirm behaviour on target drivers
+    const bool distant = perspectiveOverflow || maxDist >= 16384.0; // forced by the caller's flag or by a very large derivative
+    const bool aboveCount = mipBase >= material.mipCount; // computed level reaches or exceeds the material's mip count
+    const bool maxDistant = distant || aboveCount;
+    const bool magnify = maxDist < 1.0; // derivative below 1.0 is treated as magnification
+
+    const float detailFrac = max(minLod, maxDist) - float(sharpen); // float(sharpen) subtracts 1.0 in sharpen mode only
+    const float magnifedFrac = mix(float(maxDistant), detailFrac, float(!clam)); // clamp mode: 0.0 or 1.0 taken from maxDistant; other modes: detailFrac
+    const float distantFrac = float(distant || (aboveCount && clam)); // 0.0 or 1.0
+    const float notClampedFrac = max(maxDist / pow(2, max(mipBase, 0)) - 1.0, minLod); // NOTE(review): mipBase is a uint, so max(mipBase, 0) cannot change it
+
+    const float notMagnifedFrac = mix(distantFrac, notClampedFrac, !maxDistant || !clam); // bool selector: notClampedFrac when true, distantFrac when false
+    lodFrac = mix(notMagnifedFrac, magnifedFrac, float(!distant && magnify)); // magnified result only when magnifying and not distant
+
+    if (textLOD) { // tile indices are only adjusted when LOD-based tile selection is requested
+        const uint tileOffset = maxDistant ? material.mipCount : (mipBase * int(!(maxDistant && clam))); // maxDistant is false whenever the second operand is evaluated, so the int(...) factor there is always 1
+        tileIndex0 = tileIndex0 + tileOffset;
+        tileIndex1 = tileIndex0; // copied before the detail adjustment of tileIndex0 below
+        if (detail) {
+            tileIndex1 += (int(!(maxDistant || magnify)) + 1); // +2 when neither maxDistant nor magnify, otherwise +1
+            tileIndex0 += int(!magnify); // +1 unless magnifying
+        } else {
+            tileIndex1 += uint(!maxDistant && (sharpen || !magnify)); // +1 when not maxDistant and (sharpen mode or not magnifying)
+        }
+        tileIndex0 &= 7; // wrap into 0..7, the range sampleIndex has samplers for
+        tileIndex1 &= 7;
+    }
+}
diff --git a/shader/utils.glsl b/shader/utils.glsl

Original file line number	Diff line number	Diff line change
`@@ -69,6 +69,7 @@ def get_scene_render_state(scene: bpy.types.Scene):`
`69`	`69`	`convert=quantize_tuple(f64render_rs.default_convert, 9.0, -1.0, 1.0),`
`70`	`70`	`cc=SOLID_CC,`
`71`	`71`	`tex_confs=([get_tile_conf(getattr(f64render_rs, f"default_tex{i}")) for i in range(0, 8)]),`
	`72`	`+ tex_size=(32, 32),`
`72`	`73`	`)`
`73`	`74`	`state.lights[0] = F64Light(`
`74`	`75`	`quantize_srgb(fast64_rs.light0Color, force_alpha=True), quantize_direction(fast64_rs.light0Direction)`