Skip to content

Commit 21436de

Browse files
authored
Improve mipmapping accuracy (#12)
* change texture shader location * quantize lod * first iteration of the parallel rdp code (surprisingly less branch-happy than rt64) * fix naming * first round (working) * clamp const bool * bye bye more branches * mipBase * pass in more arguments * we only need this later * one less branch * move comment * fix default tex size * Update textures.glsl * minLod * update from main
1 parent 6173abe commit 21436de

6 files changed

Lines changed: 183 additions & 166 deletions

File tree

common.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def get_scene_render_state(scene: bpy.types.Scene):
6969
convert=quantize_tuple(f64render_rs.default_convert, 9.0, -1.0, 1.0),
7070
cc=SOLID_CC,
7171
tex_confs=([get_tile_conf(getattr(f64render_rs, f"default_tex{i}")) for i in range(0, 8)]),
72+
tex_size=(32, 32),
7273
)
7374
state.lights[0] = F64Light(
7475
quantize_srgb(fast64_rs.light0Color, force_alpha=True), quantize_direction(fast64_rs.light0Direction)

material/parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ def f64_material_parse(f3d_mat: "F3DMaterialProperty", always_set: bool, set_lig
394394
state.cc = get_cc_settings(f3d_mat)
395395
if always_set or (f3d_mat.set_prim and cc_uses["Primitive"]):
396396
state.prim_color = quantize_srgb(f3d_mat.prim_color)
397-
state.prim_lod = (f3d_mat.prim_lod_frac, f3d_mat.prim_lod_min)
397+
state.prim_lod = quantize_tuple((f3d_mat.prim_lod_frac, f3d_mat.prim_lod_min), 8)
398398
if always_set or (f3d_mat.set_env and cc_uses["Environment"]):
399399
state.env_color = quantize_srgb(f3d_mat.env_color)
400400
if always_set or (f3d_mat.set_key and cc_uses["Key"]): # extra 0 for alignment

renderer.py

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from io import StringIO
12
import math
23
import pathlib
34
import time
@@ -97,24 +98,24 @@ def init_shader(self, scene: bpy.types.Scene):
9798
print("Compiling shader")
9899

99100
shaderPath = (pathlib.Path(__file__).parent / "shader").resolve()
100-
shaderVert = ""
101-
shaderFrag = ""
102-
103-
with open(shaderPath / "utils.glsl", "r", encoding="utf-8") as f:
104-
shaderUtils = f.read()
105-
shaderVert += shaderUtils
106-
shaderFrag += shaderUtils
107-
108-
with open(shaderPath / "defines.glsl", "r", encoding="utf-8") as f:
109-
shaderDef = f.read()
110-
shaderVert += shaderDef
111-
shaderFrag += shaderDef
112-
113-
with open(shaderPath / "main3d.vert.glsl", "r", encoding="utf-8") as f:
114-
shaderVert += f.read()
101+
shaderVert = StringIO()
102+
shaderFrag = StringIO()
103+
104+
general_shaders = ("utils.glsl", "defines.glsl")
105+
vertex_shaders = ("main3d.vert.glsl",)
106+
frag_shaders = (
107+
"textures.glsl",
108+
"main3d.frag.glsl",
109+
)
115110

116-
with open(shaderPath / "main3d.frag.glsl", "r", encoding="utf-8") as f:
117-
shaderFrag += f.read()
111+
for shader in general_shaders + vertex_shaders:
112+
with open(shaderPath / shader, "r", encoding="utf-8") as f:
113+
shaderVert.write(f.read())
114+
shaderVert.write("\n")
115+
for shader in general_shaders + frag_shaders:
116+
with open(shaderPath / shader, "r", encoding="utf-8") as f:
117+
shaderFrag.write(f.read())
118+
shaderFrag.write("\n")
118119

119120
shader_info = gpu.types.GPUShaderCreateInfo()
120121

@@ -158,8 +159,8 @@ def init_shader(self, scene: bpy.types.Scene):
158159
else:
159160
shader_info.fragment_out(0, "VEC4", "FragColor")
160161

161-
shader_info.vertex_source(shaderVert)
162-
shader_info.fragment_source(shaderFrag)
162+
shader_info.vertex_source(shaderVert.getvalue())
163+
shader_info.fragment_source(shaderFrag.getvalue())
163164

164165
self.shader = gpu.shader.create_from_info(shader_info)
165166
self.shader_fallback = gpu.shader.from_builtin(

shader/main3d.frag.glsl

Lines changed: 10 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -3,129 +3,9 @@
33
#extension GL_ARB_fragment_shader_interlock : enable
44
layout(pixel_interlock_unordered) in;
55
#endif
6-
#ifdef GL_ARB_derivative_control
7-
#extension GL_ARB_derivative_control : enable
8-
#endif
96

107
#define DECAL_DEPTH_DELTA 100
118

12-
vec4 quantize3Bit(in vec4 color) {
13-
return vec4(round(color.rgb * 8.0) / 8.0, step(0.5, color.a));
14-
}
15-
16-
vec4 quantize4Bit(in vec4 color) {
17-
return round(color * 16.0) / 16.0; // (16 seems more accurate than 15)
18-
}
19-
20-
vec4 quantizeTexture(uint flags, vec4 color) {
21-
vec4 colorQuant = flagSelect(flags, TEX_FLAG_4BIT, color, quantize4Bit(color));
22-
colorQuant = flagSelect(flags, TEX_FLAG_3BIT, colorQuant, quantize3Bit(colorQuant));
23-
colorQuant.rgb = linearToGamma(colorQuant.rgb);
24-
return flagSelect(flags, TEX_FLAG_MONO, colorQuant.rgba, colorQuant.rrrr);
25-
}
26-
27-
vec4 sampleSampler(in const sampler2D tex, in const TileConf tileConf, in vec2 uvCoord, in const uint texFilter) {
28-
// https://github.com/rt64/rt64/blob/61aa08f517cd16c1dbee4e097768b08e2a060307/src/shaders/TextureSampler.hlsli#L156-L276
29-
const ivec2 texSize = textureSize(tex, 0);
30-
31-
uvCoord *= tileConf.shift;
32-
33-
#ifdef SIMULATE_LOW_PRECISION
34-
// Simulates the lower precision of the hardware's coordinate interpolation.
35-
uvCoord = round(uvCoord * LOW_PRECISION) / LOW_PRECISION;
36-
#endif
37-
38-
uvCoord -= tileConf.low;
39-
40-
const vec2 isClamp = step(tileConf.mask, vec2(1.0)); // if mask is negated, clamp
41-
const vec2 isMirror = step(tileConf.high, vec2(0.0)); // if high is negated, mirror
42-
const vec2 mask = abs(tileConf.mask);
43-
const vec2 highMinusLow = abs(tileConf.high) - abs(tileConf.low);
44-
45-
if (texFilter != G_TF_POINT) {
46-
uvCoord -= 0.5 * tileConf.shift;
47-
const vec2 texelBaseInt = floor(uvCoord);
48-
const vec4 sample00 = wrappedMirrorSample(tex, texelBaseInt, mask, highMinusLow, isClamp, isMirror);
49-
const vec4 sample01 = wrappedMirrorSample(tex, texelBaseInt + vec2(0, 1), mask, highMinusLow, isClamp, isMirror);
50-
const vec4 sample10 = wrappedMirrorSample(tex, texelBaseInt + vec2(1, 0), mask, highMinusLow, isClamp, isMirror);
51-
const vec4 sample11 = wrappedMirrorSample(tex, texelBaseInt + vec2(1, 1), mask, highMinusLow, isClamp, isMirror);
52-
const vec2 fracPart = uvCoord - texelBaseInt;
53-
#ifdef USE_LINEAR_FILTER
54-
return quantizeTexture(tileConf.flags, mix(mix(sample00, sample10, fracPart.x), mix(sample01, sample11, fracPart.x), fracPart.y));
55-
#else
56-
if (texFilter == G_TF_AVERAGE && all(lessThanEqual(vec2(1 / LOW_PRECISION), abs(fracPart - 0.5)))) {
57-
return quantizeTexture(tileConf.flags, (sample00 + sample01 + sample10 + sample11) / 4.0f);
58-
}
59-
else {
60-
// Originally written by ArthurCarvalho
61-
// Sourced from https://www.emutalk.net/threads/emulating-nintendo-64-3-sample-bilinear-filtering-using-shaders.54215/
62-
vec4 tri0 = mix(sample00, sample10, fracPart.x) + (sample01 - sample00) * fracPart.y;
63-
vec4 tri1 = mix(sample11, sample01, 1.0 - fracPart.x) + (sample10 - sample11) * (1.0 - fracPart.y);
64-
return quantizeTexture(tileConf.flags, mix(tri0, tri1, step(1.0, fracPart.x + fracPart.y)));
65-
}
66-
#endif
67-
}
68-
else {
69-
return quantizeTexture(tileConf.flags, wrappedMirrorSample(tex, ivec2(floor(uvCoord)), mask, highMinusLow, isClamp, isMirror));
70-
}
71-
}
72-
73-
vec4 sampleIndex(in const uint textureIndex, in const vec2 uvCoord, in const uint texFilter) {
74-
TileConf tileConf = material.texConfs[textureIndex];
75-
switch (textureIndex) {
76-
default: return sampleSampler(tex0, tileConf, uvCoord, texFilter);
77-
case 1: return sampleSampler(tex1, tileConf, uvCoord, texFilter);
78-
case 2: return sampleSampler(tex2, tileConf, uvCoord, texFilter);
79-
case 3: return sampleSampler(tex3, tileConf, uvCoord, texFilter);
80-
case 4: return sampleSampler(tex4, tileConf, uvCoord, texFilter);
81-
case 5: return sampleSampler(tex5, tileConf, uvCoord, texFilter);
82-
case 6: return sampleSampler(tex6, tileConf, uvCoord, texFilter);
83-
case 7: return sampleSampler(tex7, tileConf, uvCoord, texFilter);
84-
}
85-
}
86-
87-
float computeLOD(inout uint tileIndex0, inout uint tileIndex1) {
88-
// https://github.com/rt64/rt64/blob/0ca92eeb6c2f58ce3581c65f87f7261b8ac0fea0/src/shaders/TextureSampler.hlsli#L18
89-
if (textLOD() == G_TL_TILE)
90-
return 1.0f;
91-
const uint texDetail = textDetail();
92-
const bool lodSharpen = texDetail == G_TD_SHARPEN;
93-
const bool lodDetail = texDetail == G_TD_DETAIL;
94-
const bool lodSharpDetail = lodSharpen || lodDetail;
95-
96-
#ifdef GL_ARB_derivative_control
97-
const vec2 dfd = abs(vec2(dFdxCoarse(inputUV.x), dFdyCoarse(inputUV.y)));
98-
#else
99-
const vec2 dfd = abs(vec2(dFdx(inputUV.x), dFdy(inputUV.y)));
100-
#endif
101-
float maxDst = max(dfd.x, dfd.y);
102-
103-
if (lodSharpDetail)
104-
maxDst = max(maxDst, material.primLod.y);
105-
106-
int tileBase = int(floor(log2(maxDst)));
107-
float lodFraction = maxDst / pow(2, max(tileBase, 0)) - 1.0;
108-
109-
if (lodSharpen && maxDst < 1.0)
110-
lodFraction = maxDst - 1.0;
111-
112-
if (lodDetail) {
113-
if (lodFraction < 0.0)
114-
lodFraction = maxDst;
115-
tileBase += 1;
116-
} else if (tileBase >= material.mipCount)
117-
lodFraction = 1.0;
118-
119-
if (lodSharpDetail)
120-
tileBase = max(tileBase, 0);
121-
else
122-
lodFraction = max(lodFraction, 0.0);
123-
124-
tileIndex0 = clamp(tileBase, 0, material.mipCount);
125-
tileIndex1 = clamp(tileBase + 1, 0, material.mipCount);
126-
return lodFraction;
127-
}
128-
1299
vec3 cc_fetchColor(in int val, in vec4 shade, in vec4 comb, in float lodFraction, in vec4 texData0, in vec4 texData1)
13010
{
13111
if(val == CC_C_COMB ) return comb.rgb;
@@ -264,9 +144,18 @@ void main()
264144

265145
vec4 ccShade = geoModeSelect(G_SHADE_SMOOTH, cc_shade_flat, cc_shade);
266146

147+
#ifdef GL_ARB_derivative_control
148+
const vec2 dx = abs(vec2(dFdxCoarse(inputUV.x), dFdyCoarse(inputUV.x)));
149+
const vec2 dy = abs(vec2(dFdxCoarse(inputUV.y), dFdyCoarse(inputUV.y)));
150+
#else
151+
const vec2 dx = abs(vec2(dFdx(inputUV.x), dFdy(inputUV.x)));
152+
const vec2 dy = abs(vec2(dFdx(inputUV.y), dFdy(inputUV.y)));
153+
#endif
154+
267155
uint tex0Index = 0;
268156
uint tex1Index = 1;
269-
const float lodFraction = computeLOD(tex0Index, tex1Index);
157+
float lodFraction = 0.0;
158+
computeLOD(tex0Index, tex1Index, textLOD(), textDetail(), material.primLod.y, dx, dy, false, lodFraction);
270159

271160
vec4 texData0 = sampleIndex(tex0Index, inputUV, texFilter);
272161
vec4 texData1 = sampleIndex(tex1Index, inputUV, texFilter);

shader/textures.glsl

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
#ifdef GL_ARB_derivative_control
2+
#extension GL_ARB_derivative_control : enable
3+
#endif
4+
5+
vec4 quantize3Bit(in vec4 color) {
6+
return vec4(round(color.rgb * 8.0) / 8.0, step(0.5, color.a));
7+
}
8+
9+
vec4 quantize4Bit(in vec4 color) {
10+
return round(color * 16.0) / 16.0; // (16 seems more accurate than 15)
11+
}
12+
13+
vec4 quantizeTexture(uint flags, vec4 color) {
14+
vec4 colorQuant = flagSelect(flags, TEX_FLAG_4BIT, color, quantize4Bit(color));
15+
colorQuant = flagSelect(flags, TEX_FLAG_3BIT, colorQuant, quantize3Bit(colorQuant));
16+
colorQuant.rgb = linearToGamma(colorQuant.rgb);
17+
return flagSelect(flags, TEX_FLAG_MONO, colorQuant.rgba, colorQuant.rrrr);
18+
}
19+
20+
vec2 mirrorUV(const vec2 uvIn, const vec2 uvBound)
21+
{
22+
vec2 uvMod2 = mod(uvIn, uvBound * 2.0 + 1.0);
23+
return mix(uvMod2, (uvBound * 2.0) - uvMod2, step(uvBound, uvMod2));
24+
}
25+
26+
vec4 wrappedMirrorSample(const sampler2D tex, vec2 uv, const vec2 mask, const vec2 highMinusLow, const vec2 isClamp, const vec2 isMirror)
27+
{
28+
const ivec2 texSize = textureSize(tex, 0);
29+
30+
// first apply clamping if enabled (clamp S/T, low S/T -> high S/T)
31+
const vec2 uvClamp = clamp(uv, vec2(0.0), highMinusLow);
32+
uv = mix(uv, uvClamp, isClamp);
33+
34+
// then mirror the result if needed (mirror S/T)
35+
const vec2 uvMirror = mirrorUV(uv, mask - 0.5);
36+
uv = mix(uv, uvMirror, isMirror);
37+
38+
// clamp again (mask S/T), this is also done to avoid OOB texture access
39+
uv = mod(uv, min(texSize, mask));
40+
41+
return texelFetch(tex, ivec2(floor(uv)), 0);
42+
}
43+
44+
vec4 sampleSampler(in const sampler2D tex, in const TileConf tileConf, in vec2 uvCoord, in const uint texFilter) {
45+
// https://github.com/rt64/rt64/blob/61aa08f517cd16c1dbee4e097768b08e2a060307/src/shaders/TextureSampler.hlsli#L156-L276
46+
const ivec2 texSize = textureSize(tex, 0);
47+
48+
uvCoord *= tileConf.shift;
49+
50+
#ifdef SIMULATE_LOW_PRECISION
51+
// Simulates the lower precision of the hardware's coordinate interpolation.
52+
uvCoord = round(uvCoord * LOW_PRECISION) / LOW_PRECISION;
53+
#endif
54+
55+
uvCoord -= tileConf.low;
56+
57+
const vec2 isClamp = step(tileConf.mask, vec2(1.0)); // if mask is negated, clamp
58+
const vec2 isMirror = step(tileConf.high, vec2(0.0)); // if high is negated, mirror
59+
const vec2 mask = abs(tileConf.mask);
60+
const vec2 highMinusLow = abs(tileConf.high) - abs(tileConf.low);
61+
62+
if (texFilter != G_TF_POINT) {
63+
uvCoord -= 0.5 * tileConf.shift;
64+
const vec2 texelBaseInt = floor(uvCoord);
65+
const vec4 sample00 = wrappedMirrorSample(tex, texelBaseInt, mask, highMinusLow, isClamp, isMirror);
66+
const vec4 sample01 = wrappedMirrorSample(tex, texelBaseInt + vec2(0, 1), mask, highMinusLow, isClamp, isMirror);
67+
const vec4 sample10 = wrappedMirrorSample(tex, texelBaseInt + vec2(1, 0), mask, highMinusLow, isClamp, isMirror);
68+
const vec4 sample11 = wrappedMirrorSample(tex, texelBaseInt + vec2(1, 1), mask, highMinusLow, isClamp, isMirror);
69+
const vec2 fracPart = uvCoord - texelBaseInt;
70+
#ifdef USE_LINEAR_FILTER
71+
return quantizeTexture(tileConf.flags, mix(mix(sample00, sample10, fracPart.x), mix(sample01, sample11, fracPart.x), fracPart.y));
72+
#else
73+
if (texFilter == G_TF_AVERAGE && all(lessThanEqual(vec2(1 / LOW_PRECISION), abs(fracPart - 0.5)))) {
74+
return quantizeTexture(tileConf.flags, (sample00 + sample01 + sample10 + sample11) / 4.0f);
75+
}
76+
else {
77+
// Originally written by ArthurCarvalho
78+
// Sourced from https://www.emutalk.net/threads/emulating-nintendo-64-3-sample-bilinear-filtering-using-shaders.54215/
79+
vec4 tri0 = mix(sample00, sample10, fracPart.x) + (sample01 - sample00) * fracPart.y;
80+
vec4 tri1 = mix(sample11, sample01, 1.0 - fracPart.x) + (sample10 - sample11) * (1.0 - fracPart.y);
81+
return quantizeTexture(tileConf.flags, mix(tri0, tri1, step(1.0, fracPart.x + fracPart.y)));
82+
}
83+
#endif
84+
}
85+
else {
86+
return quantizeTexture(tileConf.flags, wrappedMirrorSample(tex, ivec2(floor(uvCoord)), mask, highMinusLow, isClamp, isMirror));
87+
}
88+
}
89+
90+
vec4 sampleIndex(in const uint textureIndex, in const vec2 uvCoord, in const uint texFilter) {
91+
TileConf tileConf = material.texConfs[textureIndex];
92+
switch (textureIndex) {
93+
default: return sampleSampler(tex0, tileConf, uvCoord, texFilter);
94+
case 1: return sampleSampler(tex1, tileConf, uvCoord, texFilter);
95+
case 2: return sampleSampler(tex2, tileConf, uvCoord, texFilter);
96+
case 3: return sampleSampler(tex3, tileConf, uvCoord, texFilter);
97+
case 4: return sampleSampler(tex4, tileConf, uvCoord, texFilter);
98+
case 5: return sampleSampler(tex5, tileConf, uvCoord, texFilter);
99+
case 6: return sampleSampler(tex6, tileConf, uvCoord, texFilter);
100+
case 7: return sampleSampler(tex7, tileConf, uvCoord, texFilter);
101+
}
102+
}
103+
104+
void computeLOD(
105+
inout uint tileIndex0,
106+
inout uint tileIndex1,
107+
const bool textLOD,
108+
const uint textDetail,
109+
const float minLod,
110+
const vec2 dx,
111+
const vec2 dy,
112+
const bool perspectiveOverflow, // this should be possible from what I've read in parallel-rdp, can always be removed
113+
out float lodFrac
114+
) {
115+
const bool sharpen = textDetail == G_TD_SHARPEN;
116+
const bool detail = textDetail == G_TD_DETAIL;
117+
const bool clam = textDetail == G_TD_CLAMP;
118+
119+
const vec2 dfd = max(dx, dy);
120+
// TODO: should this value be scaled by clipping planes?
121+
const float maxDist = max(dfd.x, dfd.y);
122+
123+
const uint mipBase = uint(floor(log2(maxDist)));
124+
const bool distant = perspectiveOverflow || maxDist >= 16384.0;
125+
const bool aboveCount = mipBase >= material.mipCount;
126+
const bool maxDistant = distant || aboveCount;
127+
const bool magnify = maxDist < 1.0;
128+
129+
const float detailFrac = max(minLod, maxDist) - float(sharpen);
130+
const float magnifedFrac = mix(float(maxDistant), detailFrac, float(!clam));
131+
const float distantFrac = float(distant || (aboveCount && clam));
132+
const float notClampedFrac = max(maxDist / pow(2, max(mipBase, 0)) - 1.0, minLod);
133+
134+
const float notMagnifedFrac = mix(distantFrac, notClampedFrac, !maxDistant || !clam);
135+
lodFrac = mix(notMagnifedFrac, magnifedFrac, float(!distant && magnify));
136+
137+
if (textLOD) {
138+
const uint tileOffset = maxDistant ? material.mipCount : (mipBase * int(!(maxDistant && clam)));
139+
tileIndex0 = tileIndex0 + tileOffset;
140+
tileIndex1 = tileIndex0;
141+
if (detail) {
142+
tileIndex1 += (int(!(maxDistant || magnify)) + 1);
143+
tileIndex0 += int(!magnify);
144+
} else {
145+
tileIndex1 += uint(!maxDistant && (sharpen || !magnify));
146+
}
147+
tileIndex0 &= 7;
148+
tileIndex1 &= 7;
149+
}
150+
}

0 commit comments

Comments
 (0)