Skip to content

Commit 58dc85b

Browse files
committed
Add RGB - LogLuv conversion
* Also added missing f32x4x4 dot operators. * Combined tone-mapping and color-space.
1 parent a1a681e commit 58dc85b

4 files changed

Lines changed: 175 additions & 78 deletions

File tree

include/math/color-space.hpp

Lines changed: 114 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,16 @@
2323
namespace math
2424
{
2525

26+
/**
27+
* @brief Default gamma correction value.
28+
*
29+
* @details
30+
* The default gamma correction value of 2.2 is chosen because it approximates the natural response curve of CRT
31+
* monitors, which were the most common display technology when gamma correction became standard. Modern displays
32+
* have inherited this standard, as it closely matches how human vision perceives brightness non-linearly.
33+
*/
34+
constexpr float defaultGamma = 2.2f;
35+
2636
/**
2737
* @brief Converts linear RGBA color to the sRGB color space.
2838
* @param rgba target linear RGBA color
@@ -46,6 +56,65 @@ static f32x4 srgbToRgb(f32x4 sRGB) noexcept
4656
return r;
4757
}
4858

59+
/**
60+
* @brief Applies gamma correction to the specified color.
61+
*
62+
* @details
63+
* Gamma correction is used to adjust the brightness of an image or display to match the
64+
* nonlinear response of display devices, such as monitors. It compensates for the fact that
65+
* displays do not linearly represent the light intensity of a color.
66+
*
67+
* @param color target linear RGB color to gamma correct
68+
* @param invGamma inverse gamma correction value (1.0/x)
69+
*/
70+
static f32x4 gammaCorrection(f32x4 color, float invGamma) noexcept
71+
{
72+
return f32x4(pow(color, f32x4(invGamma)), color.getW());
73+
}
74+
/**
75+
* @brief Applies gamma correction to the specified color.
76+
* @details See the @ref gammaCorrection().
77+
* @param color target linear RGB color to gamma correct
78+
*/
79+
static f32x4 gammaCorrection(f32x4 color) noexcept
80+
{
81+
return f32x4(pow(color, f32x4(1.0f / defaultGamma)), color.getW());
82+
}
83+
84+
/**
85+
* @brief Applies gamma correction to the specified color. (Fast approximation)
86+
* @details See the @ref gammaCorrection().
87+
*
88+
* @param color target linear RGB color to gamma correct
89+
* @param invGamma inverse gamma correction value (1.0/x)
90+
*/
91+
static f32x4 fastGammaCorrection(f32x4 color, float invGamma) noexcept
92+
{
93+
return f32x4(fastPow(color, f32x4(invGamma)), color.getW());
94+
}
95+
/**
96+
* @brief Applies gamma correction to the specified color. (Fast approximation)
97+
* @details See the @ref gammaCorrection().
98+
* @param color target linear RGB color to gamma correct
99+
*/
100+
static f32x4 fastGammaCorrection(f32x4 color) noexcept
101+
{
102+
return f32x4(fastPow(color, f32x4(1.0f / defaultGamma)), color.getW());
103+
}
104+
105+
/**
106+
* @brief Calculates relative luminance of a color. (Rec. 709)
107+
* @param x target linear color
108+
*/
109+
static float calcLum(f32x4 x) noexcept { return dot3(x, f32x4(0.2126f, 0.7152f, 0.0722f)); }
110+
/**
111+
* @brief Calculates perceptual brightness (Luma) of a color. (Rec. 709)
112+
* @param rgb target linear RGB color
113+
*/
114+
static float rgbToLuma(f32x4 rgb) noexcept { return calcLum(fastGammaCorrection(rgb)); }
115+
116+
//**********************************************************************************************************************
117+
// Linear sRGB <-> CIE XYZ
49118
static const f32x4x4 rgbToXyzMat = f32x4x4
50119
(
51120
0.41239079926595934f, 0.21263900587151027f, 0.01933081871559182f, 0.0f,
@@ -65,12 +134,12 @@ static const f32x4x4 xyzToRgbMat = f32x4x4
65134
* @brief Converts linear sRGB color to the CIE XYZ color space.
66135
* @param rgb target linear sRGB color
67136
*/
68-
static f32x4 rgbToXyz(f32x4 rgb) noexcept { return multiply3x3(rgbToXyzMat, rgb); }
137+
static f32x4 rgbToXyz(f32x4 rgb) noexcept { return dot3x3(rgbToXyzMat, rgb); }
69138
/**
70139
* @brief Converts CIE XYZ color to the linear sRGB color space.
71140
* @param xyz target CIE XYZ color
72141
*/
73-
static f32x4 xyzToRgb(f32x4 xyz) noexcept { return multiply3x3(xyzToRgbMat, xyz); }
142+
static f32x4 xyzToRgb(f32x4 xyz) noexcept { return dot3x3(xyzToRgbMat, xyz); }
74143

75144
/**
76145
* @brief Converts CIE XYZ color to the CIE xyY color space.
@@ -102,4 +171,47 @@ static f32x4 rgbToXyy(f32x4 rgb) noexcept { return xyzToXyy(rgbToXyz(rgb)); }
102171
*/
103172
static f32x4 xyyToRgb(f32x4 xyy) noexcept { return xyzToRgb(xyyToXyz(xyy)); }
104173

174+
//**********************************************************************************************************************
175+
// Linear sRGB <-> LogLuv
176+
static const f32x4x4 rgbToLogLuvMat = f32x4x4
177+
(
178+
0.2209f, 0.1138f, 0.0102f, 0.0f,
179+
0.3390f, 0.6780f, 0.1130f, 0.0f,
180+
0.4184f, 0.7319f, 0.2969f, 0.0f,
181+
0.0f , 0.0f , 0.0f , 0.0f
182+
);
183+
static const f32x4x4 logLuvToRgbMat = f32x4x4
184+
(
185+
6.0014f, -1.3320f, 0.3008f, 0.0f,
186+
-2.7008f, 3.1029f, -1.0882f, 0.0f,
187+
-1.7996f, -5.7721f, 5.6268f, 0.0f,
188+
0.0f , 0.0f , 0.0f , 0.0f
189+
);
190+
191+
/**
192+
* @brief Encodes linear RGB color (HDR) to the LogLuv format.
193+
* @param rgb target linear RGB color
194+
*/
195+
static uint32 rgbToLogLuv(f32x4 rgb) noexcept
196+
{
197+
auto luv = max(dot3x3(rgbToLogLuvMat, rgb), f32x4(1e-6f));
198+
auto uv = (uint2)fma(saturate((float2)luv / luv.getZ()), float2(255.0f), float2(0.5f));
199+
auto logLuv = (uint32)std::fma(saturate(std::fma(std::log2(
200+
luv.getY()), 1.0f / 64.0f, 0.5f)), 65535.0f, 0.5f);
201+
logLuv |= (uv.x << 24u) | (uv.y << 16u);
202+
return dot3(rgb, rgb) > 0.0f ? logLuv : 0;
203+
}
204+
/**
205+
* @brief Decodes linear RGB color (HDR) from the LogLuv format.
206+
* @param logLuv target LogLuv encoded color
207+
*/
208+
static f32x4 logLuvToRgb(uint32 logLuv)
209+
{
210+
if (logLuv == 0) return f32x4::zero;
211+
f32x4 luv; auto uv = float2(uint2(logLuv >> 24u, logLuv >> 16u) & 255u) * (1.0f / 255.0f);
212+
luv.floats.y = std::exp2(std::fma(logLuv & 65535u, (1.0f / 65535.0f) * 64.0f, -32.0f));
213+
luv.floats.z = luv.floats.y / uv.y; luv.floats.x = luv.floats.z * uv.x;
214+
return max(dot3x3(logLuvToRgbMat, luv), f32x4::zero);
215+
}
216+
105217
} // namespace math

include/math/matrix/transform.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ static f32x4x4 translate(f32x4 t) noexcept
6060
*/
6161
static f32x4x4 translate(const f32x4x4& m, f32x4 t) noexcept
6262
{
63-
return f32x4x4(m.c0, m.c1, m.c2, f32x4(m.c3 + multiply3x3(m, t), m.c3.getW()));
63+
return f32x4x4(m.c0, m.c1, m.c2, f32x4(m.c3 + dot3x3(m, t), m.c3.getW()));
6464
}
6565
/**
6666
* @brief Applies translation transformation to an object in 3D space. [r = translate(t) * m]
@@ -320,7 +320,7 @@ static quat lookAtQuat(f32x4 direction, f32x4 up = f32x4::top) noexcept
320320
static f32x4x4 inverseTransRot(const f32x4x4& m)
321321
{
322322
auto t = transpose4x4(m);
323-
setTranslation(t, -multiply3x3(t, getTranslation(m)));
323+
setTranslation(t, -dot3x3(t, getTranslation(m)));
324324
return t;
325325
}
326326

include/math/simd/matrix/float.hpp

Lines changed: 59 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) f32x4x4
187187
}
188188

189189
/**
190-
* @brief Calculates dot product between two SIMD matrices.
190+
* @brief Calculates 4x4 dot product between two SIMD matrices.
191191
* @param[in] m target SIMD matrix to dot by
192192
*/
193193
f32x4x4 operator*(const f32x4x4& m) const noexcept
@@ -214,12 +214,12 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) f32x4x4
214214
result[i].data = r;
215215
}
216216
#else
217-
result = f32x4x4((*((const float4x4*)this) * (*((const float4x4*)&m))));
217+
result = f32x4x4(*((const float4x4*)this) * (*((const float4x4*)&m)));
218218
#endif
219219
return result;
220220
}
221221
/**
222-
* @brief Calculates dot product between SIMD matrix and vector.
222+
* @brief Calculates 4x4 dot product between SIMD matrix and vector.
223223
* @param v target SIMD vector to dot by
224224
*/
225225
f32x4 operator*(f32x4 v) const noexcept
@@ -237,7 +237,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) f32x4x4
237237
r = vmlaq_f32(r, c3.data, vdupq_laneq_f32(v.data, 3));
238238
return r;
239239
#else
240-
return f32x4((*((const float4x4*)this) * (*((const float4*)&v))));
240+
return f32x4(*((const float4x4*)this) * (*((const float4*)&v)));
241241
#endif
242242
}
243243

@@ -306,12 +306,38 @@ static bool isBinaryLess(const f32x4x4& a, const f32x4x4& b) noexcept
306306
return memcmp(&a, &b, sizeof(f32x4x4)) < 0;
307307
}
308308

309+
/**
310+
* @brief Calculates 4x4 dot product between vector and SIMD matrix. (v * m)
311+
*
312+
* @param v target SIMD vector to use
313+
* @param[in] m target SIMD matrix to dot by
314+
*/
315+
static f32x4 operator*(f32x4 v, const f32x4x4& m) noexcept
316+
{
317+
#if defined(MATH_SIMD_SUPPORT_SSE) || defined(MATH_SIMD_SUPPORT_AVX2)
318+
auto r = _mm_mul_ps(m.c0.data, v.data);
319+
r = MATH_SIMD_FMA(m.c1.data, v.data, r);
320+
r = MATH_SIMD_FMA(m.c2.data, v.data, r);
321+
r = MATH_SIMD_FMA(m.c3.data, v.data, r);
322+
return r;
323+
#elif defined(MATH_SIMD_SUPPORT_NEON)
324+
auto r = vmulq_f32(m.c0.data, v.data);
325+
r = vmlaq_f32(r, m.c1.data, v.data);
326+
r = vmlaq_f32(r, m.c2.data, v.data);
327+
r = vmlaq_f32(r, m.c3.data, v.data);
328+
return r;
329+
#else
330+
return f32x4(*((const float4*)&v) * (*((const float4x4*)this)));
331+
#endif
332+
}
333+
309334
/**
310335
* @brief Calculates 3x3 dot product between two SIMD matrices.
336+
*
311337
* @param[in] a first SIMD matrix to use
312338
* @param[in] b second SIMD matrix to use
313339
*/
314-
static f32x4x4 multiply3x3(const f32x4x4& a, const f32x4x4& b) noexcept
340+
static f32x4x4 dot3x3(const f32x4x4& a, const f32x4x4& b) noexcept
315341
{
316342
f32x4x4 result;
317343
#if defined(MATH_SIMD_SUPPORT_SSE) || defined(MATH_SIMD_SUPPORT_AVX2)
@@ -338,28 +364,48 @@ static f32x4x4 multiply3x3(const f32x4x4& a, const f32x4x4& b) noexcept
338364
return result;
339365
}
340366
/**
341-
* @brief Calculates 3x3 dot product between SIMD matrix and vector.
367+
* @brief Calculates 3x3 dot product between SIMD matrix and vector. (m * v)
368+
*
342369
* @param[in] m target SIMD matrix to use
343370
* @param v target SIMD vector to dot by
344371
*/
345-
static f32x4 multiply3x3(const f32x4x4& m, f32x4 v) noexcept
372+
static f32x4 dot3x3(const f32x4x4& m, f32x4 v) noexcept
346373
{
347-
f32x4 result;
348374
#if defined(MATH_SIMD_SUPPORT_SSE) || defined(MATH_SIMD_SUPPORT_AVX2)
349375
auto r = _mm_mul_ps(m.c0.data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(0, 0, 0, 0)));
350376
r = MATH_SIMD_FMA(m.c1.data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(1, 1, 1, 1)), r);
351377
r = MATH_SIMD_FMA(m.c2.data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(2, 2, 2, 2)), r);
352-
result = r;
378+
return f32x4(r).swizzle<SwX, SwY, SwZ>();
353379
#elif defined(MATH_SIMD_SUPPORT_NEON)
354380
auto r = vmulq_f32(m.c0.data, vdupq_laneq_f32(v.data, 0));
355381
r = vmlaq_f32(r, m.c1.data, vdupq_laneq_f32(v.data, 1));
356382
r = vmlaq_f32(r, m.c2.data, vdupq_laneq_f32(v.data, 2));
357-
result = r;
383+
return f32x4(r).swizzle<SwX, SwY, SwZ>();
384+
#else
385+
return f32x4((float3)v * float3x3((float3)m.c0, (float3)m.c1, (float3)m.c2));
386+
#endif
387+
}
388+
/**
389+
* @brief Calculates 3x3 dot product between SIMD matrix and vector. (v * m)
390+
*
391+
* @param v target SIMD vector to use
392+
* @param[in] m target SIMD matrix to dot by
393+
*/
394+
static f32x4 dot3x3(f32x4 v, const f32x4x4& m) noexcept
395+
{
396+
#if defined(MATH_SIMD_SUPPORT_SSE) || defined(MATH_SIMD_SUPPORT_AVX2)
397+
auto r = _mm_mul_ps(m.c0.data, v.data);
398+
r = MATH_SIMD_FMA(m.c1.data, v.data, r);
399+
r = MATH_SIMD_FMA(m.c2.data, v.data, r);
400+
return f32x4(r).swizzle<SwX, SwY, SwZ>();
401+
#elif defined(MATH_SIMD_SUPPORT_NEON)
402+
auto r = vmulq_f32(m.c0.data, v.data);
403+
r = vmlaq_f32(r, m.c1.data, v.data);
404+
r = vmlaq_f32(r, m.c2.data, v.data);
405+
return f32x4(r).swizzle<SwX, SwY, SwZ>();
358406
#else
359-
auto t = float3x3((float3)m.c0, (float3)m.c1, (float3)m.c2) * (float3)v;
360-
result = f32x4(t.x, t.y, t.z, t.z);
407+
return f32x4((float3)v * float3x3((float3)m.c0, (float3)m.c1, (float3)m.c2));
361408
#endif
362-
return result.swizzle<SwX, SwY, SwZ>();
363409
}
364410

365411
/**

include/math/tone-mapping.hpp

Lines changed: 0 additions & 61 deletions
This file was deleted.

0 commit comments

Comments
 (0)