libSoftFloat/fp32-to-fp64-conversion.shader_test at master · Hopetech/libSoftFloat · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# Conversion from float to double
# IEEE 754 compliant

[require]
GLSL >= 1.30

[vertex shader]
#version 130

/* Pass-through vertex stage: forwards the incoming vertex position unchanged.
 * All of the work under test happens in the fragment shader.
 */
void main()
{
    gl_Position = gl_Vertex;
}

[fragment shader]
#version 130

/* Assembles a double-precision bit pattern from its fields and returns it as
 * a uvec2 whose `x' component is the most significant 32 bits and whose `y'
 * component is the least significant 32 bits.
 *
 *   zSign  - sign bit, shifted into bit 31 of the high word.
 *   zExp   - biased exponent, shifted into bits 20 and up of the high word.
 *   zFrac0 - upper part of the significand, added into the high word.
 *   zFrac1 - lower 32 bits of the significand, used as the low word verbatim.
 *
 * The three high-word fields are combined with addition rather than a
 * bitwise OR, so any bits of `zFrac0' above bit 19 carry into the exponent
 * field.  Callers that pass a significand with its integer bit (bit 20) set
 * must therefore pass a `zExp' that is 1 less than the exponent they want in
 * the result.
 */
uvec2
packFloat64( uint zSign, uint zExp, uint zFrac0, uint zFrac1 )
{
    return uvec2( ( zSign<<31 ) + ( zExp<<20 ) + zFrac0, zFrac1 );
}

/* Counts the 0 bits that precede the most-significant 1 bit of `a'.
 * Returns 32 when `a' is zero.
 *
 * Binary search: at each step the top `width' bits are inspected; if they
 * are all zero they are counted and shifted out, and the window is halved
 * (16, 8, 4, 2, 1).
 */
uint
countLeadingZeros32( uint a )
{
    if ( a == 0u )
        return 32u;

    uint zeros = 0u;
    for ( uint width = 16u; width != 0u; width >>= 1u ) {
        if ( ( a >> ( 32u - width ) ) == 0u ) {
            zeros += width;
            a <<= width;
        }
    }
    return zeros;
}

/* Normalizes the significand `aFrac' of a subnormal single-precision value.
 *
 * The significand is shifted left until its leading 1 sits in bit 23 (the
 * position of the integer bit of a 24-bit significand); that is, by the
 * number of leading zeros minus 8.  The shifted significand is written to
 * `zFracPtr' and the matching exponent, 1 minus the shift distance, is
 * written to `zExpPtr'.  The exponent is an unsigned value, so for shifts
 * greater than 1 it wraps around; callers rely on that wrap-around cancelling
 * out when a bias is added later.
 *
 * `aFrac' is expected to be non-zero.
 */
void
normalizeFloat32Subnormal( uint aFrac,
                           inout uint zExpPtr,
                           inout uint zFracPtr )
{
    uint shift = countLeadingZeros32( aFrac ) - 8u;

    zExpPtr = 1u - shift;
    zFracPtr = aFrac<<shift;
}

/* Extracts the fraction field (the low 23 bits) from the single-precision
 * bit pattern `a'.
 */
uint
extractFloat32Frac( uint a )
{
    const uint fracMask = 0x007FFFFFu;

    return a & fracMask;
}

/* Extracts the 8-bit biased exponent field (bits 23..30) from the
 * single-precision bit pattern `a'.
 */
uint
extractFloat32Exp( uint a )
{
    /* Shift the sign bit out of the top, then bring the exponent down to
     * bits 0..7. */
    return ( a<<1 )>>24;
}

/* Extracts the sign bit (bit 31) of the single-precision bit pattern `a'.
 * The result is 1 when the bit is set and 0 otherwise.
 */
uint
extractFloat32Sign( uint a )
{
    const uint signMask = 0x80000000u;

    return ( ( a & signMask ) != 0u ) ? 1u : 0u;
}

/* Returns the result of converting the single-precision floating-point value
 * `a' to the double-precision floating-point format.
 *
 * `a' is the raw bit pattern of the float.  The result is the raw bit
 * pattern of the double as a uvec2: `x' holds the most significant 32 bits
 * and `y' the least significant 32 bits.
 *
 * Special cases:
 *   - NaN:      the sign and the fraction bits are carried over, with the
 *               23-bit fraction moved to the top of the 52-bit fraction.
 *   - Inf:      returns an infinity of the same sign.
 *   - Zero:     returns a zero of the same sign.
 *   - Denormal: the significand is normalized first and the value is then
 *               packed through the same path as a normal number, rather
 *               than being flushed to zero.
 */
uvec2
fp32_to_fp64( uint a )
{
    uint aFrac;
    uint aExp;
    uint aSign;

    aFrac = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    if ( aExp == 0xFFu ) {
        if ( aFrac != 0u ) {
            /* NaN */
            return uvec2(
                ( ( aSign<<31 ) | 0x7FF00000u | ( aFrac>>3 ) ),
                ( aFrac<<29 )
            );
        }
        /* Inf */
        return packFloat64( aSign, 0x7FFu, 0u, 0u );
    }

    if ( aExp == 0u ) {
        if ( aFrac == 0u ) {
            /* Zero */
            return packFloat64( aSign, 0u, 0u, 0u );
        }
        /* Denormals: after normalization `aFrac' carries its integer bit in
         * bit 23.  Once shifted right by 3 below, that bit lands on bit 20
         * and is added into the exponent field by packFloat64(), so the
         * exponent is pre-decremented to compensate.  `aExp' is unsigned and
         * may wrap here; adding the bias below brings it back into range.
         */
        normalizeFloat32Subnormal( aFrac, aExp, aFrac );
        --aExp;
    }

    /* 0x380 == 1023 - 127: the difference between the double-precision and
     * single-precision exponent biases.  The 23 fraction bits are split
     * across the two result words: the top 20 go into the high word, the
     * bottom 3 into the top of the low word.
     */
    return packFloat64( aSign, aExp + 0x380u, aFrac>>3, aFrac<<29 );
}

uniform uint a;
uniform uvec2 expected;

void main()
{
    /* Generate green if the expected value is produced, red
     * otherwise.
     */
    gl_FragColor = fp32_to_fp64(a) == expected
        ? vec4(0.0, 1.0, 0.0, 1.0)
        : vec4(1.0, 0.0, 0.0, 1.0);
}

[test]
# A bunch of tests to run.  The 'uniform' lines set the uniforms.  The
# 'draw rect' line draws a rectangle that covers the whole window.
# The 'probe all' line verifies that every pixel contains the expected
# color.

# Try +0.0
uniform uint a         0x00000000
uniform uvec2 expected 0x00000000 0x00000000
draw rect -1 -1 2 2
probe all rgba 0.0 1.0 0.0 1.0

# Try -0.0
uniform uint a         0x80000000
uniform uvec2 expected 0x80000000 0x00000000
draw rect -1 -1 2 2
probe all rgba 0.0 1.0 0.0 1.0

# Try +Inf
uniform uint a         0x7F800000
uniform uvec2 expected 0x7FF00000 0x00000000
draw rect -1 -1 2 2
probe all rgba 0.0 1.0 0.0 1.0

# Try -Inf
uniform uint a         0xFF800000
uniform uvec2 expected 0xFFF00000 0x00000000
draw rect -1 -1 2 2
probe all rgba 0.0 1.0 0.0 1.0

# Denormal (2^-144); must convert to a normal double, not to zero
uniform uint a         0x00000020
uniform uvec2 expected 0x36F00000 0x00000000
draw rect -1 -1 2 2
probe all rgba 0.0 1.0 0.0 1.0

# Negative denormal (-2^-144)
uniform uint a         0x80000020
uniform uvec2 expected 0xB6F00000 0x00000000
draw rect -1 -1 2 2
probe all rgba 0.0 1.0 0.0 1.0

# Smallest positive denormal (2^-149)
uniform uint a         0x00000001
uniform uvec2 expected 0x36A00000 0x00000000
draw rect -1 -1 2 2
probe all rgba 0.0 1.0 0.0 1.0

# Largest denormal ((1 - 2^-23) * 2^-126)
uniform uint a         0x007FFFFF
uniform uvec2 expected 0x380FFFFF 0xC0000000
draw rect -1 -1 2 2
probe all rgba 0.0 1.0 0.0 1.0

# Try 50
uniform uint a         0x42480000
uniform uvec2 expected 0x40490000 0x00000000
draw rect -1 -1 2 2
probe all rgba 0.0 1.0 0.0 1.0