Skip to content

Commit 04ed6bc

Browse files
committed
Update to latest versions from optimization branch
1 parent ae86c2d commit 04ed6bc

1 file changed

Lines changed: 38 additions & 24 deletions

File tree

src/encode.c

Lines changed: 38 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -5,14 +5,8 @@
55
#include "common.h"
66

77

8-
struct RGB {
9-
float r;
10-
float g;
11-
float b;
12-
};
13-
148
static void multiplyBasisFunction(
15-
struct RGB *factors, int factorsCount, int width, int height, uint8_t *rgb, size_t bytesPerRow,
9+
float factors[][4], int factorsCount, int width, int height, uint8_t *rgb, size_t bytesPerRow,
1610
float *cosX, float *cosY);
1711
static char *encode_int(int value, int length, char *destination);
1812

@@ -31,12 +25,15 @@ static void init_sRGBToLinear_cache() {
3125
}
3226
}
3327

34-
const char *blurHashForPixels(int xComponents, int yComponents, int width, int height, uint8_t *rgb, size_t bytesPerRow, char *destination) {
28+
const char *blurHashForPixels(int xComponents, int yComponents, int width, int height, uint8_t *rgb, size_t bytesPerRow) {
29+
static char buffer[2 + 4 + (9 * 9 - 1) * 2 + 1];
30+
3531
if(xComponents < 1 || xComponents > 9) return NULL;
3632
if(yComponents < 1 || yComponents > 9) return NULL;
3733

38-
struct RGB factors[9 * 9] = {0};
34+
float factors[yComponents * xComponents][4];
3935
int factorsCount = xComponents * yComponents;
36+
memset(factors, 0, sizeof(factors));
4037

4138
init_sRGBToLinear_cache();
4239

@@ -67,18 +64,18 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h
6764
free(cosX);
6865
free(cosY);
6966

70-
float *dc = (float *)factors;
71-
float *ac = dc + 3;
67+
float *dc = factors[0];
68+
float *ac = dc + 4;
7269
int acCount = factorsCount - 1;
73-
char *ptr = destination;
70+
char *ptr = buffer;
7471

7572
int sizeFlag = (xComponents - 1) + (yComponents - 1) * 9;
7673
ptr = encode_int(sizeFlag, 1, ptr);
7774

7875
float maximumValue;
7976
if(acCount > 0) {
8077
float actualMaximumValue = 0;
81-
for(int i = 0; i < acCount * 3; i++) {
78+
for(int i = 0; i < acCount * 4; i++) {
8279
actualMaximumValue = fmaxf(fabsf(ac[i]), actualMaximumValue);
8380
}
8481

@@ -93,42 +90,59 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h
9390
ptr = encode_int(encodeDC(dc[0], dc[1], dc[2]), 4, ptr);
9491

9592
for(int i = 0; i < acCount; i++) {
96-
ptr = encode_int(encodeAC(ac[i * 3 + 0], ac[i * 3 + 1], ac[i * 3 + 2], maximumValue), 2, ptr);
93+
ptr = encode_int(encodeAC(ac[i * 4 + 0], ac[i * 4 + 1], ac[i * 4 + 2], maximumValue), 2, ptr);
9794
}
9895

9996
*ptr = 0;
10097

101-
return destination;
98+
return buffer;
10299
}
103100

104101
static void multiplyBasisFunction(
105-
struct RGB *factors, int factorsCount, int width, int height, uint8_t *rgb, size_t bytesPerRow,
102+
float factors[][4], int factorsCount, int width, int height, uint8_t *rgb, size_t bytesPerRow,
106103
float *cosX, float *cosY
107104
) {
108105
for(int y = 0; y < height; y++) {
109106
uint8_t *src = rgb + y * bytesPerRow;
110107
float *cosYLocal = cosY + y * factorsCount;
111-
for(int x = 0; x < width; x++) {
112-
float pixel[3];
108+
int x = 0;
109+
for(; x < width - 3; x += 4) {
110+
float *cosXLocal = cosX + x * factorsCount;
111+
float pixel0[4] = {sRGBToLinear_cache[src[3 * (x+0) + 0]], sRGBToLinear_cache[src[3 * (x+0) + 1]], sRGBToLinear_cache[src[3 * (x+0) + 2]]};
112+
float pixel1[4] = {sRGBToLinear_cache[src[3 * (x+1) + 0]], sRGBToLinear_cache[src[3 * (x+1) + 1]], sRGBToLinear_cache[src[3 * (x+1) + 2]]};
113+
float pixel2[4] = {sRGBToLinear_cache[src[3 * (x+2) + 0]], sRGBToLinear_cache[src[3 * (x+2) + 1]], sRGBToLinear_cache[src[3 * (x+2) + 2]]};
114+
float pixel3[4] = {sRGBToLinear_cache[src[3 * (x+3) + 0]], sRGBToLinear_cache[src[3 * (x+3) + 1]], sRGBToLinear_cache[src[3 * (x+3) + 2]]};
115+
for (int i = 0; i < factorsCount; i++) {
116+
float basis0 = cosYLocal[i] * cosXLocal[i + 0 * factorsCount];
117+
float basis1 = cosYLocal[i] * cosXLocal[i + 1 * factorsCount];
118+
float basis2 = cosYLocal[i] * cosXLocal[i + 2 * factorsCount];
119+
float basis3 = cosYLocal[i] * cosXLocal[i + 3 * factorsCount];
120+
factors[i][0] += basis0 * pixel0[0] + basis1 * pixel1[0] + basis2 * pixel2[0] + basis3 * pixel3[0];
121+
factors[i][1] += basis0 * pixel0[1] + basis1 * pixel1[1] + basis2 * pixel2[1] + basis3 * pixel3[1];
122+
factors[i][2] += basis0 * pixel0[2] + basis1 * pixel1[2] + basis2 * pixel2[2] + basis3 * pixel3[2];
123+
}
124+
}
125+
for(; x < width; x++) {
126+
float pixel[4];
113127
float *cosXLocal = cosX + x * factorsCount;
114128
pixel[0] = sRGBToLinear_cache[src[3 * x + 0]];
115129
pixel[1] = sRGBToLinear_cache[src[3 * x + 1]];
116130
pixel[2] = sRGBToLinear_cache[src[3 * x + 2]];
117131
for (int i = 0; i < factorsCount; i++) {
118132
float basis = cosYLocal[i] * cosXLocal[i];
119-
factors[i].r += basis * pixel[0];
120-
factors[i].g += basis * pixel[1];
121-
factors[i].b += basis * pixel[2];
133+
factors[i][0] += basis * pixel[0];
134+
factors[i][1] += basis * pixel[1];
135+
factors[i][2] += basis * pixel[2];
122136
}
123137
}
124138
}
125139

126140
for (int i = 0; i < factorsCount; i++) {
127141
float normalisation = (i == 0) ? 1 : 2;
128142
float scale = normalisation / (width * height);
129-
factors[i].r *= scale;
130-
factors[i].g *= scale;
131-
factors[i].b *= scale;
143+
factors[i][0] *= scale;
144+
factors[i][1] *= scale;
145+
factors[i][2] *= scale;
132146
}
133147
}
134148

0 commit comments

Comments
 (0)