Skip to content

Commit ed7127e

Browse files
committed
RS-FNT: simd indices as member variables
1 parent 968bc75 commit ed7127e

2 files changed

Lines changed: 17 additions & 20 deletions

File tree

src/fec_rs_fnt.h

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,11 @@ class RsFnt : public FecCode<T> {
6060
// decoding context used in encoding of systematic FNT
6161
std::unique_ptr<DecodeContext<T>> enc_context;
6262

63+
// Indices used for accelerated functions
64+
size_t simd_vec_len;
65+
size_t simd_trailing_len;
66+
size_t simd_offset;
67+
6368
public:
6469
RsFnt(
6570
FecType type,
@@ -70,6 +75,12 @@ class RsFnt : public FecCode<T> {
7075
: FecCode<T>(type, word_size, n_data, n_parities, pkt_size)
7176
{
7277
this->fec_init();
78+
79+
// Indices used for accelerated functions
80+
const unsigned ratio = simd::countof<T>();
81+
simd_vec_len = this->pkt_size / ratio;
82+
simd_trailing_len = this->pkt_size - simd_vec_len * ratio;
83+
simd_offset = simd_vec_len * ratio;
7384
}
7485

7586
inline void check_params() override

src/fec_vectorisation.cpp

Lines changed: 6 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -53,20 +53,13 @@ void RsFnt<uint16_t>::encode_post_process(
5353
uint16_t threshold = this->gf->card_minus_one();
5454
unsigned code_len = this->n_outputs;
5555

56-
// number of elements per vector register
57-
unsigned vec_size = simd::countof<uint16_t>();
58-
// number of vector registers per fragment packet
59-
size_t vecs_nb = size / vec_size;
60-
// odd number of elements not vectorized
61-
size_t last_len = size - vecs_nb * vec_size;
62-
6356
simd::encode_post_process(
64-
output, props, offset, code_len, threshold, vecs_nb);
57+
output, props, offset, code_len, threshold, simd_vec_len);
6558

66-
if (last_len > 0) {
59+
if (simd_trailing_len > 0) {
6760
for (unsigned i = 0; i < code_len; ++i) {
6861
uint16_t* chunk = output.get(i);
69-
for (size_t j = vecs_nb * vec_size; j < size; ++j) {
62+
for (size_t j = simd_offset; j < size; ++j) {
7063
if (chunk[j] == threshold) {
7164
props[i].add(offset + j, OOR_MARK);
7265
}
@@ -85,20 +78,13 @@ void RsFnt<uint32_t>::encode_post_process(
8578
const uint32_t threshold = this->gf->card_minus_one();
8679
const unsigned code_len = this->n_outputs;
8780

88-
// number of elements per vector register
89-
const unsigned vec_size = simd::countof<uint32_t>();
90-
// number of vector registers per fragment packet
91-
const size_t vecs_nb = size / vec_size;
92-
// odd number of elements not vectorized
93-
const size_t last_len = size - vecs_nb * vec_size;
94-
9581
simd::encode_post_process(
96-
output, props, offset, code_len, threshold, vecs_nb);
82+
output, props, offset, code_len, threshold, simd_vec_len);
9783

98-
if (last_len > 0) {
84+
if (simd_trailing_len > 0) {
9985
for (unsigned i = 0; i < code_len; ++i) {
10086
uint32_t* chunk = output.get(i);
101-
for (size_t j = vecs_nb * vec_size; j < size; ++j) {
87+
for (size_t j = simd_offset; j < size; ++j) {
10288
if (chunk[j] == threshold) {
10389
props[i].add(offset + j, OOR_MARK);
10490
}

0 commit comments

Comments
 (0)