Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 16 additions & 10 deletions src/CodeGen_Hexagon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1158,24 +1158,28 @@ Value *CodeGen_Hexagon::shuffle_vectors(Value *a, Value *b,
llvm::Type *result_ty = get_vector_type(element_ty, result_elements);

// Try to rewrite shuffles that only access the elements of b.
int min = indices[0];
for (size_t i = 1; i < indices.size(); i++) {
if (indices[i] != -1 && indices[i] < min) {
min = indices[i];
int min = INT_MAX;
int max = -1;
for (int idx : indices) {
if (idx != -1) {
min = std::min(min, idx);
max = std::max(max, idx);
}
}
Comment on lines +1161 to 1168
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is exactly a "premature optimization is the root of all evil" situation.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do miss Haskell sometimes... `minimum (filter (/= -1) indices)`

if (min == INT_MAX) {
return llvm::PoisonValue::get(result_ty);
}
if (min >= a_elements) {
vector<int> shifted_indices(indices);
for (int &i : shifted_indices) {
if (i != -1) {
i -= a_elements;
}
}
return shuffle_vectors(b, shifted_indices);
return shuffle_vectors(b, b, shifted_indices);
}

// Try to rewrite shuffles that only access the elements of a.
int max = *std::max_element(indices.begin(), indices.end());
if (max < a_elements) {
BitCastInst *a_cast = dyn_cast<BitCastInst>(a);
CallInst *a_call = dyn_cast<CallInst>(a_cast ? a_cast->getOperand(0) : a);
Expand Down Expand Up @@ -1683,7 +1687,7 @@ Value *CodeGen_Hexagon::vlut(Value *lut, Value *idx, int min_index, int max_inde
// contains the result of each range, and a condition vector
// indicating whether the result should be used.
vector<std::pair<Value *, Value *>> ranges;
for (int min_index_i = 0; min_index_i < max_index; min_index_i += 256) {
for (int min_index_i = 0; min_index_i <= max_index; min_index_i += 256) {
// Make a vector of the indices shifted such that the min of
// this range is at 0. Use 16-bit indices for this.
Value *min_index_i_val = create_vector(i16x_t, min_index_i);
Expand All @@ -1697,9 +1701,11 @@ Value *CodeGen_Hexagon::vlut(Value *lut, Value *idx, int min_index, int max_inde
// truncate to 8 bits, as vlut requires.
indices = call_intrin(i8x_t, "halide.hexagon.pack.vh", {indices});

int range_extent_i = std::min(max_index - min_index_i, 255);
Value *range_i = vlut256(slice_vector(lut, min_index_i, range_extent_i),
indices, 0, range_extent_i);
int local_max_index = std::min(max_index - min_index_i, 255);
int slice_size = local_max_index + 1;

Value *range_i = vlut256(slice_vector(lut, min_index_i, slice_size),
indices, 0, local_max_index);
ranges.emplace_back(range_i, use_index);
}

Expand Down
Loading