Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/passes/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ set(passes_SOURCES
RemoveUnusedModuleElements.cpp
RemoveUnusedTypes.cpp
ReorderFunctions.cpp
ReorderFunctionsBySimilarity.cpp
ReorderGlobals.cpp
ReorderLocals.cpp
ReorderTypes.cpp
Expand Down
182 changes: 182 additions & 0 deletions src/passes/ReorderFunctionsBySimilarity.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
/*
* Copyright 2026 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

//
// Sorts functions by structural similarity. This groups mutually-compressible
// instruction sequences together, maximizing subsequent compression ratio
// (e.g., Gzip/Brotli).
//

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

#include "ir/utils.h"
#include "pass.h"
#include "support/threads.h"
#include "wasm.h"

namespace wasm {

// Post-order walker that flattens an expression tree into a sequence of
// integers describing its instructions. Every visited expression contributes
// its expression id, followed by a few immediates for selected expression
// kinds (operator, access width and offset, or value type).
//
// At most `max_len` entries are recorded; anything beyond that is dropped, so
// only the leading part of a large body contributes to the sequence.
struct OpcodeSequenceBuilder
  : public PostWalker<OpcodeSequenceBuilder,
                      UnifiedExpressionVisitor<OpcodeSequenceBuilder>> {
  // The flattened instruction sequence, in post-order.
  std::vector<uint32_t> sequence;

  // Hard upper bound on the number of entries stored in `sequence`.
  static constexpr size_t max_len = 512;

  // Appends `value` unless the sequence has already reached `max_len`. Wider
  // arguments (64-bit offsets, type ids) are narrowed to 32 bits by the
  // implicit conversion at the call site.
  void append(uint32_t value) {
    if (sequence.size() < max_len) {
      sequence.push_back(value);
    }
  }

  // Records `curr` and its most relevant immediates. Called once per
  // expression by the walker.
  void visitExpression(Expression* curr) {
    if (sequence.size() >= max_len) {
      return;
    }
    // Append the core expression ID
    append(curr->_id);

    // Capture important immediate type/operator information
    // TODO: There's probably more data that would be useful to capture.
    //
    // NOTE(review): You could probably extract and reuse the
    // HashStringifyWalker from Outlining.cpp. It turns expression trees into
    // strings by shallowly hashing each expression, including all of its
    // immediates. You would just want it to use a normal PostWalker (but
    // probably modified to also call addUniqueSymbol at control flow
    // boundaries, e.g. end and else) instead of the custom StringifyWalker it
    // currently uses. Nothing a little extra templating can't solve!
    //
    // NOTE(review): the type ids below are narrowed to 32 bits. If getID() is
    // pointer-derived for non-basic types (TODO confirm), the resulting
    // ordering may differ between runs.
    if (auto* unary = curr->dynCast<Unary>()) {
      append(unary->op);
    } else if (auto* binary = curr->dynCast<Binary>()) {
      append(binary->op);
    } else if (auto* load = curr->dynCast<Load>()) {
      append(load->bytes);
      append(load->offset);
    } else if (auto* store = curr->dynCast<Store>()) {
      append(store->bytes);
      append(store->offset);
    } else if (auto* localGet = curr->dynCast<LocalGet>()) {
      append(localGet->type.getID());
    } else if (auto* localSet = curr->dynCast<LocalSet>()) {
      append(localSet->type.getID());
    } else if (auto* const_ = curr->dynCast<Const>()) {
      append(const_->type.getID());
    }
  }
};

struct ReorderFunctionsBySimilarity : public Pass {
bool requiresNonNullableLocalFixups() override { return false; }

void run(Module* module) override {
// If the number of defined functions is small, similarity-based reordering
// does not help and can regress size due to breaking natural call
// proximity.
Comment on lines +75 to +76
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not call proximity, but LEB size, right?

size_t numDefined = 0;
for (const auto& func : module->functions) {
if (!func->imported()) {
numDefined++;
}
}
size_t minFunctions = 150;
auto arg = getArgumentOrDefault("reorder-functions-by-similarity", "");
if (!arg.empty()) {
minFunctions = std::stoul(arg);
}
if (numDefined < minFunctions) {
return;
}

// 1. Separate imported and defined functions
std::vector<std::unique_ptr<Function>> importedFuncs;
std::vector<std::unique_ptr<Function>> definedFuncs;

for (auto& func : module->functions) {
if (func->imported()) {
importedFuncs.push_back(std::move(func));
} else {
definedFuncs.push_back(std::move(func));
}
}

// 2. Build keys for defined functions in parallel
struct FunctionSortKey {
std::unique_ptr<Function> func;
std::string typeStr;
std::vector<std::string> varsStrs;
std::vector<uint32_t> opcodeSequence;
size_t originalIndex;

bool operator<(const FunctionSortKey& other) const {
if (typeStr != other.typeStr) {
return typeStr < other.typeStr;
}
if (varsStrs != other.varsStrs) {
return varsStrs < other.varsStrs;
}
if (opcodeSequence != other.opcodeSequence) {
return opcodeSequence < other.opcodeSequence;
}
return originalIndex < other.originalIndex;
}
};

size_t numThreads = ThreadPool::get()->size();
std::vector<std::function<ThreadWorkState()>> doWorkers;
Comment on lines +126 to +127
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we have any other passes that use ThreadPool directly. This is typically done by ParallelFunctionAnalysis or with a nested Pass for which isFunctionParallel() returns true.

std::atomic<size_t> nextFunction(0);
size_t numFunctions = definedFuncs.size();

std::vector<FunctionSortKey> keys(numFunctions);

for (size_t i = 0; i < numThreads; i++) {
doWorkers.push_back([&]() {
while (true) {
auto index = nextFunction.fetch_add(1);
if (index >= numFunctions) {
return ThreadWorkState::Finished;
}
auto& func = definedFuncs[index];

FunctionSortKey key;
key.typeStr = func->type.toString();

key.varsStrs.reserve(func->vars.size());
for (auto var : func->vars) {
key.varsStrs.push_back(var.toString());
}

OpcodeSequenceBuilder builder;
builder.walk(func->body);
key.opcodeSequence = std::move(builder.sequence);

key.originalIndex = index;
key.func = std::move(func);
keys[index] = std::move(key);
}
});
}
ThreadPool::get()->work(doWorkers);

// 3. Sort defined functions by the similarity heuristic
std::sort(keys.begin(), keys.end());
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorting only works when the similarities are at the beginning of the strings, right? It seems like looking for matching substrings would be more robust. You could check out what Outlining.cpp does with a suffix tree to find common substrings, for example.


// 4. Re-assemble module->functions vector
module->functions.clear();
module->functions.reserve(importedFuncs.size() + keys.size());

for (auto& func : importedFuncs) {
module->functions.push_back(std::move(func));
}
for (auto& key : keys) {
module->functions.push_back(std::move(key.func));
}
}
};

// Factory for the similarity-based function reordering pass; returns a newly
// allocated instance.
Pass* createReorderFunctionsBySimilarityPass() {
  auto* pass = new ReorderFunctionsBySimilarity();
  return pass;
}

} // namespace wasm
3 changes: 3 additions & 0 deletions src/passes/pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,9 @@ void PassRegistry::registerPasses() {
registerPass("reorder-functions-by-name",
"sorts functions by name (useful for debugging)",
createReorderFunctionsByNamePass);
registerPass("reorder-functions-by-similarity",
"sorts functions by similarity to improve compression",
createReorderFunctionsBySimilarityPass);
registerPass("reorder-functions",
"sorts functions by access frequency",
createReorderFunctionsPass);
Expand Down
1 change: 1 addition & 0 deletions src/passes/passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ Pass* createRemoveUnusedNonFunctionModuleElementsPass();
Pass* createRemoveUnusedNamesPass();
Pass* createRemoveUnusedTypesPass();
Pass* createReorderFunctionsByNamePass();
Pass* createReorderFunctionsBySimilarityPass();
Pass* createReorderFunctionsPass();
Pass* createReorderGlobalsPass();
Pass* createReorderGlobalsAlwaysPass();
Expand Down
3 changes: 3 additions & 0 deletions test/lit/help/wasm-metadce.test
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,9 @@
;; CHECK-NEXT: --reorder-functions-by-name sorts functions by name (useful
;; CHECK-NEXT: for debugging)
;; CHECK-NEXT:
;; CHECK-NEXT: --reorder-functions-by-similarity sorts functions by similarity to
;; CHECK-NEXT: improve compression
;; CHECK-NEXT:
;; CHECK-NEXT: --reorder-globals sorts globals by access
;; CHECK-NEXT: frequency
;; CHECK-NEXT:
Expand Down
3 changes: 3 additions & 0 deletions test/lit/help/wasm-opt.test
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,9 @@
;; CHECK-NEXT: --reorder-functions-by-name sorts functions by name (useful
;; CHECK-NEXT: for debugging)
;; CHECK-NEXT:
;; CHECK-NEXT: --reorder-functions-by-similarity sorts functions by similarity to
;; CHECK-NEXT: improve compression
;; CHECK-NEXT:
;; CHECK-NEXT: --reorder-globals sorts globals by access
;; CHECK-NEXT: frequency
;; CHECK-NEXT:
Expand Down
3 changes: 3 additions & 0 deletions test/lit/help/wasm2js.test
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,9 @@
;; CHECK-NEXT: --reorder-functions-by-name sorts functions by name (useful
;; CHECK-NEXT: for debugging)
;; CHECK-NEXT:
;; CHECK-NEXT: --reorder-functions-by-similarity sorts functions by similarity to
;; CHECK-NEXT: improve compression
;; CHECK-NEXT:
;; CHECK-NEXT: --reorder-globals sorts globals by access
;; CHECK-NEXT: frequency
;; CHECK-NEXT:
Expand Down
87 changes: 87 additions & 0 deletions test/lit/passes/reorder-functions-by-similarity.wast
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
;; Passing `--reorder-functions-by-similarity=0` sets the minimum-function threshold to zero, so the pass reorders functions even in this small module.
;; RUN: foreach %s %t wasm-opt -all --reorder-functions-by-similarity=0 -S -o - | filecheck %s

(module
;; CHECK: (type $0 (func (result i32)))
;; CHECK-NEXT: (type $1 (func (param i32) (result i32)))

;; CHECK: (func $sig_b (type $1) (param $0 i32) (result i32)
;; CHECK-NEXT: (i32.const 100)
;; CHECK-NEXT: )

;; CHECK: (func $sig_c (type $1) (param $0 i32) (result i32)
;; CHECK-NEXT: (i32.const 200)
;; CHECK-NEXT: )

;; CHECK: (func $body_add_2 (type $0) (result i32)
;; CHECK-NEXT: (i32.add
;; CHECK-NEXT: (i32.const 10)
;; CHECK-NEXT: (i32.const 20)
;; CHECK-NEXT: )
;; CHECK-NEXT: )

;; CHECK: (func $body_add_1 (type $0) (result i32)
;; CHECK-NEXT: (i32.add
;; CHECK-NEXT: (i32.const 1)
;; CHECK-NEXT: (i32.const 2)
;; CHECK-NEXT: )
;; CHECK-NEXT: )

;; CHECK: (func $body_sub (type $0) (result i32)
;; CHECK-NEXT: (i32.sub
;; CHECK-NEXT: (i32.const 1)
;; CHECK-NEXT: (i32.const 2)
;; CHECK-NEXT: )
;; CHECK-NEXT: )

;; CHECK: (func $locals_a (type $0) (result i32)
;; CHECK-NEXT: (local $0 i32)
;; CHECK-NEXT: (local $1 f64)
;; CHECK-NEXT: (i32.const 5)
;; CHECK-NEXT: )

;; CHECK: (func $locals_b (type $0) (result i32)
;; CHECK-NEXT: (local $0 i32)
;; CHECK-NEXT: (local $1 f64)
;; CHECK-NEXT: (i32.const 10)
;; CHECK-NEXT: )

;; Input functions, deliberately interleaved so that functions sharing a
;; signature, locals or body shape are not adjacent. The CHECK lines above give
;; the expected grouped order.

;; Signature A: (result i32)
(func $body_sub (result i32)
(i32.sub (i32.const 1) (i32.const 2))
)

;; Signature B: (param i32) (result i32)
(func $sig_b (param i32) (result i32)
(i32.const 100)
)

;; Signature A, same body shape as $body_add_1
(func $body_add_2 (result i32)
(i32.add (i32.const 10) (i32.const 20))
)

;; Signature A, has local variables (i32 f64)
(func $locals_a (result i32)
(local i32 f64)
(i32.const 5)
)

;; Signature A, same body shape as $body_add_2
(func $body_add_1 (result i32)
(i32.add (i32.const 1) (i32.const 2))
)

;; Signature A, has local variables (i32 f64), same as $locals_a
(func $locals_b (result i32)
(local i32 f64)
(i32.const 10)
)

;; Signature B: (param i32) (result i32), same as $sig_b
(func $sig_c (param i32) (result i32)
(i32.const 200)
)
)
Loading