-
Notifications
You must be signed in to change notification settings - Fork 858
Add compression-oriented function reordering pass #8696
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,182 @@ | ||
| /* | ||
| * Copyright 2026 WebAssembly Community Group participants | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| // | ||
| // Sorts functions by structural similarity. This groups mutually-compressible | ||
| // instruction sequences together, maximizing subsequent compression ratio | ||
| // (e.g., Gzip/Brotli). | ||
| // | ||
|
|
||
| #include <algorithm> | ||
| #include <memory> | ||
| #include <vector> | ||
|
|
||
| #include "ir/utils.h" | ||
| #include "pass.h" | ||
| #include "support/threads.h" | ||
| #include "wasm.h" | ||
|
|
||
| namespace wasm { | ||
|
|
||
| // Post-order traversal visitor to extract instruction sequence | ||
| struct OpcodeSequenceBuilder | ||
| : public PostWalker<OpcodeSequenceBuilder, | ||
| UnifiedExpressionVisitor<OpcodeSequenceBuilder>> { | ||
| std::vector<uint32_t> sequence; | ||
| const size_t max_len = 512; | ||
|
|
||
| void visitExpression(Expression* curr) { | ||
| if (sequence.size() >= max_len) { | ||
| return; | ||
| } | ||
| // Append the core expression ID | ||
| sequence.push_back(curr->_id); | ||
|
|
||
| // Capture important immediate type/operator information | ||
| // TODO: There's probably more data that would be useful to capture. | ||
| if (auto* unary = curr->dynCast<Unary>()) { | ||
| sequence.push_back(unary->op); | ||
| } else if (auto* binary = curr->dynCast<Binary>()) { | ||
| sequence.push_back(binary->op); | ||
| } else if (auto* load = curr->dynCast<Load>()) { | ||
| sequence.push_back(load->bytes); | ||
| sequence.push_back(load->offset); | ||
| } else if (auto* store = curr->dynCast<Store>()) { | ||
| sequence.push_back(store->bytes); | ||
| sequence.push_back(store->offset); | ||
| } else if (auto* localGet = curr->dynCast<LocalGet>()) { | ||
| sequence.push_back(localGet->type.getID()); | ||
| } else if (auto* localSet = curr->dynCast<LocalSet>()) { | ||
| sequence.push_back(localSet->type.getID()); | ||
| } else if (auto* const_ = curr->dynCast<Const>()) { | ||
| sequence.push_back(const_->type.getID()); | ||
| } | ||
| } | ||
| }; | ||
|
|
||
| struct ReorderFunctionsBySimilarity : public Pass { | ||
| bool requiresNonNullableLocalFixups() override { return false; } | ||
|
|
||
| void run(Module* module) override { | ||
| // If the number of defined functions is small, similarity-based reordering | ||
| // does not help and can regress size due to breaking natural call | ||
| // proximity. | ||
|
Comment on lines
+75
to
+76
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not call proximity, but LEB size, right? |
||
| size_t numDefined = 0; | ||
| for (const auto& func : module->functions) { | ||
| if (!func->imported()) { | ||
| numDefined++; | ||
| } | ||
| } | ||
| size_t minFunctions = 150; | ||
| auto arg = getArgumentOrDefault("reorder-functions-by-similarity", ""); | ||
| if (!arg.empty()) { | ||
| minFunctions = std::stoul(arg); | ||
| } | ||
| if (numDefined < minFunctions) { | ||
| return; | ||
| } | ||
|
|
||
| // 1. Separate imported and defined functions | ||
| std::vector<std::unique_ptr<Function>> importedFuncs; | ||
| std::vector<std::unique_ptr<Function>> definedFuncs; | ||
|
|
||
| for (auto& func : module->functions) { | ||
| if (func->imported()) { | ||
| importedFuncs.push_back(std::move(func)); | ||
| } else { | ||
| definedFuncs.push_back(std::move(func)); | ||
| } | ||
| } | ||
|
|
||
| // 2. Build keys for defined functions in parallel | ||
| struct FunctionSortKey { | ||
| std::unique_ptr<Function> func; | ||
| std::string typeStr; | ||
| std::vector<std::string> varsStrs; | ||
| std::vector<uint32_t> opcodeSequence; | ||
| size_t originalIndex; | ||
|
|
||
| bool operator<(const FunctionSortKey& other) const { | ||
| if (typeStr != other.typeStr) { | ||
| return typeStr < other.typeStr; | ||
| } | ||
| if (varsStrs != other.varsStrs) { | ||
| return varsStrs < other.varsStrs; | ||
| } | ||
| if (opcodeSequence != other.opcodeSequence) { | ||
| return opcodeSequence < other.opcodeSequence; | ||
| } | ||
| return originalIndex < other.originalIndex; | ||
| } | ||
| }; | ||
|
|
||
| size_t numThreads = ThreadPool::get()->size(); | ||
| std::vector<std::function<ThreadWorkState()>> doWorkers; | ||
|
Comment on lines
+126
to
+127
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we have any other passes that use |
||
| std::atomic<size_t> nextFunction(0); | ||
| size_t numFunctions = definedFuncs.size(); | ||
|
|
||
| std::vector<FunctionSortKey> keys(numFunctions); | ||
|
|
||
| for (size_t i = 0; i < numThreads; i++) { | ||
| doWorkers.push_back([&]() { | ||
| while (true) { | ||
| auto index = nextFunction.fetch_add(1); | ||
| if (index >= numFunctions) { | ||
| return ThreadWorkState::Finished; | ||
| } | ||
| auto& func = definedFuncs[index]; | ||
|
|
||
| FunctionSortKey key; | ||
| key.typeStr = func->type.toString(); | ||
|
|
||
| key.varsStrs.reserve(func->vars.size()); | ||
| for (auto var : func->vars) { | ||
| key.varsStrs.push_back(var.toString()); | ||
| } | ||
|
|
||
| OpcodeSequenceBuilder builder; | ||
| builder.walk(func->body); | ||
| key.opcodeSequence = std::move(builder.sequence); | ||
|
|
||
| key.originalIndex = index; | ||
| key.func = std::move(func); | ||
| keys[index] = std::move(key); | ||
| } | ||
| }); | ||
| } | ||
| ThreadPool::get()->work(doWorkers); | ||
|
|
||
| // 3. Sort defined functions by the similarity heuristic | ||
| std::sort(keys.begin(), keys.end()); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorting only works when the similarities are at the beginning of the strings, right? It seems like looking for matching substrings would be more robust. You could check out what Outlining.cpp does with a suffix tree to find common substrings, for example. |
||
|
|
||
| // 4. Re-assemble module->functions vector | ||
| module->functions.clear(); | ||
| module->functions.reserve(importedFuncs.size() + keys.size()); | ||
|
|
||
| for (auto& func : importedFuncs) { | ||
| module->functions.push_back(std::move(func)); | ||
| } | ||
| for (auto& key : keys) { | ||
| module->functions.push_back(std::move(key.func)); | ||
| } | ||
| } | ||
| }; | ||
|
|
||
| Pass* createReorderFunctionsBySimilarityPass() { | ||
| return new ReorderFunctionsBySimilarity(); | ||
| } | ||
|
|
||
| } // namespace wasm | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| ;; `reorder-functions-by-similarity=0` disables the size threshold, forcing the compiler to reorder functions. | ||
| ;; RUN: foreach %s %t wasm-opt -all --reorder-functions-by-similarity=0 -S -o - | filecheck %s | ||
|
|
||
| (module | ||
| ;; CHECK: (type $0 (func (result i32))) | ||
| ;; CHECK-NEXT: (type $1 (func (param i32) (result i32))) | ||
|
|
||
| ;; CHECK: (func $sig_b (type $1) (param $0 i32) (result i32) | ||
| ;; CHECK-NEXT: (i32.const 100) | ||
| ;; CHECK-NEXT: ) | ||
|
|
||
| ;; CHECK: (func $sig_c (type $1) (param $0 i32) (result i32) | ||
| ;; CHECK-NEXT: (i32.const 200) | ||
| ;; CHECK-NEXT: ) | ||
|
|
||
| ;; CHECK: (func $body_add_2 (type $0) (result i32) | ||
| ;; CHECK-NEXT: (i32.add | ||
| ;; CHECK-NEXT: (i32.const 10) | ||
| ;; CHECK-NEXT: (i32.const 20) | ||
| ;; CHECK-NEXT: ) | ||
| ;; CHECK-NEXT: ) | ||
|
|
||
| ;; CHECK: (func $body_add_1 (type $0) (result i32) | ||
| ;; CHECK-NEXT: (i32.add | ||
| ;; CHECK-NEXT: (i32.const 1) | ||
| ;; CHECK-NEXT: (i32.const 2) | ||
| ;; CHECK-NEXT: ) | ||
| ;; CHECK-NEXT: ) | ||
|
|
||
| ;; CHECK: (func $body_sub (type $0) (result i32) | ||
| ;; CHECK-NEXT: (i32.sub | ||
| ;; CHECK-NEXT: (i32.const 1) | ||
| ;; CHECK-NEXT: (i32.const 2) | ||
| ;; CHECK-NEXT: ) | ||
| ;; CHECK-NEXT: ) | ||
|
|
||
| ;; CHECK: (func $locals_a (type $0) (result i32) | ||
| ;; CHECK-NEXT: (local $0 i32) | ||
| ;; CHECK-NEXT: (local $1 f64) | ||
| ;; CHECK-NEXT: (i32.const 5) | ||
| ;; CHECK-NEXT: ) | ||
|
|
||
| ;; CHECK: (func $locals_b (type $0) (result i32) | ||
| ;; CHECK-NEXT: (local $0 i32) | ||
| ;; CHECK-NEXT: (local $1 f64) | ||
| ;; CHECK-NEXT: (i32.const 10) | ||
| ;; CHECK-NEXT: ) | ||
|
|
||
| ;; Functions in mixed order: | ||
|
|
||
| ;; Signature A | ||
| (func $body_sub (result i32) | ||
| (i32.sub (i32.const 1) (i32.const 2)) | ||
| ) | ||
|
|
||
| ;; Signature B: (param i32) (result i32) | ||
| (func $sig_b (param i32) (result i32) | ||
| (i32.const 100) | ||
| ) | ||
|
|
||
| ;; Signature A, same body shape as $body_add_1 | ||
| (func $body_add_2 (result i32) | ||
| (i32.add (i32.const 10) (i32.const 20)) | ||
| ) | ||
|
|
||
| ;; Signature A, has local variables (i32 f64) | ||
| (func $locals_a (result i32) | ||
| (local i32 f64) | ||
| (i32.const 5) | ||
| ) | ||
|
|
||
| ;; Signature A, same body shape as $body_add_2 | ||
| (func $body_add_1 (result i32) | ||
| (i32.add (i32.const 1) (i32.const 2)) | ||
| ) | ||
|
|
||
| ;; Signature A, has local variables (i32 f64), same as $locals_a | ||
| (func $locals_b (result i32) | ||
| (local i32 f64) | ||
| (i32.const 10) | ||
| ) | ||
|
|
||
| ;; Signature B: (param i32) (result i32), same as $sig_b | ||
| (func $sig_c (param i32) (result i32) | ||
| (i32.const 200) | ||
| ) | ||
| ) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You could probably extract and reuse the
HashStringifyWalkerfrom Outlining.cpp. It turns expression trees into strings by shallowly hashing each expression, including all of its immediates. You would just want it to use a normalPostWalker(but probably modified to also calladdUniqueSymbolat control flow boundaries, e.g.endandelse) instead of the customStringifyWalkerit currently uses. Nothing a little extra templating can't solve!