diff --git a/src/ir/module-splitting.cpp b/src/ir/module-splitting.cpp index f5825a93bf0..21d82d013a4 100644 --- a/src/ir/module-splitting.cpp +++ b/src/ir/module-splitting.cpp @@ -558,310 +558,6 @@ static void walkSegments(Walker& walker, Module* module) { } } -void ModuleSplitter::indirectReferencesToSecondaryFunctions() { - // Turn references to secondary functions into references to thunks that - // perform a direct call to the original referent. The direct calls in the - // thunks will be handled like all other cross-module calls later, in - // |indirectCallsToSecondaryFunctions|. - struct Gatherer : public PostWalker { - ModuleSplitter& parent; - - Gatherer(ModuleSplitter& parent) : parent(parent) {} - - // Collect RefFuncs in a map from the function name to all RefFuncs that - // refer to it. We only collect this for secondary funcs. - InsertOrderedMap> map; - - void visitRefFunc(RefFunc* curr) { - Module* currModule = getModule(); - // Add ref.func to the map when - // 1. ref.func's target func is in one of the secondary modules and - // 2. the current module is a different module (either the primary module - // or a different secondary module) - if (parent.allSecondaryFuncs.contains(curr->func) && - (currModule == &parent.primary || - parent.secondaries.at(parent.funcToSecondaryIndex.at(curr->func)) - .get() != currModule)) { - map[curr->func].push_back(curr); - } - } - } gatherer(*this); - // We shouldn't use collector.walkModuleCode here, because we don't want to - // walk global initializers. At this point, all globals are still in the - // primary module, so if we walk global initializers here, it will create - // unnecessary trampolines. - // - // For example, we have (global $a funcref (ref.func $foo)), and $foo was - // split into a secondary module. Because $a is at this point still in the - // primary module, $foo will be considered to exist in a different module, so - // this will create a trampoline for $foo. But it is possible that later we - // find out $a is exclusively used by that secondary module and move $a there. - // In that case, $a can just reference $foo locally, but if we scan global - // initializers here, we would have created an unnecessary trampoline for - // $foo. - walkSegments(gatherer, &primary); - for (auto& curr : primary.functions) { - if (!curr->imported()) { - gatherer.walkFunction(curr.get()); - } - } - for (auto& secondaryPtr : secondaries) { - gatherer.walkModule(secondaryPtr.get()); - } - - // Ignore references to secondary functions that occur in the active segment - // that will contain the imported placeholders. Indirect calls to table slots - // initialized by that segment will already go to the right place once the - // secondary module has been loaded and the table has been patched. - std::unordered_set ignore; - if (tableManager.activeSegment) { - for (auto* expr : tableManager.activeSegment->data) { - if (auto* ref = expr->dynCast()) { - ignore.insert(ref); - } - } - } - - // Fix up what we found: Generate trampolines as described earlier, and apply - // them. - Builder builder(primary); - // Generate the new trampoline function and add it to the module. - for (auto& [name, refFuncs] : gatherer.map) { - // Find the relevant (non-ignored) RefFuncs. If there are none, we can skip - // creating a thunk entirely. - std::vector relevantRefFuncs; - for (auto* refFunc : refFuncs) { - assert(refFunc->func == name); - if (!ignore.contains(refFunc)) { - relevantRefFuncs.push_back(refFunc); - } - } - if (relevantRefFuncs.empty()) { - continue; - } - - Name trampoline = getTrampoline(name); - // Update RefFuncs to refer to it. - for (auto* refFunc : relevantRefFuncs) { - refFunc->func = trampoline; - } - } -} - -void ModuleSplitter::indirectCallsToSecondaryFunctions() { - // Update direct calls of secondary functions to be indirect calls of their - // corresponding table indices instead. - struct CallIndirector : public PostWalker { - ModuleSplitter& parent; - CallIndirector(ModuleSplitter& parent) : parent(parent) {} - void visitCall(Call* curr) { - // Return if the call's target is not in one of the secondary module. - if (!parent.allSecondaryFuncs.contains(curr->target)) { - return; - } - // Return if the current module is the same module as the call's target, - // because we don't need a call_indirect within the same module. - Module* currModule = getModule(); - if (currModule != &parent.primary && - parent.secondaries.at(parent.funcToSecondaryIndex.at(curr->target)) - .get() == currModule) { - return; - } - - Builder builder(*getModule()); - Index secIndex = parent.funcToSecondaryIndex.at(curr->target); - auto* func = parent.secondaries.at(secIndex)->getFunction(curr->target); - auto tableSlot = - parent.tableManager.getSlot(curr->target, func->type.getHeapType()); - - replaceCurrent( - builder.makeCallIndirect(tableSlot.tableName, - tableSlot.makeExpr(parent.primary), - curr->operands, - func->type.getHeapType(), - curr->isReturn)); - } - }; - CallIndirector callIndirector(*this); - callIndirector.walkModule(&primary); - for (auto& secondaryPtr : secondaries) { - callIndirector.walkModule(secondaryPtr.get()); - } -} - -void ModuleSplitter::exportImportCalledPrimaryFunctions() { - // Find primary functions called/referred to from the secondary modules. - using CalledPrimaryToModules = std::map>; - for (auto& secondaryPtr : secondaries) { - Module* secondary = secondaryPtr.get(); - ModuleUtils::ParallelFunctionAnalysis callCollector( - *secondary, - [&](Function* func, CalledPrimaryToModules& calledPrimaryToModules) { - struct CallCollector : PostWalker { - const std::unordered_set& primaryFuncs; - CalledPrimaryToModules& calledPrimaryToModules; - CallCollector(const std::unordered_set& primaryFuncs, - CalledPrimaryToModules& calledPrimaryToModules) - : primaryFuncs(primaryFuncs), - calledPrimaryToModules(calledPrimaryToModules) {} - void visitCall(Call* curr) { - if (primaryFuncs.contains(curr->target)) { - calledPrimaryToModules[curr->target].insert(getModule()); - } - } - void visitRefFunc(RefFunc* curr) { - if (primaryFuncs.contains(curr->func)) { - calledPrimaryToModules[curr->func].insert(getModule()); - } - } - }; - CallCollector(primaryFuncs, calledPrimaryToModules) - .walkFunctionInModule(func, secondary); - }); - - CalledPrimaryToModules calledPrimaryToModules; - for (auto& [_, map] : callCollector.map) { - calledPrimaryToModules.merge(map); - } - - // Ensure each called primary function is exported and imported - for (auto& [func, modules] : calledPrimaryToModules) { - exportImportFunction(func, modules); - } - } -} - -void ModuleSplitter::setupTablePatching() { - if (!tableManager.activeTable) { - return; - } - - std::map> moduleToReplacedElems; - // Replace table references to secondary functions with an imported - // placeholder that encodes the table index in its name: - // `importNamespace`.`index`. - forEachElement( - primary, [&](Name table, Name, Index index, Expression*& elem) { - auto* ref = elem->dynCast(); - if (!ref) { - return; - } - if (!allSecondaryFuncs.contains(ref->func)) { - return; - } - assert(table == tableManager.activeTable->name); - - placeholderMap[table][index] = ref->func; - Index secondaryIndex = funcToSecondaryIndex.at(ref->func); - Module& secondary = *secondaries.at(secondaryIndex); - Name secondaryName = config.secondaryNames.at(secondaryIndex); - auto* secondaryFunc = secondary.getFunction(ref->func); - moduleToReplacedElems[&secondary][index] = secondaryFunc; - if (!config.usePlaceholders) { - // TODO: This can create active element segments with lots of nulls. We - // should optimize them like we do data segments with zeros. - elem = Builder(primary).makeRefNull(HeapType::nofunc); - return; - } - auto placeholder = std::make_unique(); - placeholder->module = config.placeholderNamespacePrefix.toString() + "." + - secondaryName.toString(); - placeholder->base = std::to_string(index); - placeholder->name = Names::getValidFunctionName( - primary, std::string("placeholder_") + placeholder->base.toString()); - placeholder->hasExplicitName = true; - placeholder->type = secondaryFunc->type.with(Inexact); - elem = Builder(primary).makeRefFunc(placeholder->name, placeholder->type); - primary.addFunction(std::move(placeholder)); - }); - - if (moduleToReplacedElems.size() == 0) { - // No placeholders to patch out of the table - return; - } - - for (auto& [secondaryPtr, replacedElems] : moduleToReplacedElems) { - Module& secondary = *secondaryPtr; - auto secondaryTable = - ModuleUtils::copyTable(tableManager.activeTable, secondary); - - if (tableManager.activeBase.global.size()) { - assert(tableManager.activeTableSegments.size() == 1 && - "Unexpected number of segments with non-const base"); - assert(secondary.tables.size() == 1 && secondary.elementSegments.empty()); - // Since addition is not currently allowed in initializer expressions, we - // need to start the new secondary segment where the primary segment - // starts. The secondary segment will contain the same primary functions - // as the primary module except in positions where it needs to overwrite a - // placeholder function. All primary functions in the table therefore need - // to be imported into the second module. TODO: use better strategies - // here, such as using ref.func in the start function or standardizing - // addition in initializer expressions. - ElementSegment* primarySeg = tableManager.activeTableSegments.front(); - std::vector secondaryElems; - secondaryElems.reserve(primarySeg->data.size()); - - // Copy functions from the primary segment to the secondary segment, - // replacing placeholders and creating new exports and imports as - // necessary. - auto replacement = replacedElems.begin(); - for (Index i = 0; - i < primarySeg->data.size() && replacement != replacedElems.end(); - ++i) { - if (replacement->first == i) { - // primarySeg->data[i] is a placeholder, so use the secondary - // function. - auto* func = replacement->second; - auto* ref = Builder(secondary).makeRefFunc(func->name, func->type); - secondaryElems.push_back(ref); - ++replacement; - } else if (auto* get = primarySeg->data[i]->dynCast()) { - exportImportFunction(get->func, {&secondary}); - auto* copied = - ExpressionManipulator::copy(primarySeg->data[i], secondary); - secondaryElems.push_back(copied); - } - } - - auto offset = ExpressionManipulator::copy(primarySeg->offset, secondary); - auto secondarySeg = std::make_unique( - secondaryTable->name, offset, secondaryTable->type, secondaryElems); - secondarySeg->setName(primarySeg->name, primarySeg->hasExplicitName); - secondary.addElementSegment(std::move(secondarySeg)); - return; - } - - // Create active table segments in the secondary module to patch in the - // original functions when it is instantiated. - Index currBase = replacedElems.begin()->first; - std::vector currData; - auto finishSegment = [&]() { - auto* offset = Builder(secondary).makeConst( - Literal::makeFromInt32(currBase, secondaryTable->addressType)); - auto secondarySeg = std::make_unique( - secondaryTable->name, offset, secondaryTable->type, currData); - Name name = Names::getValidElementSegmentName( - secondary, Name::fromInt(secondary.elementSegments.size())); - secondarySeg->setName(name, false); - secondary.addElementSegment(std::move(secondarySeg)); - }; - for (auto curr = replacedElems.begin(); curr != replacedElems.end(); - ++curr) { - if (curr->first != currBase + currData.size()) { - finishSegment(); - currBase = curr->first; - currData.clear(); - } - auto* func = curr->second; - currData.push_back( - Builder(secondary).makeRefFunc(func->name, func->type)); - } - if (currData.size()) { - finishSegment(); - } - } -} - void ModuleSplitter::shareImportableItems() { // Map internal names to (one of) their corresponding export names. Don't // consider functions because they have already been imported and exported as @@ -947,278 +643,582 @@ void ModuleSplitter::shareImportableItems() { } \ } -#include "wasm-delegations-fields.def" +#include "wasm-delegations-fields.def" + } + }; + + // Given a module, collect names used in the module + auto getUsedNames = [&](Module& module) { + UsedNames used; + ModuleUtils::ParallelFunctionAnalysis nameCollector( + module, [&](Function* func, UsedNames& used) { + if (!func->imported()) { + NameCollector(used).walk(func->body); + } + }); + + for (auto& [_, funcUsed] : nameCollector.map) { + used.globals.insert(funcUsed.globals.begin(), funcUsed.globals.end()); + used.memories.insert(funcUsed.memories.begin(), funcUsed.memories.end()); + used.tables.insert(funcUsed.tables.begin(), funcUsed.tables.end()); + used.tags.insert(funcUsed.tags.begin(), funcUsed.tags.end()); + } + + NameCollector collector(used); + // We shouldn't use collector.walkModuleCode here, because we don't want to + // walk global initializers. At this point, all globals are still in the + // primary module, so if we walk global initializers here, other globals + // appearing in their initializers will all be marked as used in the primary + // module, which is not what we want. + // + // For example, we have (global $a i32 (global.get $b)). Because $a is at + // this point still in the primary module, $b will be marked as "used" in + // the primary module. But $a can be moved to a secondary module later if it + // is used exclusively by that module. Then $b can be also moved, in case it + // doesn't have other uses. But if it is marked as "used" in the primary + // module, it can't. + walkSegments(collector, &module); + for (auto& segment : module.dataSegments) { + if (segment->memory.is()) { + used.memories.insert(segment->memory); + } + } + for (auto& segment : module.elementSegments) { + if (segment->table.is()) { + used.tables.insert(segment->table); + } + } + + // If primary module has exports, they are "used" in it. Secondary modules + // don't have exports, so this only applies to the primary module. + for (auto& ex : module.exports) { + switch (ex->kind) { + case ExternalKind::Global: + used.globals.insert(*ex->getInternalName()); + break; + case ExternalKind::Memory: + used.memories.insert(*ex->getInternalName()); + break; + case ExternalKind::Table: + used.tables.insert(*ex->getInternalName()); + break; + case ExternalKind::Tag: + used.tags.insert(*ex->getInternalName()); + break; + default: + break; + } + } + return used; + }; + + UsedNames primaryUsed = getUsedNames(primary); + std::vector secondaryUsed; + for (auto& secondaryPtr : secondaries) { + secondaryUsed.push_back(getUsedNames(*secondaryPtr)); + } + + // Compute the transitive closure of globals referenced in other globals' + // initializers. Since globals can reference other globals, we must ensure + // that if a global is used in a module, all its dependencies are also marked + // as used. + auto computeTransitiveGlobals = [&](UsedNames& used) { + UniqueNonrepeatingDeferredQueue worklist; + for (auto global : used.globals) { + worklist.push(global); + } + while (!worklist.empty()) { + Name name = worklist.pop(); + // At this point all globals are still in the primary module, so this + // exists + auto* global = primary.getGlobal(name); + if (!global->imported() && global->init) { + for (auto* get : FindAll(global->init).list) { + worklist.push(get->name); + used.globals.insert(get->name); + } + } + } + }; + + computeTransitiveGlobals(primaryUsed); + for (auto& used : secondaryUsed) { + computeTransitiveGlobals(used); + } + + // Given a name and module item kind, returns the list of secondary modules + // using that name + auto getUsingSecondaries = [&](const Name& name, auto UsedNames::* field) { + std::vector usingModules; + for (size_t i = 0; i < secondaries.size(); ++i) { + if ((secondaryUsed[i].*field).contains(name)) { + usingModules.push_back(secondaries[i].get()); + } + } + return usingModules; + }; + + // Share module items with secondary modules. + // 1. Only share an item with the modules that use it + // 2. If an item is used by only a single secondary module, move the item to + // that secondary module. If an item is used by multiple modules (including + // the primary and secondary modules), export the item from the primary and + // import it from the using secondary modules. + + std::vector memoriesToRemove; + for (auto& memory : primary.memories) { + auto usingSecondaries = + getUsingSecondaries(memory->name, &UsedNames::memories); + bool usedInPrimary = primaryUsed.memories.contains(memory->name); + + if (!usedInPrimary && usingSecondaries.size() == 1) { + auto* secondary = usingSecondaries[0]; + ModuleUtils::copyMemory(memory.get(), *secondary); + memoriesToRemove.push_back(memory->name); + } else { + for (auto* secondary : usingSecondaries) { + auto* secondaryMemory = + ModuleUtils::copyMemory(memory.get(), *secondary); + makeImportExport( + *memory, *secondaryMemory, "memory", ExternalKind::Memory); + } + } + } + for (auto& name : memoriesToRemove) { + primary.removeMemory(name); + } + + std::vector tablesToRemove; + for (auto& table : primary.tables) { + auto usingSecondaries = + getUsingSecondaries(table->name, &UsedNames::tables); + bool usedInPrimary = primaryUsed.tables.contains(table->name); + + if (!usedInPrimary && usingSecondaries.size() == 1) { + auto* secondary = usingSecondaries[0]; + // In case we copied this table to this secondary module in + // setupTablePatching(), !usedInPrimary can't be satisfied, because the + // primary module should have an element segment that refers to this + // table. + assert(!secondary->getTableOrNull(table->name)); + ModuleUtils::copyTable(table.get(), *secondary); + tablesToRemove.push_back(table->name); + } else { + for (auto* secondary : usingSecondaries) { + // 1. In case we copied this table to this secondary module in + // setupTablePatching(), secondary.getTableOrNull(table->name) is not + // null, and we need to import it. + // 2. As in the case with other module elements, if the table is used in + // the secondary module's instructions, we need to export it. + auto secondaryTable = secondary->getTableOrNull(table->name); + if (!secondaryTable) { + secondaryTable = ModuleUtils::copyTable(table.get(), *secondary); + } + makeImportExport(*table, *secondaryTable, "table", ExternalKind::Table); + } + } + } + for (auto& name : tablesToRemove) { + primary.removeTable(name); + } + + std::vector globalsToRemove; + for (auto& global : primary.globals) { + if (global->mutable_) { + assert(primary.features.hasMutableGlobals() && + "TODO: add wrapper functions for disallowed mutable globals"); + } + + auto usingSecondaries = + getUsingSecondaries(global->name, &UsedNames::globals); + bool inPrimary = primaryUsed.globals.contains(global->name); + + if (!inPrimary && usingSecondaries.empty()) { + // It's not used anywhere, so delete it. Unlike other unused module items + // (memories, tables, and tags) that can just sit in the primary module + // and later be DCE'ed by another pass, we should remove it here, because + // an unused global can contain an initializer that refers to another + // global that will be moved to a secondary module, like + // (global $unused i32 (global.get $a)) // $a is moved to a secondary + globalsToRemove.push_back(global->name); + + } else if (!inPrimary && usingSecondaries.size() == 1) { + // We are moving this global to this secondary module + auto* secondary = usingSecondaries[0]; + auto* secondaryGlobal = ModuleUtils::copyGlobal(global.get(), *secondary); + globalsToRemove.push_back(global->name); + + if (secondaryGlobal->init) { + // When a global's initializer contains ref.func + for (auto* ref : FindAll(secondaryGlobal->init).list) { + // If ref.func's function is in a different secondary module, we + // create a trampoline here. + if (auto targetIndexIt = funcToSecondaryIndex.find(ref->func); + targetIndexIt != funcToSecondaryIndex.end()) { + if (secondaries[targetIndexIt->second].get() != secondary) { + ref->func = getTrampoline(ref->func); + } + } + // 1. If ref.func's function is in the primary module, we export it + // here. + // 2. If ref.func's function is in a different secondary module and we + // just created a trampoline for it in the primary module above, we + // export the trampoline here. + if (primary.getFunctionOrNull(ref->func)) { + exportImportFunction(ref->func, {secondary}); + } + // If ref.func's function is in the same secondary module, we don't + // need to do anything. The ref.func can directly reference the + // function. + } + } + + } else { // We are NOT moving this global to the secondary module + if (global->init) { + for (auto* ref : FindAll(global->init).list) { + // If we are exporting this global from the primary module, we should + // create a trampoline here, because we skipped doing it for global + // initializers in indirectReferencesToSecondaryFunctions. + if (allSecondaryFuncs.contains(ref->func)) { + ref->func = getTrampoline(ref->func); + } + } + } + + for (auto* secondary : usingSecondaries) { + auto* secondaryGlobal = + ModuleUtils::copyGlobal(global.get(), *secondary); + makeImportExport( + *global, *secondaryGlobal, "global", ExternalKind::Global); + } + } + } + for (auto& name : globalsToRemove) { + primary.removeGlobal(name); + } + + std::vector tagsToRemove; + for (auto& tag : primary.tags) { + auto usingSecondaries = getUsingSecondaries(tag->name, &UsedNames::tags); + bool usedInPrimary = primaryUsed.tags.contains(tag->name); + + if (!usedInPrimary && usingSecondaries.size() == 1) { + auto* secondary = usingSecondaries[0]; + ModuleUtils::copyTag(tag.get(), *secondary); + tagsToRemove.push_back(tag->name); + } else { + for (auto* secondary : usingSecondaries) { + auto* secondaryTag = ModuleUtils::copyTag(tag.get(), *secondary); + makeImportExport(*tag, *secondaryTag, "tag", ExternalKind::Tag); + } } - }; + } + for (auto& name : tagsToRemove) { + primary.removeTag(name); + } +} - // Given a module, collect names used in the module - auto getUsedNames = [&](Module& module) { - UsedNames used; - ModuleUtils::ParallelFunctionAnalysis nameCollector( - module, [&](Function* func, UsedNames& used) { - if (!func->imported()) { - NameCollector(used).walk(func->body); - } - }); +void ModuleSplitter::indirectReferencesToSecondaryFunctions() { + // Turn references to secondary functions into references to thunks that + // perform a direct call to the original referent. The direct calls in the + // thunks will be handled like all other cross-module calls later, in + // |indirectCallsToSecondaryFunctions|. + struct Gatherer : public PostWalker { + ModuleSplitter& parent; - for (auto& [_, funcUsed] : nameCollector.map) { - used.globals.insert(funcUsed.globals.begin(), funcUsed.globals.end()); - used.memories.insert(funcUsed.memories.begin(), funcUsed.memories.end()); - used.tables.insert(funcUsed.tables.begin(), funcUsed.tables.end()); - used.tags.insert(funcUsed.tags.begin(), funcUsed.tags.end()); - } + Gatherer(ModuleSplitter& parent) : parent(parent) {} - NameCollector collector(used); - // We shouldn't use collector.walkModuleCode here, because we don't want to - // walk global initializers. At this point, all globals are still in the - // primary module, so if we walk global initializers here, other globals - // appearing in their initializers will all be marked as used in the primary - // module, which is not what we want. - // - // For example, we have (global $a i32 (global.get $b)). Because $a is at - // this point still in the primary module, $b will be marked as "used" in - // the primary module. But $a can be moved to a secondary module later if it - // is used exclusively by that module. Then $b can be also moved, in case it - // doesn't have other uses. But if it is marked as "used" in the primary - // module, it can't. - walkSegments(collector, &module); - for (auto& segment : module.dataSegments) { - if (segment->memory.is()) { - used.memories.insert(segment->memory); + // Collect RefFuncs in a map from the function name to all RefFuncs that + // refer to it. We only collect this for secondary funcs. + InsertOrderedMap> map; + + void visitRefFunc(RefFunc* curr) { + Module* currModule = getModule(); + // Add ref.func to the map when + // 1. ref.func's target func is in one of the secondary modules and + // 2. the current module is a different module (either the primary module + // or a different secondary module) + if (parent.allSecondaryFuncs.contains(curr->func) && + (currModule == &parent.primary || + parent.secondaries.at(parent.funcToSecondaryIndex.at(curr->func)) + .get() != currModule)) { + map[curr->func].push_back(curr); } } - for (auto& segment : module.elementSegments) { - if (segment->table.is()) { - used.tables.insert(segment->table); - } + } gatherer(*this); + // We shouldn't use collector.walkModuleCode here, because we don't want to + // walk global initializers. At this point, all globals are still in the + // primary module, so if we walk global initializers here, it will create + // unnecessary trampolines. + // + // For example, we have (global $a funcref (ref.func $foo)), and $foo was + // split into a secondary module. Because $a is at this point still in the + // primary module, $foo will be considered to exist in a different module, so + // this will create a trampoline for $foo. But it is possible that later we + // find out $a is exclusively used by that secondary module and move $a there. + // In that case, $a can just reference $foo locally, but if we scan global + // initializers here, we would have created an unnecessary trampoline for + // $foo. + walkSegments(gatherer, &primary); + for (auto& curr : primary.functions) { + if (!curr->imported()) { + gatherer.walkFunction(curr.get()); } + } + for (auto& secondaryPtr : secondaries) { + gatherer.walkModule(secondaryPtr.get()); + } - // If primary module has exports, they are "used" in it. Secondary modules - // don't have exports, so this only applies to the primary module. - for (auto& ex : module.exports) { - switch (ex->kind) { - case ExternalKind::Global: - used.globals.insert(*ex->getInternalName()); - break; - case ExternalKind::Memory: - used.memories.insert(*ex->getInternalName()); - break; - case ExternalKind::Table: - used.tables.insert(*ex->getInternalName()); - break; - case ExternalKind::Tag: - used.tags.insert(*ex->getInternalName()); - break; - default: - break; + // Ignore references to secondary functions that occur in the active segment + // that will contain the imported placeholders. Indirect calls to table slots + // initialized by that segment will already go to the right place once the + // secondary module has been loaded and the table has been patched. + std::unordered_set ignore; + if (tableManager.activeSegment) { + for (auto* expr : tableManager.activeSegment->data) { + if (auto* ref = expr->dynCast()) { + ignore.insert(ref); } } - return used; - }; - - UsedNames primaryUsed = getUsedNames(primary); - std::vector secondaryUsed; - for (auto& secondaryPtr : secondaries) { - secondaryUsed.push_back(getUsedNames(*secondaryPtr)); } - // Compute the transitive closure of globals referenced in other globals' - // initializers. Since globals can reference other globals, we must ensure - // that if a global is used in a module, all its dependencies are also marked - // as used. - auto computeTransitiveGlobals = [&](UsedNames& used) { - UniqueNonrepeatingDeferredQueue worklist; - for (auto global : used.globals) { - worklist.push(global); - } - while (!worklist.empty()) { - Name name = worklist.pop(); - // At this point all globals are still in the primary module, so this - // exists - auto* global = primary.getGlobal(name); - if (!global->imported() && global->init) { - for (auto* get : FindAll(global->init).list) { - worklist.push(get->name); - used.globals.insert(get->name); - } + // Fix up what we found: Generate trampolines as described earlier, and apply + // them. + Builder builder(primary); + // Generate the new trampoline function and add it to the module. + for (auto& [name, refFuncs] : gatherer.map) { + // Find the relevant (non-ignored) RefFuncs. If there are none, we can skip + // creating a thunk entirely. + std::vector relevantRefFuncs; + for (auto* refFunc : refFuncs) { + assert(refFunc->func == name); + if (!ignore.contains(refFunc)) { + relevantRefFuncs.push_back(refFunc); } } - }; + if (relevantRefFuncs.empty()) { + continue; + } - computeTransitiveGlobals(primaryUsed); - for (auto& used : secondaryUsed) { - computeTransitiveGlobals(used); + Name trampoline = getTrampoline(name); + // Update RefFuncs to refer to it. + for (auto* refFunc : relevantRefFuncs) { + refFunc->func = trampoline; + } } +} - // Given a name and module item kind, returns the list of secondary modules - // using that name - auto getUsingSecondaries = [&](const Name& name, auto UsedNames::* field) { - std::vector usingModules; - for (size_t i = 0; i < secondaries.size(); ++i) { - if ((secondaryUsed[i].*field).contains(name)) { - usingModules.push_back(secondaries[i].get()); +void ModuleSplitter::indirectCallsToSecondaryFunctions() { + // Update direct calls of secondary functions to be indirect calls of their + // corresponding table indices instead. + struct CallIndirector : public PostWalker { + ModuleSplitter& parent; + CallIndirector(ModuleSplitter& parent) : parent(parent) {} + void visitCall(Call* curr) { + // Return if the call's target is not in one of the secondary module. + if (!parent.allSecondaryFuncs.contains(curr->target)) { + return; + } + // Return if the current module is the same module as the call's target, + // because we don't need a call_indirect within the same module. + Module* currModule = getModule(); + if (currModule != &parent.primary && + parent.secondaries.at(parent.funcToSecondaryIndex.at(curr->target)) + .get() == currModule) { + return; } + + Builder builder(*getModule()); + Index secIndex = parent.funcToSecondaryIndex.at(curr->target); + auto* func = parent.secondaries.at(secIndex)->getFunction(curr->target); + auto tableSlot = + parent.tableManager.getSlot(curr->target, func->type.getHeapType()); + + replaceCurrent( + builder.makeCallIndirect(tableSlot.tableName, + tableSlot.makeExpr(parent.primary), + curr->operands, + func->type.getHeapType(), + curr->isReturn)); } - return usingModules; }; + CallIndirector callIndirector(*this); + callIndirector.walkModule(&primary); + for (auto& secondaryPtr : secondaries) { + callIndirector.walkModule(secondaryPtr.get()); + } +} - // Share module items with secondary modules. - // 1. Only share an item with the modules that use it - // 2. If an item is used by only a single secondary module, move the item to - // that secondary module. If an item is used by multiple modules (including - // the primary and secondary modules), export the item from the primary and - // import it from the using secondary modules. +void ModuleSplitter::exportImportCalledPrimaryFunctions() { + // Find primary functions called/referred to from the secondary modules. + using CalledPrimaryToModules = std::map>; + for (auto& secondaryPtr : secondaries) { + Module* secondary = secondaryPtr.get(); + ModuleUtils::ParallelFunctionAnalysis callCollector( + *secondary, + [&](Function* func, CalledPrimaryToModules& calledPrimaryToModules) { + struct CallCollector : PostWalker { + const std::unordered_set& primaryFuncs; + CalledPrimaryToModules& calledPrimaryToModules; + CallCollector(const std::unordered_set& primaryFuncs, + CalledPrimaryToModules& calledPrimaryToModules) + : primaryFuncs(primaryFuncs), + calledPrimaryToModules(calledPrimaryToModules) {} + void visitCall(Call* curr) { + if (primaryFuncs.contains(curr->target)) { + calledPrimaryToModules[curr->target].insert(getModule()); + } + } + void visitRefFunc(RefFunc* curr) { + if (primaryFuncs.contains(curr->func)) { + calledPrimaryToModules[curr->func].insert(getModule()); + } + } + }; + CallCollector(primaryFuncs, calledPrimaryToModules) + .walkFunctionInModule(func, secondary); + }); - std::vector memoriesToRemove; - for (auto& memory : primary.memories) { - auto usingSecondaries = - getUsingSecondaries(memory->name, &UsedNames::memories); - bool usedInPrimary = primaryUsed.memories.contains(memory->name); + CalledPrimaryToModules calledPrimaryToModules; + for (auto& [_, map] : callCollector.map) { + calledPrimaryToModules.merge(map); + } - if (!usedInPrimary && usingSecondaries.size() == 1) { - auto* secondary = usingSecondaries[0]; - ModuleUtils::copyMemory(memory.get(), *secondary); - memoriesToRemove.push_back(memory->name); - } else { - for (auto* secondary : usingSecondaries) { - auto* secondaryMemory = - ModuleUtils::copyMemory(memory.get(), *secondary); - makeImportExport( - *memory, *secondaryMemory, "memory", ExternalKind::Memory); - } + // Ensure each called primary function is exported and imported + for (auto& [func, modules] : calledPrimaryToModules) { + exportImportFunction(func, modules); } } - for (auto& name : memoriesToRemove) { - primary.removeMemory(name); - } +} - std::vector tablesToRemove; - for (auto& table : primary.tables) { - auto usingSecondaries = - getUsingSecondaries(table->name, &UsedNames::tables); - bool usedInPrimary = primaryUsed.tables.contains(table->name); +void ModuleSplitter::setupTablePatching() { + if (!tableManager.activeTable) { + return; + } - if (!usedInPrimary && usingSecondaries.size() == 1) { - auto* secondary = usingSecondaries[0]; - // In case we copied this table to this secondary module in - // setupTablePatching(), !usedInPrimary can't be satisfied, because the - // primary module should have an element segment that refers to this - // table. - assert(!secondary->getTableOrNull(table->name)); - ModuleUtils::copyTable(table.get(), *secondary); - tablesToRemove.push_back(table->name); - } else { - for (auto* secondary : usingSecondaries) { - // 1. In case we copied this table to this secondary module in - // setupTablePatching(), secondary.getTableOrNull(table->name) is not - // null, and we need to import it. - // 2. As in the case with other module elements, if the table is used in - // the secondary module's instructions, we need to export it. - auto secondaryTable = secondary->getTableOrNull(table->name); - if (!secondaryTable) { - secondaryTable = ModuleUtils::copyTable(table.get(), *secondary); - } - makeImportExport(*table, *secondaryTable, "table", ExternalKind::Table); + std::map> moduleToReplacedElems; + // Replace table references to secondary functions with an imported + // placeholder that encodes the table index in its name: + // `importNamespace`.`index`. + forEachElement( + primary, [&](Name table, Name, Index index, Expression*& elem) { + auto* ref = elem->dynCast(); + if (!ref) { + return; } - } - } - for (auto& name : tablesToRemove) { - primary.removeTable(name); - } + if (!allSecondaryFuncs.contains(ref->func)) { + return; + } + assert(table == tableManager.activeTable->name); - std::vector globalsToRemove; - for (auto& global : primary.globals) { - if (global->mutable_) { - assert(primary.features.hasMutableGlobals() && - "TODO: add wrapper functions for disallowed mutable globals"); - } + placeholderMap[table][index] = ref->func; + Index secondaryIndex = funcToSecondaryIndex.at(ref->func); + Module& secondary = *secondaries.at(secondaryIndex); + Name secondaryName = config.secondaryNames.at(secondaryIndex); + auto* secondaryFunc = secondary.getFunction(ref->func); + moduleToReplacedElems[&secondary][index] = secondaryFunc; + if (!config.usePlaceholders) { + // TODO: This can create active element segments with lots of nulls. We + // should optimize them like we do data segments with zeros. + elem = Builder(primary).makeRefNull(HeapType::nofunc); + return; + } + auto placeholder = std::make_unique(); + placeholder->module = config.placeholderNamespacePrefix.toString() + "." + + secondaryName.toString(); + placeholder->base = std::to_string(index); + placeholder->name = Names::getValidFunctionName( + primary, std::string("placeholder_") + placeholder->base.toString()); + placeholder->hasExplicitName = true; + placeholder->type = secondaryFunc->type.with(Inexact); + elem = Builder(primary).makeRefFunc(placeholder->name, placeholder->type); + primary.addFunction(std::move(placeholder)); + }); - auto usingSecondaries = - getUsingSecondaries(global->name, &UsedNames::globals); - bool inPrimary = primaryUsed.globals.contains(global->name); + if (moduleToReplacedElems.size() == 0) { + // No placeholders to patch out of the table + return; + } - if (!inPrimary && usingSecondaries.empty()) { - // It's not used anywhere, so delete it. Unlike other unused module items - // (memories, tables, and tags) that can just sit in the primary module - // and later be DCE'ed by another pass, we should remove it here, because - // an unused global can contain an initializer that refers to another - // global that will be moved to a secondary module, like - // (global $unused i32 (global.get $a)) // $a is moved to a secondary - globalsToRemove.push_back(global->name); + for (auto& [secondaryPtr, replacedElems] : moduleToReplacedElems) { + Module& secondary = *secondaryPtr; + auto secondaryTable = + ModuleUtils::copyTable(tableManager.activeTable, secondary); - } else if (!inPrimary && usingSecondaries.size() == 1) { - // We are moving this global to this secondary module - auto* secondary = usingSecondaries[0]; - auto* secondaryGlobal = ModuleUtils::copyGlobal(global.get(), *secondary); - globalsToRemove.push_back(global->name); + if (tableManager.activeBase.global.size()) { + assert(tableManager.activeTableSegments.size() == 1 && + "Unexpected number of segments with non-const base"); + assert(secondary.tables.size() == 1 && secondary.elementSegments.empty()); + // Since addition is not currently allowed in initializer expressions, we + // need to start the new secondary segment where the primary segment + // starts. The secondary segment will contain the same primary functions + // as the primary module except in positions where it needs to overwrite a + // placeholder function. All primary functions in the table therefore need + // to be imported into the second module. TODO: use better strategies + // here, such as using ref.func in the start function or standardizing + // addition in initializer expressions. + ElementSegment* primarySeg = tableManager.activeTableSegments.front(); + std::vector secondaryElems; + secondaryElems.reserve(primarySeg->data.size()); - if (secondaryGlobal->init) { - // When a global's initializer contains ref.func - for (auto* ref : FindAll(secondaryGlobal->init).list) { - // If ref.func's function is in a different secondary module, we - // create a trampoline here. - if (auto targetIndexIt = funcToSecondaryIndex.find(ref->func); - targetIndexIt != funcToSecondaryIndex.end()) { - if (secondaries[targetIndexIt->second].get() != secondary) { - ref->func = getTrampoline(ref->func); - } - } - // 1. If ref.func's function is in the primary module, we export it - // here. - // 2. If ref.func's function is in a different secondary module and we - // just created a trampoline for it in the primary module above, we - // export the trampoline here. - if (primary.getFunctionOrNull(ref->func)) { - exportImportFunction(ref->func, {secondary}); - } - // If ref.func's function is in the same secondary module, we don't - // need to do anything. The ref.func can directly reference the + // Copy functions from the primary segment to the secondary segment, + // replacing placeholders and creating new exports and imports as + // necessary. + auto replacement = replacedElems.begin(); + for (Index i = 0; + i < primarySeg->data.size() && replacement != replacedElems.end(); + ++i) { + if (replacement->first == i) { + // primarySeg->data[i] is a placeholder, so use the secondary // function. + auto* func = replacement->second; + auto* ref = Builder(secondary).makeRefFunc(func->name, func->type); + secondaryElems.push_back(ref); + ++replacement; + } else if (auto* get = primarySeg->data[i]->dynCast()) { + exportImportFunction(get->func, {&secondary}); + auto* copied = + ExpressionManipulator::copy(primarySeg->data[i], secondary); + secondaryElems.push_back(copied); } } - } else { // We are NOT moving this global to the secondary module - if (global->init) { - for (auto* ref : FindAll(global->init).list) { - // If we are exporting this global from the primary module, we should - // create a trampoline here, because we skipped doing it for global - // initializers in indirectReferencesToSecondaryFunctions. - if (allSecondaryFuncs.contains(ref->func)) { - ref->func = getTrampoline(ref->func); - } - } - } - - for (auto* secondary : usingSecondaries) { - auto* secondaryGlobal = - ModuleUtils::copyGlobal(global.get(), *secondary); - makeImportExport( - *global, *secondaryGlobal, "global", ExternalKind::Global); - } + auto offset = ExpressionManipulator::copy(primarySeg->offset, secondary); + auto secondarySeg = std::make_unique( + secondaryTable->name, offset, secondaryTable->type, secondaryElems); + secondarySeg->setName(primarySeg->name, primarySeg->hasExplicitName); + secondary.addElementSegment(std::move(secondarySeg)); + return; } - } - for (auto& name : globalsToRemove) { - primary.removeGlobal(name); - } - - std::vector tagsToRemove; - for (auto& tag : primary.tags) { - auto usingSecondaries = getUsingSecondaries(tag->name, &UsedNames::tags); - bool usedInPrimary = primaryUsed.tags.contains(tag->name); - if (!usedInPrimary && usingSecondaries.size() == 1) { - auto* secondary = usingSecondaries[0]; - ModuleUtils::copyTag(tag.get(), *secondary); - tagsToRemove.push_back(tag->name); - } else { - for (auto* secondary : usingSecondaries) { - auto* secondaryTag = ModuleUtils::copyTag(tag.get(), *secondary); - makeImportExport(*tag, *secondaryTag, "tag", ExternalKind::Tag); + // Create active table segments in the secondary module to patch in the + // original functions when it is instantiated. + Index currBase = replacedElems.begin()->first; + std::vector currData; + auto finishSegment = [&]() { + auto* offset = Builder(secondary).makeConst( + Literal::makeFromInt32(currBase, secondaryTable->addressType)); + auto secondarySeg = std::make_unique( + secondaryTable->name, offset, secondaryTable->type, currData); + Name name = Names::getValidElementSegmentName( + secondary, Name::fromInt(secondary.elementSegments.size())); + secondarySeg->setName(name, false); + secondary.addElementSegment(std::move(secondarySeg)); + }; + for (auto curr = replacedElems.begin(); curr != replacedElems.end(); + ++curr) { + if (curr->first != currBase + currData.size()) { + finishSegment(); + currBase = curr->first; + currData.clear(); } + auto* func = curr->second; + currData.push_back( + Builder(secondary).makeRefFunc(func->name, func->type)); + } + if (currData.size()) { + finishSegment(); } - } - for (auto& name : tagsToRemove) { - primary.removeTag(name); } }