Skip to content
1 change: 1 addition & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3941,6 +3941,7 @@ class Compiler

GenTree* gtFoldExpr(GenTree* tree);
GenTree* gtFoldExprConst(GenTree* tree);
GenTree* gtFoldDistributiveArithmetic(GenTree* tree);
GenTree* gtFoldIndirConst(GenTreeIndir* indir);
GenTree* gtFoldExprSpecial(GenTree* tree);
GenTree* gtFoldExprSpecialFloating(GenTree* tree);
Expand Down
76 changes: 76 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15050,6 +15050,10 @@ GenTree* Compiler::gtFoldExpr(GenTree* tree)

return gtFoldExprCompare(tree);
}
else if (tree->OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB))
{
return gtFoldDistributiveArithmetic(tree);
}
}

/* Return the original node (folded/bashed or not) */
Expand Down Expand Up @@ -18019,6 +18023,78 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree)
return tree;
}

//------------------------------------------------------------------------
// gtFoldDistributiveArithmetic: Optimizes distributive arithmetic by factoring out a common operand.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be better to compute CNF and DNF expression minimizations and then compare their weights here in the future: #127533 (comment)? I was experimenting with it in gentree but this seems like a better place instead. 🙂

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that some general framework for doing arithmetic/logic simplifications?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are Boolean normalization/minimization forms from Boolean algebra, similar to techniques used in logic/circuit simplification. For side-effect-free expressions, we could theoretically derive CNF/DNF/ANF candidates, eliminate redundant minterms via prime implicant and don’t-care analysis, then choose the lowest-cost form using some weight metric. The idea would be to pick whichever representation is less “chatty” / cheaper for the JIT.
https://www.cs.cornell.edu/courses/cs3410/2024fa/assignments/circuits/instructions.html

Here’s a better sample program that selects between CNF, DNF, ANF, and the original input form. I also added a threshold for Espresso heuristic backoff to simulate how this could behave with JIT time budget. (ANF is treated separately since it has its own implementation https://en.wikipedia.org/wiki/Algebraic_normal_form)

Program.cs.txt
output:

% dotnet run                  

input:
!(!a || (b && !c))

Input(normalized):
!(!a || (b && !c))

CNF:
(a && (!b || c))

DNF:
((a && !b) || (a && c))

ANF:
a ~ (a && b) ~ (a && b && c)

Better: CNF (Input size = 8, CNF size = 6, DNF size = 8, ANF size = 11)
--------------------------------------------------------------------------------

input:
(a && b) || (c && d) || (e && f)

Input(normalized):
(((a && b) || (c && d)) || (e && f))

CNF:
(((((((((b || d) || f) && ((a || d) || f)) && ((b || c) || f)) && ((a || c) || f)) && ((b || d) || e)) && ((a || d) || e)) && ((b || c) || e)) && ((a || c) || e))

DNF:
(((a && b) || (c && d)) || (e && f))

ANF:
(a && b) ~ (c && d) ~ (a && b && c && d) ~ (e && f) ~ (a && b && e && f) ~ (c && d && e && f) ~ (a && b && c && d && e && f)

Better: tie: Input, DNF (Input size = 11, CNF size = 47, DNF size = 11, ANF size = 47)
--------------------------------------------------------------------------------

input:
(a && !b) || (!a && b)

Input(normalized):
((a && !b) || (!a && b))

CNF:
((a || b) && (!a || !b))

DNF:
((a && !b) || (!a && b))

ANF:
a ~ b

Better: ANF (Input size = 9, CNF size = 9, DNF size = 9, ANF size = 3)
--------------------------------------------------------------------------------

input:
(a && b) || (a && c)

Input(normalized):
((a && b) || (a && c))

CNF:
((b || c) && a)

DNF:
((a && b) || (a && c))

ANF:
(a && b) ~ (a && c) ~ (a && b && c)

Better: CNF (Input size = 7, CNF size = 5, DNF size = 7, ANF size = 13)
--------------------------------------------------------------------------------

From the sample output, we can see that any of the representations, including the original input tree, can "win" for a given expression depending on the minimization/cost metric.

Copy link
Copy Markdown
Contributor Author

@BoyBaykiller BoyBaykiller May 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's cool. Not sure how practical it would be in a real compiler by comparison, but definitely interesting.
I guess make a PR : )

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, I think if diffs support it (significant enough), we may be able to enable it for NativeAOT/R2R at least. I will try to land a PoC PR. :)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't really need to do anything fancy here even. You can reuse the vpternlog tables from the hwintrinsics info to allow simplifying any sequence of binary operations involving no more than 3 operands.

//
// Arguments:
//    tree - the unchecked GT_AND/GT_OR/GT_XOR/GT_ADD/GT_SUB tree to optimize.
//
// Return Value:
//    The unchanged tree, or the same node rewritten in place with oper
//    GT_MUL/GT_OR/GT_AND.
//
// Notes:
//    Rewrites "(A op B) tree (A op C)" into "A op (B tree C)" when "op" is
//    left distributive over the oper of "tree" and "A" is the same local on
//    both sides. Only the first operand of each inner node is considered as
//    the common factor.
//
//    The rewrite is skipped when either inner node is overflow-checked: the
//    result is built from "tree", which is known not to be overflow-checked
//    at that point, so "checked(A * B) + checked(A * C)" would otherwise turn
//    into an unchecked "A * (B + C)" and lose its overflow exception.
//
GenTree* Compiler::gtFoldDistributiveArithmetic(GenTree* tree)
{
    assert(tree->OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB));

    if (opts.OptimizationDisabled())
    {
        return tree;
    }

    if (tree->gtOverflowEx() || !varTypeIsIntegralOrI(tree))
    {
        return tree;
    }

    // Bail out if the tree carries persistent or ordering side-effect flags.
    if ((tree->gtFlags & (GTF_PERSISTENT_SIDE_EFFECTS | GTF_ORDER_SIDEEFF)) != 0)
    {
        return tree;
    }

    GenTree* op1 = tree->gtGetOp1();
    GenTree* op2 = tree->gtGetOp2();

    auto isLeftDistributive = [](genTreeOps distOper, genTreeOps overOper) {
        // distOper is left distributive over overOper iff:
        // "A distOper (B overOper C)" <==> "(A distOper B) overOper (A distOper C)"
        switch (distOper)
        {
            case GT_AND:
                return overOper == GT_OR || overOper == GT_XOR || overOper == GT_AND;

            case GT_OR:
                return overOper == GT_AND || overOper == GT_OR;

            case GT_MUL:
                return overOper == GT_ADD || overOper == GT_SUB;

            default:
                return false;
        }
    };

    // Both sides must use the same oper, and that oper must distribute over the
    // oper of "tree". This also guarantees op1/op2 are binary (AND/OR/MUL) nodes.
    if ((op1->OperGet() != op2->OperGet()) || !isLeftDistributive(op1->OperGet(), tree->OperGet()))
    {
        return tree;
    }

    // Factoring would drop the overflow check of an overflow-checked inner node.
    if (op1->gtOverflowEx() || op2->gtOverflowEx())
    {
        return tree;
    }

    GenTree* factor1 = op1->gtGetOp1();
    GenTree* factor2 = op2->gtGetOp1();

    // The common factor has to be the very same local on both sides.
    if (!factor1->OperIsAnyLocal() || !factor2->OperIsAnyLocal() || !GenTree::Compare(factor1, factor2))
    {
        return tree;
    }

    // "(A op B) tree (A op C)" => "A op (B tree C)", reusing "tree" as the new root.
    tree->AsOp()->gtOp1 = factor1;
    tree->AsOp()->gtOp2 =
        gtFoldExpr(gtNewOperNode(tree->OperGet(), tree->TypeGet(), op1->gtGetOp2(), op2->gtGetOp2()));
    // The rewritten tree computes the same value, so its value number is kept.
    tree->SetOper(op1->OperGet(), GenTree::PRESERVE_VN);

    if (fgGlobalMorph)
    {
        fgMorphTreeDone(tree->gtGetOp2());
    }

    return tree;
}

//------------------------------------------------------------------------
// gtFoldIndirConst: Attempt to fold an "IND(addr)" expression to a constant.
//
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10440,7 +10440,7 @@ void Compiler::fgPushConstantsRight(GenTreeOp* tree)
}

//------------------------------------------------------------------------
// fgOptimizeCommutativeArithmetic: Optimizes commutative operations.
// fgOptimizeCommutativeArithmetic: Optimizes commutative arithmetic.
//
// Arguments:
// tree - the unchecked GT_ADD/GT_MUL/GT_OR/GT_XOR/GT_AND tree to optimize.
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/optimizebools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1198,6 +1198,12 @@ void OptBoolsDsc::optOptimizeBoolsUpdateTrees()

GenTree* cmpOp1 = m_foldOp == GT_NONE ? m_c1 : m_compiler->gtNewOperNode(m_foldOp, m_foldType, m_c1, m_c2);

// There may be new opportunities for distributive arithmetic optimization
if (m_foldOp != GT_NONE)
{
cmpOp1 = m_compiler->gtFoldExpr(cmpOp1);
}

GenTree* t1Comp = m_testInfo1.compTree;
t1Comp->SetOper(m_cmpOp);
t1Comp->AsOp()->gtOp1 = cmpOp1;
Expand Down
Loading