Skip to content

Commit df5461a

Browse files
committed
[Z80] Introduce backwards scavenging and allocator hints to eliminate LEA chaining
- Also adds more cases to isReallyTriviallyReMaterializable, modeled after x86
1 parent 2aa3061 commit df5461a

3 files changed

Lines changed: 298 additions & 58 deletions

File tree

llvm/lib/Target/Z80/Z80InstrInfo.cpp

Lines changed: 135 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1125,34 +1125,6 @@ unsigned Z80InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
11251125
return 0;
11261126
}
11271127

1128-
static Register scavengeOrCreateRegister(const TargetRegisterClass *RC,
1129-
MachineRegisterInfo &MRI,
1130-
MachineBasicBlock::iterator II,
1131-
RegScavenger *RS = nullptr,
1132-
int SPAdj = 0,
1133-
bool AllowSpill = true) {
1134-
// v17 removed scavengeRegister (forward-looking) in favor of
1135-
// scavengeRegisterBackwards. scavengeRegisterBackwards however has complex
1136-
// state requirements. it uses the scavenger's internal MBBI iterator and
1137-
// can assert if no register survives the backwards scan to begin().
1138-
//
1139-
// During PEI's eliminateFrameIndex, the scavenger state may not be properly
1140-
// synchronized with the instruction being processed, causing crashes
1141-
//
1142-
// FindUnusedReg is stateless; if it finds nothing, fall back to creating a virtual register
1143-
// that will be resolved by the scavengeFrameVirtualRegs pass later
1144-
// TODO: this is worth looking into
1145-
if (RS) {
1146-
if (Register Reg = RS->FindUnusedReg(RC))
1147-
return Reg;
1148-
}
1149-
return MRI.createVirtualRegister(RC);
1150-
}
1151-
1152-
static Register findUnusedRegister(const TargetRegisterClass *RC,
1153-
RegScavenger *RS) {
1154-
return RS ? RS->FindUnusedReg(RC) : Register();
1155-
}
11561128

11571129
static Register findUnusedOrCreateRegister(const TargetRegisterClass *RC,
11581130
MachineRegisterInfo &MRI,
@@ -1207,6 +1179,7 @@ bool Z80InstrInfo::rewriteFrameIndex(MachineInstr &MI, unsigned FIOperandNum,
12071179

12081180
unsigned Opc = MI.getOpcode();
12091181
bool IllegalLEA = Opc == Z80::LEA16ro && !Subtarget.hasEZ80Ops();
1182+
12101183
if (TRI.isFrameOffsetLegal(&MI, BaseReg, Offset) && !IllegalLEA) {
12111184
MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);
12121185
if (!NewOffset && (Opc == Z80::PEA24o || Opc == Z80::PEA16o)) {
@@ -1219,10 +1192,26 @@ bool Z80InstrInfo::rewriteFrameIndex(MachineInstr &MI, unsigned FIOperandNum,
12191192
canonicalizePhysRegsTo24Bit(MI, TRI);
12201193
return false;
12211194
}
1195+
LLVM_DEBUG(dbgs() << "Z80FrameIndex: Offset " << NewOffset << " not legal for "
1196+
<< TRI.getName(BaseReg) << ", RS=" << (RS ? "yes" : "no") << "\n");
12221197

12231198
bool SaveFlags = RS && RS->isRegUsed(Z80::F);
12241199
const TargetRegisterClass *OffsetRC =
12251200
Is24Bit ? &Z80::O24RegClass : &Z80::O16RegClass;
1201+
LLVM_DEBUG({
1202+
if (RS) {
1203+
dbgs() << " Live O24/O16 regs: ";
1204+
bool First = true;
1205+
for (MCPhysReg Reg : *OffsetRC) {
1206+
if (RS->isRegUsed(Reg)) {
1207+
if (!First) dbgs() << ", ";
1208+
dbgs() << TRI.getName(Reg);
1209+
First = false;
1210+
}
1211+
}
1212+
dbgs() << "\n";
1213+
}
1214+
});
12261215
const TargetRegisterClass *AddrScratchRC =
12271216
Is24Bit ? &Z80::A24RegClass : &Z80::A16RegClass;
12281217
const TargetRegisterClass *IndexScratchRC =
@@ -1371,16 +1360,19 @@ bool Z80InstrInfo::rewriteFrameIndex(MachineInstr &MI, unsigned FIOperandNum,
13711360
bool SpillOffsetReg = false;
13721361
if (!RS) {
13731362
ScratchReg = findUnusedOrCreateRegister(AddrScratchRC, MRI, /*RS=*/nullptr);
1374-
} else if (BaseReg.isPhysical()) {
1375-
// prefer an unused index register (IY/IX) for scratch if available. that
1376-
// keeps the original opcode (indexed addressing with 0 offset)
1377-
ScratchReg = selectUnusedNoOverlap(IndexScratchRC, /*Exclude0=*/BaseReg,
1378-
/*Exclude1=*/Register(),
1379-
/*IncludeReserved=*/false);
1380-
if (!ScratchReg)
1381-
ScratchReg = selectUnusedNoOverlap(AddrScratchRC, /*Exclude0=*/BaseReg,
1382-
/*Exclude1=*/Register());
1383-
}
1363+
} else if (BaseReg.isPhysical()) {
1364+
// prefer an unused index register (IY/IX) for scratch if available. that
1365+
// keeps the original opcode (indexed addressing with 0 offset)
1366+
ScratchReg = selectUnusedNoOverlap(IndexScratchRC, /*Exclude0=*/BaseReg,
1367+
/*Exclude1=*/Register(),
1368+
/*IncludeReserved=*/false);
1369+
if (!ScratchReg)
1370+
ScratchReg = selectUnusedNoOverlap(AddrScratchRC, /*Exclude0=*/BaseReg,
1371+
/*Exclude1=*/Register());
1372+
}
1373+
1374+
LLVM_DEBUG(dbgs() << " ScratchReg candidate: "
1375+
<< (ScratchReg ? TRI.getName(ScratchReg) : "none") << "\n");
13841376

13851377
if (ScratchReg) {
13861378
// take the scratch reg rewrite path only when we can get a truly unused
@@ -1393,35 +1385,52 @@ bool Z80InstrInfo::rewriteFrameIndex(MachineInstr &MI, unsigned FIOperandNum,
13931385
} else {
13941386
OffsetReg = selectUnusedNoOverlap(OffsetRC, /*Exclude0=*/ScratchReg,
13951387
/*Exclude1=*/BaseReg);
1388+
LLVM_DEBUG(dbgs() << " OffsetReg from selectUnusedNoOverlap: "
1389+
<< (OffsetReg ? TRI.getName(OffsetReg) : "none") << "\n");
13961390
if (!OffsetReg &&
13971391
HasLEA &&
13981392
(Is24Bit ? Z80::I24RegClass : Z80::I16RegClass).contains(ScratchReg)) {
1399-
// if we can get an unused index scratch register but no unused offset
1400-
// register, avoid push/adjust/pop of the base by materializing the
1401-
// adjusted address directly into the scratch using chunked LEA
1402-
// TODO: revisit this
1403-
copyRegister(MBB, II, DL, ScratchReg, BaseReg);
1404-
emitChunkedLEAAdjust(ScratchReg, NewOffset);
1405-
1406-
if (IllegalLEA) {
1407-
copyRegister(MBB, II, DL, MI.getOperand(0).getReg(), ScratchReg);
1408-
MI.eraseFromParent();
1409-
return true;
1393+
1394+
LLVM_DEBUG(dbgs() << " Has unused index scratch but no offset reg, trying scavenge\n");
1395+
if (!isInt<8>(NewOffset)) {
1396+
OffsetReg = RS->scavengeRegisterBackwards(
1397+
*OffsetRC, II, /*RestoreAfter=*/false, SPAdj, /*AllowSpill=*/true);
1398+
LLVM_DEBUG(dbgs() << " scavengeRegisterBackwards returned: "
1399+
<< (OffsetReg ? TRI.getName(OffsetReg) : "none") << "\n");
1400+
if (OffsetReg) {
1401+
RS->setRegUsed(OffsetReg);
1402+
}
14101403
}
14111404

1412-
MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false);
1413-
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
1414-
if (Is24Bit)
1415-
canonicalizePhysRegsTo24Bit(MI, TRI);
1416-
return false;
1405+
if (!OffsetReg) {
1406+
LLVM_DEBUG(dbgs() << " Using LEA chains with ScratchReg=" << TRI.getName(ScratchReg) << "\n");
1407+
copyRegister(MBB, II, DL, ScratchReg, BaseReg);
1408+
emitChunkedLEAAdjust(ScratchReg, NewOffset);
1409+
1410+
if (IllegalLEA) {
1411+
copyRegister(MBB, II, DL, MI.getOperand(0).getReg(), ScratchReg);
1412+
MI.eraseFromParent();
1413+
return true;
1414+
}
1415+
1416+
MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false);
1417+
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
1418+
if (Is24Bit)
1419+
canonicalizePhysRegsTo24Bit(MI, TRI);
1420+
return false;
1421+
}
14171422
}
14181423

1419-
if (!OffsetReg)
1424+
if (!OffsetReg) {
1425+
LLVM_DEBUG(dbgs() << " No offset reg available, clearing ScratchReg\n");
14201426
ScratchReg = Register();
1427+
}
14211428
}
14221429
}
14231430

14241431
if (ScratchReg) {
1432+
LLVM_DEBUG(dbgs() << " Using scratch reg path: ScratchReg=" << TRI.getName(ScratchReg)
1433+
<< ", OffsetReg=" << TRI.getName(OffsetReg) << "\n");
14251434
BuildMI(MBB, II, DL, get(Is24Bit ? Z80::LD24ri : Z80::LD16ri), OffsetReg)
14261435
.addImm(NewOffset);
14271436
copyRegister(MBB, II, DL, ScratchReg, BaseReg);
@@ -1496,6 +1505,7 @@ bool Z80InstrInfo::rewriteFrameIndex(MachineInstr &MI, unsigned FIOperandNum,
14961505
return true;
14971506
}
14981507

1508+
LLVM_DEBUG(dbgs() << " Fallback: push/adjust/pop BaseReg path\n");
14991509
applySPAdjust(
15001510
*BuildMI(MBB, II, DL, get(Is24Bit ? Z80::PUSH24r : Z80::PUSH16r))
15011511
.addReg(BaseReg));
@@ -1510,6 +1520,8 @@ bool Z80InstrInfo::rewriteFrameIndex(MachineInstr &MI, unsigned FIOperandNum,
15101520
Register UnusedOffset =
15111521
selectUnusedNoOverlap(OffsetRC, /*Exclude0=*/BaseReg,
15121522
/*Exclude1=*/Register());
1523+
LLVM_DEBUG(dbgs() << " UnusedOffset from fallback selectUnusedNoOverlap: "
1524+
<< (UnusedOffset ? TRI.getName(UnusedOffset) : "none") << "\n");
15131525
if (UnusedOffset) {
15141526
BuildMI(MBB, II, DL, get(Is24Bit ? Z80::LD24ri : Z80::LD16ri),
15151527
UnusedOffset)
@@ -1527,7 +1539,35 @@ bool Z80InstrInfo::rewriteFrameIndex(MachineInstr &MI, unsigned FIOperandNum,
15271539
applySPAdjust(*BuildMI(MBB, II, DL,
15281540
get(Is24Bit ? Z80::POP24AF : Z80::POP16AF)));
15291541
} else {
1530-
emitChunkedLEAAdjust(BaseReg, NewOffset);
1542+
// No unused register available - use scavengeRegisterBackwards to find one, spilling if necessary
1543+
LLVM_DEBUG(dbgs() << "Z80FrameIndex: No unused offset reg, trying scavengeRegisterBackwards\n");
1544+
Register ScavengedReg = RS->scavengeRegisterBackwards(
1545+
*OffsetRC, II, /*RestoreAfter=*/false, SPAdj, /*AllowSpill=*/true);
1546+
LLVM_DEBUG(dbgs() << "Z80FrameIndex: scavengeRegisterBackwards returned: ";
1547+
if (ScavengedReg) dbgs() << printReg(ScavengedReg, &TRI);
1548+
else dbgs() << "null";
1549+
dbgs() << "\n");
1550+
if (ScavengedReg) {
1551+
RS->setRegUsed(ScavengedReg);
1552+
BuildMI(MBB, II, DL, get(Is24Bit ? Z80::LD24ri : Z80::LD16ri),
1553+
ScavengedReg)
1554+
.addImm(NewOffset);
1555+
if (SaveFlags)
1556+
applySPAdjust(*BuildMI(MBB, II, DL,
1557+
get(Is24Bit ? Z80::PUSH24AF : Z80::PUSH16AF)))
1558+
.findRegisterUseOperand(Z80::AF)
1559+
->setIsUndef();
1560+
BuildMI(MBB, II, DL, get(Is24Bit ? Z80::ADD24ao : Z80::ADD16ao), BaseReg)
1561+
.addReg(BaseReg)
1562+
.addReg(ScavengedReg, RegState::Kill)
1563+
->addRegisterDead(Z80::F, &TRI);
1564+
if (SaveFlags)
1565+
applySPAdjust(*BuildMI(MBB, II, DL,
1566+
get(Is24Bit ? Z80::POP24AF : Z80::POP16AF)));
1567+
} else {
1568+
LLVM_DEBUG(dbgs() << "Z80FrameIndex: Scavenging failed, falling back to LEA chunks for offset " << NewOffset << "\n");
1569+
emitChunkedLEAAdjust(BaseReg, NewOffset);
1570+
}
15311571
}
15321572
} else {
15331573
// for small adjustments, always prefer LEA so we don't clobber flags or
@@ -1552,8 +1592,30 @@ bool Z80InstrInfo::rewriteFrameIndex(MachineInstr &MI, unsigned FIOperandNum,
15521592
applySPAdjust(
15531593
*BuildMI(MBB, II, DL, get(Is24Bit ? Z80::POP24AF : Z80::POP16AF)));
15541594
} else {
1555-
// with a scavenger and no unused offset temp fall back to LEA chunks
1556-
emitChunkedLEAAdjust(BaseReg, NewOffset);
1595+
// with a scavenger but no unused offset temp - use scavengeRegisterBackwards
1596+
Register ScavengedReg = RS->scavengeRegisterBackwards(
1597+
*OffsetRC, II, /*RestoreAfter=*/false, SPAdj, /*AllowSpill=*/true);
1598+
if (ScavengedReg) {
1599+
RS->setRegUsed(ScavengedReg);
1600+
BuildMI(MBB, II, DL, get(Is24Bit ? Z80::LD24ri : Z80::LD16ri),
1601+
ScavengedReg)
1602+
.addImm(NewOffset);
1603+
if (SaveFlags)
1604+
applySPAdjust(*BuildMI(MBB, II, DL,
1605+
get(Is24Bit ? Z80::PUSH24AF : Z80::PUSH16AF)))
1606+
.findRegisterUseOperand(Z80::AF)
1607+
->setIsUndef();
1608+
BuildMI(MBB, II, DL, get(Is24Bit ? Z80::ADD24ao : Z80::ADD16ao), BaseReg)
1609+
.addReg(BaseReg)
1610+
.addReg(ScavengedReg, RegState::Kill)
1611+
->addRegisterDead(Z80::F, &TRI);
1612+
if (SaveFlags)
1613+
applySPAdjust(*BuildMI(MBB, II, DL,
1614+
get(Is24Bit ? Z80::POP24AF : Z80::POP16AF)));
1615+
} else {
1616+
// Scavenging failed - fall back to LEA chunks
1617+
emitChunkedLEAAdjust(BaseReg, NewOffset);
1618+
}
15571619
}
15581620
}
15591621
} else {
@@ -1625,10 +1687,25 @@ unsigned Z80InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
16251687

16261688
bool Z80InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI) const {
16271689
switch (MI.getOpcode()) {
1690+
16281691
case Z80::LD8r0:
16291692
case Z80::LD24r0:
16301693
case Z80::LD24r_1:
16311694
return true;
1695+
1696+
case Z80::LD8ri:
1697+
case Z80::LD16ri:
1698+
case Z80::LD24ri:
1699+
return true;
1700+
1701+
case Z80::LEA16ro:
1702+
case Z80::LEA24ro: {
1703+
const MachineOperand &Base = MI.getOperand(1);
1704+
const MachineOperand &Offset = MI.getOperand(2);
1705+
if (Base.isReg() && Offset.isImm())
1706+
return true;
1707+
return false;
1708+
}
16321709
}
16331710
return false;
16341711
}

0 commit comments

Comments
 (0)