[ARM] Optimize {s,u}{add,sub}.with.overflow.
The ARM backend contains code that tries to optimize compares by replacing them with an existing instruction that sets the flags the same way. This patch allows it to replace a "cmp" with an "adds", generalizing the code that replaces "cmp" with "sub". It also heuristically disables sinking of instructions that could potentially be used to replace compares (currently only if they're next to each other).
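
As a rough before/after sketch of the resulting code (based on the uadd test update in this commit; r0/r1 follow the AAPCS argument convention), the overflow check now reads the flags of the flag-setting add instead of a separate compare:

    @ Before: the sum is compared against one addend to detect unsigned overflow.
        adds  r2, r0, r1
        cmp   r2, r0
        movhs pc, lr          @ return if no overflow (r2 >= r0)

    @ After: the flag-setting adds makes the cmp redundant.
        adds  r0, r0, r1
        movlo pc, lr          @ return if no overflow (carry clear)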

Differential revision: https://reviews.llvm.org/D38378

llvm-svn: 322737
jgalenson committed Jan 17, 2018
1 parent 727f153 commit fe7fa40
Showing 4 changed files with 130 additions and 47 deletions.
97 changes: 74 additions & 23 deletions llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2534,14 +2534,28 @@ inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
}
}

/// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
/// the condition code if we modify the instructions such that flags are
/// set by ADD(a,b,X).
inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
switch (CC) {
default: return ARMCC::AL;
case ARMCC::HS: return ARMCC::LO;
case ARMCC::LO: return ARMCC::HS;
case ARMCC::VS: return ARMCC::VS;
case ARMCC::VC: return ARMCC::VC;
}
}

/// isRedundantFlagInstr - check whether the first instruction, whose only
/// purpose is to update flags, can be made redundant.
/// CMPrr can be made redundant by SUBrr if the operands are the same.
/// CMPri can be made redundant by SUBri if the operands are the same.
/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
/// This function can be extended later on.
inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
unsigned SrcReg2, int ImmValue,
MachineInstr *OI) {
inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
unsigned SrcReg, unsigned SrcReg2,
int ImmValue, const MachineInstr *OI) {
if ((CmpI->getOpcode() == ARM::CMPrr ||
CmpI->getOpcode() == ARM::t2CMPrr) &&
(OI->getOpcode() == ARM::SUBrr ||
@@ -2559,6 +2573,14 @@ inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
OI->getOperand(1).getReg() == SrcReg &&
OI->getOperand(2).getImm() == ImmValue)
return true;

if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
(OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
OI->getOperand(0).getReg() == SrcReg &&
OI->getOperand(1).getReg() == SrcReg2)
return true;
return false;
}

@@ -2661,17 +2683,18 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
if (I == B) return false;

// There are two possible candidates which can be changed to set CPSR:
// One is MI, the other is a SUB instruction.
// For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
// One is MI, the other is a SUB or ADD instruction.
// For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
// ADDr[ri](r1, r2, X).
// For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
MachineInstr *Sub = nullptr;
MachineInstr *SubAdd = nullptr;
if (SrcReg2 != 0)
// MI is not a candidate for CMPrr.
MI = nullptr;
else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
// Conservatively refuse to convert an instruction which isn't in the same
// BB as the comparison.
// For CMPri w/ CmpValue != 0, a Sub may still be a candidate.
// For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
// Thus we cannot return here.
if (CmpInstr.getOpcode() == ARM::CMPri ||
CmpInstr.getOpcode() == ARM::t2CMPri)
@@ -2713,38 +2736,43 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
}
I = CmpInstr;
E = MI;
} else {
// Allow the loop below to search E (which was initially MI). Since MI and
// SubAdd have different tests, even if that instruction could not be MI, it
// could still potentially be SubAdd.
--E;
}

// Check that CPSR isn't set between the comparison instruction and the one we
// want to change. At the same time, search for Sub.
// want to change. At the same time, search for SubAdd.
const TargetRegisterInfo *TRI = &getRegisterInfo();
--I;
for (; I != E; --I) {
const MachineInstr &Instr = *I;

// Check whether CmpInstr can be made redundant by the current instruction.
if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
SubAdd = &*I;
break;
}

if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
Instr.readsRegister(ARM::CPSR, TRI))
// This instruction modifies or uses CPSR after the one we want to
// change. We can't do this transformation.
return false;

// Check whether CmpInstr can be made redundant by the current instruction.
if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
Sub = &*I;
break;
}

if (I == B)
// The 'and' is below the comparison instruction.
return false;
}

// Return false if no candidates exist.
if (!MI && !Sub)
if (!MI && !SubAdd)
return false;

// The single candidate is called MI.
if (!MI) MI = Sub;
if (!MI) MI = SubAdd;

// We can't use a predicated instruction - it doesn't always write the flags.
if (isPredicated(*MI))
@@ -2802,25 +2830,31 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
break;
}

if (Sub) {
ARMCC::CondCodes NewCC = getSwappedCondition(CC);
if (NewCC == ARMCC::AL)
return false;
if (SubAdd) {
// If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
// on CMP needs to be updated to be based on SUB.
// If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
// needs to be modified.
// Push the condition code operands to OperandsToUpdate.
// If it is safe to remove CmpInstr, the condition code of these
// operands will be modified.
if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
Sub->getOperand(2).getReg() == SrcReg) {
unsigned Opc = SubAdd->getOpcode();
bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
Opc == ARM::SUBri || Opc == ARM::t2SUBri;
if (!IsSub || (SrcReg2 != 0 && SubAdd->getOperand(1).getReg() == SrcReg2 &&
SubAdd->getOperand(2).getReg() == SrcReg)) {
// VSel doesn't support condition code update.
if (IsInstrVSel)
return false;
// Ensure we can swap the condition.
ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
if (NewCC == ARMCC::AL)
return false;
OperandsToUpdate.push_back(
std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
}
} else {
// No Sub, so this is x = <op> y, z; cmp x, 0.
// No SubAdd, so this is x = <op> y, z; cmp x, 0.
switch (CC) {
case ARMCC::EQ: // Z
case ARMCC::NE: // Z
@@ -2874,6 +2908,23 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
return true;
}

bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
// Do not sink MI if it might be used to optimize a redundant compare.
// We heuristically only look at the instruction immediately following MI to
// avoid potentially searching the entire basic block.
if (isPredicated(MI))
return true;
MachineBasicBlock::const_iterator Next = &MI;
++Next;
unsigned SrcReg, SrcReg2;
int CmpMask, CmpValue;
if (Next != MI.getParent()->end() &&
analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI))
return false;
return true;
}

bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned Reg,
MachineRegisterInfo *MRI) const {
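The condition remapping done by getCmpToAddCondition above can be read off the pattern isRedundantFlagInstr now matches, where a compare tests an add's result against the add's first operand. A minimal sketch, with registers chosen purely for illustration:

    @ d = a + b, then the compare checks the sum against the first addend:
        adds  r2, r0, r1      @ carry is set exactly when a + b wraps (unsigned overflow)
        cmp   r2, r0          @ HS: d >= a, i.e. no overflow; LO: d < a, i.e. overflow
    @ Once the cmp is dropped and the flags come from the adds, HS and LO swap
    @ (carry clear now means "no overflow"), while VS/VC are unchanged because
    @ d - a and a + b overflow signed arithmetic under the same condition.
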
2 changes: 2 additions & 0 deletions llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -215,6 +215,8 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {

bool expandPostRAPseudo(MachineInstr &MI) const override;

bool shouldSink(const MachineInstr &MI) const override;

void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
const MachineInstr &Orig,
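The shouldSink override declared above keeps a flag-producing candidate next to the compare it could replace. A hypothetical sequence (registers invented for the sketch) showing what sinking would otherwise break:

    @ Kept adjacent, the cmp can later be folded into a flag-setting subs:
        sub   r2, r0, r1      @ result only needed in a later block
        cmp   r0, r1          @ sets the same flags a subs r2, r0, r1 would
    @ If the sub were sunk towards its user in another block, the cmp would have
    @ to stay, because optimizeCompareInstr only searches within the compare's
    @ basic block for an instruction that makes it redundant.
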
15 changes: 7 additions & 8 deletions llvm/test/CodeGen/ARM/intrinsics-overflow.ll
@@ -33,10 +33,10 @@ define i32 @sadd_overflow(i32 %a, i32 %b) #0 {

; CHECK-LABEL: sadd_overflow:

; ARM: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
; ARM: mov r[[R1]], #1
; ARM: cmp r[[R2]], r[[R0]]
; ARM: movvc r[[R1]], #0
; ARM: adds r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
; ARM: mov r[[R0]], #1
; ARM: movvc r[[R0]], #0
; ARM: mov pc, lr

; THUMBV6: mov r[[R2:[0-9]+]], r[[R0:[0-9]+]]
; THUMBV6: adds r[[R3:[0-9]+]], r[[R2]], r[[R1:[0-9]+]]
@@ -47,11 +47,10 @@ define i32 @sadd_overflow(i32 %a, i32 %b) #0 {
; THUMBV6: mov r[[R0]], r[[R1]]
; THUMBV6: .L[[LABEL]]:

; THUMBV7: movs r[[R1]], #1
; THUMBV7: cmp r[[R2]], r[[R0]]
; THUMBV7: adds r[[R2:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
; THUMBV7: mov.w r[[R0:[0-9]+]], #1
; THUMBV7: it vc
; THUMBV7: movvc r[[R1]], #0
; THUMBV7: mov r[[R0]], r[[R1]]
; THUMBV7: movvc r[[R0]], #0
}

define i32 @usub_overflow(i32 %a, i32 %b) #0 {
63 changes: 47 additions & 16 deletions llvm/test/CodeGen/ARM/su-addsub-overflow.ll
@@ -2,9 +2,7 @@

define i32 @sadd(i32 %a, i32 %b) local_unnamed_addr #0 {
; CHECK-LABEL: sadd:
; CHECK: mov r[[R0:[0-9]+]], r0
; CHECK-NEXT: add r[[R1:[0-9]+]], r[[R0]], r1
; CHECK-NEXT: cmp r[[R1]], r[[R0]]
; CHECK: adds r0, r0, r1
; CHECK-NEXT: movvc pc, lr
entry:
%0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
@@ -23,10 +21,8 @@ cont:

define i32 @uadd(i32 %a, i32 %b) local_unnamed_addr #0 {
; CHECK-LABEL: uadd:
; CHECK: mov r[[R0:[0-9]+]], r0
; CHECK-NEXT: adds r[[R1:[0-9]+]], r[[R0]], r1
; CHECK-NEXT: cmp r[[R1]], r[[R0]]
; CHECK-NEXT: movhs pc, lr
; CHECK: adds r0, r0, r1
; CHECK-NEXT: movlo pc, lr
entry:
%0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
%1 = extractvalue { i32, i1 } %0, 1
@@ -44,8 +40,7 @@ cont:

define i32 @ssub(i32 %a, i32 %b) local_unnamed_addr #0 {
; CHECK-LABEL: ssub:
; CHECK: cmp r0, r1
; CHECK-NEXT: subvc r0, r0, r1
; CHECK: subs r0, r0, r1
; CHECK-NEXT: movvc pc, lr
entry:
%0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
@@ -64,9 +59,7 @@ cont:

define i32 @usub(i32 %a, i32 %b) local_unnamed_addr #0 {
; CHECK-LABEL: usub:
; CHECK: mov r[[R0:[0-9]+]], r0
; CHECK-NEXT: subs r[[R1:[0-9]+]], r[[R0]], r1
; CHECK-NEXT: cmp r[[R0]], r1
; CHECK: subs r0, r0, r1
; CHECK-NEXT: movhs pc, lr
entry:
%0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
Expand All @@ -87,11 +80,9 @@ define void @sum(i32* %a, i32* %b, i32 %n) local_unnamed_addr #0 {
; CHECK-LABEL: sum:
; CHECK: ldr [[R0:r[0-9]+]],
; CHECK-NEXT: ldr [[R1:r[0-9]+|lr]],
; CHECK-NEXT: add [[R2:r[0-9]+]], [[R1]], [[R0]]
; CHECK-NEXT: cmp [[R2]], [[R1]]
; CHECK-NEXT: adds [[R2:r[0-9]+]], [[R1]], [[R0]]
; CHECK-NEXT: strvc [[R2]],
; CHECK-NEXT: addvc
; CHECK-NEXT: cmpvc
; CHECK-NEXT: addsvc
; CHECK-NEXT: bvs
entry:
%cmp7 = icmp eq i32 %n, 0
@@ -128,6 +119,46 @@ cont2:

}

define void @extern_loop(i32 %n) local_unnamed_addr #0 {
; Do not replace the compare around the clobbering call.
; CHECK: add {{r[0-9]+}}, {{r[0-9]+}}, #1
; CHECK-NEXT: bl external_fn
; CHECK: cmp
entry:
%0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %n, i32 1)
%1 = extractvalue { i32, i1 } %0, 1
br i1 %1, label %trap, label %cont.lr.ph

cont.lr.ph:
%2 = extractvalue { i32, i1 } %0, 0
%cmp5 = icmp sgt i32 %2, 0
br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
br label %for.body

trap:
tail call void @llvm.trap() #2
unreachable

for.cond.cleanup:
ret void

for.body:
%i.046 = phi i32 [ %5, %cont1 ], [ 0, %for.body.preheader ]
tail call void bitcast (void (...)* @external_fn to void ()*)() #4
%3 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.046, i32 1)
%4 = extractvalue { i32, i1 } %3, 1
br i1 %4, label %trap, label %cont1

cont1:
%5 = extractvalue { i32, i1 } %3, 0
%cmp = icmp slt i32 %5, %2
br i1 %cmp, label %for.body, label %for.cond.cleanup
}

declare void @external_fn(...) local_unnamed_addr #0

declare void @llvm.trap() #2
declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #1
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
