[FastISel][X86] Fix smul.with.overflow.i8 lowering.
Add custom lowering code for signed multiply instruction selection, because the default FastISel instruction selection for ISD::MUL uses an unsigned multiply for the i8 type and a signed multiply for all other types. This sets incorrect flags for the overflow check. This fixes <rdar://problem/17549300> llvm-svn: 212493
This commit is contained in:
parent
4c5b4054b2
commit
665ea71fcd
|
@ -2402,7 +2402,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
|
||||||
case Intrinsic::usub_with_overflow:
|
case Intrinsic::usub_with_overflow:
|
||||||
BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
|
BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
|
||||||
case Intrinsic::smul_with_overflow:
|
case Intrinsic::smul_with_overflow:
|
||||||
BaseOpc = ISD::MUL; CondOpc = X86::SETOr; break;
|
BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
|
||||||
case Intrinsic::umul_with_overflow:
|
case Intrinsic::umul_with_overflow:
|
||||||
BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
|
BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
|
||||||
}
|
}
|
||||||
|
@ -2430,10 +2430,11 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
|
||||||
RHSIsKill);
|
RHSIsKill);
|
||||||
}
|
}
|
||||||
|
|
||||||
// FastISel doesn't have a pattern for X86::MUL*r. Emit it manually.
|
// FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
|
||||||
|
// it manually.
|
||||||
if (BaseOpc == X86ISD::UMUL && !ResultReg) {
|
if (BaseOpc == X86ISD::UMUL && !ResultReg) {
|
||||||
static const unsigned MULOpc[] =
|
static const unsigned MULOpc[] =
|
||||||
{ X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
|
{ X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
|
||||||
static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
|
static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
|
||||||
// First copy the first operand into RAX, which is an implicit input to
|
// First copy the first operand into RAX, which is an implicit input to
|
||||||
// the X86::MUL*r instruction.
|
// the X86::MUL*r instruction.
|
||||||
|
@ -2442,6 +2443,21 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
|
||||||
.addReg(LHSReg, getKillRegState(LHSIsKill));
|
.addReg(LHSReg, getKillRegState(LHSIsKill));
|
||||||
ResultReg = FastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
|
ResultReg = FastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
|
||||||
TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
|
TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
|
||||||
|
} else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
|
||||||
|
static const unsigned MULOpc[] =
|
||||||
|
{ X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
|
||||||
|
if (VT == MVT::i8) {
|
||||||
|
// Copy the first operand into AL, which is an implicit input to the
|
||||||
|
// X86::IMUL8r instruction.
|
||||||
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
||||||
|
TII.get(TargetOpcode::COPY), X86::AL)
|
||||||
|
.addReg(LHSReg, getKillRegState(LHSIsKill));
|
||||||
|
ResultReg = FastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
|
||||||
|
RHSIsKill);
|
||||||
|
} else
|
||||||
|
ResultReg = FastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
|
||||||
|
TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
|
||||||
|
RHSReg, RHSIsKill);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!ResultReg)
|
if (!ResultReg)
|
||||||
|
|
|
@ -261,6 +261,34 @@ entry:
|
||||||
}
|
}
|
||||||
|
|
||||||
; SMULO
|
; SMULO
|
||||||
|
define zeroext i1 @smulo.i8(i8 %v1, i8 %v2, i8* %res) {
|
||||||
|
entry:
|
||||||
|
; FAST-LABEL: smulo.i8
|
||||||
|
; FAST: movb %dil, %al
|
||||||
|
; FAST-NEXT: imulb %sil
|
||||||
|
; FAST-NEXT: seto %cl
|
||||||
|
%t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
|
||||||
|
%val = extractvalue {i8, i1} %t, 0
|
||||||
|
%obit = extractvalue {i8, i1} %t, 1
|
||||||
|
store i8 %val, i8* %res
|
||||||
|
ret i1 %obit
|
||||||
|
}
|
||||||
|
|
||||||
|
define zeroext i1 @smulo.i16(i16 %v1, i16 %v2, i16* %res) {
|
||||||
|
entry:
|
||||||
|
; DAG-LABEL: smulo.i16
|
||||||
|
; DAG: imulw %si, %di
|
||||||
|
; DAG-NEXT: seto %al
|
||||||
|
; FAST-LABEL: smulo.i16
|
||||||
|
; FAST: imulw %si, %di
|
||||||
|
; FAST-NEXT: seto %al
|
||||||
|
%t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
|
||||||
|
%val = extractvalue {i16, i1} %t, 0
|
||||||
|
%obit = extractvalue {i16, i1} %t, 1
|
||||||
|
store i16 %val, i16* %res
|
||||||
|
ret i1 %obit
|
||||||
|
}
|
||||||
|
|
||||||
define zeroext i1 @smulo.i32(i32 %v1, i32 %v2, i32* %res) {
|
define zeroext i1 @smulo.i32(i32 %v1, i32 %v2, i32* %res) {
|
||||||
entry:
|
entry:
|
||||||
; DAG-LABEL: smulo.i32
|
; DAG-LABEL: smulo.i32
|
||||||
|
@ -292,6 +320,34 @@ entry:
|
||||||
}
|
}
|
||||||
|
|
||||||
; UMULO
|
; UMULO
|
||||||
|
define zeroext i1 @umulo.i8(i8 %v1, i8 %v2, i8* %res) {
|
||||||
|
entry:
|
||||||
|
; FAST-LABEL: umulo.i8
|
||||||
|
; FAST: movb %dil, %al
|
||||||
|
; FAST-NEXT: mulb %sil
|
||||||
|
; FAST-NEXT: seto %cl
|
||||||
|
%t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
|
||||||
|
%val = extractvalue {i8, i1} %t, 0
|
||||||
|
%obit = extractvalue {i8, i1} %t, 1
|
||||||
|
store i8 %val, i8* %res
|
||||||
|
ret i1 %obit
|
||||||
|
}
|
||||||
|
|
||||||
|
define zeroext i1 @umulo.i16(i16 %v1, i16 %v2, i16* %res) {
|
||||||
|
entry:
|
||||||
|
; DAG-LABEL: umulo.i16
|
||||||
|
; DAG: mulw %si
|
||||||
|
; DAG-NEXT: seto
|
||||||
|
; FAST-LABEL: umulo.i16
|
||||||
|
; FAST: mulw %si
|
||||||
|
; FAST-NEXT: seto
|
||||||
|
%t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
|
||||||
|
%val = extractvalue {i16, i1} %t, 0
|
||||||
|
%obit = extractvalue {i16, i1} %t, 1
|
||||||
|
store i16 %val, i16* %res
|
||||||
|
ret i1 %obit
|
||||||
|
}
|
||||||
|
|
||||||
define zeroext i1 @umulo.i32(i32 %v1, i32 %v2, i32* %res) {
|
define zeroext i1 @umulo.i32(i32 %v1, i32 %v2, i32* %res) {
|
||||||
entry:
|
entry:
|
||||||
; DAG-LABEL: umulo.i32
|
; DAG-LABEL: umulo.i32
|
||||||
|
@ -665,7 +721,7 @@ continue:
|
||||||
ret i1 true
|
ret i1 true
|
||||||
}
|
}
|
||||||
|
|
||||||
declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8) nounwind readnone
|
declare {i8, i1} @llvm.sadd.with.overflow.i8 (i8, i8 ) nounwind readnone
|
||||||
declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16) nounwind readnone
|
declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16) nounwind readnone
|
||||||
declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
|
declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
|
||||||
declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
|
declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
|
||||||
|
@ -675,8 +731,12 @@ declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
|
||||||
declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
|
declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
|
||||||
declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
|
declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
|
||||||
declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
|
declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
|
||||||
|
declare {i8, i1} @llvm.smul.with.overflow.i8 (i8, i8 ) nounwind readnone
|
||||||
|
declare {i16, i1} @llvm.smul.with.overflow.i16(i16, i16) nounwind readnone
|
||||||
declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
|
declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
|
||||||
declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
|
declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
|
||||||
|
declare {i8, i1} @llvm.umul.with.overflow.i8 (i8, i8 ) nounwind readnone
|
||||||
|
declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16) nounwind readnone
|
||||||
declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
|
declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
|
||||||
declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
|
declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue