[AArch64] Add FMOVH0: materialize 0 using zero register for f16 values
Instead of loading 0 from a constant pool, it is much better to materialize it using an fmov from the zero register. Thanks to Ahmed Bougacha for the suggestion.

Differential Revision: https://reviews.llvm.org/D37102
llvm-svn: 311662
This commit is contained in:
parent
5d67d8916e
commit
b0eb5fb317
|
@ -500,10 +500,12 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
|
|||
void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
|
||||
unsigned DestReg = MI.getOperand(0).getReg();
|
||||
if (STI->hasZeroCycleZeroing()) {
|
||||
// Convert S/D register to corresponding Q register
|
||||
if (AArch64::S0 <= DestReg && DestReg <= AArch64::S31) {
|
||||
// Convert H/S/D register to corresponding Q register
|
||||
if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31)
|
||||
DestReg = AArch64::Q0 + (DestReg - AArch64::H0);
|
||||
else if (AArch64::S0 <= DestReg && DestReg <= AArch64::S31)
|
||||
DestReg = AArch64::Q0 + (DestReg - AArch64::S0);
|
||||
} else {
|
||||
else {
|
||||
assert(AArch64::D0 <= DestReg && DestReg <= AArch64::D31);
|
||||
DestReg = AArch64::Q0 + (DestReg - AArch64::D0);
|
||||
}
|
||||
|
@ -516,6 +518,11 @@ void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
|
|||
MCInst FMov;
|
||||
switch (MI.getOpcode()) {
|
||||
default: llvm_unreachable("Unexpected opcode");
|
||||
case AArch64::FMOVH0:
|
||||
FMov.setOpcode(AArch64::FMOVWHr);
|
||||
FMov.addOperand(MCOperand::createReg(DestReg));
|
||||
FMov.addOperand(MCOperand::createReg(AArch64::WZR));
|
||||
break;
|
||||
case AArch64::FMOVS0:
|
||||
FMov.setOpcode(AArch64::FMOVWSr);
|
||||
FMov.addOperand(MCOperand::createReg(DestReg));
|
||||
|
@ -635,6 +642,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
|||
return;
|
||||
}
|
||||
|
||||
case AArch64::FMOVH0:
|
||||
case AArch64::FMOVS0:
|
||||
case AArch64::FMOVD0:
|
||||
EmitFMov0(*MI);
|
||||
|
|
|
@ -4895,13 +4895,15 @@ bool AArch64TargetLowering::isOffsetFoldingLegal(
|
|||
bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
|
||||
// We can materialize #0.0 as fmov $Rd, XZR for the 64-bit, 32-bit and 16-bit cases.
|
||||
// FIXME: We should be able to handle f128 as well with a clever lowering.
|
||||
if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32)) {
|
||||
if (Imm.isPosZero() && (VT == MVT::f16 || VT == MVT::f64 || VT == MVT::f32)) {
|
||||
DEBUG(dbgs() << "Legal fp imm: materialize 0 using the zero register\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
StringRef FPType;
|
||||
bool IsLegal = false;
|
||||
SmallString<128> ImmStrVal;
|
||||
Imm.toString(ImmStrVal);
|
||||
|
||||
if (VT == MVT::f64) {
|
||||
FPType = "f64";
|
||||
|
@ -4915,14 +4917,14 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
|
|||
}
|
||||
|
||||
if (IsLegal) {
|
||||
DEBUG(dbgs() << "Is legal " << FPType << " imm value: yes\n");
|
||||
DEBUG(dbgs() << "Legal " << FPType << " imm value: " << ImmStrVal << "\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!FPType.empty())
|
||||
DEBUG(dbgs() << "Is legal " << FPType << " imm value: no\n");
|
||||
DEBUG(dbgs() << "Illegal " << FPType << " imm value: " << ImmStrVal << "\n");
|
||||
else
|
||||
DEBUG(dbgs() << "Is legal " << "fp imm: no, unsupported fp type\n");
|
||||
DEBUG(dbgs() << "Illegal fp imm " << ImmStrVal << ": unsupported fp type\n");
|
||||
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -748,6 +748,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
|
|||
|
||||
// It is cheap to zero out registers if the subtarget has ZeroCycleZeroing
|
||||
// feature.
|
||||
case AArch64::FMOVH0:
|
||||
case AArch64::FMOVS0:
|
||||
case AArch64::FMOVD0:
|
||||
return Subtarget.hasZeroCycleZeroing();
|
||||
|
|
|
@ -2690,6 +2690,8 @@ defm FMOV : UnscaledConversion<"fmov">;
|
|||
|
||||
// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
|
||||
let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
|
||||
def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
|
||||
Sched<[WriteF]>;
|
||||
def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
|
||||
Sched<[WriteF]>;
|
||||
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
|
||||
|
|
|
@ -1,15 +1,18 @@
|
|||
; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ILLEGAL
|
||||
; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+zcz | FileCheck %s --check-prefix=CHECK-ZCZ
|
||||
; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=-fullfp16 | FileCheck %s --check-prefix=CHECK-NOFP16 --check-prefix=CHECK-ILLEGAL
|
||||
|
||||
define half @Const0() {
|
||||
entry:
|
||||
ret half 0xH0000
|
||||
}
|
||||
; CHECK-ILLEGAL: .[[LBL0:LCPI0_[0-9]]]:
|
||||
; CHECK-ILLEGAL-NEXT: .hword 0 // half 0
|
||||
; CHECK-ILLEGAL-LABEL: Const0:
|
||||
; CHECK-ILLEGAL: adrp x[[NUM:[0-9]+]], .[[LBL0]]
|
||||
; CHECK-ILLEGAL-NEXT: ldr h0, [x[[NUM]], :lo12:.[[LBL0]]]
|
||||
; CHECK-DAG-ILLEGAL-LABEL: Const0:
|
||||
; CHECK-DAG-ILLEGAL-NEXT: fmov h0, wzr
|
||||
; CHECK-DAG-ILLEGAL-NEXT: ret
|
||||
|
||||
; CHECK-ZCZ-LABEL: Const0:
|
||||
; CHECK-ZCZ: movi v0.2d, #0000000000000000
|
||||
; CHECK-ZCZ-NEXT: ret
|
||||
|
||||
define half @Const1() {
|
||||
entry:
|
||||
|
|
Loading…
Reference in New Issue