[AArch64] Add FMOVH0: materialize 0 using zero register for f16 values

Instead of loading 0 from a constant pool, it's of course much better to
materialize it using an fmov and the zero register.

Thanks to Ahmed Bougacha for the suggestion.

Differential Revision: https://reviews.llvm.org/D37102

llvm-svn: 311662
This commit is contained in:
Sjoerd Meijer 2017-08-24 14:47:06 +00:00
parent 5d67d8916e
commit b0eb5fb317
5 changed files with 28 additions and 12 deletions

View File

@ -500,10 +500,12 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
unsigned DestReg = MI.getOperand(0).getReg();
if (STI->hasZeroCycleZeroing()) {
// Convert S/D register to corresponding Q register
if (AArch64::S0 <= DestReg && DestReg <= AArch64::S31) {
// Convert H/S/D register to corresponding Q register
if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31)
DestReg = AArch64::Q0 + (DestReg - AArch64::H0);
else if (AArch64::S0 <= DestReg && DestReg <= AArch64::S31)
DestReg = AArch64::Q0 + (DestReg - AArch64::S0);
} else {
else {
assert(AArch64::D0 <= DestReg && DestReg <= AArch64::D31);
DestReg = AArch64::Q0 + (DestReg - AArch64::D0);
}
@ -516,6 +518,11 @@ void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
MCInst FMov;
switch (MI.getOpcode()) {
default: llvm_unreachable("Unexpected opcode");
case AArch64::FMOVH0:
FMov.setOpcode(AArch64::FMOVWHr);
FMov.addOperand(MCOperand::createReg(DestReg));
FMov.addOperand(MCOperand::createReg(AArch64::WZR));
break;
case AArch64::FMOVS0:
FMov.setOpcode(AArch64::FMOVWSr);
FMov.addOperand(MCOperand::createReg(DestReg));
@ -635,6 +642,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
case AArch64::FMOVH0:
case AArch64::FMOVS0:
case AArch64::FMOVD0:
EmitFMov0(*MI);

View File

@ -4895,13 +4895,15 @@ bool AArch64TargetLowering::isOffsetFoldingLegal(
bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
// We can materialize #0.0 as fmov $Rd, XZR for 64-bit, 32-bit and 16-bit cases.
// FIXME: We should be able to handle f128 as well with a clever lowering.
if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32)) {
if (Imm.isPosZero() && (VT == MVT::f16 || VT == MVT::f64 || VT == MVT::f32)) {
DEBUG(dbgs() << "Legal fp imm: materialize 0 using the zero register\n");
return true;
}
StringRef FPType;
bool IsLegal = false;
SmallString<128> ImmStrVal;
Imm.toString(ImmStrVal);
if (VT == MVT::f64) {
FPType = "f64";
@ -4915,14 +4917,14 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
}
if (IsLegal) {
DEBUG(dbgs() << "Is legal " << FPType << " imm value: yes\n");
DEBUG(dbgs() << "Legal " << FPType << " imm value: " << ImmStrVal << "\n");
return true;
}
if (!FPType.empty())
DEBUG(dbgs() << "Is legal " << FPType << " imm value: no\n");
DEBUG(dbgs() << "Illegal " << FPType << " imm value: " << ImmStrVal << "\n");
else
DEBUG(dbgs() << "Is legal " << "fp imm: no, unsupported fp type\n");
DEBUG(dbgs() << "Illegal fp imm " << ImmStrVal << ": unsupported fp type\n");
return false;
}

View File

@ -748,6 +748,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
// It is cheap to zero out registers if the subtarget has ZeroCycleZeroing
// feature.
case AArch64::FMOVH0:
case AArch64::FMOVS0:
case AArch64::FMOVD0:
return Subtarget.hasZeroCycleZeroing();

View File

@ -2690,6 +2690,8 @@ defm FMOV : UnscaledConversion<"fmov">;
// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
Sched<[WriteF]>;
def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
Sched<[WriteF]>;
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,

View File

@ -1,15 +1,18 @@
; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ILLEGAL
; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+zcz | FileCheck %s --check-prefix=CHECK-ZCZ
; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=-fullfp16 | FileCheck %s --check-prefix=CHECK-NOFP16 --check-prefix=CHECK-ILLEGAL
define half @Const0() {
entry:
ret half 0xH0000
}
; CHECK-ILLEGAL: .[[LBL0:LCPI0_[0-9]]]:
; CHECK-ILLEGAL-NEXT: .hword 0 // half 0
; CHECK-ILLEGAL-LABEL: Const0:
; CHECK-ILLEGAL: adrp x[[NUM:[0-9]+]], .[[LBL0]]
; CHECK-ILLEGAL-NEXT: ldr h0, [x[[NUM]], :lo12:.[[LBL0]]]
; CHECK-DAG-ILLEGAL-LABEL: Const0:
; CHECK-DAG-ILLEGAL-NEXT: fmov h0, wzr
; CHECK-DAG-ILLEGAL-NEXT: ret
; CHECK-ZCZ-LABEL: Const0:
; CHECK-ZCZ: movi v0.2d, #0000000000000000
; CHECK-ZCZ-NEXT: ret
define half @Const1() {
entry: