From b0eb5fb317cbe831badc4da5843b1d526eff758a Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Thu, 24 Aug 2017 14:47:06 +0000 Subject: [PATCH] [AArch64] Add FMOVH0: materialize 0 using zero register for f16 values Instead of loading 0 from a constant pool, it's of course much better to materialize it using an fmov and the zero register. Thanks to Ahmed Bougacha for the suggestion. Differential Revision: https://reviews.llvm.org/D37102 llvm-svn: 311662 --- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 14 +++++++++++--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 10 ++++++---- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 1 + llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 ++ llvm/test/CodeGen/AArch64/f16-imm.ll | 13 ++++++++----- 5 files changed, 28 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 19112eefa726..277c88c87c90 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -500,10 +500,12 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) { unsigned DestReg = MI.getOperand(0).getReg(); if (STI->hasZeroCycleZeroing()) { - // Convert S/D register to corresponding Q register - if (AArch64::S0 <= DestReg && DestReg <= AArch64::S31) { + // Convert H/S/D register to corresponding Q register + if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31) + DestReg = AArch64::Q0 + (DestReg - AArch64::H0); + else if (AArch64::S0 <= DestReg && DestReg <= AArch64::S31) DestReg = AArch64::Q0 + (DestReg - AArch64::S0); - } else { + else { assert(AArch64::D0 <= DestReg && DestReg <= AArch64::D31); DestReg = AArch64::Q0 + (DestReg - AArch64::D0); } @@ -516,6 +518,11 @@ void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) { MCInst FMov; switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected opcode"); + case AArch64::FMOVH0: + FMov.setOpcode(AArch64::FMOVWHr); + FMov.addOperand(MCOperand::createReg(DestReg)); + FMov.addOperand(MCOperand::createReg(AArch64::WZR)); + break; case AArch64::FMOVS0: FMov.setOpcode(AArch64::FMOVWSr); FMov.addOperand(MCOperand::createReg(DestReg)); @@ -635,6 +642,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } + case AArch64::FMOVH0: case AArch64::FMOVS0: case AArch64::FMOVD0: EmitFMov0(*MI); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d674062276e0..6ccb404081dd 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4895,13 +4895,15 @@ bool AArch64TargetLowering::isOffsetFoldingLegal( bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases. // FIXME: We should be able to handle f128 as well with a clever lowering. - if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32)) { + if (Imm.isPosZero() && (VT == MVT::f16 || VT == MVT::f64 || VT == MVT::f32)) { DEBUG(dbgs() << "Legal fp imm: materialize 0 using the zero register\n"); return true; } StringRef FPType; bool IsLegal = false; + SmallString<128> ImmStrVal; + Imm.toString(ImmStrVal); if (VT == MVT::f64) { FPType = "f64"; @@ -4915,14 +4917,14 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { } if (IsLegal) { - DEBUG(dbgs() << "Is legal " << FPType << " imm value: yes\n"); + DEBUG(dbgs() << "Legal " << FPType << " imm value: " << ImmStrVal << "\n"); return true; } if (!FPType.empty()) - DEBUG(dbgs() << "Is legal " << FPType << " imm value: no\n"); + DEBUG(dbgs() << "Illegal " << FPType << " imm value: " << ImmStrVal << "\n"); else - DEBUG(dbgs() << "Is legal " << "fp imm: no, unsupported fp type\n"); + DEBUG(dbgs() << "Illegal fp imm " << ImmStrVal << ": unsupported fp type\n"); return false; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index e5324c601e51..07a4f8c2c36a 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -748,6 +748,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { // It is cheap to zero out registers if the subtarget has ZeroCycleZeroing // feature. + case AArch64::FMOVH0: case AArch64::FMOVS0: case AArch64::FMOVD0: return Subtarget.hasZeroCycleZeroing(); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index d97134356f43..b31180dc008a 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -2690,6 +2690,8 @@ defm FMOV : UnscaledConversion<"fmov">; // Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in { +def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>, + Sched<[WriteF]>; def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>, Sched<[WriteF]>; def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>, diff --git a/llvm/test/CodeGen/AArch64/f16-imm.ll b/llvm/test/CodeGen/AArch64/f16-imm.ll index 760df037f221..84c27312d1e1 100644 --- a/llvm/test/CodeGen/AArch64/f16-imm.ll +++ b/llvm/test/CodeGen/AArch64/f16-imm.ll @@ -1,15 +1,18 @@ ; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ILLEGAL +; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+zcz | FileCheck %s --check-prefix=CHECK-ZCZ ; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=-fullfp16 | FileCheck %s --check-prefix=CHECK-NOFP16 --check-prefix=CHECK-ILLEGAL define half @Const0() { entry: ret half 0xH0000 } -; CHECK-ILLEGAL: .[[LBL0:LCPI0_[0-9]]]: -; CHECK-ILLEGAL-NEXT: .hword 0 // half 0 -; CHECK-ILLEGAL-LABEL: Const0: -; CHECK-ILLEGAL: adrp x[[NUM:[0-9]+]], .[[LBL0]] -; CHECK-ILLEGAL-NEXT: ldr h0, [x[[NUM]], :lo12:.[[LBL0]]] +; CHECK-DAG-ILLEGAL-LABEL: Const0: +; CHECK-DAG-ILLEGAL-NEXT: fmov h0, wzr +; CHECK-DAG-ILLEGAL-NEXT: ret + +; CHECK-ZCZ-LABEL: Const0: +; CHECK-ZCZ: movi v0.2d, #0000000000000000 +; CHECK-ZCZ-NEXT: ret define half @Const1() { entry: