[SPARC] Clean up the support for disabling fsmuld and fmuls instructions.

Summary: Also enable no-fsmuld for sparcv7 (which doesn't have the instruction). The previous code, which used a post-processing pass to do this, was unnecessary; disabling the instruction is entirely sufficient. Reviewers: jacob_hansen, ekedaigle Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D35576 llvm-svn: 308661
2017-07-20 20:09:11 +00:00 · 2017-07-20 20:09:11 +00:00 · bb76d48d59
parent 04787bbc98
commit bb76d48d59
11 changed files with 63 additions and 292 deletions
--- a/llvm/lib/Target/Sparc/LeonFeatures.td
+++ b/llvm/lib/Target/Sparc/LeonFeatures.td
@ -52,20 +52,6 @@ def InsertNOPLoad: SubtargetFeature<
  "LEON3 erratum fix: Insert a NOP instruction after every single-cycle load instruction when the next instruction is another load/store instruction" 
 >;

-def FixFSMULD : SubtargetFeature<
-  "fixfsmuld",
-  "FixFSMULD",
-  "true",
-  "LEON erratum fix: Do not use FSMULD" 
->;
-
-def ReplaceFMULS : SubtargetFeature<
-  "replacefmuls",
-  "ReplaceFMULS",
-  "true",
-  "LEON erratum fix: Replace FMULS instruction with FMULD and relevant conversion instructions" 
->;
-
 def DetectRoundChange : SubtargetFeature<
  "detectroundchange",
  "DetectRoundChange",
--- a/llvm/lib/Target/Sparc/LeonPasses.cpp
+++ b/llvm/lib/Target/Sparc/LeonPasses.cpp
@ -24,39 +24,6 @@ using namespace llvm;
 LEONMachineFunctionPass::LEONMachineFunctionPass(char &ID)
    : MachineFunctionPass(ID) {}

-int LEONMachineFunctionPass::GetRegIndexForOperand(MachineInstr &MI,
-                                                   int OperandIndex) {
-  if (MI.getNumOperands() > 0) {
-    if (OperandIndex == LAST_OPERAND) {
-      OperandIndex = MI.getNumOperands() - 1;
-    }
-
-    if (MI.getNumOperands() > (unsigned)OperandIndex &&
-        MI.getOperand(OperandIndex).isReg()) {
-      return (int)MI.getOperand(OperandIndex).getReg();
-    }
-  }
-
-  static int NotFoundIndex = -10;
-  // Return a different number each time to avoid any comparisons between the
-  // values returned.
-  NotFoundIndex -= 10;
-  return NotFoundIndex;
-}
-
-// finds a new free FP register
-// checks also the AllocatedRegisters vector
-int LEONMachineFunctionPass::getUnusedFPRegister(MachineRegisterInfo &MRI) {
-  for (int RegisterIndex = SP::F0; RegisterIndex <= SP::F31; ++RegisterIndex) {
-    if (!MRI.isPhysRegUsed(RegisterIndex) &&
-        !is_contained(UsedRegisters, RegisterIndex)) {
-      return RegisterIndex;
-    }
-  }
-
-  return -1;
-}
-
 //*****************************************************************************
 //**** InsertNOPLoad pass
 //*****************************************************************************
@ -93,182 +60,6 @@ bool InsertNOPLoad::runOnMachineFunction(MachineFunction &MF) {
  return Modified;
 }

-//*****************************************************************************
-//**** FixFSMULD pass
-//*****************************************************************************
-// This pass fixes the incorrectly working FSMULD instruction that exists for
-// some earlier versions of the LEON processor line.
-//
-// The pass should convert the FSMULD operands to double precision in scratch
-// registers, then calculate the result with the FMULD instruction. Therefore,
-// the pass should replace operations of the form:
-// fsmuld %f20,%f21,%f8
-// with the sequence:
-// fstod %f20,%f0
-// fstod %f21,%f2
-// fmuld %f0,%f2,%f8
-//
-char FixFSMULD::ID = 0;
-
-FixFSMULD::FixFSMULD() : LEONMachineFunctionPass(ID) {}
-
-bool FixFSMULD::runOnMachineFunction(MachineFunction &MF) {
-  Subtarget = &MF.getSubtarget<SparcSubtarget>();
-  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
-  DebugLoc DL = DebugLoc();
-
-  bool Modified = false;
-  for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
-    MachineBasicBlock &MBB = *MFI;
-    for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
-
-      MachineInstr &MI = *MBBI;
-      unsigned Opcode = MI.getOpcode();
-
-      const int UNASSIGNED_INDEX = -1;
-      int Reg1Index = UNASSIGNED_INDEX;
-      int Reg2Index = UNASSIGNED_INDEX;
-      int Reg3Index = UNASSIGNED_INDEX;
-
-      if (Opcode == SP::FSMULD && MI.getNumOperands() == 3) {
-        // take the registers from fsmuld %f20,%f21,%f8
-        Reg1Index = MI.getOperand(0).getReg();
-        Reg2Index = MI.getOperand(1).getReg();
-        Reg3Index = MI.getOperand(2).getReg();
-      }
-
-      if (Reg1Index != UNASSIGNED_INDEX && Reg2Index != UNASSIGNED_INDEX &&
-          Reg3Index != UNASSIGNED_INDEX) {
-        clearUsedRegisterList();
-        MachineBasicBlock::iterator NMBBI = std::next(MBBI);
-        // Whatever Reg3Index is hasn't been used yet, so we need to reserve it.
-        markRegisterUsed(Reg3Index);
-        const int ScratchReg1Index = getUnusedFPRegister(MF.getRegInfo());
-        markRegisterUsed(ScratchReg1Index);
-        const int ScratchReg2Index = getUnusedFPRegister(MF.getRegInfo());
-        markRegisterUsed(ScratchReg2Index);
-
-        if (ScratchReg1Index == UNASSIGNED_INDEX ||
-            ScratchReg2Index == UNASSIGNED_INDEX) {
-          errs() << "Cannot allocate free scratch registers for the FixFSMULD "
-                    "pass."
-                 << "\n";
-        } else {
-          // create fstod %f20,%f0
-          BuildMI(MBB, MBBI, DL, TII.get(SP::FSTOD))
-              .addReg(ScratchReg1Index)
-              .addReg(Reg1Index);
-
-          // create fstod %f21,%f2
-          BuildMI(MBB, MBBI, DL, TII.get(SP::FSTOD))
-              .addReg(ScratchReg2Index)
-              .addReg(Reg2Index);
-
-          // create fmuld %f0,%f2,%f8
-          BuildMI(MBB, MBBI, DL, TII.get(SP::FMULD))
-              .addReg(Reg3Index)
-              .addReg(ScratchReg1Index)
-              .addReg(ScratchReg2Index);
-
-          MI.eraseFromParent();
-          MBBI = NMBBI;
-
-          Modified = true;
-        }
-      }
-    }
-  }
-
-  return Modified;
-}
-
-//*****************************************************************************
-//**** ReplaceFMULS pass
-//*****************************************************************************
-// This pass fixes the incorrectly working FMULS instruction that exists for
-// some earlier versions of the LEON processor line.
-//
-// This pass converts the FMULS operands to double precision in scratch
-// registers, then calculates the result with the FMULD instruction.
-// The pass should replace operations of the form:
-// fmuls %f20,%f21,%f8
-// with the sequence:
-// fstod %f20,%f0
-// fstod %f21,%f2
-// fmuld %f0,%f2,%f8
-//
-char ReplaceFMULS::ID = 0;
-
-ReplaceFMULS::ReplaceFMULS() : LEONMachineFunctionPass(ID) {}
-
-bool ReplaceFMULS::runOnMachineFunction(MachineFunction &MF) {
-  Subtarget = &MF.getSubtarget<SparcSubtarget>();
-  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
-  DebugLoc DL = DebugLoc();
-
-  bool Modified = false;
-  for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
-    MachineBasicBlock &MBB = *MFI;
-    for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
-      MachineInstr &MI = *MBBI;
-      unsigned Opcode = MI.getOpcode();
-
-      const int UNASSIGNED_INDEX = -1;
-      int Reg1Index = UNASSIGNED_INDEX;
-      int Reg2Index = UNASSIGNED_INDEX;
-      int Reg3Index = UNASSIGNED_INDEX;
-
-      if (Opcode == SP::FMULS && MI.getNumOperands() == 3) {
-        // take the registers from fmuls %f20,%f21,%f8
-        Reg1Index = MI.getOperand(0).getReg();
-        Reg2Index = MI.getOperand(1).getReg();
-        Reg3Index = MI.getOperand(2).getReg();
-      }
-
-      if (Reg1Index != UNASSIGNED_INDEX && Reg2Index != UNASSIGNED_INDEX &&
-          Reg3Index != UNASSIGNED_INDEX) {
-        clearUsedRegisterList();
-        MachineBasicBlock::iterator NMBBI = std::next(MBBI);
-        // Whatever Reg3Index is hasn't been used yet, so we need to reserve it.
-        markRegisterUsed(Reg3Index);
-        const int ScratchReg1Index = getUnusedFPRegister(MF.getRegInfo());
-        markRegisterUsed(ScratchReg1Index);
-        const int ScratchReg2Index = getUnusedFPRegister(MF.getRegInfo());
-        markRegisterUsed(ScratchReg2Index);
-
-        if (ScratchReg1Index == UNASSIGNED_INDEX ||
-            ScratchReg2Index == UNASSIGNED_INDEX) {
-          errs() << "Cannot allocate free scratch registers for the "
-                    "ReplaceFMULS pass."
-                 << "\n";
-        } else {
-          // create fstod %f20,%f0
-          BuildMI(MBB, MBBI, DL, TII.get(SP::FSTOD))
-              .addReg(ScratchReg1Index)
-              .addReg(Reg1Index);
-
-          // create fstod %f21,%f2
-          BuildMI(MBB, MBBI, DL, TII.get(SP::FSTOD))
-              .addReg(ScratchReg2Index)
-              .addReg(Reg2Index);
-
-          // create fmuld %f0,%f2,%f8
-          BuildMI(MBB, MBBI, DL, TII.get(SP::FMULD))
-              .addReg(Reg3Index)
-              .addReg(ScratchReg1Index)
-              .addReg(ScratchReg2Index);
-
-          MI.eraseFromParent();
-          MBBI = NMBBI;
-
-          Modified = true;
-        }
-      }
-    }
-  }
-
-  return Modified;
-}


 //*****************************************************************************
--- a/llvm/lib/Target/Sparc/LeonPasses.h
+++ b/llvm/lib/Target/Sparc/LeonPasses.h
@ -57,32 +57,6 @@ public:
  }
 };

-class LLVM_LIBRARY_VISIBILITY FixFSMULD : public LEONMachineFunctionPass {
-public:
-  static char ID;
-
-  FixFSMULD();
-  bool runOnMachineFunction(MachineFunction &MF) override;
-
-  StringRef getPassName() const override {
-    return "FixFSMULD: Erratum Fix LBR31: do not select FSMULD";
-  }
-};
-
-class LLVM_LIBRARY_VISIBILITY ReplaceFMULS : public LEONMachineFunctionPass {
-public:
-  static char ID;
-
-  ReplaceFMULS();
-  bool runOnMachineFunction(MachineFunction &MF) override;
-
-  StringRef getPassName() const override {
-    return "ReplaceFMULS: Erratum Fix LBR32: replace FMULS instruction with a "
-           "routine using conversions/double precision operations to replace "
-           "FMULS";
-  }
-};
-
 class LLVM_LIBRARY_VISIBILITY DetectRoundChange
    : public LEONMachineFunctionPass {
 public:
--- a/llvm/lib/Target/Sparc/Sparc.td
+++ b/llvm/lib/Target/Sparc/Sparc.td
@ -24,6 +24,13 @@ def FeatureSoftMulDiv
  : SubtargetFeature<"soft-mul-div", "UseSoftMulDiv", "true",
                     "Use software emulation for integer multiply and divide">;

+def FeatureNoFSMULD
+  : SubtargetFeature<"no-fsmuld", "HasNoFSMULD", "true",
+                     "Disable the fsmuld instruction.">;
+def FeatureNoFMULS
+  : SubtargetFeature<"no-fmuls", "HasNoFMULS", "true",
+                     "Disable the fmuls instruction.">;
+
 def FeatureV9
  : SubtargetFeature<"v9", "IsV9", "true",
                     "Enable SPARC-V9 instructions">;
@ -79,7 +86,7 @@ class Proc<string Name, list<SubtargetFeature> Features>
 : Processor<Name, NoItineraries, Features>;

 def : Proc<"generic",         []>;
-def : Proc<"v7",              [FeatureSoftMulDiv]>;
+def : Proc<"v7",              [FeatureSoftMulDiv, FeatureNoFSMULD]>;
 def : Proc<"v8",              []>;
 def : Proc<"supersparc",      []>;
 def : Proc<"sparclite",       []>;
@ -129,7 +136,7 @@ def : Processor<"leon3", LEON3Itineraries,
 // LEON 3 FT (UT699). Provides features for the UT699 processor
 // - covers all the erratum fixes for LEON3, but does not support the CASA instruction.
 def : Processor<"ut699", LEON3Itineraries,
-                [FeatureLeon, InsertNOPLoad, FixFSMULD, ReplaceFMULS, FixAllFDIVSQRT]>;
+                [FeatureLeon, InsertNOPLoad, FeatureNoFSMULD, FeatureNoFMULS, FixAllFDIVSQRT]>;

 // LEON3 FT (GR712RC). Provides features for the GR712RC processor.
 // - covers all the erratum fixed for LEON3 and support for the CASA instruction. 
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@ -1828,9 +1828,7 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
    setOperationAction(ISD::FSQRT, MVT::f32, Promote);
  }

-  if (Subtarget->replaceFMULS()) {
-    // Promote FMULS to FMULD instructions instead as
-    // the former instructions generate errata on LEON processors.
+  if (Subtarget->hasNoFMULS()) {
    setOperationAction(ISD::FMUL, MVT::f32, Promote);
  }

--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@ -61,8 +61,8 @@ def HasLeonCASA : Predicate<"Subtarget->hasLeonCasa()">;
 def HasUMAC_SMAC : Predicate<"Subtarget->hasUmacSmac()">;

 def HasNoFdivSqrtFix : Predicate<"!Subtarget->fixAllFDIVSQRT()">;
-def HasNoFmulsFix : Predicate<"!Subtarget->replaceFMULS()">;
-def HasNoFsmuldFix : Predicate<"!Subtarget->fixFSMULD()">;
+def HasFMULS : Predicate<"!Subtarget->hasNoFMULS()">;
+def HasFSMULD : Predicate<"!Subtarget->hasNoFSMULD()">;

 // UseDeprecatedInsts - This predicate is true when the target processor is a
 // V8, or when it is V9 but the V8 deprecated instructions are efficient enough
@ -1236,14 +1236,12 @@ def FSUBQ  : F3_3<2, 0b110100, 0b001000111,


 // Floating-point Multiply and Divide Instructions, p. 147
-// FMULS generates an erratum on LEON processors, so by disabling this instruction
-// this will be promoted to use FMULD with doubles instead.
-let Predicates = [HasNoFmulsFix] in 
 def FMULS  : F3_3<2, 0b110100, 0b001001001,
                  (outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
                  "fmuls $rs1, $rs2, $rd",
                  [(set f32:$rd, (fmul f32:$rs1, f32:$rs2))],
-                  IIC_fpu_muls>;
+                  IIC_fpu_muls>,
+                  Requires<[HasFMULS]>;
 def FMULD  : F3_3<2, 0b110100, 0b001001010,
                  (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
                  "fmuld $rs1, $rs2, $rd",
@ -1255,13 +1253,13 @@ def FMULQ  : F3_3<2, 0b110100, 0b001001011,
                  [(set f128:$rd, (fmul f128:$rs1, f128:$rs2))]>,
                  Requires<[HasHardQuad]>;

-let Predicates = [HasNoFsmuldFix] in
 def FSMULD : F3_3<2, 0b110100, 0b001101001,
                  (outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
                  "fsmuld $rs1, $rs2, $rd",
                  [(set f64:$rd, (fmul (fpextend f32:$rs1),
                                        (fpextend f32:$rs2)))],
-                  IIC_fpu_muld>;
+                  IIC_fpu_muld>,
+                  Requires<[HasFSMULD]>;
 def FDMULQ : F3_3<2, 0b110100, 0b001101110,
                  (outs QFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
                  "fdmulq $rs1, $rs2, $rd",
--- a/llvm/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@ -36,14 +36,14 @@ SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU,
  HasHardQuad = false;
  UsePopc = false;
  UseSoftFloat = false;
+  HasNoFSMULD = false;
+  HasNoFMULS = false;

  // Leon features
  HasLeonCasa = false;
  HasUmacSmac = false;
  PerformSDIVReplace = false;
  InsertNOPLoad = false;
-  FixFSMULD = false;
-  ReplaceFMULS = false;
  FixAllFDIVSQRT = false;
  DetectRoundChange = false;

--- a/llvm/lib/Target/Sparc/SparcSubtarget.h
+++ b/llvm/lib/Target/Sparc/SparcSubtarget.h
@ -41,13 +41,13 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
  bool HasHardQuad;
  bool UsePopc;
  bool UseSoftFloat;
+  bool HasNoFSMULD;
+  bool HasNoFMULS;

  // LEON features
  bool HasUmacSmac;
  bool HasLeonCasa;
  bool InsertNOPLoad;
-  bool FixFSMULD;
-  bool ReplaceFMULS;
  bool FixAllFDIVSQRT;
  bool DetectRoundChange;
  bool PerformSDIVReplace;
@ -87,14 +87,14 @@ public:
  bool hasHardQuad() const { return HasHardQuad; }
  bool usePopc() const { return UsePopc; }
  bool useSoftFloat() const { return UseSoftFloat; }
+  bool hasNoFSMULD() const { return HasNoFSMULD; }
+  bool hasNoFMULS() const { return HasNoFMULS; }

  // Leon options
  bool hasUmacSmac() const { return HasUmacSmac; }
  bool performSDIVReplace() const { return PerformSDIVReplace; }
  bool hasLeonCasa() const { return HasLeonCasa; }
  bool insertNOPLoad() const { return InsertNOPLoad; }
-  bool fixFSMULD() const { return FixFSMULD; }
-  bool replaceFMULS() const { return ReplaceFMULS; }
  bool fixAllFDIVSQRT() const { return FixAllFDIVSQRT; }
  bool detectRoundChange() const { return DetectRoundChange; }

--- a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@ -149,14 +149,6 @@ void SparcPassConfig::addPreEmitPass(){
  {
    addPass(new InsertNOPLoad());
  }
-  if (this->getSparcTargetMachine().getSubtargetImpl()->fixFSMULD())
-  {
-    addPass(new FixFSMULD());
-  }
-  if (this->getSparcTargetMachine().getSubtargetImpl()->replaceFMULS())
-  {
-    addPass(new ReplaceFMULS());
-  }
  if (this->getSparcTargetMachine().getSubtargetImpl()->detectRoundChange()) {
    addPass(new DetectRoundChange());
  }
--- a/llvm/test/CodeGen/SPARC/LeonReplaceFMULSPassUT.ll
+++ b/llvm/test/CodeGen/SPARC/LeonReplaceFMULSPassUT.ll
@ -1,13 +0,0 @@
-; RUN: llc %s -O0 -march=sparc -mattr=replacefmuls -o - | FileCheck %s
-
-; CHECK-LABEL: test_replace_fmuls
-; CHECK:       fsmuld %f1, %f0, %f2
-; CHECK:       fdtos %f2, %f0
-; NOFIX-LABEL: test_replace_fmuls
-; NOFIX:       fmuls %f1, %f0, %f0
-define float @test_replace_fmuls(float %a, float %b) {
-entry:
-  %mul = fmul float %a, %b
-
-  ret float %mul
-}
--- a/llvm/test/CodeGen/SPARC/disable-fsmuld-fmuls.ll
+++ b/llvm/test/CodeGen/SPARC/disable-fsmuld-fmuls.ll
@ -0,0 +1,38 @@
+; RUN: llc %s -march=sparc -o - | FileCheck --check-prefix=CHECK --check-prefix=DEFAULT %s
+; RUN: llc %s -march=sparc -mattr=no-fmuls -o - | FileCheck --check-prefix=CHECK --check-prefix=NO-FMULS %s
+; RUN: llc %s -march=sparc -mattr=no-fsmuld -o - | FileCheck --check-prefix=CHECK --check-prefix=NO-FSMULD %s
+; RUN: llc %s -march=sparc -mattr=no-fsmuld,no-fmuls -o - | FileCheck --check-prefix=CHECK --check-prefix=NO-BOTH %s
+
+;;; Test case ensures that the no-fsmuld and no-fmuls features disable
+;;; the relevant instruction, and alternative sequences get emitted
+;;; instead.
+
+; CHECK-LABEL: test_float_mul:
+; DEFAULT:     fmuls
+; NO-FSMULD:   fmuls
+; NO-FMULS:    fsmuld
+; NO-FMULS:    fdtos
+; NO-BOTH:     fstod
+; NO-BOTH:     fstod
+; NO-BOTH:     fmuld
+; NO-BOTH:     fdtos
+define float @test_float_mul(float %a, float %b) {
+entry:
+  %mul = fmul float %a, %b
+
+  ret float %mul
+}
+
+; CHECK-LABEL: test_float_mul_double:
+; DEFAULT:     fsmuld
+; NO-FSMULD:   fstod
+; NO-FSMULD:   fstod
+; NO-FSMULD:   fmuld
+define double @test_float_mul_double(float %a, float %b) {
+entry:
+  %a_double = fpext float %a to double
+  %b_double = fpext float %b to double
+  %mul = fmul double %a_double, %b_double
+
+  ret double %mul
+}