[PowerPC] Add subregister classes for f64 VSX values

We had stored both f64 values and v2f64, etc. values in the VSX registers. This worked, but was suboptimal because we would always spill 16-byte values even through we almost always had scalar 8-byte values. This resulted in an increase in stack-size use, extra memory bandwidth, etc. To fix this, I've added 64-bit subregisters of the Altivec registers, and combined those with the existing scalar floating-point registers to form a class of VSX scalar floating-point registers. The ABI code has also been enhanced to use this register class and some other necessary improvements have been made. llvm-svn: 205075
2014-03-29 05:29:01 +00:00 · 2014-03-29 05:29:01 +00:00 · 19be506a5e
parent ab57a1555a
commit 19be506a5e
10 changed files with 358 additions and 62 deletions
--- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@ -114,6 +114,25 @@ static unsigned VSRegs[64] = {
  PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
  PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
 };
+static unsigned VSFRegs[64] = {
+  PPC::F0,  PPC::F1,  PPC::F2,  PPC::F3,
+  PPC::F4,  PPC::F5,  PPC::F6,  PPC::F7,
+  PPC::F8,  PPC::F9,  PPC::F10, PPC::F11,
+  PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+  PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+  PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+  PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+  PPC::F28, PPC::F29, PPC::F30, PPC::F31,
+
+  PPC::VF0,  PPC::VF1,  PPC::VF2,  PPC::VF3,
+  PPC::VF4,  PPC::VF5,  PPC::VF6,  PPC::VF7,
+  PPC::VF8,  PPC::VF9,  PPC::VF10, PPC::VF11,
+  PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+  PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+  PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+  PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+  PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
 static unsigned CRBITRegs[32] = {
  PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
  PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
@ -479,6 +498,11 @@ public:
    Inst.addOperand(MCOperand::CreateReg(VSRegs[getVSReg()]));
  }

+  void addRegVSFRCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(VSFRegs[getVSReg()]));
+  }
+
  void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
    assert(N == 1 && "Invalid number of operands!");
    Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()]));
--- a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@ -112,6 +112,26 @@ static const unsigned VSRegs[] = {
  PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
 };

+static const unsigned VSFRegs[] = {
+  PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+  PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+  PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+  PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+  PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+  PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+  PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+  PPC::F28, PPC::F29, PPC::F30, PPC::F31,
+
+  PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+  PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+  PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+  PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+  PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+  PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+  PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+  PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
+
 static const unsigned GPRegs[] = {
  PPC::R0, PPC::R1, PPC::R2, PPC::R3,
  PPC::R4, PPC::R5, PPC::R6, PPC::R7,
@ -189,6 +209,12 @@ static DecodeStatus DecodeVSRCRegisterClass(MCInst &Inst, uint64_t RegNo,
  return decodeRegisterClass(Inst, RegNo, VSRegs);
 }

+static DecodeStatus DecodeVSFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+                                            uint64_t Address,
+                                            const void *Decoder) {
+  return decodeRegisterClass(Inst, RegNo, VSFRegs);
+}
+
 static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
                                            uint64_t Address,
                                            const void *Decoder) {
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@ -573,7 +573,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

-      addRegisterClass(MVT::f64, &PPC::VSRCRegClass);
+      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
@ -2156,7 +2156,10 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
          RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
-          RC = &PPC::F8RCRegClass;
+          if (PPCSubTarget.hasVSX())
+            RC = &PPC::VSFRCRegClass;
+          else
+            RC = &PPC::F8RCRegClass;
          break;
        case MVT::v16i8:
        case MVT::v8i16:
@ -2559,7 +2562,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
-          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+          VReg = MF.addLiveIn(FPR[FPR_idx], PPCSubTarget.hasVSX() ?
+                                            &PPC::VSFRCRegClass :
+                                            &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
@ -8506,8 +8511,10 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
  } else if (Constraint == "wc") { // an individual CR bit.
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
  } else if (Constraint == "wa" || Constraint == "wd" ||
-             Constraint == "wf" || Constraint == "ws") {
+             Constraint == "wf") {
    return std::make_pair(0U, &PPC::VSRCRegClass);
+  } else if (Constraint == "ws") {
+    return std::make_pair(0U, &PPC::VSFRCRegClass);
  }

  std::pair<unsigned, const TargetRegisterClass*> R =
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@ -744,6 +744,8 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
    // copies are generated, they are close enough to some use that the
    // lower-latency form is preferable.
    Opc = PPC::XXLOR;
+  else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg))
+    Opc = PPC::XXLORf;
  else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::CROR;
  else
@ -815,6 +817,12 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
                                               getKillRegState(isKill)),
                                       FrameIdx));
    NonRI = true;
+  } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSDX))
+                                       .addReg(SrcReg,
+                                               getKillRegState(isKill)),
+                                       FrameIdx));
+    NonRI = true;
  } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
    assert(TM.getSubtargetImpl()->isDarwin() &&
           "VRSAVE only needs spill/restore on Darwin");
@ -906,6 +914,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXVD2X), DestReg),
                                       FrameIdx));
    NonRI = true;
+  } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSDX), DestReg),
+                                       FrameIdx));
+    NonRI = true;
  } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
    assert(TM.getSubtargetImpl()->isDarwin() &&
           "VRSAVE only needs spill/restore on Darwin");
@ -1638,7 +1650,7 @@ protected:

        // The addend and this instruction must be in the same block.

-        if (AddendMI->getParent() != MI->getParent())
+        if (!AddendMI || AddendMI->getParent() != MI->getParent())
          continue;

        // The addend must be a full copy within the same register class.
@ -1646,9 +1658,18 @@ protected:
        if (!AddendMI->isFullCopy())
          continue;

-        if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
-            MRI.getRegClass(AddendMI->getOperand(1).getReg()))
-          continue;
+        unsigned AddendSrcReg = AddendMI->getOperand(1).getReg();
+        if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) {
+          if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
+              MRI.getRegClass(AddendSrcReg))
+            continue;
+        } else {
+          // If AddendSrcReg is a physical register, make sure the destination
+          // register class contains it.
+          if (!MRI.getRegClass(AddendMI->getOperand(0).getReg())
+                ->contains(AddendSrcReg))
+            continue;
+        }

        // In theory, there could be other uses of the addend copy before this
        // fma.  We could deal with this, but that would require additional
@ -1678,8 +1699,8 @@ protected:
          OtherProdOp  = 2;
        }

-	// If there are no killed product operands, then this transformation is
-	// likely not profitable.
+        // If there are no killed product operands, then this transformation is
+        // likely not profitable.
        if (!KilledProdOp)
          continue;

--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@ -18,6 +18,13 @@ def vsrc : RegisterOperand<VSRC> {
  let ParserMatchClass = PPCRegVSRCAsmOperand;
 }

+def PPCRegVSFRCAsmOperand : AsmOperandClass {
+  let Name = "RegVSFRC"; let PredicateMethod = "isVSRegNumber";
+}
+def vsfrc : RegisterOperand<VSFRC> {
+  let ParserMatchClass = PPCRegVSFRCAsmOperand;
+}
+
 multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL,
                    string asmbase, string asmstr, InstrItinClass itin,
                    list<dag> pattern> {
@ -41,7 +48,7 @@ let Uses = [RM] in {
  // Load indexed instructions
  let mayLoad = 1, canFoldAsLoad = 1 in {
    def LXSDX : XForm_1<31, 588,
-                        (outs vsrc:$XT), (ins memrr:$src),
+                        (outs vsfrc:$XT), (ins memrr:$src),
                        "lxsdx $XT, $src", IIC_LdStLFD,
                        [(set f64:$XT, (load xoaddr:$src))]>;

@ -62,7 +69,7 @@ let Uses = [RM] in {
  // Store indexed instructions
  let mayStore = 1 in {
    def STXSDX : XX1Form<31, 716,
-                        (outs), (ins vsrc:$XT, memrr:$dst),
+                        (outs), (ins vsfrc:$XT, memrr:$dst),
                        "stxsdx $XT, $dst", IIC_LdStSTFD,
                        [(store f64:$XT, xoaddr:$dst)]>;

@ -79,11 +86,11 @@ let Uses = [RM] in {
  // Add/Mul Instructions
  let isCommutable = 1 in {
    def XSADDDP : XX3Form<60, 32,
-                          (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                          (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
                          "xsadddp $XT, $XA, $XB", IIC_VecFP,
                          [(set f64:$XT, (fadd f64:$XA, f64:$XB))]>;
    def XSMULDP : XX3Form<60, 48,
-                          (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                          (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
                          "xsmuldp $XT, $XA, $XB", IIC_VecFP,
                          [(set f64:$XT, (fmul f64:$XA, f64:$XB))]>;

@ -110,7 +117,7 @@ let Uses = [RM] in {

  // Subtract Instructions
  def XSSUBDP : XX3Form<60, 40,
-                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                        (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
                        "xssubdp $XT, $XA, $XB", IIC_VecFP,
                        [(set f64:$XT, (fsub f64:$XA, f64:$XB))]>;

@ -127,14 +134,14 @@ let Uses = [RM] in {
  let BaseName = "XSMADDADP" in {
  let isCommutable = 1 in
  def XSMADDADP : XX3Form<60, 33,
-                          (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+                          (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
                          "xsmaddadp $XT, $XA, $XB", IIC_VecFP,
                          [(set f64:$XT, (fma f64:$XA, f64:$XB, f64:$XTi))]>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
  let IsVSXFMAAlt = 1 in
  def XSMADDMDP : XX3Form<60, 41,
-                          (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+                          (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
                          "xsmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
@ -143,14 +150,14 @@ let Uses = [RM] in {
  let BaseName = "XSMSUBADP" in {
  let isCommutable = 1 in
  def XSMSUBADP : XX3Form<60, 49,
-                          (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+                          (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
                          "xsmsubadp $XT, $XA, $XB", IIC_VecFP,
                          [(set f64:$XT, (fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
  let IsVSXFMAAlt = 1 in
  def XSMSUBMDP : XX3Form<60, 57,
-                          (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+                          (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
                          "xsmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
@ -159,14 +166,14 @@ let Uses = [RM] in {
  let BaseName = "XSNMADDADP" in {
  let isCommutable = 1 in
  def XSNMADDADP : XX3Form<60, 161,
-                          (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+                          (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
                          "xsnmaddadp $XT, $XA, $XB", IIC_VecFP,
                          [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, f64:$XTi)))]>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
  let IsVSXFMAAlt = 1 in
  def XSNMADDMDP : XX3Form<60, 169,
-                          (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+                          (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
                          "xsnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
@ -175,14 +182,14 @@ let Uses = [RM] in {
  let BaseName = "XSNMSUBADP" in {
  let isCommutable = 1 in
  def XSNMSUBADP : XX3Form<60, 177,
-                          (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+                          (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
                          "xsnmsubadp $XT, $XA, $XB", IIC_VecFP,
                          [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
  let IsVSXFMAAlt = 1 in
  def XSNMSUBMDP : XX3Form<60, 185,
-                          (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+                          (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
                          "xsnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
@ -318,28 +325,28 @@ let Uses = [RM] in {

  // Division Instructions
  def XSDIVDP : XX3Form<60, 56,
-                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                        (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
                        "xsdivdp $XT, $XA, $XB", IIC_VecFP,
                        [(set f64:$XT, (fdiv f64:$XA, f64:$XB))]>;
  def XSSQRTDP : XX2Form<60, 75,
-                        (outs vsrc:$XT), (ins vsrc:$XB),
+                        (outs vsfrc:$XT), (ins vsfrc:$XB),
                        "xssqrtdp $XT, $XB", IIC_VecFP,
                        [(set f64:$XT, (fsqrt f64:$XB))]>;

  def XSREDP : XX2Form<60, 90,
-                        (outs vsrc:$XT), (ins vsrc:$XB),
+                        (outs vsfrc:$XT), (ins vsfrc:$XB),
                        "xsredp $XT, $XB", IIC_VecFP,
                        [(set f64:$XT, (PPCfre f64:$XB))]>;
  def XSRSQRTEDP : XX2Form<60, 74,
-                           (outs vsrc:$XT), (ins vsrc:$XB),
+                           (outs vsfrc:$XT), (ins vsfrc:$XB),
                           "xsrsqrtedp $XT, $XB", IIC_VecFP,
                           [(set f64:$XT, (PPCfrsqrte f64:$XB))]>;

  def XSTDIVDP : XX3Form_1<60, 61,
-                         (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+                         (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
                         "xstdivdp $crD, $XA, $XB", IIC_VecFP, []>;
  def XSTSQRTDP : XX2Form_1<60, 106,
-                          (outs crrc:$crD), (ins vsrc:$XB),
+                          (outs crrc:$crD), (ins vsfrc:$XB),
                          "xstsqrtdp $crD, $XB", IIC_VecFP, []>;

  def XVDIVDP : XX3Form<60, 120,
@ -394,10 +401,10 @@ let Uses = [RM] in {

  // Compare Instructions
  def XSCMPODP : XX3Form_1<60, 43,
-                           (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+                           (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
                           "xscmpodp $crD, $XA, $XB", IIC_VecFPCompare, []>;
  def XSCMPUDP : XX3Form_1<60, 35,
-                           (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+                           (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
                           "xscmpudp $crD, $XA, $XB", IIC_VecFPCompare, []>;

  defm XVCMPEQDP : XX3Form_Rcr<60, 99,
@ -421,19 +428,19 @@ let Uses = [RM] in {

  // Move Instructions
  def XSABSDP : XX2Form<60, 345,
-                      (outs vsrc:$XT), (ins vsrc:$XB),
+                      (outs vsfrc:$XT), (ins vsfrc:$XB),
                      "xsabsdp $XT, $XB", IIC_VecFP,
                      [(set f64:$XT, (fabs f64:$XB))]>;
  def XSNABSDP : XX2Form<60, 361,
-                      (outs vsrc:$XT), (ins vsrc:$XB),
+                      (outs vsfrc:$XT), (ins vsfrc:$XB),
                      "xsnabsdp $XT, $XB", IIC_VecFP,
                      [(set f64:$XT, (fneg (fabs f64:$XB)))]>;
  def XSNEGDP : XX2Form<60, 377,
-                      (outs vsrc:$XT), (ins vsrc:$XB),
+                      (outs vsfrc:$XT), (ins vsfrc:$XB),
                      "xsnegdp $XT, $XB", IIC_VecFP,
                      [(set f64:$XT, (fneg f64:$XB))]>;
  def XSCPSGNDP : XX3Form<60, 176,
-                      (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                      (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
                      "xscpsgndp $XT, $XA, $XB", IIC_VecFP,
                      [(set f64:$XT, (fcopysign f64:$XB, f64:$XA))]>;

@ -476,33 +483,33 @@ let Uses = [RM] in {

  // Conversion Instructions
  def XSCVDPSP : XX2Form<60, 265,
-                      (outs vsrc:$XT), (ins vsrc:$XB),
+                      (outs vsfrc:$XT), (ins vsfrc:$XB),
                      "xscvdpsp $XT, $XB", IIC_VecFP, []>;
  def XSCVDPSXDS : XX2Form<60, 344,
-                      (outs vsrc:$XT), (ins vsrc:$XB),
+                      (outs vsfrc:$XT), (ins vsfrc:$XB),
                      "xscvdpsxds $XT, $XB", IIC_VecFP,
                      [(set f64:$XT, (PPCfctidz f64:$XB))]>;
  def XSCVDPSXWS : XX2Form<60, 88,
-                      (outs vsrc:$XT), (ins vsrc:$XB),
+                      (outs vsfrc:$XT), (ins vsfrc:$XB),
                      "xscvdpsxws $XT, $XB", IIC_VecFP,
                      [(set f64:$XT, (PPCfctiwz f64:$XB))]>;
  def XSCVDPUXDS : XX2Form<60, 328,
-                      (outs vsrc:$XT), (ins vsrc:$XB),
+                      (outs vsfrc:$XT), (ins vsfrc:$XB),
                      "xscvdpuxds $XT, $XB", IIC_VecFP,
                      [(set f64:$XT, (PPCfctiduz f64:$XB))]>;
  def XSCVDPUXWS : XX2Form<60, 72,
-                      (outs vsrc:$XT), (ins vsrc:$XB),
+                      (outs vsfrc:$XT), (ins vsfrc:$XB),
                      "xscvdpuxws $XT, $XB", IIC_VecFP,
                      [(set f64:$XT, (PPCfctiwuz f64:$XB))]>;
  def XSCVSPDP : XX2Form<60, 329,
-                      (outs vsrc:$XT), (ins vsrc:$XB),
+                      (outs vsfrc:$XT), (ins vsfrc:$XB),
                      "xscvspdp $XT, $XB", IIC_VecFP, []>;
  def XSCVSXDDP : XX2Form<60, 376,
-                      (outs vsrc:$XT), (ins vsrc:$XB),
+                      (outs vsfrc:$XT), (ins vsfrc:$XB),
                      "xscvsxddp $XT, $XB", IIC_VecFP,
                      [(set f64:$XT, (PPCfcfid f64:$XB))]>;
  def XSCVUXDDP : XX2Form<60, 360,
-                      (outs vsrc:$XT), (ins vsrc:$XB),
+                      (outs vsfrc:$XT), (ins vsfrc:$XB),
                      "xscvuxddp $XT, $XB", IIC_VecFP,
                      [(set f64:$XT, (PPCfcfidu f64:$XB))]>;

@ -568,23 +575,23 @@ let Uses = [RM] in {

  // Rounding Instructions
  def XSRDPI : XX2Form<60, 73,
-                      (outs vsrc:$XT), (ins vsrc:$XB),
+                      (outs vsfrc:$XT), (ins vsfrc:$XB),
                      "xsrdpi $XT, $XB", IIC_VecFP,
                      [(set f64:$XT, (frnd f64:$XB))]>;
  def XSRDPIC : XX2Form<60, 107,
-                      (outs vsrc:$XT), (ins vsrc:$XB),
+                      (outs vsfrc:$XT), (ins vsfrc:$XB),
                      "xsrdpic $XT, $XB", IIC_VecFP,
                      [(set f64:$XT, (fnearbyint f64:$XB))]>;
  def XSRDPIM : XX2Form<60, 121,
-                      (outs vsrc:$XT), (ins vsrc:$XB),
+                      (outs vsfrc:$XT), (ins vsfrc:$XB),
                      "xsrdpim $XT, $XB", IIC_VecFP,
                      [(set f64:$XT, (ffloor f64:$XB))]>;
  def XSRDPIP : XX2Form<60, 105,
-                      (outs vsrc:$XT), (ins vsrc:$XB),
+                      (outs vsfrc:$XT), (ins vsfrc:$XB),
                      "xsrdpip $XT, $XB", IIC_VecFP,
                      [(set f64:$XT, (fceil f64:$XB))]>;
  def XSRDPIZ : XX2Form<60, 89,
-                      (outs vsrc:$XT), (ins vsrc:$XB),
+                      (outs vsfrc:$XT), (ins vsfrc:$XB),
                      "xsrdpiz $XT, $XB", IIC_VecFP,
                      [(set f64:$XT, (ftrunc f64:$XB))]>;

@ -633,10 +640,10 @@ let Uses = [RM] in {
  // Max/Min Instructions
  let isCommutable = 1 in {
  def XSMAXDP : XX3Form<60, 160,
-                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                        (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
                        "xsmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
  def XSMINDP : XX3Form<60, 168,
-                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                        (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
                        "xsmindp $XT, $XA, $XB", IIC_VecFP, []>;

  def XVMAXDP : XX3Form<60, 224,
@ -676,6 +683,10 @@ let Uses = [RM] in {
                      (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                      "xxlor $XT, $XA, $XB", IIC_VecGeneral,
                      [(set v4i32:$XT, (or v4i32:$XA, v4i32:$XB))]>;
+  let isCodeGenOnly = 1 in
+  def XXLORf: XX3Form<60, 146,
+                      (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+                      "xxlor $XT, $XA, $XB", IIC_VecGeneral, []>;
  def XXLXOR : XX3Form<60, 154,
                       (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                       "xxlxor $XT, $XA, $XB", IIC_VecGeneral,
@ -724,12 +735,12 @@ def : InstAlias<"xxswapd $XT, $XB",

 let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
 def : Pat<(v2f64 (scalar_to_vector f64:$A)),
-          (v2f64 (COPY_TO_REGCLASS $A, VSRC))>;
+          (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;

 def : Pat<(f64 (vector_extract v2f64:$S, 0)),
-          (f64 (COPY_TO_REGCLASS $S, VSRC))>;
+          (f64 (EXTRACT_SUBREG $S, sub_64))>;
 def : Pat<(f64 (vector_extract v2f64:$S, 1)),
-          (f64 (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSRC))>;
+          (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;

 // Additional fnmsub patterns: -a*c + b == -(a*c - b)
 def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@ -229,16 +229,33 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
  case PPC::F8RCRegClassID:
  case PPC::F4RCRegClassID:
  case PPC::VRRCRegClassID:
+  case PPC::VFRCRegClassID:
  case PPC::VSLRCRegClassID:
  case PPC::VSHRCRegClassID:
    return 32 - DefaultSafety;
  case PPC::VSRCRegClassID:
+  case PPC::VSFRCRegClassID:
    return 64 - DefaultSafety;
  case PPC::CRRCRegClassID:
    return 8 - DefaultSafety;
  }
 }

+const TargetRegisterClass*
+PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)const {
+  if (Subtarget.hasVSX()) {
+    // With VSX, we can inflate various sub-register classes to the full VSX
+    // register set.
+
+    if (RC == &PPC::F8RCRegClass)
+      return &PPC::VSFRCRegClass;
+    else if (RC == &PPC::VRRCRegClass)
+      return &PPC::VSRCRegClass;
+  }
+
+  return TargetRegisterInfo::getLargestLegalSuperClass(RC);
+}
+
 //===----------------------------------------------------------------------===//
 // Stack Frame Processing methods
 //===----------------------------------------------------------------------===//
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@ -40,6 +40,9 @@ public:
  unsigned getRegPressureLimit(const TargetRegisterClass *RC,
                               MachineFunction &MF) const;

+  const TargetRegisterClass*
+  getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
  /// Code Generation virtual methods...
  const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
  const uint32_t *getCallPreservedMask(CallingConv::ID CC) const;
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@ -49,9 +49,19 @@ class FPR<bits<5> num, string n> : PPCReg<n> {
  let HWEncoding{4-0} = num;
 }

-// VR - One of the 32 128-bit vector registers
-class VR<bits<5> num, string n> : PPCReg<n> {
+// VF - One of the 32 64-bit floating-point subregisters of the vector
+// registers (used by VSX).
+class VF<bits<5> num, string n> : PPCReg<n> {
  let HWEncoding{4-0} = num;
+  let HWEncoding{5} = 1;
+}
+
+// VR - One of the 32 128-bit vector registers
+class VR<VF SubReg, string n> : PPCReg<n> {
+  let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
+  let HWEncoding{5} = 0;
+  let SubRegs = [SubReg];
+  let SubRegIndices = [sub_64];
 }

 // VSRL - One of the 32 128-bit VSX registers that overlap with the scalar
@ -99,9 +109,14 @@ foreach Index = 0-31 in {
                DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
 }

+// Floating-point vector subregisters (for VSX)
+foreach Index = 0-31 in {
+  def VF#Index : VF<Index, "vs" # !add(Index, 32)>;
+}
+
 // Vector registers
 foreach Index = 0-31 in {
-  def V#Index : VR<Index, "v"#Index>,
+  def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
                DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
 }

@ -235,18 +250,27 @@ def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,

 // VSX register classes (the allocation order mirrors that of the corresponding
 // subregister classes).
-def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,f64,v2f64,v2i64], 128,
+def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
                          (add (sequence "VSL%u", 0, 13),
                               (sequence "VSL%u", 31, 14))>;
-def VSHRC : RegisterClass<"PPC", [v4i32,v4f32,f64,v2f64,v2i64], 128,
+def VSHRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
                          (add VSH2, VSH3, VSH4, VSH5, VSH0, VSH1, VSH6, VSH7,
 			       VSH8, VSH9, VSH10, VSH11, VSH12, VSH13, VSH14,
                               VSH15, VSH16, VSH17, VSH18, VSH19, VSH31, VSH30,
                               VSH29, VSH28, VSH27, VSH26, VSH25, VSH24, VSH23,
                               VSH22, VSH21, VSH20)>;
-def VSRC  : RegisterClass<"PPC", [v4i32,v4f32,f64,v2f64,v2i64], 128,
+def VSRC  : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
                          (add VSLRC, VSHRC)>;

+// Register classes for the 64-bit "scalar" VSX subregisters.
+def VFRC :  RegisterClass<"PPC", [f64], 64,
+                          (add VF2, VF3, VF4, VF5, VF0, VF1, VF6, VF7,
+                               VF8, VF9, VF10, VF11, VF12, VF13, VF14,
+                               VF15, VF16, VF17, VF18, VF19, VF31, VF30,
+                               VF29, VF28, VF27, VF26, VF25, VF24, VF23,
+                               VF22, VF21, VF20)>;
+def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>;
+
 def CRBITRC : RegisterClass<"PPC", [i1], 32,
  (add CR2LT, CR2GT, CR2EQ, CR2UN,
       CR3LT, CR3GT, CR3EQ, CR3UN,
--- a/llvm/test/CodeGen/PowerPC/vsx-fma-m.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-fma-m.ll
@ -64,7 +64,7 @@ entry:
  ret void

 ; CHECK-LABEL: @test3
-; CHECK-DAG: xxlor [[F1:[0-9]+]], 1, 1
+; CHECK-DAG: fmr [[F1:[0-9]+]], 1
 ; CHECK-DAG: li [[C1:[0-9]+]], 24
 ; CHECK-DAG: li [[C2:[0-9]+]], 16
 ; CHECK-DAG: li [[C3:[0-9]+]], 8
@ -80,7 +80,7 @@ entry:
 ; CHECK-DAG: stxsdx 2, 8, [[C1]]
 ; CHECK-DAG: stxsdx 1, 8, [[C2]]
 ; CHECK-DAG: stxsdx 4, 8, [[C3]]
-; CHECK-DAG: blr
+; CHECK: blr
 }

 define void @test4(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
@ -99,7 +99,7 @@ entry:
  ret void

 ; CHECK-LABEL: @test4
-; CHECK-DAG: xxlor [[F1:[0-9]+]], 1, 1
+; CHECK-DAG: fmr [[F1:[0-9]+]], 1
 ; CHECK-DAG: li [[C1:[0-9]+]], 8
 ; CHECK-DAG: li [[C2:[0-9]+]], 16
 ; CHECK-DAG: xsmaddmdp 4, 2, 1
@ -120,5 +120,119 @@ entry:

 declare double @llvm.fma.f64(double, double, double) #0

+define void @testv1(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double>* nocapture %d) #0 {
+entry:
+  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
+  store <2 x double> %0, <2 x double>* %d, align 8
+  %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
+  %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
+  store <2 x double> %1, <2 x double>* %arrayidx1, align 8
+  ret void
+
+; CHECK-LABEL: @testv1
+; CHECK-DAG: xvmaddmdp 36, 35, 34
+; CHECK-DAG: xvmaddadp 34, 35, 37
+; CHECK-DAG: li [[C1:[0-9]+]], 16
+; CHECK-DAG: stxvd2x 36, 0, 3
+; CHECK-DAG: stxvd2x 34, 3, [[C1:[0-9]+]]
+; CHECK: blr
+}
+
+define void @testv2(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
+entry:
+  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
+  store <2 x double> %0, <2 x double>* %d, align 8
+  %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
+  %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
+  store <2 x double> %1, <2 x double>* %arrayidx1, align 8
+  %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
+  %arrayidx2 = getelementptr inbounds <2 x double>* %d, i64 2
+  store <2 x double> %2, <2 x double>* %arrayidx2, align 8
+  ret void
+
+; CHECK-LABEL: @testv2
+; CHECK-DAG: xvmaddmdp 36, 35, 34
+; CHECK-DAG: xvmaddmdp 37, 35, 34
+; CHECK-DAG: li [[C1:[0-9]+]], 16
+; CHECK-DAG: li [[C2:[0-9]+]], 32
+; CHECK-DAG: xvmaddadp 34, 35, 38
+; CHECK-DAG: stxvd2x 36, 0, 3
+; CHECK-DAG: stxvd2x 37, 3, [[C1:[0-9]+]]
+; CHECK-DAG: stxvd2x 34, 3, [[C2:[0-9]+]]
+; CHECK: blr
+}
+
+define void @testv3(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
+entry:
+  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
+  store <2 x double> %0, <2 x double>* %d, align 8
+  %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
+  %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1)
+  %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 3
+  store <2 x double> %2, <2 x double>* %arrayidx1, align 8
+  %3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
+  %arrayidx2 = getelementptr inbounds <2 x double>* %d, i64 2
+  store <2 x double> %3, <2 x double>* %arrayidx2, align 8
+  %arrayidx3 = getelementptr inbounds <2 x double>* %d, i64 1
+  store <2 x double> %1, <2 x double>* %arrayidx3, align 8
+  ret void
+
+; CHECK-LABEL: @testv3
+; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34
+; CHECK-DAG: xvmaddmdp 37, 35, 34
+; CHECK-DAG: li [[C1:[0-9]+]], 48
+; CHECK-DAG: li [[C2:[0-9]+]], 32
+; CHECK-DAG: xvmaddadp 34, 35, 38
+; CHECK-DAG: li [[C3:[0-9]+]], 16
+
+; Note: We could convert this next FMA to M-type as well, but it would require
+; re-ordering the instructions.
+; CHECK-DAG: xvmaddadp [[V1]], 35, 36
+
+; CHECK-DAG: xvmaddmdp 35, 36, 37
+; CHECK-DAG: stxvd2x 32, 0, 3
+; CHECK-DAG: stxvd2x 35, 3, [[C1]]
+; CHECK-DAG: stxvd2x 34, 3, [[C2]]
+; CHECK-DAG: stxvd2x 37, 3, [[C3]]
+; CHECK: blr
+}
+
+define void @testv4(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
+entry:
+  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
+  store <2 x double> %0, <2 x double>* %d, align 8
+  %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
+  %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
+  store <2 x double> %1, <2 x double>* %arrayidx1, align 8
+  %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1)
+  %arrayidx3 = getelementptr inbounds <2 x double>* %d, i64 3
+  store <2 x double> %2, <2 x double>* %arrayidx3, align 8
+  %3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
+  %arrayidx4 = getelementptr inbounds <2 x double>* %d, i64 2
+  store <2 x double> %3, <2 x double>* %arrayidx4, align 8
+  ret void
+
+; CHECK-LABEL: @testv4
+; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34
+; CHECK-DAG: xvmaddmdp 37, 35, 34
+; CHECK-DAG: li [[C1:[0-9]+]], 16
+; CHECK-DAG: li [[C2:[0-9]+]], 32
+; CHECK-DAG: xvmaddadp 34, 35, 38
+
+; Note: We could convert this next FMA to M-type as well, but it would require
+; re-ordering the instructions.
+; CHECK-DAG: xvmaddadp [[V1]], 35, 36
+
+; CHECK-DAG: stxvd2x 32, 0, 3
+; CHECK-DAG: stxvd2x 37, 3, [[C1]]
+; CHECK-DAG: li [[C3:[0-9]+]], 48
+; CHECK-DAG: xvmaddadp 37, 35, 36
+; CHECK-DAG: stxvd2x 37, 3, [[C3]]
+; CHECK-DAG: stxvd2x 34, 3, [[C2]]
+; CHECK: blr
+}
+
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
+
 attributes #0 = { nounwind readnone }

--- a/llvm/test/CodeGen/PowerPC/vsx-spill.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-spill.ll
@ -0,0 +1,49 @@
+; RUN: llc -mcpu=pwr7 -mattr=+vsx < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define double @foo1(double %a) nounwind {
+entry:
+  call void asm sideeffect "", "~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"() nounwind
+  br label %return
+
+; CHECK: @foo1
+; CHECK: xxlor [[R1:[0-9]+]], 1, 1
+; CHECK: xxlor 1, [[R1]], [[R1]]
+; CHECK: blr
+
+return:                                           ; preds = %entry
+  ret double %a
+}
+
+define double @foo2(double %a) nounwind {
+entry:
+  %b = fadd double %a, %a
+  call void asm sideeffect "", "~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"() nounwind
+  br label %return
+
+; CHECK: @foo2
+; CHECK: {{xxlor|xsadddp}} [[R1:[0-9]+]], 1, 1
+; CHECK: {{xxlor|xsadddp}} 1, [[R1]], [[R1]]
+; CHECK: blr
+
+return:                                           ; preds = %entry
+  ret double %b
+}
+
+define double @foo3(double %a) nounwind {
+entry:
+  call void asm sideeffect "", "~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31},~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() nounwind
+  br label %return
+
+; CHECK: @foo3
+; CHECK: stxsdx 1,
+; CHECK: lxsdx [[R1:[0-9]+]],
+; CHECK: xsadddp 1, [[R1]], [[R1]]
+; CHECK: blr
+
+return:                                           ; preds = %entry
+  %b = fadd double %a, %a
+  ret double %b
+}
+