[Power9] Implement new vsx instructions: compare and conversion

This change implements the following vsx instructions: Quad/Double-Precision Compare: xscmpoqp xscmpuqp xscmpexpdp xscmpexpqp xscmpeqdp xscmpgedp xscmpgtdp xscmpnedp xvcmpnedp(.) xvcmpnesp(.); Quad-Precision Floating-Point Conversion: xscvqpdp(o) xscvdpqp xscvqpsdz xscvqpswz xscvqpudz xscvqpuwz xscvsdqp xscvudqp xscvdphp xscvhpdp xvcvhpsp xvcvsphp xsrqpi xsrqpix xsrqpxp (28 instructions in total). Phabricator: http://reviews.llvm.org/D16709 llvm-svn: 262068
2016-02-26 21:11:55 +00:00 · 2016-02-26 21:11:55 +00:00 · 93612ec5f2
parent e50f744743
commit 93612ec5f2
8 changed files with 453 additions and 0 deletions
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@ -124,6 +124,12 @@ def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true",
 def FeatureP8Vector  : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
                                        "Enable POWER8 vector instructions",
                                        [FeatureVSX, FeatureP8Altivec]>;
+def FeatureP9Altivec : SubtargetFeature<"power9-altivec", "HasP9Altivec", "true",
+                                        "Enable POWER9 Altivec instructions",
+                                        [FeatureP8Altivec]>;
+def FeatureP9Vector  : SubtargetFeature<"power9-vector", "HasP9Vector", "true",
+                                        "Enable POWER9 vector instructions",
+                                        [FeatureP8Vector, FeatureP9Altivec]>;
 def FeatureDirectMove :
  SubtargetFeature<"direct-move", "HasDirectMove", "true",
                   "Enable Power8 direct move instructions",
--- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@ -747,6 +747,13 @@ class XForm_htm3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
  let Inst{31}    = RC;
 }

+// X-form whose second 5-bit field holds an extended opcode (xo2) instead of a
+// register operand.
+// e.g. [PO VRT XO VRB XO /] or [PO VRT XO VRB XO RO]
+class X_RD5_XO5_RS5<bits<6> opcode, bits<5> xo2, bits<10> xo, dag OOL, dag IOL,
+                    string asmstr, InstrItinClass itin, list<dag> pattern>
+  : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+  // A is declared by XForm_base_r3xo (defined outside this hunk); it is pinned
+  // to the constant xo2 rather than being filled in from an operand.
+  let A = xo2;
+}
+
 // XX*-Form (VSX)
 class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, 
              InstrItinClass itin, list<dag> pattern>
@ -820,6 +827,22 @@ class XX2Form_2<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
  let Inst{31}    = XT{5};
 }

+// XX2-form variant that carries a 5-bit extended opcode (xo2) in bits 11-15
+// in addition to the 9-bit xo in bits 21-29.
+// Layout: [PO T XO B XO BX TX]
+class XX2_RD6_XO5_RS6<bits<6> opcode, bits<5> xo2, bits<9> xo, dag OOL, dag IOL,
+                      string asmstr, InstrItinClass itin, list<dag> pattern>
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  // 6-bit register numbers; the low 5 bits and the high bit of each are
+  // encoded in separate places below.
+  bits<6> XT;
+  bits<6> XB;
+
+  let Pattern = pattern;
+
+  // Low 5 bits of XT, then the fixed secondary opcode, then low 5 bits of XB.
+  let Inst{6-10}  = XT{4-0};
+  let Inst{11-15} = xo2;
+  let Inst{16-20} = XB{4-0};
+  let Inst{21-29} = xo;
+  // The high bits of XB and XT occupy the last two instruction bits.
+  let Inst{30}    = XB{5};
+  let Inst{31}    = XT{5};
+}
+
 class XX3Form<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, 
              InstrItinClass itin, list<dag> pattern>
  : I<opcode, OOL, IOL, asmstr, itin> {
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@ -1783,3 +1783,138 @@ def : Pat<(i64 (bitconvert f64:$S)),
 def : Pat<(f64 (bitconvert i64:$S)),
          (f64 (MTVSRD $S))>;
 }
+
+// The following VSX instructions were introduced in Power ISA 3.0
+def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">;
+let Predicates = [HasP9Vector] in {
+
+  // [PO VRT XO VRB XO /]
+  class X_VT5_XO5_VB5<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+                      list<dag> pattern>
+    : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vrrc:$vB),
+                    !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+
+  // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
+  class X_VT5_XO5_VB5_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+                         list<dag> pattern>
+    : X_VT5_XO5_VB5<opcode, xo2, xo, opc, pattern>, isDOT;
+
+  // [PO VRT XO VRB XO /], but only the left 64 bits (or less) of VRB are used,
+  // so we use a different operand class for VRB
+  class X_VT5_XO5_VB5_TyVB<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+                           RegisterOperand vbtype, list<dag> pattern>
+    : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vbtype:$vB),
+                    !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+
+  // [PO T XO B XO BX TX]
+  class XX2_XT6_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
+                        RegisterOperand vtype, list<dag> pattern>
+    : XX2_RD6_XO5_RS6<opcode, xo2, xo, (outs vtype:$XT), (ins vtype:$XB),
+                      !strconcat(opc, " $XT, $XB"), IIC_VecFP, pattern>;
+
+  // [PO T A B XO AX BX TX], src and dest register use different operand class
+  class XX3_XT5_XA5_XB5<bits<6> opcode, bits<8> xo, string opc,
+                  RegisterOperand xty, RegisterOperand aty, RegisterOperand bty,
+                  InstrItinClass itin, list<dag> pattern>
+    : XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB),
+              !strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;
+
+  //===--------------------------------------------------------------------===//
+  // Quad/Double-Precision Compare Instructions:
+
+  // [PO BF // VRA VRB XO /]
+  class X_BF3_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
+                      list<dag> pattern>
+    : XForm_17<opcode, xo, (outs crrc:$crD), (ins vrrc:$VA, vrrc:$VB),
+               !strconcat(opc, " $crD, $VA, $VB"), IIC_FPCompare> {
+    let Pattern = pattern;
+  }
+
+  // QP Compare Ordered/Unordered
+  def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>;
+  def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>;
+
+  // DP/QP Compare Exponents
+  def XSCMPEXPDP : XX3Form_1<60, 59,
+                             (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
+                             "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>;
+  def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>;
+
+  // DP Compare ==, >=, >, !=
+  // Use vsrc for XT, because the entire register of XT is set.
+  // XT.dword[1] = 0x0000_0000_0000_0000
+  def XSCMPEQDP : XX3_XT5_XA5_XB5<60,  3, "xscmpeqdp", vsrc, vsfrc, vsfrc,
+                                  IIC_FPCompare, []>;
+  def XSCMPGEDP : XX3_XT5_XA5_XB5<60, 19, "xscmpgedp", vsrc, vsfrc, vsfrc,
+                                  IIC_FPCompare, []>;
+  def XSCMPGTDP : XX3_XT5_XA5_XB5<60, 11, "xscmpgtdp", vsrc, vsfrc, vsfrc,
+                                  IIC_FPCompare, []>;
+  def XSCMPNEDP : XX3_XT5_XA5_XB5<60, 27, "xscmpnedp", vsrc, vsfrc, vsfrc,
+                                  IIC_FPCompare, []>;
+  // Vector Compare Not Equal
+  def XVCMPNEDP  : XX3Form_Rc<60, 123,
+                              (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                              "xvcmpnedp  $XT, $XA, $XB", IIC_VecFPCompare, []>;
+  let Defs = [CR6] in
+  def XVCMPNEDPo : XX3Form_Rc<60, 123,
+                              (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                              "xvcmpnedp. $XT, $XA, $XB", IIC_VecFPCompare, []>,
+                              isDOT;
+  def XVCMPNESP  : XX3Form_Rc<60,  91,
+                              (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                              "xvcmpnesp  $XT, $XA, $XB", IIC_VecFPCompare, []>;
+  let Defs = [CR6] in
+  def XVCMPNESPo : XX3Form_Rc<60,  91,
+                              (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                              "xvcmpnesp. $XT, $XA, $XB", IIC_VecFPCompare, []>,
+                              isDOT;
+
+  //===--------------------------------------------------------------------===//
+  // Quad-Precision Floating-Point Conversion Instructions:
+
+  // Convert DP -> QP
+  def XSCVDPQP  : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vsfrc, []>;
+
+  // Round & Convert QP -> DP (dword[1] is set to zero)
+  def XSCVQPDP  : X_VT5_XO5_VB5   <63, 20, 836, "xscvqpdp" , []>;
+  def XSCVQPDPO : X_VT5_XO5_VB5_Ro<63, 20, 836, "xscvqpdpo", []>;
+
+  // Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero)
+  def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>;
+  def XSCVQPSWZ : X_VT5_XO5_VB5<63,  9, 836, "xscvqpswz", []>;
+  def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz", []>;
+  def XSCVQPUWZ : X_VT5_XO5_VB5<63,  1, 836, "xscvqpuwz", []>;
+
+  // Convert (Un)Signed DWord -> QP
+  def XSCVSDQP  : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vsfrc, []>;
+  def XSCVUDQP  : X_VT5_XO5_VB5_TyVB<63,  2, 836, "xscvudqp", vsfrc, []>;
+
+  //===--------------------------------------------------------------------===//
+  // Half-Precision Conversion and Round to Floating-Point Integer Instructions
+
+  // (Round &) Convert DP <-> HP
+  // Note! xscvdphp's src and dest registers both use the left 64 bits, so we
+  // use vsfrc for both. xscvhpdp's src only uses the left 16 bits, but we
+  // still use vsfrc for it.
+  def XSCVDPHP : XX2_XT6_XO5_XB6<60, 17, 347, "xscvdphp", vsfrc, []>;
+  def XSCVHPDP : XX2_XT6_XO5_XB6<60, 16, 347, "xscvhpdp", vsfrc, []>;
+
+  // Vector HP <-> SP
+  def XVCVHPSP : XX2_XT6_XO5_XB6<60, 24, 475, "xvcvhpsp", vsrc, []>;
+  def XVCVSPHP : XX2_XT6_XO5_XB6<60, 25, 475, "xvcvsphp", vsrc, []>;
+
+  // Z23-form instruction with vrrc operands $vT (output) and $vB (input) plus
+  // the immediates $r (u1imm) and $rmc (u2imm). The assembly operand order is
+  // "$r, $vT, $vB, $rmc".
+  // NOTE(review): the bit positions of these fields come from Z23Form_1, which
+  // is defined outside this hunk.
+  class Z23_VT5_R1_VB5_RMC2_EX1<bits<6> opcode, bits<8> xo, bit ex, string opc,
+                                list<dag> pattern>
+    : Z23Form_1<opcode, xo,
+                (outs vrrc:$vT), (ins u1imm:$r, vrrc:$vB, u2imm:$rmc),
+                !strconcat(opc, " $r, $vT, $vB, $rmc"), IIC_VecFP, pattern> {
+    // The ex template argument is stored in the form's RC field; with the same
+    // opcode and xo it is what separates xsrqpi (ex = 0) from xsrqpix (ex = 1).
+    let RC = ex;
+  }
+
+  // Round to Quad-Precision Integer [with Inexact]
+  def XSRQPI   : Z23_VT5_R1_VB5_RMC2_EX1<63,  5, 0, "xsrqpi" , []>;
+  def XSRQPIX  : Z23_VT5_R1_VB5_RMC2_EX1<63,  5, 1, "xsrqpix", []>;
+
+  // Round Quad-Precision to Double-Extended Precision (fp80)
+  def XSRQPXP  : Z23_VT5_R1_VB5_RMC2_EX1<63, 37, 0, "xsrqpxp", []>;
+} // end HasP9Vector
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@ -70,6 +70,8 @@ void PPCSubtarget::initializeEnvironment() {
  HasP8Vector = false;
  HasP8Altivec = false;
  HasP8Crypto = false;
+  HasP9Vector = false;
+  HasP9Altivec = false;
  HasFCPSGN = false;
  HasFSQRT = false;
  HasFRE = false;
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@ -92,6 +92,8 @@ protected:
  bool HasP8Vector;
  bool HasP8Altivec;
  bool HasP8Crypto;
+  bool HasP9Vector;
+  bool HasP9Altivec;
  bool HasFCPSGN;
  bool HasFSQRT;
  bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
@ -230,6 +232,8 @@ public:
  bool hasP8Vector() const { return HasP8Vector; }
  bool hasP8Altivec() const { return HasP8Altivec; }
  bool hasP8Crypto() const { return HasP8Crypto; }
+  bool hasP9Vector() const { return HasP9Vector; }
+  bool hasP9Altivec() const { return HasP9Altivec; }
  bool hasMFOCRF() const { return HasMFOCRF; }
  bool hasISEL() const { return HasISEL; }
  bool hasPOPCNTD() const { return HasPOPCNTD; }
--- a/llvm/lib/Target/PowerPC/README_P9.txt
+++ b/llvm/lib/Target/PowerPC/README_P9.txt
@ -0,0 +1,87 @@
+//===- README_P9.txt - Notes for improving Power9 code gen ----------------===//
+
+TODO: Instructions that need intrinsics implemented or a mapping to LLVM IR
+
+Altivec:
+
+VSX:
+
+- QP Compare Ordered/Unordered: xscmpoqp xscmpuqp
+  . ref: XSCMPUDP
+      def XSCMPUDP : XX3Form_1<60, 35,
+                               (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
+                               "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>;
+
+  . Are no SDAG patterns, intrinsics, or builtins required??
+    Or should these map to LLVM fcmp ordered/unordered compares??
+
+- DP/QP Compare Exponents: xscmpexpdp xscmpexpqp
+  . No SDAG, intrinsic, builtin are required?
+
+- DP Compare ==, >=, >, !=: xscmpeqdp xscmpgedp xscmpgtdp xscmpnedp
+  . I checked the existing instruction "XSCMPUDP". They differ in the target
+    register: "XSCMPUDP" writes to a CR field, xscmp*dp write to a VSX register
+
+  . Use intrinsic:
+    (set i128:$XT, (int_ppc_vsx_xscmpeqdp f64:$XA, f64:$XB))
+    (set i128:$XT, (int_ppc_vsx_xscmpgedp f64:$XA, f64:$XB))
+    (set i128:$XT, (int_ppc_vsx_xscmpgtdp f64:$XA, f64:$XB))
+    (set i128:$XT, (int_ppc_vsx_xscmpnedp f64:$XA, f64:$XB))
+
+- Vector Compare Not Equal: xvcmpnedp xvcmpnedp. xvcmpnesp xvcmpnesp.
+  . Similar to xvcmpeqdp:
+      defm XVCMPEQDP : XX3Form_Rcr<60, 99,
+                                 "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare,
+                                 int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>;
+
+  . So we should use "XX3Form_Rcr" to implement the intrinsic
+
+- Convert DP -> QP: xscvdpqp
+  . Similar to XSCVDPSP:
+      def XSCVDPSP : XX2Form<60, 265,
+                          (outs vsfrc:$XT), (ins vsfrc:$XB),
+                          "xscvdpsp $XT, $XB", IIC_VecFP, []>;
+  . So, No SDAG, intrinsic, builtin are required??
+
+- Round & Convert QP -> DP (dword[1] is set to zero): xscvqpdp xscvqpdpo
+  . Similar to XSCVDPSP
+  . No SDAG, intrinsic, builtin are required??
+
+- Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero):
+  xscvqpsdz xscvqpswz xscvqpudz xscvqpuwz
+  . According to PowerISA_V3.0, these are similar to "XSCVDPSXDS", "XSCVDPSXWS",
+    "XSCVDPUXDS", "XSCVDPUXWS"
+
+  . DAG patterns:
+    (set f128:$XT, (PPCfctidz f128:$XB))    // xscvqpsdz
+    (set f128:$XT, (PPCfctiwz f128:$XB))    // xscvqpswz
+    (set f128:$XT, (PPCfctiduz f128:$XB))   // xscvqpudz
+    (set f128:$XT, (PPCfctiwuz f128:$XB))   // xscvqpuwz
+
+- Convert (Un)Signed DWord -> QP: xscvsdqp xscvudqp
+  . Similar to XSCVSXDSP
+  . (set f128:$XT, (PPCfcfids f64:$XB))     // xscvsdqp
+    (set f128:$XT, (PPCfcfidus f64:$XB))    // xscvudqp
+
+- (Round &) Convert DP <-> HP: xscvdphp xscvhpdp
+  . Similar to XSCVDPSP
+  . No SDAG, intrinsic, builtin are required??
+
+- Vector HP <-> SP: xvcvhpsp xvcvsphp
+  . Similar to XVCVDPSP:
+      def XVCVDPSP : XX2Form<60, 393,
+                          (outs vsrc:$XT), (ins vsrc:$XB),
+                          "xvcvdpsp $XT, $XB", IIC_VecFP, []>;
+  . No SDAG, intrinsic, builtin are required??
+
+- Round to Quad-Precision Integer: xsrqpi xsrqpix
+  . These are a combination of "XSRDPI", "XSRDPIC", "XSRDPIM", .., because you
+    need to specify the rounding mode in the instruction
+  . Provide builtin?
+    (set f128:$vT, (int_ppc_vsx_xsrqpi f128:$vB))
+    (set f128:$vT, (int_ppc_vsx_xsrqpix f128:$vB))
+
+- Round Quad-Precision to Double-Extended Precision (fp80): xsrqpxp
+  . Provide builtin?
+    (set f128:$vT, (int_ppc_vsx_xsrqpxp f128:$vB))
+
--- a/llvm/test/MC/Disassembler/PowerPC/vsx.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/vsx.txt
@ -539,3 +539,89 @@

 # CHECK: mtvsrwz 0, 3
 0x7c 0x03 0x01 0xe6
+
+# Power9 Instructions:
+
+# CHECK: xscmpoqp 6, 31, 27
+0xff 0x1f 0xd9 0x08
+
+# CHECK: xscmpuqp 6, 31, 27
+0xff 0x1f 0xdd 0x08
+
+# CHECK: xscmpexpdp 6, 63, 27
+0xf3 0x1f 0xd9 0xdc
+
+# CHECK: xscmpexpqp 6, 31, 27
+0xff 0x1f 0xd9 0x48
+
+# CHECK: xscmpeqdp 7, 63, 27
+0xf0 0xff 0xd8 0x1c
+
+# CHECK: xscmpgedp 7, 63, 27
+0xf0 0xff 0xd8 0x9c
+
+# CHECK: xscmpgtdp 7, 63, 27
+0xf0 0xff 0xd8 0x5c
+
+# CHECK: xscmpnedp 7, 63, 27
+0xf0 0xff 0xd8 0xdc
+
+# CHECK: xvcmpnedp 7, 63, 27
+0xf0 0xff 0xdb 0xdc
+
+# CHECK: xvcmpnedp. 7, 63, 27
+0xf0 0xff 0xdf 0xdc
+
+# CHECK: xvcmpnesp 7, 63, 27
+0xf0 0xff 0xda 0xdc
+
+# CHECK: xvcmpnesp. 7, 63, 27
+0xf0 0xff 0xde 0xdc
+
+# CHECK: xscvdpqp 7, 27
+0xfc 0xf6 0xde 0x88
+
+# CHECK: xscvqpdp 7, 27
+0xfc 0xf4 0xde 0x88
+
+# CHECK: xscvqpdpo 7, 27
+0xfc 0xf4 0xde 0x89
+
+# CHECK: xscvqpsdz 7, 27
+0xfc 0xf9 0xde 0x88
+
+# CHECK: xscvqpswz 7, 27
+0xfc 0xe9 0xde 0x88
+
+# CHECK: xscvqpudz 7, 27
+0xfc 0xf1 0xde 0x88
+
+# CHECK: xscvqpuwz 7, 27
+0xfc 0xe1 0xde 0x88
+
+# CHECK: xscvsdqp 7, 27
+0xfc 0xea 0xde 0x88
+
+# CHECK: xscvudqp 7, 27
+0xfc 0xe2 0xde 0x88
+
+# CHECK: xscvdphp 7, 63
+0xf0 0xf1 0xfd 0x6e
+
+# CHECK: xscvhpdp 7, 63
+0xf0 0xf0 0xfd 0x6e
+
+# CHECK: xvcvhpsp 7, 63
+0xf0 0xf8 0xff 0x6e
+
+# CHECK: xvcvsphp 7, 63
+0xf0 0xf9 0xff 0x6e
+
+# CHECK: xsrqpi 1, 7, 27, 2
+0xfc 0xe1 0xdc 0x0a
+
+# CHECK: xsrqpix 1, 7, 27, 2
+0xfc 0xe1 0xdc 0x0b
+
+# CHECK: xsrqpxp 1, 7, 27, 2
+0xfc 0xe1 0xdc 0x4a
--- a/llvm/test/MC/PowerPC/vsx.s
+++ b/llvm/test/MC/PowerPC/vsx.s
@ -547,3 +547,113 @@
 # CHECK-BE: mtvsrwz 0, 3                       # encoding: [0x7c,0x03,0x01,0xe6]
 # CHECK-LE: mtvsrwz 0, 3                       # encoding: [0xe6,0x01,0x03,0x7c]
            mtvsrwz 0, 3
+
+# Power9 Instructions:
+
+# Compare Ordered/Unordered
+# CHECK-BE: xscmpoqp 6, 31, 27                 # encoding: [0xff,0x1f,0xd9,0x08]
+# CHECK-LE: xscmpoqp 6, 31, 27                 # encoding: [0x08,0xd9,0x1f,0xff]
+            xscmpoqp 6, 31, 27
+# CHECK-BE: xscmpuqp 6, 31, 27                 # encoding: [0xff,0x1f,0xdd,0x08]
+# CHECK-LE: xscmpuqp 6, 31, 27                 # encoding: [0x08,0xdd,0x1f,0xff]
+            xscmpuqp 6, 31, 27
+
+# Compare Exponents
+# CHECK-BE: xscmpexpdp 6, 63, 27               # encoding: [0xf3,0x1f,0xd9,0xdc]
+# CHECK-LE: xscmpexpdp 6, 63, 27               # encoding: [0xdc,0xd9,0x1f,0xf3]
+            xscmpexpdp 6, 63, 27
+# CHECK-BE: xscmpexpqp 6, 31, 27               # encoding: [0xff,0x1f,0xd9,0x48]
+# CHECK-LE: xscmpexpqp 6, 31, 27               # encoding: [0x48,0xd9,0x1f,0xff]
+            xscmpexpqp 6, 31, 27
+
+# Compare ==, >=, >, !=
+# CHECK-BE: xscmpeqdp 7, 63, 27                # encoding: [0xf0,0xff,0xd8,0x1c]
+# CHECK-LE: xscmpeqdp 7, 63, 27                # encoding: [0x1c,0xd8,0xff,0xf0]
+            xscmpeqdp 7, 63, 27
+# CHECK-BE: xscmpgedp 7, 63, 27                # encoding: [0xf0,0xff,0xd8,0x9c]
+# CHECK-LE: xscmpgedp 7, 63, 27                # encoding: [0x9c,0xd8,0xff,0xf0]
+            xscmpgedp 7, 63, 27
+# CHECK-BE: xscmpgtdp 7, 63, 27                # encoding: [0xf0,0xff,0xd8,0x5c]
+# CHECK-LE: xscmpgtdp 7, 63, 27                # encoding: [0x5c,0xd8,0xff,0xf0]
+            xscmpgtdp 7, 63, 27
+# CHECK-BE: xscmpnedp 7, 63, 27                # encoding: [0xf0,0xff,0xd8,0xdc]
+# CHECK-LE: xscmpnedp 7, 63, 27                # encoding: [0xdc,0xd8,0xff,0xf0]
+            xscmpnedp 7, 63, 27
+
+# Vector Compare Not Equal
+# CHECK-BE: xvcmpnedp 7, 63, 27                # encoding: [0xf0,0xff,0xdb,0xdc]
+# CHECK-LE: xvcmpnedp 7, 63, 27                # encoding: [0xdc,0xdb,0xff,0xf0]
+            xvcmpnedp 7, 63, 27
+# CHECK-BE: xvcmpnedp. 7, 63, 27               # encoding: [0xf0,0xff,0xdf,0xdc]
+# CHECK-LE: xvcmpnedp. 7, 63, 27               # encoding: [0xdc,0xdf,0xff,0xf0]
+            xvcmpnedp. 7, 63, 27
+# CHECK-BE: xvcmpnesp 7, 63, 27                # encoding: [0xf0,0xff,0xda,0xdc]
+# CHECK-LE: xvcmpnesp 7, 63, 27                # encoding: [0xdc,0xda,0xff,0xf0]
+            xvcmpnesp 7, 63, 27
+# CHECK-BE: xvcmpnesp. 7, 63, 27               # encoding: [0xf0,0xff,0xde,0xdc]
+# CHECK-LE: xvcmpnesp. 7, 63, 27               # encoding: [0xdc,0xde,0xff,0xf0]
+            xvcmpnesp. 7, 63, 27
+
+# Convert DP -> QP
+# CHECK-BE: xscvdpqp 7, 27                     # encoding: [0xfc,0xf6,0xde,0x88]
+# CHECK-LE: xscvdpqp 7, 27                     # encoding: [0x88,0xde,0xf6,0xfc]
+            xscvdpqp 7, 27
+
+# Round & Convert QP -> DP
+# CHECK-BE: xscvqpdp 7, 27                     # encoding: [0xfc,0xf4,0xde,0x88]
+# CHECK-LE: xscvqpdp 7, 27                     # encoding: [0x88,0xde,0xf4,0xfc]
+            xscvqpdp 7, 27
+# CHECK-BE: xscvqpdpo 7, 27                    # encoding: [0xfc,0xf4,0xde,0x89]
+# CHECK-LE: xscvqpdpo 7, 27                    # encoding: [0x89,0xde,0xf4,0xfc]
+            xscvqpdpo 7, 27
+
+# Truncate & Convert QP -> (Un)Signed (D)Word
+# CHECK-BE: xscvqpsdz 7, 27                    # encoding: [0xfc,0xf9,0xde,0x88]
+# CHECK-LE: xscvqpsdz 7, 27                    # encoding: [0x88,0xde,0xf9,0xfc]
+            xscvqpsdz 7, 27
+# CHECK-BE: xscvqpswz 7, 27                    # encoding: [0xfc,0xe9,0xde,0x88]
+# CHECK-LE: xscvqpswz 7, 27                    # encoding: [0x88,0xde,0xe9,0xfc]
+            xscvqpswz 7, 27
+# CHECK-BE: xscvqpudz 7, 27                    # encoding: [0xfc,0xf1,0xde,0x88]
+# CHECK-LE: xscvqpudz 7, 27                    # encoding: [0x88,0xde,0xf1,0xfc]
+            xscvqpudz 7, 27
+# CHECK-BE: xscvqpuwz 7, 27                    # encoding: [0xfc,0xe1,0xde,0x88]
+# CHECK-LE: xscvqpuwz 7, 27                    # encoding: [0x88,0xde,0xe1,0xfc]
+            xscvqpuwz 7, 27
+
+# Convert (Un)Signed DWord -> QP
+# CHECK-BE: xscvsdqp 7, 27                     # encoding: [0xfc,0xea,0xde,0x88]
+# CHECK-LE: xscvsdqp 7, 27                     # encoding: [0x88,0xde,0xea,0xfc]
+            xscvsdqp 7, 27
+# CHECK-BE: xscvudqp 7, 27                     # encoding: [0xfc,0xe2,0xde,0x88]
+# CHECK-LE: xscvudqp 7, 27                     # encoding: [0x88,0xde,0xe2,0xfc]
+            xscvudqp 7, 27
+
+# (Round &) Convert DP <-> HP
+# CHECK-BE: xscvdphp 7, 63                     # encoding: [0xf0,0xf1,0xfd,0x6e]
+# CHECK-LE: xscvdphp 7, 63                     # encoding: [0x6e,0xfd,0xf1,0xf0]
+            xscvdphp 7, 63
+# CHECK-BE: xscvhpdp 7, 63                     # encoding: [0xf0,0xf0,0xfd,0x6e]
+# CHECK-LE: xscvhpdp 7, 63                     # encoding: [0x6e,0xfd,0xf0,0xf0]
+            xscvhpdp 7, 63
+
+# HP <-> SP
+# CHECK-BE: xvcvhpsp 7, 63                     # encoding: [0xf0,0xf8,0xff,0x6e]
+# CHECK-LE: xvcvhpsp 7, 63                     # encoding: [0x6e,0xff,0xf8,0xf0]
+            xvcvhpsp 7, 63
+# CHECK-BE: xvcvsphp 7, 63                     # encoding: [0xf0,0xf9,0xff,0x6e]
+# CHECK-LE: xvcvsphp 7, 63                     # encoding: [0x6e,0xff,0xf9,0xf0]
+            xvcvsphp 7, 63
+
+# Round to Quad-Precision Integer [with Inexact]
+# CHECK-BE: xsrqpi 1, 7, 27, 2                 # encoding: [0xfc,0xe1,0xdc,0x0a]
+# CHECK-LE: xsrqpi 1, 7, 27, 2                 # encoding: [0x0a,0xdc,0xe1,0xfc]
+            xsrqpi 1, 7, 27, 2
+# CHECK-BE: xsrqpix 1, 7, 27, 2                # encoding: [0xfc,0xe1,0xdc,0x0b]
+# CHECK-LE: xsrqpix 1, 7, 27, 2                # encoding: [0x0b,0xdc,0xe1,0xfc]
+            xsrqpix 1, 7, 27, 2
+
+# Round Quad-Precision to Double-Extended Precision
+# CHECK-BE: xsrqpxp 1, 7, 27, 2                # encoding: [0xfc,0xe1,0xdc,0x4a]
+# CHECK-LE: xsrqpxp 1, 7, 27, 2                # encoding: [0x4a,0xdc,0xe1,0xfc]
+            xsrqpxp 1, 7, 27, 2