In visitSTORE, always use FindBetterChain, rather than only when UseAA is enabled.

Recommitting after fixup of a 32-bit aliasing sign-offset bug in DAGCombiner.
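A rough illustration of the class of bug being fixed (an assumption on my part; the message above only says "32-bit aliasing sign offset bug"): when deciding whether two accesses off the same base may alias, a narrower offset has to be sign-extended before the 64-bit interval comparison, or a negative 32-bit offset compares as a huge positive one and the accesses are wrongly reported as disjoint. A minimal standalone C++ sketch:

    #include <cstdint>

    // Two byte intervals [Off, Off+Size) overlap iff each one starts
    // before the other one ends.
    static bool intervalsOverlap(int64_t Off0, int64_t Size0,
                                 int64_t Off1, int64_t Size1) {
      return Off0 < Off1 + Size1 && Off1 < Off0 + Size0;
    }

    // Both accesses share a base; the offsets arrive as 32-bit values.
    // int64_t(Off) sign-extends. A zero extension here would turn
    // Off = -4 into 4294967292 and misreport "no alias".
    bool mayAliasSameBase(int32_t Off0, int64_t Size0,
                          int32_t Off1, int64_t Size1) {
      return intervalsOverlap(int64_t(Off0), Size0,
                              int64_t(Off1), Size1);
    }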

    * Simplify Consecutive Merge Store Candidate Search

    Now that address aliasing is much less conservative, push through
    a simplified store-merging search and chain alias analysis that
    only checks for parallel stores through the chain subgraph. This
    is cleaner, as it separates the handling of non-interfering
    loads/stores from the store-merging logic.

    When merging stores, search up the chain through a single load,
    and find all possible stores by looking down through a load and a
    TokenFactor to all stores visited.
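    A minimal sketch of this search (a toy model under simplified
    assumptions, not the actual DAGCombiner code): starting from the
    candidate store, walk the chain subgraph, looking through
    TokenFactors and loads, and record every store encountered as a
    merge candidate.

        #include <queue>
        #include <set>
        #include <vector>

        enum class Kind { Store, Load, TokenFactor, Other };

        struct Node {
          Kind K;
          std::vector<Node *> ChainOps; // incoming chain operands
        };

        // Collect stores connected to Root purely through the chain
        // subgraph; any other node kind terminates the walk, keeping
        // the search local and cheap.
        std::vector<Node *> findParallelStores(Node *Root) {
          std::vector<Node *> Stores;
          std::set<Node *> Visited;
          std::queue<Node *> Worklist;
          Worklist.push(Root);
          while (!Worklist.empty()) {
            Node *N = Worklist.front();
            Worklist.pop();
            if (!Visited.insert(N).second)
              continue;
            if (N->K == Kind::Store)
              Stores.push_back(N);
            if (N == Root || N->K == Kind::TokenFactor ||
                N->K == Kind::Load)
              for (Node *Op : N->ChainOps)
                Worklist.push(Op);
          }
          return Stores;
        }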

    This improves the quality of the output SelectionDAG and of the
    generated code (save perhaps for some ARM cases where we correctly
    construct wider loads, but then promote them to float operations,
    which requires more expensive constant generation).

    Some minor peephole optimizations deal with the improved SubDAG
    shapes (listed below).

    Additional Minor Changes:

      1. Finish removing unused AliasLoad code.

      2. Unify the chain aggregation in the merged stores across code
         paths.

      3. Re-add the Store node to the worklist after calling
         SimplifyDemandedBits.

      4. Increase GatherAllAliasesMaxDepth from 6 to 18. That number is
         arbitrary, but seems sufficient to avoid regressions in tests.

      5. Remove chain dependencies of memory operations on CopyFromReg
         nodes, as these are captured by data dependence.

      6. Forward load-store values through TokenFactors containing
         {CopyToReg,CopyFromReg} values.

      7. Add a peephole to convert a build_vector of extract_vector_elt
         nodes to an extract_subvector when possible (see
         CodeGen/AArch64/store-merge.ll; a toy sketch of the matching
         condition follows this list).

      8. Restrict store merging for the ARM target to 32 bits, as in
         some contexts invalid 64-bit operations are being generated.
         This restriction can be removed once appropriate checks are
         added.
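    For item 7, a toy sketch of the matching condition (simplified
    assumptions, not the actual DAGCombiner peephole): a build_vector
    is replaceable by a single extract_subvector when every operand
    extracts from the same source vector at consecutive lanes starting
    at an offset aligned to the result width.

        #include <cstddef>
        #include <vector>

        struct ExtractElt {
          int SrcVec; // identity of the source vector
          int Lane;   // extracted lane index
        };

        // Returns the starting lane of the subvector, or -1 when the
        // build_vector operands do not form one contiguous, aligned run.
        int matchExtractSubvector(const std::vector<ExtractElt> &Ops) {
          if (Ops.empty())
            return -1;
          const int Src = Ops[0].SrcVec, Start = Ops[0].Lane;
          for (size_t I = 0; I < Ops.size(); ++I)
            if (Ops[I].SrcVec != Src || Ops[I].Lane != Start + (int)I)
              return -1;
          // extract_subvector is generally only legal at offsets that
          // are a multiple of the result vector's element count.
          if (Start % (int)Ops.size() != 0)
            return -1;
          return Start;
        }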

    This finishes the change Matt Arsenault started in r246307 and
    jyknight's original patch.

    Many tests required some changes as memory operations are now
    reorderable, improving load-store forwarding. One test in
    particular is worth noting:

      CodeGen/PowerPC/ppc64-align-long-double.ll - Improved load-store
      forwarding converts a load-store pair into a parallel store and
      a memory-realized bitcast of the same value. However, because we
      lose the sharing of the explicit and implicit store values, we
      must create another local store. A similar transformation
      happens before SelectionDAG as well.

    Reviewers: arsenm, hfinkel, tstellarAMD, jyknight, nhaehnle

llvm-svn: 296476
Nirav Dave 2017-02-28 14:24:15 +00:00
parent 9f57a4f14a
commit f830dec3f2
74 changed files with 3085 additions and 3132 deletions

View File

@ -363,6 +363,9 @@ public:
return false;
}
/// Returns if it's reasonable to merge stores to MemVT size.
virtual bool canMergeStoresTo(EVT MemVT) const { return true; }
/// \brief Return true if it is cheap to speculate a call to intrinsic cttz.
virtual bool isCheapToSpeculateCttz() const {
return false;

File diff suppressed because it is too large

View File

@ -850,7 +850,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
MinFunctionAlignment = 0;
PrefFunctionAlignment = 0;
PrefLoopAlignment = 0;
GatherAllAliasesMaxDepth = 6;
GatherAllAliasesMaxDepth = 18;
MinStackArgumentAlignment = 1;
// TODO: the default will be switched to 0 in the next commit, along
// with the Target-specific changes necessary.

View File

@ -9254,7 +9254,7 @@ static SDValue performSTORECombine(SDNode *N,
return SDValue();
}
/// This function handles the log2-shuffle pattern produced by the
/// LoopVectorizer for the across vector reduction. It consists of
/// log2(NumVectorElements) steps and, in each step, 2^(s) elements
/// are reduced, where s is an induction variable from 0 to

View File

@ -500,6 +500,11 @@ class InstrItineraryData;
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
unsigned &Cost) const override;
bool canMergeStoresTo(EVT MemVT) const override {
// Do not merge to larger than i32.
return (MemVT.getSizeInBits() <= 32);
}
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;

View File

@ -59,10 +59,10 @@ define i64 @test_hfa_ignores_gprs([7 x float], [2 x float] %in, i64, i64 %res) {
}
; [2 x float] should not be promoted to double by the Darwin varargs handling,
; but should go in an 8-byte aligned slot.
; but should go in an 8-byte aligned slot and can be merged as integer stores.
define void @test_varargs_stackalign() {
; CHECK-LABEL: test_varargs_stackalign:
; CHECK-DARWINPCS: stp {{w[0-9]+}}, {{w[0-9]+}}, [sp, #16]
; CHECK-DARWINPCS: str {{x[0-9]+}}, [sp, #16]
call void(...) @callee([3 x float] undef, [2 x float] [float 1.0, float 2.0])
ret void

View File

@ -205,10 +205,7 @@ declare i32 @args_i32(i32, i32, i32, i32, i32, i32, i32, i32, i16 signext, i32,
define i32 @test8(i32 %argc, i8** nocapture %argv) nounwind {
entry:
; CHECK-LABEL: test8
; CHECK: strb {{w[0-9]+}}, [sp, #3]
; CHECK: strb wzr, [sp, #2]
; CHECK: strb {{w[0-9]+}}, [sp, #1]
; CHECK: strb wzr, [sp]
; CHECK: str w8, [sp]
; CHECK: bl
; FAST-LABEL: test8
; FAST: strb {{w[0-9]+}}, [sp]

View File

@ -13,8 +13,8 @@ define void @t2() nounwind ssp {
entry:
; CHECK-LABEL: t2:
; CHECK: strh wzr, [sp, #32]
; CHECK: stp xzr, xzr, [sp, #16]
; CHECK: str xzr, [sp, #8]
; CHECK: stp xzr, xzr, [sp, #8]
; CHECK: str xzr, [sp, #24]
%buf = alloca [26 x i8], align 1
%0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0
call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false)

View File

@ -99,7 +99,7 @@ define void @test_nospare([8 x i64], [8 x float], ...) {
; __stack field should point just past them.
define void @test_offsetstack([8 x i64], [2 x i64], [3 x float], ...) {
; CHECK-LABEL: test_offsetstack:
; CHECK: sub sp, sp, #80
; CHECK: stp {{q[0-9]+}}, {{q[0-9]+}}, [sp, #-80]!
; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #96
; CHECK: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var
; CHECK: str [[STACK_TOP]], [x[[VAR]]]

View File

@ -4,8 +4,7 @@
@g0 = external global <3 x float>, align 16
@g1 = external global <3 x float>, align 4
; CHECK: ldr s[[R0:[0-9]+]], {{\[}}[[R1:x[0-9]+]]{{\]}}, #4
; CHECK: ld1{{\.?s?}} { v[[R0]]{{\.?s?}} }[1], {{\[}}[[R1]]{{\]}}
; CHECK: ldr q[[R0:[0-9]+]], {{\[}}[[R1:x[0-9]+]], :lo12:g0
; CHECK: str d[[R0]]
define void @blam() {

View File

@ -1,5 +1,4 @@
; RUN: llc --combiner-alias-analysis=false < %s | FileCheck %s
; RUN: llc --combiner-alias-analysis=true < %s | FileCheck %s
; RUN: llc < %s | FileCheck %s
; This test checks that we do not merge stores together which have
; dependencies through their non-chain operands (e.g. one store is the

View File

@ -1,13 +1,21 @@
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECKNOP
; CHECK: test01.cl:2:{{[0-9]+}}
; CHECK-NEXT: s_nop 0
; This test expects that we have one instance for each line in some order with "s_nop 0" instances after each.
; CHECK: test01.cl:3:{{[0-9]+}}
; CHECK-NEXT: s_nop 0
; Check that each line appears at least once
; CHECK-DAG: test01.cl:2:3
; CHECK-DAG: test01.cl:3:3
; CHECK-DAG: test01.cl:4:3
; CHECK: test01.cl:4:{{[0-9]+}}
; CHECK-NEXT: s_nop 0
; Check that each of the lines consists of the line output, followed by "s_nop 0"
; CHECKNOP: test01.cl:{{[234]}}:3
; CHECKNOP-NEXT: s_nop 0
; CHECKNOP: test01.cl:{{[234]}}:3
; CHECKNOP-NEXT: s_nop 0
; CHECKNOP: test01.cl:{{[234]}}:3
; CHECKNOP-NEXT: s_nop 0
; CHECK: test01.cl:5:{{[0-9]+}}
; CHECK-NEXT: s_nop 0
@ -21,7 +29,7 @@ entry:
call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !17, metadata !18), !dbg !19
%0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !20
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0, !dbg !20
store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !21
store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !20
%1 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !22
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 1, !dbg !22
store i32 2, i32 addrspace(1)* %arrayidx1, align 4, !dbg !23

View File

@ -255,11 +255,9 @@ define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a
; GCN: buffer_load_ubyte v{{[0-9]+}}, off
; GCN: buffer_load_ubyte v{{[0-9]+}}, off
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:6
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:5
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4
; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:5
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:6
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte

View File

@ -1,8 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s
; RUN: llc -march=amdgcn -verify-machineinstrs -combiner-alias-analysis -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -combiner-alias-analysis -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
; This test is mostly to test DAG store merging, so disable the vectorizer.
; Run with devices with different unaligned load restrictions.
@ -150,12 +147,7 @@ define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 {
}
; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32:
; GCN-NOAA: buffer_store_dwordx4 v
; GCN-AA: buffer_store_dwordx2
; GCN-AA: buffer_store_dword v
; GCN-AA: buffer_store_dword v
; GCN-AA: buffer_store_dwordx4 v
; GCN: s_endpgm
define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
@ -474,17 +466,9 @@ define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1
ret void
}
; This works once AA is enabled on the subtarget
; GCN-LABEL: {{^}}merge_global_store_4_vector_elts_loads_v4i32:
; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; GCN-NOAA: buffer_store_dword v
; GCN-NOAA: buffer_store_dword v
; GCN-NOAA: buffer_store_dword v
; GCN-NOAA: buffer_store_dword v
; GCN-AA: buffer_store_dwordx4 [[LOAD]]
; GCN: buffer_store_dwordx4 [[LOAD]]
; GCN: s_endpgm
define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1

View File

@ -32,10 +32,10 @@
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:40{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:44{{$}}
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
define void @private_elt_size_v4i32(<4 x i32> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -130,8 +130,8 @@ entry:
; HSA-ELT8: private_element_size = 2
; HSA-ELT4: private_element_size = 1
; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16
; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:24
; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{off|v[0-9]}}, s[0:3], s9 offset:1
; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{off|v[0-9]}}, s[0:3], s9 offset:2
; HSA-ELTGE8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen

View File

@ -157,9 +157,8 @@ define void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out,
; FUNC-LABEL: @reorder_local_offsets
; CI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:100 offset1:102
; CI: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:3 offset1:100
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:408
; CI-DAG: ds_write2_b32 {{v[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:3 offset1:100
; CI-DAG: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:408
; CI: buffer_store_dword
; CI: s_endpgm
define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(3)* noalias nocapture %ptr0) #0 {
@ -181,12 +180,12 @@ define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspa
}
; FUNC-LABEL: @reorder_global_offsets
; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408
; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408
; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
; CI-DAG: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
; CI-DAG: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408
; CI-DAG: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
; CI-DAG: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
; CI-DAG: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408
; CI: buffer_store_dword
; CI: s_endpgm
define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(1)* noalias nocapture %ptr0) #0 {
%ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 3

View File

@ -12,7 +12,8 @@ define void @test_byval_8_bytes_alignment(i32 %i, ...) {
entry:
; CHECK: sub sp, sp, #12
; CHECK: sub sp, sp, #4
; CHECK: stmib sp, {r1, r2, r3}
; CHECK: add r0, sp, #4
; CHECK: stm sp, {r0, r1, r2, r3}
%g = alloca i8*
%g1 = bitcast i8** %g to i8*
call void @llvm.va_start(i8* %g1)

View File

@ -1,5 +1,4 @@
; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=NO-REALIGN
; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=REALIGN
; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s
; rdar://12713765
; When realign-stack is set to false, make sure we are not creating stack
@ -8,29 +7,31 @@
define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" {
entry:
; NO-REALIGN-LABEL: test1
; NO-REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]]
; NO-REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #48
; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1:[0-9]+]], #48
; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; NO-REALIGN: mov r[[R3:[0-9]+]], r[[R1]]
; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R3]]:128]!
; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R3]]:128]
; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0:0]], #48
; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0]], #32
; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]!
; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]
; CHECK-LABEL: test1
; CHECK: ldr r[[R1:[0-9]+]], [pc, r1]
; CHECK: add r[[R2:[0-9]+]], r1, #48
; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; CHECK: mov r[[R2:[0-9]+]], r[[R1]]
; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32
; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; CHECK: mov r[[R1:[0-9]+]], sp
; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; CHECK: add r[[R2:[0-9]+]], r[[R1]], #32
; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]!
; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; CHECK: add r[[R1:[0-9]+]], r0, #48
; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; CHECK: add r[[R1:[0-9]+]], r0, #32
; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]!
; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128]
%retval = alloca <16 x float>, align 16
%0 = load <16 x float>, <16 x float>* @T3_retval, align 16
store <16 x float> %0, <16 x float>* %retval
@ -41,32 +42,33 @@ entry:
define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp {
entry:
; REALIGN-LABEL: test2
; REALIGN: bfc sp, #0, #6
; REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]]
; REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #48
; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; CHECK: ldr r[[R1:[0-9]+]], [pc, r1]
; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48
; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; CHECK: mov r[[R2:[0-9]+]], r[[R1]]
; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32
; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; CHECK: mov r[[R1:[0-9]+]], sp
; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; CHECK: orr r[[R2:[0-9]+]], r[[R1]], #32
; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]!
; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; CHECK: add r[[R1:[0-9]+]], r0, #48
; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; CHECK: add r[[R1:[0-9]+]], r0, #32
; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]!
; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128]
; REALIGN: orr r[[R2:[0-9]+]], r[[R1:[0-9]+]], #48
; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; REALIGN: orr r[[R2:[0-9]+]], r[[R1]], #32
; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; REALIGN: orr r[[R2:[0-9]+]], r[[R1]], #16
; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; REALIGN: add r[[R1:[0-9]+]], r[[R0:0]], #48
; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; REALIGN: add r[[R1:[0-9]+]], r[[R0]], #32
; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]!
; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]
%retval = alloca <16 x float>, align 16
%0 = load <16 x float>, <16 x float>* @T3_retval, align 16
store <16 x float> %0, <16 x float>* %retval
%1 = load <16 x float>, <16 x float>* %retval

View File

@ -16,22 +16,22 @@ define void @foo(i64* %addr) {
; an LDMIA was created with both a FrameIndex and an offset, which
; is not allowed.
; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
; CHECK-WITH-LDRD-DAG: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
; CHECK-WITH-LDRD-DAG: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
; CHECK-WITH-LDRD-DAG: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
; CHECK-WITH-LDRD-DAG: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
; We also want to ensure the register scavenger is working (i.e. an
; offset from sp can be generated), so we need two spills.
; CHECK-WITHOUT-LDRD: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}}
; CHECK-WITHOUT-LDRD: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
; CHECK-WITHOUT-LDRD: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
; CHECK-WITHOUT-LDRD-DAG: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}}
; CHECK-WITHOUT-LDRD-DAG: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
; CHECK-WITHOUT-LDRD-DAG: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
; In principle LLVM may have to recalculate the offset. At the moment
; it reuses the original though.
; CHECK-WITHOUT-LDRD: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
; CHECK-WITHOUT-LDRD: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
; CHECK-WITHOUT-LDRD-DAG: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
; CHECK-WITHOUT-LDRD-DAG: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
store volatile i64 %val1, i64* %addr
store volatile i64 %val2, i64* %addr

View File

@ -9,8 +9,6 @@ entry:
; CHECK-LABEL: t:
; CHECK: vpop {d8}
; CHECK-NOT: vpopne
; CHECK: pop {r7, pc}
; CHECK: vpop {d8}
; CHECK: pop {r7, pc}
br i1 undef, label %if.else, label %if.then

View File

@ -6,9 +6,7 @@ define void @i24_or(i24* %a) {
; LE-LABEL: i24_or:
; LE: @ BB#0:
; LE-NEXT: ldrh r1, [r0]
; LE-NEXT: ldrb r2, [r0, #2]
; LE-NEXT: orr r1, r1, #384
; LE-NEXT: strb r2, [r0, #2]
; LE-NEXT: strh r1, [r0]
; LE-NEXT: mov pc, lr
;
@ -114,14 +112,9 @@ define void @i24_insert_bit(i24* %a, i1 zeroext %bit) {
define void @i56_or(i56* %a) {
; LE-LABEL: i56_or:
; LE: @ BB#0:
; LE-NEXT: mov r2, r0
; LE-NEXT: ldr r12, [r0]
; LE-NEXT: ldrh r3, [r2, #4]!
; LE-NEXT: ldrb r1, [r2, #2]
; LE-NEXT: strb r1, [r2, #2]
; LE-NEXT: orr r1, r12, #384
; LE-NEXT: ldr r1, [r0]
; LE-NEXT: orr r1, r1, #384
; LE-NEXT: str r1, [r0]
; LE-NEXT: strh r3, [r2]
; LE-NEXT: mov pc, lr
;
; BE-LABEL: i56_or:
@ -149,36 +142,29 @@ define void @i56_or(i56* %a) {
define void @i56_and_or(i56* %a) {
; LE-LABEL: i56_and_or:
; LE: @ BB#0:
; LE-NEXT: mov r2, r0
; LE-NEXT: ldr r1, [r0]
; LE-NEXT: ldrh r12, [r2, #4]!
; LE-NEXT: orr r1, r1, #384
; LE-NEXT: ldrb r3, [r2, #2]
; LE-NEXT: bic r1, r1, #127
; LE-NEXT: strb r3, [r2, #2]
; LE-NEXT: str r1, [r0]
; LE-NEXT: strh r12, [r2]
; LE-NEXT: mov pc, lr
;
; BE-LABEL: i56_and_or:
; BE: @ BB#0:
; BE-NEXT: .save {r11, lr}
; BE-NEXT: push {r11, lr}
; BE-NEXT: mov r2, r0
; BE-NEXT: ldr lr, [r0]
; BE-NEXT: mov r1, r0
; BE-NEXT: mov r3, #128
; BE-NEXT: ldrh r12, [r2, #4]!
; BE-NEXT: strb r3, [r2, #2]
; BE-NEXT: lsl r3, r12, #8
; BE-NEXT: orr r3, r3, lr, lsl #24
; BE-NEXT: orr r3, r3, #384
; BE-NEXT: lsr r1, r3, #8
; BE-NEXT: strh r1, [r2]
; BE-NEXT: bic r1, lr, #255
; BE-NEXT: orr r1, r1, r3, lsr #24
; BE-NEXT: ldrh r2, [r1, #4]!
; BE-NEXT: strb r3, [r1, #2]
; BE-NEXT: lsl r2, r2, #8
; BE-NEXT: ldr r12, [r0]
; BE-NEXT: orr r2, r2, r12, lsl #24
; BE-NEXT: orr r2, r2, #384
; BE-NEXT: lsr r3, r2, #8
; BE-NEXT: strh r3, [r1]
; BE-NEXT: bic r1, r12, #255
; BE-NEXT: orr r1, r1, r2, lsr #24
; BE-NEXT: str r1, [r0]
; BE-NEXT: pop {r11, lr}
; BE-NEXT: mov pc, lr
%b = load i56, i56* %a, align 1
%c = and i56 %b, -128
%d = or i56 %c, 384
@ -189,18 +175,10 @@ define void @i56_and_or(i56* %a) {
define void @i56_insert_bit(i56* %a, i1 zeroext %bit) {
; LE-LABEL: i56_insert_bit:
; LE: @ BB#0:
; LE-NEXT: .save {r11, lr}
; LE-NEXT: push {r11, lr}
; LE-NEXT: mov r3, r0
; LE-NEXT: ldr lr, [r0]
; LE-NEXT: ldrh r12, [r3, #4]!
; LE-NEXT: ldrb r2, [r3, #2]
; LE-NEXT: strb r2, [r3, #2]
; LE-NEXT: bic r2, lr, #8192
; LE-NEXT: ldr r2, [r0]
; LE-NEXT: bic r2, r2, #8192
; LE-NEXT: orr r1, r2, r1, lsl #13
; LE-NEXT: str r1, [r0]
; LE-NEXT: strh r12, [r3]
; LE-NEXT: pop {r11, lr}
; LE-NEXT: mov pc, lr
;
; BE-LABEL: i56_insert_bit:

View File

@ -6,9 +6,9 @@ define void @multiple_store() {
; CHECK: movs [[VAL:r[0-9]+]], #42
; CHECK: movt r[[BASE1]], #15
; CHECK: str [[VAL]], [r[[BASE1]]]
; CHECK: str [[VAL]], [r[[BASE1]], #24]
; CHECK: str.w [[VAL]], [r[[BASE1]], #42]
; CHECK-DAG: str [[VAL]], [r[[BASE1]]]
; CHECK-DAG: str [[VAL]], [r[[BASE1]], #24]
; CHECK-DAG: str.w [[VAL]], [r[[BASE1]], #42]
; CHECK: movw r[[BASE2:[0-9]+]], #20394
; CHECK: movt r[[BASE2]], #18

View File

@ -13,50 +13,55 @@
; Function Attrs: nounwind uwtable
define i32 @ebpf_filter(%struct.__sk_buff* nocapture readnone %ebpf_packet) #0 section "socket1" {
; CHECK: r2 = r10
; CHECK: r2 += -2
; CHECK: r1 = 0
; CHECK: *(u16 *)(r2 + 6) = r1
; CHECK: *(u16 *)(r2 + 4) = r1
; CHECK: *(u16 *)(r2 + 2) = r1
; CHECK: r2 = 6
; CHECK: *(u8 *)(r10 - 7) = r2
; CHECK: r2 = 5
; CHECK: *(u8 *)(r10 - 8) = r2
; CHECK: r2 = 7
; CHECK: *(u8 *)(r10 - 6) = r2
; CHECK: r2 = 8
; CHECK: *(u8 *)(r10 - 5) = r2
; CHECK: r2 = 9
; CHECK: *(u8 *)(r10 - 4) = r2
; CHECK: r2 = 10
; CHECK: *(u8 *)(r10 - 3) = r2
; CHECK: *(u16 *)(r10 + 24) = r1
; CHECK: *(u16 *)(r10 + 22) = r1
; CHECK: *(u16 *)(r10 + 20) = r1
; CHECK: *(u16 *)(r10 + 18) = r1
; CHECK: *(u16 *)(r10 + 16) = r1
; CHECK: *(u16 *)(r10 + 14) = r1
; CHECK: *(u16 *)(r10 + 12) = r1
; CHECK: *(u16 *)(r10 + 10) = r1
; CHECK: *(u16 *)(r10 + 8) = r1
; CHECK: *(u16 *)(r10 + 6) = r1
; CHECK: *(u16 *)(r10 - 2) = r1
; CHECK: *(u16 *)(r10 + 26) = r1
; CHECK: r2 = r10
; CHECK: r2 += -8
; CHECK: r1 = <MCOperand Expr:(routing)>ll
; CHECK: call bpf_map_lookup_elem
; CHECK: exit
%key = alloca %struct.routing_key_2, align 1
%1 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 0
; CHECK: r1 = 5
; CHECK: *(u8 *)(r10 - 8) = r1
store i8 5, i8* %1, align 1
%2 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 1
; CHECK: r1 = 6
; CHECK: *(u8 *)(r10 - 7) = r1
store i8 6, i8* %2, align 1
%3 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 2
; CHECK: r1 = 7
; CHECK: *(u8 *)(r10 - 6) = r1
store i8 7, i8* %3, align 1
%4 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 3
; CHECK: r1 = 8
; CHECK: *(u8 *)(r10 - 5) = r1
store i8 8, i8* %4, align 1
%5 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 4
; CHECK: r1 = 9
; CHECK: *(u8 *)(r10 - 4) = r1
store i8 9, i8* %5, align 1
%6 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 5
; CHECK: r1 = 10
; CHECK: *(u8 *)(r10 - 3) = r1
store i8 10, i8* %6, align 1
%7 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 1, i32 0, i64 0
; CHECK: r1 = r10
; CHECK: r1 += -2
; CHECK: r2 = 0
; CHECK: *(u16 *)(r1 + 6) = r2
; CHECK: *(u16 *)(r1 + 4) = r2
; CHECK: *(u16 *)(r1 + 2) = r2
; CHECK: *(u16 *)(r10 + 24) = r2
; CHECK: *(u16 *)(r10 + 22) = r2
; CHECK: *(u16 *)(r10 + 20) = r2
; CHECK: *(u16 *)(r10 + 18) = r2
; CHECK: *(u16 *)(r10 + 16) = r2
; CHECK: *(u16 *)(r10 + 14) = r2
; CHECK: *(u16 *)(r10 + 12) = r2
; CHECK: *(u16 *)(r10 + 10) = r2
; CHECK: *(u16 *)(r10 + 8) = r2
; CHECK: *(u16 *)(r10 + 6) = r2
; CHECK: *(u16 *)(r10 - 2) = r2
; CHECK: *(u16 *)(r10 + 26) = r2
call void @llvm.memset.p0i8.i64(i8* %7, i8 0, i64 30, i32 1, i1 false)
%8 = call i32 (%struct.bpf_map_def*, %struct.routing_key_2*, ...) bitcast (i32 (...)* @bpf_map_lookup_elem to i32 (%struct.bpf_map_def*, %struct.routing_key_2*, ...)*)(%struct.bpf_map_def* nonnull @routing, %struct.routing_key_2* nonnull %key) #3
ret i32 undef

View File

@ -1,4 +1,4 @@
; RUN: llc -march=msp430 -combiner-alias-analysis < %s | FileCheck %s
; RUN: llc -march=msp430 < %s | FileCheck %s
target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
target triple = "msp430-generic-generic"
@foo = common global i16 0, align 2

View File

@ -63,39 +63,39 @@ entry:
; NEW-DAG: sd $5, 16([[R2]])
; O32 has run out of argument registers and starts using the stack
; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 24($sp)
; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 28($sp)
; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 16($sp)
; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 20($sp)
; O32-DAG: sw [[R3]], 24([[R2]])
; O32-DAG: sw [[R4]], 28([[R2]])
; NEW-DAG: sd $6, 24([[R2]])
; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 32($sp)
; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 36($sp)
; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 24($sp)
; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 28($sp)
; O32-DAG: sw [[R3]], 32([[R2]])
; O32-DAG: sw [[R4]], 36([[R2]])
; NEW-DAG: sd $7, 32([[R2]])
; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 40($sp)
; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 44($sp)
; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 32($sp)
; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 36($sp)
; O32-DAG: sw [[R3]], 40([[R2]])
; O32-DAG: sw [[R4]], 44([[R2]])
; NEW-DAG: sd $8, 40([[R2]])
; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 48($sp)
; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 52($sp)
; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 40($sp)
; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 44($sp)
; O32-DAG: sw [[R3]], 48([[R2]])
; O32-DAG: sw [[R4]], 52([[R2]])
; NEW-DAG: sd $9, 48([[R2]])
; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 56($sp)
; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 60($sp)
; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 48($sp)
; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 52($sp)
; O32-DAG: sw [[R3]], 56([[R2]])
; O32-DAG: sw [[R4]], 60([[R2]])
; NEW-DAG: sd $10, 56([[R2]])
; N32/N64 have run out of registers and starts using the stack too
; O32-DAG: lw [[R3:\$[0-9]+]], 64($sp)
; O32-DAG: lw [[R4:\$[0-9]+]], 68($sp)
; O32-DAG: lw [[R3:\$[0-9]+]], 56($sp)
; O32-DAG: lw [[R4:\$[0-9]+]], 60($sp)
; O32-DAG: sw [[R3]], 64([[R2]])
; O32-DAG: sw [[R4]], 68([[R2]])
; NEW-DAG: ld [[R3:\$[0-9]+]], 0($sp)

View File

@ -315,12 +315,11 @@ entry:
; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
; order.
; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA_TMP2]])
; O32-DAG: sw [[ARG1]], 8([[GV]])
; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
; O32-DAG: sw [[VA2]], 0([[SP]])
; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4
; O32-DAG: sw [[VA3]], 0([[SP]])
; O32-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA_TMP2]])
; O32-DAG: sw [[ARG1]], 12([[GV]])
; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
@ -349,10 +348,9 @@ entry:
; Load the second argument from the variable portion and copy it to the global.
; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
; O32-DAG: sw [[ARG2]], 16([[GV]])
; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
; O32-DAG: sw [[VA2]], 0([[SP]])
; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4
; O32-DAG: sw [[VA3]], 0([[SP]])
; O32-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA_TMP2]])
; O32-DAG: sw [[ARG2]], 20([[GV]])
; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]])
@ -678,12 +676,11 @@ entry:
; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
; order.
; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA_TMP2]])
; O32-DAG: sw [[ARG1]], 8([[GV]])
; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
; O32-DAG: sw [[VA2]], 0([[SP]])
; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4
; O32-DAG: sw [[VA3]], 0([[SP]])
; O32-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA_TMP2]])
; O32-DAG: sw [[ARG1]], 12([[GV]])
; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
@ -712,10 +709,9 @@ entry:
; Load the second argument from the variable portion and copy it to the global.
; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
; O32-DAG: sw [[ARG2]], 16([[GV]])
; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4
; O32-DAG: sw [[VA2]], 0([[SP]])
; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
; O32-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA_TMP2]])
; O32-DAG: sw [[ARG2]], 20([[GV]])
; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]])
@ -1040,10 +1036,9 @@ entry:
; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
; O32-DAG: sw [[ARG1]], 8([[GV]])
; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
; O32-DAG: sw [[VA2]], 0([[SP]])
; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4
; O32-DAG: sw [[VA3]], 0([[SP]])
; O32-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA_TMP2]])
; O32-DAG: sw [[ARG1]], 12([[GV]])
; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
@ -1072,10 +1067,9 @@ entry:
; Load the second argument from the variable portion and copy it to the global.
; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
; O32-DAG: sw [[ARG2]], 16([[GV]])
; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
; O32-DAG: sw [[VA2]], 0([[SP]])
; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4
; O32-DAG: sw [[VA3]], 0([[SP]])
; O32-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA_TMP2]])
; O32-DAG: sw [[ARG2]], 20([[GV]])
; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]])

View File

@ -132,20 +132,19 @@ entry:
define internal fastcc void @callee0(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, i32 %a10, i32 %a11, i32 %a12, i32 %a13, i32 %a14, i32 %a15, i32 %a16) nounwind noinline {
entry:
; CHECK: callee0
; CHECK: sw $4
; CHECK: sw $5
; CHECK: sw $6
; CHECK: sw $7
; CHECK: sw $8
; CHECK: sw $9
; CHECK: sw $10
; CHECK: sw $11
; CHECK: sw $12
; CHECK: sw $13
; CHECK: sw $14
; CHECK: sw $15
; CHECK: sw $24
; CHECK: sw $3
; CHECK-DAG: sw $4
; CHECK-DAG: sw $5
; CHECK-DAG: sw $7
; CHECK-DAG: sw $8
; CHECK-DAG: sw $9
; CHECK-DAG: sw $10
; CHECK-DAG: sw $11
; CHECK-DAG: sw $12
; CHECK-DAG: sw $13
; CHECK-DAG: sw $14
; CHECK-DAG: sw $15
; CHECK-DAG: sw $24
; CHECK-DAG: sw $3
; t6, t7 and t8 are reserved in NaCl and cannot be used for fastcc.
; CHECK-NACL-NOT: sw $14
@ -223,27 +222,27 @@ entry:
define internal fastcc void @callee1(float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, float %a8, float %a9, float %a10, float %a11, float %a12, float %a13, float %a14, float %a15, float %a16, float %a17, float %a18, float %a19, float %a20) nounwind noinline {
entry:
; CHECK: callee1
; CHECK: swc1 $f0
; CHECK: swc1 $f1
; CHECK: swc1 $f2
; CHECK: swc1 $f3
; CHECK: swc1 $f4
; CHECK: swc1 $f5
; CHECK: swc1 $f6
; CHECK: swc1 $f7
; CHECK: swc1 $f8
; CHECK: swc1 $f9
; CHECK: swc1 $f10
; CHECK: swc1 $f11
; CHECK: swc1 $f12
; CHECK: swc1 $f13
; CHECK: swc1 $f14
; CHECK: swc1 $f15
; CHECK: swc1 $f16
; CHECK: swc1 $f17
; CHECK: swc1 $f18
; CHECK: swc1 $f19
; CHECK-LABEL: callee1:
; CHECK-DAG: swc1 $f0
; CHECK-DAG: swc1 $f1
; CHECK-DAG: swc1 $f2
; CHECK-DAG: swc1 $f3
; CHECK-DAG: swc1 $f4
; CHECK-DAG: swc1 $f5
; CHECK-DAG: swc1 $f6
; CHECK-DAG: swc1 $f7
; CHECK-DAG: swc1 $f8
; CHECK-DAG: swc1 $f9
; CHECK-DAG: swc1 $f10
; CHECK-DAG: swc1 $f11
; CHECK-DAG: swc1 $f12
; CHECK-DAG: swc1 $f13
; CHECK-DAG: swc1 $f14
; CHECK-DAG: swc1 $f15
; CHECK-DAG: swc1 $f16
; CHECK-DAG: swc1 $f17
; CHECK-DAG: swc1 $f18
; CHECK-DAG: swc1 $f19
store float %a0, float* @gf0, align 4
store float %a1, float* @gf1, align 4
@ -316,8 +315,6 @@ entry:
; NOODDSPREG-LABEL: callee2:
; NOODDSPREG: addiu $sp, $sp, -[[OFFSET:[0-9]+]]
; Check that first 10 arguments are received in even float registers
; f0, f2, ... , f18. Check that 11th argument is received on stack.
@ -333,7 +330,7 @@ entry:
; NOODDSPREG-DAG: swc1 $f16, 32($[[R0]])
; NOODDSPREG-DAG: swc1 $f18, 36($[[R0]])
; NOODDSPREG-DAG: lwc1 $[[F0:f[0-9]*[02468]]], [[OFFSET]]($sp)
; NOODDSPREG-DAG: lwc1 $[[F0:f[0-9]*[02468]]], 0($sp)
; NOODDSPREG-DAG: swc1 $[[F0]], 40($[[R0]])
store float %a0, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 0), align 4
@ -397,7 +394,6 @@ entry:
; FP64-NOODDSPREG-LABEL: callee3:
; FP64-NOODDSPREG: addiu $sp, $sp, -[[OFFSET:[0-9]+]]
; Check that first 10 arguments are received in even float registers
; f0, f2, ... , f18. Check that 11th argument is received on stack.
@ -414,7 +410,7 @@ entry:
; FP64-NOODDSPREG-DAG: sdc1 $f16, 64($[[R0]])
; FP64-NOODDSPREG-DAG: sdc1 $f18, 72($[[R0]])
; FP64-NOODDSPREG-DAG: ldc1 $[[F0:f[0-9]*[02468]]], [[OFFSET]]($sp)
; FP64-NOODDSPREG-DAG: ldc1 $[[F0:f[0-9]*[02468]]], 0($sp)
; FP64-NOODDSPREG-DAG: sdc1 $[[F0]], 80($[[R0]])
store double %a0, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 0), align 8

View File

@ -250,12 +250,18 @@ entry:
; MIPS64-EB: ld $[[PTR:[0-9]+]], %got_disp(struct_s0)(
; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(struct_s0)(
; FIXME: We should be able to do better than this on MIPS32r6/MIPS64r6 since
; we have unaligned halfword load/store available
; ALL-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]])
; ALL-DAG: sb $[[R1]], 2($[[PTR]])
; ALL-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]])
; ALL-DAG: sb $[[R1]], 3($[[PTR]])
; MIPS32-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]])
; MIPS32-DAG: sb $[[R1]], 2($[[PTR]])
; MIPS32-DAG: lbu $[[R2:[0-9]+]], 1($[[PTR]])
; MIPS32-DAG: sb $[[R2]], 3($[[PTR]])
; MIPS32R6: lhu $[[R1:[0-9]+]], 0($[[PTR]])
; MIPS32R6: sh $[[R1]], 2($[[PTR]])
; MIPS64-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]])
; MIPS64-DAG: sb $[[R1]], 2($[[PTR]])
; MIPS64-DAG: lbu $[[R2:[0-9]+]], 1($[[PTR]])
; MIPS64-DAG: sb $[[R2]], 3($[[PTR]])
%0 = load %struct.S0, %struct.S0* getelementptr inbounds (%struct.S0, %struct.S0* @struct_s0, i32 0), align 1
store %struct.S0 %0, %struct.S0* getelementptr inbounds (%struct.S0, %struct.S0* @struct_s0, i32 1), align 1
@ -268,37 +274,54 @@ entry:
; MIPS32-EL: lw $[[PTR:[0-9]+]], %got(struct_s1)(
; MIPS32-EB: lw $[[PTR:[0-9]+]], %got(struct_s1)(
; MIPS32-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]])
; MIPS32-DAG: sb $[[R1]], 4($[[PTR]])
; MIPS32-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]])
; MIPS32-DAG: sb $[[R1]], 5($[[PTR]])
; MIPS32-DAG: lbu $[[R1:[0-9]+]], 2($[[PTR]])
; MIPS32-DAG: sb $[[R1]], 6($[[PTR]])
; MIPS32-DAG: lbu $[[R1:[0-9]+]], 3($[[PTR]])
; MIPS32-DAG: sb $[[R1]], 7($[[PTR]])
; MIPS32-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]])
; MIPS32-EL-DAG: lwr $[[R1]], 0($[[PTR]])
; MIPS32-EL-DAG: swl $[[R1]], 7($[[PTR]])
; MIPS32-EL-DAG: swr $[[R1]], 4($[[PTR]])
; MIPS32-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]])
; MIPS32-EB-DAG: lwr $[[R1]], 3($[[PTR]])
; MIPS32-EB-DAG: swl $[[R1]], 4($[[PTR]])
; MIPS32-EB-DAG: swr $[[R1]], 7($[[PTR]])
; MIPS32-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]])
; MIPS32-NOLEFTRIGHT-DAG: sb $[[R1]], 4($[[PTR]])
; MIPS32-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]])
; MIPS32-NOLEFTRIGHT-DAG: sb $[[R1]], 5($[[PTR]])
; MIPS32-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 2($[[PTR]])
; MIPS32-NOLEFTRIGHT-DAG: sb $[[R1]], 6($[[PTR]])
; MIPS32-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 3($[[PTR]])
; MIPS32-NOLEFTRIGHT-DAG: sb $[[R1]], 7($[[PTR]])
; MIPS32R6: lw $[[PTR:[0-9]+]], %got(struct_s1)(
; MIPS32R6-DAG: lhu $[[R1:[0-9]+]], 0($[[PTR]])
; MIPS32R6-DAG: sh $[[R1]], 4($[[PTR]])
; MIPS32R6-DAG: lhu $[[R1:[0-9]+]], 2($[[PTR]])
; MIPS32R6-DAG: sh $[[R1]], 6($[[PTR]])
; MIPS32R6-DAG: lw $[[R1:[0-9]+]], 0($[[PTR]])
; MIPS32R6-DAG: sw $[[R1]], 4($[[PTR]])
; MIPS64-EL: ld $[[PTR:[0-9]+]], %got_disp(struct_s1)(
; MIPS64-EB: ld $[[PTR:[0-9]+]], %got_disp(struct_s1)(
; MIPS64-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]])
; MIPS64-DAG: sb $[[R1]], 4($[[PTR]])
; MIPS64-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]])
; MIPS64-DAG: sb $[[R1]], 5($[[PTR]])
; MIPS64-DAG: lbu $[[R1:[0-9]+]], 2($[[PTR]])
; MIPS64-DAG: sb $[[R1]], 6($[[PTR]])
; MIPS64-DAG: lbu $[[R1:[0-9]+]], 3($[[PTR]])
; MIPS64-DAG: sb $[[R1]], 7($[[PTR]])
; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]])
; MIPS64-EL-DAG: lwr $[[R1]], 0($[[PTR]])
; MIPS64-EL-DAG: swl $[[R1]], 7($[[PTR]])
; MIPS64-EL-DAG: swr $[[R1]], 4($[[PTR]])
; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]])
; MIPS64-EB-DAG: lwr $[[R1]], 3($[[PTR]])
; MIPS64-EB-DAG: swl $[[R1]], 4($[[PTR]])
; MIPS64-EB-DAG: swr $[[R1]], 7($[[PTR]])
; MIPS64-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]])
; MIPS64-NOLEFTRIGHT-DAG: sb $[[R1]], 4($[[PTR]])
; MIPS64-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]])
; MIPS64-NOLEFTRIGHT-DAG: sb $[[R1]], 5($[[PTR]])
; MIPS64-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 2($[[PTR]])
; MIPS64-NOLEFTRIGHT-DAG: sb $[[R1]], 6($[[PTR]])
; MIPS64-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 3($[[PTR]])
; MIPS64-NOLEFTRIGHT-DAG: sb $[[R1]], 7($[[PTR]])
; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(struct_s1)(
; MIPS64R6-DAG: lhu $[[R1:[0-9]+]], 0($[[PTR]])
; MIPS64R6-DAG: sh $[[R1]], 4($[[PTR]])
; MIPS64R6-DAG: lhu $[[R1:[0-9]+]], 2($[[PTR]])
; MIPS64R6-DAG: sh $[[R1]], 6($[[PTR]])
; MIPS64R6-DAG: lw $[[R1:[0-9]+]], 0($[[PTR]])
; MIPS64R6-DAG: sw $[[R1]], 4($[[PTR]])
%0 = load %struct.S1, %struct.S1* getelementptr inbounds (%struct.S1, %struct.S1* @struct_s1, i32 0), align 1
store %struct.S1 %0, %struct.S1* getelementptr inbounds (%struct.S1, %struct.S1* @struct_s1, i32 1), align 1
@ -336,30 +359,21 @@ entry:
; MIPS32R6-DAG: sw $[[R1]], 12($[[PTR]])
; MIPS64-EL: ld $[[PTR:[0-9]+]], %got_disp(struct_s2)(
; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]])
; MIPS64-EL-DAG: lwr $[[R1]], 0($[[PTR]])
; MIPS64-EL-DAG: swl $[[R1]], 11($[[PTR]])
; MIPS64-EL-DAG: swr $[[R1]], 8($[[PTR]])
; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 7($[[PTR]])
; MIPS64-EL-DAG: lwr $[[R1]], 4($[[PTR]])
; MIPS64-EL-DAG: swl $[[R1]], 15($[[PTR]])
; MIPS64-EL-DAG: swr $[[R1]], 12($[[PTR]])
; MIPS64-EL-DAG: ldl $[[R1:[0-9]+]], 7($[[PTR]])
; MIPS64-EL-DAG: ldr $[[R1]], 0($[[PTR]])
; MIPS64-EL-DAG: sdl $[[R1]], 15($[[PTR]])
; MIPS64-EL-DAG: sdr $[[R1]], 8($[[PTR]])
; MIPS64-EB: ld $[[PTR:[0-9]+]], %got_disp(struct_s2)(
; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]])
; MIPS64-EB-DAG: lwr $[[R1]], 3($[[PTR]])
; MIPS64-EB-DAG: swl $[[R1]], 8($[[PTR]])
; MIPS64-EB-DAG: swr $[[R1]], 11($[[PTR]])
; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 4($[[PTR]])
; MIPS64-EB-DAG: lwr $[[R1]], 7($[[PTR]])
; MIPS64-EB-DAG: swl $[[R1]], 12($[[PTR]])
; MIPS64-EB-DAG: swr $[[R1]], 15($[[PTR]])
; MIPS64-EB-DAG: ldl $[[R1:[0-9]+]], 0($[[PTR]])
; MIPS64-EB-DAG: ldr $[[R1]], 7($[[PTR]])
; MIPS64-EB-DAG: sdl $[[R1]], 8($[[PTR]])
; MIPS64-EB-DAG: sdr $[[R1]], 15($[[PTR]])
; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(struct_s2)(
; MIPS64R6-DAG: lw $[[R1:[0-9]+]], 0($[[PTR]])
; MIPS64R6-DAG: sw $[[R1]], 8($[[PTR]])
; MIPS64R6-DAG: lw $[[R1:[0-9]+]], 4($[[PTR]])
; MIPS64R6-DAG: sw $[[R1]], 12($[[PTR]])
; MIPS64R6-DAG: ld $[[R1:[0-9]+]], 0($[[PTR]])
; MIPS64R6-DAG: sd $[[R1]], 8($[[PTR]])
%0 = load %struct.S2, %struct.S2* getelementptr inbounds (%struct.S2, %struct.S2* @struct_s2, i32 0), align 1
store %struct.S2 %0, %struct.S2* getelementptr inbounds (%struct.S2, %struct.S2* @struct_s2, i32 1), align 1
@ -416,17 +430,17 @@ entry:
; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]])
; MIPS64-EL-DAG: lwr $[[R1]], 0($[[PTR]])
; MIPS64-EB: ld $[[SPTR:[0-9]+]], %got_disp(arr)(
; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]])
; MIPS64-EB-DAG: lwr $[[R1]], 3($[[PTR]])
; MIPS64-EB-DAG: dsll $[[R1]], $[[R1]], 32
; MIPS64-EB: ld $[[SPTR:[0-9]+]], %got_disp(arr)(
; MIPS64-EB-DAG: lbu $[[R2:[0-9]+]], 5($[[PTR]])
; MIPS64-EB-DAG: lbu $[[R3:[0-9]+]], 4($[[PTR]])
; MIPS64-EB-DAG: dsll $[[T0:[0-9]+]], $[[R3]], 8
; MIPS64-EB-DAG: or $[[T1:[0-9]+]], $[[T0]], $[[R2]]
; MIPS64-EB-DAG: dsll $[[T1]], $[[T1]], 16
; MIPS64-EB-DAG: or $[[T3:[0-9]+]], $[[R1]], $[[T1]]
; MIPS64-EB-DAG: lbu $[[R4:[0-9]+]], 6($[[PTR]])
; MIPS64-EB-DAG: dsll $[[T1]], $[[T1]], 16
; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]])
; MIPS64-EB-DAG: lwr $[[R1]], 3($[[PTR]])
; MIPS64-EB-DAG: dsll $[[R5:[0-9]+]], $[[R1]], 32
; MIPS64-EB-DAG: or $[[T3:[0-9]+]], $[[R5]], $[[T1]]
; MIPS64-EB-DAG: dsll $[[T4:[0-9]+]], $[[R4]], 8
; MIPS64-EB-DAG: or $4, $[[T3]], $[[T4]]

View File

@ -13,6 +13,6 @@ entry:
ret i32 0
}
; CHECK: li16 ${{[2-7]|16|17}}, 1
; CHECK: addiu ${{[0-9]+}}, $zero, 2148
; CHECK: li16 ${{[2-7]|16|17}}, 1
; CHECK: ori ${{[0-9]+}}, $zero, 33332

View File

@ -4,8 +4,8 @@
@gld1 = external global fp128
; CHECK: foo0
; CHECK: sdc1 $f12, %lo(gld0)(${{[0-9]+}})
; CHECK: sdc1 $f13, 8(${{[0-9]+}})
; CHECK-DAG: sdc1 $f12, %lo(gld0)(${{[0-9]+}})
; CHECK-DAG: sdc1 $f13, 8(${{[0-9]+}})
define void @foo0(fp128 %a0) {
entry:
@ -14,8 +14,8 @@ entry:
}
; CHECK: foo1
; CHECK: ldc1 $f12, %lo(gld0)(${{[0-9]+}})
; CHECK: ldc1 $f13, 8(${{[0-9]+}})
; CHECK-DAG: ldc1 $f12, %lo(gld0)(${{[0-9]+}})
; CHECK-DAG: ldc1 $f13, 8(${{[0-9]+}})
define void @foo1() {
entry:
@ -26,11 +26,11 @@ entry:
declare void @foo2(fp128)
; CHECK: foo3:
; CHECK: daddiu $[[R0:[0-9]+]], ${{[0-9]+}}, %hi(gld0)
; CHECK: dsll $[[R1:[0-9]+]], $[[R0]], 16
; CHECK: daddiu $[[R2:[0-9]+]], $[[R1:[0-9]+]], %lo(gld0)
; CHECK: sdc1 $f0, %lo(gld0)($[[R1]])
; CHECK: daddiu $[[R2:[0-9]]], $[[R1]], %lo(gld0)
; CHECK: sdc1 $f2, 8($[[R2]])
; CHECK: daddiu $[[R3:[0-9]+]], ${{[0-9]+}}, %hi(gld1)
; CHECK: dsll $[[R4:[0-9]+]], $[[R3]], 16
@ -39,7 +39,6 @@ declare void @foo2(fp128)
; CHECK: ldc1 $f2, 8($[[R5]])
define fp128 @foo3() {
entry:
%call = tail call fp128 @foo4()

View File

@ -577,10 +577,10 @@ entry:
; ALL-LABEL: store_LD_LD:
; ALL: ld $[[R0:[0-9]+]], %got_disp(gld1)
; ALL: ld $[[R1:[0-9]+]], 0($[[R0]])
; ALL: ld $[[R2:[0-9]+]], 8($[[R0]])
; ALL: ld $[[R3:[0-9]+]], %got_disp(gld0)
; ALL: sd $[[R2]], 8($[[R3]])
; ALL: ld $[[R1:[0-9]+]], 0($[[R0]])
; ALL: sd $[[R1]], 0($[[R3]])
define void @store_LD_LD() {

View File

@ -130,12 +130,12 @@
; MM-MNO-PIC: addiu $[[R1:[0-9]+]], $[[R0]], %lo(_gp_disp)
; MM-MNO-PIC: addu $[[R2:[0-9]+]], $[[R1]], $25
; MM-MNO-PIC: lw $[[R3:[0-9]+]], %got(g0)($[[R2]])
; MM-MNO-PIC: lw16 $[[R4:[0-9]+]], 0($[[R3]])
; MM-MNO-PIC: lw16 $[[R5:[0-9]+]], 4($[[R3]])
; MM-MNO-LE-PIC: mtc1 $[[R4]], $f0
; MM-MNO-LE-PIC: mthc1 $[[R5]], $f0
; MM-MNO-BE-PIC: mtc1 $[[R5]], $f0
; MM-MNO-BE-PIC: mthc1 $[[R4]], $f0
; MM-MNO-PIC-DAG: lw16 $[[R4:[0-9]+]], 0($[[R3]])
; MM-MNO-PIC-DAG: lw16 $[[R5:[0-9]+]], 4($[[R3]])
; MM-MNO-LE-PIC-DAG: mtc1 $[[R4]], $f0
; MM-MNO-LE-PIC-DAG: mthc1 $[[R5]], $f0
; MM-MNO-BE-PIC-DAG: mtc1 $[[R5]], $f0
; MM-MNO-BE-PIC-DAG: mthc1 $[[R4]], $f0
; MM-STATIC-PIC: lui $[[R0:[0-9]+]], %hi(g0)
; MM-STATIC-PIC: ldc1 $f0, %lo(g0)($[[R0]])
@ -214,13 +214,13 @@ entry:
; MM-MNO-PIC: lui $[[R0:[0-9]+]], %hi(_gp_disp)
; MM-MNO-PIC: addiu $[[R1:[0-9]+]], $[[R0]], %lo(_gp_disp)
; MM-MNO-PIC: addu $[[R2:[0-9]+]], $[[R1]], $25
; MM-MNO-LE-PIC: mfc1 $[[R3:[0-9]+]], $f12
; MM-MNO-BE-PIC: mfhc1 $[[R3:[0-9]+]], $f12
; MM-MNO-PIC: lw $[[R4:[0-9]+]], %got(g0)($[[R2]])
; MM-MNO-PIC: sw16 $[[R3]], 0($[[R4]])
; MM-MNO-LE-PIC: mfhc1 $[[R5:[0-9]+]], $f12
; MM-MNO-BE-PIC: mfc1 $[[R5:[0-9]+]], $f12
; MM-MNO-PIC: sw16 $[[R5]], 4($[[R4]])
; MM-MNO-LE-PIC-DAG: mfc1 $[[R3:[0-9]+]], $f12
; MM-MNO-BE-PIC-DAG: mfhc1 $[[R3:[0-9]+]], $f12
; MM-MNO-PIC-DAG: lw $[[R4:[0-9]+]], %got(g0)($[[R2]])
; MM-MNO-PIC-DAG: sw16 $[[R3]], 0($[[R4]])
; MM-MNO-LE-PIC-DAG: mfhc1 $[[R5:[0-9]+]], $f12
; MM-MNO-BE-PIC-DAG: mfc1 $[[R5:[0-9]+]], $f12
; MM-MNO-PIC-DAG: sw16 $[[R5]], 4($[[R4]])
; MM-STATIC-PIC: lui $[[R0:[0-9]+]], %hi(g0)
; MM-STATIC-PIC: sdc1 $f12, %lo(g0)($[[R0]])
@ -267,8 +267,8 @@ entry:
; MM-MNO-PIC: sll16 $[[R0:[0-9]+]], $5, 3
; MM-MNO-PIC: addu16 $[[R1:[0-9]+]], $4, $[[R0]]
; MM-MNO-PIC: lw16 $[[R2:[0-9]+]], 0($[[R1]])
; MM-MNO-PIC: lw16 $[[R3:[0-9]+]], 4($[[R1]])
; MM-MNO-PIC-DAG: lw16 $[[R2:[0-9]+]], 0($[[R1]])
; MM-MNO-PIC-DAG: lw16 $[[R3:[0-9]+]], 4($[[R1]])
; MM-MNO-LE-PIC: mtc1 $[[R2]], $f0
; MM-MNO-LE-PIC: mthc1 $[[R3]], $f0
; MM-MNO-BE-PIC: mtc1 $[[R3]], $f0
@ -313,14 +313,14 @@ entry:
; MM: addu16 $[[R1:[0-9]+]], $6, $[[R0]]
; MM: sdc1 $f12, 0($[[R1]])
; MM-MNO-PIC: sll16 $[[R0:[0-9]+]], $7, 3
; MM-MNO-PIC: addu16 $[[R1:[0-9]+]], $6, $[[R0]]
; MM-MNO-LE-PIC: mfc1 $[[R2:[0-9]+]], $f12
; MM-MNO-BE-PIC: mfhc1 $[[R2:[0-9]+]], $f12
; MM-MNO-PIC: sw16 $[[R2]], 0($[[R1]])
; MM-MNO-LE-PIC: mfhc1 $[[R3:[0-9]+]], $f12
; MM-MNO-BE-PIC: mfc1 $[[R3:[0-9]+]], $f12
; MM-MNO-PIC: sw16 $[[R3]], 4($[[R1]])
; MM-MNO-PIC: sll16 $[[R0:[0-9]+]], $7, 3
; MM-MNO-PIC: addu16 $[[R1:[0-9]+]], $6, $[[R0]]
; MM-MNO-LE-PIC-DAG: mfc1 $[[R2:[0-9]+]], $f12
; MM-MNO-BE-PIC-DAG: mfhc1 $[[R2:[0-9]+]], $f12
; MM-MNO-PIC-DAG: sw16 $[[R2]], 0($[[R1]])
; MM-MNO-LE-PIC-DAG: mfhc1 $[[R3:[0-9]+]], $f12
; MM-MNO-BE-PIC-DAG: mfc1 $[[R3:[0-9]+]], $f12
; MM-MNO-PIC-DAG: sw16 $[[R3]], 4($[[R1]])
; MM-STATIC-PIC: sll16 $[[R0:[0-9]+]], $7, 3
; MM-STATIC-PIC: addu16 $[[R1:[0-9]+]], $6, $[[R0]]

View File

@ -234,15 +234,15 @@ entry:
; MIPS32: insert.w $w[[W0]][1], $[[R1]]
; MIPS32: insert.w $w[[W0]][3], $[[R1]]
; MIPS64-N64: ld $[[R3:[0-9]+]], %got_disp(h)
; MIPS64-N32: lw $[[R3:[0-9]+]], %got_disp(h)
; MIPS64: dmfc1 $[[R1:[0-9]+]], $f[[F2]]
; MIPS64: fill.d $w[[W0:[0-9]+]], $[[R1]]
; MIPS64-N64-DAG: ld $[[R3:[0-9]+]], %got_disp(h)
; MIPS64-N32-DAG: lw $[[R3:[0-9]+]], %got_disp(h)
; MIPS64-DAG: dmfc1 $[[R1:[0-9]+]], $f[[F2]]
; MIPS64-DAG: fill.d $w[[W0:[0-9]+]], $[[R1]]
; ALL: fexdo.w $w[[W1:[0-9]+]], $w[[W0]], $w[[W0]]
; ALL: fexdo.h $w[[W2:[0-9]+]], $w[[W1]], $w[[W1]]
; ALL-DAG: fexdo.w $w[[W1:[0-9]+]], $w[[W0]], $w[[W0]]
; ALL-DAG: fexdo.h $w[[W2:[0-9]+]], $w[[W1]], $w[[W1]]
; MIPS32: lw $[[R3:[0-9]+]], %got(h)
; MIPS32-DAG: lw $[[R3:[0-9]+]], %got(h)
; ALL: copy_u.h $[[R2:[0-9]+]], $w[[W2]]
; ALL: sh $[[R2]], 0($[[R3]])

View File

@ -336,8 +336,8 @@ entry:
; CHECK: llvm_mips_st_b_valid_range_tests:
; CHECK: ld.b
; CHECK: st.b [[R1:\$w[0-9]+]], -512(
; CHECK: st.b [[R1:\$w[0-9]+]], 511(
; CHECK-DAG: st.b [[R1:\$w[0-9]+]], -512(
; CHECK-DAG: st.b [[R1:\$w[0-9]+]], 511(
; CHECK: .size llvm_mips_st_b_valid_range_tests
;
@ -351,10 +351,10 @@ entry:
}
; CHECK: llvm_mips_st_b_invalid_range_tests:
; CHECK: addiu $2, $1, -513
; CHECK: addiu $2, $1, 512
; CHECK: ld.b
; CHECK: st.b [[R1:\$w[0-9]+]], 0(
; CHECK: addiu $1, $1, 512
; CHECK: addiu $1, $1, -513
; CHECK: st.b [[R1:\$w[0-9]+]], 0(
; CHECK: .size llvm_mips_st_b_invalid_range_tests
;
@ -404,8 +404,8 @@ entry:
; CHECK: llvm_mips_st_h_valid_range_tests:
; CHECK: ld.h
; CHECK: st.h [[R1:\$w[0-9]+]], -1024(
; CHECK: st.h [[R1:\$w[0-9]+]], 1022(
; CHECK-DAG: st.h [[R1:\$w[0-9]+]], -1024(
; CHECK-DAG: st.h [[R1:\$w[0-9]+]], 1022(
; CHECK: .size llvm_mips_st_h_valid_range_tests
;
@ -419,10 +419,10 @@ entry:
}
; CHECK: llvm_mips_st_h_invalid_range_tests:
; CHECK: addiu $2, $1, -1026
; CHECK: addiu $2, $1, 1024
; CHECK: ld.h
; CHECK: st.h [[R1:\$w[0-9]+]], 0(
; CHECK: addiu $1, $1, 1024
; CHECK: addiu $1, $1, -1026
; CHECK: st.h [[R1:\$w[0-9]+]], 0(
; CHECK: .size llvm_mips_st_h_invalid_range_tests
;
@ -472,8 +472,8 @@ entry:
; CHECK: llvm_mips_st_w_valid_range_tests:
; CHECK: ld.w
; CHECK: st.w [[R1:\$w[0-9]+]], -2048(
; CHECK: st.w [[R1:\$w[0-9]+]], 2044(
; CHECK-DAG: st.w [[R1:\$w[0-9]+]], -2048(
; CHECK-DAG: st.w [[R1:\$w[0-9]+]], 2044(
; CHECK: .size llvm_mips_st_w_valid_range_tests
;
@ -487,10 +487,10 @@ entry:
}
; CHECK: llvm_mips_st_w_invalid_range_tests:
; CHECK: addiu $2, $1, -2052
; CHECK: addiu $2, $1, 2048
; CHECK: ld.w
; CHECK: st.w [[R1:\$w[0-9]+]], 0(
; CHECK: addiu $1, $1, 2048
; CHECK: addiu $1, $1, -2052
; CHECK: st.w [[R1:\$w[0-9]+]], 0(
; CHECK: .size llvm_mips_st_w_invalid_range_tests
;
@ -540,8 +540,8 @@ entry:
; CHECK: llvm_mips_st_d_valid_range_tests:
; CHECK: ld.d
; CHECK: st.d [[R1:\$w[0-9]+]], -4096(
; CHECK: st.d [[R1:\$w[0-9]+]], 4088(
; CHECK-DAG: st.d [[R1:\$w[0-9]+]], -4096(
; CHECK-DAG: st.d [[R1:\$w[0-9]+]], 4088(
; CHECK: .size llvm_mips_st_d_valid_range_tests
;
@ -555,10 +555,10 @@ entry:
}
; CHECK: llvm_mips_st_d_invalid_range_tests:
; CHECK: addiu $2, $1, -4104
; CHECK: addiu $2, $1, 4096
; CHECK: ld.d
; CHECK: st.d [[R1:\$w[0-9]+]], 0(
; CHECK: addiu $1, $1, 4096
; CHECK: addiu $1, $1, -4104
; CHECK: st.d [[R1:\$w[0-9]+]], 0(
; CHECK: .size llvm_mips_st_d_invalid_range_tests
;

View File

@ -45,20 +45,18 @@ declare void @callee3(float, %struct.S3* byval, %struct.S1* byval)
define void @f2(float %f, %struct.S1* nocapture byval %s1) nounwind {
entry:
; CHECK: addiu $sp, $sp, -48
; CHECK: sw $7, 60($sp)
; CHECK: sw $6, 56($sp)
; CHECK: lw $4, 80($sp)
; CHECK: ldc1 $f[[F0:[0-9]+]], 72($sp)
; CHECK: lw $[[R3:[0-9]+]], 64($sp)
; CHECK: lw $[[R4:[0-9]+]], 68($sp)
; CHECK: lw $[[R2:[0-9]+]], 60($sp)
; CHECK: lh $[[R1:[0-9]+]], 58($sp)
; CHECK: lb $[[R0:[0-9]+]], 56($sp)
; CHECK: sw $[[R0]], 32($sp)
; CHECK: sw $[[R1]], 28($sp)
; CHECK: sw $[[R2]], 24($sp)
; CHECK: sw $[[R4]], 20($sp)
; CHECK: sw $[[R3]], 16($sp)
; CHECK-DAG: sw $7, 60($sp)
; CHECK-DAG: sw $6, 56($sp)
; CHECK-DAG: ldc1 $f[[F0:[0-9]+]], 72($sp)
; CHECK-DAG: lw $[[R3:[0-9]+]], 64($sp)
; CHECK-DAG: lw $[[R4:[0-9]+]], 68($sp)
; CHECK-DAG: lh $[[R1:[0-9]+]], 58($sp)
; CHECK-DAG: lb $[[R0:[0-9]+]], 56($sp)
; CHECK-DAG: sw $[[R0]], 32($sp)
; CHECK-DAG: sw $[[R1]], 28($sp)
; CHECK-DAG: sw $[[R4]], 20($sp)
; CHECK-DAG: sw $[[R3]], 16($sp)
; CHECK-DAG: sw $7, 24($sp)
; CHECK: mfc1 $6, $f[[F0]]
%i2 = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 5
@ -82,13 +80,11 @@ declare void @callee4(i32, double, i64, i32, i16 signext, i8 signext, float)
define void @f3(%struct.S2* nocapture byval %s2) nounwind {
entry:
; CHECK: addiu $sp, $sp, -48
; CHECK: sw $7, 60($sp)
; CHECK: sw $6, 56($sp)
; CHECK: sw $5, 52($sp)
; CHECK: sw $4, 48($sp)
; CHECK: lw $4, 48($sp)
; CHECK: lw $[[R0:[0-9]+]], 60($sp)
; CHECK: sw $[[R0]], 24($sp)
; CHECK-DAG: sw $7, 60($sp)
; CHECK-DAG: sw $6, 56($sp)
; CHECK-DAG: sw $5, 52($sp)
; CHECK-DAG: sw $4, 48($sp)
; CHECK-DAG: sw $7, 24($sp)
%arrayidx = getelementptr inbounds %struct.S2, %struct.S2* %s2, i32 0, i32 0, i32 0
%tmp = load i32, i32* %arrayidx, align 4
@ -101,14 +97,14 @@ entry:
define void @f4(float %f, %struct.S3* nocapture byval %s3, %struct.S1* nocapture byval %s1) nounwind {
entry:
; CHECK: addiu $sp, $sp, -48
; CHECK: sw $7, 60($sp)
; CHECK: sw $6, 56($sp)
; CHECK: sw $5, 52($sp)
; CHECK: lw $4, 60($sp)
; CHECK: lw $[[R1:[0-9]+]], 80($sp)
; CHECK: lb $[[R0:[0-9]+]], 52($sp)
; CHECK: sw $[[R0]], 32($sp)
; CHECK: sw $[[R1]], 24($sp)
; CHECK-DAG: sw $7, 60($sp)
; CHECK-DAG: sw $6, 56($sp)
; CHECK-DAG: sw $5, 52($sp)
; CHECK-DAG: lw $[[R1:[0-9]+]], 80($sp)
; CHECK-DAG: lb $[[R0:[0-9]+]], 52($sp)
; CHECK-DAG: sw $[[R0]], 32($sp)
; CHECK-DAG: sw $[[R1]], 24($sp)
; CHECK: move $4, $7
%i = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 2
%tmp = load i32, i32* %i, align 4

View File

@ -29,9 +29,9 @@ entry:
; CHECK-LABEL: va1:
; CHECK: addiu $sp, $sp, -16
; CHECK: sw $5, 20($sp)
; CHECK: sw $7, 28($sp)
; CHECK: sw $6, 24($sp)
; CHECK: sw $5, 20($sp)
; CHECK: lw $2, 20($sp)
}
@ -83,8 +83,8 @@ entry:
; CHECK-LABEL: va3:
; CHECK: addiu $sp, $sp, -16
; CHECK: sw $7, 28($sp)
; CHECK: sw $6, 24($sp)
; CHECK: sw $7, 28($sp)
; CHECK: lw $2, 24($sp)
}

View File

@ -60,33 +60,34 @@ equal:
unequal:
ret i8* %array2_ptr
}
; CHECK-LABEL: func2:
; CHECK: ld [[REG2:[0-9]+]], 72(1)
; CHECK: cmpld {{([0-9]+,)?}}4, [[REG2]]
; CHECK-DAG: std [[REG2]], -[[OFFSET1:[0-9]+]]
; CHECK: cmpld {{([0-9]+,)?}}4, 6
; CHECK-DAG: std 6, 72(1)
; CHECK-DAG: std 5, 64(1)
; CHECK-DAG: std 6, -[[OFFSET1:[0-9]+]]
; CHECK-DAG: std 4, -[[OFFSET2:[0-9]+]]
; CHECK: ld 3, -[[OFFSET2]](1)
; CHECK: ld 3, -[[OFFSET1]](1)
; DARWIN32: _func2:
; DARWIN32: addi r[[REG1:[0-9]+]], r[[REGSP:[0-9]+]], 36
; DARWIN32: lwz r[[REG2:[0-9]+]], 44(r[[REGSP]])
; DARWIN32-LABEL: _func2
; DARWIN32-DAG: addi r[[REG8:[0-9]+]], r[[REGSP:[0-9]+]], 36
; DARWIN32-DAG: lwz r[[REG2:[0-9]+]], 44(r[[REGSP]])
; DARWIN32: mr
; DARWIN32: mr r[[REG3:[0-9]+]], r[[REGA:[0-9]+]]
; DARWIN32: cmplw {{(cr[0-9]+,)?}}r[[REGA]], r[[REG2]]
; DARWIN32: stw r[[REG3]], -[[OFFSET1:[0-9]+]]
; DARWIN32: stw r[[REG2]], -[[OFFSET2:[0-9]+]]
; DARWIN32: lwz r3, -[[OFFSET1]]
; DARWIN32: lwz r3, -[[OFFSET2]]
; DARWIN32: mr r[[REG7:[0-9]+]], r5
; DARWIN32-DAG: cmplw {{(cr[0-9]+,)?}}r5, r[[REG2]]
; DARWIN32-DAG: stw r[[REG7]], -[[OFFSET1:[0-9]+]]
; DARWIN32-DAG: stw r[[REG2]], -[[OFFSET2:[0-9]+]]
; DARWIN32-DAG: lwz r3, -[[OFFSET1]]
; DARWIN32-DAG: lwz r3, -[[OFFSET2]]
; DARWIN64: _func2:
; DARWIN64: ld r[[REG2:[0-9]+]], 72(r1)
; DARWIN64: mr
; DARWIN64: mr r[[REG3:[0-9]+]], r[[REGA:[0-9]+]]
; DARWIN64: cmpld {{(cr[0-9]+,)?}}r[[REGA]], r[[REG2]]
; DARWIN64: std r[[REG3]], -[[OFFSET1:[0-9]+]]
; DARWIN64: std r[[REG2]], -[[OFFSET2:[0-9]+]]
; DARWIN64: std r[[REG3]], -[[OFFSET1:[0-9]+]]
; DARWIN64: ld r3, -[[OFFSET1]]
; DARWIN64: ld r3, -[[OFFSET2]]
@ -106,24 +107,24 @@ unequal:
}
; CHECK-LABEL: func3:
; CHECK: ld [[REG3:[0-9]+]], 72(1)
; CHECK: ld [[REG4:[0-9]+]], 56(1)
; CHECK: cmpld {{([0-9]+,)?}}[[REG4]], [[REG3]]
; CHECK: std [[REG3]], -[[OFFSET1:[0-9]+]](1)
; CHECK: std [[REG4]], -[[OFFSET2:[0-9]+]](1)
; CHECK: cmpld {{([0-9]+,)?}}4, 6
; CHECK-DAG: std 4, -[[OFFSET2:[0-9]+]](1)
; CHECK-DAG: std 6, -[[OFFSET1:[0-9]+]](1)
; CHECK: ld 3, -[[OFFSET2]](1)
; CHECK: ld 3, -[[OFFSET1]](1)
; DARWIN32: _func3:
; DARWIN32: addi r[[REG1:[0-9]+]], r[[REGSP:[0-9]+]], 36
; DARWIN32: addi r[[REG2:[0-9]+]], r[[REGSP]], 24
; DARWIN32: lwz r[[REG3:[0-9]+]], 44(r[[REGSP]])
; DARWIN32: lwz r[[REG4:[0-9]+]], 32(r[[REGSP]])
; DARWIN32: cmplw {{(cr[0-9]+,)?}}r[[REG4]], r[[REG3]]
; DARWIN32: stw r[[REG3]], -[[OFFSET1:[0-9]+]]
; DARWIN32: stw r[[REG4]], -[[OFFSET2:[0-9]+]]
; DARWIN32: lwz r3, -[[OFFSET2]]
; DARWIN32: lwz r3, -[[OFFSET1]]
; DARWIN32-LABEL: _func3:
; DARWIN32-DAG: stw r[[REG8:[0-9]+]], 44(r[[REGSP:[0-9]+]])
; DARWIN32-DAG: stw r[[REG5:[0-9]+]], 32(r[[REGSP]])
; DARWIN32-DAG: addi r[[REG5a:[0-9]+]], r[[REGSP:[0-9]+]], 36
; DARWIN32-DAG: addi r[[REG8a:[0-9]+]], r[[REGSP]], 24
; DARWIN32-DAG: lwz r[[REG5a:[0-9]+]], 44(r[[REGSP]])
; DARWIN32-DAG: lwz r[[REG8a:[0-9]+]], 32(r[[REGSP]])
; DARWIN32-DAG: cmplw {{(cr[0-9]+,)?}}r[[REG8a]], r[[REG5a]]
; DARWIN32-DAG: stw r[[REG5a]], -[[OFFSET1:[0-9]+]]
; DARWIN32-DAG: stw r[[REG8a]], -[[OFFSET2:[0-9]+]]
; DARWIN32-DAG: lwz r3, -[[OFFSET1:[0-9]+]]
; DARWIN32-DAG: lwz r3, -[[OFFSET2:[0-9]+]]
; DARWIN64: _func3:
; DARWIN64: ld r[[REG3:[0-9]+]], 72(r1)

View File

@ -24,10 +24,10 @@ entry:
}
; CHECK-LABEL: foo:
; CHECK: lfd 1
; CHECK: lfd 2
; CHECK: lfd 3
; CHECK: lfd 4
; CHECK-DAG: lfd 1
; CHECK-DAG: lfd 2
; CHECK-DAG: lfd 3
; CHECK-DAG: lfd 4
define { float, float } @oof() nounwind {
entry:
@ -50,6 +50,6 @@ entry:
}
; CHECK-LABEL: oof:
; CHECK: lfs 2
; CHECK: lfs 1
; CHECK-DAG: lfs 2
; CHECK-DAG: lfs 1

View File

@ -18,31 +18,31 @@ entry:
ret void
}
; CHECK: std 6, 184(1)
; CHECK: std 5, 176(1)
; CHECK: std 4, 168(1)
; CHECK: std 3, 160(1)
; CHECK: lbz {{[0-9]+}}, 167(1)
; CHECK: lhz {{[0-9]+}}, 165(1)
; CHECK: stb {{[0-9]+}}, 55(1)
; CHECK: sth {{[0-9]+}}, 53(1)
; CHECK: lbz {{[0-9]+}}, 175(1)
; CHECK: lwz {{[0-9]+}}, 171(1)
; CHECK: stb {{[0-9]+}}, 63(1)
; CHECK: stw {{[0-9]+}}, 59(1)
; CHECK: lhz {{[0-9]+}}, 182(1)
; CHECK: lwz {{[0-9]+}}, 178(1)
; CHECK: sth {{[0-9]+}}, 70(1)
; CHECK: stw {{[0-9]+}}, 66(1)
; CHECK: lbz {{[0-9]+}}, 191(1)
; CHECK: lhz {{[0-9]+}}, 189(1)
; CHECK: lwz {{[0-9]+}}, 185(1)
; CHECK: stb {{[0-9]+}}, 79(1)
; CHECK: sth {{[0-9]+}}, 77(1)
; CHECK: stw {{[0-9]+}}, 73(1)
; CHECK: ld 6, 72(1)
; CHECK: ld 5, 64(1)
; CHECK: ld 4, 56(1)
; CHECK: ld 3, 48(1)
; CHECK-DAG: std 3, 160(1)
; CHECK-DAG: std 6, 184(1)
; CHECK-DAG: std 5, 176(1)
; CHECK-DAG: std 4, 168(1)
; CHECK-DAG: lbz {{[0-9]+}}, 167(1)
; CHECK-DAG: lhz {{[0-9]+}}, 165(1)
; CHECK-DAG: stb {{[0-9]+}}, 55(1)
; CHECK-DAG: sth {{[0-9]+}}, 53(1)
; CHECK-DAG: lbz {{[0-9]+}}, 175(1)
; CHECK-DAG: lwz {{[0-9]+}}, 171(1)
; CHECK-DAG: stb {{[0-9]+}}, 63(1)
; CHECK-DAG: stw {{[0-9]+}}, 59(1)
; CHECK-DAG: lhz {{[0-9]+}}, 182(1)
; CHECK-DAG: lwz {{[0-9]+}}, 178(1)
; CHECK-DAG: sth {{[0-9]+}}, 70(1)
; CHECK-DAG: stw {{[0-9]+}}, 66(1)
; CHECK-DAG: lbz {{[0-9]+}}, 191(1)
; CHECK-DAG: lhz {{[0-9]+}}, 189(1)
; CHECK-DAG: lwz {{[0-9]+}}, 185(1)
; CHECK-DAG: stb {{[0-9]+}}, 79(1)
; CHECK-DAG: sth {{[0-9]+}}, 77(1)
; CHECK-DAG: stw {{[0-9]+}}, 73(1)
; CHECK-DAG: ld 6, 72(1)
; CHECK-DAG: ld 5, 64(1)
; CHECK-DAG: ld 4, 56(1)
; CHECK-DAG: ld 3, 48(1)
declare void @check(%struct.S3* byval, %struct.S5* byval, %struct.S6* byval, %struct.S7* byval)

View File

@ -1,6 +1,6 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -fast-isel=false -mattr=-vsx < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -fast-isel=false -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O0 -fast-isel=false -mattr=+vsx < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O0 -fast-isel=false -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-P9 %s
; Verify internal alignment of long double in a struct. The double
; argument comes in in GPR3; GPR4 is skipped; GPRs 5 and 6 contain
@ -19,19 +19,44 @@ entry:
ret ppc_fp128 %0
}
; The additional stores are caused because we forward the value in the
; store->load->bitcast path to make a store and bitcast of the same
; value. Since the target performs the bitcast through memory and we no
; longer remember the original address, we must do the store to a fresh
; local address.
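; To make the comment above concrete, here is a minimal hypothetical
; sketch (the function name and types are illustrative only, not part
; of this test) of the store->load->bitcast shape being described:
;
;   define double @bitcast_through_mem(i64 %v, i64* %p) {
;     store i64 %v, i64* %p
;     %fp = bitcast i64* %p to double*
;     %f = load double, double* %fp
;     ret double %f
;   }
;
; With the improved forwarding, the load sees %v directly, so the
; i64->f64 bitcast is realized through a fresh local stack slot while
; the original store to %p is still emitted in parallel.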
; CHECK-DAG: std 6, 72(1)
; CHECK-DAG: std 5, 64(1)
; CHECK-DAG: std 4, 56(1)
; CHECK-DAG: std 3, 48(1)
; CHECK: lfd 1, 64(1)
; CHECK: lfd 2, 72(1)
; CHECK-DAG: std 5, -16(1)
; CHECK-DAG: std 6, -8(1)
; CHECK-DAG: lfd 1, -16(1)
; CHECK-DAG: lfd 2, -8(1)
; FIXMECHECK: lfd 1, 64(1)
; FIXMECHECK: lfd 2, 72(1)
; CHECK-VSX-DAG: std 6, 72(1)
; CHECK-VSX-DAG: std 5, 64(1)
; CHECK-VSX-DAG: std 4, 56(1)
; CHECK-VSX-DAG: std 3, 48(1)
; CHECK-VSX: li 3, 16
; CHECK-VSX: addi 4, 1, 48
; CHECK-VSX: lxsdx 1, 4, 3
; CHECK-VSX: li 3, 24
; CHECK-VSX: lxsdx 2, 4, 3
; CHECK-VSX-DAG: std 5, -16(1)
; CHECK-VSX-DAG: std 6, -8(1)
; CHECK-VSX: addi 3, 1, -16
; CHECK-VSX: lxsdx 1, 0, 3
; CHECK-VSX: addi 3, 1, -8
; CHECK-VSX: lxsdx 2, 0, 3
; FIXME-VSX: addi 4, 1, 48
; FIXME-VSX: lxsdx 1, 4, 3
; FIXME-VSX: li 3, 24
; FIXME-VSX: lxsdx 2, 4, 3
; CHECK-P9: std 6, 72(1)
; CHECK-P9: std 5, 64(1)
; CHECK-P9: std 4, 56(1)
; CHECK-P9: std 3, 48(1)
; CHECK-P9: mtvsrd 1, 5
; CHECK-P9: mtvsrd 2, 6

View File

@ -113,13 +113,13 @@ entry:
%add13 = add nsw i32 %add11, %6
ret i32 %add13
; CHECK: lha {{[0-9]+}}, 126(1)
; CHECK: lha {{[0-9]+}}, 132(1)
; CHECK: lbz {{[0-9]+}}, 119(1)
; CHECK: lwz {{[0-9]+}}, 140(1)
; CHECK: lwz {{[0-9]+}}, 144(1)
; CHECK: lwz {{[0-9]+}}, 152(1)
; CHECK: lwz {{[0-9]+}}, 160(1)
; CHECK-DAG: lha {{[0-9]+}}, 126(1)
; CHECK-DAG: lha {{[0-9]+}}, 132(1)
; CHECK-DAG: lbz {{[0-9]+}}, 119(1)
; CHECK-DAG: lwz {{[0-9]+}}, 140(1)
; CHECK-DAG: lwz {{[0-9]+}}, 144(1)
; CHECK-DAG: lwz {{[0-9]+}}, 152(1)
; CHECK-DAG: lwz {{[0-9]+}}, 160(1)
}
define i32 @caller2() nounwind {
@ -205,11 +205,11 @@ entry:
%add13 = add nsw i32 %add11, %6
ret i32 %add13
; CHECK: lha {{[0-9]+}}, 126(1)
; CHECK: lha {{[0-9]+}}, 133(1)
; CHECK: lbz {{[0-9]+}}, 119(1)
; CHECK: lwz {{[0-9]+}}, 140(1)
; CHECK: lwz {{[0-9]+}}, 147(1)
; CHECK: lwz {{[0-9]+}}, 154(1)
; CHECK: lwz {{[0-9]+}}, 161(1)
; CHECK-DAG: lha {{[0-9]+}}, 126(1)
; CHECK-DAG: lha {{[0-9]+}}, 133(1)
; CHECK-DAG: lbz {{[0-9]+}}, 119(1)
; CHECK-DAG: lwz {{[0-9]+}}, 140(1)
; CHECK-DAG: lwz {{[0-9]+}}, 147(1)
; CHECK-DAG: lwz {{[0-9]+}}, 154(1)
; CHECK-DAG: lwz {{[0-9]+}}, 161(1)
}

View File

@ -59,6 +59,7 @@ entry:
%call = call i32 @callee1(%struct.s1* byval %p1, %struct.s2* byval %p2, %struct.s3* byval %p3, %struct.s4* byval %p4, %struct.s5* byval %p5, %struct.s6* byval %p6, %struct.s7* byval %p7)
ret i32 %call
; CHECK-LABEL: caller1
; CHECK: ld 9, 112(31)
; CHECK: ld 8, 120(31)
; CHECK: ld 7, 128(31)
@ -97,20 +98,21 @@ entry:
%add13 = add nsw i32 %add11, %6
ret i32 %add13
; CHECK: std 9, 96(1)
; CHECK: std 8, 88(1)
; CHECK: std 7, 80(1)
; CHECK: stw 6, 76(1)
; CHECK: stw 5, 68(1)
; CHECK: sth 4, 62(1)
; CHECK: stb 3, 55(1)
; CHECK: lha {{[0-9]+}}, 62(1)
; CHECK: lha {{[0-9]+}}, 68(1)
; CHECK: lbz {{[0-9]+}}, 55(1)
; CHECK: lwz {{[0-9]+}}, 76(1)
; CHECK: lwz {{[0-9]+}}, 80(1)
; CHECK: lwz {{[0-9]+}}, 88(1)
; CHECK: lwz {{[0-9]+}}, 96(1)
; CHECK-LABEL: callee1
; CHECK-DAG: std 9, 96(1)
; CHECK-DAG: std 8, 88(1)
; CHECK-DAG: std 7, 80(1)
; CHECK-DAG: stw 6, 76(1)
; CHECK-DAG: stw 5, 68(1)
; CHECK-DAG: sth 4, 62(1)
; CHECK-DAG: stb 3, 55(1)
; CHECK-DAG: lha {{[0-9]+}}, 62(1)
; CHECK-DAG: lha {{[0-9]+}}, 68(1)
; CHECK-DAG: lbz {{[0-9]+}}, 55(1)
; CHECK-DAG: lwz {{[0-9]+}}, 76(1)
; CHECK-DAG: lwz {{[0-9]+}}, 80(1)
; CHECK-DAG: lwz {{[0-9]+}}, 88(1)
; CHECK-DAG: lwz {{[0-9]+}}, 96(1)
}
define i32 @caller2() nounwind {
@ -139,6 +141,7 @@ entry:
%call = call i32 @callee2(%struct.t1* byval %p1, %struct.t2* byval %p2, %struct.t3* byval %p3, %struct.t4* byval %p4, %struct.t5* byval %p5, %struct.t6* byval %p6, %struct.t7* byval %p7)
ret i32 %call
; CHECK-LABEL: caller2
; CHECK: stb {{[0-9]+}}, 71(1)
; CHECK: sth {{[0-9]+}}, 69(1)
; CHECK: stb {{[0-9]+}}, 87(1)
@ -184,18 +187,19 @@ entry:
%add13 = add nsw i32 %add11, %6
ret i32 %add13
; CHECK: std 9, 96(1)
; CHECK: std 8, 88(1)
; CHECK: std 7, 80(1)
; CHECK: stw 6, 76(1)
; CHECK: std 5, 64(1)
; CHECK: sth 4, 62(1)
; CHECK: stb 3, 55(1)
; CHECK: lha {{[0-9]+}}, 62(1)
; CHECK: lha {{[0-9]+}}, 69(1)
; CHECK: lbz {{[0-9]+}}, 55(1)
; CHECK: lwz {{[0-9]+}}, 76(1)
; CHECK: lwz {{[0-9]+}}, 83(1)
; CHECK: lwz {{[0-9]+}}, 90(1)
; CHECK: lwz {{[0-9]+}}, 97(1)
; CHECK-LABEL: callee2
; CHECK-DAG: std 9, 96(1)
; CHECK-DAG: std 8, 88(1)
; CHECK-DAG: std 7, 80(1)
; CHECK-DAG: stw 6, 76(1)
; CHECK-DAG: std 5, 64(1)
; CHECK-DAG: sth 4, 62(1)
; CHECK-DAG: stb 3, 55(1)
; CHECK-DAG: lha {{[0-9]+}}, 62(1)
; CHECK-DAG: lha {{[0-9]+}}, 69(1)
; CHECK-DAG: lbz {{[0-9]+}}, 55(1)
; CHECK-DAG: lwz {{[0-9]+}}, 76(1)
; CHECK-DAG: lwz {{[0-9]+}}, 83(1)
; CHECK-DAG: lwz {{[0-9]+}}, 90(1)
; CHECK-DAG: lwz {{[0-9]+}}, 97(1)
}

View File

@ -1,10 +1,7 @@
; Check that unaligned accesses are allowed in general. We check the
; few exceptions (like CRL) in their respective test files.
;
; FIXME: -combiner-alias-analysis (the default for SystemZ) stops
; f1 from being optimized.
; RUN: llc < %s -mtriple=s390x-linux-gnu -combiner-alias-analysis=false \
; RUN: | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; Check that these four byte stores become a single word store.
define void @f1(i8 *%ptr) {

View File

@ -9,9 +9,9 @@
define void @_Z19getClosestDiagonal3ii(%0* noalias sret, i32, i32) nounwind {
; CHECK: bl ___muldf3
; CHECK: bl ___muldf3
; CHECK: beq LBB0
; CHECK: bl ___muldf3
; CHECK: bl ___muldf3
; <label>:3
switch i32 %1, label %4 [
i32 0, label %5

View File

@ -74,15 +74,17 @@ define zeroext i16 @test6() {
}
; Accessing the bottom of a large array shouldn't require materializing a base register
;
; CHECK: movs [[REG:r[0-9]+]], #1
; CHECK: str [[REG]], [sp, #16]
; CHECK: str [[REG]], [sp, #4]
define void @test7() {
%arr = alloca [200 x i32], align 4
; CHECK: movs [[REG:r[0-9]+]], #1
; CHECK: str [[REG]], [sp, #4]
%arrayidx = getelementptr inbounds [200 x i32], [200 x i32]* %arr, i32 0, i32 1
store i32 1, i32* %arrayidx, align 4
; CHECK: str [[REG]], [sp, #16]
%arrayidx1 = getelementptr inbounds [200 x i32], [200 x i32]* %arr, i32 0, i32 4
store i32 1, i32* %arrayidx1, align 4
@ -96,30 +98,36 @@ define void @test8() {
%arr1 = alloca [224 x i32], align 4
; CHECK: movs [[REG:r[0-9]+]], #1
; CHECK: str [[REG]], [sp]
; CHECK-DAG: str [[REG]], [sp]
%arr1idx1 = getelementptr inbounds [224 x i32], [224 x i32]* %arr1, i32 0, i32 0
store i32 1, i32* %arr1idx1, align 4
; Offset in range for sp-based store, but not for non-sp-based store
; CHECK: str [[REG]], [sp, #128]
; CHECK-DAG: str [[REG]], [sp, #128]
%arr1idx2 = getelementptr inbounds [224 x i32], [224 x i32]* %arr1, i32 0, i32 32
store i32 1, i32* %arr1idx2, align 4
; CHECK: str [[REG]], [sp, #896]
; CHECK-DAG: str [[REG]], [sp, #896]
%arr2idx1 = getelementptr inbounds [224 x i32], [224 x i32]* %arr2, i32 0, i32 0
store i32 1, i32* %arr2idx1, align 4
; %arr2 is in range, but this element of it is not
; CHECK: str [[REG]], [{{r[0-9]+}}]
; CHECK-DAG: ldr [[RA:r[0-9]+]], .LCPI7_2
; CHECK-DAG: add [[RA]], sp
; CHECK-DAG: str [[REG]], [{{r[0-9]+}}]
%arr2idx2 = getelementptr inbounds [224 x i32], [224 x i32]* %arr2, i32 0, i32 32
store i32 1, i32* %arr2idx2, align 4
; %arr3 is not in range
; CHECK: str [[REG]], [{{r[0-9]+}}]
; CHECK-DAG: ldr [[RB:r[0-9]+]], .LCPI7_3
; CHECK-DAG: add [[RB]], sp
; CHECK-DAG: str [[REG]], [{{r[0-9]+}}]
%arr3idx1 = getelementptr inbounds [224 x i32], [224 x i32]* %arr3, i32 0, i32 0
store i32 1, i32* %arr3idx1, align 4
; CHECK: str [[REG]], [{{r[0-9]+}}]
; CHECK-DAG: ldr [[RC:r[0-9]+]], .LCPI7_4
; CHECK-DAG: add [[RC]], sp
; CHECK-DAG: str [[REG]], [{{r[0-9]+}}]
%arr3idx2 = getelementptr inbounds [224 x i32], [224 x i32]* %arr3, i32 0, i32 32
store i32 1, i32* %arr3idx2, align 4

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -combiner-alias-analysis -march=x86-64 -mcpu=core2 | FileCheck %s
; RUN: llc < %s -march=x86-64 -mcpu=core2 | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.4"

View File

@ -3,8 +3,8 @@
; CHECK: merge_stores_can
; CHECK: callq foo
; CHECK: xorps %xmm0, %xmm0
; CHECK-NEXT: movl 36(%rsp), %ebp
; CHECK-NEXT: movups %xmm0
; CHECK-NEXT: movl 36(%rsp), %ebp
; CHECK: callq foo
; CHECK: ret
declare i32 @foo([10 x i32]* )

View File

@ -111,8 +111,7 @@ define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind
; CHECK-LABEL: merge_nonconst_store:
; CHECK: movl $67305985
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: movw
; CHECK: movb
; CHECK: ret
define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
@ -292,16 +291,12 @@ block4: ; preds = %4, %.lr.ph
ret void
}
;; On x86, even unaligned copies should be merged to vector ops.
;; TODO: however, this cannot happen at the moment, due to brokenness
;; in MergeConsecutiveStores. See UseAA FIXME in DAGCombiner.cpp
;; visitSTORE.
;; On x86, even unaligned copies can be merged to vector ops.
; CHECK-LABEL: merge_loads_no_align:
; load:
; CHECK-NOT: vmovups ;; TODO
; CHECK: vmovups
; store:
; CHECK-NOT: vmovups ;; TODO
; CHECK: vmovups
; CHECK: ret
define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
%a1 = icmp sgt i32 %count, 0
@ -583,8 +578,8 @@ define void @merge_vec_element_and_scalar_load([6 x i64]* %array) {
; CHECK-LABEL: merge_vec_element_and_scalar_load
; CHECK: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rcx
; CHECK-NEXT: movq %rax, 32(%rdi)
; CHECK-NEXT: movq 8(%rdi), %rax
; CHECK-NEXT: movq %rax, 40(%rdi)
; CHECK-NEXT: movq %rcx, 40(%rdi)
; CHECK-NEXT: retq
}

View File

@ -1159,10 +1159,6 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; KNL-NEXT: vpsllw $7, %ymm2, %ymm2
; KNL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; KNL-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL-NEXT: vpcmpgtb %ymm2, %ymm3, %ymm2
; KNL-NEXT: vmovups 4(%rdi), %zmm3 {%k2} {z}
; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k1} {z}
; KNL-NEXT: vcmpltps %zmm4, %zmm1, %k0

View File

@ -11,9 +11,9 @@ define void @cftx020(double* nocapture %a) {
; CHECK-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovupd (%rdi), %xmm1
; CHECK-NEXT: vsubpd 16(%rdi), %xmm1, %xmm1
; CHECK-NEXT: vmovupd %xmm0, (%rdi)
; CHECK-NEXT: vmovupd %xmm1, 16(%rdi)
; CHECK-NEXT: vsubpd 16(%rdi), %xmm1, %xmm0
; CHECK-NEXT: vmovupd %xmm0, 16(%rdi)
; CHECK-NEXT: retq
entry:
%0 = load double, double* %a, align 8

View File

@ -360,47 +360,47 @@ define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %r9d
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %r8d
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %edi
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: movd %esi, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE-NEXT: movd %ecx, %xmm2
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE-NEXT: movd %edx, %xmm0
; SSE-NEXT: movd %esi, %xmm1
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: movd %edi, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE-NEXT: movd %edx, %xmm3
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE-NEXT: movd %r9d, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: movd %r8d, %xmm0
; SSE-NEXT: movd %ecx, %xmm2
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
@ -487,92 +487,92 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %r9d
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %r8d
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %edi
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: movd %esi, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE-NEXT: movd %ecx, %xmm2
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE-NEXT: movd %edx, %xmm0
; SSE-NEXT: movd %esi, %xmm1
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: movd %edi, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE-NEXT: movd %edx, %xmm3
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE-NEXT: movd %r9d, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: movd %r8d, %xmm0
; SSE-NEXT: movd %ecx, %xmm2
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %r9d
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %r8d
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %edi
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE-NEXT: movd %esi, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE-NEXT: movd %ecx, %xmm4
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; SSE-NEXT: movd %edx, %xmm1
; SSE-NEXT: movd %esi, %xmm3
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE-NEXT: movd %edi, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE-NEXT: movd %edx, %xmm5
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm5
; SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3],xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
; SSE-NEXT: movd %r9d, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE-NEXT: movd %r8d, %xmm1
; SSE-NEXT: movd %ecx, %xmm4
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE-NEXT: movd {{.*#+}} xmm6 = mem[0],zero,zero,zero
; SSE-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3],xmm1[4],xmm5[4],xmm1[5],xmm5[5],xmm1[6],xmm5[6],xmm1[7],xmm5[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm5
; SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm6
; SSE-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3],xmm1[4],xmm5[4],xmm1[5],xmm5[5],xmm1[6],xmm5[6],xmm1[7],xmm5[7]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: retq
;
@ -1180,91 +1180,87 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
;
; AVX1-LABEL: _clearupper32xi8b:
; AVX1: # BB#0:
; AVX1-NEXT: pushq %rbp
; AVX1-NEXT: pushq %r15
; AVX1-NEXT: pushq %r14
; AVX1-NEXT: pushq %r13
; AVX1-NEXT: pushq %r12
; AVX1-NEXT: pushq %rbx
; AVX1-NEXT: vpextrq $1, %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
; AVX1-NEXT: movq %rcx, %r8
; AVX1-NEXT: movq %rcx, %r9
; AVX1-NEXT: movq %rcx, %r10
; AVX1-NEXT: movq %rcx, %r11
; AVX1-NEXT: movq %rcx, %r14
; AVX1-NEXT: movq %rcx, %r15
; AVX1-NEXT: movq %rdx, %r12
; AVX1-NEXT: movq %rdx, %r13
; AVX1-NEXT: movq %rdx, %rdi
; AVX1-NEXT: movq %rdx, %rax
; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %r14
; AVX1-NEXT: vpextrq $1, %xmm0, %rdx
; AVX1-NEXT: movq %rdx, %r8
; AVX1-NEXT: movq %rdx, %r9
; AVX1-NEXT: movq %rdx, %r11
; AVX1-NEXT: movq %rdx, %rsi
; AVX1-NEXT: movq %rdx, %rbx
; AVX1-NEXT: movq %rdx, %rbp
; AVX1-NEXT: movq %rdx, %rdi
; AVX1-NEXT: movq %rdx, %rcx
; AVX1-NEXT: movq %rdx, %rax
; AVX1-NEXT: andb $15, %dl
; AVX1-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %rcx, %rdx
; AVX1-NEXT: andb $15, %cl
; AVX1-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: shrq $56, %rbp
; AVX1-NEXT: andb $15, %bpl
; AVX1-NEXT: movb %bpl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: shrq $48, %rbx
; AVX1-NEXT: andb $15, %bl
; AVX1-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: shrq $40, %rsi
; AVX1-NEXT: andb $15, %sil
; AVX1-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: shrq $32, %rax
; AVX1-NEXT: shrq $56, %rax
; AVX1-NEXT: andb $15, %al
; AVX1-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: shrq $24, %rdi
; AVX1-NEXT: movq %r14, %r10
; AVX1-NEXT: shrq $48, %rcx
; AVX1-NEXT: andb $15, %cl
; AVX1-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %r14, %rdx
; AVX1-NEXT: shrq $40, %rdi
; AVX1-NEXT: andb $15, %dil
; AVX1-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: shrq $16, %r13
; AVX1-NEXT: andb $15, %r13b
; AVX1-NEXT: movb %r13b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: shrq $8, %r12
; AVX1-NEXT: andb $15, %r12b
; AVX1-NEXT: movb %r12b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: shrq $8, %r8
; AVX1-NEXT: shrq $16, %r9
; AVX1-NEXT: shrq $24, %r10
; AVX1-NEXT: shrq $32, %r11
; AVX1-NEXT: shrq $40, %r14
; AVX1-NEXT: shrq $48, %r15
; AVX1-NEXT: shrq $56, %rdx
; AVX1-NEXT: andb $15, %dl
; AVX1-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %r15b
; AVX1-NEXT: movb %r15b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %r14b
; AVX1-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %r14, %rax
; AVX1-NEXT: shrq $32, %rsi
; AVX1-NEXT: andb $15, %sil
; AVX1-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %r14, %rcx
; AVX1-NEXT: shrq $24, %r11
; AVX1-NEXT: andb $15, %r11b
; AVX1-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %r10b
; AVX1-NEXT: movb %r10b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %r14, %rsi
; AVX1-NEXT: shrq $16, %r9
; AVX1-NEXT: andb $15, %r9b
; AVX1-NEXT: movb %r9b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %r14, %rdi
; AVX1-NEXT: shrq $8, %r8
; AVX1-NEXT: andb $15, %r8b
; AVX1-NEXT: movb %r8b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %r14, %rbx
; AVX1-NEXT: andb $15, %r14b
; AVX1-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: shrq $8, %r10
; AVX1-NEXT: shrq $16, %rdx
; AVX1-NEXT: shrq $24, %rax
; AVX1-NEXT: shrq $32, %rcx
; AVX1-NEXT: shrq $40, %rsi
; AVX1-NEXT: shrq $48, %rdi
; AVX1-NEXT: shrq $56, %rbx
; AVX1-NEXT: andb $15, %bl
; AVX1-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %dil
; AVX1-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %sil
; AVX1-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %cl
; AVX1-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %al
; AVX1-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %dl
; AVX1-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %r10b
; AVX1-NEXT: movb %r10b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: movq %rax, %r8
; AVX1-NEXT: movq %rax, %rdx
; AVX1-NEXT: movq %rax, %rsi
; AVX1-NEXT: movq %rax, %rdi
; AVX1-NEXT: movl %eax, %ebp
; AVX1-NEXT: movl %eax, %ebx
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: vmovd %eax, %xmm1
; AVX1-NEXT: shrl $8, %eax
; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX1-NEXT: shrl $16, %ebx
; AVX1-NEXT: vpinsrb $2, %ebx, %xmm1, %xmm1
; AVX1-NEXT: shrl $24, %ebp
; AVX1-NEXT: vpinsrb $3, %ebp, %xmm1, %xmm1
; AVX1-NEXT: shrl $16, %ecx
; AVX1-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
; AVX1-NEXT: shrl $24, %ebx
; AVX1-NEXT: vpinsrb $3, %ebx, %xmm1, %xmm1
; AVX1-NEXT: shrq $32, %rdi
; AVX1-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1
; AVX1-NEXT: shrq $40, %rsi
@ -1274,8 +1270,8 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
; AVX1-NEXT: shrq $48, %rdx
; AVX1-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: shrq $56, %rcx
; AVX1-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm0
; AVX1-NEXT: shrq $56, %r8
; AVX1-NEXT: vpinsrb $7, %r8d, %xmm1, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $8, %ecx
; AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
@ -1343,100 +1339,92 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: popq %rbx
; AVX1-NEXT: popq %r12
; AVX1-NEXT: popq %r13
; AVX1-NEXT: popq %r14
; AVX1-NEXT: popq %r15
; AVX1-NEXT: popq %rbp
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper32xi8b:
; AVX2: # BB#0:
; AVX2-NEXT: pushq %rbp
; AVX2-NEXT: pushq %r15
; AVX2-NEXT: pushq %r14
; AVX2-NEXT: pushq %r13
; AVX2-NEXT: pushq %r12
; AVX2-NEXT: pushq %rbx
; AVX2-NEXT: vpextrq $1, %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
; AVX2-NEXT: movq %rcx, %r8
; AVX2-NEXT: movq %rcx, %r9
; AVX2-NEXT: movq %rcx, %r10
; AVX2-NEXT: movq %rcx, %r11
; AVX2-NEXT: movq %rcx, %r14
; AVX2-NEXT: movq %rcx, %r15
; AVX2-NEXT: movq %rdx, %r12
; AVX2-NEXT: movq %rdx, %r13
; AVX2-NEXT: movq %rdx, %rdi
; AVX2-NEXT: movq %rdx, %rax
; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %r14
; AVX2-NEXT: vpextrq $1, %xmm0, %rdx
; AVX2-NEXT: movq %rdx, %r8
; AVX2-NEXT: movq %rdx, %r9
; AVX2-NEXT: movq %rdx, %r11
; AVX2-NEXT: movq %rdx, %rsi
; AVX2-NEXT: movq %rdx, %rbx
; AVX2-NEXT: movq %rdx, %rbp
; AVX2-NEXT: movq %rdx, %rdi
; AVX2-NEXT: movq %rdx, %rcx
; AVX2-NEXT: movq %rdx, %rax
; AVX2-NEXT: andb $15, %dl
; AVX2-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: andb $15, %cl
; AVX2-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: shrq $56, %rbp
; AVX2-NEXT: andb $15, %bpl
; AVX2-NEXT: movb %bpl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: shrq $48, %rbx
; AVX2-NEXT: andb $15, %bl
; AVX2-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: shrq $40, %rsi
; AVX2-NEXT: andb $15, %sil
; AVX2-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: shrq $32, %rax
; AVX2-NEXT: shrq $56, %rax
; AVX2-NEXT: andb $15, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: shrq $24, %rdi
; AVX2-NEXT: movq %r14, %r10
; AVX2-NEXT: shrq $48, %rcx
; AVX2-NEXT: andb $15, %cl
; AVX2-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r14, %rdx
; AVX2-NEXT: shrq $40, %rdi
; AVX2-NEXT: andb $15, %dil
; AVX2-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: shrq $16, %r13
; AVX2-NEXT: andb $15, %r13b
; AVX2-NEXT: movb %r13b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: shrq $8, %r12
; AVX2-NEXT: andb $15, %r12b
; AVX2-NEXT: movb %r12b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: shrq $8, %r8
; AVX2-NEXT: shrq $16, %r9
; AVX2-NEXT: shrq $24, %r10
; AVX2-NEXT: shrq $32, %r11
; AVX2-NEXT: shrq $40, %r14
; AVX2-NEXT: shrq $48, %r15
; AVX2-NEXT: shrq $56, %rdx
; AVX2-NEXT: andb $15, %dl
; AVX2-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %r15b
; AVX2-NEXT: movb %r15b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %r14b
; AVX2-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r14, %rax
; AVX2-NEXT: shrq $32, %rsi
; AVX2-NEXT: andb $15, %sil
; AVX2-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r14, %rcx
; AVX2-NEXT: shrq $24, %r11
; AVX2-NEXT: andb $15, %r11b
; AVX2-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %r10b
; AVX2-NEXT: movb %r10b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r14, %rsi
; AVX2-NEXT: shrq $16, %r9
; AVX2-NEXT: andb $15, %r9b
; AVX2-NEXT: movb %r9b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r14, %rdi
; AVX2-NEXT: shrq $8, %r8
; AVX2-NEXT: andb $15, %r8b
; AVX2-NEXT: movb %r8b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r14, %rbx
; AVX2-NEXT: andb $15, %r14b
; AVX2-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: shrq $8, %r10
; AVX2-NEXT: shrq $16, %rdx
; AVX2-NEXT: shrq $24, %rax
; AVX2-NEXT: shrq $32, %rcx
; AVX2-NEXT: shrq $40, %rsi
; AVX2-NEXT: shrq $48, %rdi
; AVX2-NEXT: shrq $56, %rbx
; AVX2-NEXT: andb $15, %bl
; AVX2-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %dil
; AVX2-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %sil
; AVX2-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %cl
; AVX2-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %dl
; AVX2-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %r10b
; AVX2-NEXT: movb %r10b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: movq %rax, %r8
; AVX2-NEXT: movq %rax, %rdx
; AVX2-NEXT: movq %rax, %rsi
; AVX2-NEXT: movq %rax, %rdi
; AVX2-NEXT: movl %eax, %ebp
; AVX2-NEXT: movl %eax, %ebx
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: shrl $8, %eax
; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX2-NEXT: shrl $16, %ebx
; AVX2-NEXT: vpinsrb $2, %ebx, %xmm1, %xmm1
; AVX2-NEXT: shrl $24, %ebp
; AVX2-NEXT: vpinsrb $3, %ebp, %xmm1, %xmm1
; AVX2-NEXT: shrl $16, %ecx
; AVX2-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
; AVX2-NEXT: shrl $24, %ebx
; AVX2-NEXT: vpinsrb $3, %ebx, %xmm1, %xmm1
; AVX2-NEXT: shrq $32, %rdi
; AVX2-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1
; AVX2-NEXT: shrq $40, %rsi
@ -1446,8 +1434,8 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
; AVX2-NEXT: shrq $48, %rdx
; AVX2-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
; AVX2-NEXT: shrq $56, %rcx
; AVX2-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm0
; AVX2-NEXT: shrq $56, %r8
; AVX2-NEXT: vpinsrb $7, %r8d, %xmm1, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $8, %ecx
; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
@ -1515,11 +1503,7 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: popq %rbx
; AVX2-NEXT: popq %r12
; AVX2-NEXT: popq %r13
; AVX2-NEXT: popq %r14
; AVX2-NEXT: popq %r15
; AVX2-NEXT: popq %rbp
; AVX2-NEXT: retq
%x4 = bitcast <32 x i8> %0 to <64 x i4>
%r0 = insertelement <64 x i4> %x4, i4 zeroinitializer, i32 1

View File

@ -1,20 +0,0 @@
; RUN: llc < %s -march=x86-64 -combiner-global-alias-analysis -combiner-alias-analysis
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
%struct.Hash_Key = type { [4 x i32], i32 }
@g_flipV_hashkey = external global %struct.Hash_Key, align 16 ; <%struct.Hash_Key*> [#uses=1]
define void @foo() nounwind {
%t0 = load i32, i32* undef, align 16 ; <i32> [#uses=1]
%t1 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%t2 = srem i32 %t0, 32 ; <i32> [#uses=1]
%t3 = shl i32 1, %t2 ; <i32> [#uses=1]
%t4 = xor i32 %t3, %t1 ; <i32> [#uses=1]
store i32 %t4, i32* null, align 4
%t5 = getelementptr %struct.Hash_Key, %struct.Hash_Key* @g_flipV_hashkey, i64 0, i32 0, i64 0 ; <i32*> [#uses=2]
%t6 = load i32, i32* %t5, align 4 ; <i32> [#uses=1]
%t7 = shl i32 1, undef ; <i32> [#uses=1]
%t8 = xor i32 %t7, %t6 ; <i32> [#uses=1]
store i32 %t8, i32* %t5, align 4
unreachable
}

View File

@ -1,23 +0,0 @@
; RUN: llc < %s --combiner-alias-analysis --combiner-global-alias-analysis
; PR4880
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-pc-linux-gnu"
%struct.alst_node = type { %struct.node }
%struct.arg_node = type { %struct.node, i8*, %struct.alst_node* }
%struct.arglst_node = type { %struct.alst_node, %struct.arg_node*, %struct.arglst_node* }
%struct.lam_node = type { %struct.alst_node, %struct.arg_node*, %struct.alst_node* }
%struct.node = type { i32 (...)**, %struct.node* }
define i32 @._ZN8lam_node18resolve_name_clashEP8arg_nodeP9alst_node._ZNK8lam_nodeeqERK8exp_node._ZN11arglst_nodeD0Ev(%struct.lam_node* %this.this, %struct.arg_node* %outer_arg, %struct.alst_node* %env.cmp, %struct.arglst_node* %this, i32 %functionID) {
comb_entry:
%.SV59 = alloca %struct.node* ; <%struct.node**> [#uses=1]
%0 = load i32 (...)**, i32 (...)*** null, align 4 ; <i32 (...)**> [#uses=1]
%1 = getelementptr inbounds i32 (...)*, i32 (...)** %0, i32 3 ; <i32 (...)**> [#uses=1]
%2 = load i32 (...)*, i32 (...)** %1, align 4 ; <i32 (...)*> [#uses=1]
store %struct.node* undef, %struct.node** %.SV59
%3 = bitcast i32 (...)* %2 to i32 (%struct.node*)* ; <i32 (%struct.node*)*> [#uses=1]
%4 = tail call i32 %3(%struct.node* undef) ; <i32> [#uses=0]
unreachable
}

View File

@ -9,19 +9,22 @@ target triple = "i686-unknown-linux-gnu"
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
; CHECK-LABEL: func:
; This tests whether eax is properly saved/restored around the lahf/sahf
; instruction sequences.
; This tests whether eax is properly saved/restored around the
; lahf/sahf instruction sequences. We make the memory operations
; volatile to prevent reordering that would otherwise eliminate the
; spills this test exercises.
define i32 @func() {
entry:
%bval = load i8, i8* @b
%inc = add i8 %bval, 1
store i8 %inc, i8* @b
%cval = load i32, i32* @c
store volatile i8 %inc, i8* @b
%cval = load volatile i32, i32* @c
%inc1 = add nsw i32 %cval, 1
store i32 %inc1, i32* @c
%aval = load i8, i8* @a
store volatile i32 %inc1, i32* @c
%aval = load volatile i8, i8* @a
%inc2 = add i8 %aval, 1
store i8 %inc2, i8* @a
store volatile i8 %inc2, i8* @a
; Copying the flags produced by the incb of %inc1 to a register requires
; saving and restoring eax around it. The flags will be reused by %tobool.
; CHECK: pushl %eax

View File

@ -51,19 +51,11 @@ define void @merge_vec_element_store(<4 x double> %v, double* %ptr) {
}
;; TODO: FAST *should* be:
;; movups (%rdi), %xmm0
;; movups %xmm0, 40(%rdi)
;; ..but is not currently. See the UseAA FIXME in DAGCombiner.cpp
;; visitSTORE.
define void @merge_vec_load_and_stores(i64 *%ptr) {
; FAST-LABEL: merge_vec_load_and_stores:
; FAST: # BB#0:
; FAST-NEXT: movq (%rdi), %rax
; FAST-NEXT: movq 8(%rdi), %rcx
; FAST-NEXT: movq %rax, 40(%rdi)
; FAST-NEXT: movq %rcx, 48(%rdi)
; FAST-NEXT: movups (%rdi), %xmm0
; FAST-NEXT: movups %xmm0, 40(%rdi)
; FAST-NEXT: retq
;
; SLOW-LABEL: merge_vec_load_and_stores:

View File

@ -1,9 +1,9 @@
; RUN: llc -march=x86 < %s | FileCheck %s
; CHECK-LABEL: @bar
; CHECK: movl $1074339512,
; CHECK: movl $1374389535,
; CHECK: movl $1078523331,
; CHECK-DAG: movl $1074339512,
; CHECK-DAG: movl $1374389535,
; CHECK-DAG: movl $1078523331,
define void @bar() unnamed_addr {
entry-block:
%a = alloca double

View File

@ -16,19 +16,20 @@ target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
; CHECK-NEXT: movl 20(%esp), %edx
; CHECK-NEXT: paddd (%edx), %xmm0
; CHECK-NEXT: movdqa %xmm0, (%edx)
; CHECK-NEXT: movl (%edx), %esi
; CHECK-NEXT: movl 12(%edx), %edi
; CHECK-NEXT: movl 8(%edx), %ebx
; CHECK-NEXT: movl 4(%edx), %edx
; CHECK-NEXT: shll $4, %ecx
; CHECK-NEXT: movl (%edx), %esi
; CHECK-NEXT: movl 4(%edx), %edi
; CHECK-NEXT: shll $4, %ecx
; CHECK-NEXT: movl 8(%edx), %ebx
; CHECK-NEXT: movl 12(%edx), %edx
; CHECK-NEXT: movl %esi, 12(%eax,%ecx)
; CHECK-NEXT: movl %edx, (%eax,%ecx)
; CHECK-NEXT: movl %edi, (%eax,%ecx)
; CHECK-NEXT: movl %ebx, 8(%eax,%ecx)
; CHECK-NEXT: movl %edi, 4(%eax,%ecx)
; CHECK-NEXT: movl %edx, 4(%eax,%ecx)
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: retl
define void @test_extractelement_legalization_storereuse(<4 x i32> %a, i32* nocapture %x, i32* nocapture readonly %y, i32 %i) #0 {
entry:
%0 = bitcast i32* %y to <4 x i32>*

View File

@ -6,45 +6,122 @@ define void @add(i256* %p, i256* %q) nounwind {
; X32-LABEL: add:
; X32: # BB#0:
; X32-NEXT: pushl %ebp
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: subl $16, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl 8(%ecx), %edi
; X32-NEXT: movl (%ecx), %esi
; X32-NEXT: movl 4(%ecx), %ebx
; X32-NEXT: movl 28(%eax), %edx
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
; X32-NEXT: movl 24(%eax), %edx
; X32-NEXT: addl (%eax), %esi
; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
; X32-NEXT: adcl 4(%eax), %ebx
; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X32-NEXT: adcl 8(%eax), %edi
; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
; X32-NEXT: movl 20(%eax), %ebx
; X32-NEXT: movl 12(%eax), %esi
; X32-NEXT: movl 16(%eax), %edi
; X32-NEXT: adcl 12(%ecx), %esi
; X32-NEXT: adcl 16(%ecx), %edi
; X32-NEXT: adcl 20(%ecx), %ebx
; X32-NEXT: adcl 24(%ecx), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
; X32-NEXT: adcl 28(%ecx), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
; X32-NEXT: movl %ebp, 8(%ecx)
; X32-NEXT: movl (%esp), %ebp # 4-byte Reload
; X32-NEXT: movl %ebp, 4(%ecx)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
; X32-NEXT: movl %ebp, (%ecx)
; X32-NEXT: movl %esi, 12(%ecx)
; X32-NEXT: movl %edi, 16(%ecx)
; X32-NEXT: movl %ebx, 20(%ecx)
; X32-NEXT: movl %edx, 24(%ecx)
; X32-NEXT: movl %eax, 28(%ecx)
; X32-NEXT: addl $16, %esp
; X32-NEXT: subl $28, %esp
; X32-NEXT: movl 12(%ebp), %edi
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: movl (%edi), %edx
; X32-NEXT: movl %ecx, %esi
; X32-NEXT: addl %edx, %esi
; X32-NEXT: movl 4(%edi), %ebx
; X32-NEXT: movl 4(%eax), %esi
; X32-NEXT: adcl %ebx, %esi
; X32-NEXT: pushl %eax
; X32-NEXT: seto %al
; X32-NEXT: lahf
; X32-NEXT: movl %eax, %esi
; X32-NEXT: popl %eax
; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill
; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill
; X32-NEXT: addl %edx, %ecx
; X32-NEXT: movl %ecx, -40(%ebp) # 4-byte Spill
; X32-NEXT: movl 8(%edi), %edx
; X32-NEXT: movl %edx, -28(%ebp) # 4-byte Spill
; X32-NEXT: movl 28(%edi), %ecx
; X32-NEXT: movl %ecx, -36(%ebp) # 4-byte Spill
; X32-NEXT: movl 24(%edi), %ecx
; X32-NEXT: movl %ecx, -20(%ebp) # 4-byte Spill
; X32-NEXT: movl 20(%edi), %ecx
; X32-NEXT: movl 16(%edi), %esi
; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill
; X32-NEXT: movl 12(%edi), %edi
; X32-NEXT: adcl %ebx, 4(%eax)
; X32-NEXT: movl 8(%eax), %ebx
; X32-NEXT: movl -16(%ebp), %esi # 4-byte Reload
; X32-NEXT: pushl %eax
; X32-NEXT: movl %esi, %eax
; X32-NEXT: addb $127, %al
; X32-NEXT: sahf
; X32-NEXT: popl %eax
; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: pushl %eax
; X32-NEXT: seto %al
; X32-NEXT: lahf
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: popl %eax
; X32-NEXT: adcl %edi, 12(%eax)
; X32-NEXT: pushl %eax
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: addb $127, %al
; X32-NEXT: sahf
; X32-NEXT: popl %eax
; X32-NEXT: adcl 12(%eax), %edi
; X32-NEXT: pushl %eax
; X32-NEXT: seto %al
; X32-NEXT: lahf
; X32-NEXT: movl %eax, %esi
; X32-NEXT: popl %eax
; X32-NEXT: movl 16(%eax), %ebx
; X32-NEXT: movl -24(%ebp), %edx # 4-byte Reload
; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: pushl %eax
; X32-NEXT: seto %al
; X32-NEXT: lahf
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: popl %eax
; X32-NEXT: pushl %eax
; X32-NEXT: seto %al
; X32-NEXT: lahf
; X32-NEXT: movl %eax, %edi
; X32-NEXT: popl %eax
; X32-NEXT: pushl %eax
; X32-NEXT: movl %esi, %eax
; X32-NEXT: addb $127, %al
; X32-NEXT: sahf
; X32-NEXT: popl %eax
; X32-NEXT: adcl %edx, 16(%eax)
; X32-NEXT: movl -32(%ebp), %edx # 4-byte Reload
; X32-NEXT: pushl %eax
; X32-NEXT: movl %edx, %eax
; X32-NEXT: addb $127, %al
; X32-NEXT: sahf
; X32-NEXT: popl %eax
; X32-NEXT: movl -28(%ebp), %edx # 4-byte Reload
; X32-NEXT: adcl %edx, 8(%eax)
; X32-NEXT: pushl %eax
; X32-NEXT: movl %edi, %eax
; X32-NEXT: addb $127, %al
; X32-NEXT: sahf
; X32-NEXT: popl %eax
; X32-NEXT: adcl %ecx, 20(%eax)
; X32-NEXT: pushl %eax
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: addb $127, %al
; X32-NEXT: sahf
; X32-NEXT: popl %eax
; X32-NEXT: adcl 20(%eax), %ecx
; X32-NEXT: pushl %eax
; X32-NEXT: seto %al
; X32-NEXT: lahf
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: popl %eax
; X32-NEXT: movl -20(%ebp), %edx # 4-byte Reload
; X32-NEXT: adcl %edx, 24(%eax)
; X32-NEXT: pushl %eax
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: addb $127, %al
; X32-NEXT: sahf
; X32-NEXT: popl %eax
; X32-NEXT: adcl 24(%eax), %edx
; X32-NEXT: movl -36(%ebp), %ecx # 4-byte Reload
; X32-NEXT: adcl %ecx, 28(%eax)
; X32-NEXT: movl -40(%ebp), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, (%eax)
; X32-NEXT: addl $28, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
@ -53,18 +130,28 @@ define void @add(i256* %p, i256* %q) nounwind {
;
; X64-LABEL: add:
; X64: # BB#0:
; X64-NEXT: movq 16(%rdi), %rax
; X64-NEXT: movq (%rdi), %r8
; X64-NEXT: movq 8(%rdi), %rdx
; X64-NEXT: movq 24(%rsi), %rcx
; X64-NEXT: addq (%rsi), %r8
; X64-NEXT: adcq 8(%rsi), %rdx
; X64-NEXT: adcq 16(%rsi), %rax
; X64-NEXT: adcq 24(%rdi), %rcx
; X64-NEXT: movq %rax, 16(%rdi)
; X64-NEXT: movq %rdx, 8(%rdi)
; X64-NEXT: movq %r8, (%rdi)
; X64-NEXT: movq %rcx, 24(%rdi)
; X64-NEXT: pushq %rbp
; X64-NEXT: movq %rsp, %rbp
; X64-NEXT: movq (%rdi), %rdx
; X64-NEXT: movq 8(%rdi), %r9
; X64-NEXT: movq 24(%rsi), %r8
; X64-NEXT: movq 8(%rsi), %r10
; X64-NEXT: movq 16(%rsi), %rcx
; X64-NEXT: movq (%rsi), %rsi
; X64-NEXT: movq %rdx, %rax
; X64-NEXT: addq %rsi, %rax
; X64-NEXT: adcq %r10, 8(%rdi)
; X64-NEXT: addq %rsi, %rdx
; X64-NEXT: adcq %r10, %r9
; X64-NEXT: pushfq
; X64-NEXT: popq %rax
; X64-NEXT: adcq %rcx, 16(%rdi)
; X64-NEXT: pushq %rax
; X64-NEXT: popfq
; X64-NEXT: adcq 16(%rdi), %rcx
; X64-NEXT: adcq %r8, 24(%rdi)
; X64-NEXT: movq %rdx, (%rdi)
; X64-NEXT: popq %rbp
; X64-NEXT: retq
%a = load i256, i256* %p
%b = load i256, i256* %q
@ -76,43 +163,110 @@ define void @sub(i256* %p, i256* %q) nounwind {
; X32-LABEL: sub:
; X32: # BB#0:
; X32-NEXT: pushl %ebp
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: subl $12, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl 16(%ecx), %eax
; X32-NEXT: movl 12(%ecx), %edx
; X32-NEXT: movl 8(%ecx), %edi
; X32-NEXT: movl (%ecx), %esi
; X32-NEXT: movl 4(%ecx), %ebp
; X32-NEXT: subl (%ebx), %esi
; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
; X32-NEXT: sbbl 4(%ebx), %ebp
; X32-NEXT: sbbl 8(%ebx), %edi
; X32-NEXT: sbbl 12(%ebx), %edx
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
; X32-NEXT: sbbl 16(%ebx), %eax
; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl 20(%ecx), %esi
; X32-NEXT: sbbl 20(%ebx), %esi
; X32-NEXT: subl $24, %esp
; X32-NEXT: movl 12(%ebp), %edi
; X32-NEXT: movl 8(%ebp), %ecx
; X32-NEXT: movl (%ecx), %eax
; X32-NEXT: movl 4(%ecx), %edx
; X32-NEXT: movl (%edi), %esi
; X32-NEXT: cmpl %esi, %eax
; X32-NEXT: movl 4(%edi), %ebx
; X32-NEXT: sbbl %ebx, %edx
; X32-NEXT: pushl %eax
; X32-NEXT: seto %al
; X32-NEXT: lahf
; X32-NEXT: movl %eax, %edx
; X32-NEXT: popl %eax
; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill
; X32-NEXT: movl %edx, -16(%ebp) # 4-byte Spill
; X32-NEXT: subl %esi, %eax
; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill
; X32-NEXT: movl 8(%edi), %esi
; X32-NEXT: movl 28(%edi), %eax
; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill
; X32-NEXT: movl 24(%edi), %eax
; X32-NEXT: movl %eax, -28(%ebp) # 4-byte Spill
; X32-NEXT: movl 20(%edi), %eax
; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill
; X32-NEXT: movl 16(%edi), %edx
; X32-NEXT: movl 12(%edi), %edi
; X32-NEXT: sbbl %ebx, 4(%ecx)
; X32-NEXT: movl 8(%ecx), %ebx
; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %eax
; X32-NEXT: addb $127, %al
; X32-NEXT: sahf
; X32-NEXT: sbbl %esi, %ebx
; X32-NEXT: pushl %eax
; X32-NEXT: seto %al
; X32-NEXT: lahf
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: popl %eax
; X32-NEXT: sbbl %edi, 12(%ecx)
; X32-NEXT: movl 12(%ecx), %eax
; X32-NEXT: pushl %eax
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: addb $127, %al
; X32-NEXT: sahf
; X32-NEXT: popl %eax
; X32-NEXT: sbbl %edi, %eax
; X32-NEXT: seto %al
; X32-NEXT: lahf
; X32-NEXT: movl %eax, %eax
; X32-NEXT: movl 16(%ecx), %edi
; X32-NEXT: sbbl %edx, %edi
; X32-NEXT: pushl %eax
; X32-NEXT: seto %al
; X32-NEXT: lahf
; X32-NEXT: movl %eax, %edi
; X32-NEXT: popl %eax
; X32-NEXT: pushl %eax
; X32-NEXT: seto %al
; X32-NEXT: lahf
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: popl %eax
; X32-NEXT: movl %eax, %eax
; X32-NEXT: addb $127, %al
; X32-NEXT: sahf
; X32-NEXT: sbbl %edx, 16(%ecx)
; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %eax
; X32-NEXT: addb $127, %al
; X32-NEXT: sahf
; X32-NEXT: sbbl %esi, 8(%ecx)
; X32-NEXT: pushl %eax
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: addb $127, %al
; X32-NEXT: sahf
; X32-NEXT: popl %eax
; X32-NEXT: movl -20(%ebp), %edx # 4-byte Reload
; X32-NEXT: sbbl %edx, 20(%ecx)
; X32-NEXT: movl 20(%ecx), %eax
; X32-NEXT: pushl %eax
; X32-NEXT: movl %edi, %eax
; X32-NEXT: addb $127, %al
; X32-NEXT: sahf
; X32-NEXT: popl %eax
; X32-NEXT: sbbl %edx, %eax
; X32-NEXT: seto %al
; X32-NEXT: lahf
; X32-NEXT: movl %eax, %eax
; X32-NEXT: movl -28(%ebp), %esi # 4-byte Reload
; X32-NEXT: sbbl %esi, 24(%ecx)
; X32-NEXT: movl 24(%ecx), %edx
; X32-NEXT: sbbl 24(%ebx), %edx
; X32-NEXT: movl 28(%ecx), %eax
; X32-NEXT: sbbl 28(%ebx), %eax
; X32-NEXT: movl %edi, 8(%ecx)
; X32-NEXT: movl %ebp, 4(%ecx)
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
; X32-NEXT: movl %edi, (%ecx)
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
; X32-NEXT: movl %edi, 12(%ecx)
; X32-NEXT: movl (%esp), %edi # 4-byte Reload
; X32-NEXT: movl %edi, 16(%ecx)
; X32-NEXT: movl %esi, 20(%ecx)
; X32-NEXT: movl %edx, 24(%ecx)
; X32-NEXT: movl %eax, 28(%ecx)
; X32-NEXT: addl $12, %esp
; X32-NEXT: movl %eax, %eax
; X32-NEXT: addb $127, %al
; X32-NEXT: sahf
; X32-NEXT: sbbl %esi, %edx
; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload
; X32-NEXT: sbbl %eax, 28(%ecx)
; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload
; X32-NEXT: movl %eax, (%ecx)
; X32-NEXT: addl $24, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
@@ -121,18 +275,28 @@ define void @sub(i256* %p, i256* %q) nounwind {
;
; X64-LABEL: sub:
; X64: # BB#0:
; X64-NEXT: movq 24(%rdi), %r8
; X64-NEXT: movq 16(%rdi), %rcx
; X64-NEXT: movq (%rdi), %rdx
; X64-NEXT: movq 8(%rdi), %rax
; X64-NEXT: subq (%rsi), %rdx
; X64-NEXT: sbbq 8(%rsi), %rax
; X64-NEXT: sbbq 16(%rsi), %rcx
; X64-NEXT: sbbq 24(%rsi), %r8
; X64-NEXT: movq %rcx, 16(%rdi)
; X64-NEXT: movq %rax, 8(%rdi)
; X64-NEXT: movq %rdx, (%rdi)
; X64-NEXT: movq %r8, 24(%rdi)
; X64-NEXT: pushq %rbp
; X64-NEXT: movq %rsp, %rbp
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 24(%rsi), %r8
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: movq 16(%rsi), %r9
; X64-NEXT: movq (%rsi), %rsi
; X64-NEXT: cmpq %rsi, %rax
; X64-NEXT: sbbq %rdx, 8(%rdi)
; X64-NEXT: subq %rsi, %rax
; X64-NEXT: sbbq %rdx, %rcx
; X64-NEXT: pushfq
; X64-NEXT: popq %rcx
; X64-NEXT: sbbq %r9, 16(%rdi)
; X64-NEXT: movq 16(%rdi), %rdx
; X64-NEXT: pushq %rcx
; X64-NEXT: popfq
; X64-NEXT: sbbq %r9, %rdx
; X64-NEXT: sbbq %r8, 24(%rdi)
; X64-NEXT: movq %rax, (%rdi)
; X64-NEXT: popq %rbp
; X64-NEXT: retq
%a = load i256, i256* %p
%b = load i256, i256* %q
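
The flag-save choreography above is what keeps the carry chain alive now that the stores are free to reorder: on x86-64, pushfq/popq %rax captures EFLAGS and pushq %rax/popfq restores it, while the 32-bit code uses seto %al (for OF) plus lahf (SF/ZF/AF/PF/CF into %ah), restored by addb $127, %al followed by sahf. The IR bodies are truncated above; a plausible completion of @add, assuming the usual load/add/store shape rather than quoting the test file:

define void @add(i256* %p, i256* %q) nounwind {
  %a = load i256, i256* %p
  %b = load i256, i256* %q
  %c = add i256 %a, %b          ; one wide add lowered to an addq + adcq chain
  store i256 %c, i256* %p       ; stores may now interleave with the adc chain
  ret void
}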


@@ -55,8 +55,7 @@ target triple = "i386-apple-macosx10.5"
;
; CHECK-NEXT: L_e$non_lazy_ptr, [[E:%[a-z]+]]
; CHECK-NEXT: movb [[D]], ([[E]])
; CHECK-NEXT: L_f$non_lazy_ptr, [[F:%[a-z]+]]
; CHECK-NEXT: movsbl ([[F]]), [[CONV:%[a-z]+]]
; CHECK-NEXT: movsbl ([[E]]), [[CONV:%[a-z]+]]
; CHECK-NEXT: movl $6, [[CONV:%[a-z]+]]
; The eflags is used in the next instruction.
; If that instruction disappears, we are not exercising the bug
@@ -96,7 +95,7 @@ for.end: ; preds = %for.cond.preheader
%.b3 = load i1, i1* @d, align 1
%tmp2 = select i1 %.b3, i8 0, i8 6
store i8 %tmp2, i8* @e, align 1
%tmp3 = load i8, i8* @f, align 1
%tmp3 = load i8, i8* @e, align 1
%conv = sext i8 %tmp3 to i32
%add = add nsw i32 %conv, 1
%rem = srem i32 %tmp1, %add


@@ -65,20 +65,20 @@ define void @i24_insert_bit(i24* %a, i1 zeroext %bit) {
define void @i56_or(i56* %a) {
; CHECK-LABEL: i56_or:
; CHECK: # BB#0:
; ACHECK-NEXT: movzwl 4(%rdi), %eax
; ACHECK-NEXT: movzbl 6(%rdi), %ecx
; ACHECK-NEXT: movl (%rdi), %edx
; ACHECK-NEXT: movb %cl, 6(%rdi)
; ACHECK-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<kill> %RCX<def>
; ACHECK-NEXT: shll $16, %ecx
; ACHECK-NEXT: orl %eax, %ecx
; ACHECK-NEXT: shlq $32, %rcx
; ACHECK-NEXT: orq %rcx, %rdx
; ACHECK-NEXT: orq $384, %rdx # imm = 0x180
; ACHECK-NEXT: movl %edx, (%rdi)
; ACHECK-NEXT: shrq $32, %rdx
; ACHECK-NEXT: movw %dx, 4(%rdi)
; ACHECK-NEXT: retq
; CHECK-NEXT: movzwl 4(%rdi), %eax
; CHECK-NEXT: movzbl 6(%rdi), %ecx
; CHECK-NEXT: movl (%rdi), %edx
; CHECK-NEXT: movb %cl, 6(%rdi)
; CHECK-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<kill> %RCX<def>
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: orl %eax, %ecx
; CHECK-NEXT: shlq $32, %rcx
; CHECK-NEXT: orq %rcx, %rdx
; CHECK-NEXT: orq $384, %rdx # imm = 0x180
; CHECK-NEXT: movl %edx, (%rdi)
; CHECK-NEXT: shrq $32, %rdx
; CHECK-NEXT: movw %dx, 4(%rdi)
; CHECK-NEXT: retq
%aa = load i56, i56* %a, align 1
%b = or i56 %aa, 384
store i56 %b, i56* %a, align 1
@@ -100,10 +100,10 @@ define void @i56_and_or(i56* %a) {
; CHECK-NEXT: andq %rax, %rcx
; CHECK-NEXT: movl %ecx, (%rdi)
; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: shrq $48, %rax
; CHECK-NEXT: movb %al, 6(%rdi)
; CHECK-NEXT: shrq $32, %rcx
; CHECK-NEXT: movw %cx, 4(%rdi)
; CHECK-NEXT: shrq $32, %rax
; CHECK-NEXT: movw %ax, 4(%rdi)
; CHECK-NEXT: shrq $48, %rcx
; CHECK-NEXT: movb %cl, 6(%rdi)
; CHECK-NEXT: retq
%b = load i56, i56* %a, align 1
%c = and i56 %b, -128
@@ -126,10 +126,10 @@ define void @i56_insert_bit(i56* %a, i1 zeroext %bit) {
; CHECK-NEXT: shlq $13, %rax
; CHECK-NEXT: movabsq $72057594037919743, %rdx # imm = 0xFFFFFFFFFFDFFF
; CHECK-NEXT: andq %rcx, %rdx
; CHECK-NEXT: orq %rdx, %rax
; CHECK-NEXT: orq %rdx, %rax
; CHECK-NEXT: movl %eax, (%rdi)
; CHECK-NEXT: shrq $48, %rdx
; CHECK-NEXT: movb %dl, 6(%rdi)
; CHECK-NEXT: movl %eax, (%rdi)
; CHECK-NEXT: shrq $32, %rax
; CHECK-NEXT: movw %ax, 4(%rdi)
; CHECK-NEXT: retq
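
Both the old and new orderings store the same three pieces of the 7-byte (i56) value: movl writes bits 0-31 at offset 0, shrq $32 + movw write bits 32-47 at offset 4, and shrq $48 + movb writes bits 48-55 at offset 6 (4 + 2 + 1 = 7 bytes); the diff only changes which copy of the value each shift consumes. A minimal sketch that triggers this legalization, assuming nothing beyond the illegal i56 type:

define void @i56_store_pieces(i56* %a, i56 %v) {
  ; no single x86 store is 7 bytes wide, so this splits into 4 + 2 + 1
  store i56 %v, i56* %a, align 1
  ret void
}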


@@ -1,7 +1,6 @@
; RUN: llc -march=x86-64 < %s | FileCheck %s
; RUN: llc -march=x86-64 < %s
; Check for a sane output. This testcase used to crash. See PR29132.
; CHECK: leal -1
; This testcase used to crash. See PR29132.
target triple = "x86_64-unknown-linux-gnu"


@@ -7,10 +7,8 @@ define void @test(i64* %P) nounwind {
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %ecx
; CHECK-NEXT: movl 4(%eax), %edx
; CHECK-NEXT: xorl $1, %ecx
; CHECK-NEXT: orl $2, %ecx
; CHECK-NEXT: movl %edx, 4(%eax)
; CHECK-NEXT: movl %ecx, (%eax)
; CHECK-NEXT: retl
%tmp1 = load i64, i64* %P, align 8
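
The two deleted lines are the high half's round trip: xorl $1 and orl $2 touch only bits 0-31, so loading 4(%eax) into %edx and storing it straight back is a no-op that improved load-store forwarding removes. The body is truncated above; an assumed completion consistent with the remaining checks (hypothetical, not copied from the test):

define void @test(i64* %P) nounwind {
  %tmp1 = load i64, i64* %P, align 8
  %tmp2 = xor i64 %tmp1, 1      ; affects only the low 32 bits
  %tmp3 = or i64 %tmp2, 2       ; likewise
  store i64 %tmp3, i64* %P, align 8
  ret void
}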


@@ -1037,12 +1037,12 @@ define <2 x i64> @merge_2i64_i64_12_volatile(i64* %ptr) nounwind uwtable noinlin
define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable noinline ssp {
; SSE2-LABEL: merge_4f32_f32_2345_volatile:
; SSE2: # BB#0:
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: retq
;
@@ -1065,13 +1065,13 @@ define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable n
; X32-SSE1-LABEL: merge_4f32_f32_2345_volatile:
; X32-SSE1: # BB#0:
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-SSE1-DAG: movss 8(%eax), %[[R0:xmm[0-3]]] # [[R0]] = mem[0],zero,zero,zero
; X32-SSE1-DAG: movss 12(%eax), %[[R1:xmm[0-3]]] # [[R1]] = mem[0],zero,zero,zero
; X32-SSE1-DAG: movss 16(%eax), %[[R2:xmm[0-3]]] # [[R2]] = mem[0],zero,zero,zero
; X32-SSE1-DAG: movss 20(%eax), %[[R3:xmm[0-3]]] # [[R3]] = mem[0],zero,zero,zero
; X32-SSE1-DAG: unpcklps %[[R2]], %[[R0]] # [[R0]] = [[R0]][0],[[R2]][0],[[R0]][1],[[R2]][1]
; X32-SSE1-DAG: unpcklps %[[R3]], %[[R1]] # [[R1]] = [[R1]][0],[[R3]][0],[[R1]][1],[[R3]][1]
; X32-SSE1-DAG: unpcklps %[[R1]], %[[R0]] # [[R0]] = [[R0]][0],[[R1]][0],[[R0]][1],[[R1]][1]
; X32-SSE1-NEXT: retl
;
; X32-SSE41-LABEL: merge_4f32_f32_2345_volatile:


@@ -668,10 +668,10 @@ define <16 x i16> @merge_16i16_i16_0uu3zzuuuuuzCuEF_volatile(i16* %ptr) nounwind
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm1
; AVX1-NEXT: vpinsrw $3, 6(%rdi), %xmm1, %xmm1
; AVX1-NEXT: vpinsrw $4, 24(%rdi), %xmm0, %xmm0
; AVX1-NEXT: vpinsrw $6, 28(%rdi), %xmm0, %xmm0
; AVX1-NEXT: vpinsrw $7, 30(%rdi), %xmm0, %xmm0
; AVX1-NEXT: vpinsrw $3, 6(%rdi), %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -679,10 +679,10 @@ define <16 x i16> @merge_16i16_i16_0uu3zzuuuuuzCuEF_volatile(i16* %ptr) nounwind
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm1
; AVX2-NEXT: vpinsrw $3, 6(%rdi), %xmm1, %xmm1
; AVX2-NEXT: vpinsrw $4, 24(%rdi), %xmm0, %xmm0
; AVX2-NEXT: vpinsrw $6, 28(%rdi), %xmm0, %xmm0
; AVX2-NEXT: vpinsrw $7, 30(%rdi), %xmm0, %xmm0
; AVX2-NEXT: vpinsrw $3, 6(%rdi), %xmm1, %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
@@ -690,10 +690,10 @@ define <16 x i16> @merge_16i16_i16_0uu3zzuuuuuzCuEF_volatile(i16* %ptr) nounwind
; AVX512F: # BB#0:
; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm1
; AVX512F-NEXT: vpinsrw $3, 6(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vpinsrw $4, 24(%rdi), %xmm0, %xmm0
; AVX512F-NEXT: vpinsrw $6, 28(%rdi), %xmm0, %xmm0
; AVX512F-NEXT: vpinsrw $7, 30(%rdi), %xmm0, %xmm0
; AVX512F-NEXT: vpinsrw $3, 6(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
;
@@ -702,10 +702,10 @@ define <16 x i16> @merge_16i16_i16_0uu3zzuuuuuzCuEF_volatile(i16* %ptr) nounwind
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; X32-AVX-NEXT: vpinsrw $0, (%eax), %xmm0, %xmm1
; X32-AVX-NEXT: vpinsrw $3, 6(%eax), %xmm1, %xmm1
; X32-AVX-NEXT: vpinsrw $4, 24(%eax), %xmm0, %xmm0
; X32-AVX-NEXT: vpinsrw $6, 28(%eax), %xmm0, %xmm0
; X32-AVX-NEXT: vpinsrw $7, 30(%eax), %xmm0, %xmm0
; X32-AVX-NEXT: vpinsrw $3, 6(%eax), %xmm1, %xmm1
; X32-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds i16, i16* %ptr, i64 0


@@ -21,11 +21,11 @@
; DBGDAG-DAG: [[LD2:t[0-9]+]]: i16,ch = load<LD2[%tmp81](align=1)> [[ENTRYTOKEN]], [[BASEPTR]], undef:i64
; DBGDAG-DAG: [[LD1:t[0-9]+]]: i8,ch = load<LD1[%tmp12]> [[ENTRYTOKEN]], [[ADDPTR]], undef:i64
; DBGDAG: [[LOADTOKEN:t[0-9]+]]: ch = TokenFactor [[LD2]]:1, [[LD1]]:1
; DBGDAG-DAG: [[ST1:t[0-9]+]]: ch = store<ST1[%tmp14]> [[ENTRYTOKEN]], [[LD1]], t{{[0-9]+}}, undef:i64
; DBGDAG-DAG: [[LOADTOKEN:t[0-9]+]]: ch = TokenFactor [[LD2]]:1, [[LD1]]:1
; DBGDAG-DAG: [[ST2:t[0-9]+]]: ch = store<ST2[%tmp10](align=1)> [[LOADTOKEN]], [[LD2]], t{{[0-9]+}}, undef:i64
; DBGDAG-DAG: [[ST1:t[0-9]+]]: ch = store<ST1[%tmp14]> [[ST2]], [[LD1]], t{{[0-9]+}}, undef:i64
; DBGDAG: X86ISD::RET_FLAG [[ST1]],
; DBGDAG: X86ISD::RET_FLAG t{{[0-9]+}},
; DBGDAG: Type-legalized selection DAG: BB#0 'merge_store_partial_overlap_load:'
define void @merge_store_partial_overlap_load([4 x i8]* %tmp) {
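
The rewritten checks encode the new chain shape: ST1 used to hang directly off the entry token in parallel with ST2, but because the two stores partially overlap, the combiner now threads ST1 through ST2 (and RET_FLAG through ST1), serializing them while the non-aliasing loads still join under a single TokenFactor. A hypothetical reduction of the pattern (offsets and names assumed, not recovered from %tmp81/%tmp12/%tmp10/%tmp14):

define void @partial_overlap_sketch([4 x i8]* %tmp) {
  %p0 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i64 0, i64 0
  %p2 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i64 0, i64 2
  %p3 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i64 0, i64 3
  %w0 = bitcast i8* %p0 to i16*
  %ld2 = load i16, i16* %w0, align 1   ; 2-byte load (LD2)
  %ld1 = load i8, i8* %p3              ; 1-byte load (LD1)
  %w2 = bitcast i8* %p2 to i16*
  store i16 %ld2, i16* %w2, align 1    ; ST2 writes bytes 2-3
  store i8 %ld1, i8* %p3               ; ST1 writes byte 3: overlaps ST2, so it must chain after it
  ret void
}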


@@ -1,31 +0,0 @@
; RUN: llc < %s -mtriple x86_64-apple-macosx10.9.0 | FileCheck %s
; PR18023
; CHECK: movabsq $4294967296, %rcx
; CHECK: movq %rcx, (%rax)
; CHECK: movl $1, 4(%rax)
; CHECK: movl $0, 4(%rax)
; CHECK: movq $1, 4(%rax)
@c = common global i32 0, align 4
@a = common global [3 x i32] zeroinitializer, align 4
@b = common global i32 0, align 4
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
define void @func() {
store i32 1, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 1), align 4
store i32 0, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 0), align 4
%1 = load volatile i32, i32* @b, align 4
store i32 1, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 1), align 4
store i32 0, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 1), align 4
%2 = load volatile i32, i32* @b, align 4
store i32 1, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 1), align 4
store i32 0, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4
%3 = load volatile i32, i32* @b, align 4
store i32 3, i32* @c, align 4
%4 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 1), align 4
%call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %4)
ret void
}
declare i32 @printf(i8*, ...)


@@ -1,8 +1,8 @@
; RUN: llc -mtriple=x86_64-unknown-unknown -force-split-store < %s | FileCheck %s
; CHECK-LABEL: int32_float_pair
; CHECK: movl %edi, (%rsi)
; CHECK: movss %xmm0, 4(%rsi)
; CHECK-DAG: movl %edi, (%rsi)
; CHECK-DAG: movss %xmm0, 4(%rsi)
define void @int32_float_pair(i32 %tmp1, float %tmp2, i64* %ref.tmp) {
entry:
%t0 = bitcast float %tmp2 to i32
@@ -15,8 +15,8 @@ entry:
}
; CHECK-LABEL: float_int32_pair
; CHECK: movss %xmm0, (%rsi)
; CHECK: movl %edi, 4(%rsi)
; CHECK-DAG: movss %xmm0, (%rsi)
; CHECK-DAG: movl %edi, 4(%rsi)
define void @float_int32_pair(float %tmp1, i32 %tmp2, i64* %ref.tmp) {
entry:
%t0 = bitcast float %tmp1 to i32
@@ -29,9 +29,9 @@ entry:
}
; CHECK-LABEL: int16_float_pair
; CHECK: movzwl %di, %eax
; CHECK: movl %eax, (%rsi)
; CHECK: movss %xmm0, 4(%rsi)
; CHECK-DAG: movzwl %di, %eax
; CHECK-DAG: movl %eax, (%rsi)
; CHECK-DAG: movss %xmm0, 4(%rsi)
define void @int16_float_pair(i16 signext %tmp1, float %tmp2, i64* %ref.tmp) {
entry:
%t0 = bitcast float %tmp2 to i32
@@ -44,9 +44,9 @@ entry:
}
; CHECK-LABEL: int8_float_pair
; CHECK: movzbl %dil, %eax
; CHECK: movl %eax, (%rsi)
; CHECK: movss %xmm0, 4(%rsi)
; CHECK-DAG: movzbl %dil, %eax
; CHECK-DAG: movl %eax, (%rsi)
; CHECK-DAG: movss %xmm0, 4(%rsi)
define void @int8_float_pair(i8 signext %tmp1, float %tmp2, i64* %ref.tmp) {
entry:
%t0 = bitcast float %tmp2 to i32
@@ -146,10 +146,9 @@ entry:
; CHECK: movw %di, (%rdx)
; CHECK: shrl $16, %edi
; CHECK: movb %dil, 2(%rdx)
; CHECK: movl %esi, %eax
; CHECK: shrl $16, %eax
; CHECK: movb %al, 6(%rdx)
; CHECK: movw %si, 4(%rdx)
; CHECK: movw %si, 4(%rdx)
; CHECK: shrl $16, %esi
; CHECK: movb %sil, 6(%rdx)
define void @int24_int24_pair(i24 signext %tmp1, i24 signext %tmp2, i48* %ref.tmp) {
entry:
%t1 = zext i24 %tmp2 to i48
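
The switch from CHECK to CHECK-DAG acknowledges that the two halves of each split pair are now independent stores whose emission order is not fixed; the int24_int24_pair checks stay ordered because its pieces reuse the shifted register, which still constrains the schedule. The IR bodies are truncated above; the first test plausibly continues as below (an assumed shape: -force-split-store splitting one i64 store built by shift/or):

define void @int32_float_pair_sketch(i32 %tmp1, float %tmp2, i64* %ref.tmp) {
entry:
  %t0 = bitcast float %tmp2 to i32
  %t1 = zext i32 %t0 to i64
  %t2 = shl nuw i64 %t1, 32     ; float bits in the high half
  %t3 = zext i32 %tmp1 to i64   ; integer in the low half
  %t4 = or i64 %t2, %t3
  store i64 %t4, i64* %ref.tmp, align 8   ; split into two 4-byte stores
  ret void
}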


@@ -13,9 +13,9 @@ target triple = "x86_64-unknown-linux-gnu"
;; the same result in memory in the end.
; CHECK-LABEL: redundant_stores_merging:
; CHECK: movl $123, e+8(%rip)
; CHECK: movabsq $1958505086977, %rax
; CHECK: movabsq $528280977409, %rax
; CHECK: movq %rax, e+4(%rip)
; CHECK: movl $456, e+8(%rip)
define void @redundant_stores_merging() {
entry:
store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4
@@ -26,9 +26,9 @@ entry:
;; This variant tests PR25154.
; CHECK-LABEL: redundant_stores_merging_reverse:
; CHECK: movl $123, e+8(%rip)
; CHECK: movabsq $1958505086977, %rax
; CHECK: movabsq $528280977409, %rax
; CHECK: movq %rax, e+4(%rip)
; CHECK: movl $456, e+8(%rip)
define void @redundant_stores_merging_reverse() {
entry:
store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
@@ -45,9 +45,8 @@ entry:
;; a movl, after the store to 3).
;; CHECK-LABEL: overlapping_stores_merging:
;; CHECK: movw $0, b+2(%rip)
;; CHECK: movl $1, b(%rip)
;; CHECK: movw $2, b+3(%rip)
;; CHECK: movw $1, b(%rip)
define void @overlapping_stores_merging() {
entry:
store i16 0, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 2) to i16*), align 2
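
The updated movabsq constants decode to the same final memory: 528280977409 = 0x7B00000001, so the single movq writes i32 1 to e+4 and i32 123 to e+8 (little endian), and the later movl $456, e+8 leaves (1, 456) just as the old 1958505086977 = 0x1C800000001 (456 in the high half) did. A reduced pair that exhibits the merge, assuming %structTy is three consecutive i32 fields:

%structTy = type { i32, i32, i32 }
@e = external global %structTy, align 4

define void @merged_pair_sketch() {
entry:
  ; these two adjacent constant stores merge into one movabsq/movq
  ; of (123 << 32) | 1 = 528280977409 at e+4
  store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4
  store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
  ret void
}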

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
@@ -13,16 +14,16 @@ define <4 x double> @var_shuffle_v4f64_v4f64_xxxx_i64(<4 x double> %x, i64 %i0,
; ALL-NEXT: movq %rsp, %rbp
; ALL-NEXT: andq $-32, %rsp
; ALL-NEXT: subq $64, %rsp
; ALL-NEXT: andl $3, %ecx
; ALL-NEXT: andl $3, %edx
; ALL-NEXT: andl $3, %esi
; ALL-NEXT: andl $3, %edi
; ALL-NEXT: andl $3, %ecx
; ALL-NEXT: andl $3, %edx
; ALL-NEXT: vmovaps %ymm0, (%rsp)
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: movq %rbp, %rsp
; ALL-NEXT: popq %rbp
; ALL-NEXT: retq
@@ -68,16 +69,16 @@ define <4 x double> @var_shuffle_v4f64_v4f64_uxx0_i64(<4 x double> %x, i64 %i0,
define <4 x double> @var_shuffle_v4f64_v2f64_xxxx_i64(<2 x double> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; ALL-LABEL: var_shuffle_v4f64_v2f64_xxxx_i64:
; ALL: # BB#0:
; ALL-NEXT: andl $1, %ecx
; ALL-NEXT: andl $1, %edx
; ALL-NEXT: andl $1, %esi
; ALL-NEXT: andl $1, %edi
; ALL-NEXT: andl $1, %ecx
; ALL-NEXT: andl $1, %edx
; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: retq
%x0 = extractelement <2 x double> %x, i64 %i0
%x1 = extractelement <2 x double> %x, i64 %i1
@@ -97,18 +98,18 @@ define <4 x i64> @var_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64 %i0, i64 %i
; AVX1-NEXT: movq %rsp, %rbp
; AVX1-NEXT: andq $-32, %rsp
; AVX1-NEXT: subq $64, %rsp
; AVX1-NEXT: andl $3, %ecx
; AVX1-NEXT: andl $3, %edx
; AVX1-NEXT: andl $3, %esi
; AVX1-NEXT: andl $3, %edi
; AVX1-NEXT: andl $3, %esi
; AVX1-NEXT: andl $3, %edx
; AVX1-NEXT: andl $3, %ecx
; AVX1-NEXT: vmovaps %ymm0, (%rsp)
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: movq %rbp, %rsp
; AVX1-NEXT: popq %rbp
; AVX1-NEXT: retq
@@ -119,18 +120,18 @@ define <4 x i64> @var_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64 %i0, i64 %i
; AVX2-NEXT: movq %rsp, %rbp
; AVX2-NEXT: andq $-32, %rsp
; AVX2-NEXT: subq $64, %rsp
; AVX2-NEXT: andl $3, %ecx
; AVX2-NEXT: andl $3, %edx
; AVX2-NEXT: andl $3, %esi
; AVX2-NEXT: andl $3, %edi
; AVX2-NEXT: andl $3, %esi
; AVX2-NEXT: andl $3, %edx
; AVX2-NEXT: andl $3, %ecx
; AVX2-NEXT: vmovaps %ymm0, (%rsp)
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: movq %rbp, %rsp
; AVX2-NEXT: popq %rbp
; AVX2-NEXT: retq
@@ -152,12 +153,12 @@ define <4 x i64> @var_shuffle_v4i64_v4i64_xx00_i64(<4 x i64> %x, i64 %i0, i64 %i
; AVX1-NEXT: movq %rsp, %rbp
; AVX1-NEXT: andq $-32, %rsp
; AVX1-NEXT: subq $64, %rsp
; AVX1-NEXT: andl $3, %esi
; AVX1-NEXT: andl $3, %edi
; AVX1-NEXT: andl $3, %esi
; AVX1-NEXT: vmovaps %ymm0, (%rsp)
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: movq %rbp, %rsp
@@ -170,12 +171,12 @@ define <4 x i64> @var_shuffle_v4i64_v4i64_xx00_i64(<4 x i64> %x, i64 %i0, i64 %i
; AVX2-NEXT: movq %rsp, %rbp
; AVX2-NEXT: andq $-32, %rsp
; AVX2-NEXT: subq $64, %rsp
; AVX2-NEXT: andl $3, %esi
; AVX2-NEXT: andl $3, %edi
; AVX2-NEXT: andl $3, %esi
; AVX2-NEXT: vmovaps %ymm0, (%rsp)
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: movq %rbp, %rsp
@@ -195,34 +196,34 @@ define <4 x i64> @var_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64 %i0, i64 %i
define <4 x i64> @var_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; AVX1-LABEL: var_shuffle_v4i64_v2i64_xxxx_i64:
; AVX1: # BB#0:
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: andl $1, %esi
; AVX1-NEXT: andl $1, %edi
; AVX1-NEXT: andl $1, %esi
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shuffle_v4i64_v2i64_xxxx_i64:
; AVX2: # BB#0:
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: andl $1, %esi
; AVX2-NEXT: andl $1, %edi
; AVX2-NEXT: andl $1, %esi
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
%x0 = extractelement <2 x i64> %x, i64 %i0
%x1 = extractelement <2 x i64> %x, i64 %i1
@@ -260,16 +261,14 @@ define <8 x float> @var_shuffle_v8f32_v8f32_xxxxxxxx_i32(<8 x float> %x, i32 %i0
; ALL-NEXT: movl 24(%rbp), %eax
; ALL-NEXT: andl $7, %eax
; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; ALL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; ALL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; ALL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; ALL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; ALL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; ALL-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; ALL-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1],xmm0[0],xmm3[3]
; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: movq %rbp, %rsp
; ALL-NEXT: popq %rbp
; ALL-NEXT: retq
@@ -313,16 +312,14 @@ define <8 x float> @var_shuffle_v8f32_v4f32_xxxxxxxx_i32(<4 x float> %x, i32 %i0
; ALL-NEXT: movl {{[0-9]+}}(%rsp), %eax
; ALL-NEXT: andl $3, %eax
; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; ALL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; ALL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; ALL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; ALL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; ALL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; ALL-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; ALL-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1],xmm0[0],xmm3[3]
; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: retq
%x0 = extractelement <4 x float> %x, i32 %i0
%x1 = extractelement <4 x float> %x, i32 %i1
@@ -363,32 +360,25 @@ define <16 x i16> @var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16(<16 x i16> %x,
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: movl 40(%rbp), %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrw $1, (%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT: movl 48(%rbp), %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrw $2, (%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT: movl 56(%rbp), %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrw $3, (%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT: movl 64(%rbp), %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrw $4, (%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT: movl 72(%rbp), %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrw $5, (%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT: movl 80(%rbp), %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrw $6, (%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT: movl 88(%rbp), %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT: andl $15, %edi
; AVX1-NEXT: movzwl (%rsp,%rdi,2), %eax
; AVX1-NEXT: vmovd %eax, %xmm1
@@ -404,12 +394,10 @@ define <16 x i16> @var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16(<16 x i16> %x,
; AVX1-NEXT: vpinsrw $5, (%rsp,%r9,2), %xmm1, %xmm1
; AVX1-NEXT: movl 16(%rbp), %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
; AVX1-NEXT: vpinsrw $6, (%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT: movl 24(%rbp), %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; AVX1-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: movq %rbp, %rsp
; AVX1-NEXT: popq %rbp
@@ -434,32 +422,25 @@ define <16 x i16> @var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16(<16 x i16> %x,
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: movl 40(%rbp), %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrw $1, (%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT: movl 48(%rbp), %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrw $2, (%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT: movl 56(%rbp), %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrw $3, (%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT: movl 64(%rbp), %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrw $4, (%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT: movl 72(%rbp), %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrw $5, (%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT: movl 80(%rbp), %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrw $6, (%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT: movl 88(%rbp), %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT: andl $15, %edi
; AVX2-NEXT: movzwl (%rsp,%rdi,2), %eax
; AVX2-NEXT: vmovd %eax, %xmm1
@@ -475,12 +456,10 @@ define <16 x i16> @var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16(<16 x i16> %x,
; AVX2-NEXT: vpinsrw $5, (%rsp,%r9,2), %xmm1, %xmm1
; AVX2-NEXT: movl 16(%rbp), %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
; AVX2-NEXT: vpinsrw $6, (%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT: movl 24(%rbp), %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; AVX2-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: movq %rbp, %rsp
; AVX2-NEXT: popq %rbp
@@ -536,32 +515,25 @@ define <16 x i16> @var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16(<8 x i16> %x, i
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: andl $7, %eax
; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrw $1, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: andl $7, %eax
; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrw $2, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: andl $7, %eax
; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrw $3, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: andl $7, %eax
; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrw $4, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: andl $7, %eax
; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrw $5, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: andl $7, %eax
; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrw $6, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: andl $7, %eax
; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrw $7, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT: andl $7, %edi
; AVX1-NEXT: movzwl -24(%rsp,%rdi,2), %eax
; AVX1-NEXT: vmovd %eax, %xmm1
@@ -577,12 +549,10 @@ define <16 x i16> @var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16(<8 x i16> %x, i
; AVX1-NEXT: vpinsrw $5, -24(%rsp,%r9,2), %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: andl $7, %eax
; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
; AVX1-NEXT: vpinsrw $6, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: andl $7, %eax
; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; AVX1-NEXT: vpinsrw $7, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -601,32 +571,25 @@ define <16 x i16> @var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16(<8 x i16> %x, i
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: andl $7, %eax
; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrw $1, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: andl $7, %eax
; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrw $2, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: andl $7, %eax
; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrw $3, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: andl $7, %eax
; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrw $4, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: andl $7, %eax
; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrw $5, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: andl $7, %eax
; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrw $6, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: andl $7, %eax
; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrw $7, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT: andl $7, %edi
; AVX2-NEXT: movzwl -24(%rsp,%rdi,2), %eax
; AVX2-NEXT: vmovd %eax, %xmm1
@@ -642,12 +605,10 @@ define <16 x i16> @var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16(<8 x i16> %x, i
; AVX2-NEXT: vpinsrw $5, -24(%rsp,%r9,2), %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: andl $7, %eax
; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
; AVX2-NEXT: vpinsrw $6, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: andl $7, %eax
; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; AVX2-NEXT: vpinsrw $7, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
%x0 = extractelement <8 x i16> %x, i32 %i0
@@ -707,11 +668,11 @@ define <4 x i64> @mem_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64* %i) nounwi
; AVX1-NEXT: vmovaps %ymm0, (%rsp)
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: movq %rbp, %rsp
; AVX1-NEXT: popq %rbp
; AVX1-NEXT: retq
@@ -733,11 +694,11 @@ define <4 x i64> @mem_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64* %i) nounwi
; AVX2-NEXT: vmovaps %ymm0, (%rsp)
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: movq %rbp, %rsp
; AVX2-NEXT: popq %rbp
; AVX2-NEXT: retq
@@ -774,11 +735,11 @@ define <4 x i64> @mem_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64* %i) nounwi
; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: mem_shuffle_v4i64_v2i64_xxxx_i64:
@@ -794,11 +755,11 @@ define <4 x i64> @mem_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64* %i) nounwi
; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
%p0 = getelementptr inbounds i64, i64* %i, i32 0
%p1 = getelementptr inbounds i64, i64* %i, i32 1
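
Two independent improvements recur through this file: the word load is now folded directly into vpinsrw (dropping the intermediate movzwl into %eax), and the lane-building order flips, so the insert chains pair up differently before vinsertf128/vinserti128. A reduced two-lane version of the pattern, assumed from the %x0/%x1 extracts visible in the tests:

define <8 x i16> @two_lane_sketch(<8 x i16> %x, i32 %i0, i32 %i1) {
  ; variable indices force %x onto the stack; each lane comes back as an
  ; indexed load that can feed vpinsrw directly
  %x0 = extractelement <8 x i16> %x, i32 %i0
  %x1 = extractelement <8 x i16> %x, i32 %i1
  %v0 = insertelement <8 x i16> undef, i16 %x0, i32 0
  %v1 = insertelement <8 x i16> %v0, i16 %x1, i32 1
  ret <8 x i16> %v1
}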


@@ -103,7 +103,7 @@ entry:
}
; CHECK-LABEL: test_mixed_1
; CHECK: movaps %xmm1, 16(%{{(e|r)}}sp)
; CHECK: movaps 16(%{{(e|r)}}sp), %xmm0
; CHECK: movaps %xmm1, %xmm0
; CHECK: ret{{q|l}}
define x86_vectorcallcc <4 x float> @test_mixed_2(%struct.HVA4 inreg %a, %struct.HVA4* %b, <4 x float> %c) {
@@ -149,7 +149,7 @@ entry:
}
; CHECK-LABEL: test_mixed_5
; CHECK: movaps %xmm5, 16(%{{(e|r)}}sp)
; CHECK: movaps 16(%{{(e|r)}}sp), %xmm0
; CHECK: movaps %xmm5, %xmm0
; CHECK: ret{{[ql]}}
define x86_vectorcallcc %struct.HVA4 @test_mixed_6(%struct.HVA4 inreg %a, %struct.HVA4* %b) {
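
The vectorcall changes are direct store-to-load forwarding: instead of reloading from the slot it just wrote at 16(%esp)/16(%rsp), the value is copied register to register (movaps %xmm1, %xmm0). A minimal model of the mechanism, with names assumed for illustration:

define <4 x float> @forward_sketch(<4 x float> %v) {
  %slot = alloca <4 x float>
  store <4 x float> %v, <4 x float>* %slot   ; spill survives for the caller
  %r = load <4 x float>, <4 x float>* %slot  ; reload forwarded from %v
  ret <4 x float> %r
}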


@@ -27,23 +27,26 @@ catch:
; CHECK-LABEL: _use_except_handler3:
; CHECK: pushl %ebp
; CHECK: movl %esp, %ebp
; CHECK: pushl %ebx
; CHECK: pushl %edi
; CHECK: pushl %esi
; CHECK: subl ${{[0-9]+}}, %esp
; CHECK: movl $-1, -16(%ebp)
; CHECK: movl $L__ehtable$use_except_handler3, -20(%ebp)
; CHECK: leal -28(%ebp), %[[node:[^ ,]*]]
; CHECK: movl $__except_handler3, -24(%ebp)
; CHECK: movl %fs:0, %[[next:[^ ,]*]]
; CHECK: movl %[[next]], -28(%ebp)
; CHECK: movl %[[node]], %fs:0
; CHECK: calll _may_throw_or_crash
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: subl ${{[0-9]+}}, %esp
; CHECK-NEXT: movl %esp, -36(%ebp)
; CHECK-NEXT: movl $-1, -16(%ebp)
; CHECK-NEXT: movl $L__ehtable$use_except_handler3, -20(%ebp)
; CHECK-NEXT: leal -28(%ebp), %[[node:[^ ,]*]]
; CHECK-NEXT: movl $__except_handler3, -24(%ebp)
; CHECK-NEXT: movl %fs:0, %[[next:[^ ,]*]]
; CHECK-NEXT: movl %[[next]], -28(%ebp)
; CHECK-NEXT: movl %[[node]], %fs:0
; CHECK-NEXT: movl $0, -16(%ebp)
; CHECK-NEXT: calll _may_throw_or_crash
; CHECK: movl -28(%ebp), %[[next:[^ ,]*]]
; CHECK: movl %[[next]], %fs:0
; CHECK-NEXT: movl %[[next]], %fs:0
; CHECK: retl
; CHECK: LBB1_2: # %catch{{$}}
; CHECK-NEXT: LBB1_2: # %catch{{$}}
; CHECK: .section .xdata,"dr"
; CHECK-LABEL: L__ehtable$use_except_handler3:
@@ -66,23 +69,37 @@ catch:
; CHECK-LABEL: _use_except_handler4:
; CHECK: pushl %ebp
; CHECK: movl %esp, %ebp
; CHECK: subl ${{[0-9]+}}, %esp
; CHECK: movl %esp, -36(%ebp)
; CHECK: movl $-2, -16(%ebp)
; CHECK: movl $L__ehtable$use_except_handler4, %[[lsda:[^ ,]*]]
; CHECK: xorl ___security_cookie, %[[lsda]]
; CHECK: movl %[[lsda]], -20(%ebp)
; CHECK: leal -28(%ebp), %[[node:[^ ,]*]]
; CHECK: movl $__except_handler4, -24(%ebp)
; CHECK: movl %fs:0, %[[next:[^ ,]*]]
; CHECK: movl %[[next]], -28(%ebp)
; CHECK: movl %[[node]], %fs:0
; CHECK: calll _may_throw_or_crash
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: subl ${{[0-9]+}}, %esp
; CHECK-NEXT: movl %ebp, %eax
; CHECK-NEXT: movl %esp, -36(%ebp)
; CHECK-NEXT: movl $-2, -16(%ebp)
; CHECK-NEXT: movl $L__ehtable$use_except_handler4, %[[lsda:[^ ,]*]]
; CHECK-NEXT: movl ___security_cookie, %[[seccookie:[^ ,]*]]
; CHECK-NEXT: xorl %[[seccookie]], %[[lsda]]
; CHECK-NEXT: movl %[[lsda]], -20(%ebp)
; CHECK-NEXT: xorl %[[seccookie]], %[[tmp1:[^ ,]*]]
; CHECK-NEXT: movl %[[tmp1]], -40(%ebp)
; CHECK-NEXT: leal -28(%ebp), %[[node:[^ ,]*]]
; CHECK-NEXT: movl $__except_handler4, -24(%ebp)
; CHECK-NEXT: movl %fs:0, %[[next:[^ ,]*]]
; CHECK-NEXT: movl %[[next]], -28(%ebp)
; CHECK-NEXT: movl %[[node]], %fs:0
; CHECK-NEXT: movl $0, -16(%ebp)
; CHECK-NEXT: calll _may_throw_or_crash
; CHECK: movl -28(%ebp), %[[next:[^ ,]*]]
; CHECK: movl %[[next]], %fs:0
; CHECK: retl
; CHECK: LBB2_2: # %catch{{$}}
; CHECK-NEXT: movl %[[next]], %fs:0
; CHECK-NEXT: addl $28, %esp
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl
; CHECK-NEXT: LBB2_2: # %catch{{$}}
; CHECK: .section .xdata,"dr"
; CHECK-LABEL: L__ehtable$use_except_handler4:
@@ -109,26 +126,33 @@ catch:
; CHECK-LABEL: _use_except_handler4_ssp:
; CHECK: pushl %ebp
; CHECK: movl %esp, %ebp
; CHECK: subl ${{[0-9]+}}, %esp
; CHECK: movl %ebp, %[[ehguard:[^ ,]*]]
; CHECK: movl %esp, -36(%ebp)
; CHECK: movl $-2, -16(%ebp)
; CHECK: movl $L__ehtable$use_except_handler4_ssp, %[[lsda:[^ ,]*]]
; CHECK: xorl ___security_cookie, %[[lsda]]
; CHECK: movl %[[lsda]], -20(%ebp)
; CHECK: xorl ___security_cookie, %[[ehguard]]
; CHECK: movl %[[ehguard]], -40(%ebp)
; CHECK: leal -28(%ebp), %[[node:[^ ,]*]]
; CHECK: movl $__except_handler4, -24(%ebp)
; CHECK: movl %fs:0, %[[next:[^ ,]*]]
; CHECK: movl %[[next]], -28(%ebp)
; CHECK: movl %[[node]], %fs:0
; CHECK: calll _may_throw_or_crash
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: subl ${{[0-9]+}}, %esp
; CHECK-NEXT: movl %ebp, %[[ehguard:[^ ,]*]]
; CHECK-NEXT: movl %esp, -36(%ebp)
; CHECK-NEXT: movl $-2, -16(%ebp)
; CHECK-NEXT: movl $L__ehtable$use_except_handler4_ssp, %[[lsda:[^ ,]*]]
; CHECK-NEXT: movl ___security_cookie, %[[seccookie:[^ ,]*]]
; CHECK-NEXT: xorl %[[seccookie]], %[[lsda]]
; CHECK-NEXT: movl %[[lsda]], -20(%ebp)
; CHECK-NEXT: xorl %[[seccookie]], %[[ehguard]]
; CHECK-NEXT: movl %[[ehguard]], -40(%ebp)
; CHECK-NEXT: leal -28(%ebp), %[[node:[^ ,]*]]
; CHECK-NEXT: movl $__except_handler4, -24(%ebp)
; CHECK-NEXT: movl %fs:0, %[[next:[^ ,]*]]
; CHECK-NEXT: movl %[[next]], -28(%ebp)
; CHECK-NEXT: movl %[[node]], %fs:0
; CHECK-NEXT: movl $0, -16(%ebp)
; CHECK-NEXT: calll _may_throw_or_crash
; CHECK: movl -28(%ebp), %[[next:[^ ,]*]]
; CHECK: movl %[[next]], %fs:0
; CHECK-NEXT: movl %[[next]], %fs:0
; CHECK: retl
; CHECK: [[catch:[^ ,]*]]: # %catch{{$}}
; CHECK-NEXT: [[catch:[^ ,]*]]: # %catch{{$}}
; CHECK: .section .xdata,"dr"
; CHECK-LABEL: L__ehtable$use_except_handler4_ssp:
@@ -155,23 +179,26 @@ catch:
; CHECK-LABEL: _use_CxxFrameHandler3:
; CHECK: pushl %ebp
; CHECK: movl %esp, %ebp
; CHECK: subl ${{[0-9]+}}, %esp
; CHECK: movl %esp, -28(%ebp)
; CHECK: movl $-1, -16(%ebp)
; CHECK: leal -24(%ebp), %[[node:[^ ,]*]]
; CHECK: movl $___ehhandler$use_CxxFrameHandler3, -20(%ebp)
; CHECK: movl %fs:0, %[[next:[^ ,]*]]
; CHECK: movl %[[next]], -24(%ebp)
; CHECK: movl %[[node]], %fs:0
; CHECK: movl $0, -16(%ebp)
; CHECK: calll _may_throw_or_crash
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: subl ${{[0-9]+}}, %esp
; CHECK-NEXT: movl %esp, -28(%ebp)
; CHECK-NEXT: movl $-1, -16(%ebp)
; CHECK-NEXT: leal -24(%ebp), %[[node:[^ ,]*]]
; CHECK-NEXT: movl $___ehhandler$use_CxxFrameHandler3, -20(%ebp)
; CHECK-NEXT: movl %fs:0, %[[next:[^ ,]*]]
; CHECK-NEXT: movl %[[next]], -24(%ebp)
; CHECK-NEXT: movl %[[node]], %fs:0
; CHECK-NEXT: movl $0, -16(%ebp)
; CHECK-NEXT: calll _may_throw_or_crash
; CHECK: movl -24(%ebp), %[[next:[^ ,]*]]
; CHECK: movl %[[next]], %fs:0
; CHECK-NEXT: movl %[[next]], %fs:0
; CHECK: retl
; CHECK: .section .xdata,"dr"
; CHECK: .p2align 2
; CHECK-NEXT: .p2align 2
; CHECK-LABEL: L__ehtable$use_CxxFrameHandler3:
; CHECK-NEXT: .long 429065506
; CHECK-NEXT: .long 2
@@ -185,8 +212,8 @@ catch:
; CHECK-LABEL: ___ehhandler$use_CxxFrameHandler3:
; CHECK: movl $L__ehtable$use_CxxFrameHandler3, %eax
; CHECK: jmp ___CxxFrameHandler3 # TAILCALL
; CHECK-NEXT: jmp ___CxxFrameHandler3 # TAILCALL
; CHECK: .safeseh __except_handler3
; CHECK: .safeseh __except_handler4
; CHECK: .safeseh ___ehhandler$use_CxxFrameHandler3
; CHECK-NEXT: .safeseh __except_handler4
; CHECK-NEXT: .safeseh ___ehhandler$use_CxxFrameHandler3
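
The tightened handler4/handler4_ssp checks also capture a newly shared load: the old pattern folded ___security_cookie into each xorl as a memory operand, while the new code loads the cookie once into a register and XORs it into both the LSDA slot and the EH guard. A rough IR model of that sharing, with every name hypothetical (the real sequence is emitted by SEH lowering, not written as IR):

@__security_cookie = external global i32

define void @cookie_pair_sketch(i32 %lsda, i32 %guard, i32* %frame) {
  %c1 = load i32, i32* @__security_cookie
  %x1 = xor i32 %c1, %lsda
  store i32 %x1, i32* %frame                  ; known not to alias the cookie
  %c2 = load i32, i32* @__security_cookie     ; combines with the first load
  %x2 = xor i32 %c2, %guard
  %slot = getelementptr i32, i32* %frame, i32 1
  store i32 %x2, i32* %slot
  ret void
}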


@@ -26,10 +26,10 @@ entry:
; CHECK-LABEL: test_vararg
; CHECK: extsp 6
; CHECK: stw lr, sp[1]
; CHECK: stw r3, sp[6]
; CHECK: stw r0, sp[3]
; CHECK: stw r1, sp[4]
; CHECK: stw r2, sp[5]
; CHECK: stw r3, sp[6]
; CHECK: ldaw r0, sp[3]
; CHECK: stw r0, sp[2]
%list = alloca i8*, align 4