R600/SI: Enable a lot of existing tests for VI (squashed commits)

This is a union of these commits:

* R600/SI: Enable more tests for VI which need no changes

* R600/SI: Enable V_BCNT tests for VI

  Differences:
  - v_bcnt_..._e32 -> _e64
  - s_load_dword* inline offset is in bytes instead of dwords

* R600/SI: Enable all tests for VI which use S_LOAD_DWORD

  The inline offset is changed from dwords to bytes.

* R600/SI: Enable LDS tests for VI

  Differences:
  - the s_load_dword inline offset changed from dwords to bytes
  - the tests checked very little on CI, so they have been fixed to check all
    instructions that "SI" checked

* R600/SI: Enable lshr tests for VI

* R600/SI: Fix divrem64 tests

  - "v_lshl_64" was missing "b" before "64"
  - added VI-NOT checks

* R600/SI: Enable the SI.tid test for VI

* R600/SI: Enable the frem test for VI

  Also, the frem_f64 checking is added for CI-VI.

* R600/SI: Add VI tests for rsq.clamped

llvm-svn: 228830
2015-02-11 14:26:46 +00:00 · 2015-02-11 14:26:46 +00:00 · fa6607d0b6
parent 2a0e435db1
commit fa6607d0b6
38 changed files with 1165 additions and 994 deletions
--- a/llvm/test/CodeGen/R600/64bit-kernel-args.ll
+++ b/llvm/test/CodeGen/R600/64bit-kernel-args.ll
@ -1,9 +1,12 @@
-; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI
+; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=SI
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=VI

-; SI: {{^}}f64_kernel_arg:
+; GCN: {{^}}f64_kernel_arg:
 ; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
 ; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
-; SI: buffer_store_dwordx2
+; VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x24
+; VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x2c
+; GCN: buffer_store_dwordx2
 define void @f64_kernel_arg(double addrspace(1)* %out, double  %in) {
 entry:
  store double %in, double addrspace(1)* %out
--- a/llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll
+++ b/llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll
@ -1,14 +1,17 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SICI -check-prefix=GCN -check-prefix=FUNC  %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SICI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s

 ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset:
-; SI: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
-; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
-; SI: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
+; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SICI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
+; GCN: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
+; GCN: s_endpgm
 define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
@ -18,16 +21,18 @@ define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrs
 }

 ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i64_offset:
-; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
-; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
-; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
-; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
-; SI: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
-; SI: buffer_store_dwordx2 [[RESULT]],
-; SI: s_endpgm
+; GCN-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
+; GCN-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
+; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SICI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
+; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; GCN-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
+; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
+; GCN: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
+; GCN: buffer_store_dwordx2 [[RESULT]],
+; GCN: s_endpgm
 define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
@ -38,8 +43,8 @@ define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrs

 ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_bad_si_offset
 ; SI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; CI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
-; SI: s_endpgm
+; CIVI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
+; GCN: s_endpgm
 define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
  %sub = sub i32 %a, %b
  %add = add i32 %sub, 4
@ -51,13 +56,15 @@ define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i3
 }

 ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i32_offset:
-; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
-; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa
-; SI-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
-; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
-; SI: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
-; SI: s_endpgm
+; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
+; SICI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa
+; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
+; VI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x28
+; GCN-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
+; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
+; GCN: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
+; GCN: s_endpgm
 define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %swap) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
@ -66,15 +73,17 @@ define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %sw
 }

 ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i64_offset:
-; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
-; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
-; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
-; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
-; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
-; SI: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
-; SI: s_endpgm
+; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
+; SICI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
+; VI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; GCN-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
+; GCN-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
+; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; GCN-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
+; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
+; GCN: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
+; GCN: s_endpgm
 define void @lds_atomic_cmpxchg_noret_i64_offset(i64 addrspace(3)* %ptr, i64 %swap) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
--- a/llvm/test/CodeGen/R600/ctpop.ll
+++ b/llvm/test/CodeGen/R600/ctpop.ll
@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

 declare i32 @llvm.ctpop.i32(i32) nounwind readnone
@ -8,11 +9,11 @@ declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
 declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone

 ; FUNC-LABEL: {{^}}s_ctpop_i32:
-; SI: s_load_dword [[SVAL:s[0-9]+]],
-; SI: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]]
-; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: buffer_store_dword [[VRESULT]],
-; SI: s_endpgm
+; GCN: s_load_dword [[SVAL:s[0-9]+]],
+; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]]
+; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; GCN: buffer_store_dword [[VRESULT]],
+; GCN: s_endpgm

 ; EG: BCNT_INT
 define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
@ -23,10 +24,10 @@ define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {

 ; XXX - Why 0 in register?
 ; FUNC-LABEL: {{^}}v_ctpop_i32:
-; SI: buffer_load_dword [[VAL:v[0-9]+]],
-; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; GCN: buffer_load_dword [[VAL:v[0-9]+]],
+; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm

 ; EG: BCNT_INT
 define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
@ -37,12 +38,13 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali
 }

 ; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32:
-; SI: buffer_load_dword [[VAL1:v[0-9]+]],
-; SI: buffer_load_dword [[VAL0:v[0-9]+]],
-; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
+; GCN: buffer_load_dword [[VAL1:v[0-9]+]],
+; GCN: buffer_load_dword [[VAL0:v[0-9]+]],
+; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
 ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm

 ; EG: BCNT_INT
 ; EG: BCNT_INT
@ -57,11 +59,11 @@ define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace
 }

 ; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32:
-; SI: buffer_load_dword [[VAL0:v[0-9]+]],
-; SI-NEXT: s_waitcnt
-; SI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
-; SI-NEXT: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; GCN: buffer_load_dword [[VAL0:v[0-9]+]],
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
+; GCN-NEXT: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
 define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
  %val0 = load i32 addrspace(1)* %in0, align 4
  %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
@ -71,9 +73,9 @@ define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(
 }

 ; FUNC-LABEL: {{^}}v_ctpop_v2i32:
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: s_endpgm
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: s_endpgm

 ; EG: BCNT_INT
 ; EG: BCNT_INT
@ -85,11 +87,11 @@ define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrs
 }

 ; FUNC-LABEL: {{^}}v_ctpop_v4i32:
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: s_endpgm
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: s_endpgm

 ; EG: BCNT_INT
 ; EG: BCNT_INT
@ -103,15 +105,15 @@ define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrs
 }

 ; FUNC-LABEL: {{^}}v_ctpop_v8i32:
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: s_endpgm
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: s_endpgm

 ; EG: BCNT_INT
 ; EG: BCNT_INT
@ -129,23 +131,23 @@ define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrs
 }

 ; FUNC-LABEL: {{^}}v_ctpop_v16i32:
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: s_endpgm
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: s_endpgm

 ; EG: BCNT_INT
 ; EG: BCNT_INT
@ -171,10 +173,10 @@ define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> ad
 }

 ; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant:
-; SI: buffer_load_dword [[VAL:v[0-9]+]],
-; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; GCN: buffer_load_dword [[VAL:v[0-9]+]],
+; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm

 ; EG: BCNT_INT
 define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
@ -186,10 +188,10 @@ define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32
 }

 ; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv:
-; SI: buffer_load_dword [[VAL:v[0-9]+]],
-; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; GCN: buffer_load_dword [[VAL:v[0-9]+]],
+; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm

 ; EG: BCNT_INT
 define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
@ -201,11 +203,12 @@ define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out,
 }

 ; FUNC-LABEL: {{^}}v_ctpop_i32_add_literal:
-; SI: buffer_load_dword [[VAL:v[0-9]+]],
-; SI: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f
+; GCN: buffer_load_dword [[VAL:v[0-9]+]],
+; GCN: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f
 ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
 define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@ -215,11 +218,11 @@ define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspa
 }

 ; FUNC-LABEL: {{^}}v_ctpop_i32_add_var:
-; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
-; SI-DAG: s_load_dword [[VAR:s[0-9]+]],
-; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
+; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
+; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm

 ; EG: BCNT_INT
 define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
@ -231,11 +234,11 @@ define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1
 }

 ; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv:
-; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
-; SI-DAG: s_load_dword [[VAR:s[0-9]+]],
-; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
+; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
+; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm

 ; EG: BCNT_INT
 define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
@ -247,11 +250,12 @@ define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspa
 }

 ; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv:
-; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
-; SI-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:16
+; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
+; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:16
 ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm

 ; EG: BCNT_INT
 define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
@ -269,10 +273,11 @@ define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrsp

 ; FUNC-LABEL: {{^}}ctpop_i32_in_br:
 ; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd
-; SI: s_bcnt1_i32_b32  [[SRESULT:s[0-9]+]], [[VAL]]
-; SI: v_mov_b32_e32 [[RESULT]], [[SRESULT]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x34
+; GCN: s_bcnt1_i32_b32  [[SRESULT:s[0-9]+]], [[VAL]]
+; GCN: v_mov_b32_e32 [[RESULT]], [[SRESULT]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
 ; EG: BCNT_INT
 define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, i32 %cond) {
 entry:
--- a/llvm/test/CodeGen/R600/ctpop64.ll
+++ b/llvm/test/CodeGen/R600/ctpop64.ll
@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s

 declare i64 @llvm.ctpop.i64(i64) nounwind readnone
 declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone
@ -8,10 +9,11 @@ declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>) nounwind readnone

 ; FUNC-LABEL: {{^}}s_ctpop_i64:
 ; SI: s_load_dwordx2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: s_bcnt1_i32_b64 [[SRESULT:s[0-9]+]], [[SVAL]]
-; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: buffer_store_dword [[VRESULT]],
-; SI: s_endpgm
+; VI: s_load_dwordx2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; GCN: s_bcnt1_i32_b64 [[SRESULT:s[0-9]+]], [[SVAL]]
+; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; GCN: buffer_store_dword [[VRESULT]],
+; GCN: s_endpgm
 define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
  %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
  %truncctpop = trunc i64 %ctpop to i32
@ -20,11 +22,12 @@ define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
 }

 ; FUNC-LABEL: {{^}}v_ctpop_i64:
-; SI: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
-; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
+; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
+; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
 ; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
 define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  %val = load i64 addrspace(1)* %in, align 8
  %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
@ -34,9 +37,9 @@ define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noali
 }

 ; FUNC-LABEL: {{^}}s_ctpop_v2i64:
-; SI: s_bcnt1_i32_b64
-; SI: s_bcnt1_i32_b64
-; SI: s_endpgm
+; GCN: s_bcnt1_i32_b64
+; GCN: s_bcnt1_i32_b64
+; GCN: s_endpgm
 define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val) nounwind {
  %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
  %truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
@ -45,11 +48,11 @@ define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val)
 }

 ; FUNC-LABEL: {{^}}s_ctpop_v4i64:
-; SI: s_bcnt1_i32_b64
-; SI: s_bcnt1_i32_b64
-; SI: s_bcnt1_i32_b64
-; SI: s_bcnt1_i32_b64
-; SI: s_endpgm
+; GCN: s_bcnt1_i32_b64
+; GCN: s_bcnt1_i32_b64
+; GCN: s_bcnt1_i32_b64
+; GCN: s_bcnt1_i32_b64
+; GCN: s_endpgm
 define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val) nounwind {
  %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
  %truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
@ -58,11 +61,11 @@ define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val)
 }

 ; FUNC-LABEL: {{^}}v_ctpop_v2i64:
-; SI: v_bcnt_u32_b32
-; SI: v_bcnt_u32_b32
-; SI: v_bcnt_u32_b32
-; SI: v_bcnt_u32_b32
-; SI: s_endpgm
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: s_endpgm
 define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind {
  %val = load <2 x i64> addrspace(1)* %in, align 16
  %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
@ -72,15 +75,15 @@ define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrs
 }

 ; FUNC-LABEL: {{^}}v_ctpop_v4i64:
-; SI: v_bcnt_u32_b32
-; SI: v_bcnt_u32_b32
-; SI: v_bcnt_u32_b32
-; SI: v_bcnt_u32_b32
-; SI: v_bcnt_u32_b32
-; SI: v_bcnt_u32_b32
-; SI: v_bcnt_u32_b32
-; SI: v_bcnt_u32_b32
-; SI: s_endpgm
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: s_endpgm
 define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind {
  %val = load <4 x i64> addrspace(1)* %in, align 32
  %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
@ -94,11 +97,12 @@ define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrs

 ; FUNC-LABEL: {{^}}ctpop_i64_in_br:
 ; SI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd
-; SI: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
-; SI: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]]
-; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]]
-; SI: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
-; SI: s_endpgm
+; VI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x34
+; GCN: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
+; GCN: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]]
+; GCN: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]]
+; GCN: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
+; GCN: s_endpgm
 define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) {
 entry:
  %tmp0 = icmp eq i32 %cond, 0
--- a/llvm/test/CodeGen/R600/fabs.ll
+++ b/llvm/test/CodeGen/R600/fabs.ll
@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s


@ -10,7 +11,7 @@
 ; R600-NOT: AND
 ; R600: |PV.{{[XYZW]}}|

-; SI: v_and_b32
+; GCN: v_and_b32

 define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) {
  %bc= bitcast i32 %in to float
@ -23,7 +24,7 @@ define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) {
 ; R600-NOT: AND
 ; R600: |PV.{{[XYZW]}}|

-; SI: v_and_b32
+; GCN: v_and_b32

 define void @fabs_free(float addrspace(1)* %out, i32 %in) {
  %bc= bitcast i32 %in to float
@ -35,7 +36,7 @@ define void @fabs_free(float addrspace(1)* %out, i32 %in) {
 ; FUNC-LABEL: {{^}}fabs_f32:
 ; R600: |{{(PV|T[0-9])\.[XYZW]}}|

-; SI: v_and_b32
+; GCN: v_and_b32
 define void @fabs_f32(float addrspace(1)* %out, float %in) {
  %fabs = call float @llvm.fabs.f32(float %in)
  store float %fabs, float addrspace(1)* %out
@ -46,8 +47,8 @@ define void @fabs_f32(float addrspace(1)* %out, float %in) {
 ; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 ; R600: |{{(PV|T[0-9])\.[XYZW]}}|

-; SI: v_and_b32
-; SI: v_and_b32
+; GCN: v_and_b32
+; GCN: v_and_b32
 define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
  store <2 x float> %fabs, <2 x float> addrspace(1)* %out
@ -60,20 +61,21 @@ define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
 ; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 ; R600: |{{(PV|T[0-9])\.[XYZW]}}|

-; SI: v_and_b32
-; SI: v_and_b32
-; SI: v_and_b32
-; SI: v_and_b32
+; GCN: v_and_b32
+; GCN: v_and_b32
+; GCN: v_and_b32
+; GCN: v_and_b32
 define void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
  %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
  store <4 x float> %fabs, <4 x float> addrspace(1)* %out
  ret void
 }

-; SI-LABEL: {{^}}fabs_fn_fold:
+; GCN-LABEL: {{^}}fabs_fn_fold:
 ; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
-; SI-NOT: and
-; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
+; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
+; GCN-NOT: and
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
 define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
  %fabs = call float @fabs(float %in0)
  %fmul = fmul float %fabs, %in1
@ -81,10 +83,11 @@ define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
  ret void
 }

-; SI-LABEL: {{^}}fabs_fold:
+; GCN-LABEL: {{^}}fabs_fold:
 ; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
-; SI-NOT: and
-; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
+; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
+; GCN-NOT: and
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
 define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
  %fabs = call float @llvm.fabs.f32(float %in0)
  %fmul = fmul float %fabs, %in1
--- a/llvm/test/CodeGen/R600/fceil64.ll
+++ b/llvm/test/CodeGen/R600/fceil64.ll
@ -1,5 +1,6 @@
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s

 declare double @llvm.ceil.f64(double) nounwind readnone
 declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone
--- a/llvm/test/CodeGen/R600/fcopysign.f32.ll
+++ b/llvm/test/CodeGen/R600/fcopysign.f32.ll
@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s


@ -10,12 +11,14 @@ declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind read
 ; FUNC-LABEL: {{^}}test_copysign_f32:
 ; SI: s_load_dword [[SMAG:s[0-9]+]], {{.*}} 0xb
 ; SI: s_load_dword [[SSIGN:s[0-9]+]], {{.*}} 0xc
-; SI-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
-; SI-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
-; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
-; SI: v_bfi_b32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; VI: s_load_dword [[SMAG:s[0-9]+]], {{.*}} 0x2c
+; VI: s_load_dword [[SSIGN:s[0-9]+]], {{.*}} 0x30
+; GCN-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
+; GCN-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
+; GCN-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
+; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm

 ; EG: BFI_INT
 define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign) nounwind {
@ -25,7 +28,7 @@ define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign
 }

 ; FUNC-LABEL: {{^}}test_copysign_v2f32:
-; SI: s_endpgm
+; GCN: s_endpgm

 ; EG: BFI_INT
 ; EG: BFI_INT
@ -36,7 +39,7 @@ define void @test_copysign_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %ma
 }

 ; FUNC-LABEL: {{^}}test_copysign_v4f32:
-; SI: s_endpgm
+; GCN: s_endpgm

 ; EG: BFI_INT
 ; EG: BFI_INT
--- a/llvm/test/CodeGen/R600/fcopysign.f64.ll
+++ b/llvm/test/CodeGen/R600/fcopysign.f64.ll
@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s

 declare double @llvm.copysign.f64(double, double) nounwind readnone
 declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind readnone
@ -7,13 +8,15 @@ declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind r
 ; FUNC-LABEL: {{^}}test_copysign_f64:
 ; SI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
 ; SI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
-; SI-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
-; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
-; SI: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
-; SI: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
-; SI: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
-; SI: s_endpgm
+; VI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
+; GCN-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
+; GCN-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
+; GCN-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
+; GCN: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
+; GCN: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
+; GCN: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
+; GCN: s_endpgm
 define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %sign) nounwind {
  %result = call double @llvm.copysign.f64(double %mag, double %sign)
  store double %result, double addrspace(1)* %out, align 8
@ -21,7 +24,7 @@ define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %s
 }

 ; FUNC-LABEL: {{^}}test_copysign_v2f64:
-; SI: s_endpgm
+; GCN: s_endpgm
 define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %mag, <2 x double> %sign) nounwind {
  %result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign)
  store <2 x double> %result, <2 x double> addrspace(1)* %out, align 8
@ -29,7 +32,7 @@ define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %
 }

 ; FUNC-LABEL: {{^}}test_copysign_v4f64:
-; SI: s_endpgm
+; GCN: s_endpgm
 define void @test_copysign_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %mag, <4 x double> %sign) nounwind {
  %result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign)
  store <4 x double> %result, <4 x double> addrspace(1)* %out, align 8
--- a/llvm/test/CodeGen/R600/ffloor.f64.ll
+++ b/llvm/test/CodeGen/R600/ffloor.f64.ll
@ -1,5 +1,6 @@
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s

 declare double @llvm.floor.f64(double) nounwind readnone
 declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
--- a/llvm/test/CodeGen/R600/fneg.f64.ll
+++ b/llvm/test/CodeGen/R600/fneg.f64.ll
@ -1,7 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s

 ; FUNC-LABEL: {{^}}fneg_f64:
-; SI: v_xor_b32
+; GCN: v_xor_b32
 define void @fneg_f64(double addrspace(1)* %out, double %in) {
  %fneg = fsub double -0.000000e+00, %in
  store double %fneg, double addrspace(1)* %out
@ -9,8 +10,8 @@ define void @fneg_f64(double addrspace(1)* %out, double %in) {
 }

 ; FUNC-LABEL: {{^}}fneg_v2f64:
-; SI: v_xor_b32
-; SI: v_xor_b32
+; GCN: v_xor_b32
+; GCN: v_xor_b32
 define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> %in) {
  %fneg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %in
  store <2 x double> %fneg, <2 x double> addrspace(1)* %out
@ -23,10 +24,10 @@ define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double>
 ; R600: -PV
 ; R600: -PV

-; SI: v_xor_b32
-; SI: v_xor_b32
-; SI: v_xor_b32
-; SI: v_xor_b32
+; GCN: v_xor_b32
+; GCN: v_xor_b32
+; GCN: v_xor_b32
+; GCN: v_xor_b32
 define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> %in) {
  %fneg = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %in
  store <4 x double> %fneg, <4 x double> addrspace(1)* %out
@ -39,7 +40,7 @@ define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double>

 ; FUNC-LABEL: {{^}}fneg_free_f64:
 ; FIXME: Unnecessary copy to VGPRs
-; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}}
+; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}}
 define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
  %bc = bitcast i64 %in to double
  %fsub = fsub double 0.0, %bc
@ -47,10 +48,11 @@ define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
  ret void
 }

-; SI-LABEL: {{^}}fneg_fold_f64:
+; FUNC-LABEL: {{^}}fneg_fold_f64:
 ; SI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-NOT: xor
-; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
+; VI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
+; GCN-NOT: xor
+; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
 define void @fneg_fold_f64(double addrspace(1)* %out, double %in) {
  %fsub = fsub double -0.0, %in
  %fmul = fmul double %fsub, %in
--- a/llvm/test/CodeGen/R600/fneg.ll
+++ b/llvm/test/CodeGen/R600/fneg.ll
@ -1,10 +1,11 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s

 ; FUNC-LABEL: {{^}}fneg_f32:
 ; R600: -PV

-; SI: v_xor_b32
+; GCN: v_xor_b32
 define void @fneg_f32(float addrspace(1)* %out, float %in) {
  %fneg = fsub float -0.000000e+00, %in
  store float %fneg, float addrspace(1)* %out
@ -15,8 +16,8 @@ define void @fneg_f32(float addrspace(1)* %out, float %in) {
 ; R600: -PV
 ; R600: -PV

-; SI: v_xor_b32
-; SI: v_xor_b32
+; GCN: v_xor_b32
+; GCN: v_xor_b32
 define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
  %fneg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in
  store <2 x float> %fneg, <2 x float> addrspace(1)* %out
@ -29,10 +30,10 @@ define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %i
 ; R600: -PV
 ; R600: -PV

-; SI: v_xor_b32
-; SI: v_xor_b32
-; SI: v_xor_b32
-; SI: v_xor_b32
+; GCN: v_xor_b32
+; GCN: v_xor_b32
+; GCN: v_xor_b32
+; GCN: v_xor_b32
 define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
  %fneg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in
  store <4 x float> %fneg, <4 x float> addrspace(1)* %out
@ -48,7 +49,7 @@ define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %i
 ; R600: -KC0[2].Z

 ; XXX: We could use v_add_f32_e64 with the negate bit here instead.
-; SI: v_sub_f32_e64 v{{[0-9]}}, 0, s{{[0-9]+$}}
+; GCN: v_sub_f32_e64 v{{[0-9]}}, 0, s{{[0-9]+$}}
 define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
  %bc = bitcast i32 %in to float
  %fsub = fsub float 0.0, %bc
@ -58,8 +59,9 @@ define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {

 ; FUNC-LABEL: {{^}}fneg_fold_f32:
 ; SI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
-; SI-NOT: xor
-; SI: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
+; VI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
+; GCN-NOT: xor
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
 define void @fneg_fold_f32(float addrspace(1)* %out, float %in) {
  %fsub = fsub float -0.0, %in
  %fmul = fmul float %fsub, %in
--- a/llvm/test/CodeGen/R600/frem.ll
+++ b/llvm/test/CodeGen/R600/frem.ll
@ -1,16 +1,18 @@
-; RUN: llc -march=amdgcn -mcpu=SI -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -enable-misched < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -enable-misched < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s

 ; FUNC-LABEL: {{^}}frem_f32:
-; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}
-; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
-; SI-DAG: v_cmp
-; SI-DAG: v_mul_f32
-; SI: v_rcp_f32_e32
-; SI: v_mul_f32_e32
-; SI: v_mul_f32_e32
-; SI: v_trunc_f32_e32
-; SI: v_mad_f32
-; SI: s_endpgm
+; GCN-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}
+; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
+; GCN-DAG: v_cmp
+; GCN-DAG: v_mul_f32
+; GCN: v_rcp_f32_e32
+; GCN: v_mul_f32_e32
+; GCN: v_mul_f32_e32
+; GCN: v_trunc_f32_e32
+; GCN: v_mad_f32
+; GCN: s_endpgm
 define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
                      float addrspace(1)* %in2) #0 {
   %gep2 = getelementptr float addrspace(1)* %in2, i32 4
@ -22,14 +24,14 @@ define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
 }

 ; FUNC-LABEL: {{^}}unsafe_frem_f32:
-; SI: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
-; SI: buffer_load_dword [[X:v[0-9]+]], {{.*}}
-; SI: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]]
-; SI: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
-; SI: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
-; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]]
-; SI: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
+; GCN: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
+; GCN: buffer_load_dword [[X:v[0-9]+]], {{.*}}
+; GCN: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]]
+; GCN: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
+; GCN: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
+; GCN: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]]
+; GCN: buffer_store_dword [[RESULT]]
+; GCN: s_endpgm
 define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
                             float addrspace(1)* %in2) #1 {
   %gep2 = getelementptr float addrspace(1)* %in2, i32 4
@ -40,11 +42,19 @@ define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
   ret void
 }

-; TODO: This should check something when f64 fdiv is implemented
-; correctly

 ; FUNC-LABEL: {{^}}frem_f64:
-; SI: s_endpgm
+; GCN: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0
+; GCN: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0
+; TODO: Check the full SI expansion; only its final v_mul_f64 is matched below.
+; CI: v_rcp_f64_e32 [[INVY:v\[[0-9]+:[0-9]+\]]], [[Y]]
+; CI: v_mul_f64 [[DIV:v\[[0-9]+:[0-9]+\]]], [[X]], [[INVY]]
+; CI: v_trunc_f64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[DIV]]
+; CI: v_mul_f64 [[RESULTM:v\[[0-9]+:[0-9]+\]]], [[TRUNC]], [[Y]]
+; SI: v_mul_f64 [[RESULTM:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, [[Y]]
+; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], -[[RESULTM]]
+; GCN: buffer_store_dwordx2 [[RESULT]], {{.*}}, 0
+; GCN: s_endpgm
 define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) #0 {
   %r0 = load double addrspace(1)* %in1, align 8
@ -55,11 +65,12 @@ define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 }

 ; FUNC-LABEL: {{^}}unsafe_frem_f64:
-; SI: v_rcp_f64_e32
-; SI: v_mul_f64
+; GCN: v_rcp_f64_e32
+; GCN: v_mul_f64
 ; SI: v_bfe_u32
-; SI: v_fma_f64
-; SI: s_endpgm
+; CI: v_trunc_f64_e32
+; GCN: v_fma_f64
+; GCN: s_endpgm
 define void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                             double addrspace(1)* %in2) #1 {
   %r0 = load double addrspace(1)* %in1, align 8
--- a/llvm/test/CodeGen/R600/ftrunc.f64.ll
+++ b/llvm/test/CodeGen/R600/ftrunc.f64.ll
@ -1,5 +1,6 @@
-; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s

 declare double @llvm.trunc.f64(double) nounwind readnone
 declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone
--- a/llvm/test/CodeGen/R600/gep-address-space.ll
+++ b/llvm/test/CodeGen/R600/gep-address-space.ll
@ -1,5 +1,6 @@
 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=CHECK %s
 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s

 define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
 ; CHECK-LABEL: {{^}}use_gep_address_space:
--- a/llvm/test/CodeGen/R600/gv-const-addrspace.ll
+++ b/llvm/test/CodeGen/R600/gv-const-addrspace.ll
@ -1,5 +1,6 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s


@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2
@ -9,6 +10,7 @@
 ; FUNC-LABEL: {{^}}float:
-; FIXME: We should be using s_load_dword here.
+; FIXME: We should be using s_load_dword here on SI too (VI already does).
 ; SI: buffer_load_dword
+; VI: s_load_dword

 ; EG-DAG: MOV {{\** *}}T2.X
 ; EG-DAG: MOV {{\** *}}T3.X
@ -31,6 +33,7 @@ entry:

-; FIXME: We should be using s_load_dword here.
+; FIXME: We should be using s_load_dword here on SI too (VI already does).
 ; SI: buffer_load_dword
+; VI: s_load_dword

 ; EG-DAG: MOV {{\** *}}T2.X
 ; EG-DAG: MOV {{\** *}}T3.X
@ -53,7 +56,7 @@ entry:
@struct_foo_gv = internal unnamed_addr addrspace(2) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]

 ; FUNC-LABEL: {{^}}struct_foo_gv_load:
-; SI: s_load_dword
+; GCN: s_load_dword

 define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
  %gep = getelementptr inbounds [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
@ -70,6 +73,7 @@ define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
 ; FUNC-LABEL: {{^}}array_v1_gv_load:
-; FIXME: We should be using s_load_dword here.
+; FIXME: We should be using s_load_dword here on SI too (VI already does).
 ; SI: buffer_load_dword
+; VI: s_load_dword
 define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
  %gep = getelementptr inbounds [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
  %load = load <1 x i32> addrspace(2)* %gep, align 4
--- a/llvm/test/CodeGen/R600/imm.ll
+++ b/llvm/test/CodeGen/R600/imm.ll
@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=CHECK %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CHECK %s

 ; Use a 64-bit value with lo bits that can be represented as an inline constant
 ; CHECK-LABEL: {{^}}i64_imm_inline_lo:
@ -303,7 +304,8 @@ define void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) {
 }

 ; CHECK-LABEL: {{^}}add_inline_imm_0.0_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
 ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
 ; CHECK: buffer_store_dwordx2 [[REG]]
 define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) {
@ -313,7 +315,8 @@ define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) {
 }

 ; CHECK-LABEL: {{^}}add_inline_imm_0.5_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
 ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0.5, [[VAL]]
 ; CHECK: buffer_store_dwordx2 [[REG]]
 define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) {
@ -323,7 +326,8 @@ define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) {
 }

 ; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
 ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -0.5, [[VAL]]
 ; CHECK: buffer_store_dwordx2 [[REG]]
 define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) {
@ -333,7 +337,8 @@ define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) {
 }

 ; CHECK-LABEL: {{^}}add_inline_imm_1.0_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
 ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1.0, [[VAL]]
 ; CHECK: buffer_store_dwordx2 [[REG]]
 define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) {
@ -343,7 +348,8 @@ define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) {
 }

 ; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
 ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1.0, [[VAL]]
 ; CHECK: buffer_store_dwordx2 [[REG]]
 define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) {
@ -353,7 +359,8 @@ define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) {
 }

 ; CHECK-LABEL: {{^}}add_inline_imm_2.0_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
 ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2.0, [[VAL]]
 ; CHECK: buffer_store_dwordx2 [[REG]]
 define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) {
@ -363,7 +370,8 @@ define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) {
 }

 ; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
 ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2.0, [[VAL]]
 ; CHECK: buffer_store_dwordx2 [[REG]]
 define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) {
@ -373,7 +381,8 @@ define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) {
 }

 ; CHECK-LABEL: {{^}}add_inline_imm_4.0_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
 ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 4.0, [[VAL]]
 ; CHECK: buffer_store_dwordx2 [[REG]]
 define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) {
@ -383,7 +392,8 @@ define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) {
 }

 ; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
 ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -4.0, [[VAL]]
 ; CHECK: buffer_store_dwordx2 [[REG]]
 define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) {
@ -394,7 +404,8 @@ define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) {


 ; CHECK-LABEL: {{^}}add_inline_imm_1_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
 ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1, [[VAL]]
 ; CHECK: buffer_store_dwordx2 [[REG]]
 define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) {
@ -404,7 +415,8 @@ define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) {
 }

 ; CHECK-LABEL: {{^}}add_inline_imm_2_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
 ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2, [[VAL]]
 ; CHECK: buffer_store_dwordx2 [[REG]]
 define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) {
@ -414,7 +426,8 @@ define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) {
 }

 ; CHECK-LABEL: {{^}}add_inline_imm_16_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
 ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 16, [[VAL]]
 ; CHECK: buffer_store_dwordx2 [[REG]]
 define void @add_inline_imm_16_f64(double addrspace(1)* %out, double %x) {
@ -424,7 +437,8 @@ define void @add_inline_imm_16_f64(double addrspace(1)* %out, double %x) {
 }

 ; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
 ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1, [[VAL]]
 ; CHECK: buffer_store_dwordx2 [[REG]]
 define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) {
@ -434,7 +448,8 @@ define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) {
 }

 ; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
 ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2, [[VAL]]
 ; CHECK: buffer_store_dwordx2 [[REG]]
 define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) {
@ -444,7 +459,8 @@ define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) {
 }

 ; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
 ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -16, [[VAL]]
 ; CHECK: buffer_store_dwordx2 [[REG]]
 define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) {
@ -454,7 +470,8 @@ define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) {
 }

 ; CHECK-LABEL: {{^}}add_inline_imm_63_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
 ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 63, [[VAL]]
 ; CHECK: buffer_store_dwordx2 [[REG]]
 define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) {
@ -464,7 +481,8 @@ define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) {
 }

 ; CHECK-LABEL: {{^}}add_inline_imm_64_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
 ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 64, [[VAL]]
 ; CHECK: buffer_store_dwordx2 [[REG]]
 define void @add_inline_imm_64_f64(double addrspace(1)* %out, double %x) {
--- a/llvm/test/CodeGen/R600/kernel-args.ll
+++ b/llvm/test/CodeGen/R600/kernel-args.ll
@ -1,11 +1,11 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC

-; EG-LABEL: {{^}}i8_arg:
+; FUNC-LABEL: {{^}}i8_arg:
 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-LABEL: {{^}}i8_arg:
-; SI: buffer_load_ubyte
+; GCN: buffer_load_ubyte

 define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
 entry:
@ -14,10 +14,10 @@ entry:
  ret void
 }

-; EG-LABEL: {{^}}i8_zext_arg:
+; FUNC-LABEL: {{^}}i8_zext_arg:
 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-LABEL: {{^}}i8_zext_arg:
 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
+; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c

 define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
 entry:
@ -26,10 +26,10 @@ entry:
  ret void
 }

-; EG-LABEL: {{^}}i8_sext_arg:
+; FUNC-LABEL: {{^}}i8_sext_arg:
 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-LABEL: {{^}}i8_sext_arg:
 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
+; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c

 define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
 entry:
@ -38,10 +38,9 @@ entry:
  ret void
 }

-; EG-LABEL: {{^}}i16_arg:
+; FUNC-LABEL: {{^}}i16_arg:
 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-LABEL: {{^}}i16_arg:
-; SI: buffer_load_ushort
+; GCN: buffer_load_ushort

 define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
 entry:
@ -50,10 +49,10 @@ entry:
  ret void
 }

-; EG-LABEL: {{^}}i16_zext_arg:
+; FUNC-LABEL: {{^}}i16_zext_arg:
 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-LABEL: {{^}}i16_zext_arg:
 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
+; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c

 define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
 entry:
@ -62,10 +61,10 @@ entry:
  ret void
 }

-; EG-LABEL: {{^}}i16_sext_arg:
+; FUNC-LABEL: {{^}}i16_sext_arg:
 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-LABEL: {{^}}i16_sext_arg:
 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
+; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c

 define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
 entry:
@ -74,176 +73,170 @@ entry:
  ret void
 }

-; EG-LABEL: {{^}}i32_arg:
+; FUNC-LABEL: {{^}}i32_arg:
 ; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
-; SI-LABEL: {{^}}i32_arg:
-; s_load_dword s{{[0-9]}}, s[0:1], 0xb
+; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
+; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
 define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
 entry:
  store i32 %in, i32 addrspace(1)* %out, align 4
  ret void
 }

-; EG-LABEL: {{^}}f32_arg:
+; FUNC-LABEL: {{^}}f32_arg:
 ; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
-; SI-LABEL: {{^}}f32_arg:
-; s_load_dword s{{[0-9]}}, s[0:1], 0xb
+; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
+; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
 define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
 entry:
  store float %in, float addrspace(1)* %out, align 4
  ret void
 }

-; EG-LABEL: {{^}}v2i8_arg:
+; FUNC-LABEL: {{^}}v2i8_arg:
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
-; SI-LABEL: {{^}}v2i8_arg:
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
 define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
 entry:
  store <2 x i8> %in, <2 x i8> addrspace(1)* %out
  ret void
 }

-; EG-LABEL: {{^}}v2i16_arg:
+; FUNC-LABEL: {{^}}v2i16_arg:
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
-; SI-LABEL: {{^}}v2i16_arg:
-; SI-DAG: buffer_load_ushort
-; SI-DAG: buffer_load_ushort
+; GCN-DAG: buffer_load_ushort
+; GCN-DAG: buffer_load_ushort
 define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
 entry:
  store <2 x i16> %in, <2 x i16> addrspace(1)* %out
  ret void
 }

-; EG-LABEL: {{^}}v2i32_arg:
+; FUNC-LABEL: {{^}}v2i32_arg:
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
-; SI-LABEL: {{^}}v2i32_arg:
 ; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
+; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
 define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
 entry:
  store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
  ret void
 }

-; EG-LABEL: {{^}}v2f32_arg:
+; FUNC-LABEL: {{^}}v2f32_arg:
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
-; SI-LABEL: {{^}}v2f32_arg:
 ; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
+; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
 define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
 entry:
  store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
  ret void
 }

-; EG-LABEL: {{^}}v3i8_arg:
+; FUNC-LABEL: {{^}}v3i8_arg:
 ; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
 ; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
 ; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
-; SI-LABEL: {{^}}v3i8_arg:
 define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
 entry:
  store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
  ret void
 }

-; EG-LABEL: {{^}}v3i16_arg:
+; FUNC-LABEL: {{^}}v3i16_arg:
 ; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
 ; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
 ; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
-; SI-LABEL: {{^}}v3i16_arg:
 define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
 entry:
  store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
  ret void
 }
-; EG-LABEL: {{^}}v3i32_arg:
+; FUNC-LABEL: {{^}}v3i32_arg:
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
-; SI-LABEL: {{^}}v3i32_arg:
 ; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
+; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
 define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
 entry:
  store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
  ret void
 }

-; EG-LABEL: {{^}}v3f32_arg:
+; FUNC-LABEL: {{^}}v3f32_arg:
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
-; SI-LABEL: {{^}}v3f32_arg:
 ; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
+; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
 define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
 entry:
  store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
  ret void
 }

-; EG-LABEL: {{^}}v4i8_arg:
+; FUNC-LABEL: {{^}}v4i8_arg:
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
-; SI-LABEL: {{^}}v4i8_arg:
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
 define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
 entry:
  store <4 x i8> %in, <4 x i8> addrspace(1)* %out
  ret void
 }

-; EG-LABEL: {{^}}v4i16_arg:
+; FUNC-LABEL: {{^}}v4i16_arg:
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
-; SI-LABEL: {{^}}v4i16_arg:
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
 define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
 entry:
  store <4 x i16> %in, <4 x i16> addrspace(1)* %out
  ret void
 }

-; EG-LABEL: {{^}}v4i32_arg:
+; FUNC-LABEL: {{^}}v4i32_arg:
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
-; SI-LABEL: {{^}}v4i32_arg:
 ; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
+; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
 define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
 entry:
  store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
  ret void
 }

-; EG-LABEL: {{^}}v4f32_arg:
+; FUNC-LABEL: {{^}}v4f32_arg:
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
-; SI-LABEL: {{^}}v4f32_arg:
 ; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
+; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
 define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
 entry:
  store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
  ret void
 }

-; EG-LABEL: {{^}}v8i8_arg:
+; FUNC-LABEL: {{^}}v8i8_arg:
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
@ -252,21 +245,20 @@ entry:
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
-; SI-LABEL: {{^}}v8i8_arg:
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
 define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
 entry:
  store <8 x i8> %in, <8 x i8> addrspace(1)* %out
  ret void
 }

-; EG-LABEL: {{^}}v8i16_arg:
+; FUNC-LABEL: {{^}}v8i16_arg:
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
@ -275,22 +267,21 @@ entry:
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
-; SI-LABEL: {{^}}v8i16_arg:
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
 define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
 entry:
  store <8 x i16> %in, <8 x i16> addrspace(1)* %out
  ret void
 }

-; EG-LABEL: {{^}}v8i32_arg:
+; FUNC-LABEL: {{^}}v8i32_arg:
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
@ -299,15 +290,15 @@ entry:
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
-; SI-LABEL: {{^}}v8i32_arg:
 ; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
+; VI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x44
 define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
 entry:
  store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
  ret void
 }

-; EG-LABEL: {{^}}v8f32_arg:
+; FUNC-LABEL: {{^}}v8f32_arg:
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
@ -316,7 +307,6 @@ entry:
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
-; SI-LABEL: {{^}}v8f32_arg:
 ; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
 define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
 entry:
@ -324,7 +314,7 @@ entry:
  ret void
 }

-; EG-LABEL: {{^}}v16i8_arg:
+; FUNC-LABEL: {{^}}v16i8_arg:
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
@ -341,30 +331,29 @@ entry:
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
-; SI-LABEL: {{^}}v16i8_arg:
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
 define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
 entry:
  store <16 x i8> %in, <16 x i8> addrspace(1)* %out
  ret void
 }

-; EG-LABEL: {{^}}v16i16_arg:
+; FUNC-LABEL: {{^}}v16i16_arg:
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
@ -381,30 +370,29 @@ entry:
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
-; SI-LABEL: {{^}}v16i16_arg:
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
 define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
 entry:
  store <16 x i16> %in, <16 x i16> addrspace(1)* %out
  ret void
 }

-; EG-LABEL: {{^}}v16i32_arg:
+; FUNC-LABEL: {{^}}v16i32_arg:
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
@ -421,15 +409,15 @@ entry:
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
-; SI-LABEL: {{^}}v16i32_arg:
 ; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
+; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64
 define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
 entry:
  store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
  ret void
 }

-; EG-LABEL: {{^}}v16f32_arg:
+; FUNC-LABEL: {{^}}v16f32_arg:
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
@ -446,8 +434,8 @@ entry:
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
-; SI-LABEL: {{^}}v16f32_arg:
 ; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
+; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64
 define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
 entry:
  store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
@ -455,18 +443,18 @@ entry:
 }

 ; FUNC-LABEL: {{^}}kernel_arg_i64:
-; SI: s_load_dwordx2
-; SI: s_load_dwordx2
-; SI: buffer_store_dwordx2
+; GCN: s_load_dwordx2
+; GCN: s_load_dwordx2
+; GCN: buffer_store_dwordx2
 define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
  store i64 %a, i64 addrspace(1)* %out, align 8
  ret void
 }

 ; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
-; XSI: s_load_dwordx2
-; XSI: s_load_dwordx2
-; XSI: buffer_store_dwordx2
+; XGCN: s_load_dwordx2
+; XGCN: s_load_dwordx2
+; XGCN: buffer_store_dwordx2
 ; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
 ;   store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
 ;   ret void
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll
@ -1,25 +1,29 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s

 declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone
 declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone

-; SI-LABEL: {{^}}test_div_fixup_f32:
+; GCN-LABEL: {{^}}test_div_fixup_f32:
 ; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 ; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
 ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
-; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
-; SI: v_div_fixup_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; VI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x34
+; VI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
+; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; GCN: v_div_fixup_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
 define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
  %result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone
  store float %result, float addrspace(1)* %out, align 4
  ret void
 }

-; SI-LABEL: {{^}}test_div_fixup_f64:
-; SI: v_div_fixup_f64
+; GCN-LABEL: {{^}}test_div_fixup_f64:
+; GCN: v_div_fixup_f64
 define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind {
  %result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone
  store double %result, double addrspace(1)* %out, align 8
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
@ -1,25 +1,29 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

 declare float @llvm.AMDGPU.div.fmas.f32(float, float, float, i1) nounwind readnone
 declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind readnone

-; SI-LABEL: {{^}}test_div_fmas_f32:
+; GCN-LABEL: {{^}}test_div_fmas_f32:
 ; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 ; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
 ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
-; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
-; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; VI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x34
+; VI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
+; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; GCN: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
 define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
  %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone
  store float %result, float addrspace(1)* %out, align 4
  ret void
 }

-; SI-LABEL: {{^}}test_div_fmas_f64:
-; SI: v_div_fmas_f64
+; GCN-LABEL: {{^}}test_div_fmas_f64:
+; GCN: v_div_fmas_f64
 define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) nounwind {
  %result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone
  store double %result, double addrspace(1)* %out, align 8
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll
@ -1,9 +1,21 @@
 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s

 declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone

 ; FUNC-LABEL: {{^}}rsq_clamped_f64:
 ; SI: v_rsq_clamp_f64_e32
+
+; VI: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[2:3]
+; TODO: this constant should be folded:
+; VI: s_mov_b32 s[[ALLBITS:[0-9]+]], -1
+; VI: s_mov_b32 s[[HIGH1:[0-9]+]], 0x7fefffff
+; VI: s_mov_b32 s[[LOW1:[0-9]+]], s[[ALLBITS]]
+; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
+; VI: s_mov_b32 s[[HIGH2:[0-9]+]], 0xffefffff
+; VI: s_mov_b32 s[[LOW2:[0-9]+]], s[[ALLBITS]]
+; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]]
+
 define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind {
  %rsq_clamped = call double @llvm.AMDGPU.rsq.clamped.f64(double %src) nounwind readnone
  store double %rsq_clamped, double addrspace(1)* %out, align 8
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll
@ -1,4 +1,5 @@
 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s


@ -6,7 +7,15 @@ declare float @llvm.AMDGPU.rsq.clamped.f32(float) nounwind readnone

 ; FUNC-LABEL: {{^}}rsq_clamped_f32:
 ; SI: v_rsq_clamp_f32_e32
+
+; VI: v_rsq_f32_e32 [[RSQ:v[0-9]+]], {{s[0-9]+}}
+; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0x7f7fffff, [[RSQ]]
+; TODO: this constant should be folded:
+; VI: v_mov_b32_e32 [[MINFLT:v[0-9]+]], 0xff7fffff
+; VI: v_max_f32_e32 {{v[0-9]+}}, [[MIN]], [[MINFLT]]
+
 ; EG: RECIPSQRT_CLAMPED
+
 define void @rsq_clamped_f32(float addrspace(1)* %out, float %src) nounwind {
  %rsq_clamped = call float @llvm.AMDGPU.rsq.clamped.f32(float %src) nounwind readnone
  store float %rsq_clamped, float addrspace(1)* %out, align 4
--- a/llvm/test/CodeGen/R600/llvm.SI.tid.ll
+++ b/llvm/test/CodeGen/R600/llvm.SI.tid.ll
@ -1,7 +1,9 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s
+;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s

-;CHECK: v_mbcnt_lo_u32_b32_e64
-;CHECK: v_mbcnt_hi_u32_b32_e32
+;GCN: v_mbcnt_lo_u32_b32_e64
+;SI: v_mbcnt_hi_u32_b32_e32
+;VI: v_mbcnt_hi_u32_b32_e64

 define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
 main_body:
--- a/llvm/test/CodeGen/R600/llvm.rint.f64.ll
+++ b/llvm/test/CodeGen/R600/llvm.rint.f64.ll
@ -1,3 +1,4 @@
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

--- a/llvm/test/CodeGen/R600/llvm.round.ll
+++ b/llvm/test/CodeGen/R600/llvm.round.ll
@ -1,4 +1,5 @@
 ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s

 ; FUNC-LABEL: {{^}}round_f32:
--- a/llvm/test/CodeGen/R600/local-64.ll
+++ b/llvm/test/CodeGen/R600/local-64.ll
@ -1,5 +1,6 @@
 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=BOTH %s
 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s

 ; BOTH-LABEL: {{^}}local_i32_load
 ; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28 [M0]
--- a/llvm/test/CodeGen/R600/local-atomics.ll
+++ b/llvm/test/CodeGen/R600/local-atomics.ll
@ -1,15 +1,16 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

 ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32:
 ; EG: LDS_WRXCHG_RET *
-; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
-; SI: s_load_dword [[SPTR:s[0-9]+]],
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
+; GCN: s_load_dword [[SPTR:s[0-9]+]],
+; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
 define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
@ -18,8 +19,8 @@ define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %

 ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset:
 ; EG: LDS_WRXCHG_RET *
-; SI: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
@ -30,12 +31,12 @@ define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac
 ; XXX - Is it really necessary to load 4 into VGPR?
 ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
 ; EG: LDS_ADD_RET *
-; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
-; SI: s_load_dword [[SPTR:s[0-9]+]],
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
+; GCN: s_load_dword [[SPTR:s[0-9]+]],
+; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
 define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
@ -44,8 +45,8 @@ define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p

 ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset:
 ; EG: LDS_ADD_RET *
-; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
@ -56,8 +57,8 @@ define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
 ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset:
 ; EG: LDS_ADD_RET *
 ; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
-; CI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
  %sub = sub i32 %a, %b
  %add = add i32 %sub, 4
@ -69,9 +70,9 @@ define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 ad

 ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32:
 ; EG: LDS_ADD_RET *
-; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
+; GCN: s_endpgm
 define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
@ -80,9 +81,9 @@ define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p

 ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
 ; EG: LDS_ADD_RET *
-; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
+; GCN: s_endpgm
 define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
@ -93,8 +94,8 @@ define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
 ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_bad_si_offset:
 ; EG: LDS_ADD_RET *
 ; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
-; CI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; CIVI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
  %sub = sub i32 %a, %b
  %add = add i32 %sub, 4
@ -106,8 +107,8 @@ define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 ad

 ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32:
 ; EG: LDS_SUB_RET *
-; SI: ds_sub_rtn_u32
-; SI: s_endpgm
+; GCN: ds_sub_rtn_u32
+; GCN: s_endpgm
 define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
@ -116,8 +117,8 @@ define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p

 ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset:
 ; EG: LDS_SUB_RET *
-; SI: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
@ -127,9 +128,9 @@ define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace

 ; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32:
 ; EG: LDS_SUB_RET *
-; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; SI: ds_dec_rtn_u32  v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; GCN: ds_dec_rtn_u32  v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
+; GCN: s_endpgm
 define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
@ -138,9 +139,9 @@ define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p

 ; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
 ; EG: LDS_SUB_RET *
-; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; SI: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
+; GCN: s_endpgm
 define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
@ -150,8 +151,8 @@ define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace

 ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32:
 ; EG: LDS_AND_RET *
-; SI: ds_and_rtn_b32
-; SI: s_endpgm
+; GCN: ds_and_rtn_b32
+; GCN: s_endpgm
 define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
@ -160,8 +161,8 @@ define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p

 ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset:
 ; EG: LDS_AND_RET *
-; SI: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
@ -171,8 +172,8 @@ define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace

 ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32:
 ; EG: LDS_OR_RET *
-; SI: ds_or_rtn_b32
-; SI: s_endpgm
+; GCN: ds_or_rtn_b32
+; GCN: s_endpgm
 define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
@ -181,8 +182,8 @@ define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %pt

 ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset:
 ; EG: LDS_OR_RET *
-; SI: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
@ -192,8 +193,8 @@ define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(

 ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32:
 ; EG: LDS_XOR_RET *
-; SI: ds_xor_rtn_b32
-; SI: s_endpgm
+; GCN: ds_xor_rtn_b32
+; GCN: s_endpgm
 define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
@ -202,8 +203,8 @@ define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p

 ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset:
 ; EG: LDS_XOR_RET *
-; SI: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
@ -221,8 +222,8 @@ define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace

 ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32:
 ; EG: LDS_MIN_INT_RET *
-; SI: ds_min_rtn_i32
-; SI: s_endpgm
+; GCN: ds_min_rtn_i32
+; GCN: s_endpgm
 define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
@ -231,8 +232,8 @@ define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p

 ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset:
 ; EG: LDS_MIN_INT_RET *
-; SI: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
@ -242,8 +243,8 @@ define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace

 ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32:
 ; EG: LDS_MAX_INT_RET *
-; SI: ds_max_rtn_i32
-; SI: s_endpgm
+; GCN: ds_max_rtn_i32
+; GCN: s_endpgm
 define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
@ -252,8 +253,8 @@ define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p

 ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset:
 ; EG: LDS_MAX_INT_RET *
-; SI: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
@ -263,8 +264,8 @@ define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace

 ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32:
 ; EG: LDS_MIN_UINT_RET *
-; SI: ds_min_rtn_u32
-; SI: s_endpgm
+; GCN: ds_min_rtn_u32
+; GCN: s_endpgm
 define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
@ -273,8 +274,8 @@ define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %

 ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset:
 ; EG: LDS_MIN_UINT_RET *
-; SI: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
@ -284,8 +285,8 @@ define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac

 ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32:
 ; EG: LDS_MAX_UINT_RET *
-; SI: ds_max_rtn_u32
-; SI: s_endpgm
+; GCN: ds_max_rtn_u32
+; GCN: s_endpgm
 define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
@ -294,8 +295,8 @@ define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %

 ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset:
 ; EG: LDS_MAX_UINT_RET *
-; SI: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
@ -304,19 +305,19 @@ define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac
 }

 ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32:
-; SI: s_load_dword [[SPTR:s[0-9]+]],
-; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
-; SI: s_endpgm
+; GCN: s_load_dword [[SPTR:s[0-9]+]],
+; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
+; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
+; GCN: s_endpgm
 define void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset:
-; SI: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
@ -325,19 +326,19 @@ define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {

 ; XXX - Is it really necessary to load 4 into VGPR?
 ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32:
-; SI: s_load_dword [[SPTR:s[0-9]+]],
-; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: ds_add_u32 [[VPTR]], [[DATA]] [M0]
-; SI: s_endpgm
+; GCN: s_load_dword [[SPTR:s[0-9]+]],
+; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
+; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; GCN: ds_add_u32 [[VPTR]], [[DATA]] [M0]
+; GCN: s_endpgm
 define void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset:
-; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
@ -346,8 +347,8 @@ define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {

 ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset
 ; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} [M0]
-; CI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
-; SI: s_endpgm
+; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
+; GCN: s_endpgm
 define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
  %sub = sub i32 %a, %b
  %add = add i32 %sub, 4
@ -357,18 +358,18 @@ define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32
 }

 ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32:
-; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; SI: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] [M0]
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; GCN: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] [M0]
+; GCN: s_endpgm
 define void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
-; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; SI: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; GCN: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
+; GCN: s_endpgm
 define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
@ -377,8 +378,8 @@ define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {

 ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_bad_si_offset:
 ; SI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}}
-; CI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; CIVI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
  %sub = sub i32 %a, %b
  %add = add i32 %sub, 4
@ -388,16 +389,16 @@ define void @lds_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32
 }

 ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32:
-; SI: ds_sub_u32
-; SI: s_endpgm
+; GCN: ds_sub_u32
+; GCN: s_endpgm
 define void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset:
-; SI: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
@ -405,18 +406,18 @@ define void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
 }

 ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32:
-; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; SI: ds_dec_u32  v{{[0-9]+}}, [[NEGONE]]
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; GCN: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]]
+; GCN: s_endpgm
 define void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
-; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; SI: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; GCN: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
+; GCN: s_endpgm
 define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
@ -424,16 +425,16 @@ define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
 }

 ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32:
-; SI: ds_and_b32
-; SI: s_endpgm
+; GCN: ds_and_b32
+; GCN: s_endpgm
 define void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset:
-; SI: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
@ -441,16 +442,16 @@ define void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
 }

 ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32:
-; SI: ds_or_b32
-; SI: s_endpgm
+; GCN: ds_or_b32
+; GCN: s_endpgm
 define void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset:
-; SI: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
@ -458,16 +459,16 @@ define void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
 }

 ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32:
-; SI: ds_xor_b32
-; SI: s_endpgm
+; GCN: ds_xor_b32
+; GCN: s_endpgm
 define void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset:
-; SI: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
@ -482,16 +483,16 @@ define void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
 ; }

 ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32:
-; SI: ds_min_i32
-; SI: s_endpgm
+; GCN: ds_min_i32
+; GCN: s_endpgm
 define void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset:
-; SI: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
@ -499,16 +500,16 @@ define void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
 }

 ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32:
-; SI: ds_max_i32
-; SI: s_endpgm
+; GCN: ds_max_i32
+; GCN: s_endpgm
 define void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset:
-; SI: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
@ -516,16 +517,16 @@ define void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
 }

 ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32:
-; SI: ds_min_u32
-; SI: s_endpgm
+; GCN: ds_min_u32
+; GCN: s_endpgm
 define void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset:
-; SI: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
@ -533,16 +534,16 @@ define void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
 }

 ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32:
-; SI: ds_max_u32
-; SI: s_endpgm
+; GCN: ds_max_u32
+; GCN: s_endpgm
 define void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset:
-; SI: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
 define void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
--- a/llvm/test/CodeGen/R600/local-atomics64.ll
+++ b/llvm/test/CodeGen/R600/local-atomics64.ll
@ -1,8 +1,9 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=VI -check-prefix=GCN %s

 ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64:
-; SI: ds_wrxchg_rtn_b64
-; SI: s_endpgm
+; GCN: ds_wrxchg_rtn_b64
+; GCN: s_endpgm
 define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
@ -10,8 +11,8 @@ define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %
 }

 ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
-; SI: ds_wrxchg_rtn_b64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
@ -20,8 +21,8 @@ define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspac
 }

 ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64:
-; SI: ds_add_rtn_u64
-; SI: s_endpgm
+; GCN: ds_add_rtn_u64
+; GCN: s_endpgm
 define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
@ -29,13 +30,14 @@ define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
 }

 ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
-; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
-; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
+; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
+; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
 ; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
-; SI: buffer_store_dwordx2 [[RESULT]],
-; SI: s_endpgm
+; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
+; GCN: buffer_store_dwordx2 [[RESULT]],
+; GCN: s_endpgm
 define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i64 4
  %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
@ -44,11 +46,11 @@ define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
 }

 ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64:
-; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
-; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
-; SI: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
-; SI: buffer_store_dwordx2 [[RESULT]],
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
+; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
+; GCN: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; GCN: buffer_store_dwordx2 [[RESULT]],
+; GCN: s_endpgm
 define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
@ -56,8 +58,8 @@ define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
 }

 ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
-; SI: ds_inc_rtn_u64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_inc_rtn_u64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
@ -66,8 +68,8 @@ define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
 }

 ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64:
-; SI: ds_sub_rtn_u64
-; SI: s_endpgm
+; GCN: ds_sub_rtn_u64
+; GCN: s_endpgm
 define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
@ -75,8 +77,8 @@ define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
 }

 ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
-; SI: ds_sub_rtn_u64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_sub_rtn_u64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
@ -85,11 +87,11 @@ define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
 }

 ; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64:
-; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
-; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
-; SI: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
-; SI: buffer_store_dwordx2 [[RESULT]],
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
+; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
+; GCN: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; GCN: buffer_store_dwordx2 [[RESULT]],
+; GCN: s_endpgm
 define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
@ -97,8 +99,8 @@ define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
 }

 ; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
-; SI: ds_dec_rtn_u64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_dec_rtn_u64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
@ -107,8 +109,8 @@ define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
 }

 ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64:
-; SI: ds_and_rtn_b64
-; SI: s_endpgm
+; GCN: ds_and_rtn_b64
+; GCN: s_endpgm
 define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
@ -116,8 +118,8 @@ define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
 }

 ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
-; SI: ds_and_rtn_b64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_and_rtn_b64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
@ -126,8 +128,8 @@ define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
 }

 ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64:
-; SI: ds_or_rtn_b64
-; SI: s_endpgm
+; GCN: ds_or_rtn_b64
+; GCN: s_endpgm
 define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
@ -135,8 +137,8 @@ define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %pt
 }

 ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
-; SI: ds_or_rtn_b64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_or_rtn_b64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
@ -145,8 +147,8 @@ define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(
 }

 ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64:
-; SI: ds_xor_rtn_b64
-; SI: s_endpgm
+; GCN: ds_xor_rtn_b64
+; GCN: s_endpgm
 define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
@ -154,8 +156,8 @@ define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
 }

 ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
-; SI: ds_xor_rtn_b64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_xor_rtn_b64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
@ -172,8 +174,8 @@ define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
 ; }

 ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64:
-; SI: ds_min_rtn_i64
-; SI: s_endpgm
+; GCN: ds_min_rtn_i64
+; GCN: s_endpgm
 define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
@ -181,8 +183,8 @@ define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
 }

 ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
-; SI: ds_min_rtn_i64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_min_rtn_i64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
@ -191,8 +193,8 @@ define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
 }

 ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64:
-; SI: ds_max_rtn_i64
-; SI: s_endpgm
+; GCN: ds_max_rtn_i64
+; GCN: s_endpgm
 define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
@ -200,8 +202,8 @@ define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
 }

 ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
-; SI: ds_max_rtn_i64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_max_rtn_i64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
@ -210,8 +212,8 @@ define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
 }

 ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64:
-; SI: ds_min_rtn_u64
-; SI: s_endpgm
+; GCN: ds_min_rtn_u64
+; GCN: s_endpgm
 define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
@ -219,8 +221,8 @@ define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %
 }

 ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
-; SI: ds_min_rtn_u64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_min_rtn_u64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
@ -229,8 +231,8 @@ define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspac
 }

 ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64:
-; SI: ds_max_rtn_u64
-; SI: s_endpgm
+; GCN: ds_max_rtn_u64
+; GCN: s_endpgm
 define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
@ -238,8 +240,8 @@ define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %
 }

 ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
-; SI: ds_max_rtn_u64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_max_rtn_u64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
@ -248,16 +250,16 @@ define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspac
 }

 ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64:
-; SI: ds_wrxchg_rtn_b64
-; SI: s_endpgm
+; GCN: ds_wrxchg_rtn_b64
+; GCN: s_endpgm
 define void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
-; SI: ds_wrxchg_rtn_b64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
@ -265,8 +267,8 @@ define void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
 }

 ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64:
-; SI: ds_add_u64
-; SI: s_endpgm
+; GCN: ds_add_u64
+; GCN: s_endpgm
 define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
@ -274,11 +276,12 @@ define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {

 ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64_offset:
 ; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
-; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
-; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
-; SI: s_endpgm
+; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
+; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
+; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
+; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; GCN: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
+; GCN: s_endpgm
 define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i64 4
  %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
@ -286,18 +289,18 @@ define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
 }

 ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64:
-; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
-; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
-; SI: ds_inc_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
+; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
+; GCN: ds_inc_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; GCN: s_endpgm
 define void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
-; SI: ds_inc_u64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_inc_u64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
@ -305,16 +308,16 @@ define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
 }

 ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64:
-; SI: ds_sub_u64
-; SI: s_endpgm
+; GCN: ds_sub_u64
+; GCN: s_endpgm
 define void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
-; SI: ds_sub_u64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_sub_u64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
@ -322,18 +325,18 @@ define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
 }

 ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64:
-; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
-; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
-; SI: ds_dec_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
+; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
+; GCN: ds_dec_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; GCN: s_endpgm
 define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
-; SI: ds_dec_u64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_dec_u64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
@ -341,16 +344,16 @@ define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
 }

 ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64:
-; SI: ds_and_b64
-; SI: s_endpgm
+; GCN: ds_and_b64
+; GCN: s_endpgm
 define void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
-; SI: ds_and_b64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_and_b64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
@ -358,16 +361,16 @@ define void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
 }

 ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64:
-; SI: ds_or_b64
-; SI: s_endpgm
+; GCN: ds_or_b64
+; GCN: s_endpgm
 define void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
-; SI: ds_or_b64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_or_b64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
@ -375,16 +378,16 @@ define void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
 }

 ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64:
-; SI: ds_xor_b64
-; SI: s_endpgm
+; GCN: ds_xor_b64
+; GCN: s_endpgm
 define void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
-; SI: ds_xor_b64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_xor_b64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
@ -399,16 +402,16 @@ define void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
 ; }

 ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64:
-; SI: ds_min_i64
-; SI: s_endpgm
+; GCN: ds_min_i64
+; GCN: s_endpgm
 define void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
-; SI: ds_min_i64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_min_i64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
@ -416,16 +419,16 @@ define void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
 }

 ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64:
-; SI: ds_max_i64
-; SI: s_endpgm
+; GCN: ds_max_i64
+; GCN: s_endpgm
 define void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
-; SI: ds_max_i64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_max_i64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
@ -433,16 +436,16 @@ define void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
 }

 ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64:
-; SI: ds_min_u64
-; SI: s_endpgm
+; GCN: ds_min_u64
+; GCN: s_endpgm
 define void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
-; SI: ds_min_u64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_min_u64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
@ -450,16 +453,16 @@ define void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
 }

 ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64:
-; SI: ds_max_u64
-; SI: s_endpgm
+; GCN: ds_max_u64
+; GCN: s_endpgm
 define void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
 }

 ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
-; SI: ds_max_u64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_max_u64 {{.*}} offset:32
+; GCN: s_endpgm
 define void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
--- a/llvm/test/CodeGen/R600/operand-spacing.ll
+++ b/llvm/test/CodeGen/R600/operand-spacing.ll
@ -1,13 +1,16 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=VI -check-prefix=GCN %s

 ; Make sure there isn't an extra space between the instruction name and first operands.

-; SI-LABEL: {{^}}add_f32:
+; GCN-LABEL: {{^}}add_f32:
 ; SI-DAG: s_load_dword [[SREGA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 ; SI-DAG: s_load_dword [[SREGB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: v_mov_b32_e32 [[VREGB:v[0-9]+]], [[SREGB]]
-; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SREGA]], [[VREGB]]
-; SI: buffer_store_dword [[RESULT]],
+; VI-DAG: s_load_dword [[SREGA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI-DAG: s_load_dword [[SREGB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; GCN: v_mov_b32_e32 [[VREGB:v[0-9]+]], [[SREGB]]
+; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SREGA]], [[VREGB]]
+; GCN: buffer_store_dword [[RESULT]],
 define void @add_f32(float addrspace(1)* %out, float %a, float %b) {
  %result = fadd float %a, %b
  store float %result, float addrspace(1)* %out
--- a/llvm/test/CodeGen/R600/private-memory.ll
+++ b/llvm/test/CodeGen/R600/private-memory.ll
@ -1,6 +1,8 @@
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
 ; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
 ; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC

 declare i32 @llvm.r600.read.tidig.x() nounwind readnone

--- a/llvm/test/CodeGen/R600/schedule-kernel-arg-loads.ll
+++ b/llvm/test/CodeGen/R600/schedule-kernel-arg-loads.ll
@ -1,10 +1,18 @@
 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=VI %s

 ; FUNC-LABEL: {{^}}cluster_arg_loads:
 ; SI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x9
 ; SI-NEXT: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
 ; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
 ; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xe
+; VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x24
+; VI-NEXT: s_nop 0
+; VI-NEXT: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI-NEXT: s_nop 0
+; VI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
+; VI-NEXT: s_nop 0
+; VI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x38
 define void @cluster_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) nounwind {
  store i32 %x, i32 addrspace(1)* %out0, align 4
  store i32 %y, i32 addrspace(1)* %out1, align 4
--- a/llvm/test/CodeGen/R600/scratch-buffer.ll
+++ b/llvm/test/CodeGen/R600/scratch-buffer.ll
@ -1,4 +1,5 @@
 ; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s

 ; When a frame index offset is more than 12-bits, make sure we don't store
 ; it in mubuf's offset field.
--- a/llvm/test/CodeGen/R600/sdivrem64.ll
+++ b/llvm/test/CodeGen/R600/sdivrem64.ll
@ -1,4 +1,5 @@
-;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s
+;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s
 ;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s

 ;FUNC-LABEL: {{^}}test_sdiv:
@ -35,39 +36,40 @@
 ;EG: BFE_UINT
 ;EG: BFE_UINT

-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI: v_bfe_u32
-;SI-NOT: v_mad_f32
-;SI-NOT: v_lshr_64
-;SI: s_endpgm
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN-NOT: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
 define void @test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
  %result = sdiv i64 %x, %y
  store i64 %result, i64 addrspace(1)* %out
@ -108,39 +110,40 @@ define void @test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG: BFE_UINT
 ;EG: AND_INT {{.*}}, 1,

-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI-NOT: v_mad_f32
-;SI-NOT: v_lshr_64
-;SI: s_endpgm
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN-NOT: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
 define void @test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
  %result = urem i64 %x, %y
  store i64 %result, i64 addrspace(1)* %out
@ -151,10 +154,11 @@ define void @test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG: RECIP_UINT
 ;EG-NOT: BFE_UINT

-;SI-NOT: s_bfe_u32
-;SI-NOT: v_mad_f32
-;SI-NOT: v_lshr_64
-;SI: s_endpgm
+;GCN-NOT: s_bfe_u32
+;GCN-NOT: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
 define void @test_sdiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
  %1 = ashr i64 %x, 33
  %2 = ashr i64 %y, 33
@ -167,10 +171,11 @@ define void @test_sdiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG: RECIP_UINT
 ;EG-NOT: BFE_UINT

-;SI-NOT: s_bfe_u32
-;SI-NOT: v_mad_f32
-;SI-NOT: v_lshr_64
-;SI: s_endpgm
+;GCN-NOT: s_bfe_u32
+;GCN-NOT: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
 define void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
  %1 = ashr i64 %x, 33
  %2 = ashr i64 %y, 33
@ -186,10 +191,11 @@ define void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG-NOT: RECIP_UINT
 ;EG-NOT: BFE_UINT

-;SI-NOT: s_bfe_u32
-;SI: v_mad_f32
-;SI-NOT: v_lshr_64
-;SI: s_endpgm
+;GCN-NOT: s_bfe_u32
+;GCN: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
 define void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
  %1 = ashr i64 %x, 40
  %2 = ashr i64 %y, 40
@ -205,10 +211,11 @@ define void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG-NOT: RECIP_UINT
 ;EG-NOT: BFE_UINT

-;SI-NOT: s_bfe_u32
-;SI: v_mad_f32
-;SI-NOT: v_lshr_64
-;SI: s_endpgm
+;GCN-NOT: s_bfe_u32
+;GCN: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
 define void @test_srem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
  %1 = ashr i64 %x, 40
  %2 = ashr i64 %y, 40
--- a/llvm/test/CodeGen/R600/setcc-opt.ll
+++ b/llvm/test/CodeGen/R600/setcc-opt.ll
@ -1,12 +1,13 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

 ; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0:
-; SI-NOT: v_cmp
-; SI: v_cmp_ne_i32_e32 vcc,
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; SI-NEXT:buffer_store_byte [[RESULT]]
-; SI-NEXT: s_endpgm
+; GCN-NOT: v_cmp
+; GCN: v_cmp_ne_i32_e32 vcc,
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN-NEXT: s_endpgm

 ; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
 ; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
@ -19,11 +20,11 @@ define void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
 }

 ; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0:
-; SI-NOT: v_cmp
-; SI: v_cmp_ne_i32_e32 vcc,
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; SI-NEXT: buffer_store_byte [[RESULT]]
-; SI-NEXT: s_endpgm
+; GCN-NOT: v_cmp
+; GCN: v_cmp_ne_i32_e32 vcc,
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN-NEXT: s_endpgm

 ; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
 ; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
@ -37,12 +38,12 @@ define void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind

 ; This really folds away to false
 ; FUNC-LABEL: {{^}}sext_bool_icmp_eq_1:
-; SI: v_cmp_eq_i32_e32 vcc,
-; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
-; SI-NEXT: v_cmp_eq_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}}
-; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
-; SI-NEXT: buffer_store_byte [[TMP]]
-; SI-NEXT: s_endpgm
+; GCN: v_cmp_eq_i32_e32 vcc,
+; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
+; GCN-NEXT: v_cmp_eq_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}}
+; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
+; GCN-NEXT: buffer_store_byte [[TMP]]
+; GCN-NEXT: s_endpgm
 define void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
@ -53,12 +54,12 @@ define void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind

 ; This really folds away to true
 ; FUNC-LABEL: {{^}}sext_bool_icmp_ne_1:
-; SI: v_cmp_ne_i32_e32 vcc,
-; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
-; SI-NEXT: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}}
-; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
-; SI-NEXT: buffer_store_byte [[TMP]]
-; SI-NEXT: s_endpgm
+; GCN: v_cmp_ne_i32_e32 vcc,
+; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
+; GCN-NEXT: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}}
+; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
+; GCN-NEXT: buffer_store_byte [[TMP]]
+; GCN-NEXT: s_endpgm
 define void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
@ -68,11 +69,11 @@ define void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
 }

 ; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0:
-; SI-NOT: v_cmp
-; SI: v_cmp_ne_i32_e32 vcc,
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; SI-NEXT: buffer_store_byte [[RESULT]]
-; SI-NEXT: s_endpgm
+; GCN-NOT: v_cmp
+; GCN: v_cmp_ne_i32_e32 vcc,
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN-NEXT: s_endpgm
 define void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
@ -82,11 +83,11 @@ define void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
 }

 ; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0:
-; SI-NOT: v_cmp
-; SI: v_cmp_ne_i32_e32 vcc,
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; SI-NEXT: buffer_store_byte [[RESULT]]
-; SI-NEXT: s_endpgm
+; GCN-NOT: v_cmp
+; GCN: v_cmp_ne_i32_e32 vcc,
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN-NEXT: s_endpgm
 define void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
@ -96,11 +97,11 @@ define void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
 }

 ; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1:
-; SI-NOT: v_cmp
-; SI: v_cmp_eq_i32_e32 vcc,
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; SI-NEXT: buffer_store_byte [[RESULT]]
-; SI-NEXT: s_endpgm
+; GCN-NOT: v_cmp
+; GCN: v_cmp_eq_i32_e32 vcc,
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN-NEXT: s_endpgm
 define void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
@ -110,10 +111,10 @@ define void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
 }

 ; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1:
-; SI-NOT: v_cmp
-; SI: v_cmp_eq_i32_e32 vcc,
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; SI-NEXT: buffer_store_byte [[RESULT]]
+; GCN-NOT: v_cmp
+; GCN: v_cmp_eq_i32_e32 vcc,
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; GCN-NEXT: buffer_store_byte [[RESULT]]
 define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
@ -125,11 +126,13 @@ define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
 ; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k:
 ; SI-DAG: s_load_dword [[A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 ; SI-DAG: s_load_dword [[B:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
-; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[VB]], 2{{$}}
-; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
-; SI: buffer_store_byte
-; SI: s_endpgm
+; VI-DAG: s_load_dword [[A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI-DAG: s_load_dword [[B:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
+; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[VB]], 2{{$}}
+; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
+; GCN: buffer_store_byte
+; GCN: s_endpgm
 define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
@ -139,12 +142,12 @@ define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
 }

 ; FUNC-LABEL: {{^}}cmp_zext_k_i8max:
-; SI: buffer_load_ubyte [[B:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
-; SI: v_mov_b32_e32 [[K255:v[0-9]+]], 0xff{{$}}
-; SI: v_cmp_ne_i32_e32 vcc, [[B]], [[K255]]
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; SI-NEXT: buffer_store_byte [[RESULT]]
-; SI: s_endpgm
+; GCN: buffer_load_ubyte [[B:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
+; GCN: v_mov_b32_e32 [[K255:v[0-9]+]], 0xff{{$}}
+; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[K255]]
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN: s_endpgm
 define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = zext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, 255
@ -153,11 +156,11 @@ define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
 }

 ; FUNC-LABEL: {{^}}cmp_sext_k_neg1:
-; SI: buffer_load_sbyte [[B:v[0-9]+]]
-; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
-; SI-NEXT: buffer_store_byte [[RESULT]]
-; SI: s_endpgm
+; GCN: buffer_load_sbyte [[B:v[0-9]+]]
+; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
+; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN: s_endpgm
 define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
  %b = load i8 addrspace(1)* %b.ptr
  %b.ext = sext i8 %b to i32
@ -167,11 +170,11 @@ define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nou
 }

 ; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_sext_arg:
-; SI: s_load_dword [[B:s[0-9]+]]
-; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
-; SI-NEXT: buffer_store_byte [[RESULT]]
-; SI: s_endpgm
+; GCN: s_load_dword [[B:s[0-9]+]]
+; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
+; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN: s_endpgm
 define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) nounwind {
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
@ -184,12 +187,12 @@ define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) n
 ; Should do a buffer_load_sbyte and compare with -1

 ; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg:
-; SI-DAG: buffer_load_ubyte [[B:v[0-9]+]]
-; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff{{$}}
-; SI: v_cmp_ne_i32_e32 vcc, [[B]], [[K]]{{$}}
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; SI-NEXT: buffer_store_byte [[RESULT]]
-; SI: s_endpgm
+; GCN-DAG: buffer_load_ubyte [[B:v[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff{{$}}
+; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[K]]{{$}}
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN: s_endpgm
 define void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
@ -198,9 +201,9 @@ define void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
 }

 ; FUNC-LABEL: {{^}}cmp_zext_k_neg1:
-; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
-; SI: buffer_store_byte [[RESULT]]
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
+; GCN: buffer_store_byte [[RESULT]]
+; GCN: s_endpgm
 define void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = zext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
@ -209,9 +212,9 @@ define void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
 }

 ; FUNC-LABEL: {{^}}zext_bool_icmp_ne_k:
-; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
-; SI: buffer_store_byte [[RESULT]]
-; SI-NEXT: s_endpgm
+; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
+; GCN: buffer_store_byte [[RESULT]]
+; GCN-NEXT: s_endpgm
 define void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
@ -221,9 +224,9 @@ define void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
 }

 ; FUNC-LABEL: {{^}}zext_bool_icmp_eq_k:
-; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
-; SI: buffer_store_byte [[RESULT]]
-; SI-NEXT: s_endpgm
+; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_byte [[RESULT]]
+; GCN-NEXT: s_endpgm
 define void @zext_bool_icmp_eq_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
--- a/llvm/test/CodeGen/R600/smrd.ll
+++ b/llvm/test/CodeGen/R600/smrd.ll
@ -1,8 +1,10 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s

 ; SMRD load with an immediate offset.
-; CHECK-LABEL: {{^}}smrd0:
-; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
+; GCN-LABEL: {{^}}smrd0:
+; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
+; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
 define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
 entry:
  %0 = getelementptr i32 addrspace(2)* %ptr, i64 1
@ -12,8 +14,9 @@ entry:
 }

 ; SMRD load with the largest possible immediate offset.
-; CHECK-LABEL: {{^}}smrd1:
-; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
+; GCN-LABEL: {{^}}smrd1:
+; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
+; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
 define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
 entry:
  %0 = getelementptr i32 addrspace(2)* %ptr, i64 255
@ -23,10 +26,11 @@ entry:
 }

 ; SMRD load with an offset greater than the largest possible immediate.
-; CHECK-LABEL: {{^}}smrd2:
-; CHECK: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
-; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
-; CHECK: s_endpgm
+; GCN-LABEL: {{^}}smrd2:
+; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
+; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
+; GCN: s_endpgm
 define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
 entry:
  %0 = getelementptr i32 addrspace(2)* %ptr, i64 256
@ -36,17 +40,18 @@ entry:
 }

 ; SMRD load with a 64-bit offset
-; CHECK-LABEL: {{^}}smrd3:
+; GCN-LABEL: {{^}}smrd3:
 ; FIXME: There are too many copies here because we don't fold immediates
 ;        through REG_SEQUENCE
-; CHECK: s_mov_b32 s[[SLO:[0-9]+]], 0 ;
-; CHECK: s_mov_b32 s[[SHI:[0-9]+]], 4
-; CHECK: s_mov_b32 s[[SSLO:[0-9]+]], s[[SLO]]
-; CHECK-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SSLO]]
-; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; SI: s_mov_b32 s[[SLO:[0-9]+]], 0 ;
+; SI: s_mov_b32 s[[SHI:[0-9]+]], 4
+; SI: s_mov_b32 s[[SSLO:[0-9]+]], s[[SLO]]
+; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SSLO]]
+; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
 ; FIXME: We should be able to use s_load_dword here
-; CHECK: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64
-; CHECK: s_endpgm
+; SI: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64
+; FIXME: Add VI checks for the 64-bit offset load; only s_endpgm is verified on VI.
+; GCN: s_endpgm
 define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
 entry:
  %0 = getelementptr i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
@ -56,8 +61,9 @@ entry:
 }

 ; SMRD load using the load.const intrinsic with an immediate offset
-; CHECK-LABEL: {{^}}smrd_load_const0:
-; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
+; GCN-LABEL: {{^}}smrd_load_const0:
+; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
+; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
 define void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
@ -69,8 +75,9 @@ main_body:

 ; SMRD load using the load.const intrinsic with the largest possible immediate
 ; offset.
-; CHECK-LABEL: {{^}}smrd_load_const1:
-; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
+; GCN-LABEL: {{^}}smrd_load_const1:
+; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
+; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
 define void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
@ -82,9 +89,10 @@ main_body:
 ; SMRD load using the load.const intrinsic with an offset greater than the
 ; largest possible
 ; immediate offset.
-; CHECK-LABEL: {{^}}smrd_load_const2:
-; CHECK: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
-; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; GCN-LABEL: {{^}}smrd_load_const2:
+; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
+; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
 define void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
--- a/llvm/test/CodeGen/R600/srl.ll
+++ b/llvm/test/CodeGen/R600/srl.ll
@ -1,8 +1,10 @@
 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

 ; FUNC-LABEL: {{^}}lshr_i32:
 ; SI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 ; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
@ -17,6 +19,9 @@ define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 ; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 ; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

+; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
 ; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
@ -34,6 +39,11 @@ define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i
 ; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 ; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

+; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
 ; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
@ -49,6 +59,7 @@ define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i

 ; FUNC-LABEL: {{^}}lshr_i64:
 ; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}

 ; EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
 ; EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
@ -74,6 +85,9 @@ define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
 ; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
 ; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}

+; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
+; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
+
 ; EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
 ; EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
 ; EG-DAG: LSHL {{\*? *}}[[COMPSHA]]
@ -111,6 +125,11 @@ define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i
 ; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
 ; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}

+; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
+; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
+; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
+; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
+
 ; EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
 ; EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
 ; EG-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]]
--- a/llvm/test/CodeGen/R600/udivrem64.ll
+++ b/llvm/test/CodeGen/R600/udivrem64.ll
@ -1,5 +1,5 @@
-;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
-;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s
+;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s
 ;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s

 ;FUNC-LABEL: {{^}}test_udiv:
@ -36,39 +36,40 @@
 ;EG: BFE_UINT
 ;EG: BFE_UINT

-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI-NOT: v_mad_f32
-;SI-NOT: v_lshr_64
-;SI: s_endpgm
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN-NOT: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
 define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
  %result = udiv i64 %x, %y
  store i64 %result, i64 addrspace(1)* %out
@ -109,39 +110,40 @@ define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG: BFE_UINT
 ;EG: AND_INT {{.*}}, 1,

-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI: s_bfe_u32
-;SI-NOT: v_mad_f32
-;SI-NOT: v_lshr_64
-;SI: s_endpgm
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN-NOT: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
 define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
  %result = urem i64 %x, %y
  store i64 %result, i64 addrspace(1)* %out
@ -152,10 +154,11 @@ define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG: RECIP_UINT
 ;EG-NOT: BFE_UINT

-;SI-NOT: s_bfe_u32
-;SI-NOT: v_mad_f32
-;SI-NOT: v_lshr_64
-;SI: s_endpgm
+;GCN-NOT: s_bfe_u32
+;GCN-NOT: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
 define void @test_udiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
  %1 = lshr i64 %x, 33
  %2 = lshr i64 %y, 33
@ -168,10 +171,11 @@ define void @test_udiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG: RECIP_UINT
 ;EG-NOT: BFE_UINT

-;SI-NOT: s_bfe_u32
-;SI-NOT: v_mad_f32
-;SI-NOT: v_lshr_64
-;SI: s_endpgm
+;GCN-NOT: s_bfe_u32
+;GCN-NOT: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
 define void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
  %1 = lshr i64 %x, 33
  %2 = lshr i64 %y, 33
@ -187,9 +191,10 @@ define void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG-NOT: RECIP_UINT
 ;EG-NOT: BFE_UINT

-;SI-NOT: v_lshr_64
-;SI: v_mad_f32
-;SI: s_endpgm
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: v_mad_f32
+;GCN: s_endpgm
 define void @test_udiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
  %1 = lshr i64 %x, 40
  %2 = lshr i64 %y, 40
@ -205,9 +210,10 @@ define void @test_udiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG-NOT: RECIP_UINT
 ;EG-NOT: BFE_UINT

-;SI-NOT: v_lshr_64
-;SI: v_mad_f32
-;SI: s_endpgm
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: v_mad_f32
+;GCN: s_endpgm
 define void @test_urem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
  %1 = lshr i64 %x, 40
  %2 = lshr i64 %y, 40
--- a/llvm/test/CodeGen/R600/use-sgpr-multiple-times.ll
+++ b/llvm/test/CodeGen/R600/use-sgpr-multiple-times.ll
@ -1,80 +1,87 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s

 declare float @llvm.fma.f32(float, float, float) #1
 declare float @llvm.fmuladd.f32(float, float, float) #1
 declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) #1


-; SI-LABEL: {{^}}test_sgpr_use_twice_binop:
-; SI: s_load_dword [[SGPR:s[0-9]+]],
-; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
-; SI: buffer_store_dword [[RESULT]]
+; GCN-LABEL: {{^}}test_sgpr_use_twice_binop:
+; GCN: s_load_dword [[SGPR:s[0-9]+]],
+; GCN: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
+; GCN: buffer_store_dword [[RESULT]]
 define void @test_sgpr_use_twice_binop(float addrspace(1)* %out, float %a) #0 {
  %dbl = fadd float %a, %a ; both fadd operands are the same argument %a
  store float %dbl, float addrspace(1)* %out, align 4
  ret void
 }

-; SI-LABEL: {{^}}test_sgpr_use_three_ternary_op:
-; SI: s_load_dword [[SGPR:s[0-9]+]],
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
-; SI: buffer_store_dword [[RESULT]]
+; GCN-LABEL: {{^}}test_sgpr_use_three_ternary_op:
+; GCN: s_load_dword [[SGPR:s[0-9]+]],
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
+; GCN: buffer_store_dword [[RESULT]]
 define void @test_sgpr_use_three_ternary_op(float addrspace(1)* %out, float %a) #0 {
  %fma = call float @llvm.fma.f32(float %a, float %a, float %a) #1 ; all three fma operands are the same argument %a
  store float %fma, float addrspace(1)* %out, align 4
  ret void
 }

-; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
 ; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 ; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
-; SI: buffer_store_dword [[RESULT]]
+; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
+; GCN: buffer_store_dword [[RESULT]]
 define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, float %a, float %b) #0 {
  %fma = call float @llvm.fma.f32(float %a, float %a, float %b) #1 ; %a is the first and second fma operand, %b the third
  store float %fma, float addrspace(1)* %out, align 4
  ret void
 }

-; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
 ; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 ; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
-; SI: buffer_store_dword [[RESULT]]
+; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
+; GCN: buffer_store_dword [[RESULT]]
 define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
  %fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1 ; %a is the first and third fma operand, %b the second
  store float %fma, float addrspace(1)* %out, align 4
  ret void
 }

-; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
 ; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 ; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
-; SI: buffer_store_dword [[RESULT]]
+; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
+; GCN: buffer_store_dword [[RESULT]]
 define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
  %fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1 ; %b is the first fma operand, %a the second and third
  store float %fma, float addrspace(1)* %out, align 4
  ret void
 }

-; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
-; SI: s_load_dword [[SGPR:s[0-9]+]]
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
-; SI: buffer_store_dword [[RESULT]]
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
+; GCN: s_load_dword [[SGPR:s[0-9]+]]
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
+; GCN: buffer_store_dword [[RESULT]]
 define void @test_sgpr_use_twice_ternary_op_a_a_imm(float addrspace(1)* %out, float %a) #0 {
  %fma = call float @llvm.fma.f32(float %a, float %a, float 2.0) #1 ; %a is the first and second fma operand, the constant 2.0 the third
  store float %fma, float addrspace(1)* %out, align 4
  ret void
 }

-; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
-; SI: s_load_dword [[SGPR:s[0-9]+]]
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
-; SI: buffer_store_dword [[RESULT]]
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
+; GCN: s_load_dword [[SGPR:s[0-9]+]]
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
+; GCN: buffer_store_dword [[RESULT]]
 define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, float %a) #0 {
  %fma = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1
  store float %fma, float addrspace(1)* %out, align 4
@ -82,10 +89,10 @@ define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, fl
 }

 ; Don't use fma since fma c, x, y is canonicalized to fma x, c, y
-; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
-; SI: s_load_dword [[SGPR:s[0-9]+]]
-; SI: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
-; SI: buffer_store_dword [[RESULT]]
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
+; GCN: s_load_dword [[SGPR:s[0-9]+]]
+; GCN: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
+; GCN: buffer_store_dword [[RESULT]]
 define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32 %a) #0 {
  %fma = call i32 @llvm.AMDGPU.imad24(i32 2, i32 %a, i32 %a) #1
  store i32 %fma, i32 addrspace(1)* %out, align 4
--- a/llvm/test/CodeGen/R600/work-item-intrinsics.ll
+++ b/llvm/test/CodeGen/R600/work-item-intrinsics.ll
@ -1,14 +1,15 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s


 ; FUNC-LABEL: {{^}}ngroups_x:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 ; EG: MOV [[VAL]], KC0[0].X

-; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
 define void @ngroups_x (i32 addrspace(1)* %out) {
 entry:
  %0 = call i32 @llvm.r600.read.ngroups.x() #0
@ -21,8 +22,9 @@ entry:
 ; EG: MOV [[VAL]], KC0[0].Y

 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
 define void @ngroups_y (i32 addrspace(1)* %out) {
 entry:
  %0 = call i32 @llvm.r600.read.ngroups.y() #0
@ -35,8 +37,9 @@ entry:
 ; EG: MOV [[VAL]], KC0[0].Z

 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
 define void @ngroups_z (i32 addrspace(1)* %out) {
 entry:
  %0 = call i32 @llvm.r600.read.ngroups.z() #0
@ -49,8 +52,9 @@ entry:
 ; EG: MOV [[VAL]], KC0[0].W

 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
 define void @global_size_x (i32 addrspace(1)* %out) {
 entry:
  %0 = call i32 @llvm.r600.read.global.size.x() #0
@ -63,8 +67,9 @@ entry:
 ; EG: MOV [[VAL]], KC0[1].X

 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
 define void @global_size_y (i32 addrspace(1)* %out) {
 entry:
  %0 = call i32 @llvm.r600.read.global.size.y() #0
@ -77,8 +82,9 @@ entry:
 ; EG: MOV [[VAL]], KC0[1].Y

 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
 define void @global_size_z (i32 addrspace(1)* %out) {
 entry:
  %0 = call i32 @llvm.r600.read.global.size.z() #0
@ -91,8 +97,9 @@ entry:
 ; EG: MOV [[VAL]], KC0[1].Z

 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
 define void @local_size_x (i32 addrspace(1)* %out) {
 entry:
  %0 = call i32 @llvm.r600.read.local.size.x() #0
@ -105,8 +112,9 @@ entry:
 ; EG: MOV [[VAL]], KC0[1].W

 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
 define void @local_size_y (i32 addrspace(1)* %out) {
 entry:
  %0 = call i32 @llvm.r600.read.local.size.y() #0
@ -119,8 +127,9 @@ entry:
 ; EG: MOV [[VAL]], KC0[2].X

 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
 define void @local_size_z (i32 addrspace(1)* %out) {
 entry:
  %0 = call i32 @llvm.r600.read.local.size.z() #0
@ -133,8 +142,9 @@ entry:
 ; EG: MOV [[VAL]], KC0[2].Z

 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
 define void @get_work_dim (i32 addrspace(1)* %out) {
 entry:
  %0 = call i32 @llvm.AMDGPU.read.workdim() #0
@ -147,8 +157,8 @@ entry:
 ; kernel arguments, but this may change in the future.

 ; FUNC-LABEL: {{^}}tgid_x:
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4
-; SI: buffer_store_dword [[VVAL]]
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4
+; GCN: buffer_store_dword [[VVAL]]
 define void @tgid_x (i32 addrspace(1)* %out) {
 entry:
  %0 = call i32 @llvm.r600.read.tgid.x() #0
@ -157,8 +167,8 @@ entry:
 }

 ; FUNC-LABEL: {{^}}tgid_y:
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5
-; SI: buffer_store_dword [[VVAL]]
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5
+; GCN: buffer_store_dword [[VVAL]]
 define void @tgid_y (i32 addrspace(1)* %out) {
 entry:
  %0 = call i32 @llvm.r600.read.tgid.y() #0
@ -167,8 +177,8 @@ entry:
 }

 ; FUNC-LABEL: {{^}}tgid_z:
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6
-; SI: buffer_store_dword [[VVAL]]
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6
+; GCN: buffer_store_dword [[VVAL]]
 define void @tgid_z (i32 addrspace(1)* %out) {
 entry:
  %0 = call i32 @llvm.r600.read.tgid.z() #0
@ -177,7 +187,7 @@ entry:
 }

 ; FUNC-LABEL: {{^}}tidig_x:
-; SI: buffer_store_dword v0
+; GCN: buffer_store_dword v0
 define void @tidig_x (i32 addrspace(1)* %out) {
 entry:
  %0 = call i32 @llvm.r600.read.tidig.x() #0
@ -186,7 +196,7 @@ entry:
 }

 ; FUNC-LABEL: {{^}}tidig_y:
-; SI: buffer_store_dword v1
+; GCN: buffer_store_dword v1
 define void @tidig_y (i32 addrspace(1)* %out) {
 entry:
  %0 = call i32 @llvm.r600.read.tidig.y() #0
@ -195,7 +205,7 @@ entry:
 }

 ; FUNC-LABEL: {{^}}tidig_z:
-; SI: buffer_store_dword v2
+; GCN: buffer_store_dword v2
 define void @tidig_z (i32 addrspace(1)* %out) {
 entry:
  %0 = call i32 @llvm.r600.read.tidig.z() #0