| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s | 
 |  | 
 | ; Variable-index extract from the constant vector <0.0, 1.0, 2.0, 4.0>. | 
 | ; The checks expect one compare of the index against each of 1, 2 and 3, | 
 | ; feeding a chain of three v_cndmask selects whose last result is stored, | 
 | ; with no buffer_ instruction ahead of that sequence. | 
 | ; GCN-LABEL: {{^}}float4_extelt: | 
 | ; GCN-NOT: buffer_ | 
 | ; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C2:[^,]+]], [[IDX]], 2 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C3:[^,]+]], [[IDX]], 3 | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1.0, [[C1]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], 2.0, [[V1]], [[C2]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], 4.0, [[V2]], [[C3]] | 
 | ; GCN: store_dword v[{{[0-9:]+}}], [[V3]] | 
 | define amdgpu_kernel void @float4_extelt(float addrspace(1)* %out, i32 %sel) { | 
 | bb: | 
 |   %elt = extractelement <4 x float> <float 0.0, float 1.0, float 2.0, float 4.0>, i32 %sel | 
 |   store float %elt, float addrspace(1)* %out | 
 |   ret void | 
 | } | 
 |  | 
 | ; Integer counterpart of the 4-element case: variable-index extract from the | 
 | ; constant vector <0, 1, 2, 4>. The checks expect three index compares and a | 
 | ; three-deep v_cndmask chain ending in the stored value, with no buffer_ | 
 | ; instruction ahead of them. | 
 | ; GCN-LABEL: {{^}}int4_extelt: | 
 | ; GCN-NOT: buffer_ | 
 | ; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C2:[^,]+]], [[IDX]], 2 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C3:[^,]+]], [[IDX]], 3 | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1, [[C1]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], 2, [[V1]], [[C2]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], 4, [[V2]], [[C3]] | 
 | ; GCN: store_dword v[{{[0-9:]+}}], [[V3]] | 
 | define amdgpu_kernel void @int4_extelt(i32 addrspace(1)* %out, i32 %sel) { | 
 | bb: | 
 |   %elt = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 4>, i32 %sel | 
 |   store i32 %elt, i32 addrspace(1)* %out | 
 |   ret void | 
 | } | 
 |  | 
 | ; Variable-index extract from a constant <4 x double>. The checks expect an | 
 | ; equality compare of the index against each of 1, 2 and 3, at least one | 
 | ; v_cndmask controlled by each compare result, and a 64-bit (dwordx2) store, | 
 | ; with no buffer_ instruction ahead of the compare/select sequence. | 
 | ; GCN-LABEL: {{^}}double4_extelt: | 
 | ; GCN-NOT: buffer_ | 
 | ; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1 | 
 | ; GCN-DAG: v_cmp_eq_u32_e64 [[C2:[^,]+]], [[IDX]], 2 | 
 | ; GCN-DAG: v_cmp_eq_u32_e64 [[C3:[^,]+]], [[IDX]], 3 | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C2]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C3]] | 
 | ; GCN: store_dwordx2 v[{{[0-9:]+}}] | 
 | define amdgpu_kernel void @double4_extelt(double addrspace(1)* %out, i32 %sel) { | 
 | bb: | 
 |   %elt = extractelement <4 x double> <double 0.01, double 1.01, double 2.01, double 4.01>, i32 %sel | 
 |   store double %elt, double addrspace(1)* %out | 
 |   ret void | 
 | } | 
 |  | 
 | ; Variable-index extract from the constant vector <1.0, 2.0, 3.0, 4.0> of half. | 
 | ; The checks expect the four 16-bit elements packed into two 32-bit scalar | 
 | ; constants (0x40003c00 and 0x44004200), the index scaled by a left shift of 4 | 
 | ; (16 bits per element), a 64-bit right shift of the packed pair by that | 
 | ; amount, and the low result register stored as a short. No buffer_ | 
 | ; instruction may precede that sequence. | 
 | ; The shifted-index capture uses the digit class [0-9] for the register number. | 
 | ; GCN-LABEL: {{^}}half4_extelt: | 
 | ; GCN-NOT: buffer_ | 
 | ; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x40003c00 | 
 | ; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x44004200 | 
 | ; GCN-DAG: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 4 | 
 | ; GCN:     s_lshr_b64 s{{\[}}[[RL:[0-9]+]]:{{[0-9]+}}], s{{\[}}[[SL]]:[[SH]]], [[SEL]] | 
 | ; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]] | 
 | ; GCN:     store_short v[{{[0-9:]+}}], v[[VRL]] | 
 | define amdgpu_kernel void @half4_extelt(half addrspace(1)* %out, i32 %sel) { | 
 | entry: | 
 |   %ext = extractelement <4 x half> <half 1.0, half 2.0, half 3.0, half 4.0>, i32 %sel | 
 |   store half %ext, half addrspace(1)* %out | 
 |   ret void | 
 | } | 
 |  | 
 | ; Two-element float case: the checks expect a single compare of the index | 
 | ; against 1 and a single v_cndmask choosing between 0 and 1.0, whose result | 
 | ; is stored; no buffer_ instruction may come before them. | 
 | ; GCN-LABEL: {{^}}float2_extelt: | 
 | ; GCN-NOT: buffer_ | 
 | ; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1 | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1.0, [[C1]] | 
 | ; GCN: store_dword v[{{[0-9:]+}}], [[V1]] | 
 | define amdgpu_kernel void @float2_extelt(float addrspace(1)* %out, i32 %sel) { | 
 | bb: | 
 |   %elt = extractelement <2 x float> <float 0.0, float 1.0>, i32 %sel | 
 |   store float %elt, float addrspace(1)* %out | 
 |   ret void | 
 | } | 
 |  | 
 | ; Two-element double case: the checks expect one compare of the index | 
 | ; against 1 and two v_cndmask selects that both use that compare result, | 
 | ; followed by a 64-bit (dwordx2) store; no buffer_ instruction may come | 
 | ; before them. | 
 | ; GCN-LABEL: {{^}}double2_extelt: | 
 | ; GCN-NOT: buffer_ | 
 | ; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1 | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]] | 
 | ; GCN: store_dwordx2 v[{{[0-9:]+}}] | 
 | define amdgpu_kernel void @double2_extelt(double addrspace(1)* %out, i32 %sel) { | 
 | bb: | 
 |   %elt = extractelement <2 x double> <double 0.01, double 1.01>, i32 %sel | 
 |   store double %elt, double addrspace(1)* %out | 
 |   ret void | 
 | } | 
 |  | 
 | ; Variable-index extract from a constant <8 x half>. The checks expect seven | 
 | ; index compares (against 1 through 7) and a seven-deep v_cndmask chain in | 
 | ; which select N consumes the result of select N-1 and the condition from | 
 | ; compare N; the final select is stored as a short. No buffer_ instruction | 
 | ; may precede the sequence. | 
 | ; GCN-LABEL: {{^}}half8_extelt: | 
 | ; GCN-NOT: buffer_ | 
 | ; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C2:[^,]+]], [[IDX]], 2 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C3:[^,]+]], [[IDX]], 3 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C4:[^,]+]], [[IDX]], 4 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C5:[^,]+]], [[IDX]], 5 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C6:[^,]+]], [[IDX]], 6 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C7:[^,]+]], [[IDX]], 7 | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]] | 
 | ; GCN:     store_short v[{{[0-9:]+}}], [[V7]] | 
 | define amdgpu_kernel void @half8_extelt(half addrspace(1)* %out, i32 %sel) { | 
 | bb: | 
 |   %elt = extractelement <8 x half> <half 1.0, half 2.0, half 3.0, half 4.0, half 5.0, half 6.0, half 7.0, half 8.0>, i32 %sel | 
 |   store half %elt, half addrspace(1)* %out | 
 |   ret void | 
 | } | 
 |  | 
 | ; Variable-index extract from the constant <8 x i16> holding 1 through 8. | 
 | ; As in the other 8-element cases, the checks expect seven index compares | 
 | ; and a seven-deep v_cndmask chain whose last result is stored as a short, | 
 | ; with no buffer_ instruction ahead of the sequence. | 
 | ; GCN-LABEL: {{^}}short8_extelt: | 
 | ; GCN-NOT: buffer_ | 
 | ; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C2:[^,]+]], [[IDX]], 2 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C3:[^,]+]], [[IDX]], 3 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C4:[^,]+]], [[IDX]], 4 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C5:[^,]+]], [[IDX]], 5 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C6:[^,]+]], [[IDX]], 6 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C7:[^,]+]], [[IDX]], 7 | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]] | 
 | ; GCN:     store_short v[{{[0-9:]+}}], [[V7]] | 
 | define amdgpu_kernel void @short8_extelt(i16 addrspace(1)* %out, i32 %sel) { | 
 | bb: | 
 |   %elt = extractelement <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i32 %sel | 
 |   store i16 %elt, i16 addrspace(1)* %out | 
 |   ret void | 
 | } | 
 |  | 
 | ; Variable-index extract from the constant <8 x float> holding 1.0 through | 
 | ; 8.0. The checks expect seven index compares and a seven-deep v_cndmask | 
 | ; chain whose last result is stored as a dword, with no buffer_ instruction | 
 | ; ahead of the sequence. | 
 | ; GCN-LABEL: {{^}}float8_extelt: | 
 | ; GCN-NOT: buffer_ | 
 | ; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C2:[^,]+]], [[IDX]], 2 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C3:[^,]+]], [[IDX]], 3 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C4:[^,]+]], [[IDX]], 4 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C5:[^,]+]], [[IDX]], 5 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C6:[^,]+]], [[IDX]], 6 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C7:[^,]+]], [[IDX]], 7 | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]] | 
 | ; GCN:     store_dword v[{{[0-9:]+}}], [[V7]] | 
 | define amdgpu_kernel void @float8_extelt(float addrspace(1)* %out, i32 %sel) { | 
 | bb: | 
 |   %elt = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel | 
 |   store float %elt, float addrspace(1)* %out | 
 |   ret void | 
 | } | 
 |  | 
 | ; Variable-index extract from the constant <16 x float> holding 1.0 through | 
 | ; 16.0. Unlike the smaller vectors, the checks here expect a write to m0, | 
 | ; sixteen v_mov instructions materialising the element constants, and one | 
 | ; v_movrels reading relative to the register that holds 1.0; its result is | 
 | ; stored as a dword. No buffer_ instruction may precede the sequence. | 
 | ; GCN-LABEL: {{^}}float16_extelt: | 
 | ; GCN-NOT: buffer_ | 
 | ; GCN-DAG: s_mov_b32 m0, | 
 | ; GCN-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], 1.0 | 
 | ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0 | 
 | ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000 | 
 | ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0 | 
 | ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40a00000 | 
 | ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40c00000 | 
 | ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40e00000 | 
 | ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41000000 | 
 | ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41100000 | 
 | ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41200000 | 
 | ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41300000 | 
 | ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41400000 | 
 | ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41500000 | 
 | ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41600000 | 
 | ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41700000 | 
 | ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41800000 | 
 | ; GCN-DAG: v_movrels_b32_e32 [[RES:v[0-9]+]], [[VLO]] | 
 | ; GCN:     store_dword v[{{[0-9:]+}}], [[RES]] | 
 | define amdgpu_kernel void @float16_extelt(float addrspace(1)* %out, i32 %sel) { | 
 | bb: | 
 |   %elt = extractelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, i32 %sel | 
 |   store float %elt, float addrspace(1)* %out | 
 |   ret void | 
 | } | 
 |  | 
 | ; Variable-index extract from the constant <16 x double> holding 1.0 through | 
 | ; 16.0. This case has no "no buffer_" restriction: the checks expect 32 | 
 | ; buffer_store_dword instructions (two dwords per double element), then two | 
 | ; buffer_load_dword instructions, then the final store of the result. | 
 | ; NOTE(review): presumably the vector is written out to memory and the | 
 | ; selected element read back - confirm against the backend's lowering. | 
 | ; GCN-LABEL: {{^}}double16_extelt: | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_store_dword | 
 | ; GCN: buffer_load_dword | 
 | ; GCN: buffer_load_dword | 
 | ; GCN: store_dword | 
 | define amdgpu_kernel void @double16_extelt(double addrspace(1)* %out, i32 %sel) { | 
 | bb: | 
 |   %elt = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel | 
 |   store double %elt, double addrspace(1)* %out | 
 |   ret void | 
 | } | 
 |  | 
 | ; Variable-index extract from the constant <8 x i8> holding 1 through 8. | 
 | ; The checks expect the eight bytes packed into two 32-bit scalar constants | 
 | ; (0x4030201 and 0x8070605), the index scaled by a left shift of 3 (8 bits | 
 | ; per element), a 64-bit right shift of the packed pair by that amount, and | 
 | ; the low result register stored as a byte. No buffer_ instruction may | 
 | ; precede that sequence. | 
 | ; The shifted-index capture uses the digit class [0-9] for the register number. | 
 | ; GCN-LABEL: {{^}}byte8_extelt: | 
 | ; GCN-NOT: buffer_ | 
 | ; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x4030201 | 
 | ; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x8070605 | 
 | ; GCN-DAG: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 3 | 
 | ; GCN:     s_lshr_b64 s{{\[}}[[RL:[0-9]+]]:{{[0-9]+}}], s{{\[}}[[SL]]:[[SH]]], [[SEL]] | 
 | ; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]] | 
 | ; GCN:     store_byte v[{{[0-9:]+}}], v[[VRL]] | 
 | define amdgpu_kernel void @byte8_extelt(i8 addrspace(1)* %out, i32 %sel) { | 
 | entry: | 
 |   %ext = extractelement <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i32 %sel | 
 |   store i8 %ext, i8 addrspace(1)* %out | 
 |   ret void | 
 | } | 
 |  | 
 | ; Variable-index extract from the constant <16 x i8> holding 1 through 16. | 
 | ; The checks expect fifteen index compares (against 1 through 15) and a | 
 | ; fifteen-deep v_cndmask chain in which select N consumes the result of | 
 | ; select N-1 together with the condition captured by compare N; the final | 
 | ; select is stored as a byte. No buffer_ instruction may precede the | 
 | ; sequence. | 
 | ; Every captured condition C1..C15 is consumed by exactly one select, so a | 
 | ; select wired to the wrong compare is detected. | 
 | ; GCN-LABEL: {{^}}byte16_extelt: | 
 | ; GCN-NOT: buffer_ | 
 | ; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C2:[^,]+]], [[IDX]], 2 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C3:[^,]+]], [[IDX]], 3 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C4:[^,]+]], [[IDX]], 4 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C5:[^,]+]], [[IDX]], 5 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C6:[^,]+]], [[IDX]], 6 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C7:[^,]+]], [[IDX]], 7 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C8:[^,]+]], [[IDX]], 8 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C9:[^,]+]], [[IDX]], 9 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C10:[^,]+]], [[IDX]], 10 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C11:[^,]+]], [[IDX]], 11 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C12:[^,]+]], [[IDX]], 12 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C13:[^,]+]], [[IDX]], 13 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C14:[^,]+]], [[IDX]], 14 | 
 | ; GCN-DAG: v_cmp_ne_u32_e64 [[C15:[^,]+]], [[IDX]], 15 | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V8:v[0-9]+]], {{[^,]+}}, [[V7]], [[C8]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V9:v[0-9]+]], {{[^,]+}}, [[V8]], [[C9]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V10:v[0-9]+]], {{[^,]+}}, [[V9]], [[C10]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V11:v[0-9]+]], {{[^,]+}}, [[V10]], [[C11]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V12:v[0-9]+]], {{[^,]+}}, [[V11]], [[C12]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V13:v[0-9]+]], {{[^,]+}}, [[V12]], [[C13]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V14:v[0-9]+]], {{[^,]+}}, [[V13]], [[C14]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V15:v[0-9]+]], {{[^,]+}}, [[V14]], [[C15]] | 
 | ; GCN:     store_byte v[{{[0-9:]+}}], [[V15]] | 
 | define amdgpu_kernel void @byte16_extelt(i8 addrspace(1)* %out, i32 %sel) { | 
 | entry: | 
 |   %ext = extractelement <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, i32 %sel | 
 |   store i8 %ext, i8 addrspace(1)* %out | 
 |   ret void | 
 | } | 
 |  | 
 | ; Variable-index extract from the constant <4 x i1> <0, 1, 0, 1>, zero-extended | 
 | ; to i32 before the store. The checks expect the four elements written with | 
 | ; buffer_store_byte (alternating a zero and a one register), one | 
 | ; buffer_load_ubyte, a mask of the loaded value with 1, and a flat dword | 
 | ; store of the masked result. | 
 | ; GCN-LABEL: {{^}}bit4_extelt: | 
 | ; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 | 
 | ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 | 
 | ; GCN-DAG: buffer_store_byte [[ZERO]], | 
 | ; GCN-DAG: buffer_store_byte [[ONE]], | 
 | ; GCN-DAG: buffer_store_byte [[ZERO]], | 
 | ; GCN-DAG: buffer_store_byte [[ONE]], | 
 | ; GCN:     buffer_load_ubyte [[LOAD:v[0-9]+]], | 
 | ; GCN:     v_and_b32_e32 [[RES:v[0-9]+]], 1, [[LOAD]] | 
 | ; GCN:     flat_store_dword v[{{[0-9:]+}}], [[RES]] | 
 | define amdgpu_kernel void @bit4_extelt(i32 addrspace(1)* %out, i32 %sel) { | 
 | bb: | 
 |   %bit = extractelement <4 x i1> <i1 0, i1 1, i1 0, i1 1>, i32 %sel | 
 |   %wide = zext i1 %bit to i32 | 
 |   store i32 %wide, i32 addrspace(1)* %out | 
 |   ret void | 
 | } | 
 |  | 
 | ; GCN-LABEL: {{^}}bit128_extelt: | 
 | ; GCN-NOT: buffer_ | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 1, 0, | 
 | ; GCN-DAG: v_mov_b32_e32 [[LASTIDX:v[0-9]+]], 0x7f | 
 | ; GCN-DAG: v_cmp_ne_u32_e32 [[CL:[^,]+]], s{{[0-9]+}}, [[LASTIDX]] | 
 | ; GCN-DAG: v_cndmask_b32_e{{32|64}} [[VL:v[0-9]+]], 0, v{{[0-9]+}}, [[CL]] | 
 | ; GCN:     v_and_b32_e32 [[RES:v[0-9]+]], 1, [[VL]] | 
 | ; GCN:     store_dword v[{{[0-9:]+}}], [[RES]] | 
 | define amdgpu_kernel void @bit128_extelt(i32 addrspace(1)* %out, i32 %sel) { | 
 | entry: | 
 |   %ext = extractelement <128 x i1> <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, i32 %sel | 
 |   %zext = zext i1 %ext to i32 | 
 |   store i32 %zext, i32 addrspace(1)* %out | 
 |   ret void | 
 | } |