NVPTX: support direct f16 <-> f64 conversions via intrinsics.

Clang may well start emitting these soon, and while it may not be
directly relevant for OpenCL or GLSL, the instructions were just
sitting there waiting to be used.

llvm-svn: 213356
This commit is contained in:
Tim Northover 2014-07-18 08:30:10 +00:00
parent 68446b7253
commit 5e54fe14a4
2 changed files with 50 additions and 0 deletions

View File

@ -799,6 +799,11 @@ def : Pat<(i16 (fp_to_f16 Float32Regs:$a)),
def : Pat<(i16 (fp_to_f16 Float32Regs:$a)),
(CVT_f16_f32 Float32Regs:$a, CvtRN)>;
def : Pat<(f64 (f16_to_fp Int16Regs:$a)),
(CVT_f64_f16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i16 (fp_to_f16 Float64Regs:$a)),
(CVT_f16_f64 Float64Regs:$a, CvtRN)>;
//
// Bitcast
//

View File

@ -0,0 +1,45 @@
; RUN: llc -march=nvptx -verify-machineinstrs < %s | FileCheck %s
declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone
; CHECK-LABEL: @test_convert_fp16_to_fp32
; CHECK: cvt.f32.f16
define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
%val = load i16 addrspace(1)* %in, align 2
%cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
store float %cvt, float addrspace(1)* %out, align 4
ret void
}
; CHECK-LABEL: @test_convert_fp16_to_fp64
; CHECK: cvt.f64.f16
define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
%val = load i16 addrspace(1)* %in, align 2
%cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone
store double %cvt, double addrspace(1)* %out, align 4
ret void
}
; CHECK-LABEL: @test_convert_fp32_to_fp16
; CHECK: cvt.rn.f16.f32
define void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%val = load float addrspace(1)* %in, align 2
%cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone
store i16 %cvt, i16 addrspace(1)* %out, align 4
ret void
}
; CHECK-LABEL: @test_convert_fp64_to_fp16
; CHECK: cvt.rn.f16.f64
define void @test_convert_fp64_to_fp16(i16 addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
%val = load double addrspace(1)* %in, align 2
%cvt = call i16 @llvm.convert.to.fp16.f64(double %val) nounwind readnone
store i16 %cvt, i16 addrspace(1)* %out, align 4
ret void
}