diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 3b15752e2e44..552f92bbd46d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4301,8 +4301,15 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = Node->getOperand(1); Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + + // fcopysign doesn't change anything but the sign bit, so + // (fp_round (fcopysign (fpext a), b)) + // is as precise as + // (fp_round (fpext a)) + // which is a no-op. Mark it as a TRUNCating FP_ROUND. + const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, - Tmp3, DAG.getIntPtrConstant(0, dl))); + Tmp3, DAG.getIntPtrConstant(isTrunc, dl))); break; } case ISD::FFLOOR: diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll index 99dc8e330148..3dd54e646238 100644 --- a/llvm/test/CodeGen/AArch64/f16-instructions.ll +++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll @@ -702,6 +702,21 @@ define half @test_copysign_f64(half %a, double %b) #0 { ret half %r } +; Check that the FP promotion will use a truncating FP_ROUND, so we can fold +; away the (fpext (fp_round )) here. + +; CHECK-LABEL: test_copysign_extended: +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: movi.4s v2, #0x80, lsl #24 +; CHECK-NEXT: bit.16b v0, v1, v2 +; CHECK-NEXT: ret +define float @test_copysign_extended(half %a, half %b) #0 { + %r = call half @llvm.copysign.f16(half %a, half %b) + %xr = fpext half %r to float + ret float %xr +} + ; CHECK-LABEL: test_floor: ; CHECK-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 ; CHECK-NEXT: frintm [[INT32:s[0-9]+]], [[FLOAT32]]