Fix a miscompile in the DAG combiner.

Previously, we would incorrectly try to reduce the width of this load, and
would end up transforming:

  (truncate (lshr (sextload i48 <ptr> as i64), 32) to i32)
to
  (truncate (zextload i32 <ptr+4> as i64) to i32)

We lost the sext attached to the load while building the narrower i32 load,
and replaced it with a zext because an lshr always zero-extends its result.

Instead, bail out of this combine when there is a conflict between a sextload
and a zext narrowing. The rest of the DAG combiner still optimizes the code
down to the proper single instruction:

  movswl 6(...),%eax

Which is exactly what we wanted. Previously we read past the end *and* missed
the sign extension:

  movl 6(...), %eax

llvm-svn: 169802
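Below is a standalone arithmetic sketch (plain C++, no LLVM APIs; the byte
values, offsets, and little-endian layout are illustrative assumptions) of the
difference: the original DAG yields the top 16 bits of the 48-bit field
sign-extended to 32 bits, whereas the combined form produced a zero-extending
load that also ran past the end of the object.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // Model the {i16, i48} object from the regression test: 8 bytes, with the
  // 48-bit field in bytes 2..7 (little-endian).  Bytes 8..11 stand in for
  // whatever sits past the object; they exist only so this sketch stays in
  // bounds while mimicking the out-of-bounds load.
  uint8_t mem[12] = {0, 0, 0x11, 0x22, 0x33, 0x44, 0xFE, 0xFF, 0, 0, 0, 0};

  // sextload i48 <ptr> as i64: read the six field bytes, sign-extend bit 47.
  uint64_t raw = 0;
  std::memcpy(&raw, mem + 2, 6);
  int64_t sextload = (raw & (1ULL << 47)) ? int64_t(raw | 0xFFFF000000000000ULL)
                                          : int64_t(raw);

  // Original DAG: (truncate (lshr (sextload i48 <ptr> as i64), 32) to i32).
  // The top 16 field bits (0xFFFE) come out sign-extended: -2, i.e. movswl.
  int32_t correct = int32_t(uint32_t(uint64_t(sextload) >> 32));
  assert(correct == -2);

  // Miscompiled form: (truncate (zextload i32 <ptr+4> as i64) to i32).
  // It reads 4 bytes at offset 6 of the 8-byte object (the last 2 bytes are
  // past its end) and zero-extends, so the sign of the field is lost.
  uint32_t zextload = 0;
  std::memcpy(&zextload, mem + 2 + 4, 4);
  int32_t wrong = int32_t(zextload);
  assert(wrong == 0xFFFE && wrong != correct);
  return 0;
}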
lib/CodeGen/SelectionDAG/DAGCombiner.cpp:

@@ -5068,11 +5068,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
   // At this point, we must have a load or else we can't do the transform.
   if (!isa<LoadSDNode>(N0)) return SDValue();
 
+  // Because a SRL must be assumed to *need* to zero-extend the high bits
+  // (as opposed to anyext the high bits), we can't combine the zextload
+  // lowering of SRL and an sextload.
+  if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
+    return SDValue();
+
   // If the shift amount is larger than the input type then we're not
   // accessing any of the loaded bytes.  If the load was a zextload/extload
   // then the result of the shift+trunc is zero/undef (handled elsewhere).
-  // If the load was a sextload then the result is a splat of the sign bit
-  // of the extended byte.  This is not worth optimizing for.
   if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
     return SDValue();
 }
test/CodeGen/X86/sext-load.ll:

@@ -1,9 +1,30 @@
-; RUN: llc < %s -march=x86 | grep movsbl
+; RUN: llc < %s -march=x86 | FileCheck %s
 
-define i32 @foo(i32 %X) nounwind {
+; When doing sign extension, use the sext-load lowering to take advantage of
+; x86's sign extension during loads.
+;
+; CHECK: test1:
+; CHECK: movsbl {{.*}}, %eax
+; CHECK-NEXT: ret
+define i32 @test1(i32 %X) nounwind {
 entry:
   %tmp12 = trunc i32 %X to i8           ; <i8> [#uses=1]
   %tmp123 = sext i8 %tmp12 to i32       ; <i32> [#uses=1]
   ret i32 %tmp123
 }
 
+; When using a sextload representation, ensure that the sign extension is
+; preserved even when removing shifted-out low bits.
+;
+; CHECK: test2:
+; CHECK: movswl {{.*}}, %eax
+; CHECK-NEXT: ret
+define i32 @test2({i16, [6 x i8]}* %this) {
+entry:
+  %b48 = getelementptr inbounds { i16, [6 x i8] }* %this, i32 0, i32 1
+  %cast = bitcast [6 x i8]* %b48 to i48*
+  %bf.load = load i48* %cast, align 2
+  %bf.ashr = ashr i48 %bf.load, 32
+  %bf.cast = trunc i48 %bf.ashr to i32
+  ret i32 %bf.cast
+}