[InstCombine] Dropping redundant masking before left-shift [3/5] (PR42563)

Summary:
If we have a pattern that keeps only some low bits of a value and then
left-shifts the result, and none of the bits that remain after the final
shift are affected by the mask, then the mask is redundant and we can omit it.

There are many variants to this pattern:
d. `(x & ((-1 << MaskShAmt) >> MaskShAmt)) << ShiftShAmt`
All these patterns can be simplified to just:
`x << ShiftShAmt`
iff:
d. `(ShiftShAmt-MaskShAmt) s>= 0` (i.e. `ShiftShAmt u>= MaskShAmt`)

alive proofs:
d: https://rise4fun.com/Alive/I5Y

For now let's start with patterns where both shift amounts are variable,
with a trivial constant "offset" between them, since I believe this is
both the simplest case to handle and the most common one.
But again, there are likely other variants where we could use
ValueTracking/ConstantRange to handle more cases.

https://bugs.llvm.org/show_bug.cgi?id=42563

Differential Revision: https://reviews.llvm.org/D64519

llvm-svn: 366538
This commit is contained in:
Roman Lebedev 2019-07-19 08:26:37 +00:00
parent 2ebe57386d
commit 3c212ce305
2 changed files with 16 additions and 12 deletions

View File

@ -73,11 +73,12 @@ reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0,
// a) (x & ((1 << MaskShAmt) - 1)) << ShiftShAmt
// b) (x & (~(-1 << MaskShAmt))) << ShiftShAmt
// c) (x & (-1 >> MaskShAmt)) << ShiftShAmt
// d) (x & ((-1 << MaskShAmt) >> MaskShAmt)) << ShiftShAmt
// All these patterns can be simplified to just:
// x << ShiftShAmt
// iff:
// a,b) (MaskShAmt+ShiftShAmt) u>= bitwidth(x)
// c) (ShiftShAmt-MaskShAmt) s>= 0 (i.e. ShiftShAmt u>= MaskShAmt)
// c,d) (ShiftShAmt-MaskShAmt) s>= 0 (i.e. ShiftShAmt u>= MaskShAmt)
static Instruction *
dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
const SimplifyQuery &SQ) {
@ -95,6 +96,9 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
auto MaskB = m_Xor(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_AllOnes());
// (-1 >> MaskShAmt)
auto MaskC = m_Shr(m_AllOnes(), m_Value(MaskShAmt));
// ((-1 << MaskShAmt) >> MaskShAmt)
auto MaskD =
m_Shr(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_Deferred(MaskShAmt));
Value *X;
if (match(Masked, m_c_And(m_CombineOr(MaskA, MaskB), m_Value(X)))) {
@ -111,7 +115,7 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
APInt(BitWidth, BitWidth))))
return nullptr;
// All good, we can do this fold.
} else if (match(Masked, m_c_And(MaskC, m_Value(X)))) {
} else if (match(Masked, m_c_And(m_CombineOr(MaskC, MaskD), m_Value(X)))) {
// Can we simplify (ShiftShAmt-MaskShAmt) ?
Value *ShAmtsDiff =
SimplifySubInst(ShiftShAmt, MaskShAmt, /*IsNSW=*/false, /*IsNUW=*/false,

View File

@ -23,7 +23,7 @@ define i32 @t0_basic(i32 %x, i32 %nbits) {
; CHECK-NEXT: call void @use32(i32 [[T0]])
; CHECK-NEXT: call void @use32(i32 [[T1]])
; CHECK-NEXT: call void @use32(i32 [[T2]])
; CHECK-NEXT: [[T4:%.*]] = shl i32 [[T2]], [[NBITS]]
; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[NBITS]]
; CHECK-NEXT: ret i32 [[T4]]
;
%t0 = shl i32 -1, %nbits
@ -46,7 +46,7 @@ define i32 @t1_bigger_shift(i32 %x, i32 %nbits) {
; CHECK-NEXT: call void @use32(i32 [[T1]])
; CHECK-NEXT: call void @use32(i32 [[T2]])
; CHECK-NEXT: call void @use32(i32 [[T3]])
; CHECK-NEXT: [[T4:%.*]] = shl i32 [[T2]], [[T3]]
; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]]
; CHECK-NEXT: ret i32 [[T4]]
;
%t0 = shl i32 -1, %nbits
@ -75,7 +75,7 @@ define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) {
; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]])
; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]])
; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T3]])
; CHECK-NEXT: [[T4:%.*]] = shl <3 x i32> [[T2]], [[T3]]
; CHECK-NEXT: [[T4:%.*]] = shl <3 x i32> [[X]], [[T3]]
; CHECK-NEXT: ret <3 x i32> [[T4]]
;
%t0 = shl <3 x i32> <i32 -1, i32 -1, i32 -1>, %nbits
@ -100,7 +100,7 @@ define <3 x i32> @t3_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) {
; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]])
; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]])
; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T3]])
; CHECK-NEXT: [[T4:%.*]] = shl <3 x i32> [[T2]], [[T3]]
; CHECK-NEXT: [[T4:%.*]] = shl <3 x i32> [[X]], [[T3]]
; CHECK-NEXT: ret <3 x i32> [[T4]]
;
%t0 = shl <3 x i32> <i32 -1, i32 -1, i32 -1>, %nbits
@ -124,7 +124,7 @@ define <3 x i32> @t4_vec_undef(<3 x i32> %x, <3 x i32> %nbits) {
; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]])
; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]])
; CHECK-NEXT: call void @use3xi32(<3 x i32> [[NBITS]])
; CHECK-NEXT: [[T4:%.*]] = shl <3 x i32> [[T2]], [[NBITS]]
; CHECK-NEXT: [[T4:%.*]] = shl <3 x i32> [[X]], [[NBITS]]
; CHECK-NEXT: ret <3 x i32> [[T4]]
;
%t0 = shl <3 x i32> <i32 -1, i32 undef, i32 -1>, %nbits
@ -152,7 +152,7 @@ define i32 @t5_commutativity0(i32 %nbits) {
; CHECK-NEXT: call void @use32(i32 [[T0]])
; CHECK-NEXT: call void @use32(i32 [[T1]])
; CHECK-NEXT: call void @use32(i32 [[T2]])
; CHECK-NEXT: [[T3:%.*]] = shl i32 [[T2]], [[NBITS]]
; CHECK-NEXT: [[T3:%.*]] = shl i32 [[X]], [[NBITS]]
; CHECK-NEXT: ret i32 [[T3]]
;
%x = call i32 @gen32()
@ -178,7 +178,7 @@ define i32 @t6_commutativity1(i32 %nbits0, i32 %nbits1) {
; CHECK-NEXT: call void @use32(i32 [[T2]])
; CHECK-NEXT: call void @use32(i32 [[T3]])
; CHECK-NEXT: call void @use32(i32 [[T4]])
; CHECK-NEXT: [[T5:%.*]] = shl i32 [[T4]], [[NBITS0]]
; CHECK-NEXT: [[T5:%.*]] = shl i32 [[T3]], [[NBITS0]]
; CHECK-NEXT: ret i32 [[T5]]
;
%t0 = shl i32 -1, %nbits0
@ -233,7 +233,7 @@ define i32 @t8_nuw(i32 %x, i32 %nbits) {
; CHECK-NEXT: call void @use32(i32 [[T0]])
; CHECK-NEXT: call void @use32(i32 [[T1]])
; CHECK-NEXT: call void @use32(i32 [[T2]])
; CHECK-NEXT: [[T3:%.*]] = shl nuw i32 [[T2]], [[NBITS]]
; CHECK-NEXT: [[T3:%.*]] = shl i32 [[X]], [[NBITS]]
; CHECK-NEXT: ret i32 [[T3]]
;
%t0 = shl i32 -1, %nbits
@ -254,7 +254,7 @@ define i32 @t9_nsw(i32 %x, i32 %nbits) {
; CHECK-NEXT: call void @use32(i32 [[T0]])
; CHECK-NEXT: call void @use32(i32 [[T1]])
; CHECK-NEXT: call void @use32(i32 [[T2]])
; CHECK-NEXT: [[T3:%.*]] = shl nsw i32 [[T2]], [[NBITS]]
; CHECK-NEXT: [[T3:%.*]] = shl i32 [[X]], [[NBITS]]
; CHECK-NEXT: ret i32 [[T3]]
;
%t0 = shl i32 -1, %nbits
@ -275,7 +275,7 @@ define i32 @t10_nuw_nsw(i32 %x, i32 %nbits) {
; CHECK-NEXT: call void @use32(i32 [[T0]])
; CHECK-NEXT: call void @use32(i32 [[T1]])
; CHECK-NEXT: call void @use32(i32 [[T2]])
; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw i32 [[T2]], [[NBITS]]
; CHECK-NEXT: [[T3:%.*]] = shl i32 [[X]], [[NBITS]]
; CHECK-NEXT: ret i32 [[T3]]
;
%t0 = shl i32 -1, %nbits