Skip to content

Commit a5f637b

Browse files
committed
[X86] Fold AND(SRL(X,Y),1) -> SETCC(BT(X,Y))
As noticed on PR39174, if we're extracting a single non-constant bit index, then try to use BT+SETCC instead, avoiding the overhead of moving the shift amount into the ECX register and the use of slow x86 variable-shift ops. Differential Revision: https://reviews.llvm.org/D122891
1 parent 36d4e84 commit a5f637b

File tree

2 files changed

+26
-19
lines changed

2 files changed

+26
-19
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47324,6 +47324,19 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
4732447324
if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget))
4732547325
return R;
4732647326

47327+
// Fold AND(SRL(X,Y),1) -> SETCC(BT(X,Y), COND_B) iff Y is not a constant
47328+
// avoids slow variable shift (moving shift amount to ECX etc.)
47329+
if (isOneConstant(N1) && N0->hasOneUse()) {
47330+
SDValue Src = N0;
47331+
while ((Src.getOpcode() == ISD::ZERO_EXTEND ||
47332+
Src.getOpcode() == ISD::TRUNCATE) &&
47333+
Src.getOperand(0)->hasOneUse())
47334+
Src = Src.getOperand(0);
47335+
if (Src.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(Src.getOperand(1)))
47336+
if (SDValue BT = getBT(Src.getOperand(0), Src.getOperand(1), dl, DAG))
47337+
return getSETCC(X86::COND_B, BT, dl, DAG);
47338+
}
47339+
4732747340
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
4732847341
// Attempt to recursively combine a bitmask AND with shuffles.
4732947342
SDValue Op(N, 0);

llvm/test/CodeGen/X86/setcc.ll

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -139,19 +139,17 @@ define zeroext i1 @t6(i32 %a) #0 {
139139
define zeroext i1 @t7(i32 %0) {
140140
; X86-LABEL: t7:
141141
; X86: ## %bb.0:
142-
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
143-
; X86-NEXT: movb $19, %al
144-
; X86-NEXT: shrb %cl, %al
145-
; X86-NEXT: andb $1, %al
142+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
143+
; X86-NEXT: movl $19, %ecx
144+
; X86-NEXT: btl %eax, %ecx
145+
; X86-NEXT: setb %al
146146
; X86-NEXT: retl
147147
;
148148
; X64-LABEL: t7:
149149
; X64: ## %bb.0:
150-
; X64-NEXT: movl %edi, %ecx
151-
; X64-NEXT: movb $19, %al
152-
; X64-NEXT: ## kill: def $cl killed $cl killed $ecx
153-
; X64-NEXT: shrb %cl, %al
154-
; X64-NEXT: andb $1, %al
150+
; X64-NEXT: movl $19, %eax
151+
; X64-NEXT: btl %edi, %eax
152+
; X64-NEXT: setb %al
155153
; X64-NEXT: retq
156154
%2 = trunc i32 %0 to i5
157155
%3 = lshr i5 -13, %2
@@ -163,20 +161,16 @@ define zeroext i1 @t7(i32 %0) {
163161
define zeroext i1 @t8(i8 %0, i8 %1) {
164162
; X86-LABEL: t8:
165163
; X86: ## %bb.0:
166-
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
167-
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
168-
; X86-NEXT: shrb %cl, %al
169-
; X86-NEXT: andb $1, %al
164+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
165+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
166+
; X86-NEXT: btl %eax, %ecx
167+
; X86-NEXT: setb %al
170168
; X86-NEXT: retl
171169
;
172170
; X64-LABEL: t8:
173171
; X64: ## %bb.0:
174-
; X64-NEXT: movl %esi, %ecx
175-
; X64-NEXT: movl %edi, %eax
176-
; X64-NEXT: ## kill: def $cl killed $cl killed $ecx
177-
; X64-NEXT: shrb %cl, %al
178-
; X64-NEXT: andb $1, %al
179-
; X64-NEXT: ## kill: def $al killed $al killed $eax
172+
; X64-NEXT: btl %esi, %edi
173+
; X64-NEXT: setb %al
180174
; X64-NEXT: retq
181175
%3 = lshr i8 %0, %1
182176
%4 = and i8 %3, 1

0 commit comments

Comments
 (0)