Skip to content

Commit 91647ae

Browse files
thrimbor and tstellar
authored and committed
[X86][SSE] Don't emit SSE2 load instructions in SSE1-only mode (#134547)
This fixes a regression I traced back to 8b43c1b / #79000. The regression caused an SSE2 instruction, `movsd`, to be emitted as a replacement for an SSE instruction, `movaps`, despite the target potentially not supporting this instruction, such as when building with clang using `-march=pentium3`. Fixes #134607. (cherry picked from commit 08e080e)
1 parent d05543e commit 91647ae

File tree

2 files changed

+27
-4
lines changed

2 files changed

+27
-4
lines changed

llvm/lib/Target/X86/X86FixupVectorConstants.cpp

+7-4
Original file line number | Diff line number | Diff line change
@@ -333,6 +333,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
333333
MachineInstr &MI) {
334334
unsigned Opc = MI.getOpcode();
335335
MachineConstantPool *CP = MI.getParent()->getParent()->getConstantPool();
336+
bool HasSSE2 = ST->hasSSE2();
336337
bool HasSSE41 = ST->hasSSE41();
337338
bool HasAVX2 = ST->hasAVX2();
338339
bool HasDQI = ST->hasDQI();
@@ -394,11 +395,13 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
394395
case X86::MOVAPDrm:
395396
case X86::MOVAPSrm:
396397
case X86::MOVUPDrm:
397-
case X86::MOVUPSrm:
398+
case X86::MOVUPSrm: {
398399
// TODO: SSE3 MOVDDUP Handling
399-
return FixupConstant({{X86::MOVSSrm, 1, 32, rebuildZeroUpperCst},
400-
{X86::MOVSDrm, 1, 64, rebuildZeroUpperCst}},
401-
128, 1);
400+
FixupEntry Fixups[] = {
401+
{X86::MOVSSrm, 1, 32, rebuildZeroUpperCst},
402+
{HasSSE2 ? X86::MOVSDrm : 0, 1, 64, rebuildZeroUpperCst}};
403+
return FixupConstant(Fixups, 128, 1);
404+
}
402405
case X86::VMOVAPDrm:
403406
case X86::VMOVAPSrm:
404407
case X86::VMOVUPDrm:

llvm/test/CodeGen/X86/pr134607.ll

+20
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,20 @@
1+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse -O3 | FileCheck %s --check-prefixes=X86
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2,+sse -O3 | FileCheck %s --check-prefixes=X64-SSE1
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2,+sse -O3 | FileCheck %s --check-prefixes=X64-SSE2
4+
5+
define void @store_v2f32_constant(ptr %v) {
6+
; X86-LABEL: store_v2f32_constant:
7+
; X86: # %bb.0:
8+
; X86-NEXT: movl 4(%esp), %eax
9+
; X86-NEXT: movaps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
10+
11+
; X64-SSE1-LABEL: store_v2f32_constant:
12+
; X64-SSE1: # %bb.0:
13+
; X64-SSE1-NEXT: movaps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14+
15+
; X64-SSE2-LABEL: store_v2f32_constant:
16+
; X64-SSE2: # %bb.0:
17+
; X64-SSE2-NEXT: movsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
18+
store <2 x float> <float 2.560000e+02, float 5.120000e+02>, ptr %v, align 4
19+
ret void
20+
}

0 commit comments

Comments
 (0)