[X86][CET] Shadow stack fix for setjmp/longjmp

This patch adds a shadow stack fix when compiling
setjmp/longjmp with the shadow stack enabled. This
allows setjmp/longjmp to work correctly with CET.

Patch by mike.dvoretsky

Differential Revision: https://reviews.llvm.org/D46181

llvm-svn: 331748
This commit is contained in:
Alexander Ivchenko 2018-05-08 09:04:07 +00:00
parent 4021cee996
commit c47f799289
3 changed files with 384 additions and 5 deletions

View File

@ -27480,6 +27480,59 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
return BB;
}
/// SetJmp implies future control flow change upon calling the corresponding
/// LongJmp.
/// Instead of using the 'return' instruction, the long jump fixes the stack and
/// performs an indirect branch. To do so it uses the registers that were stored
/// in the jump buffer (when calling SetJmp).
/// In case the shadow stack is enabled we need to fix it as well, because some
/// return addresses will be skipped.
/// The function will save the SSP for future fixing in the function
/// emitLongJmpShadowStackFix.
/// \param [in] MI The temporary Machine Instruction for the builtin.
/// \param [in] MBB The Machine Basic Block that will be modified.
void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineInstrBuilder MIB;
// Memory Reference
MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
// Initialize a register with zero.
MVT PVT = getPointerTy(MF->getDataLayout());
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
unsigned ZReg = MRI.createVirtualRegister(PtrRC);
unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr;
BuildMI(*MBB, MI, DL, TII->get(XorRROpc))
.addDef(ZReg)
.addReg(ZReg, RegState::Undef)
.addReg(ZReg, RegState::Undef);
// Read the current SSP Register value to the zeroed register.
unsigned SSPCopyReg = MRI.createVirtualRegister(PtrRC);
unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
BuildMI(*MBB, MI, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);
// Write the SSP register value to offset 3 in input memory buffer.
unsigned PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrStoreOpc));
const int64_t SSPOffset = 3 * PVT.getStoreSize();
const unsigned MemOpndSlot = 1;
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(MemOpndSlot + i), SSPOffset);
else
MIB.add(MI.getOperand(MemOpndSlot + i));
}
MIB.addReg(SSPCopyReg);
MIB.setMemRefs(MMOBegin, MMOEnd);
}
MachineBasicBlock *
X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
@ -27589,6 +27642,11 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
else
MIB.addMBB(restoreMBB);
MIB.setMemRefs(MMOBegin, MMOEnd);
if (Subtarget.hasSHSTK()) {
emitSetJmpShadowStackFix(MI, thisMBB);
}
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
.addMBB(restoreMBB);
@ -27630,6 +27688,178 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
return sinkMBB;
}
/// Fix the shadow stack using the previously saved SSP pointer.
/// \sa emitSetJmpShadowStackFix
/// \param [in] MI The temporary Machine Instruction for the builtin.
/// \param [in] MBB The Machine Basic Block that will be modified.
/// \return The sink MBB that will perform the future indirect branch.
MachineBasicBlock *
X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
// Memory Reference
MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
MVT PVT = getPointerTy(MF->getDataLayout());
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
// checkSspMBB:
// xor vreg1, vreg1
// rdssp vreg1
// test vreg1, vreg1
// je sinkMBB # Jump if Shadow Stack is not supported
// fallMBB:
// mov buf+24/12(%rip), vreg2
// sub vreg1, vreg2
// jbe sinkMBB # No need to fix the Shadow Stack
// fixShadowMBB:
// shr 3/2, vreg2
// incssp vreg2 # fix the SSP according to the lower 8 bits
// shr 8, vreg2
// je sinkMBB
// fixShadowLoopPrepareMBB:
// shl vreg2
// mov 255, vreg3
// fixShadowLoopMBB:
// incssp vreg3
// dec vreg2
// jne fixShadowLoopMBB # Iterate until you finish fixing
// # the Shadow Stack
// sinkMBB:
MachineFunction::iterator I = ++MBB->getIterator();
const BasicBlock *BB = MBB->getBasicBlock();
MachineBasicBlock *checkSspMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *fixShadowMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *fixShadowLoopPrepareMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *fixShadowLoopMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(I, checkSspMBB);
MF->insert(I, fallMBB);
MF->insert(I, fixShadowMBB);
MF->insert(I, fixShadowLoopPrepareMBB);
MF->insert(I, fixShadowLoopMBB);
MF->insert(I, sinkMBB);
MBB->addSuccessor(checkSspMBB);
// Initialize a register with zero.
unsigned ZReg = MRI.createVirtualRegister(PtrRC);
unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr;
BuildMI(checkSspMBB, DL, TII->get(XorRROpc))
.addDef(ZReg)
.addReg(ZReg, RegState::Undef)
.addReg(ZReg, RegState::Undef);
// Read the current SSP Register value to the zeroed register.
unsigned SSPCopyReg = MRI.createVirtualRegister(PtrRC);
unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
BuildMI(checkSspMBB, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);
// Check whether the result of the SSP register is zero and jump directly
// to the sink.
unsigned TestRROpc = (PVT == MVT::i64) ? X86::TEST64rr : X86::TEST32rr;
BuildMI(checkSspMBB, DL, TII->get(TestRROpc))
.addReg(SSPCopyReg)
.addReg(SSPCopyReg);
BuildMI(checkSspMBB, DL, TII->get(X86::JE_1)).addMBB(sinkMBB);
checkSspMBB->addSuccessor(sinkMBB);
checkSspMBB->addSuccessor(fallMBB);
// Reload the previously saved SSP register value.
unsigned PrevSSPReg = MRI.createVirtualRegister(PtrRC);
unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
const int64_t SPPOffset = 3 * PVT.getStoreSize();
MachineInstrBuilder MIB =
BuildMI(fallMBB, DL, TII->get(PtrLoadOpc), PrevSSPReg);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(i), SPPOffset);
else
MIB.add(MI.getOperand(i));
}
MIB.setMemRefs(MMOBegin, MMOEnd);
// Subtract the current SSP from the previous SSP.
unsigned SspSubReg = MRI.createVirtualRegister(PtrRC);
unsigned SubRROpc = (PVT == MVT::i64) ? X86::SUB64rr : X86::SUB32rr;
BuildMI(fallMBB, DL, TII->get(SubRROpc), SspSubReg)
.addReg(PrevSSPReg)
.addReg(SSPCopyReg);
// Jump to sink in case PrevSSPReg <= SSPCopyReg.
BuildMI(fallMBB, DL, TII->get(X86::JBE_1)).addMBB(sinkMBB);
fallMBB->addSuccessor(sinkMBB);
fallMBB->addSuccessor(fixShadowMBB);
// Shift right by 2/3 for 32/64 because incssp multiplies the argument by 4/8.
unsigned ShrRIOpc = (PVT == MVT::i64) ? X86::SHR64ri : X86::SHR32ri;
unsigned Offset = (PVT == MVT::i64) ? 3 : 2;
unsigned SspFirstShrReg = MRI.createVirtualRegister(PtrRC);
BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspFirstShrReg)
.addReg(SspSubReg)
.addImm(Offset);
// Increase SSP when looking only on the lower 8 bits of the delta.
unsigned IncsspOpc = (PVT == MVT::i64) ? X86::INCSSPQ : X86::INCSSPD;
BuildMI(fixShadowMBB, DL, TII->get(IncsspOpc)).addReg(SspFirstShrReg);
// Reset the lower 8 bits.
unsigned SspSecondShrReg = MRI.createVirtualRegister(PtrRC);
BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspSecondShrReg)
.addReg(SspFirstShrReg)
.addImm(8);
// Jump if the result of the shift is zero.
BuildMI(fixShadowMBB, DL, TII->get(X86::JE_1)).addMBB(sinkMBB);
fixShadowMBB->addSuccessor(sinkMBB);
fixShadowMBB->addSuccessor(fixShadowLoopPrepareMBB);
// Do a single shift left.
unsigned ShlR1Opc = (PVT == MVT::i64) ? X86::SHL64r1 : X86::SHL32r1;
unsigned SspAfterShlReg = MRI.createVirtualRegister(PtrRC);
BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(ShlR1Opc), SspAfterShlReg)
.addReg(SspSecondShrReg);
// Save the value 128 to a register (will be used next with incssp).
unsigned Value128InReg = MRI.createVirtualRegister(PtrRC);
unsigned MovRIOpc = (PVT == MVT::i64) ? X86::MOV64ri32 : X86::MOV32ri;
BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(MovRIOpc), Value128InReg)
.addImm(128);
fixShadowLoopPrepareMBB->addSuccessor(fixShadowLoopMBB);
// Since incssp only looks at the lower 8 bits, we might need to do several
// iterations of incssp until we finish fixing the shadow stack.
unsigned DecReg = MRI.createVirtualRegister(PtrRC);
unsigned CounterReg = MRI.createVirtualRegister(PtrRC);
BuildMI(fixShadowLoopMBB, DL, TII->get(X86::PHI), CounterReg)
.addReg(SspAfterShlReg)
.addMBB(fixShadowLoopPrepareMBB)
.addReg(DecReg)
.addMBB(fixShadowLoopMBB);
// Every iteration we increase the SSP by 128.
BuildMI(fixShadowLoopMBB, DL, TII->get(IncsspOpc)).addReg(Value128InReg);
// Every iteration we decrement the counter by 1.
unsigned DecROpc = (PVT == MVT::i64) ? X86::DEC64r : X86::DEC32r;
BuildMI(fixShadowLoopMBB, DL, TII->get(DecROpc), DecReg).addReg(CounterReg);
// Jump if the counter is not zero yet.
BuildMI(fixShadowLoopMBB, DL, TII->get(X86::JNE_1)).addMBB(fixShadowLoopMBB);
fixShadowLoopMBB->addSuccessor(sinkMBB);
fixShadowLoopMBB->addSuccessor(fixShadowLoopMBB);
return sinkMBB;
}
MachineBasicBlock *
X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
@ -27662,13 +27892,21 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r;
MachineBasicBlock *thisMBB = MBB;
// When CET and shadow stack is enabled, we need to fix the Shadow Stack.
if (Subtarget.hasSHSTK()) {
thisMBB = emitLongJmpShadowStackFix(MI, thisMBB);
}
// Reload FP
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), FP);
MIB = BuildMI(thisMBB, DL, TII->get(PtrLoadOpc), FP);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
MIB.add(MI.getOperand(i));
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload IP
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
MIB = BuildMI(thisMBB, DL, TII->get(PtrLoadOpc), Tmp);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(i), LabelOffset);
@ -27676,8 +27914,9 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MIB.add(MI.getOperand(i));
}
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload SP
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), SP);
MIB = BuildMI(thisMBB, DL, TII->get(PtrLoadOpc), SP);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(i), SPOffset);
@ -27685,11 +27924,12 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MIB.add(MI.getOperand(i));
}
MIB.setMemRefs(MMOBegin, MMOEnd);
// Jump
BuildMI(*MBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp);
BuildMI(thisMBB, DL, TII->get(IJmpOpc)).addReg(Tmp);
MI.eraseFromParent();
return MBB;
return thisMBB;
}
void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,

View File

@ -1337,9 +1337,15 @@ namespace llvm {
MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const;
void emitSetJmpShadowStackFix(MachineInstr &MI,
MachineBasicBlock *MBB) const;
MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const;
MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
MachineBasicBlock *MBB) const;
MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
MachineBasicBlock *MBB) const;

View File

@ -0,0 +1,133 @@
; RUN: llc -mtriple x86_64-unknown-unknown -mattr=+shstk < %s | FileCheck %s --check-prefix=X86_64
; RUN: llc -mtriple i386-unknown-unknown -mattr=+shstk < %s | FileCheck %s --check-prefix=X86
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The IR was created using the following C code:
;; typedef void *jmp_buf;
;; jmp_buf *buf;
;;
;; __attribute__((noinline)) int bar (int i) {
;; int j = i - 111;
;; __builtin_longjmp (buf, 1);
;; return j;
;; }
;;
;; int foo (int i) {
;; int j = i * 11;
;; if (!__builtin_setjmp (buf)) {
;; j += 33 + bar (j);
;; }
;; return j + i;
;; }
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@buf = common local_unnamed_addr global i8* null, align 8
; Functions that use LongJmp should fix the Shadow Stack using previosuly saved
; ShadowStackPointer in the input buffer.
; The fix requires unwinding the shadow stack to the last SSP.
define i32 @bar(i32 %i) local_unnamed_addr {
; X86_64-LABEL: bar:
; X86_64: movq {{.*}}(%rip), %rax
; X86_64-NEXT: xorq %rdx, %rdx
; X86_64-NEXT: rdsspq %rdx
; X86_64-NEXT: testq %rdx, %rdx
; X86_64-NEXT: je .LBB0_5
; X86_64-NEXT: # %bb.1: # %entry
; X86_64-NEXT: movq 24(%rax), %rcx
; X86_64-NEXT: subq %rdx, %rcx
; X86_64-NEXT: jbe .LBB0_5
; X86_64-NEXT: # %bb.2: # %entry
; X86_64-NEXT: shrq $3, %rcx
; X86_64-NEXT: incsspq %rcx
; X86_64-NEXT: shrq $8, %rcx
; X86_64-NEXT: je .LBB0_5
; X86_64-NEXT: # %bb.3: # %entry
; X86_64-NEXT: shlq %rcx
; X86_64-NEXT: movq $128, %rdx
; X86_64-NEXT: .LBB0_4: # %entry
; X86_64-NEXT: # =>This Inner Loop Header: Depth=1
; X86_64-NEXT: incsspq %rdx
; X86_64-NEXT: decq %rcx
; X86_64-NEXT: jne .LBB0_4
; X86_64-NEXT: .LBB0_5: # %entry
; X86_64-NEXT: movq (%rax), %rbp
; X86_64-NEXT: movq 8(%rax), %rcx
; X86_64-NEXT: movq 16(%rax), %rsp
; X86_64-NEXT: jmpq *%rcx
;
; X86-LABEL: bar:
; X86: movl buf, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: rdsspd %edx
; X86-NEXT: testl %edx, %edx
; X86-NEXT: je .LBB0_5
; X86-NEXT: # %bb.1: # %entry
; X86-NEXT: movl 12(%eax), %ecx
; X86-NEXT: subl %edx, %ecx
; X86-NEXT: jbe .LBB0_5
; X86-NEXT: # %bb.2: # %entry
; X86-NEXT: shrl $2, %ecx
; X86-NEXT: incsspd %ecx
; X86-NEXT: shrl $8, %ecx
; X86-NEXT: je .LBB0_5
; X86-NEXT: # %bb.3: # %entry
; X86-NEXT: shll %ecx
; X86-NEXT: movl $128, %edx
; X86-NEXT: .LBB0_4: # %entry
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: incsspd %edx
; X86-NEXT: decl %ecx
; X86-NEXT: jne .LBB0_4
; X86-NEXT: .LBB0_5: # %entry
; X86-NEXT: movl (%eax), %ebp
; X86-NEXT: movl 4(%eax), %ecx
; X86-NEXT: movl 8(%eax), %esp
; X86-NEXT: jmpl *%ecx
entry:
%0 = load i8*, i8** @buf, align 8
tail call void @llvm.eh.sjlj.longjmp(i8* %0)
unreachable
}
declare void @llvm.eh.sjlj.longjmp(i8*)
; Functions that call SetJmp should save the current ShadowStackPointer for
; future fixing of the Shadow Stack.
define i32 @foo(i32 %i) local_unnamed_addr {
; X86_64-LABEL: foo:
; X86_64: xorq %rcx, %rcx
; X86_64-NEXT: rdsspq %rcx
; X86_64-NEXT: movq %rcx, 24(%rax)
; X86_64: callq bar
;
; X86-LABEL: foo:
; X86: xorl %ecx, %ecx
; X86-NEXT: rdsspd %ecx
; X86-NEXT: movl %ecx, 12(%eax)
; X86: calll bar
entry:
%0 = load i8*, i8** @buf, align 8
%1 = bitcast i8* %0 to i8**
%2 = tail call i8* @llvm.frameaddress(i32 0)
store i8* %2, i8** %1, align 8
%3 = tail call i8* @llvm.stacksave()
%4 = getelementptr inbounds i8, i8* %0, i64 16
%5 = bitcast i8* %4 to i8**
store i8* %3, i8** %5, align 8
%6 = tail call i32 @llvm.eh.sjlj.setjmp(i8* %0)
%tobool = icmp eq i32 %6, 0
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
%call = tail call i32 @bar(i32 undef)
unreachable
if.end: ; preds = %entry
%add2 = mul nsw i32 %i, 12
ret i32 %add2
}
declare i8* @llvm.frameaddress(i32)
declare i8* @llvm.stacksave()
declare i32 @llvm.eh.sjlj.setjmp(i8*)