When save/restoring CR at prolog/epilog, in a large

stack frame, the prolog/epilog code was using the same
register for the copy of CR and the address of the save slot.  Oops.
This is fixed here for Darwin, sort of, by reserving R2 for this case.
A better way would be to do the store before the decrement of SP,
which is safe on Darwin due to the red zone.

SVR4 probably has the same problem, but I don't know how to fix it;
there is no red zone and R2 is already used for something else.
I'm going to leave it to someone interested in that target.

Better still would be to rewrite the CR-saving code completely;
spilling each CR subregister individually is horrible code.

llvm-svn: 96015
This commit is contained in:
Dale Johannesen 2010-02-12 21:35:34 +00:00
parent 8f8dcbaadd
commit 26062150fa
2 changed files with 60 additions and 14 deletions

View File

@ -421,22 +421,30 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
FrameIdx));
return true;
} else {
// FIXME: We use R0 here, because it isn't available for RA. We need to
// store the CR in the low 4-bits of the saved value. First, issue a MFCR
// to save all of the CRBits.
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), PPC::R0));
// FIXME: We need a scatch reg here. The trouble with using R0 is that
// it's possible for the stack frame to be so big the save location is
// out of range of immediate offsets, necessitating another register.
// We hack this on Darwin by reserving R2. It's probably broken on Linux
// at the moment.
// We need to store the CR in the low 4-bits of the saved value. First,
// issue a MFCR to save all of the CRBits.
unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
PPC::R2 : PPC::R0;
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), ScratchReg));
// If the saved register wasn't CR0, shift the bits left so that they are
// in CR0's slot.
if (SrcReg != PPC::CR0) {
unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(SrcReg)*4;
// rlwinm r0, r0, ShiftBits, 0, 31.
NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0)
.addReg(PPC::R0).addImm(ShiftBits).addImm(0).addImm(31));
// rlwinm scratch, scratch, ShiftBits, 0, 31.
NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
.addReg(ScratchReg).addImm(ShiftBits)
.addImm(0).addImm(31));
}
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
.addReg(PPC::R0,
.addReg(ScratchReg,
getKillRegState(isKill)),
FrameIdx));
}
@ -540,20 +548,28 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
} else if (RC == PPC::CRRCRegisterClass) {
// FIXME: We use R0 here, because it isn't available for RA.
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), PPC::R0),
FrameIdx));
// FIXME: We need a scatch reg here. The trouble with using R0 is that
// it's possible for the stack frame to be so big the save location is
// out of range of immediate offsets, necessitating another register.
// We hack this on Darwin by reserving R2. It's probably broken on Linux
// at the moment.
unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
PPC::R2 : PPC::R0;
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
ScratchReg), FrameIdx));
// If the reloaded register isn't CR0, shift the bits right so that they are
// in the right CR's slot.
if (DestReg != PPC::CR0) {
unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(DestReg)*4;
// rlwinm r11, r11, 32-ShiftBits, 0, 31.
NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0)
.addReg(PPC::R0).addImm(32-ShiftBits).addImm(0).addImm(31));
NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
.addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
.addImm(31));
}
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg).addReg(PPC::R0));
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg)
.addReg(ScratchReg));
} else if (RC == PPC::CRBITRCRegisterClass) {
unsigned Reg = 0;

View File

@ -0,0 +1,30 @@
; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck %s
; ModuleID = 'hh.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
target triple = "powerpc-apple-darwin9.6"
; This formerly used R0 for both the stack address and CR.
define void @foo() nounwind {
entry:
;CHECK: mfcr r2
;CHECK: rlwinm r2, r2, 8, 0, 31
;CHECK: lis r0, 1
;CHECK: ori r0, r0, 34540
;CHECK: stwx r2, r1, r0
%x = alloca [100000 x i8] ; <[100000 x i8]*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%x1 = bitcast [100000 x i8]* %x to i8* ; <i8*> [#uses=1]
call void @bar(i8* %x1) nounwind
call void asm sideeffect "", "~{cr2}"() nounwind
br label %return
return: ; preds = %entry
;CHECK: lis r0, 1
;CHECK: ori r0, r0, 34540
;CHECK: lwzx r2, r1, r0
;CHECK: rlwinm r2, r2, 24, 0, 31
;CHECK: mtcrf 32, r2
ret void
}
declare void @bar(i8*)