The UTF16 string referenced by a CFString should go into the __TEXT,__ustring section.

A 'normal' string will go into the __TEXT,__const section, but this
isn't good for UTF16 strings. The __ustring section allows for coalescing, among
other niceties (such as allowing the linker to easily split up strings).

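Instead of outputting the UTF16 string as a series of bytes, output it as a
series of shorts (16-bit elements). The back-end will then nicely place the
UTF16 string into the correct section, because it's a mensch. This also means
the front-end no longer has to convert the string to the target byte order by
hand.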
<rdar://problem/10655949>

llvm-svn: 153710
This commit is contained in:
Bill Wendling 2012-03-30 00:26:17 +00:00
parent ab468b0381
commit 82b87f19e2
4 changed files with 40 additions and 29 deletions

View File

@ -1903,8 +1903,10 @@ GetConstantCFStringEntry(llvm::StringMap<llvm::Constant*> &Map,
return Map.GetOrCreateValue(String);
}
// Otherwise, convert the UTF8 literals into a byte string.
SmallVector<UTF16, 128> ToBuf(NumBytes);
// Otherwise, convert the UTF8 literals into a string of shorts.
IsUTF16 = true;
SmallVector<UTF16, 128> ToBuf(NumBytes + 1); // +1 for ending nulls.
const UTF8 *FromPtr = (UTF8 *)String.data();
UTF16 *ToPtr = &ToBuf[0];
@ -1915,28 +1917,11 @@ GetConstantCFStringEntry(llvm::StringMap<llvm::Constant*> &Map,
// ConvertUTF8toUTF16 returns the length in ToPtr.
StringLength = ToPtr - &ToBuf[0];
// Render the UTF-16 string into a byte array and convert to the target byte
// order.
//
// FIXME: This isn't something we should need to do here.
SmallString<128> AsBytes;
AsBytes.reserve(StringLength * 2);
for (unsigned i = 0; i != StringLength; ++i) {
unsigned short Val = ToBuf[i];
if (TargetIsLSB) {
AsBytes.push_back(Val & 0xFF);
AsBytes.push_back(Val >> 8);
} else {
AsBytes.push_back(Val >> 8);
AsBytes.push_back(Val & 0xFF);
}
}
// Append one extra null character, the second is automatically added by our
// caller.
AsBytes.push_back(0);
IsUTF16 = true;
return Map.GetOrCreateValue(StringRef(AsBytes.data(), AsBytes.size()));
// Add an explicit null.
*ToPtr = 0;
return Map.
GetOrCreateValue(StringRef(reinterpret_cast<const char *>(ToBuf.data()),
(StringLength + 1) * 2));
}
static llvm::StringMapEntry<llvm::Constant*> &
@ -1990,8 +1975,15 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
llvm::ConstantInt::get(Ty, 0x07C8);
// String pointer.
llvm::Constant *C = llvm::ConstantDataArray::getString(VMContext,
Entry.getKey());
llvm::Constant *C = 0;
if (isUTF16) {
ArrayRef<uint16_t> Arr =
llvm::makeArrayRef<uint16_t>((uint16_t*)Entry.getKey().data(),
Entry.getKey().size() / 2);
C = llvm::ConstantDataArray::get(VMContext, Arr);
} else {
C = llvm::ConstantDataArray::getString(VMContext, Entry.getKey());
}
llvm::GlobalValue::LinkageTypes Linkage;
if (isUTF16)
@ -2016,8 +2008,14 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
CharUnits Align = getContext().getTypeAlignInChars(getContext().CharTy);
GV->setAlignment(Align.getQuantity());
}
// String.
Fields[2] = llvm::ConstantExpr::getGetElementPtr(GV, Zeros);
if (isUTF16)
// Cast the UTF16 string to the correct type.
Fields[2] = llvm::ConstantExpr::getBitCast(Fields[2], Int8PtrTy);
// String length.
Ty = getTypes().ConvertType(getContext().LongTy);
Fields[3] = llvm::ConstantInt::get(Ty, StringLength);

View File

@ -2,13 +2,16 @@
// CHECK-LSB: @.str = private unnamed_addr constant [8 x i8] c"string0\00"
// CHECK-LSB: @.str1 = linker_private unnamed_addr constant [8 x i8] c"string1\00"
// CHECK-LSB: @.str2 = internal unnamed_addr constant [36 x i8] c"h\00e\00l\00l\00o\00 \00\92! \00\03& \00\90! \00w\00o\00r\00l\00d\00\00\00", align 2
// CHECK-LSB: @.str2 = internal unnamed_addr constant [18 x i16] [i16 104, i16 101, i16 108, i16 108, i16 111, i16 32, i16 8594, i16 32, i16 9731, i16 32, i16 8592, i16 32, i16 119, i16 111, i16 114, i16 108, i16 100, i16 0], align 2
// CHECK-LSB: @.str4 = internal unnamed_addr constant [6 x i16] [i16 116, i16 101, i16 115, i16 116, i16 8482, i16 0], align 2
// RUN: %clang_cc1 -triple powerpc-apple-darwin9 -emit-llvm %s -o - | FileCheck -check-prefix MSB %s
// CHECK-MSB: @.str = private unnamed_addr constant [8 x i8] c"string0\00"
// CHECK-MSB: @.str1 = linker_private unnamed_addr constant [8 x i8] c"string1\00"
// CHECK-MSB: @.str2 = internal unnamed_addr constant [36 x i8] c"\00h\00e\00l\00l\00o\00 !\92\00 &\03\00 !\90\00 \00w\00o\00r\00l\00d\00\00", align 2
// CHECK-MSB: @.str2 = internal unnamed_addr constant [18 x i16] [i16 104, i16 101, i16 108, i16 108, i16 111, i16 32, i16 8594, i16 32, i16 9731, i16 32, i16 8592, i16 32, i16 119, i16 111, i16 114, i16 108, i16 100, i16 0], align 2
// CHECK-MSB: @.str4 = internal unnamed_addr constant [6 x i16] [i16 116, i16 101, i16 115, i16 116, i16 8482, i16 0], align 2
const char *g0 = "string0";
const void *g1 = __builtin___CFStringMakeConstantString("string1");

View File

@ -0,0 +1,10 @@
// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm %s -o - | FileCheck %s
// <rdar://problem/10655949>
// CHECK: @.str = internal unnamed_addr constant [9 x i16] [i16 252, i16 98, i16 101, i16 114, i16 104, i16 117, i16 110, i16 100, i16 0], align 2
#define CFSTR __builtin___CFStringMakeConstantString
// Creates a constant CFString from a literal containing a non-ASCII character
// (the leading 'ü'). The expected-output pattern at the top of this file
// requires the string data to be emitted as an array of i16 values ending in
// an i16 0, rather than as an array of bytes.
void foo() {
CFSTR("überhund");
}

View File

@ -1,5 +1,5 @@
// RUN: %clang_cc1 -emit-llvm -w -x objective-c %s -o - | FileCheck %s
// rdar://7095855 rdar://7115749
// CHECK: internal unnamed_addr constant [12 x i8]
// CHECK: internal unnamed_addr constant [6 x i16] [i16 105, i16 80, i16 111, i16 100, i16 8482, i16 0], align 2
void *P = @"iPod™";