From 7e45562ad038040418774e830deb7210f78a4426 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Tue, 18 Dec 2012 09:30:21 +0000 Subject: [PATCH] ubsan: Demangle class names, and be more informative when a reinterpret_cast has got us to the wrong offset within an object. llvm-svn: 170423 --- .../lib/ubsan/lit_tests/TypeCheck/vptr.cpp | 8 ++-- compiler-rt/lib/ubsan/ubsan_diag.cc | 25 ++++++++++- compiler-rt/lib/ubsan/ubsan_diag.h | 11 +++++ compiler-rt/lib/ubsan/ubsan_handlers_cxx.cc | 11 +++-- compiler-rt/lib/ubsan/ubsan_type_hash.cc | 43 +++++++++++++++++-- compiler-rt/lib/ubsan/ubsan_type_hash.h | 7 ++- 6 files changed, 91 insertions(+), 14 deletions(-) diff --git a/compiler-rt/lib/ubsan/lit_tests/TypeCheck/vptr.cpp b/compiler-rt/lib/ubsan/lit_tests/TypeCheck/vptr.cpp index a4f97baed95e..574a7bef9622 100644 --- a/compiler-rt/lib/ubsan/lit_tests/TypeCheck/vptr.cpp +++ b/compiler-rt/lib/ubsan/lit_tests/TypeCheck/vptr.cpp @@ -75,7 +75,7 @@ int main(int, char **argv) { case 'm': // CHECK-MEMBER: vptr.cpp:[[@LINE+5]]:15: runtime error: member access within address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'T' - // CHECK-MEMBER-NEXT: [[PTR]]: note: object is of type [[DYN_TYPE:1S|1U]] + // CHECK-MEMBER-NEXT: [[PTR]]: note: object is of type [[DYN_TYPE:'S'|'U']] // CHECK-MEMBER-NEXT: {{^ .. .. .. .. .. .. .. .. .. .. .. .. }} // CHECK-MEMBER-NEXT: {{^ \^~~~~~~~~~~(~~~~~~~~~~~~)? *$}} // CHECK-MEMBER-NEXT: {{^ vptr for}} [[DYN_TYPE]] @@ -89,7 +89,7 @@ int main(int, char **argv) { case 'f': // CHECK-MEMFUN: vptr.cpp:[[@LINE+5]]:12: runtime error: member call on address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'T' - // CHECK-MEMFUN-NEXT: [[PTR]]: note: object is of type [[DYN_TYPE:1S|1U]] + // CHECK-MEMFUN-NEXT: [[PTR]]: note: object is of type [[DYN_TYPE:'S'|'U']] // CHECK-MEMFUN-NEXT: {{^ .. .. .. .. .. .. .. .. .. .. .. .. }} // CHECK-MEMFUN-NEXT: {{^ \^~~~~~~~~~~(~~~~~~~~~~~~)? *$}} // CHECK-MEMFUN-NEXT: {{^ vptr for}} [[DYN_TYPE]] @@ -97,10 +97,10 @@ int main(int, char **argv) { case 'o': // CHECK-OFFSET: vptr.cpp:[[@LINE+5]]:12: runtime error: member call on address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'U' - // CHECK-OFFSET-NEXT: 0x{{[0-9a-f]*}}: note: object is base class subobject at offset {{8|16}} within object of type [[DYN_TYPE:1U]] + // CHECK-OFFSET-NEXT: 0x{{[0-9a-f]*}}: note: object is base class subobject at offset {{8|16}} within object of type [[DYN_TYPE:'U']] // CHECK-OFFSET-NEXT: {{^ .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. }} // CHECK-OFFSET-NEXT: {{^ \^ ( ~~~~~~~~~~~~)~~~~~~~~~~~ *$}} - // CHECK-OFFSET-NEXT: {{^ ( )?vptr for}} [[DYN_TYPE]] + // CHECK-OFFSET-NEXT: {{^ ( )?vptr for}} 'T' base class of [[DYN_TYPE]] return reinterpret_cast(p)->v() - 2; } } diff --git a/compiler-rt/lib/ubsan/ubsan_diag.cc b/compiler-rt/lib/ubsan/ubsan_diag.cc index 38dfc8512582..517ce6b68b38 100644 --- a/compiler-rt/lib/ubsan/ubsan_diag.cc +++ b/compiler-rt/lib/ubsan/ubsan_diag.cc @@ -54,7 +54,7 @@ Diag &Diag::operator<<(const Value &V) { return *this; } -/// Hexadecimal printing for numbers too large for fprintf to handle directly. +/// Hexadecimal printing for numbers too large for Printf to handle directly. static void PrintHex(UIntMax Val) { #if HAVE_INT128_T Printf("0x%08x%08x%08x%08x", @@ -93,6 +93,15 @@ static void renderLocation(Location Loc) { } } +// C++ demangling function, as required by Itanium C++ ABI. This is weak, +// because we do not require a C++ ABI library to be linked to a program +// using UBSan; if it's not present, we'll just print the string mangled. +namespace __cxxabiv1 { + extern "C" char *__cxa_demangle(const char *mangled, char *buffer, + size_t *length, int *status) + __attribute__((weak)); +} + static void renderText(const char *Message, const Diag::Arg *Args) { for (const char *Msg = Message; *Msg; ++Msg) { if (*Msg != '%') { @@ -109,6 +118,20 @@ static void renderText(const char *Message, const Diag::Arg *Args) { case Diag::AK_String: Printf("%s", A.String); break; + case Diag::AK_Mangled: { + const char *String = 0; + // FIXME: __cxa_demangle aggressively insists on allocating memory. + // There's not much we can do about that, short of providing our + // own demangler (libc++abi's implementation could easily be made + // to not allocate). For now, we just call it anyway, and we leak + // the returned value. + if (__cxxabiv1::__cxa_demangle) + String = __cxxabiv1::__cxa_demangle(A.String, 0, 0, 0); + RawWrite("'"); + RawWrite(String ? String : A.String); + RawWrite("'"); + break; + } case Diag::AK_SInt: // 'long long' is guaranteed to be at least 64 bits wide. if (A.SInt >= INT64_MIN && A.SInt <= INT64_MAX) diff --git a/compiler-rt/lib/ubsan/ubsan_diag.h b/compiler-rt/lib/ubsan/ubsan_diag.h index a77c71b66571..16afffdb0a76 100644 --- a/compiler-rt/lib/ubsan/ubsan_diag.h +++ b/compiler-rt/lib/ubsan/ubsan_diag.h @@ -100,6 +100,14 @@ public: const char *getText() const { return Text; } }; +/// \brief A mangled C++ name. Really just a strong typedef for 'const char*'. +class MangledName { + const char *Name; +public: + MangledName(const char *Name) : Name(Name) {} + const char *getName() const { return Name; } +}; + /// \brief Representation of an in-flight diagnostic. /// /// Temporary \c Diag instances are created by the handler routines to @@ -120,6 +128,7 @@ public: /// Kinds of arguments, corresponding to members of \c Arg's union. enum ArgKind { AK_String, ///< A string argument, displayed as-is. + AK_Mangled,///< A C++ mangled name, demangled before display. AK_UInt, ///< An unsigned integer argument. AK_SInt, ///< A signed integer argument. AK_Float, ///< A floating-point argument. @@ -130,6 +139,7 @@ public: struct Arg { Arg() {} Arg(const char *String) : Kind(AK_String), String(String) {} + Arg(MangledName MN) : Kind(AK_Mangled), String(MN.getName()) {} Arg(UIntMax UInt) : Kind(AK_UInt), UInt(UInt) {} Arg(SIntMax SInt) : Kind(AK_SInt), SInt(SInt) {} Arg(FloatMax Float) : Kind(AK_Float), Float(Float) {} @@ -179,6 +189,7 @@ public: ~Diag(); Diag &operator<<(const char *Str) { return AddArg(Str); } + Diag &operator<<(MangledName MN) { return AddArg(MN); } Diag &operator<<(unsigned long long V) { return AddArg(UIntMax(V)); } Diag &operator<<(const void *V) { return AddArg(V); } Diag &operator<<(const TypeDescriptor &V); diff --git a/compiler-rt/lib/ubsan/ubsan_handlers_cxx.cc b/compiler-rt/lib/ubsan/ubsan_handlers_cxx.cc index e3f39cca5e5d..b199dc7d5a78 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers_cxx.cc +++ b/compiler-rt/lib/ubsan/ubsan_handlers_cxx.cc @@ -42,17 +42,20 @@ static void HandleDynamicTypeCacheMiss( DynamicTypeInfo DTI = getDynamicTypeInfo((void*)Pointer); if (!DTI.isValid()) Diag(Pointer, DL_Note, "object has invalid vptr") - << DTI.getMostDerivedTypeName() + << MangledName(DTI.getMostDerivedTypeName()) << Range(Pointer, Pointer + sizeof(uptr), "invalid vptr"); else if (!DTI.getOffset()) Diag(Pointer, DL_Note, "object is of type %0") - << DTI.getMostDerivedTypeName() + << MangledName(DTI.getMostDerivedTypeName()) << Range(Pointer, Pointer + sizeof(uptr), "vptr for %0"); else + // FIXME: Find the type at the specified offset, and include that + // in the note. Diag(Pointer - DTI.getOffset(), DL_Note, "object is base class subobject at offset %0 within object of type %1") - << DTI.getOffset() << DTI.getMostDerivedTypeName() - << Range(Pointer, Pointer + sizeof(uptr), "vptr for %1"); + << DTI.getOffset() << MangledName(DTI.getMostDerivedTypeName()) + << MangledName(DTI.getSubobjectTypeName()) + << Range(Pointer, Pointer + sizeof(uptr), "vptr for %2 base class of %1"); if (Abort) Die(); diff --git a/compiler-rt/lib/ubsan/ubsan_type_hash.cc b/compiler-rt/lib/ubsan/ubsan_type_hash.cc index ee17af7d807f..7a9cd28f6ec0 100644 --- a/compiler-rt/lib/ubsan/ubsan_type_hash.cc +++ b/compiler-rt/lib/ubsan/ubsan_type_hash.cc @@ -129,7 +129,7 @@ static bool isDerivedFromAtOffset(const abi::__class_type_info *Derived, // No base class subobjects. return false; - // Look for a zero-offset base class which is derived from \p Base. + // Look for a base class which is derived from \p Base at the right offset. for (unsigned int base = 0; base != VTI->base_count; ++base) { // FIXME: Curtail the recursion if this base can't possibly contain the // given offset. @@ -149,6 +149,39 @@ static bool isDerivedFromAtOffset(const abi::__class_type_info *Derived, return false; } +/// \brief Find the derived-most dynamic base class of \p Derived at offset +/// \p Offset. +static const abi::__class_type_info *findBaseAtOffset( + const abi::__class_type_info *Derived, sptr Offset) { + if (!Offset) + return Derived; + + if (const abi::__si_class_type_info *SI = + dynamic_cast(Derived)) + return findBaseAtOffset(SI->__base_type, Offset); + + const abi::__vmi_class_type_info *VTI = + dynamic_cast(Derived); + if (!VTI) + // No base class subobjects. + return 0; + + for (unsigned int base = 0; base != VTI->base_count; ++base) { + sptr OffsetHere = VTI->base_info[base].__offset_flags >> + abi::__base_class_type_info::__offset_shift; + if (VTI->base_info[base].__offset_flags & + abi::__base_class_type_info::__virtual_mask) + // FIXME: Can't handle virtual bases yet. + continue; + if (const abi::__class_type_info *Base = + findBaseAtOffset(VTI->base_info[base].__base_type, + Offset - OffsetHere)) + return Base; + } + + return 0; +} + namespace { struct VtablePrefix { @@ -206,6 +239,10 @@ bool __ubsan::checkDynamicType(void *Object, void *Type, HashValue Hash) { __ubsan::DynamicTypeInfo __ubsan::getDynamicTypeInfo(void *Object) { VtablePrefix *Vtable = getVtablePrefix(Object); if (!Vtable) - return DynamicTypeInfo(0, 0); - return DynamicTypeInfo(Vtable->TypeInfo->__type_name, -Vtable->Offset); + return DynamicTypeInfo(0, 0, 0); + const abi::__class_type_info *ObjectType = findBaseAtOffset( + static_cast(Vtable->TypeInfo), + -Vtable->Offset); + return DynamicTypeInfo(Vtable->TypeInfo->__type_name, -Vtable->Offset, + ObjectType ? ObjectType->__type_name : ""); } diff --git a/compiler-rt/lib/ubsan/ubsan_type_hash.h b/compiler-rt/lib/ubsan/ubsan_type_hash.h index dfaf32752a9a..1c5355701a5d 100644 --- a/compiler-rt/lib/ubsan/ubsan_type_hash.h +++ b/compiler-rt/lib/ubsan/ubsan_type_hash.h @@ -24,10 +24,11 @@ typedef uptr HashValue; class DynamicTypeInfo { const char *MostDerivedTypeName; sptr Offset; + const char *SubobjectTypeName; public: - DynamicTypeInfo(const char *MDTN, sptr Offset) - : MostDerivedTypeName(MDTN), Offset(Offset) {} + DynamicTypeInfo(const char *MDTN, sptr Offset, const char *STN) + : MostDerivedTypeName(MDTN), Offset(Offset), SubobjectTypeName(STN) {} /// Determine whether the object had a valid dynamic type. bool isValid() const { return MostDerivedTypeName; } @@ -35,6 +36,8 @@ public: const char *getMostDerivedTypeName() const { return MostDerivedTypeName; } /// Get the offset from the most-derived type to this base class. sptr getOffset() const { return Offset; } + /// Get the name of the most-derived type at the specified offset. + const char *getSubobjectTypeName() const { return SubobjectTypeName; } }; /// \brief Get information about the dynamic type of an object.