From fb9ce100d19be130d004d03088ccd4af295f3435 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 17 Jun 2019 23:39:41 +0000 Subject: [PATCH] hwasan: Add a tag_offset DWARF attribute to instrumented stack variables. The goal is to improve hwasan's error reporting for stack use-after-return by recording enough information to allow the specific variable that was accessed to be identified based on the pointer's tag. Currently we record the PC and lower bits of SP for each stack frame we create (which will eventually be enough to derive the base tag used by the stack frame) but that's not enough to determine the specific tag for each variable, which is the stack frame's base tag XOR a value (the "tag offset") that is unique for each variable in a function. In IR, the tag offset is most naturally represented as part of a location expression on the llvm.dbg.declare instruction. However, the presence of the tag offset in the variable's actual location expression is likely to confuse debuggers which won't know about tag offsets, and moreover the tag offset is not required for a debugger to determine the location of the variable on the stack, so at the DWARF level it is represented as an attribute so that it will be ignored by debuggers that don't know about it. 
Differential Revision: https://reviews.llvm.org/D63119 llvm-svn: 363635 --- llvm/docs/LangRef.rst | 3 ++ llvm/include/llvm/BinaryFormat/Dwarf.def | 1 + llvm/include/llvm/BinaryFormat/Dwarf.h | 5 +- llvm/lib/BinaryFormat/Dwarf.cpp | 3 ++ .../CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 3 ++ .../CodeGen/AsmPrinter/DwarfExpression.cpp | 3 ++ llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h | 2 + llvm/lib/IR/DebugInfoMetadata.cpp | 6 ++- .../Instrumentation/HWAddressSanitizer.cpp | 23 +++++++-- llvm/test/Assembler/diexpression.ll | 6 ++- .../CodeGen/AArch64/dbg-declare-tag-offset.ll | 47 +++++++++++++++++ .../dbg-declare-tag-offset.ll | 50 +++++++++++++++++++ 12 files changed, 144 insertions(+), 8 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/dbg-declare-tag-offset.ll create mode 100644 llvm/test/Instrumentation/HWAddressSanitizer/dbg-declare-tag-offset.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 4cbc960f0c68..6520821de2b7 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -4704,6 +4704,9 @@ The current supported opcode vocabulary is limited: (``16`` and ``DW_ATE_signed`` here, respectively) to which the top of the expression stack is to be converted. Maps into a ``DW_OP_convert`` operation that references a base type constructed from the supplied values. +- ``DW_OP_LLVM_tag_offset, tag_offset`` specifies that a memory tag should be + optionally applied to the pointer. The memory tag is derived from the + given tag offset in an implementation-defined manner. - ``DW_OP_swap`` swaps top two stack entries. - ``DW_OP_xderef`` provides extended dereference mechanism. The entry at the top of the stack is treated as an address. 
The second stack entry is treated as an diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def index 90ae02319454..76d13c116096 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.def +++ b/llvm/include/llvm/BinaryFormat/Dwarf.def @@ -386,6 +386,7 @@ HANDLE_DW_AT(0x3b31, BORLAND_closure, 0, BORLAND) HANDLE_DW_AT(0x3e00, LLVM_include_path, 0, LLVM) HANDLE_DW_AT(0x3e01, LLVM_config_macros, 0, LLVM) HANDLE_DW_AT(0x3e02, LLVM_isysroot, 0, LLVM) +HANDLE_DW_AT(0x3e03, LLVM_tag_offset, 0, LLVM) // Apple extensions. HANDLE_DW_AT(0x3fe1, APPLE_optimized, 0, APPLE) HANDLE_DW_AT(0x3fe2, APPLE_flags, 0, APPLE) diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h index 9455cffab279..76d9c365c0a8 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.h +++ b/llvm/include/llvm/BinaryFormat/Dwarf.h @@ -129,8 +129,9 @@ enum LocationAtom { #include "llvm/BinaryFormat/Dwarf.def" DW_OP_lo_user = 0xe0, DW_OP_hi_user = 0xff, - DW_OP_LLVM_fragment = 0x1000, ///< Only used in LLVM metadata. - DW_OP_LLVM_convert = 0x1001 ///< Only used in LLVM metadata. + DW_OP_LLVM_fragment = 0x1000, ///< Only used in LLVM metadata. + DW_OP_LLVM_convert = 0x1001, ///< Only used in LLVM metadata. + DW_OP_LLVM_tag_offset = 0x1002, ///< Only used in LLVM metadata. 
}; enum TypeKind : uint8_t { diff --git a/llvm/lib/BinaryFormat/Dwarf.cpp b/llvm/lib/BinaryFormat/Dwarf.cpp index b6d4d5b07303..eb6bd33ce583 100644 --- a/llvm/lib/BinaryFormat/Dwarf.cpp +++ b/llvm/lib/BinaryFormat/Dwarf.cpp @@ -147,6 +147,8 @@ StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) { return "DW_OP_LLVM_convert"; case DW_OP_LLVM_fragment: return "DW_OP_LLVM_fragment"; + case DW_OP_LLVM_tag_offset: + return "DW_OP_LLVM_tag_offset"; } } @@ -157,6 +159,7 @@ unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) { #include "llvm/BinaryFormat/Dwarf.def" .Case("DW_OP_LLVM_convert", DW_OP_LLVM_convert) .Case("DW_OP_LLVM_fragment", DW_OP_LLVM_fragment) + .Case("DW_OP_LLVM_tag_offset", DW_OP_LLVM_tag_offset) .Default(0); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 37e963ed63b1..f0ceba50f144 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -683,6 +683,9 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, NVPTXAddressSpace ? 
*NVPTXAddressSpace : NVPTX_ADDR_local_space); } addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); + if (DwarfExpr.TagOffset) + addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1, + *DwarfExpr.TagOffset); return VariableDie; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index c7c283202022..d483a30e4999 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -438,6 +438,9 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, emitOp(dwarf::DW_OP_deref_size); emitData1(Op->getArg(0)); break; + case dwarf::DW_OP_LLVM_tag_offset: + TagOffset = Op->getArg(0); + break; default: llvm_unreachable("unhandled opcode found in expression"); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index 6985debe6138..3a9347a3e0ab 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -140,6 +140,8 @@ public: return LocationKind == Implicit; } + Optional<uint8_t> TagOffset; + protected: /// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed /// to represent a subregister. 
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index fa8438ea9006..76d9ce3332cf 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -835,6 +835,7 @@ unsigned DIExpression::ExprOperand::getSize() const { case dwarf::DW_OP_constu: case dwarf::DW_OP_deref_size: case dwarf::DW_OP_plus_uconst: + case dwarf::DW_OP_LLVM_tag_offset: return 2; default: return 1; @@ -876,6 +877,7 @@ bool DIExpression::isValid() const { break; } case dwarf::DW_OP_LLVM_convert: + case dwarf::DW_OP_LLVM_tag_offset: case dwarf::DW_OP_constu: case dwarf::DW_OP_plus_uconst: case dwarf::DW_OP_plus: @@ -905,7 +907,9 @@ bool DIExpression::isImplicit() const { unsigned N = getNumElements(); if (isValid() && N > 0) { switch (getElement(N-1)) { - case dwarf::DW_OP_stack_value: return true; + case dwarf::DW_OP_stack_value: + case dwarf::DW_OP_LLVM_tag_offset: + return true; case dwarf::DW_OP_LLVM_fragment: return N > 1 && getElement(N-2) == dwarf::DW_OP_stack_value; default: break; diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index abbe5801de1a..6d70e3bcbf0c 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -21,6 +21,7 @@ #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -205,8 +206,10 @@ public: bool tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag); Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag); Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong); - bool instrumentStack(SmallVectorImpl &Allocas, - SmallVectorImpl &RetVec, Value *StackTag); + bool instrumentStack( + SmallVectorImpl &Allocas, + DenseMap> &AllocaDeclareMap, + SmallVectorImpl 
&RetVec, Value *StackTag); bool instrumentLandingPads(SmallVectorImpl &RetVec); Value *getNextTagWithCall(IRBuilder<> &IRB); Value *getStackBaseTag(IRBuilder<> &IRB); @@ -984,6 +987,7 @@ bool HWAddressSanitizer::instrumentLandingPads( bool HWAddressSanitizer::instrumentStack( SmallVectorImpl &Allocas, + DenseMap> &AllocaDeclareMap, SmallVectorImpl &RetVec, Value *StackTag) { // Ideally, we want to calculate tagged stack base pointer, and rewrite all // alloca addresses using that. Unfortunately, offsets are not known yet @@ -1008,6 +1012,13 @@ bool HWAddressSanitizer::instrumentStack( U.set(Replacement); } + for (auto *DDI : AllocaDeclareMap.lookup(AI)) { + DIExpression *OldExpr = DDI->getExpression(); + DIExpression *NewExpr = DIExpression::append( + OldExpr, {dwarf::DW_OP_LLVM_tag_offset, RetagMask(N)}); + DDI->setArgOperand(2, MetadataAsValue::get(*C, NewExpr)); + } + tagAlloca(IRB, AI, Tag); for (auto RI : RetVec) { @@ -1051,6 +1062,7 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { SmallVector AllocasToInstrument; SmallVector RetVec; SmallVector LandingPadVec; + DenseMap> AllocaDeclareMap; for (auto &BB : F) { for (auto &Inst : BB) { if (ClInstrumentStack) @@ -1069,6 +1081,10 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { isa(Inst)) RetVec.push_back(&Inst); + if (auto *DDI = dyn_cast(&Inst)) + if (auto *Alloca = dyn_cast_or_null(DDI->getAddress())) + AllocaDeclareMap[Alloca].push_back(DDI); + if (ClInstrumentLandingPads && isa(Inst)) LandingPadVec.push_back(&Inst); @@ -1107,7 +1123,8 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { if (!AllocasToInstrument.empty()) { Value *StackTag = ClGenerateTagsWithCalls ? 
nullptr : getStackBaseTag(EntryIRB); - Changed |= instrumentStack(AllocasToInstrument, RetVec, StackTag); + Changed |= instrumentStack(AllocasToInstrument, AllocaDeclareMap, RetVec, + StackTag); } // If we split the entry block, move any allocas that were originally in the diff --git a/llvm/test/Assembler/diexpression.ll b/llvm/test/Assembler/diexpression.ll index b633ba6a4351..93674acd3e71 100644 --- a/llvm/test/Assembler/diexpression.ll +++ b/llvm/test/Assembler/diexpression.ll @@ -9,9 +9,10 @@ ; CHECK-SAME: !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 3, DW_OP_LLVM_fragment, 3, 7), ; CHECK-SAME: !DIExpression(DW_OP_constu, 2, DW_OP_swap, DW_OP_xderef), ; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, 3) -; CHECK-SAME: !DIExpression(DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_signed)} +; CHECK-SAME: !DIExpression(DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_signed) +; CHECK-SAME: !DIExpression(DW_OP_LLVM_tag_offset, 1)} -!named = !{!0, !1, !2, !3, !4, !5, !6, !7} +!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8} !0 = !DIExpression() !1 = !DIExpression(DW_OP_deref) @@ -21,3 +22,4 @@ !5 = !DIExpression(DW_OP_constu, 2, DW_OP_swap, DW_OP_xderef) !6 = !DIExpression(DW_OP_plus_uconst, 3) !7 = !DIExpression(DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_signed) +!8 = !DIExpression(DW_OP_LLVM_tag_offset, 1) diff --git a/llvm/test/CodeGen/AArch64/dbg-declare-tag-offset.ll b/llvm/test/CodeGen/AArch64/dbg-declare-tag-offset.ll new file mode 100644 index 000000000000..626db0589a39 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/dbg-declare-tag-offset.ll @@ -0,0 +1,47 @@ +; RUN: llc -o - %s | FileCheck %s + +target triple="aarch64--" + +; CHECK: .Linfo_string4: +; CHECK-NEXT: .asciz "a" +; CHECK: .Linfo_string6: +; CHECK-NEXT: .asciz "b" + +; CHECK: .byte 1 // DW_AT_LLVM_tag_offset +; CHECK: .word .Linfo_string4 // DW_AT_name + +; CHECK: .byte 2 // DW_AT_LLVM_tag_offset +; CHECK: .word .Linfo_string6 
// DW_AT_name + +define void @f() !dbg !6 { +entry: + %a = alloca i8* + %b = alloca i8* + call void @llvm.dbg.declare(metadata i8** %a, metadata !12, metadata !DIExpression(DW_OP_LLVM_tag_offset, 1)), !dbg !14 + call void @llvm.dbg.declare(metadata i8** %b, metadata !13, metadata !DIExpression(DW_OP_LLVM_tag_offset, 2)), !dbg !14 + ret void, !dbg !15 +} + +declare void @llvm.dbg.declare(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "x.c", directory: "/") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{!"clang"} +!6 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: +DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) +!7 = !DISubroutineType(types: !8) +!8 = !{null, !9} +!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64) +!10 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !11) +!11 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) +!12 = !DILocalVariable(name: "a", scope: !6, file: !1, line: 1, type: !9) +!13 = !DILocalVariable(name: "b", scope: !6, file: !1, line: 1, type: !9) +!14 = !DILocation(line: 1, column: 29, scope: !6) +!15 = !DILocation(line: 1, column: 37, scope: !6) diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/dbg-declare-tag-offset.ll b/llvm/test/Instrumentation/HWAddressSanitizer/dbg-declare-tag-offset.ll new file mode 100644 index 000000000000..8474b271d76e --- /dev/null +++ b/llvm/test/Instrumentation/HWAddressSanitizer/dbg-declare-tag-offset.ll @@ -0,0 +1,50 @@ +; RUN: opt -hwasan -S -o - %s | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = 
"aarch64--linux-android" + +declare void @g(i8**, i8**, i8**, i8**, i8**, i8**) + +define void @f() sanitize_hwaddress !dbg !6 { +entry: + %nodebug0 = alloca i8* + %nodebug1 = alloca i8* + %nodebug2 = alloca i8* + %nodebug3 = alloca i8* + %a = alloca i8* + %b = alloca i8* + ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 4) + call void @llvm.dbg.declare(metadata i8** %a, metadata !12, metadata !DIExpression()), !dbg !14 + ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 4) + call void @llvm.dbg.declare(metadata i8** %a, metadata !12, metadata !DIExpression()), !dbg !14 + ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 6) + call void @llvm.dbg.declare(metadata i8** %b, metadata !13, metadata !DIExpression()), !dbg !14 + ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 6) + call void @llvm.dbg.declare(metadata i8** %b, metadata !13, metadata !DIExpression()), !dbg !14 + call void @g(i8** %nodebug0, i8** %nodebug1, i8** %nodebug2, i8** %nodebug3, i8** %a, i8** %b) + ret void, !dbg !15 +} + +declare void @llvm.dbg.declare(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "x.c", directory: "/") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{!"clang"} +!6 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: +DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) +!7 = !DISubroutineType(types: !8) +!8 = !{null, !9} +!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64) +!10 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !11) +!11 = !DIBasicType(name: "char", size: 8, encoding: 
DW_ATE_signed_char) +!12 = !DILocalVariable(name: "a", scope: !6, file: !1, line: 1, type: !9) +!13 = !DILocalVariable(name: "b", scope: !6, file: !1, line: 1, type: !9) +!14 = !DILocation(line: 1, column: 29, scope: !6) +!15 = !DILocation(line: 1, column: 37, scope: !6)