[BOLT][DWARF] Handle shared abbrev section

We can have a scenario where multiple CUs share an abbrev table.
We modify or don't modify one CU, which leads to other CUs having invalid abbrev section.
Example that caused it.
All of CUs shared the same abbrev table. First CU just had compile_unit and sub_program.
It was not modified. Next CU had DW_TAG_lexical_block with
DW_AT_low_pc/DW_AT_high_pc converted to DW_AT_low_pc/DW_AT_ranges.
We used unmodified abbrev section for first and subsequent CUs.
So when parsing subsequent CUs debug info was corrupted.

In this patch we will now duplicate all sections that are modified and are different.
This also means that if .debug_types is present and it shares Abbrev table, and
they usually are, we now can have two Abbrev tables. One for CU that was modified,
and unmodified one for TU.

Reviewed By: maksfb

Differential Revision: https://reviews.llvm.org/D118517
This commit is contained in:
Alexander Yermolovich 2022-01-31 11:06:06 -08:00
parent 804915f5dc
commit 9f3f9d19c7
3 changed files with 178 additions and 33 deletions

View File

@ -715,8 +715,11 @@ class DebugAbbrevWriter {
std::unique_ptr<DebugBufferVector> Buffer;
std::unique_ptr<raw_svector_ostream> Stream;
};
/// Map original unit abbrev offset to abbreviations data.
std::map<uint64_t, AbbrevData> UnitsAbbrevData;
/// Map original unit to abbreviations data.
std::unordered_map<const DWARFUnit *, AbbrevData *> UnitsAbbrevData;
/// Map from Hash Signature to AbbrevData.
llvm::StringMap<std::unique_ptr<AbbrevData>> AbbrevDataCache;
/// Attributes substitution (patch) information.
struct PatchInfo {
@ -783,10 +786,8 @@ public:
/// Return an offset in the finalized abbrev section corresponding to CU/TU.
uint64_t getAbbreviationsOffsetForUnit(const DWARFUnit &Unit) {
assert(!DWOId && "offsets are tracked for non-DWO units only");
assert(UnitsAbbrevData.find(Unit.getAbbreviationsOffset()) !=
UnitsAbbrevData.end() &&
"no abbrev data found for unit");
return UnitsAbbrevData[Unit.getAbbreviationsOffset()].Offset;
assert(UnitsAbbrevData.count(&Unit) && "no abbrev data found for unit");
return UnitsAbbrevData[&Unit]->Offset;
}
};

View File

@ -18,6 +18,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/SHA1.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@ -694,20 +695,30 @@ void DebugAbbrevWriter::addUnitAbbreviations(DWARFUnit &Unit) {
if (!Abbrevs)
return;
// Multiple units may share the same abbreviations. Only add abbreviations
// for the first unit and reuse them.
const uint64_t AbbrevOffset = Unit.getAbbreviationsOffset();
if (UnitsAbbrevData.find(AbbrevOffset) != UnitsAbbrevData.end())
return;
AbbrevData &UnitData = UnitsAbbrevData[AbbrevOffset];
UnitData.Buffer = std::make_unique<DebugBufferVector>();
UnitData.Stream = std::make_unique<raw_svector_ostream>(*UnitData.Buffer);
const PatchesTy &UnitPatches = Patches[&Unit];
// We are duplicating abbrev sections, to handle the case where for one CU we
// modify it, but for another we don't.
auto UnitDataPtr = std::make_unique<AbbrevData>();
AbbrevData &UnitData = *UnitDataPtr.get();
UnitData.Buffer = std::make_unique<DebugBufferVector>();
UnitData.Stream = std::make_unique<raw_svector_ostream>(*UnitData.Buffer);
raw_svector_ostream &OS = *UnitData.Stream.get();
// Returns true if AbbrevData is re-used, false otherwise.
auto hashAndAddAbbrev = [&](StringRef AbbrevData) -> bool {
llvm::SHA1 Hasher;
Hasher.update(AbbrevData);
StringRef Key = Hasher.final();
auto Iter = AbbrevDataCache.find(Key);
if (Iter != AbbrevDataCache.end()) {
UnitsAbbrevData[&Unit] = Iter->second.get();
return true;
}
AbbrevDataCache[Key] = std::move(UnitDataPtr);
UnitsAbbrevData[&Unit] = &UnitData;
return false;
};
// Take a fast path if there are no patches to apply. Simply copy the original
// contents.
if (UnitPatches.empty()) {
@ -756,9 +767,10 @@ void DebugAbbrevWriter::addUnitAbbreviations(DWARFUnit &Unit) {
AbbrevContents = AbbrevSectionContents;
}
OS.reserveExtraSpace(AbbrevContents.size());
OS << AbbrevContents;
if (!hashAndAddAbbrev(AbbrevContents)) {
OS.reserveExtraSpace(AbbrevContents.size());
OS << AbbrevContents;
}
return;
}
@ -798,9 +810,13 @@ void DebugAbbrevWriter::addUnitAbbreviations(DWARFUnit &Unit) {
encodeULEB128(0, OS);
}
encodeULEB128(0, OS);
hashAndAddAbbrev(OS.str());
}
std::unique_ptr<DebugBufferVector> DebugAbbrevWriter::finalize() {
// Used to create determinism for writing out abbrevs.
std::vector<AbbrevData *> Abbrevs;
if (DWOId) {
// We expect abbrev_offset to always be zero for DWO units as there
// should be one CU per DWO, and TUs should share the same abbreviation
@ -818,33 +834,40 @@ std::unique_ptr<DebugBufferVector> DebugAbbrevWriter::finalize() {
}
}
DWARFUnit *Unit = Context.getDWOCompileUnitForHash(*DWOId);
// Issue abbreviations for the DWO CU only.
addUnitAbbreviations(*Context.getDWOCompileUnitForHash(*DWOId));
addUnitAbbreviations(*Unit);
AbbrevData *Abbrev = UnitsAbbrevData[Unit];
Abbrevs.push_back(Abbrev);
} else {
Abbrevs.reserve(Context.getNumCompileUnits() + Context.getNumTypeUnits());
std::unordered_set<AbbrevData *> ProcessedAbbrevs;
// Add abbreviations from compile and type non-DWO units.
for (const std::unique_ptr<DWARFUnit> &Unit : Context.normal_units())
for (const std::unique_ptr<DWARFUnit> &Unit : Context.normal_units()) {
addUnitAbbreviations(*Unit);
AbbrevData *Abbrev = UnitsAbbrevData[Unit.get()];
if (!ProcessedAbbrevs.insert(Abbrev).second)
continue;
Abbrevs.push_back(Abbrev);
}
}
DebugBufferVector ReturnBuffer;
// Pre-calculate the total size of abbrev section.
uint64_t Size = 0;
for (const auto &KV : UnitsAbbrevData) {
const AbbrevData &UnitData = KV.second;
Size += UnitData.Buffer->size();
}
for (const AbbrevData *UnitData : Abbrevs)
Size += UnitData->Buffer->size();
ReturnBuffer.reserve(Size);
uint64_t Pos = 0;
for (auto &KV : UnitsAbbrevData) {
AbbrevData &UnitData = KV.second;
ReturnBuffer.append(*UnitData.Buffer);
UnitData.Offset = Pos;
Pos += UnitData.Buffer->size();
for (AbbrevData *UnitData : Abbrevs) {
ReturnBuffer.append(*UnitData->Buffer);
UnitData->Offset = Pos;
Pos += UnitData->Buffer->size();
UnitData.Buffer.reset();
UnitData.Stream.reset();
UnitData->Buffer.reset();
UnitData->Stream.reset();
}
return std::make_unique<DebugBufferVector>(ReturnBuffer);

View File

@ -0,0 +1,121 @@
# RUN: rm -rf %t
# RUN: mkdir %t
# RUN: llvm-mc -filetype=obj -triple=x86_64 -dwarf-version=4 %s -o %t/shared-abbrev.o
# RUN: %clang %cflags %t/shared-abbrev.o -o %t/shared-abbrev.exe
# RUN: llvm-bolt %t/shared-abbrev.exe -o %t/shared-abbrev.exe.bolt -update-debug-sections
# RUN: llvm-dwarfdump --debug-info %t/shared-abbrev.exe.bolt | FileCheck %s
# CHECK: 0x00000000:
# CHECK-SAME: abbr_offset = 0x0000
# CHECK-EMPTY:
# CHECK-NEXT: DW_TAG_compile_unit
# CHECK-NEXT: DW_AT_stmt_list
# CHECK-NEXT: DW_AT_low_pc
# CHECK-NEXT: DW_AT_ranges
# CHECK: 0x0000001d:
# CHECK-SAME: abbr_offset = 0x0017
# CHECK-EMPTY:
# CHECK: DW_TAG_compile_unit
# CHECK-NEXT: DW_AT_stmt_list
# CHECK-NEXT: DW_AT_low_pc
# CHECK-NEXT: DW_AT_ranges
# CHECK: 0x0000003a:
# CHECK-SAME: abbr_offset = 0x0000
# CHECK-EMPTY:
# CHECK-NEXT: DW_TAG_compile_unit
# CHECK-NEXT: DW_AT_stmt_list
# CHECK-NEXT: DW_AT_low_pc
# CHECK-NEXT: DW_AT_ranges
.text
.file "main.cpp"
.globl main # -- Begin function main
.p2align 4, 0x90
.type main,@function
main: # @main
.Lfunc_begin0:
.file 1 "test" "main.cpp"
.loc 1 1 0 # main.cpp:1:0
.cfi_startproc
.cfi_def_cfa %rsp, 8
retq
.Ltmp1:
.Lfunc_end0:
.size main, .Lfunc_end0-main
.cfi_endproc
# -- End function
.section .debug_abbrev,"",@progbits
.byte 1 # Abbreviation Code
.byte 17 # DW_TAG_compile_unit
.byte 0 # DW_CHILDREN_no
.byte 16 # DW_AT_stmt_list
.byte 23 # DW_FORM_sec_offset
.byte 17 # DW_AT_low_pc
.byte 1 # DW_FORM_addr
.byte 18 # DW_AT_high_pc
.byte 7 # DW_FORM_data8
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 2 # Abbreviation Code
.byte 17 # DW_TAG_compile_unit
.byte 0 # DW_CHILDREN_no
.byte 16 # DW_AT_stmt_list
.byte 23 # DW_FORM_sec_offset
.byte 17 # DW_AT_low_pc
.byte 1 # DW_FORM_addr
.byte 85 # DW_AT_ranges
.byte 23 # DW_FORM_sec_offset
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 0 # EOM(3)
.section .debug_info,"",@progbits
.Lcu_begin0:
.long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
.Ldebug_info_start0:
.short 4 # DWARF version number
.long .debug_abbrev # Offset Into Abbrev. Section
.byte 8 # Address Size (in bytes)
.byte 2 # Abbrev [2] DW_TAG_compile_unit
.long .Lline_table_start0 # DW_AT_stmt_list
.quad 0 # DW_AT_low_pc
.byte 0 # End Of Children Mark
.long .Ldebug_ranges0 # DW_AT_ranges --- end manual --
.Ldebug_info_end0:
# Second CU table.
.long .Ldebug_info_end1-.Ldebug_info_start1 # Length of Unit
.Ldebug_info_start1:
.short 4 # DWARF version number
.long .debug_abbrev # Offset Into Abbrev. Section
.byte 8 # Address Size (in bytes)
.byte 1 # Abbrev [1] DW_TAG_compile_unit
.long .Lline_table_start0 # DW_AT_stmt_list
.quad .Lfunc_begin0 # DW_AT_low_pc
.quad .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
.byte 0 # End Of Children Mark
.Ldebug_info_end1:
# Third CU table.
.long .Ldebug_info_end2-.Ldebug_info_start2 # Length of Unit
.Ldebug_info_start2:
.short 4 # DWARF version number
.long .debug_abbrev # Offset Into Abbrev. Section
.byte 8 # Address Size (in bytes)
.byte 2 # Abbrev [2] DW_TAG_compile_unit
.long .Lline_table_start0 # DW_AT_stmt_list
.quad 0 # DW_AT_low_pc
.byte 0 # End Of Children Mark
.long .Ldebug_ranges0 # DW_AT_ranges --- end manual --
.Ldebug_info_end2:
.section .debug_ranges,"",@progbits
.Ldebug_ranges0:
.quad .Lfunc_begin0
.quad .Lfunc_end0
.quad .Lfunc_begin0
.quad .Lfunc_end0
.quad 0
.quad 0
.section ".note.GNU-stack","",@progbits
.addrsig
.section .debug_line,"",@progbits
.Lline_table_start0: