diff --git a/llvm/test/tools/llvm-objcopy/COFF/Inputs/bigobj.o.gz b/llvm/test/tools/llvm-objcopy/COFF/Inputs/bigobj.o.gz new file mode 100644 index 000000000000..6435f4785ff7 Binary files /dev/null and b/llvm/test/tools/llvm-objcopy/COFF/Inputs/bigobj.o.gz differ diff --git a/llvm/test/tools/llvm-objcopy/COFF/bigobj.test b/llvm/test/tools/llvm-objcopy/COFF/bigobj.test new file mode 100644 index 000000000000..17968f12b8ae --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/COFF/bigobj.test @@ -0,0 +1,35 @@ +RUN: %python %p/../Inputs/ungzip.py %p/Inputs/bigobj.o.gz > %t.in.o + +RUN: llvm-objdump -t %t.in.o | FileCheck %s --check-prefixes=SYMBOLS,SYMBOLS-BIG,SYMBOLS-ORIG + +# Do a plain copy, to check that section numbers in symbols referring +# to sections outside of the small object format are handled correctly. +RUN: llvm-objcopy -R '.text$4' %t.in.o %t.small.o +RUN: llvm-objdump -t %t.in.o | FileCheck %s --check-prefixes=SYMBOLS,SYMBOLS-BIG,SYMBOLS-ORIG + +# Remove a section, making the section count fit into a small object. +RUN: llvm-objcopy -R '.text$4' %t.in.o %t.small.o +RUN: llvm-objdump -t %t.small.o | FileCheck %s --check-prefixes=SYMBOLS,SYMBOLS-SMALL,SYMBOLS-REMOVED-SMALL + +# Add a .gnu_debuglink section, forcing the object back to big format. +RUN: llvm-objcopy --add-gnu-debuglink=%t.in.o %t.small.o %t.big.o + llvm-objdump -t %t.big.o | FileCheck %s --check-prefixes=SYMBOLS,SYMBOLS-BIG,SYMBOLS-REMOVED-BIG + +# In big object format, the .file symbol occupies one symbol table entry for +# the auxillary data, but needs two entries in the small format, forcing the +# raw symbol indices of later symbols to change. +SYMBOLS: SYMBOL TABLE: +SYMBOLS-NEXT: [ 0]{{.*}} (nx 1) {{.*}} .text +SYMBOLS-NEXT: AUX scnlen +SYMBOLS-SMALL-NEXT: [ 2]{{.*}} (nx 2) {{.*}} .file +SYMBOLS-BIG-NEXT: [ 2]{{.*}} (nx 1) {{.*}} .file +SYMBOLS-NEXT: AUX abcdefghijklmnopqrs +SYMBOLS-SMALL-NEXT: [ 5]{{.*}} (nx 0) {{.*}} foo +SYMBOLS-BIG-NEXT: [ 4]{{.*}} (nx 0) {{.*}} foo + +# Check that the section numbers outside of signed 16 bit int range +# are represented properly. After removing one section, the section +# numbers decrease. +SYMBOLS-ORIG: [ 5](sec 65280){{.*}} symbol65280 +SYMBOLS-REMOVED-SMALL: [ 6](sec 65279){{.*}} symbol65280 +SYMBOLS-REMOVED-BIG: [ 5](sec 65279){{.*}} symbol65280 diff --git a/llvm/test/tools/llvm-objcopy/ELF/auto-remove-shndx.test b/llvm/test/tools/llvm-objcopy/ELF/auto-remove-shndx.test index 5a23493fa941..8e6c788bf481 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/auto-remove-shndx.test +++ b/llvm/test/tools/llvm-objcopy/ELF/auto-remove-shndx.test @@ -1,4 +1,4 @@ -# RUN: %python %p/Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t +# RUN: %python %p/../Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t # RUN: llvm-objcopy -R .text -R s0 -R s1 -R s2 -R s3 -R s4 -R s5 -R s6 %t %t2 # RUN: llvm-readobj --sections %t2 | FileCheck --check-prefix=SECS %s diff --git a/llvm/test/tools/llvm-objcopy/ELF/many-sections.test b/llvm/test/tools/llvm-objcopy/ELF/many-sections.test index 57239f32e4ad..1dd41cfb10c3 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/many-sections.test +++ b/llvm/test/tools/llvm-objcopy/ELF/many-sections.test @@ -1,4 +1,4 @@ -RUN: %python %p/Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t +RUN: %python %p/../Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t RUN: llvm-objcopy %t %t2 RUN: llvm-readobj --file-headers %t2 | FileCheck --check-prefix=EHDR %s RUN: llvm-readobj --sections %t2 | FileCheck --check-prefix=SECS %s diff --git a/llvm/test/tools/llvm-objcopy/ELF/remove-shndx.test b/llvm/test/tools/llvm-objcopy/ELF/remove-shndx.test index 6cc3a1a291fb..53ca8e7f2201 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/remove-shndx.test +++ b/llvm/test/tools/llvm-objcopy/ELF/remove-shndx.test @@ -1,6 +1,6 @@ # This test checks to see that a .symtab_shndx section is added to any binary # that needs it, even if the original was removed. -RUN: %python %p/Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t +RUN: %python %p/../Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t RUN: llvm-objcopy -R .symtab_shndx %t %t2 RUN: llvm-readobj --sections %t2 | FileCheck %s diff --git a/llvm/test/tools/llvm-objcopy/ELF/strict-no-add.test b/llvm/test/tools/llvm-objcopy/ELF/strict-no-add.test index 4f24df31bf97..348ab7c4fbd7 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/strict-no-add.test +++ b/llvm/test/tools/llvm-objcopy/ELF/strict-no-add.test @@ -1,7 +1,7 @@ # This test makes sure that sections added at the end that don't have symbols # defined in them don't trigger the creation of a large index table. -RUN: %python %p/Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t.0 +RUN: %python %p/../Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t.0 RUN: cat %p/Inputs/alloc-symtab.o > %t RUN: llvm-objcopy -R .text -R s0 -R s1 -R s2 -R s3 -R s4 -R s5 -R s6 %t.0 %t2 RUN: llvm-objcopy --add-section=.s0=%t --add-section=.s1=%t --add-section=.s2=%t %t2 %t2 diff --git a/llvm/test/tools/llvm-objcopy/ELF/Inputs/ungzip.py b/llvm/test/tools/llvm-objcopy/Inputs/ungzip.py similarity index 100% rename from llvm/test/tools/llvm-objcopy/ELF/Inputs/ungzip.py rename to llvm/test/tools/llvm-objcopy/Inputs/ungzip.py diff --git a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp b/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp index 20adbe11e7a8..64b4e79a4e02 100644 --- a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp +++ b/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp @@ -37,7 +37,7 @@ static uint64_t getNextRVA(const Object &Obj) { return 0; const Section &Last = Obj.getSections().back(); return alignTo(Last.Header.VirtualAddress + Last.Header.VirtualSize, - Obj.PeHeader.SectionAlignment); + Obj.IsPE ? Obj.PeHeader.SectionAlignment : 1); } static uint32_t getCRC32(StringRef Data) { @@ -74,8 +74,8 @@ static void addGnuDebugLink(Object &Obj, StringRef DebugLinkFile) { Sec.Name = ".gnu_debuglink"; Sec.Header.VirtualSize = Sec.getContents().size(); Sec.Header.VirtualAddress = StartRVA; - Sec.Header.SizeOfRawData = - alignTo(Sec.Header.VirtualSize, Obj.PeHeader.FileAlignment); + Sec.Header.SizeOfRawData = alignTo(Sec.Header.VirtualSize, + Obj.IsPE ? Obj.PeHeader.FileAlignment : 1); // Sec.Header.PointerToRawData is filled in by the writer. Sec.Header.PointerToRelocations = 0; Sec.Header.PointerToLinenumbers = 0; diff --git a/llvm/tools/llvm-objcopy/COFF/Object.cpp b/llvm/tools/llvm-objcopy/COFF/Object.cpp index 8c382c1faef7..0ad5a05a1444 100644 --- a/llvm/tools/llvm-objcopy/COFF/Object.cpp +++ b/llvm/tools/llvm-objcopy/COFF/Object.cpp @@ -26,12 +26,8 @@ void Object::addSymbols(ArrayRef NewSymbols) { void Object::updateSymbols() { SymbolMap = DenseMap(Symbols.size()); - size_t RawSymIndex = 0; - for (Symbol &Sym : Symbols) { + for (Symbol &Sym : Symbols) SymbolMap[Sym.UniqueId] = &Sym; - Sym.RawIndex = RawSymIndex; - RawSymIndex += 1 + Sym.Sym.NumberOfAuxSymbols; - } } const Symbol *Object::findSymbol(size_t UniqueId) const { diff --git a/llvm/tools/llvm-objcopy/COFF/Object.h b/llvm/tools/llvm-objcopy/COFF/Object.h index afa272286ef7..21475b068629 100644 --- a/llvm/tools/llvm-objcopy/COFF/Object.h +++ b/llvm/tools/llvm-objcopy/COFF/Object.h @@ -66,10 +66,24 @@ private: std::vector OwnedContents; }; +struct AuxSymbol { + AuxSymbol(ArrayRef In) { + assert(In.size() == sizeof(Opaque)); + std::copy(In.begin(), In.end(), Opaque); + } + + ArrayRef getRef() const { + return ArrayRef(Opaque, sizeof(Opaque)); + } + + uint8_t Opaque[sizeof(object::coff_symbol16)]; +}; + struct Symbol { object::coff_symbol32 Sym; StringRef Name; - std::vector AuxData; + std::vector AuxData; + StringRef AuxFile; ssize_t TargetSectionId; ssize_t AssociativeComdatTargetSectionId = 0; Optional WeakTargetSymbolId; @@ -132,7 +146,7 @@ private: ssize_t NextSectionUniqueId = 1; // Allow a UniqueId 0 to mean undefined. - // Update SymbolMap and RawIndex in each Symbol. + // Update SymbolMap. void updateSymbols(); // Update SectionMap and Index in each Section. diff --git a/llvm/tools/llvm-objcopy/COFF/Reader.cpp b/llvm/tools/llvm-objcopy/COFF/Reader.cpp index 87dd60a43cf3..7270bbf94de6 100644 --- a/llvm/tools/llvm-objcopy/COFF/Reader.cpp +++ b/llvm/tools/llvm-objcopy/COFF/Reader.cpp @@ -107,9 +107,24 @@ Error COFFReader::readSymbols(Object &Obj, bool IsBigObj) const { *reinterpret_cast(SymRef.getRawPtr())); if (auto EC = COFFObj.getSymbolName(SymRef, Sym.Name)) return errorCodeToError(EC); - Sym.AuxData = COFFObj.getSymbolAuxData(SymRef); - assert((Sym.AuxData.size() % - (IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16))) == 0); + + ArrayRef AuxData = COFFObj.getSymbolAuxData(SymRef); + size_t SymSize = IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16); + assert(AuxData.size() == SymSize * SymRef.getNumberOfAuxSymbols()); + // The auxillary symbols are structs of sizeof(coff_symbol16) each. + // In the big object format (where symbols are coff_symbol32), each + // auxillary symbol is padded with 2 bytes at the end. Copy each + // auxillary symbol to the Sym.AuxData vector. For file symbols, + // the whole range of aux symbols are interpreted as one null padded + // string instead. + if (SymRef.isFileRecord()) + Sym.AuxFile = StringRef(reinterpret_cast(AuxData.data()), + AuxData.size()) + .rtrim('\0'); + else + for (size_t I = 0; I < SymRef.getNumberOfAuxSymbols(); I++) + Sym.AuxData.push_back(AuxData.slice(I * SymSize, sizeof(AuxSymbol))); + // Find the unique id of the section if (SymRef.getSectionNumber() <= 0) // Special symbol (undefined/absolute/debug) diff --git a/llvm/tools/llvm-objcopy/COFF/Writer.cpp b/llvm/tools/llvm-objcopy/COFF/Writer.cpp index db897e2ff334..6e69c5972179 100644 --- a/llvm/tools/llvm-objcopy/COFF/Writer.cpp +++ b/llvm/tools/llvm-objcopy/COFF/Writer.cpp @@ -55,7 +55,8 @@ Error COFFWriter::finalizeSymbolContents() { if (Sym.Sym.NumberOfAuxSymbols == 1 && Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC) { coff_aux_section_definition *SD = - reinterpret_cast(Sym.AuxData.data()); + reinterpret_cast( + Sym.AuxData[0].Opaque); uint32_t SDSectionNumber; if (Sym.AssociativeComdatTargetSectionId == 0) { // Not a comdat associative section; just set the Number field to @@ -79,7 +80,7 @@ Error COFFWriter::finalizeSymbolContents() { // we want to set. Only >= 1 would be required, but only == 1 makes sense. if (Sym.WeakTargetSymbolId && Sym.Sym.NumberOfAuxSymbols == 1) { coff_aux_weak_external *WE = - reinterpret_cast(Sym.AuxData.data()); + reinterpret_cast(Sym.AuxData[0].Opaque); const Symbol *Target = Obj.findSymbol(*Sym.WeakTargetSymbolId); if (Target == nullptr) return createStringError(object_error::invalid_symbol_index, @@ -141,13 +142,26 @@ size_t COFFWriter::finalizeStringTable() { template std::pair COFFWriter::finalizeSymbolTable() { - size_t SymTabSize = Obj.getSymbols().size() * sizeof(SymbolTy); - for (const auto &S : Obj.getSymbols()) - SymTabSize += S.AuxData.size(); - return std::make_pair(SymTabSize, sizeof(SymbolTy)); + size_t RawSymIndex = 0; + for (auto &S : Obj.getMutableSymbols()) { + // Symbols normally have NumberOfAuxSymbols set correctly all the time. + // For file symbols, we need to know the output file's symbol size to be + // able to calculate the number of slots it occupies. + if (!S.AuxFile.empty()) + S.Sym.NumberOfAuxSymbols = + alignTo(S.AuxFile.size(), sizeof(SymbolTy)) / sizeof(SymbolTy); + S.RawIndex = RawSymIndex; + RawSymIndex += 1 + S.Sym.NumberOfAuxSymbols; + } + return std::make_pair(RawSymIndex * sizeof(SymbolTy), sizeof(SymbolTy)); } Error COFFWriter::finalize(bool IsBigObj) { + size_t SymTabSize, SymbolSize; + std::tie(SymTabSize, SymbolSize) = IsBigObj + ? finalizeSymbolTable() + : finalizeSymbolTable(); + if (Error E = finalizeRelocTargets()) return E; if (Error E = finalizeSymbolContents()) @@ -199,10 +213,6 @@ Error COFFWriter::finalize(bool IsBigObj) { } size_t StrTabSize = finalizeStringTable(); - size_t SymTabSize, SymbolSize; - std::tie(SymTabSize, SymbolSize) = IsBigObj - ? finalizeSymbolTable() - : finalizeSymbolTable(); size_t PointerToSymbolTable = FileSize; // StrTabSize <= 4 is the size of an empty string table, only consisting @@ -312,8 +322,23 @@ template void COFFWriter::writeSymbolStringTables() { copySymbol(*reinterpret_cast(Ptr), S.Sym); Ptr += sizeof(SymbolTy); - std::copy(S.AuxData.begin(), S.AuxData.end(), Ptr); - Ptr += S.AuxData.size(); + if (!S.AuxFile.empty()) { + // For file symbols, just write the string into the aux symbol slots, + // assuming that the unwritten parts are initialized to zero in the memory + // mapped file. + std::copy(S.AuxFile.begin(), S.AuxFile.end(), Ptr); + Ptr += S.Sym.NumberOfAuxSymbols * sizeof(SymbolTy); + } else { + // For other auxillary symbols, write their opaque payload into one symbol + // table slot each. For big object files, the symbols are larger than the + // opaque auxillary symbol struct and we leave padding at the end of each + // entry. + for (const AuxSymbol &AuxSym : S.AuxData) { + ArrayRef Ref = AuxSym.getRef(); + std::copy(Ref.begin(), Ref.end(), Ptr); + Ptr += sizeof(SymbolTy); + } + } } if (StrTabBuilder.getSize() > 4 || !Obj.IsPE) { // Always write a string table in object files, even an empty one. diff --git a/llvm/tools/llvm-objcopy/COFF/Writer.h b/llvm/tools/llvm-objcopy/COFF/Writer.h index 9b1cfa91d00e..681a8d5e4a66 100644 --- a/llvm/tools/llvm-objcopy/COFF/Writer.h +++ b/llvm/tools/llvm-objcopy/COFF/Writer.h @@ -30,11 +30,11 @@ class COFFWriter { size_t SizeOfInitializedData; StringTableBuilder StrTabBuilder; + template std::pair finalizeSymbolTable(); Error finalizeRelocTargets(); Error finalizeSymbolContents(); void layoutSections(); size_t finalizeStringTable(); - template std::pair finalizeSymbolTable(); Error finalize(bool IsBigObj);