From f590c971c7655b703700a729cbb7619bcb1cf28a Mon Sep 17 00:00:00 2001 From: Chris Bieneman Date: Thu, 19 May 2016 20:40:03 +0000 Subject: [PATCH] [obj2yaml] [yaml2obj] Support for MachO Load Command data Many of the MachO load commands can have data appended after the command structure. This data is frequently strings, but can actually be anything. This patch adds support for three optional fields on load command YAML descriptions. The new PayloadString YAML field is populated with the data after load commands known to have strings as extra data. The new ZeroPadBytes YAML field is a count of zeroed bytes after the end of the load command structure before the next command. This can apply anywhere in the file. MachO2YAML verifies that bytes are zero before populating this field, and YAML2MachO will add zeroed bytes. The new PayloadBytes YAML field stores all bytes after the end of the load command structure before the next command if any of them are non-zero. This is a catch-all for all unhandled bytes. If MachO2YAML populates PayloadBytes, it will not populate ZeroPadBytes; instead, zeroed bytes will be in the PayloadBytes structure. llvm-svn: 270115 --- llvm/include/llvm/ObjectYAML/MachOYAML.h | 4 ++ llvm/lib/ObjectYAML/MachOYAML.cpp | 34 ++++++++-- llvm/tools/obj2yaml/macho2yaml.cpp | 80 +++++++++++++++++++---- llvm/tools/yaml2obj/yaml2macho.cpp | 83 +++++++++++++++++++----- 4 files changed, 170 insertions(+), 31 deletions(-) diff --git a/llvm/include/llvm/ObjectYAML/MachOYAML.h b/llvm/include/llvm/ObjectYAML/MachOYAML.h index 3c79a20c55af..cb29c6dcd62d 100644 --- a/llvm/include/llvm/ObjectYAML/MachOYAML.h +++ b/llvm/include/llvm/ObjectYAML/MachOYAML.h @@ -52,6 +52,9 @@ struct LoadCommand { virtual ~LoadCommand(); llvm::MachO::macho_load_command Data; std::vector
Sections; + std::vector PayloadBytes; + std::string PayloadString; + uint64_t ZeroPadBytes; }; struct Object { @@ -65,6 +68,7 @@ struct Object { LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::LoadCommand) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::Section) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::Hex8) namespace llvm { namespace yaml { diff --git a/llvm/lib/ObjectYAML/MachOYAML.cpp b/llvm/lib/ObjectYAML/MachOYAML.cpp index 2faefca432e5..60ed53f7d1e1 100644 --- a/llvm/lib/ObjectYAML/MachOYAML.cpp +++ b/llvm/lib/ObjectYAML/MachOYAML.cpp @@ -96,6 +96,33 @@ void MappingTraits::mapping(IO &IO, IO.setContext(nullptr); } +template +void mapLoadCommandData(IO &IO, MachOYAML::LoadCommand &LoadCommand) {} + +template <> +void mapLoadCommandData( + IO &IO, MachOYAML::LoadCommand &LoadCommand) { + IO.mapOptional("Sections", LoadCommand.Sections); +} + +template <> +void mapLoadCommandData( + IO &IO, MachOYAML::LoadCommand &LoadCommand) { + IO.mapOptional("Sections", LoadCommand.Sections); +} + +template <> +void mapLoadCommandData( + IO &IO, MachOYAML::LoadCommand &LoadCommand) { + IO.mapOptional("PayloadString", LoadCommand.PayloadString); +} + +template <> +void mapLoadCommandData( + IO &IO, MachOYAML::LoadCommand &LoadCommand) { + IO.mapOptional("PayloadString", LoadCommand.PayloadString); +} + void MappingTraits::mapping( IO &IO, MachOYAML::LoadCommand &LoadCommand) { IO.mapRequired( @@ -106,15 +133,14 @@ void MappingTraits::mapping( case MachO::LCName: \ MappingTraits::mapping(IO, \ LoadCommand.Data.LCStruct##_data); \ + mapLoadCommandData(IO, LoadCommand); \ break; switch (LoadCommand.Data.load_command_data.cmd) { #include "llvm/Support/MachO.def" } - if (LoadCommand.Data.load_command_data.cmd == MachO::LC_SEGMENT || - LoadCommand.Data.load_command_data.cmd == MachO::LC_SEGMENT_64) { - IO.mapOptional("Sections", LoadCommand.Sections); - } + IO.mapOptional("PayloadBytes", LoadCommand.PayloadBytes); + IO.mapOptional("ZeroPadBytes", LoadCommand.ZeroPadBytes, 0ull); } 
void MappingTraits::mapping( diff --git a/llvm/tools/obj2yaml/macho2yaml.cpp b/llvm/tools/obj2yaml/macho2yaml.cpp index b90147403bd0..eae15533d07c 100644 --- a/llvm/tools/obj2yaml/macho2yaml.cpp +++ b/llvm/tools/obj2yaml/macho2yaml.cpp @@ -19,6 +19,11 @@ using namespace llvm; class MachODumper { + template + const char *processLoadCommandData( + MachOYAML::LoadCommand &LC, + const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd); + const object::MachOObjectFile &Obj; public: @@ -32,6 +37,7 @@ public: sizeof(MachO::LCStruct)); \ if (Obj.isLittleEndian() != sys::IsLittleEndianHost) \ MachO::swapStruct(LC.Data.LCStruct##_data); \ + EndPtr = processLoadCommandData(LC, LoadCmd); \ break; template @@ -68,9 +74,10 @@ template <> MachOYAML::Section constructSection(MachO::section_64 Sec) { } template -void extractSections( - const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, - std::vector &Sections, bool IsLittleEndian) { +const char * +extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, + std::vector &Sections, + bool IsLittleEndian) { auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize; const SectionType *Curr = reinterpret_cast(LoadCmd.Ptr + sizeof(SegmentType)); @@ -84,6 +91,55 @@ void extractSections( Sections.push_back(constructSection(*Curr)); } } + return reinterpret_cast(Curr); +} + +template +const char *MachODumper::processLoadCommandData( + MachOYAML::LoadCommand &LC, + const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { + return LoadCmd.Ptr + sizeof(StructType); +} + +template <> +const char *MachODumper::processLoadCommandData( + MachOYAML::LoadCommand &LC, + const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { + return extractSections( + LoadCmd, LC.Sections, Obj.isLittleEndian()); +} + +template <> +const char *MachODumper::processLoadCommandData( + MachOYAML::LoadCommand &LC, + const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { + return extractSections( + LoadCmd, LC.Sections, 
Obj.isLittleEndian()); +} + +template +const char * +readString(MachOYAML::LoadCommand &LC, + const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { + auto Start = LoadCmd.Ptr + sizeof(StructType); + auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType); + auto Size = strnlen(Start, MaxSize); + LC.PayloadString = StringRef(Start, Size).str(); + return Start + Size; +} + +template <> +const char *MachODumper::processLoadCommandData( + MachOYAML::LoadCommand &LC, + const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { + return readString(LC, LoadCmd); +} + +template <> +const char *MachODumper::processLoadCommandData( + MachOYAML::LoadCommand &LC, + const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { + return readString(LC, LoadCmd); } Expected> MachODumper::dump() { @@ -99,25 +155,25 @@ Expected> MachODumper::dump() { for (auto LoadCmd : Obj.load_commands()) { MachOYAML::LoadCommand LC; + const char *EndPtr = LoadCmd.Ptr; switch (LoadCmd.C.cmd) { default: memcpy((void *)&(LC.Data.load_command_data), LoadCmd.Ptr, sizeof(MachO::load_command)); if (Obj.isLittleEndian() != sys::IsLittleEndianHost) MachO::swapStruct(LC.Data.load_command_data); + EndPtr = processLoadCommandData(LC, LoadCmd); break; #include "llvm/Support/MachO.def" } - switch (LoadCmd.C.cmd) { - case MachO::LC_SEGMENT: - extractSections( - LoadCmd, LC.Sections, Obj.isLittleEndian()); - break; - case MachO::LC_SEGMENT_64: - extractSections( - LoadCmd, LC.Sections, Obj.isLittleEndian()); - break; + auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr); + if (!std::all_of(EndPtr, &EndPtr[RemainingBytes], + [](const char C) { return C == 0; })) { + LC.PayloadBytes.insert(LC.PayloadBytes.end(), EndPtr, + &EndPtr[RemainingBytes]); + RemainingBytes = 0; } + LC.ZeroPadBytes = RemainingBytes; Y->LoadCommands.push_back(std::move(LC)); } diff --git a/llvm/tools/yaml2obj/yaml2macho.cpp b/llvm/tools/yaml2obj/yaml2macho.cpp index 0e8799e2ce10..2a1c6a7e9d2b 100644 --- 
a/llvm/tools/yaml2obj/yaml2macho.cpp +++ b/llvm/tools/yaml2obj/yaml2macho.cpp @@ -94,6 +94,60 @@ SectionType constructSection(MachOYAML::Section Sec) { return TempSec; } +template +size_t writeLoadCommandData(MachOYAML::LoadCommand &LC, raw_ostream &OS) { + return 0; +} + +template <> +size_t writeLoadCommandData(MachOYAML::LoadCommand &LC, + raw_ostream &OS) { + size_t BytesWritten = 0; + for (auto Sec : LC.Sections) { + auto TempSec = constructSection(Sec); + OS.write(reinterpret_cast(&(TempSec)), + sizeof(MachO::section)); + BytesWritten += sizeof(MachO::section); + } + return BytesWritten; +} + +template <> +size_t +writeLoadCommandData(MachOYAML::LoadCommand &LC, + raw_ostream &OS) { + size_t BytesWritten = 0; + for (auto Sec : LC.Sections) { + auto TempSec = constructSection(Sec); + TempSec.reserved3 = Sec.reserved3; + OS.write(reinterpret_cast(&(TempSec)), + sizeof(MachO::section_64)); + BytesWritten += sizeof(MachO::section_64); + } + return BytesWritten; +} + +size_t writePayloadString(MachOYAML::LoadCommand &LC, raw_ostream &OS) { + size_t BytesWritten = 0; + if (!LC.PayloadString.empty()) { + OS.write(LC.PayloadString.c_str(), LC.PayloadString.length()); + BytesWritten = LC.PayloadString.length(); + } + return BytesWritten; +} + +template <> +size_t writeLoadCommandData(MachOYAML::LoadCommand &LC, + raw_ostream &OS) { + return writePayloadString(LC, OS); +} + +template <> +size_t writeLoadCommandData(MachOYAML::LoadCommand &LC, + raw_ostream &OS) { + return writePayloadString(LC, OS); +} + Error MachOWriter::writeLoadCommands(raw_ostream &OS) { for (auto &LC : Obj.LoadCommands) { size_t BytesWritten = 0; @@ -102,6 +156,7 @@ Error MachOWriter::writeLoadCommands(raw_ostream &OS) { OS.write(reinterpret_cast(&(LC.Data.LCStruct##_data)), \ sizeof(MachO::LCStruct)); \ BytesWritten = sizeof(MachO::LCStruct); \ + BytesWritten += writeLoadCommandData(LC, OS); \ break; switch (LC.Data.load_command_data.cmd) { @@ -109,27 +164,25 @@ Error 
MachOWriter::writeLoadCommands(raw_ostream &OS) { OS.write(reinterpret_cast(&(LC.Data.load_command_data)), sizeof(MachO::load_command)); BytesWritten = sizeof(MachO::load_command); + BytesWritten += writeLoadCommandData(LC, OS); break; #include "llvm/Support/MachO.def" } - if(LC.Data.load_command_data.cmd == MachO::LC_SEGMENT) { - for(auto Sec : LC.Sections) { - auto TempSec = constructSection(Sec); - OS.write(reinterpret_cast(&(TempSec)), sizeof(MachO::section)); - BytesWritten += sizeof(MachO::section); - } - } else if(LC.Data.load_command_data.cmd == MachO::LC_SEGMENT_64) { - for(auto Sec : LC.Sections) { - auto TempSec = constructSection(Sec); - TempSec.reserved3 = Sec.reserved3; - OS.write(reinterpret_cast(&(TempSec)), sizeof(MachO::section_64)); - BytesWritten += sizeof(MachO::section_64); - } + if (LC.PayloadBytes.size() > 0) { + OS.write(reinterpret_cast(LC.PayloadBytes.data()), + LC.PayloadBytes.size()); + BytesWritten += LC.PayloadBytes.size(); } - auto BytesRemaining = - LC.Data.load_command_data.cmdsize - BytesWritten; + if (LC.ZeroPadBytes > 0) { + std::vector FillData; + FillData.insert(FillData.begin(), LC.ZeroPadBytes, 0); + OS.write(reinterpret_cast(FillData.data()), LC.ZeroPadBytes); + BytesWritten += LC.ZeroPadBytes; + } + + auto BytesRemaining = LC.Data.load_command_data.cmdsize - BytesWritten; if (BytesRemaining > 0) { // TODO: Replace all this once the load command data is present in yaml. // For now I fill with 0xDEADBEEF because it is easy to spot on a hex