add initial support for coalescing by content (c-strings) with test case

llvm-svn: 147799
This commit is contained in:
Nick Kledzik 2012-01-09 20:18:15 +00:00
parent a8f80b31f9
commit bfedfc171d
5 changed files with 204 additions and 26 deletions

View File

@ -14,7 +14,9 @@
#include <map>
#include <vector>
namespace llvm { class StringRef; }
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/DenseSet.h"
namespace lld {
@ -52,12 +54,21 @@ public:
private:
typedef std::map<llvm::StringRef, const Atom *> NameToAtom;
typedef std::map<const Atom *, const Atom *> AtomToAtom;
// Traits handed to llvm::DenseSet for AtomContentSet. Hashing and equality
// are declared here and defined out of line in terms of each atom's content.
struct MyMappingInfo {
  // Sentinel pointer values DenseSet asks for; neither is dereferenced by
  // isEqual(), which rejects them before touching atom content.
  static const Atom * getEmptyKey()     { return NULL; }
  static const Atom * getTombstoneKey() { return reinterpret_cast<Atom*>(-1); }

  // Hash of an atom (defined out of line).
  static unsigned getHashValue(const Atom * const atom);

  // Equality of two atoms (defined out of line).
  static bool isEqual(const Atom * const lhs, const Atom * const rhs);
};
typedef llvm::DenseSet<const Atom*, MyMappingInfo> AtomContentSet;
void addByName(const Atom &);
void addByContent(const Atom &);
Platform& _platform;
AtomToAtom _replacedAtoms;
NameToAtom _nameTable;
AtomContentSet _contentTable;
};
} // namespace lld

View File

@ -16,6 +16,8 @@
#include "lld/Platform/Platform.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include <algorithm>
#include <cassert>
@ -34,7 +36,7 @@ void SymbolTable::add(const Atom &atom) {
this->addByName(atom);
}
else if ( atom.mergeDuplicates() ) {
// TO DO: support constants merging
this->addByContent(atom);
}
}
@ -130,6 +132,58 @@ void SymbolTable::addByName(const Atom & newAtom) {
}
}
unsigned SymbolTable::MyMappingInfo::getHashValue(const Atom * const atom) {
unsigned hash = atom->size();
if ( atom->contentType() != Atom::typeZeroFill ) {
llvm::ArrayRef<uint8_t> content = atom->rawContent();
for (unsigned int i=0; i < content.size(); ++i) {
hash = hash * 33 + content[i];
}
}
hash &= 0x00FFFFFF;
hash |= ((unsigned)atom->contentType()) << 24;
//fprintf(stderr, "atom=%p, hash=0x%08X\n", atom, hash);
return hash;
}
bool SymbolTable::MyMappingInfo::isEqual(const Atom * const l,
const Atom * const r) {
if ( l == r )
return true;
if ( l == getEmptyKey() )
return false;
if ( r == getEmptyKey() )
return false;
if ( l == getTombstoneKey() )
return false;
if ( r == getTombstoneKey() )
return false;
if ( l->contentType() != r->contentType() )
return false;
if ( l->size() != r->size() )
return false;
llvm::ArrayRef<uint8_t> lc = l->rawContent();
llvm::ArrayRef<uint8_t> rc = r->rawContent();
return lc.equals(rc);
}
void SymbolTable::addByContent(const Atom & newAtom) {
AtomContentSet::iterator pos = _contentTable.find(&newAtom);
if ( pos == _contentTable.end() ) {
_contentTable.insert(&newAtom);
return;
}
const Atom* existing = *pos;
// New atom is not being used. Add it to replacement table.
_replacedAtoms[&newAtom] = existing;
}
const Atom *SymbolTable::findByName(llvm::StringRef sym) {
NameToAtom::iterator pos = _nameTable.find(sym);
if (pos == _nameTable.end())

View File

@ -16,6 +16,7 @@
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
@ -66,18 +67,21 @@ inline llvm::error_code make_error_code(yaml_reader_errors e) {
class YAML {
public:
struct Entry {
Entry(const char *k, const char *v, int d, bool bd, bool bs)
Entry(const char *k, const char *v, std::vector<uint8_t>* vs,
int d, bool bd, bool bs)
: key(strdup(k))
, value(strdup(v))
, value(v ? strdup(v) : NULL)
, valueSequenceBytes(vs)
, depth(d)
, beginSequence(bs)
, beginDocument(bd) {}
const char *key;
const char *value;
int depth;
bool beginSequence;
bool beginDocument;
const char * key;
const char * value;
std::vector<uint8_t>* valueSequenceBytes;
int depth;
bool beginSequence;
bool beginDocument;
};
static void parse(llvm::MemoryBuffer *mb, std::vector<const Entry *>&);
@ -107,6 +111,8 @@ void YAML::parse(llvm::MemoryBuffer *mb, std::vector<const Entry *> &entries) {
int depth = 0;
bool nextKeyIsStartOfDocument = false;
bool nextKeyIsStartOfSequence = false;
std::vector<uint8_t>* sequenceBytes = NULL;
unsigned contentByte = 0;
for (const char *s = mb->getBufferStart(); s < mb->getBufferEnd(); ++s) {
char c = *s;
if (c == '\n')
@ -204,7 +210,7 @@ void YAML::parse(llvm::MemoryBuffer *mb, std::vector<const Entry *> &entries) {
*p++ = c;
state = inValue;
} else if (c == '\n') {
entries.push_back(new Entry(key, "", depth,
entries.push_back(new Entry(key, "", NULL, depth,
nextKeyIsStartOfDocument,
nextKeyIsStartOfSequence));
nextKeyIsStartOfSequence = false;
@ -212,6 +218,8 @@ void YAML::parse(llvm::MemoryBuffer *mb, std::vector<const Entry *> &entries) {
state = inDocument;
depth = 0;
} else if (c == '[') {
contentByte = 0;
sequenceBytes = new std::vector<uint8_t>();
state = inValueSequence;
} else if (c == ' ') {
// eat space
@ -226,7 +234,7 @@ void YAML::parse(llvm::MemoryBuffer *mb, std::vector<const Entry *> &entries) {
*p++ = c;
} else if (c == '\n') {
*p = '\0';
entries.push_back(new Entry(key, value, depth,
entries.push_back(new Entry(key, value, NULL, depth,
nextKeyIsStartOfDocument,
nextKeyIsStartOfSequence));
nextKeyIsStartOfSequence = false;
@ -236,11 +244,33 @@ void YAML::parse(llvm::MemoryBuffer *mb, std::vector<const Entry *> &entries) {
}
break;
case inValueSequence:
if (c == ']')
if (c == ']') {
sequenceBytes->push_back(contentByte);
state = inValueSequenceEnd;
}
else if (c == ' ') {
// eat white space
}
else if (c == ',') {
sequenceBytes->push_back(contentByte);
}
else if ( isdigit(c) ) {
contentByte = (contentByte << 4) | (c-'0');
}
else if ( ('a' <= tolower(c)) && (tolower(c) <= 'f') ) {
contentByte = (contentByte << 4) | (tolower(c)-'a'+10);
}
else {
llvm::report_fatal_error("non-hex digit found in content [ ]");
}
break;
case inValueSequenceEnd:
if (c == '\n') {
entries.push_back(new Entry(key, NULL, sequenceBytes, depth,
nextKeyIsStartOfDocument,
nextKeyIsStartOfSequence));
nextKeyIsStartOfSequence = false;
nextKeyIsStartOfDocument = false;
state = inDocument;
depth = 0;
}
@ -296,11 +326,13 @@ public:
, YAMLFile& f
, const char *n
, const char* sn
, uint64_t sz)
, uint64_t sz
, std::vector<uint8_t>* c)
: Atom(ord, d, s, ct, sc, intn, md, ah, dsk, tb, al, a)
, _file(f)
, _name(n)
, _sectionName(sn)
, _content(c)
, _size(sz)
, _refStartIndex(f._lastRefIndex)
, _refEndIndex(f._references.size()) {
@ -320,7 +352,7 @@ public:
}
virtual llvm::StringRef customSectionName() const {
return _sectionName;
return (_sectionName ? _sectionName : llvm::StringRef());
}
virtual uint64_t objectAddress() const {
@ -328,19 +360,26 @@ public:
}
virtual uint64_t size() const {
return _size;
return (_content ? _content->size() : _size);
}
virtual void copyRawContent(uint8_t buffer[]) const { }
// Returns a view over the bytes in _content, or an empty ArrayRef when this
// atom has no content vector.
llvm::ArrayRef<uint8_t> rawContent() const {
  if ( _content == NULL )
    return llvm::ArrayRef<uint8_t>();
  return llvm::ArrayRef<uint8_t>(*_content);
}
virtual Reference::iterator referencesBegin() const;
virtual Reference::iterator referencesEnd() const;
private:
YAMLFile& _file;
const char * _name;
const char * _sectionName;
unsigned long _size;
unsigned int _refStartIndex;
unsigned int _refEndIndex;
YAMLFile& _file;
const char * _name;
const char * _sectionName;
std::vector<uint8_t>* _content;
unsigned long _size;
unsigned int _refStartIndex;
unsigned int _refEndIndex;
};
Reference::iterator YAMLAtom::referencesBegin() const {
@ -384,6 +423,7 @@ public:
bool _alias;
bool _autoHide;
const char *_sectionName;
std::vector<uint8_t>* _content;
Reference _ref;
};
@ -395,13 +435,15 @@ YAMLAtomState::YAMLAtomState()
, _type(KeyValues::contentTypeDefault)
, _scope(KeyValues::scopeDefault)
, _def(KeyValues::definitionDefault)
, _sectionChoice(KeyValues::sectionChoiceDefault)
, _internalName(KeyValues::internalNameDefault)
, _mergeDuplicates(KeyValues::mergeDuplicatesDefault)
, _deadStrip(KeyValues::deadStripKindDefault)
, _thumb(KeyValues::isThumbDefault)
, _alias(KeyValues::isAliasDefault)
, _autoHide(KeyValues::autoHideDefault)
, _sectionName(NULL) {
, _sectionName(NULL)
, _content(NULL) {
_ref.target = NULL;
_ref.addend = 0;
_ref.offsetInAtom = 0;
@ -413,7 +455,7 @@ void YAMLAtomState::makeAtom(YAMLFile& f) {
Atom *a = new YAMLAtom(_ordinal, _def, _scope, _type, _sectionChoice,
_internalName, _mergeDuplicates, _autoHide,
_deadStrip, _thumb, _alias, _align, f,
_name, _sectionName, _size);
_name, _sectionName, _size, _content);
f._atoms.push_back(a);
++_ordinal;
@ -433,6 +475,7 @@ void YAMLAtomState::makeAtom(YAMLFile& f) {
_alias = KeyValues::isAliasDefault;
_autoHide = KeyValues::autoHideDefault;
_sectionName = NULL;
_content = NULL;
_ref.target = NULL;
_ref.addend = 0;
_ref.offsetInAtom = 0;
@ -593,7 +636,7 @@ llvm::error_code parseObjectText( llvm::MemoryBuffer *mb
haveAtom = true;
}
else if (strcmp(entry->key, KeyValues::contentKeyword) == 0) {
// TO DO: switch to content mode
atomState._content = entry->valueSequenceBytes;
haveAtom = true;
}
else if (strcmp(entry->key, "align2") == 0) {

View File

@ -15,6 +15,8 @@
#include "lld/Core/Reference.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/system_error.h"
@ -141,6 +143,24 @@ public:
}
if ( atom.contentType() != Atom::typeZeroFill ) {
_out << " "
<< KeyValues::contentKeyword
<< ":"
<< spacePadding(KeyValues::contentKeyword)
<< "[ ";
llvm::ArrayRef<uint8_t> arr = atom.rawContent();
bool needComma = false;
for (unsigned int i=0; i < arr.size(); ++i) {
if ( needComma )
_out << ", ";
_out << hexdigit(arr[i] >> 4);
_out << hexdigit(arr[i] & 0x0F);
needComma = true;
}
_out << " ]\n";
}
if (atom.referencesBegin() != atom.referencesEnd()) {
_out << " fixups:\n";
for (Reference::iterator it = atom.referencesBegin(),
@ -160,7 +180,12 @@ private:
return &spaces[strlen(key)];
}
// Maps a 4-bit value to its upper-case hexadecimal character:
// 0..9 -> '0'..'9', 10..15 -> 'A'..'F'.
char hexdigit(uint8_t nibble) {
  return (nibble >= 0x0A) ? ('A' + nibble - 0x0A)
                          : ('0' + nibble);
}
llvm::raw_ostream& _out;
bool _firstAtom;

View File

@ -0,0 +1,45 @@
# RUN: lld-core %s | FileCheck %s
#
# Test that duplicate c-strings are coalesced
#
---
atoms:
- name: L0
internal-name: true
scope: hidden
type: c-string
merge-duplicates: true
content: [ 68, 65, 6c, 6c, 6f, 00 ]
- name: L1
internal-name: true
scope: hidden
type: c-string
merge-duplicates: true
content: [ 74, 68, 65, 72, 65, 00 ]
---
atoms:
- name: L2
internal-name: true
scope: hidden
type: c-string
merge-duplicates: true
content: [ 68, 65, 6c, 6c, 6f, 00 ]
---
atoms:
- name: L2
internal-name: true
scope: hidden
type: c-string
merge-duplicates: true
content: [ 74, 68, 65, 72, 65, 00 ]
...
# CHECK: type: c-string
# CHECK: content: [ 68, 65, 6C, 6C, 6F, 00 ]
# CHECK: type: c-string
# CHECK: content: [ 74, 68, 65, 72, 65, 00 ]
# CHECK-NOT: name:
# CHECK: ...