Track the full hash value (i.e., before it is reduced modulo the hash table size) for each token.
This lets us find interesting properties of the hash distribution. llvm-svn: 39056
This commit is contained in:
parent
05646c7f0e
commit
893f272c39
|
@ -179,7 +179,8 @@ IdentifierInfo &IdentifierTable::get(const char *NameStart,
|
|||
const char *NameEnd) {
|
||||
IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
|
||||
|
||||
unsigned Hash = HashString(NameStart, NameEnd) & (HASH_TABLE_SIZE-1);
|
||||
unsigned FullHash = HashString(NameStart, NameEnd);
|
||||
unsigned Hash = FullHash & (HASH_TABLE_SIZE-1);
|
||||
unsigned Length = NameEnd-NameStart;
|
||||
|
||||
IdentifierBucket *IdentHead = TableArray[Hash];
|
||||
|
@ -218,6 +219,7 @@ IdentifierInfo &IdentifierTable::get(const char *NameStart,
|
|||
Identifier->TokInfo.IsPoisoned = false;
|
||||
Identifier->TokInfo.IsOtherTargetMacro = false;
|
||||
Identifier->TokInfo.FETokenInfo = 0;
|
||||
Identifier->TokInfo.HashValue = FullHash;
|
||||
|
||||
// Copy the string information.
|
||||
char *StrBuffer = (char*)(Identifier+1);
|
||||
|
@ -350,8 +352,19 @@ void IdentifierTable::PrintStats() const {
|
|||
MaxIdentifierLength = Id->TokInfo.getNameLength();
|
||||
++NumIdentifiersInBucket;
|
||||
}
|
||||
if (NumIdentifiersInBucket > MaxBucketLength)
|
||||
if (NumIdentifiersInBucket > MaxBucketLength) {
|
||||
MaxBucketLength = NumIdentifiersInBucket;
|
||||
|
||||
#if 0 // This code can be enabled to see (with -stats) a sample of some of the
|
||||
// longest buckets in the hash table. Useful for inspecting density of
|
||||
// buckets etc.
|
||||
std::cerr << "Bucket length " << MaxBucketLength << ":\n";
|
||||
for (IdentifierBucket *Id = TableArray[i]; Id; Id = Id->Next) {
|
||||
std::cerr << " " << Id->TokInfo.getName() << " hash = "
|
||||
<< Id->TokInfo.HashValue << "\n";
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (NumIdentifiersInBucket == 0)
|
||||
++NumEmptyBuckets;
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ namespace clang {
|
|||
/// variable or function name). The preprocessor keeps this information in a
|
||||
/// set, and all tok::identifier tokens have a pointer to one of these.
|
||||
class IdentifierInfo {
|
||||
unsigned NameLen; // String that is the identifier.
|
||||
unsigned NameLen; // Length of the identifier string.
|
||||
MacroInfo *Macro; // Set if this identifier is #define'd.
|
||||
tok::TokenKind TokenID : 8; // Front-end token ID or tok::identifier.
|
||||
tok::PPKeywordKind PPID : 5; // ID for preprocessor command like 'ifdef'.
|
||||
|
@ -39,6 +39,7 @@ class IdentifierInfo {
|
|||
bool IsPoisoned : 1; // True if identifier is poisoned.
|
||||
bool IsOtherTargetMacro : 1; // True if ident is macro on another target.
|
||||
void *FETokenInfo; // Managed by the language front-end.
|
||||
unsigned HashValue; // The full (non-truncated) hash value.
|
||||
friend class IdentifierTable;
|
||||
public:
|
||||
/// getName - Return the actual string for this identifier. The length of
|
||||
|
|
Loading…
Reference in New Issue