Track the full hash value (i.e., not reduced modulo the hash table size) for each token.

This lets us find interesting properties of the hash distribution.

llvm-svn: 39056
This commit is contained in:
Chris Lattner 2006-10-26 05:12:31 +00:00
parent 05646c7f0e
commit 893f272c39
2 changed files with 17 additions and 3 deletions

View File

@ -179,7 +179,8 @@ IdentifierInfo &IdentifierTable::get(const char *NameStart,
const char *NameEnd) {
IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
unsigned Hash = HashString(NameStart, NameEnd) & (HASH_TABLE_SIZE-1);
unsigned FullHash = HashString(NameStart, NameEnd);
unsigned Hash = FullHash & (HASH_TABLE_SIZE-1);
unsigned Length = NameEnd-NameStart;
IdentifierBucket *IdentHead = TableArray[Hash];
@ -218,6 +219,7 @@ IdentifierInfo &IdentifierTable::get(const char *NameStart,
Identifier->TokInfo.IsPoisoned = false;
Identifier->TokInfo.IsOtherTargetMacro = false;
Identifier->TokInfo.FETokenInfo = 0;
Identifier->TokInfo.HashValue = FullHash;
// Copy the string information.
char *StrBuffer = (char*)(Identifier+1);
@ -350,8 +352,19 @@ void IdentifierTable::PrintStats() const {
MaxIdentifierLength = Id->TokInfo.getNameLength();
++NumIdentifiersInBucket;
}
if (NumIdentifiersInBucket > MaxBucketLength)
if (NumIdentifiersInBucket > MaxBucketLength) {
MaxBucketLength = NumIdentifiersInBucket;
#if 0 // This code can be enabled to see (with -stats) a sample of some of the
// longest buckets in the hash table. Useful for inspecting density of
// buckets etc.
std::cerr << "Bucket length " << MaxBucketLength << ":\n";
for (IdentifierBucket *Id = TableArray[i]; Id; Id = Id->Next) {
std::cerr << " " << Id->TokInfo.getName() << " hash = "
<< Id->TokInfo.HashValue << "\n";
}
#endif
}
if (NumIdentifiersInBucket == 0)
++NumEmptyBuckets;

View File

@ -30,7 +30,7 @@ namespace clang {
/// variable or function name). The preprocessor keeps this information in a
/// set, and all tok::identifier tokens have a pointer to one of these.
class IdentifierInfo {
unsigned NameLen; // String that is the identifier.
unsigned NameLen; // Length of the identifier string.
MacroInfo *Macro; // Set if this identifier is #define'd.
tok::TokenKind TokenID : 8; // Front-end token ID or tok::identifier.
tok::PPKeywordKind PPID : 5; // ID for preprocessor command like 'ifdef'.
@ -39,6 +39,7 @@ class IdentifierInfo {
bool IsPoisoned : 1; // True if identifier is poisoned.
bool IsOtherTargetMacro : 1; // True if ident is macro on another target.
void *FETokenInfo; // Managed by the language front-end.
unsigned HashValue; // The full (non-truncated) hash value.
friend class IdentifierTable;
public:
/// getName - Return the actual string for this identifier. The length of