[modules] Use `HashBuilder` and `MD5` for the module hash.

Per the comments, `hash_code` values "are not stable to save or
persist", so are unsuitable for the module hash, which must persist
across compilations for the implicit module hashes to match. Note that
in practice, today, `hash_code` are stable. But this is an
implementation detail, with a clear `FIXME` indicating we should switch
to a per-execution seed.

The stability of `MD5` also allows modules cross-compilation use-cases.
The `size_t` underlying storage for `hash_code` varying across platforms
could cause mismatching hashes when cross-compiling from a 64bit
target to a 32bit target.

Note that native endianness is still used for the hash computation. So hashes
will differ between platforms of different endianness.

Reviewed By: jansvoboda11

Differential Revision: https://reviews.llvm.org/D102943
This commit is contained in:
Alexandre Rames 2021-09-03 11:12:27 -07:00
parent 830c0b9023
commit 655bea4226
10 changed files with 103 additions and 89 deletions

View File

@ -18,6 +18,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/HashBuilder.h"
#include "llvm/Support/VersionTuple.h"
#include <string>
@ -480,6 +481,12 @@ public:
friend llvm::hash_code hash_value(const ObjCRuntime &OCR) {
return llvm::hash_combine(OCR.getKind(), OCR.getVersion());
}
template <typename HasherT, llvm::support::endianness Endianness>
friend void addHash(llvm::HashBuilderImpl<HasherT, Endianness> &HBuilder,
const ObjCRuntime &OCR) {
HBuilder.add(OCR.getKind(), OCR.getVersion());
}
};
raw_ostream &operator<<(raw_ostream &out, const ObjCRuntime &value);

View File

@ -16,6 +16,7 @@
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/HashBuilder.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerOptions.h"
#include <cassert>
#include <cstdint>
@ -72,6 +73,12 @@ public:
llvm::hash_code hash_value() const;
template <typename HasherT, llvm::support::endianness Endianness>
friend void addHash(llvm::HashBuilderImpl<HasherT, Endianness> &HBuilder,
const SanitizerMask &SM) {
HBuilder.addRange(&SM.maskLoToHigh[0], &SM.maskLoToHigh[kNumElem]);
}
constexpr explicit operator bool() const {
return maskLoToHigh[0] || maskLoToHigh[1];
}

View File

@ -14,10 +14,11 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/HashBuilder.h"
#include <cstdint>
#include <map>
#include <string>
#include <vector>
#include <map>
namespace clang {
@ -256,11 +257,23 @@ inline llvm::hash_code hash_value(const HeaderSearchOptions::Entry &E) {
return llvm::hash_combine(E.Path, E.Group, E.IsFramework, E.IgnoreSysRoot);
}
template <typename HasherT, llvm::support::endianness Endianness>
inline void addHash(llvm::HashBuilderImpl<HasherT, Endianness> &HBuilder,
const HeaderSearchOptions::Entry &E) {
HBuilder.add(E.Path, E.Group, E.IsFramework, E.IgnoreSysRoot);
}
inline llvm::hash_code
hash_value(const HeaderSearchOptions::SystemHeaderPrefix &SHP) {
return llvm::hash_combine(SHP.Prefix, SHP.IsSystemHeader);
}
template <typename HasherT, llvm::support::endianness Endianness>
inline void addHash(llvm::HashBuilderImpl<HasherT, Endianness> &HBuilder,
const HeaderSearchOptions::SystemHeaderPrefix &SHP) {
HBuilder.add(SHP.Prefix, SHP.IsSystemHeader);
}
} // namespace clang
#endif // LLVM_CLANG_LEX_HEADERSEARCHOPTIONS_H

View File

@ -11,13 +11,14 @@
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/Support/ExtensibleRTTI.h"
#include "llvm/Support/HashBuilder.h"
#include "llvm/Support/MD5.h"
#include <memory>
#include <string>
namespace llvm {
class BitstreamCursor;
class BitstreamWriter;
class hash_code;
class raw_ostream;
}
@ -74,19 +75,20 @@ public:
virtual ModuleFileExtensionMetadata getExtensionMetadata() const = 0;
/// Hash information about the presence of this extension into the
/// module hash code.
/// module hash.
///
/// The module hash code is used to distinguish different variants
/// of a module that are incompatible. If the presence, absence, or
/// version of the module file extension should force the creation
/// of a separate set of module files, override this method to
/// combine that distinguishing information into the module hash
/// code.
/// The module hash is used to distinguish different variants of a module that
/// are incompatible. If the presence, absence, or version of the module file
/// extension should force the creation of a separate set of module files,
/// override this method to combine that distinguishing information into the
/// module hash.
///
/// The default implementation of this function simply returns the
/// hash code as given, so the presence/absence of this extension
/// does not distinguish module files.
virtual llvm::hash_code hashExtension(llvm::hash_code c) const;
/// The default implementation of this function simply does nothing, so the
/// presence/absence of this extension does not distinguish module files.
using ExtensionHashBuilder =
llvm::HashBuilderImpl<llvm::MD5,
llvm::support::endian::system_endianness()>;
virtual void hashExtension(ExtensionHashBuilder &HBuilder) const;
/// Create a new module file extension writer, which will be
/// responsible for writing the extension contents into a particular

View File

@ -78,6 +78,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/HashBuilder.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
@ -4476,116 +4477,99 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Invocation,
}
std::string CompilerInvocation::getModuleHash() const {
// FIXME: Consider using SHA1 instead of MD5.
llvm::HashBuilder<llvm::MD5, llvm::support::endianness::native> HBuilder;
// Note: For QoI reasons, the things we use as a hash here should all be
// dumped via the -module-info flag.
using llvm::hash_code;
using llvm::hash_value;
using llvm::hash_combine;
using llvm::hash_combine_range;
// Start the signature with the compiler version.
// FIXME: We'd rather use something more cryptographically sound than
// CityHash, but this will do for now.
hash_code code = hash_value(getClangFullRepositoryVersion());
HBuilder.add(getClangFullRepositoryVersion());
// Also include the serialization version, in case LLVM_APPEND_VC_REV is off
// and getClangFullRepositoryVersion() doesn't include git revision.
code = hash_combine(code, serialization::VERSION_MAJOR,
serialization::VERSION_MINOR);
HBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR);
// Extend the signature with the language options
#define LANGOPT(Name, Bits, Default, Description) \
code = hash_combine(code, LangOpts->Name);
#define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \
code = hash_combine(code, static_cast<unsigned>(LangOpts->get##Name()));
#define LANGOPT(Name, Bits, Default, Description) HBuilder.add(LangOpts->Name);
#define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \
HBuilder.add(static_cast<unsigned>(LangOpts->get##Name()));
#define BENIGN_LANGOPT(Name, Bits, Default, Description)
#define BENIGN_ENUM_LANGOPT(Name, Type, Bits, Default, Description)
#include "clang/Basic/LangOptions.def"
for (StringRef Feature : LangOpts->ModuleFeatures)
code = hash_combine(code, Feature);
HBuilder.addRange(LangOpts->ModuleFeatures);
code = hash_combine(code, LangOpts->ObjCRuntime);
const auto &BCN = LangOpts->CommentOpts.BlockCommandNames;
code = hash_combine(code, hash_combine_range(BCN.begin(), BCN.end()));
HBuilder.add(LangOpts->ObjCRuntime);
HBuilder.addRange(LangOpts->CommentOpts.BlockCommandNames);
// Extend the signature with the target options.
code = hash_combine(code, TargetOpts->Triple, TargetOpts->CPU,
TargetOpts->TuneCPU, TargetOpts->ABI);
for (const auto &FeatureAsWritten : TargetOpts->FeaturesAsWritten)
code = hash_combine(code, FeatureAsWritten);
HBuilder.add(TargetOpts->Triple, TargetOpts->CPU, TargetOpts->TuneCPU,
TargetOpts->ABI);
HBuilder.addRange(TargetOpts->FeaturesAsWritten);
// Extend the signature with preprocessor options.
const PreprocessorOptions &ppOpts = getPreprocessorOpts();
const HeaderSearchOptions &hsOpts = getHeaderSearchOpts();
code = hash_combine(code, ppOpts.UsePredefines, ppOpts.DetailedRecord);
HBuilder.add(ppOpts.UsePredefines, ppOpts.DetailedRecord);
for (const auto &I : getPreprocessorOpts().Macros) {
const HeaderSearchOptions &hsOpts = getHeaderSearchOpts();
for (const auto &Macro : getPreprocessorOpts().Macros) {
// If we're supposed to ignore this macro for the purposes of modules,
// don't put it into the hash.
if (!hsOpts.ModulesIgnoreMacros.empty()) {
// Check whether we're ignoring this macro.
StringRef MacroDef = I.first;
StringRef MacroDef = Macro.first;
if (hsOpts.ModulesIgnoreMacros.count(
llvm::CachedHashString(MacroDef.split('=').first)))
continue;
}
code = hash_combine(code, I.first, I.second);
HBuilder.add(Macro);
}
// Extend the signature with the sysroot and other header search options.
code = hash_combine(code, hsOpts.Sysroot,
hsOpts.ModuleFormat,
hsOpts.UseDebugInfo,
hsOpts.UseBuiltinIncludes,
hsOpts.UseStandardSystemIncludes,
hsOpts.UseStandardCXXIncludes,
hsOpts.UseLibcxx,
hsOpts.ModulesValidateDiagnosticOptions);
code = hash_combine(code, hsOpts.ResourceDir);
HBuilder.add(hsOpts.Sysroot, hsOpts.ModuleFormat, hsOpts.UseDebugInfo,
hsOpts.UseBuiltinIncludes, hsOpts.UseStandardSystemIncludes,
hsOpts.UseStandardCXXIncludes, hsOpts.UseLibcxx,
hsOpts.ModulesValidateDiagnosticOptions);
HBuilder.add(hsOpts.ResourceDir);
if (hsOpts.ModulesStrictContextHash) {
hash_code SHPC = hash_combine_range(hsOpts.SystemHeaderPrefixes.begin(),
hsOpts.SystemHeaderPrefixes.end());
hash_code UEC = hash_combine_range(hsOpts.UserEntries.begin(),
hsOpts.UserEntries.end());
code = hash_combine(code, hsOpts.SystemHeaderPrefixes.size(), SHPC,
hsOpts.UserEntries.size(), UEC);
HBuilder.addRange(hsOpts.SystemHeaderPrefixes);
HBuilder.addRange(hsOpts.UserEntries);
const DiagnosticOptions &diagOpts = getDiagnosticOpts();
#define DIAGOPT(Name, Bits, Default) \
code = hash_combine(code, diagOpts.Name);
#define ENUM_DIAGOPT(Name, Type, Bits, Default) \
code = hash_combine(code, diagOpts.get##Name());
#include "clang/Basic/DiagnosticOptions.def"
#undef DIAGOPT
#undef ENUM_DIAGOPT
#define DIAGOPT(Name, Bits, Default) HBuilder.add(diagOpts.Name);
#define ENUM_DIAGOPT(Name, Type, Bits, Default) \
HBuilder.add(diagOpts.get##Name());
#include "clang/Basic/DiagnosticOptions.def"
#undef DIAGOPT
#undef ENUM_DIAGOPT
}
// Extend the signature with the user build path.
code = hash_combine(code, hsOpts.ModuleUserBuildPath);
HBuilder.add(hsOpts.ModuleUserBuildPath);
// Extend the signature with the module file extensions.
const FrontendOptions &frontendOpts = getFrontendOpts();
for (const auto &ext : frontendOpts.ModuleFileExtensions) {
code = ext->hashExtension(code);
}
for (const auto &ext : getFrontendOpts().ModuleFileExtensions)
ext->hashExtension(HBuilder);
// When compiling with -gmodules, also hash -fdebug-prefix-map as it
// affects the debug info in the PCM.
if (getCodeGenOpts().DebugTypeExtRefs)
for (const auto &KeyValue : getCodeGenOpts().DebugPrefixMap)
code = hash_combine(code, KeyValue.first, KeyValue.second);
HBuilder.addRange(getCodeGenOpts().DebugPrefixMap);
// Extend the signature with the enabled sanitizers, if at least one is
// enabled. Sanitizers which cannot affect AST generation aren't hashed.
SanitizerSet SanHash = LangOpts->Sanitize;
SanHash.clear(getPPTransparentSanitizers());
if (!SanHash.empty())
code = hash_combine(code, SanHash.Mask);
HBuilder.add(SanHash.Mask);
return toString(llvm::APInt(64, code), 36, /*Signed=*/false);
llvm::MD5::MD5Result Result;
HBuilder.getHasher().final(Result);
uint64_t Hash = Result.high() ^ Result.low();
return toString(llvm::APInt(64, Hash), 36, /*Signed=*/false);
}
void CompilerInvocation::generateCC1CommandLine(

View File

@ -93,16 +93,14 @@ TestModuleFileExtension::getExtensionMetadata() const {
return { BlockName, MajorVersion, MinorVersion, UserInfo };
}
llvm::hash_code TestModuleFileExtension::hashExtension(
llvm::hash_code Code) const {
void TestModuleFileExtension::hashExtension(
ExtensionHashBuilder &HBuilder) const {
if (Hashed) {
Code = llvm::hash_combine(Code, BlockName);
Code = llvm::hash_combine(Code, MajorVersion);
Code = llvm::hash_combine(Code, MinorVersion);
Code = llvm::hash_combine(Code, UserInfo);
HBuilder.add(BlockName);
HBuilder.add(MajorVersion);
HBuilder.add(MinorVersion);
HBuilder.add(UserInfo);
}
return Code;
}
std::unique_ptr<ModuleFileExtensionWriter>

View File

@ -55,7 +55,7 @@ public:
ModuleFileExtensionMetadata getExtensionMetadata() const override;
llvm::hash_code hashExtension(llvm::hash_code Code) const override;
void hashExtension(ExtensionHashBuilder &HBuilder) const override;
std::unique_ptr<ModuleFileExtensionWriter>
createExtensionWriter(ASTWriter &Writer) override;

View File

@ -11,12 +11,10 @@ using namespace clang;
char ModuleFileExtension::ID = 0;
ModuleFileExtension::~ModuleFileExtension() { }
ModuleFileExtension::~ModuleFileExtension() {}
llvm::hash_code ModuleFileExtension::hashExtension(llvm::hash_code Code) const {
return Code;
}
void ModuleFileExtension::hashExtension(ExtensionHashBuilder &HBuilder) const {}
ModuleFileExtensionWriter::~ModuleFileExtensionWriter() { }
ModuleFileExtensionWriter::~ModuleFileExtensionWriter() {}
ModuleFileExtensionReader::~ModuleFileExtensionReader() { }
ModuleFileExtensionReader::~ModuleFileExtensionReader() {}

View File

@ -860,9 +860,7 @@ struct DummyModuleFileExtension
return {};
};
llvm::hash_code hashExtension(llvm::hash_code Code) const override {
return {};
}
void hashExtension(ExtensionHashBuilder &HBuilder) const override {}
std::unique_ptr<ModuleFileExtensionWriter>
createExtensionWriter(ASTWriter &Writer) override {

View File

@ -17,6 +17,7 @@
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Support/HashBuilder.h"
#include <string>
#include <tuple>
@ -164,6 +165,12 @@ public:
return llvm::hash_combine(VT.Major, VT.Minor, VT.Subminor, VT.Build);
}
template <typename HasherT, llvm::support::endianness Endianness>
friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
const VersionTuple &VT) {
HBuilder.add(VT.Major, VT.Minor, VT.Subminor, VT.Build);
}
/// Retrieve a string representation of the version number.
std::string getAsString() const;