Introduce til::linear_flat_set (#15089)

`til::linear_flat_set` is a primitive hash map with linear probing. The implementation is slightly complicated due to the use of templates. I strongly considered just writing multiple copies of this class by hand, since the code is indeed fairly trivial, but ended up deciding against it because this templated approach makes testing easier. This class is on the order of 10x faster than `std::unordered_map`.
2023-04-04 19:50:10 +02:00 · 2023-04-04 19:50:10 +02:00 · 9dfdf2afa3
parent 2a839d8c5a
commit 9dfdf2afa3
6 changed files with 222 additions and 0 deletions
--- a/.github/actions/spelling/expect/expect.txt
+++ b/.github/actions/spelling/expect/expect.txt
@ -1124,6 +1124,7 @@ Mip
 MMBB
 mmcc
 MMCPL
+MMIX
 mmsystem
 MNC
 MNOPQ
--- a/src/inc/til/flat_set.h
+++ b/src/inc/til/flat_set.h
@ -0,0 +1,141 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+#pragma once
+
+#pragma warning(push)
+#pragma warning(disable : 26446) // Prefer to use gsl::at() instead of unchecked subscript operator (bounds.4).
+#pragma warning(disable : 26409) // Avoid calling new and delete explicitly, use std::make_unique<T> instead (r.11).
+
+namespace til
+{
+    // A simple hash function for simple hash maps.
+    // As demonstrated in https://doi.org/10.14778/2850583.2850585, a simple "multiply and shift" hash performs
+    // very well with linear probing hash maps and I found this to be true as well in my own testing. This hash
+    // function doesn't do the "shift" part, because linear_flat_set already does it by an appropriate amount.
+    //
+    // It returns `v` multiplied (with wrap-around) by a fixed, odd constant. Callers are
+    // expected to use the topmost bits of the result, just like linear_flat_set does.
+    constexpr size_t flat_set_hash_integer(size_t v) noexcept
+    {
+        // These two multipliers are the same as used by the PCG family of random number generators.
+        // The 32-Bit version is described in https://doi.org/10.1090/S0025-5718-99-00996-5, Table 5.
+        // The 64-Bit version is the multiplier as used by Donald Knuth for MMIX and found by C. E. Haynes.
+        //
+        // The multiplier is picked based on the actual width of size_t and not based on _WIN64: If the 32-Bit
+        // multiplier was used with a 64-Bit size_t (= any 64-Bit target that doesn't define _WIN64), the product
+        // of a small `v` would never reach the topmost bits and such keys would all end up in the same slot.
+#if SIZE_MAX > UINT32_MAX
+        return v * UINT64_C(6364136223846793005);
+#else
+        return v * UINT32_C(747796405);
+#endif
+    }
+
+    // A basic hashmap with linear probing. A `LoadFactor` of 2 equals
+    // a max. load of roughly 50% and a `LoadFactor` of 4 roughly 25%.
+    //
+    // It performs best with:
+    // * small and cheap T
+    // * >= 50% successful lookups
+    // * <= 50% load factor (LoadFactor >= 2, which is the minimum anyways)
+    //
+    // T acts as the slot and as the stored item at the same time. It must:
+    // * value-initialize into an "empty" state (that's how the slot array is allocated)
+    // * evaluate to `false` in a boolean context while it's in that empty state and to `true` otherwise
+    // * be comparable (==) with, and assignable (=) from, every key type that is passed to insert()
+    // * have a std::hash<T> specialization which accepts both T and those key types,
+    //   and which puts its entropy into the topmost bits (see flat_set_hash_integer)
+    //
+    // This class offers no way to remove an item again.
+    template<typename T, size_t LoadFactor = 2>
+    struct linear_flat_set
+    {
+        static_assert(LoadFactor >= 2);
+        // _shift starts out as (digits - LoadFactor - 1) and _bumpSize() refuses to work with a _shift below 2.
+        // Anything larger than this would either wrap around or make the very first insert() throw.
+        static_assert(LoadFactor <= static_cast<size_t>(std::numeric_limits<size_t>::digits) - 3, "LoadFactor is too large for size_t");
+
+        linear_flat_set() = default;
+        ~linear_flat_set() = default;
+
+        // The slot array is owned by a std::unique_ptr and so this class was never copyable.
+        linear_flat_set(const linear_flat_set&) = delete;
+        linear_flat_set& operator=(const linear_flat_set&) = delete;
+
+        // The implicitly generated move operations would only null out _map, but leave _capacity, _load,
+        // _shift and _mask of the moved-from instance untouched. Calling insert() or container() on such
+        // an instance would then access a null pointer. These two leave `other` behind as an empty,
+        // but fully usable set instead (= the same state as a default-constructed one).
+        linear_flat_set(linear_flat_set&& other) noexcept
+        {
+            _swap(other);
+        }
+
+        linear_flat_set& operator=(linear_flat_set&& other) noexcept
+        {
+            if (this != &other)
+            {
+                // `discarded` takes over our current contents and frees them at the end of this scope.
+                linear_flat_set discarded;
+                _swap(discarded);
+                _swap(other);
+            }
+            return *this;
+        }
+
+        // Returns true if no item has been inserted yet.
+        bool empty() const noexcept
+        {
+            return _load == 0;
+        }
+
+        // Returns the number of inserted items.
+        size_t size() const noexcept
+        {
+            // _load is incremented by LoadFactor for every inserted item.
+            return _load / LoadFactor;
+        }
+
+        // Returns a view of all slots, including the unused ones. Unused slots evaluate to `false`.
+        // The view is invalidated whenever insert() has to grow the set.
+        std::span<T> container() const noexcept
+        {
+            return { _map.get(), _capacity };
+        }
+
+        // Looks up `key` and inserts it if it wasn't found.
+        //
+        // It returns a reference to the slot holding the item and a bool which is `true` if the item was
+        // newly inserted and `false` if it already existed. Just like with container(), the reference is
+        // invalidated whenever a later insert() has to grow the set.
+        //
+        // This function throws if growing the set fails (see _bumpSize).
+        template<typename U>
+        std::pair<T&, bool> insert(U&& key)
+        {
+            // Putting this into the lookup path is a little pessimistic, but it
+            // allows us to default-construct this hashmap with a size of 0.
+            if (_load >= _capacity) [[unlikely]]
+            {
+                _bumpSize();
+            }
+
+            // The most common, basic and performant hash function is to multiply the value
+            // by some prime number and divide by the number of slots. It's been shown
+            // many times in literature that such a scheme performs the best on average.
+            // As such, we perform the divide here to get the topmost bits down.
+            // See flat_set_hash_integer.
+            const auto hash = ::std::hash<T>{}(key) >> _shift;
+
+            // This loop always terminates, because the check above
+            // ensures that there's at least one unused slot left.
+            for (auto i = hash;; ++i)
+            {
+                auto& slot = _map[i & _mask];
+                if (!slot)
+                {
+                    slot = std::forward<U>(key);
+                    _load += LoadFactor;
+                    return { slot, true };
+                }
+                if (slot == key) [[likely]]
+                {
+                    return { slot, false };
+                }
+            }
+        }
+
+    private:
+        // Exchanges the entire state of this instance with that of `other`.
+        void _swap(linear_flat_set& other) noexcept
+        {
+            _map.swap(other._map);
+            std::swap(_capacity, other._capacity);
+            std::swap(_load, other._load);
+            std::swap(_shift, other._shift);
+            std::swap(_mask, other._mask);
+        }
+
+        // Doubles the number of slots (or allocates the initial ones) and moves all existing items over.
+        // It throws std::bad_array_new_length if the set cannot grow any further
+        // and whatever std::make_unique throws if the allocation fails.
+        __declspec(noinline) void _bumpSize()
+        {
+            // A _shift of 0 would result in a newShift of 0xfffff...
+            // A _shift of 1 would result in a newCapacity of 0
+            if (_shift < 2)
+            {
+                throw std::bad_array_new_length{};
+            }
+
+            const auto newShift = _shift - 1;
+            const auto newCapacity = size_t{ 1 } << (digits - newShift);
+            const auto newMask = newCapacity - 1;
+            auto newMap = std::make_unique<T[]>(newCapacity);
+
+            // This mirrors the insert() function, but without the lookup part.
+            for (auto& oldSlot : container())
+            {
+                if (!oldSlot)
+                {
+                    continue;
+                }
+
+                const auto hash = ::std::hash<T>{}(oldSlot) >> newShift;
+
+                for (auto i = hash;; ++i)
+                {
+                    auto& slot = newMap[i & newMask];
+                    if (!slot)
+                    {
+                        slot = std::move_if_noexcept(oldSlot);
+                        break;
+                    }
+                }
+            }
+
+            // Nothing is committed until here, so that a throwing
+            // allocation above leaves all of the members untouched.
+            _map = std::move(newMap);
+            _capacity = newCapacity;
+            _shift = newShift;
+            _mask = newMask;
+        }
+
+        static constexpr auto digits = std::numeric_limits<size_t>::digits;
+
+        // The slot array. It's null until the first insert().
+        std::unique_ptr<T[]> _map;
+        // The number of slots in _map. It's always either 0 or a power of 2.
+        size_t _capacity = 0;
+        // The number of inserted items multiplied by LoadFactor. The set grows once this reaches _capacity.
+        size_t _load = 0;
+        // The amount by which a hash is shifted right, in order to leave only log2(_capacity) of its topmost bits.
+        // The first insert() calls _bumpSize(), which decrements this by 1 and so the initial capacity ends
+        // up being 2^(LoadFactor + 2) slots. With the default LoadFactor of 2 that's 16 slots, or in
+        // other words, room for 8 items until the set grows. With a LoadFactor of 4 it's 64 slots / 16 items.
+        size_t _shift = digits - LoadFactor - 1;
+        // _capacity - 1. Used for wrapping a slot index around the end of _map.
+        size_t _mask = 0;
+    };
+}
+
+#pragma warning(pop)
--- a/src/til/ut_til/FlatSetTests.cpp
+++ b/src/til/ut_til/FlatSetTests.cpp
@ -0,0 +1,68 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+#include "precomp.h"
+
+#include <til/flat_set.h>
+
+using namespace WEX::Common;
+using namespace WEX::Logging;
+using namespace WEX::TestExecution;
+
+// A minimal item type for til::linear_flat_set which is keyed by an int.
+// It implements what the set's insert() uses on a slot: A boolean conversion
+// to test whether it's in use, a comparison with a key and an assignment from a key.
+struct Data
+{
+    // The value that marks a Data as unused. A default-constructed Data is thus unused.
+    // A key of -1 casts to this very value and can consequently not be stored.
+    static constexpr auto emptyMarker = std::numeric_limits<size_t>::max();
+
+    // Returns true if this instance holds a key.
+    // It's explicit so that a Data cannot silently decay into a bool (or int) in contexts
+    // like a comparison between two Data instances. `!data` and `if (data)` continue to work.
+    explicit constexpr operator bool() const noexcept
+    {
+        return value != emptyMarker;
+    }
+
+    // Returns true if this instance holds the given key.
+    constexpr bool operator==(int key) const noexcept
+    {
+        return value == static_cast<size_t>(key);
+    }
+
+    // Stores the given key in this instance.
+    constexpr Data& operator=(int key) noexcept
+    {
+        value = static_cast<size_t>(key);
+        return *this;
+    }
+
+    size_t value = emptyMarker;
+};
+
+// The hasher used by til::linear_flat_set<Data>. It accepts a stored Data as well as a bare
+// int key, and yields the same hash for both, as long as they represent the same value.
+namespace std
+{
+    template<>
+    struct hash<::Data>
+    {
+        // Hashes a key that hasn't been turned into a Data yet.
+        constexpr size_t operator()(int key) const noexcept
+        {
+            const auto widened = static_cast<size_t>(key);
+            return ::til::flat_set_hash_integer(widened);
+        }
+
+        // Hashes the key that is stored inside of the given item.
+        constexpr size_t operator()(::Data item) const noexcept
+        {
+            return ::til::flat_set_hash_integer(item.value);
+        }
+    };
+}
+
+// Unit tests for til::linear_flat_set, using Data as the item type.
+class FlatSetTests
+{
+    TEST_CLASS(FlatSetTests);
+
+    // Inserts the same key twice and verifies that only the first call reports
+    // an insertion, while both calls return a reference to the very same item.
+    TEST_METHOD(Basic)
+    {
+        til::linear_flat_set<Data> set;
+
+        // The set stores Data, but is given a plain int here. This demonstrates that the class can not only
+        // do "heterogeneous lookups" like the STL does, but that it can also insert items given a different type.
+        const auto [entry1, inserted1] = set.insert(123);
+        VERIFY_IS_TRUE(inserted1);
+
+        // The key exists now and so the second call must not insert anything.
+        const auto [entry2, inserted2] = set.insert(123);
+        VERIFY_IS_FALSE(inserted2);
+
+        // Both entries are references and must refer to the same slot, which holds the key.
+        VERIFY_ARE_EQUAL(&entry1, &entry2);
+        VERIFY_ARE_EQUAL(123u, entry2.value);
+    }
+};
--- a/src/til/ut_til/til.unit.tests.vcxproj
+++ b/src/til/ut_til/til.unit.tests.vcxproj
@ -20,6 +20,7 @@
    <ClCompile Include="ColorTests.cpp" />
    <ClCompile Include="EnumSetTests.cpp" />
    <ClCompile Include="EnvTests.cpp" />
+    <ClCompile Include="FlatSetTests.cpp" />
    <ClCompile Include="GenerationalTests.cpp" />
    <ClCompile Include="HashTests.cpp" />
    <ClCompile Include="MathTests.cpp" />
--- a/src/til/ut_til/til.unit.tests.vcxproj.filters
+++ b/src/til/ut_til/til.unit.tests.vcxproj.filters
@ -29,6 +29,7 @@
    <ClCompile Include="EnvTests.cpp" />
    <ClCompile Include="UnicodeTests.cpp" />
    <ClCompile Include="GenerationalTests.cpp" />
+    <ClCompile Include="FlatSetTests.cpp" />
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\precomp.h" />
--- a/tools/ConsoleTypes.natvis
+++ b/tools/ConsoleTypes.natvis
@ -112,4 +112,14 @@
            <ExpandedItem>_value</ExpandedItem>
        </Expand>
    </Type>
+
+    <Type Name="til::linear_flat_set&lt;*,*&gt;">
+        <DisplayString>{{ size={_load / $T2} }}</DisplayString>
+        <Expand>
+            <ArrayItems>
+                <Size>_capacity</Size>
+                <ValuePointer>_map._Mypair._Myval2</ValuePointer>
+            </ArrayItems>
+        </Expand>
+    </Type>
 </AutoVisualizer>