diff options
| author | Maxime Coste <mawww@kakoune.org> | 2024-02-06 21:57:17 +1100 |
|---|---|---|
| committer | Maxime Coste <mawww@kakoune.org> | 2024-02-06 21:57:17 +1100 |
| commit | 04a96b059faac8100a291e56bfbdb1962d53d4e1 (patch) | |
| tree | 82191cd45870634637c4a7ee0f437d5fd52111e3 | |
| parent | 53d9b9b67650a2b34345d9153bef2a01cb75c418 (diff) | |
Use different hash algorithms for strings and file hashing
For hash map, using fnv1a is faster as it is a much simpler algorithm
we can afford to inline. For files murmur3 should win as it processes
bytes 4 by 4.
| -rw-r--r-- | src/buffer_utils.cc | 2 | ||||
| -rw-r--r-- | src/client.cc | 2 | ||||
| -rw-r--r-- | src/file.cc | 2 | ||||
| -rw-r--r-- | src/hash.cc | 10 | ||||
| -rw-r--r-- | src/hash.hh | 14 | ||||
| -rw-r--r-- | src/string.hh | 2 |
6 files changed, 22 insertions, 10 deletions
diff --git a/src/buffer_utils.cc b/src/buffer_utils.cc index 21ec7092..be255e4a 100644 --- a/src/buffer_utils.cc +++ b/src/buffer_utils.cc @@ -140,7 +140,7 @@ decltype(auto) parse_file(StringView filename, Func&& func) const bool crlf = has_crlf and not has_lf; auto eolformat = crlf ? EolFormat::Crlf : EolFormat::Lf; - FsStatus fs_status{file.st.st_mtim, file.st.st_size, hash_data(file.data, file.st.st_size)}; + FsStatus fs_status{file.st.st_mtim, file.st.st_size, murmur3(file.data, file.st.st_size)}; return func(parse_lines(pos, end, eolformat), bom, eolformat, fs_status); } diff --git a/src/client.cc b/src/client.cc index ae8f83a8..20123c2f 100644 --- a/src/client.cc +++ b/src/client.cc @@ -394,7 +394,7 @@ void Client::check_if_buffer_needs_reloading() return; if (MappedFile fd{filename}; - fd.st.st_size == status.file_size and hash_data(fd.data, fd.st.st_size) == status.hash) + fd.st.st_size == status.file_size and murmur3(fd.data, fd.st.st_size) == status.hash) return; if (reload == Autoreload::Ask) diff --git a/src/file.cc b/src/file.cc index 87112e24..56d7b027 100644 --- a/src/file.cc +++ b/src/file.cc @@ -626,7 +626,7 @@ FsStatus get_fs_status(StringView filename) { MappedFile fd{filename}; - return {fd.st.st_mtim, fd.st.st_size, hash_data(fd.data, fd.st.st_size)}; + return {fd.st.st_mtim, fd.st.st_size, murmur3(fd.data, fd.st.st_size)}; } String get_kak_binary_path() diff --git a/src/hash.cc b/src/hash.cc index 567d8733..3b9fb018 100644 --- a/src/hash.cc +++ b/src/hash.cc @@ -27,8 +27,8 @@ static inline uint32_t fmix(uint32_t h) return h; } -// murmur3 hash, based on https://github.com/PeterScott/murmur3 -size_t hash_data(const char* input, size_t len) +// based on https://github.com/PeterScott/murmur3 +size_t murmur3(const char* input, size_t len) { const uint8_t* data = reinterpret_cast<const uint8_t*>(input); uint32_t hash = 0x1235678; @@ -73,13 +73,13 @@ size_t hash_data(const char* input, size_t len) UnitTest test_murmur_hash{[] { { constexpr char data[] = "Hello, World!"; - kak_assert(hash_data(data, strlen(data)) == 0xf816f95b); + kak_assert(murmur3(data, strlen(data)) == 0xf816f95b); } { constexpr char data[] = "xxxxxxxxxxxxxxxxxxxxxxxxxxxx"; - kak_assert(hash_data(data, strlen(data)) == 3551113186); + kak_assert(murmur3(data, strlen(data)) == 3551113186); } - kak_assert(hash_data("", 0) == 2572747774); + kak_assert(murmur3("", 0) == 2572747774); }}; } diff --git a/src/hash.hh b/src/hash.hh index 8dcd719f..deed78f2 100644 --- a/src/hash.hh +++ b/src/hash.hh @@ -5,11 +5,23 @@ #include <utility> #include <cstddef> +#include <cstdint> namespace Kakoune { -size_t hash_data(const char* data, size_t len); +inline size_t fnv1a(const char* data, size_t len) +{ + constexpr uint32_t FNV_prime_32 = 16777619; + constexpr uint32_t offset_basis_32 = 2166136261; + + uint32_t hash_value = offset_basis_32; + for (size_t i = 0; i < len; ++i) + hash_value = (hash_value ^ data[i]) * FNV_prime_32; + return hash_value; +} + +size_t murmur3(const char* input, size_t len); template<typename Type> requires std::is_integral_v<Type> constexpr size_t hash_value(const Type& val) diff --git a/src/string.hh b/src/string.hh index b6b313a1..004be04c 100644 --- a/src/string.hh +++ b/src/string.hh @@ -21,7 +21,7 @@ public: friend constexpr size_t hash_value(const Type& str) { - return hash_data(str.data(), (int)str.length()); + return fnv1a(str.data(), (int)str.length()); } using iterator = CharType*; |
