summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMaxime Coste <mawww@kakoune.org>2024-02-06 21:57:17 +1100
committerMaxime Coste <mawww@kakoune.org>2024-02-06 21:57:17 +1100
commit04a96b059faac8100a291e56bfbdb1962d53d4e1 (patch)
tree82191cd45870634637c4a7ee0f437d5fd52111e3
parent53d9b9b67650a2b34345d9153bef2a01cb75c418 (diff)
Use different hash algorithms for strings and file hashing
For hash map, using fnv1a is faster as it is a much simpler algorithm we can afford to inline. For files murmur3 should win as it processes bytes 4 by 4.
-rw-r--r--src/buffer_utils.cc2
-rw-r--r--src/client.cc2
-rw-r--r--src/file.cc2
-rw-r--r--src/hash.cc10
-rw-r--r--src/hash.hh14
-rw-r--r--src/string.hh2
6 files changed, 22 insertions, 10 deletions
diff --git a/src/buffer_utils.cc b/src/buffer_utils.cc
index 21ec7092..be255e4a 100644
--- a/src/buffer_utils.cc
+++ b/src/buffer_utils.cc
@@ -140,7 +140,7 @@ decltype(auto) parse_file(StringView filename, Func&& func)
const bool crlf = has_crlf and not has_lf;
auto eolformat = crlf ? EolFormat::Crlf : EolFormat::Lf;
- FsStatus fs_status{file.st.st_mtim, file.st.st_size, hash_data(file.data, file.st.st_size)};
+ FsStatus fs_status{file.st.st_mtim, file.st.st_size, murmur3(file.data, file.st.st_size)};
return func(parse_lines(pos, end, eolformat), bom, eolformat, fs_status);
}
diff --git a/src/client.cc b/src/client.cc
index ae8f83a8..20123c2f 100644
--- a/src/client.cc
+++ b/src/client.cc
@@ -394,7 +394,7 @@ void Client::check_if_buffer_needs_reloading()
return;
if (MappedFile fd{filename};
- fd.st.st_size == status.file_size and hash_data(fd.data, fd.st.st_size) == status.hash)
+ fd.st.st_size == status.file_size and murmur3(fd.data, fd.st.st_size) == status.hash)
return;
if (reload == Autoreload::Ask)
diff --git a/src/file.cc b/src/file.cc
index 87112e24..56d7b027 100644
--- a/src/file.cc
+++ b/src/file.cc
@@ -626,7 +626,7 @@ FsStatus get_fs_status(StringView filename)
{
MappedFile fd{filename};
- return {fd.st.st_mtim, fd.st.st_size, hash_data(fd.data, fd.st.st_size)};
+ return {fd.st.st_mtim, fd.st.st_size, murmur3(fd.data, fd.st.st_size)};
}
String get_kak_binary_path()
diff --git a/src/hash.cc b/src/hash.cc
index 567d8733..3b9fb018 100644
--- a/src/hash.cc
+++ b/src/hash.cc
@@ -27,8 +27,8 @@ static inline uint32_t fmix(uint32_t h)
return h;
}
-// murmur3 hash, based on https://github.com/PeterScott/murmur3
-size_t hash_data(const char* input, size_t len)
+// based on https://github.com/PeterScott/murmur3
+size_t murmur3(const char* input, size_t len)
{
const uint8_t* data = reinterpret_cast<const uint8_t*>(input);
uint32_t hash = 0x1235678;
@@ -73,13 +73,13 @@ size_t hash_data(const char* input, size_t len)
UnitTest test_murmur_hash{[] {
{
constexpr char data[] = "Hello, World!";
- kak_assert(hash_data(data, strlen(data)) == 0xf816f95b);
+ kak_assert(murmur3(data, strlen(data)) == 0xf816f95b);
}
{
constexpr char data[] = "xxxxxxxxxxxxxxxxxxxxxxxxxxxx";
- kak_assert(hash_data(data, strlen(data)) == 3551113186);
+ kak_assert(murmur3(data, strlen(data)) == 3551113186);
}
- kak_assert(hash_data("", 0) == 2572747774);
+ kak_assert(murmur3("", 0) == 2572747774);
}};
}
diff --git a/src/hash.hh b/src/hash.hh
index 8dcd719f..deed78f2 100644
--- a/src/hash.hh
+++ b/src/hash.hh
@@ -5,11 +5,23 @@
#include <utility>
#include <cstddef>
+#include <cstdint>
namespace Kakoune
{
-size_t hash_data(const char* data, size_t len);
+inline size_t fnv1a(const char* data, size_t len)
+{
+ constexpr uint32_t FNV_prime_32 = 16777619;
+ constexpr uint32_t offset_basis_32 = 2166136261;
+
+ uint32_t hash_value = offset_basis_32;
+ for (size_t i = 0; i < len; ++i)
+ hash_value = (hash_value ^ data[i]) * FNV_prime_32;
+ return hash_value;
+}
+
+size_t murmur3(const char* input, size_t len);
template<typename Type> requires std::is_integral_v<Type>
constexpr size_t hash_value(const Type& val)
diff --git a/src/string.hh b/src/string.hh
index b6b313a1..004be04c 100644
--- a/src/string.hh
+++ b/src/string.hh
@@ -21,7 +21,7 @@ public:
friend constexpr size_t hash_value(const Type& str)
{
- return hash_data(str.data(), (int)str.length());
+ return fnv1a(str.data(), (int)str.length());
}
using iterator = CharType*;