summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMaxime Coste <mawww@kakoune.org>2023-11-17 17:01:51 +1100
committerMaxime Coste <mawww@kakoune.org>2023-11-17 17:01:51 +1100
commit296ab1a1ffb707f4691cdce21172b3b8d0b48eb0 (patch)
tree6388c07b53a5cfda953b92c058a819684bcec67f /src
parentb10a935b8ceb18f256bd04843a5442d7c746abdb (diff)
Improve WordDB performance by precomputing hashes
Avoid multiple computation of string hashes by making it possible to pre-compute and pass hashes to interned strings and hash maps.
Diffstat (limited to 'src')
-rw-r--r--src/hash_map.hh10
-rw-r--r--src/shared_string.cc16
-rw-r--r--src/shared_string.hh7
-rw-r--r--src/word_db.cc11
4 files changed, 28 insertions, 16 deletions
diff --git a/src/hash_map.hh b/src/hash_map.hh
index f5bc9412..2f2f6042 100644
--- a/src/hash_map.hh
+++ b/src/hash_map.hh
@@ -194,9 +194,9 @@ struct HashMap
insert(*begin++);
}
- constexpr EffectiveValue& insert(Item item)
+ constexpr EffectiveValue& insert(Item item, size_t hash)
{
- const auto hash = hash_value(item_key(item));
+ kak_assert(hash == hash_value(item_key(item)));
if constexpr (not multi_key)
{
if (auto index = find_index(item_key(item), hash); index >= 0)
@@ -212,6 +212,11 @@ struct HashMap
return item_value(m_items.back());
}
+ constexpr EffectiveValue& insert(Item item)
+ {
+ return insert(std::move(item), hash_value(item_key(item)));
+ }
+
template<typename KeyType> requires IsHashCompatible<Key, KeyType>
constexpr int find_index(const KeyType& key, size_t hash) const
{
@@ -313,6 +318,7 @@ struct HashMap
constexpr const_iterator begin() const { return m_items.begin(); }
constexpr const_iterator end() const { return m_items.end(); }
+ Item& item(size_t index) { return m_items[index]; }
const Item& item(size_t index) const { return m_items[index]; }
template<typename KeyType> requires IsHashCompatible<Key, KeyType>
diff --git a/src/shared_string.cc b/src/shared_string.cc
index 752ac828..7291699c 100644
--- a/src/shared_string.cc
+++ b/src/shared_string.cc
@@ -25,18 +25,24 @@ StringDataPtr StringData::create(ArrayView<const StringView> strs)
return RefPtr<StringData, PtrPolicy>{res};
}
-StringDataPtr StringData::Registry::intern(StringView str)
+StringDataPtr StringData::Registry::intern(StringView str, size_t hash)
{
- auto it = m_strings.find(str);
- if (it != m_strings.end())
- return StringDataPtr{it->value};
+ kak_assert(hash_value(str) == hash);
+ auto index = m_strings.find_index(str, hash);
+ if (index >= 0)
+ return StringDataPtr{m_strings.item(index).value};
auto data = StringData::create(str);
data->refcount |= interned_flag;
- m_strings.insert({data->strview(), data.get()});
+ m_strings.insert({data->strview(), data.get()}, hash);
return data;
}
+StringDataPtr StringData::Registry::intern(StringView str)
+{
+ return intern(str, hash_value(str));
+}
+
void StringData::Registry::remove(StringView str)
{
kak_assert(m_strings.contains(str));
diff --git a/src/shared_string.hh b/src/shared_string.hh
index 8455a31e..17bb72d8 100644
--- a/src/shared_string.hh
+++ b/src/shared_string.hh
@@ -50,6 +50,7 @@ public:
public:
void debug_stats() const;
Ptr intern(StringView str);
+ Ptr intern(StringView str, size_t hash);
void remove(StringView str);
private:
@@ -62,10 +63,8 @@ public:
using StringDataPtr = StringData::Ptr;
using StringRegistry = StringData::Registry;
-inline StringDataPtr intern(StringView str)
-{
- return StringRegistry::instance().intern(str);
-}
+inline StringDataPtr intern(StringView str) { return StringRegistry::instance().intern(str); }
+inline StringDataPtr intern(StringView str, size_t hash) { return StringRegistry::instance().intern(str, hash); }
}
diff --git a/src/word_db.cc b/src/word_db.cc
index 2ea35513..498d6a41 100644
--- a/src/word_db.cc
+++ b/src/word_db.cc
@@ -71,14 +71,15 @@ void WordDB::add_words(StringView line, ConstArrayView<Codepoint> extra_word_cha
{
for (auto&& w : WordSplitter{line, extra_word_chars})
{
- auto it = m_words.find(w);
- if (it != m_words.end())
- ++it->value.refcount;
+ auto hash = hash_value(w);
+ auto index = m_words.find_index(w, hash);
+ if (index >= 0)
+ ++m_words.item(index).value.refcount;
else
{
- auto word = intern(w);
+ auto word = intern(w, hash);
auto view = word->strview();
- m_words.insert({view, {std::move(word), used_letters(view), 1}});
+ m_words.insert({view, {std::move(word), used_letters(view), 1}}, hash);
}
}
}