diff options
| author | Maxime Coste <mawww@kakoune.org> | 2023-11-17 17:01:51 +1100 |
|---|---|---|
| committer | Maxime Coste <mawww@kakoune.org> | 2023-11-17 17:01:51 +1100 |
| commit | 296ab1a1ffb707f4691cdce21172b3b8d0b48eb0 (patch) | |
| tree | 6388c07b53a5cfda953b92c058a819684bcec67f /src | |
| parent | b10a935b8ceb18f256bd04843a5442d7c746abdb (diff) | |
Improve WordDB performance by precomputing hashes
Avoid multiple computation of string hashes by making it possible
to pre-compute and pass hashes to interned strings and hash maps.
Diffstat (limited to 'src')
| -rw-r--r-- | src/hash_map.hh | 10 | ||||
| -rw-r--r-- | src/shared_string.cc | 16 | ||||
| -rw-r--r-- | src/shared_string.hh | 7 | ||||
| -rw-r--r-- | src/word_db.cc | 11 |
4 files changed, 28 insertions, 16 deletions
diff --git a/src/hash_map.hh b/src/hash_map.hh index f5bc9412..2f2f6042 100644 --- a/src/hash_map.hh +++ b/src/hash_map.hh @@ -194,9 +194,9 @@ struct HashMap insert(*begin++); } - constexpr EffectiveValue& insert(Item item) + constexpr EffectiveValue& insert(Item item, size_t hash) { - const auto hash = hash_value(item_key(item)); + kak_assert(hash == hash_value(item_key(item))); if constexpr (not multi_key) { if (auto index = find_index(item_key(item), hash); index >= 0) @@ -212,6 +212,11 @@ struct HashMap return item_value(m_items.back()); } + constexpr EffectiveValue& insert(Item item) + { + return insert(std::move(item), hash_value(item_key(item))); + } + template<typename KeyType> requires IsHashCompatible<Key, KeyType> constexpr int find_index(const KeyType& key, size_t hash) const { @@ -313,6 +318,7 @@ struct HashMap constexpr const_iterator begin() const { return m_items.begin(); } constexpr const_iterator end() const { return m_items.end(); } + Item& item(size_t index) { return m_items[index]; } const Item& item(size_t index) const { return m_items[index]; } template<typename KeyType> requires IsHashCompatible<Key, KeyType> diff --git a/src/shared_string.cc b/src/shared_string.cc index 752ac828..7291699c 100644 --- a/src/shared_string.cc +++ b/src/shared_string.cc @@ -25,18 +25,24 @@ StringDataPtr StringData::create(ArrayView<const StringView> strs) return RefPtr<StringData, PtrPolicy>{res}; } -StringDataPtr StringData::Registry::intern(StringView str) +StringDataPtr StringData::Registry::intern(StringView str, size_t hash) { - auto it = m_strings.find(str); - if (it != m_strings.end()) - return StringDataPtr{it->value}; + kak_assert(hash_value(str) == hash); + auto index = m_strings.find_index(str, hash); + if (index >= 0) + return StringDataPtr{m_strings.item(index).value}; auto data = StringData::create(str); data->refcount |= interned_flag; - m_strings.insert({data->strview(), data.get()}); + m_strings.insert({data->strview(), data.get()}, hash); return data; } +StringDataPtr StringData::Registry::intern(StringView str) +{ + return intern(str, hash_value(str)); +} + void StringData::Registry::remove(StringView str) { kak_assert(m_strings.contains(str)); diff --git a/src/shared_string.hh b/src/shared_string.hh index 8455a31e..17bb72d8 100644 --- a/src/shared_string.hh +++ b/src/shared_string.hh @@ -50,6 +50,7 @@ public: public: void debug_stats() const; Ptr intern(StringView str); + Ptr intern(StringView str, size_t hash); void remove(StringView str); private: @@ -62,10 +63,8 @@ public: using StringDataPtr = StringData::Ptr; using StringRegistry = StringData::Registry; -inline StringDataPtr intern(StringView str) -{ - return StringRegistry::instance().intern(str); -} +inline StringDataPtr intern(StringView str) { return StringRegistry::instance().intern(str); } +inline StringDataPtr intern(StringView str, size_t hash) { return StringRegistry::instance().intern(str, hash); } } diff --git a/src/word_db.cc b/src/word_db.cc index 2ea35513..498d6a41 100644 --- a/src/word_db.cc +++ b/src/word_db.cc @@ -71,14 +71,15 @@ void WordDB::add_words(StringView line, ConstArrayView<Codepoint> extra_word_cha { for (auto&& w : WordSplitter{line, extra_word_chars}) { - auto it = m_words.find(w); - if (it != m_words.end()) - ++it->value.refcount; + auto hash = hash_value(w); + auto index = m_words.find_index(w, hash); + if (index >= 0) + ++m_words.item(index).value.refcount; else { - auto word = intern(w); + auto word = intern(w, hash); auto view = word->strview(); - m_words.insert({view, {std::move(word), used_letters(view), 1}}); + m_words.insert({view, {std::move(word), used_letters(view), 1}}, hash); } } } |
