summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Maxime Coste <mawww@kakoune.org> 2019-03-22 18:03:49 +1100
committer: Maxime Coste <mawww@kakoune.org> 2019-03-22 18:03:49 +1100
commit: ad882c33707c65344ca05d421ea3a29c95168eeb (patch)
tree: aa87490201ba9940168de2e0e08a8186b8dd3fd1 /src
parent: b9c1fa61a04a81cb73cc7495bc5d0eedf491092a (diff)
Limit WordDB word length to 50 bytes
Should improve both performance and relevancy of the word completions.
Diffstat (limited to 'src')
-rw-r--r-- src/word_db.cc 24
1 file changed, 18 insertions, 6 deletions
diff --git a/src/word_db.cc b/src/word_db.cc
index a9f0b751..9a7680a5 100644
--- a/src/word_db.cc
+++ b/src/word_db.cc
@@ -21,6 +21,8 @@ WordDB& get_word_db(const Buffer& buffer)
struct WordSplitter
{
+ static constexpr CharCount max_word_len = 50;
+
struct Iterator
{
Iterator(const char* begin, const WordSplitter& splitter)
@@ -34,12 +36,22 @@ struct WordSplitter
const auto* end = m_splitter->m_content.end();
auto extra_chars = m_splitter->m_extra_word_chars;
- m_word_begin = m_word_end;
- while (m_word_begin != end and not is_word(utf8::codepoint(m_word_begin, end), extra_chars))
- utf8::to_next(m_word_begin, end);
- m_word_end = m_word_begin;
- while (m_word_end != end and is_word(utf8::codepoint(m_word_end, end), extra_chars))
- utf8::to_next(m_word_end, end);
+ while (true)
+ {
+ m_word_begin = m_word_end;
+ while (m_word_begin != end and not is_word(utf8::codepoint(m_word_begin, end), extra_chars))
+ utf8::to_next(m_word_begin, end);
+ m_word_end = m_word_begin;
+ CharCount word_len = 0;
+ while (m_word_end != end and is_word(utf8::codepoint(m_word_end, end), extra_chars))
+ {
+ utf8::to_next(m_word_end, end);
+ ++word_len;
+ }
+ if (m_word_begin == end or word_len < max_word_len)
+ break;
+ }
+
return *this;
}