diff options
| author | Maxime Coste <mawww@kakoune.org> | 2019-03-22 18:03:49 +1100 |
|---|---|---|
| committer | Maxime Coste <mawww@kakoune.org> | 2019-03-22 18:03:49 +1100 |
| commit | ad882c33707c65344ca05d421ea3a29c95168eeb (patch) | |
| tree | aa87490201ba9940168de2e0e08a8186b8dd3fd1 /src | |
| parent | b9c1fa61a04a81cb73cc7495bc5d0eedf491092a (diff) | |
Limit WordDB word length to 50 bytes
Should improve both performance and relevancy of the word completions.
Diffstat (limited to 'src')
| -rw-r--r-- | src/word_db.cc | 24 |
1 file changed, 18 insertions, 6 deletions
diff --git a/src/word_db.cc b/src/word_db.cc index a9f0b751..9a7680a5 100644 --- a/src/word_db.cc +++ b/src/word_db.cc @@ -21,6 +21,8 @@ WordDB& get_word_db(const Buffer& buffer) struct WordSplitter { + static constexpr CharCount max_word_len = 50; + struct Iterator { Iterator(const char* begin, const WordSplitter& splitter) @@ -34,12 +36,22 @@ struct WordSplitter const auto* end = m_splitter->m_content.end(); auto extra_chars = m_splitter->m_extra_word_chars; - m_word_begin = m_word_end; - while (m_word_begin != end and not is_word(utf8::codepoint(m_word_begin, end), extra_chars)) - utf8::to_next(m_word_begin, end); - m_word_end = m_word_begin; - while (m_word_end != end and is_word(utf8::codepoint(m_word_end, end), extra_chars)) - utf8::to_next(m_word_end, end); + while (true) + { + m_word_begin = m_word_end; + while (m_word_begin != end and not is_word(utf8::codepoint(m_word_begin, end), extra_chars)) + utf8::to_next(m_word_begin, end); + m_word_end = m_word_begin; + CharCount word_len = 0; + while (m_word_end != end and is_word(utf8::codepoint(m_word_end, end), extra_chars)) + { + utf8::to_next(m_word_end, end); + ++word_len; + } + if (m_word_begin == end or word_len < max_word_len) + break; + } + return *this; } |
