Limit WordDB word length to less than 50 characters

This should improve both the performance and the relevance of word completions.
author: Maxime Coste <mawww@kakoune.org> 2019-03-22 18:03:49 +1100
committer: Maxime Coste <mawww@kakoune.org> 2019-03-22 18:03:49 +1100
commit: ad882c33707c65344ca05d421ea3a29c95168eeb (patch)
tree: aa87490201ba9940168de2e0e08a8186b8dd3fd1 /src
parent: b9c1fa61a04a81cb73cc7495bc5d0eedf491092a (diff)
1 files changed, 18 insertions, 6 deletions
diff --git a/src/word_db.cc b/src/word_db.cc
index a9f0b751..9a7680a5 100644
--- a/src/word_db.cc
+++ b/src/word_db.cc
@@ -21,6 +21,8 @@ WordDB& get_word_db(const Buffer& buffer)
 
 struct WordSplitter
 {
+    static constexpr CharCount max_word_len = 50;
+
     struct Iterator
     {
         Iterator(const char* begin, const WordSplitter& splitter)
@@ -34,12 +36,22 @@ struct WordSplitter
             const auto* end = m_splitter->m_content.end();
             auto extra_chars = m_splitter->m_extra_word_chars;
 
-            m_word_begin = m_word_end;
-            while (m_word_begin != end and not is_word(utf8::codepoint(m_word_begin, end), extra_chars))
-                utf8::to_next(m_word_begin, end);
-            m_word_end = m_word_begin;
-            while (m_word_end != end and is_word(utf8::codepoint(m_word_end, end), extra_chars))
-                utf8::to_next(m_word_end, end);
+            while (true)
+            {
+                m_word_begin = m_word_end;
+                while (m_word_begin != end and not is_word(utf8::codepoint(m_word_begin, end), extra_chars))
+                    utf8::to_next(m_word_begin, end);
+                m_word_end = m_word_begin;
+                CharCount word_len = 0;
+                while (m_word_end != end and is_word(utf8::codepoint(m_word_end, end), extra_chars))
+                {
+                    utf8::to_next(m_word_end, end);
+                    ++word_len;
+                }
+                if (m_word_begin == end or word_len < max_word_len)
+                    break;
+            }
+
             return *this;
         }
author	Maxime Coste <mawww@kakoune.org>	2019-03-22 18:03:49 +1100
committer	Maxime Coste <mawww@kakoune.org>	2019-03-22 18:03:49 +1100
commit	ad882c33707c65344ca05d421ea3a29c95168eeb (patch)
tree	aa87490201ba9940168de2e0e08a8186b8dd3fd1 /src
parent	b9c1fa61a04a81cb73cc7495bc5d0eedf491092a (diff)