diff options
| author | Maxime Coste <mawww@kakoune.org> | 2025-04-02 17:35:23 +1100 |
|---|---|---|
| committer | Maxime Coste <mawww@kakoune.org> | 2025-04-02 17:35:23 +1100 |
| commit | 63efcc06d5bee7f05a1ee9539b2391c80e5d6205 (patch) | |
| tree | 8ad15a23836f02421ff98796b7a2c994fe00165d /src/word_splitter.hh | |
| parent | c7d688f578c7b58989fc04e7bb1e9b5a939a5730 (diff) | |
Tweak ranked match behaviour to consider the number of full words
Tracking the number of query words that appear as full words in the
candidate seems to fix a few cases where the existing fuzzy matching
algorithm was not great.
I have been running with this for a while and did not notice any
annoyances. The whole RankedMatch code probably deserves more attention,
but this seems to go in the right direction.
Diffstat (limited to 'src/word_splitter.hh')
| -rw-r--r-- | src/word_splitter.hh | 57 |
1 files changed, 57 insertions, 0 deletions
diff --git a/src/word_splitter.hh b/src/word_splitter.hh new file mode 100644 index 00000000..97af606d --- /dev/null +++ b/src/word_splitter.hh @@ -0,0 +1,57 @@ +#ifndef word_splitter_hh_INCLUDED +#define word_splitter_hh_INCLUDED + +#include "string.hh" +#include "array_view.hh" + +namespace Kakoune +{ + +struct WordSplitter +{ + static constexpr ByteCount max_word_len = 100; + + struct Iterator + { + Iterator(const char* begin, const WordSplitter& splitter) + : m_word_begin{begin}, m_word_end{begin}, m_splitter{&splitter} + { operator++(); } + + StringView operator*() const { return {m_word_begin, m_word_end}; } + + Iterator& operator++() + { + const auto* end = m_splitter->m_content.end(); + auto extra_chars = m_splitter->m_extra_word_chars; + + do + { + auto it = m_word_begin = m_word_end; + while (it != end and not is_word(utf8::read_codepoint(it, end), extra_chars)) + m_word_begin = it; + + m_word_end = it; + while (it != end and is_word(utf8::read_codepoint(it, end), extra_chars)) + m_word_end = it; + } while (m_word_begin != end and (m_word_end - m_word_begin) > max_word_len); + + return *this; + } + + friend bool operator==(const Iterator& lhs, const Iterator& rhs) = default; + + const char* m_word_begin; + const char* m_word_end; + const WordSplitter* m_splitter; + }; + + StringView m_content; + ConstArrayView<Codepoint> m_extra_word_chars; + + Iterator begin() const { return {m_content.begin(), *this}; } + Iterator end() const { return {m_content.end(), *this}; } +}; + +} + +#endif // word_splitter_hh_INCLUDED |
