diff options
| author | Maxime Coste <mawww@kakoune.org> | 2025-04-02 17:35:23 +1100 |
|---|---|---|
| committer | Maxime Coste <mawww@kakoune.org> | 2025-04-02 17:35:23 +1100 |
| commit | 63efcc06d5bee7f05a1ee9539b2391c80e5d6205 (patch) | |
| tree | 8ad15a23836f02421ff98796b7a2c994fe00165d /src/ranked_match.cc | |
| parent | c7d688f578c7b58989fc04e7bb1e9b5a939a5730 (diff) | |
Tweak ranked match behaviour to consider the number of full words
Tracking the number of query words that appear as full words in the
candidate seems to fix a few cases where the existing fuzzy matching
algorithm was not great.
I have been running with this for a while and did not notice any
annoyances. The whole RankedMatch code probably deserves more attention,
but this seems to go in the right direction.
Diffstat (limited to 'src/ranked_match.cc')
| -rw-r--r-- | src/ranked_match.cc | 26 |
1 files changed, 26 insertions, 0 deletions
```diff
diff --git a/src/ranked_match.cc b/src/ranked_match.cc
index 87bcece7..6918809c 100644
--- a/src/ranked_match.cc
+++ b/src/ranked_match.cc
@@ -5,6 +5,7 @@
 #include "utf8_iterator.hh"
 #include "optional.hh"
 #include "ranges.hh"
+#include "word_splitter.hh"
 
 #include <algorithm>
 
@@ -70,6 +71,25 @@ static int count_word_boundaries_match(StringView candidate, StringView query)
     return count;
 }
 
+static int count_full_word_match(StringView candidate, StringView query)
+{
+    int count = 0;
+    WordSplitter query_words{query, {}};
+    WordSplitter candidate_words{candidate, {}};
+    for (auto query_word : query_words)
+    {
+        for (auto word : candidate_words)
+        {
+            if (word == query_word)
+            {
+                ++count;
+                break;
+            }
+        }
+    }
+    return count;
+}
+
 static bool smartcase_eq(Codepoint candidate, Codepoint query)
 {
     return query == (is_lower(query) ? to_lower(candidate) : candidate);
@@ -168,6 +188,7 @@ RankedMatch::RankedMatch(StringView candidate, StringView query, TestFunc func)
         }
     }
 
+    m_full_word_match_count = count_full_word_match(candidate, query);
     m_word_boundary_match_count = count_word_boundaries_match(candidate, query);
     if (m_word_boundary_match_count == query.length())
         m_flags |= Flags::OnlyWordBoundary;
@@ -207,6 +228,9 @@ bool RankedMatch::operator<(const RankedMatch& other) const
         m_word_boundary_match_count != other.m_word_boundary_match_count)
         return m_word_boundary_match_count > other.m_word_boundary_match_count;
 
+    if (m_full_word_match_count != other.m_full_word_match_count)
+        return m_full_word_match_count > other.m_full_word_match_count;
+
     if (m_max_index != other.m_max_index)
         return m_max_index < other.m_max_index;
 
@@ -288,6 +312,8 @@ UnitTest test_ranked_match{[] {
     kak_assert(preferred("foo_bar", "test_foo_bar", "foo_test_bar"));
     kak_assert(preferred("rm.cc", "src/ranked_match.cc", "test/README.asciidoc"));
     kak_assert(preferred("luaremote", "src/script/LuaRemote.cpp", "tests/TestLuaRemote.cpp"));
+    kak_assert(preferred("lang/haystack/needle.c", "git.evilcorp.com/language/haystack/aaa/needle.c", "git.evilcorp.com/aaa/ng/wrong-haystack/needle.cpp"));
+    kak_assert(preferred("evilcorp-lint/bar.go", "scripts/evilcorp-lint/foo/bar.go", "src/evilcorp-client/foo/bar.go"));
 }};
 
 UnitTest test_used_letters{[]()
```
