summaryrefslogtreecommitdiff
path: root/src/ranked_match.cc
diff options
context:
space:
mode:
authorMaxime Coste <mawww@kakoune.org>2025-04-02 17:35:23 +1100
committerMaxime Coste <mawww@kakoune.org>2025-04-02 17:35:23 +1100
commit63efcc06d5bee7f05a1ee9539b2391c80e5d6205 (patch)
tree8ad15a23836f02421ff98796b7a2c994fe00165d /src/ranked_match.cc
parentc7d688f578c7b58989fc04e7bb1e9b5a939a5730 (diff)
Tweak ranked match behaviour to consider the number of full words
Tracking the number of query words that appear as full words in the candidate seems to fix a few cases where the existing fuzzy matching algorithm was not great. I have been running with this for a while and did not notice any annoyances. The whole RankedMatch code probably deserves more attention, but this seems to go in the right direction.
Diffstat (limited to 'src/ranked_match.cc')
-rw-r--r--src/ranked_match.cc26
1 files changed, 26 insertions, 0 deletions
diff --git a/src/ranked_match.cc b/src/ranked_match.cc
index 87bcece7..6918809c 100644
--- a/src/ranked_match.cc
+++ b/src/ranked_match.cc
@@ -5,6 +5,7 @@
#include "utf8_iterator.hh"
#include "optional.hh"
#include "ranges.hh"
+#include "word_splitter.hh"
#include <algorithm>
@@ -70,6 +71,25 @@ static int count_word_boundaries_match(StringView candidate, StringView query)
return count;
}
+static int count_full_word_match(StringView candidate, StringView query) // returns how many words of query compare equal to some word of candidate
+{
+ int count = 0;
+ WordSplitter query_words{query, {}}; // NOTE(review): second argument is left empty; presumably selects WordSplitter's default word characters - confirm in word_splitter.hh
+ WordSplitter candidate_words{candidate, {}};
+ for (auto query_word : query_words)
+ {
+ for (auto word : candidate_words) // candidate_words is iterated again for each query word
+ {
+ if (word == query_word)
+ {
+ ++count;
+ break; // stop at the first equal word so one query word adds at most 1 to count
+ }
+ }
+ }
+ return count;
+}
+
static bool smartcase_eq(Codepoint candidate, Codepoint query)
{
return query == (is_lower(query) ? to_lower(candidate) : candidate);
@@ -168,6 +188,7 @@ RankedMatch::RankedMatch(StringView candidate, StringView query, TestFunc func)
}
}
+ m_full_word_match_count = count_full_word_match(candidate, query);
m_word_boundary_match_count = count_word_boundaries_match(candidate, query);
if (m_word_boundary_match_count == query.length())
m_flags |= Flags::OnlyWordBoundary;
@@ -207,6 +228,9 @@ bool RankedMatch::operator<(const RankedMatch& other) const
m_word_boundary_match_count != other.m_word_boundary_match_count)
return m_word_boundary_match_count > other.m_word_boundary_match_count;
+ if (m_full_word_match_count != other.m_full_word_match_count)
+ return m_full_word_match_count > other.m_full_word_match_count;
+
if (m_max_index != other.m_max_index)
return m_max_index < other.m_max_index;
@@ -288,6 +312,8 @@ UnitTest test_ranked_match{[] {
kak_assert(preferred("foo_bar", "test_foo_bar", "foo_test_bar"));
kak_assert(preferred("rm.cc", "src/ranked_match.cc", "test/README.asciidoc"));
kak_assert(preferred("luaremote", "src/script/LuaRemote.cpp", "tests/TestLuaRemote.cpp"));
+ kak_assert(preferred("lang/haystack/needle.c", "git.evilcorp.com/language/haystack/aaa/needle.c", "git.evilcorp.com/aaa/ng/wrong-haystack/needle.cpp"));
+ kak_assert(preferred("evilcorp-lint/bar.go", "scripts/evilcorp-lint/foo/bar.go", "src/evilcorp-client/foo/bar.go"));
}};
UnitTest test_used_letters{[]()