Tweak ranked match behaviour to consider the number of full words

Tracking the number of query words that appear as full words in the candidate seems to fix a few cases where the existing fuzzy matching algorithm was not great. I have been running with this for a while and did not notice any annoyances; the whole RankedMatch code probably deserves more attention, but this seems to go in the right direction.
author: Maxime Coste <mawww@kakoune.org> 2025-04-02 17:35:23 +1100
committer: Maxime Coste <mawww@kakoune.org> 2025-04-02 17:35:23 +1100
commit: 63efcc06d5bee7f05a1ee9539b2391c80e5d6205 (patch)
tree: 8ad15a23836f02421ff98796b7a2c994fe00165d /src/ranked_match.cc
parent: c7d688f578c7b58989fc04e7bb1e9b5a939a5730 (diff)
1 files changed, 26 insertions, 0 deletions
diff --git a/src/ranked_match.cc b/src/ranked_match.cc
index 87bcece7..6918809c 100644
--- a/src/ranked_match.cc
+++ b/src/ranked_match.cc
@@ -5,6 +5,7 @@
 #include "utf8_iterator.hh"
 #include "optional.hh"
 #include "ranges.hh"
+#include "word_splitter.hh"
 
 #include <algorithm>
 
@@ -70,6 +71,25 @@ static int count_word_boundaries_match(StringView candidate, StringView query)
     return count;
 }
 
+static int count_full_word_match(StringView candidate, StringView query)
+{
+    int count = 0;
+    WordSplitter query_words{query, {}};
+    WordSplitter candidate_words{candidate, {}};
+    for (auto query_word : query_words)
+    {
+        for (auto word : candidate_words)
+        {
+            if (word == query_word)
+            {
+                ++count;
+                break;
+            }
+        }
+    }
+    return count;
+}
+
 static bool smartcase_eq(Codepoint candidate, Codepoint query)
 {
     return query == (is_lower(query) ? to_lower(candidate) : candidate);
@@ -168,6 +188,7 @@ RankedMatch::RankedMatch(StringView candidate, StringView query, TestFunc func)
         }
     }
 
+    m_full_word_match_count = count_full_word_match(candidate, query);
     m_word_boundary_match_count = count_word_boundaries_match(candidate, query);
     if (m_word_boundary_match_count == query.length())
         m_flags |= Flags::OnlyWordBoundary;
@@ -207,6 +228,9 @@ bool RankedMatch::operator<(const RankedMatch& other) const
         m_word_boundary_match_count != other.m_word_boundary_match_count)
         return m_word_boundary_match_count > other.m_word_boundary_match_count;
 
+    if (m_full_word_match_count != other.m_full_word_match_count)
+        return m_full_word_match_count > other.m_full_word_match_count;
+
     if (m_max_index != other.m_max_index)
         return m_max_index < other.m_max_index;
 
@@ -288,6 +312,8 @@ UnitTest test_ranked_match{[] {
     kak_assert(preferred("foo_bar", "test_foo_bar", "foo_test_bar"));
     kak_assert(preferred("rm.cc", "src/ranked_match.cc", "test/README.asciidoc"));
     kak_assert(preferred("luaremote", "src/script/LuaRemote.cpp", "tests/TestLuaRemote.cpp"));
+    kak_assert(preferred("lang/haystack/needle.c", "git.evilcorp.com/language/haystack/aaa/needle.c", "git.evilcorp.com/aaa/ng/wrong-haystack/needle.cpp"));
+    kak_assert(preferred("evilcorp-lint/bar.go", "scripts/evilcorp-lint/foo/bar.go", "src/evilcorp-client/foo/bar.go"));
 }};
 
 UnitTest test_used_letters{[]()
author	Maxime Coste <mawww@kakoune.org>	2025-04-02 17:35:23 +1100
committer	Maxime Coste <mawww@kakoune.org>	2025-04-02 17:35:23 +1100
commit	63efcc06d5bee7f05a1ee9539b2391c80e5d6205 (patch)
tree	8ad15a23836f02421ff98796b7a2c994fe00165d /src/ranked_match.cc
parent	c7d688f578c7b58989fc04e7bb1e9b5a939a5730 (diff)