summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMaxime Coste <mawww@kakoune.org>2025-04-02 17:35:23 +1100
committerMaxime Coste <mawww@kakoune.org>2025-04-02 17:35:23 +1100
commit63efcc06d5bee7f05a1ee9539b2391c80e5d6205 (patch)
tree8ad15a23836f02421ff98796b7a2c994fe00165d /src
parentc7d688f578c7b58989fc04e7bb1e9b5a939a5730 (diff)
Tweak ranked match behaviour to consider the number of full words
Tracking the number of query words that appear as full words in the candidate seems to fix a few cases where the existing fuzzy matching algorithm was not great. I have been running with this for a while and did not notice any annoyances. The whole RankedMatch code probably deserves more attention, but this seems to go in the right direction.
Diffstat (limited to 'src')
-rw-r--r--src/insert_completer.cc6
-rw-r--r--src/ranked_match.cc26
-rw-r--r--src/ranked_match.hh1
-rw-r--r--src/word_db.cc44
-rw-r--r--src/word_db.hh2
-rw-r--r--src/word_splitter.hh57
6 files changed, 88 insertions, 48 deletions
diff --git a/src/insert_completer.cc b/src/insert_completer.cc
index 49bbd4ca..01058012 100644
--- a/src/insert_completer.cc
+++ b/src/insert_completer.cc
@@ -3,7 +3,6 @@
#include "buffer_manager.hh"
#include "buffer_utils.hh"
#include "debug.hh"
-#include "client.hh"
#include "command_manager.hh"
#include "changes.hh"
#include "context.hh"
@@ -13,6 +12,7 @@
#include "regex.hh"
#include "window.hh"
#include "word_db.hh"
+#include "word_splitter.hh"
#include "option_types.hh"
#include "utf8_iterator.hh"
#include "user_interface.hh"
@@ -87,7 +87,7 @@ InsertCompletion complete_word(const SelectionList& sels,
for (int i = 0; i < sels.size(); ++i)
{
int len = 0;
- auto is_short_enough_word = [&] (Codepoint c) { return len++ < WordDB::max_word_len && is_word_pred(c); };
+ auto is_short_enough_word = [&] (Codepoint c) { return len++ < WordSplitter::max_word_len && is_word_pred(c); };
Utf8It end{buffer.iterator_at(sels[i].cursor()), buffer};
Utf8It begin = end-1;
@@ -103,7 +103,7 @@ InsertCompletion complete_word(const SelectionList& sels,
skip_while(end, buffer.end(), is_short_enough_word);
- if (len <= WordDB::max_word_len)
+ if (len <= WordSplitter::max_word_len)
{
StringView word = buffer.substr(begin.base().coord(), end.base().coord());
++sel_word_counts[word];
diff --git a/src/ranked_match.cc b/src/ranked_match.cc
index 87bcece7..6918809c 100644
--- a/src/ranked_match.cc
+++ b/src/ranked_match.cc
@@ -5,6 +5,7 @@
#include "utf8_iterator.hh"
#include "optional.hh"
#include "ranges.hh"
+#include "word_splitter.hh"
#include <algorithm>
@@ -70,6 +71,25 @@ static int count_word_boundaries_match(StringView candidate, StringView query)
return count;
}
+static int count_full_word_match(StringView candidate, StringView query) // Returns how many words of `query` are equal to some whole word of `candidate`.
+{
+ int count = 0;
+ WordSplitter query_words{query, {}}; // second member is m_extra_word_chars: left empty here
+ WordSplitter candidate_words{candidate, {}};
+ for (auto query_word : query_words) // a query word that appears twice in `query` is tested twice
+ {
+ for (auto word : candidate_words) // the candidate is re-split from its start for every query word
+ {
+ if (word == query_word)
+ {
+ ++count;
+ break; // one hit per query word, even if the candidate repeats that word
+ }
+ }
+ }
+ return count;
+}
+
static bool smartcase_eq(Codepoint candidate, Codepoint query)
{
return query == (is_lower(query) ? to_lower(candidate) : candidate);
@@ -168,6 +188,7 @@ RankedMatch::RankedMatch(StringView candidate, StringView query, TestFunc func)
}
}
+ m_full_word_match_count = count_full_word_match(candidate, query);
m_word_boundary_match_count = count_word_boundaries_match(candidate, query);
if (m_word_boundary_match_count == query.length())
m_flags |= Flags::OnlyWordBoundary;
@@ -207,6 +228,9 @@ bool RankedMatch::operator<(const RankedMatch& other) const
m_word_boundary_match_count != other.m_word_boundary_match_count)
return m_word_boundary_match_count > other.m_word_boundary_match_count;
+ if (m_full_word_match_count != other.m_full_word_match_count)
+ return m_full_word_match_count > other.m_full_word_match_count;
+
if (m_max_index != other.m_max_index)
return m_max_index < other.m_max_index;
@@ -288,6 +312,8 @@ UnitTest test_ranked_match{[] {
kak_assert(preferred("foo_bar", "test_foo_bar", "foo_test_bar"));
kak_assert(preferred("rm.cc", "src/ranked_match.cc", "test/README.asciidoc"));
kak_assert(preferred("luaremote", "src/script/LuaRemote.cpp", "tests/TestLuaRemote.cpp"));
+ kak_assert(preferred("lang/haystack/needle.c", "git.evilcorp.com/language/haystack/aaa/needle.c", "git.evilcorp.com/aaa/ng/wrong-haystack/needle.cpp"));
+ kak_assert(preferred("evilcorp-lint/bar.go", "scripts/evilcorp-lint/foo/bar.go", "src/evilcorp-client/foo/bar.go"));
}};
UnitTest test_used_letters{[]()
diff --git a/src/ranked_match.hh b/src/ranked_match.hh
index 5a58defb..1d877d53 100644
--- a/src/ranked_match.hh
+++ b/src/ranked_match.hh
@@ -54,6 +54,7 @@ private:
StringView m_candidate{};
bool m_matches = false;
Flags m_flags = Flags::None;
+ int m_full_word_match_count = 0;
int m_word_boundary_match_count = 0;
int m_max_index = 0;
size_t m_input_sequence_number = 0;
diff --git a/src/word_db.cc b/src/word_db.cc
index 0a1873e7..57821da8 100644
--- a/src/word_db.cc
+++ b/src/word_db.cc
@@ -4,6 +4,7 @@
#include "line_modification.hh"
#include "unit_tests.hh"
#include "value.hh"
+#include "word_splitter.hh"
namespace Kakoune
{
@@ -17,49 +18,6 @@ WordDB& get_word_db(const Buffer& buffer)
return cache_val.as<WordDB>();
}
-struct WordSplitter
-{
- struct Iterator
- {
- Iterator(const char* begin, const WordSplitter& splitter)
- : m_word_begin{begin}, m_word_end{begin}, m_splitter{&splitter}
- { operator++(); }
-
- StringView operator*() const { return {m_word_begin, m_word_end}; }
-
- Iterator& operator++()
- {
- const auto* end = m_splitter->m_content.end();
- auto extra_chars = m_splitter->m_extra_word_chars;
-
- do
- {
- auto it = m_word_begin = m_word_end;
- while (it != end and not is_word(utf8::read_codepoint(it, end), extra_chars))
- m_word_begin = it;
-
- m_word_end = it;
- while (it != end and is_word(utf8::read_codepoint(it, end), extra_chars))
- m_word_end = it;
- } while (m_word_begin != end and (m_word_end - m_word_begin) > WordDB::max_word_len);
-
- return *this;
- }
-
- friend bool operator==(const Iterator& lhs, const Iterator& rhs) = default;
-
- const char* m_word_begin;
- const char* m_word_end;
- const WordSplitter* m_splitter;
- };
-
- StringView m_content;
- ConstArrayView<Codepoint> m_extra_word_chars;
-
- Iterator begin() const { return {m_content.begin(), *this}; }
- Iterator end() const { return {m_content.end(), *this}; }
-};
-
static ConstArrayView<Codepoint> get_extra_word_chars(const Buffer& buffer)
{
return buffer.options()["extra_word_chars"].get<Vector<Codepoint, MemoryDomain::Options>>();
diff --git a/src/word_db.hh b/src/word_db.hh
index dcb46653..dab65d4c 100644
--- a/src/word_db.hh
+++ b/src/word_db.hh
@@ -18,8 +18,6 @@ class Buffer;
class WordDB : public OptionManagerWatcher
{
public:
- static constexpr ByteCount max_word_len = 50;
-
WordDB(const Buffer& buffer);
~WordDB();
WordDB(const WordDB&) = delete;
diff --git a/src/word_splitter.hh b/src/word_splitter.hh
new file mode 100644
index 00000000..97af606d
--- /dev/null
+++ b/src/word_splitter.hh
@@ -0,0 +1,57 @@
+#ifndef word_splitter_hh_INCLUDED
+#define word_splitter_hh_INCLUDED
+
+#include "string.hh"
+#include "array_view.hh"
+
+namespace Kakoune
+{
+
+struct WordSplitter // Iterable view over the words of m_content; a word is a run of codepoints accepted by is_word().
+{
+ static constexpr ByteCount max_word_len = 100; // longer words (measured as a char pointer difference, i.e. bytes) are skipped
+
+ struct Iterator
+ {
+ Iterator(const char* begin, const WordSplitter& splitter)
+ : m_word_begin{begin}, m_word_end{begin}, m_splitter{&splitter}
+ { operator++(); } // advance right away so a fresh iterator already designates the first word
+
+ StringView operator*() const { return {m_word_begin, m_word_end}; } // current word as [m_word_begin, m_word_end)
+
+ Iterator& operator++() // moves to the next word no longer than max_word_len, or to the end of the content
+ {
+ const auto* end = m_splitter->m_content.end();
+ auto extra_chars = m_splitter->m_extra_word_chars;
+
+ do
+ {
+ auto it = m_word_begin = m_word_end; // resume scanning where the previous word stopped
+ while (it != end and not is_word(utf8::read_codepoint(it, end), extra_chars)) // NOTE(review): assumes read_codepoint advances `it` past the codepoint it returns - confirm in utf8.hh
+ m_word_begin = it; // runs only after a non-word codepoint: begin trails `it`
+
+ m_word_end = it; // if a word codepoint ended the loop above, `it` is already past it
+ while (it != end and is_word(utf8::read_codepoint(it, end), extra_chars))
+ m_word_end = it; // extend the word over each further word codepoint
+ } while (m_word_begin != end and (m_word_end - m_word_begin) > max_word_len); // too long: look for the next word
+
+ return *this;
+ }
+
+ friend bool operator==(const Iterator& lhs, const Iterator& rhs) = default; // memberwise: both pointers and the splitter must match
+
+ const char* m_word_begin;
+ const char* m_word_end;
+ const WordSplitter* m_splitter; // source of the content bounds and the extra word characters
+ };
+
+ StringView m_content; // text being split
+ ConstArrayView<Codepoint> m_extra_word_chars; // forwarded to is_word() as its second argument
+
+ Iterator begin() const { return {m_content.begin(), *this}; }
+ Iterator end() const { return {m_content.end(), *this}; } // built at the content end, so both word pointers stay at end
+};
+
+}
+
+#endif // word_splitter_hh_INCLUDED