summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMaxime Coste <frrrwww@gmail.com>2015-10-29 13:36:30 +0000
committerMaxime Coste <frrrwww@gmail.com>2015-10-29 13:36:30 +0000
commit24043bbffe6b277bd5f9b4a68df751c6d2090967 (patch)
tree684100493e1c0f4cd36554c779ba071b398341b5 /src
parent89d22f3335dfa471189f4925ec58d9b658a269ae (diff)
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does
Diffstat (limited to 'src')
-rw-r--r--src/ranked_match.cc116
-rw-r--r--src/ranked_match.hh5
2 files changed, 86 insertions, 35 deletions
diff --git a/src/ranked_match.cc b/src/ranked_match.cc
index 03cb2fcd..9acaa8b4 100644
--- a/src/ranked_match.cc
+++ b/src/ranked_match.cc
@@ -1,65 +1,113 @@
#include "ranked_match.hh"
+#include "unit_tests.hh"
+
namespace Kakoune
{
-static bool match_rank(StringView candidate, StringView query)
+static int count_word_boundaries_match(StringView candidate, StringView query)
{
- int rank = 0;
- auto it = candidate.begin();
+ int count = 0;
+ auto it = query.begin();
char prev = 0;
- for (auto c : query)
+ for (auto c : candidate)
{
- if (it == candidate.end())
- return 0;
-
- const bool islow = islower(c);
- auto eq_c = [islow, c](char ch) { return islow ? tolower(ch) == c : ch == c; };
+ const bool is_word_boundary = prev == 0 or
+ (ispunct(prev) and is_word(c)) or
+ (islower(prev) and isupper(c));
+ prev = c;
- if (eq_c(*it)) // improve rank on contiguous
- ++rank;
+ if (not is_word_boundary)
+ continue;
- while (!eq_c(*it))
+ const char lc = tolower(c);
+ for (; it != query.end(); ++it)
{
- prev = *it;
- if (++it == candidate.end())
- return 0;
+ const char qc = *it;
+ if (qc == (islower(qc) ? lc : c))
+ {
+ ++count;
+ ++it;
+ break;
+ }
}
- // Improve rank on word boundaries
- if (prev == 0 or prev == '_' or
- (islower(prev) and isupper(*it)))
- rank += 5;
+ if (it == query.end())
+ break;
+ }
+ return count;
+}
- prev = c;
- ++rank;
+static bool smartcase_eq(char query, char candidate)
+{
+ return query == (islower(query) ? tolower(candidate) : candidate);
+}
+
+static bool subsequence_match_smart_case(StringView str, StringView subseq)
+{
+ auto it = str.begin();
+ for (auto& c : subseq)
+ {
+ if (it == str.end())
+ return false;
+ while (not smartcase_eq(c, *it))
+ {
+ if (++it == str.end())
+ return false;
+ }
++it;
}
- return rank;
+ return true;
}
RankedMatch::RankedMatch(StringView candidate, StringView query)
{
- if (candidate.empty() or query.empty())
+ if (candidate.empty() or query.length() > candidate.length())
+ return;
+
+ if (query.empty())
{
m_candidate = candidate;
return;
}
- m_match_rank = match_rank(candidate, query);
+ if (not subsequence_match_smart_case(candidate, query))
+ return;
+
+ m_candidate = candidate;
+
+ m_first_char_match = smartcase_eq(query[0], candidate[0]);
+ m_word_boundary_match_count = count_word_boundaries_match(candidate, query);
+ m_only_word_boundary = m_word_boundary_match_count == query.length();
+ m_prefix = std::equal(query.begin(), query.end(), candidate.begin(), smartcase_eq);
}
bool RankedMatch::operator<(const RankedMatch& other) const
{
- if (m_match_rank == other.m_match_rank)
- return std::lexicographical_compare(
- m_candidate.begin(), m_candidate.end(),
- other.m_candidate.begin(), other.m_candidate.end(),
- [](char a, char b) {
- const bool low_a = islower(a), low_b = islower(b);
- return low_a == low_b ? a < b : low_a;
- });
-
- return m_match_rank < other.m_match_rank;
+ if (m_only_word_boundary or other.m_only_word_boundary)
+ return m_only_word_boundary and other.m_only_word_boundary ?
+ m_word_boundary_match_count > other.m_word_boundary_match_count
+ : m_only_word_boundary;
+
+ if (m_prefix != other.m_prefix)
+ return m_prefix;
+
+ if (m_word_boundary_match_count != other.m_word_boundary_match_count)
+ return m_word_boundary_match_count > other.m_word_boundary_match_count;
+
+ if (m_first_char_match != other.m_first_char_match)
+ return m_first_char_match;
+
+ return std::lexicographical_compare(
+ m_candidate.begin(), m_candidate.end(),
+ other.m_candidate.begin(), other.m_candidate.end(),
+ [](char a, char b) {
+ const bool low_a = islower(a), low_b = islower(b);
+ return low_a == low_b ? a < b : low_a;
+ });
}
+UnitTest test_ranked_match{[] {
+ kak_assert(count_word_boundaries_match("run_all_tests", "rat") == 3);
+}};
+
}
diff --git a/src/ranked_match.hh b/src/ranked_match.hh
index d6367453..2a805002 100644
--- a/src/ranked_match.hh
+++ b/src/ranked_match.hh
@@ -19,7 +19,10 @@ struct RankedMatch
private:
StringView m_candidate;
- int m_match_rank = 0;
+ bool m_first_char_match = false;
+ bool m_prefix = false;
+ int m_word_boundary_match_count = 0;
+ bool m_only_word_boundary = false;
};
using RankedMatchList = Vector<RankedMatch>;