summaryrefslogtreecommitdiff
path: root/src/word_db.cc
diff options
context:
space:
mode:
author Maxime Coste <frrrwww@gmail.com> 2015-10-18 16:55:21 +0100
committer Maxime Coste <frrrwww@gmail.com> 2015-10-18 16:55:21 +0100
commit 4f2584a091f34cdb3eb0dda116709ef95fc34732 (patch)
tree 8782f400ee5ba2cf8d64dd0e2a7a9f732376566c /src/word_db.cc
parent 944d8f53fb958e71d4763819f9863e3de9457aa7 (diff)
Experiment with ranked word completion depending on word boundaries
Diffstat (limited to 'src/word_db.cc')
-rw-r--r-- src/word_db.cc 69
1 files changed, 69 insertions, 0 deletions
diff --git a/src/word_db.cc b/src/word_db.cc
index f15ea380..addb9fab 100644
--- a/src/word_db.cc
+++ b/src/word_db.cc
@@ -27,6 +27,13 @@ UsedLetters used_letters(StringView str)
return res;
}
+// Selects bits 26 to 51 of a UsedLetters value; to_lower() folds exactly
+// these bits onto bits 0 to 25.
+constexpr UsedLetters upper_mask = 0xFFFFFFC000000;
+
+// Returns `letters` with every bit selected by upper_mask moved 26 positions
+// down and merged with the bits that lie outside the mask. The
+// test_used_letters unit test relies on this to make
+// used_letters("abcdABCD") compare equal to used_letters("abcd").
+UsedLetters to_lower(UsedLetters letters)
+{
+    const UsedLetters folded = (letters & upper_mask) >> 26;
+    const UsedLetters kept = letters & (~upper_mask);
+    return folded | kept;
+}
+
static WordDB::WordList get_words(const SharedString& content)
{
WordDB::WordList res;
@@ -136,6 +143,63 @@ int WordDB::get_word_occurences(StringView word) const
return 0;
}
+// Returns the words of the database that `query` matches as a subsequence,
+// each paired with a rank (higher is a better match).
+//
+// The database is refreshed with update_db() first. A word is kept when
+//  - its used-letter set contains every letter of the query once both sets
+//    are case-folded with to_lower(), and
+//  - the bits of the query selected by upper_mask are also set for the word
+//    (presumably: upper-case letters in the query must match exactly), and
+//  - match_rank() below returns a non-zero rank for it.
+// An empty query yields rank 0 for every word, hence an empty result.
+WordDB::RankedWordList WordDB::find_matching(StringView query)
+{
+    // Rank of `candidate` against `query`; 0 means "does not match".
+    // Each query character must be found in order in the candidate. A
+    // lower-case query character matches either case, any other character
+    // matches only itself. Scoring: +1 per matched character, +1 more when it
+    // matched without skipping anything, +5 when it sits on a word boundary
+    // (start of word, after '_', or lower-case to upper-case transition).
+    auto match_rank = [](StringView candidate, StringView query)
+    {
+        // The <cctype> functions have undefined behaviour when given a
+        // negative value, which a plain char holding a non-ASCII byte can be;
+        // always go through unsigned char.
+        auto is_lower = [](char ch) { return islower(static_cast<unsigned char>(ch)) != 0; };
+        auto is_upper = [](char ch) { return isupper(static_cast<unsigned char>(ch)) != 0; };
+        auto lower_of = [](char ch) { return static_cast<char>(tolower(static_cast<unsigned char>(ch))); };
+
+        int rank = 0;
+        auto it = candidate.begin();
+        char prev = 0; // candidate character preceding *it, 0 at the start
+        for (auto c : query)
+        {
+            if (it == candidate.end())
+                return 0;
+
+            const bool islow = is_lower(c);
+            auto eq_c = [&](char ch) { return islow ? lower_of(ch) == c : ch == c; };
+
+            if (eq_c(*it)) // improve rank on contiguous
+                ++rank;
+
+            while (not eq_c(*it))
+            {
+                prev = *it;
+                if (++it == candidate.end())
+                    return 0;
+            }
+            // Improve rank on word boundaries
+            if (prev == 0 or prev == '_' or
+                (is_lower(prev) and is_upper(*it)))
+                rank += 5;
+
+            // Remember the candidate's character, not the query's: a
+            // lower-case query letter may have matched an upper-case one, and
+            // the boundary test above must see the candidate's real casing.
+            prev = *it;
+            ++rank;
+            ++it;
+        }
+        return rank;
+    };
+
+    // True when every bit set in `query` is also set in `letters`.
+    auto matches = [](UsedLetters query, UsedLetters letters)
+    {
+        return (query & letters) == query;
+    };
+
+    update_db();
+    const UsedLetters letters = used_letters(query);
+    // Loop-invariant parts of the letter-set prefilter.
+    const UsedLetters folded_letters = to_lower(letters);
+    const UsedLetters upper_letters = letters & upper_mask;
+
+    RankedWordList res;
+    for (auto&& word : m_words)
+    {
+        const UsedLetters word_letters = word.second.letters;
+        // Cheap rejection before running the character by character match.
+        if (not matches(folded_letters, to_lower(word_letters)) or
+            not matches(upper_letters, word_letters & upper_mask))
+            continue;
+        if (int rank = match_rank(word.first, query))
+            res.push_back({ word.first, rank });
+    }
+
+    return res;
+}
+
UnitTest test_word_db{[]()
{
Buffer buffer("test", Buffer::Flags::None,
@@ -160,4 +224,9 @@ UnitTest test_word_db{[]()
kak_assert(res == WordDB::WordList{ "allo" COMMA "mutch" COMMA "retchou" COMMA "tchou" });
}};
+// Checks that to_lower() maps the letter set of a mixed-case string onto the
+// letter set of its lower-case letters alone.
+UnitTest test_used_letters{[]()
+{
+    kak_assert(to_lower(used_letters("abcdABCD")) == used_letters("abcd"));
+}};
+
}