summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/input_handler.cc 33
-rw-r--r-- src/unit_tests.cc 24
-rw-r--r-- src/word_db.cc 114
-rw-r--r-- src/word_db.hh 40
4 files changed, 196 insertions, 15 deletions
diff --git a/src/input_handler.cc b/src/input_handler.cc
index 30c31413..99121ba9 100644
--- a/src/input_handler.cc
+++ b/src/input_handler.cc
@@ -10,6 +10,7 @@
#include "client.hh"
#include "color_registry.hh"
#include "file.hh"
+#include "word_db.hh"
#include <unordered_map>
@@ -707,6 +708,15 @@ public:
}
using StringList = std::vector<String>;
+    // Return the WordDB stored in the given buffer's values, building it
+    // from the buffer the first time it is requested. The id under which
+    // it is stored is obtained once from ValueId::get_free_id().
+    static WordDB& get_word_db(const Buffer& buffer)
+    {
+        static const ValueId word_db_id = ValueId::get_free_id();
+        Value& slot = buffer.values()[word_db_id];
+        if (not slot)
+        {
+            // nothing stored yet for this buffer: index it now
+            slot = Value(WordDB{buffer});
+        }
+        return slot.as<WordDB>();
+    }
+
template<bool other_buffers>
BufferCompletion complete_word(const Buffer& buffer, BufferCoord cursor_pos)
{
@@ -721,31 +731,24 @@ public:
if (not is_word(*begin))
++begin;
- String ex = R"(\<\Q)" + String{begin, end} + R"(\E\w+\>)";
- Regex re(ex.begin(), ex.end());
- using RegexIt = boost::regex_iterator<BufferIterator>;
+ String prefix{begin, end};
+
std::unordered_set<String> matches;
- for (RegexIt it(buffer.begin(), buffer.end(), re), re_end; it != re_end; ++it)
- {
- auto& match = (*it)[0];
- if (match.first <= pos and pos < match.second)
- continue;
- matches.insert(String{match.first, match.second});
- }
+ auto bufmatches = get_word_db(buffer).find_prefix(prefix);
+ matches.insert(bufmatches.begin(), bufmatches.end());
+
if (other_buffers)
{
for (const auto& buf : BufferManager::instance())
{
if (buf.get() == &buffer)
continue;
- for (RegexIt it(buf->begin(), buf->end(), re), re_end; it != re_end; ++it)
- {
- auto& match = (*it)[0];
- matches.insert(String{match.first, match.second});
- }
+ bufmatches = get_word_db(*buf).find_prefix(prefix);
+ matches.insert(bufmatches.begin(), bufmatches.end());
}
}
+ matches.erase(prefix);
CandidateList result;
std::copy(make_move_iterator(matches.begin()),
make_move_iterator(matches.end()),
diff --git a/src/unit_tests.cc b/src/unit_tests.cc
index 3b2b2960..66fe53f2 100644
--- a/src/unit_tests.cc
+++ b/src/unit_tests.cc
@@ -2,6 +2,7 @@
#include "buffer.hh"
#include "keys.hh"
#include "selectors.hh"
+#include "word_db.hh"
using namespace Kakoune;
@@ -67,6 +68,28 @@ void test_undo_group_optimizer()
kak_assert(lines[i] == buffer[LineCount((int)i)]);
}
+// Check that a WordDB follows its buffer: the word list is verified right
+// after construction, after an erase spanning several lines, and after an
+// insert inside an existing line.
+void test_word_db()
+{
+    Buffer buffer("test", Buffer::Flags::None,
+                  { "tchou mutch\n",
+                    "tchou kanaky tchou\n",
+                    "\n",
+                    "tchaa tchaa\n",
+                    "allo\n"});
+    WordDB word_db(buffer);
+
+    // all words currently in the database (empty prefix), sorted so they
+    // can be compared against a fixed expected list
+    auto sorted_words = [&word_db]() {
+        auto words = word_db.find_prefix("");
+        std::sort(words.begin(), words.end());
+        return words;
+    };
+
+    auto res = sorted_words();
+    kak_assert(res == std::vector<String>{ "allo" COMMA "kanaky" COMMA "mutch" COMMA "tchaa" COMMA "tchou" });
+
+    // this erase is expected to make "kanaky" and "tchaa" disappear
+    buffer.erase(buffer.iterator_at({1, 6}), buffer.iterator_at({4, 0}));
+    res = sorted_words();
+    kak_assert(res == std::vector<String>{ "allo" COMMA "mutch" COMMA "tchou" });
+
+    // this insert is expected to add "retchou" while "tchou" stays known
+    buffer.insert(buffer.iterator_at({1, 0}), "re");
+    res = sorted_words();
+    kak_assert(res == std::vector<String>{ "allo" COMMA "mutch" COMMA "retchou" COMMA "tchou" });
+}
+
void test_utf8()
{
String str = "maïs mélange bientôt";
@@ -121,4 +144,5 @@ void run_unit_tests()
test_keys();
test_buffer();
test_undo_group_optimizer();
+ test_word_db();
}
diff --git a/src/word_db.cc b/src/word_db.cc
new file mode 100644
index 00000000..c64ae01a
--- /dev/null
+++ b/src/word_db.cc
@@ -0,0 +1,114 @@
+#include "word_db.hh"
+
+#include "utils.hh"
+#include "utf8_iterator.hh"
+
+namespace Kakoune
+{
+
+// Build the database from the buffer's current content, one line at a time.
+// The buffer is handed (non-const) to the BufferChangeListener_AutoRegister
+// base, presumably so that on_insert/on_erase get called on later changes.
+WordDB::WordDB(const Buffer& buffer)
+    : BufferChangeListener_AutoRegister{const_cast<Buffer&>(buffer)}
+{
+    const auto line_count = buffer.line_count();
+    for (auto line = 0_line; line < line_count; ++line)
+        add_words(line, buffer[line]);
+}
+
+// Split content into words (maximal runs of codepoints accepted by
+// is_word) and record every occurrence as belonging to the given line, in
+// both m_word_to_lines and m_line_to_words. A word present several times
+// in content is recorded once per occurrence.
+void WordDB::add_words(LineCount line, const String& content)
+{
+    using Iterator = utf8::utf8_iterator<String::const_iterator,
+                                         utf8::InvalidBytePolicy::Pass>;
+
+    // register one occurrence of w on line, keeping both indexes in sync
+    auto record = [this, line](const String& w)
+    {
+        m_word_to_lines[w].push_back(line);
+        m_line_to_words[line].push_back(w);
+    };
+
+    auto word_start = content.begin();
+    bool in_word = false;
+    for (Iterator it{word_start}, end{content.end()}; it != end; ++it)
+    {
+        Codepoint c = *it;
+        const bool word = is_word(c);
+        if (not in_word and word)
+        {
+            word_start = it.base();
+            in_word = true;
+        }
+        else if (in_word and not word)
+        {
+            record(String{word_start, it.base()});
+            in_word = false;
+        }
+    }
+
+    // A word reaching the very end of content is not followed by any
+    // non-word codepoint, so the loop above never flushes it; do it here
+    // so content lacking a trailing separator does not lose its last word.
+    if (in_word)
+        record(String{word_start, content.end()});
+}
+
+// Forget the line designated by it: every word occurrence recorded for that
+// line is taken out of m_word_to_lines (words left without any line are
+// dropped entirely), then the line entry itself is erased.
+// Returns the iterator following the erased entry; when it is
+// m_line_to_words.end() nothing happens and it is returned unchanged.
+WordDB::LineToWords::iterator WordDB::remove_line(LineToWords::iterator it)
+{
+    if (it == m_line_to_words.end())
+        return it;
+
+    const LineCount line = it->first;
+    for (auto& word : it->second)
+    {
+        auto entry = m_word_to_lines.find(word);
+        auto& occurrences = entry->second;
+        // one occurrence is removed per iteration; a word repeated on the
+        // line appears as many times in it->second
+        occurrences.erase(find(occurrences, line));
+        if (occurrences.empty())
+            m_word_to_lines.erase(entry);
+    }
+    return m_line_to_words.erase(it);
+}
+
+// Shift the line number of every entry in [begin, end) by num (which may be
+// negative), in both indexes.
+// Map keys cannot be modified in place, so the entries are first moved out
+// of m_line_to_words, renumbered, and then inserted back.
+void WordDB::update_lines(LineToWords::iterator begin, LineToWords::iterator end,
+                          LineCount num)
+{
+    using LineEntry = std::pair<LineCount, std::vector<String>>;
+    std::vector<LineEntry> shifted{std::make_move_iterator(begin),
+                                   std::make_move_iterator(end)};
+    m_line_to_words.erase(begin, end);
+
+    for (auto& entry : shifted)
+    {
+        const LineCount old_line = entry.first;
+        for (auto& word : entry.second)
+        {
+            // renumber one recorded occurrence of word on the old line
+            auto& occurrences = m_word_to_lines[word];
+            *find(occurrences, old_line) += num;
+        }
+        entry.first += num;
+    }
+
+    m_line_to_words.insert(std::make_move_iterator(shifted.begin()),
+                           std::make_move_iterator(shifted.end()));
+}
+
+// Buffer change callback for an insertion spanning [begin, end].
+// Lines located after begin.line are pushed down by the number of lines the
+// insertion added, then every line from begin.line to end.line is indexed
+// again from the buffer's current content.
+void WordDB::on_insert(const Buffer& buffer, BufferCoord begin, BufferCoord end)
+{
+    const auto added_lines = end.line - begin.line;
+    if (added_lines > 0)
+    {
+        update_lines(m_line_to_words.upper_bound(begin.line),
+                     m_line_to_words.end(), added_lines);
+    }
+
+    // the words previously recorded for begin.line are stale
+    remove_line(m_line_to_words.find(begin.line));
+
+    auto line = begin.line;
+    while (line <= end.line)
+    {
+        add_words(line, buffer[line]);
+        ++line;
+    }
+}
+
+// Buffer change callback for an erasure spanning [begin, end].
+// Every indexed line from begin.line to end.line is forgotten, the lines
+// after end.line are pulled up by the number of lines removed, and
+// begin.line is indexed again from the buffer's current content.
+void WordDB::on_erase(const Buffer& buffer, BufferCoord begin, BufferCoord end)
+{
+    auto last = m_line_to_words.upper_bound(end.line);
+    for (auto it = m_line_to_words.lower_bound(begin.line); it != last; )
+        it = remove_line(it);
+
+    const auto removed_lines = end.line - begin.line;
+    if (removed_lines > 0)
+        update_lines(last, m_line_to_words.end(), -removed_lines);
+
+    add_words(begin.line, buffer[begin.line]);
+}
+
+// Collect every known word for which prefix_match(word, prefix) holds.
+// Each word appears at most once in the result; the order follows the
+// iteration order of m_word_to_lines.
+std::vector<String> WordDB::find_prefix(const String& prefix) const
+{
+    std::vector<String> res;
+    for (auto& entry : m_word_to_lines)
+    {
+        const String& candidate = entry.first;
+        if (not prefix_match(candidate, prefix))
+            continue;
+        res.push_back(candidate);
+    }
+    return res;
+}
+
+}
diff --git a/src/word_db.hh b/src/word_db.hh
new file mode 100644
index 00000000..c7f8a83d
--- /dev/null
+++ b/src/word_db.hh
@@ -0,0 +1,40 @@
+#ifndef word_db_hh_INCLUDED
+#define word_db_hh_INCLUDED
+
+#include "buffer.hh"
+
+#include <map>
+#include <set>
+#include <unordered_map>
+#include <vector>
+
+namespace Kakoune
+{
+
+class String;
+
+// maintain a database of words available in a buffer
+// Index of the words contained in a buffer.
+//
+// Two indexes are kept in sync: the lines on which each word occurs, and
+// the words found on each line. The on_insert/on_erase overrides refresh
+// the lines touched by a buffer modification.
+class WordDB : public BufferChangeListener_AutoRegister
+{
+public:
+    // Index the whole current content of buffer. Explicit, so that a
+    // Buffer is never implicitly converted into a freshly built database.
+    explicit WordDB(const Buffer& buffer);
+
+    // Buffer modification callbacks; begin and end delimit the inserted
+    // or erased range.
+    void on_insert(const Buffer& buffer, BufferCoord begin, BufferCoord end) override;
+    void on_erase(const Buffer& buffer, BufferCoord begin, BufferCoord end) override;
+
+    // Return the known words matching prefix, each at most once and in no
+    // particular order.
+    std::vector<String> find_prefix(const String& prefix) const;
+
+private:
+    // word -> lines it occurs on (one entry per occurrence)
+    using WordToLines = std::unordered_map<String, std::vector<LineCount>>;
+    // line -> words it contains (one entry per occurrence), ordered by line
+    using LineToWords = std::map<LineCount, std::vector<String>>;
+
+    // Record the words of content as occurring on line.
+    void add_words(LineCount line, const String& content);
+    // Forget the line designated by it; returns the following iterator.
+    LineToWords::iterator remove_line(LineToWords::iterator it);
+    // Shift the line numbers of the entries in [begin, end) by num.
+    void update_lines(LineToWords::iterator begin, LineToWords::iterator end,
+                      LineCount num);
+
+    WordToLines m_word_to_lines;
+    LineToWords m_line_to_words;
+};
+
+}
+
+#endif // word_db_hh_INCLUDED
+