summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Maxime Coste <mawww@kakoune.org> 2017-10-04 12:16:52 +0800
committer Maxime Coste <mawww@kakoune.org> 2017-11-01 14:05:14 +0800
commit dbb175841b65ff4c1d8f7bad92fab9841c04e9b0 (patch)
tree 40bee454765008f1421673ca8ea24b6e1432a971 /src
parent cf5055f68b8ac049c8ec42e37ee71f9d7841fcd2 (diff)
Regex: do not write the search prefix inside the program bytecode
It's faster to have specialized code in the VM directly
Diffstat (limited to 'src')
-rw-r--r-- src/regex_impl.cc 12
-rw-r--r-- src/regex_impl.hh 76
2 files changed, 44 insertions, 44 deletions
diff --git a/src/regex_impl.cc b/src/regex_impl.cc
index c8c9d6b2..c7950bd8 100644
--- a/src/regex_impl.cc
+++ b/src/regex_impl.cc
@@ -502,7 +502,6 @@ struct RegexCompiler
RegexCompiler(const ParsedRegex& parsed_regex)
: m_parsed_regex{parsed_regex}
{
- write_search_prefix();
compile_node(m_parsed_regex.ast);
push_op(CompiledRegex::Match);
m_program.matchers = m_parsed_regex.matchers;
@@ -659,17 +658,6 @@ private:
return pos;
}
- // Add a '.*' as the first instructions for the search use case
- void write_search_prefix()
- {
- kak_assert(m_program.bytecode.empty());
- push_op(CompiledRegex::Split_PrioritizeChild);
- get_offset(alloc_offset()) = CompiledRegex::search_prefix_size;
- push_op(CompiledRegex::AnyChar);
- push_op(CompiledRegex::Split_PrioritizeParent);
- get_offset(alloc_offset()) = 1 + sizeof(Offset);
- }
-
Offset alloc_offset()
{
auto pos = m_program.bytecode.size();
diff --git a/src/regex_impl.hh b/src/regex_impl.hh
index 4d073473..0176b223 100644
--- a/src/regex_impl.hh
+++ b/src/regex_impl.hh
@@ -36,8 +36,6 @@ struct CompiledRegex
};
using Offset = unsigned;
- static constexpr Offset search_prefix_size = 3 + 2 * sizeof(Offset);
-
explicit operator bool() const { return not bytecode.empty(); }
Vector<char> bytecode;
@@ -103,18 +101,18 @@ struct ThreadedRegexVM
}
};
- Saves* clone_saves(Saves* saves)
+ Saves* clone_saves(Iterator* pos)
{
if (not m_free_saves.empty())
{
Saves* res = m_free_saves.back();
m_free_saves.pop_back();
res->refcount = 1;
- std::copy(saves->pos, saves->pos + m_program.save_count, res->pos);
+ std::copy(pos, pos + m_program.save_count, res->pos);
return res;
}
- m_saves.push_back(Saves::allocate(m_program.save_count, saves->pos));
+ m_saves.push_back(Saves::allocate(m_program.save_count, pos));
return m_saves.back();
}
@@ -130,6 +128,8 @@ struct ThreadedRegexVM
Saves* saves;
};
+ using Utf8It = utf8::iterator<Iterator>;
+
enum class StepResult { Consumed, Matched, Failed };
StepResult step(Thread& thread, Vector<Thread>& threads)
{
@@ -181,7 +181,7 @@ struct ThreadedRegexVM
if (thread.saves->refcount > 1)
{
--thread.saves->refcount;
- thread.saves = clone_saves(thread.saves);
+ thread.saves = clone_saves(thread.saves->pos);
}
const size_t index = *thread.inst++;
thread.saves->pos[index] = m_pos.base();
@@ -250,30 +250,13 @@ struct ThreadedRegexVM
return StepResult::Failed;
}
- bool exec(Iterator begin, Iterator end, RegexExecFlags flags)
+ bool exec_from(Utf8It start, Saves* initial_saves, Vector<Thread>& current_threads, Vector<Thread>& next_threads)
{
- m_begin = begin;
- m_end = end;
- m_flags = flags;
+ current_threads.push_back({m_program.bytecode.data(), initial_saves});
+ next_threads.clear();
bool found_match = false;
-
- if (flags & RegexExecFlags::NotInitialNull and m_begin == m_end)
- return false;
-
- Saves* initial_saves = nullptr;
- if (not (m_flags & RegexExecFlags::NoSaves))
- {
- m_saves.push_back(Saves::allocate(m_program.save_count));
- initial_saves = m_saves.back();
- }
-
- const bool search = (flags & RegexExecFlags::Search);
-
- const auto start_offset = search ? 0 : CompiledRegex::search_prefix_size;
- Vector<Thread> current_threads{Thread{m_program.bytecode.data() + start_offset, initial_saves}};
- Vector<Thread> next_threads;
- for (m_pos = Utf8It{m_begin, m_begin, m_end}; m_pos != m_end; ++m_pos)
+ for (m_pos = start; m_pos != m_end; ++m_pos)
{
while (not current_threads.empty())
{
@@ -282,15 +265,15 @@ struct ThreadedRegexVM
switch (step(thread, current_threads))
{
case StepResult::Matched:
- if (not search or // We are not at end, this is not a full match
- (flags & RegexExecFlags::NotInitialNull and m_pos == m_begin))
+ if (not (m_flags & RegexExecFlags::Search) or // We are not at end, this is not a full match
+ (m_flags & RegexExecFlags::NotInitialNull and m_pos == m_begin))
{
release_saves(thread.saves);
continue;
}
m_captures = thread.saves;
- if (flags & RegexExecFlags::AnyMatch)
+ if (m_flags & RegexExecFlags::AnyMatch)
return true;
found_match = true;
@@ -330,6 +313,37 @@ struct ThreadedRegexVM
return false;
}
+ bool exec(Iterator begin, Iterator end, RegexExecFlags flags)
+ {
+ m_begin = begin;
+ m_end = end;
+ m_flags = flags;
+
+ if (flags & RegexExecFlags::NotInitialNull and m_begin == m_end)
+ return false;
+
+ Vector<Thread> current_threads, next_threads;
+
+ const bool no_saves = (m_flags & RegexExecFlags::NoSaves);
+ Vector<Iterator> empty_saves(m_program.save_count, Iterator{});
+
+ Utf8It start{m_begin, m_begin, m_end};
+ if (exec_from(start, no_saves ? nullptr : clone_saves(empty_saves.data()),
+ current_threads, next_threads))
+ return true;
+
+ if (not (flags & RegexExecFlags::Search))
+ return false;
+
+ while (start != end)
+ {
+ if (exec_from(++start, no_saves ? nullptr : clone_saves(empty_saves.data()),
+ current_threads, next_threads))
+ return true;
+ }
+ return false;
+ }
+
bool is_line_start() const
{
return (m_pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfLine)) or
@@ -351,8 +365,6 @@ struct ThreadedRegexVM
const CompiledRegex& m_program;
- using Utf8It = utf8::iterator<Iterator>;
-
Iterator m_begin;
Iterator m_end;
Utf8It m_pos;