summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMaxime Coste <mawww@kakoune.org>2019-01-05 18:28:56 +1100
committerMaxime Coste <mawww@kakoune.org>2019-01-20 22:59:28 +1100
commit0364a998273cdf91904a6cd6d1cf921e44b41c84 (patch)
tree2377737484b5fb9b3403442f810c50225ebb03a0 /src
parentfd043435e5274a138b66c9f35455c896aaeaa96e (diff)
Refactor regex find next start not to be an instruction anymore
The same logic can be hard coded, avoiding one thread and 3 instructions, improving the regex matching speed.
Diffstat (limited to 'src')
-rw-r--r--src/regex_impl.cc17
-rw-r--r--src/regex_impl.hh34
2 files changed, 15 insertions, 36 deletions
diff --git a/src/regex_impl.cc b/src/regex_impl.cc
index 6e5bee51..28d201f5 100644
--- a/src/regex_impl.cc
+++ b/src/regex_impl.cc
@@ -676,14 +676,13 @@ struct RegexCompiler
{
kak_assert(not (flags & RegexCompileFlags::NoForward) or flags & RegexCompileFlags::Backward);
// Approximation of the number of instructions generated
- m_program.instructions.reserve((CompiledRegex::search_prefix_size + parsed_regex.nodes.size() + 1)
+ m_program.instructions.reserve((parsed_regex.nodes.size() + 1)
* (((flags & RegexCompileFlags::Backward) and
not (flags & RegexCompileFlags::NoForward)) ? 2 : 1));
if (not (flags & RegexCompileFlags::NoForward))
{
m_program.forward_start_desc = compute_start_desc<RegexMode::Forward>();
- write_search_prefix();
compile_node<RegexMode::Forward>(0);
push_inst(CompiledRegex::Match);
}
@@ -692,7 +691,6 @@ struct RegexCompiler
{
m_program.first_backward_inst = m_program.instructions.size();
m_program.backward_start_desc = compute_start_desc<RegexMode::Backward>();
- write_search_prefix();
compile_node<RegexMode::Backward>(0);
push_inst(CompiledRegex::Match);
}
@@ -866,16 +864,6 @@ private:
return start_pos;
}
- // Add a sequence of instructions that enable searching for a match instead of checking for it
- void write_search_prefix()
- {
- const uint32_t first_inst = m_program.instructions.size();
- push_inst(CompiledRegex::Split_PrioritizeChild, first_inst + CompiledRegex::search_prefix_size);
- push_inst(CompiledRegex::FindNextStart);
- push_inst(CompiledRegex::Split_PrioritizeParent, first_inst + 1);
- kak_assert(m_program.instructions.size() == first_inst + CompiledRegex::search_prefix_size);
- }
-
uint32_t push_inst(CompiledRegex::Op op, uint32_t param = 0)
{
constexpr auto max_instructions = std::numeric_limits<int16_t>::max();
@@ -1137,9 +1125,6 @@ String dump_regex(const CompiledRegex& program)
res += format("{} ({})\n", name, str);
break;
}
- case CompiledRegex::FindNextStart:
- res += "find next start\n";
- break;
case CompiledRegex::Match:
res += "match\n";
}
diff --git a/src/regex_impl.hh b/src/regex_impl.hh
index 99ec2585..9ab89c71 100644
--- a/src/regex_impl.hh
+++ b/src/regex_impl.hh
@@ -49,7 +49,6 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
enum Op : char
{
Match,
- FindNextStart,
Literal,
Literal_IgnoreCase,
AnyChar,
@@ -97,8 +96,6 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
};
static_assert(sizeof(Instruction) == 8, "");
- static constexpr uint16_t search_prefix_size = 3;
-
explicit operator bool() const { return not instructions.empty(); }
struct NamedCapture
@@ -217,8 +214,6 @@ public:
instructions = instructions.subrange(0, m_program.first_backward_inst);
else
instructions = instructions.subrange(m_program.first_backward_inst);
- if (not search)
- instructions = instructions.subrange(CompiledRegex::search_prefix_size);
const ExecConfig config{
Sentinel{forward ? begin : end},
@@ -452,14 +447,6 @@ private:
(inst.op == CompiledRegex::LookBehind_IgnoreCase))
return failed();
break;
- case CompiledRegex::FindNextStart:
- // search thread should by construction be the lowest priority thread
- kak_assert(m_threads.current_is_empty());
- if (not m_threads.next_is_empty())
- return consumed();
- m_threads.push_next(thread);
- m_find_next_start = true;
- return;
case CompiledRegex::Match:
if ((pos != config.end and not (mode & RegexMode::Search)) or
(config.flags & RegexExecFlags::NotInitialNull and pos == config.begin))
@@ -484,10 +471,12 @@ private:
release_saves(m_captures);
m_captures = -1;
m_threads.grow_ifn();
- m_threads.push_current({static_cast<int16_t>(&config.instructions[0] - &m_program.instructions[0]), -1});
+ const int16_t first_inst = &config.instructions[0] - &m_program.instructions[0];
+ m_threads.push_current({first_inst, -1});
const auto& start_desc = forward ? m_program.forward_start_desc : m_program.backward_start_desc;
+ constexpr bool search = mode & RegexMode::Search;
constexpr bool any_match = mode & RegexMode::AnyMatch;
uint16_t current_step = -1;
m_found_match = false;
@@ -501,14 +490,15 @@ private:
current_step = 1; // step 0 is never valid
}
- m_find_next_start = false;
while (not m_threads.current_is_empty())
step_thread(pos, current_step, m_threads.pop_current(), config);
for (auto& thread : m_threads.next_threads())
m_program.instructions[thread.inst].scheduled = false;
- if (pos == config.end or m_threads.next_is_empty() or (m_found_match and any_match))
+ if (pos == config.end or
+ (m_threads.next_is_empty() and (not search or m_found_match)) or
+ (m_found_match and any_match))
{
for (auto& t : m_threads.next_threads())
release_saves(t.saves);
@@ -516,12 +506,17 @@ private:
return m_found_match;
}
- m_threads.swap_next();
forward ? utf8::to_next(pos, config.subject_end)
: utf8::to_previous(pos, config.subject_begin);
- if (m_find_next_start and start_desc)
- to_next_start(pos, config, *start_desc);
+ if (search)
+ {
+ if (start_desc and m_threads.next_is_empty())
+ to_next_start(pos, config, *start_desc);
+ m_threads.grow_ifn();
+ m_threads.push_next({first_inst, -1});
+ }
+ m_threads.swap_next();
}
}
@@ -691,7 +686,6 @@ private:
int16_t m_first_free = -1;
int16_t m_captures = -1;
bool m_found_match = false;
- bool m_find_next_start = false;
};
}