summaryrefslogtreecommitdiff
path: root/src/regex_impl.cc
diff options
context:
space:
mode:
authorMaxime Coste <mawww@kakoune.org>2024-06-15 14:03:10 +1000
committerMaxime Coste <mawww@kakoune.org>2024-06-15 14:36:33 +1000
commitc4684d0d849798bf7543131ce51158cd90cce82e (patch)
tree60e4244fc3db1e4afb1bc332675c694b5a5d613e /src/regex_impl.cc
parentc84942c2ac61091808af0a80303f3d5b1b84443a (diff)
Store instruction pointers directly in ThreadedRegexVM::Thread
The previous tradeoff of having a very small Thread struct is not necessary anymore as we do not memcpy Threads on swap_next since d708b77186c1685dcbd2298246ada7d204acec2f. This requires offsets to be used instead of indices for jump/split ops.
Diffstat (limited to 'src/regex_impl.cc')
-rw-r--r--src/regex_impl.cc47
1 files changed, 29 insertions, 18 deletions
diff --git a/src/regex_impl.cc b/src/regex_impl.cc
index 3388cde5..34cd104a 100644
--- a/src/regex_impl.cc
+++ b/src/regex_impl.cc
@@ -706,7 +706,7 @@ private:
{
auto& node = get_node(index);
- const uint32_t start_pos = (uint32_t)m_program.instructions.size();
+ const OpIndex start_pos = op_count();
const bool ignore_case = node.ignore_case;
const bool save = (node.op == ParsedRegex::Alternation or node.op == ParsedRegex::Sequence) and
@@ -746,14 +746,14 @@ private:
if (child != index+1)
push_inst(CompiledRegex::Split);
}
- auto split_pos = m_program.instructions.size();
+ auto split_pos = op_count();
const auto end = node.children_end;
for (auto child : Children<>{m_parsed_regex, index})
{
auto node = compile_node<direction>(child);
if (child != index+1)
- m_program.instructions[--split_pos].param.split = CompiledRegex::Param::Split{.target = node, .prioritize_parent = true};
+ m_program.instructions[--split_pos].param.split = CompiledRegex::Param::Split{.offset = offset(node, split_pos), .prioritize_parent = true};
if (get_node(child).children_end != end)
{
auto jump = push_inst(CompiledRegex::Jump);
@@ -801,8 +801,8 @@ private:
break;
}
- for (auto& offset : goto_inner_end_offsets)
- m_program.instructions[offset].param.jump_target = m_program.instructions.size();
+ for (auto& index : goto_inner_end_offsets)
+ m_program.instructions[index].param.jump_offset = offset(op_count(), index);
if (save)
push_inst(CompiledRegex::Save, {.save_index=int16_t(node.value * 2 + (forward ? 1 : 0))});
@@ -810,19 +810,29 @@ private:
return start_pos;
}
+ OpIndex op_count() const
+ {
+ return static_cast<OpIndex>(m_program.instructions.size());
+ }
+
+ static OpIndex offset(OpIndex to, OpIndex from)
+ {
+ return static_cast<OpIndex>(to - from);
+ }
+
template<RegexMode direction>
OpIndex compile_node(ParsedRegex::NodeIndex index)
{
auto& node = get_node(index);
- const OpIndex start_pos = (OpIndex)m_program.instructions.size();
+ const OpIndex start_pos = op_count();
Vector<OpIndex> goto_ends;
auto& quantifier = node.quantifier;
if (quantifier.allows_none())
{
- auto split_pos = push_inst(CompiledRegex::Split, {.split={.target=0, .prioritize_parent=quantifier.greedy}});
+ auto split_pos = push_inst(CompiledRegex::Split, {.split={.offset=0, .prioritize_parent=quantifier.greedy}});
goto_ends.push_back(split_pos);
}
@@ -832,18 +842,18 @@ private:
inner_pos = compile_node_inner<direction>(index);
if (quantifier.allows_infinite_repeat())
- push_inst(CompiledRegex::Split, {.split = {.target=inner_pos, .prioritize_parent=not quantifier.greedy}});
+ push_inst(CompiledRegex::Split, {.split = {.offset=offset(inner_pos, op_count()), .prioritize_parent=not quantifier.greedy}});
// Write the node as an optional match for the min -> max counts
else for (int i = std::max((int16_t)1, quantifier.min); // STILL UGLY !
i < quantifier.max; ++i)
{
- auto split_pos = push_inst(CompiledRegex::Split, {.split={.target=0, .prioritize_parent=quantifier.greedy}});
+ auto split_pos = push_inst(CompiledRegex::Split, {.split={.offset=0, .prioritize_parent=quantifier.greedy}});
goto_ends.push_back(split_pos);
compile_node_inner<direction>(index);
}
- for (auto offset : goto_ends)
- m_program.instructions[offset].param.split.target = m_program.instructions.size();
+ for (auto index : goto_ends)
+ m_program.instructions[index].param.split.offset = offset(op_count(), index);
return start_pos;
}
@@ -851,7 +861,7 @@ private:
OpIndex push_inst(CompiledRegex::Op op, CompiledRegex::Param param = {})
{
constexpr auto max_instructions = std::numeric_limits<OpIndex>::max();
- const auto res = m_program.instructions.size();
+ const auto res = op_count();
if (res >= max_instructions)
throw regex_error(format("regex compiled to more than {} instructions", max_instructions));
m_program.instructions.push_back({ op, 0, param });
@@ -1031,7 +1041,7 @@ private:
{
auto& inst = m_program.instructions[i];
if (is_jump(inst.op))
- m_program.instructions[inst.param.jump_target].last_step = 0xffff; // tag as jump target
+ m_program.instructions[i + inst.param.jump_offset].last_step = 0xffff; // tag as jump target
}
for (auto block_begin = begin; block_begin < end; )
@@ -1071,11 +1081,11 @@ private:
String dump_regex(const CompiledRegex& program)
{
String res;
- int count = 0;
+ int index = 0;
for (auto& inst : program.instructions)
{
char buf[20];
- format_to(buf, " {:03} ", count++);
+ format_to(buf, " {:03} ", index);
res += buf;
switch (inst.op)
{
@@ -1095,13 +1105,13 @@ String dump_regex(const CompiledRegex& program)
res += format("character type {}\n", to_underlying(inst.param.character_type));
break;
case CompiledRegex::Jump:
- res += format("jump {}\n", inst.param.jump_target);
+ res += format("jump {} ({:03})\n", inst.param.jump_offset, index + inst.param.jump_offset);
break;
case CompiledRegex::Split:
{
- res += format("split (prioritize {}) {}\n",
+ res += format("split (prioritize {}) {} ({:03})\n",
(inst.param.split.prioritize_parent) ? "parent" : "child",
- inst.param.split.target);
+ inst.param.split.offset, index + inst.param.split.offset);
break;
}
case CompiledRegex::Save:
@@ -1135,6 +1145,7 @@ String dump_regex(const CompiledRegex& program)
case CompiledRegex::Match:
res += "match\n";
}
+ ++index;
}
auto dump_start_desc = [&](const CompiledRegex::StartDesc& desc, StringView name) {
res += name + " start desc: [";