summaryrefslogtreecommitdiff
path: root/src/regex_impl.cc
diff options
context:
space:
mode:
authorMaxime Coste <mawww@kakoune.org>2024-03-18 22:25:21 +1100
committerMaxime Coste <mawww@kakoune.org>2024-03-21 19:18:20 +1100
commitca7471c25d3e0f8dbe48bfec7f6c9af1cb6b34ae (patch)
tree8c649e7778992da0a134d695357aadb009f6f331 /src/regex_impl.cc
parentee364d911f8218d4bff937a1f4b75ecbe122d1f4 (diff)
Compute StartDesc with an offset to effective start
This means `.{2,4}foo` will now only consider positions at most 4 characters before an `f` as start candidates, instead of considering every character.
Diffstat (limited to 'src/regex_impl.cc')
-rw-r--r--src/regex_impl.cc27
1 files changed, 24 insertions, 3 deletions
diff --git a/src/regex_impl.cc b/src/regex_impl.cc
index b1630a1f..266f920e 100644
--- a/src/regex_impl.cc
+++ b/src/regex_impl.cc
@@ -886,8 +886,8 @@ private:
}
// Mutate start_desc with informations on which Codepoint could start a match.
- // Returns true if the node possibly does not consume the char, in which case
- // the next node would still be relevant for the parent node start chars computation.
+ // Returns true if the subsequent nodes are still relevant for computing the
+ // start desc
template<RegexMode direction>
bool compute_start_desc(ParsedRegex::NodeIndex index,
CompiledRegex::StartDesc& start_desc) const
@@ -916,10 +916,20 @@ private:
add_multi_byte_utf8();
return node.quantifier.allows_none();
case ParsedRegex::AnyChar:
+ if (start_desc.offset + node.quantifier.max <= CompiledRegex::StartDesc::OffsetLimits::max())
+ {
+ start_desc.offset += node.quantifier.max;
+ return true;
+ }
for (auto& b : start_desc.map)
b = true;
return node.quantifier.allows_none();
case ParsedRegex::AnyCharExceptNewLine:
+ if (start_desc.offset + node.quantifier.max <= CompiledRegex::StartDesc::OffsetLimits::max())
+ {
+ start_desc.offset += node.quantifier.max;
+ return true;
+ }
for (Codepoint cp = 0; cp < single_byte_limit; ++cp)
{
if (cp != '\n')
@@ -1138,7 +1148,7 @@ String dump_regex(const CompiledRegex& program)
res += (char)c;
}
}
- res += "]\n";
+ res += format("]+{}\n", static_cast<int>(desc.offset));
};
if (program.forward_start_desc)
dump_start_desc(*program.forward_start_desc, "forward");
@@ -1557,6 +1567,17 @@ auto test_regex = UnitTest{[]{
}
{
+ TestVM<RegexMode::Forward | RegexMode::Search> vm{"(.{3,4}|f)oo"};
+ kak_assert(vm.forward_start_desc and vm.forward_start_desc->offset == 4);
+ for (int c = 0; c < CompiledRegex::StartDesc::count; ++c)
+ kak_assert(vm.forward_start_desc->map[c] == (c == 'f' or c == 'o'));
+
+ kak_assert(vm.exec("xxxoo", RegexExecFlags::None));
+ kak_assert(vm.exec("xfoo", RegexExecFlags::None));
+ kak_assert(not vm.exec("😄xoo", RegexExecFlags::None));
+ }
+
+ {
auto eq = [](const CompiledRegex::NamedCapture& lhs,
const CompiledRegex::NamedCapture& rhs) {
return lhs.name == rhs.name and