diff options
| author | Maxime Coste <mawww@kakoune.org> | 2024-03-18 22:25:21 +1100 |
|---|---|---|
| committer | Maxime Coste <mawww@kakoune.org> | 2024-03-21 19:18:20 +1100 |
| commit | ca7471c25d3e0f8dbe48bfec7f6c9af1cb6b34ae (patch) | |
| tree | 8c649e7778992da0a134d695357aadb009f6f331 /src/regex_impl.cc | |
| parent | ee364d911f8218d4bff937a1f4b75ecbe122d1f4 (diff) | |
Compute StartDesc with an offset to effective start
This means `.{2,4}foo` will now consider 4 or less before f as
a start candidate instead of every characters
Diffstat (limited to 'src/regex_impl.cc')
| -rw-r--r-- | src/regex_impl.cc | 27 |
1 files changed, 24 insertions, 3 deletions
diff --git a/src/regex_impl.cc b/src/regex_impl.cc index b1630a1f..266f920e 100644 --- a/src/regex_impl.cc +++ b/src/regex_impl.cc @@ -886,8 +886,8 @@ private: } // Mutate start_desc with informations on which Codepoint could start a match. - // Returns true if the node possibly does not consume the char, in which case - // the next node would still be relevant for the parent node start chars computation. + // Returns true if the subsequent nodes are still relevant for computing the + // start desc template<RegexMode direction> bool compute_start_desc(ParsedRegex::NodeIndex index, CompiledRegex::StartDesc& start_desc) const @@ -916,10 +916,20 @@ private: add_multi_byte_utf8(); return node.quantifier.allows_none(); case ParsedRegex::AnyChar: + if (start_desc.offset + node.quantifier.max <= CompiledRegex::StartDesc::OffsetLimits::max()) + { + start_desc.offset += node.quantifier.max; + return true; + } for (auto& b : start_desc.map) b = true; return node.quantifier.allows_none(); case ParsedRegex::AnyCharExceptNewLine: + if (start_desc.offset + node.quantifier.max <= CompiledRegex::StartDesc::OffsetLimits::max()) + { + start_desc.offset += node.quantifier.max; + return true; + } for (Codepoint cp = 0; cp < single_byte_limit; ++cp) { if (cp != '\n') @@ -1138,7 +1148,7 @@ String dump_regex(const CompiledRegex& program) res += (char)c; } } - res += "]\n"; + res += format("]+{}\n", static_cast<int>(desc.offset)); }; if (program.forward_start_desc) dump_start_desc(*program.forward_start_desc, "forward"); @@ -1557,6 +1567,17 @@ auto test_regex = UnitTest{[]{ } { + TestVM<RegexMode::Forward | RegexMode::Search> vm{"(.{3,4}|f)oo"}; + kak_assert(vm.forward_start_desc and vm.forward_start_desc->offset == 4); + for (int c = 0; c < CompiledRegex::StartDesc::count; ++c) + kak_assert(vm.forward_start_desc->map[c] == (c == 'f' or c == 'o')); + + kak_assert(vm.exec("xxxoo", RegexExecFlags::None)); + kak_assert(vm.exec("xfoo", RegexExecFlags::None)); + kak_assert(not vm.exec("😄xoo", RegexExecFlags::None)); + } + + { auto eq = [](const CompiledRegex::NamedCapture& lhs, const CompiledRegex::NamedCapture& rhs) { return lhs.name == rhs.name and |
