diff options
| author | Maxime Coste <mawww@kakoune.org> | 2024-12-01 19:36:44 +1100 |
|---|---|---|
| committer | Maxime Coste <mawww@kakoune.org> | 2024-12-01 19:36:44 +1100 |
| commit | c52faded6fdba1297e1daf5894ccaa208b83ea7e (patch) | |
| tree | 4bdc1eb6da9e031876153a139410618beacf9bdc /src | |
| parent | 8769b94eb25cd7402db6caa4f0d4b417f6365703 (diff) | |
Add a dedicated start desc optimization for a single possible start byte
Use tighter codegen for this fairly common case.
Diffstat (limited to 'src')
| -rw-r--r-- | src/regex_impl.cc | 3 | ||||
| -rw-r--r-- | src/regex_impl.hh | 23 |
2 files changed, 25 insertions, 1 deletion
diff --git a/src/regex_impl.cc b/src/regex_impl.cc index d86637cd..0789ef6d 100644 --- a/src/regex_impl.cc +++ b/src/regex_impl.cc @@ -1031,6 +1031,9 @@ private: not contains(start_desc.map, false)) return nullptr; + if (std::count(std::begin(start_desc.map), std::end(start_desc.map), true) == 1) + start_desc.start_byte = find(start_desc.map, true) - std::begin(start_desc.map); + return std::make_unique<CompiledRegex::StartDesc>(start_desc); } diff --git a/src/regex_impl.hh b/src/regex_impl.hh index 56b1d867..ae46ae38 100644 --- a/src/regex_impl.hh +++ b/src/regex_impl.hh @@ -156,6 +156,7 @@ struct CompiledRegex : UseMemoryDomain<MemoryDomain::Regex> { static constexpr Codepoint count = 256; using OffsetLimits = std::numeric_limits<uint8_t>; + char start_byte = 0; uint8_t offset = 0; bool map[count]; }; @@ -270,7 +271,7 @@ public: }; Iterator start = forward ? begin : end; - if (const auto& start_desc = forward ? m_program.forward_start_desc : m_program.backward_start_desc) + if (const auto* start_desc = (forward ? m_program.forward_start_desc : m_program.backward_start_desc).get()) { if (search) { @@ -542,6 +543,26 @@ private: static Iterator find_next_start(Iterator start, const ExecConfig& config, const StartDesc& start_desc) { auto pos = start; + if (char start_byte = start_desc.start_byte) + { + while (pos != config.end) + { + if constexpr (forward) + { + if (*pos == start_byte) + return utf8::advance(pos, start, -CharCount(start_desc.offset)); + ++pos; + } + else + { + auto prev = utf8::previous(pos, config.end); + if (*prev == start_byte) + return utf8::advance(pos, start, CharCount(start_desc.offset)); + pos = prev; + } + } + } + while (pos != config.end) { static_assert(StartDesc::count <= 256, "start desc should be ascii only"); |
