summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Maxime Coste <mawww@kakoune.org> 2024-12-01 19:36:44 +1100
committer: Maxime Coste <mawww@kakoune.org> 2024-12-01 19:36:44 +1100
commit: c52faded6fdba1297e1daf5894ccaa208b83ea7e (patch)
tree: 4bdc1eb6da9e031876153a139410618beacf9bdc /src
parent: 8769b94eb25cd7402db6caa4f0d4b417f6365703 (diff)
Add a dedicated start desc optimization for a single possible start byte

When only one byte can start a match, use tighter codegen for that fairly common case.
Diffstat (limited to 'src')
-rw-r--r-- src/regex_impl.cc 3
-rw-r--r-- src/regex_impl.hh 23
2 files changed, 25 insertions, 1 deletion
diff --git a/src/regex_impl.cc b/src/regex_impl.cc
index d86637cd..0789ef6d 100644
--- a/src/regex_impl.cc
+++ b/src/regex_impl.cc
@@ -1031,6 +1031,9 @@ private:
not contains(start_desc.map, false))
return nullptr;
+ if (std::count(std::begin(start_desc.map), std::end(start_desc.map), true) == 1)
+ start_desc.start_byte = find(start_desc.map, true) - std::begin(start_desc.map);
+
return std::make_unique<CompiledRegex::StartDesc>(start_desc);
}
diff --git a/src/regex_impl.hh b/src/regex_impl.hh
index 56b1d867..ae46ae38 100644
--- a/src/regex_impl.hh
+++ b/src/regex_impl.hh
@@ -156,6 +156,7 @@ struct CompiledRegex : UseMemoryDomain<MemoryDomain::Regex>
{
static constexpr Codepoint count = 256;
using OffsetLimits = std::numeric_limits<uint8_t>;
+ char start_byte = 0;
uint8_t offset = 0;
bool map[count];
};
@@ -270,7 +271,7 @@ public:
};
Iterator start = forward ? begin : end;
- if (const auto& start_desc = forward ? m_program.forward_start_desc : m_program.backward_start_desc)
+ if (const auto* start_desc = (forward ? m_program.forward_start_desc : m_program.backward_start_desc).get())
{
if (search)
{
@@ -542,6 +543,26 @@ private:
static Iterator find_next_start(Iterator start, const ExecConfig& config, const StartDesc& start_desc)
{
auto pos = start;
+ if (char start_byte = start_desc.start_byte)
+ {
+ while (pos != config.end)
+ {
+ if constexpr (forward)
+ {
+ if (*pos == start_byte)
+ return utf8::advance(pos, start, -CharCount(start_desc.offset));
+ ++pos;
+ }
+ else
+ {
+ auto prev = utf8::previous(pos, config.end);
+ if (*prev == start_byte)
+ return utf8::advance(pos, start, CharCount(start_desc.offset));
+ pos = prev;
+ }
+ }
+ }
+
while (pos != config.end)
{
static_assert(StartDesc::count <= 256, "start desc should be ascii only");