summary | refs | log | tree | commit | diff
path: root/src/regex_impl.hh
diff options
context:
space:
mode:
author: Maxime Coste <mawww@kakoune.org> 2024-12-01 19:36:44 +1100
committer: Maxime Coste <mawww@kakoune.org> 2024-12-01 19:36:44 +1100
commit: c52faded6fdba1297e1daf5894ccaa208b83ea7e (patch)
tree: 4bdc1eb6da9e031876153a139410618beacf9bdc /src/regex_impl.hh
parent: 8769b94eb25cd7402db6caa4f0d4b417f6365703 (diff)
Add specific start desc optimization for single possible start byte
Use tighter codegen for that pretty common use case.
Diffstat (limited to 'src/regex_impl.hh')
-rw-r--r-- src/regex_impl.hh 23
1 file changed, 22 insertions, 1 deletion
diff --git a/src/regex_impl.hh b/src/regex_impl.hh
index 56b1d867..ae46ae38 100644
--- a/src/regex_impl.hh
+++ b/src/regex_impl.hh
@@ -156,6 +156,7 @@ struct CompiledRegex : UseMemoryDomain<MemoryDomain::Regex>
{
static constexpr Codepoint count = 256;
using OffsetLimits = std::numeric_limits<uint8_t>;
+ char start_byte = 0;
uint8_t offset = 0;
bool map[count];
};
@@ -270,7 +271,7 @@ public:
};
Iterator start = forward ? begin : end;
- if (const auto& start_desc = forward ? m_program.forward_start_desc : m_program.backward_start_desc)
+ if (const auto* start_desc = (forward ? m_program.forward_start_desc : m_program.backward_start_desc).get())
{
if (search)
{
@@ -542,6 +543,26 @@ private:
static Iterator find_next_start(Iterator start, const ExecConfig& config, const StartDesc& start_desc)
{
auto pos = start;
+ if (char start_byte = start_desc.start_byte)
+ {
+ while (pos != config.end)
+ {
+ if constexpr (forward)
+ {
+ if (*pos == start_byte)
+ return utf8::advance(pos, start, -CharCount(start_desc.offset));
+ ++pos;
+ }
+ else
+ {
+ auto prev = utf8::previous(pos, config.end);
+ if (*prev == start_byte)
+ return utf8::advance(pos, start, CharCount(start_desc.offset));
+ pos = prev;
+ }
+ }
+ }
+
while (pos != config.end)
{
static_assert(StartDesc::count <= 256, "start desc should be ascii only");