summary | refs | log | tree | commit | diff
path: root/src/regex_impl.hh
diff options
context:
space:
mode:
author: Maxime Coste <mawww@kakoune.org> 2025-07-07 10:15:11 +1000
committer: Maxime Coste <mawww@kakoune.org> 2025-07-07 12:15:19 +1000
commit: 3c92a6650d721fdc8fb98e83b30795e6d2984f20 (patch)
tree: 03163b991a085e7f65afd313b5f2c7339c40896e /src/regex_impl.hh
parent: cb6cbb4e17b1080cc18a0195773ab763e7e11e64 (diff)
Add a CharRange regex op to optimize the common simple range case
Instead of jumping into the general CharClass code, detect simple [a-z]-style ranges and use a specific op. Also detect when a range can be converted to ignore case.
Diffstat (limited to 'src/regex_impl.hh')
-rw-r--r-- src/regex_impl.hh 24
1 files changed, 19 insertions, 5 deletions
diff --git a/src/regex_impl.hh b/src/regex_impl.hh
index 5faa7c94..2f5cbcfa 100644
--- a/src/regex_impl.hh
+++ b/src/regex_impl.hh
@@ -76,8 +76,9 @@ struct CompiledRegex : UseMemoryDomain<MemoryDomain::Regex>
Literal,
AnyChar,
AnyCharExceptNewLine,
- CharClass,
+ CharRange,
CharType,
+ CharClass,
Jump,
Split,
Save,
@@ -105,8 +106,15 @@ struct CompiledRegex : UseMemoryDomain<MemoryDomain::Regex>
uint32_t codepoint : 24;
bool ignore_case : 1;
} literal;
- int16_t character_class_index;
+ struct CharRange
+ {
+ uint8_t min;
+ uint8_t max;
+ bool ignore_case : 1;
+ bool negative;
+ } range;
CharacterType character_type;
+ int16_t character_class_index;
int16_t jump_offset;
int16_t save_index;
struct Split
@@ -399,15 +407,21 @@ private:
if (pos != config.end and cp != '\n')
return consumed();
return failed();
- case CompiledRegex::CharClass:
- if (pos != config.end and
- m_program.character_classes[inst.param.character_class_index].matches(cp))
+ case CompiledRegex::CharRange:
+ if (auto actual_cp = (inst.param.range.ignore_case ? to_lower(cp) : cp);
+ pos != config.end and
+ (actual_cp >= inst.param.range.min and actual_cp <= inst.param.range.max) != inst.param.range.negative)
return consumed();
return failed();
case CompiledRegex::CharType:
if (pos != config.end and is_ctype(inst.param.character_type, cp))
return consumed();
return failed();
+ case CompiledRegex::CharClass:
+ if (pos != config.end and
+ m_program.character_classes[inst.param.character_class_index].matches(cp))
+ return consumed();
+ return failed();
case CompiledRegex::Jump:
thread.inst = &inst + inst.param.jump_offset;
break;