diff options
| author | Maxime Coste <mawww@kakoune.org> | 2025-07-07 10:15:11 +1000 |
|---|---|---|
| committer | Maxime Coste <mawww@kakoune.org> | 2025-07-07 12:15:19 +1000 |
| commit | 3c92a6650d721fdc8fb98e83b30795e6d2984f20 (patch) | |
| tree | 03163b991a085e7f65afd313b5f2c7339c40896e /src/regex_impl.hh | |
| parent | cb6cbb4e17b1080cc18a0195773ab763e7e11e64 (diff) | |
Add a CharRange regex op to optimize the common simple range case
Instead of jumping into the general CharClass code, detect simple
[a-z]-style ranges and use a specific op.
Also detect when a range can be converted to an ignore-case range.
Diffstat (limited to 'src/regex_impl.hh')
| -rw-r--r-- | src/regex_impl.hh | 24 |
1 file changed, 19 insertions, 5 deletions
diff --git a/src/regex_impl.hh b/src/regex_impl.hh index 5faa7c94..2f5cbcfa 100644 --- a/src/regex_impl.hh +++ b/src/regex_impl.hh @@ -76,8 +76,9 @@ struct CompiledRegex : UseMemoryDomain<MemoryDomain::Regex> Literal, AnyChar, AnyCharExceptNewLine, - CharClass, + CharRange, CharType, + CharClass, Jump, Split, Save, @@ -105,8 +106,15 @@ struct CompiledRegex : UseMemoryDomain<MemoryDomain::Regex> uint32_t codepoint : 24; bool ignore_case : 1; } literal; - int16_t character_class_index; + struct CharRange + { + uint8_t min; + uint8_t max; + bool ignore_case : 1; + bool negative; + } range; CharacterType character_type; + int16_t character_class_index; int16_t jump_offset; int16_t save_index; struct Split @@ -399,15 +407,21 @@ private: if (pos != config.end and cp != '\n') return consumed(); return failed(); - case CompiledRegex::CharClass: - if (pos != config.end and - m_program.character_classes[inst.param.character_class_index].matches(cp)) + case CompiledRegex::CharRange: + if (auto actual_cp = (inst.param.range.ignore_case ? to_lower(cp) : cp); + pos != config.end and + (actual_cp >= inst.param.range.min and actual_cp <= inst.param.range.max) != inst.param.range.negative) return consumed(); return failed(); case CompiledRegex::CharType: if (pos != config.end and is_ctype(inst.param.character_type, cp)) return consumed(); return failed(); + case CompiledRegex::CharClass: + if (pos != config.end and + m_program.character_classes[inst.param.character_class_index].matches(cp)) + return consumed(); + return failed(); case CompiledRegex::Jump: thread.inst = &inst + inst.param.jump_offset; break; |
