diff options
| author | Maxime Coste <mawww@kakoune.org> | 2020-07-30 19:51:25 +1000 |
|---|---|---|
| committer | Maxime Coste <mawww@kakoune.org> | 2021-11-21 09:44:22 +1100 |
| commit | ba379cba52974285414ffac16faa752f16ea9a28 (patch) | |
| tree | 1ebb3992ac1163c2ba9ead32117b8f887d9638a6 /src/regex_impl.cc | |
| parent | 8566ae14a01a813a4b44846785675ae5d61b8eac (diff) | |
Micro-optimize regex character class/type matching
Also force-inline step_thread as function call overhead has a
measurable impact.
Diffstat (limited to 'src/regex_impl.cc')
| -rw-r--r-- | src/regex_impl.cc | 27 |
1 files changed, 12 insertions, 15 deletions
diff --git a/src/regex_impl.cc b/src/regex_impl.cc index ff65cdbe..5507273c 100644 --- a/src/regex_impl.cc +++ b/src/regex_impl.cc @@ -1173,27 +1173,24 @@ bool is_character_class(const CharacterClass& character_class, Codepoint cp) if (character_class.ignore_case) cp = to_lower(cp); - auto it = std::lower_bound(character_class.ranges.begin(), - character_class.ranges.end(), cp, - [](auto& range, Codepoint cp) - { return range.max < cp; }); - - auto found = (it != character_class.ranges.end() and it->min <= cp) or - is_ctype(character_class.ctypes, cp); + auto it = std::find_if(character_class.ranges.begin(), + character_class.ranges.end(), + [cp](auto& range) { return range.min <= cp and cp <= range.max; }); + bool found = it != character_class.ranges.end() or (character_class.ctypes != CharacterType::None and + is_ctype(character_class.ctypes, cp)); return found != character_class.negative; } bool is_ctype(CharacterType ctype, Codepoint cp) { - return ((ctype & CharacterType::Whitespace) and is_blank(cp)) or - ((ctype & CharacterType::HorizontalWhitespace) and is_horizontal_blank(cp)) or - ((ctype & CharacterType::Digit) and iswdigit(cp)) or - ((ctype & CharacterType::Word) and is_word(cp)) or - ((ctype & CharacterType::NotWhitespace) and not is_blank(cp)) or - ((ctype & CharacterType::NotHorizontalWhitespace) and not is_horizontal_blank(cp)) or - ((ctype & CharacterType::NotDigit) and not iswdigit(cp)) or - ((ctype & CharacterType::NotWord) and not is_word(cp)); + auto check = [&](CharacterType bit, CharacterType not_bit, auto&& func) { + return (ctype & (bit | not_bit)) and func(cp) == (bool)(ctype & bit); + }; + return check(CharacterType::Word, CharacterType::NotWord, [](Codepoint cp) { return is_word(cp); }) or + check(CharacterType::Whitespace, CharacterType::NotWhitespace, is_blank) or + check(CharacterType::HorizontalWhitespace, CharacterType::NotHorizontalWhitespace, is_horizontal_blank) or + check(CharacterType::Digit, CharacterType::NotDigit, iswdigit); 
} namespace |
