summaryrefslogtreecommitdiff
path: root/src/regex_impl.cc
diff options
context:
space:
mode:
author Maxime Coste <mawww@kakoune.org> 2020-07-30 19:51:25 +1000
committer Maxime Coste <mawww@kakoune.org> 2021-11-21 09:44:22 +1100
commit ba379cba52974285414ffac16faa752f16ea9a28 (patch)
tree 1ebb3992ac1163c2ba9ead32117b8f887d9638a6 /src/regex_impl.cc
parent 8566ae14a01a813a4b44846785675ae5d61b8eac (diff)
Micro-optimize regex character class/type matching
Also force-inline step_thread as function call overhead has a measurable impact.
Diffstat (limited to 'src/regex_impl.cc')
-rw-r--r-- src/regex_impl.cc 27
1 files changed, 12 insertions, 15 deletions
diff --git a/src/regex_impl.cc b/src/regex_impl.cc
index ff65cdbe..5507273c 100644
--- a/src/regex_impl.cc
+++ b/src/regex_impl.cc
@@ -1173,27 +1173,24 @@ bool is_character_class(const CharacterClass& character_class, Codepoint cp)
if (character_class.ignore_case)
cp = to_lower(cp);
- auto it = std::lower_bound(character_class.ranges.begin(),
- character_class.ranges.end(), cp,
- [](auto& range, Codepoint cp)
- { return range.max < cp; });
-
- auto found = (it != character_class.ranges.end() and it->min <= cp) or
- is_ctype(character_class.ctypes, cp);
+ auto it = std::find_if(character_class.ranges.begin(),
+ character_class.ranges.end(),
+ [cp](auto& range) { return range.min <= cp and cp <= range.max; });
+ bool found = it != character_class.ranges.end() or (character_class.ctypes != CharacterType::None and
+ is_ctype(character_class.ctypes, cp));
return found != character_class.negative;
}
bool is_ctype(CharacterType ctype, Codepoint cp)
{
- return ((ctype & CharacterType::Whitespace) and is_blank(cp)) or
- ((ctype & CharacterType::HorizontalWhitespace) and is_horizontal_blank(cp)) or
- ((ctype & CharacterType::Digit) and iswdigit(cp)) or
- ((ctype & CharacterType::Word) and is_word(cp)) or
- ((ctype & CharacterType::NotWhitespace) and not is_blank(cp)) or
- ((ctype & CharacterType::NotHorizontalWhitespace) and not is_horizontal_blank(cp)) or
- ((ctype & CharacterType::NotDigit) and not iswdigit(cp)) or
- ((ctype & CharacterType::NotWord) and not is_word(cp));
+ auto check = [&](CharacterType bit, CharacterType not_bit, auto&& func) {
+ return (ctype & (bit | not_bit)) and func(cp) == (bool)(ctype & bit);
+ };
+ return check(CharacterType::Word, CharacterType::NotWord, [](Codepoint cp) { return is_word(cp); }) or
+ check(CharacterType::Whitespace, CharacterType::NotWhitespace, is_blank) or
+ check(CharacterType::HorizontalWhitespace, CharacterType::NotHorizontalWhitespace, is_horizontal_blank) or
+ check(CharacterType::Digit, CharacterType::NotDigit, iswdigit);
}
namespace