diff options
| author | Maxime Coste <mawww@kakoune.org> | 2018-10-31 21:13:14 +1100 |
|---|---|---|
| committer | Maxime Coste <mawww@kakoune.org> | 2018-11-01 08:22:43 +1100 |
| commit | 4cd7583bbcd85671eea51c9d7b0d1c2fbcc65b6d (patch) | |
| tree | 18fa43e9ec8f16578452b8cfe44399948c4060b0 | |
| parent | 4cfb46ff2e6c63c28c1881b366e33d817c45b637 (diff) | |
Improve regex VM `to_next_start` performance by avoiding iterator copies
| -rw-r--r-- | src/regex_impl.hh | 16 | ||||
| -rw-r--r-- | src/utf8_iterator.hh | 5 |
2 files changed, 17 insertions, 4 deletions
```diff
diff --git a/src/regex_impl.hh b/src/regex_impl.hh
index b0d7feb5..4ccbb617 100644
--- a/src/regex_impl.hh
+++ b/src/regex_impl.hh
@@ -525,10 +525,15 @@ private:
 
     void to_next_start(EffectiveIt& start, const EffectiveIt& end, const StartDesc& start_desc)
     {
-        Codepoint cp;
-        while (start != end and (cp = *start) >= 0 and
-               not start_desc.map[cp < StartDesc::count ? cp : StartDesc::other])
-            ++start;
+        while (start != end)
+        {
+            const Codepoint cp = read(start);
+            if (start_desc.map[(cp >= 0 and cp < StartDesc::count) ? cp : StartDesc::other])
+            {
+                --start;
+                return;
+            }
+        }
     }
 
     template<MatchDirection look_direction, bool ignore_case>
@@ -596,6 +601,9 @@ private:
         return is_word(*(pos-1)) != is_word(*pos);
     }
 
+    static Codepoint read(Utf8It& it) { return it.read(); }
+    static Codepoint read(std::reverse_iterator<Utf8It>& it) { Codepoint cp = *it; ++it; return cp; }
+
     static const Iterator& get_base(const Utf8It& it) { return it.base(); }
     static Iterator get_base(const std::reverse_iterator<Utf8It>& it) { return it.base().base(); }
 
diff --git a/src/utf8_iterator.hh b/src/utf8_iterator.hh
index c145f900..9d10df2a 100644
--- a/src/utf8_iterator.hh
+++ b/src/utf8_iterator.hh
@@ -130,6 +130,11 @@ public:
         return get_value();
     }
 
+    CodepointType read() noexcept(noexcept_policy)
+    {
+        return (CodepointType)utf8::read_codepoint<InvalidPolicy>(m_it, m_end);
+    }
+
     const BaseIt& base() const noexcept(noexcept_policy) { return m_it; }
 
 private:
```
