summary refs log tree commit diff
diff options
context:
space:
mode:
author Maxime Coste <mawww@kakoune.org> 2018-10-31 21:13:14 +1100
committer Maxime Coste <mawww@kakoune.org> 2018-11-01 08:22:43 +1100
commit 4cd7583bbcd85671eea51c9d7b0d1c2fbcc65b6d (patch)
tree 18fa43e9ec8f16578452b8cfe44399948c4060b0
parent 4cfb46ff2e6c63c28c1881b366e33d817c45b637 (diff)
Improve regex vm to next start performance by avoiding iterator copies
-rw-r--r-- src/regex_impl.hh 16
-rw-r--r-- src/utf8_iterator.hh 5
2 files changed, 17 insertions, 4 deletions
diff --git a/src/regex_impl.hh b/src/regex_impl.hh
index b0d7feb5..4ccbb617 100644
--- a/src/regex_impl.hh
+++ b/src/regex_impl.hh
@@ -525,10 +525,15 @@ private:
void to_next_start(EffectiveIt& start, const EffectiveIt& end, const StartDesc& start_desc)
{
- Codepoint cp;
- while (start != end and (cp = *start) >= 0 and
- not start_desc.map[cp < StartDesc::count ? cp : StartDesc::other])
- ++start;
+ while (start != end)
+ {
+ const Codepoint cp = read(start);
+ if (start_desc.map[(cp >= 0 and cp < StartDesc::count) ? cp : StartDesc::other])
+ {
+ --start;
+ return;
+ }
+ }
}
template<MatchDirection look_direction, bool ignore_case>
@@ -596,6 +601,9 @@ private:
return is_word(*(pos-1)) != is_word(*pos);
}
+ static Codepoint read(Utf8It& it) { return it.read(); }
+ static Codepoint read(std::reverse_iterator<Utf8It>& it) { Codepoint cp = *it; ++it; return cp; }
+
static const Iterator& get_base(const Utf8It& it) { return it.base(); }
static Iterator get_base(const std::reverse_iterator<Utf8It>& it) { return it.base().base(); }
diff --git a/src/utf8_iterator.hh b/src/utf8_iterator.hh
index c145f900..9d10df2a 100644
--- a/src/utf8_iterator.hh
+++ b/src/utf8_iterator.hh
@@ -130,6 +130,11 @@ public:
return get_value();
}
+ CodepointType read() noexcept(noexcept_policy)
+ {
+ return (CodepointType)utf8::read_codepoint<InvalidPolicy>(m_it, m_end);
+ }
+
const BaseIt& base() const noexcept(noexcept_policy) { return m_it; }
private: