diff options
| author | Maxime Coste <mawww@kakoune.org> | 2025-07-07 12:07:29 +1000 |
|---|---|---|
| committer | Maxime Coste <mawww@kakoune.org> | 2025-07-07 12:14:32 +1000 |
| commit | cb6cbb4e17b1080cc18a0195773ab763e7e11e64 (patch) | |
| tree | 2276f5354e3ef7442aa990bb6230df640323f345 | |
| parent | 0f47c28fe2ba7d8d0b6d8d1a904b4b261f8552dc (diff) | |
Avoid branches in ThreadedRegexVM::DualThreadStack iteration
decrement and post_increment do not get cmov optimised as expected,
we can avoid this altogether by taking advantage of the fact that
capacity is always a power-of-two and we can hence use a bitwise and
we can use a bitwise and to loop around capacity.
| -rw-r--r-- | src/regex_impl.hh | 38 |
1 files changed, 17 insertions, 21 deletions
diff --git a/src/regex_impl.hh b/src/regex_impl.hh index b04d99e7..5faa7c94 100644 --- a/src/regex_impl.hh +++ b/src/regex_impl.hh @@ -361,12 +361,8 @@ private: void step_current_thread(const Iterator& pos, Codepoint cp, uint16_t current_step, const ExecConfig& config) { Thread thread = m_threads.pop_current(); - auto failed = [this, &thread]() { - release_saves(thread.saves); - }; - auto consumed = [this, &thread]() { - m_threads.push_next(thread); - }; + auto failed = [&] { release_saves(thread.saves); }; + auto consumed = [&] { m_threads.push_next(thread); }; while (true) { @@ -691,13 +687,13 @@ private: void ensure_initial_capacity() { - if (m_capacity != 0) + if (m_capacity_mask != 0) return; - constexpr int32_t initial_capacity = 64 / sizeof(Thread); - static_assert(initial_capacity >= 4); + constexpr uint32_t initial_capacity = 64 / sizeof(Thread); + static_assert(initial_capacity >= 4 and std::has_single_bit(initial_capacity)); m_data.reset(new Thread[initial_capacity]); - m_capacity = initial_capacity; + m_capacity_mask = initial_capacity-1; } [[gnu::always_inline]] @@ -710,38 +706,38 @@ private: [[gnu::noinline]] void grow(bool pushed_current) { - const auto new_capacity = m_capacity * 2; + auto capacity = m_capacity_mask + 1; + const auto new_capacity = capacity * 2; Thread* new_data = new Thread[new_capacity]; Thread* old_data = m_data.get(); - std::rotate_copy(old_data, old_data + m_current, old_data + m_capacity, new_data); + std::rotate_copy(old_data, old_data + m_current, old_data + capacity, new_data); m_next_begin -= m_current; if ((pushed_current and m_next_begin == 0) or m_next_begin < 0) - m_next_begin += m_capacity; - m_next_end = m_capacity; + m_next_begin += capacity; + m_next_end = capacity; m_current = 0; m_data.reset(new_data); - m_capacity = new_capacity; + kak_assert(std::has_single_bit(new_capacity)); + m_capacity_mask = new_capacity-1; } private: int32_t decrement(int32_t& index) { - if (index == 0) - index = m_capacity; - return --index; + index = (index - 1) & m_capacity_mask; + return index; } int32_t post_increment(int32_t& index) { auto res = index; - if (++index == m_capacity) - index = 0; + index = (index + 1) & m_capacity_mask; return res; } std::unique_ptr<Thread[]> m_data; - int32_t m_capacity = 0; // Maximum capacity should be 2*instruction count, so 65536 + uint32_t m_capacity_mask = 0; // Maximum capacity should be 2*instruction count, so 65536 int32_t m_current = 0; int32_t m_next_begin = 0; int32_t m_next_end = 0; |
