diff options
| author | Maxime Coste <frrrwww@gmail.com> | 2012-10-08 14:25:58 +0200 |
|---|---|---|
| committer | Maxime Coste <frrrwww@gmail.com> | 2012-10-08 14:25:58 +0200 |
| commit | f2e98f700e405a0cb17be9a6b73daed15f50a165 (patch) | |
| tree | 10e59e1d471ca5936c83e26fde2db3c2e15f6f90 /src | |
| parent | 2db1d023294d985ba5624ad52b64946b9bbf7a2a (diff) | |
add utf8_iterator for adapting byte iterators to iterate on unicode characters
Diffstat (limited to 'src')
| -rw-r--r-- | src/utf8_iterator.hh | 140 |
1 files changed, 140 insertions, 0 deletions
diff --git a/src/utf8_iterator.hh b/src/utf8_iterator.hh new file mode 100644 index 00000000..e782e297 --- /dev/null +++ b/src/utf8_iterator.hh @@ -0,0 +1,140 @@ +#ifndef utf8_iterator_hh_INCLUDED +#define utf8_iterator_hh_INCLUDED + +#include "utf8.hh" + +namespace Kakoune +{ + +namespace utf8 +{ + +// adapter for an iterator on bytes which permits to iterate +// on unicode codepoints instead. +template<typename Iterator> +class utf8_iterator +{ +public: + utf8_iterator() = default; + utf8_iterator(Iterator it) : m_it(std::move(it)) {} + + utf8_iterator& operator++() + { + m_it = utf8::next(m_it); + invalidate_value(); + return *this; + } + + utf8_iterator operator++(int) + { + utf8_iterator save = *this; + ++*this; + return save; + } + + utf8_iterator& operator--() + { + m_it = utf8::previous(m_it); + invalidate_value(); + return *this; + } + + utf8_iterator operator--(int) + { + utf8_iterator save = *this; + --*this; + return save; + } + + utf8_iterator operator+(int count) const + { + if (count < 0) + return operator-(-count); + + utf8_iterator res = *this; + while (count--) + ++res; + return res; + } + + utf8_iterator operator-(int count) const + { + if (count < 0) + return operator+(-count); + + utf8_iterator res = *this; + while (count--) + --res; + return res; + } + + bool operator==(const utf8_iterator& other) { return m_it == other.m_it; } + bool operator!=(const utf8_iterator& other) { return m_it != other.m_it; } + + bool operator< (const utf8_iterator& other) const + { + return m_it < other.m_it; + } + + bool operator<= (const utf8_iterator& other) const + { + return m_it <= other.m_it; + } + + bool operator> (const utf8_iterator& other) const + { + return m_it > other.m_it; + } + + bool operator>= (const utf8_iterator& other) const + { + return m_it >= other.m_it; + } + + size_t operator-(utf8_iterator other) const + { + //assert(other < *this); + check_invariant(); + other.check_invariant(); + size_t dist = 0; + while (other.m_it != m_it) + { + ++dist; + ++other; + } + return dist; + } + + Codepoint operator*() const + { + return get_value(); + } + + const Iterator& underlying_iterator() const { return m_it; } + Iterator& underlying_iterator() { return m_it; } + +protected: + void check_invariant() const + { + // always point to a character first byte; + // assert(is_character_start(it)); + } + +private: + void invalidate_value() { m_value = -1; } + Codepoint get_value() const + { + if (m_value == -1) + m_value = utf8::codepoint(m_it); + return m_value; + } + + Iterator m_it; + mutable Codepoint m_value = -1; +}; + +} + +} + +#endif // utf8_iterator_hh_INCLUDED |
