diff options
| author | Maxime Coste <mawww@kakoune.org> | 2024-12-05 18:40:55 +1100 |
|---|---|---|
| committer | Maxime Coste <mawww@kakoune.org> | 2024-12-05 18:40:55 +1100 |
| commit | 98ed5847de20f04d0595d5ac08da06ff8aa5c1b2 (patch) | |
| tree | 3fdfeedba8d5f7463b203ec0c2b6fa90e100dfb6 /src | |
| parent | dac922e24c48d297de063786be3065554409cc71 (diff) | |
Split utf8::read_codepoint between single byte and multibyte code
Make read_codepoint_multibyte noinline so that the common single-byte
case gets inlined.
Diffstat (limited to 'src')
| -rw-r--r-- | src/utf8.hh | 28 |
1 files changed, 17 insertions, 11 deletions
diff --git a/src/utf8.hh b/src/utf8.hh index 0ba8bdef..e5643214 100644 --- a/src/utf8.hh +++ b/src/utf8.hh @@ -39,22 +39,13 @@ struct Pass } -// returns the codepoint of the character whose first byte -// is pointed by it template<typename InvalidPolicy = utf8::InvalidPolicy::Pass, typename Iterator, typename Sentinel> -Codepoint read_codepoint(Iterator& it, const Sentinel& end) +[[gnu::noinline]] +Codepoint read_codepoint_multibyte(Iterator& it, const Sentinel& end, char byte) noexcept(noexcept(InvalidPolicy{}(0))) { if (it == end) - return InvalidPolicy{}(-1); - // According to rfc3629, UTF-8 allows only up to 4 bytes. - // (21 bits codepoint) - unsigned char byte = read(it); - if ((byte & 0x80) == 0) // 0xxxxxxx - return byte; - - if (it == end) return InvalidPolicy{}(byte); if ((byte & 0xE0) == 0xC0) // 110xxxxx @@ -81,6 +72,21 @@ Codepoint read_codepoint(Iterator& it, const Sentinel& end) return InvalidPolicy{}(byte); } +// returns the codepoint of the character whose first byte +// is pointed by it +template<typename InvalidPolicy = utf8::InvalidPolicy::Pass, + typename Iterator, typename Sentinel> +Codepoint read_codepoint(Iterator& it, const Sentinel& end) + noexcept(noexcept(InvalidPolicy{}(0))) +{ + if (it == end) + return InvalidPolicy{}(-1); + unsigned char byte = read(it); + if ((byte & 0x80) == 0) [[likely]] // 0xxxxxxx + return byte; + return read_codepoint_multibyte(it, end, byte); +} + template<typename InvalidPolicy = utf8::InvalidPolicy::Pass, typename Iterator, typename Sentinel> Codepoint codepoint(Iterator it, const Sentinel& end) |
