diff options
| author | Maxime Coste <frrrwww@gmail.com> | 2013-05-30 18:49:50 +0200 |
|---|---|---|
| committer | Maxime Coste <frrrwww@gmail.com> | 2013-05-30 18:49:50 +0200 |
| commit | 2d96f853f8ba7de76cc6f5c5bc019369eddc9fa9 (patch) | |
| tree | e227c9c68a62071fc613858741a087e13bb09efc /src | |
| parent | 28e127a48ad1c1fff4e34a68be887a56a888d309 (diff) | |
Add utf8::codepoint_size function
Diffstat (limited to 'src')
| -rw-r--r-- | src/utf8.hh | 20 |
1 files changed, 20 insertions, 0 deletions
```diff
diff --git a/src/utf8.hh b/src/utf8.hh
index 9b74373f..865d8705 100644
--- a/src/utf8.hh
+++ b/src/utf8.hh
@@ -138,6 +138,26 @@ Codepoint codepoint(Iterator it)
     return cp;
 }
 
+template<typename InvalidPolicy = InvalidBytePolicy::Assert,
+         typename Iterator>
+ByteCount codepoint_size(Iterator it)
+{
+    char byte = *it;
+    if (not (byte & 0x80)) // 0xxxxxxx
+        return 1;
+    else if ((byte & 0xE0) == 0xC0) // 110xxxxx
+        return 2;
+    else if ((byte & 0xF0) == 0xE0) // 1110xxxx
+        return 3;
+    else if ((byte & 0xF8) == 0xF0) // 11110xxx
+        return 4;
+    else
+    {
+        InvalidPolicy{}(byte);
+        return -1;
+    }
+}
+
 struct invalid_codepoint{};
 
 template<typename OutputIterator>
```
