summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Maxime Coste <frrrwww@gmail.com> 2013-05-30 18:49:50 +0200
committer Maxime Coste <frrrwww@gmail.com> 2013-05-30 18:49:50 +0200
commit 2d96f853f8ba7de76cc6f5c5bc019369eddc9fa9 (patch)
tree e227c9c68a62071fc613858741a087e13bb09efc /src
parent 28e127a48ad1c1fff4e34a68be887a56a888d309 (diff)
Add utf8::codepoint_size function
Diffstat (limited to 'src')
-rw-r--r-- src/utf8.hh 20
1 files changed, 20 insertions, 0 deletions
diff --git a/src/utf8.hh b/src/utf8.hh
index 9b74373f..865d8705 100644
--- a/src/utf8.hh
+++ b/src/utf8.hh
@@ -138,6 +138,26 @@ Codepoint codepoint(Iterator it)
return cp;
}
+template<typename InvalidPolicy = InvalidBytePolicy::Assert, // Byte length of the UTF-8 sequence whose lead byte is *it.
+ typename Iterator> // InvalidPolicy: functor type invoked with the byte when it is not a valid lead byte.
+ByteCount codepoint_size(Iterator it) // Returns 1-4 for a recognised lead byte; -1 if the policy call returns. Only *it is read.
+{
+ char byte = *it; // lead byte; the masks below test only its low 8 bits, so char signedness does not affect the comparisons
+ if (not (byte & 0x80)) // 0xxxxxxx: top bit clear, single-byte sequence
+ return 1;
+ else if ((byte & 0xE0) == 0xC0) // 110xxxxx: lead byte of a 2-byte sequence
+ return 2;
+ else if ((byte & 0xF0) == 0xE0) // 1110xxxx: lead byte of a 3-byte sequence
+ return 3;
+ else if ((byte & 0xF8) == 0xF0) // 11110xxx: lead byte of a 4-byte sequence
+ return 4;
+ else // remaining patterns: 10xxxxxx (continuation byte) or 11111xxx
+ {
+ InvalidPolicy{}(byte); // hand the offending byte to a default-constructed policy object
+ return -1; // reached only if the policy call returns; NOTE(review): policy behaviour is defined outside this hunk - confirm
+ }
+}
+
struct invalid_codepoint{};
template<typename OutputIterator>