diff options
| author | Maxime Coste <frrrwww@gmail.com> | 2014-07-01 23:47:09 +0100 |
|---|---|---|
| committer | Maxime Coste <frrrwww@gmail.com> | 2014-07-05 12:10:06 +0100 |
| commit | 3f70d91f8c716ef2dbc76abb9c878f86ecb946f7 (patch) | |
| tree | 91022e830547b624102d64faebe0c0e427168754 /src | |
| parent | 70db72b175071e097f25eae5eb80e9cb10e39719 (diff) | |
Use unsigned char rather than char in utf8 decoding to avoid sign extension
Diffstat (limited to 'src')
| -rw-r--r-- | src/utf8.hh | 8 |
1 files changed, 4 insertions, 4 deletions
```diff
diff --git a/src/utf8.hh b/src/utf8.hh
index 5aac8c2a..60460a7a 100644
--- a/src/utf8.hh
+++ b/src/utf8.hh
@@ -95,12 +95,12 @@ namespace InvalidBytePolicy
 
 struct Assert
 {
-    Codepoint operator()(char byte) const { kak_assert(false); return byte; }
+    Codepoint operator()(unsigned char byte) const { kak_assert(false); return byte; }
 };
 
 struct Pass
 {
-    Codepoint operator()(char byte) const { return byte; }
+    Codepoint operator()(unsigned char byte) const { return byte; }
 };
 
 }
@@ -114,7 +114,7 @@ Codepoint codepoint(Iterator it)
     // According to rfc3629, UTF-8 allows only up to 4 bytes.
     // (21 bits codepoint)
     Codepoint cp;
-    char byte = *it++;
+    unsigned char byte = *it++;
     if (not (byte & 0x80)) // 0xxxxxxx
         cp = byte;
     else if ((byte & 0xE0) == 0xC0) // 110xxxxx
@@ -141,7 +141,7 @@ template<typename InvalidPolicy = InvalidBytePolicy::Assert,
          typename Iterator>
 ByteCount codepoint_size(Iterator it)
 {
-    char byte = *it;
+    unsigned char byte = *it;
     if (not (byte & 0x80)) // 0xxxxxxx
         return 1;
     else if ((byte & 0xE0) == 0xC0) // 110xxxxx
```
