Use unsigned char rather than char in utf8 decoding to avoid sign extension

author: Maxime Coste <frrrwww@gmail.com> 2014-07-01 23:47:09 +0100
committer: Maxime Coste <frrrwww@gmail.com> 2014-07-05 12:10:06 +0100
commit: 3f70d91f8c716ef2dbc76abb9c878f86ecb946f7 (patch)
tree: 91022e830547b624102d64faebe0c0e427168754 /src
parent: 70db72b175071e097f25eae5eb80e9cb10e39719 (diff)
1 file changed, 4 insertions, 4 deletions
diff --git a/src/utf8.hh b/src/utf8.hh
index 5aac8c2a..60460a7a 100644
--- a/src/utf8.hh
+++ b/src/utf8.hh
@@ -95,12 +95,12 @@ namespace InvalidBytePolicy
 
 struct Assert
 {
-    Codepoint operator()(char byte) const { kak_assert(false); return byte; }
+    Codepoint operator()(unsigned char byte) const { kak_assert(false); return byte; }
 };
 
 struct Pass
 {
-    Codepoint operator()(char byte) const { return byte; }
+    Codepoint operator()(unsigned char byte) const { return byte; }
 };
 
 }
@@ -114,7 +114,7 @@ Codepoint codepoint(Iterator it)
     // According to rfc3629, UTF-8 allows only up to 4 bytes.
     // (21 bits codepoint)
     Codepoint cp;
-    char byte = *it++;
+    unsigned char byte = *it++;
     if (not (byte & 0x80)) // 0xxxxxxx
         cp = byte;
     else if ((byte & 0xE0) == 0xC0) // 110xxxxx
@@ -141,7 +141,7 @@ template<typename InvalidPolicy = InvalidBytePolicy::Assert,
          typename Iterator>
 ByteCount codepoint_size(Iterator it)
 {
-    char byte = *it;
+    unsigned char byte = *it;
     if (not (byte & 0x80)) // 0xxxxxxx
         return 1;
     else if ((byte & 0xE0) == 0xC0) // 110xxxxx
author	Maxime Coste <frrrwww@gmail.com>	2014-07-01 23:47:09 +0100
committer	Maxime Coste <frrrwww@gmail.com>	2014-07-05 12:10:06 +0100
commit	3f70d91f8c716ef2dbc76abb9c878f86ecb946f7 (patch)
tree	91022e830547b624102d64faebe0c0e427168754 /src
parent	70db72b175071e097f25eae5eb80e9cb10e39719 (diff)