summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Maxime Coste <frrrwww@gmail.com> 2014-07-01 23:47:09 +0100
committer: Maxime Coste <frrrwww@gmail.com> 2014-07-05 12:10:06 +0100
commit: 3f70d91f8c716ef2dbc76abb9c878f86ecb946f7 (patch)
tree: 91022e830547b624102d64faebe0c0e427168754 /src
parent: 70db72b175071e097f25eae5eb80e9cb10e39719 (diff)
Use unsigned char rather than char in utf8 decoding to avoid sign extension
Diffstat (limited to 'src')
-rw-r--r-- src/utf8.hh 8
1 files changed, 4 insertions, 4 deletions
diff --git a/src/utf8.hh b/src/utf8.hh
index 5aac8c2a..60460a7a 100644
--- a/src/utf8.hh
+++ b/src/utf8.hh
@@ -95,12 +95,12 @@ namespace InvalidBytePolicy
struct Assert
{
- Codepoint operator()(char byte) const { kak_assert(false); return byte; }
+ Codepoint operator()(unsigned char byte) const { kak_assert(false); return byte; }
};
struct Pass
{
- Codepoint operator()(char byte) const { return byte; }
+ Codepoint operator()(unsigned char byte) const { return byte; }
};
}
@@ -114,7 +114,7 @@ Codepoint codepoint(Iterator it)
// According to rfc3629, UTF-8 allows only up to 4 bytes.
// (21 bits codepoint)
Codepoint cp;
- char byte = *it++;
+ unsigned char byte = *it++;
if (not (byte & 0x80)) // 0xxxxxxx
cp = byte;
else if ((byte & 0xE0) == 0xC0) // 110xxxxx
@@ -141,7 +141,7 @@ template<typename InvalidPolicy = InvalidBytePolicy::Assert,
typename Iterator>
ByteCount codepoint_size(Iterator it)
{
- char byte = *it;
+ unsigned char byte = *it;
if (not (byte & 0x80)) // 0xxxxxxx
return 1;
else if ((byte & 0xE0) == 0xC0) // 110xxxxx