summary refs log tree commit diff
path: root/src/utf8.hh
diff options
context:
space:
mode:
author Maxime Coste <frrrwww@gmail.com> 2012-10-13 18:31:29 +0200
committer Maxime Coste <frrrwww@gmail.com> 2012-10-13 19:05:14 +0200
commit dfafcdb6e6b52ebdd5b664b7561e682c32762663 (patch)
tree fd94329d686633260b7ec9dd4162287eb6225898 /src/utf8.hh
parent 4f1ab5b74935d3250c41bbee5f347ec4f815063c (diff)
utf8::codepoint: configurable invalid byte policy
Diffstat (limited to 'src/utf8.hh')
-rw-r--r-- src/utf8.hh 20
1 files changed, 18 insertions, 2 deletions
diff --git a/src/utf8.hh b/src/utf8.hh
index 0f136c68..53737414 100644
--- a/src/utf8.hh
+++ b/src/utf8.hh
@@ -81,9 +81,25 @@ bool is_character_start(Iterator it)
struct invalid_utf8_sequence{};
+namespace InvalidBytePolicy
+{
+// Functors invoked by utf8::codepoint on a byte it treats as invalid; Throw raises invalid_utf8_sequence.
+struct Throw
+{
+ Codepoint operator()(char byte) const { throw invalid_utf8_sequence{}; }
+};
+// Pass yields the byte's own value; it goes through unsigned char so bytes >= 0x80 do not sign-extend.
+struct Pass
+{
+ Codepoint operator()(char byte) const { return static_cast<unsigned char>(byte); }
+};
+
+}
+
// returns the codepoint of the character whose first byte
// is pointed to by it
-template<typename Iterator>
+template<typename InvalidPolicy = InvalidBytePolicy::Throw,
+ typename Iterator>
Codepoint codepoint(Iterator it)
{
// According to rfc3629, UTF-8 allows only up to 4 bytes.
@@ -108,7 +124,7 @@ Codepoint codepoint(Iterator it)
cp |= (*it & 0x3F);
}
else
- throw invalid_utf8_sequence{};
+ cp = InvalidPolicy{}(byte);
return cp;
}