diff options
| author | Maxime Coste <frrrwww@gmail.com> | 2012-10-13 18:31:29 +0200 |
|---|---|---|
| committer | Maxime Coste <frrrwww@gmail.com> | 2012-10-13 19:05:14 +0200 |
| commit | dfafcdb6e6b52ebdd5b664b7561e682c32762663 (patch) | |
| tree | fd94329d686633260b7ec9dd4162287eb6225898 /src/utf8.hh | |
| parent | 4f1ab5b74935d3250c41bbee5f347ec4f815063c (diff) | |
utf8::codepoint: configurable invalid byte policy
Diffstat (limited to 'src/utf8.hh')
| -rw-r--r-- | src/utf8.hh | 20 |
1 files changed, 18 insertions, 2 deletions
```diff
diff --git a/src/utf8.hh b/src/utf8.hh
index 0f136c68..53737414 100644
--- a/src/utf8.hh
+++ b/src/utf8.hh
@@ -81,9 +81,25 @@ bool is_character_start(Iterator it)
 
 struct invalid_utf8_sequence{};
 
+namespace InvalidBytePolicy
+{
+
+struct Throw
+{
+    Codepoint operator()(char byte) const { throw invalid_utf8_sequence{}; }
+};
+
+struct Pass
+{
+    Codepoint operator()(char byte) const { return byte; }
+};
+
+}
+
 // returns the codepoint of the character whose first byte
 // is pointed by it
-template<typename Iterator>
+template<typename InvalidPolicy = InvalidBytePolicy::Throw,
+         typename Iterator>
 Codepoint codepoint(Iterator it)
 {
     // According to rfc3629, UTF-8 allows only up to 4 bytes.
@@ -108,7 +124,7 @@ Codepoint codepoint(Iterator it)
         cp |= (*it & 0x3F);
     }
     else
-        throw invalid_utf8_sequence{};
+        cp = InvalidPolicy{}(byte);
     return cp;
 }
 
```
