Avoid iswlower, iswupper, towlower and towupper for ASCII codepoints

Avoid the costly shared object function call when most codepoints
will be ASCII. The regex benchmark gets a nice speedup:

Regex                                     Before      After
--------------------------------------+----------+---------
'Twain'                               |    25 ms |    15 ms
'(?i)Twain'                           |    74 ms |    57 ms
'[a-z]shing'                          |   323 ms |   303 ms
'Huck[a-zA-Z]+|Saw[a-zA-Z]+'          |    26 ms |    17 ms
'\b\w+nn\b'                           |   424 ms |   393 ms
'[a-q][^u-z]{13}x'                    |   869 ms |   815 ms
'Tom|Sawyer|Huckleberry|Finn'         |    33 ms |    24 ms
'(?i)Tom|Sawyer|Huckleberry|Finn'     |   319 ms |   281 ms
'.{0,2}(Tom|Sawyer|Huckleberry|Finn)' |  1294 ms |  1293 ms
'.{2,4}(Tom|Sawyer|Huckleberry|Finn)' |  1470 ms |  1429 ms
'Tom.{10,25}river|river.{10,25}Tom'   |    69 ms |    61 ms
'[a-zA-Z]+ing'                        |   447 ms |   408 ms
'\s[a-zA-Z]{0,12}ing\s'               |   539 ms |   543 ms
'([A-Za-z]awyer|[A-Za-z]inn)\s'       |   588 ms |   552 ms
'["'][^"']{0,30}[?!\.]["']'           |    92 ms |    81 ms
author: Maxime Coste <mawww@kakoune.org> 2024-02-06 22:13:57 +1100
committer: Maxime Coste <mawww@kakoune.org> 2024-02-06 22:16:08 +1100
commit: 3ef68188b470d4f23707c32fce4a19b4fa38965e (patch)
tree: 915d3599e202feae147232b25712c40420ad63e2 /src
parent: 04a96b059faac8100a291e56bfbdb1962d53d4e1 (diff)
1 files changed, 6 insertions, 6 deletions
diff --git a/src/unicode.hh b/src/unicode.hh
index fd35ecf0..0acf4005 100644
--- a/src/unicode.hh
+++ b/src/unicode.hh
@@ -124,18 +124,18 @@ inline CharCategories categorize(Codepoint c, ConstArrayView<Codepoint> extra_wo
     return CharCategories::Punctuation;
 }
 
-inline Codepoint to_lower(Codepoint cp) noexcept { return towlower((wchar_t)cp); }
-inline Codepoint to_upper(Codepoint cp) noexcept { return towupper((wchar_t)cp); }
-
-inline bool is_lower(Codepoint cp) noexcept { return iswlower((wchar_t)cp); }
-inline bool is_upper(Codepoint cp) noexcept { return iswupper((wchar_t)cp); }
-
 inline char to_lower(char c) noexcept { return c >= 'A' and c <= 'Z' ? c - 'A' + 'a' : c; }
 inline char to_upper(char c) noexcept { return c >= 'a' and c <= 'z' ? c - 'a' + 'A' : c; }
 
 inline bool is_lower(char c) noexcept { return c >= 'a' and c <= 'z'; }
 inline bool is_upper(char c) noexcept { return c >= 'A' and c <= 'Z'; }
 
+inline Codepoint to_lower(Codepoint cp) noexcept { return cp < 128 ? (Codepoint)to_lower((char)cp) : towlower((wchar_t)cp); }
+inline Codepoint to_upper(Codepoint cp) noexcept { return cp < 128 ? (Codepoint)to_upper((char)cp) : towupper((wchar_t)cp); }
+
+inline bool is_lower(Codepoint cp) noexcept { return cp < 128 ? is_lower((char)cp) : iswlower((wchar_t)cp); }
+inline bool is_upper(Codepoint cp) noexcept { return cp < 128 ? is_upper((char)cp) : iswupper((wchar_t)cp); }
+
 }
 
 #endif // unicode_hh_INCLUDED
author	Maxime Coste <mawww@kakoune.org>	2024-02-06 22:13:57 +1100
committer	Maxime Coste <mawww@kakoune.org>	2024-02-06 22:16:08 +1100
commit	3ef68188b470d4f23707c32fce4a19b4fa38965e (patch)
tree	915d3599e202feae147232b25712c40420ad63e2 /src
parent	04a96b059faac8100a291e56bfbdb1962d53d4e1 (diff)