blob: 2fcba68c8389e91e4045ae6b8d1747b4c0465ea7 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
|
#ifndef unicode_hh_INCLUDED
#define unicode_hh_INCLUDED
#include <cwctype>
#include <cwchar>
#include <locale>
#include "units.hh"
namespace Kakoune
{
// A single Unicode code point, held in a 32-bit character type.
using Codepoint = char32_t;

// True only for the line feed character.
inline bool is_eol(Codepoint c)
{
    constexpr Codepoint line_feed = '\n';
    return c == line_feed;
}

// True for the blanks that stay on the current line: space and tab.
inline bool is_horizontal_blank(Codepoint c)
{
    switch (c)
    {
        case ' ':
        case '\t':
            return true;
        default:
            return false;
    }
}

// True for space, tab or line feed, i.e. a horizontal blank or an
// end of line.
inline bool is_blank(Codepoint c)
{
    return is_horizontal_blank(c) or is_eol(c);
}
// Selects which definition of "word character" is_word() applies.
enum WordType
{
    Word, // underscore, or anything iswalnum() accepts
    WORD  // anything that is neither a horizontal blank nor an end of line
};

// Word flavour (also the default): c is an underscore or is reported as
// alphanumeric by iswalnum().
template<WordType word_type = Word>
inline bool is_word(Codepoint c)
{
    if (c == '_')
        return true;
    return iswalnum(static_cast<wchar_t>(c)) != 0;
}

// WORD flavour: every code point counts except horizontal blanks and
// end of line.
template<>
inline bool is_word<WORD>(Codepoint c)
{
    return not (is_horizontal_blank(c) or is_eol(c));
}
// True when c is neither a word character (default Word flavour), nor a
// horizontal blank, nor an end of line.
inline bool is_punctuation(Codepoint c)
{
    if (is_word(c))
        return false;
    return not is_horizontal_blank(c) and not is_eol(c);
}
// True when c lies in one of the ranges 'a'..'z' or 'A'..'Z'.
inline bool is_basic_alpha(Codepoint c)
{
    const bool in_lower_range = 'a' <= c and c <= 'z';
    const bool in_upper_range = 'A' <= c and c <= 'Z';
    return in_lower_range or in_upper_range;
}
// Number of columns used to display c.
//
// A line feed is counted as one column. Every other code point is measured
// with wcwidth(). wcwidth() returns -1 for non-printable characters; that
// value is replaced by 1 so the result is never a negative column count.
inline ColumnCount codepoint_width(Codepoint c)
{
    if (c == '\n')
        return 1;

    const int width = wcwidth(static_cast<wchar_t>(c));
    return width >= 0 ? width : 1;
}
// Classes a code point can be sorted into by categorize().
enum class CharCategories
{
    Blank,       // horizontal blank
    EndOfLine,   // end of line
    Word,        // word character for the selected WordType
    Punctuation, // everything else
};

// Returns the category of c. End of line is checked first, then horizontal
// blank. With word_type == WORD every remaining code point is a Word;
// otherwise is_word() decides between Word and Punctuation.
template<WordType word_type = Word>
inline CharCategories categorize(Codepoint c)
{
    return is_eol(c)                         ? CharCategories::EndOfLine
         : is_horizontal_blank(c)            ? CharCategories::Blank
         : (word_type == WORD or is_word(c)) ? CharCategories::Word
         :                                     CharCategories::Punctuation;
}
// Lower-case mapping of cp as given by towlower().
inline Codepoint to_lower(Codepoint cp)
{
    return static_cast<Codepoint>(towlower(static_cast<wchar_t>(cp)));
}
// Upper-case mapping of cp as given by towupper().
inline Codepoint to_upper(Codepoint cp)
{
    return static_cast<Codepoint>(towupper(static_cast<wchar_t>(cp)));
}
// Maps 'A'..'Z' to 'a'..'z'; any other char is returned unchanged.
inline char to_lower(char c)
{
    if (c < 'A' or c > 'Z')
        return c;
    return static_cast<char>(c + ('a' - 'A'));
}
// Maps 'a'..'z' to 'A'..'Z'; any other char is returned unchanged.
inline char to_upper(char c)
{
    if (c < 'a' or c > 'z')
        return c;
    return static_cast<char>(c - ('a' - 'A'));
}
}
#endif // unicode_hh_INCLUDED
|