summaryrefslogtreecommitdiff
path: root/src/unicode.hh
blob: 9485a27da932c2f5271ee143ae29a9f60aa2bc5b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#ifndef unicode_hh_INCLUDED
#define unicode_hh_INCLUDED

#include <cstdint>
#include <ctype.h>
#include <wctype.h>

namespace Kakoune
{

// A single code point value, stored as an unsigned 32-bit integer.
using Codepoint = uint32_t;

// Returns true when c is the line feed character '\n'.
inline bool is_eol(Codepoint c)
{
    return '\n' == c;
}

// Returns true when c is a space or a horizontal tab.
inline bool is_blank(Codepoint c)
{
    switch (c)
    {
        case ' ':
        case '\t':
            return true;
        default:
            return false;
    }
}

// Returns true when c is a space or a horizontal tab.
// Currently accepts exactly the same characters as is_blank.
inline bool is_horizontal_blank(Codepoint c)
{
    return c == '\t' or c == ' ';
}

// Selects the notion of "word" used by is_word and categorize:
//   Word: underscore or anything iswalnum() accepts
//   WORD: anything that is neither blank nor end of line
enum WordType
{
    Word,
    WORD
};

// Primary template: tells whether c is a word character in the strict
// sense, i.e. an underscore or a character accepted by iswalnum().
// This definition does not look at word_type; the WORD mode is handled
// by the explicit specialization below.
template<WordType word_type = Word>
inline bool is_word(Codepoint c)
{
    if (c == '_')
        return true;
    return iswalnum(c);
}

// WORD mode: every character counts as part of a word unless it is a
// blank or an end of line.
template<>
inline bool is_word<WORD>(Codepoint c)
{
    return not (is_blank(c) or is_eol(c));
}

// Returns true when c is not a (strict) word character, not a blank and
// not an end of line.
inline bool is_punctuation(Codepoint c)
{
    return not is_word(c) and not is_blank(c) and not is_eol(c);
}

// Character classes produced by categorize.
enum class CharCategories
{
    Blank,       // space or horizontal tab
    EndOfLine,   // line feed
    Word,        // word character (meaning depends on the WordType used)
    Punctuation, // everything else
};

// Classifies c into one of the CharCategories.
//
// Strict word characters, end of line and blanks are recognised first
// and map to the same category whatever word_type is. Any remaining
// character is Punctuation in Word mode, and Word in WORD mode.
template<WordType word_type = Word>
inline CharCategories categorize(Codepoint c)
{
    // Category for characters matched by none of the checks below.
    const CharCategories fallback = (word_type == WORD)
                                  ? CharCategories::Word
                                  : CharCategories::Punctuation;

    if (is_word(c))
        return CharCategories::Word;
    if (is_eol(c))
        return CharCategories::EndOfLine;
    if (is_blank(c))
        return CharCategories::Blank;
    return fallback;
}

}

#endif // unicode_hh_INCLUDED