summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Maxime Coste <mawww@kakoune.org> 2017-09-27 14:04:05 +0800
committer Maxime Coste <mawww@kakoune.org> 2017-11-01 14:05:14 +0800
commit e4004a7b7fbbc2fb903394dcb172c701f7733847 (patch)
tree a1780b18c2e2d29b401538107e49632a3242ed83 /src
parent 4ac0d35d1e00dec9461d3ecfa2057cfda1dab31e (diff)
Regex: Add support for \h and \H "horizontal blank" character classes
Diffstat (limited to 'src')
-rw-r--r-- src/regex_impl.cc 36
1 files changed, 26 insertions, 10 deletions
diff --git a/src/regex_impl.cc b/src/regex_impl.cc
index d3ca59d9..868b319b 100644
--- a/src/regex_impl.cc
+++ b/src/regex_impl.cc
@@ -222,9 +222,9 @@ private:
{
auto matcher_id = m_parsed_regex.matchers.size();
m_parsed_regex.matchers.push_back(
- [ctype = wctype(character_class.ctype),
+ [ctype = character_class.ctype ? wctype(character_class.ctype) : (wctype_t)0,
chars = character_class.additional_chars] (Codepoint cp) {
- return iswctype(cp, ctype) or contains(chars, cp);
+ return (ctype != 0 and iswctype(cp, ctype)) or contains(chars, cp);
});
return new_node(ParsedRegex::Matcher, matcher_id);
}
@@ -255,6 +255,7 @@ private:
struct CharRange { Codepoint min, max; };
Vector<CharRange> ranges;
+ Vector<Codepoint> excluded;
Vector<std::pair<wctype_t, bool>> ctypes;
while (m_pos != m_regex.end() and *m_pos != ']')
{
@@ -274,9 +275,15 @@ private:
[cp = *m_pos](auto& t) { return t.cp == cp; });
if (it != std::end(character_class_escapes))
{
- ctypes.push_back({wctype(it->ctype), not it->neg});
- for (auto& c : it->additional_chars)
- ranges.push_back({(Codepoint)c, (Codepoint)c});
+ if (it->ctype)
+ ctypes.push_back({wctype(it->ctype), not it->neg});
+ for (auto& c : it->additional_chars) // TODO: handle negative case
+ {
+ if (it->neg)
+ excluded.push_back((Codepoint)c);
+ else
+ ranges.push_back({(Codepoint)c, (Codepoint)c});
+ }
++m_pos;
continue;
}
@@ -306,12 +313,13 @@ private:
++m_pos;
auto matcher = [ranges = std::move(ranges),
- ctypes = std::move(ctypes), negative] (Codepoint cp) {
+ ctypes = std::move(ctypes),
+ excluded = std::move(excluded), negative] (Codepoint cp) {
auto found = contains_that(ranges, [cp](auto& r) {
return r.min <= cp and cp <= r.max;
}) or contains_that(ctypes, [cp](auto& c) {
return (bool)iswctype(cp, c.first) == c.second;
- });
+ }) or (not excluded.empty() and not contains(excluded, cp));
return negative ? not found : found;
};
@@ -390,17 +398,19 @@ private:
bool neg;
};
- static const CharacterClassEscape character_class_escapes[6];
+ static const CharacterClassEscape character_class_escapes[8];
};
// For some reason Gcc fails to link if this is constexpr
-const RegexParser::CharacterClassEscape RegexParser::character_class_escapes[6] = {
+const RegexParser::CharacterClassEscape RegexParser::character_class_escapes[8] = {
{ 'd', "digit", "", false },
{ 'D', "digit", "", true },
{ 'w', "alnum", "_", false },
{ 'W', "alnum", "_", true },
{ 's', "space", "", false },
- { 's', "space", "", true },
+ { 'S', "space", "", true },
+ { 'h', nullptr, " \t", false },
+ { 'H', nullptr, " \t", true },
};
struct CompiledRegex
@@ -983,6 +993,12 @@ auto test_regex = UnitTest{[]{
}
{
+ TestVM vm{R"([ \H]+)"};
+ kak_assert(vm.exec("abc "));
+ kak_assert(not vm.exec("a \t"));
+ }
+
+ {
TestVM vm{R"(\Q{}[]*+?\Ea+)"};
kak_assert(vm.exec("{}[]*+?aa"));
}