diff options
| author | Maxime Coste <mawww@kakoune.org> | 2017-09-27 14:04:05 +0800 |
|---|---|---|
| committer | Maxime Coste <mawww@kakoune.org> | 2017-11-01 14:05:14 +0800 |
| commit | e4004a7b7fbbc2fb903394dcb172c701f7733847 (patch) | |
| tree | a1780b18c2e2d29b401538107e49632a3242ed83 /src | |
| parent | 4ac0d35d1e00dec9461d3ecfa2057cfda1dab31e (diff) | |
Regex: Add support for \h and \H "horizontal blank" character classes
Diffstat (limited to 'src')
| -rw-r--r-- | src/regex_impl.cc | 36 |
1 file changed, 26 insertions, 10 deletions
diff --git a/src/regex_impl.cc b/src/regex_impl.cc index d3ca59d9..868b319b 100644 --- a/src/regex_impl.cc +++ b/src/regex_impl.cc @@ -222,9 +222,9 @@ private: { auto matcher_id = m_parsed_regex.matchers.size(); m_parsed_regex.matchers.push_back( - [ctype = wctype(character_class.ctype), + [ctype = character_class.ctype ? wctype(character_class.ctype) : (wctype_t)0, chars = character_class.additional_chars] (Codepoint cp) { - return iswctype(cp, ctype) or contains(chars, cp); + return (ctype != 0 and iswctype(cp, ctype)) or contains(chars, cp); }); return new_node(ParsedRegex::Matcher, matcher_id); } @@ -255,6 +255,7 @@ private: struct CharRange { Codepoint min, max; }; Vector<CharRange> ranges; + Vector<Codepoint> excluded; Vector<std::pair<wctype_t, bool>> ctypes; while (m_pos != m_regex.end() and *m_pos != ']') { @@ -274,9 +275,15 @@ private: [cp = *m_pos](auto& t) { return t.cp == cp; }); if (it != std::end(character_class_escapes)) { - ctypes.push_back({wctype(it->ctype), not it->neg}); - for (auto& c : it->additional_chars) - ranges.push_back({(Codepoint)c, (Codepoint)c}); + if (it->ctype) + ctypes.push_back({wctype(it->ctype), not it->neg}); + for (auto& c : it->additional_chars) // TODO: handle negative case + { + if (it->neg) + excluded.push_back((Codepoint)c); + else + ranges.push_back({(Codepoint)c, (Codepoint)c}); + } ++m_pos; continue; } @@ -306,12 +313,13 @@ private: ++m_pos; auto matcher = [ranges = std::move(ranges), - ctypes = std::move(ctypes), negative] (Codepoint cp) { + ctypes = std::move(ctypes), + excluded = std::move(excluded), negative] (Codepoint cp) { auto found = contains_that(ranges, [cp](auto& r) { return r.min <= cp and cp <= r.max; }) or contains_that(ctypes, [cp](auto& c) { return (bool)iswctype(cp, c.first) == c.second; - }); + }) or (not excluded.empty() and not contains(excluded, cp)); return negative ? 
not found : found; }; @@ -390,17 +398,19 @@ private: bool neg; }; - static const CharacterClassEscape character_class_escapes[6]; + static const CharacterClassEscape character_class_escapes[8]; }; // For some reason Gcc fails to link if this is constexpr -const RegexParser::CharacterClassEscape RegexParser::character_class_escapes[6] = { +const RegexParser::CharacterClassEscape RegexParser::character_class_escapes[8] = { { 'd', "digit", "", false }, { 'D', "digit", "", true }, { 'w', "alnum", "_", false }, { 'W', "alnum", "_", true }, { 's', "space", "", false }, - { 's', "space", "", true }, + { 'S', "space", "", true }, + { 'h', nullptr, " \t", false }, + { 'H', nullptr, " \t", true }, }; struct CompiledRegex @@ -983,6 +993,12 @@ auto test_regex = UnitTest{[]{ } { + TestVM vm{R"([ \H]+)"}; + kak_assert(vm.exec("abc ")); + kak_assert(not vm.exec("a \t")); + } + + { TestVM vm{R"(\Q{}[]*+?\Ea+)"}; kak_assert(vm.exec("{}[]*+?aa")); } |
