Regex: Clean up character class parsing a bit

author: Maxime Coste <mawww@kakoune.org> 2017-10-06 19:51:41 +0800
committer: Maxime Coste <mawww@kakoune.org> 2017-11-01 14:05:14 +0800
commit: 337e58d4f9b680078bfae81ae212d74acceb9a6d (patch)
tree: 2554f6922c3cc754ee9912a36d7fca1347c732e6 /src
parent: 236751cb84e9bdbf46f114f4f456760101618c06 (diff)
1 files changed, 15 insertions, 20 deletions
diff --git a/src/regex_impl.cc b/src/regex_impl.cc
index 17241064..ae107bcb 100644
--- a/src/regex_impl.cc
+++ b/src/regex_impl.cc
@@ -162,7 +162,6 @@ private:
                     case 'K': m_pos += 2; return new_node(ParsedRegex::ResetStart);
                 }
                 break;
-            /* TODO: look ahead, look behind */
         }
         return nullptr;
     }
@@ -267,18 +266,17 @@ private:
         }
 
         // CharacterClassEscape
-        for (auto& character_class : character_class_escapes)
+        auto class_it = find_if(character_class_escapes,
+                                [cp = to_lower(cp)](auto& c) { return c.cp == cp; });
+        if (class_it != std::end(character_class_escapes))
         {
-            if (character_class.cp == cp)
-            {
-                auto matcher_id = m_parsed_regex.matchers.size();
-                m_parsed_regex.matchers.push_back(
-                    [ctype = character_class.ctype ? wctype(character_class.ctype) : (wctype_t)0,
-                     chars = character_class.additional_chars, neg = character_class.neg] (Codepoint cp) {
-                        return ((ctype != 0 and iswctype(cp, ctype)) or contains(chars, cp)) != neg;
-                    });
-                return new_node(ParsedRegex::Matcher, matcher_id);
-            }
+            auto matcher_id = m_parsed_regex.matchers.size();
+            m_parsed_regex.matchers.push_back(
+                [ctype = class_it->ctype ? wctype(class_it->ctype) : (wctype_t)0,
+                 chars = class_it->additional_chars, neg = is_upper(cp)] (Codepoint cp) {
+                    return ((ctype != 0 and iswctype(cp, ctype)) or contains(chars, cp)) != neg;
+                });
+            return new_node(ParsedRegex::Matcher, matcher_id);
         }
 
         // CharacterEscape
@@ -323,14 +321,15 @@ private:
             if (cp == '\\')
             {
                 auto it = find_if(character_class_escapes,
-                                  [cp = *m_pos](auto& t) { return t.cp == cp; });
+                                  [cp = to_lower(*m_pos)](auto& t) { return t.cp == cp; });
                 if (it != std::end(character_class_escapes))
                 {
+                    auto negative = is_upper(*m_pos);
                     if (it->ctype)
-                        ctypes.push_back({wctype(it->ctype), not it->neg});
-                    for (auto& c : it->additional_chars) // TODO: handle negative case
+                        ctypes.push_back({wctype(it->ctype), not negative});
+                    for (auto& c : it->additional_chars)
                     {
-                        if (it->neg)
+                        if (negative)
                             excluded.push_back((Codepoint)c);
                         else
                             ranges.push_back({(Codepoint)c, (Codepoint)c});
@@ -497,13 +496,9 @@ private:
 // For some reason Gcc fails to link if this is constexpr
 const RegexParser::CharacterClassEscape RegexParser::character_class_escapes[8] = {
     { 'd', "digit", "", false },
-    { 'D', "digit", "", true },
     { 'w', "alnum", "_", false },
-    { 'W', "alnum", "_", true },
     { 's', "space", "", false },
-    { 'S', "space", "", true },
     { 'h', nullptr, " \t", false },
-    { 'H', nullptr, " \t", true },
 };
 
 struct RegexCompiler
author	Maxime Coste <mawww@kakoune.org>	2017-10-06 19:51:41 +0800
committer	Maxime Coste <mawww@kakoune.org>	2017-11-01 14:05:14 +0800
commit	337e58d4f9b680078bfae81ae212d74acceb9a6d (patch)
tree	2554f6922c3cc754ee9912a36d7fca1347c732e6 /src
parent	236751cb84e9bdbf46f114f4f456760101618c06 (diff)