summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Maxime Coste <mawww@kakoune.org> 2017-10-06 19:51:41 +0800
committer Maxime Coste <mawww@kakoune.org> 2017-11-01 14:05:14 +0800
commit 337e58d4f9b680078bfae81ae212d74acceb9a6d (patch)
tree 2554f6922c3cc754ee9912a36d7fca1347c732e6 /src
parent 236751cb84e9bdbf46f114f4f456760101618c06 (diff)
Regex: Cleanup character class parsing a bit
Diffstat (limited to 'src')
-rw-r--r-- src/regex_impl.cc 35
1 files changed, 15 insertions, 20 deletions
diff --git a/src/regex_impl.cc b/src/regex_impl.cc
index 17241064..ae107bcb 100644
--- a/src/regex_impl.cc
+++ b/src/regex_impl.cc
@@ -162,7 +162,6 @@ private:
case 'K': m_pos += 2; return new_node(ParsedRegex::ResetStart);
}
break;
- /* TODO: look ahead, look behind */
}
return nullptr;
}
@@ -267,18 +266,17 @@ private:
}
// CharacterClassEscape
- for (auto& character_class : character_class_escapes)
+ auto class_it = find_if(character_class_escapes,
+ [cp = to_lower(cp)](auto& c) { return c.cp == cp; });
+ if (class_it != std::end(character_class_escapes))
{
- if (character_class.cp == cp)
- {
- auto matcher_id = m_parsed_regex.matchers.size();
- m_parsed_regex.matchers.push_back(
- [ctype = character_class.ctype ? wctype(character_class.ctype) : (wctype_t)0,
- chars = character_class.additional_chars, neg = character_class.neg] (Codepoint cp) {
- return ((ctype != 0 and iswctype(cp, ctype)) or contains(chars, cp)) != neg;
- });
- return new_node(ParsedRegex::Matcher, matcher_id);
- }
+ auto matcher_id = m_parsed_regex.matchers.size();
+ m_parsed_regex.matchers.push_back(
+ [ctype = class_it->ctype ? wctype(class_it->ctype) : (wctype_t)0,
+ chars = class_it->additional_chars, neg = is_upper(cp)] (Codepoint cp) {
+ return ((ctype != 0 and iswctype(cp, ctype)) or contains(chars, cp)) != neg;
+ });
+ return new_node(ParsedRegex::Matcher, matcher_id);
}
// CharacterEscape
@@ -323,14 +321,15 @@ private:
if (cp == '\\')
{
auto it = find_if(character_class_escapes,
- [cp = *m_pos](auto& t) { return t.cp == cp; });
+ [cp = to_lower(*m_pos)](auto& t) { return t.cp == cp; });
if (it != std::end(character_class_escapes))
{
+ auto negative = is_upper(*m_pos);
if (it->ctype)
- ctypes.push_back({wctype(it->ctype), not it->neg});
- for (auto& c : it->additional_chars) // TODO: handle negative case
+ ctypes.push_back({wctype(it->ctype), not negative});
+ for (auto& c : it->additional_chars)
{
- if (it->neg)
+ if (negative)
excluded.push_back((Codepoint)c);
else
ranges.push_back({(Codepoint)c, (Codepoint)c});
@@ -497,13 +496,9 @@ private:
// Table of character class escapes, one entry per escape letter.
// NOTE(review): the definition is still sized [8], but after this change only
// four initializers remain; the four trailing entries are value-initialized
// and are still visited by lookups that scan up to
// std::end(character_class_escapes) — confirm whether the size should be 4.
// NOTE(review): the fourth field is `false` in every remaining entry; the
// lookups in this change derive negation from is_upper() of the escape letter
// instead — confirm whether the field is still needed.
// For some reason Gcc fails to link if this is constexpr
const RegexParser::CharacterClassEscape RegexParser::character_class_escapes[8] = {
{ 'd', "digit", "", false },
- { 'D', "digit", "", true },
{ 'w', "alnum", "_", false },
- { 'W', "alnum", "_", true },
{ 's', "space", "", false },
- { 'S', "space", "", true },
{ 'h', nullptr, " \t", false },
- { 'H', nullptr, " \t", true },
};
struct RegexCompiler