summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Maxime Coste <mawww@kakoune.org> 2017-10-13 10:44:24 +0800
committer Maxime Coste <mawww@kakoune.org> 2017-11-01 14:05:14 +0800
commit b8495f0953f06caeca9f99f1e6ef157cfe358a89 (patch)
tree d5e707e9c6872bb690e783eec30f031282249a50 /src
parent b0233262b8a4ab3751cf2d94a6966a6ff56461c4 (diff)
Regex: Rework parsing, treat lookarounds as assertions, and flags separately
Diffstat (limited to 'src')
-rw-r--r-- src/regex_impl.cc 116
1 file changed, 66 insertions, 50 deletions
diff --git a/src/regex_impl.cc b/src/regex_impl.cc
index 2a517d01..89f3b5aa 100644
--- a/src/regex_impl.cc
+++ b/src/regex_impl.cc
@@ -135,6 +135,8 @@ private:
AstNodePtr term()
{
+ while (flag()) // read all flags
+ {}
if (auto node = assertion())
return node;
if (auto node = atom())
@@ -145,6 +147,34 @@ private:
return nullptr;
}
+ bool peek(StringView expected) const
+ {
+ auto it = m_pos;
+ for (Iterator expected_it{expected.begin(), expected}; expected_it != expected.end(); ++expected_it)
+ {
+ if (it == m_regex.end() or *it++ != *expected_it)
+ return false;
+ }
+ return true;
+ }
+
+ bool flag()
+ {
+ if (peek("(?i)"))
+ {
+ m_ignore_case = true;
+ m_pos += 4;
+ return true;
+ }
+ if (peek("(?I)"))
+ {
+ m_ignore_case = false;
+ m_pos += 4;
+ return true;
+ }
+ return false;
+ }
+
AstNodePtr assertion()
{
if (at_end())
@@ -166,6 +196,34 @@ private:
case 'K': m_pos += 2; return new_node(ParsedRegex::ResetStart);
}
break;
+ case '(':
+ {
+ Optional<ParsedRegex::Op> lookaround_op;
+ constexpr struct { StringView prefix; ParsedRegex::Op op; } lookarounds[] = {
+ { "(?=", ParsedRegex::LookAhead },
+ { "(?!", ParsedRegex::NegativeLookAhead },
+ { "(?<=", ParsedRegex::LookBehind },
+ { "(?<!", ParsedRegex::NegativeLookBehind }
+ };
+ for (auto& lookaround : lookarounds)
+ {
+ if (peek(lookaround.prefix))
+ {
+ lookaround_op = lookaround.op;
+ m_pos += (int)lookaround.prefix.char_length();
+ break;
+ }
+ }
+ if (not lookaround_op)
+ return nullptr;
+
+ AstNodePtr lookaround = alternative(*lookaround_op);
+ if (at_end() or *m_pos++ != ')')
+ parse_error("unclosed parenthesis");
+
+ validate_lookaround(lookaround);
+ return lookaround;
+ }
}
return nullptr;
}
@@ -181,59 +239,17 @@ private:
case '.': ++m_pos; return new_node(ParsedRegex::AnyChar);
case '(':
{
- auto advance = [&]() {
- if (++m_pos == m_regex.end())
- parse_error("unclosed parenthesis");
- return *m_pos;
- };
-
- AstNodePtr content;
- if (advance() == '?')
+ ++m_pos;
+ bool capture = true;
+ if (peek("?:"))
{
- auto c = advance();
- if (c == ':')
- {
- ++m_pos;
- content = disjunction(-1);
- }
- else if (contains("=!<", c))
- {
- bool behind = false;
- if (c == '<')
- {
- advance();
- behind = true;
- }
-
- auto type = *m_pos++;
- if (type == '=')
- content = alternative(behind ? ParsedRegex::LookBehind
- : ParsedRegex::LookAhead);
- else if (type == '!')
- content = alternative(behind ? ParsedRegex::NegativeLookBehind
- : ParsedRegex::NegativeLookAhead);
- else
- parse_error("invalid disjunction");
-
- validate_lookaround(content);
- }
- else if (c == 'i' or c == 'I')
- {
- m_ignore_case = c == 'i';
- if (advance() != ')')
- parse_error("unclosed parenthesis");
- ++m_pos;
- return atom(); // get next atom
- }
- else
- parse_error("invalid disjunction");
+ capture = false;
+ m_pos += 2;
}
- else
- content = disjunction(m_parsed_regex.capture_count++);
- if (at_end() or *m_pos != ')')
+ AstNodePtr content = disjunction(capture ? m_parsed_regex.capture_count++ : -1);
+ if (at_end() or *m_pos++ != ')')
parse_error("unclosed parenthesis");
- ++m_pos;
return content;
}
case '\\':
@@ -473,7 +489,7 @@ private:
bool at_end() const { return m_pos == m_regex.end(); }
[[gnu::noreturn]]
- void parse_error(StringView error)
+ void parse_error(StringView error) const
{
throw regex_error(format("regex parse error: {} at '{}<<<HERE>>>{}'", error,
StringView{m_regex.begin(), m_pos.base()},