summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Maxime Coste <mawww@kakoune.org> 2017-10-10 11:21:21 +0800
committer Maxime Coste <mawww@kakoune.org> 2017-11-01 14:05:14 +0800
commit 5bf4be645a10f9cbabf2ed7ed5d96fdfdf1ab839 (patch)
tree 29a580ebeb39d5c92b785ac3aecee28dec139f09 /src
parent 80f6caee8149e50aa4870c0c6d8241b86113d405 (diff)
Regex: Fix support for ignore case in lookarounds
Diffstat (limited to 'src')
-rw-r--r-- src/regex_impl.cc 66
-rw-r--r-- src/regex_impl.hh 31
2 files changed, 74 insertions, 23 deletions
diff --git a/src/regex_impl.cc b/src/regex_impl.cc
index 847044b7..bb1f3537 100644
--- a/src/regex_impl.cc
+++ b/src/regex_impl.cc
@@ -541,6 +541,7 @@ private:
uint32_t compile_node_inner(const ParsedRegex::AstNodePtr& node)
{
const auto start_pos = m_program.instructions.size();
+ const bool ignore_case = node->ignore_case;
const Codepoint capture = (node->op == ParsedRegex::Alternation or node->op == ParsedRegex::Sequence) ? node->value : -1;
if (capture != -1 and (capture == 0 or not (m_flags & RegexCompileFlags::NoSubs)))
@@ -550,8 +551,8 @@ private:
switch (node->op)
{
case ParsedRegex::Literal:
- if (node->ignore_case)
- push_inst(CompiledRegex::LiteralIgnoreCase, to_lower(node->value));
+ if (ignore_case)
+ push_inst(CompiledRegex::Literal_IgnoreCase, to_lower(node->value));
else
push_inst(CompiledRegex::Literal, node->value);
break;
@@ -594,24 +595,32 @@ private:
break;
}
case ParsedRegex::LookAhead:
- push_inst(m_forward ? CompiledRegex::LookAhead
- : CompiledRegex::LookBehind,
- push_lookaround(node->children, false));
+ push_inst(m_forward ? (ignore_case ? CompiledRegex::LookAhead_IgnoreCase
+ : CompiledRegex::LookAhead)
+ : (ignore_case ? CompiledRegex::LookBehind_IgnoreCase
+ : CompiledRegex::LookBehind),
+ push_lookaround(node->children, false, ignore_case));
break;
case ParsedRegex::NegativeLookAhead:
- push_inst(m_forward ? CompiledRegex::NegativeLookAhead
- : CompiledRegex::NegativeLookBehind,
- push_lookaround(node->children, false));
+ push_inst(m_forward ? (ignore_case ? CompiledRegex::NegativeLookAhead_IgnoreCase
+ : CompiledRegex::NegativeLookAhead)
+ : (ignore_case ? CompiledRegex::NegativeLookBehind_IgnoreCase
+ : CompiledRegex::NegativeLookBehind),
+ push_lookaround(node->children, false, ignore_case));
break;
case ParsedRegex::LookBehind:
- push_inst(m_forward ? CompiledRegex::LookBehind
- : CompiledRegex::LookAhead,
- push_lookaround(node->children, true));
+ push_inst(m_forward ? (ignore_case ? CompiledRegex::LookBehind_IgnoreCase
+ : CompiledRegex::LookBehind)
+ : (ignore_case ? CompiledRegex::LookAhead_IgnoreCase
+ : CompiledRegex::LookAhead),
+ push_lookaround(node->children, true, ignore_case));
break;
case ParsedRegex::NegativeLookBehind:
- push_inst(m_forward ? CompiledRegex::NegativeLookBehind
- : CompiledRegex::NegativeLookAhead,
- push_lookaround(node->children, true));
+ push_inst(m_forward ? (ignore_case ? CompiledRegex::NegativeLookBehind_IgnoreCase
+ : CompiledRegex::NegativeLookBehind)
+ : (ignore_case ? CompiledRegex::NegativeLookAhead_IgnoreCase
+ : CompiledRegex::NegativeLookAhead),
+ push_lookaround(node->children, true, ignore_case));
break;
case ParsedRegex::LineStart:
push_inst(m_forward ? CompiledRegex::LineStart
@@ -698,14 +707,16 @@ private:
return res;
}
- uint32_t push_lookaround(const Vector<ParsedRegex::AstNodePtr>& characters, bool reversed = false)
+ uint32_t push_lookaround(const Vector<ParsedRegex::AstNodePtr>& characters,
+ bool reversed, bool ignore_case)
{
uint32_t res = m_program.lookarounds.size();
- auto write_lookaround = [this](auto&& characters) {
+ auto write_lookaround = [this, ignore_case](auto&& characters) {
for (auto& character : characters)
{
if (character->op == ParsedRegex::Literal)
- m_program.lookarounds.push_back(character->value);
+ m_program.lookarounds.push_back(ignore_case ? to_lower(character->value)
+ : character->value);
else if (character->op == ParsedRegex::AnyChar)
m_program.lookarounds.push_back(0xF000);
else if (character->op == ParsedRegex::Matcher)
@@ -841,7 +852,7 @@ void dump_regex(const CompiledRegex& program)
case CompiledRegex::Literal:
printf("literal %lc\n", inst.param);
break;
- case CompiledRegex::LiteralIgnoreCase:
+ case CompiledRegex::Literal_IgnoreCase:
printf("literal (ignore case) %lc\n", inst.param);
break;
case CompiledRegex::AnyChar:
@@ -886,6 +897,10 @@ void dump_regex(const CompiledRegex& program)
case CompiledRegex::NegativeLookAhead:
case CompiledRegex::LookBehind:
case CompiledRegex::NegativeLookBehind:
+ case CompiledRegex::LookAhead_IgnoreCase:
+ case CompiledRegex::NegativeLookAhead_IgnoreCase:
+ case CompiledRegex::LookBehind_IgnoreCase:
+ case CompiledRegex::NegativeLookBehind_IgnoreCase:
{
const char* name = nullptr;
if (inst.op == CompiledRegex::LookAhead)
@@ -897,6 +912,15 @@ void dump_regex(const CompiledRegex& program)
if (inst.op == CompiledRegex::NegativeLookBehind)
name = "negative look behind";
+ if (inst.op == CompiledRegex::LookAhead_IgnoreCase)
+ name = "look ahead (ignore case)";
+ if (inst.op == CompiledRegex::NegativeLookAhead_IgnoreCase)
+ name = "negative look ahead (ignore case)";
+ if (inst.op == CompiledRegex::LookBehind_IgnoreCase)
+ name = "look behind (ignore case)";
+ if (inst.op == CompiledRegex::NegativeLookBehind_IgnoreCase)
+ name = "negative look behind (ignore case)";
+
String str;
for (auto it = program.lookarounds.begin() + inst.param; *it != -1; ++it)
utf8::dump(std::back_inserter(str), *it);
@@ -1183,6 +1207,12 @@ auto test_regex = UnitTest{[]{
TestVM<> vm{R"((?=))"};
kak_assert(vm.exec(""));
}
+
+ {
+ TestVM<> vm{R"((?i)(?=Foo))"};
+ kak_assert(vm.exec("fOO", RegexExecFlags::Search));
+ kak_assert(*vm.captures()[0] == 'f');
+ }
}};
}
diff --git a/src/regex_impl.hh b/src/regex_impl.hh
index 2d8da322..9aa736d6 100644
--- a/src/regex_impl.hh
+++ b/src/regex_impl.hh
@@ -29,7 +29,7 @@ struct CompiledRegex : RefCountable
{
Match,
Literal,
- LiteralIgnoreCase,
+ Literal_IgnoreCase,
AnyChar,
Matcher,
Jump,
@@ -46,6 +46,10 @@ struct CompiledRegex : RefCountable
NegativeLookAhead,
LookBehind,
NegativeLookBehind,
+ LookAhead_IgnoreCase,
+ NegativeLookAhead_IgnoreCase,
+ LookBehind_IgnoreCase,
+ NegativeLookBehind_IgnoreCase,
};
struct Instruction
@@ -240,7 +244,7 @@ private:
if (pos != m_end and inst.param == *pos)
return StepResult::Consumed;
return StepResult::Failed;
- case CompiledRegex::LiteralIgnoreCase:
+ case CompiledRegex::Literal_IgnoreCase:
if (pos != m_end and inst.param == to_lower(*pos))
return StepResult::Consumed;
return StepResult::Failed;
@@ -307,12 +311,26 @@ private:
break;
case CompiledRegex::LookAhead:
case CompiledRegex::NegativeLookAhead:
- if (lookaround<MatchDirection::Forward>(inst.param, pos) != (inst.op == CompiledRegex::LookAhead))
+ if (lookaround<MatchDirection::Forward, false>(inst.param, pos) !=
+ (inst.op == CompiledRegex::LookAhead))
+ return StepResult::Failed;
+ break;
+ case CompiledRegex::LookAhead_IgnoreCase:
+ case CompiledRegex::NegativeLookAhead_IgnoreCase:
+ if (lookaround<MatchDirection::Forward, true>(inst.param, pos) !=
+ (inst.op == CompiledRegex::LookAhead_IgnoreCase))
return StepResult::Failed;
break;
case CompiledRegex::LookBehind:
case CompiledRegex::NegativeLookBehind:
- if (lookaround<MatchDirection::Backward>(inst.param, pos) != (inst.op == CompiledRegex::LookBehind))
+ if (lookaround<MatchDirection::Backward, false>(inst.param, pos) !=
+ (inst.op == CompiledRegex::LookBehind))
+ return StepResult::Failed;
+ break;
+ case CompiledRegex::LookBehind_IgnoreCase:
+ case CompiledRegex::NegativeLookBehind_IgnoreCase:
+ if (lookaround<MatchDirection::Backward, true>(inst.param, pos) !=
+ (inst.op == CompiledRegex::LookBehind_IgnoreCase))
return StepResult::Failed;
break;
case CompiledRegex::Match:
@@ -391,7 +409,7 @@ private:
++start;
}
- template<MatchDirection look_direction>
+ template<MatchDirection look_direction, bool ignore_case>
bool lookaround(uint32_t index, Utf8It pos) const
{
for (auto it = m_program.lookarounds.begin() + index; *it != -1; ++it)
@@ -399,6 +417,9 @@ private:
if (pos == (look_direction == MatchDirection::Forward ? m_end : m_begin))
return false;
auto cp = (look_direction == MatchDirection::Forward ? *pos : *(pos-1)), ref = *it;
+ if (ignore_case)
+ cp = to_lower(cp);
+
if (ref == 0xF000)
{} // any character matches
else if (ref > 0xF0000 and ref <= 0xFFFFD)