summary | refs | log | tree | commit | diff
path: root/src/regex_impl.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/regex_impl.cc')
-rw-r--r-- src/regex_impl.cc 45
1 files changed, 39 insertions, 6 deletions
diff --git a/src/regex_impl.cc b/src/regex_impl.cc
index dd900125..8a46d833 100644
--- a/src/regex_impl.cc
+++ b/src/regex_impl.cc
@@ -505,6 +505,18 @@ private:
parse_error("unclosed character class");
++m_pos;
+ if (not character_class.ignore_case)
+ {
+ bool could_ignore_case = true;
+ for (const auto& [min, max] : character_class.ranges)
+ {
+ if (not contains(character_class.ranges, CharacterClass::Range{to_lower(min), to_lower(max)}) or
+ not contains(character_class.ranges, CharacterClass::Range{to_upper(min), to_upper(max)}))
+ could_ignore_case = false;
+ }
+ character_class.ignore_case = could_ignore_case;
+ }
+
if (character_class.ignore_case)
{
for (auto& range : character_class.ranges)
@@ -521,7 +533,7 @@ private:
if (character_class.ctypes == CharacterType::None and not character_class.negative and
character_class.ranges.size() == 1 and
character_class.ranges.front().min == character_class.ranges.front().max)
- return add_node(ParsedRegex::Literal, character_class.ranges.front().min);
+ return add_node(ParsedRegex::Literal, character_class.ranges.front().min, {1,1}, character_class.ignore_case);
if (character_class.ctypes != CharacterType::None and not character_class.negative and
character_class.ranges.empty())
@@ -585,14 +597,14 @@ private:
}
}
- NodeIndex add_node(ParsedRegex::Op op, Codepoint value = -1, ParsedRegex::Quantifier quantifier = {1, 1})
+ NodeIndex add_node(ParsedRegex::Op op, Codepoint value = -1, ParsedRegex::Quantifier quantifier = {1, 1}, bool ignore_case = false)
{
constexpr auto max_nodes = std::numeric_limits<int16_t>::max();
const NodeIndex res = m_parsed_regex.nodes.size();
if (res == max_nodes)
parse_error(format("regex parsed to more than {} ast nodes", max_nodes));
const NodeIndex next = res+1;
- m_parsed_regex.nodes.push_back({op, m_flags & Flags::IgnoreCase, next, value, quantifier});
+ m_parsed_regex.nodes.push_back({op, ignore_case or (m_flags & Flags::IgnoreCase), next, value, quantifier});
return res;
}
@@ -703,6 +715,7 @@ struct RegexCompiler
private:
template<RegexMode direction>
+ [[gnu::noinline]]
OpIndex compile_node_inner(ParsedRegex::NodeIndex index)
{
auto& node = get_node(index);
@@ -729,7 +742,15 @@ private:
push_inst(CompiledRegex::AnyCharExceptNewLine);
break;
case ParsedRegex::CharClass:
- push_inst(CompiledRegex::CharClass, {.character_class_index=int16_t(node.value)});
+ if (auto& char_class = m_parsed_regex.character_classes[node.value];
+ char_class.ranges.size() == 1 and char_class.ctypes == CharacterType::None and
+ char_class.ranges[0].max <= std::numeric_limits<uint8_t>::max())
+ push_inst(CompiledRegex::CharRange, {.range={.min=uint8_t(char_class.ranges[0].min),
+ .max=uint8_t(char_class.ranges[0].max),
+ .ignore_case=char_class.ignore_case,
+ .negative=char_class.negative}});
+ else
+ push_inst(CompiledRegex::CharClass, {.character_class_index=int16_t(node.value)});
break;
case ParsedRegex::CharType:
push_inst(CompiledRegex::CharType, {.character_type=CharacterType{(unsigned char)node.value}});
@@ -1107,12 +1128,18 @@ String dump_regex(const CompiledRegex& program)
case CompiledRegex::AnyCharExceptNewLine:
res += "anything but newline\n";
break;
- case CompiledRegex::CharClass:
- res += format("character class {}\n", inst.param.character_class_index);
+ case CompiledRegex::CharRange:
+ res += format("character range {}[{}{}-{}]\n",
+ inst.param.range.ignore_case ? "(ignore case) " : "",
+ inst.param.range.negative ? "^" : "",
+ inst.param.range.min, inst.param.range.max);
break;
case CompiledRegex::CharType:
res += format("character type {}\n", to_underlying(inst.param.character_type));
break;
+ case CompiledRegex::CharClass:
+ res += format("character class {}\n", inst.param.character_class_index);
+ break;
case CompiledRegex::Jump:
res += format("jump {} ({:03})\n", inst.param.jump_offset, index + inst.param.jump_offset);
break;
@@ -1259,6 +1286,12 @@ auto test_regex = UnitTest{[]{
}
{
+ TestVM<> vm{R"([aA])"};
+ kak_assert(vm.exec("a"));
+ kak_assert(vm.exec("A"));
+ }
+
+ {
TestVM<> vm{R"(a{3,5}b)"};
kak_assert(not vm.exec("aab"));
kak_assert(vm.exec("aaab"));