diff options
| author | Maxime Coste <mawww@kakoune.org> | 2022-08-05 20:29:43 +1000 |
|---|---|---|
| committer | Maxime Coste <mawww@kakoune.org> | 2022-08-05 20:31:39 +1000 |
| commit | ca71d8997d6144fd7bc770ff458d469a989d576c (patch) | |
| tree | 877d54e0cd206ccddbf17dbed20479959e090aa1 /src | |
| parent | 26d14d52bb922ccdf896fd1dae392f6cbbbb132e (diff) | |
Reuse existing character classes when possible in regex
Diffstat (limited to 'src')
| -rw-r--r-- | src/regex_impl.cc | 13 |
| -rw-r--r-- | src/regex_impl.hh | 8 |
2 files changed, 18 insertions, 3 deletions
```diff
diff --git a/src/regex_impl.cc b/src/regex_impl.cc
index 256afcc1..40701e48 100644
--- a/src/regex_impl.cc
+++ b/src/regex_impl.cc
@@ -119,6 +119,7 @@ struct Children
         Index operator*() const { return m_pos; }
         bool operator!=(Sentinel) const { return m_pos != m_end; }
 
+    private:
         Index find_prev(Index parent, Index pos) const
         {
             Index child = parent+1;
@@ -544,8 +545,10 @@ private:
                 character_class.ranges.empty())
             return add_node(ParsedRegex::CharType, (Codepoint)character_class.ctypes);
 
-        auto class_id = m_parsed_regex.character_classes.size();
-        m_parsed_regex.character_classes.push_back(std::move(character_class));
+        auto it = std::find(m_parsed_regex.character_classes.begin(), m_parsed_regex.character_classes.end(), character_class);
+        auto class_id = it - m_parsed_regex.character_classes.begin();
+        if (it == m_parsed_regex.character_classes.end())
+            m_parsed_regex.character_classes.push_back(std::move(character_class));
         return add_node(ParsedRegex::CharClass, class_id);
     }
 
@@ -1537,6 +1540,12 @@ auto test_regex = UnitTest{[]{
     }
 
     {
+        TestVM<> vm{R"([\t-\r]\h+[\t-\r])"};
+        kak_assert(vm.character_classes.size() == 1);
+        kak_assert(vm.exec("\n \f"));
+    }
+
+    {
         TestVM<> vm{R"([^\x00-\x7F]+)"};
         kak_assert(not vm.exec("ascii"));
         kak_assert(vm.exec("←↑→↓"));
diff --git a/src/regex_impl.hh b/src/regex_impl.hh
index ca859592..73d69790 100644
--- a/src/regex_impl.hh
+++ b/src/regex_impl.hh
@@ -33,12 +33,18 @@ constexpr bool with_bit_ops(Meta::Type<CharacterType>) { return true; }
 
 struct CharacterClass
 {
-    struct Range { Codepoint min, max; };
+    struct Range
+    {
+        Codepoint min, max;
+        friend bool operator==(const Range&, const Range&) = default;
+    };
 
     Vector<Range, MemoryDomain::Regex> ranges;
     CharacterType ctypes = CharacterType::None;
     bool negative = false;
     bool ignore_case = false;
+
+    friend bool operator==(const CharacterClass&, const CharacterClass&) = default;
 };
 
 bool is_character_class(const CharacterClass& character_class, Codepoint cp);
```
