diff options
| author | Maxime Coste <mawww@kakoune.org> | 2018-05-21 22:22:34 +1000 |
|---|---|---|
| committer | Maxime Coste <mawww@kakoune.org> | 2018-07-05 07:54:28 +1000 |
| commit | c829595d017eb2bddb059dd984d047819827723b (patch) | |
| tree | 50f7cf5b5cd8f14ada48d5b8b29b76ad19ca5c17 /src/command_manager.cc | |
| parent | cad5f37efd4d4178d0f6942df063074e9ab7e686 (diff) | |
Refactor command line parsing
Command line parsing now works as follows:
* Quoted strings ('...', "..." and %~...~ with '~' non-nestable)
use 'doubling-up' for escaping their delimiter: if the delimiter
appears twice in a row, it is considered part of the string and
represents one delimiter character. So 'abc''def' == "abc'def". No
other escaping takes place in those strings.
* Balanced strings (%{...}) do not support any kind of escaping, but
find the matching closing delimiter by taking nesting into account.
So %{abc{def}} == "abc{def}".
* Non quoted words support escaping of `;` and whitespace with `\`.
`%`, `'` and `"` can be escaped with `\` at the start of the word;
they do not need escaping (and will not be escaped) elsewhere in
a word, where they are treated literally. Any other use of `\` is a
literal `\`. So \%abc%\;\ def == "%abc%; def"
As discussed in #2046 this should make our command line syntax more
robust, provide a simple programmatic way to escape a string's content
(s/<delim>/<delim><delim>/g), be well defined instead of ad-hoc
undocumented behaviour, and interact nicely with other common
escaping by avoiding escaping hell (:grep <regex> can in most cases
be written with the regex unquoted).
Diffstat (limited to 'src/command_manager.cc')
| -rw-r--r-- | src/command_manager.cc | 149 |
1 files changed, 106 insertions, 43 deletions
diff --git a/src/command_manager.cc b/src/command_manager.cc index 9429b334..a4af3e62 100644 --- a/src/command_manager.cc +++ b/src/command_manager.cc @@ -11,6 +11,7 @@ #include "register_manager.hh" #include "shell_manager.hh" #include "utils.hh" +#include "unit_tests.hh" #include <algorithm> @@ -67,44 +68,41 @@ bool is_command_separator(Codepoint c) return c == ';' or c == '\n'; } -template<typename Func, typename UnescapeFunc> -String get_until_delimiter(Reader& reader, Func is_delimiter, - UnescapeFunc unescape = [](Codepoint) { return false; }) +struct QuotedResult +{ + String content; + bool terminated; +}; + +QuotedResult parse_quoted(Reader& reader, Codepoint delimiter) { auto beg = reader.pos; String str; - bool was_antislash = false; while (reader) { const Codepoint c = *reader; - if (is_delimiter(c) or (was_antislash and unescape(c))) + if (c == delimiter) { str += reader.substr_from(beg); - if (was_antislash) + ++reader; + if (reader and *reader == delimiter) { - str.back() = c; + str += String{c}; beg = reader.pos+1; } else - return str; + return {str, true}; } - was_antislash = c == '\\'; ++reader; } if (beg < reader.str.end()) str += reader.substr_from(beg); - return str; + return {str, false}; } -[[gnu::always_inline]] -inline String get_until_delimiter(Reader& reader, Codepoint c) -{ - return get_until_delimiter(reader, [c](Codepoint ch) { return c == ch; }, [](Codepoint) { return false; }); -} - -StringView get_until_closing_delimiter(Reader& reader, Codepoint opening_delimiter, - Codepoint closing_delimiter) +QuotedResult parse_quoted_balanced(Reader& reader, Codepoint opening_delimiter, + Codepoint closing_delimiter) { kak_assert(utf8::codepoint(utf8::previous(reader.pos, reader.str.begin()), reader.str.end()) == opening_delimiter); @@ -115,16 +113,43 @@ StringView get_until_closing_delimiter(Reader& reader, Codepoint opening_delimit const Codepoint c = *reader; if (c == opening_delimiter) ++level; - else if (c == closing_delimiter) + else if (c 
== closing_delimiter and level-- == 0) + { + auto content = reader.substr_from(start); + ++reader; + return {content.str(), true}; + } + ++reader; + } + return {reader.substr_from(start).str(), false}; +} + +String parse_unquoted(Reader& reader) +{ + auto beg = reader.pos; + String str; + bool was_antislash = false; + + while (reader) + { + const Codepoint c = *reader; + if (is_command_separator(c) or is_horizontal_blank(c)) { - if (level > 0) - --level; + str += reader.substr_from(beg); + if (was_antislash) + { + str.back() = c; + beg = reader.pos+1; + } else - break; + return str; } + was_antislash = c == '\\'; ++reader; } - return reader.substr_from(start); + if (beg < reader.str.end()) + str += reader.substr_from(beg); + return str; } Token::Type token_type(StringView type_name, bool throw_on_invalid) @@ -203,25 +228,24 @@ Token parse_percent_token(Reader& reader, bool throw_on_unterminated) if (it != std::end(matching_pairs)) { const Codepoint closing_delimiter = it->closing; - auto token = get_until_closing_delimiter(reader, opening_delimiter, - closing_delimiter); - if (throw_on_unterminated and not reader) + auto quoted = parse_quoted_balanced(reader, opening_delimiter, closing_delimiter); + if (throw_on_unterminated and not quoted.terminated) throw parse_error{format("{}:{}: unterminated string '%{}{}...{}'", coord.line, coord.column, type_name, opening_delimiter, closing_delimiter)}; - return {type, start - str_beg, coord, token.str()}; + return {type, start - str_beg, coord, std::move(quoted.content)}; } else { - String token = get_until_delimiter(reader, opening_delimiter); + auto quoted = parse_quoted(reader, opening_delimiter); - if (throw_on_unterminated and not reader) + if (throw_on_unterminated and not quoted.terminated) throw parse_error{format("{}:{}: unterminated string '%{}{}...{}'", coord.line, coord.column, type_name, opening_delimiter, opening_delimiter)}; - return {type, start - str_beg, coord, std::move(token)}; + return {type, start - 
str_beg, coord, std::move(quoted.content)}; } } @@ -297,20 +321,16 @@ Optional<Token> CommandParser::read_token(bool throw_on_unterminated) if (c == '"' or c == '\'') { start = (++m_reader).pos; - String token = get_until_delimiter(m_reader, c); - if (throw_on_unterminated and not m_reader) + QuotedResult quoted = parse_quoted(m_reader, c); + if (throw_on_unterminated and not quoted.terminated) throw parse_error{format("unterminated string {0}...{0}", c)}; - if (m_reader) - ++m_reader; return Token{c == '"' ? Token::Type::RawEval : Token::Type::RawQuoted, - start - line.begin(), coord, std::move(token)}; + start - line.begin(), coord, std::move(quoted.content)}; } else if (c == '%') { auto token = parse_percent_token(m_reader, throw_on_unterminated); - if (m_reader) - ++m_reader; return token; } else if (is_command_separator(*m_reader)) @@ -321,11 +341,14 @@ Optional<Token> CommandParser::read_token(bool throw_on_unterminated) } else { - String str = get_until_delimiter(m_reader, [](Codepoint c) { - return is_command_separator(c) or is_horizontal_blank(c); - }, [](Codepoint c) { return c == '%'; }); + if (c == '\\') + { + auto next = utf8::codepoint(utf8::next(m_reader.pos, m_reader.str.end()), m_reader.str.end()); + if (next == '%' or next == '\'' or next == '"') + ++m_reader; + } return Token{Token::Type::Raw, start - line.begin(), - coord, std::move(str)}; + coord, parse_unquoted(m_reader)}; } return {}; } @@ -350,7 +373,7 @@ String expand_impl(StringView str, const Context& context, { res += reader.substr_from(beg); res.back() = c; - beg = (++reader).pos; + beg = reader.pos; } } else if (c == '%') @@ -358,7 +381,7 @@ String expand_impl(StringView str, const Context& context, res += reader.substr_from(beg); res += postprocess(expand_token(parse_percent_token(reader, true), context, shell_context)); - beg = (++reader).pos; + beg = reader.pos; } else ++reader; @@ -660,4 +683,44 @@ Completions CommandManager::complete(const Context& context, return Completions{}; } 
+UnitTest test_command_parsing{[] +{ + auto check_quoted = [](StringView str, bool terminated, StringView content) + { + Reader reader{str}; + const Codepoint delimiter = *reader; + auto quoted = parse_quoted(++reader, delimiter); + kak_assert(quoted.terminated == terminated); + kak_assert(quoted.content == content); + }; + + check_quoted("'abc'", true, "abc"); + check_quoted("'abc''def", false, "abc'def"); + check_quoted("'abc''def'''", true, "abc'def'"); + + auto check_balanced = [](StringView str, Codepoint opening, Codepoint closing, bool terminated, StringView content) + { + Reader reader{str}; + auto quoted = parse_quoted_balanced(++reader, opening, closing); + kak_assert(quoted.terminated == terminated); + kak_assert(quoted.content == content); + }; + + check_balanced("{abc}", '{', '}', true, "abc"); + check_balanced("{abc{def}}", '{', '}', true, "abc{def}"); + check_balanced("{{abc}{def}", '{', '}', false, "{abc}{def}"); + + auto check_unquoted = [](StringView str, StringView content) + { + Reader reader{str}; + auto res = parse_unquoted(reader); + kak_assert(res == content); + }; + + check_unquoted("abc def", "abc"); + check_unquoted("abc; def", "abc"); + check_unquoted("abc\\; def", "abc;"); + check_unquoted("abc\\;\\ def", "abc; def"); +}}; + } |
