summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMaxime Coste <frrrwww@gmail.com>2012-10-08 19:12:09 +0200
committerMaxime Coste <frrrwww@gmail.com>2012-10-08 19:12:09 +0200
commit0a686573abe217b7bd38ae252f5673c300ab4adc (patch)
treea033cb6c869a6c6427c7acb42d4e4ded88713acb /src
parented592a30056f65f5c431c10efc9e11f970239cbb (diff)
utf8 awareness in selectors
Diffstat (limited to 'src')
-rw-r--r--src/selectors.cc162
-rw-r--r--src/selectors.hh12
2 files changed, 99 insertions, 75 deletions
diff --git a/src/selectors.cc b/src/selectors.cc
index 2ac1168c..bed7d23e 100644
--- a/src/selectors.cc
+++ b/src/selectors.cc
@@ -2,37 +2,42 @@
#include "string.hh"
+#include "utf8_iterator.hh"
+
#include <algorithm>
namespace Kakoune
{
+using utf8::Codepoint;
+using Utf8Iterator = utf8::utf8_iterator<BufferIterator>;
+
namespace
{
-bool is_eol(char c)
+bool is_eol(Codepoint c)
{
return c == '\n';
}
-bool is_blank(char c)
+bool is_blank(Codepoint c)
{
return c == ' ' or c == '\t';
}
template<bool punctuation_is_word = false>
-bool is_word(char c)
+bool is_word(Codepoint c)
{
return Kakoune::is_word(c);
}
template<>
-bool is_word<true>(char c)
+bool is_word<true>(Codepoint c)
{
return !is_blank(c) and !is_eol(c);
}
-static bool is_punctuation(char c)
+static bool is_punctuation(Codepoint c)
{
return not (is_word(c) or is_blank(c) or is_eol(c));
}
@@ -46,7 +51,7 @@ enum class CharCategories
};
template<bool punctuation_is_word = false>
-CharCategories categorize(char c)
+CharCategories categorize(Codepoint c)
{
if (is_word(c))
return CharCategories::Word;
@@ -58,20 +63,31 @@ CharCategories categorize(char c)
: CharCategories::Punctuation;
}
-template<typename T>
-bool skip_while(BufferIterator& it, T condition)
+bool is_begin(const BufferIterator& it) { return it.is_begin(); }
+bool is_end(const BufferIterator& it) { return it.is_end(); }
+
+bool is_begin(const Utf8Iterator& it) { return it.underlying_iterator().is_begin(); }
+bool is_end(const Utf8Iterator& it) { return it.underlying_iterator().is_end(); }
+
+template<typename Iterator, typename T>
+bool skip_while(Iterator& it, T condition)
{
- while (not it.is_end() and condition(*it))
+ while (not is_end(it) and condition(*it))
++it;
- return not it.is_end() and condition(*it);
+ return not is_end(it) and condition(*it);
}
-template<typename T>
-bool skip_while_reverse(BufferIterator& it, T condition)
+template<typename Iterator, typename T>
+bool skip_while_reverse(Iterator& it, T condition)
{
- while (not it.is_begin() and condition(*it))
+ while (not is_begin(it) and condition(*it))
--it;
- return not it.is_end() and condition(*it);
+ return not is_end(it) and condition(*it);
+}
+
+Selection utf8_selection(const Utf8Iterator& first, const Utf8Iterator& last)
+{
+ return Selection(first.underlying_iterator(), last.underlying_iterator());
}
}
@@ -81,13 +97,13 @@ typedef boost::regex_iterator<BufferIterator> RegexIterator;
template<bool punctuation_is_word>
SelectionAndCaptures select_to_next_word(const Selection& selection)
{
- BufferIterator begin = selection.last();
+ Utf8Iterator begin = selection.last();
if (categorize<punctuation_is_word>(*begin) !=
categorize<punctuation_is_word>(*(begin+1)))
++begin;
skip_while(begin, is_eol);
- BufferIterator end = begin+1;
+ Utf8Iterator end = begin+1;
if (not punctuation_is_word and is_punctuation(*begin))
skip_while(end, is_punctuation);
@@ -96,7 +112,7 @@ SelectionAndCaptures select_to_next_word(const Selection& selection)
bool with_end = skip_while(end, is_blank);
- return Selection(begin, with_end ? end : end-1);
+ return utf8_selection(begin, with_end ? end : end-1);
}
template SelectionAndCaptures select_to_next_word<false>(const Selection&);
template SelectionAndCaptures select_to_next_word<true>(const Selection&);
@@ -104,13 +120,13 @@ template SelectionAndCaptures select_to_next_word<true>(const Selection&);
template<bool punctuation_is_word>
SelectionAndCaptures select_to_next_word_end(const Selection& selection)
{
- BufferIterator begin = selection.last();
+ Utf8Iterator begin = selection.last();
if (categorize<punctuation_is_word>(*begin) !=
categorize<punctuation_is_word>(*(begin+1)))
++begin;
skip_while(begin, is_eol);
- BufferIterator end = begin;
+ Utf8Iterator end = begin;
skip_while(end, is_blank);
bool with_end = false;
@@ -119,7 +135,7 @@ SelectionAndCaptures select_to_next_word_end(const Selection& selection)
else if (is_word<punctuation_is_word>(*end))
with_end = skip_while(end, is_word<punctuation_is_word>);
- return Selection(begin, with_end ? end : end-1);
+ return utf8_selection(begin, with_end ? end : end-1);
}
template SelectionAndCaptures select_to_next_word_end<false>(const Selection&);
template SelectionAndCaptures select_to_next_word_end<true>(const Selection&);
@@ -127,14 +143,14 @@ template SelectionAndCaptures select_to_next_word_end<true>(const Selection&);
template<bool punctuation_is_word>
SelectionAndCaptures select_to_previous_word(const Selection& selection)
{
- BufferIterator begin = selection.last();
+ Utf8Iterator begin = selection.last();
if (categorize<punctuation_is_word>(*begin) !=
categorize<punctuation_is_word>(*(begin-1)))
--begin;
skip_while_reverse(begin, is_eol);
- BufferIterator end = begin;
+ Utf8Iterator end = begin;
skip_while_reverse(end, is_blank);
bool with_end = false;
@@ -143,30 +159,30 @@ SelectionAndCaptures select_to_previous_word(const Selection& selection)
else if (is_word<punctuation_is_word>(*end))
with_end = skip_while_reverse(end, is_word<punctuation_is_word>);
- return Selection(begin, with_end ? end : end+1);
+ return utf8_selection(begin, with_end ? end : end+1);
}
template SelectionAndCaptures select_to_previous_word<false>(const Selection&);
template SelectionAndCaptures select_to_previous_word<true>(const Selection&);
SelectionAndCaptures select_line(const Selection& selection)
{
- BufferIterator first = selection.last();
- if (*first == '\n' and not (first + 1).is_end())
+ Utf8Iterator first = selection.last();
+ if (*first == '\n' and not is_end(first + 1))
++first;
- while (not first.is_begin() and *(first - 1) != '\n')
+ while (not is_begin(first) and *(first - 1) != '\n')
--first;
- BufferIterator last = first;
- while (not (last + 1).is_end() and *last != '\n')
+ Utf8Iterator last = first;
+ while (not is_end(last + 1) and *last != '\n')
++last;
- return Selection(first, last);
+ return utf8_selection(first, last);
}
SelectionAndCaptures select_matching(const Selection& selection)
{
std::vector<char> matching_pairs = { '(', ')', '{', '}', '[', ']', '<', '>' };
- BufferIterator it = selection.last();
+ Utf8Iterator it = selection.last();
std::vector<char>::iterator match = matching_pairs.end();
while (not is_eol(*it))
{
@@ -178,19 +194,19 @@ SelectionAndCaptures select_matching(const Selection& selection)
if (match == matching_pairs.end())
return selection;
- BufferIterator begin = it;
+ Utf8Iterator begin = it;
if (((match - matching_pairs.begin()) % 2) == 0)
{
int level = 0;
const char opening = *match;
const char closing = *(match+1);
- while (not it.is_end())
+ while (not is_end(it))
{
if (*it == opening)
++level;
else if (*it == closing and --level == 0)
- return Selection(begin, it);
+ return utf8_selection(begin, it);
++it;
}
@@ -200,12 +216,12 @@ SelectionAndCaptures select_matching(const Selection& selection)
int level = 0;
const char opening = *(match-1);
const char closing = *match;
- while (not it.is_begin())
+ while (not is_begin(it))
{
if (*it == closing)
++level;
else if (*it == opening and --level == 0)
- return Selection(begin, it);
+ return utf8_selection(begin, it);
--it;
}
}
@@ -213,12 +229,12 @@ SelectionAndCaptures select_matching(const Selection& selection)
}
SelectionAndCaptures select_surrounding(const Selection& selection,
- const std::pair<char, char>& matching,
- bool inside)
+ const CodepointPair& matching,
+ bool inside)
{
int level = 0;
- BufferIterator first = selection.last();
- while (not first.is_begin())
+ Utf8Iterator first = selection.last();
+ while (not is_begin(first))
{
if (first != selection.last() and *first == matching.second)
++level;
@@ -235,8 +251,8 @@ SelectionAndCaptures select_surrounding(const Selection& selection,
return selection;
level = 0;
- BufferIterator last = first + 1;
- while (not last.is_end())
+ Utf8Iterator last = first + 1;
+ while (not is_end(last))
{
if (*last == matching.first)
++level;
@@ -258,64 +274,64 @@ SelectionAndCaptures select_surrounding(const Selection& selection,
if (first != last)
--last;
}
- return Selection(first, last);
+ return utf8_selection(first, last);
}
SelectionAndCaptures select_to(const Selection& selection,
- char c, int count, bool inclusive)
+ Codepoint c, int count, bool inclusive)
{
- BufferIterator begin = selection.last();
- BufferIterator end = begin;
+ Utf8Iterator begin = selection.last();
+ Utf8Iterator end = begin;
do
{
++end;
- skip_while(end, [c](char cur) { return not is_eol(cur) and cur != c; });
- if (end.is_end() or is_eol(*end))
+ skip_while(end, [c](Codepoint cur) { return not is_eol(cur) and cur != c; });
+ if (is_end(end) or is_eol(*end))
return selection;
}
while (--count > 0);
- return Selection(begin, inclusive ? end : end-1);
+ return utf8_selection(begin, inclusive ? end : end-1);
}
SelectionAndCaptures select_to_reverse(const Selection& selection,
- char c, int count, bool inclusive)
+ Codepoint c, int count, bool inclusive)
{
- BufferIterator begin = selection.last();
- BufferIterator end = begin;
+ Utf8Iterator begin = selection.last();
+ Utf8Iterator end = begin;
do
{
--end;
- skip_while_reverse(end, [c](char cur) { return not is_eol(cur) and cur != c; });
- if (end.is_begin() or is_eol(*end))
+ skip_while_reverse(end, [c](Codepoint cur) { return not is_eol(cur) and cur != c; });
+ if (is_begin(end) or is_eol(*end))
return selection;
}
while (--count > 0);
- return Selection(begin, inclusive ? end : end+1);
+ return utf8_selection(begin, inclusive ? end : end+1);
}
SelectionAndCaptures select_to_eol(const Selection& selection)
{
- BufferIterator begin = selection.last();
- BufferIterator end = begin + 1;
- skip_while(end, [](char cur) { return not is_eol(cur); });
- return Selection(begin, end-1);
+ Utf8Iterator begin = selection.last();
+ Utf8Iterator end = begin + 1;
+ skip_while(end, [](Codepoint cur) { return not is_eol(cur); });
+ return utf8_selection(begin, end-1);
}
SelectionAndCaptures select_to_eol_reverse(const Selection& selection)
{
- BufferIterator begin = selection.last();
- BufferIterator end = begin - 1;
- skip_while_reverse(end, [](char cur) { return not is_eol(cur); });
- return Selection(begin, end.is_begin() ? end : end+1);
+ Utf8Iterator begin = selection.last();
+ Utf8Iterator end = begin - 1;
+ skip_while_reverse(end, [](Codepoint cur) { return not is_eol(cur); });
+ return utf8_selection(begin, is_begin(end) ? end : end+1);
}
template<bool punctuation_is_word>
SelectionAndCaptures select_whole_word(const Selection& selection, bool inner)
{
- BufferIterator first = selection.last();
- BufferIterator last = first;
+ Utf8Iterator first = selection.last();
+ Utf8Iterator last = first;
if (is_word(*first))
{
if (not skip_while_reverse(first, is_word<punctuation_is_word>))
@@ -334,13 +350,14 @@ SelectionAndCaptures select_whole_word(const Selection& selection, bool inner)
skip_while(last, is_word<punctuation_is_word>);
}
--last;
- return Selection(first, last);
+ return utf8_selection(first, last);
}
template SelectionAndCaptures select_whole_word<false>(const Selection&, bool);
template SelectionAndCaptures select_whole_word<true>(const Selection&, bool);
SelectionAndCaptures select_whole_lines(const Selection& selection)
{
+ // no need to be utf8 aware for is_eol as we only use \n as line seperator
BufferIterator first = selection.first();
BufferIterator last = selection.last();
BufferIterator& to_line_start = first <= last ? first : last;
@@ -352,14 +369,13 @@ SelectionAndCaptures select_whole_lines(const Selection& selection)
skip_while(to_line_end, [](char cur) { return not is_eol(cur); });
-
return Selection(first, last);
}
SelectionAndCaptures select_whole_buffer(const Selection& selection)
{
const Buffer& buffer = selection.first().buffer();
- return Selection(buffer.begin(), buffer.end()-1);
+ return Selection(buffer.begin(), utf8::previous(buffer.end()));
}
SelectionAndCaptures select_next_match(const Selection& selection,
@@ -367,6 +383,8 @@ SelectionAndCaptures select_next_match(const Selection& selection,
{
try
{
+ // regex matching do not use Utf8Iterator as boost::regex handle utf8
+ // decoding itself
BufferIterator begin = selection.last();
BufferIterator end = begin;
CaptureList captures;
@@ -374,7 +392,7 @@ SelectionAndCaptures select_next_match(const Selection& selection,
Regex ex(regex.begin(), regex.end());
boost::match_results<BufferIterator> matches;
- if (boost::regex_search(begin+1, begin.buffer().end(),
+ if (boost::regex_search(utf8::next(begin), begin.buffer().end(),
matches, ex))
{
begin = matches[0].first;
@@ -382,7 +400,7 @@ SelectionAndCaptures select_next_match(const Selection& selection,
for (auto& match : matches)
captures.push_back(String(match.first, match.second));
}
- else if (boost::regex_search(begin.buffer().begin(), begin+1,
+ else if (boost::regex_search(begin.buffer().begin(), utf8::next(begin),
matches, ex))
{
begin = matches[0].first;
@@ -396,7 +414,7 @@ SelectionAndCaptures select_next_match(const Selection& selection,
if (begin == end)
++end;
- return SelectionAndCaptures(begin, end - 1, std::move(captures));
+ return SelectionAndCaptures(begin, utf8::previous(end), std::move(captures));
}
catch (boost::regex_error& err)
{
@@ -427,7 +445,7 @@ SelectionAndCapturesList select_all_matches(const Selection& selection,
captures.push_back(String(match.first, match.second));
result.push_back(SelectionAndCaptures(begin,
- begin == end ? end : end-1,
+ begin == end ? end : utf8::previous(end),
std::move(captures)));
}
return result;
@@ -454,7 +472,7 @@ SelectionAndCapturesList split_selection(const Selection& selection,
{
BufferIterator end = (*re_it)[0].first;
- result.push_back(Selection(begin, (begin == end) ? end : end-1));
+ result.push_back(Selection(begin, (begin == end) ? end : utf8::previous(end)));
begin = (*re_it)[0].second;
}
result.push_back(Selection(begin, selection.last()));
diff --git a/src/selectors.hh b/src/selectors.hh
index b9acd08e..fc6a628a 100644
--- a/src/selectors.hh
+++ b/src/selectors.hh
@@ -2,6 +2,7 @@
#define selectors_hh_INCLUDED
#include "selection.hh"
+#include "utf8.hh"
namespace Kakoune
{
@@ -15,14 +16,19 @@ SelectionAndCaptures select_to_previous_word(const Selection& selection);
SelectionAndCaptures select_line(const Selection& selection);
SelectionAndCaptures select_matching(const Selection& selection);
+
+
+using CodepointPair = std::pair<utf8::Codepoint, utf8::Codepoint>;
SelectionAndCaptures select_surrounding(const Selection& selection,
- const std::pair<char, char>& matching,
+ const CodepointPair& matching,
bool inside);
SelectionAndCaptures select_to(const Selection& selection,
- char c, int count, bool inclusive);
+ utf8::Codepoint c,
+ int count, bool inclusive);
SelectionAndCaptures select_to_reverse(const Selection& selection,
- char c, int count, bool inclusive);
+ utf8::Codepoint c,
+ int count, bool inclusive);
SelectionAndCaptures select_to_eol(const Selection& selection);
SelectionAndCaptures select_to_eol_reverse(const Selection& selection);