summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Maxime Coste <mawww@kakoune.org> 2024-06-12 19:55:34 +1000
committer Maxime Coste <mawww@kakoune.org> 2024-06-12 19:55:34 +1000
commit fe8f0f3371f535602dbcd641e78dbe18f449bc87 (patch)
tree 5ec3c36ec8983b2a034c288fa56ed2c018ad8e66 /src
parent 5a6fb51bdb43f15eafdbe1078183f754b4fecdf6 (diff)
parent faf83b10e25fe237c9e780d09386420c98110a5e (diff)
Merge remote-tracking branch 'Icantjuddle/master'
Diffstat (limited to 'src')
-rw-r--r--src/string.cc70
-rw-r--r--src/string.hh43
2 files changed, 81 insertions, 32 deletions
diff --git a/src/string.cc b/src/string.cc
index fe763165..a672433b 100644
--- a/src/string.cc
+++ b/src/string.cc
@@ -1,22 +1,27 @@
#include "string.hh"
-#include <cstdio>
#include <cstring>
+#include "assert.hh"
+#include "unit_tests.hh"
namespace Kakoune
{
+namespace
+{
+// Avoid including all of <algorithm> just for this.
+constexpr auto max(auto lhs, auto rhs) { return lhs > rhs ? lhs : rhs;}
+constexpr auto min(auto lhs, auto rhs) { return lhs < rhs ? lhs : rhs;}
+}
String::Data::Data(const char* data, size_t size, size_t capacity)
{
if (capacity > Short::capacity)
{
- if (capacity & 1)
- ++capacity;
-
- kak_assert(capacity < Long::max_capacity);
+ kak_assert(capacity <= Long::max_capacity);
u.l.ptr = Alloc{}.allocate(capacity+1);
u.l.size = size;
- u.l.capacity = capacity;
+ u.l.capacity = (capacity & Long::max_capacity);
+ u.l.mode = Long::active_mask;
if (data != nullptr)
memcpy(u.l.ptr, data, size);
@@ -60,26 +65,28 @@ String::Data& String::Data::operator=(Data&& other) noexcept
template<bool copy>
void String::Data::reserve(size_t new_capacity)
{
- if (capacity() != 0 and new_capacity <= capacity())
+ auto const current_capacity = capacity();
+ if (current_capacity != 0 and new_capacity <= current_capacity)
return;
- if (is_long())
- new_capacity = std::max(u.l.capacity * 2, new_capacity);
+ if (!is_long() and new_capacity <= Short::capacity)
+ return;
- if (new_capacity & 1)
- ++new_capacity;
+ kak_assert(new_capacity <= Long::max_capacity);
+ new_capacity = max(new_capacity, // Do not upgrade new_capacity to be over limit.
+ min(current_capacity * 2, Long::max_capacity));
- kak_assert(new_capacity < Long::max_capacity);
char* new_ptr = Alloc{}.allocate(new_capacity+1);
if (copy)
{
memcpy(new_ptr, data(), size()+1);
- u.l.size = size();
}
release();
+ u.l.size = size();
u.l.ptr = new_ptr;
- u.l.capacity = new_capacity;
+ u.l.capacity = (new_capacity & Long::max_capacity);
+ u.l.mode = Long::active_mask;
}
template void String::Data::reserve<true>(size_t);
@@ -89,6 +96,7 @@ void String::Data::force_size(size_t new_size)
{
reserve<false>(new_size);
set_size(new_size);
+ data()[new_size] = 0;
}
void String::Data::append(const char* str, size_t len)
@@ -131,17 +139,47 @@ void String::Data::set_size(size_t size)
if (is_long())
u.l.size = size;
else
- u.s.size = (size << 1) | 1;
+ u.s.remaining_size = Short::capacity - size;
}
void String::Data::set_short(const char* data, size_t size)
{
- u.s.size = (size << 1) | 1;
+ kak_assert(size <= Short::capacity);
+ u.s.remaining_size = Short::capacity - size;
if (data != nullptr)
memcpy(u.s.string, data, size);
u.s.string[size] = 0;
}
+UnitTest test_data{[]{
+ using Data = String::Data;
+ { // Basic data usage.
+ Data data;
+ kak_assert(data.size() == 0);
+ kak_assert(not data.is_long());
+ kak_assert(data.capacity() == 23);
+
+ // Should be SSO-ed.
+ data.append("test", 4);
+ kak_assert(data.size() == 4);
+ kak_assert(data.capacity() == 23);
+ kak_assert(not data.is_long());
+ kak_assert(data.data() == StringView("test"));
+ }
+ {
+ char large_buf[2048];
+ memset(large_buf, 'x', 2048);
+ Data data(large_buf, 2048);
+ kak_assert(data.size() == 2048);
+ kak_assert(data.capacity() >= 2048);
+
+ data.clear();
+ kak_assert(data.size() == 0);
+ kak_assert(not data.is_long());
+ kak_assert(data.capacity() == 23);
+ }
+}};
+
const String String::ms_empty;
}
diff --git a/src/string.hh b/src/string.hh
index 004be04c..857a1b19 100644
--- a/src/string.hh
+++ b/src/string.hh
@@ -1,12 +1,13 @@
#ifndef string_hh_INCLUDED
#define string_hh_INCLUDED
+#include <climits>
+#include <cstddef>
#include "memory.hh"
#include "hash.hh"
#include "units.hh"
#include "utf8.hh"
-#include <climits>
namespace Kakoune
{
@@ -156,17 +157,22 @@ public:
// String data storage using small string optimization.
//
- // the LSB of the last byte is used to flag if we are using the small buffer
- // or an allocated one. On big endian systems that means the allocated
- // capacity must be pair, on little endian systems that means the allocated
- // capacity cannot use its most significant byte, so we effectively limit
- // capacity to 2^24 on 32bit arch, and 2^60 on 64.
+ // The MSB of the last byte flags whether we are using the allocated buffer (1)
+ // or the small in-situ storage (0). That means the allocated capacity cannot
+ // use its most significant byte, so we effectively limit capacity to
+ // 2^24 - 1 on 32bit arch, and 2^56 - 1 on 64bit (see Long::max_capacity).
+ //
+ // There is also a special NoCopy mode in which the data referred to is un-owned.
+ // It is indicated by being in Long mode with capacity == 0.
struct Data
{
using Alloc = Allocator<char, MemoryDomain::String>;
Data() { set_empty(); }
- Data(NoCopy, const char* data, size_t size) : u{Long{const_cast<char*>(data), size, 0}} {}
+ Data(NoCopy, const char* data, size_t size) : u{Long{const_cast<char*>(data),
+ size,
+ /*capacity=*/0,
+ /*mode=*/Long::active_mask}} {}
Data(const char* data, size_t size, size_t capacity);
Data(const char* data, size_t size) : Data(data, size, size) {}
@@ -177,8 +183,8 @@ public:
Data& operator=(const Data& other);
Data& operator=(Data&& other) noexcept;
- bool is_long() const { return (u.s.size & 1) == 0; }
- size_t size() const { return is_long() ? u.l.size : (u.s.size >> 1); }
+ bool is_long() const { return (u.l.mode& Long::active_mask) > 0; }
+ size_t size() const { return is_long() ? u.l.size : (Short::capacity - u.s.remaining_size); }
size_t capacity() const { return is_long() ? u.l.capacity : Short::capacity; }
const char* data() const { return is_long() ? u.l.ptr : u.s.string; }
@@ -195,18 +201,23 @@ public:
struct Long
{
static constexpr size_t max_capacity =
- (size_t)1 << 8 * (sizeof(size_t) - 1);
+ ((size_t)1 << (CHAR_BIT * (sizeof(size_t) - 1))) - 1;
char* ptr;
size_t size;
- size_t capacity;
+ size_t capacity: (sizeof(size_t) - 1) *CHAR_BIT;
+ unsigned char mode;
+ static constexpr unsigned char active_mask = 0b1000'0000;
};
struct Short
{
- static constexpr size_t capacity = sizeof(Long) - 2;
- char string[capacity+1];
- unsigned char size;
+ static constexpr size_t capacity = sizeof(Long) - 1;
+ char string[capacity];
+ // When the string is full, remaining_size is 0 and doubles as the null terminator.
+ // When the string is empty, remaining_size equals Short::capacity (23, i.e.
+ // 0b00010111, on 64bit), which does not collide with Long::active_mask.
+ unsigned char remaining_size;
};
union
@@ -217,11 +228,11 @@ public:
void release()
{
- if (is_long() and u.l.capacity != 0)
+ if (is_long() and (u.l.capacity != 0))
Alloc{}.deallocate(u.l.ptr, u.l.capacity+1);
}
- void set_empty() { u.s.size = 1; u.s.string[0] = 0; }
+ void set_empty() { u.s.remaining_size = Short::capacity; u.s.string[0] = '\0'; }
void set_short(const char* data, size_t size);
};