summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Judd <bjudd61@gmail.com>2024-06-11 08:33:35 -0700
committerBen Judd <bjudd61@gmail.com>2024-06-11 08:33:35 -0700
commitfaf83b10e25fe237c9e780d09386420c98110a5e (patch)
treea44e9c30cb0ee12dd5d69983fadffe46a52edb00
parent9c185249a2943bb81fcdff47c0d9423d0cc8caa6 (diff)
Switch to bitfield.
-rw-r--r--src/string.cc18
-rw-r--r--src/string.hh53
2 files changed, 34 insertions, 37 deletions
diff --git a/src/string.cc b/src/string.cc
index 7be39715..a672433b 100644
--- a/src/string.cc
+++ b/src/string.cc
@@ -1,11 +1,17 @@
#include "string.hh"
-#include <cstdio>
+#include <cstring>
#include "assert.hh"
#include "unit_tests.hh"
namespace Kakoune
{
+namespace
+{
+// File-local max/min helpers: avoids including all of <algorithm> just for these.
+constexpr auto max(auto lhs, auto rhs) { return lhs > rhs ? lhs : rhs;}
+constexpr auto min(auto lhs, auto rhs) { return lhs < rhs ? lhs : rhs;}
+}
String::Data::Data(const char* data, size_t size, size_t capacity)
{
@@ -14,7 +20,8 @@ String::Data::Data(const char* data, size_t size, size_t capacity)
kak_assert(capacity <= Long::max_capacity);
u.l.ptr = Alloc{}.allocate(capacity+1);
u.l.size = size;
- u.l.set_capacity(capacity);
+ u.l.capacity = (capacity & Long::max_capacity);
+ u.l.mode = Long::active_mask;
if (data != nullptr)
memcpy(u.l.ptr, data, size);
@@ -66,8 +73,8 @@ void String::Data::reserve(size_t new_capacity)
return;
kak_assert(new_capacity <= Long::max_capacity);
- new_capacity = std::max(new_capacity, // Do not upgrade new_capacity to be over limit.
- std::min(current_capacity * 2, Long::max_capacity));
+ new_capacity = max(new_capacity, // Do not upgrade new_capacity to be over limit.
+ min(current_capacity * 2, Long::max_capacity));
char* new_ptr = Alloc{}.allocate(new_capacity+1);
if (copy)
@@ -78,7 +85,8 @@ void String::Data::reserve(size_t new_capacity)
u.l.size = size();
u.l.ptr = new_ptr;
- u.l.set_capacity(new_capacity);
+ u.l.capacity = (new_capacity & Long::max_capacity);
+ u.l.mode = Long::active_mask;
}
template void String::Data::reserve<true>(size_t);
diff --git a/src/string.hh b/src/string.hh
index b3102272..857a1b19 100644
--- a/src/string.hh
+++ b/src/string.hh
@@ -1,14 +1,13 @@
#ifndef string_hh_INCLUDED
#define string_hh_INCLUDED
-#include <bit>
-#include <cstring>
+#include <climits>
+#include <cstddef>
#include "memory.hh"
#include "hash.hh"
#include "units.hh"
#include "utf8.hh"
-#include <climits>
namespace Kakoune
{
@@ -158,17 +157,22 @@ public:
// String data storage using small string optimization.
//
- // the MSB of the last byte is used to flag if we are using the allocated buffer
- // (1) or a small one (0). On big endian systems that means the allocated
- // capacity must be pair, on little endian systems that means the allocated
- // capacity cannot use its most significant byte, so we effectively limit
- // capacity to 2^24 on 32bit arch, and 2^56 on 64.
+ // The MSB of the last byte flags whether we are using the allocated buffer
+ // (1) or the small in-situ storage (0). That means the allocated capacity
+ // cannot use its most significant byte, so we effectively limit capacity to
+ // 2^24 - 1 on 32bit arch, and 2^56 - 1 on 64bit.
+ //
+ // There is also a special NoCopy mode in which the data referred to is un-owned.
+ // It is indicated by being in Long mode with capacity == 0.
struct Data
{
using Alloc = Allocator<char, MemoryDomain::String>;
Data() { set_empty(); }
- Data(NoCopy, const char* data, size_t size) : u{Long{const_cast<char*>(data), size, {0}, Short::inactive_mask}} {}
+ Data(NoCopy, const char* data, size_t size) : u{Long{const_cast<char*>(data),
+ size,
+ /*capacity=*/0,
+ /*mode=*/Long::active_mask}} {}
Data(const char* data, size_t size, size_t capacity);
Data(const char* data, size_t size) : Data(data, size, size) {}
@@ -179,9 +183,9 @@ public:
Data& operator=(const Data& other);
Data& operator=(Data&& other) noexcept;
- bool is_long() const { return (u.s.remaining_size & Short::inactive_mask) > 0; }
+ bool is_long() const { return (u.l.mode& Long::active_mask) > 0; }
size_t size() const { return is_long() ? u.l.size : (Short::capacity - u.s.remaining_size); }
- size_t capacity() const { return is_long() ? u.l.capacity() : Short::capacity; }
+ size_t capacity() const { return is_long() ? u.l.capacity : Short::capacity; }
const char* data() const { return is_long() ? u.l.ptr : u.s.string; }
char* data() { return is_long() ? u.l.ptr : u.s.string; }
@@ -201,23 +205,9 @@ public:
char* ptr;
size_t size;
- unsigned char m_capacity[sizeof(size_t) - 1];
+ size_t capacity: (sizeof(size_t) - 1) *CHAR_BIT;
unsigned char mode;
- size_t capacity() const
- {
- size_t ret{};
- auto* dest = ((unsigned char*)&ret) +
- (std::endian::native == std::endian::big ? 1: 0);
- memcpy(dest, m_capacity, sizeof m_capacity);
- return ret;
- }
- void set_capacity(size_t cap)
- {
- auto* src = ((unsigned char*)&cap) +
- (std::endian::native == std::endian::big ? 1: 0);
- memcpy(m_capacity, src, sizeof m_capacity);
- mode = Short::inactive_mask;
- }
+ static constexpr unsigned char active_mask = 0b1000'0000;
};
struct Short
@@ -225,9 +215,9 @@ public:
static constexpr size_t capacity = sizeof(Long) - 1;
char string[capacity];
// When string is full remaining_size will be 0 and be the null terminator.
- // When string is empty remaining size will be 0b00010111 and not collide with inactive_mask.
+ // When string is empty remaining_size will be Short::capacity (23, 0b00010111,
+ // on 64bit) and so never collides with Long::active_mask.
unsigned char remaining_size;
- static constexpr unsigned char inactive_mask = 0b1000'0000;
};
union
@@ -238,9 +228,8 @@ public:
void release()
{
- auto const cap = u.l.capacity();
- if (is_long() and (cap != 0))
- Alloc{}.deallocate(u.l.ptr, cap+1);
+ if (is_long() and (u.l.capacity != 0))
+ Alloc{}.deallocate(u.l.ptr, u.l.capacity+1);
}
void set_empty() { u.s.remaining_size = Short::capacity; u.s.string[0] = '\0'; }