summaryrefslogtreecommitdiff
path: root/src/string.hh
diff options
context:
space:
mode:
authorBen Judd <bjudd61@gmail.com>2024-06-07 17:33:32 -0700
committerBen Judd <bjudd@nuro.ai>2024-06-07 17:48:07 -0700
commit2754e27cf2a85fd4004dd7daabe052b7df1b425a (patch)
treeccd6d85a2e21620df984ba4c787e1534ae43ad6b /src/string.hh
parentc93cb5c4d8f1c2aec21f8bdeba59dfb8f153499c (diff)
Increase SSO from 22 to 23 chars.
Diffstat (limited to 'src/string.hh')
-rw-r--r--src/string.hh51
1 files changed, 36 insertions, 15 deletions
diff --git a/src/string.hh b/src/string.hh
index 004be04c..3bbb907e 100644
--- a/src/string.hh
+++ b/src/string.hh
@@ -1,6 +1,7 @@
#ifndef string_hh_INCLUDED
#define string_hh_INCLUDED
+#include <bit>
#include "memory.hh"
#include "hash.hh"
#include "units.hh"
@@ -156,17 +157,17 @@ public:
// String data storage using small string optimization.
//
- // the LSB of the last byte is used to flag if we are using the small buffer
- // or an allocated one. On big endian systems that means the allocated
+ // the MSB of the last byte is used to flag if we are using the allocated buffer
+ // (1) or a small one (0). On big endian systems that means the allocated
// capacity must be pair, on little endian systems that means the allocated
// capacity cannot use its most significant byte, so we effectively limit
- // capacity to 2^24 on 32bit arch, and 2^60 on 64.
+ // capacity to 2^24 on 32bit arch, and 2^56 on 64.
struct Data
{
using Alloc = Allocator<char, MemoryDomain::String>;
Data() { set_empty(); }
- Data(NoCopy, const char* data, size_t size) : u{Long{const_cast<char*>(data), size, 0}} {}
+ Data(NoCopy, const char* data, size_t size) : u{Long{const_cast<char*>(data), size, {0}, Short::inactive_mask}} {}
Data(const char* data, size_t size, size_t capacity);
Data(const char* data, size_t size) : Data(data, size, size) {}
@@ -177,9 +178,9 @@ public:
Data& operator=(const Data& other);
Data& operator=(Data&& other) noexcept;
- bool is_long() const { return (u.s.size & 1) == 0; }
- size_t size() const { return is_long() ? u.l.size : (u.s.size >> 1); }
- size_t capacity() const { return is_long() ? u.l.capacity : Short::capacity; }
+ bool is_long() const { return (u.s.remaining_size & Short::inactive_mask) > 0; }
+ size_t size() const { return is_long() ? u.l.size : (Short::capacity - u.s.remaining_size); }
+ size_t capacity() const { return is_long() ? u.l.capacity() : Short::capacity; }
const char* data() const { return is_long() ? u.l.ptr : u.s.string; }
char* data() { return is_long() ? u.l.ptr : u.s.string; }
@@ -195,18 +196,37 @@ public:
struct Long
{
static constexpr size_t max_capacity =
- (size_t)1 << 8 * (sizeof(size_t) - 1);
+ ((size_t)1 << (CHAR_BIT * (sizeof(size_t) - 1))) - 1;
char* ptr;
size_t size;
- size_t capacity;
+ unsigned char m_capacity[sizeof(size_t) - 1];
+ unsigned char mode;
+ size_t capacity() const
+ {
+ size_t ret{};
+ auto* dest = ((unsigned char*)&ret) +
+ (std::endian::native == std::endian::big ? 1: 0);
+ memcpy(dest, m_capacity, sizeof m_capacity);
+ return ret;
+ }
+ void set_capacity(size_t cap)
+ {
+ auto* src = ((unsigned char*)&cap) +
+ (std::endian::native == std::endian::big ? 1: 0);
+ memcpy(m_capacity, src, sizeof m_capacity);
+ mode = Short::inactive_mask;
+ }
};
struct Short
{
- static constexpr size_t capacity = sizeof(Long) - 2;
- char string[capacity+1];
- unsigned char size;
+ static constexpr size_t capacity = sizeof(Long) - 1;
+ char string[capacity];
+ // When string is full remaining_size will be 0 and be the null terminator.
+ // When string is empty remaining size will be 0b00010111 and not collide with inactive_mask.
+ unsigned char remaining_size;
+ static constexpr unsigned char inactive_mask = 0b1000'0000;
};
union
@@ -217,11 +237,12 @@ public:
void release()
{
- if (is_long() and u.l.capacity != 0)
- Alloc{}.deallocate(u.l.ptr, u.l.capacity+1);
+ auto const cap = u.l.capacity();
+ if (is_long() and (cap != 0))
+ Alloc{}.deallocate(u.l.ptr, cap+1);
}
- void set_empty() { u.s.size = 1; u.s.string[0] = 0; }
+ void set_empty() { u.s.remaining_size = Short::capacity; u.s.string[0] = '\0'; }
void set_short(const char* data, size_t size);
};