diff options
| author | Ben Judd <bjudd61@gmail.com> | 2024-06-07 17:33:32 -0700 |
|---|---|---|
| committer | Ben Judd <bjudd@nuro.ai> | 2024-06-07 17:48:07 -0700 |
| commit | 2754e27cf2a85fd4004dd7daabe052b7df1b425a (patch) | |
| tree | ccd6d85a2e21620df984ba4c787e1534ae43ad6b /src/string.hh | |
| parent | c93cb5c4d8f1c2aec21f8bdeba59dfb8f153499c (diff) | |
Increase SSO from 22 to 23 chars.
Diffstat (limited to 'src/string.hh')
| -rw-r--r-- | src/string.hh | 51 |
1 files changed, 36 insertions, 15 deletions
diff --git a/src/string.hh b/src/string.hh index 004be04c..3bbb907e 100644 --- a/src/string.hh +++ b/src/string.hh @@ -1,6 +1,7 @@ #ifndef string_hh_INCLUDED #define string_hh_INCLUDED +#include <bit> #include "memory.hh" #include "hash.hh" #include "units.hh" @@ -156,17 +157,17 @@ public: // String data storage using small string optimization. // - // the LSB of the last byte is used to flag if we are using the small buffer - // or an allocated one. On big endian systems that means the allocated + // the MSB of the last byte is used to flag if we are using the allocated buffer + // (1) or a small one (0). On big endian systems that means the allocated // capacity must be pair, on little endian systems that means the allocated // capacity cannot use its most significant byte, so we effectively limit - // capacity to 2^24 on 32bit arch, and 2^60 on 64. + // capacity to 2^24 on 32bit arch, and 2^56 on 64. struct Data { using Alloc = Allocator<char, MemoryDomain::String>; Data() { set_empty(); } - Data(NoCopy, const char* data, size_t size) : u{Long{const_cast<char*>(data), size, 0}} {} + Data(NoCopy, const char* data, size_t size) : u{Long{const_cast<char*>(data), size, {0}, Short::inactive_mask}} {} Data(const char* data, size_t size, size_t capacity); Data(const char* data, size_t size) : Data(data, size, size) {} @@ -177,9 +178,9 @@ public: Data& operator=(const Data& other); Data& operator=(Data&& other) noexcept; - bool is_long() const { return (u.s.size & 1) == 0; } - size_t size() const { return is_long() ? u.l.size : (u.s.size >> 1); } - size_t capacity() const { return is_long() ? u.l.capacity : Short::capacity; } + bool is_long() const { return (u.s.remaining_size & Short::inactive_mask) > 0; } + size_t size() const { return is_long() ? u.l.size : (Short::capacity - u.s.remaining_size); } + size_t capacity() const { return is_long() ? u.l.capacity() : Short::capacity; } const char* data() const { return is_long() ? u.l.ptr : u.s.string; } char* data() { return is_long() ? u.l.ptr : u.s.string; } @@ -195,18 +196,37 @@ public: struct Long { static constexpr size_t max_capacity = - (size_t)1 << 8 * (sizeof(size_t) - 1); + ((size_t)1 << (CHAR_BIT * (sizeof(size_t) - 1))) - 1; char* ptr; size_t size; - size_t capacity; + unsigned char m_capacity[sizeof(size_t) - 1]; + unsigned char mode; + size_t capacity() const + { + size_t ret{}; + auto* dest = ((unsigned char*)&ret) + + (std::endian::native == std::endian::big ? 1: 0); + memcpy(dest, m_capacity, sizeof m_capacity); + return ret; + } + void set_capacity(size_t cap) + { + auto* src = ((unsigned char*)&cap) + + (std::endian::native == std::endian::big ? 1: 0); + memcpy(m_capacity, src, sizeof m_capacity); + mode = Short::inactive_mask; + } }; struct Short { - static constexpr size_t capacity = sizeof(Long) - 2; - char string[capacity+1]; - unsigned char size; + static constexpr size_t capacity = sizeof(Long) - 1; + char string[capacity]; + // When string is full remaining_size will be 0 and be the null terminator. + // When string is empty remaining size will be 0b00010111 and not collide with inactive_mask. + unsigned char remaining_size; + static constexpr unsigned char inactive_mask = 0b1000'0000; }; union @@ -217,11 +237,12 @@ public: void release() { - if (is_long() and u.l.capacity != 0) - Alloc{}.deallocate(u.l.ptr, u.l.capacity+1); + auto const cap = u.l.capacity(); + if (is_long() and (cap != 0)) + Alloc{}.deallocate(u.l.ptr, cap+1); } - void set_empty() { u.s.size = 1; u.s.string[0] = 0; } + void set_empty() { u.s.remaining_size = Short::capacity; u.s.string[0] = '\0'; } void set_short(const char* data, size_t size); }; |
