wiwi/
string.rs

1use crate::prelude::*;
2
3use crate::num::*;
4
/// Size in bytes of every string representation in this file (three `usize`s).
const STRING_SIZE_BYTES: usize = 3 * size_of::<usize>();

/// Largest number of bytes the inline representation can store.
const MAX_INLINE_LEN: usize = {
	// One byte of the container is taken by the inline length itself,
	// everything after it is available for string data.
	let payload_bytes = STRING_SIZE_BYTES - 1;

	// The highest bit of the length byte is the bit `CAP_MARKER_U8` selects,
	// so an inline length may only ever use the low 7 bits.
	#[expect(clippy::as_conversions, reason = "u8 to usize is fine (also we are in const context)")]
	let largest_len = (u8::MAX >> 1) as usize;

	// Take the smaller of the two limits. The 7 bit limit only kicks in once
	// three `usize`s hold more than 127 payload bytes, which no current
	// target does (quite extreme, probably unnecessary future proofing).
	if payload_bytes <= largest_len {
		payload_bytes
	} else {
		largest_len
	}
};

/// Heap marker as it is applied to `StringHeap::cap_be_and_marker`: the highest
/// bit of a `usize`, passed through `to_be` so that it lands in the first byte
/// of the field in memory.
const CAP_MARKER_BE: usize = (1_usize << (usize::BITS - 1)).to_be();

/// Highest bit of a `u8`; used to test the first byte of a string for the heap marker.
const CAP_MARKER_U8: u8 = 1 << (u8::BITS - 1);
22
23const _: () = assert!(size_of::<StringInlineable>() == STRING_SIZE_BYTES);
24const _: () = assert!(size_of::<StringInline>() == STRING_SIZE_BYTES);
25const _: () = assert!(size_of::<StringHeap>() == STRING_SIZE_BYTES);
26const _: () = assert!(MAX_INLINE_LEN > 0);
27
28pub union StringInlineable {
29	inline: ManuallyDrop<StringInline>,
30	heap: ManuallyDrop<StringHeap>
31}
32
33impl StringInlineable {
34	#[inline]
35	pub const fn new() -> Self {
36		let inline = StringInline::new();
37		Self { inline: ManuallyDrop::new(inline) }
38	}
39}
40
41impl StringInlineable {
42	#[inline]
43	pub fn len(&self) -> usize {
44		self.do_thing(|s| s.len(), |s| s.len())
45	}
46
47	#[inline]
48	pub fn capacity(&self) -> usize {
49		self.do_thing(|s| s.capacity(), |s| s.capacity())
50	}
51
52	#[inline]
53	pub fn is_empty(&self) -> bool {
54		self.len() == 0
55	}
56
57	#[inline]
58	pub fn as_str(&self) -> &str {
59		self.do_thing(|s| s.as_str(), |s| s.as_str())
60	}
61
62	#[inline]
63	pub fn as_str_mut(&mut self) -> &mut str {
64		self.do_thing_mut(|s| s.as_str_mut(), |s| s.as_str_mut())
65	}
66}
67
68impl StringInlineable {
69	#[inline]
70	fn is_inline(&self) -> bool {
71		// SAFETY: all memory-valid instancees of `StringHeap` satisfy memory
72		// invariants of `StringInline`, so union field access `self.inline` is fine
73		let len = unsafe { self.inline.len };
74
75		len & CAP_MARKER_U8 == 0
76	}
77
78	#[inline]
79	fn do_thing<'h, T, FInline, FHeap>(&'h self, f_inline: FInline, f_heap: FHeap) -> T
80	where
81		FInline: FnOnce(&'h StringInline) -> T,
82		FHeap: FnOnce(&'h StringHeap) -> T
83	{
84		match self.is_inline() {
85			// SAFETY: we just checked `self.is_inline()`
86			true => unsafe { f_inline(&self.inline) }
87			// SAFETY: we just checked `self.is_inline()`
88			false => unsafe { f_heap(&self.heap) }
89		}
90	}
91
92	#[inline]
93	fn do_thing_mut<'h, T, FInline, FHeap>(&'h mut self, f_inline: FInline, f_heap: FHeap) -> T
94	where
95		FInline: FnOnce(&'h mut StringInline) -> T,
96		FHeap: FnOnce(&'h mut StringHeap) -> T
97	{
98		match self.is_inline() {
99			// SAFETY: we just checked `self.is_inline()`
100			true => unsafe { f_inline(&mut self.inline) }
101			// SAFETY: we just checked `self.is_inline()`
102			false => unsafe { f_heap(&mut self.heap) }
103		}
104	}
105}
106
107impl Default for StringInlineable {
108	#[inline]
109	fn default() -> Self {
110		Self::new()
111	}
112}
113
114impl Deref for StringInlineable {
115	type Target = str;
116
117	#[inline]
118	fn deref(&self) -> &str {
119		self.as_str()
120	}
121}
122
123impl From<&str> for StringInlineable {
124	#[inline]
125	fn from(s: &str) -> Self {
126		match s.len() <= MAX_INLINE_LEN {
127			true => {
128				// SAFETY: just checked `s.len() <= MAX_INLINE_LEN`
129				let inline = unsafe { StringInline::from_str_unchecked(s) };
130				Self { inline: ManuallyDrop::new(inline) }
131			}
132			false => {
133				// SAFETY: just checked `s.len() > MAX_INLINE_LEN`
134				// (which is also not zero)
135				let heap = unsafe { StringHeap::from_str_unchecked(s) };
136				Self { heap: ManuallyDrop::new(heap) }
137			}
138		}
139	}
140}
141
/// Inline representation: a length byte followed by up to [`MAX_INLINE_LEN`]
/// bytes of string data stored directly inside the struct.
///
/// `#[repr(C)]` keeps `len` as the very first byte, which is the byte
/// `StringInlineable::is_inline` inspects for the heap marker.
#[repr(C)]
struct StringInline {
	/// regular u8, represented as is
	///
	/// The constructors only ever store values up to [`MAX_INLINE_LEN`] here,
	/// which leaves the highest bit (the heap marker bit) clear.
	len: u8,
	/// String data. The accessors rely on the invariant that the first `len`
	/// bytes are initialised and valid utf-8; the remainder may be uninitialised.
	rest: MaybeUninit<[u8; MAX_INLINE_LEN]>
}
148
149impl StringInline {
150	#[inline]
151	const fn new() -> Self {
152		Self { len: 0, rest: MaybeUninit::uninit() }
153	}
154
155	/// # Safety
156	///
157	/// The passed in `str` must have length less than or equal to [`MAX_INLINE_LEN`].
158	#[inline]
159	unsafe fn from_str_unchecked(s: &str) -> Self {
160		debug_assert!(s.len() <= MAX_INLINE_LEN);
161
162		let mut inline = Self {
163			len: 0,
164			rest: MaybeUninit::uninit()
165		};
166
167		// SAFETY:
168		// - ptr obtained from `value` is valid, and for `s.len()` reads
169		// - ptr obtained from `inline.rest` is valid
170		// - caller promises `s.len()` is lte `MAX_INLINE_LEN`
171		// - ptrs obtained from aligned sources
172		// - reference to memory outside local stack memory in `value`
173		//   cannot overlap with local stack memory in `inline`
174		unsafe {
175			ptr::copy_nonoverlapping(
176				s.as_ptr(),
177				inline.rest.as_mut_ptr().cast::<u8>(),
178				s.len()
179			)
180		}
181
182		// we just initialised `s.len()` amount of
183		// memory with that `copy` call above
184		inline.len = s.len().into_u8_lossy();
185
186		inline
187	}
188}
189
190impl StringInline {
191	#[inline]
192	fn len(&self) -> usize {
193		usize::from_u8(self.len)
194	}
195
196	#[inline]
197	fn capacity(&self) -> usize {
198		MAX_INLINE_LEN
199	}
200
201	#[inline]
202	fn as_str(&self) -> &str {
203		let ptr = self.rest.as_ptr().cast::<u8>();
204		let len = self.len.into_usize();
205
206		// SAFETY: relying on invariant that `self.rest` must have
207		// at least `self.len` elements initialised
208		let slice = unsafe { slice::from_raw_parts(ptr, len) };
209		// SAFETY: relying on invariant that `self` contains valid utf-8
210		unsafe { str::from_utf8_unchecked(slice) }
211	}
212
213	#[inline]
214	fn as_str_mut(&mut self) -> &mut str {
215		let ptr = self.rest.as_ptr().cast::<u8>();
216		let len = self.len.into_usize();
217
218		// SAFETY: relying on invariant that `self.rest` must have
219		// at least `self.len` elements initialised
220		let slice = unsafe { slice::from_raw_parts_mut(ptr.cast_mut(), len) };
221		// SAFETY: relying on invariant that `self` contains valid utf-8
222		unsafe { str::from_utf8_unchecked_mut(slice) }
223	}
224}
225
/// Heap representation: marked capacity, length, and a pointer to the buffer
/// holding the string data.
///
/// `#[repr(C)]` keeps `cap_be_and_marker` as the first field, so its first byte
/// in memory is the byte `StringInlineable::is_inline` inspects.
#[repr(C)]
struct StringHeap {
	/// This value needs processing in order to be a valid capacity
	///
	/// This stores the capacity, in big endian, with the highest bit set. Just
	/// use [`capacity`](Self::capacity) function to get the capacity.
	///
	/// Big endian puts the most significant byte (the one carrying the marker
	/// bit) first in memory, regardless of the endianness of the target.
	cap_be_and_marker: usize,
	/// Length of the string in bytes. The accessors rely on the invariant that
	/// this many bytes behind `ptr` are initialised and valid utf-8.
	len: usize,
	/// Pointer to the start of the buffer (allocated in `from_str_unchecked`)
	ptr: *const u8
}
236
237impl StringHeap {
238	/// # Safety
239	///
240	/// The passed in `str` must have length greater than zero. (The passed in
241	/// `str` _should_ have greater than `MAX_INLINE_LEN` len, which is larger
242	/// than zero, and `StringInlineable` already ensures this)
243	unsafe fn from_str_unchecked(s: &str) -> Self {
244		let layout = alloc_mod::Layout::for_value(s);
245		// SAFETY: layout is nonzero (caller promises `s` is not zero length)
246		let ptr = unsafe { alloc(layout) };
247
248		let mut heap = Self {
249			cap_be_and_marker: 0,
250			len: 0,
251			ptr
252		};
253		// SAFETY:
254		// - we just allocated the ptr inside with this layout
255		// - existing `&str` cannot have memory larger than `isize::MAX`
256		unsafe { heap.set_capacity(layout.size()) }
257
258		// SAFETY:
259		// - ptr obtained from `value` is valid, and for `s.len()` reads
260		// - we just allocated ptr in `heap.ptr` for `s.len()` bytes
261		// - ptrs obtained from aligned sources
262		// - reference to existing memory in `value` cannot overlap with
263		//   memory we just allocated
264		unsafe {
265			ptr::copy_nonoverlapping(
266				s.as_ptr(),
267				heap.ptr.cast_mut(),
268				s.len()
269			)
270		}
271
272		// we just initialised `s.len()` amount of
273		// memory with that `copy` call above
274		heap.len = s.len();
275
276		heap
277	}
278}
279
280impl StringHeap {
281	#[inline]
282	fn len(&self) -> usize {
283		self.len
284	}
285
286	#[inline]
287	fn capacity(&self) -> usize {
288		usize::from_be(self.cap_be_and_marker ^ CAP_MARKER_BE)
289	}
290
291	/// Helper for setting capacity (since it's stored in a... nonstandard way)
292	///
293	/// # Safety
294	///
295	/// - Capacity in `self` must actually be `capacity`
296	/// - `capacity` must be less than or equal to `isize::MAX`. This is
297	///   required by rust's allocation APIs, as well as needed for the heap marker
298	///   to be set properlu
299	#[inline]
300	unsafe fn set_capacity(&mut self, capacity: usize) {
301		self.cap_be_and_marker = capacity.to_be() ^ CAP_MARKER_BE
302	}
303
304	#[inline]
305	fn as_str(&self) -> &str {
306		// SAFETY: relying on invariant that `self.rest` must have
307		// at least `self.len` elements initialised
308		let slice = unsafe { slice::from_raw_parts(self.ptr, self.len) };
309		// SAFETY: relying on invariant that `self` contains valid utf-8
310		unsafe { str::from_utf8_unchecked(slice) }
311	}
312
313	#[inline]
314	fn as_str_mut(&mut self) -> &mut str {
315		// SAFETY: relying on invariant that `self.rest` must have
316		// at least `self.len` elements initialised
317		let slice = unsafe { slice::from_raw_parts_mut(self.ptr.cast_mut(), self.len) };
318		// SAFETY: relying on invariant that `self` contains valid utf-8
319		unsafe { str::from_utf8_unchecked_mut(slice) }
320	}
321}