wiwi/
encoding.rs

1use crate::prelude::*;
2
3pub use self::generic_fn::{
4	encode,
5	decode,
6
7	Encode,
8	Encoding,
9
10	Base16,
11	Base32,
12	Base64,
13	Hex,
14	RFC1751,
15	Z85,
16};
17
18mod generic_fn;
19
20pub mod base16;
21pub mod base32;
22pub mod base64;
23pub mod hex;
24pub mod rfc1751;
25pub mod z85;
26
/// Helper for unsafe buffer operations, when the _exact_ total capacity needed
/// is known ahead of time and requested up front
///
/// Calling [`Vec::extend_from_slice`] on a vec would be the equivalent safe
/// version of this struct. The reason this struct exists is that anything
/// pushing to a vec checks that there is enough capacity first. That cost is
/// probably negligible, but added up it could be noticeable (especially in hot
/// loops). If the exact amount needed is known and allocated up front, those
/// checks are redundant, so this struct skips them.
///
/// With debug assertions enabled, this struct keeps track of the amount of
/// bytes written and asserts the unsafe preconditions: the requested capacity
/// is never exceeded, and all of it has been filled before the vec is taken
/// out. With debug assertions off (the default in release mode), these checks
/// are not run (the `bytes_written` field that backs them is gated behind
/// `cfg(debug_assertions)`, so it doesn't even exist), and this becomes
/// essentially a wrapper around a vec, a cursor pointer into it, raw pointer
/// copies, and a method that unsafely sets the len of the vec before
/// returning it.
///
/// Creating one of these structs is not unsafe, but you can't
/// really do much with it in safe only code :p
struct UnsafeBufWriteGuard {
	/// The [`Vec`] that's being written to
	///
	/// Note: if `self` is prematurely dropped, this vec will be dropped by its
	/// normal drop implementation. Its length is still 0 at that point, and
	/// `u8` is plain old data with no drop behaviour of its own, so dropping
	/// it with partially written contents is fine.
	vec: Vec<u8>,
	/// The write cursor into the vec's allocation
	///
	/// This pointer is set to the start of the `vec`'s buffer upon creation,
	/// and is shifted forward by every write operation.
	///
	/// The _allocation_ this pointer points into does not move throughout the
	/// lifespan of `self`: we request the capacity once, the vec gives us at
	/// least that much, and then the vec is not touched again until `self` is
	/// unwrapped using [`into_full_vec`]. After that, this pointer is never
	/// used again. [`into_full_vec`] (unsafely) sets the len of the vec to the
	/// initially requested capacity, which the caller of that method promises
	/// is fully initialised (it is a safety invariant of that method). What
	/// happens to the vec after its ownership is handed back to the caller is
	/// no longer on us to handle.
	///
	/// [`into_full_vec`]: Self::into_full_vec
	ptr: *mut u8,
	/// The amount of capacity that the caller initially requested
	///
	/// # Safety
	///
	/// Previously, the unwrapping operation used the value returned by
	/// [`Vec::capacity`] to set the len of the vec. This is unsound, since the
	/// vec is allowed to over allocate. Because of this, we must store the
	/// initially requested capacity (that the caller promises to fill before
	/// taking the vec), and use that value to set the len instead.
	requested_capacity: usize,
	/// In debug mode, tracks the amount of bytes written, and is used to
	/// assert preconditions. In release mode, this field is not present
	///
	/// Because this field is conditionally compiled, the size of `Self`
	/// differs by one word between the two configurations.
	///
	/// # Safety
	///
	/// You shouldn't be relying on the layout of this struct anyways.
	#[cfg(debug_assertions)]
	bytes_written: usize
}

impl UnsafeBufWriteGuard {
	/// Create a new [`UnsafeBufWriteGuard`] with specified capacity
	///
	/// The amount of capacity specified must be _exactly_ calculated, and _all_
	/// capacity allocated here _must_ be initialised before calling
	/// [`into_full_vec`](Self::into_full_vec). See that function for more details
	/// and safety notes.
	///
	/// The act of creating one of these structs is not unsafe, but you can't
	/// really do much with it in safe only code :p
	#[inline]
	pub fn with_capacity(capacity: usize) -> Self {
		let mut vec = Vec::new();
		vec.reserve_exact(capacity);

		// the vec may hand back more than requested, but never less
		debug_assert!(vec.capacity() >= capacity);

		// taken once, up front; the vec is not touched again until
		// `into_full_vec`, so the allocation stays where it is
		let ptr = vec.as_mut_ptr();

		Self {
			vec,
			ptr,
			requested_capacity: capacity,
			#[cfg(debug_assertions)]
			bytes_written: 0
		}
	}

	/// Debug-only bookkeeping shared by every operation that advances the
	/// write cursor
	///
	/// Adds `n` to the running total of bytes written and panics if the total
	/// would exceed the capacity requested up front. The addition is checked
	/// explicitly, so the guard still fires when `debug_assertions` is enabled
	/// but arithmetic overflow checks are not (where a plain `+=` would wrap
	/// and could let the capacity assertion pass).
	#[cfg(debug_assertions)]
	#[inline]
	fn track_written(&mut self, n: usize) {
		let total = self.bytes_written
			.checked_add(n)
			.expect("total amount of bytes written overflowed usize");

		assert!(
			total <= self.requested_capacity,
			"wrote more bytes than the capacity requested up front"
		);

		self.bytes_written = total;
	}

	/// Release-mode counterpart of the debug bookkeeping: the `bytes_written`
	/// field doesn't exist without debug assertions, so there is nothing to do
	#[cfg(not(debug_assertions))]
	#[inline]
	fn track_written(&mut self, _n: usize) {}

	/// Writes an amount of bytes into `self`, determined by const param `N`
	///
	/// This does the same as [`write_bytes`](Self::write_bytes) in functionality,
	/// but maybe the const generic param `N` will enable more optimisations?
	///
	/// # Safety
	///
	/// `src` must be valid for reads of `N` bytes and must not overlap the
	/// buffer being written to. You must not write, in total, more than the
	/// amount of capacity that you requested when creating `self`.
	#[inline]
	pub unsafe fn write_bytes_const<const N: usize>(&mut self, src: *const u8) {
		self.track_written(N);

		// SAFETY: caller promises not to write more bytes than they requested
		// up front, which is what we also requested from the vec
		unsafe { ptr::copy_nonoverlapping(src, self.ptr, N) }

		// SAFETY: caller promises not to write more bytes than they requested
		// up front. In the case of this invocation writing the exact amount to
		// fill the remaining bytes, the pointer could be set to the end of the
		// allocation, which is valid
		unsafe { self.ptr = self.ptr.add(N) }
	}

	/// Writes `n` bytes, read from `src`, into `self`
	///
	/// # Safety
	///
	/// `src` must be valid for reads of `n` bytes and must not overlap the
	/// buffer being written to. You must not write, in total, more than the
	/// amount of capacity that you requested when creating `self`.
	#[inline]
	pub unsafe fn write_bytes(&mut self, src: *const u8, n: usize) {
		self.track_written(n);

		// SAFETY: caller promises not to write more bytes than they requested
		// up front, which is what we also requested from the vec
		unsafe { ptr::copy_nonoverlapping(src, self.ptr, n) }

		// SAFETY: caller promises not to write more bytes than they requested
		// up front. In the case of this invocation writing the exact amount to
		// fill the remaining bytes, the pointer could be set to the end of the
		// allocation, which is valid
		unsafe { self.ptr = self.ptr.add(n) }
	}

	/// Get the pointer pointing to the start of the uninitialised memory in the
	/// buffer (to operate on the raw pointer directly)
	///
	/// If/when you are done writing to the pointer, you should call
	/// [`add_byte_count`](Self::add_byte_count). This offsets the internally
	/// stored pointer by that amount. If you don't, calling any other write
	/// function on this struct will clobber over what you just wrote.
	#[inline]
	pub fn as_mut_ptr(&mut self) -> *mut u8 {
		self.ptr
	}

	/// Declare that `n` bytes have been written
	///
	/// Call this after writing to the raw pointer (which you can get using
	/// [`as_mut_ptr`](Self::as_mut_ptr)), so that the pointer stored internally
	/// keeps pointing at the start of the uninitialised chunk (or at the end of
	/// the buffer, once it is full).
	///
	/// # Safety
	///
	/// You must have written the amount of bytes that you say you have written,
	/// and you must not have written too many bytes.
	///
	/// Calling this function without writing to the amount of memory you say you
	/// did will leave uninitialised memory "holes", which will cause undefined
	/// behaviour when you unwrap the vec.
	#[expect(dead_code, reason = "bweh")]
	#[inline]
	pub unsafe fn add_byte_count(&mut self, n: usize) {
		self.track_written(n);

		// SAFETY: caller promises to have written the amount
		// of bytes that they say they did
		unsafe { self.ptr = self.ptr.add(n) }
	}

	/// Declare that the amount of bytes requested up front has been written to,
	/// then unwraps and returns the internal vec
	///
	/// The returned vec has its len set to the capacity requested in
	/// [`with_capacity`](Self::with_capacity).
	///
	/// # Safety
	///
	/// You must have written to all the bytes that you have requested up front.
	/// Calling this function without doing so will leave a "tail" of uninitialised
	/// bytes in the vec, causing undefined behaviour.
	#[inline]
	pub unsafe fn into_full_vec(mut self) -> Vec<u8> {
		#[cfg(debug_assertions)] {
			// this has to be behind cfg because self.bytes_written
			// doesn't exist in not(debug_assertions)
			assert!(
				self.bytes_written == self.requested_capacity,
				"not all of the capacity requested up front has been written to"
			);
		}

		// SAFETY: caller promises to have written to all
		// the capacity they requested
		unsafe { self.vec.set_len(self.requested_capacity) }

		self.vec
	}
}
247
/// Hands out fixed size (const `N`) frames from the front of a borrowed byte
/// slice, without bounds checking
///
/// Preconditions are only verified through debug assertions.
// Not sure whether anything relies on this being repr(transparent), so it stays
#[repr(transparent)]
struct ChunkedSlice<'h, const N: usize> {
	/// The bytes that have not been handed out yet
	bytes: &'h [u8]
}

impl<'h, const N: usize> ChunkedSlice<'h, N> {
	/// Wraps the given borrowed byte slice in a new [`ChunkedSlice`]
	#[inline]
	pub fn new(bytes: &'h [u8]) -> Self {
		Self { bytes }
	}

	/// Splits the first `N` bytes off the front of the internal slice, without
	/// checking, and returns them as a reference to an array of length `N`
	///
	/// After the call, the internal slice starts `N` bytes further in.
	///
	/// The original author's recollection is that a reference is returned
	/// (rather than the array by value) for performance: returning by value
	/// reportedly caused a heavy regression in z85 encode speed, with alignment
	/// being their (self-described not-very-educated) guess as to why ~vt
	///
	/// # Safety
	///
	/// There must be at least `N` bytes left, otherwise a reference to invalid
	/// memory will be created, causing undefined behaviour.
	#[inline]
	pub unsafe fn next_frame_unchecked(&mut self) -> &'h [u8; N] {
		debug_assert!(self.bytes.len() >= N, "enough bytes left to form another whole frame");

		let remaining = self.bytes;
		let frame_start = remaining.as_ptr();
		let len_before = remaining.len();

		// SAFETY: caller asserts at least `N` bytes are left, so the first
		// `N` bytes behind this pointer are valid to reference as an array
		let frame = unsafe { &*frame_start.cast::<[u8; N]>() };

		// SAFETY: caller asserts at least `N` bytes are left, so offsetting
		// by `N` stays within (or one past the end of) the original slice
		let rest_start = unsafe { frame_start.add(N) };

		// SAFETY: caller asserts at least `N` bytes are left, so the length
		// subtraction cannot underflow, and the shortened length matches the
		// pointer that was advanced above
		let rest = unsafe { slice::from_raw_parts(rest_start, len_before - N) };

		self.bytes = rest;
		frame
	}

	/// Consumes `self`, copies whatever is left into the front of a zero filled
	/// temporary buffer of length `N`, and passes that buffer to `f`
	///
	/// Nothing here tells the closure how many of the `N` bytes are real data
	/// and how many are zero padding; per the original author, the few callers
	/// of this utility have already calculated the remainder themselves.
	///
	/// # Safety
	///
	/// Fewer than `N` bytes must be left (this is what the debug assertion
	/// checks). The remaining bytes are copied into a buffer of length `N`, so
	/// having more than `N` left would write past the end of that buffer.
	#[inline]
	pub unsafe fn with_remainder_unchecked<F>(self, f: F)
	where
		F: FnOnce(&[u8; N])
	{
		let remainder = self.bytes;
		let remainder_len = remainder.len();

		debug_assert!(remainder_len < N, "(strictly) less than a whole frame remaining");

		// zero filled frame; the tail that isn't overwritten acts as padding
		let mut padded = [0u8; N];

		// SAFETY: caller promises fewer than `N` bytes remain, so the copy
		// fits inside `padded`. The length comes from the very slice being
		// read, so the source is valid for that many bytes, and the freshly
		// created local buffer cannot overlap the borrowed slice
		unsafe {
			ptr::copy_nonoverlapping(remainder.as_ptr(), padded.as_mut_ptr(), remainder_len)
		}

		f(&padded);
	}

	/// Returns the bytes that have not been handed out yet
	#[inline]
	pub fn to_slice(&self) -> &'h [u8] {
		self.bytes
	}
}