wiwi/encoding.rs
1use crate::prelude::*;
2
3pub use self::generic_fn::{
4 encode,
5 decode,
6
7 Encode,
8 Encoding,
9
10 Base16,
11 Base32,
12 Base64,
13 Hex,
14 RFC1751,
15 Z85,
16};
17
18mod generic_fn;
19
20pub mod base16;
21pub mod base32;
22pub mod base64;
23pub mod hex;
24pub mod rfc1751;
25pub mod z85;
26
/// Helper for unsafe buffer operations, when the _exact_ total capacity needed
/// is known ahead of time and requested up front
///
/// Calling [`Vec::extend_from_slice`] on a vec would be the equivalent safe
/// version of this struct. The reason this struct exists is that anything that
/// pushes to a vec checks that there is enough capacity. That cost is probably
/// negligible, but added up it could be noticeable (especially in hot loops).
/// If you can calculate the exact amount you need and allocate it all up
/// front, those checks are no longer needed.
///
/// With debug assertions enabled, this struct keeps track of the amount of
/// bytes written and asserts the unsafe preconditions: not writing past the
/// requested capacity, and having filled all of it before unwrapping. With
/// debug assertions off (the default in release mode) these checks are not run
/// (the `bytes_written` field that tracks them is gated behind
/// `cfg(debug_assertions)`, so it doesn't even exist), and this becomes
/// essentially a wrapper around a vec, a pointer into it, raw pointer copies,
/// and a method that unsafely sets the len of the vec before returning it.
///
/// Creating one of these structs is not unsafe, but you can't
/// really do much with it in safe only code :p
struct UnsafeBufWriteGuard {
    /// The [`Vec`] that's being written to
    ///
    /// Its len stays at 0 until [`into_full_vec`](Self::into_full_vec) is
    /// called. If `self` is dropped before that, the vec is dropped by its
    /// normal drop implementation; `u8` has no drop behaviour of its own, so
    /// it is fine that the len is still 0 at that point.
    vec: Vec<u8>,
    /// The write cursor into the vec's allocation
    ///
    /// This is set to the start of `vec`'s buffer on creation, and is advanced
    /// by every write operation.
    ///
    /// The allocation it points into does not move for the lifespan of `self`:
    /// the vec is asked for its capacity once, in
    /// [`with_capacity`](Self::with_capacity), and is then not touched again
    /// until [`into_full_vec`] sets its len and hands it back to the caller.
    /// The pointer is never used after that.
    ///
    /// [`into_full_vec`]: Self::into_full_vec
    ptr: *mut u8,
    /// The amount of capacity that the caller initially requested
    ///
    /// # Safety
    ///
    /// This value (not [`Vec::capacity`]) is what the len gets set to when
    /// unwrapping. Using [`Vec::capacity`] would be unsound, because the vec
    /// is allowed to allocate more than was asked for, and the caller only
    /// promises to initialise the amount they requested.
    requested_capacity: usize,
    /// In debug mode, tracks the amount of bytes written, and is used to
    /// assert preconditions. In release mode, this field is not present
    ///
    /// # Safety
    ///
    /// The size of `Self` therefore differs between the two modes. Do not rely
    /// on the layout of this struct.
    #[cfg(debug_assertions)]
    bytes_written: usize,
}

impl UnsafeBufWriteGuard {
    /// Create a new [`UnsafeBufWriteGuard`] with specified capacity
    ///
    /// The amount of capacity specified must be _exactly_ calculated, and _all_
    /// of it _must_ be initialised before calling
    /// [`into_full_vec`](Self::into_full_vec). See that function for more
    /// details and safety notes. Requesting `0` is fine: nothing may be
    /// written, and [`into_full_vec`](Self::into_full_vec) returns an empty vec.
    ///
    /// The act of creating one of these structs is not unsafe, but you can't
    /// really do much with it in safe only code :p
    #[inline]
    pub fn with_capacity(capacity: usize) -> Self {
        let mut vec = Vec::new();
        vec.reserve_exact(capacity);

        // the vec may over allocate, but must never give us less than asked
        debug_assert!(vec.capacity() >= capacity);

        // taken after reserving, so it points at the final allocation
        let ptr = vec.as_mut_ptr();

        Self {
            vec,
            ptr,
            requested_capacity: capacity,
            #[cfg(debug_assertions)]
            bytes_written: 0,
        }
    }

    /// Debug-only bookkeeping shared by every operation that advances the
    /// write cursor: records `n` more bytes as written, and panics if the
    /// running total exceeds the requested capacity
    ///
    /// Must be called _before_ the memory is touched, so that a violated
    /// precondition panics instead of writing out of bounds.
    #[cfg(debug_assertions)]
    #[inline]
    fn debug_record_write(&mut self, n: usize) {
        self.bytes_written += n;
        assert!(self.bytes_written <= self.requested_capacity);
    }

    /// Release-mode counterpart of the debug bookkeeping: `bytes_written`
    /// does not exist without debug assertions, so there is nothing to do
    #[cfg(not(debug_assertions))]
    #[inline]
    fn debug_record_write(&mut self, _n: usize) {}

    /// Writes an amount of bytes into `self`, determined by const param `N`
    ///
    /// This does the same as [`write_bytes`](Self::write_bytes) in functionality,
    /// but maybe the const generic param `N` will enable more optimisations?
    ///
    /// # Safety
    ///
    /// - You must not write, in total, more than the amount of capacity that
    ///   you requested when creating `self`.
    /// - `src` must be valid for reads of `N` bytes, and must not overlap the
    ///   buffer owned by `self`.
    #[inline]
    pub unsafe fn write_bytes_const<const N: usize>(&mut self, src: *const u8) {
        self.debug_record_write(N);

        // SAFETY: caller promises `src` is readable for `N` bytes and does not
        // overlap our buffer, and promises not to write more bytes than they
        // requested up front, which is what we also requested from the vec
        unsafe { ptr::copy_nonoverlapping(src, self.ptr, N) }

        // SAFETY: caller promises not to write more bytes than they requested
        // up front. In the case of this invocation writing the exact amount to
        // fill the remaining bytes, the pointer could be set to the end of the
        // allocation, which is valid
        unsafe { self.ptr = self.ptr.add(N) }
    }

    /// Writes `n` bytes, read from `src`, into `self`
    ///
    /// # Safety
    ///
    /// - You must not write, in total, more than the amount of capacity that
    ///   you requested when creating `self`.
    /// - `src` must be valid for reads of `n` bytes, and must not overlap the
    ///   buffer owned by `self`.
    #[inline]
    pub unsafe fn write_bytes(&mut self, src: *const u8, n: usize) {
        self.debug_record_write(n);

        // SAFETY: caller promises `src` is readable for `n` bytes and does not
        // overlap our buffer, and promises not to write more bytes than they
        // requested up front, which is what we also requested from the vec
        unsafe { ptr::copy_nonoverlapping(src, self.ptr, n) }

        // SAFETY: caller promises not to write more bytes than they requested
        // up front. In the case of this invocation writing the exact amount to
        // fill the remaining bytes, the pointer could be set to the end of the
        // allocation, which is valid
        unsafe { self.ptr = self.ptr.add(n) }
    }

    /// Get the pointer pointing to the start of the uninitialised memory in the
    /// buffer (to operate on the raw pointer directly)
    ///
    /// If/when you are done writing to the pointer, you should call
    /// [`add_byte_count`](Self::add_byte_count). This offsets the internally
    /// stored pointer by that amount. If you don't, calling any other write
    /// function on this struct will clobber over what you just wrote.
    #[inline]
    pub fn as_mut_ptr(&mut self) -> *mut u8 {
        self.ptr
    }

    /// Declare that `n` bytes have been written
    ///
    /// Call this after writing to the raw pointer (which you can get using
    /// [`as_mut_ptr`](Self::as_mut_ptr)), so that the pointer stored internally
    /// keeps pointing at the start of the uninitialised chunk (or at the end
    /// of the buffer, once it is full).
    ///
    /// # Safety
    ///
    /// You must have actually written the amount of bytes that you say you
    /// have written, and the total written must not exceed the capacity
    /// requested when creating `self`.
    ///
    /// Calling this function without writing to the amount of memory you say you
    /// did will leave uninitialised memory "holes", which will cause undefined
    /// behaviour when you unwrap the vec.
    #[expect(
        dead_code,
        reason = "currently has no callers; kept as the counterpart to `as_mut_ptr`"
    )]
    #[inline]
    pub unsafe fn add_byte_count(&mut self, n: usize) {
        self.debug_record_write(n);

        // SAFETY: caller promises to have written the amount
        // of bytes that they say they did
        unsafe { self.ptr = self.ptr.add(n) }
    }

    /// Declare that the amount of bytes requested up front has been written,
    /// then unwraps and returns the internal vec
    ///
    /// The returned vec has its len set to the capacity requested in
    /// [`with_capacity`](Self::with_capacity).
    ///
    /// # Safety
    ///
    /// You must have written to all the bytes that you have requested up front.
    /// Calling this function without doing so will leave a "tail" of uninitialised
    /// bytes in the vec, causing undefined behaviour.
    #[inline]
    pub unsafe fn into_full_vec(mut self) -> Vec<u8> {
        #[cfg(debug_assertions)]
        {
            // this has to be behind cfg because self.bytes_written
            // doesn't exist in not(debug_assertions)
            assert!(self.bytes_written == self.requested_capacity);
        }

        // SAFETY: caller promises to have written to all the capacity they
        // requested, and the vec holds at least that much capacity
        unsafe { self.vec.set_len(self.requested_capacity) }

        self.vec
    }
}
247
/// Utility to emit fixed size (const) chunks, in an unchecked manner, from
/// a slice
///
/// Frames of `N` bytes are taken off the front with
/// [`next_frame_unchecked`](Self::next_frame_unchecked); whatever is left at
/// the end (less than one frame) is handled with
/// [`with_remainder_unchecked`](Self::with_remainder_unchecked).
///
/// Contains debug assertions to assert preconditions.
// I cannot remember if I rely on this being repr(transparent) anywhere
#[repr(transparent)]
struct ChunkedSlice<'h, const N: usize> {
    /// The bytes that have not been consumed yet
    bytes: &'h [u8],
}

impl<'h, const N: usize> ChunkedSlice<'h, N> {
    /// Creates a new [`ChunkedSlice`] instance from the given
    /// borrowed, byte slice
    #[inline]
    pub fn new(bytes: &'h [u8]) -> Self {
        Self { bytes }
    }

    /// Removes, without checking, `N` bytes off the front of the internal slice,
    /// then returns a reference to those `N` bytes
    ///
    /// The returned reference borrows from the original slice (lifetime `'h`),
    /// not from `self`, so it can be held across further calls.
    ///
    /// I believe the reason this function returns a reference rather than an
    /// array by value is performance? if I remember correctly, changing it to
    /// return the array by value caused a quite heavy performance regression
    /// in z85 encode speed. My not-very-educated guess is the alignment? since
    /// references are aligned to word size, which the CPU likes, while the byte
    /// array is only aligned to 1 ~vt
    ///
    /// # Safety
    ///
    /// There must be at least `N` bytes left, otherwise a reference to invalid
    /// memory will be created, causing undefined behaviour.
    #[inline]
    pub unsafe fn next_frame_unchecked(&mut self) -> &'h [u8; N] {
        debug_assert!(self.bytes.len() >= N, "enough bytes left to form another whole frame");

        let frame_ptr = self.bytes.as_ptr();
        let remaining = self.bytes.len();

        // SAFETY: caller asserts there is at least `N` bytes left,
        // so this reference will point to valid memory
        let frame = unsafe { &*frame_ptr.cast::<[u8; N]>() };

        // SAFETY: caller asserts there is at least `N` bytes left,
        // so this ptr will still point in range (at most one past the end)
        let rest_ptr = unsafe { frame_ptr.add(N) };

        // SAFETY: caller asserts there is at least `N` bytes left, so the
        // subtraction won't overflow, and `rest_ptr` plus the reduced len
        // covers exactly the tail of the slice we started with
        self.bytes = unsafe { slice::from_raw_parts(rest_ptr, remaining - N) };

        frame
    }

    /// Consumes self, takes the remainder slice, copies it into a temporary
    /// zero-filled buffer of length `N`, and calls the provided closure with
    /// the temporary buffer
    ///
    /// The remainder occupies the front of the buffer; the rest is zero
    /// padding. This does _not_ indicate anywhere how many were padding bytes
    /// vs actual data. In the few places that this utility struct is used, the
    /// remainder has been calculated already.
    ///
    /// # Safety
    ///
    /// There must be strictly fewer than `N` bytes left (this is what the
    /// debug assertion checks). If more than `N` bytes are left, memory past
    /// the end of the temporary buffer will be written to, causing undefined
    /// behaviour.
    #[inline]
    pub unsafe fn with_remainder_unchecked<F>(self, f: F)
    where
        F: FnOnce(&[u8; N]),
    {
        let len = self.bytes.len();

        debug_assert!(len < N, "(strictly) less than a whole frame remaining");

        // temp buffer of correct length; bytes not overwritten below
        // stay zero and act as the padding
        let mut padded = [0u8; N];

        // SAFETY: caller promises that there is strictly less than N bytes
        // remaining, so the amount of data copied always fits in the temp
        // buffer. `len` comes from the same slice we are copying from, so
        // that many bytes are readable, and a fresh local cannot overlap it
        unsafe { ptr::copy_nonoverlapping(self.bytes.as_ptr(), padded.as_mut_ptr(), len) }

        f(&padded);
    }

    /// Returns the slice left in `self` (the bytes not yet consumed)
    #[inline]
    pub fn to_slice(&self) -> &'h [u8] {
        self.bytes
    }
}