// wiwi/encoding.rs
use crate::prelude::*;
pub use self::generic_fn::{
encode,
decode,
Encode,
Encoding,
Base16,
Base32,
Base64,
Hex,
RFC1751,
Z85,
};
mod generic_fn;
pub mod base16;
pub mod base32;
pub mod base64;
pub mod hex;
pub mod rfc1751;
pub mod z85;
/// Unchecked writer for a byte buffer whose _exact_ total size is known and
/// requested up front
///
/// The safe equivalent is calling [`Vec::extend_from_slice`] on a vec. Every
/// push to a vec first checks that there is enough capacity, though; that cost
/// is probably negligible on its own, but it can add up (especially in hot
/// loops). If you can calculate exactly how much you need and allocate all of
/// it ahead of time, those checks are redundant, so this struct skips them.
///
/// With debug assertions enabled, the amount of bytes written is tracked and
/// the unsafe preconditions are asserted: writes must never exceed the
/// requested capacity, and all of it must be filled before unwrapping. With
/// debug assertions disabled (the default in release mode), none of these
/// checks run (the `bytes_written` field is gated behind
/// `cfg(debug_assertions)`, so it doesn't even exist), leaving essentially a
/// wrapper around a vec, a pointer into it, raw pointer copies, and a method
/// that unsafely sets the len of the vec before handing it back.
///
/// Creating one of these structs is not unsafe, but there isn't much you can
/// do with it in safe only code.
struct UnsafeBufWriteGuard {
    /// The [`Vec`] being written into
    ///
    /// Note: if `self` is dropped early, this vec is dropped by its normal
    /// drop implementation. `u8` is plain old data with no special drop
    /// behaviour (it's just bytes), so it is fine that the length is still 0
    /// at that point.
    vec: Vec<u8>,
    /// Write cursor into the vec's allocation
    ///
    /// Starts at the beginning of `vec`'s buffer and is shifted forward by
    /// every write operation.
    ///
    /// The allocation it points into does not move for the lifespan of `self`:
    /// as far as the vec is aware, we asked it for some memory, it gave us at
    /// least that much, and then we don't touch it again until `self` is
    /// unwrapped with [`into_full_vec`]. After that this pointer is never used
    /// again. The unwrap (unsafely) sets the len of the vec to the capacity
    /// that was initially requested, which the caller promises is initialised
    /// (it is a safety invariant of [`into_full_vec`]). Whatever happens to
    /// the vec once ownership is handed back to the caller is no longer our
    /// concern.
    ///
    /// [`into_full_vec`]: Self::into_full_vec
    ptr: *mut u8,
    /// The amount of capacity the caller asked for at creation
    ///
    /// # Safety
    ///
    /// An earlier version of the unwrapping operation used the value returned
    /// by [`Vec::capacity`] to set the len of the vec. That is unsound, since
    /// [`Vec::with_capacity`] is allowed to over allocate. So the requested
    /// amount (which the caller promises to fill before taking the vec) is
    /// stored here and used to set the len instead.
    requested_capacity: usize,
    /// Running total of bytes written, used to assert preconditions; only
    /// present when debug assertions are enabled
    ///
    /// Without it, the struct is theoretically one word smaller. Unless you
    /// rely on the size of `Self`, or embed it somewhere size matters, the
    /// field being absent in release mode should not be an issue.
    ///
    /// # Safety
    ///
    /// You shouldn't be relying on the layout of this struct anyways.
    #[cfg(debug_assertions)]
    bytes_written: usize
}

impl UnsafeBufWriteGuard {
    /// Creates a new [`UnsafeBufWriteGuard`] backed by a vec with (at least)
    /// `capacity` bytes reserved
    ///
    /// `capacity` must be calculated _exactly_, and _all_ of it _must_ be
    /// initialised before calling [`into_full_vec`](Self::into_full_vec). See
    /// that function for more details and safety notes.
    ///
    /// Creating one of these structs is not unsafe, but there isn't much you
    /// can do with it in safe only code.
    #[inline]
    pub fn with_capacity(capacity: usize) -> Self {
        let mut vec = Vec::<u8>::new();
        vec.reserve_exact(capacity);
        debug_assert!(vec.capacity() >= capacity);

        // cursor starts at the very beginning of the fresh allocation
        let start = vec.as_mut_ptr();

        Self {
            vec,
            ptr: start,
            requested_capacity: capacity,
            #[cfg(debug_assertions)]
            bytes_written: 0
        }
    }

    /// Debug-only bookkeeping: counts `n` more bytes as written, then asserts
    /// that the running total still fits in the requested capacity
    #[cfg(debug_assertions)]
    #[inline]
    fn record_written(&mut self, n: usize) {
        self.bytes_written += n;
        assert!(self.bytes_written <= self.requested_capacity);
    }

    /// Counterpart of the debug bookkeeping for builds without debug
    /// assertions: `bytes_written` doesn't exist there, so this does nothing
    #[cfg(not(debug_assertions))]
    #[inline(always)]
    fn record_written(&mut self, _n: usize) {}

    /// Copies `N` bytes (a const param) from `src` into `self` and advances
    /// the cursor past them
    ///
    /// Functionally the same as [`write_bytes`](Self::write_bytes); the hope
    /// is that a const `N` may enable more optimisations.
    ///
    /// # Safety
    ///
    /// You must not write, in total, more than the amount of capacity that you
    /// requested when creating `self`.
    #[inline]
    pub unsafe fn write_bytes_const<const N: usize>(&mut self, src: *const u8) {
        // checked before the copy, so an overflowing write is caught first
        self.record_written(N);

        let dst = self.ptr;

        // SAFETY: caller promises the total written never exceeds what they
        // requested up front, which is what was requested from the vec too
        unsafe { ptr::copy_nonoverlapping(src, dst, N) }

        // SAFETY: same promise as above. If this write filled exactly the
        // remaining bytes, the cursor ends up at the end of the allocation,
        // which is still valid
        self.ptr = unsafe { dst.add(N) };
    }

    /// Copies `n` bytes from `src` into `self` and advances the cursor past
    /// them
    ///
    /// # Safety
    ///
    /// You must not write, in total, more than the amount of capacity that you
    /// requested when creating `self`.
    #[inline]
    pub unsafe fn write_bytes(&mut self, src: *const u8, n: usize) {
        // checked before the copy, so an overflowing write is caught first
        self.record_written(n);

        let dst = self.ptr;

        // SAFETY: caller promises the total written never exceeds what they
        // requested up front, which is what was requested from the vec too
        unsafe { ptr::copy_nonoverlapping(src, dst, n) }

        // SAFETY: same promise as above. If this write filled exactly the
        // remaining bytes, the cursor ends up at the end of the allocation,
        // which is still valid
        self.ptr = unsafe { dst.add(n) };
    }

    /// Returns the cursor, i.e. the pointer to the start of the still
    /// uninitialised part of the buffer, for operating on it directly
    ///
    /// If/when you are done writing through the pointer, you should call
    /// [`add_byte_count`](Self::add_byte_count) to move the internally stored
    /// cursor by that amount. If you don't, any other write function on this
    /// struct will clobber what you just wrote.
    #[inline]
    pub fn as_mut_ptr(&mut self) -> *mut u8 {
        self.ptr
    }

    /// Declares that `n` bytes have been written through the raw pointer
    ///
    /// Call this after writing via [`as_mut_ptr`](Self::as_mut_ptr), so that
    /// the internal cursor keeps pointing at the start of the uninitialised
    /// chunk (or at the end of the buffer).
    ///
    /// # Safety
    ///
    /// You must have written exactly the amount of bytes you claim, and must
    /// not have written too many bytes.
    ///
    /// Claiming bytes that were never written leaves uninitialised memory
    /// "holes", which will cause undefined behaviour when you unwrap the vec.
    #[expect(dead_code, reason = "bweh")]
    #[inline]
    pub unsafe fn add_byte_count(&mut self, n: usize) {
        self.record_written(n);

        // SAFETY: caller promises to have written the amount
        // of bytes that they say they did
        self.ptr = unsafe { self.ptr.add(n) };
    }

    /// Declares that every byte requested up front has been written, then
    /// unwraps and returns the internal vec with its len set accordingly
    ///
    /// # Safety
    ///
    /// You must have written to all the bytes that you requested up front.
    /// Otherwise the vec is left with a "tail" of uninitialised bytes, causing
    /// undefined behaviour.
    #[inline]
    pub unsafe fn into_full_vec(self) -> Vec<u8> {
        #[cfg(debug_assertions)] {
            // gated, since self.bytes_written doesn't exist
            // in not(debug_assertions)
            assert!(self.bytes_written == self.requested_capacity);
        }

        let Self { mut vec, requested_capacity, .. } = self;

        // SAFETY: caller promises to have written to all
        // the capacity they requested
        unsafe { vec.set_len(requested_capacity) }
        vec
    }
}
/// Hands out fixed size (const `N`) frames from the front of a byte slice,
/// without any bounds checking
///
/// Preconditions are checked with debug assertions only.
// Unclear whether anything still relies on this being repr(transparent);
// the attribute is kept in case something does
#[repr(transparent)]
struct ChunkedSlice<'h, const N: usize> {
    /// The bytes that have not been handed out yet
    bytes: &'h [u8]
}

impl<'h, const N: usize> ChunkedSlice<'h, N> {
    /// Wraps the given borrowed byte slice in a new [`ChunkedSlice`]
    #[inline]
    pub fn new(bytes: &'h [u8]) -> Self {
        Self { bytes }
    }

    /// Takes, without checking, the first `N` bytes off the internal slice
    /// and returns them as a reference to a fixed size array
    ///
    /// The frame is returned by reference rather than by value, seemingly for
    /// performance: as far as the original author recalls, returning the
    /// array by value caused a fairly heavy regression in z85 encode speed.
    /// The (self-described not very educated) guess is alignment, a reference
    /// being word sized while a byte array is only aligned to 1.
    ///
    /// # Safety
    ///
    /// There must be at least `N` bytes left, otherwise a reference to invalid
    /// memory will be created, causing undefined behaviour.
    #[inline]
    pub unsafe fn next_frame_unchecked(&mut self) -> &'h [u8; N] {
        debug_assert!(self.bytes.len() >= N, "enough bytes left to form another whole frame");

        let head = self.bytes.as_ptr();
        // caller asserts there are at least `N` bytes left,
        // so this subtraction won't overflow
        let rest_len = self.bytes.len() - N;

        // SAFETY: caller asserts there are at least `N` bytes left,
        // so this reference points to valid memory
        let frame = unsafe { &*head.cast::<[u8; N]>() };

        // SAFETY: caller asserts there are at least `N` bytes left, so the
        // offset pointer is still in range (at most one past the end), and
        // `rest_len` is exactly what remains of the original slice after it
        self.bytes = unsafe { slice::from_raw_parts(head.add(N), rest_len) };

        frame
    }

    /// Consumes `self`, copies whatever is left into a zero-filled temporary
    /// buffer of length `N`, and calls `f` with that buffer
    ///
    /// Nothing here indicates how many of the bytes are padding vs actual
    /// data. In the few places this utility struct is used, the remainder has
    /// already been calculated.
    ///
    /// # Safety
    ///
    /// There must be strictly fewer than `N` bytes left (this is what the
    /// debug assertion checks). With more than `N` bytes left, memory past
    /// the end of the temporary buffer would be written to.
    #[inline]
    pub unsafe fn with_remainder_unchecked<F>(self, f: F)
    where
        F: FnOnce(&[u8; N])
    {
        let rest = self.bytes;
        debug_assert!(rest.len() < N, "(strictly) less than a whole frame remaining");

        // zeroed frame sized buffer; bytes not overwritten below are padding
        let mut padded = [0u8; N];

        // SAFETY: caller promises that strictly less than `N` bytes remain,
        // so the copy always fits in the temp buffer. The length comes from
        // the very slice being copied from, so that much is readable
        unsafe { ptr::copy_nonoverlapping(rest.as_ptr(), padded.as_mut_ptr(), rest.len()) }

        f(&padded);
    }

    /// Returns the bytes still left in `self`
    #[inline]
    pub fn to_slice(&self) -> &'h [u8] {
        self.bytes
    }
}