wiwi/encoding/
z85.rs

1extern crate thiserror;
2
3use crate::prelude::*;
4use crate::num::*;
5use super::{ ChunkedSlice, UnsafeBufWriteGuard };
6
/// Number of distinct characters in the Z85 alphabet (ie. the length of
/// [`TABLE_ENCODER`])
pub const TABLE_ENCODER_LEN: usize = 85;

/// The Z85 alphabet: the byte stored at index `n` is the character used to
/// encode the base-85 digit `n` (`0..=84`).
///
/// Layout of the table:
/// - indices `0..=9`: the digits `0`-`9`
/// - indices `10..=35`: the lowercase letters `a`-`z`
/// - indices `36..=61`: the uppercase letters `A`-`Z`
/// - indices `62..=84`: the punctuation characters
///
/// The literal is split over several lines using line continuations (a
/// trailing `\` skips the newline and the leading whitespace of the next
/// line). The array type makes the compiler verify that the literal is
/// exactly [`TABLE_ENCODER_LEN`] bytes long.
pub static TABLE_ENCODER: [u8; TABLE_ENCODER_LEN] = *b"\
	0123456789\
	abcdefghijklmnopqrstuvwxyz\
	ABCDEFGHIJKLMNOPQRSTUVWXYZ\
	.-:+=^!/*?&<>()[]{}@%$#\
";
14
/// Length of the decoding table (one entry for every possible `u8` value)
pub const TABLE_DECODER_LEN: usize = 256;

/// Table of length 256 ([`TABLE_DECODER_LEN`]), mapping any `u8` to the
/// base-85 digit (`0..=84`) that the character represents
///
/// `0xff` entries represent `None`, ie. the byte is not a Z85 character.
/// Doing it this way instead of `Option<u8>` halves the size of this table
/// (`u8` is 1 byte, `Option<u8>` is 2 bytes).
///
/// This table could be shrunk to just 96 elements, holding the range `32..128`,
/// but keeping it length 256 means it's possible to index into this table with
/// any arbitrary byte value without going out of bounds, saving a check for a
/// value to be within `32..128`.
// generated by script `generate-z85-table-decoder`... and then manually modified
// when we switched over from `Option<u8>` to just u8 with 0xff representing None
pub static TABLE_DECODER: [u8; TABLE_DECODER_LEN] = [
	// 0x00..=0x1f: control characters, none are valid
	NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
	NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,

	// 0x20..=0x2f: space and punctuation
	NONE, 0x44, NONE, 0x54, 0x53, 0x52, 0x48, NONE, 0x4b, 0x4c, 0x46, 0x41, NONE, 0x3f, 0x3e, 0x45,
	// 0x30..=0x3f: digits `0`-`9`, then punctuation
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x40, NONE, 0x49, 0x42, 0x4a, 0x47,
	// 0x40..=0x4f: `@`, then `A`-`O`
	0x51, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32,
	// 0x50..=0x5f: `P`-`Z`, then punctuation
	0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x4d, NONE, 0x4e, 0x43, NONE,
	// 0x60..=0x6f: backtick, then `a`-`o`
	NONE, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
	// 0x70..=0x7f: `p`-`z`, then punctuation and DEL
	0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x4f, NONE, 0x50, NONE, NONE,

	// 0x80..=0xff: non-ASCII bytes, none are valid
	NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
	NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
	NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
	NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
	NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
	NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
	NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
	NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE
];

/// The byte (`0xff`) used to represent a `None` value in the
/// [table decoder](TABLE_DECODER)
const NONE: u8 = 0xff;
52
/// Size in bytes (4) of one frame of binary data. Every binary frame
/// is encoded into one frame of [`STRING_FRAME_LEN`] (5) bytes of Z85 text.
pub const BINARY_FRAME_LEN: usize = 4;

/// Size in bytes (5) of one frame of encoded Z85 text. Every string frame
/// is decoded into one frame of [`BINARY_FRAME_LEN`] (4) bytes of binary data.
pub const STRING_FRAME_LEN: usize = 5;
60
61/// Encodes a slice of bytes into a Z85 string, adding padding if necessary
62#[inline]
63pub fn encode_z85(bytes: &[u8]) -> String {
64	// we *don't* fast path out on zero bytes, because in like, 99%
65	// of situations, the input is not 0 length, lol
66
67	let EncodedReprInfo {
68		frames,
69		remainder,
70		needed_capacity
71	} = EncodedReprInfo::for_input_len(bytes.len());
72
73	let mut frames_iter = ChunkedSlice::<BINARY_FRAME_LEN>::new(bytes);
74
75	// here we allocate enough capacity (calculated by `EncodedReprInfo`)
76	let mut dest = UnsafeBufWriteGuard::with_capacity(needed_capacity);
77
78	// this is the loop that encodes and writes all the chunks to output buffer
79	for _ in 0..frames {
80		// SAFETY: we loop `frames` times, which is calculated correctly
81		// by `EncodedReprInfo` (see its safety notes)
82		let frame = unsafe { frames_iter.next_frame_unchecked() };
83
84		// SAFETY: same as above, we will have enough bytes to write to,
85		// calculated by `EncodedReprInfo`
86		unsafe { encode_frame(frame, &mut dest) }
87	}
88
89	// this is where the remainder, if any, is encoded and stored
90	if remainder > 0 {
91		{
92			// Refactoring out the inner closure is to avoid an edge case in lints
93			// https://github.com/rust-lang/rust-clippy/issues/13134
94			let do_thing = |remainder: &_| {
95				// SAFETY: we calculated and preallocated the correct amount up front.
96				// This closure is called only once, just after this declaration. I (vt)
97				// have been extra cautious and put it in its own scope lol
98				unsafe { encode_frame(remainder, &mut dest) }
99			};
100
101			// SAFETY: this will only run if there is any remainder (guarded by if statement)
102			// All the full frames have been consumed by the previous loop, so if we do have
103			// any remainder, it will be strictly under 4
104			// This triggers, even with unsafe block on `encode_frame`, and additionally
105			// the lint for unnecessary unsafe block triggers too. I think nested unsafe
106			// is edge case that the authors of this lint have not thought about, hmm...
107			unsafe { frames_iter.with_remainder_unchecked(do_thing) }
108		}
109
110		// remainder is amount of non padding bytes in the frame
111		let padding_len = 4 - remainder;
112
113		// SAFETY: remainder will always be 1, 2, or 3 (both strictly less than 4
114		// as guaranteed by `EncodedReprInfo`, and greater than 0, by if condition),
115		// so the result of the subtraction above, will always be 3, 2, or 1, which
116		// is less than 85, so this will always be in bounds
117		let ptr = unsafe { TABLE_ENCODER.as_ptr().add(padding_len) };
118
119		// SAFETY: ptr is in bounds and valid (see above)
120		let padding_char = unsafe { *ptr };
121
122		// SAFETY: we calculated and preallocated the right amount, which if we
123		// are here, would be because we needed to encode remainder, and allocation
124		// calculation will have taken that into account would include this extra
125		// byte that is needed
126		unsafe { dest.write_bytes_const::<1>(&padding_char) }
127	}
128
129	// SAFETY: we calculated and requested the exact amount of memory we need,
130	// and have written to all of it in the loop and if statement above
131	let vec = unsafe { dest.into_full_vec() };
132
133	// SAFETY: throughout the encoding process, we only pushed characters from the
134	// encoding table, all of which are ASCII chars. If a string is valid ASCII,
135	// it is also valid UTF-8
136	unsafe {
137		debug_assert!(str::from_utf8(&vec).is_ok(), "output bytes are valid utf-8");
138		String::from_utf8_unchecked(vec)
139	}
140}
141
142/// Decodes a slice of a Z85 string back into the source bytes
143#[inline]
144pub fn decode_z85(mut bytes: &[u8]) -> Result<Vec<u8>, DecodeError> {
145	if bytes.len() < STRING_FRAME_LEN {
146		return if bytes.is_empty() {
147			Ok(Vec::new())
148		} else {
149			// in here, bytes len is 0 < n < STRING_FRAME_LEN. we already returned
150			// on empty input (valid because empty bytes <-> empty string). at input
151			// lengths 1-3, the single frame would have been padded to a full frame
152			// and then the amount of padding appended as one more byte, for a total
153			// lenght of 6. At input length 4, it would just be the frame without
154			// any extra bytes added. so therefore the smallest valid non-zero len
155			// is 5, encoding one full frame of data or more, so this is invalid input.
156			Err(DecodeError::InvalidLength)
157		}
158	}
159
160	// `bytes.len()` will always be 5 or more, so `frames` will always
161	// be 1 or more (see comment above)
162	let frames = bytes.len() / STRING_FRAME_LEN;
163	debug_assert!(frames >= 1, "condition of \"at least one frame in input\" was checked correctly");
164
165	let remainder = bytes.len() % STRING_FRAME_LEN;
166
167	// left shift 2 is the same as multiply by 4 (BINARY_FRAME_LEN)
168	let capacity = frames << 2;
169
170	// Match statement to check remainder for that extra padding encoding byte.
171	// Either, there is 1 trailing byte, that is stritly less than 4, that
172	// encodes the amount of padding added. Or, it's a different character that
173	// doesn't decode to 0..=3 or none at all (in which case, we exit early)
174	let (capacity, added_padding) = match remainder {
175		0 => {
176			// no padding was added
177			(capacity, 0usize)
178		}
179		1 => {
180			// the singular trailing byte that encodes how much padding was added
181
182			// remainder is 1, so there will be at least 1 byte in the
183			// slice, because duh (well technically there will be at least 6
184			// bytes as established earlier, but I can still safely subtract 1).
185			// This will never overflow
186			// TODO: could be unchecked sub?
187			let one_shorter = bytes.len() - 1;
188
189			// le ptr to the input slice
190			let ptr = bytes.as_ptr();
191
192			let byte = {
193				// SAFETY: this points at the last byte, it is in bounds
194				let last_byte_ptr = unsafe { ptr.add(one_shorter) };
195
196				// SAFETY: since the ptr is in bounds and points
197				// at last byte, it is safe to dereference
198				unsafe { *last_byte_ptr }
199			};
200
201			// SAFETY:
202			// - `ptr` points at the start of the input slice
203			// - it's established above that subtracting by 1
204			//   will not overflow, if we got into this match branch
205			// - `one_shorter` is one less than the len of the input slice
206			//    (which won't overflow)
207			// this is just taking a subslice of the all the bytes except the last
208			bytes = unsafe { slice::from_raw_parts(ptr, one_shorter) };
209
210			let decoded = {
211				// SAFETY: `byte` is of type u8, which has a range of 0..=255,
212				// which will never overflow TABLE_DECODER as its len is 256
213				let table_ptr = unsafe { TABLE_DECODER.as_ptr().add(byte.into_usize()) };
214
215				// SAFETY: as established above, pointer above will not
216				// index past end of TABLE_DECODER
217				unsafe { *table_ptr }
218			};
219
220			let decoded = decoded.into_usize();
221			let added_padding = if decoded < BINARY_FRAME_LEN {
222				decoded
223			} else {
224				// invalid char in this context (too large / does not exist)
225				// does not exist is 255 so it won't pass above check either
226				return Err(DecodeError::InvalidChar)
227			};
228
229			// We established that if we got here, we need to decode at least 1
230			// full frame. `added_padding` is lte 3, which is less than the
231			// size of 1 full binary frame (4), so this won't overflow. If
232			// added_padding is 0 for some reason, this returns the same values
233			// as the 0 case (checked in unit test below)
234			// TODO: this can be unchecked sub
235			(capacity - added_padding, added_padding)
236		}
237		_n => {
238			// 2 or 3 extra bytes at end of input, not valid in any scenario
239			return Err(DecodeError::InvalidLength)
240		}
241	};
242
243	// because frames >= 1, `excluding_last_frame` will be >= 0 (ie. will not underflow).
244	let excluding_last_frame = frames - 1;
245
246	let mut frames_iter = ChunkedSlice::<STRING_FRAME_LEN>::new(bytes);
247	let mut dest = UnsafeBufWriteGuard::with_capacity(capacity);
248
249	// this loop goes over and decodees all the string chunks to output buffer
250	for _ in 0..excluding_last_frame {
251		// SAFETY: this loop loops `excluding_last_frame` times, which is the
252		// amount of times it takes to loop through all the full chunks, except
253		// the last one
254		let frame = unsafe { frames_iter.next_frame_unchecked() };
255
256		// Refactoring out the inner closure is to avoid an edge case in lints
257		// https://github.com/rust-lang/rust-clippy/issues/13134
258		let do_thing = |frame: &[_; 4]| {
259			// SAFETY: we calculated/preallocated the exact amount of
260			// memory we need up front, and we only loop one less amount
261			// of times than the number of full frames, so we won't overflow
262			unsafe { dest.write_bytes_const::<BINARY_FRAME_LEN>(frame.as_ptr()) }
263		};
264
265		// SAFETY: uhm, conservatively marked unsafe yay? lol
266		// but as established above, we won't overflow
267		unsafe { decode_frame(frame, do_thing)? }
268	}
269
270	// this is the last frame, and this frame gets decoded a bit specially.
271	// We remove the amount of padding from the amount, so we can only write
272	// the amount of actual data bytes into the output. We are still able to
273	// preallocate exact capacity we need up front!
274
275	// SAFETY: we do have one last frame left. if there was 1 remainder it was
276	// decoded and removed from the end, if there was more remainder we returned
277	// an error already, and if it were 0 then.. well, we have a perfect frame left.
278	// After this, there will be no frames left.
279	let frame = unsafe { frames_iter.next_frame_unchecked() };
280
281	// This is the amount of bytes minus the padding bytes at the end, aka, the
282	// amount of actual data bytes that were encoded.
283	// - if 0 bytes of padding were added, this is whole frame and
284	//   added_padding would be 0
285	// - if 1 to 3 bytes of padding were added, this is correct, and would be
286	//   1 to 3
287	// - 4 or more here is not possible, since if it did, full frames would have
288	//   been counted towards full frame count and strict remainder would be here
289	//   (0..=3)
290
291	// Because of all that explained above, this will also be in range of
292	// 0 <= n < 4 (BINARY_FRAME_LEN), ie. will not overflow.
293	let non_padding_bytes = BINARY_FRAME_LEN - added_padding;
294
295	let do_thing = |frame: &[_; 4]| {
296		// SAFETY: this writes the actual data bytes into the buffer. We
297		// subtracted padding bytes from the number we write already, so we
298		// write the rest of the buffer the perfect amount left
299		unsafe { dest.write_bytes(frame.as_ptr(), non_padding_bytes) }
300	};
301
302	// SAFETY: conservatively marked unsafe function aha
303	// (reasoning for other invariants detailed out above)
304	unsafe { decode_frame(frame, do_thing)? }
305
306	// SAFETY: We have consumed all the input bytes (calculated)
307	debug_assert!(frames_iter.to_slice().is_empty(), "all bytes were consumed");
308
309	// SAFETY: We have written the exact amount of bytes we preallocated (calculated)
310	Ok(unsafe { dest.into_full_vec() })
311}
312
313/// Errors that can be encountered on decoding data (encoding data does not error)
314// TODO: these errors could be improved.
315#[derive(Debug, PartialEq, thiserror::Error)]
316pub enum DecodeError {
317	/// Input data has invalid length
318	#[error("invalid length")]
319	InvalidLength,
320	/// Invalid character in input data
321	#[error("invalid character")]
322	InvalidChar,
323	/// One frame of data contains characters that are too large and would overflow
324	#[error("correct characters, but incorrect combination that would cause overflow")]
325	FrameOverflow
326}
327
/// Various details about a slice and its encoded output bytes, including
/// number of full frames, remainder, and how much capacity is needed to hold
/// all the encoded bytes
///
/// # Safety
///
/// All fields on this struct are marked `pub`, meaning anyone is allowed to
/// directly access and modify them. Don't accept any instances of this struct
/// from nontrusted sources, nor construct instances from raw data taken from
/// nontrusted sources.
///
/// On the contrary, you can trust and rely on the output directly from the
/// [`for_input_len`](EncodedReprInfo::for_input_len) associated function,
/// including in unsafe contexts. The body of this function is heavily
/// documented.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct EncodedReprInfo {
	/// The amount of _full_ frames (eg. amount of full chunks of 4 bytes)
	pub frames: usize,
	/// The amount of remainder bytes, strictly less than 4 (frame size)
	pub remainder: usize,
	/// The amount of capacity required to fit all the encoded data into
	///
	/// This is calculated by summing up the following values:
	/// - Space needed for full frames is `frames * 5`, since every frame
	///   is a chunk of 4 bytes that gets encoded into a frame of 5 bytes
	/// - Space needed for remainder bytes:
	///   - If no remainder, then 0.
	///     - This also implies that if the input does not need to be padded to
	///       a len that is a multiple of 4, no padding is needed. In this case,
	///       the numbers/calculations here are compliant with the [Z85 spec].
	///   - If there is remainder, it is `5 + 1`. The remainder bytes
	///     will be padded to a full frame of 4, then encoded as a full frame,
	///     yielding 5. Then, one extra byte is added onto the end to encode the
	///     amount of padding we have added (ex. 1 for 3 remainder bytes and 1
	///     padding).
	///
	/// [Z85 spec]: https://rfc.zeromq.org/spec/32
	pub needed_capacity: usize
}
367
368impl EncodedReprInfo {
369	/// Calculates the values
370	///
371	/// See documentation on [`EncodedReprInfo`] and on the individual fields
372	/// for more information.
373	#[inline]
374	pub fn for_input_len(input_len: usize) -> Self {
375		// right shift 2 is same as integer divide by 4,
376		// to get the amount of full binary frames
377		let frames = input_len >> 2;
378
379		// binary AND with 0b11 (3) is the same as rem 4,
380		// to get the amount of remainder bytes
381		let remainder = input_len & 0b11;
382
383		let needed_capacity = if remainder == 0 {
384			// each frame of 4 is encoded to a frame of
385			// 5 bytes of output. No padding needed case
386			frames * 5
387		} else {
388			// `frames` is number of *whole* binary frames, so the remainder
389			// is not included in this. adding 1 to allocate space for one more
390			// frame containing the padded remainder
391			let capacity = (frames + 1) * STRING_FRAME_LEN;
392
393			// adding 1 more byte for the last byte that
394			// encodes amount of padding added
395			capacity + 1
396		};
397
398		Self { frames, remainder, needed_capacity }
399	}
400}
401
402/// # Safety
403///
404/// Caller must guarantee dest is valid for at least `STRING_FRAME_LEN` bytes
405/// to be written.
406unsafe fn encode_frame(frame: &[u8; BINARY_FRAME_LEN], dest: &mut UnsafeBufWriteGuard) {
407	let mut int = u32::from_be_bytes(*frame).into_usize();
408
409	let byte5 = int % TABLE_ENCODER_LEN;
410	int /= TABLE_ENCODER_LEN;
411
412	let byte4 = int % TABLE_ENCODER_LEN;
413	int /= TABLE_ENCODER_LEN;
414
415	let byte3 = int % TABLE_ENCODER_LEN;
416	int /= TABLE_ENCODER_LEN;
417
418	let byte2 = int % TABLE_ENCODER_LEN;
419	int /= TABLE_ENCODER_LEN;
420
421	let byte1 = int;
422
423	debug_assert!(int % TABLE_ENCODER_LEN == int, "no remaining/unused byte information");
424	debug_assert!(int / TABLE_ENCODER_LEN == 0, "no remaining/unused byte information");
425
426	let table_ptr = TABLE_ENCODER.as_ptr();
427
428	/// # Safety
429	///
430	/// The value stored in the variable that's passed into this macro must be
431	/// within the range 0..=84, so that it can be used to index the encode table
432	macro_rules! encode_byte_unsafe {
433		($byte:ident) => {
434			{
435				// SAFETY: macro caller promises variable is within 0..=84,
436				// which can be safely used to index encode table (len 85)
437				let byte_ptr = unsafe { table_ptr.add($byte) };
438
439				// SAFETY: as described above, the pointer is valid to read from
440				unsafe { *byte_ptr }
441			}
442		}
443	}
444
445	let encoded_frame = [
446		// SAFETY: all the below macro invocations pass in variables whose value
447		// is calculating by rem 85. so they will always be strictly less than 85
448		encode_byte_unsafe!(byte1),
449		encode_byte_unsafe!(byte2),
450		encode_byte_unsafe!(byte3),
451		encode_byte_unsafe!(byte4),
452		encode_byte_unsafe!(byte5)
453	];
454
455	// SAFETY: caller guarantees that `dest` has at least
456	// `STRING_FRAME_LEN` bytes left, and that writing this won't overflow.
457	unsafe { dest.write_bytes_const::<STRING_FRAME_LEN>(encoded_frame.as_ptr()) }
458}
459
460/// # Safety
461///
462/// All possible inputs are sound. However, marking this function `unsafe` is
463/// consistent with [`encode_frame`]. This is an internal function, so doesn't
464/// matter too much.
465unsafe fn decode_frame<F>(frame: &[u8; STRING_FRAME_LEN], f: F) -> Result<(), DecodeError>
466where
467	F: FnOnce(&[u8; BINARY_FRAME_LEN])
468{
469	let [byte1, byte2, byte3, byte4, byte5] = *frame;
470	let table_ptr = TABLE_DECODER.as_ptr();
471
472	/// # Safety
473	///
474	/// The passed variable must be to a byte value, or a value within the
475	/// range `0..256`.
476	macro_rules! decode_byte_unsafe {
477		($byte:ident) => {
478			// SAFETY: caller promises that `$byte` is within range 0..=255,
479			// and the decoding table is len 256, so this will not be out of bounds
480			let ptr = unsafe { table_ptr.add($byte.into_usize()) };
481
482			// SAFETY: as established above, the ptr is within
483			// bounds and safe to dereference
484			let $byte = unsafe { *ptr };
485			if $byte == NONE { return Err(DecodeError::InvalidChar) }
486		}
487	}
488
489	// SAFETY: the byte values provided are bytes, so will guaranteed
490	// in the range 0..=255 (as larger values are not even representable)
491	// Additionally, if this comes back as Some from TABLE_DECODER, it is guaranteed
492	// to be 0 <= n <= 84, since there are no Some(n) values outside this range.
493	decode_byte_unsafe!(byte1);
494	decode_byte_unsafe!(byte2);
495	decode_byte_unsafe!(byte3);
496	decode_byte_unsafe!(byte4);
497	decode_byte_unsafe!(byte5);
498
499	// A string frame containing all valid z85 chars, can still overflow u32
500	// (u32 max is 4.294.967.295, but the result of this operation can be 4.437.053.124).
501	// However it cannot overflow u64 (u64 max is 18.446.744.073.709.551.616).
502	// So we decode in a u64 first, and then we check for overflow, then error if so
503	let mut int = byte1.into_u64();
504
505	int *= TABLE_ENCODER_LEN.into_u64();
506	int += byte2.into_u64();
507
508	int *= TABLE_ENCODER_LEN.into_u64();
509	int += byte3.into_u64();
510
511	int *= TABLE_ENCODER_LEN.into_u64();
512	int += byte4.into_u64();
513
514	int *= TABLE_ENCODER_LEN.into_u64();
515	int += byte5.into_u64();
516
517	if int >> u32::BITS != 0 { return Err(DecodeError::FrameOverflow) }
518
519	let decoded_frame = u32::to_be_bytes(int.into_u32_lossy());
520	f(&decoded_frame);
521
522	Ok(())
523}
524
/// Unit tests for the Z85 encoder and decoder: round trips, cross
/// compatibility with the `z85` crate, and every [`DecodeError`] variant
#[cfg(test)]
mod tests {
	extern crate rand;
	extern crate z85;

	use crate::prelude::*;
	use super::*;
	use rand::{ Rng, thread_rng };

	/// The "HelloWorld" case, 8 bytes <-> 10 characters, no padding involved
	#[test]
	fn provided_test_case() {
		let bytes: &[u8] = &[
			0x86, 0x4f, 0xd2, 0x6f,
			0xb5, 0x59, 0xf7, 0x5b
		];
		let encoded = "HelloWorld";

		assert_eq!(encoded, encode_z85(bytes));
		assert_eq!(bytes, decode_z85(encoded.as_bytes()).expect("provided test case decodes properly"));
	}

	/// Empty bytes <-> empty string, in both directions
	#[test]
	fn empty_input() {
		assert_eq!(encode_z85(&[]), "");
		assert_eq!(decode_z85(b""), Ok(Vec::new()));
	}

	/// Round trips random data of various lengths, and checks that the
	/// encoded output has the expected length
	#[test]
	fn randomised() {
		// (bytes_len, encoded_len)
		// (expected_input_len, expected_output_len)
		let expected_lengths = [
			(0usize, 0usize),
			(1, 6),
			(2, 6),
			(3, 6),
			(4, 5),
			(5, 11),
			(6, 11),
			(7, 11),
			(8, 10),
			(9, 16),
			(10, 16),
			(11, 16),
			(12, 15),
			(13, 21),
			(14, 21),
			(15, 21),
			(16, 20),
			(17, 26),
			(18, 26),
			(19, 26),
			(20, 25),

			(50, 66),
			(100, 125),
			(500, 625),
			(1000, 1250),
			(100_000, 125_000),
			(1_000_000, 1_250_000),
		];
		let mut rng = thread_rng();

		for (expected_input_len, expected_output_len) in expected_lengths {
			for _ in 0usize..5 {
				let mut original_input = vec![0u8; expected_input_len];
				rng.fill(&mut *original_input);
				assert_eq!(original_input.len(), expected_input_len);

				let encoded = encode_z85(&original_input);
				assert_eq!(encoded.len(), expected_output_len);

				let decoded = decode_z85(encoded.as_bytes())
					.expect("can round trip decode just encoded data");
				assert_eq!(decoded.len(), expected_input_len);

				assert_eq!(original_input, decoded);
			}
		}
	}

	#[test]
	fn z85_crate_nonpadded_compat() {
		// this should work, since when bytes length is a multiple of 4
		// we don't add any padding characters and `z85` doesn't add any
		// either, meaning in this situation our impls are cross compatible

		let mut rng = thread_rng();

		let mut bytes = vec![0u8; 1000];
		rng.fill(&mut *bytes);
		let bytes = &*bytes;

		let wiwi_encoded = encode_z85(bytes);
		let z85_encoded = z85::encode(bytes);
		assert_eq!(wiwi_encoded, z85_encoded);

		let wiwi_decoded_z85 = decode_z85(z85_encoded.as_bytes())
			.expect("wiwi can decode z85");
		let z85_decoded_wiwi = z85::decode(wiwi_encoded.as_bytes())
			.expect("z85 can decode wiwi");

		assert_eq!(wiwi_decoded_z85, z85_decoded_wiwi);

		// both decoders agreeing with each other isn't enough,
		// they also need to give back what was originally put in
		assert_eq!(bytes, wiwi_decoded_z85);
	}

	#[test]
	fn extra_zero_padding_byte() {
		// for the case where theres an extra padding marker byte that encodes
		// zero padding, which we don't emit because that's just a waste of space.
		// but it doesn't break the parser, so we don't check for it.

		let strs = [
			("adfeg", "adfeg0"),
			(
				// len 45
				"abcdefafuehirugehdbfntkvdneoiwr4htrugitdfkwwu",
				"abcdefafuehirugehdbfntkvdneoiwr4htrugitdfkwwu0"
			)
		];

		for (str1, str2) in strs {
			let str1 = decode_z85(str1.as_bytes())
				.expect("nonpadded z85 parses successfully");
			let str2 = decode_z85(str2.as_bytes())
				.expect("padded-with-0 z85 parses successfully");
			assert_eq!(str1, str2);
		}
	}

	/// `%nSc0` is the largest valid frame, decoding to `u32::MAX`
	#[test]
	fn max_value() {
		let decoded = decode_z85(b"%nSc0%nSc0%nSc0%nSc0");
		let decoded = decoded.as_deref();
		assert_eq!(decoded, Ok(&[255u8; 16] as &[u8]));
	}

	#[test]
	fn rejects_too_large() {
		let strs = [
			//   v
			"%nSc1",
			//                            v
			"%nSc0%nSc0%nSc0%nSc0%nSc0%nSc1%nSc0",
			//          v
			"%nSc0%nSc0%oSc0%nSc0%nSc0%nSc0%nSc0",
			// kinda obvious
			"#####"
		];

		for s in strs {
			let decoded = decode_z85(s.as_bytes());
			assert_eq!(decoded, Err(DecodeError::FrameOverflow))
		}
	}

	#[test]
	fn rejects_invalid_length() {
		// 1 to 4: non-empty, but shorter than a single frame
		// 7 to 9 and 12: 2 to 4 stray characters after the last full frame
		for len in [1usize, 2, 3, 4, 7, 8, 9, 12] {
			let input = vec![b'0'; len];
			assert_eq!(decode_z85(&input), Err(DecodeError::InvalidLength));
		}
	}

	#[test]
	fn rejects_invalid_char() {
		let strs = [
			// characters that aren't in the alphabet, in the only frame
			"0000 ",
			"00,00",
			"~0000",
			// ... and in a frame that isn't the first
			"0000000_00",
			// padding marker that decodes to 4 or more
			"000004",
			"00000#",
			// padding marker that isn't in the alphabet at all
			"00000 "
		];

		for s in strs {
			let decoded = decode_z85(s.as_bytes());
			assert_eq!(decoded, Err(DecodeError::InvalidChar))
		}
	}
}
wiwi/encoding/z85.rs

wiwi/encoding/
z85.rs