wiwi/encoding/
hex.rs

1extern crate thiserror;
2
3use crate::prelude::*;
4use super::UnsafeBufWriteGuard;
5
6mod encode;
7mod decode;
8
9pub use self::encode::{
10	TABLE_ENCODER_LEN,
11	TABLE_ENCODER_LOWER,
12	TABLE_ENCODER_UPPER
13};
14
15/// Encodes a slice of bytes into a String, using lowercase characters
16#[inline]
17pub fn encode_hex(bytes: &[u8]) -> String {
18	_encode::<false>(bytes)
19}
20
21/// Encodes a slice of bytes into a String, using uppercase characters
22#[inline]
23pub fn encode_hex_upper(bytes: &[u8]) -> String {
24	_encode::<true>(bytes)
25}
26
27/// Inner function with const generic `UPPER`
28fn _encode<const UPPER: bool>(bytes: &[u8]) -> String {
29	debug_assert!(bytes.len() >> (usize::BITS - 1) == 0, "size overflow");
30
31	let len = bytes.len();
32	// shl 1 is same as multiplying by 2
33	let capacity = len << 1;
34	let ptr = bytes.as_ptr();
35	let mut dest = UnsafeBufWriteGuard::with_capacity(capacity);
36
37	// SAFETY: we obtained `ptr` and `len` from `bytes`, so `ptr` is valid for `len`
38	// reads, and we calculated and requested `dest` to allocate `len * 2` bytes
39	unsafe { encode::generic::<UPPER>(ptr, &mut dest, len) };
40
41	// SAFETY: we wrote into all the space we requested (`len * 2`)
42	let vec = unsafe { dest.into_full_vec() };
43
44	// SAFETY: `encode::generic` will only ever write the ASCII chars `0-9`, `a-f`,
45	// and `A-F` into vec. ASCII is valid UTF-8
46	unsafe {
47		debug_assert!(str::from_utf8(&vec).is_ok(), "output bytes are valid utf-8");
48		String::from_utf8_unchecked(vec)
49	}
50}
51
52/// Decodes a slice of hex bytes into a byte vector. This function handles and
53/// supports both uppercase and lowercase characters.
54#[inline]
55pub fn decode_hex(bytes: &[u8]) -> Result<Vec<u8>, DecodeError> {
56	let len = bytes.len();
57
58	// `AND 0b1` is chopping off all the other bits
59	// if the last bit is 1 then it's odd, which is invalid
60	if len & 0b1 != 0 { return Err(DecodeError::InvalidLength) }
61
62	// shr 1 is same as div 2
63	let capacity = len >> 1;
64	let mut dest = UnsafeBufWriteGuard::with_capacity(capacity);
65	let ptr = bytes.as_ptr();
66
67	// SAFETY: ptr is readable for `capacity * 2` bytes (since `capacity` is
68	// `len / 2` and `ptr` is readable for `len` bytes), and we requested `capacity`
69	// bytes in `dest`
70	unsafe { decode::generic(ptr, &mut dest, capacity)? }
71
72	// SAFETY: we wrote into all the space we requested (`len / 2`)
73	Ok(unsafe { dest.into_full_vec() })
74}
75
76/// Errors that can be encountered on decoding data (encoding data does not error)
77// TODO: these errors could be improved.
78#[derive(Debug, thiserror::Error)]
79pub enum DecodeError {
80	/// Invalid length. Length is expected to be a multiple of two
81	#[error("invalid length")]
82	InvalidLength,
83	/// Invalid character. Characters are only allowed to be in `0-9`, `a-f`, `A-F`
84	#[error("invalid character")]
85	InvalidChar
86}
87
#[cfg(test)]
mod tests {
	extern crate hex;
	extern crate rand;

	use crate::prelude::*;
	use super::*;
	use rand::{ Rng, thread_rng };

	/// Checks the base16 example vectors (these match the ones listed in
	/// RFC 4648, section 10) in both directions: encoding in either case, and
	/// decoding the encoded form back to the original bytes.
	#[test]
	fn rfc_provided_examples() {
		let examples = [
			("", ""),
			("f", "66"),
			("fo", "666F"),
			("foo", "666F6F"),
			("foob", "666F6F62"),
			("fooba", "666F6F6261"),
			("foobar", "666F6F626172")
		];

		for (bytes, encoded) in examples {
			assert_eq!(encoded, encode_hex_upper(bytes.as_bytes()));
			assert_eq!(encoded.to_lowercase(), encode_hex(bytes.as_bytes()));

			// the examples have to decode back to the original input as well
			let decoded = decode_hex(encoded.as_bytes())
				.expect("can decode rfc example");
			assert_eq!(bytes.as_bytes(), &*decoded);
		}
	}

	/// An odd number of hex characters can never describe whole bytes, and
	/// must be rejected with `InvalidLength`.
	#[test]
	fn odd_length_is_rejected() {
		for input in ["0", "abc", "12345"] {
			assert!(matches!(
				decode_hex(input.as_bytes()),
				Err(DecodeError::InvalidLength)
			));
		}
	}

	/// Round trips random input of many different lengths through both the
	/// lowercase and uppercase encoders, checking the output lengths on the way.
	#[test]
	fn randomised() {
		// (in_len, out_len)
		let expected_lengths = [
			(0usize, 0usize),
			(1, 2),
			(2, 4),
			(3, 6),
			(4, 8),
			(5, 10),
			(6, 12),
			(7, 14),
			(8, 16),
			(9, 18),
			(10, 20),
			(11, 22),
			(12, 24),
			(13, 26),
			(14, 28),
			(15, 30),
			(16, 32),
			(17, 34),
			(18, 36),
			(19, 38),
			(20, 40),

			(50, 100),
			(100, 200),
			(500, 1000),
			(1000, 2000),
			(100_000, 200_000),
			(1_000_000, 2_000_000),
		];
		let mut rng = thread_rng();

		for (expected_input_len, expected_output_len) in expected_lengths {
			// several independent random inputs per length
			for _ in 0usize..5 {
				let mut original_input = vec![0u8; expected_input_len];
				rng.fill(&mut *original_input);
				assert_eq!(original_input.len(), expected_input_len);

				let encoded_lower = encode_hex(&original_input);
				assert_eq!(encoded_lower.len(), expected_output_len);
				let encoded_upper = encode_hex_upper(&original_input);
				assert_eq!(encoded_upper.len(), expected_output_len);

				let decoded_lower = decode_hex(encoded_lower.as_bytes())
					.expect("can round trip decode just encoded data");
				assert_eq!(decoded_lower.len(), expected_input_len);
				assert_eq!(original_input, decoded_lower);

				let decoded_upper = decode_hex(encoded_upper.as_bytes())
					.expect("can round trip decode just encoded data");
				assert_eq!(decoded_upper.len(), expected_input_len);
				assert_eq!(original_input, decoded_upper);
			}
		}
	}

	/// Cross checks against the `hex` crate: both must produce the same
	/// encoding, and each must be able to decode the other's output.
	#[test]
	fn hex_crate_compat() {
		let mut rng = thread_rng();

		let mut bytes = vec![0u8; 1000];
		rng.fill(&mut *bytes);
		let bytes = &*bytes;

		let wiwi_encoded = encode_hex(bytes);
		let hex_encoded = hex::encode(bytes);
		assert_eq!(wiwi_encoded, hex_encoded);

		let wiwi_decoded_hex = decode_hex(hex_encoded.as_bytes())
			.expect("wiwi can decode hex");
		let hex_decoded_wiwi = hex::decode(wiwi_encoded.as_bytes())
			.expect("hex can decode wiwi");

		assert_eq!(wiwi_decoded_hex, hex_decoded_wiwi);
	}
}