drive/util/common/encode.rs
1#![allow(clippy::result_large_err)] // Encoding helpers bubble up drive::Error with context
2//! Encoding.
3//!
4//! This module defines encoding functions.
5//!
6
7use crate::error::drive::DriveError;
8use crate::error::Error;
9use byteorder::{BigEndian, ByteOrder, WriteBytesExt};
10
11/// Encodes an unsigned integer on 64 bits.
12pub fn encode_u64(val: u64) -> Vec<u8> {
13 // Positive integers are represented in binary with the signed bit set to 0
14 // Negative integers are represented in 2's complement form
15
16 // Encode the integer in big endian form
17 // This ensures that most significant bits are compared first
18 // a bigger positive number would be greater than a smaller one
19 // and a bigger negative number would be greater than a smaller one
20 // maintains sort order for each domain
21 let mut wtr = vec![];
22 wtr.write_u64::<BigEndian>(val).unwrap();
23
24 // Flip the sign bit
25 // to deal with interaction between the domains
26 // 2's complement values have the sign bit set to 1
27 // this makes them greater than the positive domain in terms of sort order
28 // to fix this, we just flip the sign bit
29 // so positive integers have the high bit and negative integers have the low bit
30 // the relative order of elements in each domain is still maintained, as the
31 // change was uniform across all elements
32 wtr[0] ^= 0b1000_0000;
33
34 wtr
35}
36
37/// Decodes a 64-bit unsigned integer from a vector of bytes encoded with `encode_u64`.
38///
39/// # Arguments
40///
41/// * `bytes` - A vector of bytes representing the encoded 64-bit unsigned integer.
42///
43/// # Returns
44///
45/// * A 64-bit unsigned integer decoded from the input bytes.
46///
47/// # Panics
48///
49/// This function will panic if the input vector does not have exactly 8 bytes.
50pub fn decode_u64_owned(mut bytes: Vec<u8>) -> Result<u64, Error> {
51 // Ensure the input vector has exactly 8 bytes
52 if bytes.len() != 8 {
53 return Err(Error::Drive(DriveError::CorruptedDriveState(format!(
54 "Trying to decode a u64 from {} bytes {}",
55 bytes.len(),
56 hex::encode(bytes)
57 ))));
58 }
59
60 // Flip the sign bit back to its original state
61 // This reverses the transformation done in `encode_u64`
62 bytes[0] ^= 0b1000_0000;
63
64 // Read the integer from the modified bytes
65 // The bytes are in big endian form, which preserves the correct order
66 // when they were written in the encode function
67 Ok(BigEndian::read_u64(&bytes))
68}
69
70/// Decodes a 64-bit unsigned integer from a vector of bytes encoded with `encode_u64`.
71///
72/// # Arguments
73///
74/// * `bytes` - A vector of bytes representing the encoded 64-bit unsigned integer.
75///
76/// # Returns
77///
78/// * A 64-bit unsigned integer decoded from the input bytes.
79///
80/// # Panics
81///
82/// This function will panic if the input vector does not have exactly 8 bytes.
83pub fn decode_u64(bytes: &[u8]) -> Result<u64, Error> {
84 // Ensure the input vector has exactly 8 bytes
85 if bytes.len() != 8 {
86 return Err(Error::Drive(DriveError::CorruptedDriveState(format!(
87 "Trying to decode a u64 from {} bytes {}",
88 bytes.len(),
89 hex::encode(bytes)
90 ))));
91 }
92
93 let mut wtr = bytes.to_vec();
94
95 // Flip the sign bit back to its original state
96 // This reverses the transformation done in `encode_u64`
97 wtr[0] ^= 0b1000_0000;
98
99 // Read the integer from the modified bytes
100 // The bytes are in big endian form, which preserves the correct order
101 // when they were written in the encode function
102 Ok(BigEndian::read_u64(&wtr))
103}
104
105/// Encodes a signed integer on 64 bits.
106pub fn encode_i64(val: i64) -> Vec<u8> {
107 // Positive integers are represented in binary with the signed bit set to 0
108 // Negative integers are represented in 2's complement form
109
110 // Encode the integer in big endian form
111 // This ensures that most significant bits are compared first
112 // a bigger positive number would be greater than a smaller one
113 // and a bigger negative number would be greater than a smaller one
114 // maintains sort order for each domain
115 let mut wtr = vec![];
116 wtr.write_i64::<BigEndian>(val).unwrap();
117
118 // Flip the sign bit
119 // to deal with interaction between the domains
120 // 2's complement values have the sign bit set to 1
121 // this makes them greater than the positive domain in terms of sort order
122 // to fix this, we just flip the sign bit
123 // so positive integers have the high bit and negative integers have the low bit
124 // the relative order of elements in each domain is still maintained, as the
125 // change was uniform across all elements
126 wtr[0] ^= 0b1000_0000;
127
128 wtr
129}
130
131/// Encodes a float.
132pub fn encode_float(val: f64) -> Vec<u8> {
133 // Floats are represented based on the IEEE 754-2008 standard
134 // [sign bit] [biased exponent] [mantissa]
135
136 // when comparing floats, the sign bit has the greatest impact
137 // any positive number is greater than all negative numbers
138 // if the numbers come from the same domain then the exponent is the next factor to consider
139 // the exponent gives a sense of how many digits are in the non fractional part of the number
140 // for example in base 10, 10 has an exponent of 1 (1.0 * 10^1)
141 // while 5000 (5.0 * 10^3) has an exponent of 3
142 // for the positive domain, the bigger the exponent the larger the number i.e 5000 > 10
143 // for the negative domain, the bigger the exponent the smaller the number i.e -10 > -5000
144 // if the exponents are the same, then the mantissa is used to determine the greater number
145 // the inverse relationship still holds
146 // i.e bigger mantissa (bigger number in positive domain but smaller number in negative domain)
147
148 // There are two things to fix to achieve total sort order
149 // 1. Place positive domain above negative domain (i.e flip the sign bit)
150 // 2. Exponent and mantissa for a smaller number like -5000 is greater than that of -10
151 // so bit level comparison would say -5000 is greater than -10
152 // we fix this by flipping the exponent and mantissa values, which has the effect of reversing
153 // the order (0000 [smallest] -> 1111 [largest])
154
155 // Encode in big endian form, so most significant bits are compared first
156 let mut wtr = vec![];
157 wtr.write_f64::<BigEndian>(val).unwrap();
158
159 // Check if the value is negative, if it is
160 // flip all the bits i.e sign, exponent and mantissa
161 if val < 0.0 {
162 wtr = wtr.iter().map(|byte| !byte).collect();
163 } else {
164 // for positive values, just flip the sign bit
165 wtr[0] ^= 0b1000_0000;
166 }
167
168 wtr
169}
170
171/// Encodes an unsigned integer on 16 bits.
172pub fn encode_u16(val: u16) -> Vec<u8> {
173 // Positive integers are represented in binary with the signed bit set to 0
174 // Negative integers are represented in 2's complement form
175
176 // Encode the integer in big endian form
177 // This ensures that most significant bits are compared first
178 // a bigger positive number would be greater than a smaller one
179 // and a bigger negative number would be greater than a smaller one
180 // maintains sort order for each domain
181 let mut wtr = vec![];
182 wtr.write_u16::<BigEndian>(val).unwrap();
183
184 // Flip the sign bit
185 // to deal with interaction between the domains
186 // 2's complement values have the sign bit set to 1
187 // this makes them greater than the positive domain in terms of sort order
188 // to fix this, we just flip the sign bit
189 // so positive integers have the high bit and negative integers have the low bit
190 // the relative order of elements in each domain is still maintained, as the
191 // change was uniform across all elements
192 wtr[0] ^= 0b1000_0000;
193
194 wtr
195}
196
197/// Encodes an unsigned integer on 32 bits.
198pub fn encode_u32(val: u32) -> Vec<u8> {
199 // Positive integers are represented in binary with the signed bit set to 0
200 // Negative integers are represented in 2's complement form
201
202 // Encode the integer in big endian form
203 // This ensures that most significant bits are compared first
204 // a bigger positive number would be greater than a smaller one
205 // and a bigger negative number would be greater than a smaller one
206 // maintains sort order for each domain
207 let mut wtr = vec![];
208 wtr.write_u32::<BigEndian>(val).unwrap();
209
210 // Flip the sign bit
211 // to deal with interaction between the domains
212 // 2's complement values have the sign bit set to 1
213 // this makes them greater than the positive domain in terms of sort order
214 // to fix this, we just flip the sign bit
215 // so positive integers have the high bit and negative integers have the low bit
216 // the relative order of elements in each domain is still maintained, as the
217 // change was uniform across all elements
218 wtr[0] ^= 0b1000_0000;
219
220 wtr
221}
222
223#[cfg(test)]
224mod tests {
225 use super::*;
226
227 // --- encode_u64 / decode_u64 round-trip tests ---
228
229 #[test]
230 fn encode_decode_u64_zero() {
231 let encoded = encode_u64(0);
232 assert_eq!(encoded.len(), 8);
233 let decoded = decode_u64(&encoded).unwrap();
234 assert_eq!(decoded, 0);
235 }
236
237 #[test]
238 fn encode_decode_u64_one() {
239 let encoded = encode_u64(1);
240 let decoded = decode_u64(&encoded).unwrap();
241 assert_eq!(decoded, 1);
242 }
243
244 #[test]
245 fn encode_decode_u64_max() {
246 let encoded = encode_u64(u64::MAX);
247 let decoded = decode_u64(&encoded).unwrap();
248 assert_eq!(decoded, u64::MAX);
249 }
250
251 #[test]
252 fn encode_decode_u64_owned_round_trip() {
253 for val in [0u64, 1, 42, 1000, u64::MAX / 2, u64::MAX] {
254 let encoded = encode_u64(val);
255 let decoded = decode_u64_owned(encoded).unwrap();
256 assert_eq!(decoded, val);
257 }
258 }
259
260 #[test]
261 fn encode_u64_preserves_sort_order_in_positive_range() {
262 // The sign-bit flip means lexicographic ordering matches signed interpretation.
263 // Values in 0..=i64::MAX sort correctly among themselves.
264 let values = [0u64, 1, 2, 100, 1000, i64::MAX as u64];
265 let encoded: Vec<Vec<u8>> = values.iter().map(|&v| encode_u64(v)).collect();
266 for i in 0..encoded.len() - 1 {
267 assert!(
268 encoded[i] < encoded[i + 1],
269 "Sort order violated: encode_u64({}) >= encode_u64({})",
270 values[i],
271 values[i + 1]
272 );
273 }
274 }
275
276 #[test]
277 fn encode_u64_sign_bit_flip_makes_high_values_sort_lower() {
278 // Values above i64::MAX have the sign bit set in big-endian, so the flip
279 // clears it, making them sort below values in the 0..=i64::MAX range.
280 // This is the intended behavior: the encoding treats u64 as if it were i64.
281 let below_midpoint = encode_u64(100);
282 let above_midpoint = encode_u64(u64::MAX);
283 assert!(above_midpoint < below_midpoint);
284 }
285
286 #[test]
287 fn decode_u64_wrong_length_returns_error() {
288 assert!(decode_u64(&[]).is_err());
289 assert!(decode_u64(&[0; 7]).is_err());
290 assert!(decode_u64(&[0; 9]).is_err());
291 assert!(decode_u64(&[0; 1]).is_err());
292 }
293
294 #[test]
295 fn decode_u64_owned_wrong_length_returns_error() {
296 assert!(decode_u64_owned(vec![]).is_err());
297 assert!(decode_u64_owned(vec![0; 7]).is_err());
298 assert!(decode_u64_owned(vec![0; 9]).is_err());
299 }
300
301 // --- encode_i64 tests ---
302
303 #[test]
304 fn encode_i64_positive() {
305 let encoded = encode_i64(42);
306 assert_eq!(encoded.len(), 8);
307 }
308
309 #[test]
310 fn encode_i64_negative() {
311 let encoded = encode_i64(-42);
312 assert_eq!(encoded.len(), 8);
313 }
314
315 #[test]
316 fn encode_i64_zero() {
317 let encoded = encode_i64(0);
318 assert_eq!(encoded.len(), 8);
319 }
320
321 #[test]
322 fn encode_i64_preserves_sort_order() {
323 let values = [i64::MIN, -1000, -1, 0, 1, 1000, i64::MAX];
324 let encoded: Vec<Vec<u8>> = values.iter().map(|&v| encode_i64(v)).collect();
325 for i in 0..encoded.len() - 1 {
326 assert!(
327 encoded[i] < encoded[i + 1],
328 "Sort order violated: encode_i64({}) >= encode_i64({})",
329 values[i],
330 values[i + 1]
331 );
332 }
333 }
334
335 #[test]
336 fn encode_i64_negative_less_than_positive() {
337 let neg = encode_i64(-1);
338 let pos = encode_i64(1);
339 assert!(neg < pos);
340 }
341
342 // --- encode_float tests ---
343
344 #[test]
345 fn encode_float_positive() {
346 let encoded = encode_float(3.14);
347 assert_eq!(encoded.len(), 8);
348 }
349
350 #[test]
351 fn encode_float_negative() {
352 let encoded = encode_float(-3.14);
353 assert_eq!(encoded.len(), 8);
354 }
355
356 #[test]
357 fn encode_float_zero() {
358 let encoded = encode_float(0.0);
359 assert_eq!(encoded.len(), 8);
360 }
361
362 #[test]
363 fn encode_float_preserves_sort_order() {
364 let values = [-1000.0f64, -1.0, -0.001, 0.0, 0.001, 1.0, 1000.0];
365 let encoded: Vec<Vec<u8>> = values.iter().map(|&v| encode_float(v)).collect();
366 for i in 0..encoded.len() - 1 {
367 assert!(
368 encoded[i] < encoded[i + 1],
369 "Sort order violated: encode_float({}) >= encode_float({})",
370 values[i],
371 values[i + 1]
372 );
373 }
374 }
375
376 #[test]
377 fn encode_float_negative_less_than_positive() {
378 let neg = encode_float(-0.5);
379 let pos = encode_float(0.5);
380 assert!(neg < pos);
381 }
382
383 // --- encode_u16 tests ---
384
385 #[test]
386 fn encode_u16_basic() {
387 assert_eq!(encode_u16(0).len(), 2);
388 assert_eq!(encode_u16(u16::MAX).len(), 2);
389 }
390
391 #[test]
392 fn encode_u16_preserves_sort_order_in_positive_range() {
393 // Values in 0..=i16::MAX sort correctly after sign-bit flip.
394 let values = [0u16, 1, 100, 1000, i16::MAX as u16];
395 let encoded: Vec<Vec<u8>> = values.iter().map(|&v| encode_u16(v)).collect();
396 for i in 0..encoded.len() - 1 {
397 assert!(
398 encoded[i] < encoded[i + 1],
399 "Sort order violated: encode_u16({}) >= encode_u16({})",
400 values[i],
401 values[i + 1]
402 );
403 }
404 }
405
406 #[test]
407 fn encode_u16_sign_bit_flip_makes_high_values_sort_lower() {
408 let below = encode_u16(100);
409 let above = encode_u16(u16::MAX);
410 assert!(above < below);
411 }
412
413 // --- encode_u32 tests ---
414
415 #[test]
416 fn encode_u32_basic() {
417 assert_eq!(encode_u32(0).len(), 4);
418 assert_eq!(encode_u32(u32::MAX).len(), 4);
419 }
420
421 #[test]
422 fn encode_u32_preserves_sort_order_in_positive_range() {
423 // Values in 0..=i32::MAX sort correctly after sign-bit flip.
424 let values = [0u32, 1, 100, 10000, i32::MAX as u32];
425 let encoded: Vec<Vec<u8>> = values.iter().map(|&v| encode_u32(v)).collect();
426 for i in 0..encoded.len() - 1 {
427 assert!(
428 encoded[i] < encoded[i + 1],
429 "Sort order violated: encode_u32({}) >= encode_u32({})",
430 values[i],
431 values[i + 1]
432 );
433 }
434 }
435
436 #[test]
437 fn encode_u32_sign_bit_flip_makes_high_values_sort_lower() {
438 let below = encode_u32(100);
439 let above = encode_u32(u32::MAX);
440 assert!(above < below);
441 }
442}