Skip to main content

drive/util/common/
encode.rs

1#![allow(clippy::result_large_err)] // Encoding helpers bubble up drive::Error with context
2//! Encoding.
3//!
4//! This module defines encoding functions.
5//!
6
7use crate::error::drive::DriveError;
8use crate::error::Error;
9use byteorder::{BigEndian, ByteOrder, WriteBytesExt};
10
11/// Encodes an unsigned integer on 64 bits.
12pub fn encode_u64(val: u64) -> Vec<u8> {
13    // Positive integers are represented in binary with the signed bit set to 0
14    // Negative integers are represented in 2's complement form
15
16    // Encode the integer in big endian form
17    // This ensures that most significant bits are compared first
18    // a bigger positive number would be greater than a smaller one
19    // and a bigger negative number would be greater than a smaller one
20    // maintains sort order for each domain
21    let mut wtr = vec![];
22    wtr.write_u64::<BigEndian>(val).unwrap();
23
24    // Flip the sign bit
25    // to deal with interaction between the domains
26    // 2's complement values have the sign bit set to 1
27    // this makes them greater than the positive domain in terms of sort order
28    // to fix this, we just flip the sign bit
29    // so positive integers have the high bit and negative integers have the low bit
30    // the relative order of elements in each domain is still maintained, as the
31    // change was uniform across all elements
32    wtr[0] ^= 0b1000_0000;
33
34    wtr
35}
36
37/// Decodes a 64-bit unsigned integer from a vector of bytes encoded with `encode_u64`.
38///
39/// # Arguments
40///
41/// * `bytes` - A vector of bytes representing the encoded 64-bit unsigned integer.
42///
43/// # Returns
44///
45/// * A 64-bit unsigned integer decoded from the input bytes.
46///
47/// # Panics
48///
49/// This function will panic if the input vector does not have exactly 8 bytes.
50pub fn decode_u64_owned(mut bytes: Vec<u8>) -> Result<u64, Error> {
51    // Ensure the input vector has exactly 8 bytes
52    if bytes.len() != 8 {
53        return Err(Error::Drive(DriveError::CorruptedDriveState(format!(
54            "Trying to decode a u64 from {} bytes {}",
55            bytes.len(),
56            hex::encode(bytes)
57        ))));
58    }
59
60    // Flip the sign bit back to its original state
61    // This reverses the transformation done in `encode_u64`
62    bytes[0] ^= 0b1000_0000;
63
64    // Read the integer from the modified bytes
65    // The bytes are in big endian form, which preserves the correct order
66    // when they were written in the encode function
67    Ok(BigEndian::read_u64(&bytes))
68}
69
70/// Decodes a 64-bit unsigned integer from a vector of bytes encoded with `encode_u64`.
71///
72/// # Arguments
73///
74/// * `bytes` - A vector of bytes representing the encoded 64-bit unsigned integer.
75///
76/// # Returns
77///
78/// * A 64-bit unsigned integer decoded from the input bytes.
79///
80/// # Panics
81///
82/// This function will panic if the input vector does not have exactly 8 bytes.
83pub fn decode_u64(bytes: &[u8]) -> Result<u64, Error> {
84    // Ensure the input vector has exactly 8 bytes
85    if bytes.len() != 8 {
86        return Err(Error::Drive(DriveError::CorruptedDriveState(format!(
87            "Trying to decode a u64 from {} bytes {}",
88            bytes.len(),
89            hex::encode(bytes)
90        ))));
91    }
92
93    let mut wtr = bytes.to_vec();
94
95    // Flip the sign bit back to its original state
96    // This reverses the transformation done in `encode_u64`
97    wtr[0] ^= 0b1000_0000;
98
99    // Read the integer from the modified bytes
100    // The bytes are in big endian form, which preserves the correct order
101    // when they were written in the encode function
102    Ok(BigEndian::read_u64(&wtr))
103}
104
105/// Encodes a signed integer on 64 bits.
106pub fn encode_i64(val: i64) -> Vec<u8> {
107    // Positive integers are represented in binary with the signed bit set to 0
108    // Negative integers are represented in 2's complement form
109
110    // Encode the integer in big endian form
111    // This ensures that most significant bits are compared first
112    // a bigger positive number would be greater than a smaller one
113    // and a bigger negative number would be greater than a smaller one
114    // maintains sort order for each domain
115    let mut wtr = vec![];
116    wtr.write_i64::<BigEndian>(val).unwrap();
117
118    // Flip the sign bit
119    // to deal with interaction between the domains
120    // 2's complement values have the sign bit set to 1
121    // this makes them greater than the positive domain in terms of sort order
122    // to fix this, we just flip the sign bit
123    // so positive integers have the high bit and negative integers have the low bit
124    // the relative order of elements in each domain is still maintained, as the
125    // change was uniform across all elements
126    wtr[0] ^= 0b1000_0000;
127
128    wtr
129}
130
131/// Encodes a float.
132pub fn encode_float(val: f64) -> Vec<u8> {
133    // Floats are represented based on the  IEEE 754-2008 standard
134    // [sign bit] [biased exponent] [mantissa]
135
136    // when comparing floats, the sign bit has the greatest impact
137    // any positive number is greater than all negative numbers
138    // if the numbers come from the same domain then the exponent is the next factor to consider
139    // the exponent gives a sense of how many digits are in the non fractional part of the number
140    // for example in base 10, 10 has an exponent of 1 (1.0 * 10^1)
141    // while 5000 (5.0 * 10^3) has an exponent of 3
142    // for the positive domain, the bigger the exponent the larger the number i.e 5000 > 10
143    // for the negative domain, the bigger the exponent the smaller the number i.e -10 > -5000
144    // if the exponents are the same, then the mantissa is used to determine the greater number
145    // the inverse relationship still holds
146    // i.e bigger mantissa (bigger number in positive domain but smaller number in negative domain)
147
148    // There are two things to fix to achieve total sort order
149    // 1. Place positive domain above negative domain (i.e flip the sign bit)
150    // 2. Exponent and mantissa for a smaller number like -5000 is greater than that of -10
151    //    so bit level comparison would say -5000 is greater than -10
152    //    we fix this by flipping the exponent and mantissa values, which has the effect of reversing
153    //    the order (0000 [smallest] -> 1111 [largest])
154
155    // Encode in big endian form, so most significant bits are compared first
156    let mut wtr = vec![];
157    wtr.write_f64::<BigEndian>(val).unwrap();
158
159    // Check if the value is negative, if it is
160    // flip all the bits i.e sign, exponent and mantissa
161    if val < 0.0 {
162        wtr = wtr.iter().map(|byte| !byte).collect();
163    } else {
164        // for positive values, just flip the sign bit
165        wtr[0] ^= 0b1000_0000;
166    }
167
168    wtr
169}
170
171/// Encodes an unsigned integer on 16 bits.
172pub fn encode_u16(val: u16) -> Vec<u8> {
173    // Positive integers are represented in binary with the signed bit set to 0
174    // Negative integers are represented in 2's complement form
175
176    // Encode the integer in big endian form
177    // This ensures that most significant bits are compared first
178    // a bigger positive number would be greater than a smaller one
179    // and a bigger negative number would be greater than a smaller one
180    // maintains sort order for each domain
181    let mut wtr = vec![];
182    wtr.write_u16::<BigEndian>(val).unwrap();
183
184    // Flip the sign bit
185    // to deal with interaction between the domains
186    // 2's complement values have the sign bit set to 1
187    // this makes them greater than the positive domain in terms of sort order
188    // to fix this, we just flip the sign bit
189    // so positive integers have the high bit and negative integers have the low bit
190    // the relative order of elements in each domain is still maintained, as the
191    // change was uniform across all elements
192    wtr[0] ^= 0b1000_0000;
193
194    wtr
195}
196
197/// Encodes an unsigned integer on 32 bits.
198pub fn encode_u32(val: u32) -> Vec<u8> {
199    // Positive integers are represented in binary with the signed bit set to 0
200    // Negative integers are represented in 2's complement form
201
202    // Encode the integer in big endian form
203    // This ensures that most significant bits are compared first
204    // a bigger positive number would be greater than a smaller one
205    // and a bigger negative number would be greater than a smaller one
206    // maintains sort order for each domain
207    let mut wtr = vec![];
208    wtr.write_u32::<BigEndian>(val).unwrap();
209
210    // Flip the sign bit
211    // to deal with interaction between the domains
212    // 2's complement values have the sign bit set to 1
213    // this makes them greater than the positive domain in terms of sort order
214    // to fix this, we just flip the sign bit
215    // so positive integers have the high bit and negative integers have the low bit
216    // the relative order of elements in each domain is still maintained, as the
217    // change was uniform across all elements
218    wtr[0] ^= 0b1000_0000;
219
220    wtr
221}
222
223#[cfg(test)]
224#[allow(clippy::approx_constant)]
225mod tests {
226    use super::*;
227
228    // --- encode_u64 / decode_u64 round-trip tests ---
229
230    #[test]
231    fn encode_decode_u64_zero() {
232        let encoded = encode_u64(0);
233        assert_eq!(encoded.len(), 8);
234        let decoded = decode_u64(&encoded).unwrap();
235        assert_eq!(decoded, 0);
236    }
237
238    #[test]
239    fn encode_decode_u64_one() {
240        let encoded = encode_u64(1);
241        let decoded = decode_u64(&encoded).unwrap();
242        assert_eq!(decoded, 1);
243    }
244
245    #[test]
246    fn encode_decode_u64_max() {
247        let encoded = encode_u64(u64::MAX);
248        let decoded = decode_u64(&encoded).unwrap();
249        assert_eq!(decoded, u64::MAX);
250    }
251
252    #[test]
253    fn encode_decode_u64_owned_round_trip() {
254        for val in [0u64, 1, 42, 1000, u64::MAX / 2, u64::MAX] {
255            let encoded = encode_u64(val);
256            let decoded = decode_u64_owned(encoded).unwrap();
257            assert_eq!(decoded, val);
258        }
259    }
260
261    #[test]
262    fn encode_u64_preserves_sort_order_in_positive_range() {
263        // The sign-bit flip means lexicographic ordering matches signed interpretation.
264        // Values in 0..=i64::MAX sort correctly among themselves.
265        let values = [0u64, 1, 2, 100, 1000, i64::MAX as u64];
266        let encoded: Vec<Vec<u8>> = values.iter().map(|&v| encode_u64(v)).collect();
267        for i in 0..encoded.len() - 1 {
268            assert!(
269                encoded[i] < encoded[i + 1],
270                "Sort order violated: encode_u64({}) >= encode_u64({})",
271                values[i],
272                values[i + 1]
273            );
274        }
275    }
276
277    #[test]
278    fn encode_u64_sign_bit_flip_makes_high_values_sort_lower() {
279        // Values above i64::MAX have the sign bit set in big-endian, so the flip
280        // clears it, making them sort below values in the 0..=i64::MAX range.
281        // This is the intended behavior: the encoding treats u64 as if it were i64.
282        let below_midpoint = encode_u64(100);
283        let above_midpoint = encode_u64(u64::MAX);
284        assert!(above_midpoint < below_midpoint);
285    }
286
287    #[test]
288    fn decode_u64_wrong_length_returns_error() {
289        assert!(decode_u64(&[]).is_err());
290        assert!(decode_u64(&[0; 7]).is_err());
291        assert!(decode_u64(&[0; 9]).is_err());
292        assert!(decode_u64(&[0; 1]).is_err());
293    }
294
295    #[test]
296    fn decode_u64_owned_wrong_length_returns_error() {
297        assert!(decode_u64_owned(vec![]).is_err());
298        assert!(decode_u64_owned(vec![0; 7]).is_err());
299        assert!(decode_u64_owned(vec![0; 9]).is_err());
300    }
301
302    // --- encode_i64 tests ---
303
304    #[test]
305    fn encode_i64_positive() {
306        let encoded = encode_i64(42);
307        assert_eq!(encoded.len(), 8);
308    }
309
310    #[test]
311    fn encode_i64_negative() {
312        let encoded = encode_i64(-42);
313        assert_eq!(encoded.len(), 8);
314    }
315
316    #[test]
317    fn encode_i64_zero() {
318        let encoded = encode_i64(0);
319        assert_eq!(encoded.len(), 8);
320    }
321
322    #[test]
323    fn encode_i64_preserves_sort_order() {
324        let values = [i64::MIN, -1000, -1, 0, 1, 1000, i64::MAX];
325        let encoded: Vec<Vec<u8>> = values.iter().map(|&v| encode_i64(v)).collect();
326        for i in 0..encoded.len() - 1 {
327            assert!(
328                encoded[i] < encoded[i + 1],
329                "Sort order violated: encode_i64({}) >= encode_i64({})",
330                values[i],
331                values[i + 1]
332            );
333        }
334    }
335
336    #[test]
337    fn encode_i64_negative_less_than_positive() {
338        let neg = encode_i64(-1);
339        let pos = encode_i64(1);
340        assert!(neg < pos);
341    }
342
343    // --- encode_float tests ---
344
345    #[test]
346    fn encode_float_positive() {
347        let encoded = encode_float(3.14);
348        assert_eq!(encoded.len(), 8);
349    }
350
351    #[test]
352    fn encode_float_negative() {
353        let encoded = encode_float(-3.14);
354        assert_eq!(encoded.len(), 8);
355    }
356
357    #[test]
358    fn encode_float_zero() {
359        let encoded = encode_float(0.0);
360        assert_eq!(encoded.len(), 8);
361    }
362
363    #[test]
364    fn encode_float_preserves_sort_order() {
365        let values = [-1000.0f64, -1.0, -0.001, 0.0, 0.001, 1.0, 1000.0];
366        let encoded: Vec<Vec<u8>> = values.iter().map(|&v| encode_float(v)).collect();
367        for i in 0..encoded.len() - 1 {
368            assert!(
369                encoded[i] < encoded[i + 1],
370                "Sort order violated: encode_float({}) >= encode_float({})",
371                values[i],
372                values[i + 1]
373            );
374        }
375    }
376
377    #[test]
378    fn encode_float_negative_less_than_positive() {
379        let neg = encode_float(-0.5);
380        let pos = encode_float(0.5);
381        assert!(neg < pos);
382    }
383
384    // --- encode_u16 tests ---
385
386    #[test]
387    fn encode_u16_basic() {
388        assert_eq!(encode_u16(0).len(), 2);
389        assert_eq!(encode_u16(u16::MAX).len(), 2);
390    }
391
392    #[test]
393    fn encode_u16_preserves_sort_order_in_positive_range() {
394        // Values in 0..=i16::MAX sort correctly after sign-bit flip.
395        let values = [0u16, 1, 100, 1000, i16::MAX as u16];
396        let encoded: Vec<Vec<u8>> = values.iter().map(|&v| encode_u16(v)).collect();
397        for i in 0..encoded.len() - 1 {
398            assert!(
399                encoded[i] < encoded[i + 1],
400                "Sort order violated: encode_u16({}) >= encode_u16({})",
401                values[i],
402                values[i + 1]
403            );
404        }
405    }
406
407    #[test]
408    fn encode_u16_sign_bit_flip_makes_high_values_sort_lower() {
409        let below = encode_u16(100);
410        let above = encode_u16(u16::MAX);
411        assert!(above < below);
412    }
413
414    // --- encode_u32 tests ---
415
416    #[test]
417    fn encode_u32_basic() {
418        assert_eq!(encode_u32(0).len(), 4);
419        assert_eq!(encode_u32(u32::MAX).len(), 4);
420    }
421
422    #[test]
423    fn encode_u32_preserves_sort_order_in_positive_range() {
424        // Values in 0..=i32::MAX sort correctly after sign-bit flip.
425        let values = [0u32, 1, 100, 10000, i32::MAX as u32];
426        let encoded: Vec<Vec<u8>> = values.iter().map(|&v| encode_u32(v)).collect();
427        for i in 0..encoded.len() - 1 {
428            assert!(
429                encoded[i] < encoded[i + 1],
430                "Sort order violated: encode_u32({}) >= encode_u32({})",
431                values[i],
432                values[i + 1]
433            );
434        }
435    }
436
437    #[test]
438    fn encode_u32_sign_bit_flip_makes_high_values_sort_lower() {
439        let below = encode_u32(100);
440        let above = encode_u32(u32::MAX);
441        assert!(above < below);
442    }
443}