Skip to content

Commit 83dd4aa

Browse files
committed
CanonicalizationScheme enum type
1 parent 5689cfc commit 83dd4aa

File tree

2 files changed

+72
-32
lines changed

2 files changed

+72
-32
lines changed

ciborium/src/ser/mod.rs

Lines changed: 70 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,40 @@ use ciborium_io::Write;
1111
use ciborium_ll::*;
1212
use serde::{ser, Serialize as _};
1313

14+
/// Selects which canonicalization scheme, if any, is applied to map keys during CBOR serialization.
15+
#[cfg(feature = "std")]
16+
pub enum CanonicalizationScheme {
17+
/// No canonicalization; map keys are not sorted. Faster and reduces allocations.
18+
None,
19+
20+
/// Sort map keys in output according to the "Canonical CBOR" rules of [RFC 7049] (section 3.9).
21+
///
22+
/// Also matches the length-first ordering that [RFC 8949 4.2.3] retains for backwards compatibility.
23+
///
24+
/// Uses length-first map key ordering, e.g. `["a", "b", "aa"]`.
///
/// Note: map serialization currently panics with `unimplemented!` when this scheme is selected.
///
/// [RFC 7049]: https://www.rfc-editor.org/rfc/rfc7049.html#section-3.9
/// [RFC 8949 4.2.3]: https://www.rfc-editor.org/rfc/rfc8949.html#section-4.2.3
25+
Rfc7049,
26+
27+
/// Sort map keys in output according to [RFC 8949]'s deterministic encoding spec.
28+
///
29+
/// Uses bytewise lexicographic map key ordering, e.g. `["a", "aa", "b"]`.
///
/// [RFC 8949]: https://www.rfc-editor.org/rfc/rfc8949.html#name-deterministically-encoded-c
// NOTE(review): the variant name `Rfc8049` looks like a transposition of RFC 8949 (the RFC the
// docs above refer to) -- confirm and rename before this API is released.
30+
Rfc8049,
31+
}
32+
33+
#[cfg(feature = "std")]
34+
impl CanonicalizationScheme {
35+
/// Returns `true` if this canonicalization scheme requires map keys to be sorted,
/// i.e. for every variant except `None`.
36+
pub fn is_sorting(&self) -> bool {
37+
matches!(self, Self::Rfc7049 | Self::Rfc8049)
38+
}
39+
40+
// NOTE(review): disabled draft of a helper that serializes a single map key to bytes.
// It still passes `true` to `Serializer::new`, which now takes a `CanonicalizationScheme`;
// update it or remove it before merging.
// pub fn key<K: serde::Serialize>(&self, key: &K) -> Result<Vec<u8>, Error<std::io::Error>> {
41+
// let mut buffer = Vec::new();
42+
// let mut serializer = Serializer::new(&mut buffer, true);
43+
// key.serialize(&mut serializer)?;
44+
// Ok(buffer)
45+
// }
46+
}
47+
1448
/// A serializer for CBOR.
1549
pub struct Serializer<W> {
1650
encoder: Encoder<W>,
@@ -20,7 +54,7 @@ pub struct Serializer<W> {
2054
///
2155
/// [RFC 8949]: https://www.rfc-editor.org/rfc/rfc8949.html#name-deterministically-encoded-c
2256
#[cfg(feature = "std")]
23-
canonical: bool,
57+
canonicalization: CanonicalizationScheme,
2458
}
2559

2660
impl<W: Write> Serializer<W> {
@@ -31,10 +65,10 @@ impl<W: Write> Serializer<W> {
3165
///
3266
/// [RFC 8949]: https://www.rfc-editor.org/rfc/rfc8949.html#name-deterministically-encoded-c
3367
#[cfg(feature = "std")]
34-
pub fn new(encoder: impl Into<Encoder<W>>, canonical: bool) -> Self {
68+
pub fn new(encoder: impl Into<Encoder<W>>, canonicalization: CanonicalizationScheme) -> Self {
3569
Self {
3670
encoder: encoder.into(),
37-
canonical
71+
canonicalization
3872
}
3973
}
4074
}
@@ -45,7 +79,7 @@ impl<W: Write> From<W> for Serializer<W> {
4579
Self {
4680
encoder: writer.into(),
4781
#[cfg(feature = "std")]
48-
canonical: false,
82+
canonicalization: CanonicalizationScheme::None,
4983
}
5084
}
5185
}
@@ -56,7 +90,7 @@ impl<W: Write> From<Encoder<W>> for Serializer<W> {
5690
Self {
5791
encoder: writer,
5892
#[cfg(feature = "std")]
59-
canonical: false,
93+
canonicalization: CanonicalizationScheme::None,
6094
}
6195
}
6296
}
@@ -335,7 +369,7 @@ macro_rules! end {
335369
() => {
336370
#[inline]
337371
fn end(self) -> Result<(), Self::Error> {
338-
if self.ending {
372+
if self.indefinite {
339373
self.serializer.encoder.push(Header::Break)?;
340374
}
341375

@@ -350,27 +384,30 @@ macro_rules! end_map {
350384
#[inline]
351385
fn end(self) -> Result<(), Self::Error> {
352386
#[cfg(feature = "std")]
353-
if self.serializer.canonical {
387+
if matches!(self.serializer.canonicalization, CanonicalizationScheme::Rfc8049) {
354388
// keys get sorted in lexicographical byte order
355389
let keys = self.cache_keys;
356390
let values = self.cache_values;
357391

358392
debug_assert_eq!(
359393
keys.len(), values.len(),
360-
"ciborium error: canonicalization failed, keys and values must have same length.");
394+
"ciborium error: canonicalization failed, different number of keys and values?");
361395

362-
let mut pairs = std::collections::BTreeMap::new();
363-
for (key, value) in keys.iter().zip(values.iter()) {
364-
pairs.insert(key, value);
365-
}
396+
let pairs: std::collections::BTreeMap<_, _> =
397+
keys.iter().zip(values.iter()).collect();
366398

367-
for (key, value) in pairs {
399+
for (key, value) in pairs.iter() {
368400
self.serializer.encoder.write_all(&key)?;
369401
self.serializer.encoder.write_all(&value)?;
370402
}
371403
}
372404

373-
if self.ending {
405+
#[cfg(feature = "std")]
406+
if matches!(self.serializer.canonicalization, CanonicalizationScheme::Rfc7049) {
407+
unimplemented!("rfc7049 canonicalization not yet implemented");
408+
}
409+
410+
if self.indefinite {
374411
self.serializer.encoder.push(Header::Break)?;
375412
}
376413

@@ -385,7 +422,9 @@ macro_rules! end_map {
385422
#[doc(hidden)]
386423
pub struct CollectionSerializer<'a, W> {
387424
serializer: &'a mut Serializer<W>,
388-
ending: bool,
425+
426+
/// Whether the collection is indefinite length. Cannot be used with canonical serialization.
427+
indefinite: bool,
389428
tag: bool,
390429

391430
#[cfg(feature = "std")]
@@ -395,21 +434,20 @@ pub struct CollectionSerializer<'a, W> {
395434
}
396435

397436
impl<'a, W> CollectionSerializer<'a, W> {
398-
pub fn new(serializer: &'a mut Serializer<W>, ending: bool, tag: bool) -> Self {
437+
pub fn new(serializer: &'a mut Serializer<W>, indefinite: bool, tag: bool) -> Self {
399438
#[cfg(feature = "std")]
400-
let capacity = match serializer.canonical {
401-
true => 4,
402-
false => 0,
403-
};
439+
assert!(
440+
!(serializer.canonicalization.is_sorting() && indefinite),
441+
"ciborium error: canonical mode cannot be used with indefinite length collections");
404442

405443
Self {
406444
serializer,
407-
ending,
445+
indefinite,
408446
tag,
409447
#[cfg(feature = "std")]
410-
cache_keys: Vec::with_capacity(capacity),
448+
cache_keys: Vec::with_capacity(0),
411449
#[cfg(feature = "std")]
412-
cache_values: Vec::with_capacity(capacity),
450+
cache_values: Vec::with_capacity(0),
413451
}
414452
}
415453
}
@@ -504,7 +542,7 @@ where
504542
#[inline]
505543
fn serialize_key<U: ?Sized + ser::Serialize>(&mut self, key: &U) -> Result<(), Self::Error> {
506544
#[cfg(feature = "std")]
507-
if self.serializer.canonical {
545+
if self.serializer.canonicalization.is_sorting() {
508546
let key_bytes = to_vec(key).map_err(|e| Error::Value(e.to_string()))?;
509547
self.cache_keys.push(key_bytes);
510548
return Ok(());
@@ -519,7 +557,7 @@ where
519557
value: &U,
520558
) -> Result<(), Self::Error> {
521559
#[cfg(feature = "std")]
522-
if self.serializer.canonical {
560+
if self.serializer.canonicalization.is_sorting() {
523561
let value_bytes = to_vec(value).map_err(|e| Error::Value(e.to_string()))?;
524562
self.cache_values.push(value_bytes);
525563
return Ok(());
@@ -545,7 +583,7 @@ where
545583
value: &U,
546584
) -> Result<(), Self::Error> {
547585
#[cfg(feature = "std")]
548-
if self.serializer.canonical {
586+
if self.serializer.canonicalization.is_sorting() {
549587
let key_bytes = to_vec(key).map_err(|e| Error::Value(e.to_string()))?;
550588
self.cache_keys.push(key_bytes);
551589
let value_bytes = to_vec(value).map_err(|e| Error::Value(e.to_string()))?;
@@ -575,7 +613,7 @@ where
575613
value: &U,
576614
) -> Result<(), Self::Error> {
577615
#[cfg(feature = "std")]
578-
if self.serializer.canonical {
616+
if self.serializer.canonicalization.is_sorting() {
579617
let key_bytes = to_vec(key).map_err(|e| Error::Value(e.to_string()))?;
580618
self.cache_keys.push(key_bytes);
581619
let value_bytes = to_vec(value).map_err(|e| Error::Value(e.to_string()))?;
@@ -595,7 +633,7 @@ where
595633
#[inline]
596634
pub fn to_vec<T: ?Sized + ser::Serialize>(value: &T) -> Result<Vec<u8>, Error<std::io::Error>> {
597635
let mut buffer = std::vec::Vec::with_capacity(1024);
598-
let mut serializer = Serializer::new(&mut buffer, false);
636+
let mut serializer = Serializer::new(&mut buffer, CanonicalizationScheme::None);
599637
value.serialize(&mut serializer)?;
600638
Ok(buffer)
601639
}
@@ -607,9 +645,9 @@ pub fn to_vec<T: ?Sized + ser::Serialize>(value: &T) -> Result<Vec<u8>, Error<st
607645
/// [RFC 8949]: https://www.rfc-editor.org/rfc/rfc8949.html#name-deterministically-encoded-c
608646
#[cfg(feature = "std")]
609647
#[inline]
610-
pub fn to_vec_canonical<T: ?Sized + ser::Serialize>(value: &T) -> Result<Vec<u8>, Error<std::io::Error>> {
648+
pub fn to_vec_canonical<T: ?Sized + ser::Serialize>(value: &T, scheme: CanonicalizationScheme) -> Result<Vec<u8>, Error<std::io::Error>> {
611649
let mut buffer = std::vec::Vec::with_capacity(1024);
612-
let mut serializer = Serializer::new(&mut buffer, true);
650+
let mut serializer = Serializer::new(&mut buffer, scheme);
613651
value.serialize(&mut serializer)?;
614652
Ok(buffer)
615653
}
@@ -637,10 +675,11 @@ where
637675
pub fn into_writer_canonical<T: ?Sized + ser::Serialize, W: Write>(
638676
value: &T,
639677
writer: W,
678+
scheme: CanonicalizationScheme,
640679
) -> Result<(), Error<W::Error>>
641680
where
642681
W::Error: core::fmt::Debug,
643682
{
644-
let mut encoder = Serializer::new(writer, true);
683+
let mut encoder = Serializer::new(writer, scheme);
645684
value.serialize(&mut encoder)
646685
}

ciborium/tests/canonical.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use ciborium::tag::Required;
77
use ciborium::value::CanonicalValue;
88
use rand::prelude::*;
99
use std::collections::BTreeMap;
10+
use ciborium::ser::CanonicalizationScheme;
1011

1112
macro_rules! cval {
1213
($x:expr) => {
@@ -81,7 +82,7 @@ fn map_canonical() {
8182
map.insert(cval!("z"), val!(4));
8283
map.insert(cval!("aa"), val!(6));
8384

84-
let bytes1 = ciborium::ser::to_vec_canonical(&map).unwrap();
85+
let bytes1 = ciborium::ser::to_vec_canonical(&map, CanonicalizationScheme::Rfc8049).unwrap();
8586

8687
assert_eq!(
8788
hex::encode(&bytes1),

0 commit comments

Comments
 (0)