Skip to content

Commit 9327a14

Browse files
authored
Merge pull request #81 from elrafoon/ser_collect_str
Serializer: Implement collect_str
2 parents e695aa6 + e13c3f8 commit 9327a14

File tree

2 files changed

+93
-57
lines changed

2 files changed

+93
-57
lines changed

CHANGELOG.md

+1
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
1111

1212
- Support for optional package `defmt` which allows for easy conversion for
1313
error types when using tools like `probe-rs` for logging over debuggers.
14+
- Implement `Serializer::collect_str`
1415

1516
### Changed
1617

src/ser/mod.rs

+92-57
Original file line number | Diff line number | Diff line change
@@ -94,6 +94,66 @@ impl<'a> Serializer<'a> {
9494
Ok(())
9595
}
9696
}
97+
98+
fn push_char(&mut self, c: char) -> Result<()> {
99+
// Do escaping according to "6. MUST represent all strings (including object member names) in
100+
// their minimal-length UTF-8 encoding": https://gibson042.github.io/canonicaljson-spec/
101+
//
102+
// We don't need to escape lone surrogates because surrogate pairs do not exist in valid UTF-8,
103+
// even if they can exist in JSON or JavaScript strings (UCS-2 based). As a result, lone surrogates
104+
// cannot exist in a Rust String. If they do, the bug is in the String constructor.
105+
// An excellent explanation is available at https://www.youtube.com/watch?v=HhIEDWmQS3w
106+
107+
// Temporary storage for a single encoded char.
108+
// A char is up to 4 bytes long when encoded to UTF-8.
109+
let mut encoding_tmp = [0u8; 4];
110+
111+
match c {
112+
'\\' => {
113+
self.push(b'\\')?;
114+
self.push(b'\\')?;
115+
}
116+
'"' => {
117+
self.push(b'\\')?;
118+
self.push(b'"')?;
119+
}
120+
'\u{0008}' => {
121+
self.push(b'\\')?;
122+
self.push(b'b')?;
123+
}
124+
'\u{0009}' => {
125+
self.push(b'\\')?;
126+
self.push(b't')?;
127+
}
128+
'\u{000A}' => {
129+
self.push(b'\\')?;
130+
self.push(b'n')?;
131+
}
132+
'\u{000C}' => {
133+
self.push(b'\\')?;
134+
self.push(b'f')?;
135+
}
136+
'\u{000D}' => {
137+
self.push(b'\\')?;
138+
self.push(b'r')?;
139+
}
140+
'\u{0000}'..='\u{001F}' => {
141+
self.push(b'\\')?;
142+
self.push(b'u')?;
143+
self.push(b'0')?;
144+
self.push(b'0')?;
145+
let (hex1, hex2) = hex(c as u8);
146+
self.push(hex1)?;
147+
self.push(hex2)?;
148+
}
149+
_ => {
150+
let encoded = c.encode_utf8(&mut encoding_tmp as &mut [u8]);
151+
self.extend_from_slice(encoded.as_bytes())?;
152+
}
153+
}
154+
155+
Ok(())
156+
}
97157
}
98158

99159
// NOTE(serialize_*signed) This is basically the numtoa implementation minus the lookup tables,
@@ -263,62 +323,8 @@ impl<'a, 'b: 'a> ser::Serializer for &'a mut Serializer<'b> {
263323
fn serialize_str(self, v: &str) -> Result<Self::Ok> {
264324
self.push(b'"')?;
265325

266-
// Do escaping according to "6. MUST represent all strings (including object member names) in
267-
// their minimal-length UTF-8 encoding": https://gibson042.github.io/canonicaljson-spec/
268-
//
269-
// We don't need to escape lone surrogates because surrogate pairs do not exist in valid UTF-8,
270-
// even if they can exist in JSON or JavaScript strings (UCS-2 based). As a result, lone surrogates
271-
// cannot exist in a Rust String. If they do, the bug is in the String constructor.
272-
// An excellent explanation is available at https://www.youtube.com/watch?v=HhIEDWmQS3w
273-
274-
// Temporary storage for a single encoded char.
275-
// A char is up to 4 bytes long when encoded to UTF-8.
276-
let mut encoding_tmp = [0u8; 4];
277-
278326
for c in v.chars() {
279-
match c {
280-
'\\' => {
281-
self.push(b'\\')?;
282-
self.push(b'\\')?;
283-
}
284-
'"' => {
285-
self.push(b'\\')?;
286-
self.push(b'"')?;
287-
}
288-
'\u{0008}' => {
289-
self.push(b'\\')?;
290-
self.push(b'b')?;
291-
}
292-
'\u{0009}' => {
293-
self.push(b'\\')?;
294-
self.push(b't')?;
295-
}
296-
'\u{000A}' => {
297-
self.push(b'\\')?;
298-
self.push(b'n')?;
299-
}
300-
'\u{000C}' => {
301-
self.push(b'\\')?;
302-
self.push(b'f')?;
303-
}
304-
'\u{000D}' => {
305-
self.push(b'\\')?;
306-
self.push(b'r')?;
307-
}
308-
'\u{0000}'..='\u{001F}' => {
309-
self.push(b'\\')?;
310-
self.push(b'u')?;
311-
self.push(b'0')?;
312-
self.push(b'0')?;
313-
let (hex1, hex2) = hex(c as u8);
314-
self.push(hex1)?;
315-
self.push(hex2)?;
316-
}
317-
_ => {
318-
let encoded = c.encode_utf8(&mut encoding_tmp as &mut [u8]);
319-
self.extend_from_slice(encoded.as_bytes())?;
320-
}
321-
}
327+
self.push_char(c)?;
322328
}
323329

324330
self.push(b'"')
@@ -434,11 +440,40 @@ impl<'a, 'b: 'a> ser::Serializer for &'a mut Serializer<'b> {
434440
Ok(SerializeStructVariant::new(self))
435441
}
436442

437-
fn collect_str<T: ?Sized>(self, _value: &T) -> Result<Self::Ok>
443+
fn collect_str<T: ?Sized>(self, value: &T) -> Result<Self::Ok>
438444
where
439445
T: fmt::Display,
440446
{
441-
unreachable!()
447+
self.push(b'"')?;
448+
449+
let mut col = StringCollector::new(self);
450+
fmt::write(&mut col, format_args!("{}", value)).or(Err(Error::BufferFull))?;
451+
452+
self.push(b'"')
453+
}
454+
}
455+
456+
struct StringCollector<'a, 'b> {
457+
ser: &'a mut Serializer<'b>,
458+
}
459+
460+
impl<'a, 'b> StringCollector<'a, 'b> {
461+
pub fn new(ser: &'a mut Serializer<'b>) -> Self {
462+
Self { ser }
463+
}
464+
465+
fn do_write_str(&mut self, s: &str) -> Result<()> {
466+
for c in s.chars() {
467+
self.ser.push_char(c)?;
468+
}
469+
470+
Ok(())
471+
}
472+
}
473+
474+
impl<'a, 'b> fmt::Write for StringCollector<'a, 'b> {
475+
fn write_str(&mut self, s: &str) -> fmt::Result {
476+
self.do_write_str(s).or(Err(fmt::Error))
442477
}
443478
}
444479

0 commit comments

Comments
 (0)