Skip to content

Commit b049dfc

Browse files
authored
perf(ext/web): optimize TextEncoder encodeInto result (#34055)
Optimizes `TextEncoder.encodeInto()` by returning the common `read` / `written` result from the op as a single packed number (`read * 2^32 + written`), instead of writing those two counters through the shared `Uint32Array` out-buffer on every call. For very large results that cannot be represented exactly in the packed number format, the op returns a `-1` sentinel and the implementation falls back to the previous out-buffer path via `op_encoding_encode_into_fallback`. This is roughly a 12% improvement in the short-string encodeInto path.
1 parent 2cd5aac commit b049dfc

2 files changed

Lines changed: 66 additions & 11 deletions

File tree

ext/web/08_text_encoding.js

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,15 @@ const {
2121
op_encoding_decode_single,
2222
op_encoding_decode_utf8,
2323
op_encoding_encode_into,
24+
op_encoding_encode_into_fallback,
2425
op_encoding_new_decoder,
2526
op_encoding_normalize_label,
2627
} = core.ops;
2728
const {
2829
DataViewPrototypeGetBuffer,
2930
DataViewPrototypeGetByteLength,
3031
DataViewPrototypeGetByteOffset,
32+
MathTrunc,
3133
ObjectPrototypeIsPrototypeOf,
3234
PromiseReject,
3335
PromiseResolve,
@@ -298,10 +300,18 @@ class TextEncoder {
298300
encodeIntoOpts,
299301
);
300302
}
301-
op_encoding_encode_into(source, destination, encodeIntoBuf);
303+
const packed = op_encoding_encode_into(source, destination);
304+
if (packed === ENCODE_INTO_PACKED_SENTINEL) {
305+
op_encoding_encode_into_fallback(source, destination, encodeIntoBuf);
306+
return {
307+
read: encodeIntoBuf[0],
308+
written: encodeIntoBuf[1],
309+
};
310+
}
311+
const read = MathTrunc(packed / ENCODE_INTO_PACKED_MULTIPLIER);
302312
return {
303-
read: encodeIntoBuf[0],
304-
written: encodeIntoBuf[1],
313+
read,
314+
written: packed - read * ENCODE_INTO_PACKED_MULTIPLIER,
305315
};
306316
}
307317

@@ -319,6 +329,8 @@ class TextEncoder {
319329

320330
// Two-element scratch buffer filled by `op_encoding_encode_into_fallback`;
// `encodeInto` reads index 0 as `read` and index 1 as `written`.
const encodeIntoBuf = new Uint32Array(2);
321331
// Null-prototype options object with `allowShared: true`, passed as an
// argument inside `encodeInto`.
const encodeIntoOpts = { __proto__: null, allowShared: true };
332+
// Value `op_encoding_encode_into` returns when the result was not packed and
// the fallback op must be called instead. Mirrors the constant of the same
// name in ext/web/lib.rs.
const ENCODE_INTO_PACKED_SENTINEL = -1;
333+
// 2^32. A packed result is `read * 2^32 + written`, so dividing by this and
// truncating recovers `read`. Mirrors the constant of the same name in
// ext/web/lib.rs.
const ENCODE_INTO_PACKED_MULTIPLIER = 0x100000000;
322334

323335
webidl.configureInterface(TextEncoder);
324336
const TextEncoderPrototype = TextEncoder.prototype;

ext/web/lib.rs

Lines changed: 51 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ deno_core::extension!(deno_web,
7777
op_encoding_new_decoder,
7878
op_encoding_decode,
7979
op_encoding_encode_into,
80+
op_encoding_encode_into_fallback,
8081
op_blob_create_part,
8182
op_blob_slice_part,
8283
op_blob_read_part,
@@ -682,11 +683,49 @@ unsafe impl deno_core::GarbageCollected for TextDecoderResource {
682683
}
683684
}
684685

686+
/// Returned by the encode-into ops instead of a packed result to tell the
/// caller to use the out-buffer fallback op. Packed results are built from
/// unsigned counts and are never negative, so `-1.0` cannot collide with one.
const ENCODE_INTO_PACKED_SENTINEL: f64 = -1.0;
687+
/// Largest `read` count that may be packed: 2^21 - 1. With `written` taking
/// the low 32 bits, the packed value needs at most 21 + 32 = 53 bits, which
/// is the widest integer an `f64` represents exactly.
const ENCODE_INTO_MAX_PACKED_READ: usize = (1 << 21) - 1;
688+
/// 2^32 as an `f64`; `read` is scaled by this so `written` occupies the low
/// 32 bits of the packed value.
const ENCODE_INTO_PACKED_MULTIPLIER: f64 = (1u64 << 32) as f64;
689+
690+
/// Packs an encode-into result into a single `f64`:
/// `read * ENCODE_INTO_PACKED_MULTIPLIER + written`.
///
/// `read` must not exceed `ENCODE_INTO_MAX_PACKED_READ` and `written` must
/// fit in a `u32`. Both limits are only checked when debug assertions are
/// enabled, so callers are expected to return the sentinel themselves for
/// values outside that range.
#[inline]
691+
fn pack_encode_into_result(read: usize, written: usize) -> f64 {
692+
debug_assert!(read <= ENCODE_INTO_MAX_PACKED_READ);
693+
debug_assert!(written <= u32::MAX as usize);
694+
// `read` goes in the high part, `written` in the low 32 bits.
(read as f64) * ENCODE_INTO_PACKED_MULTIPLIER + written as f64
695+
}
696+
685697
// Slow-path op behind `TextEncoder.encodeInto()`; the fast-call variant is
// `op_encoding_encode_into_fast`.
//
// Writes `input` into `buffer` with `write_utf8_v2` and returns the
// `read` / `written` pair packed into one `f64` (see
// `pack_encode_into_result`). Returns `ENCODE_INTO_PACKED_SENTINEL` without
// touching `buffer` when the result might not fit the packed format.
//
// Errors: propagates the conversion error when `input` is not a
// `v8::String`.
#[op2(fast(op_encoding_encode_into_fast))]
686698
fn op_encoding_encode_into(
687699
scope: &mut v8::PinScope<'_, '_>,
688700
input: v8::Local<v8::Value>,
689701
#[buffer] buffer: &mut [u8],
702+
) -> Result<f64, WebError> {
703+
let s = v8::Local::<v8::String>::try_from(input)?;
704+
705+
// Bail out before writing anything, only when BOTH the string length and the
// buffer length exceed the packable `read` limit.
// NOTE(review): presumably sufficient because `read` can exceed neither the
// string length nor (at one or more bytes per code unit) the buffer length;
// confirm against `write_utf8_v2` semantics.
if s.length() > ENCODE_INTO_MAX_PACKED_READ
706+
&& buffer.len() > ENCODE_INTO_MAX_PACKED_READ
707+
{
708+
return Ok(ENCODE_INTO_PACKED_SENTINEL);
709+
}
710+
711+
// `nchars` is filled in by `write_utf8_v2` and reported as `read`; the
// return value `len` is reported as `written`.
let mut nchars = 0;
712+
let len = s.write_utf8_v2(
713+
scope,
714+
buffer,
715+
v8::WriteFlags::kReplaceInvalidUtf8,
716+
Some(&mut nchars),
717+
);
718+
719+
// Same limits that `pack_encode_into_result` asserts; debug builds only.
debug_assert!(nchars <= ENCODE_INTO_MAX_PACKED_READ);
720+
debug_assert!(len <= u32::MAX as usize);
721+
Ok(pack_encode_into_result(nchars, len))
722+
}
723+
724+
#[op2(fast)]
725+
fn op_encoding_encode_into_fallback(
726+
scope: &mut v8::PinScope<'_, '_>,
727+
input: v8::Local<v8::Value>,
728+
#[buffer] buffer: &mut [u8],
690729
#[buffer] out_buf: &mut [u32],
691730
) -> Result<(), WebError> {
692731
let s = v8::Local::<v8::String>::try_from(input)?;
@@ -707,8 +746,7 @@ fn op_encoding_encode_into(
707746
fn op_encoding_encode_into_fast(
708747
#[string] input: Cow<'_, str>,
709748
#[buffer] buffer: &mut [u8],
710-
#[buffer] out_buf: &mut [u32],
711-
) {
749+
) -> f64 {
712750
// Since `input` is already UTF-8, we can simply find the last UTF-8 code
713751
// point boundary from input that fits in `buffer`, and copy the bytes up to
714752
// that point.
@@ -730,16 +768,21 @@ fn op_encoding_encode_into_fast(
730768
boundary
731769
};
732770

733-
buffer[..boundary].copy_from_slice(input[..boundary].as_bytes());
734-
735771
// The `read` output parameter is measured in UTF-16 code units.
736-
out_buf[0] = match input {
772+
let read = match input {
737773
// Borrowed Cow strings are zero-copy views into the V8 heap.
738774
// Thus, they are guarantee to be SeqOneByteString.
739-
Cow::Borrowed(v) => v[..boundary].len() as u32,
740-
Cow::Owned(v) => v[..boundary].encode_utf16().count() as u32,
775+
Cow::Borrowed(v) => v[..boundary].len(),
776+
Cow::Owned(ref v) => v[..boundary].encode_utf16().count(),
741777
};
742-
out_buf[1] = boundary as u32;
778+
779+
if read > ENCODE_INTO_MAX_PACKED_READ || boundary > u32::MAX as usize {
780+
return ENCODE_INTO_PACKED_SENTINEL;
781+
}
782+
783+
buffer[..boundary].copy_from_slice(input[..boundary].as_bytes());
784+
785+
pack_encode_into_result(read, boundary)
743786
}
744787

745788
/// Tuple struct wrapping a single, publicly accessible `Url`.
pub struct Location(pub Url);

0 commit comments

Comments
 (0)