Skip to content

Commit e9369fb

Browse files
committed
Expose copy_from_unchecked as append_utf32_unchecked and String::utf32_unchecked in String for high performance string copies. Expose append_wstring and String::wstring for platform strings.
1 parent ba34829 commit e9369fb

File tree

2 files changed

+42
-30
lines changed

2 files changed

+42
-30
lines changed

Diff for: core/string/ustring.cpp

+16-15
Original file line numberDiff line numberDiff line change
@@ -320,11 +320,13 @@ void String::append_latin1(const Span<char> &p_cstr) {
320320
*dst = 0;
321321
}
322322

323-
void String::append_utf32(const Span<char32_t> &p_cstr) {
323+
Error String::append_utf32(const Span<char32_t> &p_cstr) {
324324
if (p_cstr.is_empty()) {
325-
return;
325+
return OK;
326326
}
327327

328+
Error error = OK;
329+
328330
const int prev_length = length();
329331
resize(prev_length + p_cstr.size() + 1);
330332
const char32_t *src = p_cstr.ptr();
@@ -337,28 +339,27 @@ void String::append_utf32(const Span<char32_t> &p_cstr) {
337339
if ((chr & 0xfffff800) == 0xd800) {
338340
print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)chr), true);
339341
*dst = _replacement_char;
342+
error = ERR_PARSE_ERROR;
340343
continue;
341344
}
342345
if (chr > 0x10ffff) {
343346
print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)chr), true);
344347
*dst = _replacement_char;
348+
error = ERR_PARSE_ERROR;
345349
continue;
346350
}
347351
*dst = chr;
348352
}
349353
*dst = 0;
354+
return error;
350355
}
351356

352-
// assumes the following have already been validated:
353-
// p_char != nullptr
354-
// p_length > 0
355-
// p_length <= p_char strlen
356-
// p_char is a valid UTF32 string
357-
void String::copy_from_unchecked(const char32_t *p_char, const int p_length) {
358-
resize(p_length + 1); // + 1 for \0
359-
char32_t *dst = ptrw();
360-
memcpy(dst, p_char, p_length * sizeof(char32_t));
361-
*(dst + p_length) = _null;
357+
void String::append_utf32_unchecked(const Span<char32_t> &p_span) {
358+
const int prev_length = length();
359+
resize(prev_length + p_span.size() + 1); // + 1 for \0
360+
char32_t *dst = ptrw() + prev_length;
361+
memcpy(dst, p_span.ptr(), p_span.size() * sizeof(char32_t));
362+
*(dst + p_span.size()) = _null;
362363
}
363364

364365
String String::operator+(const String &p_str) const {
@@ -3182,7 +3183,7 @@ String String::substr(int p_from, int p_chars) const {
31823183
}
31833184

31843185
String s;
3185-
s.copy_from_unchecked(&get_data()[p_from], p_chars);
3186+
s.append_utf32_unchecked(Span(ptr() + p_from, p_chars));
31863187
return s;
31873188
}
31883189

@@ -4254,7 +4255,7 @@ String String::left(int p_len) const {
42544255
}
42554256

42564257
String s;
4257-
s.copy_from_unchecked(&get_data()[0], p_len);
4258+
s.append_utf32_unchecked(Span(ptr(), p_len));
42584259
return s;
42594260
}
42604261

@@ -4272,7 +4273,7 @@ String String::right(int p_len) const {
42724273
}
42734274

42744275
String s;
4275-
s.copy_from_unchecked(&get_data()[length() - p_len], p_len);
4276+
s.append_utf32_unchecked(Span(ptr() + length() - p_len, p_len));
42764277
return s;
42774278
}
42784279

Diff for: core/string/ustring.h

+26-15
Original file line numberDiff line numberDiff line change
@@ -245,27 +245,13 @@ class String {
245245
static const char32_t _null;
246246
static const char32_t _replacement_char;
247247

248-
// Known-length copy.
249-
void copy_from_unchecked(const char32_t *p_char, int p_length);
250-
251248
// NULL-terminated c string copy - automatically parse the string to find the length.
252249
void append_latin1(const char *p_cstr) {
253250
append_latin1(Span(p_cstr, p_cstr ? strlen(p_cstr) : 0));
254251
}
255252
void append_utf32(const char32_t *p_cstr) {
256253
append_utf32(Span(p_cstr, p_cstr ? strlen(p_cstr) : 0));
257254
}
258-
259-
// wchar_t copy_from depends on the platform.
260-
void append_wstring(const Span<wchar_t> &p_cstr) {
261-
#ifdef WINDOWS_ENABLED
262-
// wchar_t is 16-bit, parse as UTF-16
263-
append_utf16((const char16_t *)p_cstr.ptr(), p_cstr.size());
264-
#else
265-
// wchar_t is 32-bit, copy directly
266-
append_utf32((Span<char32_t> &)p_cstr);
267-
#endif
268-
}
269255
void append_wstring(const wchar_t *p_cstr) {
270256
#ifdef WINDOWS_ENABLED
271257
// wchar_t is 16-bit, parse as UTF-16
@@ -527,13 +513,38 @@ class String {
527513
static String utf16(const char16_t *p_utf16, int p_len = -1);
528514
static String utf16(const Span<char16_t> &p_range) { return utf16(p_range.ptr(), p_range.size()); }
529515

530-
void append_utf32(const Span<char32_t> &p_cstr);
516+
// wchar_t copy_from depends on the platform.
517+
Error append_wstring(const Span<wchar_t> &p_cstr) {
518+
#ifdef WINDOWS_ENABLED
519+
// wchar_t is 16-bit, parse as UTF-16
520+
return append_utf16((const char16_t *)p_cstr.ptr(), p_cstr.size());
521+
#else
522+
// wchar_t is 32-bit, copy directly
523+
return append_utf32((Span<char32_t> &)p_cstr);
524+
#endif
525+
}
526+
// Builds a new String from a platform wide-character span via append_wstring.
// The Error returned by append_wstring is discarded.
static String wstring(const Span<wchar_t> &p_string) {
	String result;
	result.append_wstring(p_string);
	return result;
}
531+
532+
Error append_utf32(const Span<char32_t> &p_cstr);
531533
// Builds a new String from a UTF-32 span via append_utf32.
// The Error returned by append_utf32 is discarded.
static String utf32(const Span<char32_t> &p_span) {
	String result;
	result.append_utf32(p_span);
	return result;
}
536538

539+
// Like append_utf32, but does not validate the code points in the string (and is thus faster).
540+
// Prefer this function for conversion from trusted utf32 strings.
541+
void append_utf32_unchecked(const Span<char32_t> &p_span);
542+
// Builds a new String from a UTF-32 span via append_utf32_unchecked,
// i.e. without validating the input.
static String utf32_unchecked(const Span<char32_t> &p_string) {
	String result;
	result.append_utf32_unchecked(p_string);
	return result;
}
547+
537548
static uint32_t hash(const char32_t *p_cstr, int p_len); /* hash the string */
538549
static uint32_t hash(const char32_t *p_cstr); /* hash the string */
539550
static uint32_t hash(const wchar_t *p_cstr, int p_len); /* hash the string */

0 commit comments

Comments
 (0)