Conversation
|
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
|
@llvm/pr-subscribers-libc Author: Arya C S (AryaCS111) ChangesThis patch adds wcsxfrm to LLVM libc. It includes:
The current implementation provides C/POSIX-style behavior and does not yet add locale-aware collation support. Tested with:
Notes:
Fixes #191074 Full diff: https://github.com/llvm/llvm-project/pull/191692.diff 7 Files Affected:
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index b43ec51d1ece8..f45a7fe5e59f4 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -398,6 +398,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.wchar.wmemset
libc.src.wchar.wcschr
libc.src.wchar.wcsncmp
+ libc.src.wchar.wcsxfrm
libc.src.wchar.wcscmp
libc.src.wchar.wcspbrk
libc.src.wchar.wcsrchr
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 1bd829dc5efd6..6d6fc26c99fe9 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -354,3 +354,11 @@ functions:
arguments:
- type: const wchar_t *__restrict
- type: wchar_t **__restrict
+ - name: wcsxfrm
+ standards:
+ - stdc
+ return_type: size_t
+ arguments:
+ - type: wchar_t *__restrict
+ - type: const wchar_t *__restrict
+ - type: size_t
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index ce57199b0837a..34283b4632c1d 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -543,3 +543,11 @@ add_entrypoint_object(
libc.hdr.types.size_t
libc.hdr.wchar_macros
)
+
+add_entrypoint_object(
+ wcsxfrm
+ SRCS
+ wcsxfrm.cpp
+ HDRS
+ wcsxfrm.h
+)
diff --git a/libc/src/wchar/wcsxfrm.cpp b/libc/src/wchar/wcsxfrm.cpp
new file mode 100644
index 0000000000000..6792540b94f23
--- /dev/null
+++ b/libc/src/wchar/wcsxfrm.cpp
@@ -0,0 +1,51 @@
+//===-- Implementation of wcsxfrm ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsxfrm.h"
+
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// TODO: Add support for locale-aware collation keys.
+// For now, this implements C/POSIX-like behavior: the transformed form is the
+// original wide string itself, so comparing transformed strings with wcscmp
+// matches code-point order.
+LLVM_LIBC_FUNCTION(size_t, wcsxfrm,
+ (wchar_t *__restrict dest, const wchar_t *__restrict src,
+ size_t n)) {
+ // Number of source characters that may be written before the trailing NUL.
+ const size_t write_limit = n > 0 ? n - 1 : 0;
+
+ size_t i = 0;
+
+ // Single pass over the prefix we might need to copy.
+ // This avoids a full wcslen(src) pass for the common case where the source
+ // fits in the destination buffer.
+ for (; i < write_limit; ++i) {
+ const wchar_t ch = src[i];
+ if (ch == L'\0') {
+ dest[i] = L'\0';
+ return i;
+ }
+ dest[i] = ch;
+ }
+
+ // If n > 0, always NUL-terminate. This is correct both when truncating and
+ // when write_limit == 0 (i.e. n == 1).
+ if (n > 0)
+ dest[write_limit] = L'\0';
+
+ // Finish counting the remaining source length if we truncated or if n == 0.
+ while (src[i] != L'\0')
+ ++i;
+
+ return i;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcsxfrm.h b/libc/src/wchar/wcsxfrm.h
new file mode 100644
index 0000000000000..653633227f1f5
--- /dev/null
+++ b/libc/src/wchar/wcsxfrm.h
@@ -0,0 +1,16 @@
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSXFRM_H
+#define LLVM_LIBC_SRC_WCHAR_WCSXFRM_H
+
+#include "src/__support/macros/config.h"
+
+#include <stddef.h>
+#include <wchar.h>
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t wcsxfrm(wchar_t *__restrict dest, const wchar_t *__restrict src,
+ size_t n);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSXFRM_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 7a7cfaee7f367..50359a37e212c 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -517,3 +517,13 @@ add_libc_test(
libc.src.wchar.wcstold
libc.test.UnitTest.ErrnoCheckingTest
)
+
+add_libc_unittest(
+ wcsxfrm_test
+ SUITE
+ libc_wchar_unittests
+ SRCS
+ wcsxfrm_test.cpp
+ DEPENDS
+ libc.src.wchar.wcsxfrm
+)
diff --git a/libc/test/src/wchar/wcsxfrm_test.cpp b/libc/test/src/wchar/wcsxfrm_test.cpp
new file mode 100644
index 0000000000000..5470d3bb79f71
--- /dev/null
+++ b/libc/test/src/wchar/wcsxfrm_test.cpp
@@ -0,0 +1,98 @@
+//===-- Unittests for wcsxfrm --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsxfrm.h"
+#include "test/UnitTest/Test.h"
+
+#define EXPECT_WCHAR_EQ(ACTUAL, EXPECTED) \
+ EXPECT_EQ(static_cast<int>(ACTUAL), static_cast<int>(EXPECTED))
+
+TEST(LlvmLibcWCSXfrmTest, EmptyString) {
+ wchar_t dest[8];
+ size_t result = LIBC_NAMESPACE::wcsxfrm(dest, L"", 8);
+
+ EXPECT_EQ(result, size_t(0));
+ EXPECT_WCHAR_EQ(dest[0], L'\0');
+}
+
+TEST(LlvmLibcWCSXfrmTest, NullDestinationWhenCountIsZero) {
+ size_t result = LIBC_NAMESPACE::wcsxfrm(nullptr, L"abc", 0);
+ EXPECT_EQ(result, size_t(3));
+}
+
+TEST(LlvmLibcWCSXfrmTest, CopiesWholeStringWhenBufferIsLargeEnough) {
+ wchar_t dest[16];
+ size_t result = LIBC_NAMESPACE::wcsxfrm(dest, L"hello", 16);
+
+ EXPECT_EQ(result, size_t(5));
+ EXPECT_WCHAR_EQ(dest[0], L'h');
+ EXPECT_WCHAR_EQ(dest[1], L'e');
+ EXPECT_WCHAR_EQ(dest[2], L'l');
+ EXPECT_WCHAR_EQ(dest[3], L'l');
+ EXPECT_WCHAR_EQ(dest[4], L'o');
+ EXPECT_WCHAR_EQ(dest[5], L'\0');
+}
+
+TEST(LlvmLibcWCSXfrmTest, ExactFitIncludingNullTerminator) {
+ wchar_t dest[6];
+ size_t result = LIBC_NAMESPACE::wcsxfrm(dest, L"hello", 6);
+
+ EXPECT_EQ(result, size_t(5));
+ EXPECT_WCHAR_EQ(dest[0], L'h');
+ EXPECT_WCHAR_EQ(dest[1], L'e');
+ EXPECT_WCHAR_EQ(dest[2], L'l');
+ EXPECT_WCHAR_EQ(dest[3], L'l');
+ EXPECT_WCHAR_EQ(dest[4], L'o');
+ EXPECT_WCHAR_EQ(dest[5], L'\0');
+}
+
+TEST(LlvmLibcWCSXfrmTest, TruncatesAndNullTerminates) {
+ wchar_t dest[4];
+ size_t result = LIBC_NAMESPACE::wcsxfrm(dest, L"hello", 4);
+
+ EXPECT_EQ(result, size_t(5));
+ EXPECT_WCHAR_EQ(dest[0], L'h');
+ EXPECT_WCHAR_EQ(dest[1], L'e');
+ EXPECT_WCHAR_EQ(dest[2], L'l');
+ EXPECT_WCHAR_EQ(dest[3], L'\0');
+}
+
+TEST(LlvmLibcWCSXfrmTest, BufferSizeOneWritesOnlyNullTerminator) {
+ wchar_t dest[1];
+ dest[0] = L'x';
+
+ size_t result = LIBC_NAMESPACE::wcsxfrm(dest, L"hello", 1);
+
+ EXPECT_EQ(result, size_t(5));
+ EXPECT_WCHAR_EQ(dest[0], L'\0');
+}
+
+TEST(LlvmLibcWCSXfrmTest, DoesNotWriteWhenCountIsZero) {
+ wchar_t dest[4] = {L'x', L'y', L'z', L'\0'};
+
+ size_t result = LIBC_NAMESPACE::wcsxfrm(dest, L"hello", 0);
+
+ EXPECT_EQ(result, size_t(5));
+ EXPECT_WCHAR_EQ(dest[0], L'x');
+ EXPECT_WCHAR_EQ(dest[1], L'y');
+ EXPECT_WCHAR_EQ(dest[2], L'z');
+ EXPECT_WCHAR_EQ(dest[3], L'\0');
+}
+
+TEST(LlvmLibcWCSXfrmTest, WideCharactersAreHandledCorrectly) {
+ wchar_t dest[8];
+ const wchar_t src[] = {L'A', L'\u03A9', L'\u2603', L'\0'};
+
+ size_t result = LIBC_NAMESPACE::wcsxfrm(dest, src, 8);
+
+ EXPECT_EQ(result, size_t(3));
+ EXPECT_WCHAR_EQ(dest[0], L'A');
+ EXPECT_WCHAR_EQ(dest[1], L'\u03A9');
+ EXPECT_WCHAR_EQ(dest[2], L'\u2603');
+ EXPECT_WCHAR_EQ(dest[3], L'\0');
+}
|
|
@michaelrj-google I've added the changes. I checked the existing strxfrm implementation and saw that locale support isn’t included, so I kept it consistent here. Please take a look and let me know your thoughts on the PR. |
| libc.src.wchar.wmemset | ||
| libc.src.wchar.wcschr | ||
| libc.src.wchar.wcsncmp | ||
| libc.src.wchar.wcsxfrm |
There was a problem hiding this comment.
Can you add it as entrypoint to all the other configurations?
There was a problem hiding this comment.
@vhscampos
I checked the current tree and only found neighboring wchar entrypoints like wcsncmp registered in libc/config/linux/x86_64/entrypoints.txt in my branch, so I mirrored that pattern for wcsxfrm.
I haven’t tested on other platforms locally. If you want wcsxfrm added to additional configurations as well, I can do that and rely on CI for broader coverage.
|
|
||
| #include "src/__support/common.h" | ||
|
|
||
| namespace LIBC_NAMESPACE_DECL { |
There was a problem hiding this comment.
This macro is defined in "src/__support/macros/config.h". Can you please add an #include here and add that header as a dependency in the CMake part.
There was a problem hiding this comment.
Addressed the include-policy feedback and added the CMake dependencies
| // For now, this implements C/POSIX-like behavior: the transformed form is the | ||
| // original wide string itself, so comparing transformed strings with wcscmp | ||
| // matches code-point order. | ||
| LLVM_LIBC_FUNCTION(size_t, wcsxfrm, |
There was a problem hiding this comment.
AFAIK in order to use size_t, you must include hdr/types/size_t.h. Likewise for wchar_t. Remember also to add these headers as dependencies in CMake.
See https://libc.llvm.org/dev/code_style.html#header-inclusion-policy
There was a problem hiding this comment.
Addressed the include-policy feedback and added the CMake dependencies
| #include <stddef.h> | ||
| #include <wchar.h> |
There was a problem hiding this comment.
AFAIK you should include hdr/types/size_t.h and /wchar_t.h instead. See https://libc.llvm.org/dev/code_style.html#header-inclusion-policy
| SRCS | ||
| wcsxfrm.cpp | ||
| HDRS | ||
| wcsxfrm.h |
There was a problem hiding this comment.
Done, updated the includes/CMake deps.
This patch adds wcsxfrm to LLVM libc.
It includes:
The current implementation provides C/POSIX-style behavior and does not yet add locale-aware collation support.
Tested with:
Notes:
Fixes #191074