From b6b7fdaae53674307d99c76763eee418f73b3887 Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Thu, 14 Mar 2024 10:39:49 -0700 Subject: [PATCH 01/20] Fix compilation with Clang and when compiling as Debug --- CMakeLists.txt | 6 +-- cpio/test/test_option_c.c | 8 +-- libarchive/test/test_archive_match_time.c | 60 +++++++++++------------ libarchive/test/test_read_format_rar5.c | 2 +- test_utils/test_main.c | 10 ++-- 5 files changed, 43 insertions(+), 43 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 822056b910..dc22722780 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,7 +107,7 @@ endif () # aggressive about diagnosing build problems; this can get # relaxed somewhat in final shipping versions. IF (CMAKE_C_COMPILER_ID MATCHES "^GNU$" OR - CMAKE_C_COMPILER_ID MATCHES "^Clang$") + CMAKE_C_COMPILER_ID MATCHES "^Clang$" AND NOT MSVC) SET(CMAKE_REQUIRED_FLAGS "-Wall -Wformat -Wformat-security") ################################################################# # Set compile flags for all build types. 
@@ -144,7 +144,7 @@ IF (CMAKE_C_COMPILER_ID MATCHES "^GNU$" OR SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-dead_strip") ENDIF(NOT CMAKE_SYSTEM_NAME MATCHES "Darwin") ENDIF (CMAKE_C_COMPILER_ID MATCHES "^GNU$" OR - CMAKE_C_COMPILER_ID MATCHES "^Clang$") + CMAKE_C_COMPILER_ID MATCHES "^Clang$" AND NOT MSVC) IF (CMAKE_C_COMPILER_ID MATCHES "^XL$") SET(CMAKE_C_COMPILER "xlc_r") SET(CMAKE_REQUIRED_FLAGS "-qflag=e:e -qformat=sec") @@ -2166,7 +2166,7 @@ ENDIF(WIN32 AND NOT CYGWIN) INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/libarchive) # IF(MSVC) - ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE) + ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_WARNINGS) ENDIF(MSVC) IF(APPLE) diff --git a/cpio/test/test_option_c.c b/cpio/test/test_option_c.c index 0b6bed2fac..de25ed1ab5 100644 --- a/cpio/test/test_option_c.c +++ b/cpio/test/test_option_c.c @@ -119,9 +119,9 @@ DEFINE_TEST(test_option_c) assert(is_octal(e, 76)); /* Entire header is octal digits. */ assertEqualMem(e + 0, "070707", 6); /* Magic */ assert(is_octal(e + 6, 6)); /* dev */ - dev = from_octal(e + 6, 6); + dev = (int)from_octal(e + 6, 6); assert(is_octal(e + 12, 6)); /* ino */ - ino = from_octal(e + 12, 6); + ino = (int)from_octal(e + 12, 6); #if defined(_WIN32) && !defined(__CYGWIN__) /* Group members bits and others bits do not work. 
*/ assertEqualMem(e + 18, "100666", 6); /* Mode */ @@ -129,10 +129,10 @@ DEFINE_TEST(test_option_c) assertEqualMem(e + 18, "100644", 6); /* Mode */ #endif if (uid < 0) - uid = from_octal(e + 24, 6); + uid = (int)from_octal(e + 24, 6); assertEqualInt(from_octal(e + 24, 6), uid); /* uid */ assert(is_octal(e + 30, 6)); /* gid */ - gid = from_octal(e + 30, 6); + gid = (int)from_octal(e + 30, 6); assertEqualMem(e + 36, "000001", 6); /* nlink */ failure("file entries should not have rdev set (dev field was 0%o)", dev); diff --git a/libarchive/test/test_archive_match_time.c b/libarchive/test/test_archive_match_time.c index 25a0623a7e..9b9d201489 100644 --- a/libarchive/test/test_archive_match_time.c +++ b/libarchive/test/test_archive_match_time.c @@ -316,14 +316,13 @@ test_newer_mtime_than_file_mbs(void) static void test_newer_ctime_than_file_mbs(void) { - struct archive *a; - struct archive_entry *ae; - struct archive *m; - #if defined(_WIN32) && !defined(__CYGWIN__) skipping("Can't set ctime on Windows"); return; -#endif +#else + struct archive *a; + struct archive_entry *ae; + struct archive *m; if (!assert((m = archive_match_new()) != NULL)) return; @@ -373,6 +372,7 @@ test_newer_ctime_than_file_mbs(void) archive_read_free(a); archive_entry_free(ae); archive_match_free(m); +#endif } static void @@ -435,14 +435,13 @@ test_newer_mtime_than_file_wcs(void) static void test_newer_ctime_than_file_wcs(void) { - struct archive *a; - struct archive_entry *ae; - struct archive *m; - #if defined(_WIN32) && !defined(__CYGWIN__) skipping("Can't set ctime on Windows"); return; -#endif +#else + struct archive *a; + struct archive_entry *ae; + struct archive *m; if (!assert((m = archive_match_new()) != NULL)) return; @@ -493,6 +492,7 @@ test_newer_ctime_than_file_wcs(void) archive_read_free(a); archive_entry_free(ae); archive_match_free(m); +#endif } static void @@ -787,14 +787,13 @@ test_older_mtime_than_file_mbs(void) static void test_older_ctime_than_file_mbs(void) { - struct 
archive *a; - struct archive_entry *ae; - struct archive *m; - #if defined(_WIN32) && !defined(__CYGWIN__) skipping("Can't set ctime on Windows"); return; -#endif +#else + struct archive *a; + struct archive_entry *ae; + struct archive *m; if (!assert((m = archive_match_new()) != NULL)) return; @@ -845,6 +844,7 @@ test_older_ctime_than_file_mbs(void) archive_read_free(a); archive_entry_free(ae); archive_match_free(m); +#endif } static void @@ -907,14 +907,13 @@ test_older_mtime_than_file_wcs(void) static void test_older_ctime_than_file_wcs(void) { - struct archive *a; - struct archive_entry *ae; - struct archive *m; - #if defined(_WIN32) && !defined(__CYGWIN__) skipping("Can't set ctime on Windows"); return; -#endif +#else + struct archive *a; + struct archive_entry *ae; + struct archive *m; if (!assert((m = archive_match_new()) != NULL)) return; @@ -965,6 +964,7 @@ test_older_ctime_than_file_wcs(void) archive_read_free(a); archive_entry_free(ae); archive_match_free(m); +#endif } static void @@ -1088,14 +1088,13 @@ test_mtime_between_files_wcs(void) static void test_ctime_between_files_mbs(void) { - struct archive *a; - struct archive_entry *ae; - struct archive *m; - #if defined(_WIN32) && !defined(__CYGWIN__) skipping("Can't set ctime on Windows"); return; -#endif +#else + struct archive *a; + struct archive_entry *ae; + struct archive *m; if (!assert((m = archive_match_new()) != NULL)) return; @@ -1147,19 +1146,19 @@ test_ctime_between_files_mbs(void) archive_read_free(a); archive_entry_free(ae); archive_match_free(m); +#endif } static void test_ctime_between_files_wcs(void) { - struct archive *a; - struct archive_entry *ae; - struct archive *m; - #if defined(_WIN32) && !defined(__CYGWIN__) skipping("Can't set ctime on Windows"); return; -#endif +#else + struct archive *a; + struct archive_entry *ae; + struct archive *m; if (!assert((m = archive_match_new()) != NULL)) return; @@ -1211,6 +1210,7 @@ test_ctime_between_files_wcs(void) archive_read_free(a); 
archive_entry_free(ae); archive_match_free(m); +#endif } static void diff --git a/libarchive/test/test_read_format_rar5.c b/libarchive/test/test_read_format_rar5.c index 705913b04c..39279c3d4e 100644 --- a/libarchive/test/test_read_format_rar5.c +++ b/libarchive/test/test_read_format_rar5.c @@ -843,7 +843,7 @@ DEFINE_TEST(test_read_format_rar5_block_by_block) struct archive_entry *ae; struct archive *a; uint8_t buf[173]; - int bytes_read; + la_ssize_t bytes_read; uint32_t computed_crc = 0; extract_reference_file("test_read_format_rar5_compressed.rar"); diff --git a/test_utils/test_main.c b/test_utils/test_main.c index 6617732a33..c98ebf8624 100644 --- a/test_utils/test_main.c +++ b/test_utils/test_main.c @@ -227,8 +227,8 @@ my_CreateSymbolicLinkA(const char *linkname, const char *target, if (f == NULL) return (0); - tlen = strlen(target); - llen = strlen(linkname); + tlen = (int)strlen(target); + llen = (int)strlen(linkname); if (tlen == 0 || llen == 0) return (0); @@ -3921,7 +3921,7 @@ main(int argc, char **argv) * tree. */ progname = p = argv[0]; - testprogdir_len = strlen(progname) + 1; + testprogdir_len = (int)strlen(progname) + 1; if ((testprogdir = (char *)malloc(testprogdir_len)) == NULL) { fprintf(stderr, "ERROR: Out of memory."); @@ -4069,7 +4069,7 @@ main(int argc, char **argv) #ifdef PROGRAM if (testprogfile == NULL) { - tmp2_len = strlen(testprogdir) + 1 + strlen(PROGRAM) + 1; + tmp2_len = (int)(strlen(testprogdir) + 1 + strlen(PROGRAM) + 1); if ((tmp2 = (char *)malloc(tmp2_len)) == NULL) { fprintf(stderr, "ERROR: Out of memory."); @@ -4094,7 +4094,7 @@ main(int argc, char **argv) testprogfile = testprg; #endif /* Quote the name that gets put into shell command lines. 
*/ - testprg_len = strlen(testprogfile) + 3; + testprg_len = (int)strlen(testprogfile) + 3; testprg = malloc(testprg_len); strncpy(testprg, "\"", testprg_len); strncat(testprg, testprogfile, testprg_len); From 2c6f1a7f6fe4384a91a85f6d1f1597d03003d4cc Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Fri, 15 Mar 2024 14:10:53 -0700 Subject: [PATCH 02/20] Fix various archive creation failures on Windows with Unicode names --- libarchive/archive_string.c | 24 ++++ libarchive/archive_write_set_format_7zip.c | 16 ++- libarchive/archive_write_set_format_gnutar.c | 4 + libarchive/archive_write_set_format_pax.c | 7 ++ libarchive/archive_write_set_format_ustar.c | 4 + libarchive/test/CMakeLists.txt | 1 + libarchive/test/test_7zip_filename_encoding.c | 104 +++++++++++++++++ .../test/test_gnutar_filename_encoding.c | 102 ++++++++++++++++ libarchive/test/test_pax_filename_encoding.c | 110 ++++++++++++++++++ .../test/test_ustar_filename_encoding.c | 102 ++++++++++++++++ libarchive/test/test_zip_filename_encoding.c | 95 +++++++++++++++ 11 files changed, 566 insertions(+), 3 deletions(-) create mode 100644 libarchive/test/test_7zip_filename_encoding.c diff --git a/libarchive/archive_string.c b/libarchive/archive_string.c index f39677ad7a..5a2a2b40b3 100644 --- a/libarchive/archive_string.c +++ b/libarchive/archive_string.c @@ -3886,6 +3886,30 @@ archive_mstring_get_utf8(struct archive *a, struct archive_mstring *aes, } *p = NULL; +#if defined(_WIN32) && !defined(__CYGWIN__) + /* + * On Windows, first try converting from WCS because (1) there's no + * guarantee that the conversion to MBS will succeed, e.g. when using + * CP_ACP, and (2) that's more efficient than converting to MBS, just to + * convert back to WCS again before finally converting to UTF-8 + */ + if ((aes->aes_set & AES_SET_WCS) != 0) { + sc = archive_string_conversion_to_charset(a, "UTF-8", 1); + if (sc == NULL) + return (-1);/* Couldn't allocate memory for sc. 
*/ + aes->aes_utf8.length = 0; + r = archive_string_append_from_wcs_in_codepage(&(aes->aes_utf8), + aes->aes_wcs.s, aes->aes_wcs.length, sc); + if (a == NULL) + free_sconv_object(sc); + if (r == 0) { + aes->aes_set |= AES_SET_UTF8; + *p = aes->aes_utf8.s; + return (0);/* success. */ + } else + return (-1);/* failure. */ + } +#endif /* Try converting WCS to MBS first if MBS does not exist yet. */ if ((aes->aes_set & AES_SET_MBS) == 0) { const char *pm; /* unused */ diff --git a/libarchive/archive_write_set_format_7zip.c b/libarchive/archive_write_set_format_7zip.c index c0ea9d6b15..b870338fc0 100644 --- a/libarchive/archive_write_set_format_7zip.c +++ b/libarchive/archive_write_set_format_7zip.c @@ -521,7 +521,7 @@ _7z_write_header(struct archive_write *a, struct archive_entry *entry) */ if (archive_entry_filetype(entry) == AE_IFLNK) { ssize_t bytes; - const void *p = (const void *)archive_entry_symlink(entry); + const void *p = (const void *)archive_entry_symlink_utf8(entry); bytes = compress_out(a, p, (size_t)file->size, ARCHIVE_Z_RUN); if (bytes < 0) return ((int)bytes); @@ -1563,8 +1563,18 @@ file_new(struct archive_write *a, struct archive_entry *entry, archive_entry_set_size(entry, 0); if (archive_entry_filetype(entry) == AE_IFDIR) file->dir = 1; - else if (archive_entry_filetype(entry) == AE_IFLNK) - file->size = strlen(archive_entry_symlink(entry)); + else if (archive_entry_filetype(entry) == AE_IFLNK) { + const char* linkpath; + linkpath = archive_entry_symlink_utf8(entry); + if (linkpath == NULL) { + free(file); + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "symlink path could not be converted to UTF-8"); + return (ARCHIVE_FAILED); + } + else + file->size = strlen(linkpath); + } if (archive_entry_mtime_is_set(entry)) { file->flg |= MTIME_IS_SET; file->times[MTIME].time = archive_entry_mtime(entry); diff --git a/libarchive/archive_write_set_format_gnutar.c b/libarchive/archive_write_set_format_gnutar.c index a88350b874..849cb39a7b 100644 --- 
a/libarchive/archive_write_set_format_gnutar.c +++ b/libarchive/archive_write_set_format_gnutar.c @@ -523,7 +523,11 @@ archive_write_gnutar_header(struct archive_write *a, goto exit_write_header; } +#if defined(_WIN32) && !defined(__CYGWIN__) + if (archive_entry_hardlink_w(entry) != NULL) { +#else if (archive_entry_hardlink(entry) != NULL) { +#endif tartype = '1'; } else switch (archive_entry_filetype(entry)) { diff --git a/libarchive/archive_write_set_format_pax.c b/libarchive/archive_write_set_format_pax.c index e93333074a..de9c8a6e7e 100644 --- a/libarchive/archive_write_set_format_pax.c +++ b/libarchive/archive_write_set_format_pax.c @@ -608,7 +608,14 @@ archive_write_pax_header(struct archive_write *a, const time_t ustar_max_mtime = get_ustar_max_mtime(); /* Sanity check. */ +#if defined(_WIN32) && !defined(__CYGWIN__) + /* NOTE: Check both for compatibility where we return ARCHIVE_WARN on + * conversion failure */ + if ((archive_entry_pathname_w(entry_original) == NULL) && + (archive_entry_pathname(entry_original) == NULL)) { +#else if (archive_entry_pathname(entry_original) == NULL) { +#endif archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Can't record entry in tar file without pathname"); return (ARCHIVE_FAILED); diff --git a/libarchive/archive_write_set_format_ustar.c b/libarchive/archive_write_set_format_ustar.c index 673487b27f..d8f0b45846 100644 --- a/libarchive/archive_write_set_format_ustar.c +++ b/libarchive/archive_write_set_format_ustar.c @@ -254,7 +254,11 @@ archive_write_ustar_header(struct archive_write *a, struct archive_entry *entry) sconv = ustar->opt_sconv; /* Sanity check.
*/ +#if defined(_WIN32) && !defined(__CYGWIN__) + if (archive_entry_pathname_w(entry) == NULL) { +#else if (archive_entry_pathname(entry) == NULL) { +#endif archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Can't record entry in tar file without pathname"); return (ARCHIVE_FAILED); diff --git a/libarchive/test/CMakeLists.txt b/libarchive/test/CMakeLists.txt index 7b166c5fba..7ca30aaca9 100644 --- a/libarchive/test/CMakeLists.txt +++ b/libarchive/test/CMakeLists.txt @@ -9,6 +9,7 @@ IF(ENABLE_TEST) ../../test_utils/test_main.c read_open_memory.c test.h + test_7zip_filename_encoding.c test_acl_nfs4.c test_acl_pax.c test_acl_platform_nfs4.c diff --git a/libarchive/test/test_7zip_filename_encoding.c b/libarchive/test/test_7zip_filename_encoding.c new file mode 100644 index 0000000000..aa0f576f0c --- /dev/null +++ b/libarchive/test/test_7zip_filename_encoding.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2003-2018 + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "test.h" + +DEFINE_TEST(test_7zip_filename_encoding_UTF16_win) +{ +#if !defined(_WIN32) || defined(__CYGWIN__) + skipping("This test is meant to verify unicode string handling" + " on Windows with UTF-16 names"); + return; +#else + struct archive *a; + struct archive_entry *entry; + char buff[4096]; + size_t used; + + /* + * Don't call setlocale because we're verifying that the '_w' functions + * work as expected + */ + + /* Part 1: file */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_7zip(a)); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the filename using a UTF-16 string */ + archive_entry_copy_pathname_w(entry, L"\u8868.txt"); + archive_entry_set_filetype(entry, AE_IFREG); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Check UTF-16 version. 
*/ + assertEqualMem(buff + 44, L"\u8868.txt", 10); + + /* Part 2: directory */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_7zip(a)); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the directory name using a UTF-16 string */ + /* NOTE: Explicitly not adding trailing slash to test that code path */ + archive_entry_copy_pathname_w(entry, L"\u8868"); + archive_entry_set_filetype(entry, AE_IFDIR); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* NOTE: Directories do not get trailing slash for 7zip files */ + assertEqualMem(buff + 41, L"\u8868", 2); + + /* Part 3: symlink */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_7zip(a)); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the symlink target using a UTF-16 string */ + archive_entry_set_pathname(entry, "link.txt"); + archive_entry_copy_symlink_w(entry, L"\u8868.txt"); + archive_entry_set_filetype(entry, AE_IFLNK); + archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* NOTE: link names are stored in UTF-8 */ + assertEqualMem(buff + 32, "\xE8\xA1\xA8.txt", 7); + + /* NOTE: 7zip does not support hardlinks */ +#endif +} diff --git a/libarchive/test/test_gnutar_filename_encoding.c b/libarchive/test/test_gnutar_filename_encoding.c index f473ddfb4f..476ec2149f 100644 --- a/libarchive/test/test_gnutar_filename_encoding.c +++ b/libarchive/test/test_gnutar_filename_encoding.c @@ -389,3 +389,105 @@ DEFINE_TEST(test_gnutar_filename_encoding_CP932_UTF8) 
assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); } +DEFINE_TEST(test_gnutar_filename_encoding_UTF16_win) +{ +#if !defined(_WIN32) || defined(__CYGWIN__) + skipping("This test is meant to verify unicode string handling" + " on Windows with UTF-16 names"); + return; +#else + struct archive *a; + struct archive_entry *entry; + char buff[4096]; + size_t used; + + /* + * Don't call setlocale because we're verifying that the '_w' functions + * work as expected when 'hdrcharset' is UTF-8 + */ + + /* Part 1: file */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); + if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { + skipping("This system cannot convert character-set" + " from UTF-16 to UTF-8."); + archive_write_free(a); + return; + } + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the filename using a UTF-16 string */ + archive_entry_copy_pathname_w(entry, L"\u8868.txt"); + archive_entry_set_filetype(entry, AE_IFREG); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Check UTF-8 version. 
*/ + assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); + + /* Part 2: directory */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); + assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8")); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the directory name using a UTF-16 string */ + /* NOTE: Explicitly not adding trailing slash to test that code path */ + archive_entry_copy_pathname_w(entry, L"\u8868"); + archive_entry_set_filetype(entry, AE_IFDIR); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Check UTF-8 version. */ + assertEqualMem(buff, "\xE8\xA1\xA8/", 4); + + /* Part 3: symlink */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); + assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8")); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the symlink target using a UTF-16 string */ + archive_entry_set_pathname(entry, "link.txt"); + archive_entry_copy_symlink_w(entry, L"\u8868.txt"); + archive_entry_set_filetype(entry, AE_IFLNK); + archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Check UTF-8 version. 
*/ + assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7); + + /* Part 4: hardlink */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); + assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8")); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the hardlink target using a UTF-16 string */ + archive_entry_set_pathname(entry, "link.txt"); + archive_entry_copy_hardlink_w(entry, L"\u8868.txt"); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Check UTF-8 version. */ + assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7); +#endif +} diff --git a/libarchive/test/test_pax_filename_encoding.c b/libarchive/test/test_pax_filename_encoding.c index 737641c5ab..a28edc4b27 100644 --- a/libarchive/test/test_pax_filename_encoding.c +++ b/libarchive/test/test_pax_filename_encoding.c @@ -579,6 +579,116 @@ DEFINE_TEST(test_pax_filename_encoding_KOI8R_CP1251) assertEqualInt(ARCHIVE_OK, archive_write_free(a)); } +/* + * Verify that unicode filenames are correctly preserved on Windows + */ +DEFINE_TEST(test_pax_filename_encoding_UTF16_win) +{ +#if !defined(_WIN32) || defined(__CYGWIN__) + skipping("This test is meant to verify unicode string handling" + " on Windows with UTF-16 names"); + return; +#else + struct archive *a; + struct archive_entry *entry; + char buff[4096]; + size_t used; + + /* + * Don't call setlocale because we're verifying that the '_w' functions + * work as expected when 'hdrcharset' is UTF-8 + */ + + /* Check if the platform completely supports the string conversion.
*/ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); + if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { + skipping("This system cannot convert character-set" + " from UTF-16 to UTF-8."); + archive_write_free(a); + return; + } + + /* Re-create a write archive object since filenames should be written + * in UTF-8 by default. */ + archive_write_free(a); + + /* Part 1: file */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + archive_entry_copy_pathname_w(entry, L"\u043f\u0440\u0438"); + archive_entry_set_filetype(entry, AE_IFREG); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Above three characters in UTF-16 should translate to the following + * three characters (two bytes each) in UTF-8. */ + assertEqualMem(buff + 512, "15 path=\xD0\xBF\xD1\x80\xD0\xB8\x0A", 15); + + /* Part 2: directory */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* NOTE: Explicitly not adding trailing slash to test that code path */ + archive_entry_copy_pathname_w(entry, L"\u043f\u0440\u0438"); + archive_entry_set_filetype(entry, AE_IFDIR); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Above three characters in UTF-16 should translate to the following + * three characters (two bytes each) in UTF-8, with trailing slash. 
*/ + assertEqualMem(buff + 512, "16 path=\xD0\xBF\xD1\x80\xD0\xB8\x2f\x0A", 15); + + /* Part 3: symlink */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + archive_entry_set_pathname(entry, "link.txt"); + archive_entry_copy_symlink_w(entry, L"\u043f\u0440\u0438"); + archive_entry_set_filetype(entry, AE_IFLNK); + archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Above three characters in UTF-16 should translate to the following + * three characters (two bytes each) in UTF-8. */ + assertEqualMem(buff + 512, "19 linkpath=\xD0\xBF\xD1\x80\xD0\xB8\x0A", 15); + + /* Part 4: hardlink */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + archive_entry_set_pathname(entry, "link.txt"); + archive_entry_copy_hardlink_w(entry, L"\u043f\u0440\u0438"); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Above three characters in UTF-16 should translate to the following + * three characters (two bytes each) in UTF-8. 
*/ + assertEqualMem(buff + 512, "19 linkpath=\xD0\xBF\xD1\x80\xD0\xB8\x0A", 15); +#endif +} DEFINE_TEST(test_pax_filename_encoding) { diff --git a/libarchive/test/test_ustar_filename_encoding.c b/libarchive/test/test_ustar_filename_encoding.c index cc62453f1c..1242bd1d3c 100644 --- a/libarchive/test/test_ustar_filename_encoding.c +++ b/libarchive/test/test_ustar_filename_encoding.c @@ -390,3 +390,105 @@ DEFINE_TEST(test_ustar_filename_encoding_CP932_UTF8) assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); } +DEFINE_TEST(test_ustar_filename_encoding_UTF16_win) +{ +#if !defined(_WIN32) || defined(__CYGWIN__) + skipping("This test is meant to verify unicode string handling" + " on Windows with UTF-16 names"); + return; +#else + struct archive *a; + struct archive_entry *entry; + char buff[4096]; + size_t used; + + /* + * Don't call setlocale because we're verifying that the '_w' functions + * work as expected when 'hdrcharset' is UTF-8 + */ + + /* Part 1: file */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); + if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { + skipping("This system cannot convert character-set" + " from UTF-16 to UTF-8."); + archive_write_free(a); + return; + } + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the filename using a UTF-16 string */ + archive_entry_copy_pathname_w(entry, L"\u8868.txt"); + archive_entry_set_filetype(entry, AE_IFREG); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Check UTF-8 version. 
*/ + assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); + + /* Part 2: directory */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); + assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8")); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the directory name using a UTF-16 string */ + /* NOTE: Explicitly not adding trailing slash to test that code path */ + archive_entry_copy_pathname_w(entry, L"\u8868"); + archive_entry_set_filetype(entry, AE_IFDIR); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Check UTF-8 version. */ + assertEqualMem(buff, "\xE8\xA1\xA8/", 4); + + /* Part 3: symlink */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); + assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8")); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the symlink target using a UTF-16 string */ + archive_entry_set_pathname(entry, "link.txt"); + archive_entry_copy_symlink_w(entry, L"\u8868.txt"); + archive_entry_set_filetype(entry, AE_IFLNK); + archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Check UTF-8 version. 
*/ + assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7); + + /* Part 4: hardlink */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); + assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8")); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the hardlink target using a UTF-16 string */ + archive_entry_set_pathname(entry, "link.txt"); + archive_entry_copy_hardlink_w(entry, L"\u8868.txt"); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Check UTF-8 version. */ + assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7); +#endif +} diff --git a/libarchive/test/test_zip_filename_encoding.c b/libarchive/test/test_zip_filename_encoding.c index 448fb9b1d4..571d088cb9 100644 --- a/libarchive/test/test_zip_filename_encoding.c +++ b/libarchive/test/test_zip_filename_encoding.c @@ -527,3 +527,98 @@ DEFINE_TEST(test_zip_filename_encoding_CP932) assertEqualInt(0, buff[7]); assertEqualMem(buff + 30, "abcABC", 6); } + +DEFINE_TEST(test_zip_filename_encoding_UTF16_win) +{ +#if !defined(_WIN32) || defined(__CYGWIN__) + skipping("This test is meant to verify unicode string handling" + " on Windows with UTF-16 names"); + return; +#else + struct archive *a; + struct archive_entry *entry; + char buff[4096]; + size_t used; + + /* + * Don't call setlocale because we're verifying that the '_w' functions + * work as expected when 'hdrcharset' is UTF-8 + */ + + /* Part 1: file */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); + if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { + skipping("This system cannot convert character-set" + " from UTF-16 to UTF-8."); + archive_write_free(a); + return; + } + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a,
buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the filename using a UTF-16 string */ + archive_entry_copy_pathname_w(entry, L"\u8868.txt"); + archive_entry_set_filetype(entry, AE_IFREG); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* A bit 11 of general purpose flag should be 1, + * which indicates the filename charset is UTF-8. */ + assertEqualInt(0x08, buff[7]); + /* Check UTF-8 version. */ + assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7); + + /* Part 2: directory */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); + assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8")); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the directory name using a UTF-16 string */ + /* NOTE: Explicitly not adding trailing slash to test that code path */ + archive_entry_copy_pathname_w(entry, L"\u8868"); + archive_entry_set_filetype(entry, AE_IFDIR); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* A bit 11 of general purpose flag should be 1, + * which indicates the filename charset is UTF-8. */ + assertEqualInt(0x08, buff[7]); + /* Check UTF-8 version. 
*/ + assertEqualMem(buff+ 30, "\xE8\xA1\xA8/", 4); + + /* Part 3: symlink */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); + assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8")); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the symlink target using a UTF-16 string */ + archive_entry_set_pathname(entry, "link.txt"); + archive_entry_copy_symlink_w(entry, L"\u8868.txt"); + archive_entry_set_filetype(entry, AE_IFLNK); + archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* A bit 11 of general purpose flag should be 0, + * because the file name is ASCII. */ + assertEqualInt(0, buff[7]); + /* Check UTF-8 version. */ + assertEqualMem(buff + 58, "\xE8\xA1\xA8.txt", 7); + + /* NOTE: ZIP does not support hardlinks */ +#endif +} From 2dbde6a41b70919916205a35a33ffcab3cdb39f0 Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Fri, 15 Mar 2024 15:54:52 -0700 Subject: [PATCH 03/20] Change 7zip test to use archive reader instead of inspecting memory & fixing --- libarchive/archive_read_support_format_7zip.c | 15 ++++- libarchive/test/test_7zip_filename_encoding.c | 56 +++++++++---------- 2 files changed, 39 insertions(+), 32 deletions(-) diff --git a/libarchive/archive_read_support_format_7zip.c b/libarchive/archive_read_support_format_7zip.c index 7e465935c9..cc85e9e6d2 100644 --- a/libarchive/archive_read_support_format_7zip.c +++ b/libarchive/archive_read_support_format_7zip.c @@ -835,9 +835,20 @@ archive_read_format_7zip_read_header(struct archive_read *a, zip_entry->mode |= AE_IFREG; archive_entry_set_mode(entry, zip_entry->mode); } else { + struct archive_string_conv* utf8_conv; + symname[symsize] = '\0'; - 
archive_entry_copy_symlink(entry, - (const char *)symname); + + /* Symbolic links are embedded as UTF-8 strings */ + utf8_conv = archive_string_conversion_from_charset(&a->archive, + "UTF-8", 1); + if (utf8_conv == NULL) { + free(symname); + return ARCHIVE_FATAL; + } + + archive_entry_copy_symlink_l(entry, (const char*)symname, symsize, + utf8_conv); } free(symname); archive_entry_set_size(entry, 0); diff --git a/libarchive/test/test_7zip_filename_encoding.c b/libarchive/test/test_7zip_filename_encoding.c index aa0f576f0c..b423e0ecca 100644 --- a/libarchive/test/test_7zip_filename_encoding.c +++ b/libarchive/test/test_7zip_filename_encoding.c @@ -42,63 +42,59 @@ DEFINE_TEST(test_7zip_filename_encoding_UTF16_win) * work as expected */ - /* Part 1: file */ a = archive_write_new(); assertEqualInt(ARCHIVE_OK, archive_write_set_format_7zip(a)); assertEqualInt(ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); + /* Part 1: file */ entry = archive_entry_new2(a); - /* Set the filename using a UTF-16 string */ archive_entry_copy_pathname_w(entry, L"\u8868.txt"); archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); - archive_entry_free(entry); - assertEqualInt(ARCHIVE_OK, archive_write_free(a)); - - /* Check UTF-16 version. 
*/ - assertEqualMem(buff + 44, L"\u8868.txt", 10); /* Part 2: directory */ - a = archive_write_new(); - assertEqualInt(ARCHIVE_OK, archive_write_set_format_7zip(a)); - assertEqualInt(ARCHIVE_OK, - archive_write_open_memory(a, buff, sizeof(buff), &used)); - - entry = archive_entry_new2(a); - /* Set the directory name using a UTF-16 string */ - /* NOTE: Explicitly not adding trailing slash to test that code path */ + archive_entry_clear(entry); archive_entry_copy_pathname_w(entry, L"\u8868"); archive_entry_set_filetype(entry, AE_IFDIR); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); - archive_entry_free(entry); - assertEqualInt(ARCHIVE_OK, archive_write_free(a)); - - /* NOTE: Directories do not get trailing slash for 7zip files */ - assertEqualMem(buff + 41, L"\u8868", 2); /* Part 3: symlink */ - a = archive_write_new(); - assertEqualInt(ARCHIVE_OK, archive_write_set_format_7zip(a)); - assertEqualInt(ARCHIVE_OK, - archive_write_open_memory(a, buff, sizeof(buff), &used)); - - entry = archive_entry_new2(a); - /* Set the symlink target using a UTF-16 string */ + archive_entry_clear(entry); archive_entry_set_pathname(entry, "link.txt"); archive_entry_copy_symlink_w(entry, L"\u8868.txt"); archive_entry_set_filetype(entry, AE_IFLNK); archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + + /* NOTE: 7zip does not support hardlinks */ + archive_entry_free(entry); assertEqualInt(ARCHIVE_OK, archive_write_free(a)); - /* NOTE: link names are stored in UTF-8 */ - assertEqualMem(buff + 32, "\xE8\xA1\xA8.txt", 7); + /* Ensure that the archive contents can be read properly */ + /* NOTE: 7zip file contents are not in the order we wrote them! 
*/ + a = archive_read_new(); + archive_read_support_format_all(a); + archive_read_support_filter_all(a); + assertEqualIntA(a, ARCHIVE_OK, read_open_memory_seek(a, buff, used, 7)); - /* NOTE: 7zip does not support hardlinks */ + /* Read part 3: symlink */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry)); + assertEqualWString(L"\u8868.txt", archive_entry_symlink_w(entry)); + + /* Read part 1: file */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry)); + assertEqualWString(L"\u8868.txt", archive_entry_pathname_w(entry)); + + /* Read part 2: directory */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry)); + /* NOTE: Trailing slash added automatically for us */ + assertEqualWString(L"\u8868/", archive_entry_pathname_w(entry)); + + archive_read_free(a); #endif } From 8b543971aa5c086889bec5d88152c956dada4293 Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Sat, 16 Mar 2024 12:42:19 -0700 Subject: [PATCH 04/20] Fix & test link_resolver --- libarchive/archive_entry_link_resolver.c | 15 ++++++++ libarchive/test/test_link_resolver.c | 45 ++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/libarchive/archive_entry_link_resolver.c b/libarchive/archive_entry_link_resolver.c index 6c61734302..c2fd6895f2 100644 --- a/libarchive/archive_entry_link_resolver.c +++ b/libarchive/archive_entry_link_resolver.c @@ -201,16 +201,26 @@ archive_entry_linkify(struct archive_entry_linkresolver *res, le = find_entry(res, *e); if (le != NULL) { archive_entry_unset_size(*e); +#if defined(_WIN32) && !defined(__CYGWIN__) + archive_entry_copy_hardlink_w(*e, + archive_entry_pathname_w(le->canonical)); +#else archive_entry_copy_hardlink(*e, archive_entry_pathname(le->canonical)); +#endif } else insert_entry(res, *e); return; case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE: le = find_entry(res, *e); if (le != NULL) { +#if defined(_WIN32) && !defined(__CYGWIN__) + archive_entry_copy_hardlink_w(*e, + 
archive_entry_pathname_w(le->canonical)); +#else archive_entry_copy_hardlink(*e, archive_entry_pathname(le->canonical)); +#endif } else insert_entry(res, *e); return; @@ -229,8 +239,13 @@ archive_entry_linkify(struct archive_entry_linkresolver *res, le->entry = t; /* Make the old entry into a hardlink. */ archive_entry_unset_size(*e); +#if defined(_WIN32) && !defined(__CYGWIN__) + archive_entry_copy_hardlink_w(*e, + archive_entry_pathname_w(le->canonical)); +#else archive_entry_copy_hardlink(*e, archive_entry_pathname(le->canonical)); +#endif /* If we ran out of links, return the * final entry as well. */ if (le->links == 0) { diff --git a/libarchive/test/test_link_resolver.c b/libarchive/test/test_link_resolver.c index 5bea9a463b..6c6230c4d0 100644 --- a/libarchive/test/test_link_resolver.c +++ b/libarchive/test/test_link_resolver.c @@ -202,3 +202,48 @@ DEFINE_TEST(test_link_resolver) test_linkify_old_cpio(); test_linkify_new_cpio(); } + +DEFINE_TEST(test_link_resolver_unicode_win) +{ +#if !defined(_WIN32) || defined(__CYGWIN__) + skipping("This test is meant to verify unicode string handling" + " on Windows with UTF-16 names"); + return; +#else + struct archive_entry *entry, *e2; + struct archive_entry_linkresolver *resolver; + + /* Initialize the resolver. */ + assert(NULL != (resolver = archive_entry_linkresolver_new())); + archive_entry_linkresolver_set_strategy(resolver, + ARCHIVE_FORMAT_TAR_USTAR); + + /* Create an entry with a unicode filename and 2 links. */ + assert(NULL != (entry = archive_entry_new())); + archive_entry_copy_pathname_w(entry, L"\u4f60\u597d.txt"); + archive_entry_set_ino(entry, 1); + archive_entry_set_dev(entry, 2); + archive_entry_set_nlink(entry, 2); + archive_entry_set_size(entry, 10); + archive_entry_linkify(resolver, &entry, &e2); + + /* Shouldn't be altered, since it wasn't seen before. 
*/ + assert(e2 == NULL); + assertEqualWString(L"\u4f60\u597d.txt", archive_entry_pathname_w(entry)); + assertEqualWString(NULL, archive_entry_hardlink_w(entry)); + assertEqualInt(10, archive_entry_size(entry)); + + /* Link to the same file contents, but a new unicode name. */ + archive_entry_copy_pathname_w(entry, L"\u518d\u89c1.txt"); + archive_entry_linkify(resolver, &entry, &e2); + + /* Size & link path should have changed. */ + assert(e2 == NULL); + assertEqualWString(L"\u518d\u89c1.txt", archive_entry_pathname_w(entry)); + assertEqualWString(L"\u4f60\u597d.txt", archive_entry_hardlink_w(entry)); + assertEqualInt(0, archive_entry_size(entry)); + + archive_entry_free(entry); + archive_entry_linkresolver_free(resolver); +#endif +} From 97a962b5fb4a573655fe3551434202a3d557c35e Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Sun, 17 Mar 2024 11:16:31 -0700 Subject: [PATCH 05/20] Fix PAX link encoding and tests --- libarchive/archive_write_set_format_pax.c | 4 +- libarchive/test/test_pax_filename_encoding.c | 78 ++++++++------------ 2 files changed, 34 insertions(+), 48 deletions(-) diff --git a/libarchive/archive_write_set_format_pax.c b/libarchive/archive_write_set_format_pax.c index de9c8a6e7e..fecde82302 100644 --- a/libarchive/archive_write_set_format_pax.c +++ b/libarchive/archive_write_set_format_pax.c @@ -1394,7 +1394,7 @@ archive_write_pax_header(struct archive_write *a, * numeric fields, though they're less critical. 
*/ if (__archive_write_format_header_ustar(a, ustarbuff, entry_main, -1, 0, - NULL) == ARCHIVE_FATAL) { + sconv) == ARCHIVE_FATAL) { archive_entry_free(entry_main); archive_string_free(&entry_name); return (ARCHIVE_FATAL); @@ -1454,7 +1454,7 @@ archive_write_pax_header(struct archive_write *a, archive_entry_set_ctime(pax_attr_entry, 0, 0); r = __archive_write_format_header_ustar(a, paxbuff, - pax_attr_entry, 'x', 1, NULL); + pax_attr_entry, 'x', 1, sconv); archive_entry_free(pax_attr_entry); diff --git a/libarchive/test/test_pax_filename_encoding.c b/libarchive/test/test_pax_filename_encoding.c index a28edc4b27..3165b65dd3 100644 --- a/libarchive/test/test_pax_filename_encoding.c +++ b/libarchive/test/test_pax_filename_encoding.c @@ -591,7 +591,7 @@ DEFINE_TEST(test_pax_filename_encoding_UTF16_win) #else struct archive *a; struct archive_entry *entry; - char buff[4096]; + char buff[0x2000]; size_t used; /* @@ -613,80 +613,66 @@ DEFINE_TEST(test_pax_filename_encoding_UTF16_win) * in UTF-8 by default. */ archive_write_free(a); - /* Part 1: file */ a = archive_write_new(); assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); assertEqualInt(ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); + /* Part 1: file */ entry = archive_entry_new2(a); - archive_entry_copy_pathname_w(entry, L"\u043f\u0440\u0438"); + archive_entry_copy_pathname_w(entry, L"\u4f60\u597d.txt"); archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); - archive_entry_free(entry); - assertEqualInt(ARCHIVE_OK, archive_write_free(a)); - - /* Above three characters in UTF-16 should translate to the following - * three characters (two bytes each) in UTF-8. 
*/ - assertEqualMem(buff + 512, "15 path=\xD0\xBF\xD1\x80\xD0\xB8\x0A", 15); /* Part 2: directory */ - a = archive_write_new(); - assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); - assertEqualInt(ARCHIVE_OK, - archive_write_open_memory(a, buff, sizeof(buff), &used)); - - entry = archive_entry_new2(a); /* NOTE: Explicitly not adding trailing slash to test that code path */ archive_entry_copy_pathname_w(entry, L"\u043f\u0440\u0438"); archive_entry_set_filetype(entry, AE_IFDIR); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); - archive_entry_free(entry); - assertEqualInt(ARCHIVE_OK, archive_write_free(a)); - - /* Above three characters in UTF-16 should translate to the following - * three characters (two bytes each) in UTF-8, with trailing slash. */ - assertEqualMem(buff + 512, "16 path=\xD0\xBF\xD1\x80\xD0\xB8\x2f\x0A", 15); /* Part 3: symlink */ - a = archive_write_new(); - assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); - assertEqualInt(ARCHIVE_OK, - archive_write_open_memory(a, buff, sizeof(buff), &used)); - - entry = archive_entry_new2(a); - archive_entry_set_pathname(entry, "link.txt"); - archive_entry_copy_symlink_w(entry, L"\u043f\u0440\u0438"); + archive_entry_copy_pathname_w(entry, L"\u518d\u89c1.txt"); + archive_entry_copy_symlink_w(entry, L"\u4f60\u597d.txt"); archive_entry_set_filetype(entry, AE_IFLNK); archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); - archive_entry_free(entry); - assertEqualInt(ARCHIVE_OK, archive_write_free(a)); - - /* Above three characters in UTF-16 should translate to the following - * three characters (two bytes each) in UTF-8. 
*/ - assertEqualMem(buff + 512, "19 linkpath=\xD0\xBF\xD1\x80\xD0\xB8\x0A", 15); /* Part 4: hardlink */ - a = archive_write_new(); - assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); - assertEqualInt(ARCHIVE_OK, - archive_write_open_memory(a, buff, sizeof(buff), &used)); - - entry = archive_entry_new2(a); - archive_entry_set_pathname(entry, "link.txt"); - archive_entry_copy_hardlink_w(entry, L"\u043f\u0440\u0438"); + archive_entry_copy_pathname_w(entry, L"\u665a\u5b89.txt"); + archive_entry_copy_hardlink_w(entry, L"\u4f60\u597d.txt"); + archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); assertEqualInt(ARCHIVE_OK, archive_write_free(a)); - /* Above three characters in UTF-16 should translate to the following - * three characters (two bytes each) in UTF-8. */ - assertEqualMem(buff + 512, "19 linkpath=\xD0\xBF\xD1\x80\xD0\xB8\x0A", 15); + /* Ensure that the names round trip properly */ + a = archive_read_new(); + archive_read_support_format_all(a); + archive_read_support_filter_all(a); + assertEqualInt(0, archive_read_open_memory(a, buff, used)); + + /* Read part 1: file */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry)); + assertEqualWString(L"\u4f60\u597d.txt", archive_entry_pathname_w(entry)); + + /* Read part 2: directory */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry)); + assertEqualWString(L"\u043f\u0440\u0438/", archive_entry_pathname_w(entry)); + + /* Read part 3: symlink */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry)); + assertEqualWString(L"\u518d\u89c1.txt", archive_entry_pathname_w(entry)); + assertEqualWString(L"\u4f60\u597d.txt", archive_entry_symlink_w(entry)); + + /* Read part 4: hardlink */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry)); + assertEqualWString(L"\u665a\u5b89.txt", archive_entry_pathname_w(entry)); + 
assertEqualWString(L"\u4f60\u597d.txt", archive_entry_hardlink_w(entry)); #endif } From 480904cb271c4349e411f0e2b66b0a4979c36654 Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Sun, 17 Mar 2024 19:17:08 -0700 Subject: [PATCH 06/20] Unicode fix reading RAR files --- libarchive/archive_string.c | 26 ++++++ libarchive/test/test_read_format_rar5.c | 86 +++++++++++++++++++ .../test/test_read_format_rar5_unicode.rar.uu | 18 ++-- test_utils/test_main.c | 2 +- 4 files changed, 122 insertions(+), 10 deletions(-) diff --git a/libarchive/archive_string.c b/libarchive/archive_string.c index 5a2a2b40b3..aa14eb5ee4 100644 --- a/libarchive/archive_string.c +++ b/libarchive/archive_string.c @@ -3994,6 +3994,32 @@ archive_mstring_get_wcs(struct archive *a, struct archive_mstring *aes, } *wp = NULL; +#if defined(_WIN32) && !defined(__CYGWIN__) + /* + * On Windows, prefer converting from UTF-8 directly to WCS because: + * (1) there's no guarantee that the string can be represented in MBS (e.g. + * with CP_ACP), and (2) in order to convert from UTF-8 to MBS, we're going + * to need to convert from UTF-8 to WCS anyway and it's wasteful to throw + * away that intermediate result + */ + if (aes->aes_set & AES_SET_UTF8) { + struct archive_string_conv *sc; + + sc = archive_string_conversion_from_charset(a, "UTF-8", 1); + if (sc != NULL) { + archive_wstring_empty((&aes->aes_wcs)); + r = archive_wstring_append_from_mbs_in_codepage(&(aes->aes_wcs), + aes->aes_utf8.s, aes->aes_utf8.length, sc); + if (a == NULL) + free_sconv_object(sc); + if (r == 0) { + aes->aes_set |= AES_SET_WCS; + *wp = aes->aes_wcs.s; + return (0); + } + } + } +#endif /* Try converting UTF8 to MBS first if MBS does not exist yet. 
*/ if ((aes->aes_set & AES_SET_MBS) == 0) { const char *p; /* unused */ diff --git a/libarchive/test/test_read_format_rar5.c b/libarchive/test/test_read_format_rar5.c index 39279c3d4e..9bed0c82e3 100644 --- a/libarchive/test/test_read_format_rar5.c +++ b/libarchive/test/test_read_format_rar5.c @@ -1402,3 +1402,89 @@ DEFINE_TEST(test_read_format_rar5_read_data_block_uninitialized_offset) EPILOGUE(); } + +DEFINE_TEST(test_read_format_rar5_unicode) +{ + PROLOGUE("test_read_format_rar5_unicode.rar"); + + /* Script "file.txt" */ + assertA(0 == archive_read_next_header(a, &ae)); + assertEqualInt(AE_IFREG, archive_entry_filetype(ae)); +#if defined(_WIN32) && !defined(__CYGWIN__) + assertEqualWString(L"\U0001d4bb\U0001d4be\U0001d4c1\U0001d452.txt", + archive_entry_pathname_w(ae)); +#else + assertEqualString( + "\xf0\x9d\x92\xbb\xf0\x9d\x92\xbe\xf0\x9d\x93\x81\xf0\x9d\x91\x92.txt", + archive_entry_pathname(ae)); +#endif + + /* Script "hardlink.txt" */ + assertA(0 == archive_read_next_header(a, &ae)); + assertEqualInt(AE_IFREG, archive_entry_filetype(ae)); +#if defined(_WIN32) && !defined(__CYGWIN__) + assertEqualWString(L"\U0001d4bd\U0001d4b6\U0001d4c7\U0001d4b9\U0001d4c1" + "\U0001d4be\U0001d4c3\U0001d4c0.txt", archive_entry_pathname_w(ae)); + assertEqualWString(L"\U0001d4bb\U0001d4be\U0001d4c1\U0001d452.txt", + archive_entry_hardlink_w(ae)); +#else + assertEqualString( + "\xf0\x9d\x92\xbd\xf0\x9d\x92\xb6\xf0\x9d\x93\x87\xf0\x9d\x92\xb9\xf0" + "\x9d\x93\x81\xf0\x9d\x92\xbe\xf0\x9d\x93\x83\xf0\x9d\x93\x80.txt", + archive_entry_pathname(ae)); + assertEqualString( + "\xf0\x9d\x92\xbb\xf0\x9d\x92\xbe\xf0\x9d\x93\x81\xf0\x9d\x91\x92.txt", + archive_entry_hardlink(ae)); +#endif + + /* Script "symlink.txt" */ + assertA(0 == archive_read_next_header(a, &ae)); + assertEqualInt(AE_IFLNK, archive_entry_filetype(ae)); + assertEqualInt(AE_SYMLINK_TYPE_FILE, archive_entry_symlink_type(ae)); +#if defined(_WIN32) && !defined(__CYGWIN__) + 
assertEqualWString(L"\U0001d4c8\U0001d4ce\U0001d4c2\U0001d4c1\U0001d4be" + "\U0001d4c3\U0001d4c0.txt", archive_entry_pathname_w(ae)); + assertEqualWString(L"\U0001d4bb\U0001d4be\U0001d4c1\U0001d452.txt", + archive_entry_symlink_w(ae)); +#else + assertEqualString("\xf0\x9d\x93\x88\xf0\x9d\x93\x8e\xf0\x9d\x93\x82\xf0" + "\x9d\x93\x81\xf0\x9d\x92\xbe\xf0\x9d\x93\x83\xf0\x9d\x93\x80.txt", + archive_entry_pathname(ae)); + assertEqualString( + "\xf0\x9d\x92\xbb\xf0\x9d\x92\xbe\xf0\x9d\x93\x81\xf0\x9d\x91\x92.txt", + archive_entry_symlink(ae)); +#endif + + /* Script "dir" */ + assertA(0 == archive_read_next_header(a, &ae)); + assertEqualInt(AE_IFDIR, archive_entry_filetype(ae)); +#if defined(_WIN32) && !defined(__CYGWIN__) + assertEqualWString(L"\U0001d4b9\U0001d4be\U0001d4c7", + archive_entry_pathname_w(ae)); +#else + assertEqualString("\xf0\x9d\x92\xb9\xf0\x9d\x92\xbe\xf0\x9d\x93\x87", + archive_entry_pathname(ae)); +#endif + + /* Script "dir_symlink" */ + assertA(0 == archive_read_next_header(a, &ae)); + assertEqualInt(AE_IFLNK, archive_entry_filetype(ae)); + assertEqualInt(AE_SYMLINK_TYPE_DIRECTORY, archive_entry_symlink_type(ae)); +#if defined(_WIN32) && !defined(__CYGWIN__) + assertEqualWString(L"\U0001d4b9\U0001d4be\U0001d4c7_\U0001d4c8" + "\U0001d4ce\U0001d4c2\U0001d4c1\U0001d4be\U0001d4c3\U0001d4c0", + archive_entry_pathname_w(ae)); + assertEqualWString(L"\U0001d4b9\U0001d4be\U0001d4c7", + archive_entry_symlink_w(ae)); +#else + assertEqualString("\xf0\x9d\x92\xb9\xf0\x9d\x92\xbe\xf0\x9d\x93\x87\x5f\xf0" + "\x9d\x93\x88\xf0\x9d\x93\x8e\xf0\x9d\x93\x82\xf0\x9d\x93\x81\xf0\x9d" + "\x92\xbe\xf0\x9d\x93\x83\xf0\x9d\x93\x80", archive_entry_pathname(ae)); + assertEqualString("\xf0\x9d\x92\xb9\xf0\x9d\x92\xbe\xf0\x9d\x93\x87", + archive_entry_symlink(ae)); +#endif + + assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); + + EPILOGUE(); +} diff --git a/libarchive/test/test_read_format_rar5_unicode.rar.uu b/libarchive/test/test_read_format_rar5_unicode.rar.uu index 
e011ab64e6..515b8657fb 100644 --- a/libarchive/test/test_read_format_rar5_unicode.rar.uu +++ b/libarchive/test/test_read_format_rar5_unicode.rar.uu @@ -1,9 +1,9 @@ -begin 0744 test_read_format_rar5_unicode.rar -M4F%R(1H'`0#SX8+K"P$%!P`&`0&`@(``//TR$"@"`PN-``2-`"#FQN;K@``` -M#/"?D8OPGXR.+G1X=`H#`D:)>%;RZ]D!2&5L;&\L('=O*3F.*3G>*3FBYT>'0*`P)& -MB7A6\NO9`1`%!``,\)^1B_"?C(XN='ATD-'.1V$"`QP`!`"@"`````"````U -M\)V2KO"=DX[PG9."\)V2M_"=D9SPG9.!\)V2OO"=DK@@\)V0O_"=DK[PG9.# -L\)V3@"YT>'0*`P(>7'J>!.S9`1`%`@`,\)^1B_"?C(XN='AT'7=640,%!``` -` -end +begin 0744 test_read_format_rar5_unicode.rar +M4F%R(1H'`0#SX8+K"P$%!P`&`0&`@(``//TR$"@"`PN-``2-`"#FQN;K@``` +M#/"?D8OPGXR.+G1X=`H#`D:)>%;RZ]D!2&5L;&\L('=O*3F.*3G>*3FBYT>'0*`P)& +MB7A6\NO9`1`%!``,\)^1B_"?C(XN='ATD-'.1V$"`QP`!`"@"`````"````U +M\)V2KO"=DX[PG9."\)V2M_"=D9SPG9.!\)V2OO"=DK@@\)V0O_"=DK[PG9.# +L\)V3@"YT>'0*`P(>7'J>!.S9`1`%`@`,\)^1B_"?C(XN='AT'7=640,%!``` +` +end diff --git a/test_utils/test_main.c b/test_utils/test_main.c index c98ebf8624..8cb10195c0 100644 --- a/test_utils/test_main.c +++ b/test_utils/test_main.c @@ -815,7 +815,7 @@ wcsdump(const char *e, const wchar_t *w) { logprintf(" %s = ", e); if (w == NULL) { - logprintf("(null)"); + logprintf("(null)\n"); return; } logprintf("\""); From db169e13393836cac4320431456860310048ca47 Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Mon, 18 Mar 2024 09:37:59 -0700 Subject: [PATCH 07/20] Use the macro --- libarchive/archive_string.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libarchive/archive_string.c b/libarchive/archive_string.c index aa14eb5ee4..e03e92f9d3 100644 --- a/libarchive/archive_string.c +++ b/libarchive/archive_string.c @@ -3897,7 +3897,7 @@ archive_mstring_get_utf8(struct archive *a, struct archive_mstring *aes, sc = archive_string_conversion_to_charset(a, "UTF-8", 1); if (sc == NULL) return (-1);/* Couldn't allocate memory for sc. 
*/ - aes->aes_utf8.length = 0; + archive_string_empty(&(aes->aes_utf8)); r = archive_string_append_from_wcs_in_codepage(&(aes->aes_utf8), aes->aes_wcs.s, aes->aes_wcs.length, sc); if (a == NULL) From 97f2f8149830ad59881f5f560b7ae27d5efda54c Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Mon, 18 Mar 2024 12:30:30 -0700 Subject: [PATCH 08/20] Revamp and test archive_mstring_update_utf8 --- libarchive/archive_string.c | 35 +++++--- .../test/test_archive_string_conversion.c | 82 +++++++++++++++++++ 2 files changed, 106 insertions(+), 11 deletions(-) diff --git a/libarchive/archive_string.c b/libarchive/archive_string.c index e03e92f9d3..1f66c245b8 100644 --- a/libarchive/archive_string.c +++ b/libarchive/archive_string.c @@ -4273,21 +4273,32 @@ archive_mstring_update_utf8(struct archive *a, struct archive_mstring *aes, aes->aes_set = AES_SET_UTF8; /* Only UTF8 is set now. */ - /* Try converting UTF-8 to MBS, return false on failure. */ sc = archive_string_conversion_from_charset(a, "UTF-8", 1); if (sc == NULL) return (-1);/* Couldn't allocate memory for sc. */ - r = archive_strcpy_l(&(aes->aes_mbs), utf8, sc); #if defined(_WIN32) && !defined(__CYGWIN__) - /* On failure, make an effort to convert UTF8 to WCS as the active code page - * may not be able to represent all characters in the string */ - if (r != 0) { - if (archive_wstring_append_from_mbs_in_codepage(&(aes->aes_wcs), - aes->aes_utf8.s, aes->aes_utf8.length, sc) == 0) - aes->aes_set = AES_SET_UTF8 | AES_SET_WCS; - } -#endif + /* On Windows, there's no good way to convert from UTF8 -> MBS directly, so + * prefer to first convert to WCS as (1) it's wasteful to throw away the + * intermediate result, and (2) WCS will still be set even if we fail to + * convert to MBS (e.g. 
with ACP that can't represent the characters) */ + r = archive_wstring_append_from_mbs_in_codepage(&(aes->aes_wcs), + aes->aes_utf8.s, aes->aes_utf8.length, sc); + + if (a == NULL) + free_sconv_object(sc); + if (r != 0) + return (-1); /* This will guarantee we can't convert to MBS */ + aes->aes_set = AES_SET_UTF8 | AES_SET_WCS; /* Both UTF8 and WCS set. */ + + /* Try converting WCS to MBS, return false on failure. */ + if (archive_string_append_from_wcs(&(aes->aes_mbs), aes->aes_wcs.s, + aes->aes_wcs.length)) + return (-1); +#else + + /* Try converting UTF-8 to MBS, return false on failure. */ + r = archive_strcpy_l(&(aes->aes_mbs), utf8, sc); if (a == NULL) free_sconv_object(sc); @@ -4299,8 +4310,10 @@ archive_mstring_update_utf8(struct archive *a, struct archive_mstring *aes, if (archive_wstring_append_from_mbs(&(aes->aes_wcs), aes->aes_mbs.s, aes->aes_mbs.length)) return (-1); - aes->aes_set = AES_SET_UTF8 | AES_SET_WCS | AES_SET_MBS; +#endif /* All conversions succeeded. */ + aes->aes_set = AES_SET_UTF8 | AES_SET_WCS | AES_SET_MBS; + return (0); } diff --git a/libarchive/test/test_archive_string_conversion.c b/libarchive/test/test_archive_string_conversion.c index d8c75888a4..2623aaf9be 100644 --- a/libarchive/test/test_archive_string_conversion.c +++ b/libarchive/test/test_archive_string_conversion.c @@ -882,3 +882,85 @@ DEFINE_TEST(test_archive_string_conversion) test_archive_string_canonicalization(); test_archive_string_set_get(); } + +DEFINE_TEST(test_archive_string_update_utf8_C) +{ + static const char utf8_string[] = "\xD0\xBF\xD1\x80\xD0\xB8"; + static const wchar_t wcs_string[] = L"\U0000043f\U00000440\U00000438"; + struct archive_mstring mstr; + int r; + + memset(&mstr, 0, sizeof(mstr)); + + r = archive_mstring_update_utf8(NULL, &mstr, utf8_string); + +#if defined(_WIN32) && !defined(__CYGWIN__) + /* On Windows, this should reliably fail with the C locale */ + assertEqualInt(-1, r); + assertEqualInt(0, mstr.aes_set & AES_SET_MBS); +#endif + + /* NOTE: 
We access the internals to validate that they were set by the + * 'archive_mstring_update_utf8' function */ + /* UTF-8 should always be set */ + assertEqualInt(AES_SET_UTF8, mstr.aes_set & AES_SET_UTF8); + assertEqualString(utf8_string, mstr.aes_utf8.s); + /* WCS should always be set as well */ + assertEqualInt(AES_SET_WCS, mstr.aes_set & AES_SET_WCS); + assertEqualWString(wcs_string, mstr.aes_wcs.s); + + archive_mstring_clean(&mstr); +} + +DEFINE_TEST(test_archive_string_update_utf8_utf8) +{ + static const char utf8_string[] = "\xD0\xBF\xD1\x80\xD0\xB8"; + static const wchar_t wcs_string[] = L"\U0000043f\U00000440\U00000438"; + struct archive_mstring mstr; + int r; + + memset(&mstr, 0, sizeof(mstr)); + + if (setlocale(LC_ALL, "en_US.UTF-8") == NULL) { + skipping("UTF-8 not supported on this system."); + return; + } + + r = archive_mstring_update_utf8(NULL, &mstr, utf8_string); + + /* All conversions should have succeeded */ + assertEqualInt(0, r); + assertEqualInt(AES_SET_MBS | AES_SET_WCS | AES_SET_UTF8, mstr.aes_set); + assertEqualString(utf8_string, mstr.aes_utf8.s); + assertEqualString(utf8_string, mstr.aes_mbs.s); + assertEqualWString(wcs_string, mstr.aes_wcs.s); + + archive_mstring_clean(&mstr); +} + +DEFINE_TEST(test_archive_string_update_utf8_koi8) +{ + static const char utf8_string[] = "\xD0\xBF\xD1\x80\xD0\xB8"; + static const char koi8_string[] = "\xD0\xD2\xC9"; + static const wchar_t wcs_string[] = L"\U0000043f\U00000440\U00000438"; + struct archive_mstring mstr; + int r; + + memset(&mstr, 0, sizeof(mstr)); + + if (setlocale(LC_ALL, "ru_RU.KOI8-R") == NULL) { + skipping("KOI8-R locale not available on this system."); + return; + } + + r = archive_mstring_update_utf8(NULL, &mstr, utf8_string); + + /* All conversions should have succeeded */ + assertEqualInt(0, r); + assertEqualInt(AES_SET_MBS | AES_SET_WCS | AES_SET_UTF8, mstr.aes_set); + assertEqualString(utf8_string, mstr.aes_utf8.s); + assertEqualString(koi8_string, mstr.aes_mbs.s); + 
assertEqualWString(wcs_string, mstr.aes_wcs.s); + + archive_mstring_clean(&mstr); +} From b02d90f63de14e0da0aa9525f1229855bc050ba6 Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Mon, 18 Mar 2024 12:36:28 -0700 Subject: [PATCH 09/20] Unused var --- libarchive/test/test_archive_string_conversion.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libarchive/test/test_archive_string_conversion.c b/libarchive/test/test_archive_string_conversion.c index 2623aaf9be..5c15bfc4fc 100644 --- a/libarchive/test/test_archive_string_conversion.c +++ b/libarchive/test/test_archive_string_conversion.c @@ -898,6 +898,9 @@ DEFINE_TEST(test_archive_string_update_utf8_C) /* On Windows, this should reliably fail with the C locale */ assertEqualInt(-1, r); assertEqualInt(0, mstr.aes_set & AES_SET_MBS); +#else + /* Only guaranteed to fail on Windows */ + (void)r; #endif /* NOTE: We access the internals to validate that they were set by the From 877515c12179a7c9372d5594815da8b4c7dd4c57 Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Mon, 18 Mar 2024 12:49:52 -0700 Subject: [PATCH 10/20] Update C locale test to be Windows only --- libarchive/test/test_archive_string_conversion.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libarchive/test/test_archive_string_conversion.c b/libarchive/test/test_archive_string_conversion.c index 5c15bfc4fc..3e63403681 100644 --- a/libarchive/test/test_archive_string_conversion.c +++ b/libarchive/test/test_archive_string_conversion.c @@ -883,8 +883,12 @@ DEFINE_TEST(test_archive_string_conversion) test_archive_string_set_get(); } -DEFINE_TEST(test_archive_string_update_utf8_C) +DEFINE_TEST(test_archive_string_update_utf8_win) { +#if !defined(_WIN32) || defined(__CYGWIN__) + skipping("This test is meant to verify unicode string handling on Windows" + " with the C locale"); +#else static const char utf8_string[] = "\xD0\xBF\xD1\x80\xD0\xB8"; static const wchar_t wcs_string[] = L"\U0000043f\U00000440\U00000438"; struct 
archive_mstring mstr; @@ -894,14 +898,9 @@ DEFINE_TEST(test_archive_string_update_utf8_C) r = archive_mstring_update_utf8(NULL, &mstr, utf8_string); -#if defined(_WIN32) && !defined(__CYGWIN__) /* On Windows, this should reliably fail with the C locale */ assertEqualInt(-1, r); assertEqualInt(0, mstr.aes_set & AES_SET_MBS); -#else - /* Only guaranteed to fail on Windows */ - (void)r; -#endif /* NOTE: We access the internals to validate that they were set by the * 'archive_mstring_update_utf8' function */ @@ -913,6 +912,7 @@ DEFINE_TEST(test_archive_string_update_utf8_C) assertEqualWString(wcs_string, mstr.aes_wcs.s); archive_mstring_clean(&mstr); +#endif } DEFINE_TEST(test_archive_string_update_utf8_utf8) From dc340ed1d6216250e6288e6af7738a9d21e65f67 Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Tue, 19 Mar 2024 12:51:30 -0700 Subject: [PATCH 11/20] Update makefile.am to see if this is why things are breaking --- Makefile.am | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile.am b/Makefile.am index 47b6fa1fc6..f02a23682c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -365,6 +365,7 @@ libarchive_test_SOURCES= \ $(test_utils_SOURCES) \ libarchive/test/read_open_memory.c \ libarchive/test/test.h \ + libarchive/test/test_7zip_filename_encoding.c \ libarchive/test/test_acl_nfs4.c \ libarchive/test/test_acl_pax.c \ libarchive/test/test_acl_platform_nfs4.c \ From d164c073a6c8e507636a6cb731ac8fe92b2a407d Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Tue, 19 Mar 2024 13:04:19 -0700 Subject: [PATCH 12/20] Apparently I don't know how WCS strings behave --- libarchive/test/test_archive_string_conversion.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libarchive/test/test_archive_string_conversion.c b/libarchive/test/test_archive_string_conversion.c index 3e63403681..d0d8dcdfa1 100644 --- a/libarchive/test/test_archive_string_conversion.c +++ b/libarchive/test/test_archive_string_conversion.c @@ -963,7 +963,13 @@ 
DEFINE_TEST(test_archive_string_update_utf8_koi8) assertEqualInt(AES_SET_MBS | AES_SET_WCS | AES_SET_UTF8, mstr.aes_set); assertEqualString(utf8_string, mstr.aes_utf8.s); assertEqualString(koi8_string, mstr.aes_mbs.s); +#if defined(_WIN32) && !defined(__CYGWIN__) assertEqualWString(wcs_string, mstr.aes_wcs.s); +#else + /* No guarantee of how WCS strings behave, however this test is + * primarily meant for Windows */ + (void)wcs_string; +#endif archive_mstring_clean(&mstr); } From 48e9625f570dbdd1f0a7f6adc84e1de480033bd6 Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Tue, 19 Mar 2024 13:26:13 -0700 Subject: [PATCH 13/20] Try and get a more useful error message --- libarchive/test/test_pax_filename_encoding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libarchive/test/test_pax_filename_encoding.c b/libarchive/test/test_pax_filename_encoding.c index 3165b65dd3..69be5805fd 100644 --- a/libarchive/test/test_pax_filename_encoding.c +++ b/libarchive/test/test_pax_filename_encoding.c @@ -462,7 +462,7 @@ DEFINE_TEST(test_pax_filename_encoding_EUCJP) /* Check the Unicode version. 
*/ archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_size(entry, 0); - assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_header(a, entry)); archive_entry_free(entry); assertEqualInt(ARCHIVE_OK, archive_write_free(a)); From af67fafc5acc45c25d3a4720f5fc5fb8ef6e18ee Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Tue, 19 Mar 2024 23:45:14 -0700 Subject: [PATCH 14/20] Hopefully fix issue and add hardlink_is_set function --- libarchive/archive_entry.c | 6 ++++++ libarchive/archive_entry.h | 1 + libarchive/archive_write_set_format_gnutar.c | 6 +----- libarchive/archive_write_set_format_pax.c | 12 ++++++++++-- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/libarchive/archive_entry.c b/libarchive/archive_entry.c index 9463233e6a..ad9a5e758e 100644 --- a/libarchive/archive_entry.c +++ b/libarchive/archive_entry.c @@ -519,6 +519,12 @@ archive_entry_hardlink_w(struct archive_entry *entry) return (NULL); } +int +archive_entry_hardlink_is_set(struct archive_entry *entry) +{ + return (entry->ae_set & AE_SET_HARDLINK) != 0; +} + int _archive_entry_hardlink_l(struct archive_entry *entry, const char **p, size_t *len, struct archive_string_conv *sc) diff --git a/libarchive/archive_entry.h b/libarchive/archive_entry.h index 3b41a0f8e3..3e8583cb13 100644 --- a/libarchive/archive_entry.h +++ b/libarchive/archive_entry.h @@ -262,6 +262,7 @@ __LA_DECL const wchar_t *archive_entry_gname_w(struct archive_entry *); __LA_DECL const char *archive_entry_hardlink(struct archive_entry *); __LA_DECL const char *archive_entry_hardlink_utf8(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_hardlink_w(struct archive_entry *); +__LA_DECL int archive_entry_hardlink_is_set(struct archive_entry *); __LA_DECL la_int64_t archive_entry_ino(struct archive_entry *); __LA_DECL la_int64_t archive_entry_ino64(struct archive_entry *); __LA_DECL int archive_entry_ino_is_set(struct archive_entry *); diff --git 
a/libarchive/archive_write_set_format_gnutar.c b/libarchive/archive_write_set_format_gnutar.c index 849cb39a7b..a3a49c573c 100644 --- a/libarchive/archive_write_set_format_gnutar.c +++ b/libarchive/archive_write_set_format_gnutar.c @@ -523,11 +523,7 @@ archive_write_gnutar_header(struct archive_write *a, goto exit_write_header; } -#if defined(_WIN32) && !defined(__CYGWIN__) - if (archive_entry_hardlink_w(entry) != NULL) { -#else - if (archive_entry_hardlink(entry) != NULL) { -#endif + if (archive_entry_hardlink_is_set(entry)) { tartype = '1'; } else switch (archive_entry_filetype(entry)) { diff --git a/libarchive/archive_write_set_format_pax.c b/libarchive/archive_write_set_format_pax.c index fecde82302..ca7142793f 100644 --- a/libarchive/archive_write_set_format_pax.c +++ b/libarchive/archive_write_set_format_pax.c @@ -1039,6 +1039,14 @@ archive_write_pax_header(struct archive_write *a, archive_entry_set_symlink(entry_main, "././@LongSymLink"); } + else { + /* Otherwise, has non-ASCII characters; update the paths to + * however they got decoded above */ + if (hardlink != NULL) + archive_entry_set_hardlink(entry_main, linkpath); + else + archive_entry_set_symlink(entry_main, linkpath); + } need_extension = 1; } } @@ -1394,7 +1402,7 @@ archive_write_pax_header(struct archive_write *a, * numeric fields, though they're less critical. 
*/ if (__archive_write_format_header_ustar(a, ustarbuff, entry_main, -1, 0, - sconv) == ARCHIVE_FATAL) { + NULL) == ARCHIVE_FATAL) { archive_entry_free(entry_main); archive_string_free(&entry_name); return (ARCHIVE_FATAL); @@ -1454,7 +1462,7 @@ archive_write_pax_header(struct archive_write *a, archive_entry_set_ctime(pax_attr_entry, 0, 0); r = __archive_write_format_header_ustar(a, paxbuff, - pax_attr_entry, 'x', 1, sconv); + pax_attr_entry, 'x', 1, NULL); archive_entry_free(pax_attr_entry); From 5a25f799f7eb26d44eb5b579d7d62c9db319d26c Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Tue, 19 Mar 2024 23:58:35 -0700 Subject: [PATCH 15/20] I think tab size is supposed to be 8 for this file --- libarchive/archive_entry.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libarchive/archive_entry.h b/libarchive/archive_entry.h index 3e8583cb13..807a4e39fc 100644 --- a/libarchive/archive_entry.h +++ b/libarchive/archive_entry.h @@ -262,7 +262,7 @@ __LA_DECL const wchar_t *archive_entry_gname_w(struct archive_entry *); __LA_DECL const char *archive_entry_hardlink(struct archive_entry *); __LA_DECL const char *archive_entry_hardlink_utf8(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_hardlink_w(struct archive_entry *); -__LA_DECL int archive_entry_hardlink_is_set(struct archive_entry *); +__LA_DECL int archive_entry_hardlink_is_set(struct archive_entry *); __LA_DECL la_int64_t archive_entry_ino(struct archive_entry *); __LA_DECL la_int64_t archive_entry_ino64(struct archive_entry *); __LA_DECL int archive_entry_ino_is_set(struct archive_entry *); From b7d6625b056d6a313456c4857cac2c9e630a801e Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Sat, 23 Mar 2024 19:48:45 -0700 Subject: [PATCH 16/20] Update zip test after merge with master (though perhaps we should use archive_read here...) 
--- libarchive/test/test_zip_filename_encoding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libarchive/test/test_zip_filename_encoding.c b/libarchive/test/test_zip_filename_encoding.c index 571d088cb9..b6786f2c3b 100644 --- a/libarchive/test/test_zip_filename_encoding.c +++ b/libarchive/test/test_zip_filename_encoding.c @@ -617,7 +617,7 @@ DEFINE_TEST(test_zip_filename_encoding_UTF16_win) * because the file name is ASCII. */ assertEqualInt(0, buff[7]); /* Check UTF-8 version. */ - assertEqualMem(buff + 58, "\xE8\xA1\xA8.txt", 7); + assertEqualMem(buff + 38, "\xE8\xA1\xA8.txt", 7); /* NOTE: ZIP does not support hardlinks */ #endif From 8cd66f9f3ab8ebc1e0376f4c84a8468aa0051dd0 Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Fri, 26 Apr 2024 19:35:58 -0700 Subject: [PATCH 17/20] De-duplicate test and fix compilation errors --- libarchive/archive_write_set_format_zip.c | 2 +- libarchive/test/test_read_format_rar5.c | 44 ++++--------------- .../test/test_read_format_rar5_unicode.rar.uu | 18 +++++--- 3 files changed, 20 insertions(+), 44 deletions(-) diff --git a/libarchive/archive_write_set_format_zip.c b/libarchive/archive_write_set_format_zip.c index e37e7b5edd..e01cde6871 100644 --- a/libarchive/archive_write_set_format_zip.c +++ b/libarchive/archive_write_set_format_zip.c @@ -906,7 +906,7 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) archive_le32enc(e, (uint32_t)archive_entry_ctime(entry)); e += 4; } - archive_le16enc(ut + 2, e - ut - 4); + archive_le16enc(ut + 2, (uint16_t)(e - ut - 4)); } /* diff --git a/libarchive/test/test_read_format_rar5.c b/libarchive/test/test_read_format_rar5.c index 9bed0c82e3..d9864ce9d7 100644 --- a/libarchive/test/test_read_format_rar5.c +++ b/libarchive/test/test_read_format_rar5.c @@ -806,36 +806,6 @@ DEFINE_TEST(test_read_format_rar5_extract_win32) EPILOGUE(); } -DEFINE_TEST(test_read_format_rar5_unicode) -{ -#if !defined(WIN32) || defined(__CYGWIN__) - skipping("Skipping 
test on non-Windows"); - return; -#else - /* Corresponds to the names: - * 👋🌎.txt - * 𝒮𝓎𝓂𝒷𝑜𝓁𝒾𝒸 𝐿𝒾𝓃𝓀.txt - * Ⓗⓐⓡⓓ Ⓛⓘⓝⓚ.txt */ - const wchar_t* emoji_name = L"\U0001f44b\U0001f30e.txt"; - const wchar_t* italic_name = L"\U0001d4ae\U0001d4ce\U0001d4c2\U0001d4b7\U0001d45c\U0001d4c1\U0001d4be\U0001d4b8 \U0001d43f\U0001d4be\U0001d4c3\U0001d4c0.txt"; - const wchar_t* circle_name = L"\u24bd\u24d0\u24e1\u24d3 \u24c1\u24d8\u24dd\u24da.txt"; - PROLOGUE("test_read_format_rar5_unicode.rar"); - assertA(0 == archive_read_next_header(a, &ae)); - assertEqualWString(emoji_name, archive_entry_pathname_w(ae)); - assertEqualInt(archive_entry_mode(ae), AE_IFREG | 0644); - assertA(0 == archive_read_next_header(a, &ae)); - assertEqualWString(circle_name, archive_entry_pathname_w(ae)); - assertEqualInt(archive_entry_mode(ae), AE_IFREG | 0644); - assertEqualWString(emoji_name, archive_entry_hardlink_w(ae)); - assertA(0 == archive_read_next_header(a, &ae)); - assertEqualWString(italic_name, archive_entry_pathname_w(ae)); - assertEqualInt(archive_entry_mode(ae), AE_IFLNK | 0644); - assertEqualWString(emoji_name, archive_entry_symlink_w(ae)); - EPILOGUE(); -#endif -} - DEFINE_TEST(test_read_format_rar5_block_by_block) { /* This test uses strange buffer sizes intentionally. 
*/ @@ -1423,8 +1393,9 @@ DEFINE_TEST(test_read_format_rar5_unicode) assertA(0 == archive_read_next_header(a, &ae)); assertEqualInt(AE_IFREG, archive_entry_filetype(ae)); #if defined(_WIN32) && !defined(__CYGWIN__) - assertEqualWString(L"\U0001d4bd\U0001d4b6\U0001d4c7\U0001d4b9\U0001d4c1" - "\U0001d4be\U0001d4c3\U0001d4c0.txt", archive_entry_pathname_w(ae)); + assertEqualWString( + L"\U0001d4bd\U0001d4b6\U0001d4c7\U0001d4b9\U0001d4c1\U0001d4be\U0001d4c3\U0001d4c0.txt", + archive_entry_pathname_w(ae)); assertEqualWString(L"\U0001d4bb\U0001d4be\U0001d4c1\U0001d452.txt", archive_entry_hardlink_w(ae)); #else @@ -1442,8 +1413,9 @@ DEFINE_TEST(test_read_format_rar5_unicode) assertEqualInt(AE_IFLNK, archive_entry_filetype(ae)); assertEqualInt(AE_SYMLINK_TYPE_FILE, archive_entry_symlink_type(ae)); #if defined(_WIN32) && !defined(__CYGWIN__) - assertEqualWString(L"\U0001d4c8\U0001d4ce\U0001d4c2\U0001d4c1\U0001d4be" - "\U0001d4c3\U0001d4c0.txt", archive_entry_pathname_w(ae)); + assertEqualWString( + L"\U0001d4c8\U0001d4ce\U0001d4c2\U0001d4c1\U0001d4be\U0001d4c3\U0001d4c0.txt", + archive_entry_pathname_w(ae)); assertEqualWString(L"\U0001d4bb\U0001d4be\U0001d4c1\U0001d452.txt", archive_entry_symlink_w(ae)); #else @@ -1471,8 +1443,8 @@ DEFINE_TEST(test_read_format_rar5_unicode) assertEqualInt(AE_IFLNK, archive_entry_filetype(ae)); assertEqualInt(AE_SYMLINK_TYPE_DIRECTORY, archive_entry_symlink_type(ae)); #if defined(_WIN32) && !defined(__CYGWIN__) - assertEqualWString(L"\U0001d4b9\U0001d4be\U0001d4c7_\U0001d4c8" - "\U0001d4ce\U0001d4c2\U0001d4c1\U0001d4be\U0001d4c3\U0001d4c0", + assertEqualWString( + L"\U0001d4b9\U0001d4be\U0001d4c7_\U0001d4c8\U0001d4ce\U0001d4c2\U0001d4c1\U0001d4be\U0001d4c3\U0001d4c0", archive_entry_pathname_w(ae)); assertEqualWString(L"\U0001d4b9\U0001d4be\U0001d4c7", archive_entry_symlink_w(ae)); diff --git a/libarchive/test/test_read_format_rar5_unicode.rar.uu b/libarchive/test/test_read_format_rar5_unicode.rar.uu index 515b8657fb..79eacefa9a 100644 --- 
a/libarchive/test/test_read_format_rar5_unicode.rar.uu +++ b/libarchive/test/test_read_format_rar5_unicode.rar.uu @@ -1,9 +1,13 @@ -begin 0744 test_read_format_rar5_unicode.rar -M4F%R(1H'`0#SX8+K"P$%!P`&`0&`@(``//TR$"@"`PN-``2-`"#FQN;K@``` -M#/"?D8OPGXR.+G1X=`H#`D:)>%;RZ]D!2&5L;&\L('=O*3F.*3G>*3FBYT>'0*`P)& -MB7A6\NO9`1`%!``,\)^1B_"?C(XN='ATD-'.1V$"`QP`!`"@"`````"````U -M\)V2KO"=DX[PG9."\)V2M_"=D9SPG9.!\)V2OO"=DK@@\)V0O_"=DK[PG9.# -L\)V3@"YT>'0*`P(>7'J>!.S9`1`%`@`,\)^1B_"?C(XN='AT'7=640,%!``` +begin 0744 odt_uuencoding_file.dat +M4F%R(1H'`0#SX8+K"P$%!P`&`0&`@(``55;EE#`"`PNA``2A`"#)4'4K@``` +M%/"=DKOPG9*^\)V3@?"=D9(N='AT"@,"(VJ&"15PV@%4:&ES(&ES(&9R;VT@ +M\)V2N_"=DK[PG9.!\)V1DBYT>'1&7U-L5P(#)``$(2``````@```)/"=DKWP +MG9*V\)V3A_"=DKGPG9.!\)V2OO"=DX/PG9.`+G1X=`H#`B-JA@D5<-H!&`4$ +M`!3PG9*[\)V2OO"=DX'PG9&2+G1X=/Y8_-94`@,D``0`H`@`````@```(/"= +MDXCPG9..\)V3@O"=DX'PG9*^\)V3@_"=DX`N='AT"@,"B!_'Q[5QV@$8!0(` +M%/"=DKOPG9*^\)V3@?"=D9(N='ATJSU'Y"("`PL``0`0@```#/"=DKGPG9*^ +M\)V3APH#`E&KC3&M<=H!RO>)D%$"`QP``0"0"(```"GPG9*Y\)V2OO"=DX=? 
+M\)V3B/"=DX[PG9."\)V3@?"=DK[PG9.#\)V3@`H#`H@?Q\>U<=H!$`4"`0SP +3G9*Y\)V2OO"=DX<==U91`P4$```` ` end From bf768cc5fdf008f6fe7765a39b58a4e8193eedc9 Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Sun, 28 Apr 2024 23:41:52 -0700 Subject: [PATCH 18/20] Test to see if setting locale gets entry names to be non-null --- libarchive/test/test_read_format_rar5.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libarchive/test/test_read_format_rar5.c b/libarchive/test/test_read_format_rar5.c index d9864ce9d7..67922cb71c 100644 --- a/libarchive/test/test_read_format_rar5.c +++ b/libarchive/test/test_read_format_rar5.c @@ -1377,6 +1377,14 @@ DEFINE_TEST(test_read_format_rar5_unicode) { PROLOGUE("test_read_format_rar5_unicode.rar"); +#if !defined(_WIN32) || defined(__CYGWIN__) + if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { + skipping("en_US.UTF-8 locale not available on this system."); + EPILOGUE(); + return; + } +#endif + /* Script "file.txt" */ assertA(0 == archive_read_next_header(a, &ae)); assertEqualInt(AE_IFREG, archive_entry_filetype(ae)); From c25b463e32999cdc2f758c03ca0a201dceba62a9 Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Sun, 28 Apr 2024 23:46:59 -0700 Subject: [PATCH 19/20] Missing include --- libarchive/test/test_read_format_rar5.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libarchive/test/test_read_format_rar5.c b/libarchive/test/test_read_format_rar5.c index 67922cb71c..5e7e190c2c 100644 --- a/libarchive/test/test_read_format_rar5.c +++ b/libarchive/test/test_read_format_rar5.c @@ -24,6 +24,8 @@ */ #include "test.h" +#include <locale.h> + /* Some tests will want to calculate some CRC32's, and this header can * help. */ #define __LIBARCHIVE_BUILD From e630b07a07cf3cb47474e263ce0e2ec7eb4c121c Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Sun, 28 Apr 2024 23:53:15 -0700 Subject: [PATCH 20/20] Always forget the makefile... 
--- Makefile.am | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile.am b/Makefile.am index f02a23682c..b277e254d3 100644 --- a/Makefile.am +++ b/Makefile.am @@ -881,6 +881,7 @@ libarchive_test_EXTRA_DIST=\ libarchive/test/test_read_format_rar5_arm.rar.uu \ libarchive/test/test_read_format_rar5_blake2.rar.uu \ libarchive/test/test_read_format_rar5_compressed.rar.uu \ + libarchive/test/test_read_format_rar5_unicode.rar.uu \ libarchive/test/test_read_format_rar5_different_window_size.rar.uu \ libarchive/test/test_read_format_rar5_different_solid_window_size.rar.uu \ libarchive/test/test_read_format_rar5_distance_overflow.rar.uu \