From a578fb345a0221b000fb6c2857ae7d790febe94b Mon Sep 17 00:00:00 2001 From: Duncan Horn <40036384+dunhor@users.noreply.github.com> Date: Wed, 19 Jun 2024 20:15:13 -0700 Subject: [PATCH 1/5] Fix issue when skipping first file in 7zip archive that is a multiple of 65536 bytes (#2245) We noticed an issue where we had an archive that, if you skipped the first entry and tried to extract the second, you'd get a failure saying `Truncated 7-Zip file body`. Turns out that this is because the first file in the archive is a multiple of 65,536 bytes (the size of the uncompressed buffer) and therefore after `read_stream` skipped all of the first file, `uncompressed_buffer_bytes_remaining` was set to zero (because all data was consumed) and then it calls `get_uncompressed_data` with `minimum` set to zero. This then saw that `minimum > zip->uncompressed_buffer_bytes_remaining` evaluated to false, causing us to read zero bytes, which got interpreted as a truncated archive. The fix here is simple: we now always call `extract_pack_stream` when `uncompressed_buffer_bytes_remaining` is zero before exiting the skipping loop. 
--- Makefile.am | 1 + libarchive/archive_read_support_format_7zip.c | 6 ++- libarchive/test/test_read_format_7zip.c | 42 +++++++++++++++++++ ...test_read_format_7zip_extract_second.7z.uu | 11 +++++ 4 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 libarchive/test/test_read_format_7zip_extract_second.7z.uu diff --git a/Makefile.am b/Makefile.am index 7560b14fe7..532b367c23 100644 --- a/Makefile.am +++ b/Makefile.am @@ -789,6 +789,7 @@ libarchive_test_EXTRA_DIST=\ libarchive/test/test_read_format_7zip_encryption.7z.uu \ libarchive/test/test_read_format_7zip_encryption_header.7z.uu \ libarchive/test/test_read_format_7zip_encryption_partially.7z.uu \ + libarchive/test/test_read_format_7zip_extract_second.7z.uu \ libarchive/test/test_read_format_7zip_lzma1.7z.uu \ libarchive/test/test_read_format_7zip_lzma1_2.7z.uu \ libarchive/test/test_read_format_7zip_lzma1_lzma2.7z.uu \ diff --git a/libarchive/archive_read_support_format_7zip.c b/libarchive/archive_read_support_format_7zip.c index 634521d952..e322808e73 100644 --- a/libarchive/archive_read_support_format_7zip.c +++ b/libarchive/archive_read_support_format_7zip.c @@ -3462,7 +3462,7 @@ read_stream(struct archive_read *a, const void **buff, size_t size, /* * Skip the bytes we already has skipped in skip_stream(). 
*/ - while (skip_bytes) { + while (1) { ssize_t skipped; if (zip->uncompressed_buffer_bytes_remaining == 0) { @@ -3482,6 +3482,10 @@ read_stream(struct archive_read *a, const void **buff, size_t size, return (ARCHIVE_FATAL); } } + + if (!skip_bytes) + break; + skipped = get_uncompressed_data( a, buff, (size_t)skip_bytes, 0); if (skipped < 0) diff --git a/libarchive/test/test_read_format_7zip.c b/libarchive/test/test_read_format_7zip.c index cff82f2c3d..bb47be6682 100644 --- a/libarchive/test/test_read_format_7zip.c +++ b/libarchive/test/test_read_format_7zip.c @@ -1257,5 +1257,47 @@ DEFINE_TEST(test_read_format_7zip_win_attrib) assertEqualString("system", archive_entry_fflags_text(ae)); + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); +} + +DEFINE_TEST(test_read_format_7zip_extract_second) +{ + struct archive *a; + char buffer[256]; + + assert((a = archive_read_new()) != NULL); + + if (ARCHIVE_OK != archive_read_support_filter_lzma(a)) { + skipping( + "7zip:lzma decoding is not supported on this platform"); + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); + return; + } + + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); + + /* + * The test archive has two files: first.txt which is a 65,536-byte file (the + * size of the uncompressed buffer), and second.txt which has contents + * we will validate. 
This test ensures we can skip first.txt and still + * be able to read the contents of second.txt + */ + const char *refname = "test_read_format_7zip_extract_second.7z"; + extract_reference_file(refname); + + assertEqualIntA(a, ARCHIVE_OK, + archive_read_open_filename(a, refname, 10240)); + + struct archive_entry *ae; + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("first.txt", archive_entry_pathname(ae)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("second.txt", archive_entry_pathname(ae)); + + assertEqualInt(23, archive_read_data(a, buffer, sizeof(buffer))); + assertEqualMem("This is from second.txt", buffer, 23); + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); } diff --git a/libarchive/test/test_read_format_7zip_extract_second.7z.uu b/libarchive/test/test_read_format_7zip_extract_second.7z.uu new file mode 100644 index 0000000000..aa3d6e2357 --- /dev/null +++ b/libarchive/test/test_read_format_7zip_extract_second.7z.uu @@ -0,0 +1,11 @@ +begin 644 test_read_format_7zip_extract_second.7z +M-WJ\KR<<``-N%=VX!@$````````B`````````*R\U.<`&`Q"DFIGO`[1,RO\ +MN,RA7-QU1L&_]O_/$0MMLIEBUR3'BDX@M2C-5'VG./-4,5@W3Q@*__^7_,[H +MEO`DB'[ZI>@H2_E>/W.2G$$.P01-X!YN5";SS[3#7Z4Q1G/EF.0'^D*[S8&8 +M[FV9DYX7,SA%^.Q\'?__P!@`````@3,'K@_4WV/Q0A7VLXG$X?GH4=5W^`UM +M$N_EX$)LE*?K$W5?WLP:X0T[Q%V^?A!0E\VZRBB,)(MO`C`LO[O!3(1YL)<: +MJ."`';WU;>GP5',%Z=6?*/H9*Z)&\*!2^,RV`R30UOBH8+5.;;2IKF +M0W://&'?"L?0L2!)`*]F30B0&/_'<4``%P9Z`0F`C``'"P$``2,#`0$%70`` +-@``,@*@*`6]FB2D````` +` +end From 5ec2b8446289dcaf328288804e19f9457ca4ef76 Mon Sep 17 00:00:00 2001 From: Steve Lhomme Date: Thu, 20 Jun 2024 22:57:15 +0200 Subject: [PATCH 2/5] Fix usage of GetVolumePathNameW in UWP before 20H1 (#2247) It started being allowed in UWP in 20H1. 
--- libarchive/archive_windows.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/libarchive/archive_windows.h b/libarchive/archive_windows.h index 6b7006a00a..14909da162 100644 --- a/libarchive/archive_windows.h +++ b/libarchive/archive_windows.h @@ -292,12 +292,17 @@ typedef int mbstate_t; size_t wcrtomb(char *, wchar_t, mbstate_t *); #endif -#if defined(_MSC_VER) && _MSC_VER < 1300 +#if !WINAPI_FAMILY_PARTITION (WINAPI_PARTITION_DESKTOP) && NTDDI_VERSION < NTDDI_WIN10_VB +// not supported in UWP SDK before 20H1 +#define GetVolumePathNameW(f, v, c) (0) +#elif defined(_MSC_VER) && _MSC_VER < 1300 WINBASEAPI BOOL WINAPI GetVolumePathNameW( LPCWSTR lpszFileName, LPWSTR lpszVolumePathName, DWORD cchBufferLength ); +#endif +#if defined(_MSC_VER) && _MSC_VER < 1300 # if _WIN32_WINNT < 0x0500 /* windows.h not providing 0x500 API */ typedef struct _FILE_ALLOCATED_RANGE_BUFFER { LARGE_INTEGER FileOffset; From 56e023631298f6b4988c5ca1c04a45857b886e91 Mon Sep 17 00:00:00 2001 From: Duncan Horn <40036384+dunhor@users.noreply.github.com> Date: Thu, 20 Jun 2024 14:01:47 -0700 Subject: [PATCH 3/5] Fix & optimize string conversion functions for Windows (#2226) All three parts of this change effectively stem from the same assumption: most of the code in `archive_string.c` assumes that MBS <-> UTF-8 string conversion can be done directly and efficiently. This is not quite true on Windows, where conversion looks more like MBS <-> WCS <-> UTF-8. This results in a few inefficiencies currently present in the code. First, if the caller is asking for either the MBS or UTF-8 string, but it's not currently set on the `archive_mstring`, then on Windows, it's more efficient to first check if the WCS is set and do the conversion with that. Otherwise, we'll end up doing a wasteful intermediate step of converting either the MBS or UTF-8 string to WCS, which we already have. 
Second, in the `archive_mstring_update_utf8` function, it's more efficient on Windows to first convert to WCS and use that result to convert to MBS, as opposed to the fallback I introduced in a previous change, which converts UTF-8 to MBS first and disposes of the intermediate WCS, only to re-calculate it. --- libarchive/archive_string.c | 84 +++++++++-- .../test/test_archive_string_conversion.c | 135 ++++++++++++++++++ 2 files changed, 208 insertions(+), 11 deletions(-) diff --git a/libarchive/archive_string.c b/libarchive/archive_string.c index be6c39600d..41bfe7af1d 100644 --- a/libarchive/archive_string.c +++ b/libarchive/archive_string.c @@ -3874,6 +3874,30 @@ archive_mstring_get_utf8(struct archive *a, struct archive_mstring *aes, } *p = NULL; +#if defined(_WIN32) && !defined(__CYGWIN__) + /* + * On Windows, first try converting from WCS because (1) there's no + * guarantee that the conversion to MBS will succeed, e.g. when using + * CP_ACP, and (2) that's more efficient than converting to MBS, just to + * convert back to WCS again before finally converting to UTF-8 + */ + if ((aes->aes_set & AES_SET_WCS) != 0) { + sc = archive_string_conversion_to_charset(a, "UTF-8", 1); + if (sc == NULL) + return (-1);/* Couldn't allocate memory for sc. */ + archive_string_empty(&(aes->aes_utf8)); + r = archive_string_append_from_wcs_in_codepage(&(aes->aes_utf8), + aes->aes_wcs.s, aes->aes_wcs.length, sc); + if (a == NULL) + free_sconv_object(sc); + if (r == 0) { + aes->aes_set |= AES_SET_UTF8; + *p = aes->aes_utf8.s; + return (0);/* success. */ + } else + return (-1);/* failure. */ + } +#endif /* Try converting WCS to MBS first if MBS does not exist yet. 
*/ if ((aes->aes_set & AES_SET_MBS) == 0) { const char *pm; /* unused */ @@ -3958,6 +3982,32 @@ archive_mstring_get_wcs(struct archive *a, struct archive_mstring *aes, } *wp = NULL; +#if defined(_WIN32) && !defined(__CYGWIN__) + /* + * On Windows, prefer converting from UTF-8 directly to WCS because: + * (1) there's no guarantee that the string can be represented in MBS (e.g. + * with CP_ACP), and (2) in order to convert from UTF-8 to MBS, we're going + * to need to convert from UTF-8 to WCS anyway and it's wasteful to throw + * away that intermediate result + */ + if (aes->aes_set & AES_SET_UTF8) { + struct archive_string_conv *sc; + + sc = archive_string_conversion_from_charset(a, "UTF-8", 1); + if (sc != NULL) { + archive_wstring_empty((&aes->aes_wcs)); + r = archive_wstring_append_from_mbs_in_codepage(&(aes->aes_wcs), + aes->aes_utf8.s, aes->aes_utf8.length, sc); + if (a == NULL) + free_sconv_object(sc); + if (r == 0) { + aes->aes_set |= AES_SET_WCS; + *wp = aes->aes_wcs.s; + return (0); + } + } + } +#endif /* Try converting UTF8 to MBS first if MBS does not exist yet. */ if ((aes->aes_set & AES_SET_MBS) == 0) { const char *p; /* unused */ @@ -4211,21 +4261,31 @@ archive_mstring_update_utf8(struct archive *a, struct archive_mstring *aes, aes->aes_set = AES_SET_UTF8; /* Only UTF8 is set now. */ - /* Try converting UTF-8 to MBS, return false on failure. */ sc = archive_string_conversion_from_charset(a, "UTF-8", 1); if (sc == NULL) return (-1);/* Couldn't allocate memory for sc. 
*/ - r = archive_strcpy_l(&(aes->aes_mbs), utf8, sc); #if defined(_WIN32) && !defined(__CYGWIN__) - /* On failure, make an effort to convert UTF8 to WCS as the active code page - * may not be able to represent all characters in the string */ - if (r != 0) { - if (archive_wstring_append_from_mbs_in_codepage(&(aes->aes_wcs), - aes->aes_utf8.s, aes->aes_utf8.length, sc) == 0) - aes->aes_set = AES_SET_UTF8 | AES_SET_WCS; - } -#endif + /* On Windows, there's no good way to convert from UTF8 -> MBS directly, so + * prefer to first convert to WCS as (1) it's wasteful to throw away the + * intermediate result, and (2) WCS will still be set even if we fail to + * convert to MBS (e.g. with ACP that can't represent the characters) */ + r = archive_wstring_append_from_mbs_in_codepage(&(aes->aes_wcs), + aes->aes_utf8.s, aes->aes_utf8.length, sc); + + if (a == NULL) + free_sconv_object(sc); + if (r != 0) + return (-1); /* This will guarantee we can't convert to MBS */ + aes->aes_set = AES_SET_UTF8 | AES_SET_WCS; /* Both UTF8 and WCS set. */ + + /* Try converting WCS to MBS, return false on failure. */ + if (archive_string_append_from_wcs(&(aes->aes_mbs), aes->aes_wcs.s, + aes->aes_wcs.length)) + return (-1); +#else + /* Try converting UTF-8 to MBS, return false on failure. */ + r = archive_strcpy_l(&(aes->aes_mbs), utf8, sc); if (a == NULL) free_sconv_object(sc); @@ -4237,8 +4297,10 @@ archive_mstring_update_utf8(struct archive *a, struct archive_mstring *aes, if (archive_wstring_append_from_mbs(&(aes->aes_wcs), aes->aes_mbs.s, aes->aes_mbs.length)) return (-1); - aes->aes_set = AES_SET_UTF8 | AES_SET_WCS | AES_SET_MBS; +#endif /* All conversions succeeded. 
*/ + aes->aes_set = AES_SET_UTF8 | AES_SET_WCS | AES_SET_MBS; + return (0); } diff --git a/libarchive/test/test_archive_string_conversion.c b/libarchive/test/test_archive_string_conversion.c index d8c75888a4..67e9b762aa 100644 --- a/libarchive/test/test_archive_string_conversion.c +++ b/libarchive/test/test_archive_string_conversion.c @@ -882,3 +882,138 @@ DEFINE_TEST(test_archive_string_conversion) test_archive_string_canonicalization(); test_archive_string_set_get(); } + +DEFINE_TEST(test_archive_string_conversion_utf16_utf8) +{ +#if !defined(_WIN32) || defined(__CYGWIN__) + skipping("This test is meant to verify unicode string handling on Windows"); +#else + struct archive_mstring mstr; + const char* utf8_string; + + memset(&mstr, 0, sizeof(mstr)); + + assertEqualInt(ARCHIVE_OK, + archive_mstring_copy_wcs(&mstr, L"\U0000043f\U00000440\U00000438")); + + /* Conversion from WCS to UTF-8 should always succeed */ + assertEqualInt(ARCHIVE_OK, + archive_mstring_get_utf8(NULL, &mstr, &utf8_string)); + assertEqualString("\xD0\xBF\xD1\x80\xD0\xB8", utf8_string); + + archive_mstring_clean(&mstr); +#endif +} + +DEFINE_TEST(test_archive_string_conversion_utf8_utf16) +{ +#if !defined(_WIN32) || defined(__CYGWIN__) + skipping("This test is meant to verify unicode string handling on Windows"); +#else + struct archive_mstring mstr; + const wchar_t* wcs_string; + + memset(&mstr, 0, sizeof(mstr)); + + assertEqualInt(6, + archive_mstring_copy_utf8(&mstr, "\xD0\xBF\xD1\x80\xD0\xB8")); + + /* Conversion from UTF-8 to WCS should always succeed */ + assertEqualInt(ARCHIVE_OK, + archive_mstring_get_wcs(NULL, &mstr, &wcs_string)); + assertEqualWString(L"\U0000043f\U00000440\U00000438", wcs_string); + + archive_mstring_clean(&mstr); +#endif +} + +DEFINE_TEST(test_archive_string_update_utf8_win) +{ +#if !defined(_WIN32) || defined(__CYGWIN__) + skipping("This test is meant to verify unicode string handling on Windows" + " with the C locale"); +#else + static const char utf8_string[] = 
"\xD0\xBF\xD1\x80\xD0\xB8"; + static const wchar_t wcs_string[] = L"\U0000043f\U00000440\U00000438"; + struct archive_mstring mstr; + int r; + + memset(&mstr, 0, sizeof(mstr)); + + r = archive_mstring_update_utf8(NULL, &mstr, utf8_string); + + /* On Windows, this should reliably fail with the C locale */ + assertEqualInt(-1, r); + assertEqualInt(0, mstr.aes_set & AES_SET_MBS); + + /* NOTE: We access the internals to validate that they were set by the + * 'archive_mstring_update_utf8' function */ + /* UTF-8 should always be set */ + assertEqualInt(AES_SET_UTF8, mstr.aes_set & AES_SET_UTF8); + assertEqualString(utf8_string, mstr.aes_utf8.s); + /* WCS should always be set as well */ + assertEqualInt(AES_SET_WCS, mstr.aes_set & AES_SET_WCS); + assertEqualWString(wcs_string, mstr.aes_wcs.s); + + archive_mstring_clean(&mstr); +#endif +} + +DEFINE_TEST(test_archive_string_update_utf8_utf8) +{ + static const char utf8_string[] = "\xD0\xBF\xD1\x80\xD0\xB8"; + static const wchar_t wcs_string[] = L"\U0000043f\U00000440\U00000438"; + struct archive_mstring mstr; + int r; + + memset(&mstr, 0, sizeof(mstr)); + + if (setlocale(LC_ALL, "en_US.UTF-8") == NULL) { + skipping("UTF-8 not supported on this system."); + return; + } + + r = archive_mstring_update_utf8(NULL, &mstr, utf8_string); + + /* All conversions should have succeeded */ + assertEqualInt(0, r); + assertEqualInt(AES_SET_MBS | AES_SET_WCS | AES_SET_UTF8, mstr.aes_set); + assertEqualString(utf8_string, mstr.aes_utf8.s); + assertEqualString(utf8_string, mstr.aes_mbs.s); + assertEqualWString(wcs_string, mstr.aes_wcs.s); + + archive_mstring_clean(&mstr); +} + +DEFINE_TEST(test_archive_string_update_utf8_koi8) +{ + static const char utf8_string[] = "\xD0\xBF\xD1\x80\xD0\xB8"; + static const char koi8_string[] = "\xD0\xD2\xC9"; + static const wchar_t wcs_string[] = L"\U0000043f\U00000440\U00000438"; + struct archive_mstring mstr; + int r; + + memset(&mstr, 0, sizeof(mstr)); + + if (setlocale(LC_ALL, "ru_RU.KOI8-R") == NULL) { 
+ skipping("KOI8-R locale not available on this system."); + return; + } + + r = archive_mstring_update_utf8(NULL, &mstr, utf8_string); + + /* All conversions should have succeeded */ + assertEqualInt(0, r); + assertEqualInt(AES_SET_MBS | AES_SET_WCS | AES_SET_UTF8, mstr.aes_set); + assertEqualString(utf8_string, mstr.aes_utf8.s); + assertEqualString(koi8_string, mstr.aes_mbs.s); +#if defined(_WIN32) && !defined(__CYGWIN__) + assertEqualWString(wcs_string, mstr.aes_wcs.s); +#else + /* No guarantee of how WCS strings behave, however this test is + * primarily meant for Windows */ + (void)wcs_string; +#endif + + archive_mstring_clean(&mstr); +} From 07206cd172c73cbe3b6b3d64e00f427fa0befa54 Mon Sep 17 00:00:00 2001 From: Duncan Horn <40036384+dunhor@users.noreply.github.com> Date: Thu, 20 Jun 2024 14:03:54 -0700 Subject: [PATCH 4/5] Fix gnutar creation with unicode hardlink names on Windows (#2227) The code currently uses `archive_entry_hardlink` to determine if an entry is a hardlink, however on Windows, this call will fail if the path cannot be represented in the current locale. This instead checks to see if any entry in the `archive_mstring` is set. 
--- libarchive/archive_entry.c | 6 ++ libarchive/archive_entry.h | 1 + libarchive/archive_write_set_format_gnutar.c | 2 +- .../test/test_gnutar_filename_encoding.c | 102 ++++++++++++++++++ 4 files changed, 110 insertions(+), 1 deletion(-) diff --git a/libarchive/archive_entry.c b/libarchive/archive_entry.c index 178f7f6283..ef322341a9 100644 --- a/libarchive/archive_entry.c +++ b/libarchive/archive_entry.c @@ -526,6 +526,12 @@ archive_entry_hardlink_w(struct archive_entry *entry) return (NULL); } +int +archive_entry_hardlink_is_set(struct archive_entry *entry) +{ + return (entry->ae_set & AE_SET_HARDLINK) != 0; +} + int _archive_entry_hardlink_l(struct archive_entry *entry, const char **p, size_t *len, struct archive_string_conv *sc) diff --git a/libarchive/archive_entry.h b/libarchive/archive_entry.h index b51f34e42b..3a0afffb08 100644 --- a/libarchive/archive_entry.h +++ b/libarchive/archive_entry.h @@ -263,6 +263,7 @@ __LA_DECL void archive_entry_set_link_to_hardlink(struct archive_entry *); __LA_DECL const char *archive_entry_hardlink(struct archive_entry *); __LA_DECL const char *archive_entry_hardlink_utf8(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_hardlink_w(struct archive_entry *); +__LA_DECL int archive_entry_hardlink_is_set(struct archive_entry *); __LA_DECL la_int64_t archive_entry_ino(struct archive_entry *); __LA_DECL la_int64_t archive_entry_ino64(struct archive_entry *); __LA_DECL int archive_entry_ino_is_set(struct archive_entry *); diff --git a/libarchive/archive_write_set_format_gnutar.c b/libarchive/archive_write_set_format_gnutar.c index a88350b874..a3a49c573c 100644 --- a/libarchive/archive_write_set_format_gnutar.c +++ b/libarchive/archive_write_set_format_gnutar.c @@ -523,7 +523,7 @@ archive_write_gnutar_header(struct archive_write *a, goto exit_write_header; } - if (archive_entry_hardlink(entry) != NULL) { + if (archive_entry_hardlink_is_set(entry)) { tartype = '1'; } else switch (archive_entry_filetype(entry)) { diff 
--git a/libarchive/test/test_gnutar_filename_encoding.c b/libarchive/test/test_gnutar_filename_encoding.c index f473ddfb4f..476ec2149f 100644 --- a/libarchive/test/test_gnutar_filename_encoding.c +++ b/libarchive/test/test_gnutar_filename_encoding.c @@ -389,3 +389,105 @@ DEFINE_TEST(test_gnutar_filename_encoding_CP932_UTF8) assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); } +DEFINE_TEST(test_gnutar_filename_encoding_UTF16_win) +{ +#if !defined(_WIN32) || defined(__CYGWIN__) + skipping("This test is meant to verify unicode string handling" + " on Windows with UTF-16 names"); + return; +#else + struct archive *a; + struct archive_entry *entry; + char buff[4096]; + size_t used; + + /* + * Don't call setlocale because we're verifying that the '_w' functions + * work as expected when 'hdrcharset' is UTF-8 + */ + + /* Part 1: file */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); + if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { + skipping("This system cannot convert character-set" + " from UTF-16 to UTF-8."); + archive_write_free(a); + return; + } + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the filename using a UTF-16 string */ + archive_entry_copy_pathname_w(entry, L"\u8868.txt"); + archive_entry_set_filetype(entry, AE_IFREG); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Check UTF-8 version. 
*/ + assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); + + /* Part 2: directory */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); + assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8")); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the directory name using a UTF-16 string */ + /* NOTE: Explicitly not adding trailing slash to test that code path */ + archive_entry_copy_pathname_w(entry, L"\u8868"); + archive_entry_set_filetype(entry, AE_IFDIR); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Check UTF-8 version. */ + assertEqualMem(buff, "\xE8\xA1\xA8/", 4); + + /* Part 3: symlink */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); + assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8")); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the symlink target using a UTF-16 string */ + archive_entry_set_pathname(entry, "link.txt"); + archive_entry_copy_symlink_w(entry, L"\u8868.txt"); + archive_entry_set_filetype(entry, AE_IFLNK); + archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Check UTF-8 version. 
*/ + assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7); + + /* Part 4: hardlink */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); + assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8")); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the hardlink target using a UTF-16 string */ + archive_entry_set_pathname(entry, "link.txt"); + archive_entry_copy_hardlink_w(entry, L"\u8868.txt"); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Check UTF-8 version. */ + assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7); +#endif +} From 898dc8319355b7e985f68a9819f182aaed61b53a Mon Sep 17 00:00:00 2001 From: Fatima Qarni Date: Sat, 22 Jun 2024 17:49:53 -0500 Subject: [PATCH 5/5] Checks for null references (#2251) Microsoft's static analysis tool found some vulnerabilities from unguarded null references that I changed in [microsoft/cmake](https://github.com/microsoft/cmake). Pushing these changes upstream so they can be added to [kitware/cmake](https://github.com/Kitware/CMake). 
--- libarchive/archive_read_support_format_7zip.c | 2 +- libarchive/archive_write_set_format_cpio_binary.c | 3 +++ libarchive/archive_write_set_format_cpio_odc.c | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/libarchive/archive_read_support_format_7zip.c b/libarchive/archive_read_support_format_7zip.c index e322808e73..b1e0c91ad1 100644 --- a/libarchive/archive_read_support_format_7zip.c +++ b/libarchive/archive_read_support_format_7zip.c @@ -1063,7 +1063,7 @@ ppmd_read(void *p) ssize_t bytes_avail = 0; const uint8_t* data = __archive_read_ahead(a, (size_t)zip->ppstream.stream_in+1, &bytes_avail); - if(bytes_avail < zip->ppstream.stream_in+1) { + if(data == NULL || bytes_avail < zip->ppstream.stream_in+1) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated 7z file data"); diff --git a/libarchive/archive_write_set_format_cpio_binary.c b/libarchive/archive_write_set_format_cpio_binary.c index 7a010ee00f..a22d06ea38 100644 --- a/libarchive/archive_write_set_format_cpio_binary.c +++ b/libarchive/archive_write_set_format_cpio_binary.c @@ -577,6 +577,9 @@ archive_write_binary_close(struct archive_write *a) struct archive_entry *trailer; trailer = archive_entry_new2(NULL); + if (trailer == NULL) { + return ARCHIVE_FATAL; + } /* nlink = 1 here for GNU cpio compat. */ archive_entry_set_nlink(trailer, 1); archive_entry_set_size(trailer, 0); diff --git a/libarchive/archive_write_set_format_cpio_odc.c b/libarchive/archive_write_set_format_cpio_odc.c index 426f779a2b..6dce78b454 100644 --- a/libarchive/archive_write_set_format_cpio_odc.c +++ b/libarchive/archive_write_set_format_cpio_odc.c @@ -467,6 +467,9 @@ archive_write_odc_close(struct archive_write *a) struct archive_entry *trailer; trailer = archive_entry_new2(NULL); + if (trailer == NULL) { + return ARCHIVE_FATAL; + } /* nlink = 1 here for GNU cpio compat. */ archive_entry_set_nlink(trailer, 1); archive_entry_set_size(trailer, 0);