From a578fb345a0221b000fb6c2857ae7d790febe94b Mon Sep 17 00:00:00 2001
From: Duncan Horn <40036384+dunhor@users.noreply.github.com>
Date: Wed, 19 Jun 2024 20:15:13 -0700
Subject: [PATCH 1/5] Fix issue when skipping first file in 7zip archive that
 is a multiple of 65536 bytes (#2245)

We noticed an issue where we had an archive that, if you skipped the
first entry and tried to extract the second, you'd get a failure saying
`Truncated 7-Zip file body`. Turns out that this is because the first
file in the archive is a multiple of 65,536 bytes (the size of the
uncompressed buffer) and therefore after `read_stream` skipped all of
the first file, `uncompressed_buffer_bytes_remaining` was set to zero
(because all data was consumed) and then it calls
`get_uncompressed_data` with `minimum` set to zero. This then saw that
`minimum > zip->uncompressed_buffer_bytes_remaining` evaluated to false,
causing us to read zero bytes, which got interpreted as a truncated
archive.

The fix here is simple: we now always call `extract_pack_stream` when
`uncompressed_buffer_bytes_remaining` is zero before exiting the
skipping loop.
---
 Makefile.am                                   |  1 +
 libarchive/archive_read_support_format_7zip.c |  6 ++-
 libarchive/test/test_read_format_7zip.c       | 42 +++++++++++++++++++
 ...test_read_format_7zip_extract_second.7z.uu | 11 +++++
 4 files changed, 59 insertions(+), 1 deletion(-)
 create mode 100644 libarchive/test/test_read_format_7zip_extract_second.7z.uu

diff --git a/Makefile.am b/Makefile.am
index 7560b14fe7..532b367c23 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -789,6 +789,7 @@ libarchive_test_EXTRA_DIST=\
 	libarchive/test/test_read_format_7zip_encryption.7z.uu \
 	libarchive/test/test_read_format_7zip_encryption_header.7z.uu \
 	libarchive/test/test_read_format_7zip_encryption_partially.7z.uu \
+	libarchive/test/test_read_format_7zip_extract_second.7z.uu \
 	libarchive/test/test_read_format_7zip_lzma1.7z.uu \
 	libarchive/test/test_read_format_7zip_lzma1_2.7z.uu \
 	libarchive/test/test_read_format_7zip_lzma1_lzma2.7z.uu \
diff --git a/libarchive/archive_read_support_format_7zip.c b/libarchive/archive_read_support_format_7zip.c
index 634521d952..e322808e73 100644
--- a/libarchive/archive_read_support_format_7zip.c
+++ b/libarchive/archive_read_support_format_7zip.c
@@ -3462,7 +3462,7 @@ read_stream(struct archive_read *a, const void **buff, size_t size,
 	/*
 	 * Skip the bytes we already has skipped in skip_stream().
 	 */
-	while (skip_bytes) {
+	while (1) {
 		ssize_t skipped;
 
 		if (zip->uncompressed_buffer_bytes_remaining == 0) {
@@ -3482,6 +3482,10 @@ read_stream(struct archive_read *a, const void **buff, size_t size,
 				return (ARCHIVE_FATAL);
 			}
 		}
+
+		if (!skip_bytes)
+			break;
+
 		skipped = get_uncompressed_data(
 			a, buff, (size_t)skip_bytes, 0);
 		if (skipped < 0)
diff --git a/libarchive/test/test_read_format_7zip.c b/libarchive/test/test_read_format_7zip.c
index cff82f2c3d..bb47be6682 100644
--- a/libarchive/test/test_read_format_7zip.c
+++ b/libarchive/test/test_read_format_7zip.c
@@ -1257,5 +1257,47 @@ DEFINE_TEST(test_read_format_7zip_win_attrib)
 	assertEqualString("system", archive_entry_fflags_text(ae));
 
 
+	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
+}
+
+DEFINE_TEST(test_read_format_7zip_extract_second)
+{
+	struct archive *a;
+	char buffer[256];
+
+	assert((a = archive_read_new()) != NULL);
+
+	if (ARCHIVE_OK != archive_read_support_filter_lzma(a)) {
+		skipping(
+		    "7zip:lzma decoding is not supported on this platform");
+		assertEqualInt(ARCHIVE_OK, archive_read_free(a));
+		return;
+	}
+
+	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
+
+	/*
+	 * The test archive has two files: first.txt, which is a 65,536-byte file
+	 * (the size of the uncompressed buffer), and second.txt, whose contents
+	 * we will validate. This test ensures we can skip first.txt and still
+	 * be able to read the contents of second.txt.
+	 */
+	const char *refname = "test_read_format_7zip_extract_second.7z";
+	extract_reference_file(refname);
+
+	assertEqualIntA(a, ARCHIVE_OK,
+		archive_read_open_filename(a, refname, 10240));
+
+	struct archive_entry *ae;
+
+	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
+	assertEqualString("first.txt", archive_entry_pathname(ae));
+
+	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
+	assertEqualString("second.txt", archive_entry_pathname(ae));
+
+	assertEqualInt(23, archive_read_data(a, buffer, sizeof(buffer)));
+	assertEqualMem("This is from second.txt", buffer, 23);
+
 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
 }
diff --git a/libarchive/test/test_read_format_7zip_extract_second.7z.uu b/libarchive/test/test_read_format_7zip_extract_second.7z.uu
new file mode 100644
index 0000000000..aa3d6e2357
--- /dev/null
+++ b/libarchive/test/test_read_format_7zip_extract_second.7z.uu
@@ -0,0 +1,11 @@
+begin 644 test_read_format_7zip_extract_second.7z
+M-WJ\KR<<``-N%=VX!@$````````B`````````*R\U.<`&`Q"DFIGO`[1,RO\
+MN,RA7-QU1L&_]O_/$0MMLIEBUR3'BDX@M2C-5'VG./-4,5@W3Q@*__^7_,[H
+MEO`DB'[ZI>@H2_E>/W.2G$$.P01-X!YN5";SS[3#7Z4Q1G/EF.0'^D*[S8&8
+M[FV9DYX7,SA%^.Q\'?__P!@`````@3,'K@_4WV/Q0A7VLXG$X?GH4=5W^`UM
+M$N_EX$)LE*?K$W5?WLP:X0T[Q%V^?A!0E\VZRBB,)(MO`C`LO[O!3(1YL)<:
+MJ."`';WU;>GP5',%Z=6?*/H9*Z)&\*!2^<F\P&>,RV`R30UOBH8+5.;;2IKF
+M0W://&'?"L?0L2!)`*]F30B0&/_'<4``%P9Z`0F`C``'"P$``2,#`0$%70``
+-@``,@*@*`6]FB2D`````
+`
+end

From 5ec2b8446289dcaf328288804e19f9457ca4ef76 Mon Sep 17 00:00:00 2001
From: Steve Lhomme <robux4@ycbcr.xyz>
Date: Thu, 20 Jun 2024 22:57:15 +0200
Subject: [PATCH 2/5] Fix usage of GetVolumePathNameW in UWP before 20H1
 (#2247)

It started being allowed in UWP in 20H1.
---
 libarchive/archive_windows.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/libarchive/archive_windows.h b/libarchive/archive_windows.h
index 6b7006a00a..14909da162 100644
--- a/libarchive/archive_windows.h
+++ b/libarchive/archive_windows.h
@@ -292,12 +292,17 @@ typedef int mbstate_t;
 size_t wcrtomb(char *, wchar_t, mbstate_t *);
 #endif
 
-#if defined(_MSC_VER) && _MSC_VER < 1300
+#if !WINAPI_FAMILY_PARTITION (WINAPI_PARTITION_DESKTOP) && NTDDI_VERSION < NTDDI_WIN10_VB
+// not supported in UWP SDK before 20H1
+#define GetVolumePathNameW(f, v, c)   (0)
+#elif defined(_MSC_VER) && _MSC_VER < 1300
 WINBASEAPI BOOL WINAPI GetVolumePathNameW(
        LPCWSTR lpszFileName,
        LPWSTR lpszVolumePathName,
        DWORD cchBufferLength
        );
+#endif
+#if defined(_MSC_VER) && _MSC_VER < 1300
 # if _WIN32_WINNT < 0x0500 /* windows.h not providing 0x500 API */
 typedef struct _FILE_ALLOCATED_RANGE_BUFFER {
        LARGE_INTEGER FileOffset;

From 56e023631298f6b4988c5ca1c04a45857b886e91 Mon Sep 17 00:00:00 2001
From: Duncan Horn <40036384+dunhor@users.noreply.github.com>
Date: Thu, 20 Jun 2024 14:01:47 -0700
Subject: [PATCH 3/5] Fix & optimize string conversion functions for Windows
 (#2226)

All three parts of this change effectively stem from the same
assumption: most of the code in `archive_string.c` assumes that MBS <->
UTF-8 string conversion can be done directly and efficiently. This is
not quite true on Windows, where conversion looks more like MBS <-> WCS
<-> UTF-8. This results in a few inefficiencies currently present in the
code.

First, if the caller is asking for either the MBS or UTF-8 string, but
it's not currently set on the `archive_mstring`, then on Windows, it's
more efficient to first check if the WCS is set and do the conversion
with that. Otherwise, we'll end up doing a wasteful intermediate step of
converting either the MBS or UTF-8 string to WCS, which we already have.

Second, in the `archive_mstring_update_utf8` function, it's more
efficient on Windows to first convert to WCS and use that result to
convert to MBS, as opposed to the fallback I introduced in a previous
change, which converts UTF-8 to MBS first and disposes of the
intermediate WCS, only to re-calculate it.
---
 libarchive/archive_string.c                   |  84 +++++++++--
 .../test/test_archive_string_conversion.c     | 135 ++++++++++++++++++
 2 files changed, 208 insertions(+), 11 deletions(-)

diff --git a/libarchive/archive_string.c b/libarchive/archive_string.c
index be6c39600d..41bfe7af1d 100644
--- a/libarchive/archive_string.c
+++ b/libarchive/archive_string.c
@@ -3874,6 +3874,30 @@ archive_mstring_get_utf8(struct archive *a, struct archive_mstring *aes,
 	}
 
 	*p = NULL;
+#if defined(_WIN32) && !defined(__CYGWIN__)
+	/*
+	 * On Windows, first try converting from WCS because (1) there's no
+	 * guarantee that the conversion to MBS will succeed, e.g. when using
+	 * CP_ACP, and (2) that's more efficient than converting to MBS, just to
+	 * convert back to WCS again before finally converting to UTF-8
+	 */
+	if ((aes->aes_set & AES_SET_WCS) != 0) {
+		sc = archive_string_conversion_to_charset(a, "UTF-8", 1);
+		if (sc == NULL)
+			return (-1);/* Couldn't allocate memory for sc. */
+		archive_string_empty(&(aes->aes_utf8));
+		r = archive_string_append_from_wcs_in_codepage(&(aes->aes_utf8),
+			aes->aes_wcs.s, aes->aes_wcs.length, sc);
+		if (a == NULL)
+			free_sconv_object(sc);
+		if (r == 0) {
+			aes->aes_set |= AES_SET_UTF8;
+			*p = aes->aes_utf8.s;
+			return (0);/* success. */
+		} else
+			return (-1);/* failure. */
+	}
+#endif
 	/* Try converting WCS to MBS first if MBS does not exist yet. */
 	if ((aes->aes_set & AES_SET_MBS) == 0) {
 		const char *pm; /* unused */
@@ -3958,6 +3982,32 @@ archive_mstring_get_wcs(struct archive *a, struct archive_mstring *aes,
 	}
 
 	*wp = NULL;
+#if defined(_WIN32) && !defined(__CYGWIN__)
+	/*
+	 * On Windows, prefer converting from UTF-8 directly to WCS because:
+	 * (1) there's no guarantee that the string can be represented in MBS (e.g.
+	 * with CP_ACP), and (2) in order to convert from UTF-8 to MBS, we're going
+	 * to need to convert from UTF-8 to WCS anyway and it's wasteful to throw
+	 * away that intermediate result.
+	 */
+	if (aes->aes_set & AES_SET_UTF8) {
+		struct archive_string_conv *sc;
+
+		sc = archive_string_conversion_from_charset(a, "UTF-8", 1);
+		if (sc != NULL) {
+			archive_wstring_empty((&aes->aes_wcs));
+			r = archive_wstring_append_from_mbs_in_codepage(&(aes->aes_wcs),
+			    aes->aes_utf8.s, aes->aes_utf8.length, sc);
+			if (a == NULL)
+				free_sconv_object(sc);
+			if (r == 0) {
+				aes->aes_set |= AES_SET_WCS;
+				*wp = aes->aes_wcs.s;
+				return (0);
+			}
+		}
+	}
+#endif
 	/* Try converting UTF8 to MBS first if MBS does not exist yet. */
 	if ((aes->aes_set & AES_SET_MBS) == 0) {
 		const char *p; /* unused */
@@ -4211,21 +4261,31 @@ archive_mstring_update_utf8(struct archive *a, struct archive_mstring *aes,
 
 	aes->aes_set = AES_SET_UTF8;	/* Only UTF8 is set now. */
 
-	/* Try converting UTF-8 to MBS, return false on failure. */
 	sc = archive_string_conversion_from_charset(a, "UTF-8", 1);
 	if (sc == NULL)
 		return (-1);/* Couldn't allocate memory for sc. */
-	r = archive_strcpy_l(&(aes->aes_mbs), utf8, sc);
 
 #if defined(_WIN32) && !defined(__CYGWIN__)
-	/* On failure, make an effort to convert UTF8 to WCS as the active code page
-	 * may not be able to represent all characters in the string */
-	if (r != 0) {
-		if (archive_wstring_append_from_mbs_in_codepage(&(aes->aes_wcs),
-			aes->aes_utf8.s, aes->aes_utf8.length, sc) == 0)
-			aes->aes_set = AES_SET_UTF8 | AES_SET_WCS;
-	}
-#endif
+	/* On Windows, there's no good way to convert from UTF8 -> MBS directly, so
+	 * prefer to first convert to WCS as (1) it's wasteful to throw away the
+	 * intermediate result, and (2) WCS will still be set even if we fail to
+	 * convert to MBS (e.g. with ACP that can't represent the characters) */
+	r = archive_wstring_append_from_mbs_in_codepage(&(aes->aes_wcs),
+		aes->aes_utf8.s, aes->aes_utf8.length, sc);
+
+	if (a == NULL)
+		free_sconv_object(sc);
+	if (r != 0)
+		return (-1); /* This will guarantee we can't convert to MBS */
+	aes->aes_set = AES_SET_UTF8 | AES_SET_WCS; /* Both UTF8 and WCS set. */
+
+	/* Try converting WCS to MBS, return false on failure. */
+	if (archive_string_append_from_wcs(&(aes->aes_mbs), aes->aes_wcs.s,
+	    aes->aes_wcs.length))
+		return (-1);
+#else
+	/* Try converting UTF-8 to MBS, return false on failure. */
+	r = archive_strcpy_l(&(aes->aes_mbs), utf8, sc);
 
 	if (a == NULL)
 		free_sconv_object(sc);
@@ -4237,8 +4297,10 @@ archive_mstring_update_utf8(struct archive *a, struct archive_mstring *aes,
 	if (archive_wstring_append_from_mbs(&(aes->aes_wcs), aes->aes_mbs.s,
 	    aes->aes_mbs.length))
 		return (-1);
-	aes->aes_set = AES_SET_UTF8 | AES_SET_WCS | AES_SET_MBS;
+#endif
 
 	/* All conversions succeeded. */
+	aes->aes_set = AES_SET_UTF8 | AES_SET_WCS | AES_SET_MBS;
+
 	return (0);
 }
diff --git a/libarchive/test/test_archive_string_conversion.c b/libarchive/test/test_archive_string_conversion.c
index d8c75888a4..67e9b762aa 100644
--- a/libarchive/test/test_archive_string_conversion.c
+++ b/libarchive/test/test_archive_string_conversion.c
@@ -882,3 +882,138 @@ DEFINE_TEST(test_archive_string_conversion)
 	test_archive_string_canonicalization();
 	test_archive_string_set_get();
 }
+
+DEFINE_TEST(test_archive_string_conversion_utf16_utf8)
+{
+#if !defined(_WIN32) || defined(__CYGWIN__)
+	skipping("This test is meant to verify unicode string handling on Windows");
+#else
+	struct archive_mstring mstr;
+	const char* utf8_string;
+
+	memset(&mstr, 0, sizeof(mstr));
+
+	assertEqualInt(ARCHIVE_OK,
+	    archive_mstring_copy_wcs(&mstr, L"\U0000043f\U00000440\U00000438"));
+
+	/* Conversion from WCS to UTF-8 should always succeed */
+	assertEqualInt(ARCHIVE_OK,
+	    archive_mstring_get_utf8(NULL, &mstr, &utf8_string));
+	assertEqualString("\xD0\xBF\xD1\x80\xD0\xB8", utf8_string);
+
+	archive_mstring_clean(&mstr);
+#endif
+}
+
+DEFINE_TEST(test_archive_string_conversion_utf8_utf16)
+{
+#if !defined(_WIN32) || defined(__CYGWIN__)
+	skipping("This test is meant to verify unicode string handling on Windows");
+#else
+	struct archive_mstring mstr;
+	const wchar_t* wcs_string;
+
+	memset(&mstr, 0, sizeof(mstr));
+
+	assertEqualInt(6,
+	    archive_mstring_copy_utf8(&mstr, "\xD0\xBF\xD1\x80\xD0\xB8"));
+
+	/* Conversion from UTF-8 to WCS should always succeed */
+	assertEqualInt(ARCHIVE_OK,
+	    archive_mstring_get_wcs(NULL, &mstr, &wcs_string));
+	assertEqualWString(L"\U0000043f\U00000440\U00000438", wcs_string);
+
+	archive_mstring_clean(&mstr);
+#endif
+}
+
+DEFINE_TEST(test_archive_string_update_utf8_win)
+{
+#if !defined(_WIN32) || defined(__CYGWIN__)
+	skipping("This test is meant to verify unicode string handling on Windows"
+	    " with the C locale");
+#else
+	static const char utf8_string[] = "\xD0\xBF\xD1\x80\xD0\xB8";
+	static const wchar_t wcs_string[] = L"\U0000043f\U00000440\U00000438";
+	struct archive_mstring mstr;
+	int r;
+
+	memset(&mstr, 0, sizeof(mstr));
+
+	r = archive_mstring_update_utf8(NULL, &mstr, utf8_string);
+
+	/* On Windows, this should reliably fail with the C locale */
+	assertEqualInt(-1, r);
+	assertEqualInt(0, mstr.aes_set & AES_SET_MBS);
+
+	/* NOTE: We access the internals to validate that they were set by the
+	 *       'archive_mstring_update_utf8' function */
+	/* UTF-8 should always be set */
+	assertEqualInt(AES_SET_UTF8, mstr.aes_set & AES_SET_UTF8);
+	assertEqualString(utf8_string, mstr.aes_utf8.s);
+	/* WCS should always be set as well */
+	assertEqualInt(AES_SET_WCS, mstr.aes_set & AES_SET_WCS);
+	assertEqualWString(wcs_string, mstr.aes_wcs.s);
+
+	archive_mstring_clean(&mstr);
+#endif
+}
+
+DEFINE_TEST(test_archive_string_update_utf8_utf8)
+{
+	static const char utf8_string[] = "\xD0\xBF\xD1\x80\xD0\xB8";
+	static const wchar_t wcs_string[] = L"\U0000043f\U00000440\U00000438";
+	struct archive_mstring mstr;
+	int r;
+
+	memset(&mstr, 0, sizeof(mstr));
+
+	if (setlocale(LC_ALL, "en_US.UTF-8") == NULL) {
+		skipping("UTF-8 not supported on this system.");
+		return;
+	}
+
+	r = archive_mstring_update_utf8(NULL, &mstr, utf8_string);
+
+	/* All conversions should have succeeded */
+	assertEqualInt(0, r);
+	assertEqualInt(AES_SET_MBS | AES_SET_WCS | AES_SET_UTF8, mstr.aes_set);
+	assertEqualString(utf8_string, mstr.aes_utf8.s);
+	assertEqualString(utf8_string, mstr.aes_mbs.s);
+	assertEqualWString(wcs_string, mstr.aes_wcs.s);
+
+	archive_mstring_clean(&mstr);
+}
+
+DEFINE_TEST(test_archive_string_update_utf8_koi8)
+{
+	static const char utf8_string[] = "\xD0\xBF\xD1\x80\xD0\xB8";
+	static const char koi8_string[] = "\xD0\xD2\xC9";
+	static const wchar_t wcs_string[] = L"\U0000043f\U00000440\U00000438";
+	struct archive_mstring mstr;
+	int r;
+
+	memset(&mstr, 0, sizeof(mstr));
+
+	if (setlocale(LC_ALL, "ru_RU.KOI8-R") == NULL) {
+		skipping("KOI8-R locale not available on this system.");
+		return;
+	}
+
+	r = archive_mstring_update_utf8(NULL, &mstr, utf8_string);
+
+	/* All conversions should have succeeded */
+	assertEqualInt(0, r);
+	assertEqualInt(AES_SET_MBS | AES_SET_WCS | AES_SET_UTF8, mstr.aes_set);
+	assertEqualString(utf8_string, mstr.aes_utf8.s);
+	assertEqualString(koi8_string, mstr.aes_mbs.s);
+#if defined(_WIN32) && !defined(__CYGWIN__)
+	assertEqualWString(wcs_string, mstr.aes_wcs.s);
+#else
+	/* No guarantee of how WCS strings behave; however, this test is
+	 * primarily meant for Windows */
+	(void)wcs_string;
+#endif
+
+	archive_mstring_clean(&mstr);
+}

From 07206cd172c73cbe3b6b3d64e00f427fa0befa54 Mon Sep 17 00:00:00 2001
From: Duncan Horn <40036384+dunhor@users.noreply.github.com>
Date: Thu, 20 Jun 2024 14:03:54 -0700
Subject: [PATCH 4/5] Fix gnutar creation with unicode hardlink names on
 Windows (#2227)

The code currently uses `archive_entry_hardlink` to determine if an
entry is a hardlink, however on Windows, this call will fail if the path
cannot be represented in the current locale. This instead checks to see
if any entry in the `archive_mstring` is set.
---
 libarchive/archive_entry.c                    |   6 ++
 libarchive/archive_entry.h                    |   1 +
 libarchive/archive_write_set_format_gnutar.c  |   2 +-
 .../test/test_gnutar_filename_encoding.c      | 102 ++++++++++++++++++
 4 files changed, 110 insertions(+), 1 deletion(-)

diff --git a/libarchive/archive_entry.c b/libarchive/archive_entry.c
index 178f7f6283..ef322341a9 100644
--- a/libarchive/archive_entry.c
+++ b/libarchive/archive_entry.c
@@ -526,6 +526,12 @@ archive_entry_hardlink_w(struct archive_entry *entry)
 	return (NULL);
 }
 
+int
+archive_entry_hardlink_is_set(struct archive_entry *entry)
+{
+	return (entry->ae_set & AE_SET_HARDLINK) != 0;
+}
+
 int
 _archive_entry_hardlink_l(struct archive_entry *entry,
     const char **p, size_t *len, struct archive_string_conv *sc)
diff --git a/libarchive/archive_entry.h b/libarchive/archive_entry.h
index b51f34e42b..3a0afffb08 100644
--- a/libarchive/archive_entry.h
+++ b/libarchive/archive_entry.h
@@ -263,6 +263,7 @@ __LA_DECL void		 archive_entry_set_link_to_hardlink(struct archive_entry *);
 __LA_DECL const char	*archive_entry_hardlink(struct archive_entry *);
 __LA_DECL const char	*archive_entry_hardlink_utf8(struct archive_entry *);
 __LA_DECL const wchar_t	*archive_entry_hardlink_w(struct archive_entry *);
+__LA_DECL int		 archive_entry_hardlink_is_set(struct archive_entry *);
 __LA_DECL la_int64_t	 archive_entry_ino(struct archive_entry *);
 __LA_DECL la_int64_t	 archive_entry_ino64(struct archive_entry *);
 __LA_DECL int		 archive_entry_ino_is_set(struct archive_entry *);
diff --git a/libarchive/archive_write_set_format_gnutar.c b/libarchive/archive_write_set_format_gnutar.c
index a88350b874..a3a49c573c 100644
--- a/libarchive/archive_write_set_format_gnutar.c
+++ b/libarchive/archive_write_set_format_gnutar.c
@@ -523,7 +523,7 @@ archive_write_gnutar_header(struct archive_write *a,
 			goto exit_write_header;
 	}
 
-	if (archive_entry_hardlink(entry) != NULL) {
+	if (archive_entry_hardlink_is_set(entry)) {
 		tartype = '1';
 	} else
 		switch (archive_entry_filetype(entry)) {
diff --git a/libarchive/test/test_gnutar_filename_encoding.c b/libarchive/test/test_gnutar_filename_encoding.c
index f473ddfb4f..476ec2149f 100644
--- a/libarchive/test/test_gnutar_filename_encoding.c
+++ b/libarchive/test/test_gnutar_filename_encoding.c
@@ -389,3 +389,105 @@ DEFINE_TEST(test_gnutar_filename_encoding_CP932_UTF8)
 	assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
 }
 
+DEFINE_TEST(test_gnutar_filename_encoding_UTF16_win)
+{
+#if !defined(_WIN32) || defined(__CYGWIN__)
+	skipping("This test is meant to verify unicode string handling"
+		" on Windows with UTF-16 names");
+	return;
+#else
+	struct archive *a;
+	struct archive_entry *entry;
+	char buff[4096];
+	size_t used;
+
+	/*
+	 * Don't call setlocale because we're verifying that the '_w' functions
+	 * work as expected when 'hdrcharset' is UTF-8
+	 */
+
+	/* Part 1: file */
+	a = archive_write_new();
+	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
+	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
+		skipping("This system cannot convert character-set"
+		    " from UTF-16 to UTF-8.");
+		archive_write_free(a);
+		return;
+	}
+	assertEqualInt(ARCHIVE_OK,
+	    archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+	entry = archive_entry_new2(a);
+	/* Set the filename using a UTF-16 string */
+	archive_entry_copy_pathname_w(entry, L"\u8868.txt");
+	archive_entry_set_filetype(entry, AE_IFREG);
+	archive_entry_set_size(entry, 0);
+	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+	archive_entry_free(entry);
+	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+	/* Check UTF-8 version. */
+	assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
+
+	/* Part 2: directory */
+	a = archive_write_new();
+	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
+	assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
+	assertEqualInt(ARCHIVE_OK,
+	    archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+	entry = archive_entry_new2(a);
+	/* Set the directory name using a UTF-16 string */
+	/* NOTE: Explicitly not adding trailing slash to test that code path */
+	archive_entry_copy_pathname_w(entry, L"\u8868");
+	archive_entry_set_filetype(entry, AE_IFDIR);
+	archive_entry_set_size(entry, 0);
+	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+	archive_entry_free(entry);
+	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+	/* Check UTF-8 version. */
+	assertEqualMem(buff, "\xE8\xA1\xA8/", 4);
+
+	/* Part 3: symlink */
+	a = archive_write_new();
+	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
+	assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
+	assertEqualInt(ARCHIVE_OK,
+	    archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+	entry = archive_entry_new2(a);
+	/* Set the symlink target using a UTF-16 string */
+	archive_entry_set_pathname(entry, "link.txt");
+	archive_entry_copy_symlink_w(entry, L"\u8868.txt");
+	archive_entry_set_filetype(entry, AE_IFLNK);
+	archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE);
+	archive_entry_set_size(entry, 0);
+	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+	archive_entry_free(entry);
+	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+	/* Check UTF-8 version. */
+	assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7);
+
+	/* Part 4: hardlink */
+	a = archive_write_new();
+	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
+	assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
+	assertEqualInt(ARCHIVE_OK,
+	    archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+	entry = archive_entry_new2(a);
+	/* Set the hardlink target using a UTF-16 string */
+	archive_entry_set_pathname(entry, "link.txt");
+	archive_entry_copy_hardlink_w(entry, L"\u8868.txt");
+	archive_entry_set_size(entry, 0);
+	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+	archive_entry_free(entry);
+	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+	/* Check UTF-8 version. */
+	assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7);
+#endif
+}

From 898dc8319355b7e985f68a9819f182aaed61b53a Mon Sep 17 00:00:00 2001
From: Fatima Qarni <fatima.q832@gmail.com>
Date: Sat, 22 Jun 2024 17:49:53 -0500
Subject: [PATCH 5/5] Checks for null references (#2251)

Microsoft's static analysis tool found some potential vulnerabilities
caused by unguarded null references, which I fixed in
[microsoft/cmake](https://github.com/microsoft/cmake). Pushing these
changes upstream so they can be added to
[kitware/cmake](https://github.com/Kitware/CMake).
---
 libarchive/archive_read_support_format_7zip.c     | 2 +-
 libarchive/archive_write_set_format_cpio_binary.c | 3 +++
 libarchive/archive_write_set_format_cpio_odc.c    | 3 +++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/libarchive/archive_read_support_format_7zip.c b/libarchive/archive_read_support_format_7zip.c
index e322808e73..b1e0c91ad1 100644
--- a/libarchive/archive_read_support_format_7zip.c
+++ b/libarchive/archive_read_support_format_7zip.c
@@ -1063,7 +1063,7 @@ ppmd_read(void *p)
 		ssize_t bytes_avail = 0;
 		const uint8_t* data = __archive_read_ahead(a,
 		    (size_t)zip->ppstream.stream_in+1, &bytes_avail);
-		if(bytes_avail < zip->ppstream.stream_in+1) {
+		if(data == NULL || bytes_avail < zip->ppstream.stream_in+1) {
 			archive_set_error(&a->archive,
 			    ARCHIVE_ERRNO_FILE_FORMAT,
 			    "Truncated 7z file data");
diff --git a/libarchive/archive_write_set_format_cpio_binary.c b/libarchive/archive_write_set_format_cpio_binary.c
index 7a010ee00f..a22d06ea38 100644
--- a/libarchive/archive_write_set_format_cpio_binary.c
+++ b/libarchive/archive_write_set_format_cpio_binary.c
@@ -577,6 +577,9 @@ archive_write_binary_close(struct archive_write *a)
 	struct archive_entry *trailer;
 
 	trailer = archive_entry_new2(NULL);
+	if (trailer == NULL) {
+		return ARCHIVE_FATAL;
+	}
 	/* nlink = 1 here for GNU cpio compat. */
 	archive_entry_set_nlink(trailer, 1);
 	archive_entry_set_size(trailer, 0);
diff --git a/libarchive/archive_write_set_format_cpio_odc.c b/libarchive/archive_write_set_format_cpio_odc.c
index 426f779a2b..6dce78b454 100644
--- a/libarchive/archive_write_set_format_cpio_odc.c
+++ b/libarchive/archive_write_set_format_cpio_odc.c
@@ -467,6 +467,9 @@ archive_write_odc_close(struct archive_write *a)
 	struct archive_entry *trailer;
 
 	trailer = archive_entry_new2(NULL);
+	if (trailer == NULL) {
+		return ARCHIVE_FATAL;
+	}
 	/* nlink = 1 here for GNU cpio compat. */
 	archive_entry_set_nlink(trailer, 1);
 	archive_entry_set_size(trailer, 0);