From 3f3b3efec27ef8c9587b5c725345ff235b98820a Mon Sep 17 00:00:00 2001
From: Sevan Janiyan <venture37@geeklan.co.uk>
Date: Tue, 11 Jun 2024 04:42:13 +0100
Subject: [PATCH 01/14] Always use our supplied la_queue.h (#2222)

On legacy systems the OS-supplied `sys/queue.h` may lack the required
macros, so to avoid having to verify whether the system's queue.h is
usable, always use our supplied `la_queue.h`, which will match expectations.

Allows libarchive to build on legacy Darwin where `STAILQ_FOREACH` would
be missing from `sys/queue.h`.

Resolves #2220
---
 CMakeLists.txt              | 1 -
 build/cmake/config.h.in     | 3 ---
 configure.ac                | 2 +-
 libarchive/config_freebsd.h | 1 -
 unzip/bsdunzip.c            | 4 ----
 5 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 51c8c08141..3c86789c8b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -743,7 +743,6 @@ LA_CHECK_INCLUDE_FILE("sys/mkdev.h" HAVE_SYS_MKDEV_H)
 LA_CHECK_INCLUDE_FILE("sys/mount.h" HAVE_SYS_MOUNT_H)
 LA_CHECK_INCLUDE_FILE("sys/param.h" HAVE_SYS_PARAM_H)
 LA_CHECK_INCLUDE_FILE("sys/poll.h" HAVE_SYS_POLL_H)
-LA_CHECK_INCLUDE_FILE("sys/queue.h" HAVE_SYS_QUEUE_H)
 LA_CHECK_INCLUDE_FILE("sys/richacl.h" HAVE_SYS_RICHACL_H)
 LA_CHECK_INCLUDE_FILE("sys/select.h" HAVE_SYS_SELECT_H)
 LA_CHECK_INCLUDE_FILE("sys/stat.h" HAVE_SYS_STAT_H)
diff --git a/build/cmake/config.h.in b/build/cmake/config.h.in
index d47694c0c1..d6e5487921 100644
--- a/build/cmake/config.h.in
+++ b/build/cmake/config.h.in
@@ -1132,9 +1132,6 @@ typedef uint64_t uintmax_t;
 /* Define to 1 if you have the <sys/poll.h> header file. */
 #cmakedefine HAVE_SYS_POLL_H 1
 
-/* Define to 1 if you have the <sys/queue.h> header file. */
-#cmakedefine HAVE_SYS_QUEUE_H 1
-
 /* Define to 1 if you have the <sys/richacl.h> header file. */
 #cmakedefine HAVE_SYS_RICHACL_H 1
 
diff --git a/configure.ac b/configure.ac
index c4c219f1cc..7250b686fa 100644
--- a/configure.ac
+++ b/configure.ac
@@ -362,7 +362,7 @@ AC_CHECK_HEADERS([locale.h membership.h paths.h poll.h pthread.h pwd.h])
 AC_CHECK_HEADERS([readpassphrase.h signal.h spawn.h])
 AC_CHECK_HEADERS([stdarg.h stdint.h stdlib.h string.h])
 AC_CHECK_HEADERS([sys/acl.h sys/cdefs.h sys/ea.h sys/extattr.h])
-AC_CHECK_HEADERS([sys/ioctl.h sys/mkdev.h sys/mount.h sys/queue.h])
+AC_CHECK_HEADERS([sys/ioctl.h sys/mkdev.h sys/mount.h])
 AC_CHECK_HEADERS([sys/param.h sys/poll.h sys/richacl.h])
 AC_CHECK_HEADERS([sys/select.h sys/statfs.h sys/statvfs.h sys/sysmacros.h])
 AC_CHECK_HEADERS([sys/time.h sys/utime.h sys/utsname.h sys/vfs.h sys/xattr.h])
diff --git a/libarchive/config_freebsd.h b/libarchive/config_freebsd.h
index d0f3e2300c..a1bf0dfe9f 100644
--- a/libarchive/config_freebsd.h
+++ b/libarchive/config_freebsd.h
@@ -203,7 +203,6 @@
 #define HAVE_SYS_MOUNT_H 1
 #define HAVE_SYS_PARAM_H 1
 #define HAVE_SYS_POLL_H 1
-#define HAVE_SYS_QUEUE_H 1
 #define HAVE_SYS_SELECT_H 1
 #define HAVE_SYS_STATVFS_H 1
 #define HAVE_SYS_STAT_H 1
diff --git a/unzip/bsdunzip.c b/unzip/bsdunzip.c
index cec1810483..50e08e3995 100644
--- a/unzip/bsdunzip.c
+++ b/unzip/bsdunzip.c
@@ -36,11 +36,7 @@
 
 #include "bsdunzip_platform.h"
 
-#ifdef HAVE_SYS_QUEUE_H
-#include <sys/queue.h>
-#else
 #include "la_queue.h"
-#endif
 #ifdef HAVE_SYS_STAT_H
 #include <sys/stat.h>
 #endif

From 13afc8cbde82b559f15f7d70c3d55a89f7425d01 Mon Sep 17 00:00:00 2001
From: Duncan Horn <40036384+dunhor@users.noreply.github.com>
Date: Mon, 10 Jun 2024 21:23:13 -0700
Subject: [PATCH 02/14] Update archive_entry_link_resolver to copy the "wide"
 pathname for hardlinks on Windows (#2225)

On Windows, if you are using `archive_entry_link_resolver` and give it
an entry that links to a past entry whose pathname was set using a "wide"
string that cannot be represented by the current locale (i.e. WCS -> MBS
conversion fails), this code will crash due to a null pointer read. This
updates to use the `_w` function instead on Windows.

Note: this is a partial cherry-pick from
https://github.com/libarchive/libarchive/pull/2095, which I'm going to
go through and break into smaller pieces in hopes of getting some things
in while discussion of other things can continue.
---
 libarchive/archive_entry_link_resolver.c | 15 ++++++++
 libarchive/test/test_link_resolver.c     | 45 ++++++++++++++++++++++++
 2 files changed, 60 insertions(+)

diff --git a/libarchive/archive_entry_link_resolver.c b/libarchive/archive_entry_link_resolver.c
index 6c61734302..c2fd6895f2 100644
--- a/libarchive/archive_entry_link_resolver.c
+++ b/libarchive/archive_entry_link_resolver.c
@@ -201,16 +201,26 @@ archive_entry_linkify(struct archive_entry_linkresolver *res,
 		le = find_entry(res, *e);
 		if (le != NULL) {
 			archive_entry_unset_size(*e);
+#if defined(_WIN32) && !defined(__CYGWIN__)
+			archive_entry_copy_hardlink_w(*e,
+			    archive_entry_pathname_w(le->canonical));
+#else
 			archive_entry_copy_hardlink(*e,
 			    archive_entry_pathname(le->canonical));
+#endif
 		} else
 			insert_entry(res, *e);
 		return;
 	case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE:
 		le = find_entry(res, *e);
 		if (le != NULL) {
+#if defined(_WIN32) && !defined(__CYGWIN__)
+			archive_entry_copy_hardlink_w(*e,
+			    archive_entry_pathname_w(le->canonical));
+#else
 			archive_entry_copy_hardlink(*e,
 			    archive_entry_pathname(le->canonical));
+#endif
 		} else
 			insert_entry(res, *e);
 		return;
@@ -229,8 +239,13 @@ archive_entry_linkify(struct archive_entry_linkresolver *res,
 			le->entry = t;
 			/* Make the old entry into a hardlink. */
 			archive_entry_unset_size(*e);
+#if defined(_WIN32) && !defined(__CYGWIN__)
+			archive_entry_copy_hardlink_w(*e,
+			    archive_entry_pathname_w(le->canonical));
+#else
 			archive_entry_copy_hardlink(*e,
 			    archive_entry_pathname(le->canonical));
+#endif
 			/* If we ran out of links, return the
 			 * final entry as well. */
 			if (le->links == 0) {
diff --git a/libarchive/test/test_link_resolver.c b/libarchive/test/test_link_resolver.c
index 5bea9a463b..6c6230c4d0 100644
--- a/libarchive/test/test_link_resolver.c
+++ b/libarchive/test/test_link_resolver.c
@@ -202,3 +202,48 @@ DEFINE_TEST(test_link_resolver)
 	test_linkify_old_cpio();
 	test_linkify_new_cpio();
 }
+
+DEFINE_TEST(test_link_resolver_unicode_win)
+{
+#if !defined(_WIN32) || defined(__CYGWIN__)
+	skipping("This test is meant to verify unicode string handling"
+	    " on Windows with UTF-16 names");
+	return;
+#else
+	struct archive_entry *entry, *e2;
+	struct archive_entry_linkresolver *resolver;
+
+	/* Initialize the resolver. */
+	assert(NULL != (resolver = archive_entry_linkresolver_new()));
+	archive_entry_linkresolver_set_strategy(resolver,
+	    ARCHIVE_FORMAT_TAR_USTAR);
+
+	/* Create an entry with a unicode filename and 2 links. */
+	assert(NULL != (entry = archive_entry_new()));
+	archive_entry_copy_pathname_w(entry, L"\u4f60\u597d.txt");
+	archive_entry_set_ino(entry, 1);
+	archive_entry_set_dev(entry, 2);
+	archive_entry_set_nlink(entry, 2);
+	archive_entry_set_size(entry, 10);
+	archive_entry_linkify(resolver, &entry, &e2);
+
+	/* Shouldn't be altered, since it wasn't seen before. */
+	assert(e2 == NULL);
+	assertEqualWString(L"\u4f60\u597d.txt", archive_entry_pathname_w(entry));
+	assertEqualWString(NULL, archive_entry_hardlink_w(entry));
+	assertEqualInt(10, archive_entry_size(entry));
+
+	/* Link to the same file contents, but a new unicode name. */
+	archive_entry_copy_pathname_w(entry, L"\u518d\u89c1.txt");
+	archive_entry_linkify(resolver, &entry, &e2);
+
+	/* Size & link path should have changed. */
+	assert(e2 == NULL);
+	assertEqualWString(L"\u518d\u89c1.txt", archive_entry_pathname_w(entry));
+	assertEqualWString(L"\u4f60\u597d.txt", archive_entry_hardlink_w(entry));
+	assertEqualInt(0, archive_entry_size(entry));
+
+	archive_entry_free(entry);
+	archive_entry_linkresolver_free(resolver);
+#endif
+}

From 98f7bbd6eaa4216a8a98e2d1c168ca244d0a95a6 Mon Sep 17 00:00:00 2001
From: Sevan Janiyan <venture37@geeklan.co.uk>
Date: Tue, 11 Jun 2024 05:35:49 +0100
Subject: [PATCH 03/14] configure.ac: autoupdate (#2223)

Clear up the autoconf warnings.
---
 configure.ac | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/configure.ac b/configure.ac
index 7250b686fa..75c71d5c42 100644
--- a/configure.ac
+++ b/configure.ac
@@ -113,8 +113,8 @@ AC_PROG_CC
 AM_PROG_CC_C_O
 AC_PROG_CPP
 AC_USE_SYSTEM_EXTENSIONS
-AC_LIBTOOL_WIN32_DLL
-AC_PROG_LIBTOOL
+
+LT_INIT([win32-dll])
 AC_CHECK_TOOL([STRIP],[strip])
 AC_PROG_MKDIR_P
 
@@ -777,7 +777,6 @@ AX_COMPILE_CHECK_SIZEOF(long)
 AC_CHECK_HEADERS_ONCE([sys/time.h])
 
 # Checks for library functions.
-AC_PROG_GCC_TRADITIONAL
 AC_HEADER_MAJOR
 AC_FUNC_FSEEKO
 AC_FUNC_MEMCMP

From ffa43aef31874b878e897dcb56f2791302e96dcb Mon Sep 17 00:00:00 2001
From: Lukas Javorsky <ljavorsk@redhat.com>
Date: Tue, 11 Jun 2024 06:41:25 +0200
Subject: [PATCH 04/14] Use calloc instead of malloc to clear the memory from
 leftovers (#2207)

This ensures that the buffer is properly initialized and does not
contain any leftover data from previous operations. It is used later in
the `archive_entry_copy_hardlink_l` function call and could be
uninitialized.
---
 libarchive/archive_read_support_format_iso9660.c | 4 ++--
 libarchive/archive_read_support_format_xar.c     | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/libarchive/archive_read_support_format_iso9660.c b/libarchive/archive_read_support_format_iso9660.c
index db5cdb67f1..25ab11bf59 100644
--- a/libarchive/archive_read_support_format_iso9660.c
+++ b/libarchive/archive_read_support_format_iso9660.c
@@ -1212,7 +1212,7 @@ archive_read_format_iso9660_read_header(struct archive_read *a,
 			}
 		}
 		if (iso9660->utf16be_previous_path == NULL) {
-			iso9660->utf16be_previous_path = malloc(UTF16_NAME_MAX);
+			iso9660->utf16be_previous_path = calloc(1, UTF16_NAME_MAX);
 			if (iso9660->utf16be_previous_path == NULL) {
 				archive_set_error(&a->archive, ENOMEM,
 				    "No memory");
@@ -3033,7 +3033,7 @@ heap_add_entry(struct archive_read *a, struct heap_queue *heap,
 			return (ARCHIVE_FATAL);
 		}
 		new_pending_files = (struct file_info **)
-		    malloc(new_size * sizeof(new_pending_files[0]));
+		    calloc(new_size, sizeof(new_pending_files[0]));
 		if (new_pending_files == NULL) {
 			archive_set_error(&a->archive,
 			    ENOMEM, "Out of memory");
diff --git a/libarchive/archive_read_support_format_xar.c b/libarchive/archive_read_support_format_xar.c
index b9bef05161..dbc31df94e 100644
--- a/libarchive/archive_read_support_format_xar.c
+++ b/libarchive/archive_read_support_format_xar.c
@@ -1242,7 +1242,7 @@ heap_add_entry(struct archive_read *a,
 			return (ARCHIVE_FATAL);
 		}
 		new_pending_files = (struct xar_file **)
-		    malloc(new_size * sizeof(new_pending_files[0]));
+		    calloc(new_size, sizeof(new_pending_files[0]));
 		if (new_pending_files == NULL) {
 			archive_set_error(&a->archive,
 			    ENOMEM, "Out of memory");

From bea3a56118b4b4e5ac652f94685b80d3d1b82611 Mon Sep 17 00:00:00 2001
From: Mrmaxmeier <3913977+Mrmaxmeier@users.noreply.github.com>
Date: Wed, 12 Jun 2024 20:57:20 +0200
Subject: [PATCH 05/14] Fuzzing: Expose `DONT_FAIL_ON_CRC_ERROR` as a CMake
 option and honor it in the rar5 decoder (#2229)

Hey,

the fuzzing infrastructure over at OSSFuzz builds libarchive with the
CMake option `-DDONT_FAIL_ON_CRC_ERROR=1`.

https://github.com/google/oss-fuzz/blob/e4643b64b3af4932bff23bb87afdfbac2a301969/projects/libarchive/build.sh#L35
This, unfortunately, does not do anything since it's never been defined
as an option.

Building the fuzzers with CRC checks disabled should improve fuzzing
efficacy a bunch.

Thanks!
---
 CMakeLists.txt                                | 5 +++++
 libarchive/archive_read_support_format_rar5.c | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3c86789c8b..7bfaf4cfa7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2173,6 +2173,11 @@ IF(APPLE)
   ADD_DEFINITIONS(-Wno-deprecated-declarations)
 ENDIF(APPLE)
 
+OPTION(DONT_FAIL_ON_CRC_ERROR "Ignore CRC errors during parsing (For fuzzing)" OFF)
+IF(DONT_FAIL_ON_CRC_ERROR)
+  ADD_DEFINITIONS(-DDONT_FAIL_ON_CRC_ERROR=1)
+ENDIF(DONT_FAIL_ON_CRC_ERROR)
+
 IF(ENABLE_TEST)
   ADD_CUSTOM_TARGET(run_all_tests)
 ENDIF(ENABLE_TEST)
diff --git a/libarchive/archive_read_support_format_rar5.c b/libarchive/archive_read_support_format_rar5.c
index e06effe8b4..bd5a02179f 100644
--- a/libarchive/archive_read_support_format_rar5.c
+++ b/libarchive/archive_read_support_format_rar5.c
@@ -2229,10 +2229,12 @@ static int process_base_block(struct archive_read* a,
 	/* Verify the CRC32 of the header data. */
 	computed_crc = (uint32_t) crc32(0, p, (int) hdr_size);
 	if(computed_crc != hdr_crc) {
+#ifndef DONT_FAIL_ON_CRC_ERROR
 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
 		    "Header CRC error");
 
 		return ARCHIVE_FATAL;
+#endif
 	}
 
 	/* If the checksum is OK, we proceed with parsing. */

From ad26e379dbe8943c99fd7a9b078e2ed4e23ab8e9 Mon Sep 17 00:00:00 2001
From: Duncan Horn <40036384+dunhor@users.noreply.github.com>
Date: Wed, 12 Jun 2024 12:00:24 -0700
Subject: [PATCH 06/14] Add unicode test for creating zip files on Windows
 (#2231)

There's no bug fix here - this just adds a test to verify that zip
creation when using the _w functions works as expected on Windows.

Note: this is a partial cherry-pick from
https://github.com/libarchive/libarchive/pull/2095, which I'm going to
go through and break into smaller pieces in hopes of getting some things
in while discussion of other things can continue.
---
 libarchive/test/test_zip_filename_encoding.c | 95 ++++++++++++++++++++
 1 file changed, 95 insertions(+)

diff --git a/libarchive/test/test_zip_filename_encoding.c b/libarchive/test/test_zip_filename_encoding.c
index 448fb9b1d4..b6786f2c3b 100644
--- a/libarchive/test/test_zip_filename_encoding.c
+++ b/libarchive/test/test_zip_filename_encoding.c
@@ -527,3 +527,98 @@ DEFINE_TEST(test_zip_filename_encoding_CP932)
 	assertEqualInt(0, buff[7]);
 	assertEqualMem(buff + 30, "abcABC", 6);
 }
+
+DEFINE_TEST(test_zip_filename_encoding_UTF16_win)
+{
+#if !defined(_WIN32) || defined(__CYGWIN__)
+	skipping("This test is meant to verify unicode string handling"
+		" on Windows with UTF-16 names");
+	return;
+#else
+	struct archive *a;
+	struct archive_entry *entry;
+	char buff[4096];
+	size_t used;
+
+	/*
+	 * Don't call setlocale because we're verifying that the '_w' functions
+	 * work as expected when 'hdrcharset' is UTF-8
+	 */
+
+	/* Part 1: file */
+	a = archive_write_new();
+	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
+	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
+		skipping("This system cannot convert character-set"
+		    " from UTF-16 to UTF-8.");
+		archive_write_free(a);
+		return;
+	}
+	assertEqualInt(ARCHIVE_OK,
+	    archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+	entry = archive_entry_new2(a);
+	/* Set the filename using a UTF-16 string */
+	archive_entry_copy_pathname_w(entry, L"\u8868.txt");
+	archive_entry_set_filetype(entry, AE_IFREG);
+	archive_entry_set_size(entry, 0);
+	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+	archive_entry_free(entry);
+	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+	/* A bit 11 of general purpose flag should be 1,
+	 * which indicates the filename charset is UTF-8. */
+	assertEqualInt(0x08, buff[7]);
+	/* Check UTF-8 version. */
+	assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7);
+
+	/* Part 2: directory */
+	a = archive_write_new();
+	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
+	assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
+	assertEqualInt(ARCHIVE_OK,
+	    archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+	entry = archive_entry_new2(a);
+	/* Set the directory name using a UTF-16 string */
+	/* NOTE: Explicitly not adding trailing slash to test that code path */
+	archive_entry_copy_pathname_w(entry, L"\u8868");
+	archive_entry_set_filetype(entry, AE_IFDIR);
+	archive_entry_set_size(entry, 0);
+	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+	archive_entry_free(entry);
+	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+	/* A bit 11 of general purpose flag should be 1,
+	 * which indicates the filename charset is UTF-8. */
+	assertEqualInt(0x08, buff[7]);
+	/* Check UTF-8 version. */
+	assertEqualMem(buff+ 30, "\xE8\xA1\xA8/", 4);
+
+	/* Part 3: symlink */
+	a = archive_write_new();
+	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
+	assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
+	assertEqualInt(ARCHIVE_OK,
+	    archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+	entry = archive_entry_new2(a);
+	/* Set the symlink target using a UTF-16 string */
+	archive_entry_set_pathname(entry, "link.txt");
+	archive_entry_copy_symlink_w(entry, L"\u8868.txt");
+	archive_entry_set_filetype(entry, AE_IFLNK);
+	archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE);
+	archive_entry_set_size(entry, 0);
+	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+	archive_entry_free(entry);
+	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+	/* A bit 11 of general purpose flag should be 0,
+	 * because the file name is ASCII. */
+	assertEqualInt(0, buff[7]);
+	/* Check UTF-8 version. */
+	assertEqualMem(buff + 38, "\xE8\xA1\xA8.txt", 7);
+
+	/* NOTE: ZIP does not support hardlinks */
+#endif
+}

From 6ee1eebefdf41f36ef1a548c9a7000d132c453f3 Mon Sep 17 00:00:00 2001
From: Duncan Horn <40036384+dunhor@users.noreply.github.com>
Date: Wed, 12 Jun 2024 12:01:40 -0700
Subject: [PATCH 07/14] Update ustar creation sanity check to use WCS path on
 Windows (#2230)

On Windows, the MBS pathname might be null if the string was set with a
WCS that can't be represented by the current locale. This is handled
properly by the rest of the code, but there's a sanity check that does
not make the proper distinction.

Note: this is a partial cherry-pick from
https://github.com/libarchive/libarchive/pull/2095, which I'm going to
go through and break into smaller pieces in hopes of getting some things
in while discussion of other things can continue.
---
 libarchive/archive_write_set_format_ustar.c   |   4 +
 .../test/test_ustar_filename_encoding.c       | 102 ++++++++++++++++++
 2 files changed, 106 insertions(+)

diff --git a/libarchive/archive_write_set_format_ustar.c b/libarchive/archive_write_set_format_ustar.c
index 673487b27f..d8f0b45846 100644
--- a/libarchive/archive_write_set_format_ustar.c
+++ b/libarchive/archive_write_set_format_ustar.c
@@ -254,7 +254,11 @@ archive_write_ustar_header(struct archive_write *a, struct archive_entry *entry)
 		sconv = ustar->opt_sconv;
 
 	/* Sanity check. */
+#if defined(_WIN32) && !defined(__CYGWIN__)
+	if (archive_entry_pathname_w(entry) == NULL) {
+#else
 	if (archive_entry_pathname(entry) == NULL) {
+#endif
 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
 		    "Can't record entry in tar file without pathname");
 		return (ARCHIVE_FAILED);
diff --git a/libarchive/test/test_ustar_filename_encoding.c b/libarchive/test/test_ustar_filename_encoding.c
index cc62453f1c..1242bd1d3c 100644
--- a/libarchive/test/test_ustar_filename_encoding.c
+++ b/libarchive/test/test_ustar_filename_encoding.c
@@ -390,3 +390,105 @@ DEFINE_TEST(test_ustar_filename_encoding_CP932_UTF8)
 	assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
 }
 
+DEFINE_TEST(test_ustar_filename_encoding_UTF16_win)
+{
+#if !defined(_WIN32) || defined(__CYGWIN__)
+	skipping("This test is meant to verify unicode string handling"
+		" on Windows with UTF-16 names");
+	return;
+#else
+	struct archive *a;
+	struct archive_entry *entry;
+	char buff[4096];
+	size_t used;
+
+	/*
+	 * Don't call setlocale because we're verifying that the '_w' functions
+	 * work as expected when 'hdrcharset' is UTF-8
+	 */
+
+	/* Part 1: file */
+	a = archive_write_new();
+	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
+	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
+		skipping("This system cannot convert character-set"
+		    " from UTF-16 to UTF-8.");
+		archive_write_free(a);
+		return;
+	}
+	assertEqualInt(ARCHIVE_OK,
+	    archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+	entry = archive_entry_new2(a);
+	/* Set the filename using a UTF-16 string */
+	archive_entry_copy_pathname_w(entry, L"\u8868.txt");
+	archive_entry_set_filetype(entry, AE_IFREG);
+	archive_entry_set_size(entry, 0);
+	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+	archive_entry_free(entry);
+	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+	/* Check UTF-8 version. */
+	assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
+
+	/* Part 2: directory */
+	a = archive_write_new();
+	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
+	assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
+	assertEqualInt(ARCHIVE_OK,
+	    archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+	entry = archive_entry_new2(a);
+	/* Set the directory name using a UTF-16 string */
+	/* NOTE: Explicitly not adding trailing slash to test that code path */
+	archive_entry_copy_pathname_w(entry, L"\u8868");
+	archive_entry_set_filetype(entry, AE_IFDIR);
+	archive_entry_set_size(entry, 0);
+	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+	archive_entry_free(entry);
+	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+	/* Check UTF-8 version. */
+	assertEqualMem(buff, "\xE8\xA1\xA8/", 4);
+
+	/* Part 3: symlink */
+	a = archive_write_new();
+	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
+	assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
+	assertEqualInt(ARCHIVE_OK,
+	    archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+	entry = archive_entry_new2(a);
+	/* Set the symlink target using a UTF-16 string */
+	archive_entry_set_pathname(entry, "link.txt");
+	archive_entry_copy_symlink_w(entry, L"\u8868.txt");
+	archive_entry_set_filetype(entry, AE_IFLNK);
+	archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE);
+	archive_entry_set_size(entry, 0);
+	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+	archive_entry_free(entry);
+	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+	/* Check UTF-8 version. */
+	assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7);
+
+	/* Part 4: hardlink */
+	a = archive_write_new();
+	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
+	assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
+	assertEqualInt(ARCHIVE_OK,
+	    archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+	entry = archive_entry_new2(a);
+	/* Set the hardlink target using a UTF-16 string */
+	archive_entry_set_pathname(entry, "link.txt");
+	archive_entry_copy_hardlink_w(entry, L"\u8868.txt");
+	archive_entry_set_size(entry, 0);
+	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+	archive_entry_free(entry);
+	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+	/* Check UTF-8 version. */
+	assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7);
+#endif
+}

From e3c0c8d4800fda6b92bc1c50d3ece82ae688dae5 Mon Sep 17 00:00:00 2001
From: "Dustin L. Howett" <dustin@howett.net>
Date: Fri, 14 Jun 2024 19:13:42 -0500
Subject: [PATCH 08/14] xar: guard against file entries containing multiple
 name elements (#2236)

It appears that there are xar archives (in the form of Apple .pkg files)
that contain TOCs with duplicated name elements:

```xml
<file id="25">
  <data> ... </data>
  <type>file</type>
  <name>PackageInfo</name>
  <name>PackageInfo</name>
  <name>PackageInfo</name>
</file>
```

When libarchive encounters one such file, it will produce an
archive_entry named PackageInfoPackageInfoPackageInfo.

To produce a test archive, the XAR writer was modified to emit two name
elements.
---
 Makefile.am                                   |  1 +
 libarchive/archive_read_support_format_xar.c  |  3 ++
 libarchive/test/test_read_format_xar.c        | 31 +++++++++++++++++++
 ..._format_xar_duplicate_filename_node.xar.uu | 14 +++++++++
 4 files changed, 49 insertions(+)
 create mode 100644 libarchive/test/test_read_format_xar_duplicate_filename_node.xar.uu

diff --git a/Makefile.am b/Makefile.am
index eb88654fce..372ade1bda 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -937,6 +937,7 @@ libarchive_test_EXTRA_DIST=\
 	libarchive/test/test_read_format_ustar_filename_koi8r.tar.Z.uu \
 	libarchive/test/test_read_format_warc.warc.uu \
 	libarchive/test/test_read_format_xar_doublelink.xar.uu \
+	libarchive/test/test_read_format_xar_duplicate_filename_node.xar.uu \
 	libarchive/test/test_read_format_zip.zip.uu \
 	libarchive/test/test_read_format_zip_7075_utf8_paths.zip.uu \
 	libarchive/test/test_read_format_zip_7z_deflate.zip.uu \
diff --git a/libarchive/archive_read_support_format_xar.c b/libarchive/archive_read_support_format_xar.c
index dbc31df94e..7c327e02f9 100644
--- a/libarchive/archive_read_support_format_xar.c
+++ b/libarchive/archive_read_support_format_xar.c
@@ -2707,6 +2707,9 @@ xml_data(void *userData, const char *s, size_t len)
 
 	switch (xar->xmlsts) {
 	case FILE_NAME:
+		if (xar->file->has & HAS_PATHNAME)
+			break;
+
 		if (xar->file->parent != NULL) {
 			archive_string_concat(&(xar->file->pathname),
 			    &(xar->file->parent->pathname));
diff --git a/libarchive/test/test_read_format_xar.c b/libarchive/test/test_read_format_xar.c
index daff292169..41cbd7dab9 100644
--- a/libarchive/test/test_read_format_xar.c
+++ b/libarchive/test/test_read_format_xar.c
@@ -860,3 +860,34 @@ DEFINE_TEST(test_read_format_xar)
         verify(archive12, sizeof(archive12), verify12, NULL, GZIP);
 	verifyB(archive13, sizeof(archive13));
 }
+
+DEFINE_TEST(test_read_format_xar_duplicate_filename_node)
+{
+	static const char *reffiles[] =
+	{
+		"test_read_format_xar_duplicate_filename_node.xar",
+		NULL
+	};
+	struct archive_entry *ae;
+	struct archive *a;
+	int r;
+
+	extract_reference_files(reffiles);
+	assert((a = archive_read_new()) != NULL);
+	assertA(0 == archive_read_support_filter_all(a));
+
+	r = archive_read_support_format_xar(a);
+	if (r == ARCHIVE_WARN) {
+		skipping("xar reading not fully supported on this platform");
+		assertEqualInt(ARCHIVE_OK, archive_read_free(a));
+		return;
+	}
+
+	assertA(0 == archive_read_open_filenames(a, reffiles, 10240));
+
+	assertA(0 == archive_read_next_header(a, &ae));
+	assertEqualString("File", archive_entry_pathname(ae));
+
+	assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a));
+	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
+}
diff --git a/libarchive/test/test_read_format_xar_duplicate_filename_node.xar.uu b/libarchive/test/test_read_format_xar_duplicate_filename_node.xar.uu
new file mode 100644
index 0000000000..18cbcf5898
--- /dev/null
+++ b/libarchive/test/test_read_format_xar_duplicate_filename_node.xar.uu
@@ -0,0 +1,14 @@
+begin 644 test_read_format_xar_duplicate_filename_node.xar
+M>&%R(0`<``$````````!EP````````.]`````7B<A5-);H,P%-U7ZAT0>VIC
+MC('((;N<H-UT][$-L<H0@1,E.7T-`0H=TI6?__OSP'>7JG3.JNUT4V]=_P6[
+MCJI%(W5=;-VWU[T7N[OT^8E?H+6/XW#3B`%8*%H%QMIY1E<J)9A0#S//IZ\$
+M;PC=^(RCM<ID=U#BHSM53F>NI=JZW0%\=R0MW>1YITR*.1K1S'3ZUL?A:`"C
+M-S2YFP2Y+I6CI:UFX;0&&W]O&8X&^#]AKD>5Y@,QP)G0=2-M&E'"DIBC^V\F
+MI3IKH>HF]0-&68!]PM$LF[6JW@8S9CM4K<Q/6O:%]\\L*^ZR8BD3O[<<)^^V
+M'XM>][$>J%9K57B@"FM5"0;FG_V7JB[,(?5MHB-<D.,<R<^1SD-=S7203VLX
+M;0D<CZ46PS:ABU?<]-%UT-(`6G'09R6]W_<KIG$LJ9\%"6!)"&80R#`)<"!R
+M'(DL)T`CH2384K\[6J5U,2T(\V>8#&+(0L`DQED2!@!)1(,PS^.$,I&)+"-1
+M&`*)./KIZ:N]:-%?COHMO%\?NI\?1_T]?@(L`P/8\;44V2:R;"\D48GC?CRL
+C(UAG)C5XG./E`@``)@`8>)R%4TEN@S`4W5?J'1![:F.,@<@`
+`
+end

From d12cb9ab217a0b30c7ddc33d7ebad44fa22f7d5e Mon Sep 17 00:00:00 2001
From: alice <alice@ayaya.dev>
Date: Sat, 15 Jun 2024 02:26:14 +0200
Subject: [PATCH 09/14] rar: fix UB negation overflow for INT32_MIN address
 (#2235)

certain rar files seem to have the lowest possible address (INT32_MIN)
here, so negate after the cast to uint32_t rather than before it, which
evaluates correctly instead of invoking UB (caught via sanitize=undefined)

---

the backtrace looks something like:

```
* frame #0: 0x00007a1e3898727b libarchive.so.13`execute_filter [inlined] execute_filter_e8(filter=<unavailable>, vm=<unavailable>, pos=<unavailable>, e9also=<unavailable>) at archive_read_support_format_rar.c:3640:47
    frame #1: 0x00007a1e3898727b libarchive.so.13`execute_filter(a=<unavailable>, filter=0x00007a1e39e2f090, vm=0x00007a1e31b1efd0, pos=<unavailable>) at archive_read_support_format_rar.c:0
    frame #2: 0x00007a1e38983ac3 libarchive.so.13`read_data_compressed [inlined] run_filters(a=0x00007a1e34209700) at archive_read_support_format_rar.c:3395:8
    frame #3: 0x00007a1e38983a9e libarchive.so.13`read_data_compressed(a=0x00007a1e34209700, buff=0x00007a1e31a01fd8, size=0x00007a1e31a01fd0, offset=0x00007a1e31a01fc0, looper=1) at archive_read_support_format_rar.c:2083:12
    frame #4: 0x00007a1e38981b10 libarchive.so.13`archive_read_format_rar_read_data(a=0x00007a1e34209700, buff=0x00007a1e31a01fd8, size=0x00007a1e31a01fd0, offset=0x00007a1e31a01fc0) at archive_read_support_format_rar.c:1130:11
    frame #5: 0x00006158bc5d30d3 file-roller`extract_archive_thread(result=0x00007a1e3711e2b0, object=<unavailable>, cancellable=0x00007a1e3870bf20) at fr-archive-libarchive.c:999:17
    frame #6: 0x00007a1e39928d6d libgio-2.0.so.0`run_in_thread(job=<unavailable>, c=<unavailable>, _data=0x00007a1e326e9740) at gsimpleasyncresult.c:899:5
    frame #7: 0x00007a1e3990614e libgio-2.0.so.0`io_job_thread(task=<unavailable>, source_object=<unavailable>, task_data=0x00007a1e2307fc20, cancellable=<unavailable>) at gioscheduler.c:75:16
    frame #8: 0x00007a1e399433bf libgio-2.0.so.0`g_task_thread_pool_thread(thread_data=0x00007a1e35c18ab0, pool_data=<unavailable>) at gtask.c:1583:3
    frame #9: 0x00007a1e39db77e8 libglib-2.0.so.0`g_thread_pool_thread_proxy(data=<unavailable>) at gthreadpool.c:336:15
    frame #10: 0x00007a1e39db5bfb libglib-2.0.so.0`g_thread_proxy(data=0x00007a1e378147d0) at gthread.c:835:20
    frame #11: 0x00007a1e3a0b5c7b ld-musl-x86_64.so.1`start(p=0x00007a1e31a02170) at pthread_create.c:208:17
    frame #12: 0x00007a1e3a0b8a8b ld-musl-x86_64.so.1`__clone + 47
```

note the 0xd, which is 13, i.e. NegateOverflow in ubsan:

```
(lldb) x/1i $pc
->  0x7a1e3898727b: 67 0f b9 40 0d  other       ud1l   0xd(%eax), %eax
```

for reference, the totally legal rar file is
https://img.ayaya.dev/05WYGFOcRPN9 , and this seems to only crash when
extracted via file-roller (or inside nautilus)
---
 libarchive/archive_read_support_format_rar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libarchive/archive_read_support_format_rar.c b/libarchive/archive_read_support_format_rar.c
index 157836b421..93d738a5ca 100644
--- a/libarchive/archive_read_support_format_rar.c
+++ b/libarchive/archive_read_support_format_rar.c
@@ -3681,7 +3681,7 @@ execute_filter_e8(struct rar_filter *filter, struct rar_virtual_machine *vm, siz
     {
       uint32_t currpos = (uint32_t)pos + i + 1;
       int32_t address = (int32_t)vm_read_32(vm, i + 1);
-      if (address < 0 && currpos >= (uint32_t)-address)
+      if (address < 0 && currpos >= -(uint32_t)address)
         vm_write_32(vm, i + 1, address + filesize);
       else if (address >= 0 && (uint32_t)address < filesize)
         vm_write_32(vm, i + 1, address - currpos);

From 54c22ee9e1cbeb9259619fb507adb86998d98321 Mon Sep 17 00:00:00 2001
From: Duncan Horn <40036384+dunhor@users.noreply.github.com>
Date: Sat, 15 Jun 2024 22:20:00 -0700
Subject: [PATCH 10/14] Fix a couple issues with creating PAX archives (#2228)

Note: this is a partial cherry-pick from
https://github.com/libarchive/libarchive/pull/2095, which I'm going to
go through and break into smaller pieces in hopes of getting some things
in while discussion of other things can continue.

There are basically two fixes here:

The first is to check for the presence of the WCS pathname on Windows
before failing since the conversion from WCS -> MBS might fail. Later
execution already handles such paths correctly.

The second is to set the converted link name on the target entry where
relevant. Note that there has been prior discussion on this here:
https://github.com/libarchive/libarchive/pull/2095/files#r1531599325
---
 libarchive/archive_write_set_format_pax.c    | 16 ++++
 libarchive/test/test_pax_filename_encoding.c | 96 ++++++++++++++++++++
 2 files changed, 112 insertions(+)

diff --git a/libarchive/archive_write_set_format_pax.c b/libarchive/archive_write_set_format_pax.c
index e93333074a..4aace46825 100644
--- a/libarchive/archive_write_set_format_pax.c
+++ b/libarchive/archive_write_set_format_pax.c
@@ -608,7 +608,15 @@ archive_write_pax_header(struct archive_write *a,
 	const time_t ustar_max_mtime = get_ustar_max_mtime();
 
 	/* Sanity check. */
+#if defined(_WIN32) && !defined(__CYGWIN__)
+	/* NOTE: If the caller supplied a pathname that fails WCS conversion (e.g.
+	 * if it is invalid UTF-8), we are expected to return ARCHIVE_WARN later on
+	 * in execution, hence the check for both pointers */
+	if ((archive_entry_pathname_w(entry_original) == NULL) &&
+	    (archive_entry_pathname(entry_original) == NULL)) {
+#else
 	if (archive_entry_pathname(entry_original) == NULL) {
+#endif
 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
 			  "Can't record entry in tar file without pathname");
 		return (ARCHIVE_FAILED);
@@ -1032,6 +1040,14 @@ archive_write_pax_header(struct archive_write *a,
 					archive_entry_set_symlink(entry_main,
 					    "././@LongSymLink");
 			}
+			else {
+				/* Otherwise, has non-ASCII characters; update the paths to
+				 * however they got decoded above */
+				if (hardlink != NULL) 
+					archive_entry_set_hardlink(entry_main, linkpath);
+				else
+					archive_entry_set_symlink(entry_main, linkpath);
+			}
 			need_extension = 1;
 		}
 	}
diff --git a/libarchive/test/test_pax_filename_encoding.c b/libarchive/test/test_pax_filename_encoding.c
index 737641c5ab..3165b65dd3 100644
--- a/libarchive/test/test_pax_filename_encoding.c
+++ b/libarchive/test/test_pax_filename_encoding.c
@@ -579,6 +579,102 @@ DEFINE_TEST(test_pax_filename_encoding_KOI8R_CP1251)
 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
 }
 
+/*
+ * Verify that unicode filenames are correctly preserved on Windows
+ */
+DEFINE_TEST(test_pax_filename_encoding_UTF16_win)
+{
+#if !defined(_WIN32) || defined(__CYGWIN__)
+	skipping("This test is meant to verify unicode string handling"
+		" on Windows with UTF-16 names");
+	return;
+#else
+	struct archive *a;
+	struct archive_entry *entry;
+	char buff[0x2000];
+	size_t used;
+
+	/*
+	 * Don't call setlocale because we're verifying that the '_w' functions
+	 * work as expected when 'hdrcharset' is UTF-8
+	 */
+
+	/* Check if the platform completely supports the string conversion. */
+	a = archive_write_new();
+	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
+	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
+		skipping("This system cannot convert character-set"
+		    " from UTF-16 to UTF-8.");
+		archive_write_free(a);
+		return;
+	}
+
+	/* Re-create a write archive object since filenames should be written
+	 * in UTF-8 by default. */
+	archive_write_free(a);
+
+	a = archive_write_new();
+	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
+	assertEqualInt(ARCHIVE_OK,
+	    archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+	/* Part 1: file */
+	entry = archive_entry_new2(a);
+	archive_entry_copy_pathname_w(entry, L"\u4f60\u597d.txt");
+	archive_entry_set_filetype(entry, AE_IFREG);
+	archive_entry_set_size(entry, 0);
+	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+
+	/* Part 2: directory */
+	/* NOTE: Explicitly not adding trailing slash to test that code path */
+	archive_entry_copy_pathname_w(entry, L"\u043f\u0440\u0438");
+	archive_entry_set_filetype(entry, AE_IFDIR);
+	archive_entry_set_size(entry, 0);
+	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+
+	/* Part 3: symlink */
+	archive_entry_copy_pathname_w(entry, L"\u518d\u89c1.txt");
+	archive_entry_copy_symlink_w(entry, L"\u4f60\u597d.txt");
+	archive_entry_set_filetype(entry, AE_IFLNK);
+	archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE);
+	archive_entry_set_size(entry, 0);
+	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+
+	/* Part 4: hardlink */
+	archive_entry_copy_pathname_w(entry, L"\u665a\u5b89.txt");
+	archive_entry_copy_hardlink_w(entry, L"\u4f60\u597d.txt");
+	archive_entry_set_filetype(entry, AE_IFREG);
+	archive_entry_set_size(entry, 0);
+	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+
+	archive_entry_free(entry);
+	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+	/* Ensure that the names round trip properly */
+	a = archive_read_new();
+	archive_read_support_format_all(a);
+	archive_read_support_filter_all(a);
+	assertEqualInt(0, archive_read_open_memory(a, buff, used));
+
+	/* Read part 1: file */
+	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry));
+	assertEqualWString(L"\u4f60\u597d.txt", archive_entry_pathname_w(entry));
+
+	/* Read part 2: directory */
+	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry));
+	assertEqualWString(L"\u043f\u0440\u0438/", archive_entry_pathname_w(entry));
+
+	/* Read part 3: symlink */
+	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry));
+	assertEqualWString(L"\u518d\u89c1.txt", archive_entry_pathname_w(entry));
+	assertEqualWString(L"\u4f60\u597d.txt", archive_entry_symlink_w(entry));
+
+	/* Read part 4: hardlink */
+	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry));
+	assertEqualWString(L"\u665a\u5b89.txt", archive_entry_pathname_w(entry));
+	assertEqualWString(L"\u4f60\u597d.txt", archive_entry_hardlink_w(entry));
+#endif
+}
 
 DEFINE_TEST(test_pax_filename_encoding)
 {

From 2d8a5760c5ec553283a95a1aaca746f6eb472d0f Mon Sep 17 00:00:00 2001
From: Tim Kientzle <kientzle@acm.org>
Date: Sat, 15 Jun 2024 22:22:12 -0700
Subject: [PATCH 11/14] Parse tar headers incrementally (#2127)

This rebuilds the tar reader to parse all header data incrementally as
it appears in the stream.

This definitively fixes a longstanding issue with unsupported pax
attributes. Libarchive must limit the amount of data that it reads into
memory, and this has caused problems with large unknown attributes. By
scanning iteratively, we can instead identify an attribute by name and
then decide whether to read it into memory or whether to skip it without
reading.

This design also allows us to vary our sanity limits for different pax
attributes (e.g., an attribute that is a single number can be limited to
a few dozen bytes while an attribute holding an ACL is allowed to be a
few hundred kilobytes). This allows us to be a little more resistant to
malicious archives that might try to force allocation of very large
amounts of memory, though there is still work to be done here.

This includes a number of changes to archive_entry processing to allow
us to consistently keep the _first_ appearance of any given value
instead of the original architecture that recursively cached data in
memory in order to effectively process all the data from back-to-front.

Resolves #1855
Resolves #1939
---
 Makefile.am                                   |    2 +
 libarchive/archive_acl.c                      |   51 +-
 libarchive/archive_acl_private.h              |    2 +
 libarchive/archive_entry.c                    |  275 ++-
 libarchive/archive_entry.h                    |    5 +
 libarchive/archive_entry_private.h            |    4 +-
 libarchive/archive_read_support_format_tar.c  | 2091 +++++++++++------
 libarchive/test/CMakeLists.txt                |    1 +
 .../test/test_compat_solaris_pax_sparse.c     |    2 +-
 libarchive/test/test_entry.c                  |   14 +-
 libarchive/test/test_read_format_tar.c        |    2 +-
 .../test_read_format_tar_pax_large_attr.c     |   65 +
 ...st_read_format_tar_pax_large_attr.tar.Z.uu |  149 ++
 13 files changed, 1791 insertions(+), 872 deletions(-)
 create mode 100644 libarchive/test/test_read_format_tar_pax_large_attr.c
 create mode 100644 libarchive/test/test_read_format_tar_pax_large_attr.tar.Z.uu

diff --git a/Makefile.am b/Makefile.am
index 372ade1bda..1661d9c1a5 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -522,6 +522,7 @@ libarchive_test_SOURCES= \
 	libarchive/test/test_read_format_tar_empty_with_gnulabel.c \
 	libarchive/test/test_read_format_tar_filename.c \
 	libarchive/test/test_read_format_tar_invalid_pax_size.c \
+	libarchive/test/test_read_format_tar_pax_large_attr.c \
 	libarchive/test/test_read_format_tbz.c \
 	libarchive/test/test_read_format_tgz.c \
 	libarchive/test/test_read_format_tlz.c \
@@ -932,6 +933,7 @@ libarchive_test_EXTRA_DIST=\
 	libarchive/test/test_read_format_tar_empty_pax.tar.Z.uu \
 	libarchive/test/test_read_format_tar_filename_koi8r.tar.Z.uu \
 	libarchive/test/test_read_format_tar_invalid_pax_size.tar.uu \
+	libarchive/test/test_read_format_tar_pax_large_attr.tar.Z.uu \
 	libarchive/test/test_read_format_ustar_filename_cp866.tar.Z.uu \
 	libarchive/test/test_read_format_ustar_filename_eucjp.tar.Z.uu \
 	libarchive/test/test_read_format_ustar_filename_koi8r.tar.Z.uu \
diff --git a/libarchive/archive_acl.c b/libarchive/archive_acl.c
index 254324c713..77d99480f9 100644
--- a/libarchive/archive_acl.c
+++ b/libarchive/archive_acl.c
@@ -76,7 +76,7 @@ static int	is_nfs4_flags(const char *start, const char *end,
 		    int *result);
 static int	is_nfs4_perms(const char *start, const char *end,
 		    int *result);
-static void	next_field(const char **p, const char **start,
+static void	next_field(const char **p, size_t *l, const char **start,
 		    const char **end, char *sep);
 static void	append_entry(char **p, const char *prefix, int type,
 		    int tag, int flags, const char *name, int perm, int id);
@@ -1619,6 +1619,13 @@ next_field_w(const wchar_t **wp, const wchar_t **start,
 int
 archive_acl_from_text_l(struct archive_acl *acl, const char *text,
     int want_type, struct archive_string_conv *sc)
+{
+	return archive_acl_from_text_nl(acl, text, strlen(text), want_type, sc);
+}
+
+int
+archive_acl_from_text_nl(struct archive_acl *acl, const char *text,
+    size_t length, int want_type, struct archive_string_conv *sc)
 {
 	struct {
 		const char *start;
@@ -1649,7 +1656,7 @@ archive_acl_from_text_l(struct archive_acl *acl, const char *text,
 	ret = ARCHIVE_OK;
 	types = 0;
 
-	while (text != NULL &&  *text != '\0') {
+	while (text != NULL && length > 0 && *text != '\0') {
 		/*
 		 * Parse the fields out of the next entry,
 		 * advance 'text' to start of next entry.
@@ -1657,7 +1664,7 @@ archive_acl_from_text_l(struct archive_acl *acl, const char *text,
 		fields = 0;
 		do {
 			const char *start, *end;
-			next_field(&text, &start, &end, &sep);
+			next_field(&text, &length, &start, &end, &sep);
 			if (fields < numfields) {
 				field[fields].start = start;
 				field[fields].end = end;
@@ -2047,7 +2054,7 @@ is_nfs4_flags(const char *start, const char *end, int *permset)
 }
 
 /*
- * Match "[:whitespace:]*(.*)[:whitespace:]*[:,\n]".  *wp is updated
+ * Match "[:whitespace:]*(.*)[:whitespace:]*[:,\n]".  *p is updated
  * to point to just after the separator.  *start points to the first
  * character of the matched text and *end just after the last
  * character of the matched identifier.  In particular *end - *start
@@ -2055,42 +2062,42 @@ is_nfs4_flags(const char *start, const char *end, int *permset)
  * whitespace.
  */
 static void
-next_field(const char **p, const char **start,
+next_field(const char **p, size_t *l, const char **start,
     const char **end, char *sep)
 {
 	/* Skip leading whitespace to find start of field. */
-	while (**p == ' ' || **p == '\t' || **p == '\n') {
+	while (*l > 0 && (**p == ' ' || **p == '\t' || **p == '\n')) {
 		(*p)++;
+		(*l)--;
 	}
 	*start = *p;
 
-	/* Scan for the separator. */
-	while (**p != '\0' && **p != ',' && **p != ':' && **p != '\n' &&
-	    **p != '#') {
+	/* Locate end of field, trim trailing whitespace if necessary */
+	while (*l > 0 && **p != ' ' && **p != '\t' && **p != '\n' && **p != ',' && **p != ':' && **p != '#') {
 		(*p)++;
+		(*l)--;
 	}
-	*sep = **p;
+	*end = *p;
 
-	/* Locate end of field, trim trailing whitespace if necessary */
-	if (*p == *start) {
-		*end = *p;
-	} else {
-		*end = *p - 1;
-		while (**end == ' ' || **end == '\t' || **end == '\n') {
-			(*end)--;
-		}
-		(*end)++;
+	/* Scan for the separator. */
+	while (*l > 0 && **p != ',' && **p != ':' && **p != '\n' && **p != '#') {
+		(*p)++;
+		(*l)--;
 	}
+	*sep = **p;
 
 	/* Handle in-field comments */
 	if (*sep == '#') {
-		while (**p != '\0' && **p != ',' && **p != '\n') {
+		while (*l > 0 && **p != ',' && **p != '\n') {
 			(*p)++;
+			(*l)--;
 		}
 		*sep = **p;
 	}
 
-	/* Adjust scanner location. */
-	if (**p != '\0')
+	/* Skip separator. */
+	if (*l > 0) {
 		(*p)++;
+		(*l)--;
+	}
 }
diff --git a/libarchive/archive_acl_private.h b/libarchive/archive_acl_private.h
index 750b4dd7d8..2c9b505343 100644
--- a/libarchive/archive_acl_private.h
+++ b/libarchive/archive_acl_private.h
@@ -77,5 +77,7 @@ int archive_acl_from_text_w(struct archive_acl *, const wchar_t * /* wtext */,
     int /* type */);
 int archive_acl_from_text_l(struct archive_acl *, const char * /* text */,
     int /* type */, struct archive_string_conv *);
+int archive_acl_from_text_nl(struct archive_acl *, const char * /* text */,
+    size_t /* size of text */, int /* type */, struct archive_string_conv *);
 
 #endif /* ARCHIVE_ENTRY_PRIVATE_H_INCLUDED */
diff --git a/libarchive/archive_entry.c b/libarchive/archive_entry.c
index 9463233e6a..178f7f6283 100644
--- a/libarchive/archive_entry.c
+++ b/libarchive/archive_entry.c
@@ -118,7 +118,7 @@
 static char *	 ae_fflagstostr(unsigned long bitset, unsigned long bitclear);
 static const wchar_t	*ae_wcstofflags(const wchar_t *stringp,
 		    unsigned long *setp, unsigned long *clrp);
-static const char	*ae_strtofflags(const char *stringp,
+static const char	*ae_strtofflags(const char *stringp, size_t length,
 		    unsigned long *setp, unsigned long *clrp);
 
 #ifndef HAVE_WCSCPY
@@ -157,10 +157,9 @@ archive_entry_clear(struct archive_entry *entry)
 		return (NULL);
 	archive_mstring_clean(&entry->ae_fflags_text);
 	archive_mstring_clean(&entry->ae_gname);
-	archive_mstring_clean(&entry->ae_hardlink);
+	archive_mstring_clean(&entry->ae_linkname);
 	archive_mstring_clean(&entry->ae_pathname);
 	archive_mstring_clean(&entry->ae_sourcepath);
-	archive_mstring_clean(&entry->ae_symlink);
 	archive_mstring_clean(&entry->ae_uname);
 	archive_entry_copy_mac_metadata(entry, NULL, 0);
 	archive_acl_clear(&entry->acl);
@@ -195,10 +194,9 @@ archive_entry_clone(struct archive_entry *entry)
 	 * character sets are different? XXX */
 	archive_mstring_copy(&entry2->ae_fflags_text, &entry->ae_fflags_text);
 	archive_mstring_copy(&entry2->ae_gname, &entry->ae_gname);
-	archive_mstring_copy(&entry2->ae_hardlink, &entry->ae_hardlink);
+	archive_mstring_copy(&entry2->ae_linkname, &entry->ae_linkname);
 	archive_mstring_copy(&entry2->ae_pathname, &entry->ae_pathname);
 	archive_mstring_copy(&entry2->ae_sourcepath, &entry->ae_sourcepath);
-	archive_mstring_copy(&entry2->ae_symlink, &entry->ae_symlink);
 	entry2->ae_set = entry->ae_set;
 	archive_mstring_copy(&entry2->ae_uname, &entry->ae_uname);
 
@@ -477,6 +475,15 @@ _archive_entry_gname_l(struct archive_entry *entry,
 	return (archive_mstring_get_mbs_l(entry->archive, &entry->ae_gname, p, len, sc));
 }
 
+void
+archive_entry_set_link_to_hardlink(struct archive_entry *entry)
+{
+	if ((entry->ae_set & AE_SET_SYMLINK) != 0) {
+		entry->ae_set &= ~AE_SET_SYMLINK;
+	}
+	entry->ae_set |= AE_SET_HARDLINK;
+}
+
 const char *
 archive_entry_hardlink(struct archive_entry *entry)
 {
@@ -484,7 +491,7 @@ archive_entry_hardlink(struct archive_entry *entry)
 	if ((entry->ae_set & AE_SET_HARDLINK) == 0)
 		return (NULL);
 	if (archive_mstring_get_mbs(
-	    entry->archive, &entry->ae_hardlink, &p) == 0)
+	    entry->archive, &entry->ae_linkname, &p) == 0)
 		return (p);
 	if (errno == ENOMEM)
 		__archive_errx(1, "No memory");
@@ -498,7 +505,7 @@ archive_entry_hardlink_utf8(struct archive_entry *entry)
 	if ((entry->ae_set & AE_SET_HARDLINK) == 0)
 		return (NULL);
 	if (archive_mstring_get_utf8(
-	    entry->archive, &entry->ae_hardlink, &p) == 0)
+	    entry->archive, &entry->ae_linkname, &p) == 0)
 		return (p);
 	if (errno == ENOMEM)
 		__archive_errx(1, "No memory");
@@ -512,7 +519,7 @@ archive_entry_hardlink_w(struct archive_entry *entry)
 	if ((entry->ae_set & AE_SET_HARDLINK) == 0)
 		return (NULL);
 	if (archive_mstring_get_wcs(
-	    entry->archive, &entry->ae_hardlink, &p) == 0)
+	    entry->archive, &entry->ae_linkname, &p) == 0)
 		return (p);
 	if (errno == ENOMEM)
 		__archive_errx(1, "No memory");
@@ -528,7 +535,7 @@ _archive_entry_hardlink_l(struct archive_entry *entry,
 		*len = 0;
 		return (0);
 	}
-	return (archive_mstring_get_mbs_l(entry->archive, &entry->ae_hardlink, p, len, sc));
+	return (archive_mstring_get_mbs_l(entry->archive, &entry->ae_linkname, p, len, sc));
 }
 
 la_int64_t
@@ -648,32 +655,50 @@ archive_entry_perm_is_set(struct archive_entry *entry)
 	return (entry->ae_set & AE_SET_PERM);
 }
 
+int
+archive_entry_rdev_is_set(struct archive_entry *entry)
+{
+	return (entry->ae_set & AE_SET_RDEV);
+}
+
 dev_t
 archive_entry_rdev(struct archive_entry *entry)
 {
-	if (entry->ae_stat.aest_rdev_is_broken_down)
-		return ae_makedev(entry->ae_stat.aest_rdevmajor,
-		    entry->ae_stat.aest_rdevminor);
-	else
-		return (entry->ae_stat.aest_rdev);
+	if (archive_entry_rdev_is_set(entry)) {
+		if (entry->ae_stat.aest_rdev_is_broken_down)
+			return ae_makedev(entry->ae_stat.aest_rdevmajor,
+			    entry->ae_stat.aest_rdevminor);
+		else
+			return (entry->ae_stat.aest_rdev);
+	} else {
+		return 0;
+	}
 }
 
 dev_t
 archive_entry_rdevmajor(struct archive_entry *entry)
 {
-	if (entry->ae_stat.aest_rdev_is_broken_down)
-		return (entry->ae_stat.aest_rdevmajor);
-	else
-		return major(entry->ae_stat.aest_rdev);
+	if (archive_entry_rdev_is_set(entry)) {
+		if (entry->ae_stat.aest_rdev_is_broken_down)
+			return (entry->ae_stat.aest_rdevmajor);
+		else
+			return major(entry->ae_stat.aest_rdev);
+	} else {
+		return 0;
+	}
 }
 
 dev_t
 archive_entry_rdevminor(struct archive_entry *entry)
 {
-	if (entry->ae_stat.aest_rdev_is_broken_down)
-		return (entry->ae_stat.aest_rdevminor);
-	else
-		return minor(entry->ae_stat.aest_rdev);
+	if (archive_entry_rdev_is_set(entry)) {
+		if (entry->ae_stat.aest_rdev_is_broken_down)
+			return (entry->ae_stat.aest_rdevminor);
+		else
+			return minor(entry->ae_stat.aest_rdev);
+	} else {
+		return 0;
+	}
 }
 
 la_int64_t
@@ -717,13 +742,22 @@ archive_entry_symlink(struct archive_entry *entry)
 	if ((entry->ae_set & AE_SET_SYMLINK) == 0)
 		return (NULL);
 	if (archive_mstring_get_mbs(
-	    entry->archive, &entry->ae_symlink, &p) == 0)
+	    entry->archive, &entry->ae_linkname, &p) == 0)
 		return (p);
 	if (errno == ENOMEM)
 		__archive_errx(1, "No memory");
 	return (NULL);
 }
 
+void
+archive_entry_set_link_to_symlink(struct archive_entry *entry)
+{
+	if ((entry->ae_set & AE_SET_HARDLINK) != 0) {
+		entry->ae_set &= ~AE_SET_HARDLINK;
+	}
+	entry->ae_set |= AE_SET_SYMLINK;
+}
+
 int
 archive_entry_symlink_type(struct archive_entry *entry)
 {
@@ -737,7 +771,7 @@ archive_entry_symlink_utf8(struct archive_entry *entry)
 	if ((entry->ae_set & AE_SET_SYMLINK) == 0)
 		return (NULL);
 	if (archive_mstring_get_utf8(
-	    entry->archive, &entry->ae_symlink, &p) == 0)
+	    entry->archive, &entry->ae_linkname, &p) == 0)
 		return (p);
 	if (errno == ENOMEM)
 		__archive_errx(1, "No memory");
@@ -751,7 +785,7 @@ archive_entry_symlink_w(struct archive_entry *entry)
 	if ((entry->ae_set & AE_SET_SYMLINK) == 0)
 		return (NULL);
 	if (archive_mstring_get_wcs(
-	    entry->archive, &entry->ae_symlink, &p) == 0)
+	    entry->archive, &entry->ae_linkname, &p) == 0)
 		return (p);
 	if (errno == ENOMEM)
 		__archive_errx(1, "No memory");
@@ -767,7 +801,7 @@ _archive_entry_symlink_l(struct archive_entry *entry,
 		*len = 0;
 		return (0);
 	}
-	return (archive_mstring_get_mbs_l(entry->archive, &entry->ae_symlink, p, len, sc));
+	return (archive_mstring_get_mbs_l(entry->archive, &entry->ae_linkname, p, len, sc));
 }
 
 la_int64_t
@@ -864,10 +898,17 @@ archive_entry_set_fflags(struct archive_entry *entry,
 
 const char *
 archive_entry_copy_fflags_text(struct archive_entry *entry,
-    const char *flags)
+	const char *flags)
+{
+	return archive_entry_copy_fflags_text_len(entry, flags, strlen(flags));
+}
+
+const char *
+archive_entry_copy_fflags_text_len(struct archive_entry *entry,
+    const char *flags, size_t flags_length)
 {
-	archive_mstring_copy_mbs(&entry->ae_fflags_text, flags);
-	return (ae_strtofflags(flags,
+	archive_mstring_copy_mbs_len(&entry->ae_fflags_text, flags, flags_length);
+	return (ae_strtofflags(flags, flags_length,
 		    &entry->ae_fflags_set, &entry->ae_fflags_clear));
 }
 
@@ -949,17 +990,24 @@ archive_entry_set_ino64(struct archive_entry *entry, la_int64_t ino)
 void
 archive_entry_set_hardlink(struct archive_entry *entry, const char *target)
 {
-	archive_mstring_copy_mbs(&entry->ae_hardlink, target);
-	if (target != NULL)
-		entry->ae_set |= AE_SET_HARDLINK;
-	else
+	if (target == NULL) {
 		entry->ae_set &= ~AE_SET_HARDLINK;
+		if (entry->ae_set & AE_SET_SYMLINK) {
+			return;
+		}
+	} else {
+		entry->ae_set |= AE_SET_HARDLINK;
+	}
+	entry->ae_set &= ~AE_SET_SYMLINK;
+	archive_mstring_copy_mbs(&entry->ae_linkname, target);
 }
 
 void
 archive_entry_set_hardlink_utf8(struct archive_entry *entry, const char *target)
 {
-	archive_mstring_copy_utf8(&entry->ae_hardlink, target);
+	if (target == NULL && (entry->ae_set & AE_SET_SYMLINK))
+		return;
+	archive_mstring_copy_utf8(&entry->ae_linkname, target);
 	if (target != NULL)
 		entry->ae_set |= AE_SET_HARDLINK;
 	else
@@ -969,7 +1017,9 @@ archive_entry_set_hardlink_utf8(struct archive_entry *entry, const char *target)
 void
 archive_entry_copy_hardlink(struct archive_entry *entry, const char *target)
 {
-	archive_mstring_copy_mbs(&entry->ae_hardlink, target);
+	if (target == NULL && (entry->ae_set & AE_SET_SYMLINK))
+		return;
+	archive_mstring_copy_mbs(&entry->ae_linkname, target);
 	if (target != NULL)
 		entry->ae_set |= AE_SET_HARDLINK;
 	else
@@ -979,7 +1029,9 @@ archive_entry_copy_hardlink(struct archive_entry *entry, const char *target)
 void
 archive_entry_copy_hardlink_w(struct archive_entry *entry, const wchar_t *target)
 {
-	archive_mstring_copy_wcs(&entry->ae_hardlink, target);
+	if (target == NULL && (entry->ae_set & AE_SET_SYMLINK))
+		return;
+	archive_mstring_copy_wcs(&entry->ae_linkname, target);
 	if (target != NULL)
 		entry->ae_set |= AE_SET_HARDLINK;
 	else
@@ -989,12 +1041,14 @@ archive_entry_copy_hardlink_w(struct archive_entry *entry, const wchar_t *target
 int
 archive_entry_update_hardlink_utf8(struct archive_entry *entry, const char *target)
 {
+	if (target == NULL && (entry->ae_set & AE_SET_SYMLINK))
+		return (0);
 	if (target != NULL)
 		entry->ae_set |= AE_SET_HARDLINK;
 	else
 		entry->ae_set &= ~AE_SET_HARDLINK;
 	if (archive_mstring_update_utf8(entry->archive,
-	    &entry->ae_hardlink, target) == 0)
+	    &entry->ae_linkname, target) == 0)
 		return (1);
 	if (errno == ENOMEM)
 		__archive_errx(1, "No memory");
@@ -1007,7 +1061,9 @@ _archive_entry_copy_hardlink_l(struct archive_entry *entry,
 {
 	int r;
 
-	r = archive_mstring_copy_mbs_len_l(&entry->ae_hardlink,
+	if (target == NULL && (entry->ae_set & AE_SET_SYMLINK))
+		return (0);
+	r = archive_mstring_copy_mbs_len_l(&entry->ae_linkname,
 	    target, len, sc);
 	if (target != NULL && r == 0)
 		entry->ae_set |= AE_SET_HARDLINK;
@@ -1098,51 +1154,50 @@ archive_entry_set_devminor(struct archive_entry *entry, dev_t m)
 void
 archive_entry_set_link(struct archive_entry *entry, const char *target)
 {
-	if (entry->ae_set & AE_SET_SYMLINK)
-		archive_mstring_copy_mbs(&entry->ae_symlink, target);
-	else
-		archive_mstring_copy_mbs(&entry->ae_hardlink, target);
+	archive_mstring_copy_mbs(&entry->ae_linkname, target);
+	if ((entry->ae_set & AE_SET_SYMLINK) == 0) {
+		entry->ae_set |= AE_SET_HARDLINK;
+	}
 }
 
 void
 archive_entry_set_link_utf8(struct archive_entry *entry, const char *target)
 {
-	if (entry->ae_set & AE_SET_SYMLINK)
-		archive_mstring_copy_utf8(&entry->ae_symlink, target);
-	else
-		archive_mstring_copy_utf8(&entry->ae_hardlink, target);
+	archive_mstring_copy_utf8(&entry->ae_linkname, target);
+	if ((entry->ae_set & AE_SET_SYMLINK) == 0) {
+		entry->ae_set |= AE_SET_HARDLINK;
+	}
 }
 
 /* Set symlink if symlink is already set, else set hardlink. */
 void
 archive_entry_copy_link(struct archive_entry *entry, const char *target)
 {
-	if (entry->ae_set & AE_SET_SYMLINK)
-		archive_mstring_copy_mbs(&entry->ae_symlink, target);
-	else
-		archive_mstring_copy_mbs(&entry->ae_hardlink, target);
+	archive_mstring_copy_mbs(&entry->ae_linkname, target);
+	if ((entry->ae_set & AE_SET_SYMLINK) == 0) {
+		entry->ae_set |= AE_SET_HARDLINK;
+	}
 }
 
 /* Set symlink if symlink is already set, else set hardlink. */
 void
 archive_entry_copy_link_w(struct archive_entry *entry, const wchar_t *target)
 {
-	if (entry->ae_set & AE_SET_SYMLINK)
-		archive_mstring_copy_wcs(&entry->ae_symlink, target);
-	else
-		archive_mstring_copy_wcs(&entry->ae_hardlink, target);
+	archive_mstring_copy_wcs(&entry->ae_linkname, target);
+	if ((entry->ae_set & AE_SET_SYMLINK) == 0) {
+		entry->ae_set |= AE_SET_HARDLINK;
+	}
 }
 
 int
 archive_entry_update_link_utf8(struct archive_entry *entry, const char *target)
 {
 	int r;
-	if (entry->ae_set & AE_SET_SYMLINK)
-		r = archive_mstring_update_utf8(entry->archive,
-		    &entry->ae_symlink, target);
-	else
-		r = archive_mstring_update_utf8(entry->archive,
-		    &entry->ae_hardlink, target);
+	r = archive_mstring_update_utf8(entry->archive,
+		    &entry->ae_linkname, target);
+	if ((entry->ae_set & AE_SET_SYMLINK) == 0) {
+		entry->ae_set |= AE_SET_HARDLINK;
+	}
 	if (r == 0)
 		return (1);
 	if (errno == ENOMEM)
@@ -1156,12 +1211,11 @@ _archive_entry_copy_link_l(struct archive_entry *entry,
 {
 	int r;
 
-	if (entry->ae_set & AE_SET_SYMLINK)
-		r = archive_mstring_copy_mbs_len_l(&entry->ae_symlink,
-		    target, len, sc);
-	else
-		r = archive_mstring_copy_mbs_len_l(&entry->ae_hardlink,
+	r = archive_mstring_copy_mbs_len_l(&entry->ae_linkname,
 		    target, len, sc);
+	if ((entry->ae_set & AE_SET_SYMLINK) == 0) {
+		entry->ae_set |= AE_SET_HARDLINK;
+	}
 	return (r);
 }
 
@@ -1255,6 +1309,9 @@ archive_entry_set_rdev(struct archive_entry *entry, dev_t m)
 	entry->stat_valid = 0;
 	entry->ae_stat.aest_rdev = m;
 	entry->ae_stat.aest_rdev_is_broken_down = 0;
+	entry->ae_stat.aest_rdevmajor = 0;
+	entry->ae_stat.aest_rdevminor = 0;
+	entry->ae_set |= AE_SET_RDEV;
 }
 
 void
@@ -1262,7 +1319,9 @@ archive_entry_set_rdevmajor(struct archive_entry *entry, dev_t m)
 {
 	entry->stat_valid = 0;
 	entry->ae_stat.aest_rdev_is_broken_down = 1;
+	entry->ae_stat.aest_rdev = 0;
 	entry->ae_stat.aest_rdevmajor = m;
+	entry->ae_set |= AE_SET_RDEV;
 }
 
 void
@@ -1270,7 +1329,9 @@ archive_entry_set_rdevminor(struct archive_entry *entry, dev_t m)
 {
 	entry->stat_valid = 0;
 	entry->ae_stat.aest_rdev_is_broken_down = 1;
+	entry->ae_stat.aest_rdev = 0;
 	entry->ae_stat.aest_rdevminor = m;
+	entry->ae_set |= AE_SET_RDEV;
 }
 
 void
@@ -1303,11 +1364,14 @@ archive_entry_copy_sourcepath_w(struct archive_entry *entry, const wchar_t *path
 void
 archive_entry_set_symlink(struct archive_entry *entry, const char *linkname)
 {
-	archive_mstring_copy_mbs(&entry->ae_symlink, linkname);
-	if (linkname != NULL)
-		entry->ae_set |= AE_SET_SYMLINK;
-	else
+	if (linkname == NULL && (entry->ae_set & AE_SET_HARDLINK))
+		return;
+	archive_mstring_copy_mbs(&entry->ae_linkname, linkname);
+	entry->ae_set &= ~AE_SET_HARDLINK;
+	if (linkname == NULL)
 		entry->ae_set &= ~AE_SET_SYMLINK;
+	else
+		entry->ae_set |= AE_SET_SYMLINK;
 }
 
 void
@@ -1319,42 +1383,54 @@ archive_entry_set_symlink_type(struct archive_entry *entry, int type)
 void
 archive_entry_set_symlink_utf8(struct archive_entry *entry, const char *linkname)
 {
-	archive_mstring_copy_utf8(&entry->ae_symlink, linkname);
-	if (linkname != NULL)
-		entry->ae_set |= AE_SET_SYMLINK;
-	else
+	if (linkname == NULL && (entry->ae_set & AE_SET_HARDLINK))
+		return;
+	archive_mstring_copy_utf8(&entry->ae_linkname, linkname);
+	entry->ae_set &= ~AE_SET_HARDLINK;
+	if (linkname == NULL)
 		entry->ae_set &= ~AE_SET_SYMLINK;
+	else
+		entry->ae_set |= AE_SET_SYMLINK;
 }
 
 void
 archive_entry_copy_symlink(struct archive_entry *entry, const char *linkname)
 {
-	archive_mstring_copy_mbs(&entry->ae_symlink, linkname);
-	if (linkname != NULL)
-		entry->ae_set |= AE_SET_SYMLINK;
-	else
+	if (linkname == NULL && (entry->ae_set & AE_SET_HARDLINK))
+		return;
+	archive_mstring_copy_mbs(&entry->ae_linkname, linkname);
+	entry->ae_set &= ~AE_SET_HARDLINK;
+	if (linkname == NULL)
 		entry->ae_set &= ~AE_SET_SYMLINK;
+	else
+		entry->ae_set |= AE_SET_SYMLINK;
 }
 
 void
 archive_entry_copy_symlink_w(struct archive_entry *entry, const wchar_t *linkname)
 {
-	archive_mstring_copy_wcs(&entry->ae_symlink, linkname);
-	if (linkname != NULL)
-		entry->ae_set |= AE_SET_SYMLINK;
-	else
+	if (linkname == NULL && (entry->ae_set & AE_SET_HARDLINK))
+		return;
+	archive_mstring_copy_wcs(&entry->ae_linkname, linkname);
+	entry->ae_set &= ~AE_SET_HARDLINK;
+	if (linkname == NULL)
 		entry->ae_set &= ~AE_SET_SYMLINK;
+	else
+		entry->ae_set |= AE_SET_SYMLINK;
 }
 
 int
 archive_entry_update_symlink_utf8(struct archive_entry *entry, const char *linkname)
 {
-	if (linkname != NULL)
-		entry->ae_set |= AE_SET_SYMLINK;
-	else
+	if (linkname == NULL && (entry->ae_set & AE_SET_HARDLINK))
+		return (0);
+	entry->ae_set &= ~AE_SET_HARDLINK;
+	if (linkname == NULL)
 		entry->ae_set &= ~AE_SET_SYMLINK;
+	else
+		entry->ae_set |= AE_SET_SYMLINK;
 	if (archive_mstring_update_utf8(entry->archive,
-	    &entry->ae_symlink, linkname) == 0)
+	    &entry->ae_linkname, linkname) == 0)
 		return (1);
 	if (errno == ENOMEM)
 		__archive_errx(1, "No memory");
@@ -1367,12 +1443,15 @@ _archive_entry_copy_symlink_l(struct archive_entry *entry,
 {
 	int r;
 
-	r = archive_mstring_copy_mbs_len_l(&entry->ae_symlink,
+	if (linkname == NULL && (entry->ae_set & AE_SET_HARDLINK))
+		return (0);
+	entry->ae_set &= ~AE_SET_HARDLINK;
+	r = archive_mstring_copy_mbs_len_l(&entry->ae_linkname,
 	    linkname, len, sc);
-	if (linkname != NULL && r == 0)
-		entry->ae_set |= AE_SET_SYMLINK;
-	else
+	if (linkname == NULL || r != 0)
 		entry->ae_set &= ~AE_SET_SYMLINK;
+	else
+		entry->ae_set |= AE_SET_SYMLINK;
 	return (r);
 }
 
@@ -2031,7 +2110,7 @@ ae_fflagstostr(unsigned long bitset, unsigned long bitclear)
  *	provided string.
  */
 static const char *
-ae_strtofflags(const char *s, unsigned long *setp, unsigned long *clrp)
+ae_strtofflags(const char *s, size_t l, unsigned long *setp, unsigned long *clrp)
 {
 	const char *start, *end;
 	const struct flag *flag;
@@ -2042,15 +2121,19 @@ ae_strtofflags(const char *s, unsigned long *setp, unsigned long *clrp)
 	start = s;
 	failed = NULL;
 	/* Find start of first token. */
-	while (*start == '\t'  ||  *start == ' '  ||  *start == ',')
+	while (l > 0 && (*start == '\t'  ||  *start == ' '  ||  *start == ',')) {
 		start++;
-	while (*start != '\0') {
+		l--;
+	}
+	while (l > 0) {
 		size_t length;
 		/* Locate end of token. */
 		end = start;
-		while (*end != '\0'  &&  *end != '\t'  &&
-		    *end != ' '  &&  *end != ',')
+		while (l > 0 && *end != '\t'  &&
+		    *end != ' '  &&  *end != ',') {
 			end++;
+			l--;
+		}
 		length = end - start;
 		for (flag = fileflags; flag->name != NULL; flag++) {
 			size_t flag_length = strlen(flag->name);
@@ -2074,8 +2157,10 @@ ae_strtofflags(const char *s, unsigned long *setp, unsigned long *clrp)
 
 		/* Find start of next token. */
 		start = end;
-		while (*start == '\t'  ||  *start == ' '  ||  *start == ',')
+		while (l > 0 && (*start == '\t'  ||  *start == ' '  ||  *start == ',')) {
 			start++;
+			l--;
+		}
 
 	}
 
diff --git a/libarchive/archive_entry.h b/libarchive/archive_entry.h
index 3b41a0f8e3..b51f34e42b 100644
--- a/libarchive/archive_entry.h
+++ b/libarchive/archive_entry.h
@@ -259,6 +259,7 @@ __LA_DECL int		 archive_entry_gid_is_set(struct archive_entry *);
 __LA_DECL const char	*archive_entry_gname(struct archive_entry *);
 __LA_DECL const char	*archive_entry_gname_utf8(struct archive_entry *);
 __LA_DECL const wchar_t	*archive_entry_gname_w(struct archive_entry *);
+__LA_DECL void		 archive_entry_set_link_to_hardlink(struct archive_entry *);
 __LA_DECL const char	*archive_entry_hardlink(struct archive_entry *);
 __LA_DECL const char	*archive_entry_hardlink_utf8(struct archive_entry *);
 __LA_DECL const wchar_t	*archive_entry_hardlink_w(struct archive_entry *);
@@ -275,6 +276,7 @@ __LA_DECL const char	*archive_entry_pathname_utf8(struct archive_entry *);
 __LA_DECL const wchar_t	*archive_entry_pathname_w(struct archive_entry *);
 __LA_DECL __LA_MODE_T	 archive_entry_perm(struct archive_entry *);
 __LA_DECL int		 archive_entry_perm_is_set(struct archive_entry *);
+__LA_DECL int		 archive_entry_rdev_is_set(struct archive_entry *);
 __LA_DECL dev_t		 archive_entry_rdev(struct archive_entry *);
 __LA_DECL dev_t		 archive_entry_rdevmajor(struct archive_entry *);
 __LA_DECL dev_t		 archive_entry_rdevminor(struct archive_entry *);
@@ -283,6 +285,7 @@ __LA_DECL const wchar_t	*archive_entry_sourcepath_w(struct archive_entry *);
 __LA_DECL la_int64_t	 archive_entry_size(struct archive_entry *);
 __LA_DECL int		 archive_entry_size_is_set(struct archive_entry *);
 __LA_DECL const char	*archive_entry_strmode(struct archive_entry *);
+__LA_DECL void		 archive_entry_set_link_to_symlink(struct archive_entry *);
 __LA_DECL const char	*archive_entry_symlink(struct archive_entry *);
 __LA_DECL const char	*archive_entry_symlink_utf8(struct archive_entry *);
 __LA_DECL int		 archive_entry_symlink_type(struct archive_entry *);
@@ -324,6 +327,8 @@ __LA_DECL void	archive_entry_set_fflags(struct archive_entry *,
 /* Note that all recognized tokens are processed, regardless. */
 __LA_DECL const char *archive_entry_copy_fflags_text(struct archive_entry *,
 	    const char *);
+__LA_DECL const char *archive_entry_copy_fflags_text_len(struct archive_entry *,
+	    const char *, size_t);
 __LA_DECL const wchar_t *archive_entry_copy_fflags_text_w(struct archive_entry *,
 	    const wchar_t *);
 __LA_DECL void	archive_entry_set_gid(struct archive_entry *, la_int64_t);
diff --git a/libarchive/archive_entry_private.h b/libarchive/archive_entry_private.h
index 3423966c60..15f2a8ee28 100644
--- a/libarchive/archive_entry_private.h
+++ b/libarchive/archive_entry_private.h
@@ -149,6 +149,7 @@ struct archive_entry {
 #define	AE_SET_FILETYPE	1024
 #define	AE_SET_UID	2048
 #define	AE_SET_GID	4096
+#define	AE_SET_RDEV	8192
 
 	/*
 	 * Use aes here so that we get transparent mbs<->wcs conversions.
@@ -157,9 +158,8 @@ struct archive_entry {
 	unsigned long ae_fflags_set;		/* Bitmap fflags */
 	unsigned long ae_fflags_clear;
 	struct archive_mstring ae_gname;		/* Name of owning group */
-	struct archive_mstring ae_hardlink;	/* Name of target for hardlink */
+	struct archive_mstring ae_linkname;	/* Name of target for hardlink or symlink */
 	struct archive_mstring ae_pathname;	/* Name of entry */
-	struct archive_mstring ae_symlink;		/* symlink contents */
 	struct archive_mstring ae_uname;		/* Name of owner */
 
 	/* Not used within libarchive; useful for some clients. */
diff --git a/libarchive/archive_read_support_format_tar.c b/libarchive/archive_read_support_format_tar.c
index e5058ee82d..3b7bd8556c 100644
--- a/libarchive/archive_read_support_format_tar.c
+++ b/libarchive/archive_read_support_format_tar.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2003-2007 Tim Kientzle
+ * Copyright (c) 2003-2023 Tim Kientzle
  * Copyright (c) 2011-2012 Michihiro NAKAJIMA
  * Copyright (c) 2016 Martin Matuska
  * All rights reserved.
@@ -117,33 +117,29 @@ struct sparse_block {
 };
 
 struct tar {
-	struct archive_string	 acl_text;
 	struct archive_string	 entry_pathname;
 	/* For "GNU.sparse.name" and other similar path extensions. */
 	struct archive_string	 entry_pathname_override;
-	struct archive_string	 entry_linkpath;
 	struct archive_string	 entry_uname;
 	struct archive_string	 entry_gname;
-	struct archive_string	 longlink;
+	struct archive_string	 entry_linkpath;
 	struct archive_string	 longname;
-	struct archive_string	 pax_header;
 	struct archive_string	 pax_global;
 	struct archive_string	 line;
-	int			 pax_hdrcharset_binary;
-	int			 header_recursion_depth;
+	int			 pax_hdrcharset_utf8;
 	int64_t			 entry_bytes_remaining;
 	int64_t			 entry_offset;
 	int64_t			 entry_padding;
 	int64_t 		 entry_bytes_unconsumed;
 	int64_t			 realsize;
-	int			 sparse_allowed;
 	struct sparse_block	*sparse_list;
 	struct sparse_block	*sparse_last;
 	int64_t			 sparse_offset;
 	int64_t			 sparse_numbytes;
 	int			 sparse_gnu_major;
 	int			 sparse_gnu_minor;
-	char			 sparse_gnu_pending;
+	char			 sparse_gnu_attributes_seen;
+	char			 filetype;
 
 	struct archive_string	 localname;
 	struct archive_string_conv *opt_sconv;
@@ -168,25 +164,26 @@ static int	gnu_sparse_old_read(struct archive_read *, struct tar *,
 static int	gnu_sparse_old_parse(struct archive_read *, struct tar *,
 		    const struct gnu_sparse *sparse, int length);
 static int	gnu_sparse_01_parse(struct archive_read *, struct tar *,
-		    const char *);
+		    const char *, size_t);
 static ssize_t	gnu_sparse_10_read(struct archive_read *, struct tar *,
-			size_t *);
+		    size_t *);
 static int	header_Solaris_ACL(struct archive_read *,  struct tar *,
 		    struct archive_entry *, const void *, size_t *);
 static int	header_common(struct archive_read *,  struct tar *,
 		    struct archive_entry *, const void *);
 static int	header_old_tar(struct archive_read *, struct tar *,
 		    struct archive_entry *, const void *);
-static int	header_pax_extensions(struct archive_read *, struct tar *,
+static int	header_pax_extension(struct archive_read *, struct tar *,
 		    struct archive_entry *, const void *, size_t *);
 static int	header_pax_global(struct archive_read *, struct tar *,
 		    struct archive_entry *, const void *h, size_t *);
-static int	header_longlink(struct archive_read *, struct tar *,
-		    struct archive_entry *, const void *h, size_t *);
-static int	header_longname(struct archive_read *, struct tar *,
+static int	header_gnu_longlink(struct archive_read *, struct tar *,
 		    struct archive_entry *, const void *h, size_t *);
-static int	read_mac_metadata_blob(struct archive_read *, struct tar *,
+static int	header_gnu_longname(struct archive_read *, struct tar *,
 		    struct archive_entry *, const void *h, size_t *);
+static int	is_mac_metadata_entry(struct archive_entry *entry);
+static int	read_mac_metadata_blob(struct archive_read *,
+		    struct archive_entry *, size_t *);
 static int	header_volume(struct archive_read *, struct tar *,
 		    struct archive_entry *, const void *h, size_t *);
 static int	header_ustar(struct archive_read *, struct tar *,
@@ -204,21 +201,21 @@ static int	archive_read_format_tar_read_header(struct archive_read *,
 		    struct archive_entry *);
 static int	checksum(struct archive_read *, const void *);
 static int 	pax_attribute(struct archive_read *, struct tar *,
-		    struct archive_entry *, const char *key, const char *value,
-		    size_t value_length);
-static int	pax_attribute_acl(struct archive_read *, struct tar *,
-		    struct archive_entry *, const char *, int);
-static int	pax_attribute_xattr(struct archive_entry *, const char *,
-		    const char *);
-static int 	pax_header(struct archive_read *, struct tar *,
-		    struct archive_entry *, struct archive_string *);
-static void	pax_time(const char *, int64_t *sec, long *nanos);
+		    struct archive_entry *, const char *key, size_t key_length,
+		    size_t value_length, size_t *unconsumed);
+static int	pax_attribute_LIBARCHIVE_xattr(struct archive_entry *,
+		    const char *, size_t, const char *, size_t);
+static int	pax_attribute_SCHILY_acl(struct archive_read *, struct tar *,
+		    struct archive_entry *, size_t, int);
+static int	pax_attribute_SUN_holesdata(struct archive_read *, struct tar *,
+		    struct archive_entry *, const char *, size_t);
+static void	pax_time(const char *, size_t, int64_t *sec, long *nanos);
 static ssize_t	readline(struct archive_read *, struct tar *, const char **,
 		    ssize_t limit, size_t *);
 static int	read_body_to_string(struct archive_read *, struct tar *,
 		    struct archive_string *, const void *h, size_t *);
-static int	solaris_sparse_parse(struct archive_read *, struct tar *,
-		    struct archive_entry *, const char *);
+static int	read_bytes_to_string(struct archive_read *,
+		    struct archive_string *, size_t, size_t *);
 static int64_t	tar_atol(const char *, size_t);
 static int64_t	tar_atol10(const char *, size_t);
 static int64_t	tar_atol256(const char *, size_t);
@@ -226,9 +223,20 @@ static int64_t	tar_atol8(const char *, size_t);
 static int	tar_read_header(struct archive_read *, struct tar *,
 		    struct archive_entry *, size_t *);
 static int	tohex(int c);
-static char	*url_decode(const char *);
+static char	*url_decode(const char *, size_t);
 static void	tar_flush_unconsumed(struct archive_read *, size_t *);
 
+/* Sanity limits:  These numbers should be low enough to
+ * prevent a maliciously-crafted archive from forcing us to
+ * allocate extreme amounts of memory.  But of course, they
+ * need to be high enough for any correct value.  These
+ * will likely need some adjustment as we get more experience. */
+static const size_t guname_limit = 65536; /* Longest uname or gname: 64kiB */
+static const size_t pathname_limit = 1048576; /* Longest path name: 1MiB */
+static const size_t sparse_map_limit = 8 * 1048576; /* Longest sparse map: 8MiB */
+static const size_t xattr_limit = 16 * 1048576; /* Longest xattr: 16MiB */
+static const size_t fflags_limit = 512; /* Longest fflags */
+static const size_t acl_limit = 131072; /* Longest textual ACL: 128kiB */
 
 int
 archive_read_support_format_gnutar(struct archive *a)
@@ -283,17 +291,13 @@ archive_read_format_tar_cleanup(struct archive_read *a)
 
 	tar = (struct tar *)(a->format->data);
 	gnu_clear_sparse_list(tar);
-	archive_string_free(&tar->acl_text);
 	archive_string_free(&tar->entry_pathname);
 	archive_string_free(&tar->entry_pathname_override);
-	archive_string_free(&tar->entry_linkpath);
 	archive_string_free(&tar->entry_uname);
 	archive_string_free(&tar->entry_gname);
 	archive_string_free(&tar->line);
 	archive_string_free(&tar->pax_global);
-	archive_string_free(&tar->pax_header);
 	archive_string_free(&tar->longname);
-	archive_string_free(&tar->longlink);
 	archive_string_free(&tar->localname);
 	free(tar);
 	(a->format->data) = NULL;
@@ -505,6 +509,8 @@ archive_read_format_tar_read_header(struct archive_read *a,
 	 * probably not worthwhile just to support the relatively
 	 * obscure tar->cpio conversion case.
 	 */
+	/* TODO: Move this into `struct tar` to avoid conflicts
+	 * when reading multiple archives */
 	static int default_inode;
 	static int default_dev;
 	struct tar *tar;
@@ -627,7 +633,8 @@ archive_read_format_tar_read_data(struct archive_read *a,
 			return (ARCHIVE_FATAL);
 		if (*buff == NULL) {
 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
-			    "Truncated tar archive");
+			    "Truncated tar archive"
+			    " detected while reading data");
 			return (ARCHIVE_FATAL);
 		}
 		if (bytes_read > tar->entry_bytes_remaining)
@@ -688,7 +695,7 @@ archive_read_format_tar_skip(struct archive_read *a)
 }
 
 /*
- * This function recursively interprets all of the headers associated
+ * This function reads and interprets all of the headers associated
  * with a single entry.
  */
 static int
@@ -696,190 +703,259 @@ tar_read_header(struct archive_read *a, struct tar *tar,
     struct archive_entry *entry, size_t *unconsumed)
 {
 	ssize_t bytes;
-	int err, eof_vol_header;
+	int err = ARCHIVE_OK, err2;
+	int eof_fatal = 0; /* EOF is okay at some points... */
 	const char *h;
 	const struct archive_entry_header_ustar *header;
 	const struct archive_entry_header_gnutar *gnuheader;
 
-	eof_vol_header = 0;
-
-	/* Loop until we find a workable header record. */
-	for (;;) {
-		tar_flush_unconsumed(a, unconsumed);
+	/* Bitmask of what header types we've seen. */
+	int32_t seen_headers = 0;
+	static const int32_t seen_A_header = 1;
+	static const int32_t seen_g_header = 2;
+	static const int32_t seen_K_header = 4;
+	static const int32_t seen_L_header = 8;
+	static const int32_t seen_V_header = 16;
+	static const int32_t seen_x_header = 32; /* Also X */
+	static const int32_t seen_mac_metadata = 512;
+
+	tar->pax_hdrcharset_utf8 = 1;
+	tar->sparse_gnu_attributes_seen = 0;
+	archive_string_empty(&(tar->entry_gname));
+	archive_string_empty(&(tar->entry_pathname));
+	archive_string_empty(&(tar->entry_pathname_override));
+	archive_string_empty(&(tar->entry_uname));
 
-		/* Read 512-byte header record */
-		h = __archive_read_ahead(a, 512, &bytes);
-		if (bytes < 0)
-			return ((int)bytes);
-		if (bytes == 0) { /* EOF at a block boundary. */
-			/* Some writers do omit the block of nulls. <sigh> */
-			return (ARCHIVE_EOF);
-		}
-		if (bytes < 512) {  /* Short block at EOF; this is bad. */
-			archive_set_error(&a->archive,
-			    ARCHIVE_ERRNO_FILE_FORMAT,
-			    "Truncated tar archive");
-			return (ARCHIVE_FATAL);
-		}
-		*unconsumed = 512;
+	/* Ensure format is set. */
+	if (a->archive.archive_format_name == NULL) {
+		a->archive.archive_format = ARCHIVE_FORMAT_TAR;
+		a->archive.archive_format_name = "tar";
+	}
 
-		/* Header is workable if it's not an end-of-archive mark. */
-		if (h[0] != 0 || !archive_block_is_null(h))
-			break;
+	/*
+	 * TODO: Write global/default pax options into
+	 * 'entry' struct here before overwriting with
+	 * file-specific options.
+	 */
 
-		/* Ensure format is set for archives with only null blocks. */
-		if (a->archive.archive_format_name == NULL) {
-			a->archive.archive_format = ARCHIVE_FORMAT_TAR;
-			a->archive.archive_format_name = "tar";
-		}
+	/* Loop over all the headers needed for the next entry */
+	for (;;) {
 
-		if (!tar->read_concatenated_archives) {
-			/* Try to consume a second all-null record, as well. */
+		/* Find the next valid header record. */
+		while (1) {
 			tar_flush_unconsumed(a, unconsumed);
-			h = __archive_read_ahead(a, 512, NULL);
-			if (h != NULL && h[0] == 0 && archive_block_is_null(h))
-				__archive_read_consume(a, 512);
-			archive_clear_error(&a->archive);
-			return (ARCHIVE_EOF);
-		}
 
-		/*
-		 * We're reading concatenated archives, ignore this block and
-		 * loop to get the next.
-		 */
-	}
+			/* Read 512-byte header record */
+			h = __archive_read_ahead(a, 512, &bytes);
+			if (bytes < 0)
+				return ((int)bytes);
+			if (bytes == 0) { /* EOF at a block boundary. */
+				if (eof_fatal) {
+					/* We've read a special header already;
+					 * if there's no regular header, then this is
+					 * a premature EOF. */
+					archive_set_error(&a->archive, EINVAL,
+							  "Damaged tar archive");
+					return (ARCHIVE_FATAL);
+				} else {
+					return (ARCHIVE_EOF);
+				}
+			}
+			if (bytes < 512) {  /* Short block at EOF; this is bad. */
+				archive_set_error(&a->archive,
+				    ARCHIVE_ERRNO_FILE_FORMAT,
+				    "Truncated tar archive"
+				    " detected while reading next header");
+				return (ARCHIVE_FATAL);
+			}
+			*unconsumed += 512;
 
-	/*
-	 * Note: If the checksum fails and we return ARCHIVE_RETRY,
-	 * then the client is likely to just retry.  This is a very
-	 * crude way to search for the next valid header!
-	 *
-	 * TODO: Improve this by implementing a real header scan.
-	 */
-	if (!checksum(a, h)) {
-		tar_flush_unconsumed(a, unconsumed);
-		archive_set_error(&a->archive, EINVAL, "Damaged tar archive");
-		return (ARCHIVE_RETRY); /* Retryable: Invalid header */
-	}
+			if (h[0] == 0 && archive_block_is_null(h)) {
+				/* We found a NULL block which indicates end-of-archive */
 
-	if (++tar->header_recursion_depth > 32) {
-		tar_flush_unconsumed(a, unconsumed);
-		archive_set_error(&a->archive, EINVAL, "Too many special headers");
-		return (ARCHIVE_WARN);
-	}
+				if (tar->read_concatenated_archives) {
+					/* We're ignoring NULL blocks, so keep going. */
+					continue;
+				}
 
-	/* Determine the format variant. */
-	header = (const struct archive_entry_header_ustar *)h;
+				/* Try to consume a second all-null record, as well. */
+				/* If we can't, that's okay. */
+				tar_flush_unconsumed(a, unconsumed);
+				h = __archive_read_ahead(a, 512, NULL);
+				if (h != NULL && h[0] == 0 && archive_block_is_null(h))
+						__archive_read_consume(a, 512);
 
-	switch(header->typeflag[0]) {
-	case 'A': /* Solaris tar ACL */
-		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
-		a->archive.archive_format_name = "Solaris tar";
-		err = header_Solaris_ACL(a, tar, entry, h, unconsumed);
-		break;
-	case 'g': /* POSIX-standard 'g' header. */
-		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
-		a->archive.archive_format_name = "POSIX pax interchange format";
-		err = header_pax_global(a, tar, entry, h, unconsumed);
-		if (err == ARCHIVE_EOF)
-			return (err);
-		break;
-	case 'K': /* Long link name (GNU tar, others) */
-		err = header_longlink(a, tar, entry, h, unconsumed);
-		break;
-	case 'L': /* Long filename (GNU tar, others) */
-		err = header_longname(a, tar, entry, h, unconsumed);
-		break;
-	case 'V': /* GNU volume header */
-		err = header_volume(a, tar, entry, h, unconsumed);
-		if (err == ARCHIVE_EOF)
-			eof_vol_header = 1;
-		break;
-	case 'X': /* Used by SUN tar; same as 'x'. */
-		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
-		a->archive.archive_format_name =
-		    "POSIX pax interchange format (Sun variant)";
-		err = header_pax_extensions(a, tar, entry, h, unconsumed);
-		break;
-	case 'x': /* POSIX-standard 'x' header. */
-		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
-		a->archive.archive_format_name = "POSIX pax interchange format";
-		err = header_pax_extensions(a, tar, entry, h, unconsumed);
-		break;
-	default:
-		gnuheader = (const struct archive_entry_header_gnutar *)h;
-		if (memcmp(gnuheader->magic, "ustar  \0", 8) == 0) {
-			a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR;
-			a->archive.archive_format_name = "GNU tar format";
-			err = header_gnutar(a, tar, entry, h, unconsumed);
-		} else if (memcmp(header->magic, "ustar", 5) == 0) {
-			if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
-				a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR;
-				a->archive.archive_format_name = "POSIX ustar format";
+				archive_clear_error(&a->archive);
+				return (ARCHIVE_EOF);
 			}
-			err = header_ustar(a, tar, entry, h);
-		} else {
-			a->archive.archive_format = ARCHIVE_FORMAT_TAR;
-			a->archive.archive_format_name = "tar (non-POSIX)";
-			err = header_old_tar(a, tar, entry, h);
-		}
-	}
-	if (err == ARCHIVE_FATAL)
-		return (err);
-
-	tar_flush_unconsumed(a, unconsumed);
 
-	h = NULL;
-	header = NULL;
+			/* This is NOT a null block, so it must be a valid header. */
+			if (!checksum(a, h)) {
+				tar_flush_unconsumed(a, unconsumed);
+				archive_set_error(&a->archive, EINVAL, "Damaged tar archive");
+				/* If we've read some critical information (pax headers, etc)
+				 * and _then_ see a bad header, we can't really recover. */
+				if (eof_fatal) {
+					return (ARCHIVE_FATAL);
+				} else {
+					return (ARCHIVE_RETRY);
+				}
+			}
+			break;
+		}
 
-	--tar->header_recursion_depth;
-	/* Yuck.  Apple's design here ends up storing long pathname
-	 * extensions for both the AppleDouble extension entry and the
-	 * regular entry.
-	 */
-	if ((err == ARCHIVE_WARN || err == ARCHIVE_OK) &&
-	    tar->header_recursion_depth == 0 &&
-	    tar->process_mac_extensions) {
-		int err2 = read_mac_metadata_blob(a, tar, entry, h, unconsumed);
-		if (err2 < err)
-			err = err2;
-	}
-
-	/* We return warnings or success as-is.  Anything else is fatal. */
-	if (err == ARCHIVE_WARN || err == ARCHIVE_OK) {
-		if (tar->sparse_gnu_pending) {
-			if (tar->sparse_gnu_major == 1 &&
-			    tar->sparse_gnu_minor == 0) {
-				ssize_t bytes_read;
-
-				tar->sparse_gnu_pending = 0;
-				/* Read initial sparse map. */
-				bytes_read = gnu_sparse_10_read(a, tar, unconsumed);
-				if (bytes_read < 0)
-					return ((int)bytes_read);
-				tar->entry_bytes_remaining -= bytes_read;
+		/* Determine the format variant. */
+		header = (const struct archive_entry_header_ustar *)h;
+		switch(header->typeflag[0]) {
+		case 'A': /* Solaris tar ACL */
+			if (seen_headers & seen_A_header) {
+				return (ARCHIVE_FATAL);
+			}
+			seen_headers |= seen_A_header;
+			a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
+			a->archive.archive_format_name = "Solaris tar";
+			err2 = header_Solaris_ACL(a, tar, entry, h, unconsumed);
+			break;
+		case 'g': /* POSIX-standard 'g' header. */
+			if (seen_headers & seen_g_header) {
+				return (ARCHIVE_FATAL);
+			}
+			seen_headers |= seen_g_header;
+			a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
+			a->archive.archive_format_name = "POSIX pax interchange format";
+			err2 = header_pax_global(a, tar, entry, h, unconsumed);
+			break;
+		case 'K': /* Long link name (GNU tar, others) */
+			if (seen_headers & seen_K_header) {
+				return (ARCHIVE_FATAL);
+			}
+			seen_headers |= seen_K_header;
+			err2 = header_gnu_longlink(a, tar, entry, h, unconsumed);
+			break;
+		case 'L': /* Long filename (GNU tar, others) */
+			if (seen_headers & seen_L_header) {
+				return (ARCHIVE_FATAL);
+			}
+			seen_headers |= seen_L_header;
+			err2 = header_gnu_longname(a, tar, entry, h, unconsumed);
+			break;
+		case 'V': /* GNU volume header */
+			if (seen_headers & seen_V_header) {
+				return (ARCHIVE_FATAL);
+			}
+			seen_headers |= seen_V_header;
+			err2 = header_volume(a, tar, entry, h, unconsumed);
+			break;
+		case 'X': /* Used by SUN tar; same as 'x'. */
+			if (seen_headers & seen_x_header) {
+				return (ARCHIVE_FATAL);
+			}
+			seen_headers |= seen_x_header;
+			a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
+			a->archive.archive_format_name =
+				"POSIX pax interchange format (Sun variant)";
+			err2 = header_pax_extension(a, tar, entry, h, unconsumed);
+			break;
+		case 'x': /* POSIX-standard 'x' header. */
+			if (seen_headers & seen_x_header) {
+				return (ARCHIVE_FATAL);
+			}
+			seen_headers |= seen_x_header;
+			a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
+			a->archive.archive_format_name = "POSIX pax interchange format";
+			err2 = header_pax_extension(a, tar, entry, h, unconsumed);
+			break;
+		default: /* Regular header: Legacy tar, GNU tar, or ustar */
+			gnuheader = (const struct archive_entry_header_gnutar *)h;
+			if (memcmp(gnuheader->magic, "ustar  \0", 8) == 0) {
+				a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR;
+				a->archive.archive_format_name = "GNU tar format";
+				err2 = header_gnutar(a, tar, entry, h, unconsumed);
+			} else if (memcmp(header->magic, "ustar", 5) == 0) {
+				if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
+					a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR;
+					a->archive.archive_format_name = "POSIX ustar format";
+				}
+				err2 = header_ustar(a, tar, entry, h);
 			} else {
-				archive_set_error(&a->archive,
-				    ARCHIVE_ERRNO_MISC,
-				    "Unrecognized GNU sparse file format");
-				return (ARCHIVE_WARN);
+				a->archive.archive_format = ARCHIVE_FORMAT_TAR;
+				a->archive.archive_format_name = "tar (non-POSIX)";
+				err2 = header_old_tar(a, tar, entry, h);
+			}
+			err = err_combine(err, err2);
+			/* We return warnings or success as-is.  Anything else is fatal. */
+			if (err < ARCHIVE_WARN) {
+				return (ARCHIVE_FATAL);
+			}
+			/* Filename of the form `._filename` is an AppleDouble
+			 * extension entry.  The body is the macOS metadata blob;
+			 * this is followed by another entry with the actual
+			 * regular file data.
+			 * This design has two drawbacks:
+			 * = it's brittle; you might just have a file with such a name
+			 * = it duplicates any long pathname extensions
+			 *
+			 * TODO: This probably shouldn't be here at all.  Consider
+			 * just returning the contents as a regular entry here and
+			 * then dealing with it when we write data to disk.
+			 */
+			if (tar->process_mac_extensions
+			    && ((seen_headers & seen_mac_metadata) == 0)
+			    && is_mac_metadata_entry(entry)) {
+				err2 = read_mac_metadata_blob(a, entry, unconsumed);
+				if (err2 < ARCHIVE_WARN) {
+					return (ARCHIVE_FATAL);
+				}
+				err = err_combine(err, err2);
+				/* Note: Other headers can appear again. */
+				seen_headers = seen_mac_metadata;
+				break;
 			}
-			tar->sparse_gnu_pending = 0;
+
+			/* Reconcile GNU sparse attributes */
+			if (tar->sparse_gnu_attributes_seen) {
+				/* Only 'S' (GNU sparse) and ustar '0' regular files can be sparse */
+				if (tar->filetype != 'S' && tar->filetype != '0') {
+					archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+							  "Non-regular file cannot be sparse");
+					return (ARCHIVE_WARN);
+				} else if (tar->sparse_gnu_major == 0 &&
+				    tar->sparse_gnu_minor == 0) {
+					/* Sparse map already parsed from 'x' header */
+				} else if (tar->sparse_gnu_major == 0 &&
+				    tar->sparse_gnu_minor == 1) {
+					/* Sparse map already parsed from 'x' header */
+				} else if (tar->sparse_gnu_major == 1 &&
+				    tar->sparse_gnu_minor == 0) {
+					/* Sparse map is prepended to file contents */
+					ssize_t bytes_read;
+					bytes_read = gnu_sparse_10_read(a, tar, unconsumed);
+					if (bytes_read < 0)
+						return ((int)bytes_read);
+					tar->entry_bytes_remaining -= bytes_read;
+				} else {
+					archive_set_error(&a->archive,
+							  ARCHIVE_ERRNO_MISC,
+							  "Unrecognized GNU sparse file format");
+					return (ARCHIVE_WARN);
+				}
+			}
+			return (err);
 		}
-		return (err);
-	}
-	if (err == ARCHIVE_EOF) {
-		if (!eof_vol_header) {
-			/* EOF when recursively reading a header is bad. */
-			archive_set_error(&a->archive, EINVAL,
-			    "Damaged tar archive");
-		} else {
-			/* If we encounter just a GNU volume header treat
-			 * this situation as an empty archive */
-			return (ARCHIVE_EOF);
+
+		/* We're between headers ... */
+		err = err_combine(err, err2);
+		if (err == ARCHIVE_FATAL)
+			return (err);
+
+		/* The GNU volume header and the pax `g` global header
+		 * are both allowed to be the only header in an
+		 * archive.  If we've seen any other header, a
+		 * following EOF is fatal. */
+		if ((seen_headers & ~seen_V_header & ~seen_g_header) != 0) {
+			eof_fatal = 1;
 		}
 	}
-	return (ARCHIVE_FATAL);
 }
 
 /*
@@ -959,6 +1035,7 @@ header_Solaris_ACL(struct archive_read *a, struct tar *tar,
     struct archive_entry *entry, const void *h, size_t *unconsumed)
 {
 	const struct archive_entry_header_ustar *header;
+	struct archive_string	 acl_text;
 	size_t size;
 	int err, acl_type;
 	int64_t type;
@@ -970,27 +1047,27 @@ header_Solaris_ACL(struct archive_read *a, struct tar *tar,
 	 */
 	header = (const struct archive_entry_header_ustar *)h;
 	size = (size_t)tar_atol(header->size, sizeof(header->size));
-	err = read_body_to_string(a, tar, &(tar->acl_text), h, unconsumed);
-	if (err != ARCHIVE_OK)
+	archive_string_init(&acl_text);
+	err = read_body_to_string(a, tar, &acl_text, h, unconsumed);
+	if (err != ARCHIVE_OK) {
+		/* acl_text is a local now; release whatever was allocated. */
+		archive_string_free(&acl_text);
 		return (err);
+	}
 
-	/* Recursively read next header */
-	err = tar_read_header(a, tar, entry, unconsumed);
-	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
-		return (err);
-
 	/* TODO: Examine the first characters to see if this
 	 * is an AIX ACL descriptor.  We'll likely never support
 	 * them, but it would be polite to recognize and warn when
 	 * we do see them. */
 
 	/* Leading octal number indicates ACL type and number of entries. */
-	p = acl = tar->acl_text.s;
+	p = acl = acl_text.s;
 	type = 0;
 	while (*p != '\0' && p < acl + size) {
 		if (*p < '0' || *p > '7') {
 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
 			    "Malformed Solaris ACL attribute (invalid digit)");
+			archive_string_free(&acl_text);
 			return(ARCHIVE_WARN);
 		}
 		type <<= 3;
@@ -998,6 +1072,7 @@ header_Solaris_ACL(struct archive_read *a, struct tar *tar,
 		if (type > 077777777) {
 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
 			    "Malformed Solaris ACL attribute (count too large)");
+			archive_string_free(&acl_text);
 			return (ARCHIVE_WARN);
 		}
 		p++;
@@ -1015,6 +1090,7 @@ header_Solaris_ACL(struct archive_read *a, struct tar *tar,
 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
 		    "Malformed Solaris ACL attribute (unsupported type %o)",
 		    (int)type);
+		archive_string_free(&acl_text);
 		return (ARCHIVE_WARN);
 	}
 	p++;
@@ -1022,6 +1098,7 @@ header_Solaris_ACL(struct archive_read *a, struct tar *tar,
 	if (p >= acl + size) {
 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
 		    "Malformed Solaris ACL attribute (body overflow)");
+		archive_string_free(&acl_text);
 		return(ARCHIVE_WARN);
 	}
 
@@ -1035,12 +1112,17 @@ header_Solaris_ACL(struct archive_read *a, struct tar *tar,
 	if (tar->sconv_acl == NULL) {
 		tar->sconv_acl = archive_string_conversion_from_charset(
 		    &(a->archive), "UTF-8", 1);
-		if (tar->sconv_acl == NULL)
+		if (tar->sconv_acl == NULL) {
+			archive_string_free(&acl_text);
 			return (ARCHIVE_FATAL);
+		}
 	}
 	archive_strncpy(&(tar->localname), acl, p - acl);
 	err = archive_acl_from_text_l(archive_entry_acl(entry),
 	    tar->localname.s, acl_type, tar->sconv_acl);
+	/* Workaround: Force perm_is_set() to be correct */
+	/* If this bit were stored in the ACL, this wouldn't be needed */
+	archive_entry_set_perm(entry, archive_entry_perm(entry));
 	if (err != ARCHIVE_OK) {
 		if (errno == ENOMEM) {
 			archive_set_error(&a->archive, ENOMEM,
@@ -1049,6 +1131,7 @@ header_Solaris_ACL(struct archive_read *a, struct tar *tar,
 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
 			    "Malformed Solaris ACL attribute (unparsable)");
 	}
+	archive_string_free(&acl_text);
 	return (err);
 }
 
@@ -1056,20 +1139,20 @@ header_Solaris_ACL(struct archive_read *a, struct tar *tar,
  * Interpret 'K' long linkname header.
  */
 static int
-header_longlink(struct archive_read *a, struct tar *tar,
+header_gnu_longlink(struct archive_read *a, struct tar *tar,
     struct archive_entry *entry, const void *h, size_t *unconsumed)
 {
 	int err;
 
-	err = read_body_to_string(a, tar, &(tar->longlink), h, unconsumed);
-	if (err != ARCHIVE_OK)
-		return (err);
-	err = tar_read_header(a, tar, entry, unconsumed);
-	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
-		return (err);
-	/* Set symlink if symlink already set, else hardlink. */
-	archive_entry_copy_link(entry, tar->longlink.s);
-	return (ARCHIVE_OK);
+	struct archive_string linkpath;
+	archive_string_init(&linkpath);
+	err = read_body_to_string(a, tar, &linkpath, h, unconsumed);
+	/* Only trust the buffer when the body was read successfully. */
+	if (err == ARCHIVE_OK)
+		archive_entry_set_link(entry, linkpath.s);
+	/* The entry keeps its own copy of the name; drop the temporary. */
+	archive_string_free(&linkpath);
+	return (err);
 }
 
 static int
@@ -1091,7 +1170,7 @@ set_conversion_failed_error(struct archive_read *a,
  * Interpret 'L' long filename header.
  */
 static int
-header_longname(struct archive_read *a, struct tar *tar,
+header_gnu_longname(struct archive_read *a, struct tar *tar,
     struct archive_entry *entry, const void *h, size_t *unconsumed)
 {
 	int err;
@@ -1099,17 +1178,12 @@ header_longname(struct archive_read *a, struct tar *tar,
 	err = read_body_to_string(a, tar, &(tar->longname), h, unconsumed);
 	if (err != ARCHIVE_OK)
 		return (err);
-	/* Read and parse "real" header, then override name. */
-	err = tar_read_header(a, tar, entry, unconsumed);
-	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
-		return (err);
 	if (archive_entry_copy_pathname_l(entry, tar->longname.s,
 	    archive_strlen(&(tar->longname)), tar->sconv) != 0)
 		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
 	return (err);
 }
 
-
 /*
  * Interpret 'V' GNU tar volume header.
  */
@@ -1117,32 +1191,30 @@ static int
 header_volume(struct archive_read *a, struct tar *tar,
     struct archive_entry *entry, const void *h, size_t *unconsumed)
 {
-	(void)h;
+	const struct archive_entry_header_ustar *header;
+	int64_t size, to_consume;
+
+	(void)a; /* UNUSED */
+	(void)tar; /* UNUSED */
+	(void)entry; /* UNUSED */
 
-	/* Just skip this and read the next header. */
-	return (tar_read_header(a, tar, entry, unconsumed));
+	header = (const struct archive_entry_header_ustar *)h;
+	size = tar_atol(header->size, sizeof(header->size));
+	to_consume = ((size + 511) & ~511);
+	*unconsumed += to_consume;
+	return (ARCHIVE_OK);
 }
 
 /*
- * Read body of an archive entry into an archive_string object.
+ * Read the next `size` bytes into the provided string.
+ * Null-terminate the string.
  */
 static int
-read_body_to_string(struct archive_read *a, struct tar *tar,
-    struct archive_string *as, const void *h, size_t *unconsumed)
-{
-	int64_t size;
-	const struct archive_entry_header_ustar *header;
+read_bytes_to_string(struct archive_read *a,
+		     struct archive_string *as, size_t size,
+		     size_t *unconsumed) {
 	const void *src;
 
-	(void)tar; /* UNUSED */
-	header = (const struct archive_entry_header_ustar *)h;
-	size  = tar_atol(header->size, sizeof(header->size));
-	if ((size > 1048576) || (size < 0)) {
-		archive_set_error(&a->archive, EINVAL,
-		    "Special header too large");
-		return (ARCHIVE_FATAL);
-	}
-
 	/* Fail if we can't make our buffer big enough. */
 	if (archive_string_ensure(as, (size_t)size+1) == NULL) {
 		archive_set_error(&a->archive, ENOMEM,
@@ -1153,18 +1225,51 @@ read_body_to_string(struct archive_read *a, struct tar *tar,
 	tar_flush_unconsumed(a, unconsumed);
 
 	/* Read the body into the string. */
-	*unconsumed = (size_t)((size + 511) & ~ 511);
-	src = __archive_read_ahead(a, *unconsumed, NULL);
+	src = __archive_read_ahead(a, size, NULL);
 	if (src == NULL) {
+		archive_set_error(&a->archive, EINVAL,
+		    "Truncated archive"
+		    " detected while reading metadata");
 		*unconsumed = 0;
 		return (ARCHIVE_FATAL);
 	}
 	memcpy(as->s, src, (size_t)size);
 	as->s[size] = '\0';
 	as->length = (size_t)size;
+	*unconsumed += size;
 	return (ARCHIVE_OK);
 }
 
+/*
+ * Read body of an archive entry into an archive_string object.
+ */
+static int
+read_body_to_string(struct archive_read *a, struct tar *tar,
+    struct archive_string *as, const void *h, size_t *unconsumed)
+{
+	int64_t size;
+	const struct archive_entry_header_ustar *header;
+	int r;
+
+	(void)tar; /* UNUSED */
+	header = (const struct archive_entry_header_ustar *)h;
+	size  = tar_atol(header->size, sizeof(header->size));
+	if ((size > 1048576) || (size < 0)) {
+		archive_string_empty(as);
+		int64_t to_consume = ((size + 511) & ~511);
+		if (to_consume != __archive_read_consume(a, to_consume)) {
+			return (ARCHIVE_FATAL);
+		}
+		archive_set_error(&a->archive, EINVAL,
+		    "Special header too large: %d > 1MiB",
+		    (int)size);
+		return (ARCHIVE_WARN);
+	}
+	r = read_bytes_to_string(a, as, size, unconsumed);
+	*unconsumed += 0x1ff & (-size);
+	return(r);
+}
+
 /*
  * Parse out common header elements.
  *
@@ -1180,21 +1285,27 @@ header_common(struct archive_read *a, struct tar *tar,
     struct archive_entry *entry, const void *h)
 {
 	const struct archive_entry_header_ustar	*header;
-	char	tartype;
+	const char *existing_linkpath;
 	int     err = ARCHIVE_OK;
 
 	header = (const struct archive_entry_header_ustar *)h;
-	if (header->linkname[0])
-		archive_strncpy(&(tar->entry_linkpath),
-		    header->linkname, sizeof(header->linkname));
-	else
-		archive_string_empty(&(tar->entry_linkpath));
 
 	/* Parse out the numeric fields (all are octal) */
-	archive_entry_set_mode(entry,
-		(mode_t)tar_atol(header->mode, sizeof(header->mode)));
-	archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid)));
-	archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid)));
+
+	/* Split mode handling: Set filetype always, perm only if not already set */
+	archive_entry_set_filetype(entry,
+	    (mode_t)tar_atol(header->mode, sizeof(header->mode)));
+	if (!archive_entry_perm_is_set(entry)) {
+		archive_entry_set_perm(entry,
+			(mode_t)tar_atol(header->mode, sizeof(header->mode)));
+	}
+	if (!archive_entry_uid_is_set(entry)) {
+		archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid)));
+	}
+	if (!archive_entry_gid_is_set(entry)) {
+		archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid)));
+	}
+
 	tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size));
 	if (tar->entry_bytes_remaining < 0) {
 		tar->entry_bytes_remaining = 0;
@@ -1209,21 +1320,38 @@ header_common(struct archive_read *a, struct tar *tar,
 		    "Tar entry size overflow");
 		return (ARCHIVE_FATAL);
 	}
-	tar->realsize = tar->entry_bytes_remaining;
-	archive_entry_set_size(entry, tar->entry_bytes_remaining);
-	archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0);
+	if (!tar->realsize_override) {
+		tar->realsize = tar->entry_bytes_remaining;
+	}
+	archive_entry_set_size(entry, tar->realsize);
+
+	if (!archive_entry_mtime_is_set(entry)) {
+		archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0);
+	}
 
 	/* Handle the tar type flag appropriately. */
-	tartype = header->typeflag[0];
+	tar->filetype = header->typeflag[0];
 
-	switch (tartype) {
+	/*
+	 * TODO: If the linkpath came from Pax extension header, then
+	 * we should obey the hdrcharset_utf8 flag when converting these.
+	 */
+	switch (tar->filetype) {
 	case '1': /* Hard link */
-		if (archive_entry_copy_hardlink_l(entry, tar->entry_linkpath.s,
-		    archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) {
-			err = set_conversion_failed_error(a, tar->sconv,
-			    "Linkname");
-			if (err == ARCHIVE_FATAL)
-				return (err);
+		archive_entry_set_link_to_hardlink(entry);
+		existing_linkpath = archive_entry_hardlink(entry);
+		if (existing_linkpath == NULL || existing_linkpath[0] == '\0') {
+			struct archive_string linkpath;
+			archive_string_init(&linkpath);
+			archive_strncpy(&linkpath,
+					header->linkname, sizeof(header->linkname));
+			if (archive_entry_copy_hardlink_l(entry, linkpath.s,
+							  archive_strlen(&linkpath), tar->sconv) != 0) {
+				err = set_conversion_failed_error(a, tar->sconv,
+								  "Linkname");
+				if (err == ARCHIVE_FATAL)
+					return (err);
+			}
 		}
 		/*
 		 * The following may seem odd, but: Technically, tar
@@ -1283,16 +1411,24 @@ header_common(struct archive_read *a, struct tar *tar,
 		 */
 		break;
 	case '2': /* Symlink */
+		archive_entry_set_link_to_symlink(entry);
+		existing_linkpath = archive_entry_symlink(entry);
+		if (existing_linkpath == NULL || existing_linkpath[0] == '\0') {
+			struct archive_string linkpath;
+			archive_string_init(&linkpath);
+			archive_strncpy(&linkpath,
+					header->linkname, sizeof(header->linkname));
+			if (archive_entry_copy_symlink_l(entry, linkpath.s,
+			    archive_strlen(&linkpath), tar->sconv) != 0) {
+				err = set_conversion_failed_error(a, tar->sconv,
+				    "Linkname");
+				if (err == ARCHIVE_FATAL)
+					return (err);
+			}
+		}
 		archive_entry_set_filetype(entry, AE_IFLNK);
 		archive_entry_set_size(entry, 0);
 		tar->entry_bytes_remaining = 0;
-		if (archive_entry_copy_symlink_l(entry, tar->entry_linkpath.s,
-		    archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) {
-			err = set_conversion_failed_error(a, tar->sconv,
-			    "Linkname");
-			if (err == ARCHIVE_FATAL)
-				return (err);
-		}
 		break;
 	case '3': /* Character device */
 		archive_entry_set_filetype(entry, AE_IFCHR);
@@ -1342,15 +1478,9 @@ header_common(struct archive_read *a, struct tar *tar,
 		 * sparse information in the extended area.
 		 */
 		/* FALLTHROUGH */
-	case '0':
-		/*
-		 * Enable sparse file "read" support only for regular
-		 * files and explicit GNU sparse files.  However, we
-		 * don't allow non-standard file types to be sparse.
-		 */
-		tar->sparse_allowed = 1;
+	case '0': /* ustar "regular" file */
 		/* FALLTHROUGH */
-	default: /* Regular file  and non-standard types */
+	default: /* Non-standard file types */
 		/*
 		 * Per POSIX: non-recognized types should always be
 		 * treated as regular files.
@@ -1390,21 +1520,13 @@ header_old_tar(struct archive_read *a, struct tar *tar,
 }
 
 /*
- * Read a Mac AppleDouble-encoded blob of file metadata,
- * if there is one.
+ * Is this likely an AppleDouble extension?
  */
 static int
-read_mac_metadata_blob(struct archive_read *a, struct tar *tar,
-    struct archive_entry *entry, const void *h, size_t *unconsumed)
-{
-	int64_t size;
-	size_t msize;
-	const void *data;
+is_mac_metadata_entry(struct archive_entry *entry) {
 	const char *p, *name;
 	const wchar_t *wp, *wname;
 
-	(void)h; /* UNUSED */
-
 	wname = wp = archive_entry_pathname_w(entry);
 	if (wp != NULL) {
 		/* Find the last path element. */
@@ -1416,8 +1538,8 @@ read_mac_metadata_blob(struct archive_read *a, struct tar *tar,
 		 * If last path element starts with "._", then
 		 * this is a Mac extension.
 		 */
-		if (wname[0] != L'.' || wname[1] != L'_' || wname[2] == L'\0')
-			return ARCHIVE_OK;
+		if (wname[0] == L'.' && wname[1] == L'_' && wname[2] != L'\0')
+			return 1;
 	} else {
 		/* Find the last path element. */
 		name = p = archive_entry_pathname(entry);
@@ -1431,9 +1553,29 @@ read_mac_metadata_blob(struct archive_read *a, struct tar *tar,
 		 * If last path element starts with "._", then
 		 * this is a Mac extension.
 		 */
-		if (name[0] != '.' || name[1] != '_' || name[2] == '\0')
-			return ARCHIVE_OK;
+		if (name[0] == '.' && name[1] == '_' && name[2] != '\0')
+			return 1;
 	}
+	/* Not a mac extension */
+	return 0;
+}
+
+/*
+ * Read a Mac AppleDouble-encoded blob of file metadata,
+ * if there is one.
+ *
+ * TODO: In Libarchive 4, we should consider ripping this
+ * out -- instead, return a file starting with `._` as
+ * a regular file and let the client (or archive_write logic)
+ * handle it.
+ */
+static int
+read_mac_metadata_blob(struct archive_read *a,
+    struct archive_entry *entry, size_t *unconsumed)
+{
+	int64_t size;
+	size_t msize;
+	const void *data;
 
  	/* Read the body as a Mac OS metadata blob. */
 	size = archive_entry_size(entry);
@@ -1443,6 +1585,17 @@ read_mac_metadata_blob(struct archive_read *a, struct tar *tar,
 		return (ARCHIVE_FATAL);
 	}
 
+	/* TODO: Should this merely skip the overlarge entry and
+	 * WARN?  Or is xattr_limit sufficiently large that we can
+	 * safely assume anything larger is malicious? */
+	if (size > (int64_t)xattr_limit) {
+		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+		    "Oversized AppleDouble extension has size %llu > %llu",
+		    (unsigned long long)size,
+		    (unsigned long long)xattr_limit);
+		return (ARCHIVE_FATAL);
+	}
+
 	/*
 	 * TODO: Look beyond the body here to peek at the next header.
 	 * If it's a regular header (not an extension header)
@@ -1455,15 +1608,16 @@ read_mac_metadata_blob(struct archive_read *a, struct tar *tar,
 	 * Q: Is the above idea really possible?  Even
 	 * when there are GNU or pax extension entries?
 	 */
+	tar_flush_unconsumed(a, unconsumed);
 	data = __archive_read_ahead(a, msize, NULL);
 	if (data == NULL) {
 		*unconsumed = 0;
 		return (ARCHIVE_FATAL);
 	}
+	archive_entry_clear(entry);
 	archive_entry_copy_mac_metadata(entry, data, msize);
 	*unconsumed = (msize + 511) & ~ 511;
-	tar_flush_unconsumed(a, unconsumed);
-	return (tar_read_header(a, tar, entry, unconsumed));
+	return (ARCHIVE_OK);
 }
 
 /*
@@ -1473,76 +1627,57 @@ static int
 header_pax_global(struct archive_read *a, struct tar *tar,
     struct archive_entry *entry, const void *h, size_t *unconsumed)
 {
-	int err;
-
-	err = read_body_to_string(a, tar, &(tar->pax_global), h, unconsumed);
-	if (err != ARCHIVE_OK)
-		return (err);
-	err = tar_read_header(a, tar, entry, unconsumed);
-	return (err);
-}
-
-static int
-header_pax_extensions(struct archive_read *a, struct tar *tar,
-    struct archive_entry *entry, const void *h, size_t *unconsumed)
-{
-	int err, err2;
-
-	err = read_body_to_string(a, tar, &(tar->pax_header), h, unconsumed);
-	if (err != ARCHIVE_OK)
-		return (err);
+	const struct archive_entry_header_ustar *header;
+	int64_t size, to_consume;
 
-	/* Parse the next header. */
-	err = tar_read_header(a, tar, entry, unconsumed);
-	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
-		return (err);
+	(void)a; /* UNUSED */
+	(void)tar; /* UNUSED */
+	(void)entry; /* UNUSED */
 
-	/*
-	 * TODO: Parse global/default options into 'entry' struct here
-	 * before handling file-specific options.
-	 *
-	 * This design (parse standard header, then overwrite with pax
-	 * extended attribute data) usually works well, but isn't ideal;
-	 * it would be better to parse the pax extended attributes first
-	 * and then skip any fields in the standard header that were
-	 * defined in the pax header.
-	 */
-	err2 = pax_header(a, tar, entry, &tar->pax_header);
-	err =  err_combine(err, err2);
-	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
-	return (err);
+	header = (const struct archive_entry_header_ustar *)h;
+	size = tar_atol(header->size, sizeof(header->size));
+	to_consume = ((size + 511) & ~511);
+	*unconsumed += to_consume;
+	return (ARCHIVE_OK);
 }
 
-
 /*
  * Parse a file header for a Posix "ustar" archive entry.  This also
  * handles "pax" or "extended ustar" entries.
+ *
+ * In order to correctly handle pax attributes (which precede this),
+ * we have to skip parsing any field for which the entry already has
+ * contents.
  */
 static int
 header_ustar(struct archive_read *a, struct tar *tar,
     struct archive_entry *entry, const void *h)
 {
 	const struct archive_entry_header_ustar	*header;
-	struct archive_string *as;
+	struct archive_string as;
 	int err = ARCHIVE_OK, r;
 
 	header = (const struct archive_entry_header_ustar *)h;
 
 	/* Copy name into an internal buffer to ensure null-termination. */
-	as = &(tar->entry_pathname);
-	if (header->prefix[0]) {
-		archive_strncpy(as, header->prefix, sizeof(header->prefix));
-		if (as->s[archive_strlen(as) - 1] != '/')
-			archive_strappend_char(as, '/');
-		archive_strncat(as, header->name, sizeof(header->name));
-	} else {
-		archive_strncpy(as, header->name, sizeof(header->name));
-	}
-	if (archive_entry_copy_pathname_l(entry, as->s, archive_strlen(as),
-	    tar->sconv) != 0) {
-		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
-		if (err == ARCHIVE_FATAL)
-			return (err);
+	const char *existing_pathname = archive_entry_pathname(entry);
+	if (existing_pathname == NULL || existing_pathname[0] == '\0') {
+		archive_string_init(&as);
+		if (header->prefix[0]) {
+			archive_strncpy(&as, header->prefix, sizeof(header->prefix));
+			if (as.s[archive_strlen(&as) - 1] != '/')
+				archive_strappend_char(&as, '/');
+			archive_strncat(&as, header->name, sizeof(header->name));
+		} else {
+			archive_strncpy(&as, header->name, sizeof(header->name));
+		}
+		if (archive_entry_copy_pathname_l(entry, as.s, archive_strlen(&as),
+		    tar->sconv) != 0) {
+			err = set_conversion_failed_error(a, tar->sconv, "Pathname");
+			if (err == ARCHIVE_FATAL)
+				return (err);
+		}
+		archive_string_free(&as);
 	}
 
 	/* Handle rest of common fields. */
@@ -1553,26 +1688,36 @@ header_ustar(struct archive_read *a, struct tar *tar,
 		err = r;
 
 	/* Handle POSIX ustar fields. */
-	if (archive_entry_copy_uname_l(entry,
-	    header->uname, sizeof(header->uname), tar->sconv) != 0) {
-		err = set_conversion_failed_error(a, tar->sconv, "Uname");
-		if (err == ARCHIVE_FATAL)
-			return (err);
-	}
-
-	if (archive_entry_copy_gname_l(entry,
-	    header->gname, sizeof(header->gname), tar->sconv) != 0) {
-		err = set_conversion_failed_error(a, tar->sconv, "Gname");
-		if (err == ARCHIVE_FATAL)
-			return (err);
+	const char *existing_uname = archive_entry_uname(entry);
+	if (existing_uname == NULL || existing_uname[0] == '\0') {
+		if (archive_entry_copy_uname_l(entry,
+		    header->uname, sizeof(header->uname), tar->sconv) != 0) {
+			err = set_conversion_failed_error(a, tar->sconv, "Uname");
+			if (err == ARCHIVE_FATAL)
+				return (err);
+		}
+	}
+
+	const char *existing_gname = archive_entry_gname(entry);
+	if (existing_gname == NULL || existing_gname[0] == '\0') {
+		if (archive_entry_copy_gname_l(entry,
+		    header->gname, sizeof(header->gname), tar->sconv) != 0) {
+			err = set_conversion_failed_error(a, tar->sconv, "Gname");
+			if (err == ARCHIVE_FATAL)
+				return (err);
+		}
 	}
 
 	/* Parse out device numbers only for char and block specials. */
 	if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
-		archive_entry_set_rdevmajor(entry, (dev_t)
-		    tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
-		archive_entry_set_rdevminor(entry, (dev_t)
-		    tar_atol(header->rdevminor, sizeof(header->rdevminor)));
+		if (!archive_entry_rdev_is_set(entry)) {
+			archive_entry_set_rdevmajor(entry, (dev_t)
+			    tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
+			archive_entry_set_rdevminor(entry, (dev_t)
+			    tar_atol(header->rdevminor, sizeof(header->rdevminor)));
+		}
+	} else {
+		archive_entry_set_rdev(entry, 0);
 	}
 
 	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
@@ -1580,117 +1725,202 @@ header_ustar(struct archive_read *a, struct tar *tar,
 	return (err);
 }
 
-
-/*
- * Parse the pax extended attributes record.
- *
- * Returns non-zero if there's an error in the data.
- */
 static int
-pax_header(struct archive_read *a, struct tar *tar,
-    struct archive_entry *entry, struct archive_string *in_as)
+header_pax_extension(struct archive_read *a, struct tar *tar,
+    struct archive_entry *entry, const void *h, size_t *unconsumed)
 {
-	size_t attr_length, l, line_length, value_length;
-	char *p;
-	char *key, *value;
-	struct archive_string *as;
+	/* Sanity checks: The largest `x` body I've ever heard of was
+	 * a little over 4MB.  So I doubt there has ever been a
+	 * well-formed archive with an `x` body over 1GiB.  Similarly,
+	 * it seems plausible that no single attribute has ever been
+	 * larger than 100MB.  So if we see a larger value here, it's
+	 * almost certainly a sign of a corrupted/malicious archive. */
+
+	/* Maximum sane size for extension body: 1 GiB */
+	/* This cannot be raised to larger than 8GiB without
+	 * exceeding the maximum size for a standard ustar
+	 * entry. */
+	const int64_t ext_size_limit = 1024 * 1024 * (int64_t)1024;
+	/* Maximum size for a single line/attr: 100 million characters */
+	/* This cannot be raised to more than 2GiB without exceeding
+	 * a `size_t` on 32-bit platforms. */
+	const size_t max_parsed_line_length = 99999999ULL;
+	/* Largest attribute prolog:  size + name. */
+	const size_t max_size_name = 512;
+
+	/* Size and padding of the full extension body */
+	int64_t ext_size, ext_padding;
+	size_t line_length, value_length, name_length;
+	ssize_t to_read, did_read;
+	const struct archive_entry_header_ustar *header;
+	const char *p, *attr_start, *name_start;
 	struct archive_string_conv *sconv;
-	int err, err2;
-	char *attr = in_as->s;
+	struct archive_string *pas = NULL;
+	struct archive_string attr_name;
+	int err = ARCHIVE_OK, r;
 
-	attr_length = in_as->length;
-	tar->pax_hdrcharset_binary = 0;
-	archive_string_empty(&(tar->entry_gname));
-	archive_string_empty(&(tar->entry_linkpath));
-	archive_string_empty(&(tar->entry_pathname));
-	archive_string_empty(&(tar->entry_pathname_override));
-	archive_string_empty(&(tar->entry_uname));
-	err = ARCHIVE_OK;
-	while (attr_length > 0) {
-		/* Parse decimal length field at start of line. */
+	header = (const struct archive_entry_header_ustar *)h;
+	ext_size  = tar_atol(header->size, sizeof(header->size));
+	if (ext_size < 0) {
+	  archive_set_error(&a->archive, EINVAL,
+			    "pax extension header has invalid size: %lld",
+			    (long long)ext_size);
+	  return (ARCHIVE_FATAL);
+	}
+
+	ext_padding = 0x1ff & (-ext_size);
+	if (ext_size > ext_size_limit) {
+		/* Consume the pax extension body and return an error */
+		if (ext_size + ext_padding != __archive_read_consume(a, ext_size + ext_padding)) {
+			return (ARCHIVE_FATAL);
+		}
+		archive_set_error(&a->archive, EINVAL,
+		    "Ignoring oversized pax extensions: %d > %d",
+		    (int)ext_size, (int)ext_size_limit);
+		return (ARCHIVE_WARN);
+	}
+	tar_flush_unconsumed(a, unconsumed);
+
+	/* Parse the size/name of each pax attribute in the body */
+	archive_string_init(&attr_name);
+	while (ext_size > 0) {
+		/* Read enough bytes to parse the size/name of the next attribute */
+		to_read = max_size_name;
+		if (to_read > ext_size) {
+			to_read = ext_size;
+		}
+		p = __archive_read_ahead(a, to_read, &did_read);
+		if (did_read < 0) {
+			return ((int)did_read);
+		}
+		if (did_read == 0) { /* EOF */
+			archive_set_error(&a->archive, EINVAL,
+					  "Truncated tar archive"
+					  " detected while reading pax attribute name");
+			return (ARCHIVE_FATAL);
+		}
+		if (did_read > ext_size) {
+			did_read = ext_size;
+		}
+
+		/* Parse size of attribute */
 		line_length = 0;
-		l = attr_length;
-		p = attr; /* Record start of line. */
-		while (l>0) {
+		attr_start = p;
+		while (1) {
+			if (p >= attr_start + did_read) {
+				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+						  "Ignoring malformed pax attributes: overlarge attribute size field");
+				*unconsumed += ext_size + ext_padding;
+				return (ARCHIVE_WARN);
+			}
 			if (*p == ' ') {
 				p++;
-				l--;
 				break;
 			}
 			if (*p < '0' || *p > '9') {
 				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
-				    "Ignoring malformed pax extended attributes");
+						  "Ignoring malformed pax attributes: malformed attribute size field");
+				*unconsumed += ext_size + ext_padding;
 				return (ARCHIVE_WARN);
 			}
 			line_length *= 10;
 			line_length += *p - '0';
-			if (line_length > 999999) {
+			if (line_length > max_parsed_line_length) {
 				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
-				    "Rejecting pax extended attribute > 1MB");
+						  "Ignoring malformed pax attribute: size > %lld",
+						  (long long)max_parsed_line_length);
+				*unconsumed += ext_size + ext_padding;
 				return (ARCHIVE_WARN);
 			}
 			p++;
-			l--;
 		}
 
-		/*
-		 * Parsed length must be no bigger than available data,
-		 * at least 1, and the last character of the line must
-		 * be '\n'.
-		 */
-		if (line_length > attr_length
-		    || line_length < 1
-		    || attr[line_length - 1] != '\n')
-		{
-			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
-			    "Ignoring malformed pax extended attribute");
-			return (ARCHIVE_WARN);
+		if ((int64_t)line_length > ext_size) {
+				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+						  "Ignoring malformed pax attribute:  %lld > %lld",
+						  (long long)line_length, (long long)ext_size);
+				*unconsumed += ext_size + ext_padding;
+				return (ARCHIVE_WARN);
 		}
 
-		/* Null-terminate the line. */
-		attr[line_length - 1] = '\0';
-
-		/* Find end of key and null terminate it. */
-		key = p;
-		if (key[0] == '=')
-			return (-1);
-		while (*p && *p != '=')
-			++p;
-		if (*p == '\0') {
+		/* Parse name of attribute */
+		if (p >= attr_start + did_read
+		    || p >= attr_start + line_length
+		    || *p == '=') {
 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
-			    "Invalid pax extended attributes");
+					  "Ignoring malformed pax attributes: empty name found");
+			*unconsumed += ext_size + ext_padding;
 			return (ARCHIVE_WARN);
 		}
-		*p = '\0';
+		name_start = p;
+		while (1) {
+			if (p >= attr_start + did_read || p >= attr_start + line_length) {
+				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+						  "Ignoring malformed pax attributes: overlarge attribute name");
+				*unconsumed += ext_size + ext_padding;
+				return (ARCHIVE_WARN);
+			}
+			if (*p == '=') {
+				break;
+			}
+			p++;
+		}
+		name_length = p - name_start;
+		p++; // Skip '='
 
-		value = p + 1;
+		archive_strncpy(&attr_name, name_start, name_length);
 
-		/* Some values may be binary data */
-		value_length = attr + line_length - 1 - value;
+		ext_size -= p - attr_start;
+		value_length = line_length - (p - attr_start);
 
-		/* Identify this attribute and set it in the entry. */
-		err2 = pax_attribute(a, tar, entry, key, value, value_length);
-		if (err2 == ARCHIVE_FATAL)
-			return (err2);
-		err = err_combine(err, err2);
+		/* Consume size, name, and `=` */
+		*unconsumed += p - attr_start;
+		tar_flush_unconsumed(a, unconsumed);
+
+		/* pax_attribute will consume value_length - 1 */
+		r = pax_attribute(a, tar, entry, attr_name.s, archive_strlen(&attr_name), value_length - 1, unconsumed);
+		ext_size -= value_length - 1;
+
+		if (r < ARCHIVE_WARN) {
+			*unconsumed += ext_size + ext_padding;
+			return (r);
+		}
+		err = err_combine(err, r);
 
-		/* Skip to next line */
-		attr += line_length;
-		attr_length -= line_length;
+		/* Consume the `\n` that follows the pax attribute value. */
+		tar_flush_unconsumed(a, unconsumed);
+		p = __archive_read_ahead(a, 1, &did_read);
+		if (did_read < 0) {
+			return ((int)did_read);
+		}
+		if (did_read == 0) {
+			archive_set_error(&a->archive, EINVAL,
+					  "Truncated tar archive"
+					  " detected while completing pax attribute");
+			return (ARCHIVE_FATAL);
+		}
+		if (p[0] != '\n') {
+			archive_set_error(&a->archive, EINVAL,
+					  "Malformed pax attributes");
+			*unconsumed += ext_size + ext_padding;
+			return (ARCHIVE_WARN);
+		}
+		ext_size -= 1;
+		*unconsumed += 1;
+		tar_flush_unconsumed(a, unconsumed);
 	}
+	*unconsumed += ext_size + ext_padding;
 
 	/*
-	 * PAX format uses UTF-8 as default charset for its metadata
-	 * unless hdrcharset=BINARY is present in its header.
-	 * We apply the charset specified by the hdrcharset option only
-	 * when the hdrcharset attribute(in PAX header) is BINARY because
-	 * we respect the charset described in PAX header and BINARY also
-	 * means that metadata(filename,uname and gname) character-set
-	 * is unknown.
+	 * Some PAX values -- pathname, linkpath, uname, gname --
+	 * can't be copied into the entry until we know the character
+	 * set to use:
 	 */
-	if (tar->pax_hdrcharset_binary)
+	if (!tar->pax_hdrcharset_utf8)
+		/* PAX specified "BINARY", so use the default charset */
 		sconv = tar->opt_sconv;
 	else {
+		/* PAX default UTF-8 */
 		sconv = archive_string_conversion_from_charset(
 		    &(a->archive), "UTF-8", 1);
 		if (sconv == NULL)
@@ -1700,83 +1930,85 @@ pax_header(struct archive_read *a, struct tar *tar,
 			    SCONV_SET_OPT_UTF8_LIBARCHIVE2X);
 	}
 
+	/* Pathname */
+	pas = NULL;
+	if (archive_strlen(&(tar->entry_pathname_override)) > 0) {
+		/* Prefer GNU.sparse.name attribute if present */
+		/* GNU sparse files store a fake name under the standard
+		 * "pathname" key. */
+		pas = &(tar->entry_pathname_override);
+	} else if (archive_strlen(&(tar->entry_pathname)) > 0) {
+		/* Use standard "pathname" PAX extension */
+		pas = &(tar->entry_pathname);
+	}
+	if (pas != NULL) {
+		if (archive_entry_copy_pathname_l(entry, pas->s,
+		    archive_strlen(pas), sconv) != 0) {
+			err = set_conversion_failed_error(a, sconv, "Pathname");
+			if (err == ARCHIVE_FATAL)
+				return (err);
+			/* Use raw name without conversion */
+			archive_entry_copy_pathname(entry, pas->s);
+		}
+	}
+	/* Uname */
+	if (archive_strlen(&(tar->entry_uname)) > 0) {
+		if (archive_entry_copy_uname_l(entry, tar->entry_uname.s,
+		    archive_strlen(&(tar->entry_uname)), sconv) != 0) {
+			err = set_conversion_failed_error(a, sconv, "Uname");
+			if (err == ARCHIVE_FATAL)
+				return (err);
+			/* Use raw name without conversion */
+			archive_entry_copy_uname(entry, tar->entry_uname.s);
+		}
+	}
+	/* Gname */
 	if (archive_strlen(&(tar->entry_gname)) > 0) {
 		if (archive_entry_copy_gname_l(entry, tar->entry_gname.s,
 		    archive_strlen(&(tar->entry_gname)), sconv) != 0) {
 			err = set_conversion_failed_error(a, sconv, "Gname");
 			if (err == ARCHIVE_FATAL)
 				return (err);
-			/* Use a converted an original name. */
+			/* Use raw name without conversion */
 			archive_entry_copy_gname(entry, tar->entry_gname.s);
 		}
 	}
+	/* Linkpath */
 	if (archive_strlen(&(tar->entry_linkpath)) > 0) {
 		if (archive_entry_copy_link_l(entry, tar->entry_linkpath.s,
 		    archive_strlen(&(tar->entry_linkpath)), sconv) != 0) {
-			err = set_conversion_failed_error(a, sconv, "Linkname");
+			err = set_conversion_failed_error(a, sconv, "Linkpath");
 			if (err == ARCHIVE_FATAL)
 				return (err);
-			/* Use a converted an original name. */
+			/* Use raw name without conversion */
 			archive_entry_copy_link(entry, tar->entry_linkpath.s);
 		}
 	}
-	/*
-	 * Some extensions (such as the GNU sparse file extensions)
-	 * deliberately store a synthetic name under the regular 'path'
-	 * attribute and the real file name under a different attribute.
-	 * Since we're supposed to not care about the order, we
-	 * have no choice but to store all of the various filenames
-	 * we find and figure it all out afterwards.  This is the
-	 * figuring out part.
-	 */
-	as = NULL;
-	if (archive_strlen(&(tar->entry_pathname_override)) > 0)
-		as = &(tar->entry_pathname_override);
-	else if (archive_strlen(&(tar->entry_pathname)) > 0)
-		as = &(tar->entry_pathname);
-	if (as != NULL) {
-		if (archive_entry_copy_pathname_l(entry, as->s,
-		    archive_strlen(as), sconv) != 0) {
-			err = set_conversion_failed_error(a, sconv, "Pathname");
-			if (err == ARCHIVE_FATAL)
-				return (err);
-			/* Use a converted an original name. */
-			archive_entry_copy_pathname(entry, as->s);
-		}
-	}
-	if (archive_strlen(&(tar->entry_uname)) > 0) {
-		if (archive_entry_copy_uname_l(entry, tar->entry_uname.s,
-		    archive_strlen(&(tar->entry_uname)), sconv) != 0) {
-			err = set_conversion_failed_error(a, sconv, "Uname");
-			if (err == ARCHIVE_FATAL)
-				return (err);
-			/* Use a converted an original name. */
-			archive_entry_copy_uname(entry, tar->entry_uname.s);
-		}
-	}
+
+	/* Extension may have given us a corrected `entry_bytes_remaining` for
+	 * the main entry; update the padding appropriately. */
+	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
 	return (err);
 }
 
 static int
-pax_attribute_xattr(struct archive_entry *entry,
-	const char *name, const char *value)
+pax_attribute_LIBARCHIVE_xattr(struct archive_entry *entry,
+	const char *name, size_t name_length, const char *value, size_t value_length)
 {
 	char *name_decoded;
 	void *value_decoded;
 	size_t value_len;
 
-	if (strlen(name) < 18 || (memcmp(name, "LIBARCHIVE.xattr.", 17)) != 0)
+	if (name_length < 1)
 		return 3;
 
-	name += 17;
-
 	/* URL-decode name */
-	name_decoded = url_decode(name);
+	name_decoded = url_decode(name, name_length);
 	if (name_decoded == NULL)
 		return 2;
 
 	/* Base-64 decode value */
-	value_decoded = base64_decode(value, strlen(value), &value_len);
+	value_decoded = base64_decode(value, value_length, &value_len);
 	if (value_decoded == NULL) {
 		free(name_decoded);
 		return 1;
@@ -1791,21 +2023,26 @@ pax_attribute_xattr(struct archive_entry *entry,
 }
 
 static int
-pax_attribute_schily_xattr(struct archive_entry *entry,
-	const char *name, const char *value, size_t value_length)
+pax_attribute_SCHILY_xattr(struct archive_entry *entry,
+	const char *name, size_t name_length, const char *value, size_t value_length)
 {
-	if (strlen(name) < 14 || (memcmp(name, "SCHILY.xattr.", 13)) != 0)
+	if (name_length < 1 || name_length > 128) {
 		return 1;
+	}
 
-	name += 13;
-
-	archive_entry_xattr_add_entry(entry, name, value, value_length);
+	char * null_terminated_name = malloc(name_length + 1);
+	if (null_terminated_name != NULL) {
+		memcpy(null_terminated_name, name, name_length);
+		null_terminated_name[name_length] = '\0';
+		archive_entry_xattr_add_entry(entry, null_terminated_name, value, value_length);
+		free(null_terminated_name);
+	}
 
 	return 0;
 }
 
 static int
-pax_attribute_rht_security_selinux(struct archive_entry *entry,
+pax_attribute_RHT_security_selinux(struct archive_entry *entry,
 	const char *value, size_t value_length)
 {
 	archive_entry_xattr_add_entry(entry, "security.selinux",
@@ -1815,10 +2052,11 @@ pax_attribute_rht_security_selinux(struct archive_entry *entry,
 }
 
 static int
-pax_attribute_acl(struct archive_read *a, struct tar *tar,
-    struct archive_entry *entry, const char *value, int type)
+pax_attribute_SCHILY_acl(struct archive_read *a, struct tar *tar,
+	struct archive_entry *entry, size_t value_length, int type)
 {
 	int r;
+	const char *p;
 	const char* errstr;
 
 	switch (type) {
@@ -1845,8 +2083,28 @@ pax_attribute_acl(struct archive_read *a, struct tar *tar,
 			return (ARCHIVE_FATAL);
 	}
 
-	r = archive_acl_from_text_l(archive_entry_acl(entry), value, type,
-	    tar->sconv_acl);
+	if (value_length > acl_limit) {
+		__archive_read_consume(a, value_length);
+		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+				  "Unreasonably large ACL: %d > %d",
+				  (int)value_length, (int)acl_limit);
+		return (ARCHIVE_WARN);
+	}
+
+	p = __archive_read_ahead(a, value_length, NULL);
+	if (p == NULL) {
+		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
+				  "Truncated tar archive "
+				  "detected while reading ACL data");
+		return (ARCHIVE_FATAL);
+	}
+
+	r = archive_acl_from_text_nl(archive_entry_acl(entry), p, value_length,
+	    type, tar->sconv_acl);
+	__archive_read_consume(a, value_length);
+	/* Workaround: Force perm_is_set() to be correct */
+	/* If this bit were stored in the ACL, this wouldn't be needed */
+	archive_entry_set_perm(entry, archive_entry_perm(entry));
 	if (r != ARCHIVE_OK) {
 		if (r == ARCHIVE_FATAL) {
 			archive_set_error(&a->archive, ENOMEM,
@@ -1860,240 +2118,536 @@ pax_attribute_acl(struct archive_read *a, struct tar *tar,
 	return (r);
 }
 
+/* Read a value_length-byte pax time value into *ps (seconds) and *pn (nanoseconds); ARCHIVE_WARN if seconds are negative or INT64_MAX. */
+static int
+pax_attribute_read_time(struct archive_read *a, size_t value_length, int64_t *ps, long *pn, size_t *unconsumed) {
+	struct archive_string as;
+	int r;
+
+	/* pax_time() leaves *pn untouched for an empty value, so define both outputs up front. */
+	*ps = 0;
+	*pn = 0;
+	if (value_length > 128) {
+		__archive_read_consume(a, value_length);
+		return (ARCHIVE_FATAL);
+	}
+
+	archive_string_init(&as);
+	r = read_bytes_to_string(a, &as, value_length, unconsumed);
+	if (r >= ARCHIVE_OK) {
+		pax_time(as.s, archive_strlen(&as), ps, pn);
+		r = (*ps < 0 || *ps == INT64_MAX) ? ARCHIVE_WARN : ARCHIVE_OK;
+	}
+	/* Free the scratch string on every path so its buffer is not leaked. */
+	archive_string_free(&as);
+	return (r);
+}
+
+/* Read a value_length-byte decimal pax value into *result; ARCHIVE_WARN (with *result = INT64_MAX) if it is negative or overflowed. */
+static int
+pax_attribute_read_number(struct archive_read *a, size_t value_length, int64_t *result) {
+	struct archive_string as;
+	size_t unconsumed = 0;
+	int r;
+
+	/* The "size" handler tests *result even after a failed read, so never leave it indeterminate. */
+	*result = 0;
+	if (value_length > 64) {
+		__archive_read_consume(a, value_length);
+		return (ARCHIVE_FATAL);
+	}
+	archive_string_init(&as);
+	r = read_bytes_to_string(a, &as, value_length, &unconsumed);
+	tar_flush_unconsumed(a, &unconsumed);
+	if (r >= ARCHIVE_OK) {
+		*result = tar_atol10(as.s, archive_strlen(&as));
+		r = (*result < 0 || *result == INT64_MAX) ? ARCHIVE_WARN : ARCHIVE_OK;
+		if (r == ARCHIVE_WARN) {
+			*result = INT64_MAX;
+		}
+	}
+	archive_string_free(&as); /* scratch buffer is released on success and failure alike */
+	return (r);
+}
+
 /*
- * Parse a single key=value attribute.  key/value pointers are
- * assumed to point into reasonably long-lived storage.
+ * Parse a single key=value attribute.
  *
- * Note that POSIX reserves all-lowercase keywords.  Vendor-specific
- * extensions should always have keywords of the form "VENDOR.attribute"
- * In particular, it's quite feasible to support many different
- * vendor extensions here.  I'm using "LIBARCHIVE" for extensions
- * unique to this library.
+ * POSIX reserves all-lowercase keywords.  Vendor-specific extensions
+ * should always have keywords of the form "VENDOR.attribute".  In
+ * particular, it's quite feasible to support many different vendor
+ * extensions here.  I'm using "LIBARCHIVE" for extensions unique to
+ * this library.
  *
- * Investigate other vendor-specific extensions and see if
+ * TODO: Investigate other vendor-specific extensions and see if
  * any of them look useful.
  */
 static int
-pax_attribute(struct archive_read *a, struct tar *tar,
-    struct archive_entry *entry, const char *key, const char *value, size_t value_length)
+pax_attribute(struct archive_read *a, struct tar *tar, struct archive_entry *entry,
+	      const char *key, size_t key_length, size_t value_length, size_t *unconsumed)
 {
-	int64_t s;
+	int64_t t;
 	long n;
-	int err = ARCHIVE_OK, r;
+	const char *p;
+	ssize_t bytes_read;
+	int err = ARCHIVE_OK;
 
-	if (value == NULL)
-		value = "";	/* Disable compiler warning; do not pass
-				 * NULL pointer to strlen().  */
 	switch (key[0]) {
 	case 'G':
-		/* Reject GNU.sparse.* headers on non-regular files. */
-		if (strncmp(key, "GNU.sparse", 10) == 0 &&
-		    !tar->sparse_allowed) {
-			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
-			    "Non-regular file cannot be sparse");
-			return (ARCHIVE_FATAL);
-		}
-
-		/* GNU "0.0" sparse pax format. */
-		if (strcmp(key, "GNU.sparse.numblocks") == 0) {
-			tar->sparse_offset = -1;
-			tar->sparse_numbytes = -1;
-			tar->sparse_gnu_major = 0;
-			tar->sparse_gnu_minor = 0;
-		}
-		if (strcmp(key, "GNU.sparse.offset") == 0) {
-			tar->sparse_offset = tar_atol10(value, strlen(value));
-			if (tar->sparse_numbytes != -1) {
-				if (gnu_add_sparse_entry(a, tar,
-				    tar->sparse_offset, tar->sparse_numbytes)
-				    != ARCHIVE_OK)
-					return (ARCHIVE_FATAL);
-				tar->sparse_offset = -1;
-				tar->sparse_numbytes = -1;
-			}
-		}
-		if (strcmp(key, "GNU.sparse.numbytes") == 0) {
-			tar->sparse_numbytes = tar_atol10(value, strlen(value));
-			if (tar->sparse_offset != -1) {
-				if (gnu_add_sparse_entry(a, tar,
-				    tar->sparse_offset, tar->sparse_numbytes)
-				    != ARCHIVE_OK)
-					return (ARCHIVE_FATAL);
-				tar->sparse_offset = -1;
-				tar->sparse_numbytes = -1;
+		/* GNU.* extensions */
+		if (key_length > 4 && memcmp(key, "GNU.", 4) == 0) {
+			key += 4;
+			key_length -= 4;
+
+			/* GNU.sparse marks the existence of GNU sparse information */
+			if (key_length == 6 && memcmp(key, "sparse", 6) == 0) {
+				tar->sparse_gnu_attributes_seen = 1;
 			}
-		}
-		if (strcmp(key, "GNU.sparse.size") == 0) {
-			tar->realsize = tar_atol10(value, strlen(value));
-			archive_entry_set_size(entry, tar->realsize);
-			tar->realsize_override = 1;
-		}
-
-		/* GNU "0.1" sparse pax format. */
-		if (strcmp(key, "GNU.sparse.map") == 0) {
-			tar->sparse_gnu_major = 0;
-			tar->sparse_gnu_minor = 1;
-			if (gnu_sparse_01_parse(a, tar, value) != ARCHIVE_OK)
-				return (ARCHIVE_WARN);
-		}
 
-		/* GNU "1.0" sparse pax format */
-		if (strcmp(key, "GNU.sparse.major") == 0) {
-			tar->sparse_gnu_major = (int)tar_atol10(value, strlen(value));
-			tar->sparse_gnu_pending = 1;
-		}
-		if (strcmp(key, "GNU.sparse.minor") == 0) {
-			tar->sparse_gnu_minor = (int)tar_atol10(value, strlen(value));
-			tar->sparse_gnu_pending = 1;
-		}
-		if (strcmp(key, "GNU.sparse.name") == 0) {
-			/*
-			 * The real filename; when storing sparse
-			 * files, GNU tar puts a synthesized name into
-			 * the regular 'path' attribute in an attempt
-			 * to limit confusion. ;-)
-			 */
-			archive_strcpy(&(tar->entry_pathname_override), value);
-		}
-		if (strcmp(key, "GNU.sparse.realsize") == 0) {
-			tar->realsize = tar_atol10(value, strlen(value));
-			archive_entry_set_size(entry, tar->realsize);
-			tar->realsize_override = 1;
+			/* GNU.sparse.* extensions */
+			else if (key_length > 7 && memcmp(key, "sparse.", 7) == 0) {
+				tar->sparse_gnu_attributes_seen = 1;
+				key += 7;
+				key_length -= 7;
+
+				/* GNU "0.0" sparse pax format. */
+				if (key_length == 9 && memcmp(key, "numblocks", 9) == 0) {
+					/* GNU.sparse.numblocks */
+					tar->sparse_offset = -1;
+					tar->sparse_numbytes = -1;
+					tar->sparse_gnu_major = 0;
+					tar->sparse_gnu_minor = 0;
+				}
+				else if (key_length == 6 && memcmp(key, "offset", 6) == 0) {
+					/* GNU.sparse.offset */
+					if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
+						tar->sparse_offset = t;
+						if (tar->sparse_numbytes != -1) {
+							if (gnu_add_sparse_entry(a, tar,
+									 tar->sparse_offset, tar->sparse_numbytes)
+							    != ARCHIVE_OK)
+								return (ARCHIVE_FATAL);
+							tar->sparse_offset = -1;
+							tar->sparse_numbytes = -1;
+						}
+					}
+					return (err);
+				}
+				else if (key_length == 8 && memcmp(key, "numbytes", 8) == 0) {
+					/* GNU.sparse.numbytes */
+					if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
+						tar->sparse_numbytes = t;
+						if (tar->sparse_offset != -1) {
+							if (gnu_add_sparse_entry(a, tar,
+									 tar->sparse_offset, tar->sparse_numbytes)
+							    != ARCHIVE_OK)
+								return (ARCHIVE_FATAL);
+							tar->sparse_offset = -1;
+							tar->sparse_numbytes = -1;
+						}
+					}
+					return (err);
+				}
+				else if (key_length == 4 && memcmp(key, "size", 4) == 0) {
+					/* GNU.sparse.size */
+					if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
+						tar->realsize = t;
+						archive_entry_set_size(entry, tar->realsize);
+						tar->realsize_override = 1;
+					}
+					return (err);
+				}
+
+				/* GNU "0.1" sparse pax format. */
+				else if (key_length == 3 && memcmp(key, "map", 3) == 0) {
+					/* GNU.sparse.map */
+					tar->sparse_gnu_major = 0;
+					tar->sparse_gnu_minor = 1;
+					if (value_length > sparse_map_limit) {
+						archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+								  "Unreasonably large sparse map: %d > %d",
+								  (int)value_length, (int)sparse_map_limit);
+						err = ARCHIVE_FAILED;
+					} else {
+						p = __archive_read_ahead(a, value_length, &bytes_read);
+						if (p != NULL) {
+							if (gnu_sparse_01_parse(a, tar, p, value_length) != ARCHIVE_OK) {
+								err = ARCHIVE_WARN;
+							}
+						} else {
+							return (ARCHIVE_FATAL);
+						}
+					}
+					__archive_read_consume(a, value_length);
+					return (err);
+				}
+
+				/* GNU "1.0" sparse pax format */
+				else if (key_length == 5 && memcmp(key, "major", 5) == 0) {
+					/* GNU.sparse.major */
+					if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK
+					    && t >= 0
+					    && t <= 10) {
+						tar->sparse_gnu_major = (int)t;
+					}
+					return (err);
+				}
+				else if (key_length == 5 && memcmp(key, "minor", 5) == 0) {
+					/* GNU.sparse.minor */
+					if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK
+					    && t >= 0
+					    && t <= 10) {
+						tar->sparse_gnu_minor = (int)t;
+					}
+					return (err);
+				}
+				else if (key_length == 4 && memcmp(key, "name", 4) == 0) {
+					/* GNU.sparse.name */
+					/*
+					 * The real filename; when storing sparse
+					 * files, GNU tar puts a synthesized name into
+					 * the regular 'path' attribute in an attempt
+					 * to limit confusion. ;-)
+					 */
+					if (value_length > pathname_limit) {
+						*unconsumed += value_length;
+						err = ARCHIVE_WARN;
+					} else {
+						err = read_bytes_to_string(a, &(tar->entry_pathname_override),
+									   value_length, unconsumed);
+					}
+					return (err);
+				}
+				else if (key_length == 8 && memcmp(key, "realsize", 8) == 0) {
+					/* GNU.sparse.realsize */
+					if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
+						tar->realsize = t;
+						archive_entry_set_size(entry, tar->realsize);
+						tar->realsize_override = 1;
+					}
+					return (err);
+				}
+			}
 		}
 		break;
 	case 'L':
-		/* Our extensions */
-/* TODO: Handle arbitrary extended attributes... */
-/*
-		if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0)
-			archive_entry_set_xxxxxx(entry, value);
-*/
-		if (strcmp(key, "LIBARCHIVE.creationtime") == 0) {
-			pax_time(value, &s, &n);
-			archive_entry_set_birthtime(entry, s, n);
-		}
-		if (strcmp(key, "LIBARCHIVE.symlinktype") == 0) {
-			if (strcmp(value, "file") == 0) {
-				archive_entry_set_symlink_type(entry,
-				    AE_SYMLINK_TYPE_FILE);
-			} else if (strcmp(value, "dir") == 0) {
-				archive_entry_set_symlink_type(entry,
-				    AE_SYMLINK_TYPE_DIRECTORY);
+		/* LIBARCHIVE extensions */
+		if (key_length > 11 && memcmp(key, "LIBARCHIVE.", 11) == 0) {
+			key_length -= 11;
+			key += 11;
+
+			/* TODO: Handle arbitrary extended attributes... */
+			/*
+			  if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0)
+				  archive_entry_set_xxxxxx(entry, value);
+			*/
+			if (key_length == 12 && memcmp(key, "creationtime", 12) == 0) {
+				/* LIBARCHIVE.creationtime */
+				if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) {
+					archive_entry_set_birthtime(entry, t, n);
+				}
+				return (err);
+			}
+			else if (key_length == 11 && memcmp(key, "symlinktype", 11) == 0) {
+				/* LIBARCHIVE.symlinktype */
+				if (value_length < 16) {
+					p = __archive_read_ahead(a, value_length, &bytes_read);
+					if (p != NULL) {
+						if (value_length == 4 && memcmp(p, "file", 4) == 0) {
+							archive_entry_set_symlink_type(entry,
+								       AE_SYMLINK_TYPE_FILE);
+						} else if (value_length == 3 && memcmp(p, "dir", 3) == 0) {
+							archive_entry_set_symlink_type(entry,
+								       AE_SYMLINK_TYPE_DIRECTORY);
+						} else {
+							archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+									  "Unrecognized symlink type");
+							err = ARCHIVE_WARN;
+						}
+					} else {
+						archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
+								  "Truncated tar archive "
+								  "detected while reading `symlinktype` attribute");
+						return (ARCHIVE_FATAL);
+					}
+				} else {
+					archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+							  "symlink type is very long"
+							  "(longest recognized value is 4 bytes, this is %d)",
+							  (int)value_length);
+					err = ARCHIVE_WARN;
+				}
+				__archive_read_consume(a, value_length);
+				return (err);
+			}
+			else if (key_length > 6 && memcmp(key, "xattr.", 6) == 0) {
+				key_length -= 6;
+				key += 6;
+				if (value_length > xattr_limit) {
+					err = ARCHIVE_WARN;
+				} else {
+					p = __archive_read_ahead(a, value_length, &bytes_read);
+					if (p == NULL
+					    || pax_attribute_LIBARCHIVE_xattr(entry, key, key_length, p, value_length)) {
+						/* TODO: Unable to parse xattr */
+						err = ARCHIVE_WARN;
+					}
+				}
+				__archive_read_consume(a, value_length);
+				return (err);
 			}
 		}
-		if (memcmp(key, "LIBARCHIVE.xattr.", 17) == 0)
-			pax_attribute_xattr(entry, key, value);
 		break;
 	case 'R':
 		/* GNU tar uses RHT.security header to store SELinux xattrs
 		 * SCHILY.xattr.security.selinux == RHT.security.selinux */
-		if (strcmp(key, "RHT.security.selinux") == 0) {
-			pax_attribute_rht_security_selinux(entry, value,
-			    value_length);
+		if (key_length == 20 && memcmp(key, "RHT.security.selinux", 20) == 0) {
+			if (value_length > xattr_limit) {
+				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+						  "Ignoring unreasonably large security.selinux attribute:"
+						  " %d > %d",
+						  (int)value_length, (int)xattr_limit);
+				/* TODO: Should this be FAILED instead? */
+				err = ARCHIVE_WARN;
+			} else {
+				p = __archive_read_ahead(a, value_length, &bytes_read);
+				if (p == NULL
+				    || pax_attribute_RHT_security_selinux(entry, p, value_length)) {
+					/* TODO: Unable to parse xattr */
+					err = ARCHIVE_WARN;
+				}
 			}
+			__archive_read_consume(a, value_length);
+			return (err);
+		}
 		break;
 	case 'S':
-		/* We support some keys used by the "star" archiver */
-		if (strcmp(key, "SCHILY.acl.access") == 0) {
-			r = pax_attribute_acl(a, tar, entry, value,
-			    ARCHIVE_ENTRY_ACL_TYPE_ACCESS);
-			if (r == ARCHIVE_FATAL)
-				return (r);
-		} else if (strcmp(key, "SCHILY.acl.default") == 0) {
-			r = pax_attribute_acl(a, tar, entry, value,
-			    ARCHIVE_ENTRY_ACL_TYPE_DEFAULT);
-			if (r == ARCHIVE_FATAL)
-				return (r);
-		} else if (strcmp(key, "SCHILY.acl.ace") == 0) {
-			r = pax_attribute_acl(a, tar, entry, value,
-			    ARCHIVE_ENTRY_ACL_TYPE_NFS4);
-			if (r == ARCHIVE_FATAL)
-				return (r);
-		} else if (strcmp(key, "SCHILY.devmajor") == 0) {
-			archive_entry_set_rdevmajor(entry,
-			    (dev_t)tar_atol10(value, strlen(value)));
-		} else if (strcmp(key, "SCHILY.devminor") == 0) {
-			archive_entry_set_rdevminor(entry,
-			    (dev_t)tar_atol10(value, strlen(value)));
-		} else if (strcmp(key, "SCHILY.fflags") == 0) {
-			archive_entry_copy_fflags_text(entry, value);
-		} else if (strcmp(key, "SCHILY.dev") == 0) {
-			archive_entry_set_dev(entry,
-			    (dev_t)tar_atol10(value, strlen(value)));
-		} else if (strcmp(key, "SCHILY.ino") == 0) {
-			archive_entry_set_ino(entry,
-			    tar_atol10(value, strlen(value)));
-		} else if (strcmp(key, "SCHILY.nlink") == 0) {
-			archive_entry_set_nlink(entry, (unsigned)
-			    tar_atol10(value, strlen(value)));
-		} else if (strcmp(key, "SCHILY.realsize") == 0) {
-			tar->realsize = tar_atol10(value, strlen(value));
-			tar->realsize_override = 1;
-			archive_entry_set_size(entry, tar->realsize);
-		} else if (strncmp(key, "SCHILY.xattr.", 13) == 0) {
-			pax_attribute_schily_xattr(entry, key, value,
-			    value_length);
-		} else if (strcmp(key, "SUN.holesdata") == 0) {
-			/* A Solaris extension for sparse. */
-			r = solaris_sparse_parse(a, tar, entry, value);
-			if (r < err) {
-				if (r == ARCHIVE_FATAL)
-					return (r);
-				err = r;
-				archive_set_error(&a->archive,
-				    ARCHIVE_ERRNO_MISC,
-				    "Parse error: SUN.holesdata");
+		/* SCHILY.* extensions used by "star" archiver */
+		if (key_length > 7 && memcmp(key, "SCHILY.", 7) == 0) {
+			key_length -= 7;
+			key += 7;
+
+			if (key_length == 10 && memcmp(key, "acl.access", 10) == 0) {
+				err = pax_attribute_SCHILY_acl(a, tar, entry, value_length,
+						      ARCHIVE_ENTRY_ACL_TYPE_ACCESS);
+				// TODO: Mark mode as set
+				return (err);
+			}
+			else if (key_length == 11 && memcmp(key, "acl.default", 11) == 0) {
+				err = pax_attribute_SCHILY_acl(a, tar, entry, value_length,
+						      ARCHIVE_ENTRY_ACL_TYPE_DEFAULT);
+				return (err);
+			}
+			else if (key_length == 7 && memcmp(key, "acl.ace", 7) == 0) {
+				err = pax_attribute_SCHILY_acl(a, tar, entry, value_length,
+						      ARCHIVE_ENTRY_ACL_TYPE_NFS4);
+				// TODO: Mark mode as set
+				return (err);
+			}
+			else if (key_length == 8 && memcmp(key, "devmajor", 8) == 0) {
+				if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
+					archive_entry_set_rdevmajor(entry, t);
+				}
+				return (err);
+			}
+			else if (key_length == 8 && memcmp(key, "devminor", 8) == 0) {
+				if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
+					archive_entry_set_rdevminor(entry, t);
+				}
+				return (err);
+			}
+			else if (key_length == 6 && memcmp(key, "fflags", 6) == 0) {
+				if (value_length < fflags_limit) {
+					p = __archive_read_ahead(a, value_length, &bytes_read);
+					if (p != NULL) {
+						archive_entry_copy_fflags_text_len(entry, p, value_length);
+						err = ARCHIVE_OK;
+					} else {
+						/* Truncated archive */
+						err = ARCHIVE_FATAL;
+					}
+				} else {
+					/* Overlong fflags field */
+					err = ARCHIVE_WARN;
+				}
+				__archive_read_consume(a, value_length);
+				return (err);
+			}
+			else if (key_length == 3 && memcmp(key, "dev", 3) == 0) {
+				if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
+					archive_entry_set_dev(entry, t);
+				}
+				return (err);
+			}
+			else if (key_length == 3 && memcmp(key, "ino", 3) == 0) {
+				if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
+					archive_entry_set_ino(entry, t);
+				}
+				return (err);
+			}
+			else if (key_length == 5 && memcmp(key, "nlink", 5) == 0) {
+				if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
+					archive_entry_set_nlink(entry, t);
+				}
+				return (err);
+			}
+			else if (key_length == 8 && memcmp(key, "realsize", 8) == 0) {
+				if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
+					tar->realsize = t;
+					tar->realsize_override = 1;
+					archive_entry_set_size(entry, tar->realsize);
+				}
+				return (err);
+			}
+			else if (key_length > 6 && memcmp(key, "xattr.", 6) == 0) {
+				key_length -= 6;
+				key += 6;
+				if (value_length < xattr_limit) {
+					p = __archive_read_ahead(a, value_length, &bytes_read);
+					if (p == NULL
+					    || pax_attribute_SCHILY_xattr(entry, key, key_length, p, value_length)) {
+						/* TODO: Unable to parse xattr */
+						err = ARCHIVE_WARN;
+					}
+				} else {
+					archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+							  "Unreasonably large xattr: %d > %d",
+							  (int)value_length, (int)xattr_limit);
+					err = ARCHIVE_WARN;
+				}
+				__archive_read_consume(a, value_length);
+				return (err);
+			}
+		}
+		/* SUN.* extensions from Solaris tar */
+		if (key_length > 4 && memcmp(key, "SUN.", 4) == 0) {
+			key_length -= 4;
+			key += 4;
+
+			if (key_length == 9 && memcmp(key, "holesdata", 9) == 0) {
+				/* SUN.holesdata */
+				if (value_length < sparse_map_limit) {
+					p = __archive_read_ahead(a, value_length, &bytes_read);
+					if (p != NULL) {
+						err = pax_attribute_SUN_holesdata(a, tar, entry, p, value_length);
+						if (err < ARCHIVE_OK) {
+							archive_set_error(&a->archive,
+									  ARCHIVE_ERRNO_MISC,
+									  "Parse error: SUN.holesdata");
+						}
+					} else {
+						return (ARCHIVE_FATAL);
+					}
+				} else {
+					archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+							  "Unreasonably large sparse map: %d > %d",
+							  (int)value_length, (int)sparse_map_limit);
+					err = ARCHIVE_FAILED;
+				}
+				__archive_read_consume(a, value_length);
+				return (err);
 			}
 		}
 		break;
 	case 'a':
-		if (strcmp(key, "atime") == 0) {
-			pax_time(value, &s, &n);
-			archive_entry_set_atime(entry, s, n);
+		if (key_length == 5 && memcmp(key, "atime", 5) == 0) {
+			if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) {
+				archive_entry_set_atime(entry, t, n);
+			}
+			return (err);
 		}
 		break;
 	case 'c':
-		if (strcmp(key, "ctime") == 0) {
-			pax_time(value, &s, &n);
-			archive_entry_set_ctime(entry, s, n);
-		} else if (strcmp(key, "charset") == 0) {
+		if (key_length == 5 && memcmp(key, "ctime", 5) == 0) {
+			if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) {
+				archive_entry_set_ctime(entry, t, n);
+			}
+			return (err);
+		} else if (key_length == 7 && memcmp(key, "charset", 7) == 0) {
 			/* TODO: Publish charset information in entry. */
-		} else if (strcmp(key, "comment") == 0) {
+		} else if (key_length == 7 && memcmp(key, "comment", 7) == 0) {
 			/* TODO: Publish comment in entry. */
 		}
 		break;
 	case 'g':
-		if (strcmp(key, "gid") == 0) {
-			archive_entry_set_gid(entry,
-			    tar_atol10(value, strlen(value)));
-		} else if (strcmp(key, "gname") == 0) {
-			archive_strcpy(&(tar->entry_gname), value);
+		if (key_length == 3 && memcmp(key, "gid", 3) == 0) {
+			if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
+				archive_entry_set_gid(entry, t);
+			}
+			return (err);
+		} else if (key_length == 5 && memcmp(key, "gname", 5) == 0) {
+			if (value_length > guname_limit) {
+				*unconsumed += value_length;
+				err = ARCHIVE_WARN;
+			} else {
+				err = read_bytes_to_string(a, &(tar->entry_gname), value_length, unconsumed);
+			}
+			return (err);
 		}
 		break;
 	case 'h':
-		if (strcmp(key, "hdrcharset") == 0) {
-			if (strcmp(value, "BINARY") == 0)
-				/* Binary  mode. */
-				tar->pax_hdrcharset_binary = 1;
-			else if (strcmp(value, "ISO-IR 10646 2000 UTF-8") == 0)
-				tar->pax_hdrcharset_binary = 0;
+		if (key_length == 10 && memcmp(key, "hdrcharset", 10) == 0) {
+			if (value_length < 64) {
+				p = __archive_read_ahead(a, value_length, &bytes_read);
+				if (p != NULL) {
+					if (value_length == 6
+					    && memcmp(p, "BINARY", 6) == 0) {
+						/* Binary  mode. */
+						tar->pax_hdrcharset_utf8 = 0;
+						err = ARCHIVE_OK;
+					} else if (value_length == 23
+						   && memcmp(p, "ISO-IR 10646 2000 UTF-8", 23) == 0) {
+						tar->pax_hdrcharset_utf8 = 1;
+						err = ARCHIVE_OK;
+					} else {
+						/* TODO: Unrecognized character set */
+						err  = ARCHIVE_WARN;
+					}
+				} else {
+					archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
+							  "Truncated tar archive "
+							  "detected while reading hdrcharset attribute");
+					return (ARCHIVE_FATAL);
+				}
+			} else {
+				archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
+						  "hdrcharset attribute is unreasonably large (%d bytes)",
+						  (int)value_length);
+				err = ARCHIVE_WARN;
+			}
+			__archive_read_consume(a, value_length);
+			return (err);
 		}
 		break;
 	case 'l':
 		/* pax interchange doesn't distinguish hardlink vs. symlink. */
-		if (strcmp(key, "linkpath") == 0) {
-			archive_strcpy(&(tar->entry_linkpath), value);
+		if (key_length == 8 && memcmp(key, "linkpath", 8) == 0) {
+			if (value_length > pathname_limit) {
+				*unconsumed += value_length;
+				err = ARCHIVE_WARN;
+			} else {
+				err = read_bytes_to_string(a, &tar->entry_linkpath, value_length, unconsumed);
+			}
+			return (err);
 		}
 		break;
 	case 'm':
-		if (strcmp(key, "mtime") == 0) {
-			pax_time(value, &s, &n);
-			archive_entry_set_mtime(entry, s, n);
+		if (key_length == 5 && memcmp(key, "mtime", 5) == 0) {
+			if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) {
+				archive_entry_set_mtime(entry, t, n);
+			}
+			return (err);
 		}
 		break;
 	case 'p':
-		if (strcmp(key, "path") == 0) {
-			archive_strcpy(&(tar->entry_pathname), value);
+		if (key_length == 4 && memcmp(key, "path", 4) == 0) {
+			if (value_length > pathname_limit) {
+				*unconsumed += value_length;
+				err = ARCHIVE_WARN;
+			} else {
+				err = read_bytes_to_string(a, &(tar->entry_pathname), value_length, unconsumed);
+			}
+			return (err);
 		}
 		break;
 	case 'r':
@@ -2102,48 +2656,54 @@ pax_attribute(struct archive_read *a, struct tar *tar,
 	case 's':
 		/* POSIX has reserved 'security.*' */
 		/* Someday: if (strcmp(key, "security.acl") == 0) { ... } */
-		if (strcmp(key, "size") == 0) {
+		if (key_length == 4 && memcmp(key, "size", 4) == 0) {
 			/* "size" is the size of the data in the entry. */
-			tar->entry_bytes_remaining
-			    = tar_atol10(value, strlen(value));
-			if (tar->entry_bytes_remaining < 0) {
-				tar->entry_bytes_remaining = 0;
-				archive_set_error(&a->archive,
-				    ARCHIVE_ERRNO_MISC,
-				    "Tar size attribute is negative");
-				return (ARCHIVE_FATAL);
+			if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
+				tar->entry_bytes_remaining = t;
+				/*
+				 * The "size" pax header keyword always overrides the
+				 * "size" field in the tar header.
+				 * GNU.sparse.realsize, GNU.sparse.size and
+				 * SCHILY.realsize override this value.
+				 */
+				if (!tar->realsize_override) {
+					archive_entry_set_size(entry,
+							       tar->entry_bytes_remaining);
+					tar->realsize
+						= tar->entry_bytes_remaining;
+				}
 			}
-			if (tar->entry_bytes_remaining == INT64_MAX) {
-				/* Note: tar_atol returns INT64_MAX on overflow */
+			else if (t == INT64_MAX) {
+				/* Note: pax_attribute_read_number returns INT64_MAX on overflow or < 0 */
 				tar->entry_bytes_remaining = 0;
 				archive_set_error(&a->archive,
 				    ARCHIVE_ERRNO_MISC,
 				    "Tar size attribute overflow");
 				return (ARCHIVE_FATAL);
 			}
-			/*
-			 * The "size" pax header keyword always overrides the
-			 * "size" field in the tar header.
-			 * GNU.sparse.realsize, GNU.sparse.size and
-			 * SCHILY.realsize override this value.
-			 */
-			if (!tar->realsize_override) {
-				archive_entry_set_size(entry,
-				    tar->entry_bytes_remaining);
-				tar->realsize
-				    = tar->entry_bytes_remaining;
-			}
+			return (err);
 		}
 		break;
 	case 'u':
-		if (strcmp(key, "uid") == 0) {
-			archive_entry_set_uid(entry,
-			    tar_atol10(value, strlen(value)));
-		} else if (strcmp(key, "uname") == 0) {
-			archive_strcpy(&(tar->entry_uname), value);
+		if (key_length == 3 && memcmp(key, "uid", 3) == 0) {
+			if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
+				archive_entry_set_uid(entry, t);
+			}
+			return (err);
+		} else if (key_length == 5 && memcmp(key, "uname", 5) == 0) {
+			if (value_length > guname_limit) {
+				*unconsumed += value_length;
+				err = ARCHIVE_WARN;
+			} else {
+				err = read_bytes_to_string(a, &(tar->entry_uname), value_length, unconsumed);
+			}
+			return (err);
 		}
 		break;
 	}
+
+	/* Unrecognized key, just skip the entire value. */
+	__archive_read_consume(a, value_length);
 	return (err);
 }
 
@@ -2153,7 +2713,7 @@ pax_attribute(struct archive_read *a, struct tar *tar,
  * parse a decimal time value, which may include a fractional portion
  */
 static void
-pax_time(const char *p, int64_t *ps, long *pn)
+pax_time(const char *p, size_t length, int64_t *ps, long *pn)
 {
 	char digit;
 	int64_t	s;
@@ -2164,13 +2724,18 @@ pax_time(const char *p, int64_t *ps, long *pn)
 	limit = INT64_MAX / 10;
 	last_digit_limit = INT64_MAX % 10;
 
+	if (length <= 0) {
+		*ps = 0;
+		return;
+	}
 	s = 0;
 	sign = 1;
 	if (*p == '-') {
 		sign = -1;
 		p++;
+		length--;
 	}
-	while (*p >= '0' && *p <= '9') {
+	while (length > 0 && *p >= '0' && *p <= '9') {
 		digit = *p - '0';
 		if (s > limit ||
 		    (s == limit && digit > last_digit_limit)) {
@@ -2179,6 +2744,7 @@ pax_time(const char *p, int64_t *ps, long *pn)
 		}
 		s = (s * 10) + digit;
 		++p;
+		--length;
 	}
 
 	*ps = s * sign;
@@ -2186,13 +2752,14 @@ pax_time(const char *p, int64_t *ps, long *pn)
 	/* Calculate nanoseconds. */
 	*pn = 0;
 
-	if (*p != '.')
+	if (length <= 0 || *p != '.')
 		return;
 
 	l = 100000000UL;
 	do {
 		++p;
-		if (*p >= '0' && *p <= '9')
+		--length;
+		if (length > 0 && *p >= '0' && *p <= '9')
 			*pn += (*p - '0') * l;
 		else
 			break;
@@ -2223,49 +2790,65 @@ header_gnutar(struct archive_read *a, struct tar *tar,
 
 	/* Copy filename over (to ensure null termination). */
 	header = (const struct archive_entry_header_gnutar *)h;
-	if (archive_entry_copy_pathname_l(entry,
-	    header->name, sizeof(header->name), tar->sconv) != 0) {
-		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
-		if (err == ARCHIVE_FATAL)
-			return (err);
+	const char *existing_pathname = archive_entry_pathname(entry);
+	if (existing_pathname == NULL || existing_pathname[0] == '\0') {
+		if (archive_entry_copy_pathname_l(entry,
+		    header->name, sizeof(header->name), tar->sconv) != 0) {
+			err = set_conversion_failed_error(a, tar->sconv, "Pathname");
+			if (err == ARCHIVE_FATAL)
+				return (err);
+		}
 	}
 
 	/* Fields common to ustar and GNU */
 	/* XXX Can the following be factored out since it's common
 	 * to ustar and gnu tar?  Is it okay to move it down into
 	 * header_common, perhaps?  */
-	if (archive_entry_copy_uname_l(entry,
-	    header->uname, sizeof(header->uname), tar->sconv) != 0) {
-		err = set_conversion_failed_error(a, tar->sconv, "Uname");
-		if (err == ARCHIVE_FATAL)
-			return (err);
+	const char *existing_uname = archive_entry_uname(entry);
+	if (existing_uname == NULL || existing_uname[0] == '\0') {
+		if (archive_entry_copy_uname_l(entry,
+		    header->uname, sizeof(header->uname), tar->sconv) != 0) {
+			err = set_conversion_failed_error(a, tar->sconv, "Uname");
+			if (err == ARCHIVE_FATAL)
+				return (err);
+		}
 	}
 
-	if (archive_entry_copy_gname_l(entry,
-	    header->gname, sizeof(header->gname), tar->sconv) != 0) {
-		err = set_conversion_failed_error(a, tar->sconv, "Gname");
-		if (err == ARCHIVE_FATAL)
-			return (err);
+	const char *existing_gname = archive_entry_gname(entry);
+	if (existing_gname == NULL || existing_gname[0] == '\0') {
+		if (archive_entry_copy_gname_l(entry,
+		    header->gname, sizeof(header->gname), tar->sconv) != 0) {
+			err = set_conversion_failed_error(a, tar->sconv, "Gname");
+			if (err == ARCHIVE_FATAL)
+				return (err);
+		}
 	}
 
 	/* Parse out device numbers only for char and block specials */
 	if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
-		archive_entry_set_rdevmajor(entry, (dev_t)
-		    tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
-		archive_entry_set_rdevminor(entry, (dev_t)
-		    tar_atol(header->rdevminor, sizeof(header->rdevminor)));
-	} else
+		if (!archive_entry_rdev_is_set(entry)) {
+			archive_entry_set_rdevmajor(entry, (dev_t)
+			    tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
+			archive_entry_set_rdevminor(entry, (dev_t)
+			    tar_atol(header->rdevminor, sizeof(header->rdevminor)));
+		}
+	} else {
 		archive_entry_set_rdev(entry, 0);
+	}
 
 	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
 
 	/* Grab GNU-specific fields. */
-	t = tar_atol(header->atime, sizeof(header->atime));
-	if (t > 0)
-		archive_entry_set_atime(entry, t, 0);
-	t = tar_atol(header->ctime, sizeof(header->ctime));
-	if (t > 0)
-		archive_entry_set_ctime(entry, t, 0);
+	if (!archive_entry_atime_is_set(entry)) {
+		t = tar_atol(header->atime, sizeof(header->atime));
+		if (t > 0)
+			archive_entry_set_atime(entry, t, 0);
+	}
+	if (!archive_entry_ctime_is_set(entry)) {
+		t = tar_atol(header->ctime, sizeof(header->ctime));
+		if (t > 0)
+			archive_entry_set_ctime(entry, t, 0);
+	}
 
 	if (header->realsize[0] != 0) {
 		tar->realsize
@@ -2417,19 +3000,19 @@ gnu_sparse_old_parse(struct archive_read *a, struct tar *tar,
  * importantly, the sparse data was lost when extracted by archivers
  * that didn't recognize this extension.
  */
-
 static int
-gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p)
+gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p, size_t length)
 {
 	const char *e;
 	int64_t offset = -1, size = -1;
 
 	for (;;) {
 		e = p;
-		while (*e != '\0' && *e != ',') {
+		while (length > 0 && *e != ',') {
 			if (*e < '0' || *e > '9')
 				return (ARCHIVE_WARN);
 			e++;
+			length--;
 		}
 		if (offset < 0) {
 			offset = tar_atol10(p, e - p);
@@ -2444,9 +3027,10 @@ gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p)
 				return (ARCHIVE_FATAL);
 			offset = -1;
 		}
-		if (*e == '\0')
+		if (length == 0)
 			return (ARCHIVE_OK);
 		p = e + 1;
+		length--;
 	}
 }
 
@@ -2568,8 +3152,8 @@ gnu_sparse_10_read(struct archive_read *a, struct tar *tar, size_t *unconsumed)
  * consist of both data and hole.
  */
 static int
-solaris_sparse_parse(struct archive_read *a, struct tar *tar,
-    struct archive_entry *entry, const char *p)
+pax_attribute_SUN_holesdata(struct archive_read *a, struct tar *tar,
+	struct archive_entry *entry, const char *p, size_t length)
 {
 	const char *e;
 	int64_t start, end;
@@ -2578,16 +3162,21 @@ solaris_sparse_parse(struct archive_read *a, struct tar *tar,
 	(void)entry; /* UNUSED */
 
 	end = 0;
-	if (*p == ' ')
+	if (length <= 0)
+		return (ARCHIVE_WARN);
+	if (*p == ' ') {
 		p++;
-	else
+		length--;
+	} else {
 		return (ARCHIVE_WARN);
+	}
 	for (;;) {
 		e = p;
-		while (*e != '\0' && *e != ' ') {
+		while (length > 0 && *e != ' ') {
 			if (*e < '0' || *e > '9')
 				return (ARCHIVE_WARN);
 			e++;
+			length--;
 		}
 		start = end;
 		end = tar_atol10(p, e - p);
@@ -2599,9 +3188,15 @@ solaris_sparse_parse(struct archive_read *a, struct tar *tar,
 				return (ARCHIVE_FATAL);
 			tar->sparse_last->hole = hole;
 		}
-		if (*e == '\0')
-			return (ARCHIVE_OK);
+		if (length == 0 || *e == '\n') {
+			if (length == 0 && *e == '\n') {
+				return (ARCHIVE_OK);
+			} else {
+				return (ARCHIVE_WARN);
+			}
+		}
 		p = e + 1;
+		length--;
 		hole = hole == 0;
 	}
 }
@@ -2903,22 +3498,23 @@ base64_decode(const char *s, size_t len, size_t *out_len)
 }
 
 static char *
-url_decode(const char *in)
+url_decode(const char *in, size_t length)
 {
 	char *out, *d;
 	const char *s;
 
-	out = (char *)malloc(strlen(in) + 1);
+	out = (char *)malloc(length + 1);
 	if (out == NULL)
 		return (NULL);
-	for (s = in, d = out; *s != '\0'; ) {
-		if (s[0] == '%' && s[1] != '\0' && s[2] != '\0') {
+	for (s = in, d = out; length > 0 && *s != '\0'; ) {
+		if (s[0] == '%' && length > 2) {
 			/* Try to convert % escape */
 			int digit1 = tohex(s[1]);
 			int digit2 = tohex(s[2]);
 			if (digit1 >= 0 && digit2 >= 0) {
 				/* Looks good, consume three chars */
 				s += 3;
+				length -= 3;
 				/* Convert output */
 				*d++ = ((digit1 << 4) | digit2);
 				continue;
@@ -2926,6 +3522,7 @@ url_decode(const char *in)
 			/* Else fall through and treat '%' as normal char */
 		}
 		*d++ = *s++;
+		--length;
 	}
 	*d = '\0';
 	return (out);
diff --git a/libarchive/test/CMakeLists.txt b/libarchive/test/CMakeLists.txt
index 6d28fc904a..0352a1e7f6 100644
--- a/libarchive/test/CMakeLists.txt
+++ b/libarchive/test/CMakeLists.txt
@@ -166,6 +166,7 @@ IF(ENABLE_TEST)
     test_read_format_tar_empty_pax.c
     test_read_format_tar_filename.c
     test_read_format_tar_invalid_pax_size.c
+    test_read_format_tar_pax_large_attr.c
     test_read_format_tbz.c
     test_read_format_tgz.c
     test_read_format_tlz.c
diff --git a/libarchive/test/test_compat_solaris_pax_sparse.c b/libarchive/test/test_compat_solaris_pax_sparse.c
index c25c4eb8f5..94e3e9189b 100644
--- a/libarchive/test/test_compat_solaris_pax_sparse.c
+++ b/libarchive/test/test_compat_solaris_pax_sparse.c
@@ -60,7 +60,7 @@ test_compat_solaris_pax_sparse_1(void)
 	assertEqualInt(0100644, archive_entry_mode(ae));
 
 	/* Verify the sparse information. */
-	failure("This sparse file should have tree data blocks");
+	failure("This sparse file should have three data blocks");
 	assertEqualInt(3, archive_entry_sparse_reset(ae));
 	assertEqualInt(ARCHIVE_OK,
 	    archive_entry_sparse_next(ae, &offset, &length));
diff --git a/libarchive/test/test_entry.c b/libarchive/test/test_entry.c
index 228fdd8bcc..38c406e35b 100644
--- a/libarchive/test/test_entry.c
+++ b/libarchive/test/test_entry.c
@@ -269,7 +269,7 @@ DEFINE_TEST(test_entry)
 	archive_entry_set_hardlink(e, "hardlink");
 	archive_entry_set_symlink(e, "symlink");
 	archive_entry_set_link(e, "link");
-	assertEqualString(archive_entry_hardlink(e), "hardlink");
+	assertEqualString(archive_entry_hardlink(e), NULL);
 	assertEqualString(archive_entry_symlink(e), "link");
 
 	/* mode */
@@ -513,7 +513,6 @@ DEFINE_TEST(test_entry)
 	archive_entry_set_rdev(e, 532);
 	archive_entry_set_size(e, 987654321);
 	archive_entry_copy_sourcepath(e, "source");
-	archive_entry_set_symlink(e, "symlinkname");
 	archive_entry_set_uid(e, 83);
 	archive_entry_set_uname(e, "user");
 	/* Add an ACL entry. */
@@ -548,7 +547,7 @@ DEFINE_TEST(test_entry)
 	assertEqualInt(archive_entry_rdev(e2), 532);
 	assertEqualInt(archive_entry_size(e2), 987654321);
 	assertEqualString(archive_entry_sourcepath(e2), "source");
-	assertEqualString(archive_entry_symlink(e2), "symlinkname");
+	assertEqualString(archive_entry_symlink(e2), NULL);
 	assertEqualInt(archive_entry_uid(e2), 83);
 	assertEqualString(archive_entry_uname(e2), "user");
 
@@ -649,7 +648,7 @@ DEFINE_TEST(test_entry)
 	assertEqualInt(archive_entry_rdev(e2), 532);
 	assertEqualInt(archive_entry_size(e2), 987654321);
 	assertEqualString(archive_entry_sourcepath(e2), "source");
-	assertEqualString(archive_entry_symlink(e2), "symlinkname");
+	assertEqualString(archive_entry_symlink(e2), NULL);
 	assertEqualInt(archive_entry_uid(e2), 83);
 	assertEqualString(archive_entry_uname(e2), "user");
 
@@ -705,6 +704,13 @@ DEFINE_TEST(test_entry)
 	/* Release clone. */
 	archive_entry_free(e2);
 
+	/* Verify that symlink is copied over by `clone` */
+	archive_entry_set_symlink(e, "symlinkpath");
+	e2 = archive_entry_clone(e);
+	assertEqualString(archive_entry_hardlink(e2), NULL);
+	assertEqualString(archive_entry_symlink(e2), "symlinkpath");
+	archive_entry_free(e2);
+
 	/*
 	 * Test clear() implementation.
 	 */
diff --git a/libarchive/test/test_read_format_tar.c b/libarchive/test/test_read_format_tar.c
index 2b7cd8dedc..1a2b326d0d 100644
--- a/libarchive/test/test_read_format_tar.c
+++ b/libarchive/test/test_read_format_tar.c
@@ -437,7 +437,7 @@ static void verify(unsigned char *d, size_t s,
 	assertA(0 == archive_read_support_filter_all(a));
 	assertA(0 == archive_read_support_format_all(a));
 	assertA(0 == archive_read_open_memory(a, buff, s + 1024));
-	assertA(0 == archive_read_next_header(a, &ae));
+	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
 	assertEqualInt(archive_filter_code(a, 0), compression);
 	assertEqualInt(archive_format(a), format);
 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
diff --git a/libarchive/test/test_read_format_tar_pax_large_attr.c b/libarchive/test/test_read_format_tar_pax_large_attr.c
new file mode 100644
index 0000000000..1c2b55f9c6
--- /dev/null
+++ b/libarchive/test/test_read_format_tar_pax_large_attr.c
@@ -0,0 +1,65 @@
+/*-
+ * Copyright (c) 2003-2023 Tim Kientzle
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "test.h"
+
+/*
+ * Read a pax formatted tar archive that has an extremely large
+ * (8,000,000 bytes) attribute of unknown type.  The pax reader should simply
+ * skip the attribute.
+ */
+DEFINE_TEST(test_read_format_tar_pax_large_attr)
+{
+	char name[] = "test_read_format_tar_pax_large_attr.tar.Z";
+	struct archive_entry *ae;
+	struct archive *a;
+
+	assert((a = archive_read_new()) != NULL);
+	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
+	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
+	extract_reference_file(name);
+	assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, name, 10240));
+
+	/* Read first entry. */
+	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
+	assertEqualString("foo", archive_entry_pathname(ae));
+	assertEqualInt(1, archive_entry_mtime(ae));
+	assertEqualInt(1000, archive_entry_uid(ae));
+	assertEqualString("tim", archive_entry_uname(ae));
+	assertEqualInt(0, archive_entry_gid(ae));
+	assertEqualString("wheel", archive_entry_gname(ae));
+	assertEqualInt(0100644, archive_entry_mode(ae));
+	assertEqualInt(archive_entry_is_encrypted(ae), 0);
+	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
+
+	/* Verify the end-of-archive. */
+	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
+
+	/* Verify that the format detection worked. */
+	assertEqualInt(archive_filter_code(a, 0), ARCHIVE_FILTER_COMPRESS);
+	assertEqualInt(archive_format(a), ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE);
+
+	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
+	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
+}
diff --git a/libarchive/test/test_read_format_tar_pax_large_attr.tar.Z.uu b/libarchive/test/test_read_format_tar_pax_large_attr.tar.Z.uu
new file mode 100644
index 0000000000..01eb55588e
--- /dev/null
+++ b/libarchive/test/test_read_format_tar_pax_large_attr.tar.Z.uu
@@ -0,0 +1,149 @@
+Decode this file with:
+cat test_read_format_tar_pax_large_attr.tar.Z.uu | uudecode -p | uncompress | hexdump -C
+
+It contains a single tar entry with a pax header that has a single
+attribute of 8000000 bytes
+00000200  38 30 30 30 30 30 30 20  75 6e 6b 6e 6f 77 6e 3d  |8000000 unknown=|
+00000210  2b 2b 2b 2b 2b 2b 2b 2b  2b 2b 2b 2b 2b 2b 2b 2b  |++++++++++++++++|
+*
+007a13f0  2b 2b 2b 2b 2b 2b 2b 2b  2b 2b 2b 2b 2b 2b 2b 0a  |+++++++++++++++.|
+
+begin 644 test_read_format_tar_pax_large_attr.tar.Z
+M'YV04,+@05(F#)DR<EZ8>?,&@,.'$"-*G$BQHL6+&"O"V&B#!@T0`#;"N&&C
+M!DB1,&1\#+EQ1L<8,42"B$&CA@P;-UQZM`$"1HP9-6+<```"3\:C2),J73JQ
+MSAPZ8>2PI).F#5.+3\.8,7-U(LJ-)[^"[$JVK-FS:-'B^`H#1!TW:]R\N>.F
+MQXJ[>//JW<NWK]^_@`,+'DRXL.'#B!,K7LRXL>/'D"-+GDRYLN7+F#-KWLRY
+ML^?/H$.+'DVZM.G3J%.K7LVZM>O7L&/+GDV[MNW;N'/KWLV[M^_?P(,+'TZ\
+MN/'CR),K7\Z\N?/GT*-+GTZ]NO7KV+-KW\Z]N_?OX,.+'T^^O/GSZ-.K7\^^
+MO?OW\./+GT^_OOW[^//KW\^_O___``8HX(`$%FC@@0@FJ.""##;HX(,01BCA
+MA!16:.&%&&:HX88<=NCAAR"&*.*())9HXHDHIJCBBBRVZ.*+,,8HXXPTUFCC
+MC3CFJ...//;HXX]`!BGDD$06:>212":IY)),-NGDDU!&*>645%9IY9589JGE
+MEEQVZ>678(8IYIADEFGFF6BFJ>::;+;IYIMPQBGGG'36:>>=>.:IYYY\]NGG
+MGX`&*NB@A!9JZ*&()JKHHHPVZNBCD$8JZ:245FKII9AFJNFFG';JZ:>@ABKJ
+MJ*26:NJIJ*:JZJJLMNKJJ[#&*NNLM-9JZZVXYJKKKKSVZNNOP`8K[+#$%FOL
+ML<@FJ^RRS#;K[+/01BOMM-16:^VUV&:K[;;<=NOMM^"&*^ZXY)9K[KGHIJON
+MNNRVZ^Z[\,8K[[STUFOOO?CFJ^^^_/;K[[\`!RSPP`07;/#!"">L\,(,-^SP
+MPQ!'+/'$%%=L\<489ZSQQAQW[/''((<L\L@DEVSRR2BGK/+*++?L\LLPQRSS
+MS#37;//-..>L\\X\]^SSST`'+?301!=M]-%()ZWTTDPW[?334$<M]=145VWU
+MU5AGK?767'?M]==@ARWVV&27;?;9:*>M]MILM^WVVW#'+??<=-=M]]UXYZWW
+MWGSW[???@`<N^."$%V[XX8@GKOCBC#?N^..01R[YY)17;OGEF&>N^>:<=^[Y
+MYZ"'+OKHI)=N^NFHIZ[ZZJRW[OKKL,<N^^RTUV[[[;CGKOONO/?N^^_`!R_\
+M\,07;_SQR">O_/+,-^_\\]!'+_WTU%=O_?789Z_]]MQW[_WWX(<O_OCDEV_^
+M^>BGK_[Z[+?O_OOPQR___/37;__]^.>O__[\]^___P`,H``'2,`"&O"`"$R@
+M`A?(P`8Z\($0C*`$)TC!"EKP@AC,H`8WR,$.>O"#(`RA"$=(PA*:\(0H3*$*
+M5\C"%KKPA3",H0QG2,,:VO"&.,RA#G?(PQ[Z\(=`#*(0ATC$(AKQB$A,HA*7
+MR,0F.O&)4(RB%*=(Q2I:\8I8S*(6M\C%+GKQBV`,HQC'2,8RFO&,:$RC&M?(
+MQC:Z\8UPC*,<YTC'.MKQCGC,HQ[WR,<^^O&/@`RD(`=)R$(:\I"(3*0B%\G(
+M1CKRD9",I"0G2<E*6O*2F,RD)C?)R4YZ\I.@#*4H1TG*4IKRE*A,I2I7R<I6
+MNO*5L(RE+&=)RUK:\I:XS*4N=\G+7OKRE\`,IC"'2<QB&O.8R$RF,I?)S&8Z
+M\YG0C*8TITG-:EKSFMC,IC:WR<UN>O.;X`RG.,=)SG*:\YSH3*<ZU\G.=KKS
+MG?",ISSG2<]ZVO.>^,RG/O?)SW[Z\Y\`#:A`!TK0@AKTH`A-J$(7RM"&.O2A
+M$(VH1"=*T8I:]*(8S:A&-\K1CGKTHR`-J4A'2M*2FO2D*$VI2E?*TI:Z]*4P
+MC:E,9TK3FMKTICC-J4YWRM.>^O2G0`VJ4(=*U*(:]:A(3:I2E\K4ICKUJ5"-
+MJE2G2M6J6O6J6,VJ5K?*U:YZ]:M@#:M8QTK6LIKUK&A-JUK7RM:VNO6M<(VK
+M7.=*U[K:]:YXS:M>]\K7OOKUKX`-K&`'2]C"&O:PB$VL8A?+V,8Z]K&0C:QD
+M)TO9REKVLIC-K&8WR]G.>O:SH`VM:$=+VM*:]K2H3:UJ5\O:UKKVM;"-K6QG
+M2]O:VO:VN,VM;G?+V][Z]K?`#:YPATO<XAKWN,A-KG*7R]SF.O>YT(VN=*=+
+MW>I:][K8S:YVM\O=[GKWN^`-KWC'2][RFO>\Z$VO>M?+WO:Z][WPC:]\YTO?
+M^MKWOOC-KW[WR]_^^O>_``ZP@`=,X`(;^,`(3K""%\S@!COXP1".L(0G3.$*
+M6_C"&,ZPAC?,X0Y[^,,@#K&(1TSB$IOXQ"A.L8I7S.(6N_C%,(ZQC&=,XQK;
+M^,8XSK&.=\SC'OOXQT`.LI"'3.0B&_G(2$ZRDI?,Y"8[^<E0CK*4ITSE*EOY
+MREC.LI:WS.4N>_G+8`ZSF,=,YC*;^<QH3K.:U\SF-KOYS7".LYSG3.<ZV_G.
+M>,ZSGO?,YS[[^<^`#K2@!TWH0AOZT(A.M*(7S>A&._K1D(ZTI"=-Z4I;^M*8
+MSK2F-\WI3GOZTZ`.M:A'3>I2F_K4J$ZUJE?-ZE:[^M6PCK6L9TWK6MOZUKC.
+MM:YWS>M>^_K7P`ZVL(=-[&(;^]C(3K:RE\WL9CO[V=".MK2G3>UJ6_O:V,ZV
+MMK?-[6Y[^]O@#K>XQTWN<IO[W.A.M[K7S>YVN_O=\(ZWO.=-[WK;^][XSK>^
+M]\WO?OO[WP`/N,`'3O""&_S@"$^XPA?.\(8[_.$0C[C$)T[QBEO\XAC/N,8W
+MSO&.>_SC(`^YR$=.\I*;_.0H3[G*5\[REKO\Y3"/N<QG3O.:V_SF.,^YSG?.
+M\Y[[_.=`#[K0AT[THAO]Z$A/NM*7SO2F._WI4(^ZU*=.]:I;_>I8S[K6M\[U
+MKGO]ZV`/N]C'3O:RF_WL:$^[VM?.]K:[_>UPC[O<YT[WNMO][GC/N][WSO>^
+M^_WO@`^\X`=/^,(;_O"(3[SB%\_XQCO^\9"/O.0G3_G*6_[RF,^\YC?/^<Y[
+M_O.@#[WH1T_ZTIO^]*A/O>I7S_K6N_[UL(^][&=/^]K;_O:XS[WN=\_[WOO^
+M]\`/OO"'3_SB&__XR$^^\I?/_.8[__G0C[[TIT_]ZEO_^MC/OO:WS_WN>__[
+MX`^_^,=/_O*;__SH3[_ZU\_^]KO__?"/O_SG3__ZV__^^,^__O?/__[[__\`
+M&(`".(`$6(`&>(`(F(`*N(`,V(`.^(`0&($2.($46($6>($8F($:N($<V($>
+M^($@&((B.((D6((F>((HF((JN((LV((N^((P&(,R.(,T6(,V>(,XF(,ZN(,\
+MV(,^^(-`&(1".(1$6(1&>(1(F(1*N(1,V(1.^(10&(52.(546(56>(58F(5:
+MN(5<V(5>^(5@&(9B.(9D6(9F>(9HF(9JN(9LV(9N^(9P&(=R.(=T6(=V>(=X
+MF(=ZN(=\V(=^^(>`&(B".(B$6(B&>(B(F(B*N(B,V(B.^(B0&(F2.(F46(F6
+M>(F8F(F:N(F<V(F>^(F@&(JB.(JD6(JF>(JHF(JJN(JLV(JN^(JP&(NR.(NT
+M6(NV>(NXF(NZN(N\V(N^^(O`&(S".(S$6(S&>(S(F(S*N(S,V(S.^(S0&(W2
+M.(W46(W6>(W8F(W:N(W<V(W>^(W@&([B.([D6([F>([HF([JN([LV([N^([P
+M&(_R.(_T6(_V>(_XF(_ZN(_\V(_^^(\`&9`".9`$69`&>9`(F9`*N9`,V9`.
+M^9`0&9$2.9$469$6>9$8F9$:N9$<V9$>^9$@&9(B.9(D69(F>9(HF9(JN9(L
+MV9(N^9(P&9,R.9,T69,V>9,XF9,ZN9,\V9,^^9-`&91".91$691&>91(F91*
+MN91,V91.^910&952.9546956>958F95:N95<V95>^95@&99B.99D699F>99H
+MF99JN99LV99N^99P&9=R.9=T69=V>9=XF9=ZN9=\V9=^^9>`&9B".9B$69B&
+M>9B(F9B*N9B,V9B.^9B0&9F2.9F469F6>9F8F9F:N9F<V9F>^9F@&9JB.9JD
+M69JF>9JHF9JJN9JLV9JN^9JP&9NR.9NT69NV>9NXF9NZN9N\V9N^^9O`&9S"
+M.9S$69S&>9S(F9S*N9S,V9S.^9S0&9W2.9W469W6>9W8F9W:N9W<V9W>^9W@
+M&9[B.9[D69[F>9[HF9[JN9[LV9[N^9[P&9_R.9_T69_V>9_XF9_ZN9_\V9_^
+M^9\`&J`".J`$6J`&>J`(FJ`*NJ`,VJ`.^J`0&J$2.J$46J$6>J$8FJ$:NJ$<
+MVJ$>^J$@&J(B.J(D6J(F>J(HFJ(JNJ(LVJ(N^J(P&J,R.J,T6J,V>J,XFJ,Z
+MNJ,\VJ,^^J-`&J1".J1$6J1&>J1(FJ1*NJ1,VJ1.^J10&J52.J546J56>J58
+MFJ5:NJ5<VJ5>^J5@&J9B.J9D6J9F>J9HFJ9JNJ9LVJ9N^J9P&J=R.J=T6J=V
+M>J=XFJ=ZNJ=\VJ=^^J>`&JB".JB$6JB&>JB(FJB*NJB,VJB.^JB0&JF2.JF4
+M6JF6>JF8FJF:NJF<VJF>^JF@&JJB.JJD6JJF>JJHFJJJNJJLVJJN^JJP&JNR
+M.JNT6JNV>JNXFJNZNJN\VJN^^JO`&JS".JS$6JS&>JS(FJS*NJS,VJS.^JS0
+M&JW2.JW46JW6>JW8FJW:NJW<VJW>^JW@&J[B.J[D6J[F>J[HFJ[JNJ[LVJ[N
+M^J[P&J_R.J_T6J_V>J_XFJ_ZNJ_\VJ_^^J\`&[`".[`$6[`&>[`(F[`*N[`,
+MV[`.^[`0&[$2.[$46[$6>[$8F[$:N[$<V[$>^[$@&[(B.[(D6[(F>[(HF[(J
+MN[(LV[(N^[(P&[,R.[,T6[,V>[,XF[,ZN[,\V[,^^[-`&[1".[1$6[1&>[1(
+MF[1*N[1,V[1.^[10&[52.[546[56>[58F[5:N[5<V[5>^[5@&[9B.[9D6[9F
+M>[9HF[9JN[9LV[9N^[9P&[=R.[=T6[=V>[=XF[=ZN[=\V[=^^[>`&[B".[B$
+M6[B&>[B(F[B*N[B,V[B.^[B0&[F2.[F46[F6>[F8F[F:N[F<V[F>^[F@&[JB
+M.[JD6[JF>[JHF[JJN[JLV[JN^[JP&[NR.[NT6[NV>[NXF[NZN[N\V[N^^[O`
+M&[S".[S$6[S&>[S(F[S*N[S,V[S.^[S0&[W2.[W46[W6>[W8F[W:N[W<V[W>
+M^[W@&[[B.[[D6[[F>[[HF[[JN[[LV[[N^[[P&[_R.[_T6[_V>[_XF[_ZN[_\
+MV[_^^[\`',`"/,`$7,`&?,`(G,`*O,`,W,`._,`0',$2/,$47,$6?,$8G,$:
+MO,$<W,$>_,$@',(B/,(D7,(F?,(HG,(JO,(LW,(N_,(P',,R/,,T7,,V?,,X
+MG,,ZO,,\W,,^_,-`',1"/,1$7,1&?,1(G,1*O,1,W,1._,10',52/,547,56
+M?,58G,5:O,5<W,5>_,5@',9B/,9D7,9F?,9HG,9JO,9LW,9N_,9P',=R/,=T
+M7,=V?,=XG,=ZO,=\W,=^_,>`',B"/,B$7,B&?,B(G,B*O,B,W,B._,B0',F2
+M/,F47,F6?,F8G,F:O,F<W,F>_,F@',JB/,JD7,JF?,JHG,JJO,JLW,JN_,JP
+M',NR/,NT7,NV?,NXG,NZO,N\W,N^_,O`',S"/,S$7,S&?,S(G,S*O,S,W,S.
+M_,S0',W2/,W47,W6?,W8G,W:O,W<W,W>_,W@',[B/,[D7,[F?,[HG,[JO,[L
+MW,[N_,[P',_R/,_T7,_V?,_XG,_ZO,_\W,_^_,\`'=`"/=`$7=`&?=`(G=`*
+MO=`,W=`._=`0'=$2/=$47=$6?=$8G=$:O=$<W=$>_=$@'=(B/=(D7=(F?=(H
+MG=(JO=(LW=(N_=(P'=,R/=,T7=,V?=,XG=,ZO=,\W=,^_=-`'=1"/=1$7=1&
+M?=1(G=1*O=1,W=1._=10'=52/=547=56?=58G=5:O=5<W=5>_=5@'=9B/=9D
+M7=9F?=9HG=9JO=9LW=9N_=9P'==R/==T7==V?==XG==ZO==\W==^_=>`'=B"
+M/=B$7=B&?=B(G=B*O=B,W=B._=B0'=F2/=F47=F6?=F8G=F:O=F<W=F>_=F@
+M'=JB/=JD7=JF?=JHG=JJO=JLW=JN_=JP'=NR/=NT7=NV?=NXG=NZO=N\W=N^
+M_=O`'=S"/=S$7=S&?=S(G=S*O=S,W=S._=S0'=W2/=W47=W6?=W8G=W:O=W<
+MW=W>_=W@'=[B/=[D7=[F?=[HG=[JO=[LW=[N_=[P'=_R/=_T7=_V?=_XG=_Z
+MO=_\W=_^_=\`'N`"/N`$7N`&?N`(GN`*ON`,WN`._N`0'N$2/N$47N$6?N$8
+MGN$:ON$<WN$>_N$@'N(B/N(D7N(F?N(HGN(JON(LWN(N_N(P'N,R/N,T7N,V
+M?N,XGN,ZON,\WN,^_N-`'N1"/N1$7N1&?N1(GN1*ON1,WN1._N10'N52/N54
+M7N56?N58GN5:ON5<WN5>_N5@'N9B/N9D7N9F?N9HGN9JON9LWN9N_N9P'N=R
+M/N=T7N=V?N=XGN=ZON=\WN=^_N>`'NB"/NB$7NB&?NB(GNB*ONB,WNB._NB0
+M'NF2/NF47NF6?NF8GNF:ONF<WNF>_NF@'NJB/NJD7NJF?NJHGNJJONJLWNJN
+M_NJP'NNR/NNT7NNV?NNXGNNZONN\WNN^_NO`'NS"/NS$7NS&?NS(GNS*ONS,
+MWNS._NS0'NW2/NW47NW6?NW8GNW:ONW<WNW>_NW@'N[B/N[D7N[F?N[HGN[J
+MON[LWN[N_N[P'N_R/N_T7N_V?N_XGN_ZON_\WN_^_N\`'_`"/_`$7_`&?_`(
+MG_`*O_`,W_`.__`0'_$2/_$47_$6?_$8G_$:O_$<W_$>__$@'_(B/_(D7_(F
+M?_(HG_(JO_(LW_(N__(P'_,R/_,T7_,V?_,XG_,ZO_,\W_,^__-`'_1"/_1$
+M7_1&?_1(G_1*O_1,W_1.__10'_52/_547_56?_58G_5:O_5<W_5>__5@'_9B
+M/_9D7_9F?_9HG_9JO_9LW_9N__9P'_=R/_=T7_=V?_=XG_=ZO_=\W_=^__>`
+M'_B"/_B$7_B&?_B(G_B*O_B,W_B.__B0'_F2/_F47_F6?_F8G_F:O_F<W_F>
+M__F@'_JB/_JD7_JF?_JHG_JJO_JLW_JN__JP'_NR/_NT7_NV?_NXG_NZO_N\
+MW_N^__O`'_S"/_S$7_S&?_S(G_S*O_S,W_S.__S0'_W2/_W47_W6?_W8G_W:
+MO_W<W_W>__W@'_[B/_[D7_[F?_[HG_[JO_[LW_[N__[P'__R/__T7__V?__X
+MG__ZO__\W__^__\`,``*P`%(``N@`3R`"#`!*L`%R``;H`-\@!`P`DK`"4@!
+M*Z`%O(`8,`-JP`W(`3N@!_R`(#`$BL`12`)+H`D\@2@P!:K`%<@"6Z`+?($P
+M,`;*P!E(`VN@#;R!.#`'ZL`=R`-[H`_\@4`P"`K!(4@$BZ`1/()(,`DJP27(
+M!)N@$WR"4#`*2L$I2`6KH!6\@E@P"VK!+<@%NZ`7_()@,`R*P3%(!LN@&3R#
+M:#`-JL$UR`;;H!M\@W`P#LK!.4@'ZZ`=O(-X,`_JP3W(!_N@'_R#@#`0"L)!
+M2`@+H2$\A(@P$2K"1<@(&Z$C?(20,!)*PDE("2NA);R$F#`3:L)-R`D[H2?\
+MA*`P%(K"44@*2Z$I/(6H,!6JPE7("ENA*WR%L#`6RL)92`MKH2V\A;@P%^K"
+M7<@+>Z$O_(7`,!@*PV%(#(NA,3R&R#`9*L-ER`R;H3-\AM`P&DK#:4@-JZ$U
+MO(;8,!MJPVW(#;NA-_R&X#`<BL-Q2`[+H3D\A^@P':K#=<@.VZ$[?(?P,![*
+MPWE(#^NA/;R'^#`?ZL-]R`_[H3_\AP`Q(`K$@4@0"Z)!/(@(,2$JQ(7($!NB
+M0WR($#$B2L2)2!%ID@)8"`TA+6C$C8@11$)'6`D;02C4@+;`$E!"6&`+,@$E
+MBH08T!-@`@UP"40!!G#$F4@3FT)6D`H;@2I8!8UX!]!`&2@#;"`ML(63*!-J
+MHE$\BD@Q*2K%I<@4FZ)3?(I0,2I*Q:E(%:NB5;R*6#$K:L6MR!6[HE?\BF`Q
+M+(K%L4@6RZ)9/(MH,2VJQ;7(%MNB6WR+<#$NRL6Y2!?KHEV\BW@Q+^K%O<@7
+M^Z)?_(N`,3`*QL%(&`NC83R,B#$Q*L;%R!@;HV-\C)`Q,DK&R4@9*Z-EO(R8
+),3-JQLW(&0$C
+`
+end

From 2cadb8708940ae09be0e70a628fe81bbdfb6f02f Mon Sep 17 00:00:00 2001
From: Tim Kientzle <kientzle@acm.org>
Date: Sun, 16 Jun 2024 20:22:14 -0700
Subject: [PATCH 12/14] Support ISOs with non-standard PVD layouts (#2238)

The CSRG ISOs have a non-standard PVD layout with a 68-byte root
directory record (rather than the 34-byte record required by
ECMA119/ISO9660). I built a test image with this change and modified the
ISO9660 reader to accept it.

While I was working on the bid logic to recognize PVDs, I added a number
of additional correctness checks that should make our bidding a bit more
accurate. In particular, this should more than compensate for the
weakened check of the root directory record size.

Resolves #2232
---
 Makefile.am                                   |   1 +
 .../archive_read_support_format_iso9660.c     | 109 +++++++++++++++++-
 .../test/test_read_format_iso_3.iso.Z.uu      |  40 +++++++
 libarchive/test/test_read_format_iso_Z.c      |   8 +-
 4 files changed, 150 insertions(+), 8 deletions(-)
 create mode 100644 libarchive/test/test_read_format_iso_3.iso.Z.uu

diff --git a/Makefile.am b/Makefile.am
index 1661d9c1a5..7560b14fe7 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -833,6 +833,7 @@ libarchive_test_EXTRA_DIST=\
 	libarchive/test/test_read_format_huge_rpm.rpm.uu \
 	libarchive/test/test_read_format_iso.iso.Z.uu \
 	libarchive/test/test_read_format_iso_2.iso.Z.uu \
+	libarchive/test/test_read_format_iso_3.iso.Z.uu \
 	libarchive/test/test_read_format_iso_joliet.iso.Z.uu \
 	libarchive/test/test_read_format_iso_joliet_by_nero.iso.Z.uu \
 	libarchive/test/test_read_format_iso_joliet_long.iso.Z.uu \
diff --git a/libarchive/archive_read_support_format_iso9660.c b/libarchive/archive_read_support_format_iso9660.c
index 25ab11bf59..c38943a2fa 100644
--- a/libarchive/archive_read_support_format_iso9660.c
+++ b/libarchive/archive_read_support_format_iso9660.c
@@ -402,6 +402,9 @@ static int	isJolietSVD(struct iso9660 *, const unsigned char *);
 static int	isSVD(struct iso9660 *, const unsigned char *);
 static int	isEVD(struct iso9660 *, const unsigned char *);
 static int	isPVD(struct iso9660 *, const unsigned char *);
+static int	isRootDirectoryRecord(const unsigned char *);
+static int	isValid723Integer(const unsigned char *);
+static int	isValid733Integer(const unsigned char *);
 static int	next_cache_entry(struct archive_read *, struct iso9660 *,
 		    struct file_info **);
 static int	next_entry_seek(struct archive_read *, struct iso9660 *,
@@ -773,8 +776,9 @@ isSVD(struct iso9660 *iso9660, const unsigned char *h)
 
 	/* Read Root Directory Record in Volume Descriptor. */
 	p = h + SVD_root_directory_record_offset;
-	if (p[DR_length_offset] != 34)
+	if (!isRootDirectoryRecord(p)) {
 		return (0);
+	}
 
 	return (48);
 }
@@ -851,8 +855,9 @@ isEVD(struct iso9660 *iso9660, const unsigned char *h)
 
 	/* Read Root Directory Record in Volume Descriptor. */
 	p = h + PVD_root_directory_record_offset;
-	if (p[DR_length_offset] != 34)
+	if (!isRootDirectoryRecord(p)) {
 		return (0);
+	}
 
 	return (48);
 }
@@ -882,21 +887,43 @@ isPVD(struct iso9660 *iso9660, const unsigned char *h)
 	if (!isNull(iso9660, h, PVD_reserved2_offset, PVD_reserved2_size))
 		return (0);
 
+	/* Volume space size must be encoded according to 7.3.3 */
+	if (!isValid733Integer(h + PVD_volume_space_size_offset)) {
+		return (0);
+	}
+	volume_block = archive_le32dec(h + PVD_volume_space_size_offset);
+	if (volume_block <= SYSTEM_AREA_BLOCK+4)
+		return (0);
+
 	/* Reserved field must be 0. */
 	if (!isNull(iso9660, h, PVD_reserved3_offset, PVD_reserved3_size))
 		return (0);
 
+	/* Volume set size must be encoded according to 7.2.3 */
+	if (!isValid723Integer(h + PVD_volume_set_size_offset)) {
+		return (0);
+	}
+
+	/* Volume sequence number must be encoded according to 7.2.3 */
+	if (!isValid723Integer(h + PVD_volume_sequence_number_offset)) {
+		return (0);
+	}
+
 	/* Logical block size must be > 0. */
 	/* I've looked at Ecma 119 and can't find any stronger
 	 * restriction on this field. */
+	if (!isValid723Integer(h + PVD_logical_block_size_offset)) {
+		return (0);
+	}
 	logical_block_size =
 	    archive_le16dec(h + PVD_logical_block_size_offset);
 	if (logical_block_size <= 0)
 		return (0);
 
-	volume_block = archive_le32dec(h + PVD_volume_space_size_offset);
-	if (volume_block <= SYSTEM_AREA_BLOCK+4)
+	/* Path Table size must be encoded according to 7.3.3 */
+	if (!isValid733Integer(h + PVD_path_table_size_offset)) {
 		return (0);
+	}
 
 	/* File structure version must be 1 for ISO9660/ECMA119. */
 	if (h[PVD_file_structure_version_offset] != 1)
@@ -935,8 +962,9 @@ isPVD(struct iso9660 *iso9660, const unsigned char *h)
 
 	/* Read Root Directory Record in Volume Descriptor. */
 	p = h + PVD_root_directory_record_offset;
-	if (p[DR_length_offset] != 34)
+	if (!isRootDirectoryRecord(p)) {
 		return (0);
+	}
 
 	if (!iso9660->primary.location) {
 		iso9660->logical_block_size = logical_block_size;
@@ -951,6 +979,51 @@ isPVD(struct iso9660 *iso9660, const unsigned char *h)
 	return (48);
 }
 
+static int
+isRootDirectoryRecord(const unsigned char *p) {
+	int flags;
+
+	/* ECMA119/ISO9660 requires that the root directory record be _exactly_ 34 bytes.
+	 * However, we've seen images that have root directory records up to 68 bytes. */
+	if (p[DR_length_offset] < 34 || p[DR_length_offset] > 68) {
+		return (0);
+	}
+
+	/* The root directory location must be a 7.3.3 32-bit integer. */
+	if (!isValid733Integer(p + DR_extent_offset)) {
+		return (0);
+	}
+
+	/* The root directory size must be a 7.3.3 integer. */
+	if (!isValid733Integer(p + DR_size_offset)) {
+		return (0);
+	}
+
+	/* According to the standard, certain bits must be one or zero:
+	 * Bit 1: must be 1 (this is a directory)
+	 * Bit 2: must be 0 (not an associated file)
+	 * Bit 3: must be 0 (doesn't use extended attribute record)
+	 * Bit 7: must be 0 (final directory record for this file)
+	 */
+	flags = p[DR_flags_offset];
+	if ((flags & 0x8E) != 0x02) {
+		return (0);
+	}
+
+	/* Volume sequence number must be a 7.2.3 integer. */
+	if (!isValid723Integer(p + DR_volume_sequence_number_offset)) {
+		return (0);
+	}
+
+	/* Root directory name is a single zero byte... */
+	if (p[DR_name_len_offset] != 1 || p[DR_name_offset] != 0) {
+		return (0);
+	}
+
+	/* Nothing looked wrong, so let's accept it. */
+	return (1);
+}
+
 static int
 read_children(struct archive_read *a, struct file_info *parent)
 {
@@ -3127,6 +3200,32 @@ toi(const void *p, int n)
 	return (0);
 }
 
+/*
+ * ECMA119/ISO9660 stores multi-byte integers in one of
+ * three different formats:
+ *  * Little-endian (specified in section 7.2.1 and 7.3.1)
+ *  * Big-endian (specified in section 7.2.2 and 7.3.2)
+ *  * Both (specified in section 7.2.3 and 7.3.3)
+ *
+ * For values that follow section 7.2.3 (16-bit) or 7.3.3 (32-bit), we
+ * can check that the little-endian and big-endian forms agree with
+ * each other.  This helps us avoid trying to decode files that are
+ * not really ISO images.
+ */
+static int
+isValid723Integer(const unsigned char *p) {
+	return (p[0] == p[3] && p[1] == p[2]);
+}
+
+static int
+isValid733Integer(const unsigned char *p)
+{
+	return (p[0] == p[7]
+		&& p[1] == p[6]
+		&& p[2] == p[5]
+		&& p[3] == p[4]);
+}
+
 static time_t
 isodate7(const unsigned char *v)
 {
diff --git a/libarchive/test/test_read_format_iso_3.iso.Z.uu b/libarchive/test/test_read_format_iso_3.iso.Z.uu
new file mode 100644
index 0000000000..e7734ab33b
--- /dev/null
+++ b/libarchive/test/test_read_format_iso_3.iso.Z.uu
@@ -0,0 +1,40 @@
+Same as test_read_format_iso_2.iso.Z except that the root directory record
+size in the PVD has been changed to 68 bytes (instead of the 34 required
+by the standard).  This non-standard value was seen in the wild.
+
+begin 644 test_read_format_iso_3.iso.Z
+M'YV0``(*'$BPH,&#"!,J7,BPH<.'$"-*G$BQHL6+&#-JW,BQH\>/($.*'$FR
+MI,F3*%.J7,FRI<N7,&/*G$FSILV;.'/JW,FSI\^?0(,*'4JTJ-&C2),J7<JT
+MJ=.G4*-*G4JUJM6K6+-JW<JUJ]>O8,.*'4NVK-FS:-.J7<NVK=NW<./*G4NW
+MKMV[>//JW<NWK]^_@`,+'DRXL.'#B!,K7LRXL>/'D"-+GDRYLN7+F#-KWLRY
+ML^?/H$.+'DVZM.G3J%.K7LVZM>O7L&/+GDV[MNW;N'/KWLV[M^_?P(,+'TZ\
+MN/'CR),K7\Z\N?/GT*-+GTZ]NO7KV+-KW\Z]N_?OX,.+'T^^O/GSZ-.K7\^^
+MO?OW\./+GT^_OOW[^//KW\^_O_^_`0Q!!`PPQ!```$Z408<04Q`!PH,01BCA
+MA!16:.&%$!Z404$;>G0@``%\>"`""`#@04$>2)#0!`,1`0`%!<%8HD`EEF+C
+MC:4($)"(!V+HXX]`!BGDD$06:>212":IY)),-NGDDU!&*>645%9II8\R%!@#
+M##C$0(,,,6Q)PPP$DI-EF%QZ"::89,)`#H%PQBDGG&=NV>67:(Y9YH?_]>GG
+MGX7](R"!!@)JZ*&()JKHHL<="...`AU80:0`!`$B`!=0.@2CG';JZ:>@A@H8
+MGR]>:FI`DP9@*:F9!BCJJ[#&*NNLM#XE0JD$R3A0C3C:J*.I(0)PZZ,#Z4HC
+M`+WZ"BF((=XZ*4&3SAA0B6\@X$`%"AA`SJ\\!G%KI@1E*BT`U%J+[0+;+AOB
+MIK6VZ^Z[\,8;G;,%1;LK`-5>FVVZP!XX;(SDWIMLCNJ&:`(`%A1D@0$%&9`O
+MMMI2"B(!0KBP0PSR9JSQQAQWW!P:9;#!QAL*>&SRR2BGK'*GWQ8D[KT/*X`N
+MMSOZBVNQ`1\[,,W,!G`P!@5AP#!!#ILK,SD2!T``$19CO/+34$<M]=2$@2PR
+MR51GK?767'?M]==@ARWVV&27;?;9:*>M]MILM^WVVW#'+??<=-=M]]UXYZWW
+MWGSW[???@`<N^."$%V[XX8@GKOCBC#?N^..01R[YY)17;OGEF&>N^>:<=^[Y
+MYZ"'+OKHI)=N^NFHIZ[ZZJRW[OKKL,<N^^RTUV[[[;CGKOONO/?N^^_`!R_\
+M\,07;_SQR">O_/+,-^_\\]!'+_WTU%=O_?789Z_]]MQW[_WWX(<O_OCDEV_^
+M^>BGK_[Z[+?O_OOPQR___/37;__]^.>O__[\]^___P`,H``'2,`"&O"`"$R@
+M`A?(P`8Z\($0C*`$)TC!"EKP@AC,H`8WR,$.>O"#(`RA"$=(PA*:\(0H3*$*
+M5\C"%KKPA3",H0QG2,,:VO"&.,RA#G?(PQ[Z\(=`#*(0ATC$(AKQB$A,HA*7
+MR,0F.O&)4(RB%*=(Q2I:\8I8S*(6M\C%+GKQBV`,HQC'2,8RFO&,:$RC&M?(
+MQC:Z\8UPC*,<YTC'.MKQCGC,HQ[WR,<^^O&/@`RD(`=)R$(:\I"(3*0B%\G(
+M1CKRD9",I"0G2<E*6O*2F,RD)C?)R4YZ\I.@#*4H1TG*4IKRE*A,I2I7R<I6
+MNO*5L(RE+&=)RUK:\I:XS*4N=\G+7OKRE\`,IC"'2<QB&O.8R$RF,I?)S&8Z
+M\YG0C*8TITG-:EKSFMC,IC:WR<UN>O.;X`RG.,=)SG*:\YSH3*<ZU\G.=KKS
+MG?",ISSG2<]ZVO.>^,RG/O?)SW[Z\Y\`#:A`!TK0@AKTH`A-J$(7RM"&.O2A
+M$(VH1"=*T8I:]*(8S:A&-\K1CGKTHR`-J4A'2M*2FO2D*$VI2E?*TI:Z]*4P
+MC:E,9TK3FMKTICC-J4YWRM.>^O2G0`VJ4(=*U*(:]:A(3:I2E\K4ICKUJ5"-
+MJE2G2M6J6O6J6,VJ5K?*U:YZ]:M@#:M8QTK6LIKUK&A-JUK7RM:VNO6M<(VK
+".`$`
+`
+end
diff --git a/libarchive/test/test_read_format_iso_Z.c b/libarchive/test/test_read_format_iso_Z.c
index 09b0acb804..785b00bf85 100644
--- a/libarchive/test/test_read_format_iso_Z.c
+++ b/libarchive/test/test_read_format_iso_Z.c
@@ -53,11 +53,10 @@ test1(void)
 }
 
 static void
-test2(void)
+test_small(const char *name)
 {
 	struct archive_entry *ae;
 	struct archive *a;
-	const char *name = "test_read_format_iso_2.iso.Z";
 
 	extract_reference_file(name);
 
@@ -98,5 +97,8 @@ test2(void)
 DEFINE_TEST(test_read_format_iso_Z)
 {
 	test1();
-	test2();
+	/* A very small ISO image with a variety of contents. */
+	test_small("test_read_format_iso_2.iso.Z");
+	/* As above, but with a non-standard 68-byte root directory in the PVD */
+	test_small("test_read_format_iso_3.iso.Z");
 }

From 13c710a825d1c8b5604d8ce6eb712f7f7cfadc80 Mon Sep 17 00:00:00 2001
From: Tim Kientzle <kientzle@acm.org>
Date: Sun, 16 Jun 2024 20:23:11 -0700
Subject: [PATCH 13/14] [cpio test] Dates can be more than 12 bytes, depending
 on the locale (#2237)

In order to match cpio output, format the reference date with _at least_
12 bytes instead of _exactly_ 12 bytes. This should fix a gratuitous
test failure on certain systems that default to multi-byte locales.
---
 cpio/test/test_option_t.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/cpio/test/test_option_t.c b/cpio/test/test_option_t.c
index 953e4a8065..4130b7f918 100644
--- a/cpio/test/test_option_t.c
+++ b/cpio/test/test_option_t.c
@@ -89,6 +89,7 @@ DEFINE_TEST(test_option_t)
 	mtime = 1;
 #ifdef HAVE_LOCALE_H
 	setlocale(LC_ALL, "");
+	setlocale(LC_TIME, "");
 #endif
 #if defined(HAVE_LOCALTIME_S)
         tmptr = localtime_s(&tmbuf, &mtime) ? NULL : &tmbuf;
@@ -99,10 +100,10 @@ DEFINE_TEST(test_option_t)
 #endif
 #if defined(_WIN32) && !defined(__CYGWIN__)
 	strftime(date2, sizeof(date2)-1, "%b %d  %Y", tmptr);
-	_snprintf(date, sizeof(date)-1, "%12.12s file", date2);
+	_snprintf(date, sizeof(date)-1, "%12s file", date2);
 #else
 	strftime(date2, sizeof(date2)-1, "%b %e  %Y", tmptr);
-	snprintf(date, sizeof(date)-1, "%12.12s file", date2);
+	snprintf(date, sizeof(date)-1, "%12s file", date2);
 #endif
 	assertEqualMem(p + 42, date, strlen(date));
 	free(p);

From 3fdf9bf80fd4e9473052ef68a9ce7ccebfc5472c Mon Sep 17 00:00:00 2001
From: Tim Kientzle <kientzle@acm.org>
Date: Wed, 19 Jun 2024 16:18:35 -0700
Subject: [PATCH 14/14] Fix potential overflow warning in cpio test_option_t
 (#2250)

Fixes an error from #2237.
---
 cpio/test/test_option_t.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpio/test/test_option_t.c b/cpio/test/test_option_t.c
index 4130b7f918..46668ec276 100644
--- a/cpio/test/test_option_t.c
+++ b/cpio/test/test_option_t.c
@@ -33,7 +33,7 @@ DEFINE_TEST(test_option_t)
 	char *p;
 	int r;
 	time_t mtime;
-	char date[32];
+	char date[48];
 	char date2[32];
 	struct tm *tmptr;
 #if defined(HAVE_LOCALTIME_R) || defined(HAVE_LOCALTIME_S)