From 62c8caf6611a7d0662d80176c4fdb40f85794699 Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Wed, 3 Apr 2024 11:15:22 +0200 Subject: [PATCH 01/98] Revert "zip: update AppleDouble support for directories (#2100)" This reverts commit 390d83012fdba8c8db7fc9915338805882b0597a. PR reopened back for review. --- libarchive/archive_read_support_format_zip.c | 13 +-- libarchive/archive_write_disk_posix.c | 3 +- libarchive/test/test_write_disk_appledouble.c | 84 ------------------- .../test_write_disk_appledouble_zip.zip.uu | 27 ------ 4 files changed, 2 insertions(+), 125 deletions(-) delete mode 100644 libarchive/test/test_write_disk_appledouble_zip.zip.uu diff --git a/libarchive/archive_read_support_format_zip.c b/libarchive/archive_read_support_format_zip.c index d7b6f082ee..212bfff9fa 100644 --- a/libarchive/archive_read_support_format_zip.c +++ b/libarchive/archive_read_support_format_zip.c @@ -4083,17 +4083,6 @@ slurp_central_directory(struct archive_read *a, struct archive_entry* entry, } else { /* Generate resource fork name to find its * resource file at zip->tree_rsrc. */ - - /* If this is an entry ending with slash, - * make the resource for name slash-less - * as the actual resource fork doesn't end with '/'. - */ - size_t tmp_length = filename_length; - if (name[tmp_length - 1] == '/') { - tmp_length--; - r = rsrc_basename(name, tmp_length); - } - archive_strcpy(&(zip_entry->rsrcname), "__MACOSX/"); archive_strncat(&(zip_entry->rsrcname), @@ -4101,7 +4090,7 @@ slurp_central_directory(struct archive_read *a, struct archive_entry* entry, archive_strcat(&(zip_entry->rsrcname), "._"); archive_strncat(&(zip_entry->rsrcname), name + (r - name), - tmp_length - (r - name)); + filename_length - (r - name)); /* Register an entry to RB tree to sort it by * file offset. 
*/ __archive_rb_tree_insert_node(&zip->tree, diff --git a/libarchive/archive_write_disk_posix.c b/libarchive/archive_write_disk_posix.c index 92db4ff05b..58265ee0dc 100644 --- a/libarchive/archive_write_disk_posix.c +++ b/libarchive/archive_write_disk_posix.c @@ -4427,8 +4427,7 @@ fixup_appledouble(struct archive_write_disk *a, const char *pathname) #else la_stat(datafork.s, &st) == -1 || #endif - (((st.st_mode & AE_IFMT) != AE_IFREG) && - ((st.st_mode & AE_IFMT) != AE_IFDIR))) + (st.st_mode & AE_IFMT) != AE_IFREG) goto skip_appledouble; /* diff --git a/libarchive/test/test_write_disk_appledouble.c b/libarchive/test/test_write_disk_appledouble.c index 8de6c8b504..3265a94d2f 100644 --- a/libarchive/test/test_write_disk_appledouble.c +++ b/libarchive/test/test_write_disk_appledouble.c @@ -236,87 +236,3 @@ DEFINE_TEST(test_write_disk_appledouble) assertEqualFile("hfscmp/file3", "nocmp/file3"); #endif } - -/* Test writing apple doubles to disk from zip format */ -DEFINE_TEST(test_write_disk_appledouble_zip) -{ -#if !defined(__APPLE__) || !defined(UF_COMPRESSED) || !defined(HAVE_SYS_XATTR_H)\ - || !defined(HAVE_ZLIB_H) - skipping("MacOS-specific AppleDouble test"); -#else - const char *refname = "test_write_disk_appledouble_zip.zip"; - struct archive *ad, *a; - struct archive_entry *ae; - struct stat st; - - extract_reference_file(refname); - - /* - * Extract an archive to disk. 
- */ - assert((ad = archive_write_disk_new()) != NULL); - assertEqualIntA(ad, ARCHIVE_OK, - archive_write_disk_set_standard_lookup(ad)); - assertEqualIntA(ad, ARCHIVE_OK, - archive_write_disk_set_options(ad, - ARCHIVE_EXTRACT_TIME | - ARCHIVE_EXTRACT_SECURE_SYMLINKS | - ARCHIVE_EXTRACT_SECURE_NODOTDOT)); - - assert((a = archive_read_new()) != NULL); - assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); - assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); - assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, - refname, 512 * 20)); - - /* Skip The top level directory */ - assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); - assertEqualString("apple_double_dir/", archive_entry_pathname(ae)); - - /* Extract apple_double_test */ - assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); - assertEqualString("apple_double_dir/apple_double_dir_test/", archive_entry_pathname(ae)); - assertEqualIntA(a, ARCHIVE_OK, archive_read_extract2(a, ae, ad)); - - /* Extract ._apple_double_dir_test which will be merged into apple_double_dir_test as metadata. */ - assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); - assertEqualString("apple_double_dir/._apple_double_dir_test", archive_entry_pathname(ae)); - assertEqualIntA(a, ARCHIVE_OK, archive_read_extract2(a, ae, ad)); - - /* Extract test_file */ - assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); - assertEqualString("apple_double_dir/test_file", archive_entry_pathname(ae)); - assertEqualIntA(a, ARCHIVE_OK, archive_read_extract2(a, ae, ad)); - - /* Extract ._test_file which will be merged into test_file as metadata. 
*/ - assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); - assertEqualString("apple_double_dir/._test_file", archive_entry_pathname(ae)); - assertEqualIntA(a, ARCHIVE_OK, archive_read_extract2(a, ae, ad)); - - assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); - assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); - assertEqualInt(ARCHIVE_OK, archive_read_free(a)); - assertEqualIntA(ad, ARCHIVE_OK, archive_write_free(ad)); - - /* Test test_file */ - assertEqualInt(0, stat("apple_double_dir/test_file", &st)); - assertFileSize("apple_double_dir/test_file", 5); - failure("'%s' should have Resource Fork", "test_file"); - assertEqualInt(1, has_xattr("apple_double_dir/test_file", "com.apple.ResourceFork")); - - /* Test apple_double_dir_test */ - failure("'%s' should have quarantine xattr", "apple_double_dir_test"); - assertEqualInt(1, has_xattr("apple_double_dir/apple_double_dir_test", "com.apple.quarantine")); - - /* Test ._test_file. */ - failure("'apple_double_dir/._test_file' should be merged and removed"); - assertFileNotExists("apple_double_dir/._test_file"); - - /* Test ._apple_double_dir_test */ - failure("'apple_double_dir/._._apple_double_dir_test' should be merged and removed"); - assertFileNotExists("apple_double_dir/._apple_double_dir_test"); - - assertChdir(".."); - -#endif -} diff --git a/libarchive/test/test_write_disk_appledouble_zip.zip.uu b/libarchive/test/test_write_disk_appledouble_zip.zip.uu deleted file mode 100644 index 5ab67533d5..0000000000 --- a/libarchive/test/test_write_disk_appledouble_zip.zip.uu +++ /dev/null @@ -1,27 +0,0 @@ -begin 644 test_write_disk_appledouble_zip.zip -M4$L#!`H```````MM?%@````````````````1`!``87!P;&5?9&]U8FQE7V1I -M Date: Sun, 7 Apr 2024 14:43:05 -0700 Subject: [PATCH 02/98] contrib/untar.c: cleanup (#2112) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reworks PR #1613 (commit a902fcd4), addressing the same issues as that earlier PR, but in 
a simpler and more portable fashion. Co-authored-by: Martin Matuška --- contrib/untar.c | 122 +++++++++++++++++++++++------------------------- 1 file changed, 59 insertions(+), 63 deletions(-) diff --git a/contrib/untar.c b/contrib/untar.c index db25118869..6d5e88e2f0 100644 --- a/contrib/untar.c +++ b/contrib/untar.c @@ -37,22 +37,28 @@ #include /* For mkdir() */ #if defined(_WIN32) && !defined(__CYGWIN__) -#define NUM_FORMAT "zu" -#include -#elif defined(__linux__) || defined(linux) || defined(__linux) -#define NUM_FORMAT "d" -#else -#define NUM_FORMAT "lu" -#endif /* defined(_WIN32) && !defined(__CYGWIN__) */ +#include +#endif -#define TO_STRING(x) #x -#define STR(x) TO_STRING(x) +#define BLOCKSIZE 512 -/* Parse an octal number, ignoring leading and trailing nonsense. */ +/* System call to create a directory. */ static int +system_mkdir(char *pathname, int mode) +{ +#if defined(_WIN32) && !defined(__CYGWIN__) + (void)mode; /* UNUSED */ + return _mkdir(pathname); +#else + return mkdir(pathname, mode); +#endif +} + +/* Parse an octal number, ignoring leading and trailing nonsense. */ +static unsigned long parseoct(const char *p, size_t n) { - int i = 0; + unsigned long i = 0; while ((*p < '0' || *p > '7') && n > 0) { ++p; @@ -72,7 +78,7 @@ static int is_end_of_archive(const char *p) { int n; - for (n = 511; n >= 0; --n) + for (n = 0; n < BLOCKSIZE; ++n) if (p[n] != '\0') return (0); return (1); @@ -90,12 +96,7 @@ create_dir(char *pathname, int mode) pathname[strlen(pathname) - 1] = '\0'; /* Try creating the directory. */ -#if defined(_WIN32) && !defined(__CYGWIN__) - r = _mkdir(pathname); -#else - r = mkdir(pathname, mode); -#endif - + r = system_mkdir(pathname, mode); if (r != 0) { /* On failure, try creating parent directory. 
*/ p = strrchr(pathname, '/'); @@ -103,11 +104,7 @@ create_dir(char *pathname, int mode) *p = '\0'; create_dir(pathname, 0755); *p = '/'; -#if defined(_WIN32) && !defined(__CYGWIN__) - r = _mkdir(pathname); -#else - r = mkdir(pathname, mode); -#endif + r = system_mkdir(pathname, mode); } } if (r != 0) @@ -138,7 +135,7 @@ static int verify_checksum(const char *p) { int n, u = 0; - for (n = 0; n < 512; ++n) { + for (n = 0; n < BLOCKSIZE; ++n) { if (n < 148 || n > 155) /* Standard tar checksum adds unsigned bytes. */ u += ((unsigned char *)p)[n]; @@ -146,26 +143,25 @@ verify_checksum(const char *p) u += 0x20; } - return (u == parseoct(p + 148, 8)); + return (u == (int)parseoct(p + 148, 8)); } /* Extract a tar archive. */ static void untar(FILE *a, const char *path) { - enum { BUF_SIZE=512 }; - char buff[BUF_SIZE]; + char buff[BLOCKSIZE]; FILE *f = NULL; size_t bytes_read; - off_t filesize; + unsigned long filesize; printf("Extracting from %s\n", path); for (;;) { - bytes_read = fread(buff, 1, BUF_SIZE, a); - if (bytes_read < BUF_SIZE) { + bytes_read = fread(buff, 1, BLOCKSIZE, a); + if (bytes_read < BLOCKSIZE) { fprintf(stderr, - "Short read on %s: expected " STR(BUF_SIZE) ", got %"NUM_FORMAT"\n", - path, bytes_read); + "Short read on %s: expected %d, got %d\n", + path, BLOCKSIZE, (int)bytes_read); return; } if (is_end_of_archive(buff)) { @@ -178,51 +174,51 @@ untar(FILE *a, const char *path) } filesize = parseoct(buff + 124, 12); switch (buff[156]) { - case '1': - printf(" Ignoring hardlink %s\n", buff); - break; - case '2': - printf(" Ignoring symlink %s\n", buff); - break; - case '3': - printf(" Ignoring character device %s\n", buff); - break; - case '4': - printf(" Ignoring block device %s\n", buff); - break; - case '5': - printf(" Extracting dir %s\n", buff); - create_dir(buff, parseoct(buff + 100, 8)); - filesize = 0; - break; - case '6': - printf(" Ignoring FIFO %s\n", buff); - break; - default: - printf(" Extracting file %s\n", buff); - f = create_file(buff, 
parseoct(buff + 100, 8)); + case '1': + printf(" Ignoring hardlink %s\n", buff); + break; + case '2': + printf(" Ignoring symlink %s\n", buff); + break; + case '3': + printf(" Ignoring character device %s\n", buff); break; + case '4': + printf(" Ignoring block device %s\n", buff); + break; + case '5': + printf(" Extracting dir %s\n", buff); + create_dir(buff, (int)parseoct(buff + 100, 8)); + filesize = 0; + break; + case '6': + printf(" Ignoring FIFO %s\n", buff); + break; + default: + printf(" Extracting file %s\n", buff); + f = create_file(buff, (int)parseoct(buff + 100, 8)); + break; } while (filesize > 0) { - bytes_read = fread(buff, 1, BUF_SIZE, a); - if (bytes_read < BUF_SIZE) { + bytes_read = fread(buff, 1, BLOCKSIZE, a); + if (bytes_read < BLOCKSIZE) { fprintf(stderr, - "Short read on %s: Expected " STR(BUF_SIZE) ", got %"NUM_FORMAT"\n", - path, bytes_read); + "Short read on %s: Expected %d, got %d\n", + path, BLOCKSIZE, (int)bytes_read); return; } - if (filesize < BUF_SIZE) - bytes_read = filesize; + if (filesize < BLOCKSIZE) + bytes_read = (size_t)filesize; if (f != NULL) { if (fwrite(buff, 1, bytes_read, f) - != bytes_read) + != bytes_read) { fprintf(stderr, "Failed write\n"); fclose(f); f = NULL; } } - filesize -= (off_t)bytes_read; + filesize -= bytes_read; } if (f != NULL) { fclose(f); From 771f434c90b6530a48353e70c6a53308689044f9 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Sun, 7 Apr 2024 14:44:37 -0700 Subject: [PATCH 03/98] build: use standard HAVE_ pattern for ZSTD compression check (#2111) Follow-on to #1649: this just changes the name of the preprocessor macro to use the standard pattern HAVE_ In particular: newer ZSTD implementations have a growing variety of compression functions; the standard pattern will make it easier to select among those someday. 
--- CMakeLists.txt | 2 +- build/cmake/config.h.in | 5 ++--- configure.ac | 2 +- libarchive/archive_write_add_filter_zstd.c | 20 ++++++++++---------- libarchive/test/test_write_filter_zstd.c | 2 +- 5 files changed, 15 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3122d929cf..1cb975a3b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -660,7 +660,7 @@ IF(ZSTD_FOUND) SET(CMAKE_REQUIRED_LIBRARIES ${ZSTD_LIBRARY}) SET(CMAKE_REQUIRED_INCLUDES ${ZSTD_INCLUDE_DIR}) CHECK_FUNCTION_EXISTS(ZSTD_decompressStream HAVE_LIBZSTD) - CHECK_FUNCTION_EXISTS(ZSTD_compressStream HAVE_LIBZSTD_COMPRESSOR) + CHECK_FUNCTION_EXISTS(ZSTD_compressStream HAVE_ZSTD_compressStream) # # TODO: test for static library. # diff --git a/build/cmake/config.h.in b/build/cmake/config.h.in index 716657e3ff..045a6b4165 100644 --- a/build/cmake/config.h.in +++ b/build/cmake/config.h.in @@ -774,9 +774,8 @@ typedef uint64_t uintmax_t; /* Define to 1 if you have the `zstd' library (-lzstd). */ #cmakedefine HAVE_LIBZSTD 1 -/* Define to 1 if you have the `zstd' library (-lzstd) with compression - support. */ -#cmakedefine HAVE_LIBZSTD_COMPRESSOR 1 +/* Define to 1 if you have the ZSTD_compressStream function. */ +#cmakedefine HAVE_ZSTD_compressStream 1 /* Define to 1 if you have the header file. 
*/ #cmakedefine HAVE_LIMITS_H 1 diff --git a/configure.ac b/configure.ac index 93f7af94af..29e9e8fbac 100644 --- a/configure.ac +++ b/configure.ac @@ -461,7 +461,7 @@ if test "x$with_zstd" != "xno"; then AC_CHECK_HEADERS([zstd.h]) AC_CHECK_LIB(zstd,ZSTD_decompressStream) AC_CHECK_LIB(zstd,ZSTD_compressStream, - AC_DEFINE([HAVE_LIBZSTD_COMPRESSOR], [1], [Define to 1 if you have the `zstd' library (-lzstd) with compression support.])) + AC_DEFINE([HAVE_ZSTD_compressStream], [1], [Define to 1 if you have the `zstd' library (-lzstd) with compression support.])) fi AC_ARG_WITH([lzma], diff --git a/libarchive/archive_write_add_filter_zstd.c b/libarchive/archive_write_add_filter_zstd.c index 9a74085b5b..94249accd0 100644 --- a/libarchive/archive_write_add_filter_zstd.c +++ b/libarchive/archive_write_add_filter_zstd.c @@ -53,7 +53,7 @@ struct private_data { int compression_level; int threads; int long_distance; -#if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR +#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream enum { running, finishing, @@ -96,7 +96,7 @@ static int archive_compressor_zstd_write(struct archive_write_filter *, static int archive_compressor_zstd_flush(struct archive_write_filter *); static int archive_compressor_zstd_close(struct archive_write_filter *); static int archive_compressor_zstd_free(struct archive_write_filter *); -#if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR +#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream static int drive_compressor(struct archive_write_filter *, struct private_data *, int, const void *, size_t); #endif @@ -130,7 +130,7 @@ archive_write_add_filter_zstd(struct archive *_a) data->compression_level = CLEVEL_DEFAULT; data->threads = 0; data->long_distance = 0; -#if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR +#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream data->frame_per_file = 0; data->min_frame_in = 0; data->max_frame_in = SIZE_MAX; @@ -164,7 +164,7 @@ static int archive_compressor_zstd_free(struct archive_write_filter *f) { struct private_data *data = 
(struct private_data *)f->data; -#if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR +#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream ZSTD_freeCStream(data->cstream); free(data->out.dst); #else @@ -242,7 +242,7 @@ archive_compressor_zstd_options(struct archive_write_filter *f, const char *key, /* If we don't have the library, hard-code the max level */ int minimum = CLEVEL_MIN; int maximum = CLEVEL_MAX; -#if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR +#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream maximum = ZSTD_maxCLevel(); #if ZSTD_VERSION_NUMBER >= MINVER_MINCLEVEL if (ZSTD_versionNumber() >= MINVER_MINCLEVEL) { @@ -269,7 +269,7 @@ archive_compressor_zstd_options(struct archive_write_filter *f, const char *key, } data->threads = (int)threads; return (ARCHIVE_OK); -#if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR +#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream } else if (strcmp(key, "frame-per-file") == 0) { data->frame_per_file = 1; return (ARCHIVE_OK); @@ -304,7 +304,7 @@ archive_compressor_zstd_options(struct archive_write_filter *f, const char *key, if (string_to_number(value, &long_distance) != ARCHIVE_OK) { return (ARCHIVE_WARN); } -#if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR && ZSTD_VERSION_NUMBER >= MINVER_LONG +#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream && ZSTD_VERSION_NUMBER >= MINVER_LONG ZSTD_bounds bounds = ZSTD_cParam_getBounds(ZSTD_c_windowLog); if (ZSTD_isError(bounds.error)) { int max_distance = ((int)(sizeof(size_t) == 4 ? 30 : 31)); @@ -329,7 +329,7 @@ archive_compressor_zstd_options(struct archive_write_filter *f, const char *key, return (ARCHIVE_WARN); } -#if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR +#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream /* * Setup callback. 
*/ @@ -485,7 +485,7 @@ drive_compressor(struct archive_write_filter *f, return (ARCHIVE_FATAL); } -#else /* HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR */ +#else /* HAVE_ZSTD_H && HAVE_ZSTD_compressStream */ static int archive_compressor_zstd_open(struct archive_write_filter *f) @@ -547,4 +547,4 @@ archive_compressor_zstd_close(struct archive_write_filter *f) return __archive_write_program_close(f, data->pdata); } -#endif /* HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR */ +#endif /* HAVE_ZSTD_H && HAVE_ZSTD_compressStream */ diff --git a/libarchive/test/test_write_filter_zstd.c b/libarchive/test/test_write_filter_zstd.c index c68074e87b..da711f9e4b 100644 --- a/libarchive/test/test_write_filter_zstd.c +++ b/libarchive/test/test_write_filter_zstd.c @@ -132,7 +132,7 @@ DEFINE_TEST(test_write_filter_zstd) archive_write_set_filter_option(a, NULL, "threads", "-1")); /* negative */ assertEqualIntA(a, ARCHIVE_OK, archive_write_set_filter_option(a, NULL, "threads", "4")); -#if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR +#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream /* frame-per-file: boolean */ assertEqualIntA(a, ARCHIVE_OK, archive_write_set_filter_option(a, NULL, "frame-per-file", "")); From 4fcc02d906cca4b9e21a78a833f1142a2689ec52 Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Mon, 8 Apr 2024 00:11:03 +0200 Subject: [PATCH 04/98] Release 3.7.3 --- CMakeLists.txt | 2 +- NEWS | 2 ++ configure.ac | 2 +- libarchive/archive.h | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1cb975a3b4..c0fbd70b38 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,7 @@ endif() # MinSizeRel : Release Min Size build # None : No build type IF(NOT CMAKE_BUILD_TYPE) - SET(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build Type" FORCE) + SET(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build Type" FORCE) ENDIF(NOT CMAKE_BUILD_TYPE) # Set a value type to properly display CMAKE_BUILD_TYPE on GUI if the # value type is "UNINITIALIZED". 
diff --git a/NEWS b/NEWS index d3b0eb9ff7..f4395fd1c9 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,5 @@ +Apr 08, 2024: libarchive 3.7.3 released + Sep 12, 2023: libarchive 3.7.2 released Jul 29, 2023: libarchive 3.7.1 released diff --git a/configure.ac b/configure.ac index 29e9e8fbac..503bb75ac9 100644 --- a/configure.ac +++ b/configure.ac @@ -4,7 +4,7 @@ dnl First, define all of the version numbers up front. dnl In particular, this allows the version macro to be used in AC_INIT dnl These first two version numbers are updated automatically on each release. -m4_define([LIBARCHIVE_VERSION_S],[3.7.3dev]) +m4_define([LIBARCHIVE_VERSION_S],[3.7.3]) m4_define([LIBARCHIVE_VERSION_N],[3007003]) dnl bsdtar and bsdcpio versioning tracks libarchive diff --git a/libarchive/archive.h b/libarchive/archive.h index 3b9eb51f50..2e3a9f31cd 100644 --- a/libarchive/archive.h +++ b/libarchive/archive.h @@ -155,7 +155,7 @@ __LA_DECL int archive_version_number(void); /* * Textual name/version of the library, useful for version displays. 
*/ -#define ARCHIVE_VERSION_ONLY_STRING "3.7.3dev" +#define ARCHIVE_VERSION_ONLY_STRING "3.7.3" #define ARCHIVE_VERSION_STRING "libarchive " ARCHIVE_VERSION_ONLY_STRING __LA_DECL const char * archive_version_string(void); From b3be94ba4f36943745e64a6a0a3c21c2d1281d3c Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Mon, 8 Apr 2024 12:17:17 +0200 Subject: [PATCH 05/98] Libarchive 3.7.4dev --- CMakeLists.txt | 2 +- build/version | 2 +- configure.ac | 4 ++-- libarchive/archive.h | 2 +- libarchive/archive_entry.h | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c0fbd70b38..1cb975a3b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,7 @@ endif() # MinSizeRel : Release Min Size build # None : No build type IF(NOT CMAKE_BUILD_TYPE) - SET(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build Type" FORCE) + SET(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build Type" FORCE) ENDIF(NOT CMAKE_BUILD_TYPE) # Set a value type to properly display CMAKE_BUILD_TYPE on GUI if the # value type is "UNINITIALIZED". diff --git a/build/version b/build/version index fcdfc39dce..b06fe47265 100644 --- a/build/version +++ b/build/version @@ -1 +1 @@ -3007003 +3007004 diff --git a/configure.ac b/configure.ac index 503bb75ac9..82bdf230f5 100644 --- a/configure.ac +++ b/configure.ac @@ -4,8 +4,8 @@ dnl First, define all of the version numbers up front. dnl In particular, this allows the version macro to be used in AC_INIT dnl These first two version numbers are updated automatically on each release. 
-m4_define([LIBARCHIVE_VERSION_S],[3.7.3]) -m4_define([LIBARCHIVE_VERSION_N],[3007003]) +m4_define([LIBARCHIVE_VERSION_S],[3.7.4dev]) +m4_define([LIBARCHIVE_VERSION_N],[3007004]) dnl bsdtar and bsdcpio versioning tracks libarchive m4_define([BSDTAR_VERSION_S],LIBARCHIVE_VERSION_S()) diff --git a/libarchive/archive.h b/libarchive/archive.h index 2e3a9f31cd..39acf721b9 100644 --- a/libarchive/archive.h +++ b/libarchive/archive.h @@ -34,7 +34,7 @@ * assert that ARCHIVE_VERSION_NUMBER >= 2012108. */ /* Note: Compiler will complain if this does not match archive_entry.h! */ -#define ARCHIVE_VERSION_NUMBER 3007003 +#define ARCHIVE_VERSION_NUMBER 3007004 #include #include /* for wchar_t */ diff --git a/libarchive/archive_entry.h b/libarchive/archive_entry.h index df9cb765f7..1c59ded7c9 100644 --- a/libarchive/archive_entry.h +++ b/libarchive/archive_entry.h @@ -28,7 +28,7 @@ #define ARCHIVE_ENTRY_H_INCLUDED /* Note: Compiler will complain if this does not match archive.h! */ -#define ARCHIVE_VERSION_NUMBER 3007003 +#define ARCHIVE_VERSION_NUMBER 3007004 /* * Note: archive_entry.h is for use outside of libarchive; the From 341800da257fb8c31b8b4281604fb7e3436b61f3 Mon Sep 17 00:00:00 2001 From: Sam James Date: Wed, 10 Apr 2024 04:15:17 +0100 Subject: [PATCH 06/98] configure.ac: fix bashism in dead code removal check (#2117) configure scripts need to be runnable with a POSIX-compliant /bin/sh. On many (but not all!) systems, /bin/sh is provided by Bash, so errors like this aren't spotted. Notably Debian defaults to /bin/sh provided by dash which doesn't tolerate such bashisms as '=='. This retains compatibility with bash. 
Signed-off-by: Sam James --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 82bdf230f5..3cf5f50e78 100644 --- a/configure.ac +++ b/configure.ac @@ -662,7 +662,7 @@ AC_LINK_IFELSE( DEAD_CODE_REMOVAL="";]) LDFLAGS=$save_LDFLAGS -if test "$DEAD_CODE_REMOVAL" == ""; then +if test "$DEAD_CODE_REMOVAL" = ""; then # Macos linkers have a -dead_strip flag, which is similar to --gc-sections. save_LDFLAGS=$LDFLAGS LDFLAGS="$LDFLAGS -Wl,-dead_strip" From b904d6ce9519f6e49fd6f8750c1454cb3e13a135 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Fri, 12 Apr 2024 17:28:25 -0700 Subject: [PATCH 07/98] Fix version string (#2118) Looks like the recent version number update from 3.7.3 -> 3.7.4dev missed a spot... --- libarchive/archive.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libarchive/archive.h b/libarchive/archive.h index 39acf721b9..7c526647cb 100644 --- a/libarchive/archive.h +++ b/libarchive/archive.h @@ -155,7 +155,7 @@ __LA_DECL int archive_version_number(void); /* * Textual name/version of the library, useful for version displays. */ -#define ARCHIVE_VERSION_ONLY_STRING "3.7.3" +#define ARCHIVE_VERSION_ONLY_STRING "3.7.4dev" #define ARCHIVE_VERSION_STRING "libarchive " ARCHIVE_VERSION_ONLY_STRING __LA_DECL const char * archive_version_string(void); From b5182fa98c4aaf973fbe93597f2ca1025dfda5fe Mon Sep 17 00:00:00 2001 From: Adrian Iain Lam Date: Sat, 13 Apr 2024 06:13:42 +0100 Subject: [PATCH 08/98] Fix unused-function warning. (#2114) `string_to_size` is only used in a code block conditionally compiled with `#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream`. If this block is not compiled, GCC raises a warning with -Wunused-function. 
--- libarchive/archive_write_add_filter_zstd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libarchive/archive_write_add_filter_zstd.c b/libarchive/archive_write_add_filter_zstd.c index 94249accd0..df539d326e 100644 --- a/libarchive/archive_write_add_filter_zstd.c +++ b/libarchive/archive_write_add_filter_zstd.c @@ -190,6 +190,7 @@ string_to_number(const char *string, intmax_t *numberp) return (ARCHIVE_OK); } +#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream static int string_to_size(const char *string, size_t *numberp) { @@ -224,6 +225,7 @@ string_to_size(const char *string, size_t *numberp) *numberp = (size_t)(number << shift); return (ARCHIVE_OK); } +#endif /* * Set write options. From 7bc8531da49ed4db1b98416be0b5959266dd19be Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Sat, 13 Apr 2024 05:14:59 +0000 Subject: [PATCH 09/98] Prevent usage of uninitialized variable in `__archive_mkstemp` (#2121) Calling `__archive_mkstemp` can lead to access of an uninitialized variable in `__archive_mktempx`, because `temp_name` is only initialized if supplied `template` argument is `NULL`. If `template` is not `NULL`, it is eventually compared with `temp_name.s` anyway. The fix is simple: Always initialize `temp_name`, which merely sets values in the struct. No memory allocation occurs and the check leads to the expected result. How to reproduce: 1. Compile libarchive with Visual Studio 2022 and CMake's Debug profile 2. Run test `bsdtar_test_option_safe_writes` 3. 
A popup (Microsoft Visual C++ Runtime Library) appears, stating that variable temp_name is being used without being initialized --- libarchive/archive_util.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libarchive/archive_util.c b/libarchive/archive_util.c index 32d4bd4098..7b918fef04 100644 --- a/libarchive/archive_util.c +++ b/libarchive/archive_util.c @@ -255,10 +255,9 @@ __archive_mktempx(const char *tmpdir, wchar_t *template) #endif fd = -1; ws = NULL; + archive_string_init(&temp_name); if (template == NULL) { - archive_string_init(&temp_name); - /* Get a temporary directory. */ if (tmpdir == NULL) { size_t l; From bd974e1b7bac6f80e4a18f0dd261fa88d71eaa1c Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Sat, 13 Apr 2024 05:15:53 +0000 Subject: [PATCH 10/98] tools: Fix stack overflow with many arguments (#2122) Supplying a lot of "-" arguments to tools can lead to stack overflow due to recursive *_getopt function calls. Proof of Concept: 1. Compile libarchive with Visual Studio 2022 2. Call bsdtar with insane amount of arguments ``` PS> bsdtar.exe ("- "*10000).split(" ") ``` The event log shows that bsdtar.exe failed with `0xc00000fd` (stack overflow). If compiled with gcc, this does not happen by default because the code is internally optimized to use this suggested loop instead. You have to compile with CFLAGS="-O0" to provoke it with gcc as well. 
--- cat/cmdline.c | 16 +++++++++++----- cpio/cmdline.c | 16 +++++++++++----- tar/cmdline.c | 16 +++++++++++----- unzip/cmdline.c | 18 ++++++++++++------ 4 files changed, 45 insertions(+), 21 deletions(-) diff --git a/cat/cmdline.c b/cat/cmdline.c index ea1e0eed6d..851b63de06 100644 --- a/cat/cmdline.c +++ b/cat/cmdline.c @@ -114,12 +114,18 @@ bsdcat_getopt(struct bsdcat *bsdcat) enum { state_start = 0, state_old_tar, state_next_word, state_short, state_long }; - const struct bsdcat_option *popt, *match = NULL, *match2 = NULL; - const char *p, *long_prefix = "--"; + const struct bsdcat_option *popt, *match, *match2; + const char *p, *long_prefix; size_t optlength; - int opt = '?'; - int required = 0; + int opt; + int required; +again: + match = NULL; + match2 = NULL; + long_prefix = "--"; + opt = '?'; + required = 0; bsdcat->argument = NULL; /* First time through, initialize everything. */ @@ -172,7 +178,7 @@ bsdcat_getopt(struct bsdcat *bsdcat) if (opt == '\0') { /* End of this group; recurse to get next option. */ bsdcat->getopt_state = state_next_word; - return bsdcat_getopt(bsdcat); + goto again; } /* Does this option take an argument? */ diff --git a/cpio/cmdline.c b/cpio/cmdline.c index 312d762c8f..ab25492ede 100644 --- a/cpio/cmdline.c +++ b/cpio/cmdline.c @@ -114,12 +114,18 @@ cpio_getopt(struct cpio *cpio) static int state = state_start; static char *opt_word; - const struct option *popt, *match = NULL, *match2 = NULL; - const char *p, *long_prefix = "--"; + const struct option *popt, *match, *match2; + const char *p, *long_prefix; size_t optlength; - int opt = '?'; - int required = 0; + int opt; + int required; +again: + match = NULL; + match2 = NULL; + long_prefix = "--"; + opt = '?'; + required = 0; cpio->argument = NULL; /* First time through, initialize everything. */ @@ -169,7 +175,7 @@ cpio_getopt(struct cpio *cpio) if (opt == '\0') { /* End of this group; recurse to get next option. 
*/ state = state_next_word; - return cpio_getopt(cpio); + goto again; } /* Does this option take an argument? */ diff --git a/tar/cmdline.c b/tar/cmdline.c index 72292e8f27..2a89f42b08 100644 --- a/tar/cmdline.c +++ b/tar/cmdline.c @@ -218,12 +218,18 @@ bsdtar_getopt(struct bsdtar *bsdtar) enum { state_start = 0, state_old_tar, state_next_word, state_short, state_long }; - const struct bsdtar_option *popt, *match = NULL, *match2 = NULL; - const char *p, *long_prefix = "--"; + const struct bsdtar_option *popt, *match, *match2; + const char *p, *long_prefix; size_t optlength; - int opt = '?'; - int required = 0; + int opt; + int required; +again: + match = NULL; + match2 = NULL; + long_prefix = "--"; + opt = '?'; + required = 0; bsdtar->argument = NULL; /* First time through, initialize everything. */ @@ -310,7 +316,7 @@ bsdtar_getopt(struct bsdtar *bsdtar) if (opt == '\0') { /* End of this group; recurse to get next option. */ bsdtar->getopt_state = state_next_word; - return bsdtar_getopt(bsdtar); + goto again; } /* Does this option take an argument? */ diff --git a/unzip/cmdline.c b/unzip/cmdline.c index ab1aeb31fe..4c6efc3e06 100644 --- a/unzip/cmdline.c +++ b/unzip/cmdline.c @@ -81,12 +81,18 @@ bsdunzip_getopt(struct bsdunzip *bsdunzip) { enum { state_start = 0, state_next_word, state_short, state_long }; - const struct bsdunzip_option *popt, *match = NULL, *match2 = NULL; - const char *p, *long_prefix = "--"; + const struct bsdunzip_option *popt, *match, *match2; + const char *p, *long_prefix; size_t optlength; - int opt = OPTION_NONE; - int required = 0; - + int opt; + int required; + +again: + match = NULL; + match2 = NULL; + long_prefix = "--"; + opt = OPTION_NONE; + required = 0; bsdunzip->argument = NULL; /* First time through, initialize everything. */ @@ -140,7 +146,7 @@ bsdunzip_getopt(struct bsdunzip *bsdunzip) if (opt == '\0') { /* End of this group; recurse to get next option. 
*/ bsdunzip->getopt_state = state_next_word; - return bsdunzip_getopt(bsdunzip); + goto again; } /* Does this option take an argument? */ From f4d5aaed1e5c12c6a8b6d40ed80374ebab5f59a5 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Sat, 13 Apr 2024 05:26:53 +0000 Subject: [PATCH 11/98] [Windows] Ignore files generated by Visual Studio with CMake (#2120) These files are created when building libarchive within Visual Studio Code, its CMake plugin and Visual Studio 2022 as compiler. Without this change, Visual Studio Code shows hundreds of files in the GitLens plugin waiting to be committed. --- .gitignore | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.gitignore b/.gitignore index 8dc637ee13..69a24e9093 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,18 @@ *~ *.o +*.obj *.exe +*.exp +*.ilk *.la +*.lib *.lo +*.recipe +*.sln +*.tlog *.cmake +*.vcxproj +*.vcxproj.filters !build/cmake/*.cmake .deps/ .libs/ @@ -29,7 +38,13 @@ build/autoconf/ltsugar.m4 build/autoconf/ltversion.m4 build/autoconf/lt~obsolete.m4 build/autoconf/missing +build/build/pkgconfig/libarchive.pc +build/cat/test/list.h +build/cpio/test/list.h +build/libarchive/test/list.h build/pkgconfig/libarchive.pc +build/tar/test/list.h +build/unzip/test/list.h cat/test/list.h config.cache config.h From 46818684c3dfcb8ace2721491b9e15aff5341725 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Sat, 13 Apr 2024 05:40:02 +0000 Subject: [PATCH 12/98] Improve newline handling on Windows (#2115) If libarchive is compiled on Windows without cygwin, strip \r and \n the same way as it is done on POSIX systems. Also, entering an empty password as "\r\n" should lead to an empty string. Right now, the newlines are kept. Proof of Concept: 1. Compile libarchive with Visual Studio 2. Create a password-protected ZIP file ``` PS> bsdtar.exe --format zip --options 'zip:encryption' -cf archive.zip input.txt Enter passphrase: ``` 3.
Extract ZIP file on Windows ``` PS> bsdtar.exe -xf archive.zip Enter passphrase: ``` 4. Extract ZIP file on Linux ``` $ bsdtar -xf archive.zip Enter passphrase: Enter passphrase: ``` As can be seen in step 4, it is impossible to extract the file on Linux with interactive input, because \r and \n are stripped. The only way to extract the content is through command line option passphrase: ``` $ bsdtar -xf archive.zip --passphrase $'\r\n' ``` It's also true the other way around: Creating a ZIP file with an empty password on Linux cannot be extracted interactively on Windows. Not allowing empty passwords at all should be part of another PR. This one is about unifying Windows and POSIX systems regarding newline handling. --- libarchive_fe/passphrase.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libarchive_fe/passphrase.c b/libarchive_fe/passphrase.c index 9d95d52706..90fef32d25 100644 --- a/libarchive_fe/passphrase.c +++ b/libarchive_fe/passphrase.c @@ -76,6 +76,7 @@ #if defined(_WIN32) && !defined(__CYGWIN__) +#include #include static char * @@ -113,8 +114,7 @@ readpassphrase(const char *prompt, char *buf, size_t bufsiz, int flags) WriteFile(hStdout, "\r\n", 2, NULL, NULL); buf[rbytes] = '\0'; /* Remove trailing carriage return(s). */ - if (rbytes > 2 && buf[rbytes - 2] == '\r' && buf[rbytes - 1] == '\n') - buf[rbytes - 2] = '\0'; + buf[strcspn(buf, "\r\n")] = '\0'; return (buf); } From 1a2fd29f6de7decd097dc4e30629ff96a068f72e Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Sat, 13 Apr 2024 05:44:51 +0000 Subject: [PATCH 13/98] Never allow empty passwords (#2116) Passwords for encryption must not be empty. Neither through command line option nor through interactive input. 
With this PR applied: ``` $ bsdtar --format zip --options zip:encryption -cf archive.zip input.txt Enter passphrase: bsdtar: Encryption needs passphrase ``` Output with command line argument (unaffected by this PR): ``` $ bsdtar --format zip --options zip:encryption --passphrase '' -cf archive.zip input.txt bsdtar: Empty passphrase is unacceptable ``` The outputs differ due to internal difference in handling the results. It is still possible to supply a passphrase through command line argument which cannot be entered interactively, i.e. $'\r\n'. See https://github.com/libarchive/libarchive/pull/2115 for more details. --- libarchive/archive_write_set_passphrase.c | 35 ++++++++++++----------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/libarchive/archive_write_set_passphrase.c b/libarchive/archive_write_set_passphrase.c index 977fc4a9ee..f871c8e2f8 100644 --- a/libarchive/archive_write_set_passphrase.c +++ b/libarchive/archive_write_set_passphrase.c @@ -30,14 +30,9 @@ #endif #include "archive_write_private.h" -int -archive_write_set_passphrase(struct archive *_a, const char *p) +static int +set_passphrase(struct archive_write *a, const char *p) { - struct archive_write *a = (struct archive_write *)_a; - - archive_check_magic(_a, ARCHIVE_WRITE_MAGIC, ARCHIVE_STATE_NEW, - "archive_write_set_passphrase"); - if (p == NULL || p[0] == '\0') { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Empty passphrase is unacceptable"); @@ -54,6 +49,18 @@ archive_write_set_passphrase(struct archive *_a, const char *p) } +int +archive_write_set_passphrase(struct archive *_a, const char *p) +{ + struct archive_write *a = (struct archive_write *)_a; + + archive_check_magic(_a, ARCHIVE_WRITE_MAGIC, ARCHIVE_STATE_NEW, + "archive_write_set_passphrase"); + + return (set_passphrase(a, p)); +} + + int archive_write_set_passphrase_callback(struct archive *_a, void *client_data, archive_passphrase_callback *cb) @@ -80,15 +87,9 @@ __archive_write_get_passphrase(struct 
archive_write *a) const char *p; p = a->passphrase_callback(&a->archive, a->passphrase_client_data); - if (p != NULL) { - a->passphrase = strdup(p); - if (a->passphrase == NULL) { - archive_set_error(&a->archive, ENOMEM, - "Can't allocate data for passphrase"); - return (NULL); - } - return (a->passphrase); - } + set_passphrase(a, p); + a->passphrase_callback = NULL; + a->passphrase_client_data = NULL; } - return (NULL); + return (a->passphrase); } From 5daa209cfa68da574d2082180273dea2a0382a86 Mon Sep 17 00:00:00 2001 From: evan-swinney Date: Tue, 16 Apr 2024 22:51:30 -0500 Subject: [PATCH 14/98] Change CMAKE_BUILD_TYPE comparison to be case-insensitive (#2130) Currently the `CMAKE_BUILD_TYPE` is being compared in a case-sensitive way. It seems current CMake documentation [suggests treating this in a case-insensitive manner now-a-days](https://cmake.org/cmake/help/latest/manual/cmake-buildsystem.7.html#case-sensitivity). This being case-sensitive creates needless complexities in other projects if they compile their own project with `cmake -DCMAKE_BUILD_TYPE=release ..`, etc. In this specific case, libarchive has a fatal error due to the lowercase `release`. I'd honestly like to remove these comparisons entirely; as I'm not sure if they're really needed or not if `libarchive` is only using the Makefile or Ninja generators with CMake. This PR changes the `CMAKE_BUILD_TYPE` comparison to be case-insensitive, and leaves the rest alone. This should also fix the following issue(s): * https://github.com/libarchive/libarchive/issues/1792 --- CMakeLists.txt | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1cb975a3b4..a953bcea1d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,13 +34,15 @@ IF("${cached_type}" STREQUAL "UNINITIALIZED") SET(CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}" CACHE STRING "Build Type" FORCE) ENDIF("${cached_type}" STREQUAL "UNINITIALIZED") # Check the Build Type. 
-IF(NOT "${CMAKE_BUILD_TYPE}" - MATCHES "^(Debug|Release|RelWithDebInfo|MinSizeRel|None)\$") +# Convert the CMAKE_BUILD_TYPE to uppercase to perform a case-insensitive comparison. +string(TOUPPER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_UPPER) +IF(NOT "${CMAKE_BUILD_TYPE_UPPER}" + MATCHES "^(DEBUG|RELEASE|RELWITHDEBINFO|MINSIZEREL|NONE)\$") MESSAGE(FATAL_ERROR "Unknown keyword for CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}\n" - "Acceptable keywords: Debug,Release,RelWithDebInfo,MinSizeRel,None") -ENDIF(NOT "${CMAKE_BUILD_TYPE}" - MATCHES "^(Debug|Release|RelWithDebInfo|MinSizeRel|None)\$") + "Acceptable keywords: Debug, Release, RelWithDebInfo, MinSizeRel, None") +ENDIF(NOT "${CMAKE_BUILD_TYPE_UPPER}" + MATCHES "^(DEBUG|RELEASE|RELWITHDEBINFO|MINSIZEREL|NONE)\$") # On MacOS, prefer MacPorts libraries to system libraries. # I haven't come up with a compelling argument for this to be conditional. From d43c39247f4f5fb8b85ec2a3904c65519956cde1 Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Wed, 17 Apr 2024 09:15:35 +0200 Subject: [PATCH 15/98] tests: fix null format string error in test_compat_lzip.c --- libarchive/test/test_compat_lzip.c | 1 - 1 file changed, 1 deletion(-) diff --git a/libarchive/test/test_compat_lzip.c b/libarchive/test/test_compat_lzip.c index d3b8b11832..1420e5bb9a 100644 --- a/libarchive/test/test_compat_lzip.c +++ b/libarchive/test/test_compat_lzip.c @@ -195,7 +195,6 @@ compat_lzip_4(const char *name) assertEqualString("test.bin", archive_entry_pathname(ae)); /* Verify the end-of-archive. */ - archive_set_error(a, ARCHIVE_OK, NULL); assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); /* Verify that the format detection worked. 
*/ From 27d2ff3b6bf68c09d8d232c0362648fd231e730b Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Fri, 19 Apr 2024 15:12:59 +0000 Subject: [PATCH 16/98] [Windows] Ignore more directories generated by Visual Studio (#2133) Compiling libarchive with Visual Studio (not Visual Studio Code) generates files in different directories. Ignore these as well. --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 69a24e9093..368828b8d0 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ .deps/ .libs/ bin/ +out/ .dirstamp Makefile Makefile.in @@ -62,6 +63,7 @@ CMakeCache.txt CMakeFiles/ DartConfiguration.tcl cmake.tmp/ +.vs/ .vscode/ doc/html/*.html From ae4e3d2f0d98e1195975cc7ef8e56c1cc4b13d97 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Sat, 20 Apr 2024 22:39:38 +0000 Subject: [PATCH 17/98] 7zip: Limit amount of properties (#2131) The uint64_t variable propertiesSize is eventually cast to size_t which, on 32-bit systems, can result in integer truncation. In such a situation, it is possible that fewer than the minimum of 5 properties are parsed and processed, which will result in out-of-bounds reads in init_decompression because the error check `if (coder1->propertiesSize < 5)` still takes the uint64_t variable into account.
--- libarchive/archive_read_support_format_7zip.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libarchive/archive_read_support_format_7zip.c b/libarchive/archive_read_support_format_7zip.c index 92495e628f..7e465935c9 100644 --- a/libarchive/archive_read_support_format_7zip.c +++ b/libarchive/archive_read_support_format_7zip.c @@ -2037,6 +2037,8 @@ read_Folder(struct archive_read *a, struct _7z_folder *f) if (parse_7zip_uint64( a, &(f->coders[i].propertiesSize)) < 0) return (-1); + if (UMAX_ENTRY < f->coders[i].propertiesSize) + return (-1); if ((p = header_bytes( a, (size_t)f->coders[i].propertiesSize)) == NULL) return (-1); From 91f27004a5c88589658e38d68e46d223da6b75ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Matu=C5=A1ka?= Date: Sun, 21 Apr 2024 05:23:22 +0200 Subject: [PATCH 18/98] Update AppleDouble support for directories (#2108) This is a clone of the reverted pull request #2100. Due to the latest circumstances we should require a more deep review of each new PR. --------- Co-authored-by: Justin Vreeland --- libarchive/archive_read_support_format_zip.c | 13 ++- libarchive/archive_write_disk_posix.c | 3 +- libarchive/test/test_write_disk_appledouble.c | 84 +++++++++++++++++++ .../test_write_disk_appledouble_zip.zip.uu | 27 ++++++ 4 files changed, 125 insertions(+), 2 deletions(-) create mode 100644 libarchive/test/test_write_disk_appledouble_zip.zip.uu diff --git a/libarchive/archive_read_support_format_zip.c b/libarchive/archive_read_support_format_zip.c index 212bfff9fa..d7b6f082ee 100644 --- a/libarchive/archive_read_support_format_zip.c +++ b/libarchive/archive_read_support_format_zip.c @@ -4083,6 +4083,17 @@ slurp_central_directory(struct archive_read *a, struct archive_entry* entry, } else { /* Generate resource fork name to find its * resource file at zip->tree_rsrc. */ + + /* If this is an entry ending with slash, + * make the resource for name slash-less + * as the actual resource fork doesn't end with '/'. 
+ */ + size_t tmp_length = filename_length; + if (name[tmp_length - 1] == '/') { + tmp_length--; + r = rsrc_basename(name, tmp_length); + } + archive_strcpy(&(zip_entry->rsrcname), "__MACOSX/"); archive_strncat(&(zip_entry->rsrcname), @@ -4090,7 +4101,7 @@ slurp_central_directory(struct archive_read *a, struct archive_entry* entry, archive_strcat(&(zip_entry->rsrcname), "._"); archive_strncat(&(zip_entry->rsrcname), name + (r - name), - filename_length - (r - name)); + tmp_length - (r - name)); /* Register an entry to RB tree to sort it by * file offset. */ __archive_rb_tree_insert_node(&zip->tree, diff --git a/libarchive/archive_write_disk_posix.c b/libarchive/archive_write_disk_posix.c index 58265ee0dc..92db4ff05b 100644 --- a/libarchive/archive_write_disk_posix.c +++ b/libarchive/archive_write_disk_posix.c @@ -4427,7 +4427,8 @@ fixup_appledouble(struct archive_write_disk *a, const char *pathname) #else la_stat(datafork.s, &st) == -1 || #endif - (st.st_mode & AE_IFMT) != AE_IFREG) + (((st.st_mode & AE_IFMT) != AE_IFREG) && + ((st.st_mode & AE_IFMT) != AE_IFDIR))) goto skip_appledouble; /* diff --git a/libarchive/test/test_write_disk_appledouble.c b/libarchive/test/test_write_disk_appledouble.c index 3265a94d2f..8de6c8b504 100644 --- a/libarchive/test/test_write_disk_appledouble.c +++ b/libarchive/test/test_write_disk_appledouble.c @@ -236,3 +236,87 @@ DEFINE_TEST(test_write_disk_appledouble) assertEqualFile("hfscmp/file3", "nocmp/file3"); #endif } + +/* Test writing apple doubles to disk from zip format */ +DEFINE_TEST(test_write_disk_appledouble_zip) +{ +#if !defined(__APPLE__) || !defined(UF_COMPRESSED) || !defined(HAVE_SYS_XATTR_H)\ + || !defined(HAVE_ZLIB_H) + skipping("MacOS-specific AppleDouble test"); +#else + const char *refname = "test_write_disk_appledouble_zip.zip"; + struct archive *ad, *a; + struct archive_entry *ae; + struct stat st; + + extract_reference_file(refname); + + /* + * Extract an archive to disk. 
+ */ + assert((ad = archive_write_disk_new()) != NULL); + assertEqualIntA(ad, ARCHIVE_OK, + archive_write_disk_set_standard_lookup(ad)); + assertEqualIntA(ad, ARCHIVE_OK, + archive_write_disk_set_options(ad, + ARCHIVE_EXTRACT_TIME | + ARCHIVE_EXTRACT_SECURE_SYMLINKS | + ARCHIVE_EXTRACT_SECURE_NODOTDOT)); + + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, + refname, 512 * 20)); + + /* Skip The top level directory */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("apple_double_dir/", archive_entry_pathname(ae)); + + /* Extract apple_double_test */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("apple_double_dir/apple_double_dir_test/", archive_entry_pathname(ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_extract2(a, ae, ad)); + + /* Extract ._apple_double_dir_test which will be merged into apple_double_dir_test as metadata. */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("apple_double_dir/._apple_double_dir_test", archive_entry_pathname(ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_extract2(a, ae, ad)); + + /* Extract test_file */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("apple_double_dir/test_file", archive_entry_pathname(ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_extract2(a, ae, ad)); + + /* Extract ._test_file which will be merged into test_file as metadata. 
*/ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("apple_double_dir/._test_file", archive_entry_pathname(ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_extract2(a, ae, ad)); + + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); + assertEqualIntA(ad, ARCHIVE_OK, archive_write_free(ad)); + + /* Test test_file */ + assertEqualInt(0, stat("apple_double_dir/test_file", &st)); + assertFileSize("apple_double_dir/test_file", 5); + failure("'%s' should have Resource Fork", "test_file"); + assertEqualInt(1, has_xattr("apple_double_dir/test_file", "com.apple.ResourceFork")); + + /* Test apple_double_dir_test */ + failure("'%s' should have quarantine xattr", "apple_double_dir_test"); + assertEqualInt(1, has_xattr("apple_double_dir/apple_double_dir_test", "com.apple.quarantine")); + + /* Test ._test_file. */ + failure("'apple_double_dir/._test_file' should be merged and removed"); + assertFileNotExists("apple_double_dir/._test_file"); + + /* Test ._apple_double_dir_test */ + failure("'apple_double_dir/._._apple_double_dir_test' should be merged and removed"); + assertFileNotExists("apple_double_dir/._apple_double_dir_test"); + + assertChdir(".."); + +#endif +} diff --git a/libarchive/test/test_write_disk_appledouble_zip.zip.uu b/libarchive/test/test_write_disk_appledouble_zip.zip.uu new file mode 100644 index 0000000000..5ab67533d5 --- /dev/null +++ b/libarchive/test/test_write_disk_appledouble_zip.zip.uu @@ -0,0 +1,27 @@ +begin 644 test_write_disk_appledouble_zip.zip +M4$L#!`H```````MM?%@````````````````1`!``87!P;&5?9&]U8FQE7V1I +M Date: Mon, 22 Apr 2024 01:55:41 +0900 Subject: [PATCH 19/98] fix: OOB in rar e8 filter (#2135) This patch fixes an out-of-bound error in rar e8 filter. 
--- libarchive/archive_read_support_format_rar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libarchive/archive_read_support_format_rar.c b/libarchive/archive_read_support_format_rar.c index 99a11d1700..266d0ee995 100644 --- a/libarchive/archive_read_support_format_rar.c +++ b/libarchive/archive_read_support_format_rar.c @@ -3615,7 +3615,7 @@ execute_filter_e8(struct rar_filter *filter, struct rar_virtual_machine *vm, siz uint32_t filesize = 0x1000000; uint32_t i; - if (length > PROGRAM_WORK_SIZE || length < 4) + if (length > PROGRAM_WORK_SIZE || length <= 4) return 0; for (i = 0; i <= length - 5; i++) From 91ba8ceacf383db8a005d267d3eb304342b58196 Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Tue, 23 Apr 2024 07:45:50 +0200 Subject: [PATCH 20/98] unzip: explicitly set en_US.UTF-8 locale in test_I.c --- unzip/test/test_I.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/unzip/test/test_I.c b/unzip/test/test_I.c index a6bad85a01..5d31ce8d16 100644 --- a/unzip/test/test_I.c +++ b/unzip/test/test_I.c @@ -25,12 +25,25 @@ */ #include "test.h" +#ifdef HAVE_LOCALE_H +#include +#endif + /* Test I arg - file name encoding */ DEFINE_TEST(test_I) { const char *reffile = "test_I.zip"; int r; +#if HAVE_SETLOCALE + if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { + skipping("en_US.UTF-8 locale not available on this system."); + return; + } +#else + skipping("setlocale() not available on this system."); +#endif + extract_reference_file(reffile); r = systemf("%s -I UTF-8 %s >test.out 2>test.err", testprog, reffile); assertEqualInt(0, r); From ce5d6b6b922c394724ef21e053e1547824642ce1 Mon Sep 17 00:00:00 2001 From: Duncan Horn <40036384+dunhor@users.noreply.github.com> Date: Mon, 22 Apr 2024 23:25:39 -0700 Subject: [PATCH 21/98] rar: Avoid overwriting data at "end" of circular window buffer (#2124) fix "File CRC Error" when extracting specific rar4 archives Fixes #1794 --- libarchive/archive_read_support_format_rar.c | 13 +++++++++++++ 1 
file changed, 13 insertions(+) diff --git a/libarchive/archive_read_support_format_rar.c b/libarchive/archive_read_support_format_rar.c index 266d0ee995..79669a8f40 100644 --- a/libarchive/archive_read_support_format_rar.c +++ b/libarchive/archive_read_support_format_rar.c @@ -2176,6 +2176,19 @@ read_data_compressed(struct archive_read *a, const void **buff, size_t *size, { start = rar->offset; end = start + rar->dictionary_size; + + /* We don't want to overflow the window and overwrite data that we write + * at 'start'. Therefore, reduce the end length by the maximum match size, + * which is 260 bytes. You can compute this maximum by looking at the + * definition of 'expand', in particular when 'symbol >= 271'. */ + /* NOTE: It's possible for 'dictionary_size' to be less than this 260 + * value, however that will only be the case when 'unp_size' is small, + * which should only happen when the entry size is small and there's no + * risk of overflowing the buffer */ + if (rar->dictionary_size > 260) { + end -= 260; + } + if (rar->filters.filterstart < end) { end = rar->filters.filterstart; } From 22ea7d96cb64b9e9e9a80e88adfe2ef459ac6ea8 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Wed, 10 Apr 2024 22:59:40 +0200 Subject: [PATCH 22/98] xar: Avoid infinite link loop (#2123) A file may have only one link target at a time. Otherwise the internal link structure could loop. Besides, a hard link realistically can only link to one file, not multiple ones. Consider such an archive invalid. 
Co-authored-by: Martin Matuska --- Makefile.am | 2 + libarchive/archive_read_support_format_xar.c | 5 ++ libarchive/test/CMakeLists.txt | 1 + .../test/test_read_format_xar_doublelink.c | 55 +++++++++++++++++++ .../test_read_format_xar_doublelink.xar.uu | 12 ++++ 5 files changed, 75 insertions(+) create mode 100644 libarchive/test/test_read_format_xar_doublelink.c create mode 100644 libarchive/test/test_read_format_xar_doublelink.xar.uu diff --git a/Makefile.am b/Makefile.am index 286f08694c..47b6fa1fc6 100644 --- a/Makefile.am +++ b/Makefile.am @@ -528,6 +528,7 @@ libarchive_test_SOURCES= \ libarchive/test/test_read_format_ustar_filename.c \ libarchive/test/test_read_format_warc.c \ libarchive/test/test_read_format_xar.c \ + libarchive/test/test_read_format_xar_doublelink.c \ libarchive/test/test_read_format_zip.c \ libarchive/test/test_read_format_zip_7075_utf8_paths.c \ libarchive/test/test_read_format_zip_comment_stored.c \ @@ -932,6 +933,7 @@ libarchive_test_EXTRA_DIST=\ libarchive/test/test_read_format_ustar_filename_eucjp.tar.Z.uu \ libarchive/test/test_read_format_ustar_filename_koi8r.tar.Z.uu \ libarchive/test/test_read_format_warc.warc.uu \ + libarchive/test/test_read_format_xar_doublelink.xar.uu \ libarchive/test/test_read_format_zip.zip.uu \ libarchive/test/test_read_format_zip_7075_utf8_paths.zip.uu \ libarchive/test/test_read_format_zip_7z_deflate.zip.uu \ diff --git a/libarchive/archive_read_support_format_xar.c b/libarchive/archive_read_support_format_xar.c index fd63594373..2c34326429 100644 --- a/libarchive/archive_read_support_format_xar.c +++ b/libarchive/archive_read_support_format_xar.c @@ -2055,6 +2055,11 @@ xml_start(struct archive_read *a, const char *name, struct xmlattr_list *list) attr = attr->next) { if (strcmp(attr->name, "link") != 0) continue; + if (xar->file->hdnext != NULL || xar->file->link != 0) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "File with multiple link targets"); + return (ARCHIVE_FATAL); + } if 
(strcmp(attr->value, "original") == 0) { xar->file->hdnext = xar->hdlink_orgs; xar->hdlink_orgs = xar->file; diff --git a/libarchive/test/CMakeLists.txt b/libarchive/test/CMakeLists.txt index 8209c25a5f..7b166c5fba 100644 --- a/libarchive/test/CMakeLists.txt +++ b/libarchive/test/CMakeLists.txt @@ -172,6 +172,7 @@ IF(ENABLE_TEST) test_read_format_ustar_filename.c test_read_format_warc.c test_read_format_xar.c + test_read_format_xar_doublelink.c test_read_format_zip.c test_read_format_zip_7075_utf8_paths.c test_read_format_zip_comment_stored.c diff --git a/libarchive/test/test_read_format_xar_doublelink.c b/libarchive/test/test_read_format_xar_doublelink.c new file mode 100644 index 0000000000..73ddebd285 --- /dev/null +++ b/libarchive/test/test_read_format_xar_doublelink.c @@ -0,0 +1,55 @@ +/*- + * Copyright (c) 2024 Martin Matuska + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "test.h" + +#define __LIBARCHIVE_BUILD + +DEFINE_TEST(test_read_format_xar_doublelink) +{ + const char *refname = "test_read_format_xar_doublelink.xar"; + struct archive *a; + struct archive_entry *ae; + + extract_reference_file(refname); + + /* Verify with seeking reader. */ + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); + if(ARCHIVE_OK != archive_read_support_format_xar(a)) { + skipping("XAR format unsupported"); + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); + return; + } + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, refname, + 10240)); + + assertA(ARCHIVE_FATAL == archive_read_next_header(a, &ae)); + assertEqualString(archive_error_string(a), + "File with multiple link targets"); + assert(archive_errno(a) != 0); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); +} diff --git a/libarchive/test/test_read_format_xar_doublelink.xar.uu b/libarchive/test/test_read_format_xar_doublelink.xar.uu new file mode 100644 index 0000000000..5fed96af9d --- /dev/null +++ b/libarchive/test/test_read_format_xar_doublelink.xar.uu @@ -0,0 +1,12 @@ +begin 664 test_read_foxmat_xar_doublelink.xar +M>&%R(0`<``$````````!0`````````/7`````7B<[9/!D3&``]'1?GT!-1U;;:=[5[G<=WB0=P>^..@![<%Y94V# +M\R>*$9C6=LKT#7Y;OF8O>"%F_""=F"$^VC9\$&\=R#'LR$:E03#*RHR6&2N6 
+MM*KIO,YS3JZ1M&D-[<;O-/+C<8`&^[7,<:P@;E:;52`R#5A6N?VQ@9CHIN.#_IY(['+:!!F4V#K5.],G+`8BU=%SU.8OF? +MH#*V`U%5K"@9)Z=5*G2P5RT8*YY+3J9%*ND(T?D\%/3$[U0GJ=W6T3=E+);&^E4=0%?0^#N\00;G(;8U7`]! Date: Tue, 23 Apr 2024 11:51:53 +0200 Subject: [PATCH 23/98] CI: add libxml2 to MacOS test build --- build/ci/github_actions/macos.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/build/ci/github_actions/macos.sh b/build/ci/github_actions/macos.sh index ba72b4a771..6941bf8738 100755 --- a/build/ci/github_actions/macos.sh +++ b/build/ci/github_actions/macos.sh @@ -18,6 +18,7 @@ then xz \ lz4 \ zstd \ + libxml2 \ openssl do brew list $pkg > /dev/null && brew upgrade $pkg || brew install $pkg From 3efcadf8865efa2e0c5d44606efda87bfd4835fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dag-Erling=20Sm=C3=B8rgrav?= Date: Mon, 4 Mar 2024 11:37:14 +0100 Subject: [PATCH 24/98] zstd: Implement core detection (#2083) The bsdtar manual page claims that setting zstd:threads to 0 tells zstd to use as many threads as there are cores in the system, but it actually disables multi-threading. Replace 0 with the number of configured processors. While here, add a previously missing overflow check. 
Co-authored-by: Martin Matuska --- CMakeLists.txt | 1 + build/cmake/config.h.in | 3 +++ configure.ac | 1 + libarchive/archive_write_add_filter_zstd.c | 14 +++++++++++++- tar/bsdtar.1 | 15 +++++++-------- 5 files changed, 25 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a953bcea1d..822056b910 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1497,6 +1497,7 @@ CHECK_FUNCTION_EXISTS_GLIBC(strncpy_s HAVE_STRNCPY_S) CHECK_FUNCTION_EXISTS_GLIBC(strnlen HAVE_STRNLEN) CHECK_FUNCTION_EXISTS_GLIBC(strrchr HAVE_STRRCHR) CHECK_FUNCTION_EXISTS_GLIBC(symlink HAVE_SYMLINK) +CHECK_FUNCTION_EXISTS_GLIBC(sysconf HAVE_SYSCONF) CHECK_FUNCTION_EXISTS_GLIBC(timegm HAVE_TIMEGM) CHECK_FUNCTION_EXISTS_GLIBC(tzset HAVE_TZSET) CHECK_FUNCTION_EXISTS_GLIBC(unlinkat HAVE_UNLINKAT) diff --git a/build/cmake/config.h.in b/build/cmake/config.h.in index 045a6b4165..d47694c0c1 100644 --- a/build/cmake/config.h.in +++ b/build/cmake/config.h.in @@ -1094,6 +1094,9 @@ typedef uint64_t uintmax_t; /* Define to 1 if you have the `symlink' function. */ #cmakedefine HAVE_SYMLINK 1 +/* Define to 1 if you have the `sysconf' function. */ +#cmakedefine HAVE_SYSCONF 1 + /* Define to 1 if you have the header file. 
*/ #cmakedefine HAVE_SYS_ACL_H 1 diff --git a/configure.ac b/configure.ac index 3cf5f50e78..c778c043f0 100644 --- a/configure.ac +++ b/configure.ac @@ -804,6 +804,7 @@ AC_CHECK_FUNCS([nl_langinfo openat pipe poll posix_spawnp readlink readlinkat]) AC_CHECK_FUNCS([readpassphrase]) AC_CHECK_FUNCS([select setenv setlocale sigaction statfs statvfs]) AC_CHECK_FUNCS([strchr strdup strerror strncpy_s strnlen strrchr symlink]) +AC_CHECK_FUNCS([sysconf]) AC_CHECK_FUNCS([timegm tzset unlinkat unsetenv utime utimensat utimes vfork]) AC_CHECK_FUNCS([wcrtomb wcscmp wcscpy wcslen wctomb wmemcmp wmemcpy wmemmove]) AC_CHECK_FUNCS([_fseeki64 _get_timezone]) diff --git a/libarchive/archive_write_add_filter_zstd.c b/libarchive/archive_write_add_filter_zstd.c index df539d326e..b49f453102 100644 --- a/libarchive/archive_write_add_filter_zstd.c +++ b/libarchive/archive_write_add_filter_zstd.c @@ -29,6 +29,9 @@ #ifdef HAVE_ERRNO_H #include #endif +#ifdef HAVE_LIMITS_H +#include +#endif #ifdef HAVE_STDINT_H #include #endif @@ -38,6 +41,9 @@ #ifdef HAVE_STRING_H #include #endif +#ifdef HAVE_UNISTD_H +#include +#endif #ifdef HAVE_ZSTD_H #include #endif @@ -266,7 +272,13 @@ archive_compressor_zstd_options(struct archive_write_filter *f, const char *key, if (string_to_number(value, &threads) != ARCHIVE_OK) { return (ARCHIVE_WARN); } - if (threads < 0) { + +#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN) + if (threads == 0) { + threads = sysconf(_SC_NPROCESSORS_ONLN); + } +#endif + if (threads < 0 || threads > INT_MAX) { return (ARCHIVE_WARN); } data->threads = (int)threads; diff --git a/tar/bsdtar.1 b/tar/bsdtar.1 index e570d2a48a..fe9ec95046 100644 --- a/tar/bsdtar.1 +++ b/tar/bsdtar.1 @@ -23,7 +23,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" -.Dd March 1, 2024 +.Dd April 23, 2024 .Dt TAR 1 .Os .Sh NAME @@ -644,14 +644,13 @@ A decimal integer from 4 to 7 specifying the lz4 compression block size .It Cm lz4:block-dependence Use the previous block of the block being compressed for a compression dictionary to improve compression ratio. -.It Cm zstd:compression-level -A decimal integer specifying the zstd compression level. Supported values depend +.It Cm zstd:compression-level Ns = Ns Ar N +A decimal integer specifying the zstd compression level. +Supported values depend on the library version, common values are from 1 to 22. -.It Cm zstd:threads -Specify the number of worker threads to use. -Setting threads to a special value 0 makes -.Xr zstd 1 -use as many threads as there are CPU cores on the system. +.It Cm zstd:threads Ns = Ns Ar N +Specify the number of worker threads to use, or 0 to use as many +threads as there are CPU cores in the system. .It Cm zstd:frame-per-file Start a new compression frame at the beginning of each file in the archive. 
From ab27905ae7cd337cf30711fd36d287db1c1b92bb Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Tue, 23 Apr 2024 14:59:27 +0200 Subject: [PATCH 25/98] zstd: Implement core detection on Windows Fixes #2071 Co-authored-by: Mostyn Bramley-Moore --- libarchive/archive_write_add_filter_zstd.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libarchive/archive_write_add_filter_zstd.c b/libarchive/archive_write_add_filter_zstd.c index b49f453102..7ea3d18c9b 100644 --- a/libarchive/archive_write_add_filter_zstd.c +++ b/libarchive/archive_write_add_filter_zstd.c @@ -277,6 +277,13 @@ archive_compressor_zstd_options(struct archive_write_filter *f, const char *key, if (threads == 0) { threads = sysconf(_SC_NPROCESSORS_ONLN); } +#elif !defined(__CYGWIN__) && defined(_WIN32_WINNT) && \ + _WIN32_WINNT >= 0x0601 /* _WIN32_WINNT_WIN7 */ + if (threads == 0) { + DWORD winCores = GetActiveProcessorCount( + ALL_PROCESSOR_GROUPS); + threads = (intmax_t)winCores; + } #endif if (threads < 0 || threads > INT_MAX) { return (ARCHIVE_WARN); From 284ba9539aa1217df77c2f14e6941483de48d382 Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Tue, 23 Apr 2024 16:32:16 +0200 Subject: [PATCH 26/98] CI: add "CI" prefix to dependabot commit messages --- .github/dependabot.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index e4b8a2f5e4..4b45e5b2a1 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -9,6 +9,8 @@ updates: directory: "/" schedule: interval: "weekly" + commit-message: + prefix: "CI" groups: all-actions: patterns: From e6d2ce1c6c15e0e4edaaf14b31a2ac335c27d9a3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Apr 2024 16:41:54 +0200 Subject: [PATCH 27/98] CI: bump the all-actions group with 3 updates (#2140) Updates actions/checkout from 4.1.2 to 4.1.3 Updates actions/upload-artifact from 4.3.1 to 4.3.3 Updates github/codeql-action from 3.24.8 to 
3.25.2 Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/ci.yml | 16 ++++++++-------- .github/workflows/cifuzz.yml | 2 +- .github/workflows/codeql.yml | 8 ++++---- .github/workflows/scorecard.yml | 6 +++--- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3ecc4434cd..dd49533dc0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ jobs: matrix: bs: [autotools, cmake] steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3 - name: Install dependencies run: ./build/ci/github_actions/macos.sh prepare - name: Autogen @@ -45,7 +45,7 @@ jobs: run: ./build/ci/build.sh -a artifact env: BS: ${{ matrix.bs }} - - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 + - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 with: name: libarchive-macos-${{ matrix.bs }}-${{ github.sha }} path: libarchive.tar.xz @@ -57,7 +57,7 @@ jobs: bs: [autotools, cmake] crypto: [mbedtls, nettle, openssl] steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3 - name: Update apt cache run: sudo apt-get update - name: Install dependencies @@ -91,14 +91,14 @@ jobs: run: ./build/ci/build.sh -a artifact env: BS: ${{ matrix.bs }} - - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 + - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 with: name: libarchive-ubuntu-${{ matrix.bs }}-${{ matrix.crypto }}-${{ github.sha }} path: libarchive.tar.xz Ubuntu-distcheck: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: 
actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3 - name: Update package definitions run: sudo apt-get update - name: Install dependencies @@ -113,7 +113,7 @@ jobs: SKIP_OPEN_FD_ERR_TEST: 1 - name: Dist-Artifact run: ./build/ci/build.sh -a dist-artifact - - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 + - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 with: name: libarchive-${{ github.sha }} path: libarchive-dist.tar @@ -125,7 +125,7 @@ jobs: matrix: be: [mingw-gcc, msvc] steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3 - name: Install mingw if: ${{ matrix.be=='mingw-gcc' }} run: choco install mingw @@ -161,7 +161,7 @@ jobs: shell: cmd env: BE: ${{ matrix.be }} - - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 + - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 with: name: libarchive-windows-${{ matrix.be }}-${{ github.sha }} path: libarchive.zip diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml index 9dd1dc3ec1..ddacccc318 100644 --- a/.github/workflows/cifuzz.yml +++ b/.github/workflows/cifuzz.yml @@ -21,7 +21,7 @@ jobs: fuzz-seconds: 600 dry-run: false - name: Upload Crash - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 if: failure() && steps.build.outcome == 'success' with: name: artifacts diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index b9e4dcc48e..17e6bf72dd 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -26,18 +26,18 @@ jobs: steps: - name: Checkout - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3 - 
name: Initialize CodeQL - uses: github/codeql-action/init@05963f47d870e2cb19a537396c1f668a348c7d8f # v3.24.8 + uses: github/codeql-action/init@8f596b4ae3cb3c588a5c46780b86dd53fef16c52 # v3.25.2 with: languages: ${{ matrix.language }} queries: +security-and-quality - name: Autobuild - uses: github/codeql-action/autobuild@05963f47d870e2cb19a537396c1f668a348c7d8f # v3.24.8 + uses: github/codeql-action/autobuild@8f596b4ae3cb3c588a5c46780b86dd53fef16c52 # v3.25.2 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@05963f47d870e2cb19a537396c1f668a348c7d8f # v3.24.8 + uses: github/codeql-action/analyze@8f596b4ae3cb3c588a5c46780b86dd53fef16c52 # v3.25.2 with: category: "/language:${{ matrix.language }}" diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index b6c4f17a9b..d05080c54d 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -29,7 +29,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3 with: persist-credentials: false @@ -52,7 +52,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 with: name: SARIF file path: results.sarif @@ -60,6 +60,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. 
- name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@05963f47d870e2cb19a537396c1f668a348c7d8f # v3.24.8 + uses: github/codeql-action/upload-sarif@8f596b4ae3cb3c588a5c46780b86dd53fef16c52 # v3.25.2 with: sarif_file: results.sarif From b6a979481b7d77c12fa17bbed94576b63bbcb0c0 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Thu, 25 Apr 2024 09:18:30 +0000 Subject: [PATCH 28/98] zip: Fix out of boundary access (#2145) If a ZIP file contains a file with an empty name and mac-ext option is set, then a check accesses memory out of bound of `name`. --- libarchive/archive_read_support_format_zip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libarchive/archive_read_support_format_zip.c b/libarchive/archive_read_support_format_zip.c index d7b6f082ee..7552a1a1a2 100644 --- a/libarchive/archive_read_support_format_zip.c +++ b/libarchive/archive_read_support_format_zip.c @@ -4089,7 +4089,7 @@ slurp_central_directory(struct archive_read *a, struct archive_entry* entry, * as the actual resource fork doesn't end with '/'. */ size_t tmp_length = filename_length; - if (name[tmp_length - 1] == '/') { + if (tmp_length > 0 && name[tmp_length - 1] == '/') { tmp_length--; r = rsrc_basename(name, tmp_length); } From 5cc96c955adf5252ff3d16ff93a96b6273192d94 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Thu, 25 Apr 2024 09:20:23 +0000 Subject: [PATCH 29/98] warc: Check realloc return value (#2144) Since realloc could fail, check its return value and set a proper error message. 
--- libarchive/archive_read_support_format_warc.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/libarchive/archive_read_support_format_warc.c b/libarchive/archive_read_support_format_warc.c index c49d44eba5..fcec5bc4cb 100644 --- a/libarchive/archive_read_support_format_warc.c +++ b/libarchive/archive_read_support_format_warc.c @@ -215,6 +215,7 @@ _warc_rdhdr(struct archive_read *a, struct archive_entry *entry) const char *buf; ssize_t nrd; const char *eoh; + char *tmp; /* for the file name, saves some strndup()'ing */ warc_string_t fnam; /* warc record type, not that we really use it a lot */ @@ -321,7 +322,14 @@ _warc_rdhdr(struct archive_read *a, struct archive_entry *entry) * malloc()+free() roundtrip */ if (fnam.len + 1U > w->pool.len) { w->pool.len = ((fnam.len + 64U) / 64U) * 64U; - w->pool.str = realloc(w->pool.str, w->pool.len); + tmp = realloc(w->pool.str, w->pool.len); + if (tmp == NULL) { + archive_set_error( + &a->archive, ENOMEM, + "Out of memory"); + return (ARCHIVE_FATAL); + } + w->pool.str = tmp; } memcpy(w->pool.str, fnam.str, fnam.len); w->pool.str[fnam.len] = '\0'; From f673faefaad28de77e91167e1a70d7c2a32af7fe Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Thu, 25 Apr 2024 09:22:45 +0000 Subject: [PATCH 30/98] Fix typos (#2143) Fix typos in comments, documentation, and error messages. 
--- README.md | 2 +- libarchive/archive.h | 2 +- libarchive/archive_entry_acl.3 | 2 +- libarchive/archive_read_disk.3 | 4 ++-- libarchive/archive_read_support_format_all.c | 2 +- libarchive/archive_read_support_format_ar.c | 4 ++-- libarchive/archive_read_support_format_lha.c | 4 ++-- libarchive/archive_read_support_format_mtree.c | 8 ++++---- libarchive/archive_read_support_format_zip.c | 2 +- libarchive/archive_write_private.h | 2 +- libarchive/archive_write_set_format_gnutar.c | 2 +- libarchive/libarchive_internals.3 | 2 +- libarchive/test/test_read_format_xar_doublelink.xar.uu | 2 +- 13 files changed, 19 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 727ed49856..933de69864 100644 --- a/README.md +++ b/README.md @@ -201,7 +201,7 @@ questions we are asked about libarchive: In case other thread calls the same function in parallel, it might get interrupted by it and cause the executable to use umask=0 for the remaining execution. - This will then lead to implicitely created directories to have 777 + This will then lead to implicitly created directories to have 777 permissions without sticky bit. * In particular, libarchive's modules to read or write a directory diff --git a/libarchive/archive.h b/libarchive/archive.h index 7c526647cb..773649a6ef 100644 --- a/libarchive/archive.h +++ b/libarchive/archive.h @@ -895,7 +895,7 @@ __LA_DECL int archive_write_set_options(struct archive *_a, const char *opts); /* - * Set a encryption passphrase. + * Set an encryption passphrase. */ __LA_DECL int archive_write_set_passphrase(struct archive *_a, const char *p); __LA_DECL int archive_write_set_passphrase_callback(struct archive *, diff --git a/libarchive/archive_entry_acl.3 b/libarchive/archive_entry_acl.3 index 50dd642c20..4d0d8b50ed 100644 --- a/libarchive/archive_entry_acl.3 +++ b/libarchive/archive_entry_acl.3 @@ -383,7 +383,7 @@ Prefix each default ACL entry with the word The mask and other ACLs don not contain a double colon. 
.El .Pp -The following flags are effecive only on NFSv4 ACL: +The following flags are effective only on NFSv4 ACL: .Bl -tag -offset indent -compact -width ARCHIV .It Dv ARCHIVE_ENTRY_ACL_STYLE_COMPACT Do not output minus characters for unset permissions and flags in NFSv4 ACL diff --git a/libarchive/archive_read_disk.3 b/libarchive/archive_read_disk.3 index 7cde3c2327..990c1514c4 100644 --- a/libarchive/archive_read_disk.3 +++ b/libarchive/archive_read_disk.3 @@ -288,11 +288,11 @@ calls. If matched based on calls to .Tn archive_match_time_excluded , or .Tn archive_match_owner_excluded , -then the callback function specified by the _excluded_func parameter will execute. This function will recieve data provided to the fourth parameter, void *_client_data. +then the callback function specified by the _excluded_func parameter will execute. This function will receive data provided to the fourth parameter, void *_client_data. .It Fn archive_read_disk_set_metadata_filter_callback Allows the caller to set a callback function during calls to .Xr archive_read_header 3 -to filter out metadata for each entry. The callback function recieves the +to filter out metadata for each entry. The callback function receives the .Tn struct archive object, void* custom filter data, and the .Tn struct archive_entry . diff --git a/libarchive/archive_read_support_format_all.c b/libarchive/archive_read_support_format_all.c index 5a4e1ab675..3b53c9ad5f 100644 --- a/libarchive/archive_read_support_format_all.c +++ b/libarchive/archive_read_support_format_all.c @@ -67,7 +67,7 @@ archive_read_support_format_all(struct archive *a) * increase the chance that a high bid from someone else will * make it unnecessary for these to do anything at all. */ - /* These three have potentially large look-ahead. */ + /* These have potentially large look-ahead. 
*/ archive_read_support_format_7zip(a); archive_read_support_format_cab(a); archive_read_support_format_rar(a); diff --git a/libarchive/archive_read_support_format_ar.c b/libarchive/archive_read_support_format_ar.c index ca8effb0b0..6f1be8591f 100644 --- a/libarchive/archive_read_support_format_ar.c +++ b/libarchive/archive_read_support_format_ar.c @@ -270,7 +270,7 @@ _ar_read_header(struct archive_read *a, struct archive_entry *entry, } if (ar->strtab != NULL) { archive_set_error(&a->archive, EINVAL, - "More than one string tables exist"); + "More than one string table exists"); return (ARCHIVE_FATAL); } @@ -515,7 +515,7 @@ archive_read_format_ar_read_data(struct archive_read *a, if (ar->entry_padding) { if (skipped >= 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Truncated ar archive- failed consuming padding"); + "Truncated ar archive - failed consuming padding"); } return (ARCHIVE_FATAL); } diff --git a/libarchive/archive_read_support_format_lha.c b/libarchive/archive_read_support_format_lha.c index 1c64b2900b..4d6290ac33 100644 --- a/libarchive/archive_read_support_format_lha.c +++ b/libarchive/archive_read_support_format_lha.c @@ -1693,7 +1693,7 @@ archive_read_format_lha_cleanup(struct archive_read *a) * example. * 1. a symbolic-name is 'aaa/bb/cc' * 2. a filename is 'xxx/bbb' - * then a archived pathname is 'xxx/bbb|aaa/bb/cc' + * then an archived pathname is 'xxx/bbb|aaa/bb/cc' */ static int lha_parse_linkname(struct archive_wstring *linkname, @@ -2385,7 +2385,7 @@ lzh_decode_blocks(struct lzh_stream *strm, int last) return (100); } - /* lzh_br_read_ahead() always try to fill the + /* lzh_br_read_ahead() always tries to fill the * cache buffer up. 
In specific situation we * are close to the end of the data, the cache * buffer will not be full and thus we have to diff --git a/libarchive/archive_read_support_format_mtree.c b/libarchive/archive_read_support_format_mtree.c index 630cff6e39..6971228eef 100644 --- a/libarchive/archive_read_support_format_mtree.c +++ b/libarchive/archive_read_support_format_mtree.c @@ -416,8 +416,8 @@ next_line(struct archive_read *a, } /* - * Compare characters with a mtree keyword. - * Returns the length of a mtree keyword if matched. + * Compare characters with an mtree keyword. + * Returns the length of an mtree keyword if matched. * Returns 0 if not matched. */ static int @@ -515,7 +515,7 @@ bid_keyword(const char *p, ssize_t len) /* * Test whether there is a set of mtree keywords. - * Returns the number of keyword. + * Returns the number of keywords. * Returns -1 if we got incorrect sequence. * This function expects a set of "keyword=value". * When "unset" is specified, expects a set of "keyword". @@ -760,7 +760,7 @@ detect_form(struct archive_read *a, int *is_form_d) multiline = 1; else { /* We've got plenty of correct lines - * to assume that this file is a mtree + * to assume that this file is an mtree * format. */ if (++entry_cnt >= MAX_BID_ENTRY) break; diff --git a/libarchive/archive_read_support_format_zip.c b/libarchive/archive_read_support_format_zip.c index 7552a1a1a2..c9759eaf9a 100644 --- a/libarchive/archive_read_support_format_zip.c +++ b/libarchive/archive_read_support_format_zip.c @@ -1393,7 +1393,7 @@ check_authentication_code(struct archive_read *a, const void *_p) * [CRC32] [compressed low] [compressed high] [uncompressed low] [uncompressed high] [other PK marker] * ``` * Since the 32-bit and 64-bit compressed sizes both match, the - * actualy size must fit in 32 bits, which implies the high-order + * actual size must fit in 32 bits, which implies the high-order * word of the compressed size is zero. 
So we know the uncompressed * low word is zero, which again implies that if we accept the shorter * format, there will not be a valid PK marker following it. diff --git a/libarchive/archive_write_private.h b/libarchive/archive_write_private.h index abd5a8ddcd..f259ccb165 100644 --- a/libarchive/archive_write_private.h +++ b/libarchive/archive_write_private.h @@ -158,7 +158,7 @@ int __archive_write_program_write(struct archive_write_filter *, struct archive_write_program_data *, const void *, size_t); /* - * Get a encryption passphrase. + * Get an encryption passphrase. */ const char * __archive_write_get_passphrase(struct archive_write *a); #endif diff --git a/libarchive/archive_write_set_format_gnutar.c b/libarchive/archive_write_set_format_gnutar.c index 92b06c5f5f..a88350b874 100644 --- a/libarchive/archive_write_set_format_gnutar.c +++ b/libarchive/archive_write_set_format_gnutar.c @@ -387,7 +387,7 @@ archive_write_gnutar_header(struct archive_write *a, if (r != 0) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, - "Can't allocate memory for Pathame"); + "Can't allocate memory for pathname"); ret = ARCHIVE_FATAL; goto exit_write_header; } diff --git a/libarchive/libarchive_internals.3 b/libarchive/libarchive_internals.3 index d4696f6482..2978b48c3e 100644 --- a/libarchive/libarchive_internals.3 +++ b/libarchive/libarchive_internals.3 @@ -124,7 +124,7 @@ to read the entire file into memory at once and return the entire file to libarchive as a single block; other clients may begin asynchronous I/O operations for the next block on each request. -.Ss Decompresssion Layer +.Ss Decompression Layer The decompression layer not only handles decompression, it also buffers data so that the format handlers see a much nicer I/O model. 
diff --git a/libarchive/test/test_read_format_xar_doublelink.xar.uu b/libarchive/test/test_read_format_xar_doublelink.xar.uu index 5fed96af9d..7aa638a86e 100644 --- a/libarchive/test/test_read_format_xar_doublelink.xar.uu +++ b/libarchive/test/test_read_format_xar_doublelink.xar.uu @@ -1,4 +1,4 @@ -begin 664 test_read_foxmat_xar_doublelink.xar +begin 664 test_read_format_xar_doublelink.xar M>&%R(0`<``$````````!0`````````/7`````7B<[9/!D3&``]'1?GT!-1U;;:=[5[G<=WB0=P>^..@![<%Y94V# M\R>*$9C6=LKT#7Y;OF8O>"%F_""=F"$^VC9\$&\=R#'LR$:E03#*RHR6&2N6 From d9f44c5b44038c735a78cc1b32fda1ea7b88be25 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Thu, 25 Apr 2024 02:39:22 -0700 Subject: [PATCH 31/98] bsdtar: Fix error handling around strtol() usages (#2110) The code here had a couple of bad code patterns that seem to have been copied throughout: * Checking errno after strtol() -- Standard C doesn't seem to actually require this, so we shouldn't rely on it * Casting the result of strtol() directly to `int`. This loses information prematurely. Instead, I've added `l` as a temporary of type `long`, use that to hold the result of `strtol()` until it can be checked. I've also removed the `errno` tests in favor of checking the end pointer value. The limit for --strip-components has been raised to 100 000. --- tar/bsdtar.c | 63 ++++++++++++++++++++++++---------------------------- 1 file changed, 29 insertions(+), 34 deletions(-) diff --git a/tar/bsdtar.c b/tar/bsdtar.c index b070e0faeb..42baab2861 100644 --- a/tar/bsdtar.c +++ b/tar/bsdtar.c @@ -157,6 +157,7 @@ main(int argc, char **argv) char *tptr, *uptr; char possible_help_request; char buff[16]; + long l; /* * Use a pointer for consistency, but stack-allocated storage @@ -301,16 +302,15 @@ main(int argc, char **argv) /* libarchive doesn't need this; just ignore it. 
*/ break; case 'b': /* SUSv2 */ - errno = 0; tptr = NULL; - t = (int)strtol(bsdtar->argument, &tptr, 10); - if (errno || t <= 0 || t > 8192 || + l = strtol(bsdtar->argument, &tptr, 10); + if (l <= 0 || l > 8192L || *(bsdtar->argument) == '\0' || tptr == NULL || *tptr != '\0') { lafe_errc(1, 0, "Invalid or out of range " "(1..8192) argument to -b"); } - bsdtar->bytes_per_block = 512 * t; + bsdtar->bytes_per_block = 512 * (int)l; /* Explicit -b forces last block size. */ bsdtar->bytes_in_last_block = bsdtar->bytes_per_block; break; @@ -369,44 +369,42 @@ main(int argc, char **argv) bsdtar->filename = bsdtar->argument; break; case OPTION_GID: /* cpio */ - errno = 0; tptr = NULL; - t = (int)strtol(bsdtar->argument, &tptr, 10); - if (errno || t < 0 || *(bsdtar->argument) == '\0' || + l = strtol(bsdtar->argument, &tptr, 10); + if (l < 0 || l >= INT_MAX || *(bsdtar->argument) == '\0' || tptr == NULL || *tptr != '\0') { lafe_errc(1, 0, "Invalid argument to --gid"); } - bsdtar->gid = t; + bsdtar->gid = (int)l; break; case OPTION_GNAME: /* cpio */ bsdtar->gname = bsdtar->argument; break; case OPTION_GROUP: /* GNU tar */ - errno = 0; tptr = NULL; uptr = strchr(bsdtar->argument, ':'); - if(uptr != NULL) { - if(uptr[1] == 0) { + if (uptr != NULL) { + if (uptr[1] == '\0') { lafe_errc(1, 0, "Invalid argument to --group (missing id after :)"); } uptr[0] = 0; uptr++; - t = (int)strtol(uptr, &tptr, 10); - if (errno || t < 0 || *uptr == '\0' || + l = strtol(uptr, &tptr, 10); + if (l < 0 || l >= INT_MAX || *uptr == '\0' || tptr == NULL || *tptr != '\0') { lafe_errc(1, 0, "Invalid argument to --group (%s is not a number)", uptr); } else { - bsdtar->gid = t; + bsdtar->gid = (int)l; } bsdtar->gname = bsdtar->argument; } else { - t = (int)strtol(bsdtar->argument, &tptr, 10); - if (errno || t < 0 || *(bsdtar->argument) == '\0' || + l = strtol(bsdtar->argument, &tptr, 10); + if (l < 0 || l >= INT_MAX || *(bsdtar->argument) == '\0' || tptr == NULL || *tptr != '\0') { bsdtar->gname = 
bsdtar->argument; } else { - bsdtar->gid = t; + bsdtar->gid = (int)l; bsdtar->gname = ""; } } @@ -662,31 +660,30 @@ main(int argc, char **argv) bsdtar->option_options = bsdtar->argument; break; case OPTION_OWNER: /* GNU tar */ - errno = 0; tptr = NULL; uptr = strchr(bsdtar->argument, ':'); - if(uptr != NULL) { - if(uptr[1] == 0) { + if (uptr != NULL) { + if (uptr[1] == 0) { lafe_errc(1, 0, "Invalid argument to --owner (missing id after :)"); } uptr[0] = 0; uptr++; - t = (int)strtol(uptr, &tptr, 10); - if (errno || t < 0 || *uptr == '\0' || + l = strtol(uptr, &tptr, 10); + if (l < 0 || l >= INT_MAX || *uptr == '\0' || tptr == NULL || *tptr != '\0') { lafe_errc(1, 0, "Invalid argument to --owner (%s is not a number)", uptr); } else { - bsdtar->uid = t; + bsdtar->uid = (int)l; } bsdtar->uname = bsdtar->argument; } else { - t = (int)strtol(bsdtar->argument, &tptr, 10); - if (errno || t < 0 || *(bsdtar->argument) == '\0' || + l = strtol(bsdtar->argument, &tptr, 10); + if (l < 0 || l >= INT_MAX || *(bsdtar->argument) == '\0' || tptr == NULL || *tptr != '\0') { bsdtar->uname = bsdtar->argument; } else { - bsdtar->uid = t; + bsdtar->uid = (int)l; bsdtar->uname = ""; } } @@ -748,15 +745,14 @@ main(int argc, char **argv) bsdtar->extract_flags |= ARCHIVE_EXTRACT_OWNER; break; case OPTION_STRIP_COMPONENTS: /* GNU tar 1.15 */ - errno = 0; tptr = NULL; - t = (int)strtol(bsdtar->argument, &tptr, 10); - if (errno || t < 0 || *(bsdtar->argument) == '\0' || + l = strtol(bsdtar->argument, &tptr, 10); + if (l < 0 || l > 100000L || *(bsdtar->argument) == '\0' || tptr == NULL || *tptr != '\0') { lafe_errc(1, 0, "Invalid argument to " "--strip-components"); } - bsdtar->strip_components = t; + bsdtar->strip_components = (int)l; break; case 'T': /* GNU tar */ bsdtar->names_from_file = bsdtar->argument; @@ -776,14 +772,13 @@ main(int argc, char **argv) set_mode(bsdtar, opt); break; case OPTION_UID: /* cpio */ - errno = 0; tptr = NULL; - t = (int)strtol(bsdtar->argument, &tptr, 10); - if 
(errno || t < 0 || *(bsdtar->argument) == '\0' || + l = strtol(bsdtar->argument, &tptr, 10); + if (l < 0 || l >= INT_MAX || *(bsdtar->argument) == '\0' || tptr == NULL || *tptr != '\0') { lafe_errc(1, 0, "Invalid argument to --uid"); } - bsdtar->uid = t; + bsdtar->uid = (int)l; break; case OPTION_UNAME: /* cpio */ bsdtar->uname = bsdtar->argument; From 313aa1fa10b657de791e3202c168a6c833bc3543 Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Fri, 26 Apr 2024 11:22:15 +0200 Subject: [PATCH 32/98] Release 3.7.4 --- CMakeLists.txt | 2 +- NEWS | 2 ++ configure.ac | 2 +- libarchive/archive.h | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 822056b910..ec97e4c773 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,7 @@ endif() # MinSizeRel : Release Min Size build # None : No build type IF(NOT CMAKE_BUILD_TYPE) - SET(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build Type" FORCE) + SET(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build Type" FORCE) ENDIF(NOT CMAKE_BUILD_TYPE) # Set a value type to properly display CMAKE_BUILD_TYPE on GUI if the # value type is "UNINITIALIZED". diff --git a/NEWS b/NEWS index f4395fd1c9..ebdbb2a978 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,5 @@ +Apr 26, 2024: libarchive 3.7.4 released + Apr 08, 2024: libarchive 3.7.3 released Sep 12, 2023: libarchive 3.7.2 released diff --git a/configure.ac b/configure.ac index c778c043f0..5668d41cab 100644 --- a/configure.ac +++ b/configure.ac @@ -4,7 +4,7 @@ dnl First, define all of the version numbers up front. dnl In particular, this allows the version macro to be used in AC_INIT dnl These first two version numbers are updated automatically on each release. 
-m4_define([LIBARCHIVE_VERSION_S],[3.7.4dev]) +m4_define([LIBARCHIVE_VERSION_S],[3.7.4]) m4_define([LIBARCHIVE_VERSION_N],[3007004]) dnl bsdtar and bsdcpio versioning tracks libarchive diff --git a/libarchive/archive.h b/libarchive/archive.h index 773649a6ef..fd4dd20fad 100644 --- a/libarchive/archive.h +++ b/libarchive/archive.h @@ -155,7 +155,7 @@ __LA_DECL int archive_version_number(void); /* * Textual name/version of the library, useful for version displays. */ -#define ARCHIVE_VERSION_ONLY_STRING "3.7.4dev" +#define ARCHIVE_VERSION_ONLY_STRING "3.7.4" #define ARCHIVE_VERSION_STRING "libarchive " ARCHIVE_VERSION_ONLY_STRING __LA_DECL const char * archive_version_string(void); From 9951b9cd25a4363b4b36308e5058c5b8c2c55471 Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Fri, 26 Apr 2024 12:07:47 +0200 Subject: [PATCH 33/98] Libarchive 3.7.5dev --- CMakeLists.txt | 2 +- build/version | 2 +- configure.ac | 4 ++-- libarchive/archive.h | 4 ++-- libarchive/archive_entry.h | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ec97e4c773..822056b910 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,7 @@ endif() # MinSizeRel : Release Min Size build # None : No build type IF(NOT CMAKE_BUILD_TYPE) - SET(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build Type" FORCE) + SET(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build Type" FORCE) ENDIF(NOT CMAKE_BUILD_TYPE) # Set a value type to properly display CMAKE_BUILD_TYPE on GUI if the # value type is "UNINITIALIZED". diff --git a/build/version b/build/version index b06fe47265..49ac2b56a1 100644 --- a/build/version +++ b/build/version @@ -1 +1 @@ -3007004 +3007005 diff --git a/configure.ac b/configure.ac index 5668d41cab..c4c219f1cc 100644 --- a/configure.ac +++ b/configure.ac @@ -4,8 +4,8 @@ dnl First, define all of the version numbers up front. 
dnl In particular, this allows the version macro to be used in AC_INIT dnl These first two version numbers are updated automatically on each release. -m4_define([LIBARCHIVE_VERSION_S],[3.7.4]) -m4_define([LIBARCHIVE_VERSION_N],[3007004]) +m4_define([LIBARCHIVE_VERSION_S],[3.7.5dev]) +m4_define([LIBARCHIVE_VERSION_N],[3007005]) dnl bsdtar and bsdcpio versioning tracks libarchive m4_define([BSDTAR_VERSION_S],LIBARCHIVE_VERSION_S()) diff --git a/libarchive/archive.h b/libarchive/archive.h index fd4dd20fad..3379735c69 100644 --- a/libarchive/archive.h +++ b/libarchive/archive.h @@ -34,7 +34,7 @@ * assert that ARCHIVE_VERSION_NUMBER >= 2012108. */ /* Note: Compiler will complain if this does not match archive_entry.h! */ -#define ARCHIVE_VERSION_NUMBER 3007004 +#define ARCHIVE_VERSION_NUMBER 3007005 #include #include /* for wchar_t */ @@ -155,7 +155,7 @@ __LA_DECL int archive_version_number(void); /* * Textual name/version of the library, useful for version displays. */ -#define ARCHIVE_VERSION_ONLY_STRING "3.7.4" +#define ARCHIVE_VERSION_ONLY_STRING "3.7.5dev" #define ARCHIVE_VERSION_STRING "libarchive " ARCHIVE_VERSION_ONLY_STRING __LA_DECL const char * archive_version_string(void); diff --git a/libarchive/archive_entry.h b/libarchive/archive_entry.h index 1c59ded7c9..3b41a0f8e3 100644 --- a/libarchive/archive_entry.h +++ b/libarchive/archive_entry.h @@ -28,7 +28,7 @@ #define ARCHIVE_ENTRY_H_INCLUDED /* Note: Compiler will complain if this does not match archive.h! 
*/ -#define ARCHIVE_VERSION_NUMBER 3007004 +#define ARCHIVE_VERSION_NUMBER 3007005 /* * Note: archive_entry.h is for use outside of libarchive; the From b910cb70d4c1b311c9d85cd536a6c91647c43df7 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Sun, 28 Apr 2024 23:48:05 +0200 Subject: [PATCH 34/98] xar: Fix another infinite loop and expat error handling (#2150) Fixes two issues: - expat code keeps track of error conditions - adding link=original multiple times is prohibited --- libarchive/archive_read_support_format_xar.c | 8 ++++++-- libarchive/test/test_read_format_xar_doublelink.c | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/libarchive/archive_read_support_format_xar.c b/libarchive/archive_read_support_format_xar.c index 2c34326429..cefb36410e 100644 --- a/libarchive/archive_read_support_format_xar.c +++ b/libarchive/archive_read_support_format_xar.c @@ -2055,9 +2055,10 @@ xml_start(struct archive_read *a, const char *name, struct xmlattr_list *list) attr = attr->next) { if (strcmp(attr->name, "link") != 0) continue; - if (xar->file->hdnext != NULL || xar->file->link != 0) { + if (xar->file->hdnext != NULL || xar->file->link != 0 || + xar->file == xar->hdlink_orgs) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "File with multiple link targets"); + "File with multiple link attributes"); return (ARCHIVE_FATAL); } if (strcmp(attr->value, "original") == 0) { @@ -3256,6 +3257,9 @@ expat_start_cb(void *userData, const XML_Char *name, const XML_Char **atts) struct xmlattr_list list; int r; + if (ud->state != ARCHIVE_OK) + return; + r = expat_xmlattr_setup(a, &list, atts); if (r == ARCHIVE_OK) r = xml_start(a, (const char *)name, &list); diff --git a/libarchive/test/test_read_format_xar_doublelink.c b/libarchive/test/test_read_format_xar_doublelink.c index 73ddebd285..78d6626a09 100644 --- a/libarchive/test/test_read_format_xar_doublelink.c +++ b/libarchive/test/test_read_format_xar_doublelink.c @@ -47,7 +47,7 @@ 
DEFINE_TEST(test_read_format_xar_doublelink) assertA(ARCHIVE_FATAL == archive_read_next_header(a, &ae)); assertEqualString(archive_error_string(a), - "File with multiple link targets"); + "File with multiple link attributes"); assert(archive_errno(a) != 0); assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); From a1cb648d52f5b6d3f31184d9b6a7cbca628459b7 Mon Sep 17 00:00:00 2001 From: Wei-Cheng Pan Date: Mon, 29 Apr 2024 06:50:22 +0900 Subject: [PATCH 35/98] fix: OOB in rar delta filter (#2148) Ensure that `src` won't move ahead of `dst`, so `src` will not OOB. Since `dst` won't move in this function, and we are only increasing `src` position, this check should be enough. It should be safe to early return because this function does not allocate resources. --- libarchive/archive_read_support_format_rar.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libarchive/archive_read_support_format_rar.c b/libarchive/archive_read_support_format_rar.c index 79669a8f40..619ee81e2b 100644 --- a/libarchive/archive_read_support_format_rar.c +++ b/libarchive/archive_read_support_format_rar.c @@ -3612,7 +3612,15 @@ execute_filter_delta(struct rar_filter *filter, struct rar_virtual_machine *vm) { uint8_t lastbyte = 0; for (idx = i; idx < length; idx += numchannels) + { + /* + * The src block should not overlap with the dst block. + * If so it would be better to consider this archive is broken. + */ + if (src >= dst) + return 0; lastbyte = dst[idx] = lastbyte - *src++; + } } filter->filteredblockaddress = length; From 3006bc5d02ad3ae3c4f9274f60c1f9d2d834734b Mon Sep 17 00:00:00 2001 From: Wei-Cheng Pan Date: Mon, 29 Apr 2024 06:53:19 +0900 Subject: [PATCH 36/98] fix: OOB in rar audio filter (#2149) This patch ensures that `src` won't move ahead of `dst`, so `src` will not OOB. Similar situation like in a1cb648. 
--- libarchive/archive_read_support_format_rar.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libarchive/archive_read_support_format_rar.c b/libarchive/archive_read_support_format_rar.c index 619ee81e2b..4fc6626cac 100644 --- a/libarchive/archive_read_support_format_rar.c +++ b/libarchive/archive_read_support_format_rar.c @@ -3722,6 +3722,13 @@ execute_filter_audio(struct rar_filter *filter, struct rar_virtual_machine *vm) memset(&state, 0, sizeof(state)); for (j = i; j < length; j += numchannels) { + /* + * The src block should not overlap with the dst block. + * If so it would be better to consider this archive is broken. + */ + if (src >= dst) + return 0; + int8_t delta = (int8_t)*src++; uint8_t predbyte, byte; int prederror; From 0ce1b4c382f96f0591ea0496af49d2f8c8f8edb8 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Mon, 29 Apr 2024 22:00:22 +0200 Subject: [PATCH 37/98] archive_match: check archive_read_support_format_raw() return value (#2153) If call of archive_read_support_format_raw fails, do not override the error return value with the return value of archive_read_support_format_empty(). Instead, return error code as expected. 
--- libarchive/archive_match.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libarchive/archive_match.c b/libarchive/archive_match.c index fc8a4ce812..b108ff0a73 100644 --- a/libarchive/archive_match.c +++ b/libarchive/archive_match.c @@ -605,7 +605,8 @@ add_pattern_from_file(struct archive_match *a, struct match_list *mlist, return (ARCHIVE_FATAL); } r = archive_read_support_format_raw(ar); - r = archive_read_support_format_empty(ar); + if (r == ARCHIVE_OK) + r = archive_read_support_format_empty(ar); if (r != ARCHIVE_OK) { archive_copy_error(&(a->archive), ar); archive_read_free(ar); From 287e05d539fcb9bb2aab22844c161070199b6698 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Mon, 29 Apr 2024 22:05:44 +0200 Subject: [PATCH 38/98] archive_match: Turn counter into flag (#2154) When working with matches, the code does not care about the actual amount of times when it matched, but just if it matched so far at least once or never. Turning the counter into a boolean flag has the advantage that even insanely huge archives will never lead to integer overflow here. 
--- libarchive/archive_match.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/libarchive/archive_match.c b/libarchive/archive_match.c index b108ff0a73..3ab8eda360 100644 --- a/libarchive/archive_match.c +++ b/libarchive/archive_match.c @@ -46,7 +46,7 @@ struct match { struct match *next; - int matches; + int matched; struct archive_mstring pattern; }; @@ -725,12 +725,12 @@ path_excluded(struct archive_match *a, int mbs, const void *pathname) matched = NULL; for (match = a->inclusions.first; match != NULL; match = match->next){ - if (match->matches == 0 && + if (!match->matched && (r = match_path_inclusion(a, match, mbs, pathname)) != 0) { if (r < 0) return (r); a->inclusions.unmatched_count--; - match->matches++; + match->matched = 1; matched = match; } } @@ -753,11 +753,10 @@ path_excluded(struct archive_match *a, int mbs, const void *pathname) for (match = a->inclusions.first; match != NULL; match = match->next){ /* We looked at previously-unmatched inclusions already. 
*/ - if (match->matches > 0 && + if (match->matched && (r = match_path_inclusion(a, match, mbs, pathname)) != 0) { if (r < 0) return (r); - match->matches++; return (0); } } @@ -880,7 +879,7 @@ match_list_unmatched_inclusions_next(struct archive_match *a, for (m = list->unmatched_next; m != NULL; m = m->next) { int r; - if (m->matches) + if (m->matched) continue; if (mbs) { const char *p; @@ -1794,7 +1793,7 @@ match_owner_name_mbs(struct archive_match *a, struct match_list *list, < 0 && errno == ENOMEM) return (error_nomem(a)); if (p != NULL && strcmp(p, name) == 0) { - m->matches++; + m->matched = 1; return (1); } } @@ -1815,7 +1814,7 @@ match_owner_name_wcs(struct archive_match *a, struct match_list *list, < 0 && errno == ENOMEM) return (error_nomem(a)); if (p != NULL && wcscmp(p, name) == 0) { - m->matches++; + m->matched = 1; return (1); } } From 93b11caed8b7e23081d3247b182fbc1b86a120f9 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Mon, 29 Apr 2024 22:06:30 +0200 Subject: [PATCH 39/98] lha: Do not allow negative file sizes (#2155) File sizes cannot be negative, so abort lha processing if archive claims otherwise.
--- libarchive/archive_read_support_format_lha.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libarchive/archive_read_support_format_lha.c b/libarchive/archive_read_support_format_lha.c index 4d6290ac33..ae5a1d7d66 100644 --- a/libarchive/archive_read_support_format_lha.c +++ b/libarchive/archive_read_support_format_lha.c @@ -1347,6 +1347,8 @@ lha_read_file_extended_header(struct archive_read *a, struct lha *lha, lha->compsize = archive_le64dec(extdheader); extdheader += sizeof(uint64_t); lha->origsize = archive_le64dec(extdheader); + if (lha->compsize < 0 || lha->origsize < 0) + goto invalid; } break; case EXT_CODEPAGE: From 36047967a2c9c27b749b2fc8f1557096ee964085 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Mon, 29 Apr 2024 23:59:48 -0700 Subject: [PATCH 40/98] archive_string: Clean up strncat_from_utf8_to_utf8 (#2147) Replace some tricky direct manipulation of string internals with simpler and safer high-level string APIs. --- libarchive/archive_string.c | 96 ++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 54 deletions(-) diff --git a/libarchive/archive_string.c b/libarchive/archive_string.c index f39677ad7a..be6c39600d 100644 --- a/libarchive/archive_string.c +++ b/libarchive/archive_string.c @@ -2640,81 +2640,69 @@ unicode_to_utf16le(char *p, size_t remaining, uint32_t uc) } /* - * Copy UTF-8 string in checking surrogate pair. - * If any surrogate pair are found, it would be canonicalized. + * Append new UTF-8 string to existing UTF-8 string. + * Existing string is assumed to already be in proper form; + * the new string will have invalid sequences replaced and + * surrogate pairs canonicalized. 
*/ static int -strncat_from_utf8_to_utf8(struct archive_string *as, const void *_p, +strncat_from_utf8_to_utf8(struct archive_string *as, const void *_src, size_t len, struct archive_string_conv *sc) { - const char *s; - char *p, *endp; - int n, ret = 0; - + int ret = 0; + const char *src = _src; (void)sc; /* UNUSED */ + /* Pre-extend the destination */ if (archive_string_ensure(as, as->length + len + 1) == NULL) return (-1); - s = (const char *)_p; - p = as->s + as->length; - endp = as->s + as->buffer_length -1; - do { + /* Invariant: src points to the first UTF8 byte that hasn't + * been copied to the destination `as`. */ + for (;;) { + int n; uint32_t uc; - const char *ss = s; - size_t w; + const char *e = src; - /* - * Forward byte sequence until a conversion of that is needed. - */ - while ((n = utf8_to_unicode(&uc, s, len)) > 0) { - s += n; + /* Skip UTF-8 sequences until we reach end-of-string or + * a code point that needs conversion. */ + while ((n = utf8_to_unicode(&uc, e, len)) > 0) { + e += n; len -= n; } - if (ss < s) { - if (p + (s - ss) > endp) { - as->length = p - as->s; - if (archive_string_ensure(as, - as->buffer_length + len + 1) == NULL) - return (-1); - p = as->s + as->length; - endp = as->s + as->buffer_length -1; - } - - memcpy(p, ss, s - ss); - p += s - ss; + /* Copy the part that doesn't need conversion */ + if (e > src) { + if (archive_string_append(as, src, e - src) == NULL) + return (-1); + src = e; } - /* - * If n is negative, current byte sequence needs a replacement. - */ - if (n < 0) { + if (n == 0) { + /* We reached end-of-string */ + return (ret); + } else { + /* Next code point needs conversion */ + char t[4]; + size_t w; + + /* Try decoding a surrogate pair */ if (n == -3 && IS_SURROGATE_PAIR_LA(uc)) { - /* Current byte sequence may be CESU-8. 
*/ - n = cesu8_to_unicode(&uc, s, len); + n = cesu8_to_unicode(&uc, src, len); } + /* Not a (valid) surrogate, so use a replacement char */ if (n < 0) { - ret = -1; - n *= -1;/* Use a replaced unicode character. */ - } - - /* Rebuild UTF-8 byte sequence. */ - while ((w = unicode_to_utf8(p, endp - p, uc)) == 0) { - as->length = p - as->s; - if (archive_string_ensure(as, - as->buffer_length + len + 1) == NULL) - return (-1); - p = as->s + as->length; - endp = as->s + as->buffer_length -1; + ret = -1; /* Return -1 if we used any replacement */ + n *= -1; } - p += w; - s += n; + /* Consume converted code point */ + src += n; len -= n; + /* Convert and append new UTF-8 sequence. */ + w = unicode_to_utf8(t, sizeof(t), uc); + if (archive_string_append(as, t, w) == NULL) + return (-1); } - } while (n > 0); - as->length = p - as->s; - as->s[as->length] = '\0'; - return (ret); + } } static int From 83e8b0ea8c3b07e07ac3dee90a8724565f8e53fd Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Tue, 30 Apr 2024 11:25:26 +0200 Subject: [PATCH 41/98] tests: setenv LANG to en_US.UTF-8 in bsdunzip test_I.c --- unzip/test/test_I.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/unzip/test/test_I.c b/unzip/test/test_I.c index 5d31ce8d16..d189edca1a 100644 --- a/unzip/test/test_I.c +++ b/unzip/test/test_I.c @@ -33,6 +33,7 @@ DEFINE_TEST(test_I) { const char *reffile = "test_I.zip"; + const char *lang; int r; #if HAVE_SETLOCALE @@ -44,6 +45,8 @@ DEFINE_TEST(test_I) skipping("setlocale() not available on this system."); #endif + lang = getenv("LANG"); + setenv("LANG", "en_US.UTF-8", 1); extract_reference_file(reffile); r = systemf("%s -I UTF-8 %s >test.out 2>test.err", testprog, reffile); assertEqualInt(0, r); @@ -51,4 +54,9 @@ DEFINE_TEST(test_I) assertEmptyFile("test.err"); assertTextFileContents("Hello, World!\n", "Γειά σου Κόσμε.txt"); + + if (lang == NULL) + unsetenv("LANG"); + else + setenv("LANG", lang, 1); } From b9f713540cc33a66a44728dd706aea487b989913 Mon Sep 17 
00:00:00 2001 From: Tobias Stoeckmann Date: Fri, 3 May 2024 23:41:35 +0200 Subject: [PATCH 42/98] rpm: Calculate huge header sizes correctly (#2158) If an RPM file contains a huge header which is larger than 4 GB then libarchive starts parsing the RPM header as actual archive instead of skipping it. Switched to uint64_t from size_t for proper 32 bit support as well. --- libarchive/archive_read_support_filter_rpm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libarchive/archive_read_support_filter_rpm.c b/libarchive/archive_read_support_filter_rpm.c index 87e9f2ec19..44b5ba0a88 100644 --- a/libarchive/archive_read_support_filter_rpm.c +++ b/libarchive/archive_read_support_filter_rpm.c @@ -39,8 +39,8 @@ struct rpm { int64_t total_in; - size_t hpos; - size_t hlen; + uint64_t hpos; + uint64_t hlen; unsigned char header[16]; enum { ST_LEAD, /* Skipping 'Lead' section. */ @@ -161,9 +161,9 @@ rpm_filter_read(struct archive_read_filter *self, const void **buff) struct rpm *rpm; const unsigned char *b; ssize_t avail_in, total; - size_t used, n; - uint32_t section; - uint32_t bytes; + uint64_t used, n; + uint64_t section; + uint64_t bytes; rpm = (struct rpm *)self->data; *buff = NULL; From 80af74ccbf529b31c66b1879ebc570b9b828a2a6 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Sat, 4 May 2024 00:18:34 +0200 Subject: [PATCH 43/98] zip: Improve bid for huge EOCDs (#2159) Cast any of cd_offset or cd_size to int64_t to avoid truncation of result because both variables are of type uint32_t. The calculation happens before comparison with current_offset, so it is not automatically expanded to int64_t during calculation. 
--- libarchive/archive_read_support_format_zip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libarchive/archive_read_support_format_zip.c b/libarchive/archive_read_support_format_zip.c index c9759eaf9a..ac80a99ad9 100644 --- a/libarchive/archive_read_support_format_zip.c +++ b/libarchive/archive_read_support_format_zip.c @@ -3680,7 +3680,7 @@ read_eocd(struct zip *zip, const char *p, int64_t current_offset) if (archive_le16dec(p + 10) != archive_le16dec(p + 8)) return 0; /* Central directory can't extend beyond start of EOCD record. */ - if (cd_offset + cd_size > current_offset) + if ((int64_t)cd_offset + cd_size > current_offset) return 0; /* Save the central directory location for later use. */ From bad9a4ebb6bee259ee82ba537bc7e72609b40767 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Sat, 4 May 2024 00:19:55 +0200 Subject: [PATCH 44/98] Fix out of boundary access in mktemp functions (#2160) Some of the mktemp-related functions might access memory out of bounds if TMPDIR is empty or other such situations lead to an empty archive_string. 
--- libarchive/archive_util.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/libarchive/archive_util.c b/libarchive/archive_util.c index 7b918fef04..46d03f926d 100644 --- a/libarchive/archive_util.c +++ b/libarchive/archive_util.c @@ -280,7 +280,8 @@ __archive_mktempx(const char *tmpdir, wchar_t *template) if (archive_wstring_append_from_mbs(&temp_name, tmpdir, strlen(tmpdir)) < 0) goto exit_tmpfile; - if (temp_name.s[temp_name.length-1] != L'/') + if (temp_name.length == 0 || + temp_name.s[temp_name.length-1] != L'/') archive_wstrappend_wchar(&temp_name, L'/'); } @@ -454,7 +455,7 @@ get_tempdir(struct archive_string *temppath) tmp = "/tmp"; #endif archive_strcpy(temppath, tmp); - if (temppath->s[temppath->length-1] != '/') + if (temppath->length == 0 || temppath->s[temppath->length-1] != '/') archive_strappend_char(temppath, '/'); return (ARCHIVE_OK); } @@ -477,7 +478,8 @@ __archive_mktemp(const char *tmpdir) goto exit_tmpfile; } else { archive_strcpy(&temp_name, tmpdir); - if (temp_name.s[temp_name.length-1] != '/') + if (temp_name.length == 0 || + temp_name.s[temp_name.length-1] != '/') archive_strappend_char(&temp_name, '/'); } #ifdef O_TMPFILE @@ -538,7 +540,7 @@ __archive_mktempx(const char *tmpdir, char *template) goto exit_tmpfile; } else archive_strcpy(&temp_name, tmpdir); - if (temp_name.s[temp_name.length-1] == '/') { + if (temp_name.length > 0 && temp_name.s[temp_name.length-1] == '/') { temp_name.s[temp_name.length-1] = '\0'; temp_name.length --; } From 0936dd5c00c8ab53fd3c4917b30a3a4160ee1694 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Fri, 3 May 2024 16:27:42 -0700 Subject: [PATCH 45/98] rpm: Test handling of huge header sizes (#2163) This implements a test for the change in PR #2158 --- Makefile.am | 2 + libarchive/test/CMakeLists.txt | 1 + libarchive/test/test_read_format_huge_rpm.c | 50 ++++ .../test/test_read_format_huge_rpm.rpm.uu | 244 ++++++++++++++++++ 4 files changed, 297 insertions(+) create mode 100644 
libarchive/test/test_read_format_huge_rpm.c create mode 100644 libarchive/test/test_read_format_huge_rpm.rpm.uu diff --git a/Makefile.am b/Makefile.am index 47b6fa1fc6..8a53329141 100644 --- a/Makefile.am +++ b/Makefile.am @@ -486,6 +486,7 @@ libarchive_test_SOURCES= \ libarchive/test/test_read_format_gtar_lzma.c \ libarchive/test/test_read_format_gtar_sparse.c \ libarchive/test/test_read_format_gtar_sparse_skip_entry.c \ + libarchive/test/test_read_format_huge_rpm.c \ libarchive/test/test_read_format_iso_Z.c \ libarchive/test/test_read_format_iso_multi_extent.c \ libarchive/test/test_read_format_iso_xorriso.c \ @@ -827,6 +828,7 @@ libarchive_test_EXTRA_DIST=\ libarchive/test/test_read_format_gtar_sparse_1_17_posix10.tar.uu \ libarchive/test/test_read_format_gtar_sparse_1_17_posix10_modified.tar.uu \ libarchive/test/test_read_format_gtar_sparse_skip_entry.tar.Z.uu \ + libarchive/test/test_read_format_huge_rpm.rpm.uu \ libarchive/test/test_read_format_iso.iso.Z.uu \ libarchive/test/test_read_format_iso_2.iso.Z.uu \ libarchive/test/test_read_format_iso_joliet.iso.Z.uu \ diff --git a/libarchive/test/CMakeLists.txt b/libarchive/test/CMakeLists.txt index 7b166c5fba..bef739ad47 100644 --- a/libarchive/test/CMakeLists.txt +++ b/libarchive/test/CMakeLists.txt @@ -130,6 +130,7 @@ IF(ENABLE_TEST) test_read_format_gtar_lzma.c test_read_format_gtar_sparse.c test_read_format_gtar_sparse_skip_entry.c + test_read_format_huge_rpm.c test_read_format_iso_Z.c test_read_format_iso_multi_extent.c test_read_format_iso_xorriso.c diff --git a/libarchive/test/test_read_format_huge_rpm.c b/libarchive/test/test_read_format_huge_rpm.c new file mode 100644 index 0000000000..729c1e188d --- /dev/null +++ b/libarchive/test/test_read_format_huge_rpm.c @@ -0,0 +1,50 @@ +/*- + * Copyright (c) 2003-2024 Tim Kientzle + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "test.h" + +DEFINE_TEST(test_read_format_huge_rpm) +{ + struct archive_entry *ae; + struct archive *a; + const char *name = "test_read_format_huge_rpm.rpm"; + + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); + extract_reference_file(name); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, name, 2)); + + /* This archive should have no entries -- if it has entries, the bid has screwed up */ + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + + /* Verify that the format detection worked. 
*/ + assertEqualInt(ARCHIVE_FILTER_RPM, archive_filter_code(a, 0)); + assertEqualString("rpm", archive_filter_name(a, 0)); + assertEqualInt(ARCHIVE_FORMAT_EMPTY, archive_format(a)); + + assertEqualInt(ARCHIVE_OK, archive_read_close(a)); + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); +} + diff --git a/libarchive/test/test_read_format_huge_rpm.rpm.uu b/libarchive/test/test_read_format_huge_rpm.rpm.uu new file mode 100644 index 0000000000..db21118596 --- /dev/null +++ b/libarchive/test/test_read_format_huge_rpm.rpm.uu @@ -0,0 +1,244 @@ +( + # set up ST_LEAD + python -c 'import sys; sys.stdout.buffer.write(b"\xED\xAB\xEE\xDB\x03\x00\x00\x01" + (96 - 8) * b"\x00")' + # set up ST_HEADER with 0x800000 sections + python -c 'import sys; sys.stdout.buffer.write(b"\x8E\xAD\xE8\x01" + 4 * b"\x00" + b"\x80" + 7 * b"\x00")' + # create archive + touch input.txt + bsdtar -cf - input.txt +) > test_read_format_huge_rpm.rpm + +begin 644 test_read_format_huge_rpm.rpm +M[:ONVP,```$````````````````````````````````````````````````` +M```````````````````````````````````````````````````````````` +M````````CJWH`0````"``````````&EN<'5T+G1X=``````````````````` +M```````````````````````````````````````````````````````````` +M```````````````````````````````````````````P,#`V-#0@`#`P,# Date: Sat, 4 May 2024 19:58:25 +0200 Subject: [PATCH 46/98] Fix typos (#2165) Typos found with codespell. 
--- libarchive/test/test_compat_lzip.c | 2 +- .../test/test_read_format_zip_traditional_encryption_data.c | 2 +- libarchive/test/test_read_format_zip_winzip_aes.c | 2 +- libarchive/test/test_read_format_zip_winzip_aes_large.c | 2 +- libarchive/test/test_write_format_zip.c | 6 +++--- libarchive/test/test_write_format_zip_entry_size_unset.c | 2 +- unzip/test/test_n.c | 2 +- unzip/test/test_not_exist.c | 2 +- unzip/test/test_o.c | 2 +- 9 files changed, 11 insertions(+), 11 deletions(-) diff --git a/libarchive/test/test_compat_lzip.c b/libarchive/test/test_compat_lzip.c index 1420e5bb9a..50920eefb1 100644 --- a/libarchive/test/test_compat_lzip.c +++ b/libarchive/test/test_compat_lzip.c @@ -47,7 +47,7 @@ echo "f3" > $dir/d1/f3 rm -r $dir } # -# Make a lzip file from splitted tar file. +# Make a lzip file from split tar file. # name=test_compat_lzip_1 dir="$name`date +%Y%m%d%H%M%S`.$USER" diff --git a/libarchive/test/test_read_format_zip_traditional_encryption_data.c b/libarchive/test/test_read_format_zip_traditional_encryption_data.c index 8f0cfe6dc8..0cf2d0a157 100644 --- a/libarchive/test/test_read_format_zip_traditional_encryption_data.c +++ b/libarchive/test/test_read_format_zip_traditional_encryption_data.c @@ -41,7 +41,7 @@ DEFINE_TEST(test_read_format_zip_traditional_encryption_data) assertEqualIntA(a, ARCHIVE_OK, archive_write_add_filter_none(a)); if (ARCHIVE_OK != archive_write_set_options(a, "zip:encryption=traditional")) { - skipping("This system does not have cryptographic liberary"); + skipping("This system does not have cryptographic library"); archive_write_free(a); return; } diff --git a/libarchive/test/test_read_format_zip_winzip_aes.c b/libarchive/test/test_read_format_zip_winzip_aes.c index 7c5d5ea61f..9b609b4f29 100644 --- a/libarchive/test/test_read_format_zip_winzip_aes.c +++ b/libarchive/test/test_read_format_zip_winzip_aes.c @@ -38,7 +38,7 @@ test_winzip_aes(const char *refname, int need_libz) assertEqualIntA(a, ARCHIVE_OK, 
archive_write_add_filter_none(a)); if (ARCHIVE_OK != archive_write_set_options(a, "zip:encryption=aes256")) { - skipping("This system does not have cryptographic liberary"); + skipping("This system does not have cryptographic library"); archive_write_free(a); return; } diff --git a/libarchive/test/test_read_format_zip_winzip_aes_large.c b/libarchive/test/test_read_format_zip_winzip_aes_large.c index 4b6202bde1..4f52bb9111 100644 --- a/libarchive/test/test_read_format_zip_winzip_aes_large.c +++ b/libarchive/test/test_read_format_zip_winzip_aes_large.c @@ -39,7 +39,7 @@ DEFINE_TEST(test_read_format_zip_winzip_aes256_large) assertEqualIntA(a, ARCHIVE_OK, archive_write_add_filter_none(a)); if (ARCHIVE_OK != archive_write_set_options(a, "zip:encryption=aes256")) { - skipping("This system does not have cryptographic liberary"); + skipping("This system does not have cryptographic library"); archive_write_free(a); return; } diff --git a/libarchive/test/test_write_format_zip.c b/libarchive/test/test_write_format_zip.c index 54240eeb7a..583c78c3ac 100644 --- a/libarchive/test/test_write_format_zip.c +++ b/libarchive/test/test_write_format_zip.c @@ -699,7 +699,7 @@ DEFINE_TEST(test_write_format_zip_traditional_pkware_encryption) assertEqualIntA(a, ARCHIVE_OK, archive_write_add_filter_none(a)); if (ARCHIVE_OK != archive_write_set_options(a, "zip:encryption=zipcrypt")) { - skipping("This system does not have cryptographic liberary"); + skipping("This system does not have cryptographic library"); archive_write_free(a); free(buff); return; @@ -779,7 +779,7 @@ DEFINE_TEST(test_write_format_zip_winzip_aes128_encryption) assertEqualIntA(a, ARCHIVE_OK, archive_write_add_filter_none(a)); if (ARCHIVE_OK != archive_write_set_options(a, "zip:encryption=aes128")) { - skipping("This system does not have cryptographic liberary"); + skipping("This system does not have cryptographic library"); archive_write_free(a); free(buff); return; @@ -859,7 +859,7 @@ 
DEFINE_TEST(test_write_format_zip_winzip_aes256_encryption) assertEqualIntA(a, ARCHIVE_OK, archive_write_add_filter_none(a)); if (ARCHIVE_OK != archive_write_set_options(a, "zip:encryption=aes256")) { - skipping("This system does not have cryptographic liberary"); + skipping("This system does not have cryptographic library"); archive_write_free(a); free(buff); return; diff --git a/libarchive/test/test_write_format_zip_entry_size_unset.c b/libarchive/test/test_write_format_zip_entry_size_unset.c index 10c191f7ce..fe69478160 100644 --- a/libarchive/test/test_write_format_zip_entry_size_unset.c +++ b/libarchive/test/test_write_format_zip_entry_size_unset.c @@ -205,7 +205,7 @@ static void verify_contents(const char *zip_buff, size_t size) assertEqualInt(i4(data_descriptor + 4), crc); /* Check compressed size */ assertEqualInt(i4(data_descriptor + 8), sizeof(file_data1) + sizeof(file_data2)); - /* Chcek uncompresed size */ + /* Check uncompressed size */ assertEqualInt(i4(data_descriptor + 12), sizeof(file_data1) + sizeof(file_data2)); /* Get folder entry in central directory */ diff --git a/unzip/test/test_n.c b/unzip/test/test_n.c index 4e893f04b7..992f25ff89 100644 --- a/unzip/test/test_n.c +++ b/unzip/test/test_n.c @@ -25,7 +25,7 @@ */ #include "test.h" -/* Test n arg - don't overrite existing files */ +/* Test n arg - don't overwrite existing files */ DEFINE_TEST(test_n) { const char *reffile = "test_basic.zip"; diff --git a/unzip/test/test_not_exist.c b/unzip/test/test_not_exist.c index aa660dc646..9cd6cd5c57 100644 --- a/unzip/test/test_not_exist.c +++ b/unzip/test/test_not_exist.c @@ -25,7 +25,7 @@ */ #include "test.h" -/* Test non existant file */ +/* Test non existent file */ DEFINE_TEST(test_not_exist) { int r; diff --git a/unzip/test/test_o.c b/unzip/test/test_o.c index af0c412868..33c42b4056 100644 --- a/unzip/test/test_o.c +++ b/unzip/test/test_o.c @@ -25,7 +25,7 @@ */ #include "test.h" -/* Test o arg - overrite existing files */ +/* Test o arg - overwrite 
existing files */ DEFINE_TEST(test_o) { const char *reffile = "test_basic.zip"; From 7a6bb5f5ac3fd1f343577ae667d1829fbeacfb74 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Sat, 4 May 2024 12:15:56 -0700 Subject: [PATCH 47/98] =?UTF-8?q?Rename=20the=20test=20to=20match=20the=20?= =?UTF-8?q?filename,=20and=20avoid=20test=20failures=20on=20cas=E2=80=A6?= =?UTF-8?q?=20(#2166)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …e-insensitive filesystems Resolves #2164 --- unzip/test/test_P_encryption.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unzip/test/test_P_encryption.c b/unzip/test/test_P_encryption.c index beabbaa646..4a7472b4e2 100644 --- a/unzip/test/test_P_encryption.c +++ b/unzip/test/test_P_encryption.c @@ -26,7 +26,7 @@ #include "test.h" /* Test P arg - password protected */ -DEFINE_TEST(test_P) +DEFINE_TEST(test_P_encryption) { const char *reffile = "test_encrypted.zip"; int r; From 6818dd167cb190c76e0cfdeb40cd73aa22b96bd8 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Sat, 4 May 2024 21:17:21 +0200 Subject: [PATCH 48/98] lha: Fix integer truncation on 32 bit systems (#2161) The comp size could be around INT_MAX on huge archives, which would lead to eventual integer truncation to size_t in archives with version 1 headers when fixed value 2 is added to comp_size on 32 bit systems. This fix is a no-op on 64 bit systems because size_t and uint64_t are of same size there. 
--- libarchive/archive_read_support_format_lha.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/libarchive/archive_read_support_format_lha.c b/libarchive/archive_read_support_format_lha.c index ae5a1d7d66..e417baad10 100644 --- a/libarchive/archive_read_support_format_lha.c +++ b/libarchive/archive_read_support_format_lha.c @@ -227,7 +227,7 @@ static int lha_read_file_header_1(struct archive_read *, struct lha *); static int lha_read_file_header_2(struct archive_read *, struct lha *); static int lha_read_file_header_3(struct archive_read *, struct lha *); static int lha_read_file_extended_header(struct archive_read *, - struct lha *, uint16_t *, int, size_t, size_t *); + struct lha *, uint16_t *, int, uint64_t, size_t *); static size_t lha_check_header_format(const void *); static int lha_skip_sfx(struct archive_read *); static time_t lha_dos_time(const unsigned char *); @@ -945,7 +945,7 @@ lha_read_file_header_1(struct archive_read *a, struct lha *lha) /* Read extended headers */ err2 = lha_read_file_extended_header(a, lha, NULL, 2, - (size_t)(lha->compsize + 2), &extdsize); + (uint64_t)(lha->compsize + 2), &extdsize); if (err2 < ARCHIVE_WARN) return (err2); if (err2 < err) @@ -1138,7 +1138,7 @@ lha_read_file_header_3(struct archive_read *a, struct lha *lha) */ static int lha_read_file_extended_header(struct archive_read *a, struct lha *lha, - uint16_t *crc, int sizefield_length, size_t limitsize, size_t *total_size) + uint16_t *crc, int sizefield_length, uint64_t limitsize, size_t *total_size) { const void *h; const unsigned char *extdheader; @@ -1187,8 +1187,7 @@ lha_read_file_extended_header(struct archive_read *a, struct lha *lha, } /* Sanity check to the extended header size. 
*/ - if (((uint64_t)*total_size + extdsize) > - (uint64_t)limitsize || + if (((uint64_t)*total_size + extdsize) > limitsize || extdsize <= (size_t)sizefield_length) goto invalid; From 1e406c9ea204a8bca9067c40edb60e6c8ae168e6 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Mon, 6 May 2024 00:40:57 +0200 Subject: [PATCH 49/98] uu: Stop processing if lines are too long (#2168) Processing excessively long lines could lead to out of boundary writes or denial of service due to O(n^2) runtime complexity. The OOB is properly fixed with first commit. The second commit stops processing of lines which are longer than uu allows due to its specification. --- libarchive/archive_read_support_filter_uu.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/libarchive/archive_read_support_filter_uu.c b/libarchive/archive_read_support_filter_uu.c index 689ceb8f87..7563e861e2 100644 --- a/libarchive/archive_read_support_filter_uu.c +++ b/libarchive/archive_read_support_filter_uu.c @@ -43,11 +43,13 @@ /* Maximum lookahead during bid phase */ #define UUENCODE_BID_MAX_READ 128*1024 /* in bytes */ +#define UUENCODE_MAX_LINE_LENGTH 34*1024 /* in bytes */ + struct uudecode { int64_t total; unsigned char *in_buff; #define IN_BUFF_SIZE (1024) - int in_cnt; + ssize_t in_cnt; size_t in_allocated; unsigned char *out_buff; #define OUT_BUFF_SIZE (64 * 1024) @@ -484,6 +486,12 @@ uudecode_filter_read(struct archive_read_filter *self, const void **buff) goto finish; } if (uudecode->in_cnt) { + if (uudecode->in_cnt > UUENCODE_MAX_LINE_LENGTH) { + archive_set_error(&self->archive->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Invalid format data"); + return (ARCHIVE_FATAL); + } /* * If there is remaining data which is saved by * previous calling, use it first. 
@@ -533,7 +541,7 @@ uudecode_filter_read(struct archive_read_filter *self, const void **buff) return (ARCHIVE_FATAL); if (uudecode->in_buff != b) memmove(uudecode->in_buff, b, len); - uudecode->in_cnt = (int)len; + uudecode->in_cnt = len; if (total == 0) { /* Do not return 0; it means end-of-file. * We should try to read bytes more. */ From 47be31fb1c1296c9f7622d483936e8bfa0ec2fc4 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Mon, 6 May 2024 00:41:25 +0200 Subject: [PATCH 50/98] Fix typos (#2169) Remove duplicated "of" in write-filter comments. --- libarchive/archive_write_add_filter_b64encode.c | 2 +- libarchive/archive_write_add_filter_bzip2.c | 2 +- libarchive/archive_write_add_filter_compress.c | 2 +- libarchive/archive_write_add_filter_xz.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libarchive/archive_write_add_filter_b64encode.c b/libarchive/archive_write_add_filter_b64encode.c index 084d195402..16b41afa78 100644 --- a/libarchive/archive_write_add_filter_b64encode.c +++ b/libarchive/archive_write_add_filter_b64encode.c @@ -149,7 +149,7 @@ archive_filter_b64encode_open(struct archive_write_filter *f) size_t bs = 65536, bpb; if (f->archive->magic == ARCHIVE_WRITE_MAGIC) { - /* Buffer size should be a multiple number of the of bytes + /* Buffer size should be a multiple number of the bytes * per block for performance. 
*/ bpb = archive_write_get_bytes_per_block(f->archive); if (bpb > bs) diff --git a/libarchive/archive_write_add_filter_bzip2.c b/libarchive/archive_write_add_filter_bzip2.c index 561e11b5d7..bc0ef26071 100644 --- a/libarchive/archive_write_add_filter_bzip2.c +++ b/libarchive/archive_write_add_filter_bzip2.c @@ -168,7 +168,7 @@ archive_compressor_bzip2_open(struct archive_write_filter *f) if (data->compressed == NULL) { size_t bs = 65536, bpb; if (f->archive->magic == ARCHIVE_WRITE_MAGIC) { - /* Buffer size should be a multiple number of the of bytes + /* Buffer size should be a multiple number of the bytes * per block for performance. */ bpb = archive_write_get_bytes_per_block(f->archive); if (bpb > bs) diff --git a/libarchive/archive_write_add_filter_compress.c b/libarchive/archive_write_add_filter_compress.c index 78afebda3e..e547e88728 100644 --- a/libarchive/archive_write_add_filter_compress.c +++ b/libarchive/archive_write_add_filter_compress.c @@ -158,7 +158,7 @@ archive_compressor_compress_open(struct archive_write_filter *f) } if (f->archive->magic == ARCHIVE_WRITE_MAGIC) { - /* Buffer size should be a multiple number of the of bytes + /* Buffer size should be a multiple number of the bytes * per block for performance. */ bpb = archive_write_get_bytes_per_block(f->archive); if (bpb > bs) diff --git a/libarchive/archive_write_add_filter_xz.c b/libarchive/archive_write_add_filter_xz.c index 18da08274d..e09d5e9173 100644 --- a/libarchive/archive_write_add_filter_xz.c +++ b/libarchive/archive_write_add_filter_xz.c @@ -310,7 +310,7 @@ archive_compressor_xz_open(struct archive_write_filter *f) if (data->compressed == NULL) { size_t bs = 65536, bpb; if (f->archive->magic == ARCHIVE_WRITE_MAGIC) { - /* Buffer size should be a multiple number of the of bytes + /* Buffer size should be a multiple number of the bytes * per block for performance. 
*/ bpb = archive_write_get_bytes_per_block(f->archive); if (bpb > bs) From b00e916edadf50fa82f8ddcd83dd4b975b965eb5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 5 May 2024 15:56:25 -0700 Subject: [PATCH 51/98] CI: Bump the all-actions group with 2 updates (#2152) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps the all-actions group with 2 updates: [actions/checkout](https://github.com/actions/checkout) and [github/codeql-action](https://github.com/github/codeql-action). Updates `actions/checkout` from 4.1.3 to 4.1.4
Release notes

Sourced from actions/checkout's releases.

v4.1.4

What's Changed

Full Changelog: https://github.com/actions/checkout/compare/v4.1.3...v4.1.4

Changelog

Sourced from actions/checkout's changelog.

Changelog

v4.1.4

v4.1.3

v4.1.2

v4.1.1

v4.1.0

v4.0.0

v3.6.0

v3.5.3

v3.5.2

v3.5.1

v3.5.0

v3.4.0

... (truncated)

Commits

Updates `github/codeql-action` from 3.25.2 to 3.25.3
Changelog

Sourced from github/codeql-action's changelog.

CodeQL Action Changelog

See the releases page for the relevant changes to the CodeQL CLI and language packs.

Note that the only difference between v2 and v3 of the CodeQL Action is the node version they support, with v3 running on node 20 while we continue to release v2 to support running on node 16. For example 3.22.11 was the first v3 release and is functionally identical to 2.22.11. This approach ensures an easy way to track exactly which features are included in different versions, indicated by the minor and patch version numbers.

[UNRELEASED]

No user facing changes.

3.25.3 - 25 Apr 2024

  • Update default CodeQL bundle version to 2.17.1. #2247
  • Workflows running on macos-latest using CodeQL CLI versions before v2.15.1 will need to either upgrade their CLI version to v2.15.1 or newer, or change the platform to an Intel MacOS runner, such as macos-12. ARM machines with SIP disabled, including the newest macos-latest image, are unsupported for CLI versions before 2.15.1. #2261

3.25.2 - 22 Apr 2024

No user facing changes.

3.25.1 - 17 Apr 2024

  • We are rolling out a feature in April/May 2024 that improves the reliability and performance of analyzing code when analyzing a compiled language with the autobuild build mode. #2235
  • Fix a bug where the init Action would fail if --overwrite was specified in CODEQL_ACTION_EXTRA_OPTIONS. #2245

3.25.0 - 15 Apr 2024

  • The deprecated feature for extracting dependencies for a Python analysis has been removed. #2224

    As a result, the following inputs and environment variables are now ignored:

    • The setup-python-dependencies input to the init Action
    • The CODEQL_ACTION_DISABLE_PYTHON_DEPENDENCY_INSTALLATION environment variable

    We recommend removing any references to these from your workflows. For more information, see the release notes for CodeQL Action v3.23.0 and v2.23.0.

  • Automatically overwrite an existing database if found on the filesystem. #2229

  • Bump the minimum CodeQL bundle version to 2.12.6. #2232

  • A more relevant log message and a diagnostic are now emitted when the file program is not installed on a Linux runner, but is required for Go tracing to succeed. #2234

3.24.10 - 05 Apr 2024

  • Update default CodeQL bundle version to 2.17.0. #2219
  • Add a deprecation warning for customers using CodeQL version 2.12.5 and earlier. These versions of CodeQL were discontinued on 26 March 2024 alongside GitHub Enterprise Server 3.8, and will be unsupported by CodeQL Action versions 3.25.0 and later and versions 2.25.0 and later. #2220
    • If you are using one of these versions, please update to CodeQL CLI version 2.12.6 or later. For instance, if you have specified a custom version of the CLI using the 'tools' input to the 'init' Action, you can remove this input to use the default version.
    • Alternatively, if you want to continue using a version of the CodeQL CLI between 2.11.6 and 2.12.5, you can replace github/codeql-action/*@v3 by github/codeql-action/*@v3.24.10 and github/codeql-action/*@v2 by github/codeql-action/*@v2.24.10 in your code scanning workflow to ensure you continue using this version of the CodeQL Action.

3.24.9 - 22 Mar 2024

  • Update default CodeQL bundle version to 2.16.5. #2203

3.24.8 - 18 Mar 2024

... (truncated)

Commits
  • d39d31e Merge pull request #2262 from github/update-v3.25.3-ac2f82a1f
  • a727825 Move changenote to most recent section
  • 1efa859 Update changelog for v3.25.3
  • ac2f82a Log warning if SIP is disabled and CLI version is < 2.15.1 (#2261)
  • 0ad7791 Merge pull request #2247 from github/update-bundle/codeql-bundle-v2.17.1
  • 79d9ee7 Merge branch 'main' into update-bundle/codeql-bundle-v2.17.1
  • dbf2b17 Merge pull request #2255 from github/mergeback/v3.25.2-to-main-8f596b4a
  • ff6a3c4 Update checked-in dependencies
  • 619dc0c Update changelog and version after v3.25.2
  • 39e1e65 Add changelog note
  • Additional commits viewable in compare view

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore <dependency name> major version` will close this group update PR and stop Dependabot creating any more for the specific dependency's major version (unless you unignore this specific dependency's major version or upgrade to it yourself) - `@dependabot ignore <dependency name> minor version` will close this group update PR and stop Dependabot creating any more for the specific dependency's minor version (unless you unignore this specific dependency's minor version or upgrade to it yourself) - `@dependabot ignore <dependency name>` will close this group update PR and stop Dependabot creating any more for the specific dependency (unless you unignore this specific dependency or upgrade to it yourself) - `@dependabot unignore <dependency name>` will remove all of the ignore conditions of the specified dependency - `@dependabot unignore <dependency name> <ignore condition>` will remove the ignore condition of the specified dependency and ignore conditions
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/ci.yml | 8 ++++---- .github/workflows/codeql.yml | 8 ++++---- .github/workflows/scorecard.yml | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dd49533dc0..da9de336e0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ jobs: matrix: bs: [autotools, cmake] steps: - - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - name: Install dependencies run: ./build/ci/github_actions/macos.sh prepare - name: Autogen @@ -57,7 +57,7 @@ jobs: bs: [autotools, cmake] crypto: [mbedtls, nettle, openssl] steps: - - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - name: Update apt cache run: sudo apt-get update - name: Install dependencies @@ -98,7 +98,7 @@ jobs: Ubuntu-distcheck: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - name: Update package definitions run: sudo apt-get update - name: Install dependencies @@ -125,7 +125,7 @@ jobs: matrix: be: [mingw-gcc, msvc] steps: - - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - name: Install mingw if: ${{ matrix.be=='mingw-gcc' }} run: choco install mingw diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 17e6bf72dd..8a56abc33b 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -26,18 +26,18 @@ jobs: steps: - name: Checkout - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # 
v4.1.3 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - name: Initialize CodeQL - uses: github/codeql-action/init@8f596b4ae3cb3c588a5c46780b86dd53fef16c52 # v3.25.2 + uses: github/codeql-action/init@d39d31e687223d841ef683f52467bd88e9b21c14 # v3.25.3 with: languages: ${{ matrix.language }} queries: +security-and-quality - name: Autobuild - uses: github/codeql-action/autobuild@8f596b4ae3cb3c588a5c46780b86dd53fef16c52 # v3.25.2 + uses: github/codeql-action/autobuild@d39d31e687223d841ef683f52467bd88e9b21c14 # v3.25.3 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@8f596b4ae3cb3c588a5c46780b86dd53fef16c52 # v3.25.2 + uses: github/codeql-action/analyze@d39d31e687223d841ef683f52467bd88e9b21c14 # v3.25.3 with: category: "/language:${{ matrix.language }}" diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index d05080c54d..a8d6562b62 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -29,7 +29,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 with: persist-credentials: false @@ -60,6 +60,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@8f596b4ae3cb3c588a5c46780b86dd53fef16c52 # v3.25.2 + uses: github/codeql-action/upload-sarif@d39d31e687223d841ef683f52467bd88e9b21c14 # v3.25.3 with: sarif_file: results.sarif From 6ff1cd1e487ddf545337b88da3f1f5ca69a2f958 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Mon, 6 May 2024 20:46:26 -0700 Subject: [PATCH 52/98] Define INT_MAX via `#include <limits.h>` (#2170) #2110 added usages of INT_MAX here without adding the necessary header. 
Resolves #2162 --- tar/bsdtar.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tar/bsdtar.c b/tar/bsdtar.c index 42baab2861..f0e71afd1c 100644 --- a/tar/bsdtar.c +++ b/tar/bsdtar.c @@ -43,6 +43,9 @@ #ifdef HAVE_LANGINFO_H #include <langinfo.h> #endif +#ifdef HAVE_LIMITS_H +#include <limits.h> +#endif #ifdef HAVE_LOCALE_H #include <locale.h> #endif From dd8f50c3174b4374938b92ef18faf8400214f756 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Fri, 10 May 2024 01:32:09 +0200 Subject: [PATCH 53/98] unzip: Unify EOF handling (#2175) If EOF is encountered while reading the new filename after choosing 'r', avoid out of boundary access and usage of undefined memory content by treating it the same way as if the question itself was not answered. --- unzip/bsdunzip.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/unzip/bsdunzip.c b/unzip/bsdunzip.c index af3fb14c36..cec1810483 100644 --- a/unzip/bsdunzip.c +++ b/unzip/bsdunzip.c @@ -484,13 +484,8 @@ handle_existing_file(char **path) fprintf(stderr, "replace %s? 
[y]es, [n]o, [A]ll, [N]one, [r]ename: ", *path); - if (fgets(buf, sizeof(buf), stdin) == NULL) { - clearerr(stdin); - printf("NULL\n(EOF or read error, " - "treating as \"[N]one\"...)\n"); - n_opt = 1; - return -1; - } + if (fgets(buf, sizeof(buf), stdin) == NULL) + goto stdin_err; switch (*buf) { case 'A': o_opt = 1; @@ -512,6 +507,8 @@ handle_existing_file(char **path) *path = NULL; alen = 0; len = getline(path, &alen, stdin); + if (len < 1) + goto stdin_err; if ((*path)[len - 1] == '\n') (*path)[len - 1] = '\0'; return 0; @@ -519,6 +516,12 @@ handle_existing_file(char **path) break; } } +stdin_err: + clearerr(stdin); + printf("NULL\n(EOF or read error, " + "treating as \"[N]one\"...)\n"); + n_opt = 1; + return -1; } /* From 8bc1a3e66e3994ef1c70d236d6524c42b46f0f72 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Fri, 10 May 2024 01:49:07 +0200 Subject: [PATCH 54/98] lzop: Prevent integer overflow (#2174) Cast to int64_t to cover all unsigned 32 bit values without running into issues with C standard. --- libarchive/archive_read_support_filter_lzop.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libarchive/archive_read_support_filter_lzop.c b/libarchive/archive_read_support_filter_lzop.c index e971063dc6..0aa85927b4 100644 --- a/libarchive/archive_read_support_filter_lzop.c +++ b/libarchive/archive_read_support_filter_lzop.c @@ -291,7 +291,8 @@ consume_header(struct archive_read_filter *self) if (p == NULL) goto truncated; len = archive_be32dec(p); - __archive_read_filter_consume(self->upstream, len + 4 + 4); + __archive_read_filter_consume(self->upstream, + (int64_t)len + 4 + 4); } state->flags = flags; state->in_stream = 1; From 576a24205050a0ce5f8209f13bc1d94912797883 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Fri, 10 May 2024 01:49:50 +0200 Subject: [PATCH 55/98] shar: Check strdup return value (#2173) The strdup function could fail, so check for NULL return value. 
--- libarchive/archive_write_set_format_shar.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libarchive/archive_write_set_format_shar.c b/libarchive/archive_write_set_format_shar.c index 52ea6adc22..da2bc0ca3e 100644 --- a/libarchive/archive_write_set_format_shar.c +++ b/libarchive/archive_write_set_format_shar.c @@ -209,6 +209,10 @@ archive_write_shar_header(struct archive_write *a, struct archive_entry *entry) if (archive_entry_filetype(entry) != AE_IFDIR) { /* Try to create the dir. */ p = strdup(name); + if (p == NULL) { + archive_set_error(&a->archive, ENOMEM, "Out of memory"); + return (ARCHIVE_FATAL); + } pp = strrchr(p, '/'); /* If there is a / character, try to create the dir. */ if (pp != NULL) { @@ -291,6 +295,10 @@ archive_write_shar_header(struct archive_write *a, struct archive_entry *entry) free(shar->last_dir); shar->last_dir = strdup(name); + if (shar->last_dir == NULL) { + archive_set_error(&a->archive, ENOMEM, "Out of memory"); + return (ARCHIVE_FATAL); + } /* Trim a trailing '/'. */ pp = strrchr(shar->last_dir, '/'); if (pp != NULL && pp[1] == '\0') From eac15e252010c1189a5c0f461364dbe2cd2a68b1 Mon Sep 17 00:00:00 2001 From: "Dustin L. Howett" Date: Thu, 9 May 2024 18:59:17 -0500 Subject: [PATCH 56/98] rar4 reader: protect copy_from_lzss_window_to_unp() (#2172) copy_from_lzss_window_to_unp unnecessarily took an `int` parameter where both of its callers were holding a `size_t`. A lzss opcode chain could be constructed that resulted in a negative copy length, which when passed into memcpy would result in a very, very large positive number. Switching copy_from_lzss_window_to_unp to take a `size_t` allows it to properly bounds-check length. In addition, this patch also ensures that `length` is not itself larger than the destination buffer. 
Security: CVE-2024-20696 --- libarchive/archive_read_support_format_rar.c | 28 +++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/libarchive/archive_read_support_format_rar.c b/libarchive/archive_read_support_format_rar.c index 4fc6626cac..5776df4bd9 100644 --- a/libarchive/archive_read_support_format_rar.c +++ b/libarchive/archive_read_support_format_rar.c @@ -432,7 +432,7 @@ static int make_table_recurse(struct archive_read *, struct huffman_code *, int, struct huffman_table_entry *, int, int); static int expand(struct archive_read *, int64_t *); static int copy_from_lzss_window_to_unp(struct archive_read *, const void **, - int64_t, int); + int64_t, size_t); static const void *rar_read_ahead(struct archive_read *, size_t, ssize_t *); static int parse_filter(struct archive_read *, const uint8_t *, uint16_t, uint8_t); @@ -2060,7 +2060,7 @@ read_data_compressed(struct archive_read *a, const void **buff, size_t *size, bs = rar->unp_buffer_size - rar->unp_offset; else bs = (size_t)rar->bytes_uncopied; - ret = copy_from_lzss_window_to_unp(a, buff, rar->offset, (int)bs); + ret = copy_from_lzss_window_to_unp(a, buff, rar->offset, bs); if (ret != ARCHIVE_OK) return (ret); rar->offset += bs; @@ -2213,7 +2213,7 @@ read_data_compressed(struct archive_read *a, const void **buff, size_t *size, bs = rar->unp_buffer_size - rar->unp_offset; else bs = (size_t)rar->bytes_uncopied; - ret = copy_from_lzss_window_to_unp(a, buff, rar->offset, (int)bs); + ret = copy_from_lzss_window_to_unp(a, buff, rar->offset, bs); if (ret != ARCHIVE_OK) return (ret); rar->offset += bs; @@ -3094,11 +3094,16 @@ copy_from_lzss_window(struct archive_read *a, void *buffer, static int copy_from_lzss_window_to_unp(struct archive_read *a, const void **buffer, - int64_t startpos, int length) + int64_t startpos, size_t length) { int windowoffs, firstpart; struct rar *rar = (struct rar *)(a->format->data); + if (length > rar->unp_buffer_size) + { + goto fatal; + } + if 
(!rar->unp_buffer) { if ((rar->unp_buffer = malloc(rar->unp_buffer_size)) == NULL) @@ -3110,17 +3115,17 @@ copy_from_lzss_window_to_unp(struct archive_read *a, const void **buffer, } windowoffs = lzss_offset_for_position(&rar->lzss, startpos); - if(windowoffs + length <= lzss_size(&rar->lzss)) { + if(windowoffs + length <= (size_t)lzss_size(&rar->lzss)) { memcpy(&rar->unp_buffer[rar->unp_offset], &rar->lzss.window[windowoffs], length); - } else if (length <= lzss_size(&rar->lzss)) { + } else if (length <= (size_t)lzss_size(&rar->lzss)) { firstpart = lzss_size(&rar->lzss) - windowoffs; if (firstpart < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Bad RAR file data"); return (ARCHIVE_FATAL); } - if (firstpart < length) { + if ((size_t)firstpart < length) { memcpy(&rar->unp_buffer[rar->unp_offset], &rar->lzss.window[windowoffs], firstpart); memcpy(&rar->unp_buffer[rar->unp_offset + firstpart], @@ -3130,9 +3135,7 @@ copy_from_lzss_window_to_unp(struct archive_read *a, const void **buffer, &rar->lzss.window[windowoffs], length); } } else { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Bad RAR file data"); - return (ARCHIVE_FATAL); + goto fatal; } rar->unp_offset += length; if (rar->unp_offset >= rar->unp_buffer_size) @@ -3140,6 +3143,11 @@ copy_from_lzss_window_to_unp(struct archive_read *a, const void **buffer, else *buffer = NULL; return (ARCHIVE_OK); + +fatal: + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Bad RAR file data"); + return (ARCHIVE_FATAL); } static const void * From d517c678b8c0e73537db2b14a2b7b7bf1feefb5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Matu=C5=A1ka?= Date: Fri, 10 May 2024 03:00:33 +0200 Subject: [PATCH 57/98] unzip: do not use getenv() and setenv() in test_I.c (#2177) This setenv() call may clobber the memory pointed to by lang. 
It is also insufficient, since you don't run in a clean environment, so LANG may be overridden by an inherited LC_ALL or LC_CTYPE, or by the user's .profile (remember that system() does not execute the command directly, but passes it to a shell). Reported-By: dag-erling (quoting) --- unzip/test/test_I.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/unzip/test/test_I.c b/unzip/test/test_I.c index d189edca1a..bc70a38591 100644 --- a/unzip/test/test_I.c +++ b/unzip/test/test_I.c @@ -33,7 +33,12 @@ DEFINE_TEST(test_I) { const char *reffile = "test_I.zip"; - const char *lang; +#if !defined(_WIN32) || defined(__CYGWIN__) + const char *envstr = "env LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 " + "LC_CTYPE=en_US.UTF-8"; +#else + const char *envstr = ""; +#endif int r; #if HAVE_SETLOCALE @@ -45,18 +50,12 @@ DEFINE_TEST(test_I) skipping("setlocale() not available on this system."); #endif - lang = getenv("LANG"); - setenv("LANG", "en_US.UTF-8", 1); extract_reference_file(reffile); - r = systemf("%s -I UTF-8 %s >test.out 2>test.err", testprog, reffile); + r = systemf("%s %s -I UTF-8 %s >test.out 2>test.err", envstr, testprog, + reffile); assertEqualInt(0, r); assertNonEmptyFile("test.out"); assertEmptyFile("test.err"); assertTextFileContents("Hello, World!\n", "Γειά σου Κόσμε.txt"); - - if (lang == NULL) - unsetenv("LANG"); - else - setenv("LANG", lang, 1); } From dee8b59108d4b5d5feff14291d9c7c80b9d297bc Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Sat, 11 May 2024 18:53:02 +0200 Subject: [PATCH 58/98] 7zip: Fix Visual Studio compiler warnings (#2182) The cast in ppmd_read function is safe even on 32 bit systems. It is called byte for byte which makes it impossible to actually reach SIZE_MAX on any real world hardware. 
Fixes Visual Studio warnings: warning C4244: '=': conversion from 'uint64_t' to 'size_t', possible loss of data warning C4244: 'function': conversion from 'int64_t' to 'size_t', possible loss of data --- libarchive/archive_read_support_format_7zip.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/libarchive/archive_read_support_format_7zip.c b/libarchive/archive_read_support_format_7zip.c index 7e465935c9..a59034f698 100644 --- a/libarchive/archive_read_support_format_7zip.c +++ b/libarchive/archive_read_support_format_7zip.c @@ -877,10 +877,9 @@ archive_read_format_7zip_read_data(struct archive_read *a, if (zip->end_of_entry) return (ARCHIVE_EOF); - const uint64_t max_read_size = 16 * 1024 * 1024; // Don't try to read more than 16 MB at a time - size_t bytes_to_read = max_read_size; + size_t bytes_to_read = 16 * 1024 * 1024; // Don't try to read more than 16 MB at a time if ((uint64_t)bytes_to_read > zip->entry_bytes_remaining) { - bytes_to_read = zip->entry_bytes_remaining; + bytes_to_read = (size_t)zip->entry_bytes_remaining; } bytes = read_stream(a, buff, bytes_to_read, 0); if (bytes < 0) @@ -1063,7 +1062,7 @@ ppmd_read(void *p) */ ssize_t bytes_avail = 0; const uint8_t* data = __archive_read_ahead(a, - zip->ppstream.stream_in+1, &bytes_avail); + (size_t)zip->ppstream.stream_in+1, &bytes_avail); if(bytes_avail < zip->ppstream.stream_in+1) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, From cd95ec3b72d642d15bedecff0f47122e27a022b1 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Sat, 11 May 2024 19:06:04 +0200 Subject: [PATCH 59/98] uu: Fix Visual Studio compiler warning (#2180) Switch from int64_t to ssize_t for l and body, which is a no-op on 64 bit systems. On 32 bit systems, this change is okay because these variables interact with other ssize_t variables in this scope. 
Fixes compiler warning regarding line 584 in which ssize_t and int64_t are mixed in calculations: warning C4244: '=': conversion from 'int64_t' to 'long' --- libarchive/archive_read_support_filter_uu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libarchive/archive_read_support_filter_uu.c b/libarchive/archive_read_support_filter_uu.c index 7563e861e2..8d361a4f8e 100644 --- a/libarchive/archive_read_support_filter_uu.c +++ b/libarchive/archive_read_support_filter_uu.c @@ -506,7 +506,7 @@ uudecode_filter_read(struct archive_read_filter *self, const void **buff) uudecode->in_cnt = 0; } for (;used < avail_in; d += llen, used += llen) { - int64_t l, body; + ssize_t l, body; b = d; len = get_line(b, avail_in - used, &nl); From 34338d6c4b3495ba0fea3baa12256efb367ad1e6 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Sat, 11 May 2024 19:12:03 +0200 Subject: [PATCH 60/98] rar: Fix out of boundary access with large files (#2179) If a header has the FHD_LARGE flag set, it is not verified that enough bytes have been read. Check boundaries before accessing the additional bytes. 
--- libarchive/archive_read_support_format_rar.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libarchive/archive_read_support_format_rar.c b/libarchive/archive_read_support_format_rar.c index 5776df4bd9..c8725bcae4 100644 --- a/libarchive/archive_read_support_format_rar.c +++ b/libarchive/archive_read_support_format_rar.c @@ -1469,6 +1469,11 @@ read_header(struct archive_read *a, struct archive_entry *entry, if (rar->file_flags & FHD_LARGE) { + if (p + 8 > endp) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Invalid header size"); + return (ARCHIVE_FATAL); + } memcpy(packed_size, file_header.pack_size, 4); memcpy(packed_size + 4, p, 4); /* High pack size */ p += 4; From 3085a3e9d328f1e3379adca822a0a2c722963855 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Sun, 12 May 2024 21:26:19 +0200 Subject: [PATCH 61/98] [Windows] Fix test compilation warnings with Visual Studio (#2178) Fixes all test-related compiler warnings with Visual Studio 2022 on Windows 11. Contains some changes from https://github.com/libarchive/libarchive/pull/2095. CC: @dunhor --------- Co-authored-by: Duncan Horn --- cpio/test/test_option_c.c | 8 +-- libarchive/test/test_archive_match_time.c | 60 +++++++++++------------ libarchive/test/test_read_format_rar5.c | 12 +++-- test_utils/test_main.c | 2 + 4 files changed, 44 insertions(+), 38 deletions(-) diff --git a/cpio/test/test_option_c.c b/cpio/test/test_option_c.c index 0b6bed2fac..de25ed1ab5 100644 --- a/cpio/test/test_option_c.c +++ b/cpio/test/test_option_c.c @@ -119,9 +119,9 @@ DEFINE_TEST(test_option_c) assert(is_octal(e, 76)); /* Entire header is octal digits. 
*/ assertEqualMem(e + 0, "070707", 6); /* Magic */ assert(is_octal(e + 6, 6)); /* dev */ - dev = from_octal(e + 6, 6); + dev = (int)from_octal(e + 6, 6); assert(is_octal(e + 12, 6)); /* ino */ - ino = from_octal(e + 12, 6); + ino = (int)from_octal(e + 12, 6); #if defined(_WIN32) && !defined(__CYGWIN__) /* Group members bits and others bits do not work. */ assertEqualMem(e + 18, "100666", 6); /* Mode */ @@ -129,10 +129,10 @@ DEFINE_TEST(test_option_c) assertEqualMem(e + 18, "100644", 6); /* Mode */ #endif if (uid < 0) - uid = from_octal(e + 24, 6); + uid = (int)from_octal(e + 24, 6); assertEqualInt(from_octal(e + 24, 6), uid); /* uid */ assert(is_octal(e + 30, 6)); /* gid */ - gid = from_octal(e + 30, 6); + gid = (int)from_octal(e + 30, 6); assertEqualMem(e + 36, "000001", 6); /* nlink */ failure("file entries should not have rdev set (dev field was 0%o)", dev); diff --git a/libarchive/test/test_archive_match_time.c b/libarchive/test/test_archive_match_time.c index 25a0623a7e..27ad1da2f1 100644 --- a/libarchive/test/test_archive_match_time.c +++ b/libarchive/test/test_archive_match_time.c @@ -316,15 +316,14 @@ test_newer_mtime_than_file_mbs(void) static void test_newer_ctime_than_file_mbs(void) { +#if defined(_WIN32) && !defined(__CYGWIN__) + skipping("Can't set ctime on Windows"); + return; +#else struct archive *a; struct archive_entry *ae; struct archive *m; -#if defined(_WIN32) && !defined(__CYGWIN__) - skipping("Can't set ctime on Windows"); - return; -#endif - if (!assert((m = archive_match_new()) != NULL)) return; if (!assert((ae = archive_entry_new()) != NULL)) { @@ -373,6 +372,7 @@ test_newer_ctime_than_file_mbs(void) archive_read_free(a); archive_entry_free(ae); archive_match_free(m); +#endif } static void @@ -435,15 +435,14 @@ test_newer_mtime_than_file_wcs(void) static void test_newer_ctime_than_file_wcs(void) { +#if defined(_WIN32) && !defined(__CYGWIN__) + skipping("Can't set ctime on Windows"); + return; +#else struct archive *a; struct archive_entry 
*ae; struct archive *m; -#if defined(_WIN32) && !defined(__CYGWIN__) - skipping("Can't set ctime on Windows"); - return; -#endif - if (!assert((m = archive_match_new()) != NULL)) return; if (!assert((ae = archive_entry_new()) != NULL)) { @@ -493,6 +492,7 @@ test_newer_ctime_than_file_wcs(void) archive_read_free(a); archive_entry_free(ae); archive_match_free(m); +#endif } static void @@ -787,15 +787,14 @@ test_older_mtime_than_file_mbs(void) static void test_older_ctime_than_file_mbs(void) { +#if defined(_WIN32) && !defined(__CYGWIN__) + skipping("Can't set ctime on Windows"); + return; +#else struct archive *a; struct archive_entry *ae; struct archive *m; -#if defined(_WIN32) && !defined(__CYGWIN__) - skipping("Can't set ctime on Windows"); - return; -#endif - if (!assert((m = archive_match_new()) != NULL)) return; if (!assert((ae = archive_entry_new()) != NULL)) { @@ -845,6 +844,7 @@ test_older_ctime_than_file_mbs(void) archive_read_free(a); archive_entry_free(ae); archive_match_free(m); +#endif } static void @@ -907,15 +907,14 @@ test_older_mtime_than_file_wcs(void) static void test_older_ctime_than_file_wcs(void) { +#if defined(_WIN32) && !defined(__CYGWIN__) + skipping("Can't set ctime on Windows"); + return; +#else struct archive *a; struct archive_entry *ae; struct archive *m; -#if defined(_WIN32) && !defined(__CYGWIN__) - skipping("Can't set ctime on Windows"); - return; -#endif - if (!assert((m = archive_match_new()) != NULL)) return; if (!assert((ae = archive_entry_new()) != NULL)) { @@ -965,6 +964,7 @@ test_older_ctime_than_file_wcs(void) archive_read_free(a); archive_entry_free(ae); archive_match_free(m); +#endif } static void @@ -1088,15 +1088,14 @@ test_mtime_between_files_wcs(void) static void test_ctime_between_files_mbs(void) { +#if defined(_WIN32) && !defined(__CYGWIN__) + skipping("Can't set ctime on Windows"); + return; +#else struct archive *a; struct archive_entry *ae; struct archive *m; -#if defined(_WIN32) && !defined(__CYGWIN__) - 
skipping("Can't set ctime on Windows"); - return; -#endif - if (!assert((m = archive_match_new()) != NULL)) return; if (!assert((ae = archive_entry_new()) != NULL)) { @@ -1147,20 +1146,20 @@ test_ctime_between_files_mbs(void) archive_read_free(a); archive_entry_free(ae); archive_match_free(m); +#endif } static void test_ctime_between_files_wcs(void) { +#if defined(_WIN32) && !defined(__CYGWIN__) + skipping("Can't set ctime on Windows"); + return; +#else struct archive *a; struct archive_entry *ae; struct archive *m; -#if defined(_WIN32) && !defined(__CYGWIN__) - skipping("Can't set ctime on Windows"); - return; -#endif - if (!assert((m = archive_match_new()) != NULL)) return; if (!assert((ae = archive_entry_new()) != NULL)) { @@ -1211,6 +1210,7 @@ test_ctime_between_files_wcs(void) archive_read_free(a); archive_entry_free(ae); archive_match_free(m); +#endif } static void diff --git a/libarchive/test/test_read_format_rar5.c b/libarchive/test/test_read_format_rar5.c index 705913b04c..f278b0719e 100644 --- a/libarchive/test/test_read_format_rar5.c +++ b/libarchive/test/test_read_format_rar5.c @@ -932,19 +932,22 @@ DEFINE_TEST(test_read_format_rar5_symlink) assertEqualInt(AE_IFLNK, archive_entry_filetype(ae)); assertEqualString("file.txt", archive_entry_symlink(ae)); assertEqualInt(AE_SYMLINK_TYPE_FILE, archive_entry_symlink_type(ae)); - assertA(0 == archive_read_data(a, NULL, archive_entry_size(ae))); + assertEqualInt(0, archive_entry_size(ae)); + assertA(0 == archive_read_data(a, NULL, (size_t)archive_entry_size(ae))); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("dirlink", archive_entry_pathname(ae)); assertEqualInt(AE_IFLNK, archive_entry_filetype(ae)); assertEqualString("dir", archive_entry_symlink(ae)); assertEqualInt(AE_SYMLINK_TYPE_DIRECTORY, archive_entry_symlink_type(ae)); - assertA(0 == archive_read_data(a, NULL, archive_entry_size(ae))); + assertEqualInt(0, archive_entry_size(ae)); + assertA(0 == archive_read_data(a, NULL, 
(size_t)archive_entry_size(ae))); assertA(0 == archive_read_next_header(a, &ae)); assertEqualString("dir", archive_entry_pathname(ae)); assertEqualInt(AE_IFDIR, archive_entry_filetype(ae)); - assertA(0 == archive_read_data(a, NULL, archive_entry_size(ae))); + assertEqualInt(0, archive_entry_size(ae)); + assertA(0 == archive_read_data(a, NULL, (size_t)archive_entry_size(ae))); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); @@ -969,7 +972,8 @@ DEFINE_TEST(test_read_format_rar5_hardlink) assertEqualString("hardlink.txt", archive_entry_pathname(ae)); assertEqualInt(AE_IFREG, archive_entry_filetype(ae)); assertEqualString("file.txt", archive_entry_hardlink(ae)); - assertA(0 == archive_read_data(a, NULL, archive_entry_size(ae))); + assertEqualInt(0, archive_entry_size(ae)); + assertA(0 == archive_read_data(a, NULL, (size_t)archive_entry_size(ae))); assertA(ARCHIVE_EOF == archive_read_next_header(a, &ae)); diff --git a/test_utils/test_main.c b/test_utils/test_main.c index 6617732a33..496db6d912 100644 --- a/test_utils/test_main.c +++ b/test_utils/test_main.c @@ -121,6 +121,8 @@ #define access _access #undef chdir #define chdir _chdir +#undef chmod +#define chmod _chmod #endif #ifndef fileno #define fileno _fileno From 826c0afe1954147bd239ab0a6d6b0f02ea395dde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Matu=C5=A1ka?= Date: Mon, 13 May 2024 08:44:28 +0200 Subject: [PATCH 62/98] Resolve TODO: Return uname and gname overrides (#2141) Simplify introducing a struct cpio_owner Co-authored-by: Rose <83477269+AtariDreams@users.noreply.github.com> --- cpio/cmdline.c | 51 ++++++++++++++------ cpio/cpio.c | 46 ++++++++++-------- cpio/cpio.h | 8 +++- cpio/test/test_owner_parse.c | 90 +++++++++++++++++++++++------------- 4 files changed, 130 insertions(+), 65 deletions(-) diff --git a/cpio/cmdline.c b/cpio/cmdline.c index ab25492ede..0f26c51739 100644 --- a/cpio/cmdline.c +++ b/cpio/cmdline.c @@ -308,17 +308,22 @@ cpio_getopt(struct cpio *cpio) * Returns NULL if no 
error, otherwise returns error string for display. * */ -const char * -owner_parse(const char *spec, int *uid, int *gid) +int +owner_parse(const char *spec, struct cpio_owner *owner, const char **errmsg) { static char errbuff[128]; const char *u, *ue, *g; - *uid = -1; - *gid = -1; + owner->uid = -1; + owner->gid = -1; + + owner->uname = NULL; + owner->gname = NULL; - if (spec[0] == '\0') - return ("Invalid empty user/group spec"); + if (spec[0] == '\0') { + *errmsg = "Invalid empty user/group spec"; + return (-1); + } /* * Split spec into [user][:.][group] @@ -347,23 +352,29 @@ owner_parse(const char *spec, int *uid, int *gid) user = (char *)malloc(ue - u + 1); if (user == NULL) - return ("Couldn't allocate memory"); + goto alloc_error; memcpy(user, u, ue - u); user[ue - u] = '\0'; if ((pwent = getpwnam(user)) != NULL) { - *uid = pwent->pw_uid; + owner->uid = pwent->pw_uid; + owner->uname = strdup(pwent->pw_name); + if (owner->uname == NULL) { + free(user); + goto alloc_error; + } if (*ue != '\0') - *gid = pwent->pw_gid; + owner->gid = pwent->pw_gid; } else { char *end; errno = 0; - *uid = (int)strtoul(user, &end, 10); + owner->uid = (int)strtoul(user, &end, 10); if (errno || *end != '\0') { snprintf(errbuff, sizeof(errbuff), "Couldn't lookup user ``%s''", user); errbuff[sizeof(errbuff) - 1] = '\0'; free(user); - return (errbuff); + *errmsg = errbuff; + return (-1); } } free(user); @@ -372,18 +383,28 @@ owner_parse(const char *spec, int *uid, int *gid) if (*g != '\0') { struct group *grp; if ((grp = getgrnam(g)) != NULL) { - *gid = grp->gr_gid; + owner->gid = grp->gr_gid; + owner->gname = strdup(grp->gr_name); + if (owner->gname == NULL) { + free(owner->uname); + owner->uname = NULL; + goto alloc_error; + } } else { char *end; errno = 0; - *gid = (int)strtoul(g, &end, 10); + owner->gid = (int)strtoul(g, &end, 10); if (errno || *end != '\0') { snprintf(errbuff, sizeof(errbuff), "Couldn't lookup group ``%s''", g); errbuff[sizeof(errbuff) - 1] = '\0'; - return 
(errbuff); + *errmsg = errbuff; + return (-1); } } } - return (NULL); + return (0); +alloc_error: + *errmsg = "Couldn't allocate memory"; + return (-1); } diff --git a/cpio/cpio.c b/cpio/cpio.c index c9af535f6d..d4d9ac8855 100644 --- a/cpio/cpio.c +++ b/cpio/cpio.c @@ -132,9 +132,9 @@ main(int argc, char *argv[]) static char buff[16384]; struct cpio _cpio; /* Allocated on stack. */ struct cpio *cpio; + struct cpio_owner owner; const char *errmsg; char *tptr; - int uid, gid; int opt, t; cpio = &_cpio; @@ -142,6 +142,7 @@ main(int argc, char *argv[]) cpio->buff = buff; cpio->buff_size = sizeof(buff); + #if defined(HAVE_SIGACTION) && defined(SIGPIPE) { /* Ignore SIGPIPE signals. */ struct sigaction sa; @@ -161,7 +162,9 @@ main(int argc, char *argv[]) #endif cpio->uid_override = -1; + cpio->uname_override = NULL; cpio->gid_override = -1; + cpio->gname_override = NULL; cpio->argv = argv; cpio->argc = argc; cpio->mode = '\0'; @@ -320,21 +323,21 @@ main(int argc, char *argv[]) cpio->quiet = 1; break; case 'R': /* GNU cpio, also --owner */ - /* TODO: owner_parse should return uname/gname - * also; use that to set [ug]name_override. 
*/ - errmsg = owner_parse(cpio->argument, &uid, &gid); - if (errmsg) { + errmsg = NULL; + if (owner_parse(cpio->argument, &owner, &errmsg) != 0) { + if (!errmsg) + errmsg = "Error parsing owner"; lafe_warnc(-1, "%s", errmsg); usage(); } - if (uid != -1) { - cpio->uid_override = uid; - cpio->uname_override = NULL; - } - if (gid != -1) { - cpio->gid_override = gid; - cpio->gname_override = NULL; - } + if (owner.uid != -1) + cpio->uid_override = owner.uid; + if (owner.uname != NULL) + cpio->uname_override = owner.uname; + if (owner.gid != -1) + cpio->gid_override = owner.gid; + if (owner.gname != NULL) + cpio->gname_override = owner.gname; break; case 'r': /* POSIX 1997 */ cpio->option_rename = 1; @@ -439,11 +442,14 @@ main(int argc, char *argv[]) } archive_match_free(cpio->matching); - free_cache(cpio->gname_cache); free_cache(cpio->uname_cache); + free(cpio->uname_override); + free_cache(cpio->gname_cache); + free(cpio->gname_override); archive_read_close(cpio->archive_read_disk); archive_read_free(cpio->archive_read_disk); free(cpio->destdir); + passphrase_free(cpio->ppbuff); return (cpio->return_value); } @@ -728,14 +734,14 @@ file_to_archive(struct cpio *cpio, const char *srcpath) return (r); } - if (cpio->uid_override >= 0) { + if (cpio->uid_override >= 0) archive_entry_set_uid(entry, cpio->uid_override); + if (cpio->uname_override != NULL) archive_entry_set_uname(entry, cpio->uname_override); - } - if (cpio->gid_override >= 0) { + if (cpio->gid_override >= 0) archive_entry_set_gid(entry, cpio->gid_override); + if (cpio->gname_override != NULL) archive_entry_set_gname(entry, cpio->gname_override); - } /* * Generate a destination path for this entry.
@@ -1015,8 +1021,12 @@ mode_in(struct cpio *cpio) fprintf(stderr, "."); if (cpio->uid_override >= 0) archive_entry_set_uid(entry, cpio->uid_override); + if (cpio->uname_override != NULL) + archive_entry_set_uname(entry, cpio->uname_override); if (cpio->gid_override >= 0) archive_entry_set_gid(entry, cpio->gid_override); + if (cpio->gname_override != NULL) + archive_entry_set_gname(entry, cpio->gname_override); r = archive_write_header(ext, entry); if (r != ARCHIVE_OK) { fprintf(stderr, "%s: %s\n", diff --git a/cpio/cpio.h b/cpio/cpio.h index 3e97c0900b..3608268f12 100644 --- a/cpio/cpio.h +++ b/cpio/cpio.h @@ -94,8 +94,14 @@ struct cpio { char *ppbuff; }; -const char *owner_parse(const char *, int *, int *); +struct cpio_owner { + int uid; + int gid; + char *uname; + char *gname; +}; +int owner_parse(const char *, struct cpio_owner *, const char **); /* Fake short equivalents for long options that otherwise lack them. */ enum { diff --git a/cpio/test/test_owner_parse.c b/cpio/test/test_owner_parse.c index fc6f18943f..6fa850fa26 100644 --- a/cpio/test/test_owner_parse.c +++ b/cpio/test/test_owner_parse.c @@ -55,6 +55,14 @@ int_in_list(int i, const int *l, size_t n) failure("%d", i); return (0); } + +static void +free_cpio_owner(struct cpio_owner *owner) { + owner->uid = -1; + owner->gid = -1; + free(owner->uname); + free(owner->gname); +} #endif DEFINE_TEST(test_owner_parse) @@ -62,49 +70,58 @@ DEFINE_TEST(test_owner_parse) #if !defined(ROOT) skipping("No uid/gid configuration for this OS"); #else - int uid, gid; + struct cpio_owner owner; + const char *errstr; - assert(NULL == owner_parse(ROOT, &uid, &gid)); - assert(int_in_list(uid, root_uids, + assert(0 == owner_parse(ROOT, &owner, &errstr)); + assert(int_in_list(owner.uid, root_uids, sizeof(root_uids)/sizeof(root_uids[0]))); - assertEqualInt(-1, gid); - + assertEqualInt(-1, owner.gid); + free_cpio_owner(&owner); - assert(NULL == owner_parse(ROOT ":", &uid, &gid)); - assert(int_in_list(uid, root_uids, + assert(0 
== owner_parse(ROOT ":", &owner, &errstr)); + assert(int_in_list(owner.uid, root_uids, sizeof(root_uids)/sizeof(root_uids[0]))); - assert(int_in_list(gid, root_gids, + assert(int_in_list(owner.gid, root_gids, sizeof(root_gids)/sizeof(root_gids[0]))); + free_cpio_owner(&owner); - assert(NULL == owner_parse(ROOT ".", &uid, &gid)); - assert(int_in_list(uid, root_uids, + assert(0 == owner_parse(ROOT ".", &owner, &errstr)); + assert(int_in_list(owner.uid, root_uids, sizeof(root_uids)/sizeof(root_uids[0]))); - assert(int_in_list(gid, root_gids, + assert(int_in_list(owner.gid, root_gids, sizeof(root_gids)/sizeof(root_gids[0]))); + free_cpio_owner(&owner); - assert(NULL == owner_parse("111", &uid, &gid)); - assertEqualInt(111, uid); - assertEqualInt(-1, gid); + assert(0 == owner_parse("111", &owner, &errstr)); + assertEqualInt(111, owner.uid); + assertEqualInt(-1, owner.gid); + free_cpio_owner(&owner); - assert(NULL == owner_parse("112:", &uid, &gid)); - assertEqualInt(112, uid); + assert(0 == owner_parse("112:", &owner, &errstr)); + assertEqualInt(112, owner.uid); /* Can't assert gid, since we don't know gid for user #112. */ + free_cpio_owner(&owner); - assert(NULL == owner_parse("113.", &uid, &gid)); - assertEqualInt(113, uid); + assert(0 == owner_parse("113.", &owner, &errstr)); + assertEqualInt(113, owner.uid); /* Can't assert gid, since we don't know gid for user #113. 
*/ + free_cpio_owner(&owner); - assert(NULL == owner_parse(":114", &uid, &gid)); - assertEqualInt(-1, uid); - assertEqualInt(114, gid); + assert(0 == owner_parse(":114", &owner, &errstr)); + assertEqualInt(-1, owner.uid); + assertEqualInt(114, owner.gid); + free_cpio_owner(&owner); - assert(NULL == owner_parse(".115", &uid, &gid)); - assertEqualInt(-1, uid); - assertEqualInt(115, gid); + assert(0 == owner_parse(".115", &owner, &errstr)); + assertEqualInt(-1, owner.uid); + assertEqualInt(115, owner.gid); + free_cpio_owner(&owner); - assert(NULL == owner_parse("116:117", &uid, &gid)); - assertEqualInt(116, uid); - assertEqualInt(117, gid); + assert(0 == owner_parse("116:117", &owner, &errstr)); + assertEqualInt(116, owner.uid); + assertEqualInt(117, owner.gid); + free_cpio_owner(&owner); /* * TODO: Lookup current user/group name, build strings and @@ -112,9 +129,20 @@ DEFINE_TEST(test_owner_parse) * users. */ - assert(NULL != owner_parse(":nonexistentgroup", &uid, &gid)); - assert(NULL != owner_parse(ROOT ":nonexistentgroup", &uid, &gid)); - assert(NULL != - owner_parse("nonexistentuser:nonexistentgroup", &uid, &gid)); + errstr = NULL; + assert(0 != owner_parse(":nonexistentgroup", &owner, &errstr)); + assertEqualString(errstr, "Couldn't lookup group ``nonexistentgroup''"); + free_cpio_owner(&owner); + + errstr = NULL; + assert(0 != owner_parse(ROOT ":nonexistentgroup", &owner, &errstr)); + assertEqualString(errstr, "Couldn't lookup group ``nonexistentgroup''"); + free_cpio_owner(&owner); + + errstr = NULL; + assert(0 != owner_parse("nonexistentuser:nonexistentgroup", &owner, + &errstr)); + assertEqualString(errstr, "Couldn't lookup user ``nonexistentuser''"); + free_cpio_owner(&owner); #endif } From 1d6e5d117aaa38f06e778bab51aaeb423d05042d Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Mon, 13 May 2024 08:46:04 +0200 Subject: [PATCH 63/98] rpm: Fix Visual Studio compiler warnings (#2181) Adjust type of variables for their specific use case Add a new inline 
function to unify casting (and clarify code blocks) Use definition to explain magic number (and reduce casts) Use `hpos` instead of magic number to highlight that additional bytes will be parsed --- libarchive/archive_read_support_filter_rpm.c | 35 ++++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/libarchive/archive_read_support_filter_rpm.c b/libarchive/archive_read_support_filter_rpm.c index 44b5ba0a88..a55bc0cf71 100644 --- a/libarchive/archive_read_support_filter_rpm.c +++ b/libarchive/archive_read_support_filter_rpm.c @@ -53,7 +53,8 @@ struct rpm { } state; int first_header; }; -#define RPM_LEAD_SIZE 96 /* Size of 'Lead' section. */ +#define RPM_LEAD_SIZE 96 /* Size of 'Lead' section. */ +#define RPM_MIN_HEAD_SIZE 16 /* Minimum size of 'Head'. */ static int rpm_bidder_bid(struct archive_read_filter_bidder *, struct archive_read_filter *); @@ -63,6 +64,8 @@ static ssize_t rpm_filter_read(struct archive_read_filter *, const void **); static int rpm_filter_close(struct archive_read_filter *); +static inline size_t rpm_limit_bytes(uint64_t, size_t); + #if ARCHIVE_VERSION_NUMBER < 4000000 /* Deprecated; remove in libarchive 4.0 */ int @@ -155,13 +158,19 @@ rpm_bidder_init(struct archive_read_filter *self) return (ARCHIVE_OK); } +static inline size_t +rpm_limit_bytes(uint64_t bytes, size_t max) +{ + return (bytes > max ? 
max : (size_t)bytes); +} + static ssize_t rpm_filter_read(struct archive_read_filter *self, const void **buff) { struct rpm *rpm; const unsigned char *b; - ssize_t avail_in, total; - uint64_t used, n; + ssize_t avail_in, total, used; + size_t n; uint64_t section; uint64_t bytes; @@ -197,15 +206,14 @@ rpm_filter_read(struct archive_read_filter *self, const void **buff) } break; case ST_HEADER: - n = 16 - rpm->hpos; - if (n > avail_in - used) - n = avail_in - used; + n = rpm_limit_bytes(RPM_MIN_HEAD_SIZE - rpm->hpos, + avail_in - used); memcpy(rpm->header+rpm->hpos, b, n); b += n; used += n; rpm->hpos += n; - if (rpm->hpos == 16) { + if (rpm->hpos == RPM_MIN_HEAD_SIZE) { if (rpm->header[0] != 0x8e || rpm->header[1] != 0xad || rpm->header[2] != 0xe8 || @@ -219,21 +227,20 @@ rpm_filter_read(struct archive_read_filter *self, const void **buff) } rpm->state = ST_ARCHIVE; *buff = rpm->header; - total = rpm->hpos; + total = RPM_MIN_HEAD_SIZE; break; } /* Calculate 'Header' length. */ section = archive_be32dec(rpm->header+8); bytes = archive_be32dec(rpm->header+12); - rpm->hlen = 16 + section * 16 + bytes; + rpm->hlen = rpm->hpos + section * 16 + bytes; rpm->state = ST_HEADER_DATA; rpm->first_header = 0; } break; case ST_HEADER_DATA: - n = rpm->hlen - rpm->hpos; - if (n > avail_in - used) - n = avail_in - used; + n = rpm_limit_bytes(rpm->hlen - rpm->hpos, + avail_in - used); b += n; used += n; rpm->hpos += n; @@ -241,7 +248,7 @@ rpm_filter_read(struct archive_read_filter *self, const void **buff) rpm->state = ST_PADDING; break; case ST_PADDING: - while (used < (size_t)avail_in) { + while (used < avail_in) { if (*b != 0) { /* Read next header. 
*/ rpm->state = ST_HEADER; @@ -259,7 +266,7 @@ rpm_filter_read(struct archive_read_filter *self, const void **buff) used = avail_in; break; } - if (used == (size_t)avail_in) { + if (used == avail_in) { rpm->total_in += used; __archive_read_filter_consume(self->upstream, used); b = NULL; From 5e8faa9b5015eecd24165faf6440b1ff68ab1302 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 14 May 2024 09:42:29 +0200 Subject: [PATCH 64/98] CI: Bump the all-actions group with 3 updates (#2184) Updates `actions/checkout` from 4.1.4 to 4.1.5 Updates `github/codeql-action` from 3.25.3 to 3.25.5 Updates `ossf/scorecard-action` from 2.3.1 to 2.3.3 Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/ci.yml | 8 ++++---- .github/workflows/codeql.yml | 8 ++++---- .github/workflows/scorecard.yml | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index da9de336e0..4dd4ffb682 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ jobs: matrix: bs: [autotools, cmake] steps: - - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + - uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 - name: Install dependencies run: ./build/ci/github_actions/macos.sh prepare - name: Autogen @@ -57,7 +57,7 @@ jobs: bs: [autotools, cmake] crypto: [mbedtls, nettle, openssl] steps: - - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + - uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 - name: Update apt cache run: sudo apt-get update - name: Install dependencies @@ -98,7 +98,7 @@ jobs: Ubuntu-distcheck: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + - uses: 
actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 - name: Update package definitions run: sudo apt-get update - name: Install dependencies @@ -125,7 +125,7 @@ jobs: matrix: be: [mingw-gcc, msvc] steps: - - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + - uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 - name: Install mingw if: ${{ matrix.be=='mingw-gcc' }} run: choco install mingw diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 8a56abc33b..d2c527da73 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -26,18 +26,18 @@ jobs: steps: - name: Checkout - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 - name: Initialize CodeQL - uses: github/codeql-action/init@d39d31e687223d841ef683f52467bd88e9b21c14 # v3.25.3 + uses: github/codeql-action/init@b7cec7526559c32f1616476ff32d17ba4c59b2d6 # v3.25.5 with: languages: ${{ matrix.language }} queries: +security-and-quality - name: Autobuild - uses: github/codeql-action/autobuild@d39d31e687223d841ef683f52467bd88e9b21c14 # v3.25.3 + uses: github/codeql-action/autobuild@b7cec7526559c32f1616476ff32d17ba4c59b2d6 # v3.25.5 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@d39d31e687223d841ef683f52467bd88e9b21c14 # v3.25.3 + uses: github/codeql-action/analyze@b7cec7526559c32f1616476ff32d17ba4c59b2d6 # v3.25.5 with: category: "/language:${{ matrix.language }}" diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index a8d6562b62..7d49d37c07 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -29,12 +29,12 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 with: persist-credentials: false - name: 
"Run analysis" - uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1 + uses: ossf/scorecard-action@dc50aa9510b46c811795eb24b2f1ba02a914e534 # v2.3.3 with: results_file: results.sarif results_format: sarif @@ -60,6 +60,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@d39d31e687223d841ef683f52467bd88e9b21c14 # v3.25.3 + uses: github/codeql-action/upload-sarif@b7cec7526559c32f1616476ff32d17ba4c59b2d6 # v3.25.5 with: sarif_file: results.sarif From 69c171a5ab14f6a55077e4d32ae047952d2b5112 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Wed, 15 May 2024 04:55:51 +0200 Subject: [PATCH 65/98] Fix Visual Studio compiler warnings (64 bit) (#2189) Some warnings are specific to 64 bit systems. Compiled with Visual Studio 2022 on Windows 11 x64. Co-authored-by: Duncan Horn --- libarchive/test/test_read_format_rar5.c | 2 +- test_utils/test_main.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/libarchive/test/test_read_format_rar5.c b/libarchive/test/test_read_format_rar5.c index f278b0719e..e51ed53d0c 100644 --- a/libarchive/test/test_read_format_rar5.c +++ b/libarchive/test/test_read_format_rar5.c @@ -843,7 +843,7 @@ DEFINE_TEST(test_read_format_rar5_block_by_block) struct archive_entry *ae; struct archive *a; uint8_t buf[173]; - int bytes_read; + ssize_t bytes_read; uint32_t computed_crc = 0; extract_reference_file("test_read_format_rar5_compressed.rar"); diff --git a/test_utils/test_main.c b/test_utils/test_main.c index 496db6d912..ae5e74225c 100644 --- a/test_utils/test_main.c +++ b/test_utils/test_main.c @@ -219,7 +219,8 @@ my_CreateSymbolicLinkA(const char *linkname, const char *target, static BOOLEAN (WINAPI *f)(LPCSTR, LPCSTR, DWORD); DWORD attrs; static int set; - int ret, tmpflags, llen, tlen; + int ret, tmpflags; + size_t llen, tlen; int flags = 0; char *src, *tgt, *p; if (!set) { @@ -3879,9 +3880,9 @@ main(int 
argc, char **argv) static const int limit = sizeof(tests) / sizeof(tests[0]); int test_set[sizeof(tests) / sizeof(tests[0])]; int i = 0, j = 0, tests_run = 0, tests_failed = 0, option; - int testprogdir_len; + size_t testprogdir_len; #ifdef PROGRAM - int tmp2_len; + size_t tmp2_len; #endif time_t now; struct tm *tmptr; @@ -4085,7 +4086,7 @@ main(int argc, char **argv) { char *testprg; - int testprg_len; + size_t testprg_len; #if defined(_WIN32) && !defined(__CYGWIN__) /* Command.com sometimes rejects '/' separators. */ testprg = strdup(testprogfile); From aa02684709be45abe0fd3b130a14f22722737436 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Wed, 15 May 2024 04:57:25 +0200 Subject: [PATCH 66/98] xar: Fix Visual Studio compiler warnings (#2190) Cast to proper types as used in these structs and function prototype. --- libarchive/archive_read_support_format_xar.c | 10 ++++---- libarchive/archive_write_set_format_xar.c | 26 ++++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/libarchive/archive_read_support_format_xar.c b/libarchive/archive_read_support_format_xar.c index cefb36410e..822ccad71f 100644 --- a/libarchive/archive_read_support_format_xar.c +++ b/libarchive/archive_read_support_format_xar.c @@ -1616,9 +1616,9 @@ decompress(struct archive_read *a, const void **buff, size_t *outbytes, switch (xar->rd_encoding) { case GZIP: xar->stream.next_in = (Bytef *)(uintptr_t)b; - xar->stream.avail_in = avail_in; + xar->stream.avail_in = (uInt)avail_in; xar->stream.next_out = (unsigned char *)outbuff; - xar->stream.avail_out = avail_out; + xar->stream.avail_out = (uInt)avail_out; r = inflate(&(xar->stream), 0); switch (r) { case Z_OK: /* Decompressor made some progress.*/ @@ -1635,9 +1635,9 @@ decompress(struct archive_read *a, const void **buff, size_t *outbytes, #if defined(HAVE_BZLIB_H) && defined(BZ_CONFIG_ERROR) case BZIP2: xar->bzstream.next_in = (char *)(uintptr_t)b; - xar->bzstream.avail_in = avail_in; + 
xar->bzstream.avail_in = (unsigned int)avail_in; xar->bzstream.next_out = (char *)outbuff; - xar->bzstream.avail_out = avail_out; + xar->bzstream.avail_out = (unsigned int)avail_out; r = BZ2_bzDecompress(&(xar->bzstream)); switch (r) { case BZ_STREAM_END: /* Found end of stream. */ @@ -3323,7 +3323,7 @@ expat_read_toc(struct archive_read *a) xar->toc_total += outbytes; PRINT_TOC(d, outbytes); - xr = XML_Parse(parser, d, outbytes, xar->toc_remaining == 0); + xr = XML_Parse(parser, d, (int)outbytes, xar->toc_remaining == 0); __archive_read_consume(a, used); if (xr == XML_STATUS_ERROR) { XML_ParserFree(parser); diff --git a/libarchive/archive_write_set_format_xar.c b/libarchive/archive_write_set_format_xar.c index 2cf655da18..96ef85c694 100644 --- a/libarchive/archive_write_set_format_xar.c +++ b/libarchive/archive_write_set_format_xar.c @@ -796,7 +796,7 @@ xar_finish_entry(struct archive_write *a) if (w > 0) xar->bytes_remaining -= w; else - return (w); + return ((int)w); } file = xar->cur_file; checksum_final(&(xar->e_sumwrk), &(file->data.e_sum)); @@ -1163,7 +1163,7 @@ make_file_entry(struct archive_write *a, xmlTextWriterPtr writer, /* * Make a file name entry, "". 
*/ - l = ll = archive_strlen(&(file->basename)); + l = ll = (int)archive_strlen(&(file->basename)); tmp = malloc(l); if (tmp == NULL) { archive_set_error(&a->archive, ENOMEM, @@ -1189,7 +1189,7 @@ make_file_entry(struct archive_write *a, xmlTextWriterPtr writer, return (ARCHIVE_FATAL); } r = xmlTextWriterWriteBase64(writer, file->basename.s, - 0, archive_strlen(&(file->basename))); + 0, (int)archive_strlen(&(file->basename))); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, @@ -2231,10 +2231,10 @@ get_path_component(char *name, int n, const char *fn) p = strchr(fn, '/'); if (p == NULL) { - if ((l = strlen(fn)) == 0) + if ((l = (int)strlen(fn)) == 0) return (0); } else - l = p - fn; + l = (int)(p - fn); if (l > n -1) return (-1); memcpy(name, fn, l); @@ -2651,10 +2651,10 @@ compression_init_encoder_gzip(struct archive *a, * of ugly hackery to convert a const * pointer to * a non-const pointer. */ strm->next_in = (Bytef *)(uintptr_t)(const void *)lastrm->next_in; - strm->avail_in = lastrm->avail_in; + strm->avail_in = (uInt)lastrm->avail_in; strm->total_in = (uLong)lastrm->total_in; strm->next_out = lastrm->next_out; - strm->avail_out = lastrm->avail_out; + strm->avail_out = (uInt)lastrm->avail_out; strm->total_out = (uLong)lastrm->total_out; if (deflateInit2(strm, level, Z_DEFLATED, (withheader)?15:-15, @@ -2684,10 +2684,10 @@ compression_code_gzip(struct archive *a, * of ugly hackery to convert a const * pointer to * a non-const pointer. */ strm->next_in = (Bytef *)(uintptr_t)(const void *)lastrm->next_in; - strm->avail_in = lastrm->avail_in; + strm->avail_in = (uInt)lastrm->avail_in; strm->total_in = (uLong)lastrm->total_in; strm->next_out = lastrm->next_out; - strm->avail_out = lastrm->avail_out; + strm->avail_out = (uInt)lastrm->avail_out; strm->total_out = (uLong)lastrm->total_out; r = deflate(strm, (action == ARCHIVE_Z_FINISH)? 
Z_FINISH: Z_NO_FLUSH); @@ -2748,11 +2748,11 @@ compression_init_encoder_bzip2(struct archive *a, * of ugly hackery to convert a const * pointer to * a non-const pointer. */ strm->next_in = (char *)(uintptr_t)(const void *)lastrm->next_in; - strm->avail_in = lastrm->avail_in; + strm->avail_in = (unsigned int)lastrm->avail_in; strm->total_in_lo32 = (uint32_t)(lastrm->total_in & 0xffffffff); strm->total_in_hi32 = (uint32_t)(lastrm->total_in >> 32); strm->next_out = (char *)lastrm->next_out; - strm->avail_out = lastrm->avail_out; + strm->avail_out = (unsigned int)lastrm->avail_out; strm->total_out_lo32 = (uint32_t)(lastrm->total_out & 0xffffffff); strm->total_out_hi32 = (uint32_t)(lastrm->total_out >> 32); if (BZ2_bzCompressInit(strm, level, 0, 30) != BZ_OK) { @@ -2781,11 +2781,11 @@ compression_code_bzip2(struct archive *a, * of ugly hackery to convert a const * pointer to * a non-const pointer. */ strm->next_in = (char *)(uintptr_t)(const void *)lastrm->next_in; - strm->avail_in = lastrm->avail_in; + strm->avail_in = (unsigned int)lastrm->avail_in; strm->total_in_lo32 = (uint32_t)(lastrm->total_in & 0xffffffff); strm->total_in_hi32 = (uint32_t)(lastrm->total_in >> 32); strm->next_out = (char *)lastrm->next_out; - strm->avail_out = lastrm->avail_out; + strm->avail_out = (unsigned int)lastrm->avail_out; strm->total_out_lo32 = (uint32_t)(lastrm->total_out & 0xffffffff); strm->total_out_hi32 = (uint32_t)(lastrm->total_out >> 32); r = BZ2_bzCompress(strm, From d33f44ad5ce754d61e4e8e193edc52e50bf2896c Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Wed, 15 May 2024 04:59:03 +0200 Subject: [PATCH 67/98] zip: Fix Visual Studio compiler warnings (#2186) Use casts where appropriate (values cannot exceed data type limits). 
Fixes following warnings: warning C4244: '=': conversion from 'int64_t' to 'long', possible loss of data warning C4244: '=': conversion from 'uint64_t' to 'size_t', possible loss of data --- libarchive/archive_read_support_format_zip.c | 22 ++++++++++---------- libarchive/archive_write_set_format_zip.c | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/libarchive/archive_read_support_format_zip.c b/libarchive/archive_read_support_format_zip.c index ac80a99ad9..689346882e 100644 --- a/libarchive/archive_read_support_format_zip.c +++ b/libarchive/archive_read_support_format_zip.c @@ -1924,7 +1924,7 @@ zip_read_data_zipx_xz(struct archive_read *a, const void **buff, return (ARCHIVE_FATAL); } - in_bytes = zipmin(zip->entry_bytes_remaining, bytes_avail); + in_bytes = (ssize_t)zipmin(zip->entry_bytes_remaining, bytes_avail); zip->zipx_lzma_stream.next_in = compressed_buf; zip->zipx_lzma_stream.avail_in = in_bytes; zip->zipx_lzma_stream.total_in = 0; @@ -1966,14 +1966,14 @@ zip_read_data_zipx_xz(struct archive_read *a, const void **buff, break; } - to_consume = zip->zipx_lzma_stream.total_in; + to_consume = (ssize_t)zip->zipx_lzma_stream.total_in; __archive_read_consume(a, to_consume); zip->entry_bytes_remaining -= to_consume; zip->entry_compressed_bytes_read += to_consume; zip->entry_uncompressed_bytes_read += zip->zipx_lzma_stream.total_out; - *size = zip->zipx_lzma_stream.total_out; + *size = (size_t)zip->zipx_lzma_stream.total_out; *buff = zip->uncompressed_buffer; return (ARCHIVE_OK); @@ -2014,7 +2014,7 @@ zip_read_data_zipx_lzma_alone(struct archive_read *a, const void **buff, } /* Set decompressor parameters. 
*/ - in_bytes = zipmin(zip->entry_bytes_remaining, bytes_avail); + in_bytes = (ssize_t)zipmin(zip->entry_bytes_remaining, bytes_avail); zip->zipx_lzma_stream.next_in = compressed_buf; zip->zipx_lzma_stream.avail_in = in_bytes; @@ -2024,7 +2024,7 @@ zip_read_data_zipx_lzma_alone(struct archive_read *a, const void **buff, /* These lzma_alone streams lack end of stream marker, so let's * make sure the unpacker won't try to unpack more than it's * supposed to. */ - zipmin((int64_t) zip->uncompressed_buffer_size, + (size_t)zipmin((int64_t) zip->uncompressed_buffer_size, zip->entry->uncompressed_size - zip->entry_uncompressed_bytes_read); zip->zipx_lzma_stream.total_out = 0; @@ -2061,7 +2061,7 @@ zip_read_data_zipx_lzma_alone(struct archive_read *a, const void **buff, return (ARCHIVE_FATAL); } - to_consume = zip->zipx_lzma_stream.total_in; + to_consume = (ssize_t)zip->zipx_lzma_stream.total_in; /* Update pointers. */ __archive_read_consume(a, to_consume); @@ -2082,7 +2082,7 @@ zip_read_data_zipx_lzma_alone(struct archive_read *a, const void **buff, } /* Return values. */ - *size = zip->zipx_lzma_stream.total_out; + *size = (size_t)zip->zipx_lzma_stream.total_out; *buff = zip->uncompressed_buffer; /* If we're here, then we're good! */ @@ -2331,7 +2331,7 @@ zip_read_data_zipx_bzip2(struct archive_read *a, const void **buff, return (ARCHIVE_FATAL); } - in_bytes = zipmin(zip->entry_bytes_remaining, bytes_avail); + in_bytes = (ssize_t)zipmin(zip->entry_bytes_remaining, bytes_avail); if(in_bytes < 1) { /* libbz2 doesn't complain when caller feeds avail_in == 0. * It will actually return success in this case, which is @@ -2394,7 +2394,7 @@ zip_read_data_zipx_bzip2(struct archive_read *a, const void **buff, zip->entry_uncompressed_bytes_read += total_out; /* Give libarchive its due. 
*/ - *size = total_out; + *size = (size_t)total_out; *buff = zip->uncompressed_buffer; return ARCHIVE_OK; @@ -2478,7 +2478,7 @@ zip_read_data_zipx_zstd(struct archive_read *a, const void **buff, return (ARCHIVE_FATAL); } - in_bytes = zipmin(zip->entry_bytes_remaining, bytes_avail); + in_bytes = (ssize_t)zipmin(zip->entry_bytes_remaining, bytes_avail); if(in_bytes < 1) { /* zstd doesn't complain when caller feeds avail_in == 0. * It will actually return success in this case, which is @@ -2524,7 +2524,7 @@ zip_read_data_zipx_zstd(struct archive_read *a, const void **buff, zip->entry_uncompressed_bytes_read += total_out; /* Give libarchive its due. */ - *size = total_out; + *size = (size_t)total_out; *buff = zip->uncompressed_buffer; return ARCHIVE_OK; diff --git a/libarchive/archive_write_set_format_zip.c b/libarchive/archive_write_set_format_zip.c index e37e7b5edd..e01cde6871 100644 --- a/libarchive/archive_write_set_format_zip.c +++ b/libarchive/archive_write_set_format_zip.c @@ -906,7 +906,7 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) archive_le32enc(e, (uint32_t)archive_entry_ctime(entry)); e += 4; } - archive_le16enc(ut + 2, e - ut - 4); + archive_le16enc(ut + 2, (uint16_t)(e - ut - 4)); } /* From adf7388139098fe526965571bf1e2fe08947c523 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Sat, 18 May 2024 06:36:22 +0200 Subject: [PATCH 68/98] xar: Switch from int to size_t API (#2195) Most of the functions already use size_t for sizes. Do the same for xml_data. The libraries expat and libxml2 do not support strings larger than 2 GB (limit is less than this) but it fixes one last Visual Studio compiler warning which I missed previously. 
--- libarchive/archive_read_support_format_xar.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libarchive/archive_read_support_format_xar.c b/libarchive/archive_read_support_format_xar.c index 822ccad71f..21cc7d8bb3 100644 --- a/libarchive/archive_read_support_format_xar.c +++ b/libarchive/archive_read_support_format_xar.c @@ -416,7 +416,7 @@ static void unknowntag_end(struct xar *, const char *); static int xml_start(struct archive_read *, const char *, struct xmlattr_list *); static void xml_end(void *, const char *); -static void xml_data(void *, const char *, int); +static void xml_data(void *, const char *, size_t); static int xml_parse_file_flags(struct xar *, const char *); static int xml_parse_file_ext2(struct xar *, const char *); #if defined(HAVE_LIBXML_XMLREADER_H) @@ -2674,7 +2674,7 @@ is_string(const char *known, const char *data, size_t len) } static void -xml_data(void *userData, const char *s, int len) +xml_data(void *userData, const char *s, size_t len) { struct archive_read *a; struct xar *xar; @@ -3280,7 +3280,7 @@ expat_data_cb(void *userData, const XML_Char *s, int len) { struct expat_userData *ud = (struct expat_userData *)userData; - xml_data(ud->archive, s, len); + xml_data(ud->archive, s, (size_t)len); } static int From 17333c27adc2d2c0d3831ae1dc98b48b19579d6d Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Sat, 18 May 2024 06:37:14 +0200 Subject: [PATCH 69/98] cab: Cover all error cases in __archive_read_ahead (#2194) If a short read is performed, fail with truncation error message as well. 
--- libarchive/archive_read_support_format_cab.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libarchive/archive_read_support_format_cab.c b/libarchive/archive_read_support_format_cab.c index 3b552a84de..4ed088d43c 100644 --- a/libarchive/archive_read_support_format_cab.c +++ b/libarchive/archive_read_support_format_cab.c @@ -1682,7 +1682,7 @@ cab_read_ahead_cfdata_lzx(struct archive_read *a, ssize_t *avail) cfdata->uncompressed_size - cab->xstrm.total_out; d = __archive_read_ahead(a, 1, &bytes_avail); - if (bytes_avail <= 0) { + if (d == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated CAB file data"); From 8fc0569bab522212fd593210e5a88e07918cbd5e Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Sat, 18 May 2024 06:43:47 +0200 Subject: [PATCH 70/98] 7zip: Fix NULL pointer dereference on non-zlib builds (#2193) If libarchive is built without zlib support, it is possible to trigger a NULL pointer dereference with specially crafted 7zip files. It takes multiple conditions to be able to reach the issue in crc32 and all 7zip-specific ones are fixed with this PR. --- libarchive/archive_crc32.h | 5 +++++ libarchive/archive_read_support_format_7zip.c | 17 ++++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/libarchive/archive_crc32.h b/libarchive/archive_crc32.h index 98a4e2cf8f..d86a507ce7 100644 --- a/libarchive/archive_crc32.h +++ b/libarchive/archive_crc32.h @@ -30,6 +30,8 @@ #error This header is only to be used internally to libarchive. #endif +#include + /* * When zlib is unavailable, we should still be able to validate * uncompressed zip archives. 
That requires us to be able to compute @@ -46,6 +48,9 @@ crc32(unsigned long crc, const void *_p, size_t len) static volatile int crc_tbl_inited = 0; static unsigned long crc_tbl[256]; + if (_p == NULL) + return (0); + if (!crc_tbl_inited) { for (b = 0; b < 256; ++b) { crc2 = b; diff --git a/libarchive/archive_read_support_format_7zip.c b/libarchive/archive_read_support_format_7zip.c index a59034f698..13c4b071d4 100644 --- a/libarchive/archive_read_support_format_7zip.c +++ b/libarchive/archive_read_support_format_7zip.c @@ -2307,7 +2307,7 @@ read_SubStreamsInfo(struct archive_read *a, struct _7z_substream_info *ss, usizes = ss->unpackSizes; for (i = 0; i < numFolders; i++) { unsigned pack; - uint64_t sum; + uint64_t size, sum; if (f[i].numUnpackStreams == 0) continue; @@ -2317,10 +2317,15 @@ read_SubStreamsInfo(struct archive_read *a, struct _7z_substream_info *ss, for (pack = 1; pack < f[i].numUnpackStreams; pack++) { if (parse_7zip_uint64(a, usizes) < 0) return (-1); + if (*usizes > UINT64_MAX - sum) + return (-1); sum += *usizes++; } } - *usizes++ = folder_uncompressed_size(&f[i]) - sum; + size = folder_uncompressed_size(&f[i]); + if (size < sum) + return (-1); + *usizes++ = size - sum; } if (type == kSize) { @@ -2414,6 +2419,8 @@ read_StreamsInfo(struct archive_read *a, struct _7z_stream_info *si) packPos = si->pi.pos; for (i = 0; i < si->pi.numPackStreams; i++) { si->pi.positions[i] = packPos; + if (packPos > UINT64_MAX - si->pi.sizes[i]) + return (-1); packPos += si->pi.sizes[i]; if (packPos > zip->header_offset) return (-1); @@ -2435,6 +2442,10 @@ read_StreamsInfo(struct archive_read *a, struct _7z_stream_info *si) f = si->ci.folders; for (i = 0; i < si->ci.numFolders; i++) { f[i].packIndex = packIndex; + if (f[i].numPackedStreams > UINT32_MAX) + return (-1); + if (packIndex > UINT32_MAX - (uint32_t)f[i].numPackedStreams) + return (-1); packIndex += (uint32_t)f[i].numPackedStreams; if (packIndex > si->pi.numPackStreams) return (-1); @@ -3141,7 +3152,7 @@ 
get_uncompressed_data(struct archive_read *a, const void **buff, size_t size, /* Copy mode. */ *buff = __archive_read_ahead(a, minimum, &bytes_avail); - if (bytes_avail <= 0) { + if (*buff == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated 7-Zip file data"); From 3438d7387cf984370176291f69fd586be184f3fe Mon Sep 17 00:00:00 2001 From: Wei-Cheng Pan Date: Tue, 28 May 2024 18:40:32 +0900 Subject: [PATCH 71/98] fix(rar): add boundary checks to rgb filter (#2210) `blocklength` should be bigger than `3` (channel count) `byteoffset` should not be bigger than `2` (does not make sense as per the last loop) `src` should not overlap with `dst`. There is no allocation in this function so it should be safe to return early. Security: GHSA-9qqv-q4qw-mf8m --- libarchive/archive_read_support_format_rar.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/libarchive/archive_read_support_format_rar.c b/libarchive/archive_read_support_format_rar.c index c8725bcae4..123813084c 100644 --- a/libarchive/archive_read_support_format_rar.c +++ b/libarchive/archive_read_support_format_rar.c @@ -3681,7 +3681,7 @@ execute_filter_rgb(struct rar_filter *filter, struct rar_virtual_machine *vm) uint8_t *src, *dst; uint32_t i, j; - if (blocklength > PROGRAM_WORK_SIZE / 2 || stride > blocklength) + if (blocklength > PROGRAM_WORK_SIZE / 2 || stride > blocklength || blocklength < 3 || byteoffset > 2) return 0; src = &vm->memory[0]; @@ -3691,6 +3691,13 @@ execute_filter_rgb(struct rar_filter *filter, struct rar_virtual_machine *vm) uint8_t *prev = dst + i - stride; for (j = i; j < blocklength; j += 3) { + /* + * The src block should not overlap with the dst block. + * If so it would be better to consider this archive is broken. 
+ */ + if (src >= dst) + return 0; + if (prev >= dst) { uint32_t delta1 = abs(prev[3] - prev[0]); From 136ce81b3da2f9368d1b0e494d5bd7e684a433b7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 28 May 2024 11:42:32 +0200 Subject: [PATCH 72/98] CI: Bump the all-actions group with 2 updates (#2198) Bumps the all-actions group with 2 updates: `actions/checkout` from 4.1.5 to 4.1.6 `github/codeql-action` from 3.25.5 to 3.25.6 Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/ci.yml | 8 ++++---- .github/workflows/codeql.yml | 8 ++++---- .github/workflows/scorecard.yml | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4dd4ffb682..6e6cdcaa09 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ jobs: matrix: bs: [autotools, cmake] steps: - - uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 + - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - name: Install dependencies run: ./build/ci/github_actions/macos.sh prepare - name: Autogen @@ -57,7 +57,7 @@ jobs: bs: [autotools, cmake] crypto: [mbedtls, nettle, openssl] steps: - - uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 + - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - name: Update apt cache run: sudo apt-get update - name: Install dependencies @@ -98,7 +98,7 @@ jobs: Ubuntu-distcheck: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 + - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - name: Update package definitions run: sudo apt-get update - name: Install dependencies @@ -125,7 +125,7 @@ jobs: matrix: be: [mingw-gcc, msvc] steps: - - uses: 
actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 + - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - name: Install mingw if: ${{ matrix.be=='mingw-gcc' }} run: choco install mingw diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index d2c527da73..d9b6a373be 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -26,18 +26,18 @@ jobs: steps: - name: Checkout - uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 + uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - name: Initialize CodeQL - uses: github/codeql-action/init@b7cec7526559c32f1616476ff32d17ba4c59b2d6 # v3.25.5 + uses: github/codeql-action/init@9fdb3e49720b44c48891d036bb502feb25684276 # v3.25.6 with: languages: ${{ matrix.language }} queries: +security-and-quality - name: Autobuild - uses: github/codeql-action/autobuild@b7cec7526559c32f1616476ff32d17ba4c59b2d6 # v3.25.5 + uses: github/codeql-action/autobuild@9fdb3e49720b44c48891d036bb502feb25684276 # v3.25.6 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@b7cec7526559c32f1616476ff32d17ba4c59b2d6 # v3.25.5 + uses: github/codeql-action/analyze@9fdb3e49720b44c48891d036bb502feb25684276 # v3.25.6 with: category: "/language:${{ matrix.language }}" diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index 7d49d37c07..a5f452ca24 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -29,7 +29,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 + uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 with: persist-credentials: false @@ -60,6 +60,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. 
- name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@b7cec7526559c32f1616476ff32d17ba4c59b2d6 # v3.25.5 + uses: github/codeql-action/upload-sarif@9fdb3e49720b44c48891d036bb502feb25684276 # v3.25.6 with: sarif_file: results.sarif From 2386414abbc9e6f0ec9808f2a5dd067af1f5f528 Mon Sep 17 00:00:00 2001 From: LukeSkyD <100560115+LukeSkyD@users.noreply.github.com> Date: Fri, 31 May 2024 16:11:12 +0200 Subject: [PATCH 73/98] rar reader: fix cast to unsigned int (#2209) Fixes compiler error `C4267: '+=': conversion from 'size_t' to 'unsigned int'. Possible loss of data.` which stops compiling with MSVC. --- libarchive/archive_read_support_format_rar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libarchive/archive_read_support_format_rar.c b/libarchive/archive_read_support_format_rar.c index 123813084c..354cb0d0ba 100644 --- a/libarchive/archive_read_support_format_rar.c +++ b/libarchive/archive_read_support_format_rar.c @@ -3142,7 +3142,7 @@ copy_from_lzss_window_to_unp(struct archive_read *a, const void **buffer, } else { goto fatal; } - rar->unp_offset += length; + rar->unp_offset += (unsigned int) length; if (rar->unp_offset >= rar->unp_buffer_size) *buffer = rar->unp_buffer; else From ed743f63fd478837bec4a15b6cde3f26aec7f38d Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Mon, 3 Jun 2024 01:21:30 +0200 Subject: [PATCH 74/98] Check realloc return values (#2204) If realloc fails, keep track of currently allocated memory instead of provoking memory leaks in error paths. 
--- libarchive/archive_read_support_format_rar.c | 25 +++++++++++++------- libarchive/archive_write_disk_posix.c | 9 ++++--- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/libarchive/archive_read_support_format_rar.c b/libarchive/archive_read_support_format_rar.c index 354cb0d0ba..3f3d7db2bc 100644 --- a/libarchive/archive_read_support_format_rar.c +++ b/libarchive/archive_read_support_format_rar.c @@ -1373,6 +1373,8 @@ read_header(struct archive_read *a, struct archive_entry *entry, struct archive_string_conv *sconv, *fn_sconv; unsigned long crc32_val; int ret = (ARCHIVE_OK), ret2; + char *newptr; + size_t newsize; rar = (struct rar *)(a->format->data); @@ -1519,8 +1521,7 @@ read_header(struct archive_read *a, struct archive_entry *entry, return (ARCHIVE_FATAL); } if (rar->filename_allocated < filename_size * 2 + 2) { - char *newptr; - size_t newsize = filename_size * 2 + 2; + newsize = filename_size * 2 + 2; newptr = realloc(rar->filename, newsize); if (newptr == NULL) { archive_set_error(&a->archive, ENOMEM, @@ -1657,13 +1658,16 @@ read_header(struct archive_read *a, struct archive_entry *entry, rar->cursor++; if (rar->cursor >= rar->nodes) { - rar->nodes++; - if ((rar->dbo = - realloc(rar->dbo, sizeof(*rar->dbo) * rar->nodes)) == NULL) + struct data_block_offsets *newdbo; + + newsize = sizeof(*rar->dbo) * (rar->nodes + 1); + if ((newdbo = realloc(rar->dbo, newsize)) == NULL) { archive_set_error(&a->archive, ENOMEM, "Couldn't allocate memory."); return (ARCHIVE_FATAL); } + rar->dbo = newdbo; + rar->nodes++; rar->dbo[rar->cursor].header_size = header_size; rar->dbo[rar->cursor].start_offset = -1; rar->dbo[rar->cursor].end_offset = -1; @@ -1683,9 +1687,14 @@ read_header(struct archive_read *a, struct archive_entry *entry, return (ARCHIVE_FATAL); } - rar->filename_save = (char*)realloc(rar->filename_save, - filename_size + 1); - memcpy(rar->filename_save, rar->filename, filename_size + 1); + newsize = filename_size + 1; + if ((newptr = 
realloc(rar->filename_save, newsize)) == NULL) + { + archive_set_error(&a->archive, ENOMEM, "Couldn't allocate memory."); + return (ARCHIVE_FATAL); + } + rar->filename_save = newptr; + memcpy(rar->filename_save, rar->filename, newsize); rar->filename_save_size = filename_size; /* Set info for seeking */ diff --git a/libarchive/archive_write_disk_posix.c b/libarchive/archive_write_disk_posix.c index 92db4ff05b..bac906d26d 100644 --- a/libarchive/archive_write_disk_posix.c +++ b/libarchive/archive_write_disk_posix.c @@ -4196,7 +4196,7 @@ copy_xattrs(struct archive_write_disk *a, int tmpfd, int dffd) } for (xattr_i = 0; xattr_i < xattr_size; xattr_i += strlen(xattr_names + xattr_i) + 1) { - char *xattr_val_saved; + char *p; ssize_t s; int f; @@ -4207,15 +4207,14 @@ copy_xattrs(struct archive_write_disk *a, int tmpfd, int dffd) ret = ARCHIVE_WARN; goto exit_xattr; } - xattr_val_saved = xattr_val; - xattr_val = realloc(xattr_val, s); - if (xattr_val == NULL) { + p = realloc(xattr_val, s); + if (p == NULL) { archive_set_error(&a->archive, ENOMEM, "Failed to get metadata(xattr)"); ret = ARCHIVE_WARN; - free(xattr_val_saved); goto exit_xattr; } + xattr_val = p; s = fgetxattr(tmpfd, xattr_names + xattr_i, xattr_val, s, 0, 0); if (s == -1) { archive_set_error(&a->archive, errno, From 582190e4a73f848152ea68c41a76b007cc7bb17f Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Mon, 3 Jun 2024 01:28:45 +0200 Subject: [PATCH 75/98] rar: Fix OOB access with unicode filenames (#2203) Prevent out of boundary accesses by revalidating offset every time it is incremented. 
--- libarchive/archive_read_support_format_rar.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/libarchive/archive_read_support_format_rar.c b/libarchive/archive_read_support_format_rar.c index 3f3d7db2bc..054b47ad0c 100644 --- a/libarchive/archive_read_support_format_rar.c +++ b/libarchive/archive_read_support_format_rar.c @@ -1545,7 +1545,7 @@ read_header(struct archive_read *a, struct archive_entry *entry, fn_end = filename_size * 2; filename_size = 0; offset = (unsigned)strlen(filename) + 1; - highbyte = *(p + offset++); + highbyte = offset >= end ? 0 : *(p + offset++); flagbits = 0; flagbyte = 0; while (offset < end && filename_size < fn_end) @@ -1560,14 +1560,22 @@ read_header(struct archive_read *a, struct archive_entry *entry, switch((flagbyte >> flagbits) & 3) { case 0: + if (offset >= end) + continue; filename[filename_size++] = '\0'; filename[filename_size++] = *(p + offset++); break; case 1: + if (offset >= end) + continue; filename[filename_size++] = highbyte; filename[filename_size++] = *(p + offset++); break; case 2: + if (offset >= end - 1) { + offset = end; + continue; + } filename[filename_size++] = *(p + offset + 1); filename[filename_size++] = *(p + offset); offset += 2; @@ -1575,9 +1583,15 @@ read_header(struct archive_read *a, struct archive_entry *entry, case 3: { char extra, high; - uint8_t length = *(p + offset++); + uint8_t length; + + if (offset >= end) + continue; + length = *(p + offset++); if (length & 0x80) { + if (offset >= end) + continue; extra = *(p + offset++); high = (char)highbyte; } else From 3af83ee6e7f513aa1aa3eec1998fc3c65b5131f5 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Mon, 3 Jun 2024 01:30:14 +0200 Subject: [PATCH 76/98] 7zip: Fix DONT_FAIL_ON_CRC_ERROR check (#2197) This actually disabled CRC checks on regular builds and enabled it if CRC checks were supposed to be disabled. All other occurrences properly use ifndef instead of ifdef. 
--- libarchive/archive_read_support_format_7zip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libarchive/archive_read_support_format_7zip.c b/libarchive/archive_read_support_format_7zip.c index 13c4b071d4..8573ff18f4 100644 --- a/libarchive/archive_read_support_format_7zip.c +++ b/libarchive/archive_read_support_format_7zip.c @@ -3010,7 +3010,7 @@ slurp_central_directory(struct archive_read *a, struct _7zip *zip, /* CRC check. */ if (crc32(0, (const unsigned char *)p + 12, 20) != archive_le32dec(p + 8)) { -#ifdef DONT_FAIL_ON_CRC_ERROR +#ifndef DONT_FAIL_ON_CRC_ERROR archive_set_error(&a->archive, -1, "Header CRC error"); return (ARCHIVE_FATAL); #endif From 1aafde5923a5991dce13a042a0921f16ff648b1a Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Mon, 3 Jun 2024 01:35:41 +0200 Subject: [PATCH 77/98] Fix EOF handling of __archive_read_ahead (#2196) Reaching EOF for first time sets the correct amount of available bytes, but each subsequent call returns 0. Do not forget that the copy buffer can already contain data and return the amount of bytes left in there. See added test case. 
--- Makefile.am | 1 + libarchive/archive_private.h | 2 + libarchive/archive_read.c | 2 +- libarchive/test/CMakeLists.txt | 1 + libarchive/test/test_archive_read.c | 63 +++++++++++++++++++++++++++++ 5 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 libarchive/test/test_archive_read.c diff --git a/Makefile.am b/Makefile.am index 8a53329141..eb88654fce 100644 --- a/Makefile.am +++ b/Makefile.am @@ -380,6 +380,7 @@ libarchive_test_SOURCES= \ libarchive/test/test_archive_match_path.c \ libarchive/test/test_archive_match_time.c \ libarchive/test/test_archive_pathmatch.c \ + libarchive/test/test_archive_read.c \ libarchive/test/test_archive_read_add_passphrase.c \ libarchive/test/test_archive_read_close_twice.c \ libarchive/test/test_archive_read_close_twice_open_fd.c \ diff --git a/libarchive/archive_private.h b/libarchive/archive_private.h index 0f05169b7d..5c5b5607ae 100644 --- a/libarchive/archive_private.h +++ b/libarchive/archive_private.h @@ -27,8 +27,10 @@ #define ARCHIVE_PRIVATE_H_INCLUDED #ifndef __LIBARCHIVE_BUILD +#ifndef __LIBARCHIVE_TEST #error This header is only to be used internally to libarchive. 
#endif +#endif #if HAVE_ICONV_H #include diff --git a/libarchive/archive_read.c b/libarchive/archive_read.c index d71fc2b7cc..1fa35853c5 100644 --- a/libarchive/archive_read.c +++ b/libarchive/archive_read.c @@ -1382,7 +1382,7 @@ __archive_read_filter_ahead(struct archive_read_filter *filter, if (filter->client_avail <= 0) { if (filter->end_of_file) { if (avail != NULL) - *avail = 0; + *avail = filter->avail; return (NULL); } bytes_read = (filter->vtable->read)(filter, diff --git a/libarchive/test/CMakeLists.txt b/libarchive/test/CMakeLists.txt index bef739ad47..6d28fc904a 100644 --- a/libarchive/test/CMakeLists.txt +++ b/libarchive/test/CMakeLists.txt @@ -24,6 +24,7 @@ IF(ENABLE_TEST) test_archive_match_path.c test_archive_match_time.c test_archive_pathmatch.c + test_archive_read.c test_archive_read_add_passphrase.c test_archive_read_close_twice.c test_archive_read_close_twice_open_fd.c diff --git a/libarchive/test/test_archive_read.c b/libarchive/test/test_archive_read.c new file mode 100644 index 0000000000..583e5166a0 --- /dev/null +++ b/libarchive/test/test_archive_read.c @@ -0,0 +1,63 @@ +/*- + * Copyright (c) 2024 Tobias Stoeckmann + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "test.h" + +#define __LIBARCHIVE_TEST +#include "archive_read_private.h" + +static char buf[1024]; + +DEFINE_TEST(test_archive_read_ahead_eof) +{ + struct archive *a; + struct archive_read *ar; + ssize_t avail; + + /* prepare a reader of raw in-memory data */ + assert((a = archive_read_new()) != NULL); + ar = (struct archive_read *)a; + + assertA(0 == archive_read_support_format_raw(a)); + assertA(0 == archive_read_open_memory(a, buf, sizeof(buf))); + + /* perform a read which can be fulfilled */ + assert(NULL != __archive_read_ahead(ar, sizeof(buf) - 1, &avail)); + assertEqualInt(sizeof(buf), avail); + + /* perform a read which cannot be fulfilled due to EOF */ + assert(NULL == __archive_read_ahead(ar, sizeof(buf) + 1, &avail)); + assertEqualInt(sizeof(buf), avail); + + /* perform the same read again */ + assert(NULL == __archive_read_ahead(ar, sizeof(buf) + 1, &avail)); + assertEqualInt(sizeof(buf), avail); + + /* perform another read which can be fulfilled */ + assert(NULL != __archive_read_ahead(ar, sizeof(buf), &avail)); + assertEqualInt(sizeof(buf), avail); + + assert(0 == archive_read_free(a)); +} From 9cbe1627ca89e0a798072964eed5595c5d6005a3 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Mon, 3 Jun 2024 01:43:38 +0200 Subject: [PATCH 78/98] rar5: Fix Visual Studio compiler warnings (#2187) Turning window_mask into ssize_t and adjusting the signature of circular_memcpy reduces the amount of required casts. 
Since window_mask depends on value window_size, which is ssize_t, this change is safe. Also turned extra_data_size into int64_t, which is a no-op on 64 bit systems but prevents 32 bit systems from truncating these huge values in archives. Since these systems most likely have large file support, this could have meant truncation -- in theory. --- libarchive/archive_read_support_format_rar5.c | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/libarchive/archive_read_support_format_rar5.c b/libarchive/archive_read_support_format_rar5.c index e8846a5b0d..e06effe8b4 100644 --- a/libarchive/archive_read_support_format_rar5.c +++ b/libarchive/archive_read_support_format_rar5.c @@ -220,7 +220,7 @@ struct comp_state { decompression. */ uint8_t* filtered_buf; /* Buffer used when applying filters. */ const uint8_t* block_buf; /* Buffer used when merging blocks. */ - size_t window_mask; /* Convenience field; window_size - 1. */ + ssize_t window_mask; /* Convenience field; window_size - 1. */ int64_t write_ptr; /* This amount of data has been unpacked in the window buffer. */ int64_t last_write_ptr; /* This amount of data has been stored in @@ -505,7 +505,7 @@ static inline struct rar5* get_context(struct archive_read* a) { } /* Convenience functions used by filter implementations. 
*/ -static void circular_memcpy(uint8_t* dst, uint8_t* window, const uint64_t mask, +static void circular_memcpy(uint8_t* dst, uint8_t* window, const ssize_t mask, int64_t start, int64_t end) { if((start & mask) > (end & mask)) { @@ -709,7 +709,7 @@ static int run_filter(struct archive_read* a, struct filter_info* flt) { static void push_data(struct archive_read* a, struct rar5* rar, const uint8_t* buf, int64_t idx_begin, int64_t idx_end) { - const uint64_t wmask = rar->cstate.window_mask; + const ssize_t wmask = rar->cstate.window_mask; const ssize_t solid_write_ptr = (rar->cstate.solid_offset + rar->cstate.last_write_ptr) & wmask; @@ -1246,7 +1246,7 @@ static int process_main_locator_extra_block(struct archive_read* a, } static int parse_file_extra_hash(struct archive_read* a, struct rar5* rar, - ssize_t* extra_data_size) + int64_t* extra_data_size) { size_t hash_type = 0; size_t value_len; @@ -1296,7 +1296,7 @@ static uint64_t time_win_to_unix(uint64_t win_time) { } static int parse_htime_item(struct archive_read* a, char unix_time, - uint64_t* where, ssize_t* extra_data_size) + uint64_t* where, int64_t* extra_data_size) { if(unix_time) { uint32_t time_val; @@ -1318,7 +1318,7 @@ static int parse_htime_item(struct archive_read* a, char unix_time, } static int parse_file_extra_version(struct archive_read* a, - struct archive_entry* e, ssize_t* extra_data_size) + struct archive_entry* e, int64_t* extra_data_size) { size_t flags = 0; size_t version = 0; @@ -1372,7 +1372,7 @@ static int parse_file_extra_version(struct archive_read* a, } static int parse_file_extra_htime(struct archive_read* a, - struct archive_entry* e, struct rar5* rar, ssize_t* extra_data_size) + struct archive_entry* e, struct rar5* rar, int64_t* extra_data_size) { char unix_time = 0; size_t flags = 0; @@ -1425,7 +1425,7 @@ static int parse_file_extra_htime(struct archive_read* a, } static int parse_file_extra_redir(struct archive_read* a, - struct archive_entry* e, struct rar5* rar, ssize_t* 
extra_data_size) + struct archive_entry* e, struct rar5* rar, int64_t* extra_data_size) { uint64_t value_size = 0; size_t target_size = 0; @@ -1496,7 +1496,7 @@ static int parse_file_extra_redir(struct archive_read* a, } static int parse_file_extra_owner(struct archive_read* a, - struct archive_entry* e, ssize_t* extra_data_size) + struct archive_entry* e, int64_t* extra_data_size) { uint64_t flags = 0; uint64_t value_size = 0; @@ -1576,15 +1576,15 @@ static int parse_file_extra_owner(struct archive_read* a, } static int process_head_file_extra(struct archive_read* a, - struct archive_entry* e, struct rar5* rar, ssize_t extra_data_size) + struct archive_entry* e, struct rar5* rar, int64_t extra_data_size) { - size_t extra_field_size; - size_t extra_field_id = 0; + uint64_t extra_field_size; + uint64_t extra_field_id = 0; int ret = ARCHIVE_FATAL; - size_t var_size; + uint64_t var_size; while(extra_data_size > 0) { - if(!read_var_sized(a, &extra_field_size, &var_size)) + if(!read_var(a, &extra_field_size, &var_size)) return ARCHIVE_EOF; extra_data_size -= var_size; @@ -1592,7 +1592,7 @@ static int process_head_file_extra(struct archive_read* a, return ARCHIVE_EOF; } - if(!read_var_sized(a, &extra_field_id, &var_size)) + if(!read_var(a, &extra_field_id, &var_size)) return ARCHIVE_EOF; extra_data_size -= var_size; @@ -1642,7 +1642,7 @@ static int process_head_file_extra(struct archive_read* a, static int process_head_file(struct archive_read* a, struct rar5* rar, struct archive_entry* entry, size_t block_flags) { - ssize_t extra_data_size = 0; + int64_t extra_data_size = 0; size_t data_size = 0; size_t file_flags = 0; size_t file_attr = 0; @@ -1682,12 +1682,12 @@ static int process_head_file(struct archive_read* a, struct rar5* rar, } if(block_flags & HFL_EXTRA_DATA) { - size_t edata_size = 0; - if(!read_var_sized(a, &edata_size, NULL)) + uint64_t edata_size = 0; + if(!read_var(a, &edata_size, NULL)) return ARCHIVE_EOF; /* Intentional type cast from unsigned to signed. 
*/ - extra_data_size = (ssize_t) edata_size; + extra_data_size = (int64_t) edata_size; } if(block_flags & HFL_DATA) { @@ -1784,7 +1784,7 @@ static int process_head_file(struct archive_read* a, struct rar5* rar, * that its size will match new window_size. */ uint8_t* new_window_buf = - realloc(rar->cstate.window_buf, window_size); + realloc(rar->cstate.window_buf, (size_t) window_size); if(!new_window_buf) { archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, @@ -1980,7 +1980,7 @@ static int process_head_main(struct archive_read* a, struct rar5* rar, struct archive_entry* entry, size_t block_flags) { int ret; - size_t extra_data_size = 0; + uint64_t extra_data_size = 0; size_t extra_field_size = 0; size_t extra_field_id = 0; size_t archive_flags = 0; @@ -2002,7 +2002,7 @@ static int process_head_main(struct archive_read* a, struct rar5* rar, (void) entry; if(block_flags & HFL_EXTRA_DATA) { - if(!read_var_sized(a, &extra_data_size, NULL)) + if(!read_var(a, &extra_data_size, NULL)) return ARCHIVE_EOF; } else { extra_data_size = 0; @@ -2983,7 +2983,7 @@ static int decode_code_length(struct archive_read* a, struct rar5* rar, static int copy_string(struct archive_read* a, int len, int dist) { struct rar5* rar = get_context(a); - const uint64_t cmask = rar->cstate.window_mask; + const ssize_t cmask = rar->cstate.window_mask; const uint64_t write_ptr = rar->cstate.write_ptr + rar->cstate.solid_offset; int i; From edbb1fd452dc8f5ea05a3ceb8614128ebe199ccc Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Mon, 3 Jun 2024 01:49:26 +0200 Subject: [PATCH 79/98] Plug memory leaks (#2183) The parsers for 7zip and xar contain memory leaks. Also the testsuite 7zip leaks memory. Plug them. Noticed while running test suite with address sanitizer on Linux. 
--- libarchive/archive_read_support_format_7zip.c | 4 ++++ libarchive/archive_read_support_format_xar.c | 9 +++++++-- libarchive/test/test_read_format_7zip.c | 2 ++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/libarchive/archive_read_support_format_7zip.c b/libarchive/archive_read_support_format_7zip.c index 8573ff18f4..634521d952 100644 --- a/libarchive/archive_read_support_format_7zip.c +++ b/libarchive/archive_read_support_format_7zip.c @@ -1765,6 +1765,10 @@ free_decompression(struct archive_read *a, struct _7zip *zip) } zip->stream_valid = 0; } +#endif +#ifdef HAVE_ZSTD_H + if (zip->zstdstream_valid) + ZSTD_freeDStream(zip->zstd_dstream); #endif if (zip->ppmd7_valid) { __archive_ppmd7_functions.Ppmd7_Free( diff --git a/libarchive/archive_read_support_format_xar.c b/libarchive/archive_read_support_format_xar.c index 21cc7d8bb3..b9bef05161 100644 --- a/libarchive/archive_read_support_format_xar.c +++ b/libarchive/archive_read_support_format_xar.c @@ -3190,8 +3190,11 @@ xml2_read_toc(struct archive_read *a) if (r == ARCHIVE_OK) r = xml_start(a, name, &list); xmlattr_cleanup(&list); - if (r != ARCHIVE_OK) + if (r != ARCHIVE_OK) { + xmlFreeTextReader(reader); + xmlCleanupParser(); return (r); + } if (empty) xml_end(a, name); break; @@ -3316,8 +3319,10 @@ expat_read_toc(struct archive_read *a) d = NULL; r = rd_contents(a, &d, &outbytes, &used, xar->toc_remaining); - if (r != ARCHIVE_OK) + if (r != ARCHIVE_OK) { + XML_ParserFree(parser); return (r); + } xar->toc_remaining -= used; xar->offset += used; xar->toc_total += outbytes; diff --git a/libarchive/test/test_read_format_7zip.c b/libarchive/test/test_read_format_7zip.c index 9f76705dea..cff82f2c3d 100644 --- a/libarchive/test/test_read_format_7zip.c +++ b/libarchive/test/test_read_format_7zip.c @@ -1050,6 +1050,7 @@ test_arm_filter(const char *refname) extract_reference_file(refname); + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); assert((a = archive_read_new()) != NULL); assertEqualIntA(a, 
ARCHIVE_OK, archive_read_support_filter_all(a)); assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); @@ -1123,6 +1124,7 @@ test_arm64_filter(const char *refname) extract_reference_file(refname); + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); assert((a = archive_read_new()) != NULL); assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); From 575e2eab4bf04c9092fcf668d72e1b37fdbd5083 Mon Sep 17 00:00:00 2001 From: Lukas Javorsky Date: Mon, 3 Jun 2024 01:50:22 +0200 Subject: [PATCH 80/98] Fix handling a negative bytes_per_block values (#2206) Related issue: https://github.com/libarchive/libarchive/issues/2185 --- libarchive/archive_write.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/libarchive/archive_write.c b/libarchive/archive_write.c index b70bc785c7..01c6843613 100644 --- a/libarchive/archive_write.c +++ b/libarchive/archive_write.c @@ -129,6 +129,10 @@ archive_write_new(void) int archive_write_set_bytes_per_block(struct archive *_a, int bytes_per_block) { + if (bytes_per_block < 0) { + // Do nothing if the bytes_per_block is negative + return 0; + } struct archive_write *a = (struct archive_write *)_a; archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC, ARCHIVE_STATE_NEW, "archive_write_set_bytes_per_block"); @@ -137,7 +141,7 @@ archive_write_set_bytes_per_block(struct archive *_a, int bytes_per_block) } /* - * Get the current block size. -1 if it has never been set. + * Get the current block size. 
*/ int archive_write_get_bytes_per_block(struct archive *_a) @@ -145,6 +149,10 @@ archive_write_get_bytes_per_block(struct archive *_a) struct archive_write *a = (struct archive_write *)_a; archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC, ARCHIVE_STATE_ANY, "archive_write_get_bytes_per_block"); + if (a->bytes_per_block < 0) { + // Don't return a negative value + return 1; + } return (a->bytes_per_block); } From 9a3bce888bce22cd7ddf9dbc7d3f184c99b1fdf3 Mon Sep 17 00:00:00 2001 From: Tobias Stoeckmann Date: Mon, 3 Jun 2024 05:06:31 +0200 Subject: [PATCH 81/98] rar: Fix Visual Studio compiler warning (#2188) Fixes the following warning: warning C4244: '=': conversion from 'int64_t' to 'size_t', possible loss of data --- libarchive/archive_read_support_format_rar.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libarchive/archive_read_support_format_rar.c b/libarchive/archive_read_support_format_rar.c index 054b47ad0c..157836b421 100644 --- a/libarchive/archive_read_support_format_rar.c +++ b/libarchive/archive_read_support_format_rar.c @@ -3388,7 +3388,7 @@ run_filters(struct archive_read *a) if (filters == NULL || filter == NULL) return (0); - start = filters->filterstart; + start = (size_t)filters->filterstart; end = start + filter->blocklength; filters->filterstart = INT64_MAX; @@ -3428,7 +3428,7 @@ run_filters(struct archive_read *a) ret = copy_from_lzss_window(a, filters->vm->memory, start, filter->blocklength); if (ret != ARCHIVE_OK) return 0; - if (!execute_filter(a, filter, filters->vm, rar->offset)) + if (!execute_filter(a, filter, filters->vm, (size_t)rar->offset)) return 0; lastfilteraddress = filter->filteredblockaddress; @@ -3440,7 +3440,7 @@ run_filters(struct archive_read *a) while ((filter = filters->stack) != NULL && (int64_t)filter->blockstartpos == filters->filterstart && filter->blocklength == lastfilterlength) { memmove(&filters->vm->memory[0], &filters->vm->memory[lastfilteraddress], lastfilterlength); - if 
(!execute_filter(a, filter, filters->vm, rar->offset)) + if (!execute_filter(a, filter, filters->vm, (size_t)rar->offset)) return 0; lastfilteraddress = filter->filteredblockaddress; From f3a5248bb533ce9c08624abbda11d611aea7bb4b Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Mon, 3 Jun 2024 00:59:10 -0700 Subject: [PATCH 82/98] Test on FreeBSD 13.3 (#2216) CI: Update FreeBSD 13 to 13.3 --- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 9b1c5a72c2..02ea6fabb9 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -12,7 +12,7 @@ FreeBSD_task: freebsd_instance: image_family: freebsd-14-0 freebsd_instance: - image_family: freebsd-13-2 + image_family: freebsd-13-3 prepare_script: - ./build/ci/cirrus_ci/ci.sh prepare configure_script: From 6c467b7753fa8bd2c22dda280802d057ac06d9cc Mon Sep 17 00:00:00 2001 From: "Dustin L. Howett" Date: Mon, 3 Jun 2024 22:27:29 -0500 Subject: [PATCH 83/98] Fix tar -w on Windows (#2219) The tar utility reads from stderr to receive user input even when stdin is a pipe. That is unfortunately unsupported on Windows. The nearest equivalent is to reopen and read from the console input handle. Closes #2215 --- tar/util.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tar/util.c b/tar/util.c index 37c3a23231..85060461cc 100644 --- a/tar/util.c +++ b/tar/util.c @@ -234,6 +234,7 @@ yes(const char *fmt, ...) char buff[32]; char *p; ssize_t l; + int read_fd = 2; /* stderr */ va_list ap; va_start(ap, fmt); @@ -242,7 +243,24 @@ yes(const char *fmt, ...) fprintf(stderr, " (y/N)? "); fflush(stderr); - l = read(2, buff, sizeof(buff) - 1); +#if defined(_WIN32) && !defined(__CYGWIN__) + /* To be resilient when stdin is a pipe, bsdtar prefers to read from + * stderr. On Windows, stderr cannot be read. The nearest "piping + * resilient" equivalent is reopening the console input handle. 
+ */ + read_fd = _open("CONIN$", O_RDONLY); + if (read_fd < 0) { + fprintf(stderr, "Keyboard read failed\n"); + exit(1); + } +#endif + + l = read(read_fd, buff, sizeof(buff) - 1); + +#if defined(_WIN32) && !defined(__CYGWIN__) + _close(read_fd); +#endif + if (l < 0) { fprintf(stderr, "Keyboard read failed\n"); exit(1); From 89a01843ee090db671670dc22e1996a2bd625e3d Mon Sep 17 00:00:00 2001 From: Duncan Horn <40036384+dunhor@users.noreply.github.com> Date: Sat, 8 Jun 2024 12:41:04 -0700 Subject: [PATCH 84/98] Fix compilation when using Clang in "MSVC mode" (#2221) When using Clang in "MSVC mode" (i.e. clang-cl), command line arguments are interpreted as MSVC would interpret them, at least when there are conflicts. This means that `-Wall` - potentially among other switches - is interpreted _dramatically_ differently by clang-cl compared to "normal" Clang. In CMake, this can be detected by testing for `if (MSVC)` in addition to compiler id test, which is what I do here. Note: this is a partial cherry-pick from #2095, which I'm going to go through and break into smaller pieces in hopes of getting some things in while discussion of other things can continue. --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 822056b910..51c8c08141 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,7 +107,7 @@ endif () # aggressive about diagnosing build problems; this can get # relaxed somewhat in final shipping versions. IF (CMAKE_C_COMPILER_ID MATCHES "^GNU$" OR - CMAKE_C_COMPILER_ID MATCHES "^Clang$") + CMAKE_C_COMPILER_ID MATCHES "^Clang$" AND NOT MSVC) SET(CMAKE_REQUIRED_FLAGS "-Wall -Wformat -Wformat-security") ################################################################# # Set compile flags for all build types. 
@@ -144,7 +144,7 @@ IF (CMAKE_C_COMPILER_ID MATCHES "^GNU$" OR SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-dead_strip") ENDIF(NOT CMAKE_SYSTEM_NAME MATCHES "Darwin") ENDIF (CMAKE_C_COMPILER_ID MATCHES "^GNU$" OR - CMAKE_C_COMPILER_ID MATCHES "^Clang$") + CMAKE_C_COMPILER_ID MATCHES "^Clang$" AND NOT MSVC) IF (CMAKE_C_COMPILER_ID MATCHES "^XL$") SET(CMAKE_C_COMPILER "xlc_r") SET(CMAKE_REQUIRED_FLAGS "-qflag=e:e -qformat=sec") From 3f3b3efec27ef8c9587b5c725345ff235b98820a Mon Sep 17 00:00:00 2001 From: Sevan Janiyan Date: Tue, 11 Jun 2024 04:42:13 +0100 Subject: [PATCH 85/98] Always use our supplied la_queue.h (#2222) On legacy systems the OS supplied `sys/queue.h` may lack the required macros, so to avoid having to verify if the version of queue.h is of use, opt to always use `la_queue.h` which will match expectations. Allows libarchive to build on legacy Darwin where `STAILQ_FOREACH` would be missing from `sys/queue.h`. Resolves #2220 --- CMakeLists.txt | 1 - build/cmake/config.h.in | 3 --- configure.ac | 2 +- libarchive/config_freebsd.h | 1 - unzip/bsdunzip.c | 4 ---- 5 files changed, 1 insertion(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 51c8c08141..3c86789c8b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -743,7 +743,6 @@ LA_CHECK_INCLUDE_FILE("sys/mkdev.h" HAVE_SYS_MKDEV_H) LA_CHECK_INCLUDE_FILE("sys/mount.h" HAVE_SYS_MOUNT_H) LA_CHECK_INCLUDE_FILE("sys/param.h" HAVE_SYS_PARAM_H) LA_CHECK_INCLUDE_FILE("sys/poll.h" HAVE_SYS_POLL_H) -LA_CHECK_INCLUDE_FILE("sys/queue.h" HAVE_SYS_QUEUE_H) LA_CHECK_INCLUDE_FILE("sys/richacl.h" HAVE_SYS_RICHACL_H) LA_CHECK_INCLUDE_FILE("sys/select.h" HAVE_SYS_SELECT_H) LA_CHECK_INCLUDE_FILE("sys/stat.h" HAVE_SYS_STAT_H) diff --git a/build/cmake/config.h.in b/build/cmake/config.h.in index d47694c0c1..d6e5487921 100644 --- a/build/cmake/config.h.in +++ b/build/cmake/config.h.in @@ -1132,9 +1132,6 @@ typedef uint64_t uintmax_t; /* Define to 1 if you have the <sys/poll.h> header file. 
*/ #cmakedefine HAVE_SYS_POLL_H 1 -/* Define to 1 if you have the <sys/queue.h> header file. */ -#cmakedefine HAVE_SYS_QUEUE_H 1 - /* Define to 1 if you have the <sys/richacl.h> header file. */ #cmakedefine HAVE_SYS_RICHACL_H 1 diff --git a/configure.ac b/configure.ac index c4c219f1cc..7250b686fa 100644 --- a/configure.ac +++ b/configure.ac @@ -362,7 +362,7 @@ AC_CHECK_HEADERS([locale.h membership.h paths.h poll.h pthread.h pwd.h]) AC_CHECK_HEADERS([readpassphrase.h signal.h spawn.h]) AC_CHECK_HEADERS([stdarg.h stdint.h stdlib.h string.h]) AC_CHECK_HEADERS([sys/acl.h sys/cdefs.h sys/ea.h sys/extattr.h]) -AC_CHECK_HEADERS([sys/ioctl.h sys/mkdev.h sys/mount.h sys/queue.h]) +AC_CHECK_HEADERS([sys/ioctl.h sys/mkdev.h sys/mount.h]) AC_CHECK_HEADERS([sys/param.h sys/poll.h sys/richacl.h]) AC_CHECK_HEADERS([sys/select.h sys/statfs.h sys/statvfs.h sys/sysmacros.h]) AC_CHECK_HEADERS([sys/time.h sys/utime.h sys/utsname.h sys/vfs.h sys/xattr.h]) diff --git a/libarchive/config_freebsd.h b/libarchive/config_freebsd.h index d0f3e2300c..a1bf0dfe9f 100644 --- a/libarchive/config_freebsd.h +++ b/libarchive/config_freebsd.h @@ -203,7 +203,6 @@ #define HAVE_SYS_MOUNT_H 1 #define HAVE_SYS_PARAM_H 1 #define HAVE_SYS_POLL_H 1 -#define HAVE_SYS_QUEUE_H 1 #define HAVE_SYS_SELECT_H 1 #define HAVE_SYS_STATVFS_H 1 #define HAVE_SYS_STAT_H 1 diff --git a/unzip/bsdunzip.c b/unzip/bsdunzip.c index cec1810483..50e08e3995 100644 --- a/unzip/bsdunzip.c +++ b/unzip/bsdunzip.c @@ -36,11 +36,7 @@ #include "bsdunzip_platform.h" -#ifdef HAVE_SYS_QUEUE_H -#include <sys/queue.h> -#else #include "la_queue.h" -#endif #ifdef HAVE_SYS_STAT_H #include <sys/stat.h> #endif From 13afc8cbde82b559f15f7d70c3d55a89f7425d01 Mon Sep 17 00:00:00 2001 From: Duncan Horn <40036384+dunhor@users.noreply.github.com> Date: Mon, 10 Jun 2024 21:23:13 -0700 Subject: [PATCH 86/98] Update archive_entry_link_resolver to copy the "wide" pathname for hardlinks on Windows (#2225) On Windows, if you are using `archive_entry_link_resolver` and give it an entry that links to a past entry whose 
pathname was set using a "wide" string that cannot be represented by the current locale (i.e. WCS -> MBS conversion fails), this code will crash due to a null pointer read. This updates to use the `_w` function instead on Windows. Note: this is a partial cherry-pick from https://github.com/libarchive/libarchive/pull/2095, which I'm going to go through and break into smaller pieces in hopes of getting some things in while discussion of other things can continue. --- libarchive/archive_entry_link_resolver.c | 15 ++++++++ libarchive/test/test_link_resolver.c | 45 ++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/libarchive/archive_entry_link_resolver.c b/libarchive/archive_entry_link_resolver.c index 6c61734302..c2fd6895f2 100644 --- a/libarchive/archive_entry_link_resolver.c +++ b/libarchive/archive_entry_link_resolver.c @@ -201,16 +201,26 @@ archive_entry_linkify(struct archive_entry_linkresolver *res, le = find_entry(res, *e); if (le != NULL) { archive_entry_unset_size(*e); +#if defined(_WIN32) && !defined(__CYGWIN__) + archive_entry_copy_hardlink_w(*e, + archive_entry_pathname_w(le->canonical)); +#else archive_entry_copy_hardlink(*e, archive_entry_pathname(le->canonical)); +#endif } else insert_entry(res, *e); return; case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE: le = find_entry(res, *e); if (le != NULL) { +#if defined(_WIN32) && !defined(__CYGWIN__) + archive_entry_copy_hardlink_w(*e, + archive_entry_pathname_w(le->canonical)); +#else archive_entry_copy_hardlink(*e, archive_entry_pathname(le->canonical)); +#endif } else insert_entry(res, *e); return; @@ -229,8 +239,13 @@ archive_entry_linkify(struct archive_entry_linkresolver *res, le->entry = t; /* Make the old entry into a hardlink. 
*/ archive_entry_unset_size(*e); +#if defined(_WIN32) && !defined(__CYGWIN__) + archive_entry_copy_hardlink_w(*e, + archive_entry_pathname_w(le->canonical)); +#else archive_entry_copy_hardlink(*e, archive_entry_pathname(le->canonical)); +#endif /* If we ran out of links, return the * final entry as well. */ if (le->links == 0) { diff --git a/libarchive/test/test_link_resolver.c b/libarchive/test/test_link_resolver.c index 5bea9a463b..6c6230c4d0 100644 --- a/libarchive/test/test_link_resolver.c +++ b/libarchive/test/test_link_resolver.c @@ -202,3 +202,48 @@ DEFINE_TEST(test_link_resolver) test_linkify_old_cpio(); test_linkify_new_cpio(); } + +DEFINE_TEST(test_link_resolver_unicode_win) +{ +#if !defined(_WIN32) || defined(__CYGWIN__) + skipping("This test is meant to verify unicode string handling" + " on Windows with UTF-16 names"); + return; +#else + struct archive_entry *entry, *e2; + struct archive_entry_linkresolver *resolver; + + /* Initialize the resolver. */ + assert(NULL != (resolver = archive_entry_linkresolver_new())); + archive_entry_linkresolver_set_strategy(resolver, + ARCHIVE_FORMAT_TAR_USTAR); + + /* Create an entry with a unicode filename and 2 links. */ + assert(NULL != (entry = archive_entry_new())); + archive_entry_copy_pathname_w(entry, L"\u4f60\u597d.txt"); + archive_entry_set_ino(entry, 1); + archive_entry_set_dev(entry, 2); + archive_entry_set_nlink(entry, 2); + archive_entry_set_size(entry, 10); + archive_entry_linkify(resolver, &entry, &e2); + + /* Shouldn't be altered, since it wasn't seen before. */ + assert(e2 == NULL); + assertEqualWString(L"\u4f60\u597d.txt", archive_entry_pathname_w(entry)); + assertEqualWString(NULL, archive_entry_hardlink_w(entry)); + assertEqualInt(10, archive_entry_size(entry)); + + /* Link to the same file contents, but a new unicode name. */ + archive_entry_copy_pathname_w(entry, L"\u518d\u89c1.txt"); + archive_entry_linkify(resolver, &entry, &e2); + + /* Size & link path should have changed. 
*/ + assert(e2 == NULL); + assertEqualWString(L"\u518d\u89c1.txt", archive_entry_pathname_w(entry)); + assertEqualWString(L"\u4f60\u597d.txt", archive_entry_hardlink_w(entry)); + assertEqualInt(0, archive_entry_size(entry)); + + archive_entry_free(entry); + archive_entry_linkresolver_free(resolver); +#endif +} From 98f7bbd6eaa4216a8a98e2d1c168ca244d0a95a6 Mon Sep 17 00:00:00 2001 From: Sevan Janiyan Date: Tue, 11 Jun 2024 05:35:49 +0100 Subject: [PATCH 87/98] configure.ac: autoupdate (#2223) Clear up the autoconf warnings. --- configure.ac | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index 7250b686fa..75c71d5c42 100644 --- a/configure.ac +++ b/configure.ac @@ -113,8 +113,8 @@ AC_PROG_CC AM_PROG_CC_C_O AC_PROG_CPP AC_USE_SYSTEM_EXTENSIONS -AC_LIBTOOL_WIN32_DLL -AC_PROG_LIBTOOL + +LT_INIT([win32-dll]) AC_CHECK_TOOL([STRIP],[strip]) AC_PROG_MKDIR_P @@ -777,7 +777,6 @@ AX_COMPILE_CHECK_SIZEOF(long) AC_CHECK_HEADERS_ONCE([sys/time.h]) # Checks for library functions. -AC_PROG_GCC_TRADITIONAL AC_HEADER_MAJOR AC_FUNC_FSEEKO AC_FUNC_MEMCMP From ffa43aef31874b878e897dcb56f2791302e96dcb Mon Sep 17 00:00:00 2001 From: Lukas Javorsky Date: Tue, 11 Jun 2024 06:41:25 +0200 Subject: [PATCH 88/98] Use calloc instead of malloc to clear the memory from leftovers (#2207) This ensures that the buffer is properly initialized and does not contain any leftover data from previous operations. It is used later in the `archive_entry_copy_hardlink_l` function call and could be uninitialized. 
--- libarchive/archive_read_support_format_iso9660.c | 4 ++-- libarchive/archive_read_support_format_xar.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libarchive/archive_read_support_format_iso9660.c b/libarchive/archive_read_support_format_iso9660.c index db5cdb67f1..25ab11bf59 100644 --- a/libarchive/archive_read_support_format_iso9660.c +++ b/libarchive/archive_read_support_format_iso9660.c @@ -1212,7 +1212,7 @@ archive_read_format_iso9660_read_header(struct archive_read *a, } } if (iso9660->utf16be_previous_path == NULL) { - iso9660->utf16be_previous_path = malloc(UTF16_NAME_MAX); + iso9660->utf16be_previous_path = calloc(1, UTF16_NAME_MAX); if (iso9660->utf16be_previous_path == NULL) { archive_set_error(&a->archive, ENOMEM, "No memory"); @@ -3033,7 +3033,7 @@ heap_add_entry(struct archive_read *a, struct heap_queue *heap, return (ARCHIVE_FATAL); } new_pending_files = (struct file_info **) - malloc(new_size * sizeof(new_pending_files[0])); + calloc(new_size, sizeof(new_pending_files[0])); if (new_pending_files == NULL) { archive_set_error(&a->archive, ENOMEM, "Out of memory"); diff --git a/libarchive/archive_read_support_format_xar.c b/libarchive/archive_read_support_format_xar.c index b9bef05161..dbc31df94e 100644 --- a/libarchive/archive_read_support_format_xar.c +++ b/libarchive/archive_read_support_format_xar.c @@ -1242,7 +1242,7 @@ heap_add_entry(struct archive_read *a, return (ARCHIVE_FATAL); } new_pending_files = (struct xar_file **) - malloc(new_size * sizeof(new_pending_files[0])); + calloc(new_size, sizeof(new_pending_files[0])); if (new_pending_files == NULL) { archive_set_error(&a->archive, ENOMEM, "Out of memory"); From bea3a56118b4b4e5ac652f94685b80d3d1b82611 Mon Sep 17 00:00:00 2001 From: Mrmaxmeier <3913977+Mrmaxmeier@users.noreply.github.com> Date: Wed, 12 Jun 2024 20:57:20 +0200 Subject: [PATCH 89/98] Fuzzing: Expose `DONT_FAIL_ON_CRC_ERROR` as a CMake option and honor it in the rar5 decoder (#2229) Hey, the fuzzing 
infrastructure over at OSSFuzz builds libarchive with the CMake option `-DDONT_FAIL_ON_CRC_ERROR=1`. https://github.com/google/oss-fuzz/blob/e4643b64b3af4932bff23bb87afdfbac2a301969/projects/libarchive/build.sh#L35 This, unfortunately, does not do anything since it's never been defined as an option. Building the fuzzers with CRC checks disabled should improve fuzzing efficacy a bunch. Thanks! --- CMakeLists.txt | 5 +++++ libarchive/archive_read_support_format_rar5.c | 2 ++ 2 files changed, 7 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c86789c8b..7bfaf4cfa7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2173,6 +2173,11 @@ IF(APPLE) ADD_DEFINITIONS(-Wno-deprecated-declarations) ENDIF(APPLE) +OPTION(DONT_FAIL_ON_CRC_ERROR "Ignore CRC errors during parsing (For fuzzing)" OFF) +IF(DONT_FAIL_ON_CRC_ERROR) + ADD_DEFINITIONS(-DDONT_FAIL_ON_CRC_ERROR=1) +ENDIF(DONT_FAIL_ON_CRC_ERROR) + IF(ENABLE_TEST) ADD_CUSTOM_TARGET(run_all_tests) ENDIF(ENABLE_TEST) diff --git a/libarchive/archive_read_support_format_rar5.c b/libarchive/archive_read_support_format_rar5.c index e06effe8b4..bd5a02179f 100644 --- a/libarchive/archive_read_support_format_rar5.c +++ b/libarchive/archive_read_support_format_rar5.c @@ -2229,10 +2229,12 @@ static int process_base_block(struct archive_read* a, /* Verify the CRC32 of the header data. */ computed_crc = (uint32_t) crc32(0, p, (int) hdr_size); if(computed_crc != hdr_crc) { +#ifndef DONT_FAIL_ON_CRC_ERROR archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Header CRC error"); return ARCHIVE_FATAL; +#endif } /* If the checksum is OK, we proceed with parsing. 
*/ From ad26e379dbe8943c99fd7a9b078e2ed4e23ab8e9 Mon Sep 17 00:00:00 2001 From: Duncan Horn <40036384+dunhor@users.noreply.github.com> Date: Wed, 12 Jun 2024 12:00:24 -0700 Subject: [PATCH 90/98] Add unicode test for creating zip files on Windows (#2231) There's no bug fix here - this just adds a test to verify that zip creation when using the _w functions works as expected on Windows. Note: this is a partial cherry-pick from https://github.com/libarchive/libarchive/pull/2095, which I'm going to go through and break into smaller pieces in hopes of getting some things in while discussion of other things can continue. --- libarchive/test/test_zip_filename_encoding.c | 95 ++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/libarchive/test/test_zip_filename_encoding.c b/libarchive/test/test_zip_filename_encoding.c index 448fb9b1d4..b6786f2c3b 100644 --- a/libarchive/test/test_zip_filename_encoding.c +++ b/libarchive/test/test_zip_filename_encoding.c @@ -527,3 +527,98 @@ DEFINE_TEST(test_zip_filename_encoding_CP932) assertEqualInt(0, buff[7]); assertEqualMem(buff + 30, "abcABC", 6); } + +DEFINE_TEST(test_zip_filename_encoding_UTF16_win) +{ +#if !defined(_WIN32) || defined(__CYGWIN__) + skipping("This test is meant to verify unicode string handling" + " on Windows with UTF-16 names"); + return; +#else + struct archive *a; + struct archive_entry *entry; + char buff[4096]; + size_t used; + + /* + * Don't call setlocale because we're verifying that the '_w' functions + * work as expected when 'hdrcharset' is UTF-8 + */ + + /* Part 1: file */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); + if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { + skipping("This system cannot convert character-set" + " from UTF-16 to UTF-8."); + archive_write_free(a); + return; + } + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the 
filename using a UTF-16 string */ + archive_entry_copy_pathname_w(entry, L"\u8868.txt"); + archive_entry_set_filetype(entry, AE_IFREG); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* A bit 11 of general purpose flag should be 1, + * which indicates the filename charset is UTF-8. */ + assertEqualInt(0x08, buff[7]); + /* Check UTF-8 version. */ + assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7); + + /* Part 2: directory */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); + assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8")); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the directory name using a UTF-16 string */ + /* NOTE: Explicitly not adding trailing slash to test that code path */ + archive_entry_copy_pathname_w(entry, L"\u8868"); + archive_entry_set_filetype(entry, AE_IFDIR); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* A bit 11 of general purpose flag should be 1, + * which indicates the filename charset is UTF-8. */ + assertEqualInt(0x08, buff[7]); + /* Check UTF-8 version. 
*/ + assertEqualMem(buff+ 30, "\xE8\xA1\xA8/", 4); + + /* Part 3: symlink */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); + assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8")); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the symlink target using a UTF-16 string */ + archive_entry_set_pathname(entry, "link.txt"); + archive_entry_copy_symlink_w(entry, L"\u8868.txt"); + archive_entry_set_filetype(entry, AE_IFLNK); + archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* A bit 11 of general purpose flag should be 0, + * because the file name is ASCII. */ + assertEqualInt(0, buff[7]); + /* Check UTF-8 version. */ + assertEqualMem(buff + 38, "\xE8\xA1\xA8.txt", 7); + + /* NOTE: ZIP does not support hardlinks */ +#endif +} From 6ee1eebefdf41f36ef1a548c9a7000d132c453f3 Mon Sep 17 00:00:00 2001 From: Duncan Horn <40036384+dunhor@users.noreply.github.com> Date: Wed, 12 Jun 2024 12:01:40 -0700 Subject: [PATCH 91/98] Update ustar creation sanity check to use WCS path on Windows (#2230) On Windows, the MBS pathname might be null if the string was set with a WCS that can't be represented by the current locale. This is handled properly by the rest of the code, but there's a sanity check that does not make the proper distinction. Note: this is a partial cherry-pick from https://github.com/libarchive/libarchive/pull/2095, which I'm going to go through and break into smaller pieces in hopes of getting some things in while discussion of other things can continue. 
--- libarchive/archive_write_set_format_ustar.c | 4 + .../test/test_ustar_filename_encoding.c | 102 ++++++++++++++++++ 2 files changed, 106 insertions(+) diff --git a/libarchive/archive_write_set_format_ustar.c b/libarchive/archive_write_set_format_ustar.c index 673487b27f..d8f0b45846 100644 --- a/libarchive/archive_write_set_format_ustar.c +++ b/libarchive/archive_write_set_format_ustar.c @@ -254,7 +254,11 @@ archive_write_ustar_header(struct archive_write *a, struct archive_entry *entry) sconv = ustar->opt_sconv; /* Sanity check. */ +#if defined(_WIN32) && !defined(__CYGWIN__) + if (archive_entry_pathname_w(entry) == NULL) { +#else if (archive_entry_pathname(entry) == NULL) { +#endif archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Can't record entry in tar file without pathname"); return (ARCHIVE_FAILED); diff --git a/libarchive/test/test_ustar_filename_encoding.c b/libarchive/test/test_ustar_filename_encoding.c index cc62453f1c..1242bd1d3c 100644 --- a/libarchive/test/test_ustar_filename_encoding.c +++ b/libarchive/test/test_ustar_filename_encoding.c @@ -390,3 +390,105 @@ DEFINE_TEST(test_ustar_filename_encoding_CP932_UTF8) assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); } +DEFINE_TEST(test_ustar_filename_encoding_UTF16_win) +{ +#if !defined(_WIN32) || defined(__CYGWIN__) + skipping("This test is meant to verify unicode string handling" + " on Windows with UTF-16 names"); + return; +#else + struct archive *a; + struct archive_entry *entry; + char buff[4096]; + size_t used; + + /* + * Don't call setlocale because we're verifying that the '_w' functions + * work as expected when 'hdrcharset' is UTF-8 + */ + + /* Part 1: file */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); + if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { + skipping("This system cannot convert character-set" + " from UTF-16 to UTF-8."); + archive_write_free(a); + return; + } + assertEqualInt(ARCHIVE_OK, + 
archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the filename using a UTF-16 string */ + archive_entry_copy_pathname_w(entry, L"\u8868.txt"); + archive_entry_set_filetype(entry, AE_IFREG); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Check UTF-8 version. */ + assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); + + /* Part 2: directory */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); + assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8")); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the directory name using a UTF-16 string */ + /* NOTE: Explicitly not adding trailing slash to test that code path */ + archive_entry_copy_pathname_w(entry, L"\u8868"); + archive_entry_set_filetype(entry, AE_IFDIR); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Check UTF-8 version. 
*/ + assertEqualMem(buff, "\xE8\xA1\xA8/", 4); + + /* Part 3: symlink */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); + assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8")); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the symlink target using a UTF-16 string */ + archive_entry_set_pathname(entry, "link.txt"); + archive_entry_copy_symlink_w(entry, L"\u8868.txt"); + archive_entry_set_filetype(entry, AE_IFLNK); + archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Check UTF-8 version. */ + assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7); + + /* Part 4: hardlink */ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); + assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8")); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + entry = archive_entry_new2(a); + /* Set the symlink target using a UTF-16 string */ + archive_entry_set_pathname(entry, "link.txt"); + archive_entry_copy_hardlink_w(entry, L"\u8868.txt"); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Check UTF-8 version. */ + assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7); +#endif +} From e3c0c8d4800fda6b92bc1c50d3ece82ae688dae5 Mon Sep 17 00:00:00 2001 From: "Dustin L. 
Howett" Date: Fri, 14 Jun 2024 19:13:42 -0500 Subject: [PATCH 92/98] xar: guard against file entries containing multiple name elements (#2236) It appears that there are xar archives (in the form of Apple .pkg files) that contain TOCs with duplicated name elements: ```xml ... file PackageInfo PackageInfo PackageInfo ``` When libarchive encounters one such file, it will produce an archive_entry named PackageInfoPackageInfoPackageInfo. To produce a test archive, the XAR writer was modified to emit two name elements. --- Makefile.am | 1 + libarchive/archive_read_support_format_xar.c | 3 ++ libarchive/test/test_read_format_xar.c | 31 +++++++++++++++++++ ..._format_xar_duplicate_filename_node.xar.uu | 14 +++++++++ 4 files changed, 49 insertions(+) create mode 100644 libarchive/test/test_read_format_xar_duplicate_filename_node.xar.uu diff --git a/Makefile.am b/Makefile.am index eb88654fce..372ade1bda 100644 --- a/Makefile.am +++ b/Makefile.am @@ -937,6 +937,7 @@ libarchive_test_EXTRA_DIST=\ libarchive/test/test_read_format_ustar_filename_koi8r.tar.Z.uu \ libarchive/test/test_read_format_warc.warc.uu \ libarchive/test/test_read_format_xar_doublelink.xar.uu \ + libarchive/test/test_read_format_xar_duplicate_filename_node.xar.uu \ libarchive/test/test_read_format_zip.zip.uu \ libarchive/test/test_read_format_zip_7075_utf8_paths.zip.uu \ libarchive/test/test_read_format_zip_7z_deflate.zip.uu \ diff --git a/libarchive/archive_read_support_format_xar.c b/libarchive/archive_read_support_format_xar.c index dbc31df94e..7c327e02f9 100644 --- a/libarchive/archive_read_support_format_xar.c +++ b/libarchive/archive_read_support_format_xar.c @@ -2707,6 +2707,9 @@ xml_data(void *userData, const char *s, size_t len) switch (xar->xmlsts) { case FILE_NAME: + if (xar->file->has & HAS_PATHNAME) + break; + if (xar->file->parent != NULL) { archive_string_concat(&(xar->file->pathname), &(xar->file->parent->pathname)); diff --git a/libarchive/test/test_read_format_xar.c 
b/libarchive/test/test_read_format_xar.c index daff292169..41cbd7dab9 100644 --- a/libarchive/test/test_read_format_xar.c +++ b/libarchive/test/test_read_format_xar.c @@ -860,3 +860,34 @@ DEFINE_TEST(test_read_format_xar) verify(archive12, sizeof(archive12), verify12, NULL, GZIP); verifyB(archive13, sizeof(archive13)); } + +DEFINE_TEST(test_read_format_xar_duplicate_filename_node) +{ + static const char *reffiles[] = + { + "test_read_format_xar_duplicate_filename_node.xar", + NULL + }; + struct archive_entry *ae; + struct archive *a; + int r; + + extract_reference_files(reffiles); + assert((a = archive_read_new()) != NULL); + assertA(0 == archive_read_support_filter_all(a)); + + r = archive_read_support_format_xar(a); + if (r == ARCHIVE_WARN) { + skipping("xar reading not fully supported on this platform"); + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); + return; + } + + assertA(0 == archive_read_open_filenames(a, reffiles, 10240)); + + assertA(0 == archive_read_next_header(a, &ae)); + assertEqualString("File", archive_entry_pathname(ae)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); +} diff --git a/libarchive/test/test_read_format_xar_duplicate_filename_node.xar.uu b/libarchive/test/test_read_format_xar_duplicate_filename_node.xar.uu new file mode 100644 index 0000000000..18cbcf5898 --- /dev/null +++ b/libarchive/test/test_read_format_xar_duplicate_filename_node.xar.uu @@ -0,0 +1,14 @@ +begin 644 test_read_format_xar_duplicate_filename_node.xar +M>&%R(0`<``$````````!EP````````.]`````7BVIC +MC('((;N7JG3.JNUT4V]=_P6[ +MCJI%(W5=;-VWU[T7N[OT^8E?H+6/XW#3B`%8*%H%QMIY1ENI=JZW0%\=R0MW>1YITR*.1K1S'3ZUL?A:`"C +M-S2YFP2Y+I6CI:UFX;0&&W]O&8X&^#]AKD>5Y@,QP)G0=2-M&E'"DIBC^V\F +MI3IKH>HF]0-&68!]PM$LF[6JW@8S9CM4K][$>J%9K57B@"FM5"0;FG_V7JB[,(?5MHB-8#&+(0L`DQED2!@!)1(,PS^.$,I&)+"-1 +M&`*)./KIZ:N]:-%?COHMO%\?NI\?1_T]?@(L`P/8\;44V2:R;"\D48GC?CRL +C(UAG)C5XG./E`@``)@`8>)R%4TEN@S`4W5?J'1![:F.,@<@` +` +end From 
d12cb9ab217a0b30c7ddc33d7ebad44fa22f7d5e Mon Sep 17 00:00:00 2001 From: alice Date: Sat, 15 Jun 2024 02:26:14 +0200 Subject: [PATCH 93/98] rar: fix UB negation overflow for INT32_MIN address (#2235) certain rar files seem to have the lowest possible address here, so flip the argument order to correctly evaluate this instead of invoking UB (caught via sanitize=undefined) --- the backtrace looks something like: ``` * frame #0: 0x00007a1e3898727b libarchive.so.13`execute_filter [inlined] execute_filter_e8(filter=, vm=, pos=, e9also=) at archive_read_support_format_rar.c:3640:47 frame #1: 0x00007a1e3898727b libarchive.so.13`execute_filter(a=, filter=0x00007a1e39e2f090, vm=0x00007a1e31b1efd0, pos=) at archive_read_support_format_rar.c:0 frame #2: 0x00007a1e38983ac3 libarchive.so.13`read_data_compressed [inlined] run_filters(a=0x00007a1e34209700) at archive_read_support_format_rar.c:3395:8 frame #3: 0x00007a1e38983a9e libarchive.so.13`read_data_compressed(a=0x00007a1e34209700, buff=0x00007a1e31a01fd8, size=0x00007a1e31a01fd0, offset=0x00007a1e31a01fc0, looper=1) at archive_read_support_format_rar.c:2083:12 frame #4: 0x00007a1e38981b10 libarchive.so.13`archive_read_format_rar_read_data(a=0x00007a1e34209700, buff=0x00007a1e31a01fd8, size=0x00007a1e31a01fd0, offset=0x00007a1e31a01fc0) at archive_read_support_format_rar.c:1130:11 frame #5: 0x00006158bc5d30d3 file-roller`extract_archive_thread(result=0x00007a1e3711e2b0, object=, cancellable=0x00007a1e3870bf20) at fr-archive-libarchive.c:999:17 frame #6: 0x00007a1e39928d6d libgio-2.0.so.0`run_in_thread(job=, c=, _data=0x00007a1e326e9740) at gsimpleasyncresult.c:899:5 frame #7: 0x00007a1e3990614e libgio-2.0.so.0`io_job_thread(task=, source_object=, task_data=0x00007a1e2307fc20, cancellable=) at gioscheduler.c:75:16 frame #8: 0x00007a1e399433bf libgio-2.0.so.0`g_task_thread_pool_thread(thread_data=0x00007a1e35c18ab0, pool_data=) at gtask.c:1583:3 frame #9: 0x00007a1e39db77e8 libglib-2.0.so.0`g_thread_pool_thread_proxy(data=) at 
gthreadpool.c:336:15 frame #10: 0x00007a1e39db5bfb libglib-2.0.so.0`g_thread_proxy(data=0x00007a1e378147d0) at gthread.c:835:20 frame #11: 0x00007a1e3a0b5c7b ld-musl-x86_64.so.1`start(p=0x00007a1e31a02170) at pthread_create.c:208:17 frame #12: 0x00007a1e3a0b8a8b ld-musl-x86_64.so.1`__clone + 47 ``` note the 0xd which is 13, which is NegateOverflow in ubsan: ``` (lldb) x/1i $pc -> 0x7a1e3898727b: 67 0f b9 40 0d other ud1l 0xd(%eax), %eax ``` for reference, the totally legal rar file is https://img.ayaya.dev/05WYGFOcRPN9 , and this seems to only crash when extracted via file-roller (or inside nautilus) --- libarchive/archive_read_support_format_rar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libarchive/archive_read_support_format_rar.c b/libarchive/archive_read_support_format_rar.c index 157836b421..93d738a5ca 100644 --- a/libarchive/archive_read_support_format_rar.c +++ b/libarchive/archive_read_support_format_rar.c @@ -3681,7 +3681,7 @@ execute_filter_e8(struct rar_filter *filter, struct rar_virtual_machine *vm, siz { uint32_t currpos = (uint32_t)pos + i + 1; int32_t address = (int32_t)vm_read_32(vm, i + 1); - if (address < 0 && currpos >= (uint32_t)-address) + if (address < 0 && currpos >= -(uint32_t)address) vm_write_32(vm, i + 1, address + filesize); else if (address >= 0 && (uint32_t)address < filesize) vm_write_32(vm, i + 1, address - currpos); From 54c22ee9e1cbeb9259619fb507adb86998d98321 Mon Sep 17 00:00:00 2001 From: Duncan Horn <40036384+dunhor@users.noreply.github.com> Date: Sat, 15 Jun 2024 22:20:00 -0700 Subject: [PATCH 94/98] Fix a couple issues with creating PAX archives (#2228) Note: this is a partial cherry-pick from https://github.com/libarchive/libarchive/pull/2095, which I'm going to go through and break into smaller pieces in hopes of getting some things in while discussion of other things can continue. 
There are basically two fixes here: The first is to check for the presence of the WCS pathname on Windows before failing since the conversion from WCS -> MBS might fail. Later execution already handles such paths correctly. The second is to set the converted link name on the target entry where relevant. Note that there has been prior discussion on this here: https://github.com/libarchive/libarchive/pull/2095/files#r1531599325 --- libarchive/archive_write_set_format_pax.c | 16 ++++ libarchive/test/test_pax_filename_encoding.c | 96 ++++++++++++++++++++ 2 files changed, 112 insertions(+) diff --git a/libarchive/archive_write_set_format_pax.c b/libarchive/archive_write_set_format_pax.c index e93333074a..4aace46825 100644 --- a/libarchive/archive_write_set_format_pax.c +++ b/libarchive/archive_write_set_format_pax.c @@ -608,7 +608,15 @@ archive_write_pax_header(struct archive_write *a, const time_t ustar_max_mtime = get_ustar_max_mtime(); /* Sanity check. */ +#if defined(_WIN32) && !defined(__CYGWIN__) + /* NOTE: If the caller supplied a pathname that fails WCS conversion (e.g. 
+ * if it is invalid UTF-8), we are expected to return ARCHIVE_WARN later on + * in execution, hence the check for both pointers */ + if ((archive_entry_pathname_w(entry_original) == NULL) && + (archive_entry_pathname(entry_original) == NULL)) { +#else if (archive_entry_pathname(entry_original) == NULL) { +#endif archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Can't record entry in tar file without pathname"); return (ARCHIVE_FAILED); @@ -1032,6 +1040,14 @@ archive_write_pax_header(struct archive_write *a, archive_entry_set_symlink(entry_main, "././@LongSymLink"); } + else { + /* Otherwise, has non-ASCII characters; update the paths to + * however they got decoded above */ + if (hardlink != NULL) + archive_entry_set_hardlink(entry_main, linkpath); + else + archive_entry_set_symlink(entry_main, linkpath); + } need_extension = 1; } } diff --git a/libarchive/test/test_pax_filename_encoding.c b/libarchive/test/test_pax_filename_encoding.c index 737641c5ab..3165b65dd3 100644 --- a/libarchive/test/test_pax_filename_encoding.c +++ b/libarchive/test/test_pax_filename_encoding.c @@ -579,6 +579,102 @@ DEFINE_TEST(test_pax_filename_encoding_KOI8R_CP1251) assertEqualInt(ARCHIVE_OK, archive_write_free(a)); } +/* + * Verify that unicode filenames are correctly preserved on Windows + */ +DEFINE_TEST(test_pax_filename_encoding_UTF16_win) +{ +#if !defined(_WIN32) || defined(__CYGWIN__) + skipping("This test is meant to verify unicode string handling" + " on Windows with UTF-16 names"); + return; +#else + struct archive *a; + struct archive_entry *entry; + char buff[0x2000]; + size_t used; + + /* + * Don't call setlocale because we're verifying that the '_w' functions + * work as expected when 'hdrcharset' is UTF-8 + */ + + /* Check if the platform completely supports the string conversion. 
*/ + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); + if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { + skipping("This system cannot convert character-set" + " from UTF-16 to UTF-8."); + archive_write_free(a); + return; + } + + /* Re-create a write archive object since filenames should be written + * in UTF-8 by default. */ + archive_write_free(a); + + a = archive_write_new(); + assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); + assertEqualInt(ARCHIVE_OK, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + /* Part 1: file */ + entry = archive_entry_new2(a); + archive_entry_copy_pathname_w(entry, L"\u4f60\u597d.txt"); + archive_entry_set_filetype(entry, AE_IFREG); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + + /* Part 2: directory */ + /* NOTE: Explicitly not adding trailing slash to test that code path */ + archive_entry_copy_pathname_w(entry, L"\u043f\u0440\u0438"); + archive_entry_set_filetype(entry, AE_IFDIR); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + + /* Part 3: symlink */ + archive_entry_copy_pathname_w(entry, L"\u518d\u89c1.txt"); + archive_entry_copy_symlink_w(entry, L"\u4f60\u597d.txt"); + archive_entry_set_filetype(entry, AE_IFLNK); + archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + + /* Part 4: hardlink */ + archive_entry_copy_pathname_w(entry, L"\u665a\u5b89.txt"); + archive_entry_copy_hardlink_w(entry, L"\u4f60\u597d.txt"); + archive_entry_set_filetype(entry, AE_IFREG); + archive_entry_set_size(entry, 0); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + + archive_entry_free(entry); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* Ensure that the names round trip properly */ + a = archive_read_new(); + 
archive_read_support_format_all(a); + archive_read_support_filter_all(a); + assertEqualInt(0, archive_read_open_memory(a, buff, used)); + + /* Read part 1: file */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry)); + assertEqualWString(L"\u4f60\u597d.txt", archive_entry_pathname_w(entry)); + + /* Read part 2: directory */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry)); + assertEqualWString(L"\u043f\u0440\u0438/", archive_entry_pathname_w(entry)); + + /* Read part 3: symlink */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry)); + assertEqualWString(L"\u518d\u89c1.txt", archive_entry_pathname_w(entry)); + assertEqualWString(L"\u4f60\u597d.txt", archive_entry_symlink_w(entry)); + + /* Read part 4: hardlink */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry)); + assertEqualWString(L"\u665a\u5b89.txt", archive_entry_pathname_w(entry)); + assertEqualWString(L"\u4f60\u597d.txt", archive_entry_hardlink_w(entry)); +#endif +} DEFINE_TEST(test_pax_filename_encoding) { From 2d8a5760c5ec553283a95a1aaca746f6eb472d0f Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Sat, 15 Jun 2024 22:22:12 -0700 Subject: [PATCH 95/98] Parse tar headers incrementally (#2127) This rebuilds the tar reader to parse all header data incrementally as it appears in the stream. This definitively fixes a longstanding issue with unsupported pax attributes. Libarchive must limit the amount of data that it reads into memory, and this has caused problems with large unknown attributes. By scanning iteratively, we can instead identify an attribute by name and then decide whether to read it into memory or whether to skip it without reading. This design also allows us to vary our sanity limits for different pax attributes (e.g., an attribute that is a single number can be limited to a few dozen bytes while an attribute holding an ACL is allowed to be a few hundred kilobytes). 
This allows us to be a little more resistant to malicious archives that might try to force allocation of very large amounts of memory, though there is still work to be done here. This includes a number of changes to archive_entry processing to allow us to consistently keep the _first_ appearance of any given value instead of the original architecture that recursively cached data in memory in order to effectively process all the data from back-to-front. Resolves #1855 Resolves #1939 --- Makefile.am | 2 + libarchive/archive_acl.c | 51 +- libarchive/archive_acl_private.h | 2 + libarchive/archive_entry.c | 275 ++- libarchive/archive_entry.h | 5 + libarchive/archive_entry_private.h | 4 +- libarchive/archive_read_support_format_tar.c | 2091 +++++++++++------ libarchive/test/CMakeLists.txt | 1 + .../test/test_compat_solaris_pax_sparse.c | 2 +- libarchive/test/test_entry.c | 14 +- libarchive/test/test_read_format_tar.c | 2 +- .../test_read_format_tar_pax_large_attr.c | 65 + ...st_read_format_tar_pax_large_attr.tar.Z.uu | 149 ++ 13 files changed, 1791 insertions(+), 872 deletions(-) create mode 100644 libarchive/test/test_read_format_tar_pax_large_attr.c create mode 100644 libarchive/test/test_read_format_tar_pax_large_attr.tar.Z.uu diff --git a/Makefile.am b/Makefile.am index 372ade1bda..1661d9c1a5 100644 --- a/Makefile.am +++ b/Makefile.am @@ -522,6 +522,7 @@ libarchive_test_SOURCES= \ libarchive/test/test_read_format_tar_empty_with_gnulabel.c \ libarchive/test/test_read_format_tar_filename.c \ libarchive/test/test_read_format_tar_invalid_pax_size.c \ + libarchive/test/test_read_format_tar_pax_large_attr.c \ libarchive/test/test_read_format_tbz.c \ libarchive/test/test_read_format_tgz.c \ libarchive/test/test_read_format_tlz.c \ @@ -932,6 +933,7 @@ libarchive_test_EXTRA_DIST=\ libarchive/test/test_read_format_tar_empty_pax.tar.Z.uu \ libarchive/test/test_read_format_tar_filename_koi8r.tar.Z.uu \ libarchive/test/test_read_format_tar_invalid_pax_size.tar.uu \ + 
libarchive/test/test_read_format_tar_pax_large_attr.tar.Z.uu \ libarchive/test/test_read_format_ustar_filename_cp866.tar.Z.uu \ libarchive/test/test_read_format_ustar_filename_eucjp.tar.Z.uu \ libarchive/test/test_read_format_ustar_filename_koi8r.tar.Z.uu \ diff --git a/libarchive/archive_acl.c b/libarchive/archive_acl.c index 254324c713..77d99480f9 100644 --- a/libarchive/archive_acl.c +++ b/libarchive/archive_acl.c @@ -76,7 +76,7 @@ static int is_nfs4_flags(const char *start, const char *end, int *result); static int is_nfs4_perms(const char *start, const char *end, int *result); -static void next_field(const char **p, const char **start, +static void next_field(const char **p, size_t *l, const char **start, const char **end, char *sep); static void append_entry(char **p, const char *prefix, int type, int tag, int flags, const char *name, int perm, int id); @@ -1619,6 +1619,13 @@ next_field_w(const wchar_t **wp, const wchar_t **start, int archive_acl_from_text_l(struct archive_acl *acl, const char *text, int want_type, struct archive_string_conv *sc) +{ + return archive_acl_from_text_nl(acl, text, strlen(text), want_type, sc); +} + +int +archive_acl_from_text_nl(struct archive_acl *acl, const char *text, + size_t length, int want_type, struct archive_string_conv *sc) { struct { const char *start; @@ -1649,7 +1656,7 @@ archive_acl_from_text_l(struct archive_acl *acl, const char *text, ret = ARCHIVE_OK; types = 0; - while (text != NULL && *text != '\0') { + while (text != NULL && length > 0 && *text != '\0') { /* * Parse the fields out of the next entry, * advance 'text' to start of next entry. 
@@ -1657,7 +1664,7 @@ archive_acl_from_text_l(struct archive_acl *acl, const char *text, fields = 0; do { const char *start, *end; - next_field(&text, &start, &end, &sep); + next_field(&text, &length, &start, &end, &sep); if (fields < numfields) { field[fields].start = start; field[fields].end = end; @@ -2047,7 +2054,7 @@ is_nfs4_flags(const char *start, const char *end, int *permset) } /* - * Match "[:whitespace:]*(.*)[:whitespace:]*[:,\n]". *wp is updated + * Match "[:whitespace:]*(.*)[:whitespace:]*[:,\n]". *p is updated * to point to just after the separator. *start points to the first * character of the matched text and *end just after the last * character of the matched identifier. In particular *end - *start @@ -2055,42 +2062,42 @@ is_nfs4_flags(const char *start, const char *end, int *permset) * whitespace. */ static void -next_field(const char **p, const char **start, +next_field(const char **p, size_t *l, const char **start, const char **end, char *sep) { /* Skip leading whitespace to find start of field. */ - while (**p == ' ' || **p == '\t' || **p == '\n') { + while (*l > 0 && (**p == ' ' || **p == '\t' || **p == '\n')) { (*p)++; + (*l)--; } *start = *p; - /* Scan for the separator. */ - while (**p != '\0' && **p != ',' && **p != ':' && **p != '\n' && - **p != '#') { + /* Locate end of field, trim trailing whitespace if necessary */ + while (*l > 0 && **p != ' ' && **p != '\t' && **p != '\n' && **p != ',' && **p != ':' && **p != '#') { (*p)++; + (*l)--; } - *sep = **p; + *end = *p; - /* Locate end of field, trim trailing whitespace if necessary */ - if (*p == *start) { - *end = *p; - } else { - *end = *p - 1; - while (**end == ' ' || **end == '\t' || **end == '\n') { - (*end)--; - } - (*end)++; + /* Scan for the separator. 
*/ + while (*l > 0 && **p != ',' && **p != ':' && **p != '\n' && **p != '#') { + (*p)++; + (*l)--; } + *sep = **p; /* Handle in-field comments */ if (*sep == '#') { - while (**p != '\0' && **p != ',' && **p != '\n') { + while (*l > 0 && **p != ',' && **p != '\n') { (*p)++; + (*l)--; } *sep = **p; } - /* Adjust scanner location. */ - if (**p != '\0') + /* Skip separator. */ + if (*l > 0) { (*p)++; + (*l)--; + } } diff --git a/libarchive/archive_acl_private.h b/libarchive/archive_acl_private.h index 750b4dd7d8..2c9b505343 100644 --- a/libarchive/archive_acl_private.h +++ b/libarchive/archive_acl_private.h @@ -77,5 +77,7 @@ int archive_acl_from_text_w(struct archive_acl *, const wchar_t * /* wtext */, int /* type */); int archive_acl_from_text_l(struct archive_acl *, const char * /* text */, int /* type */, struct archive_string_conv *); +int archive_acl_from_text_nl(struct archive_acl *, const char * /* text */, + size_t /* size of text */, int /* type */, struct archive_string_conv *); #endif /* ARCHIVE_ENTRY_PRIVATE_H_INCLUDED */ diff --git a/libarchive/archive_entry.c b/libarchive/archive_entry.c index 9463233e6a..178f7f6283 100644 --- a/libarchive/archive_entry.c +++ b/libarchive/archive_entry.c @@ -118,7 +118,7 @@ static char * ae_fflagstostr(unsigned long bitset, unsigned long bitclear); static const wchar_t *ae_wcstofflags(const wchar_t *stringp, unsigned long *setp, unsigned long *clrp); -static const char *ae_strtofflags(const char *stringp, +static const char *ae_strtofflags(const char *stringp, size_t length, unsigned long *setp, unsigned long *clrp); #ifndef HAVE_WCSCPY @@ -157,10 +157,9 @@ archive_entry_clear(struct archive_entry *entry) return (NULL); archive_mstring_clean(&entry->ae_fflags_text); archive_mstring_clean(&entry->ae_gname); - archive_mstring_clean(&entry->ae_hardlink); + archive_mstring_clean(&entry->ae_linkname); archive_mstring_clean(&entry->ae_pathname); archive_mstring_clean(&entry->ae_sourcepath); - 
archive_mstring_clean(&entry->ae_symlink); archive_mstring_clean(&entry->ae_uname); archive_entry_copy_mac_metadata(entry, NULL, 0); archive_acl_clear(&entry->acl); @@ -195,10 +194,9 @@ archive_entry_clone(struct archive_entry *entry) * character sets are different? XXX */ archive_mstring_copy(&entry2->ae_fflags_text, &entry->ae_fflags_text); archive_mstring_copy(&entry2->ae_gname, &entry->ae_gname); - archive_mstring_copy(&entry2->ae_hardlink, &entry->ae_hardlink); + archive_mstring_copy(&entry2->ae_linkname, &entry->ae_linkname); archive_mstring_copy(&entry2->ae_pathname, &entry->ae_pathname); archive_mstring_copy(&entry2->ae_sourcepath, &entry->ae_sourcepath); - archive_mstring_copy(&entry2->ae_symlink, &entry->ae_symlink); entry2->ae_set = entry->ae_set; archive_mstring_copy(&entry2->ae_uname, &entry->ae_uname); @@ -477,6 +475,15 @@ _archive_entry_gname_l(struct archive_entry *entry, return (archive_mstring_get_mbs_l(entry->archive, &entry->ae_gname, p, len, sc)); } +void +archive_entry_set_link_to_hardlink(struct archive_entry *entry) +{ + if ((entry->ae_set & AE_SET_SYMLINK) != 0) { + entry->ae_set &= ~AE_SET_SYMLINK; + } + entry->ae_set |= AE_SET_HARDLINK; +} + const char * archive_entry_hardlink(struct archive_entry *entry) { @@ -484,7 +491,7 @@ archive_entry_hardlink(struct archive_entry *entry) if ((entry->ae_set & AE_SET_HARDLINK) == 0) return (NULL); if (archive_mstring_get_mbs( - entry->archive, &entry->ae_hardlink, &p) == 0) + entry->archive, &entry->ae_linkname, &p) == 0) return (p); if (errno == ENOMEM) __archive_errx(1, "No memory"); @@ -498,7 +505,7 @@ archive_entry_hardlink_utf8(struct archive_entry *entry) if ((entry->ae_set & AE_SET_HARDLINK) == 0) return (NULL); if (archive_mstring_get_utf8( - entry->archive, &entry->ae_hardlink, &p) == 0) + entry->archive, &entry->ae_linkname, &p) == 0) return (p); if (errno == ENOMEM) __archive_errx(1, "No memory"); @@ -512,7 +519,7 @@ archive_entry_hardlink_w(struct archive_entry *entry) if ((entry->ae_set 
& AE_SET_HARDLINK) == 0) return (NULL); if (archive_mstring_get_wcs( - entry->archive, &entry->ae_hardlink, &p) == 0) + entry->archive, &entry->ae_linkname, &p) == 0) return (p); if (errno == ENOMEM) __archive_errx(1, "No memory"); @@ -528,7 +535,7 @@ _archive_entry_hardlink_l(struct archive_entry *entry, *len = 0; return (0); } - return (archive_mstring_get_mbs_l(entry->archive, &entry->ae_hardlink, p, len, sc)); + return (archive_mstring_get_mbs_l(entry->archive, &entry->ae_linkname, p, len, sc)); } la_int64_t @@ -648,32 +655,50 @@ archive_entry_perm_is_set(struct archive_entry *entry) return (entry->ae_set & AE_SET_PERM); } +int +archive_entry_rdev_is_set(struct archive_entry *entry) +{ + return (entry->ae_set & AE_SET_RDEV); +} + dev_t archive_entry_rdev(struct archive_entry *entry) { - if (entry->ae_stat.aest_rdev_is_broken_down) - return ae_makedev(entry->ae_stat.aest_rdevmajor, - entry->ae_stat.aest_rdevminor); - else - return (entry->ae_stat.aest_rdev); + if (archive_entry_rdev_is_set(entry)) { + if (entry->ae_stat.aest_rdev_is_broken_down) + return ae_makedev(entry->ae_stat.aest_rdevmajor, + entry->ae_stat.aest_rdevminor); + else + return (entry->ae_stat.aest_rdev); + } else { + return 0; + } } dev_t archive_entry_rdevmajor(struct archive_entry *entry) { - if (entry->ae_stat.aest_rdev_is_broken_down) - return (entry->ae_stat.aest_rdevmajor); - else - return major(entry->ae_stat.aest_rdev); + if (archive_entry_rdev_is_set(entry)) { + if (entry->ae_stat.aest_rdev_is_broken_down) + return (entry->ae_stat.aest_rdevmajor); + else + return major(entry->ae_stat.aest_rdev); + } else { + return 0; + } } dev_t archive_entry_rdevminor(struct archive_entry *entry) { - if (entry->ae_stat.aest_rdev_is_broken_down) - return (entry->ae_stat.aest_rdevminor); - else - return minor(entry->ae_stat.aest_rdev); + if (archive_entry_rdev_is_set(entry)) { + if (entry->ae_stat.aest_rdev_is_broken_down) + return (entry->ae_stat.aest_rdevminor); + else + return 
minor(entry->ae_stat.aest_rdev); + } else { + return 0; + } } la_int64_t @@ -717,13 +742,22 @@ archive_entry_symlink(struct archive_entry *entry) if ((entry->ae_set & AE_SET_SYMLINK) == 0) return (NULL); if (archive_mstring_get_mbs( - entry->archive, &entry->ae_symlink, &p) == 0) + entry->archive, &entry->ae_linkname, &p) == 0) return (p); if (errno == ENOMEM) __archive_errx(1, "No memory"); return (NULL); } +void +archive_entry_set_link_to_symlink(struct archive_entry *entry) +{ + if ((entry->ae_set & AE_SET_HARDLINK) != 0) { + entry->ae_set &= ~AE_SET_HARDLINK; + } + entry->ae_set |= AE_SET_SYMLINK; +} + int archive_entry_symlink_type(struct archive_entry *entry) { @@ -737,7 +771,7 @@ archive_entry_symlink_utf8(struct archive_entry *entry) if ((entry->ae_set & AE_SET_SYMLINK) == 0) return (NULL); if (archive_mstring_get_utf8( - entry->archive, &entry->ae_symlink, &p) == 0) + entry->archive, &entry->ae_linkname, &p) == 0) return (p); if (errno == ENOMEM) __archive_errx(1, "No memory"); @@ -751,7 +785,7 @@ archive_entry_symlink_w(struct archive_entry *entry) if ((entry->ae_set & AE_SET_SYMLINK) == 0) return (NULL); if (archive_mstring_get_wcs( - entry->archive, &entry->ae_symlink, &p) == 0) + entry->archive, &entry->ae_linkname, &p) == 0) return (p); if (errno == ENOMEM) __archive_errx(1, "No memory"); @@ -767,7 +801,7 @@ _archive_entry_symlink_l(struct archive_entry *entry, *len = 0; return (0); } - return (archive_mstring_get_mbs_l(entry->archive, &entry->ae_symlink, p, len, sc)); + return (archive_mstring_get_mbs_l(entry->archive, &entry->ae_linkname, p, len, sc)); } la_int64_t @@ -864,10 +898,17 @@ archive_entry_set_fflags(struct archive_entry *entry, const char * archive_entry_copy_fflags_text(struct archive_entry *entry, - const char *flags) + const char *flags) +{ + return archive_entry_copy_fflags_text_len(entry, flags, strlen(flags)); +} + +const char * +archive_entry_copy_fflags_text_len(struct archive_entry *entry, + const char *flags, size_t 
flags_length) { - archive_mstring_copy_mbs(&entry->ae_fflags_text, flags); - return (ae_strtofflags(flags, + archive_mstring_copy_mbs_len(&entry->ae_fflags_text, flags, flags_length); + return (ae_strtofflags(flags, flags_length, &entry->ae_fflags_set, &entry->ae_fflags_clear)); } @@ -949,17 +990,24 @@ archive_entry_set_ino64(struct archive_entry *entry, la_int64_t ino) void archive_entry_set_hardlink(struct archive_entry *entry, const char *target) { - archive_mstring_copy_mbs(&entry->ae_hardlink, target); - if (target != NULL) - entry->ae_set |= AE_SET_HARDLINK; - else + if (target == NULL) { entry->ae_set &= ~AE_SET_HARDLINK; + if (entry->ae_set & AE_SET_SYMLINK) { + return; + } + } else { + entry->ae_set |= AE_SET_HARDLINK; + } + entry->ae_set &= ~AE_SET_SYMLINK; + archive_mstring_copy_mbs(&entry->ae_linkname, target); } void archive_entry_set_hardlink_utf8(struct archive_entry *entry, const char *target) { - archive_mstring_copy_utf8(&entry->ae_hardlink, target); + if (target == NULL && (entry->ae_set & AE_SET_SYMLINK)) + return; + archive_mstring_copy_utf8(&entry->ae_linkname, target); if (target != NULL) entry->ae_set |= AE_SET_HARDLINK; else @@ -969,7 +1017,9 @@ archive_entry_set_hardlink_utf8(struct archive_entry *entry, const char *target) void archive_entry_copy_hardlink(struct archive_entry *entry, const char *target) { - archive_mstring_copy_mbs(&entry->ae_hardlink, target); + if (target == NULL && (entry->ae_set & AE_SET_SYMLINK)) + return; + archive_mstring_copy_mbs(&entry->ae_linkname, target); if (target != NULL) entry->ae_set |= AE_SET_HARDLINK; else @@ -979,7 +1029,9 @@ archive_entry_copy_hardlink(struct archive_entry *entry, const char *target) void archive_entry_copy_hardlink_w(struct archive_entry *entry, const wchar_t *target) { - archive_mstring_copy_wcs(&entry->ae_hardlink, target); + if (target == NULL && (entry->ae_set & AE_SET_SYMLINK)) + return; + archive_mstring_copy_wcs(&entry->ae_linkname, target); if (target != NULL) entry->ae_set 
|= AE_SET_HARDLINK; else @@ -989,12 +1041,14 @@ archive_entry_copy_hardlink_w(struct archive_entry *entry, const wchar_t *target int archive_entry_update_hardlink_utf8(struct archive_entry *entry, const char *target) { + if (target == NULL && (entry->ae_set & AE_SET_SYMLINK)) + return (0); if (target != NULL) entry->ae_set |= AE_SET_HARDLINK; else entry->ae_set &= ~AE_SET_HARDLINK; if (archive_mstring_update_utf8(entry->archive, - &entry->ae_hardlink, target) == 0) + &entry->ae_linkname, target) == 0) return (1); if (errno == ENOMEM) __archive_errx(1, "No memory"); @@ -1007,7 +1061,9 @@ _archive_entry_copy_hardlink_l(struct archive_entry *entry, { int r; - r = archive_mstring_copy_mbs_len_l(&entry->ae_hardlink, + if (target == NULL && (entry->ae_set & AE_SET_SYMLINK)) + return (0); + r = archive_mstring_copy_mbs_len_l(&entry->ae_linkname, target, len, sc); if (target != NULL && r == 0) entry->ae_set |= AE_SET_HARDLINK; @@ -1098,51 +1154,50 @@ archive_entry_set_devminor(struct archive_entry *entry, dev_t m) void archive_entry_set_link(struct archive_entry *entry, const char *target) { - if (entry->ae_set & AE_SET_SYMLINK) - archive_mstring_copy_mbs(&entry->ae_symlink, target); - else - archive_mstring_copy_mbs(&entry->ae_hardlink, target); + archive_mstring_copy_mbs(&entry->ae_linkname, target); + if ((entry->ae_set & AE_SET_SYMLINK) == 0) { + entry->ae_set |= AE_SET_HARDLINK; + } } void archive_entry_set_link_utf8(struct archive_entry *entry, const char *target) { - if (entry->ae_set & AE_SET_SYMLINK) - archive_mstring_copy_utf8(&entry->ae_symlink, target); - else - archive_mstring_copy_utf8(&entry->ae_hardlink, target); + archive_mstring_copy_utf8(&entry->ae_linkname, target); + if ((entry->ae_set & AE_SET_SYMLINK) == 0) { + entry->ae_set |= AE_SET_HARDLINK; + } } /* Set symlink if symlink is already set, else set hardlink. 
*/ void archive_entry_copy_link(struct archive_entry *entry, const char *target) { - if (entry->ae_set & AE_SET_SYMLINK) - archive_mstring_copy_mbs(&entry->ae_symlink, target); - else - archive_mstring_copy_mbs(&entry->ae_hardlink, target); + archive_mstring_copy_mbs(&entry->ae_linkname, target); + if ((entry->ae_set & AE_SET_SYMLINK) == 0) { + entry->ae_set |= AE_SET_HARDLINK; + } } /* Set symlink if symlink is already set, else set hardlink. */ void archive_entry_copy_link_w(struct archive_entry *entry, const wchar_t *target) { - if (entry->ae_set & AE_SET_SYMLINK) - archive_mstring_copy_wcs(&entry->ae_symlink, target); - else - archive_mstring_copy_wcs(&entry->ae_hardlink, target); + archive_mstring_copy_wcs(&entry->ae_linkname, target); + if ((entry->ae_set & AE_SET_SYMLINK) == 0) { + entry->ae_set |= AE_SET_HARDLINK; + } } int archive_entry_update_link_utf8(struct archive_entry *entry, const char *target) { int r; - if (entry->ae_set & AE_SET_SYMLINK) - r = archive_mstring_update_utf8(entry->archive, - &entry->ae_symlink, target); - else - r = archive_mstring_update_utf8(entry->archive, - &entry->ae_hardlink, target); + r = archive_mstring_update_utf8(entry->archive, + &entry->ae_linkname, target); + if ((entry->ae_set & AE_SET_SYMLINK) == 0) { + entry->ae_set |= AE_SET_HARDLINK; + } if (r == 0) return (1); if (errno == ENOMEM) @@ -1156,12 +1211,11 @@ _archive_entry_copy_link_l(struct archive_entry *entry, { int r; - if (entry->ae_set & AE_SET_SYMLINK) - r = archive_mstring_copy_mbs_len_l(&entry->ae_symlink, - target, len, sc); - else - r = archive_mstring_copy_mbs_len_l(&entry->ae_hardlink, + r = archive_mstring_copy_mbs_len_l(&entry->ae_linkname, target, len, sc); + if ((entry->ae_set & AE_SET_SYMLINK) == 0) { + entry->ae_set |= AE_SET_HARDLINK; + } return (r); } @@ -1255,6 +1309,9 @@ archive_entry_set_rdev(struct archive_entry *entry, dev_t m) entry->stat_valid = 0; entry->ae_stat.aest_rdev = m; entry->ae_stat.aest_rdev_is_broken_down = 0; + 
entry->ae_stat.aest_rdevmajor = 0; + entry->ae_stat.aest_rdevminor = 0; + entry->ae_set |= AE_SET_RDEV; } void @@ -1262,7 +1319,9 @@ archive_entry_set_rdevmajor(struct archive_entry *entry, dev_t m) { entry->stat_valid = 0; entry->ae_stat.aest_rdev_is_broken_down = 1; + entry->ae_stat.aest_rdev = 0; entry->ae_stat.aest_rdevmajor = m; + entry->ae_set |= AE_SET_RDEV; } void @@ -1270,7 +1329,9 @@ archive_entry_set_rdevminor(struct archive_entry *entry, dev_t m) { entry->stat_valid = 0; entry->ae_stat.aest_rdev_is_broken_down = 1; + entry->ae_stat.aest_rdev = 0; entry->ae_stat.aest_rdevminor = m; + entry->ae_set |= AE_SET_RDEV; } void @@ -1303,11 +1364,14 @@ archive_entry_copy_sourcepath_w(struct archive_entry *entry, const wchar_t *path void archive_entry_set_symlink(struct archive_entry *entry, const char *linkname) { - archive_mstring_copy_mbs(&entry->ae_symlink, linkname); - if (linkname != NULL) - entry->ae_set |= AE_SET_SYMLINK; - else + if (linkname == NULL && (entry->ae_set & AE_SET_HARDLINK)) + return; + archive_mstring_copy_mbs(&entry->ae_linkname, linkname); + entry->ae_set &= ~AE_SET_HARDLINK; + if (linkname == NULL) entry->ae_set &= ~AE_SET_SYMLINK; + else + entry->ae_set |= AE_SET_SYMLINK; } void @@ -1319,42 +1383,54 @@ archive_entry_set_symlink_type(struct archive_entry *entry, int type) void archive_entry_set_symlink_utf8(struct archive_entry *entry, const char *linkname) { - archive_mstring_copy_utf8(&entry->ae_symlink, linkname); - if (linkname != NULL) - entry->ae_set |= AE_SET_SYMLINK; - else + if (linkname == NULL && (entry->ae_set & AE_SET_HARDLINK)) + return; + archive_mstring_copy_utf8(&entry->ae_linkname, linkname); + entry->ae_set &= ~AE_SET_HARDLINK; + if (linkname == NULL) entry->ae_set &= ~AE_SET_SYMLINK; + else + entry->ae_set |= AE_SET_SYMLINK; } void archive_entry_copy_symlink(struct archive_entry *entry, const char *linkname) { - archive_mstring_copy_mbs(&entry->ae_symlink, linkname); - if (linkname != NULL) - entry->ae_set |= 
AE_SET_SYMLINK; - else + if (linkname == NULL && (entry->ae_set & AE_SET_HARDLINK)) + return; + archive_mstring_copy_mbs(&entry->ae_linkname, linkname); + entry->ae_set &= ~AE_SET_HARDLINK; + if (linkname == NULL) entry->ae_set &= ~AE_SET_SYMLINK; + else + entry->ae_set |= AE_SET_SYMLINK; } void archive_entry_copy_symlink_w(struct archive_entry *entry, const wchar_t *linkname) { - archive_mstring_copy_wcs(&entry->ae_symlink, linkname); - if (linkname != NULL) - entry->ae_set |= AE_SET_SYMLINK; - else + if (linkname == NULL && (entry->ae_set & AE_SET_HARDLINK)) + return; + archive_mstring_copy_wcs(&entry->ae_linkname, linkname); + entry->ae_set &= ~AE_SET_HARDLINK; + if (linkname == NULL) entry->ae_set &= ~AE_SET_SYMLINK; + else + entry->ae_set |= AE_SET_SYMLINK; } int archive_entry_update_symlink_utf8(struct archive_entry *entry, const char *linkname) { - if (linkname != NULL) - entry->ae_set |= AE_SET_SYMLINK; - else + if (linkname == NULL && (entry->ae_set & AE_SET_HARDLINK)) + return (0); + entry->ae_set &= ~AE_SET_HARDLINK; + if (linkname == NULL) entry->ae_set &= ~AE_SET_SYMLINK; + else + entry->ae_set |= AE_SET_SYMLINK; if (archive_mstring_update_utf8(entry->archive, - &entry->ae_symlink, linkname) == 0) + &entry->ae_linkname, linkname) == 0) return (1); if (errno == ENOMEM) __archive_errx(1, "No memory"); @@ -1367,12 +1443,15 @@ _archive_entry_copy_symlink_l(struct archive_entry *entry, { int r; - r = archive_mstring_copy_mbs_len_l(&entry->ae_symlink, + if (linkname == NULL && (entry->ae_set & AE_SET_HARDLINK)) + return (0); + entry->ae_set &= ~AE_SET_HARDLINK; + r = archive_mstring_copy_mbs_len_l(&entry->ae_linkname, linkname, len, sc); - if (linkname != NULL && r == 0) - entry->ae_set |= AE_SET_SYMLINK; - else + if (linkname == NULL || r != 0) entry->ae_set &= ~AE_SET_SYMLINK; + else + entry->ae_set |= AE_SET_SYMLINK; return (r); } @@ -2031,7 +2110,7 @@ ae_fflagstostr(unsigned long bitset, unsigned long bitclear) * provided string. 
*/ static const char * -ae_strtofflags(const char *s, unsigned long *setp, unsigned long *clrp) +ae_strtofflags(const char *s, size_t l, unsigned long *setp, unsigned long *clrp) { const char *start, *end; const struct flag *flag; @@ -2042,15 +2121,19 @@ ae_strtofflags(const char *s, unsigned long *setp, unsigned long *clrp) start = s; failed = NULL; /* Find start of first token. */ - while (*start == '\t' || *start == ' ' || *start == ',') + while (l > 0 && (*start == '\t' || *start == ' ' || *start == ',')) { start++; - while (*start != '\0') { + l--; + } + while (l > 0) { size_t length; /* Locate end of token. */ end = start; - while (*end != '\0' && *end != '\t' && - *end != ' ' && *end != ',') + while (l > 0 && *end != '\t' && + *end != ' ' && *end != ',') { end++; + l--; + } length = end - start; for (flag = fileflags; flag->name != NULL; flag++) { size_t flag_length = strlen(flag->name); @@ -2074,8 +2157,10 @@ ae_strtofflags(const char *s, unsigned long *setp, unsigned long *clrp) /* Find start of next token. 
*/ start = end; - while (*start == '\t' || *start == ' ' || *start == ',') + while (l > 0 && (*start == '\t' || *start == ' ' || *start == ',')) { start++; + l--; + } } diff --git a/libarchive/archive_entry.h b/libarchive/archive_entry.h index 3b41a0f8e3..b51f34e42b 100644 --- a/libarchive/archive_entry.h +++ b/libarchive/archive_entry.h @@ -259,6 +259,7 @@ __LA_DECL int archive_entry_gid_is_set(struct archive_entry *); __LA_DECL const char *archive_entry_gname(struct archive_entry *); __LA_DECL const char *archive_entry_gname_utf8(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_gname_w(struct archive_entry *); +__LA_DECL void archive_entry_set_link_to_hardlink(struct archive_entry *); __LA_DECL const char *archive_entry_hardlink(struct archive_entry *); __LA_DECL const char *archive_entry_hardlink_utf8(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_hardlink_w(struct archive_entry *); @@ -275,6 +276,7 @@ __LA_DECL const char *archive_entry_pathname_utf8(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_pathname_w(struct archive_entry *); __LA_DECL __LA_MODE_T archive_entry_perm(struct archive_entry *); __LA_DECL int archive_entry_perm_is_set(struct archive_entry *); +__LA_DECL int archive_entry_rdev_is_set(struct archive_entry *); __LA_DECL dev_t archive_entry_rdev(struct archive_entry *); __LA_DECL dev_t archive_entry_rdevmajor(struct archive_entry *); __LA_DECL dev_t archive_entry_rdevminor(struct archive_entry *); @@ -283,6 +285,7 @@ __LA_DECL const wchar_t *archive_entry_sourcepath_w(struct archive_entry *); __LA_DECL la_int64_t archive_entry_size(struct archive_entry *); __LA_DECL int archive_entry_size_is_set(struct archive_entry *); __LA_DECL const char *archive_entry_strmode(struct archive_entry *); +__LA_DECL void archive_entry_set_link_to_symlink(struct archive_entry *); __LA_DECL const char *archive_entry_symlink(struct archive_entry *); __LA_DECL const char *archive_entry_symlink_utf8(struct archive_entry 
*); __LA_DECL int archive_entry_symlink_type(struct archive_entry *); @@ -324,6 +327,8 @@ __LA_DECL void archive_entry_set_fflags(struct archive_entry *, /* Note that all recognized tokens are processed, regardless. */ __LA_DECL const char *archive_entry_copy_fflags_text(struct archive_entry *, const char *); +__LA_DECL const char *archive_entry_copy_fflags_text_len(struct archive_entry *, + const char *, size_t); __LA_DECL const wchar_t *archive_entry_copy_fflags_text_w(struct archive_entry *, const wchar_t *); __LA_DECL void archive_entry_set_gid(struct archive_entry *, la_int64_t); diff --git a/libarchive/archive_entry_private.h b/libarchive/archive_entry_private.h index 3423966c60..15f2a8ee28 100644 --- a/libarchive/archive_entry_private.h +++ b/libarchive/archive_entry_private.h @@ -149,6 +149,7 @@ struct archive_entry { #define AE_SET_FILETYPE 1024 #define AE_SET_UID 2048 #define AE_SET_GID 4096 +#define AE_SET_RDEV 8192 /* * Use aes here so that we get transparent mbs<->wcs conversions. @@ -157,9 +158,8 @@ struct archive_entry { unsigned long ae_fflags_set; /* Bitmap fflags */ unsigned long ae_fflags_clear; struct archive_mstring ae_gname; /* Name of owning group */ - struct archive_mstring ae_hardlink; /* Name of target for hardlink */ + struct archive_mstring ae_linkname; /* Name of target for hardlink or symlink */ struct archive_mstring ae_pathname; /* Name of entry */ - struct archive_mstring ae_symlink; /* symlink contents */ struct archive_mstring ae_uname; /* Name of owner */ /* Not used within libarchive; useful for some clients. 
*/ diff --git a/libarchive/archive_read_support_format_tar.c b/libarchive/archive_read_support_format_tar.c index e5058ee82d..3b7bd8556c 100644 --- a/libarchive/archive_read_support_format_tar.c +++ b/libarchive/archive_read_support_format_tar.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2003-2007 Tim Kientzle + * Copyright (c) 2003-2023 Tim Kientzle * Copyright (c) 2011-2012 Michihiro NAKAJIMA * Copyright (c) 2016 Martin Matuska * All rights reserved. @@ -117,33 +117,29 @@ struct sparse_block { }; struct tar { - struct archive_string acl_text; struct archive_string entry_pathname; /* For "GNU.sparse.name" and other similar path extensions. */ struct archive_string entry_pathname_override; - struct archive_string entry_linkpath; struct archive_string entry_uname; struct archive_string entry_gname; - struct archive_string longlink; + struct archive_string entry_linkpath; struct archive_string longname; - struct archive_string pax_header; struct archive_string pax_global; struct archive_string line; - int pax_hdrcharset_binary; - int header_recursion_depth; + int pax_hdrcharset_utf8; int64_t entry_bytes_remaining; int64_t entry_offset; int64_t entry_padding; int64_t entry_bytes_unconsumed; int64_t realsize; - int sparse_allowed; struct sparse_block *sparse_list; struct sparse_block *sparse_last; int64_t sparse_offset; int64_t sparse_numbytes; int sparse_gnu_major; int sparse_gnu_minor; - char sparse_gnu_pending; + char sparse_gnu_attributes_seen; + char filetype; struct archive_string localname; struct archive_string_conv *opt_sconv; @@ -168,25 +164,26 @@ static int gnu_sparse_old_read(struct archive_read *, struct tar *, static int gnu_sparse_old_parse(struct archive_read *, struct tar *, const struct gnu_sparse *sparse, int length); static int gnu_sparse_01_parse(struct archive_read *, struct tar *, - const char *); + const char *, size_t); static ssize_t gnu_sparse_10_read(struct archive_read *, struct tar *, - size_t *); + size_t *); static int 
header_Solaris_ACL(struct archive_read *, struct tar *, struct archive_entry *, const void *, size_t *); static int header_common(struct archive_read *, struct tar *, struct archive_entry *, const void *); static int header_old_tar(struct archive_read *, struct tar *, struct archive_entry *, const void *); -static int header_pax_extensions(struct archive_read *, struct tar *, +static int header_pax_extension(struct archive_read *, struct tar *, struct archive_entry *, const void *, size_t *); static int header_pax_global(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); -static int header_longlink(struct archive_read *, struct tar *, - struct archive_entry *, const void *h, size_t *); -static int header_longname(struct archive_read *, struct tar *, +static int header_gnu_longlink(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); -static int read_mac_metadata_blob(struct archive_read *, struct tar *, +static int header_gnu_longname(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); +static int is_mac_metadata_entry(struct archive_entry *entry); +static int read_mac_metadata_blob(struct archive_read *, + struct archive_entry *, size_t *); static int header_volume(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); static int header_ustar(struct archive_read *, struct tar *, @@ -204,21 +201,21 @@ static int archive_read_format_tar_read_header(struct archive_read *, struct archive_entry *); static int checksum(struct archive_read *, const void *); static int pax_attribute(struct archive_read *, struct tar *, - struct archive_entry *, const char *key, const char *value, - size_t value_length); -static int pax_attribute_acl(struct archive_read *, struct tar *, - struct archive_entry *, const char *, int); -static int pax_attribute_xattr(struct archive_entry *, const char *, - const char *); -static int pax_header(struct 
archive_read *, struct tar *, - struct archive_entry *, struct archive_string *); -static void pax_time(const char *, int64_t *sec, long *nanos); + struct archive_entry *, const char *key, size_t key_length, + size_t value_length, size_t *unconsumed); +static int pax_attribute_LIBARCHIVE_xattr(struct archive_entry *, + const char *, size_t, const char *, size_t); +static int pax_attribute_SCHILY_acl(struct archive_read *, struct tar *, + struct archive_entry *, size_t, int); +static int pax_attribute_SUN_holesdata(struct archive_read *, struct tar *, + struct archive_entry *, const char *, size_t); +static void pax_time(const char *, size_t, int64_t *sec, long *nanos); static ssize_t readline(struct archive_read *, struct tar *, const char **, ssize_t limit, size_t *); static int read_body_to_string(struct archive_read *, struct tar *, struct archive_string *, const void *h, size_t *); -static int solaris_sparse_parse(struct archive_read *, struct tar *, - struct archive_entry *, const char *); +static int read_bytes_to_string(struct archive_read *, + struct archive_string *, size_t, size_t *); static int64_t tar_atol(const char *, size_t); static int64_t tar_atol10(const char *, size_t); static int64_t tar_atol256(const char *, size_t); @@ -226,9 +223,20 @@ static int64_t tar_atol8(const char *, size_t); static int tar_read_header(struct archive_read *, struct tar *, struct archive_entry *, size_t *); static int tohex(int c); -static char *url_decode(const char *); +static char *url_decode(const char *, size_t); static void tar_flush_unconsumed(struct archive_read *, size_t *); +/* Sanity limits: These numbers should be low enough to + * prevent a maliciously-crafted archive from forcing us to + * allocate extreme amounts of memory. But of course, they + * need to be high enough for any correct value. These + * will likely need some adjustment as we get more experience. 
*/ +static const size_t guname_limit = 65536; /* Longest uname or gname: 64kiB */ +static const size_t pathname_limit = 1048576; /* Longest path name: 1MiB */ +static const size_t sparse_map_limit = 8 * 1048576; /* Longest sparse map: 8MiB */ +static const size_t xattr_limit = 16 * 1048576; /* Longest xattr: 16MiB */ +static const size_t fflags_limit = 512; /* Longest fflags */ +static const size_t acl_limit = 131072; /* Longest textual ACL: 128kiB */ int archive_read_support_format_gnutar(struct archive *a) @@ -283,17 +291,13 @@ archive_read_format_tar_cleanup(struct archive_read *a) tar = (struct tar *)(a->format->data); gnu_clear_sparse_list(tar); - archive_string_free(&tar->acl_text); archive_string_free(&tar->entry_pathname); archive_string_free(&tar->entry_pathname_override); - archive_string_free(&tar->entry_linkpath); archive_string_free(&tar->entry_uname); archive_string_free(&tar->entry_gname); archive_string_free(&tar->line); archive_string_free(&tar->pax_global); - archive_string_free(&tar->pax_header); archive_string_free(&tar->longname); - archive_string_free(&tar->longlink); archive_string_free(&tar->localname); free(tar); (a->format->data) = NULL; @@ -505,6 +509,8 @@ archive_read_format_tar_read_header(struct archive_read *a, * probably not worthwhile just to support the relatively * obscure tar->cpio conversion case. 
*/ + /* TODO: Move this into `struct tar` to avoid conflicts + * when reading multiple archives */ static int default_inode; static int default_dev; struct tar *tar; @@ -627,7 +633,8 @@ archive_read_format_tar_read_data(struct archive_read *a, return (ARCHIVE_FATAL); if (*buff == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Truncated tar archive"); + "Truncated tar archive" + " detected while reading data"); return (ARCHIVE_FATAL); } if (bytes_read > tar->entry_bytes_remaining) @@ -688,7 +695,7 @@ archive_read_format_tar_skip(struct archive_read *a) } /* - * This function recursively interprets all of the headers associated + * This function reads and interprets all of the headers associated * with a single entry. */ static int @@ -696,190 +703,259 @@ tar_read_header(struct archive_read *a, struct tar *tar, struct archive_entry *entry, size_t *unconsumed) { ssize_t bytes; - int err, eof_vol_header; + int err = ARCHIVE_OK, err2; + int eof_fatal = 0; /* EOF is okay at some points... */ const char *h; const struct archive_entry_header_ustar *header; const struct archive_entry_header_gnutar *gnuheader; - eof_vol_header = 0; - - /* Loop until we find a workable header record. */ - for (;;) { - tar_flush_unconsumed(a, unconsumed); + /* Bitmask of what header types we've seen. 
*/ + int32_t seen_headers = 0; + static const int32_t seen_A_header = 1; + static const int32_t seen_g_header = 2; + static const int32_t seen_K_header = 4; + static const int32_t seen_L_header = 8; + static const int32_t seen_V_header = 16; + static const int32_t seen_x_header = 32; /* Also X */ + static const int32_t seen_mac_metadata = 512; + + tar->pax_hdrcharset_utf8 = 1; + tar->sparse_gnu_attributes_seen = 0; + archive_string_empty(&(tar->entry_gname)); + archive_string_empty(&(tar->entry_pathname)); + archive_string_empty(&(tar->entry_pathname_override)); + archive_string_empty(&(tar->entry_uname)); - /* Read 512-byte header record */ - h = __archive_read_ahead(a, 512, &bytes); - if (bytes < 0) - return ((int)bytes); - if (bytes == 0) { /* EOF at a block boundary. */ - /* Some writers do omit the block of nulls. */ - return (ARCHIVE_EOF); - } - if (bytes < 512) { /* Short block at EOF; this is bad. */ - archive_set_error(&a->archive, - ARCHIVE_ERRNO_FILE_FORMAT, - "Truncated tar archive"); - return (ARCHIVE_FATAL); - } - *unconsumed = 512; + /* Ensure format is set. */ + if (a->archive.archive_format_name == NULL) { + a->archive.archive_format = ARCHIVE_FORMAT_TAR; + a->archive.archive_format_name = "tar"; + } - /* Header is workable if it's not an end-of-archive mark. */ - if (h[0] != 0 || !archive_block_is_null(h)) - break; + /* + * TODO: Write global/default pax options into + * 'entry' struct here before overwriting with + * file-specific options. + */ - /* Ensure format is set for archives with only null blocks. */ - if (a->archive.archive_format_name == NULL) { - a->archive.archive_format = ARCHIVE_FORMAT_TAR; - a->archive.archive_format_name = "tar"; - } + /* Loop over all the headers needed for the next entry */ + for (;;) { - if (!tar->read_concatenated_archives) { - /* Try to consume a second all-null record, as well. */ + /* Find the next valid header record. 
*/ + while (1) { tar_flush_unconsumed(a, unconsumed); - h = __archive_read_ahead(a, 512, NULL); - if (h != NULL && h[0] == 0 && archive_block_is_null(h)) - __archive_read_consume(a, 512); - archive_clear_error(&a->archive); - return (ARCHIVE_EOF); - } - /* - * We're reading concatenated archives, ignore this block and - * loop to get the next. - */ - } + /* Read 512-byte header record */ + h = __archive_read_ahead(a, 512, &bytes); + if (bytes < 0) + return ((int)bytes); + if (bytes == 0) { /* EOF at a block boundary. */ + if (eof_fatal) { + /* We've read a special header already; + * if there's no regular header, then this is + * a premature EOF. */ + archive_set_error(&a->archive, EINVAL, + "Damaged tar archive"); + return (ARCHIVE_FATAL); + } else { + return (ARCHIVE_EOF); + } + } + if (bytes < 512) { /* Short block at EOF; this is bad. */ + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated tar archive" + " detected while reading next heaader"); + return (ARCHIVE_FATAL); + } + *unconsumed += 512; - /* - * Note: If the checksum fails and we return ARCHIVE_RETRY, - * then the client is likely to just retry. This is a very - * crude way to search for the next valid header! - * - * TODO: Improve this by implementing a real header scan. - */ - if (!checksum(a, h)) { - tar_flush_unconsumed(a, unconsumed); - archive_set_error(&a->archive, EINVAL, "Damaged tar archive"); - return (ARCHIVE_RETRY); /* Retryable: Invalid header */ - } + if (h[0] == 0 && archive_block_is_null(h)) { + /* We found a NULL block which indicates end-of-archive */ - if (++tar->header_recursion_depth > 32) { - tar_flush_unconsumed(a, unconsumed); - archive_set_error(&a->archive, EINVAL, "Too many special headers"); - return (ARCHIVE_WARN); - } + if (tar->read_concatenated_archives) { + /* We're ignoring NULL blocks, so keep going. */ + continue; + } - /* Determine the format variant. 
*/ - header = (const struct archive_entry_header_ustar *)h; + /* Try to consume a second all-null record, as well. */ + /* If we can't, that's okay. */ + tar_flush_unconsumed(a, unconsumed); + h = __archive_read_ahead(a, 512, NULL); + if (h != NULL && h[0] == 0 && archive_block_is_null(h)) + __archive_read_consume(a, 512); - switch(header->typeflag[0]) { - case 'A': /* Solaris tar ACL */ - a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; - a->archive.archive_format_name = "Solaris tar"; - err = header_Solaris_ACL(a, tar, entry, h, unconsumed); - break; - case 'g': /* POSIX-standard 'g' header. */ - a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; - a->archive.archive_format_name = "POSIX pax interchange format"; - err = header_pax_global(a, tar, entry, h, unconsumed); - if (err == ARCHIVE_EOF) - return (err); - break; - case 'K': /* Long link name (GNU tar, others) */ - err = header_longlink(a, tar, entry, h, unconsumed); - break; - case 'L': /* Long filename (GNU tar, others) */ - err = header_longname(a, tar, entry, h, unconsumed); - break; - case 'V': /* GNU volume header */ - err = header_volume(a, tar, entry, h, unconsumed); - if (err == ARCHIVE_EOF) - eof_vol_header = 1; - break; - case 'X': /* Used by SUN tar; same as 'x'. */ - a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; - a->archive.archive_format_name = - "POSIX pax interchange format (Sun variant)"; - err = header_pax_extensions(a, tar, entry, h, unconsumed); - break; - case 'x': /* POSIX-standard 'x' header. 
*/ - a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; - a->archive.archive_format_name = "POSIX pax interchange format"; - err = header_pax_extensions(a, tar, entry, h, unconsumed); - break; - default: - gnuheader = (const struct archive_entry_header_gnutar *)h; - if (memcmp(gnuheader->magic, "ustar \0", 8) == 0) { - a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR; - a->archive.archive_format_name = "GNU tar format"; - err = header_gnutar(a, tar, entry, h, unconsumed); - } else if (memcmp(header->magic, "ustar", 5) == 0) { - if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) { - a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR; - a->archive.archive_format_name = "POSIX ustar format"; + archive_clear_error(&a->archive); + return (ARCHIVE_EOF); } - err = header_ustar(a, tar, entry, h); - } else { - a->archive.archive_format = ARCHIVE_FORMAT_TAR; - a->archive.archive_format_name = "tar (non-POSIX)"; - err = header_old_tar(a, tar, entry, h); - } - } - if (err == ARCHIVE_FATAL) - return (err); - - tar_flush_unconsumed(a, unconsumed); - h = NULL; - header = NULL; + /* This is NOT a null block, so it must be a valid header. */ + if (!checksum(a, h)) { + tar_flush_unconsumed(a, unconsumed); + archive_set_error(&a->archive, EINVAL, "Damaged tar archive"); + /* If we've read some critical information (pax headers, etc) + * and _then_ see a bad header, we can't really recover. */ + if (eof_fatal) { + return (ARCHIVE_FATAL); + } else { + return (ARCHIVE_RETRY); + } + } + break; + } - --tar->header_recursion_depth; - /* Yuck. Apple's design here ends up storing long pathname - * extensions for both the AppleDouble extension entry and the - * regular entry. - */ - if ((err == ARCHIVE_WARN || err == ARCHIVE_OK) && - tar->header_recursion_depth == 0 && - tar->process_mac_extensions) { - int err2 = read_mac_metadata_blob(a, tar, entry, h, unconsumed); - if (err2 < err) - err = err2; - } - - /* We return warnings or success as-is. 
Anything else is fatal. */ - if (err == ARCHIVE_WARN || err == ARCHIVE_OK) { - if (tar->sparse_gnu_pending) { - if (tar->sparse_gnu_major == 1 && - tar->sparse_gnu_minor == 0) { - ssize_t bytes_read; - - tar->sparse_gnu_pending = 0; - /* Read initial sparse map. */ - bytes_read = gnu_sparse_10_read(a, tar, unconsumed); - if (bytes_read < 0) - return ((int)bytes_read); - tar->entry_bytes_remaining -= bytes_read; + /* Determine the format variant. */ + header = (const struct archive_entry_header_ustar *)h; + switch(header->typeflag[0]) { + case 'A': /* Solaris tar ACL */ + if (seen_headers & seen_A_header) { + return (ARCHIVE_FATAL); + } + seen_headers |= seen_A_header; + a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; + a->archive.archive_format_name = "Solaris tar"; + err2 = header_Solaris_ACL(a, tar, entry, h, unconsumed); + break; + case 'g': /* POSIX-standard 'g' header. */ + if (seen_headers & seen_g_header) { + return (ARCHIVE_FATAL); + } + seen_headers |= seen_g_header; + a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; + a->archive.archive_format_name = "POSIX pax interchange format"; + err2 = header_pax_global(a, tar, entry, h, unconsumed); + break; + case 'K': /* Long link name (GNU tar, others) */ + if (seen_headers & seen_K_header) { + return (ARCHIVE_FATAL); + } + seen_headers |= seen_K_header; + err2 = header_gnu_longlink(a, tar, entry, h, unconsumed); + break; + case 'L': /* Long filename (GNU tar, others) */ + if (seen_headers & seen_L_header) { + return (ARCHIVE_FATAL); + } + seen_headers |= seen_L_header; + err2 = header_gnu_longname(a, tar, entry, h, unconsumed); + break; + case 'V': /* GNU volume header */ + if (seen_headers & seen_V_header) { + return (ARCHIVE_FATAL); + } + seen_headers |= seen_V_header; + err2 = header_volume(a, tar, entry, h, unconsumed); + break; + case 'X': /* Used by SUN tar; same as 'x'. 
*/ + if (seen_headers & seen_x_header) { + return (ARCHIVE_FATAL); + } + seen_headers |= seen_x_header; + a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; + a->archive.archive_format_name = + "POSIX pax interchange format (Sun variant)"; + err2 = header_pax_extension(a, tar, entry, h, unconsumed); + break; + case 'x': /* POSIX-standard 'x' header. */ + if (seen_headers & seen_x_header) { + return (ARCHIVE_FATAL); + } + seen_headers |= seen_x_header; + a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; + a->archive.archive_format_name = "POSIX pax interchange format"; + err2 = header_pax_extension(a, tar, entry, h, unconsumed); + break; + default: /* Regular header: Legacy tar, GNU tar, or ustar */ + gnuheader = (const struct archive_entry_header_gnutar *)h; + if (memcmp(gnuheader->magic, "ustar \0", 8) == 0) { + a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR; + a->archive.archive_format_name = "GNU tar format"; + err2 = header_gnutar(a, tar, entry, h, unconsumed); + } else if (memcmp(header->magic, "ustar", 5) == 0) { + if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) { + a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR; + a->archive.archive_format_name = "POSIX ustar format"; + } + err2 = header_ustar(a, tar, entry, h); } else { - archive_set_error(&a->archive, - ARCHIVE_ERRNO_MISC, - "Unrecognized GNU sparse file format"); - return (ARCHIVE_WARN); + a->archive.archive_format = ARCHIVE_FORMAT_TAR; + a->archive.archive_format_name = "tar (non-POSIX)"; + err2 = header_old_tar(a, tar, entry, h); + } + err = err_combine(err, err2); + /* We return warnings or success as-is. Anything else is fatal. */ + if (err < ARCHIVE_WARN) { + return (ARCHIVE_FATAL); + } + /* Filename of the form `._filename` is an AppleDouble + * extension entry. The body is the macOS metadata blob; + * this is followed by another entry with the actual + * regular file data. 
+ * This design has two drawbacks: + * = it's brittle; you might just have a file with such a name + * = it duplicates any long pathname extensions + * + * TODO: This probably shouldn't be here at all. Consider + * just returning the contents as a regular entry here and + * then dealing with it when we write data to disk. + */ + if (tar->process_mac_extensions + && ((seen_headers & seen_mac_metadata) == 0) + && is_mac_metadata_entry(entry)) { + err2 = read_mac_metadata_blob(a, entry, unconsumed); + if (err2 < ARCHIVE_WARN) { + return (ARCHIVE_FATAL); + } + err = err_combine(err, err2); + /* Note: Other headers can appear again. */ + seen_headers = seen_mac_metadata; + break; } - tar->sparse_gnu_pending = 0; + + /* Reconcile GNU sparse attributes */ + if (tar->sparse_gnu_attributes_seen) { + /* Only 'S' (GNU sparse) and ustar '0' regular files can be sparse */ + if (tar->filetype != 'S' && tar->filetype != '0') { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Non-regular file cannot be sparse"); + return (ARCHIVE_WARN); + } else if (tar->sparse_gnu_major == 0 && + tar->sparse_gnu_minor == 0) { + /* Sparse map already parsed from 'x' header */ + } else if (tar->sparse_gnu_major == 0 && + tar->sparse_gnu_minor == 1) { + /* Sparse map already parsed from 'x' header */ + } else if (tar->sparse_gnu_major == 1 && + tar->sparse_gnu_minor == 0) { + /* Sparse map is prepended to file contents */ + ssize_t bytes_read; + bytes_read = gnu_sparse_10_read(a, tar, unconsumed); + if (bytes_read < 0) + return ((int)bytes_read); + tar->entry_bytes_remaining -= bytes_read; + } else { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_MISC, + "Unrecognized GNU sparse file format"); + return (ARCHIVE_WARN); + } + } + return (err); } - return (err); - } - if (err == ARCHIVE_EOF) { - if (!eof_vol_header) { - /* EOF when recursively reading a header is bad. 
*/ - archive_set_error(&a->archive, EINVAL, - "Damaged tar archive"); - } else { - /* If we encounter just a GNU volume header treat - * this situation as an empty archive */ - return (ARCHIVE_EOF); + + /* We're between headers ... */ + err = err_combine(err, err2); + if (err == ARCHIVE_FATAL) + return (err); + + /* The GNU volume header and the pax `g` global header + * are both allowed to be the only header in an + * archive. If we've seen any other header, a + * following EOF is fatal. */ + if ((seen_headers & ~seen_V_header & ~seen_g_header) != 0) { + eof_fatal = 1; } } - return (ARCHIVE_FATAL); } /* @@ -959,6 +1035,7 @@ header_Solaris_ACL(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { const struct archive_entry_header_ustar *header; + struct archive_string acl_text; size_t size; int err, acl_type; int64_t type; @@ -970,27 +1047,24 @@ header_Solaris_ACL(struct archive_read *a, struct tar *tar, */ header = (const struct archive_entry_header_ustar *)h; size = (size_t)tar_atol(header->size, sizeof(header->size)); - err = read_body_to_string(a, tar, &(tar->acl_text), h, unconsumed); + archive_string_init(&acl_text); + err = read_body_to_string(a, tar, &acl_text, h, unconsumed); if (err != ARCHIVE_OK) return (err); - /* Recursively read next header */ - err = tar_read_header(a, tar, entry, unconsumed); - if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) - return (err); - /* TODO: Examine the first characters to see if this * is an AIX ACL descriptor. We'll likely never support * them, but it would be polite to recognize and warn when * we do see them. */ /* Leading octal number indicates ACL type and number of entries. 
*/ - p = acl = tar->acl_text.s; + p = acl = acl_text.s; type = 0; while (*p != '\0' && p < acl + size) { if (*p < '0' || *p > '7') { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed Solaris ACL attribute (invalid digit)"); + archive_string_free(&acl_text); return(ARCHIVE_WARN); } type <<= 3; @@ -998,6 +1072,7 @@ header_Solaris_ACL(struct archive_read *a, struct tar *tar, if (type > 077777777) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed Solaris ACL attribute (count too large)"); + archive_string_free(&acl_text); return (ARCHIVE_WARN); } p++; @@ -1015,6 +1090,7 @@ header_Solaris_ACL(struct archive_read *a, struct tar *tar, archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed Solaris ACL attribute (unsupported type %o)", (int)type); + archive_string_free(&acl_text); return (ARCHIVE_WARN); } p++; @@ -1022,6 +1098,7 @@ header_Solaris_ACL(struct archive_read *a, struct tar *tar, if (p >= acl + size) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed Solaris ACL attribute (body overflow)"); + archive_string_free(&acl_text); return(ARCHIVE_WARN); } @@ -1035,12 +1112,17 @@ header_Solaris_ACL(struct archive_read *a, struct tar *tar, if (tar->sconv_acl == NULL) { tar->sconv_acl = archive_string_conversion_from_charset( &(a->archive), "UTF-8", 1); - if (tar->sconv_acl == NULL) + if (tar->sconv_acl == NULL) { + archive_string_free(&acl_text); return (ARCHIVE_FATAL); + } } archive_strncpy(&(tar->localname), acl, p - acl); err = archive_acl_from_text_l(archive_entry_acl(entry), tar->localname.s, acl_type, tar->sconv_acl); + /* Workaround: Force perm_is_set() to be correct */ + /* If this bit were stored in the ACL, this wouldn't be needed */ + archive_entry_set_perm(entry, archive_entry_perm(entry)); if (err != ARCHIVE_OK) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, @@ -1049,6 +1131,7 @@ header_Solaris_ACL(struct archive_read *a, struct tar *tar, archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 
"Malformed Solaris ACL attribute (unparsable)"); } + archive_string_free(&acl_text); return (err); } @@ -1056,20 +1139,16 @@ header_Solaris_ACL(struct archive_read *a, struct tar *tar, * Interpret 'K' long linkname header. */ static int -header_longlink(struct archive_read *a, struct tar *tar, +header_gnu_longlink(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { int err; - err = read_body_to_string(a, tar, &(tar->longlink), h, unconsumed); - if (err != ARCHIVE_OK) - return (err); - err = tar_read_header(a, tar, entry, unconsumed); - if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) - return (err); - /* Set symlink if symlink already set, else hardlink. */ - archive_entry_copy_link(entry, tar->longlink.s); - return (ARCHIVE_OK); + struct archive_string linkpath; + archive_string_init(&linkpath); + err = read_body_to_string(a, tar, &linkpath, h, unconsumed); + archive_entry_set_link(entry, linkpath.s); + return (err); } static int @@ -1091,7 +1170,7 @@ set_conversion_failed_error(struct archive_read *a, * Interpret 'L' long filename header. */ static int -header_longname(struct archive_read *a, struct tar *tar, +header_gnu_longname(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { int err; @@ -1099,17 +1178,12 @@ header_longname(struct archive_read *a, struct tar *tar, err = read_body_to_string(a, tar, &(tar->longname), h, unconsumed); if (err != ARCHIVE_OK) return (err); - /* Read and parse "real" header, then override name. */ - err = tar_read_header(a, tar, entry, unconsumed); - if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) - return (err); if (archive_entry_copy_pathname_l(entry, tar->longname.s, archive_strlen(&(tar->longname)), tar->sconv) != 0) err = set_conversion_failed_error(a, tar->sconv, "Pathname"); return (err); } - /* * Interpret 'V' GNU tar volume header. 
*/ @@ -1117,32 +1191,30 @@ static int header_volume(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { - (void)h; + const struct archive_entry_header_ustar *header; + int64_t size, to_consume; + + (void)a; /* UNUSED */ + (void)tar; /* UNUSED */ + (void)entry; /* UNUSED */ - /* Just skip this and read the next header. */ - return (tar_read_header(a, tar, entry, unconsumed)); + header = (const struct archive_entry_header_ustar *)h; + size = tar_atol(header->size, sizeof(header->size)); + to_consume = ((size + 511) & ~511); + *unconsumed += to_consume; + return (ARCHIVE_OK); } /* - * Read body of an archive entry into an archive_string object. + * Read the next `size` bytes into the provided string. + * Null-terminate the string. */ static int -read_body_to_string(struct archive_read *a, struct tar *tar, - struct archive_string *as, const void *h, size_t *unconsumed) -{ - int64_t size; - const struct archive_entry_header_ustar *header; +read_bytes_to_string(struct archive_read *a, + struct archive_string *as, size_t size, + size_t *unconsumed) { const void *src; - (void)tar; /* UNUSED */ - header = (const struct archive_entry_header_ustar *)h; - size = tar_atol(header->size, sizeof(header->size)); - if ((size > 1048576) || (size < 0)) { - archive_set_error(&a->archive, EINVAL, - "Special header too large"); - return (ARCHIVE_FATAL); - } - /* Fail if we can't make our buffer big enough. */ if (archive_string_ensure(as, (size_t)size+1) == NULL) { archive_set_error(&a->archive, ENOMEM, @@ -1153,18 +1225,51 @@ read_body_to_string(struct archive_read *a, struct tar *tar, tar_flush_unconsumed(a, unconsumed); /* Read the body into the string. 
*/ - *unconsumed = (size_t)((size + 511) & ~ 511); - src = __archive_read_ahead(a, *unconsumed, NULL); + src = __archive_read_ahead(a, size, NULL); if (src == NULL) { + archive_set_error(&a->archive, EINVAL, + "Truncated archive" + " detected while reading metadata"); *unconsumed = 0; return (ARCHIVE_FATAL); } memcpy(as->s, src, (size_t)size); as->s[size] = '\0'; as->length = (size_t)size; + *unconsumed += size; return (ARCHIVE_OK); } +/* + * Read body of an archive entry into an archive_string object. + */ +static int +read_body_to_string(struct archive_read *a, struct tar *tar, + struct archive_string *as, const void *h, size_t *unconsumed) +{ + int64_t size; + const struct archive_entry_header_ustar *header; + int r; + + (void)tar; /* UNUSED */ + header = (const struct archive_entry_header_ustar *)h; + size = tar_atol(header->size, sizeof(header->size)); + if ((size > 1048576) || (size < 0)) { + archive_string_empty(as); + int64_t to_consume = ((size + 511) & ~511); + if (to_consume != __archive_read_consume(a, to_consume)) { + return (ARCHIVE_FATAL); + } + archive_set_error(&a->archive, EINVAL, + "Special header too large: %d > 1MiB", + (int)size); + return (ARCHIVE_WARN); + } + r = read_bytes_to_string(a, as, size, unconsumed); + *unconsumed += 0x1ff & (-size); + return(r); +} + /* * Parse out common header elements. 
* @@ -1180,21 +1285,27 @@ header_common(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h) { const struct archive_entry_header_ustar *header; - char tartype; + const char *existing_linkpath; int err = ARCHIVE_OK; header = (const struct archive_entry_header_ustar *)h; - if (header->linkname[0]) - archive_strncpy(&(tar->entry_linkpath), - header->linkname, sizeof(header->linkname)); - else - archive_string_empty(&(tar->entry_linkpath)); /* Parse out the numeric fields (all are octal) */ - archive_entry_set_mode(entry, - (mode_t)tar_atol(header->mode, sizeof(header->mode))); - archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid))); - archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid))); + + /* Split mode handling: Set filetype always, perm only if not already set */ + archive_entry_set_filetype(entry, + (mode_t)tar_atol(header->mode, sizeof(header->mode))); + if (!archive_entry_perm_is_set(entry)) { + archive_entry_set_perm(entry, + (mode_t)tar_atol(header->mode, sizeof(header->mode))); + } + if (!archive_entry_uid_is_set(entry)) { + archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid))); + } + if (!archive_entry_gid_is_set(entry)) { + archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid))); + } + tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size)); if (tar->entry_bytes_remaining < 0) { tar->entry_bytes_remaining = 0; @@ -1209,21 +1320,38 @@ header_common(struct archive_read *a, struct tar *tar, "Tar entry size overflow"); return (ARCHIVE_FATAL); } - tar->realsize = tar->entry_bytes_remaining; - archive_entry_set_size(entry, tar->entry_bytes_remaining); - archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0); + if (!tar->realsize_override) { + tar->realsize = tar->entry_bytes_remaining; + } + archive_entry_set_size(entry, tar->realsize); + + if (!archive_entry_mtime_is_set(entry)) { + 
archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0); + } /* Handle the tar type flag appropriately. */ - tartype = header->typeflag[0]; + tar->filetype = header->typeflag[0]; - switch (tartype) { + /* + * TODO: If the linkpath came from Pax extension header, then + * we should obey the hdrcharset_utf8 flag when converting these. + */ + switch (tar->filetype) { case '1': /* Hard link */ - if (archive_entry_copy_hardlink_l(entry, tar->entry_linkpath.s, - archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) { - err = set_conversion_failed_error(a, tar->sconv, - "Linkname"); - if (err == ARCHIVE_FATAL) - return (err); + archive_entry_set_link_to_hardlink(entry); + existing_linkpath = archive_entry_hardlink(entry); + if (existing_linkpath == NULL || existing_linkpath[0] == '\0') { + struct archive_string linkpath; + archive_string_init(&linkpath); + archive_strncpy(&linkpath, + header->linkname, sizeof(header->linkname)); + if (archive_entry_copy_hardlink_l(entry, linkpath.s, + archive_strlen(&linkpath), tar->sconv) != 0) { + err = set_conversion_failed_error(a, tar->sconv, + "Linkname"); + if (err == ARCHIVE_FATAL) + return (err); + } } /* * The following may seem odd, but: Technically, tar @@ -1283,16 +1411,24 @@ header_common(struct archive_read *a, struct tar *tar, */ break; case '2': /* Symlink */ + archive_entry_set_link_to_symlink(entry); + existing_linkpath = archive_entry_symlink(entry); + if (existing_linkpath == NULL || existing_linkpath[0] == '\0') { + struct archive_string linkpath; + archive_string_init(&linkpath); + archive_strncpy(&linkpath, + header->linkname, sizeof(header->linkname)); + if (archive_entry_copy_symlink_l(entry, linkpath.s, + archive_strlen(&linkpath), tar->sconv) != 0) { + err = set_conversion_failed_error(a, tar->sconv, + "Linkname"); + if (err == ARCHIVE_FATAL) + return (err); + } + } archive_entry_set_filetype(entry, AE_IFLNK); archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; - if 
(archive_entry_copy_symlink_l(entry, tar->entry_linkpath.s, - archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) { - err = set_conversion_failed_error(a, tar->sconv, - "Linkname"); - if (err == ARCHIVE_FATAL) - return (err); - } break; case '3': /* Character device */ archive_entry_set_filetype(entry, AE_IFCHR); @@ -1342,15 +1478,9 @@ header_common(struct archive_read *a, struct tar *tar, * sparse information in the extended area. */ /* FALLTHROUGH */ - case '0': - /* - * Enable sparse file "read" support only for regular - * files and explicit GNU sparse files. However, we - * don't allow non-standard file types to be sparse. - */ - tar->sparse_allowed = 1; + case '0': /* ustar "regular" file */ /* FALLTHROUGH */ - default: /* Regular file and non-standard types */ + default: /* Non-standard file types */ /* * Per POSIX: non-recognized types should always be * treated as regular files. @@ -1390,21 +1520,13 @@ header_old_tar(struct archive_read *a, struct tar *tar, } /* - * Read a Mac AppleDouble-encoded blob of file metadata, - * if there is one. + * Is this likely an AppleDouble extension? */ static int -read_mac_metadata_blob(struct archive_read *a, struct tar *tar, - struct archive_entry *entry, const void *h, size_t *unconsumed) -{ - int64_t size; - size_t msize; - const void *data; +is_mac_metadata_entry(struct archive_entry *entry) { const char *p, *name; const wchar_t *wp, *wname; - (void)h; /* UNUSED */ - wname = wp = archive_entry_pathname_w(entry); if (wp != NULL) { /* Find the last path element. */ @@ -1416,8 +1538,8 @@ read_mac_metadata_blob(struct archive_read *a, struct tar *tar, * If last path element starts with "._", then * this is a Mac extension. */ - if (wname[0] != L'.' || wname[1] != L'_' || wname[2] == L'\0') - return ARCHIVE_OK; + if (wname[0] == L'.' && wname[1] == L'_' && wname[2] != L'\0') + return 1; } else { /* Find the last path element. 
*/ name = p = archive_entry_pathname(entry); @@ -1431,9 +1553,29 @@ read_mac_metadata_blob(struct archive_read *a, struct tar *tar, * If last path element starts with "._", then * this is a Mac extension. */ - if (name[0] != '.' || name[1] != '_' || name[2] == '\0') - return ARCHIVE_OK; + if (name[0] == '.' && name[1] == '_' && name[2] != '\0') + return 1; } + /* Not a mac extension */ + return 0; +} + +/* + * Read a Mac AppleDouble-encoded blob of file metadata, + * if there is one. + * + * TODO: In Libarchive 4, we should consider ripping this + * out -- instead, return a file starting with `._` as + * a regular file and let the client (or archive_write logic) + * handle it. + */ +static int +read_mac_metadata_blob(struct archive_read *a, + struct archive_entry *entry, size_t *unconsumed) +{ + int64_t size; + size_t msize; + const void *data; /* Read the body as a Mac OS metadata blob. */ size = archive_entry_size(entry); @@ -1443,6 +1585,17 @@ read_mac_metadata_blob(struct archive_read *a, struct tar *tar, return (ARCHIVE_FATAL); } + /* TODO: Should this merely skip the overlarge entry and + * WARN? Or is xattr_limit sufficiently large that we can + * safely assume anything larger is malicious? */ + if (size > (int64_t)xattr_limit) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Oversized AppleDouble extension has size %llu > %llu", + (unsigned long long)size, + (unsigned long long)xattr_limit); + return (ARCHIVE_FATAL); + } + /* * TODO: Look beyond the body here to peek at the next header. * If it's a regular header (not an extension header) @@ -1455,15 +1608,16 @@ read_mac_metadata_blob(struct archive_read *a, struct tar *tar, * Q: Is the above idea really possible? Even * when there are GNU or pax extension entries? 
*/ + tar_flush_unconsumed(a, unconsumed); data = __archive_read_ahead(a, msize, NULL); if (data == NULL) { *unconsumed = 0; return (ARCHIVE_FATAL); } + archive_entry_clear(entry); archive_entry_copy_mac_metadata(entry, data, msize); *unconsumed = (msize + 511) & ~ 511; - tar_flush_unconsumed(a, unconsumed); - return (tar_read_header(a, tar, entry, unconsumed)); + return (ARCHIVE_OK); } /* @@ -1473,76 +1627,57 @@ static int header_pax_global(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { - int err; - - err = read_body_to_string(a, tar, &(tar->pax_global), h, unconsumed); - if (err != ARCHIVE_OK) - return (err); - err = tar_read_header(a, tar, entry, unconsumed); - return (err); -} - -static int -header_pax_extensions(struct archive_read *a, struct tar *tar, - struct archive_entry *entry, const void *h, size_t *unconsumed) -{ - int err, err2; - - err = read_body_to_string(a, tar, &(tar->pax_header), h, unconsumed); - if (err != ARCHIVE_OK) - return (err); + const struct archive_entry_header_ustar *header; + int64_t size, to_consume; - /* Parse the next header. */ - err = tar_read_header(a, tar, entry, unconsumed); - if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) - return (err); + (void)a; /* UNUSED */ + (void)tar; /* UNUSED */ + (void)entry; /* UNUSED */ - /* - * TODO: Parse global/default options into 'entry' struct here - * before handling file-specific options. - * - * This design (parse standard header, then overwrite with pax - * extended attribute data) usually works well, but isn't ideal; - * it would be better to parse the pax extended attributes first - * and then skip any fields in the standard header that were - * defined in the pax header. 
- */ - err2 = pax_header(a, tar, entry, &tar->pax_header); - err = err_combine(err, err2); - tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); - return (err); + header = (const struct archive_entry_header_ustar *)h; + size = tar_atol(header->size, sizeof(header->size)); + to_consume = ((size + 511) & ~511); + *unconsumed += to_consume; + return (ARCHIVE_OK); } - /* * Parse a file header for a Posix "ustar" archive entry. This also * handles "pax" or "extended ustar" entries. + * + * In order to correctly handle pax attributes (which precede this), + * we have to skip parsing any field for which the entry already has + * contents. */ static int header_ustar(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h) { const struct archive_entry_header_ustar *header; - struct archive_string *as; + struct archive_string as; int err = ARCHIVE_OK, r; header = (const struct archive_entry_header_ustar *)h; /* Copy name into an internal buffer to ensure null-termination. 
*/ - as = &(tar->entry_pathname); - if (header->prefix[0]) { - archive_strncpy(as, header->prefix, sizeof(header->prefix)); - if (as->s[archive_strlen(as) - 1] != '/') - archive_strappend_char(as, '/'); - archive_strncat(as, header->name, sizeof(header->name)); - } else { - archive_strncpy(as, header->name, sizeof(header->name)); - } - if (archive_entry_copy_pathname_l(entry, as->s, archive_strlen(as), - tar->sconv) != 0) { - err = set_conversion_failed_error(a, tar->sconv, "Pathname"); - if (err == ARCHIVE_FATAL) - return (err); + const char *existing_pathname = archive_entry_pathname(entry); + if (existing_pathname == NULL || existing_pathname[0] == '\0') { + archive_string_init(&as); + if (header->prefix[0]) { + archive_strncpy(&as, header->prefix, sizeof(header->prefix)); + if (as.s[archive_strlen(&as) - 1] != '/') + archive_strappend_char(&as, '/'); + archive_strncat(&as, header->name, sizeof(header->name)); + } else { + archive_strncpy(&as, header->name, sizeof(header->name)); + } + if (archive_entry_copy_pathname_l(entry, as.s, archive_strlen(&as), + tar->sconv) != 0) { + err = set_conversion_failed_error(a, tar->sconv, "Pathname"); + if (err == ARCHIVE_FATAL) + return (err); + } + archive_string_free(&as); } /* Handle rest of common fields. */ @@ -1553,26 +1688,36 @@ header_ustar(struct archive_read *a, struct tar *tar, err = r; /* Handle POSIX ustar fields. 
*/ - if (archive_entry_copy_uname_l(entry, - header->uname, sizeof(header->uname), tar->sconv) != 0) { - err = set_conversion_failed_error(a, tar->sconv, "Uname"); - if (err == ARCHIVE_FATAL) - return (err); - } - - if (archive_entry_copy_gname_l(entry, - header->gname, sizeof(header->gname), tar->sconv) != 0) { - err = set_conversion_failed_error(a, tar->sconv, "Gname"); - if (err == ARCHIVE_FATAL) - return (err); + const char *existing_uname = archive_entry_uname(entry); + if (existing_uname == NULL || existing_uname[0] == '\0') { + if (archive_entry_copy_uname_l(entry, + header->uname, sizeof(header->uname), tar->sconv) != 0) { + err = set_conversion_failed_error(a, tar->sconv, "Uname"); + if (err == ARCHIVE_FATAL) + return (err); + } + } + + const char *existing_gname = archive_entry_gname(entry); + if (existing_gname == NULL || existing_gname[0] == '\0') { + if (archive_entry_copy_gname_l(entry, + header->gname, sizeof(header->gname), tar->sconv) != 0) { + err = set_conversion_failed_error(a, tar->sconv, "Gname"); + if (err == ARCHIVE_FATAL) + return (err); + } } /* Parse out device numbers only for char and block specials. */ if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { - archive_entry_set_rdevmajor(entry, (dev_t) - tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); - archive_entry_set_rdevminor(entry, (dev_t) - tar_atol(header->rdevminor, sizeof(header->rdevminor))); + if (!archive_entry_rdev_is_set(entry)) { + archive_entry_set_rdevmajor(entry, (dev_t) + tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); + archive_entry_set_rdevminor(entry, (dev_t) + tar_atol(header->rdevminor, sizeof(header->rdevminor))); + } + } else { + archive_entry_set_rdev(entry, 0); } tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); @@ -1580,117 +1725,202 @@ header_ustar(struct archive_read *a, struct tar *tar, return (err); } - -/* - * Parse the pax extended attributes record. - * - * Returns non-zero if there's an error in the data. 
- */ static int -pax_header(struct archive_read *a, struct tar *tar, - struct archive_entry *entry, struct archive_string *in_as) +header_pax_extension(struct archive_read *a, struct tar *tar, + struct archive_entry *entry, const void *h, size_t *unconsumed) { - size_t attr_length, l, line_length, value_length; - char *p; - char *key, *value; - struct archive_string *as; + /* Sanity checks: The largest `x` body I've ever heard of was + * a little over 4MB. So I doubt there has ever been a + * well-formed archive with an `x` body over 1GiB. Similarly, + * it seems plausible that no single attribute has ever been + * larger than 100MB. So if we see a larger value here, it's + * almost certainly a sign of a corrupted/malicious archive. */ + + /* Maximum sane size for extension body: 1 GiB */ + /* This cannot be raised to larger than 8GiB without + * exceeding the maximum size for a standard ustar + * entry. */ + const int64_t ext_size_limit = 1024 * 1024 * (int64_t)1024; + /* Maximum size for a single line/attr: 100 million characters */ + /* This cannot be raised to more than 2GiB without exceeding + * a `size_t` on 32-bit platforms. */ + const size_t max_parsed_line_length = 99999999ULL; + /* Largest attribute prolog: size + name. 
*/ + const size_t max_size_name = 512; + + /* Size and padding of the full extension body */ + int64_t ext_size, ext_padding; + size_t line_length, value_length, name_length; + ssize_t to_read, did_read; + const struct archive_entry_header_ustar *header; + const char *p, *attr_start, *name_start; struct archive_string_conv *sconv; - int err, err2; - char *attr = in_as->s; + struct archive_string *pas = NULL; + struct archive_string attr_name; + int err = ARCHIVE_OK, r; - attr_length = in_as->length; - tar->pax_hdrcharset_binary = 0; - archive_string_empty(&(tar->entry_gname)); - archive_string_empty(&(tar->entry_linkpath)); - archive_string_empty(&(tar->entry_pathname)); - archive_string_empty(&(tar->entry_pathname_override)); - archive_string_empty(&(tar->entry_uname)); - err = ARCHIVE_OK; - while (attr_length > 0) { - /* Parse decimal length field at start of line. */ + header = (const struct archive_entry_header_ustar *)h; + ext_size = tar_atol(header->size, sizeof(header->size)); + if (ext_size < 0) { + archive_set_error(&a->archive, EINVAL, + "pax extension header has invalid size: %lld", + (long long)ext_size); + return (ARCHIVE_FATAL); + } + + ext_padding = 0x1ff & (-ext_size); + if (ext_size > ext_size_limit) { + /* Consume the pax extension body and return an error */ + if (ext_size + ext_padding != __archive_read_consume(a, ext_size + ext_padding)) { + return (ARCHIVE_FATAL); + } + archive_set_error(&a->archive, EINVAL, + "Ignoring oversized pax extensions: %d > %d", + (int)ext_size, (int)ext_size_limit); + return (ARCHIVE_WARN); + } + tar_flush_unconsumed(a, unconsumed); + + /* Parse the size/name of each pax attribute in the body */ + archive_string_init(&attr_name); + while (ext_size > 0) { + /* Read enough bytes to parse the size/name of the next attribute */ + to_read = max_size_name; + if (to_read > ext_size) { + to_read = ext_size; + } + p = __archive_read_ahead(a, to_read, &did_read); + if (did_read < 0) { + return ((int)did_read); + } + if 
(did_read == 0) { /* EOF */ + archive_set_error(&a->archive, EINVAL, + "Truncated tar archive" + " detected while reading pax attribute name"); + return (ARCHIVE_FATAL); + } + if (did_read > ext_size) { + did_read = ext_size; + } + + /* Parse size of attribute */ line_length = 0; - l = attr_length; - p = attr; /* Record start of line. */ - while (l>0) { + attr_start = p; + while (1) { + if (p >= attr_start + did_read) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Ignoring malformed pax attributes: overlarge attribute size field"); + *unconsumed += ext_size + ext_padding; + return (ARCHIVE_WARN); + } if (*p == ' ') { p++; - l--; break; } if (*p < '0' || *p > '9') { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Ignoring malformed pax extended attributes"); + "Ignoring malformed pax attributes: malformed attribute size field"); + *unconsumed += ext_size + ext_padding; return (ARCHIVE_WARN); } line_length *= 10; line_length += *p - '0'; - if (line_length > 999999) { + if (line_length > max_parsed_line_length) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Rejecting pax extended attribute > 1MB"); + "Ignoring malformed pax attribute: size > %lld", + (long long)max_parsed_line_length); + *unconsumed += ext_size + ext_padding; return (ARCHIVE_WARN); } p++; - l--; } - /* - * Parsed length must be no bigger than available data, - * at least 1, and the last character of the line must - * be '\n'. - */ - if (line_length > attr_length - || line_length < 1 - || attr[line_length - 1] != '\n') - { - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Ignoring malformed pax extended attribute"); - return (ARCHIVE_WARN); + if ((int64_t)line_length > ext_size) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Ignoring malformed pax attribute: %lld > %lld", + (long long)line_length, (long long)ext_size); + *unconsumed += ext_size + ext_padding; + return (ARCHIVE_WARN); } - /* Null-terminate the line. 
*/ - attr[line_length - 1] = '\0'; - - /* Find end of key and null terminate it. */ - key = p; - if (key[0] == '=') - return (-1); - while (*p && *p != '=') - ++p; - if (*p == '\0') { + /* Parse name of attribute */ + if (p >= attr_start + did_read + || p >= attr_start + line_length + || *p == '=') { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Invalid pax extended attributes"); + "Ignoring malformed pax attributes: empty name found"); + *unconsumed += ext_size + ext_padding; return (ARCHIVE_WARN); } - *p = '\0'; + name_start = p; + while (1) { + if (p >= attr_start + did_read || p >= attr_start + line_length) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Ignoring malformed pax attributes: overlarge attribute name"); + *unconsumed += ext_size + ext_padding; + return (ARCHIVE_WARN); + } + if (*p == '=') { + break; + } + p++; + } + name_length = p - name_start; + p++; // Skip '=' - value = p + 1; + archive_strncpy(&attr_name, name_start, name_length); - /* Some values may be binary data */ - value_length = attr + line_length - 1 - value; + ext_size -= p - attr_start; + value_length = line_length - (p - attr_start); - /* Identify this attribute and set it in the entry. */ - err2 = pax_attribute(a, tar, entry, key, value, value_length); - if (err2 == ARCHIVE_FATAL) - return (err2); - err = err_combine(err, err2); + /* Consume size, name, and `=` */ + *unconsumed += p - attr_start; + tar_flush_unconsumed(a, unconsumed); + + /* pax_attribute will consume value_length - 1 */ + r = pax_attribute(a, tar, entry, attr_name.s, archive_strlen(&attr_name), value_length - 1, unconsumed); + ext_size -= value_length - 1; + + if (r < ARCHIVE_WARN) { + *unconsumed += ext_size + ext_padding; + return (r); + } + err = err_combine(err, r); - /* Skip to next line */ - attr += line_length; - attr_length -= line_length; + /* Consume the `\n` that follows the pax attribute value. 
*/ + tar_flush_unconsumed(a, unconsumed); + p = __archive_read_ahead(a, 1, &did_read); + if (did_read < 0) { + return ((int)did_read); + } + if (did_read == 0) { + archive_set_error(&a->archive, EINVAL, + "Truncated tar archive" + " detected while completing pax attribute"); + return (ARCHIVE_FATAL); + } + if (p[0] != '\n') { + archive_set_error(&a->archive, EINVAL, + "Malformed pax attributes"); + *unconsumed += ext_size + ext_padding; + return (ARCHIVE_WARN); + } + ext_size -= 1; + *unconsumed += 1; + tar_flush_unconsumed(a, unconsumed); } + *unconsumed += ext_size + ext_padding; /* - * PAX format uses UTF-8 as default charset for its metadata - * unless hdrcharset=BINARY is present in its header. - * We apply the charset specified by the hdrcharset option only - * when the hdrcharset attribute(in PAX header) is BINARY because - * we respect the charset described in PAX header and BINARY also - * means that metadata(filename,uname and gname) character-set - * is unknown. + * Some PAX values -- pathname, linkpath, uname, gname -- + * can't be copied into the entry until we know the character + * set to use: */ - if (tar->pax_hdrcharset_binary) + if (!tar->pax_hdrcharset_utf8) + /* PAX specified "BINARY", so use the default charset */ sconv = tar->opt_sconv; else { + /* PAX default UTF-8 */ sconv = archive_string_conversion_from_charset( &(a->archive), "UTF-8", 1); if (sconv == NULL) @@ -1700,83 +1930,85 @@ pax_header(struct archive_read *a, struct tar *tar, SCONV_SET_OPT_UTF8_LIBARCHIVE2X); } + /* Pathname */ + pas = NULL; + if (archive_strlen(&(tar->entry_pathname_override)) > 0) { + /* Prefer GNU.sparse.name attribute if present */ + /* GNU sparse files store a fake name under the standard + * "pathname" key. 
*/ + pas = &(tar->entry_pathname_override); + } else if (archive_strlen(&(tar->entry_pathname)) > 0) { + /* Use standard "pathname" PAX extension */ + pas = &(tar->entry_pathname); + } + if (pas != NULL) { + if (archive_entry_copy_pathname_l(entry, pas->s, + archive_strlen(pas), sconv) != 0) { + err = set_conversion_failed_error(a, sconv, "Pathname"); + if (err == ARCHIVE_FATAL) + return (err); + /* Use raw name without conversion */ + archive_entry_copy_pathname(entry, pas->s); + } + } + /* Uname */ + if (archive_strlen(&(tar->entry_uname)) > 0) { + if (archive_entry_copy_uname_l(entry, tar->entry_uname.s, + archive_strlen(&(tar->entry_uname)), sconv) != 0) { + err = set_conversion_failed_error(a, sconv, "Uname"); + if (err == ARCHIVE_FATAL) + return (err); + /* Use raw name without conversion */ + archive_entry_copy_uname(entry, tar->entry_uname.s); + } + } + /* Gname */ if (archive_strlen(&(tar->entry_gname)) > 0) { if (archive_entry_copy_gname_l(entry, tar->entry_gname.s, archive_strlen(&(tar->entry_gname)), sconv) != 0) { err = set_conversion_failed_error(a, sconv, "Gname"); if (err == ARCHIVE_FATAL) return (err); - /* Use a converted an original name. */ + /* Use raw name without conversion */ archive_entry_copy_gname(entry, tar->entry_gname.s); } } + /* Linkpath */ if (archive_strlen(&(tar->entry_linkpath)) > 0) { if (archive_entry_copy_link_l(entry, tar->entry_linkpath.s, archive_strlen(&(tar->entry_linkpath)), sconv) != 0) { - err = set_conversion_failed_error(a, sconv, "Linkname"); + err = set_conversion_failed_error(a, sconv, "Linkpath"); if (err == ARCHIVE_FATAL) return (err); - /* Use a converted an original name. */ + /* Use raw name without conversion */ archive_entry_copy_link(entry, tar->entry_linkpath.s); } } - /* - * Some extensions (such as the GNU sparse file extensions) - * deliberately store a synthetic name under the regular 'path' - * attribute and the real file name under a different attribute. 
- * Since we're supposed to not care about the order, we - * have no choice but to store all of the various filenames - * we find and figure it all out afterwards. This is the - * figuring out part. - */ - as = NULL; - if (archive_strlen(&(tar->entry_pathname_override)) > 0) - as = &(tar->entry_pathname_override); - else if (archive_strlen(&(tar->entry_pathname)) > 0) - as = &(tar->entry_pathname); - if (as != NULL) { - if (archive_entry_copy_pathname_l(entry, as->s, - archive_strlen(as), sconv) != 0) { - err = set_conversion_failed_error(a, sconv, "Pathname"); - if (err == ARCHIVE_FATAL) - return (err); - /* Use a converted an original name. */ - archive_entry_copy_pathname(entry, as->s); - } - } - if (archive_strlen(&(tar->entry_uname)) > 0) { - if (archive_entry_copy_uname_l(entry, tar->entry_uname.s, - archive_strlen(&(tar->entry_uname)), sconv) != 0) { - err = set_conversion_failed_error(a, sconv, "Uname"); - if (err == ARCHIVE_FATAL) - return (err); - /* Use a converted an original name. */ - archive_entry_copy_uname(entry, tar->entry_uname.s); - } - } + + /* Extension may have given us a corrected `entry_bytes_remaining` for + * the main entry; update the padding appropriately. 
*/ + tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); return (err); } static int -pax_attribute_xattr(struct archive_entry *entry, - const char *name, const char *value) +pax_attribute_LIBARCHIVE_xattr(struct archive_entry *entry, + const char *name, size_t name_length, const char *value, size_t value_length) { char *name_decoded; void *value_decoded; size_t value_len; - if (strlen(name) < 18 || (memcmp(name, "LIBARCHIVE.xattr.", 17)) != 0) + if (name_length < 1) return 3; - name += 17; - /* URL-decode name */ - name_decoded = url_decode(name); + name_decoded = url_decode(name, name_length); if (name_decoded == NULL) return 2; /* Base-64 decode value */ - value_decoded = base64_decode(value, strlen(value), &value_len); + value_decoded = base64_decode(value, value_length, &value_len); if (value_decoded == NULL) { free(name_decoded); return 1; @@ -1791,21 +2023,26 @@ pax_attribute_xattr(struct archive_entry *entry, } static int -pax_attribute_schily_xattr(struct archive_entry *entry, - const char *name, const char *value, size_t value_length) +pax_attribute_SCHILY_xattr(struct archive_entry *entry, + const char *name, size_t name_length, const char *value, size_t value_length) { - if (strlen(name) < 14 || (memcmp(name, "SCHILY.xattr.", 13)) != 0) + if (name_length < 1 || name_length > 128) { return 1; + } - name += 13; - - archive_entry_xattr_add_entry(entry, name, value, value_length); + char * null_terminated_name = malloc(name_length + 1); + if (null_terminated_name != NULL) { + memcpy(null_terminated_name, name, name_length); + null_terminated_name[name_length] = '\0'; + archive_entry_xattr_add_entry(entry, null_terminated_name, value, value_length); + free(null_terminated_name); + } return 0; } static int -pax_attribute_rht_security_selinux(struct archive_entry *entry, +pax_attribute_RHT_security_selinux(struct archive_entry *entry, const char *value, size_t value_length) { archive_entry_xattr_add_entry(entry, "security.selinux", @@ -1815,10 +2052,11 
@@ pax_attribute_rht_security_selinux(struct archive_entry *entry, } static int -pax_attribute_acl(struct archive_read *a, struct tar *tar, - struct archive_entry *entry, const char *value, int type) +pax_attribute_SCHILY_acl(struct archive_read *a, struct tar *tar, + struct archive_entry *entry, size_t value_length, int type) { int r; + const char *p; const char* errstr; switch (type) { @@ -1845,8 +2083,28 @@ pax_attribute_acl(struct archive_read *a, struct tar *tar, return (ARCHIVE_FATAL); } - r = archive_acl_from_text_l(archive_entry_acl(entry), value, type, - tar->sconv_acl); + if (value_length > acl_limit) { + __archive_read_consume(a, value_length); + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Unreasonably large ACL: %d > %d", + (int)value_length, (int)acl_limit); + return (ARCHIVE_WARN); + } + + p = __archive_read_ahead(a, value_length, NULL); + if (p == NULL) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated tar archive " + "detected while reading ACL data"); + return (ARCHIVE_FATAL); + } + + r = archive_acl_from_text_nl(archive_entry_acl(entry), p, value_length, + type, tar->sconv_acl); + __archive_read_consume(a, value_length); + /* Workaround: Force perm_is_set() to be correct */ + /* If this bit were stored in the ACL, this wouldn't be needed */ + archive_entry_set_perm(entry, archive_entry_perm(entry)); if (r != ARCHIVE_OK) { if (r == ARCHIVE_FATAL) { archive_set_error(&a->archive, ENOMEM, @@ -1860,240 +2118,536 @@ pax_attribute_acl(struct archive_read *a, struct tar *tar, return (r); } +static int +pax_attribute_read_time(struct archive_read *a, size_t value_length, int64_t *ps, long *pn, size_t *unconsumed) { + struct archive_string as; + int r; + + if (value_length > 128) { + __archive_read_consume(a, value_length); + *ps = 0; + *pn = 0; + return (ARCHIVE_FATAL); + } + + archive_string_init(&as); + r = read_bytes_to_string(a, &as, value_length, unconsumed); + if (r < ARCHIVE_OK) { + return (r); + } + + 
pax_time(as.s, archive_strlen(&as), ps, pn); + if (*ps < 0 || *ps == INT64_MAX) { + return (ARCHIVE_WARN); + } + return (ARCHIVE_OK); +} + +static int +pax_attribute_read_number(struct archive_read *a, size_t value_length, int64_t *result) { + struct archive_string as; + size_t unconsumed = 0; + int r; + + if (value_length > 64) { + __archive_read_consume(a, value_length); + *result = 0; + return (ARCHIVE_FATAL); + } + + archive_string_init(&as); + r = read_bytes_to_string(a, &as, value_length, &unconsumed); + tar_flush_unconsumed(a, &unconsumed); + if (r < ARCHIVE_OK) { + return (r); + } + + *result = tar_atol10(as.s, archive_strlen(&as)); + if (*result < 0 || *result == INT64_MAX) { + *result = INT64_MAX; + return (ARCHIVE_WARN); + } + return (ARCHIVE_OK); +} + /* - * Parse a single key=value attribute. key/value pointers are - * assumed to point into reasonably long-lived storage. + * Parse a single key=value attribute. * - * Note that POSIX reserves all-lowercase keywords. Vendor-specific - * extensions should always have keywords of the form "VENDOR.attribute" - * In particular, it's quite feasible to support many different - * vendor extensions here. I'm using "LIBARCHIVE" for extensions - * unique to this library. + * POSIX reserves all-lowercase keywords. Vendor-specific extensions + * should always have keywords of the form "VENDOR.attribute" In + * particular, it's quite feasible to support many different vendor + * extensions here. I'm using "LIBARCHIVE" for extensions unique to + * this library. * - * Investigate other vendor-specific extensions and see if + * TODO: Investigate other vendor-specific extensions and see if * any of them look useful. 
*/ static int -pax_attribute(struct archive_read *a, struct tar *tar, - struct archive_entry *entry, const char *key, const char *value, size_t value_length) +pax_attribute(struct archive_read *a, struct tar *tar, struct archive_entry *entry, + const char *key, size_t key_length, size_t value_length, size_t *unconsumed) { - int64_t s; + int64_t t; long n; - int err = ARCHIVE_OK, r; + const char *p; + ssize_t bytes_read; + int err = ARCHIVE_OK; - if (value == NULL) - value = ""; /* Disable compiler warning; do not pass - * NULL pointer to strlen(). */ switch (key[0]) { case 'G': - /* Reject GNU.sparse.* headers on non-regular files. */ - if (strncmp(key, "GNU.sparse", 10) == 0 && - !tar->sparse_allowed) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Non-regular file cannot be sparse"); - return (ARCHIVE_FATAL); - } - - /* GNU "0.0" sparse pax format. */ - if (strcmp(key, "GNU.sparse.numblocks") == 0) { - tar->sparse_offset = -1; - tar->sparse_numbytes = -1; - tar->sparse_gnu_major = 0; - tar->sparse_gnu_minor = 0; - } - if (strcmp(key, "GNU.sparse.offset") == 0) { - tar->sparse_offset = tar_atol10(value, strlen(value)); - if (tar->sparse_numbytes != -1) { - if (gnu_add_sparse_entry(a, tar, - tar->sparse_offset, tar->sparse_numbytes) - != ARCHIVE_OK) - return (ARCHIVE_FATAL); - tar->sparse_offset = -1; - tar->sparse_numbytes = -1; - } - } - if (strcmp(key, "GNU.sparse.numbytes") == 0) { - tar->sparse_numbytes = tar_atol10(value, strlen(value)); - if (tar->sparse_offset != -1) { - if (gnu_add_sparse_entry(a, tar, - tar->sparse_offset, tar->sparse_numbytes) - != ARCHIVE_OK) - return (ARCHIVE_FATAL); - tar->sparse_offset = -1; - tar->sparse_numbytes = -1; + /* GNU.* extensions */ + if (key_length > 4 && memcmp(key, "GNU.", 4) == 0) { + key += 4; + key_length -= 4; + + /* GNU.sparse marks the existence of GNU sparse information */ + if (key_length == 6 && memcmp(key, "sparse", 6) == 0) { + tar->sparse_gnu_attributes_seen = 1; } - } - if (strcmp(key, 
"GNU.sparse.size") == 0) { - tar->realsize = tar_atol10(value, strlen(value)); - archive_entry_set_size(entry, tar->realsize); - tar->realsize_override = 1; - } - - /* GNU "0.1" sparse pax format. */ - if (strcmp(key, "GNU.sparse.map") == 0) { - tar->sparse_gnu_major = 0; - tar->sparse_gnu_minor = 1; - if (gnu_sparse_01_parse(a, tar, value) != ARCHIVE_OK) - return (ARCHIVE_WARN); - } - /* GNU "1.0" sparse pax format */ - if (strcmp(key, "GNU.sparse.major") == 0) { - tar->sparse_gnu_major = (int)tar_atol10(value, strlen(value)); - tar->sparse_gnu_pending = 1; - } - if (strcmp(key, "GNU.sparse.minor") == 0) { - tar->sparse_gnu_minor = (int)tar_atol10(value, strlen(value)); - tar->sparse_gnu_pending = 1; - } - if (strcmp(key, "GNU.sparse.name") == 0) { - /* - * The real filename; when storing sparse - * files, GNU tar puts a synthesized name into - * the regular 'path' attribute in an attempt - * to limit confusion. ;-) - */ - archive_strcpy(&(tar->entry_pathname_override), value); - } - if (strcmp(key, "GNU.sparse.realsize") == 0) { - tar->realsize = tar_atol10(value, strlen(value)); - archive_entry_set_size(entry, tar->realsize); - tar->realsize_override = 1; + /* GNU.sparse.* extensions */ + else if (key_length > 7 && memcmp(key, "sparse.", 7) == 0) { + tar->sparse_gnu_attributes_seen = 1; + key += 7; + key_length -= 7; + + /* GNU "0.0" sparse pax format. 
*/ + if (key_length == 9 && memcmp(key, "numblocks", 9) == 0) { + /* GNU.sparse.numblocks */ + tar->sparse_offset = -1; + tar->sparse_numbytes = -1; + tar->sparse_gnu_major = 0; + tar->sparse_gnu_minor = 0; + } + else if (key_length == 6 && memcmp(key, "offset", 6) == 0) { + /* GNU.sparse.offset */ + if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { + tar->sparse_offset = t; + if (tar->sparse_numbytes != -1) { + if (gnu_add_sparse_entry(a, tar, + tar->sparse_offset, tar->sparse_numbytes) + != ARCHIVE_OK) + return (ARCHIVE_FATAL); + tar->sparse_offset = -1; + tar->sparse_numbytes = -1; + } + } + return (err); + } + else if (key_length == 8 && memcmp(key, "numbytes", 8) == 0) { + /* GNU.sparse.numbytes */ + if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { + tar->sparse_numbytes = t; + if (tar->sparse_offset != -1) { + if (gnu_add_sparse_entry(a, tar, + tar->sparse_offset, tar->sparse_numbytes) + != ARCHIVE_OK) + return (ARCHIVE_FATAL); + tar->sparse_offset = -1; + tar->sparse_numbytes = -1; + } + } + return (err); + } + else if (key_length == 4 && memcmp(key, "size", 4) == 0) { + /* GNU.sparse.size */ + if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { + tar->realsize = t; + archive_entry_set_size(entry, tar->realsize); + tar->realsize_override = 1; + } + return (err); + } + + /* GNU "0.1" sparse pax format. 
*/ + else if (key_length == 3 && memcmp(key, "map", 3) == 0) { + /* GNU.sparse.map */ + tar->sparse_gnu_major = 0; + tar->sparse_gnu_minor = 1; + if (value_length > sparse_map_limit) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Unreasonably large sparse map: %d > %d", + (int)value_length, (int)sparse_map_limit); + err = ARCHIVE_FAILED; + } else { + p = __archive_read_ahead(a, value_length, &bytes_read); + if (p != NULL) { + if (gnu_sparse_01_parse(a, tar, p, value_length) != ARCHIVE_OK) { + err = ARCHIVE_WARN; + } + } else { + return (ARCHIVE_FATAL); + } + } + __archive_read_consume(a, value_length); + return (err); + } + + /* GNU "1.0" sparse pax format */ + else if (key_length == 5 && memcmp(key, "major", 5) == 0) { + /* GNU.sparse.major */ + if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK + && t >= 0 + && t <= 10) { + tar->sparse_gnu_major = (int)t; + } + return (err); + } + else if (key_length == 5 && memcmp(key, "minor", 5) == 0) { + /* GNU.sparse.minor */ + if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK + && t >= 0 + && t <= 10) { + tar->sparse_gnu_minor = (int)t; + } + return (err); + } + else if (key_length == 4 && memcmp(key, "name", 4) == 0) { + /* GNU.sparse.name */ + /* + * The real filename; when storing sparse + * files, GNU tar puts a synthesized name into + * the regular 'path' attribute in an attempt + * to limit confusion. 
;-) + */ + if (value_length > pathname_limit) { + *unconsumed += value_length; + err = ARCHIVE_WARN; + } else { + err = read_bytes_to_string(a, &(tar->entry_pathname_override), + value_length, unconsumed); + } + return (err); + } + else if (key_length == 8 && memcmp(key, "realsize", 8) == 0) { + /* GNU.sparse.realsize */ + if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { + tar->realsize = t; + archive_entry_set_size(entry, tar->realsize); + tar->realsize_override = 1; + } + return (err); + } + } } break; case 'L': - /* Our extensions */ -/* TODO: Handle arbitrary extended attributes... */ -/* - if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0) - archive_entry_set_xxxxxx(entry, value); -*/ - if (strcmp(key, "LIBARCHIVE.creationtime") == 0) { - pax_time(value, &s, &n); - archive_entry_set_birthtime(entry, s, n); - } - if (strcmp(key, "LIBARCHIVE.symlinktype") == 0) { - if (strcmp(value, "file") == 0) { - archive_entry_set_symlink_type(entry, - AE_SYMLINK_TYPE_FILE); - } else if (strcmp(value, "dir") == 0) { - archive_entry_set_symlink_type(entry, - AE_SYMLINK_TYPE_DIRECTORY); + /* LIBARCHIVE extensions */ + if (key_length > 11 && memcmp(key, "LIBARCHIVE.", 11) == 0) { + key_length -= 11; + key += 11; + + /* TODO: Handle arbitrary extended attributes... 
*/ + /* + if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0) + archive_entry_set_xxxxxx(entry, value); + */ + if (key_length == 12 && memcmp(key, "creationtime", 12) == 0) { + /* LIBARCHIVE.creationtime */ + if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) { + archive_entry_set_birthtime(entry, t, n); + } + return (err); + } + else if (key_length == 11 && memcmp(key, "symlinktype", 11) == 0) { + /* LIBARCHIVE.symlinktype */ + if (value_length < 16) { + p = __archive_read_ahead(a, value_length, &bytes_read); + if (p != NULL) { + if (value_length == 4 && memcmp(p, "file", 4) == 0) { + archive_entry_set_symlink_type(entry, + AE_SYMLINK_TYPE_FILE); + } else if (value_length == 3 && memcmp(p, "dir", 3) == 0) { + archive_entry_set_symlink_type(entry, + AE_SYMLINK_TYPE_DIRECTORY); + } else { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Unrecognized symlink type"); + err = ARCHIVE_WARN; + } + } else { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated tar archive " + "detected while reading `symlinktype` attribute"); + return (ARCHIVE_FATAL); + } + } else { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "symlink type is very long" + "(longest recognized value is 4 bytes, this is %d)", + (int)value_length); + err = ARCHIVE_WARN; + } + __archive_read_consume(a, value_length); + return (err); + } + else if (key_length > 6 && memcmp(key, "xattr.", 6) == 0) { + key_length -= 6; + key += 6; + if (value_length > xattr_limit) { + err = ARCHIVE_WARN; + } else { + p = __archive_read_ahead(a, value_length, &bytes_read); + if (p == NULL + || pax_attribute_LIBARCHIVE_xattr(entry, key, key_length, p, value_length)) { + /* TODO: Unable to parse xattr */ + err = ARCHIVE_WARN; + } + } + __archive_read_consume(a, value_length); + return (err); } } - if (memcmp(key, "LIBARCHIVE.xattr.", 17) == 0) - pax_attribute_xattr(entry, key, value); break; case 'R': /* GNU tar uses RHT.security header to store SELinux xattrs * 
SCHILY.xattr.security.selinux == RHT.security.selinux */ - if (strcmp(key, "RHT.security.selinux") == 0) { - pax_attribute_rht_security_selinux(entry, value, - value_length); + if (key_length == 20 && memcmp(key, "RHT.security.selinux", 20) == 0) { + if (value_length > xattr_limit) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Ignoring unreasonably large security.selinux attribute:" + " %d > %d", + (int)value_length, (int)xattr_limit); + /* TODO: Should this be FAILED instead? */ + err = ARCHIVE_WARN; + } else { + p = __archive_read_ahead(a, value_length, &bytes_read); + if (p == NULL + || pax_attribute_RHT_security_selinux(entry, p, value_length)) { + /* TODO: Unable to parse xattr */ + err = ARCHIVE_WARN; + } } + __archive_read_consume(a, value_length); + return (err); + } break; case 'S': - /* We support some keys used by the "star" archiver */ - if (strcmp(key, "SCHILY.acl.access") == 0) { - r = pax_attribute_acl(a, tar, entry, value, - ARCHIVE_ENTRY_ACL_TYPE_ACCESS); - if (r == ARCHIVE_FATAL) - return (r); - } else if (strcmp(key, "SCHILY.acl.default") == 0) { - r = pax_attribute_acl(a, tar, entry, value, - ARCHIVE_ENTRY_ACL_TYPE_DEFAULT); - if (r == ARCHIVE_FATAL) - return (r); - } else if (strcmp(key, "SCHILY.acl.ace") == 0) { - r = pax_attribute_acl(a, tar, entry, value, - ARCHIVE_ENTRY_ACL_TYPE_NFS4); - if (r == ARCHIVE_FATAL) - return (r); - } else if (strcmp(key, "SCHILY.devmajor") == 0) { - archive_entry_set_rdevmajor(entry, - (dev_t)tar_atol10(value, strlen(value))); - } else if (strcmp(key, "SCHILY.devminor") == 0) { - archive_entry_set_rdevminor(entry, - (dev_t)tar_atol10(value, strlen(value))); - } else if (strcmp(key, "SCHILY.fflags") == 0) { - archive_entry_copy_fflags_text(entry, value); - } else if (strcmp(key, "SCHILY.dev") == 0) { - archive_entry_set_dev(entry, - (dev_t)tar_atol10(value, strlen(value))); - } else if (strcmp(key, "SCHILY.ino") == 0) { - archive_entry_set_ino(entry, - tar_atol10(value, strlen(value))); - } else if 
(strcmp(key, "SCHILY.nlink") == 0) { - archive_entry_set_nlink(entry, (unsigned) - tar_atol10(value, strlen(value))); - } else if (strcmp(key, "SCHILY.realsize") == 0) { - tar->realsize = tar_atol10(value, strlen(value)); - tar->realsize_override = 1; - archive_entry_set_size(entry, tar->realsize); - } else if (strncmp(key, "SCHILY.xattr.", 13) == 0) { - pax_attribute_schily_xattr(entry, key, value, - value_length); - } else if (strcmp(key, "SUN.holesdata") == 0) { - /* A Solaris extension for sparse. */ - r = solaris_sparse_parse(a, tar, entry, value); - if (r < err) { - if (r == ARCHIVE_FATAL) - return (r); - err = r; - archive_set_error(&a->archive, - ARCHIVE_ERRNO_MISC, - "Parse error: SUN.holesdata"); + /* SCHILY.* extensions used by "star" archiver */ + if (key_length > 7 && memcmp(key, "SCHILY.", 7) == 0) { + key_length -= 7; + key += 7; + + if (key_length == 10 && memcmp(key, "acl.access", 10) == 0) { + err = pax_attribute_SCHILY_acl(a, tar, entry, value_length, + ARCHIVE_ENTRY_ACL_TYPE_ACCESS); + // TODO: Mark mode as set + return (err); + } + else if (key_length == 11 && memcmp(key, "acl.default", 11) == 0) { + err = pax_attribute_SCHILY_acl(a, tar, entry, value_length, + ARCHIVE_ENTRY_ACL_TYPE_DEFAULT); + return (err); + } + else if (key_length == 7 && memcmp(key, "acl.ace", 7) == 0) { + err = pax_attribute_SCHILY_acl(a, tar, entry, value_length, + ARCHIVE_ENTRY_ACL_TYPE_NFS4); + // TODO: Mark mode as set + return (err); + } + else if (key_length == 8 && memcmp(key, "devmajor", 8) == 0) { + if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { + archive_entry_set_rdevmajor(entry, t); + } + return (err); + } + else if (key_length == 8 && memcmp(key, "devminor", 8) == 0) { + if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { + archive_entry_set_rdevminor(entry, t); + } + return (err); + } + else if (key_length == 6 && memcmp(key, "fflags", 6) == 0) { + if (value_length < fflags_limit) { + p = 
__archive_read_ahead(a, value_length, &bytes_read); + if (p != NULL) { + archive_entry_copy_fflags_text_len(entry, p, value_length); + err = ARCHIVE_OK; + } else { + /* Truncated archive */ + err = ARCHIVE_FATAL; + } + } else { + /* Overlong fflags field */ + err = ARCHIVE_WARN; + } + __archive_read_consume(a, value_length); + return (err); + } + else if (key_length == 3 && memcmp(key, "dev", 3) == 0) { + if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { + archive_entry_set_dev(entry, t); + } + return (err); + } + else if (key_length == 3 && memcmp(key, "ino", 3) == 0) { + if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { + archive_entry_set_ino(entry, t); + } + return (err); + } + else if (key_length == 5 && memcmp(key, "nlink", 5) == 0) { + if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { + archive_entry_set_nlink(entry, t); + } + return (err); + } + else if (key_length == 8 && memcmp(key, "realsize", 8) == 0) { + if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { + tar->realsize = t; + tar->realsize_override = 1; + archive_entry_set_size(entry, tar->realsize); + } + return (err); + } + else if (key_length > 6 && memcmp(key, "xattr.", 6) == 0) { + key_length -= 6; + key += 6; + if (value_length < xattr_limit) { + p = __archive_read_ahead(a, value_length, &bytes_read); + if (p == NULL + || pax_attribute_SCHILY_xattr(entry, key, key_length, p, value_length)) { + /* TODO: Unable to parse xattr */ + err = ARCHIVE_WARN; + } + } else { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Unreasonably large xattr: %d > %d", + (int)value_length, (int)xattr_limit); + err = ARCHIVE_WARN; + } + __archive_read_consume(a, value_length); + return (err); + } + } + /* SUN.* extensions from Solaris tar */ + if (key_length > 4 && memcmp(key, "SUN.", 4) == 0) { + key_length -= 4; + key += 4; + + if (key_length == 9 && memcmp(key, "holesdata", 9) == 0) { + /* SUN.holesdata */ + 
if (value_length < sparse_map_limit) { + p = __archive_read_ahead(a, value_length, &bytes_read); + if (p != NULL) { + err = pax_attribute_SUN_holesdata(a, tar, entry, p, value_length); + if (err < ARCHIVE_OK) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_MISC, + "Parse error: SUN.holesdata"); + } + } else { + return (ARCHIVE_FATAL); + } + } else { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Unreasonably large sparse map: %d > %d", + (int)value_length, (int)sparse_map_limit); + err = ARCHIVE_FAILED; + } + __archive_read_consume(a, value_length); + return (err); } } break; case 'a': - if (strcmp(key, "atime") == 0) { - pax_time(value, &s, &n); - archive_entry_set_atime(entry, s, n); + if (key_length == 5 && memcmp(key, "atime", 5) == 0) { + if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) { + archive_entry_set_atime(entry, t, n); + } + return (err); } break; case 'c': - if (strcmp(key, "ctime") == 0) { - pax_time(value, &s, &n); - archive_entry_set_ctime(entry, s, n); - } else if (strcmp(key, "charset") == 0) { + if (key_length == 5 && memcmp(key, "ctime", 5) == 0) { + if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) { + archive_entry_set_ctime(entry, t, n); + } + return (err); + } else if (key_length == 7 && memcmp(key, "charset", 7) == 0) { /* TODO: Publish charset information in entry. */ - } else if (strcmp(key, "comment") == 0) { + } else if (key_length == 7 && memcmp(key, "comment", 7) == 0) { /* TODO: Publish comment in entry. 
*/ } break; case 'g': - if (strcmp(key, "gid") == 0) { - archive_entry_set_gid(entry, - tar_atol10(value, strlen(value))); - } else if (strcmp(key, "gname") == 0) { - archive_strcpy(&(tar->entry_gname), value); + if (key_length == 3 && memcmp(key, "gid", 3) == 0) { + if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { + archive_entry_set_gid(entry, t); + } + return (err); + } else if (key_length == 5 && memcmp(key, "gname", 5) == 0) { + if (value_length > guname_limit) { + *unconsumed += value_length; + err = ARCHIVE_WARN; + } else { + err = read_bytes_to_string(a, &(tar->entry_gname), value_length, unconsumed); + } + return (err); } break; case 'h': - if (strcmp(key, "hdrcharset") == 0) { - if (strcmp(value, "BINARY") == 0) - /* Binary mode. */ - tar->pax_hdrcharset_binary = 1; - else if (strcmp(value, "ISO-IR 10646 2000 UTF-8") == 0) - tar->pax_hdrcharset_binary = 0; + if (key_length == 10 && memcmp(key, "hdrcharset", 10) == 0) { + if (value_length < 64) { + p = __archive_read_ahead(a, value_length, &bytes_read); + if (p != NULL) { + if (value_length == 6 + && memcmp(p, "BINARY", 6) == 0) { + /* Binary mode. */ + tar->pax_hdrcharset_utf8 = 0; + err = ARCHIVE_OK; + } else if (value_length == 23 + && memcmp(p, "ISO-IR 10646 2000 UTF-8", 23) == 0) { + tar->pax_hdrcharset_utf8 = 1; + err = ARCHIVE_OK; + } else { + /* TODO: Unrecognized character set */ + err = ARCHIVE_WARN; + } + } else { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated tar archive " + "detected while reading hdrcharset attribute"); + return (ARCHIVE_FATAL); + } + } else { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "hdrcharset attribute is unreasonably large (%d bytes)", + (int)value_length); + err = ARCHIVE_WARN; + } + __archive_read_consume(a, value_length); + return (err); } break; case 'l': /* pax interchange doesn't distinguish hardlink vs. symlink. 
*/ - if (strcmp(key, "linkpath") == 0) { - archive_strcpy(&(tar->entry_linkpath), value); + if (key_length == 8 && memcmp(key, "linkpath", 8) == 0) { + if (value_length > pathname_limit) { + *unconsumed += value_length; + err = ARCHIVE_WARN; + } else { + err = read_bytes_to_string(a, &tar->entry_linkpath, value_length, unconsumed); + } + return (err); } break; case 'm': - if (strcmp(key, "mtime") == 0) { - pax_time(value, &s, &n); - archive_entry_set_mtime(entry, s, n); + if (key_length == 5 && memcmp(key, "mtime", 5) == 0) { + if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) { + archive_entry_set_mtime(entry, t, n); + } + return (err); } break; case 'p': - if (strcmp(key, "path") == 0) { - archive_strcpy(&(tar->entry_pathname), value); + if (key_length == 4 && memcmp(key, "path", 4) == 0) { + if (value_length > pathname_limit) { + *unconsumed += value_length; + err = ARCHIVE_WARN; + } else { + err = read_bytes_to_string(a, &(tar->entry_pathname), value_length, unconsumed); + } + return (err); } break; case 'r': @@ -2102,48 +2656,54 @@ pax_attribute(struct archive_read *a, struct tar *tar, case 's': /* POSIX has reserved 'security.*' */ /* Someday: if (strcmp(key, "security.acl") == 0) { ... } */ - if (strcmp(key, "size") == 0) { + if (key_length == 4 && memcmp(key, "size", 4) == 0) { /* "size" is the size of the data in the entry. */ - tar->entry_bytes_remaining - = tar_atol10(value, strlen(value)); - if (tar->entry_bytes_remaining < 0) { - tar->entry_bytes_remaining = 0; - archive_set_error(&a->archive, - ARCHIVE_ERRNO_MISC, - "Tar size attribute is negative"); - return (ARCHIVE_FATAL); + if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { + tar->entry_bytes_remaining = t; + /* + * The "size" pax header keyword always overrides the + * "size" field in the tar header. + * GNU.sparse.realsize, GNU.sparse.size and + * SCHILY.realsize override this value. 
+ */ + if (!tar->realsize_override) { + archive_entry_set_size(entry, + tar->entry_bytes_remaining); + tar->realsize + = tar->entry_bytes_remaining; + } } - if (tar->entry_bytes_remaining == INT64_MAX) { - /* Note: tar_atol returns INT64_MAX on overflow */ + else if (t == INT64_MAX) { + /* Note: pax_attr_read_number returns INT64_MAX on overflow or < 0 */ tar->entry_bytes_remaining = 0; archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Tar size attribute overflow"); return (ARCHIVE_FATAL); } - /* - * The "size" pax header keyword always overrides the - * "size" field in the tar header. - * GNU.sparse.realsize, GNU.sparse.size and - * SCHILY.realsize override this value. - */ - if (!tar->realsize_override) { - archive_entry_set_size(entry, - tar->entry_bytes_remaining); - tar->realsize - = tar->entry_bytes_remaining; - } + return (err); } break; case 'u': - if (strcmp(key, "uid") == 0) { - archive_entry_set_uid(entry, - tar_atol10(value, strlen(value))); - } else if (strcmp(key, "uname") == 0) { - archive_strcpy(&(tar->entry_uname), value); + if (key_length == 3 && memcmp(key, "uid", 3) == 0) { + if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { + archive_entry_set_uid(entry, t); + } + return (err); + } else if (key_length == 5 && memcmp(key, "uname", 5) == 0) { + if (value_length > guname_limit) { + *unconsumed += value_length; + err = ARCHIVE_WARN; + } else { + err = read_bytes_to_string(a, &(tar->entry_uname), value_length, unconsumed); + } + return (err); } break; } + + /* Unrecognized key, just skip the entire value. 
*/ + __archive_read_consume(a, value_length); return (err); } @@ -2153,7 +2713,7 @@ pax_attribute(struct archive_read *a, struct tar *tar, * parse a decimal time value, which may include a fractional portion */ static void -pax_time(const char *p, int64_t *ps, long *pn) +pax_time(const char *p, size_t length, int64_t *ps, long *pn) { char digit; int64_t s; @@ -2164,13 +2724,18 @@ pax_time(const char *p, int64_t *ps, long *pn) limit = INT64_MAX / 10; last_digit_limit = INT64_MAX % 10; + if (length <= 0) { + *ps = 0; + return; + } s = 0; sign = 1; if (*p == '-') { sign = -1; p++; + length--; } - while (*p >= '0' && *p <= '9') { + while (length > 0 && *p >= '0' && *p <= '9') { digit = *p - '0'; if (s > limit || (s == limit && digit > last_digit_limit)) { @@ -2179,6 +2744,7 @@ pax_time(const char *p, int64_t *ps, long *pn) } s = (s * 10) + digit; ++p; + --length; } *ps = s * sign; @@ -2186,13 +2752,14 @@ pax_time(const char *p, int64_t *ps, long *pn) /* Calculate nanoseconds. */ *pn = 0; - if (*p != '.') + if (length <= 0 || *p != '.') return; l = 100000000UL; do { ++p; - if (*p >= '0' && *p <= '9') + --length; + if (length > 0 && *p >= '0' && *p <= '9') *pn += (*p - '0') * l; else break; @@ -2223,49 +2790,65 @@ header_gnutar(struct archive_read *a, struct tar *tar, /* Copy filename over (to ensure null termination). 
*/ header = (const struct archive_entry_header_gnutar *)h; - if (archive_entry_copy_pathname_l(entry, - header->name, sizeof(header->name), tar->sconv) != 0) { - err = set_conversion_failed_error(a, tar->sconv, "Pathname"); - if (err == ARCHIVE_FATAL) - return (err); + const char *existing_pathname = archive_entry_pathname(entry); + if (existing_pathname == NULL || existing_pathname[0] == '\0') { + if (archive_entry_copy_pathname_l(entry, + header->name, sizeof(header->name), tar->sconv) != 0) { + err = set_conversion_failed_error(a, tar->sconv, "Pathname"); + if (err == ARCHIVE_FATAL) + return (err); + } } /* Fields common to ustar and GNU */ /* XXX Can the following be factored out since it's common * to ustar and gnu tar? Is it okay to move it down into * header_common, perhaps? */ - if (archive_entry_copy_uname_l(entry, - header->uname, sizeof(header->uname), tar->sconv) != 0) { - err = set_conversion_failed_error(a, tar->sconv, "Uname"); - if (err == ARCHIVE_FATAL) - return (err); + const char *existing_uname = archive_entry_uname(entry); + if (existing_uname == NULL || existing_uname[0] == '\0') { + if (archive_entry_copy_uname_l(entry, + header->uname, sizeof(header->uname), tar->sconv) != 0) { + err = set_conversion_failed_error(a, tar->sconv, "Uname"); + if (err == ARCHIVE_FATAL) + return (err); + } } - if (archive_entry_copy_gname_l(entry, - header->gname, sizeof(header->gname), tar->sconv) != 0) { - err = set_conversion_failed_error(a, tar->sconv, "Gname"); - if (err == ARCHIVE_FATAL) - return (err); + const char *existing_gname = archive_entry_gname(entry); + if (existing_gname == NULL || existing_gname[0] == '\0') { + if (archive_entry_copy_gname_l(entry, + header->gname, sizeof(header->gname), tar->sconv) != 0) { + err = set_conversion_failed_error(a, tar->sconv, "Gname"); + if (err == ARCHIVE_FATAL) + return (err); + } } /* Parse out device numbers only for char and block specials */ if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { - 
archive_entry_set_rdevmajor(entry, (dev_t) - tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); - archive_entry_set_rdevminor(entry, (dev_t) - tar_atol(header->rdevminor, sizeof(header->rdevminor))); - } else + if (!archive_entry_rdev_is_set(entry)) { + archive_entry_set_rdevmajor(entry, (dev_t) + tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); + archive_entry_set_rdevminor(entry, (dev_t) + tar_atol(header->rdevminor, sizeof(header->rdevminor))); + } + } else { archive_entry_set_rdev(entry, 0); + } tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); /* Grab GNU-specific fields. */ - t = tar_atol(header->atime, sizeof(header->atime)); - if (t > 0) - archive_entry_set_atime(entry, t, 0); - t = tar_atol(header->ctime, sizeof(header->ctime)); - if (t > 0) - archive_entry_set_ctime(entry, t, 0); + if (!archive_entry_atime_is_set(entry)) { + t = tar_atol(header->atime, sizeof(header->atime)); + if (t > 0) + archive_entry_set_atime(entry, t, 0); + } + if (!archive_entry_ctime_is_set(entry)) { + t = tar_atol(header->ctime, sizeof(header->ctime)); + if (t > 0) + archive_entry_set_ctime(entry, t, 0); + } if (header->realsize[0] != 0) { tar->realsize @@ -2417,19 +3000,19 @@ gnu_sparse_old_parse(struct archive_read *a, struct tar *tar, * importantly, the sparse data was lost when extracted by archivers * that didn't recognize this extension. 
*/ - static int -gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p) +gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p, size_t length) { const char *e; int64_t offset = -1, size = -1; for (;;) { e = p; - while (*e != '\0' && *e != ',') { + while (length > 0 && *e != ',') { if (*e < '0' || *e > '9') return (ARCHIVE_WARN); e++; + length--; } if (offset < 0) { offset = tar_atol10(p, e - p); @@ -2444,9 +3027,10 @@ gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p) return (ARCHIVE_FATAL); offset = -1; } - if (*e == '\0') + if (length == 0) return (ARCHIVE_OK); p = e + 1; + length--; } } @@ -2568,8 +3152,8 @@ gnu_sparse_10_read(struct archive_read *a, struct tar *tar, size_t *unconsumed) * consist of both data and hole. */ static int -solaris_sparse_parse(struct archive_read *a, struct tar *tar, - struct archive_entry *entry, const char *p) +pax_attribute_SUN_holesdata(struct archive_read *a, struct tar *tar, + struct archive_entry *entry, const char *p, size_t length) { const char *e; int64_t start, end; @@ -2578,16 +3162,21 @@ solaris_sparse_parse(struct archive_read *a, struct tar *tar, (void)entry; /* UNUSED */ end = 0; - if (*p == ' ') + if (length <= 0) + return (ARCHIVE_WARN); + if (*p == ' ') { p++; - else + length--; + } else { return (ARCHIVE_WARN); + } for (;;) { e = p; - while (*e != '\0' && *e != ' ') { + while (length > 0 && *e != ' ') { if (*e < '0' || *e > '9') return (ARCHIVE_WARN); e++; + length--; } start = end; end = tar_atol10(p, e - p); @@ -2599,9 +3188,15 @@ solaris_sparse_parse(struct archive_read *a, struct tar *tar, return (ARCHIVE_FATAL); tar->sparse_last->hole = hole; } - if (*e == '\0') - return (ARCHIVE_OK); + if (length == 0 || *e == '\n') { + if (length == 0 && *e == '\n') { + return (ARCHIVE_OK); + } else { + return (ARCHIVE_WARN); + } + } p = e + 1; + length--; hole = hole == 0; } } @@ -2903,22 +3498,23 @@ base64_decode(const char *s, size_t len, size_t *out_len) 
} static char * -url_decode(const char *in) +url_decode(const char *in, size_t length) { char *out, *d; const char *s; - out = (char *)malloc(strlen(in) + 1); + out = (char *)malloc(length + 1); if (out == NULL) return (NULL); - for (s = in, d = out; *s != '\0'; ) { - if (s[0] == '%' && s[1] != '\0' && s[2] != '\0') { + for (s = in, d = out; length > 0 && *s != '\0'; ) { + if (s[0] == '%' && length > 2) { /* Try to convert % escape */ int digit1 = tohex(s[1]); int digit2 = tohex(s[2]); if (digit1 >= 0 && digit2 >= 0) { /* Looks good, consume three chars */ s += 3; + length -= 3; /* Convert output */ *d++ = ((digit1 << 4) | digit2); continue; @@ -2926,6 +3522,7 @@ url_decode(const char *in) /* Else fall through and treat '%' as normal char */ } *d++ = *s++; + --length; } *d = '\0'; return (out); diff --git a/libarchive/test/CMakeLists.txt b/libarchive/test/CMakeLists.txt index 6d28fc904a..0352a1e7f6 100644 --- a/libarchive/test/CMakeLists.txt +++ b/libarchive/test/CMakeLists.txt @@ -166,6 +166,7 @@ IF(ENABLE_TEST) test_read_format_tar_empty_pax.c test_read_format_tar_filename.c test_read_format_tar_invalid_pax_size.c + test_read_format_tar_pax_large_attr.c test_read_format_tbz.c test_read_format_tgz.c test_read_format_tlz.c diff --git a/libarchive/test/test_compat_solaris_pax_sparse.c b/libarchive/test/test_compat_solaris_pax_sparse.c index c25c4eb8f5..94e3e9189b 100644 --- a/libarchive/test/test_compat_solaris_pax_sparse.c +++ b/libarchive/test/test_compat_solaris_pax_sparse.c @@ -60,7 +60,7 @@ test_compat_solaris_pax_sparse_1(void) assertEqualInt(0100644, archive_entry_mode(ae)); /* Verify the sparse information. 
*/ - failure("This sparse file should have tree data blocks"); + failure("This sparse file should have three data blocks"); assertEqualInt(3, archive_entry_sparse_reset(ae)); assertEqualInt(ARCHIVE_OK, archive_entry_sparse_next(ae, &offset, &length)); diff --git a/libarchive/test/test_entry.c b/libarchive/test/test_entry.c index 228fdd8bcc..38c406e35b 100644 --- a/libarchive/test/test_entry.c +++ b/libarchive/test/test_entry.c @@ -269,7 +269,7 @@ DEFINE_TEST(test_entry) archive_entry_set_hardlink(e, "hardlink"); archive_entry_set_symlink(e, "symlink"); archive_entry_set_link(e, "link"); - assertEqualString(archive_entry_hardlink(e), "hardlink"); + assertEqualString(archive_entry_hardlink(e), NULL); assertEqualString(archive_entry_symlink(e), "link"); /* mode */ @@ -513,7 +513,6 @@ DEFINE_TEST(test_entry) archive_entry_set_rdev(e, 532); archive_entry_set_size(e, 987654321); archive_entry_copy_sourcepath(e, "source"); - archive_entry_set_symlink(e, "symlinkname"); archive_entry_set_uid(e, 83); archive_entry_set_uname(e, "user"); /* Add an ACL entry. */ @@ -548,7 +547,7 @@ DEFINE_TEST(test_entry) assertEqualInt(archive_entry_rdev(e2), 532); assertEqualInt(archive_entry_size(e2), 987654321); assertEqualString(archive_entry_sourcepath(e2), "source"); - assertEqualString(archive_entry_symlink(e2), "symlinkname"); + assertEqualString(archive_entry_symlink(e2), NULL); assertEqualInt(archive_entry_uid(e2), 83); assertEqualString(archive_entry_uname(e2), "user"); @@ -649,7 +648,7 @@ DEFINE_TEST(test_entry) assertEqualInt(archive_entry_rdev(e2), 532); assertEqualInt(archive_entry_size(e2), 987654321); assertEqualString(archive_entry_sourcepath(e2), "source"); - assertEqualString(archive_entry_symlink(e2), "symlinkname"); + assertEqualString(archive_entry_symlink(e2), NULL); assertEqualInt(archive_entry_uid(e2), 83); assertEqualString(archive_entry_uname(e2), "user"); @@ -705,6 +704,13 @@ DEFINE_TEST(test_entry) /* Release clone. 
*/ archive_entry_free(e2); + /* Verify that symlink is copied over by `clone` */ + archive_entry_set_symlink(e, "symlinkpath"); + e2 = archive_entry_clone(e); + assertEqualString(archive_entry_hardlink(e2), NULL); + assertEqualString(archive_entry_symlink(e2), "symlinkpath"); + archive_entry_free(e2); + /* * Test clear() implementation. */ diff --git a/libarchive/test/test_read_format_tar.c b/libarchive/test/test_read_format_tar.c index 2b7cd8dedc..1a2b326d0d 100644 --- a/libarchive/test/test_read_format_tar.c +++ b/libarchive/test/test_read_format_tar.c @@ -437,7 +437,7 @@ static void verify(unsigned char *d, size_t s, assertA(0 == archive_read_support_filter_all(a)); assertA(0 == archive_read_support_format_all(a)); assertA(0 == archive_read_open_memory(a, buff, s + 1024)); - assertA(0 == archive_read_next_header(a, &ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); assertEqualInt(archive_filter_code(a, 0), compression); assertEqualInt(archive_format(a), format); assertEqualInt(archive_entry_is_encrypted(ae), 0); diff --git a/libarchive/test/test_read_format_tar_pax_large_attr.c b/libarchive/test/test_read_format_tar_pax_large_attr.c new file mode 100644 index 0000000000..1c2b55f9c6 --- /dev/null +++ b/libarchive/test/test_read_format_tar_pax_large_attr.c @@ -0,0 +1,65 @@ +/*- + * Copyright (c) 2003-2023 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "test.h" + +/* + * Read a pax formatted tar archive that has an extremely large + * (8,000,000 bytes) attribute of unknown type. The pax reader should simply + * skip the attribute. + */ +DEFINE_TEST(test_read_format_tar_pax_large_attr) +{ + char name[] = "test_read_format_tar_pax_large_attr.tar.Z"; + struct archive_entry *ae; + struct archive *a; + + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); + extract_reference_file(name); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, name, 10240)); + + /* Read first entry. 
*/ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("foo", archive_entry_pathname(ae)); + assertEqualInt(1, archive_entry_mtime(ae)); + assertEqualInt(1000, archive_entry_uid(ae)); + assertEqualString("tim", archive_entry_uname(ae)); + assertEqualInt(0, archive_entry_gid(ae)); + assertEqualString("wheel", archive_entry_gname(ae)); + assertEqualInt(0100644, archive_entry_mode(ae)); + assertEqualInt(archive_entry_is_encrypted(ae), 0); + assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); + + /* Verify the end-of-archive. */ + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + + /* Verify that the format detection worked. */ + assertEqualInt(archive_filter_code(a, 0), ARCHIVE_FILTER_COMPRESS); + assertEqualInt(archive_format(a), ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE); + + assertEqualInt(ARCHIVE_OK, archive_read_close(a)); + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); +} diff --git a/libarchive/test/test_read_format_tar_pax_large_attr.tar.Z.uu b/libarchive/test/test_read_format_tar_pax_large_attr.tar.Z.uu new file mode 100644 index 0000000000..01eb55588e --- /dev/null +++ b/libarchive/test/test_read_format_tar_pax_large_attr.tar.Z.uu @@ -0,0 +1,149 @@ +Decode this file with: +cat test_read_format_tar_pax_large_attr.tar.Z.uu | uudecode -p | uncompress | hexdump -C + +It contains a single tar entry with a pax header that has a single +attribute of 8000000 bytes +00000200 38 30 30 30 30 30 30 20 75 6e 6b 6e 6f 77 6e 3d |8000000 unknown=| +00000210 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b |++++++++++++++++| +* +007a13f0 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 0a |+++++++++++++++.| + +begin 644 test_read_format_tar_pax_large_attr.tar.Z +M'YV04,+@05(F#)DR?,&@,.'$"-*G$BQHL6+&"O"V&B#!@T0`#;"N&&C +M!DB1,&1\#+EQ1L<8,42"B$&CA@P;-UQZM`$"1HP9-6+<```"3\:C2),J73JQ +MSAPZ8>2PI).F#5.+3\.8,7-U(LJ-)[^"[$JVK-FS:-'B^`H#1!TW:]R\N>.F +MQXJ[>//JW/'D"-+GDRYLN7+F#-KWLRY 
+ML^?/H$.+'DVZM.G3J%.K7LVZM>O7L&/+GDV[MNW;N'/KWLV[M^_?P(,+'TZ\ +MN/'CR),K7\Z\N?/GT*-+GTZ]NO7KV+-KW\Z]N_?OX,.+'T^^O/GSZ-.K7\^^ +MO?OW\./+GT^_OOW[^//KW\^_O___``8HX(`$%FC@@0@FJ.""##;HX(,01BCA +MA!16:.&%&&:HX88<=NCAAR"&*.*())9HXHDHIJCBBBRVZ.*+,,8HXXPTUFCC +MC3CFJ...//;HXX]`!BGDD$06:>212":IY)),-NGDDU!&*>645%9IY9589JGE +MEEQVZ>678(8IYIADEFGFF6BFJ>::;+;IYIMPQBGGG'36:>>=>.:IYYY\]NGG +MGX`&*NB@A!9JZ*&()JKHHHPVZNBCD$8JZ:245FKII9AFJNFFG';JZ:>@ABKJ +MJ*26:NJIJ*:JZJJLMNKJJ[#&*NNLM-9JZZVXYJKKKKSVZNNOP`8K[+#$%FOL +ML<@FJ^RRS#;K[+/01BOMM-16:^VUV&:K[;;<=NOMM^"&*^ZXY)9K[KGHIJON +MNNRVZ^Z[\,8K[[STUFOOO?CFJ^^^_/;K[[\`!RSPP`07;/#!"">L\,(,-^SP +MPQ!'+/'$%%=L\<489ZSQQAQW[/''((L\\X\]^SSST`'+?301!=M]-%()ZWTTDPW[?334$M]MILM^WVVW#'+??<=-=M]]UXYZWW +MWGSW[???@`N^>:<=^[Y +MYZ"'+OKHI)=N^NFHIZ[ZZJRW[OKKL,O_/+,-^_\\]!'+_WTU%=O_?789Z_]]MQW[_WWX(BGK_[Z[+?O_OOPQR___/37;__]^.>O__[\]^___P`,H``'2,`"&O"`"$R@ +M`A?(P`8Z\($0C*`$)TC!"EKP@AC,H`8WR,$.>O"#(`RA"$=(PA*:\(0H3*$* +M5\C"%KKPA3",H0QG2,,:VO"&.,RA#G?(PQ[Z\(=`#*(0ATC$(AKQB$A,HA*7 +MR,0F.O&)4(RB%*=(Q2I:\8I8S*(6M\C%+GKQBV`,HQC'2,8RFO&,:$RC&M?( +MQC:Z\8UPC*,O.;X`RG.,=)SG*:\YSH3*^,RG/O?)SW[Z\Y\`#:A`!TK0@AKTH`A-J$(7RM"&.O2A +M$(VH1"=*T8I:]*(8S:A&-\K1CGKTHR`-J4A'2M*2FO2D*$VI2E?*TI:Z]*4P +MC:E,9TK3FMKTICC-J4YWRM.>^O2G0`VJ4(=*U*(:]:A(3:I2E\K4ICKUJ5"- +MJE2G2M6J6O6J6,VJ5K?*U:YZ]:M@#:M8QTK6LIKUK&A-JUK7RM:VNO6M<(VK +M7.=*U[K:]:YXS:M>]\K7OOKUKX`-K&`'2]C"&O:PB$VL8A?+V,8Z]K&0C:QD +M)TO9REKVLIC-K&8WR]G.>O:SH`VM:$=+VM*:]K2H3:UJ5\O:UKKVM;"-K6QG +M2]O:VO:VN,VM;G?+V][Z]K?`#:YPATOYT(VN=*=+ +MW>I:][K8S:YVM\O=[GKWN^`-KWC'2][RFO>\Z$VO>M?+WO:Z][WPC:]\YTO? +M^MKWOOC-KW[WR]_^^O>_``ZP@`=,X`(;^,`(3K""%\S@!COXP1".L(0G3.$* +M6_C"&,ZPAC?,X0Y[^,,@#K&(1TSB$IOXQ"A.L8I7S.(6N_C%,(ZQC&=,XQK; +M^,8XSK&.=\SC'OOXQT`.LI"'3.0B&_G(2$ZRDI?,Y"8[^_G+8`ZSF,=,YC*;^,ZSGO?,YS[[^<^`#K2@!TWH0AOZT(A.M*(7S>A&._K1D(ZTI"=-Z4I;^M*8 +MSK2F-\WI3GOZTZ`.M:A'3>I2F_K4J$ZUJE?-ZE:[^M6PCK6L9TWK6MOZUKC. 
+MM:YWS>M>^_K7P`ZVL(=-[&(;^]C(3K:RE\WL9CO[V=".MK2G3>UJ6_O:V,ZV +MMK?-[6Y[^]O@#K>XQTWNYVN_O=\(ZWO.=-[WK;^][XSK>^ +M]\WO?OO[WP`/N,`'3O""&_S@"$^XPA?.\(8[_.$0C[C$)T[QBEO\XAC/N,8W +MSO&.>_SC(`^YR$=.\I*;_.0H3[G*5\[REKO\Y3"/NI8S[K6M\[U +MKGO]ZV`/N]C'3O:RF_WL:$^[VM?.]K:[_>UPC[O^ +M^_WO@`^\X`=/^,(;_O"(3[SB%\_XQCO^\9"/O.0G3_G*6_[RF,^\YC?/^I7S_K6N_[UL(^][&=/^]K;_O:XS[WN=\_[WOO^ +M]\`/OO"'3_SB&__XR$^^\I?/_.8[__G0C[[TIT_]ZEO_^MC/OO:WS_WN>__[ +MX`^_^,=/_O*;__SH3[_ZU\_^]KO__?"/O_SG3__ZV__^^,^__O?/__[[__\` +M&(`".(`$6(`&>(`(F(`*N(`,V(`.^(`0&($2.($46($6>($8F($:N($ +M^($@&((B.((D6((F>((HF((JN((LV((N^((P&(,R.(,T6(,V>(,XF(,ZN(,\ +MV(,^^(-`&(1".(1$6(1&>(1(F(1*N(1,V(1.^(10&(52.(546(56>(58F(5: +MN(5^(5@&(9B.(9D6(9F>(9HF(9JN(9LV(9N^(9P&(=R.(=T6(=V>(=X +MF(=ZN(=\V(=^^(>`&(B".(B$6(B&>(B(F(B*N(B,V(B.^(B0&(F2.(F46(F6 +M>(F8F(F:N(F^(F@&(JB.(JD6(JF>(JHF(JJN(JLV(JN^(JP&(NR.(NT +M6(NV>(NXF(NZN(N\V(N^^(O`&(S".(S$6(S&>(S(F(S*N(S,V(S.^(S0&(W2 +M.(W46(W6>(W8F(W:N(W^(W@&([B.([D6([F>([HF([JN([LV([N^([P +M&(_R.(_T6(_V>(_XF(_ZN(_\V(_^^(\`&9`".9`$69`&>9`(F9`*N9`,V9`. 
+M^9`0&9$2.9$469$6>9$8F9$:N9$^9$@&9(B.9(D69(F>9(HF9(JN9(L +MV9(N^9(P&9,R.9,T69,V>9,XF9,ZN9,\V9,^^9-`&91".91$691&>91(F91* +MN91,V91.^910&952.9546956>958F95:N95^95@&99B.99D699F>99H +MF99JN99LV99N^99P&9=R.9=T69=V>9=XF9=ZN9=\V9=^^9>`&9B".9B$69B& +M>9B(F9B*N9B,V9B.^9B0&9F2.9F469F6>9F8F9F:N9F^9F@&9JB.9JD +M69JF>9JHF9JJN9JLV9JN^9JP&9NR.9NT69NV>9NXF9NZN9N\V9N^^9O`&9S" +M.9S$69S&>9S(F9S*N9S,V9S.^9S0&9W2.9W469W6>9W8F9W:N9W^9W@ +M&9[B.9[D69[F>9[HF9[JN9[LV9[N^9[P&9_R.9_T69_V>9_XF9_ZN9_\V9_^ +M^9\`&J`".J`$6J`&>J`(FJ`*NJ`,VJ`.^J`0&J$2.J$46J$6>J$8FJ$:NJ$< +MVJ$>^J$@&J(B.J(D6J(F>J(HFJ(JNJ(LVJ(N^J(P&J,R.J,T6J,V>J,XFJ,Z +MNJ,\VJ,^^J-`&J1".J1$6J1&>J1(FJ1*NJ1,VJ1.^J10&J52.J546J56>J58 +MFJ5:NJ5^J5@&J9B.J9D6J9F>J9HFJ9JNJ9LVJ9N^J9P&J=R.J=T6J=V +M>J=XFJ=ZNJ=\VJ=^^J>`&JB".JB$6JB&>JB(FJB*NJB,VJB.^JB0&JF2.JF4 +M6JF6>JF8FJF:NJF^JF@&JJB.JJD6JJF>JJHFJJJNJJLVJJN^JJP&JNR +M.JNT6JNV>JNXFJNZNJN\VJN^^JO`&JS".JS$6JS&>JS(FJS*NJS,VJS.^JS0 +M&JW2.JW46JW6>JW8FJW:NJW^JW@&J[B.J[D6J[F>J[HFJ[JNJ[LVJ[N +M^J[P&J_R.J_T6J_V>J_XFJ_ZNJ_\VJ_^^J\`&[`".[`$6[`&>[`(F[`*N[`, +MV[`.^[`0&[$2.[$46[$6>[$8F[$:N[$^[$@&[(B.[(D6[(F>[(HF[(J +MN[(LV[(N^[(P&[,R.[,T6[,V>[,XF[,ZN[,\V[,^^[-`&[1".[1$6[1&>[1( +MF[1*N[1,V[1.^[10&[52.[546[56>[58F[5:N[5^[5@&[9B.[9D6[9F +M>[9HF[9JN[9LV[9N^[9P&[=R.[=T6[=V>[=XF[=ZN[=\V[=^^[>`&[B".[B$ +M6[B&>[B(F[B*N[B,V[B.^[B0&[F2.[F46[F6>[F8F[F:N[F^[F@&[JB +M.[JD6[JF>[JHF[JJN[JLV[JN^[JP&[NR.[NT6[NV>[NXF[NZN[N\V[N^^[O` +M&[S".[S$6[S&>[S(F[S*N[S,V[S.^[S0&[W2.[W46[W6>[W8F[W:N[W +M^[W@&[[B.[[D6[[F>[[HF[[JN[[LV[[N^[[P&[_R.[_T6[_V>[_XF[_ZN[_\ +MV[_^^[\`',`"/,`$7,`&?,`(G,`*O,`,W,`._,`0',$2/,$47,$6?,$8G,$: +MO,$_,$@',(B/,(D7,(F?,(HG,(JO,(LW,(N_,(P',,R/,,T7,,V?,,X +MG,,ZO,,\W,,^_,-`',1"/,1$7,1&?,1(G,1*O,1,W,1._,10',52/,547,56 +M?,58G,5:O,5_,5@',9B/,9D7,9F?,9HG,9JO,9LW,9N_,9P',=R/,=T +M7,=V?,=XG,=ZO,=\W,=^_,>`',B"/,B$7,B&?,B(G,B*O,B,W,B._,B0',F2 +M/,F47,F6?,F8G,F:O,F_,F@',JB/,JD7,JF?,JHG,JJO,JLW,JN_,JP +M',NR/,NT7,NV?,NXG,NZO,N\W,N^_,O`',S"/,S$7,S&?,S(G,S*O,S,W,S. 
+M_,S0',W2/,W47,W6?,W8G,W:O,W_,W@',[B/,[D7,[F?,[HG,[JO,[L +MW,[N_,[P',_R/,_T7,_V?,_XG,_ZO,_\W,_^_,\`'=`"/=`$7=`&?=`(G=`* +MO=`,W=`._=`0'=$2/=$47=$6?=$8G=$:O=$_=$@'=(B/=(D7=(F?=(H +MG=(JO=(LW=(N_=(P'=,R/=,T7=,V?=,XG=,ZO=,\W=,^_=-`'=1"/=1$7=1& +M?=1(G=1*O=1,W=1._=10'=52/=547=56?=58G=5:O=5_=5@'=9B/=9D +M7=9F?=9HG=9JO=9LW=9N_=9P'==R/==T7==V?==XG==ZO==\W==^_=>`'=B" +M/=B$7=B&?=B(G=B*O=B,W=B._=B0'=F2/=F47=F6?=F8G=F:O=F_=F@ +M'=JB/=JD7=JF?=JHG=JJO=JLW=JN_=JP'=NR/=NT7=NV?=NXG=NZO=N\W=N^ +M_=O`'=S"/=S$7=S&?=S(G=S*O=S,W=S._=S0'=W2/=W47=W6?=W8G=W:O=W< +MW=W>_=W@'=[B/=[D7=[F?=[HG=[JO=[LW=[N_=[P'=_R/=_T7=_V?=_XG=_Z +MO=_\W=_^_=\`'N`"/N`$7N`&?N`(GN`*ON`,WN`._N`0'N$2/N$47N$6?N$8 +MGN$:ON$_N$@'N(B/N(D7N(F?N(HGN(JON(LWN(N_N(P'N,R/N,T7N,V +M?N,XGN,ZON,\WN,^_N-`'N1"/N1$7N1&?N1(GN1*ON1,WN1._N10'N52/N54 +M7N56?N58GN5:ON5_N5@'N9B/N9D7N9F?N9HGN9JON9LWN9N_N9P'N=R +M/N=T7N=V?N=XGN=ZON=\WN=^_N>`'NB"/NB$7NB&?NB(GNB*ONB,WNB._NB0 +M'NF2/NF47NF6?NF8GNF:ONF_NF@'NJB/NJD7NJF?NJHGNJJONJLWNJN +M_NJP'NNR/NNT7NNV?NNXGNNZONN\WNN^_NO`'NS"/NS$7NS&?NS(GNS*ONS, +MWNS._NS0'NW2/NW47NW6?NW8GNW:ONW_NW@'N[B/N[D7N[F?N[HGN[J +MON[LWN[N_N[P'N_R/N_T7N_V?N_XGN_ZON_\WN_^_N\`'_`"/_`$7_`&?_`( +MG_`*O_`,W_`.__`0'_$2/_$47_$6?_$8G_$:O_$__$@'_(B/_(D7_(F +M?_(HG_(JO_(LW_(N__(P'_,R/_,T7_,V?_,XG_,ZO_,\W_,^__-`'_1"/_1$ +M7_1&?_1(G_1*O_1,W_1.__10'_52/_547_56?_58G_5:O_5__5@'_9B +M/_9D7_9F?_9HG_9JO_9LW_9N__9P'_=R/_=T7_=V?_=XG_=ZO_=\W_=^__>` +M'_B"/_B$7_B&?_B(G_B*O_B,W_B.__B0'_F2/_F47_F6?_F8G_F:O_F +M__F@'_JB/_JD7_JF?_JHG_JJO_JLW_JN__JP'_NR/_NT7_NV?_NXG_NZO_N\ +MW_N^__O`'_S"/_S$7_S&?_S(G_S*O_S,W_S.__S0'_W2/_W47_W6?_W8G_W: +MO_W__W@'_[B/_[D7_[F?_[HG_[JO_[LW_[N__[P'__R/__T7__V?__X +MG__ZO__\W__^__\`,``*P`%(``N@`3R`"#`!*L`%R``;H`-\@!`P`DK`"4@! +M*Z`%O(`8,`-JP`W(`3N@!_R`(#`$BL`12`)+H`D\@2@P!:K`%<@"6Z`+?($P +M,`;*P!E(`VN@#;R!.#`'ZL`=R`-[H`_\@4`P"`K!(4@$BZ`1/()(,`DJP27( +M!)N@$WR"4#`*2L$I2`6KH!6\@E@P"VK!+<@%NZ`7_()@,`R*P3%(!LN@&3R# +M:#`-JL$UR`;;H!M\@W`P#LK!.4@'ZZ`=O(-X,`_JP3W(!_N@'_R#@#`0"L)! 
+M2`@+H2$\A(@P$2K"1<@(&Z$C?(20,!)*PDE("2NA);R$F#`3:L)-R`D[H2?\ +MA*`P%(K"44@*2Z$I/(6H,!6JPE7("ENA*WR%L#`6RL)92`MKH2V\A;@P%^K" +M7<@+>Z$O_(7`,!@*PV%(#(NA,3R&R#`9*L-ER`R;H3-\AM`P&DK#:4@-JZ$U +MO(;8,!MJPVW(#;NA-_R&X#` Date: Sun, 16 Jun 2024 20:22:14 -0700 Subject: [PATCH 96/98] Support ISOs with a non-standard PVD layouts (#2238) The CSRG ISOs have a non-standard PVD layout with a 68-byte root directory record (rather than the 34-byte record required by ECMA119/ISO9660). I built a test image with this change and modified the ISO9660 reader to accept it. While I was working on the bid logic to recognize PVDs, I added a number of additional correctness checks that should make our bidding a bit more accurate. In particular, this should more than compensate for the weakened check of the root directory record size. Resolves #2232 --- Makefile.am | 1 + .../archive_read_support_format_iso9660.c | 109 +++++++++++++++++- .../test/test_read_format_iso_3.iso.Z.uu | 40 +++++++ libarchive/test/test_read_format_iso_Z.c | 8 +- 4 files changed, 150 insertions(+), 8 deletions(-) create mode 100644 libarchive/test/test_read_format_iso_3.iso.Z.uu diff --git a/Makefile.am b/Makefile.am index 1661d9c1a5..7560b14fe7 100644 --- a/Makefile.am +++ b/Makefile.am @@ -833,6 +833,7 @@ libarchive_test_EXTRA_DIST=\ libarchive/test/test_read_format_huge_rpm.rpm.uu \ libarchive/test/test_read_format_iso.iso.Z.uu \ libarchive/test/test_read_format_iso_2.iso.Z.uu \ + libarchive/test/test_read_format_iso_3.iso.Z.uu \ libarchive/test/test_read_format_iso_joliet.iso.Z.uu \ libarchive/test/test_read_format_iso_joliet_by_nero.iso.Z.uu \ libarchive/test/test_read_format_iso_joliet_long.iso.Z.uu \ diff --git a/libarchive/archive_read_support_format_iso9660.c b/libarchive/archive_read_support_format_iso9660.c index 25ab11bf59..c38943a2fa 100644 --- a/libarchive/archive_read_support_format_iso9660.c +++ b/libarchive/archive_read_support_format_iso9660.c @@ -402,6 +402,9 @@ static int isJolietSVD(struct iso9660 
*, const unsigned char *); static int isSVD(struct iso9660 *, const unsigned char *); static int isEVD(struct iso9660 *, const unsigned char *); static int isPVD(struct iso9660 *, const unsigned char *); +static int isRootDirectoryRecord(const unsigned char *); +static int isValid723Integer(const unsigned char *); +static int isValid733Integer(const unsigned char *); static int next_cache_entry(struct archive_read *, struct iso9660 *, struct file_info **); static int next_entry_seek(struct archive_read *, struct iso9660 *, @@ -773,8 +776,9 @@ isSVD(struct iso9660 *iso9660, const unsigned char *h) /* Read Root Directory Record in Volume Descriptor. */ p = h + SVD_root_directory_record_offset; - if (p[DR_length_offset] != 34) + if (!isRootDirectoryRecord(p)) { return (0); + } return (48); } @@ -851,8 +855,9 @@ isEVD(struct iso9660 *iso9660, const unsigned char *h) /* Read Root Directory Record in Volume Descriptor. */ p = h + PVD_root_directory_record_offset; - if (p[DR_length_offset] != 34) + if (!isRootDirectoryRecord(p)) { return (0); + } return (48); } @@ -882,21 +887,43 @@ isPVD(struct iso9660 *iso9660, const unsigned char *h) if (!isNull(iso9660, h, PVD_reserved2_offset, PVD_reserved2_size)) return (0); + /* Volume space size must be encoded according to 7.3.3 */ + if (!isValid733Integer(h + PVD_volume_space_size_offset)) { + return (0); + } + volume_block = archive_le32dec(h + PVD_volume_space_size_offset); + if (volume_block <= SYSTEM_AREA_BLOCK+4) + return (0); + /* Reserved field must be 0. */ if (!isNull(iso9660, h, PVD_reserved3_offset, PVD_reserved3_size)) return (0); + /* Volume set size must be encoded according to 7.2.3 */ + if (!isValid723Integer(h + PVD_volume_set_size_offset)) { + return (0); + } + + /* Volume sequence number must be encoded according to 7.2.3 */ + if (!isValid723Integer(h + PVD_volume_sequence_number_offset)) { + return (0); + } + /* Logical block size must be > 0. 
*/ /* I've looked at Ecma 119 and can't find any stronger * restriction on this field. */ + if (!isValid723Integer(h + PVD_logical_block_size_offset)) { + return (0); + } logical_block_size = archive_le16dec(h + PVD_logical_block_size_offset); if (logical_block_size <= 0) return (0); - volume_block = archive_le32dec(h + PVD_volume_space_size_offset); - if (volume_block <= SYSTEM_AREA_BLOCK+4) + /* Path Table size must be encoded according to 7.3.3 */ + if (!isValid733Integer(h + PVD_path_table_size_offset)) { return (0); + } /* File structure version must be 1 for ISO9660/ECMA119. */ if (h[PVD_file_structure_version_offset] != 1) @@ -935,8 +962,9 @@ isPVD(struct iso9660 *iso9660, const unsigned char *h) /* Read Root Directory Record in Volume Descriptor. */ p = h + PVD_root_directory_record_offset; - if (p[DR_length_offset] != 34) + if (!isRootDirectoryRecord(p)) { return (0); + } if (!iso9660->primary.location) { iso9660->logical_block_size = logical_block_size; @@ -951,6 +979,51 @@ isPVD(struct iso9660 *iso9660, const unsigned char *h) return (48); } +static int +isRootDirectoryRecord(const unsigned char *p) { + int flags; + + /* ECMA119/ISO9660 requires that the root directory record be _exactly_ 34 bytes. + * However, we've seen images that have root directory records up to 68 bytes. */ + if (p[DR_length_offset] < 34 || p[DR_length_offset] > 68) { + return (0); + } + + /* The root directory location must be a 7.3.3 32-bit integer. */ + if (!isValid733Integer(p + DR_extent_offset)) { + return (0); + } + + /* The root directory size must be a 7.3.3 integer. 
*/ + if (!isValid733Integer(p + DR_size_offset)) { + return (0); + } + + /* According to the standard, certain bits must be one or zero: + * Bit 1: must be 1 (this is a directory) + * Bit 2: must be 0 (not an associated file) + * Bit 3: must be 0 (doesn't use extended attribute record) + * Bit 7: must be 0 (final directory record for this file) + */ + flags = p[DR_flags_offset]; + if ((flags & 0x8E) != 0x02) { + return (0); + } + + /* Volume sequence number must be a 7.2.3 integer. */ + if (!isValid723Integer(p + DR_volume_sequence_number_offset)) { + return (0); + } + + /* Root directory name is a single zero byte... */ + if (p[DR_name_len_offset] != 1 || p[DR_name_offset] != 0) { + return (0); + } + + /* Nothing looked wrong, so let's accept it. */ + return (1); +} + static int read_children(struct archive_read *a, struct file_info *parent) { @@ -3127,6 +3200,32 @@ toi(const void *p, int n) return (0); } +/* + * ECMA119/ISO9660 stores multi-byte integers in one of + * three different formats: + * * Little-endian (specified in section 7.2.1 and 7.3.1) + * * Big-endian (specified in section 7.2.2 and 7.3.2) + * * Both (specified in section 7.2.3 and 7.3.3) + * + * For values that follow section 7.2.3 (16-bit) or 7.3.3 (32-bit), we + * can check that the little-endian and big-endian forms agree with + * each other. This helps us avoid trying to decode files that are + * not really ISO images. 
+ */ +static int +isValid723Integer(const unsigned char *p) { + return (p[0] == p[3] && p[1] == p[2]); +} + +static int +isValid733Integer(const unsigned char *p) +{ + return (p[0] == p[7] + && p[1] == p[6] + && p[2] == p[5] + && p[3] == p[4]); +} + static time_t isodate7(const unsigned char *v) { diff --git a/libarchive/test/test_read_format_iso_3.iso.Z.uu b/libarchive/test/test_read_format_iso_3.iso.Z.uu new file mode 100644 index 0000000000..e7734ab33b --- /dev/null +++ b/libarchive/test/test_read_format_iso_3.iso.Z.uu @@ -0,0 +1,40 @@ +Same as test_read_format_iso_2.iso.Z except that the root directory record +size in the PVD has been changed to 68 bytes (instead of the 34 required +by the standard). This non-standard value was seen in the wild. + +begin 644 test_read_format_iso_3.iso.Z +M'YV0``(*'$BPH,&#"!,J7,BPH<.'$"-*G$BQHL6+&#-JW,BQH\>/($.*'$FR +MI,F3*%.J7,FRIO8,.*'4NVK-FS:-.J7//JW/'D"-+GDRYLN7+F#-KWLRY +ML^?/H$.+'DVZM.G3J%.K7LVZM>O7L&/+GDV[MNW;N'/KWLV[M^_?P(,+'TZ\ +MN/'CR),K7\Z\N?/GT*-+GTZ]NO7KV+-KW\Z]N_?OX,.+'T^^O/GSZ-.K7\^^ +MO?OW\./+GT^_OOW[^//KW\^_O_^_`0Q!!`PPQ!```$Z408<04Q`!PH,01BCA +MA!16:.&%$!Z404$;>G0@``%\>"`""`#@04$>2)#0!`,1`0`%!<%8HD`EEF+C +MC:4($)"(!V+HXX]`!BGDD$06:>212":IY)),-NGDDU!&*>645%9II8\R%!@# +M##C$0(,,,6Q)PPP$DI-EF%QZ"::89,)`#H%PQBDGG&=NV>67:(Y9YH?_]>GG +MGX7](R"!!@)JZ*&()JKHHL<="...`AU80:0`!`$B`!=0.@2CG';JZ:>@A@H8 +MGR]>:FI`DP9@*:F9!BCJJ[#&*NNLM#XE0JD$R3A0C3C:J*.I(0)PZZ,#Z4HC +M`+WZ"BF((=XZ*4&3SAA0B6\@X$`%"AA`SJ\\!G%KI@1E*BT`U%J+[0+;+AOB +MIK6VZ^Z[\,8;G;,%1;LK`-5>FVVZP!XX;(SDWIMLCNJ&:`(`%A1D@0$%&9`O +MMMI2"B(!0KBP0PSR9JSQQAQWW!P:9;#!QAL*>&SRR2BGK'*GWQ8D[KT/*X`N +MMSOZBVNQ`1\[,,W,!G`P!@5AP#!!#ILK,SD2!T``$19CO/+34$M]MILM^WVVW#'+??<=-=M]]UXYZWW +MWGSW[???@`N^>:<=^[Y +MYZ"'+OKHI)=N^NFHIZ[ZZJRW[OKKL,O_/+,-^_\\]!'+_WTU%=O_?789Z_]]MQW[_WWX(BGK_[Z[+?O_OOPQR___/37;__]^.>O__[\]^___P`,H``'2,`"&O"`"$R@ +M`A?(P`8Z\($0C*`$)TC!"EKP@AC,H`8WR,$.>O"#(`RA"$=(PA*:\(0H3*$* +M5\C"%KKPA3",H0QG2,,:VO"&.,RA#G?(PQ[Z\(=`#*(0ATC$(AKQB$A,HA*7 
+MR,0F.O&)4(RB%*=(Q2I:\8I8S*(6M\C%+GKQBV`,HQC'2,8RFO&,:$RC&M?( +MQC:Z\8UPC*,O.;X`RG.,=)SG*:\YSH3*^,RG/O?)SW[Z\Y\`#:A`!TK0@AKTH`A-J$(7RM"&.O2A +M$(VH1"=*T8I:]*(8S:A&-\K1CGKTHR`-J4A'2M*2FO2D*$VI2E?*TI:Z]*4P +MC:E,9TK3FMKTICC-J4YWRM.>^O2G0`VJ4(=*U*(:]:A(3:I2E\K4ICKUJ5"- +MJE2G2M6J6O6J6,VJ5K?*U:YZ]:M@#:M8QTK6LIKUK&A-JUK7RM:VNO6M<(VK +".`$` +` +end diff --git a/libarchive/test/test_read_format_iso_Z.c b/libarchive/test/test_read_format_iso_Z.c index 09b0acb804..785b00bf85 100644 --- a/libarchive/test/test_read_format_iso_Z.c +++ b/libarchive/test/test_read_format_iso_Z.c @@ -53,11 +53,10 @@ test1(void) } static void -test2(void) +test_small(const char *name) { struct archive_entry *ae; struct archive *a; - const char *name = "test_read_format_iso_2.iso.Z"; extract_reference_file(name); @@ -98,5 +97,8 @@ test2(void) DEFINE_TEST(test_read_format_iso_Z) { test1(); - test2(); + /* A very small ISO image with a variety of contents. */ + test_small("test_read_format_iso_2.iso.Z"); + /* As above, but with a non-standard 68-byte root directory in the PVD */ + test_small("test_read_format_iso_3.iso.Z"); } From 13c710a825d1c8b5604d8ce6eb712f7f7cfadc80 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Sun, 16 Jun 2024 20:23:11 -0700 Subject: [PATCH 97/98] [cpio test] Dates can be more than 12 bytes, depending on the locale (#2237) In order to match cpio output, format the reference date with _at least_ 12 bytes instead of _exactly_ 12 bytes. This should fix a gratuitous test failure on certain systems that default to multi-byte locales. 
--- cpio/test/test_option_t.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpio/test/test_option_t.c b/cpio/test/test_option_t.c index 953e4a8065..4130b7f918 100644 --- a/cpio/test/test_option_t.c +++ b/cpio/test/test_option_t.c @@ -89,6 +89,7 @@ DEFINE_TEST(test_option_t) mtime = 1; #ifdef HAVE_LOCALE_H setlocale(LC_ALL, ""); + setlocale(LC_TIME, ""); #endif #if defined(HAVE_LOCALTIME_S) tmptr = localtime_s(&tmbuf, &mtime) ? NULL : &tmbuf; @@ -99,10 +100,10 @@ DEFINE_TEST(test_option_t) #endif #if defined(_WIN32) && !defined(__CYGWIN__) strftime(date2, sizeof(date2)-1, "%b %d %Y", tmptr); - _snprintf(date, sizeof(date)-1, "%12.12s file", date2); + _snprintf(date, sizeof(date)-1, "%12s file", date2); #else strftime(date2, sizeof(date2)-1, "%b %e %Y", tmptr); - snprintf(date, sizeof(date)-1, "%12.12s file", date2); + snprintf(date, sizeof(date)-1, "%12s file", date2); #endif assertEqualMem(p + 42, date, strlen(date)); free(p); From 3fdf9bf80fd4e9473052ef68a9ce7ccebfc5472c Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Wed, 19 Jun 2024 16:18:35 -0700 Subject: [PATCH 98/98] Fix potential overflow warning in cpio test_option_t (#2250) Fixes an error from #2237. --- cpio/test/test_option_t.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpio/test/test_option_t.c b/cpio/test/test_option_t.c index 4130b7f918..46668ec276 100644 --- a/cpio/test/test_option_t.c +++ b/cpio/test/test_option_t.c @@ -33,7 +33,7 @@ DEFINE_TEST(test_option_t) char *p; int r; time_t mtime; - char date[32]; + char date[48]; char date2[32]; struct tm *tmptr; #if defined(HAVE_LOCALTIME_R) || defined(HAVE_LOCALTIME_S)