Skip to content

Commit

Permalink
Merge branch 'libarchive:master' into fixwarn
Browse files Browse the repository at this point in the history
  • Loading branch information
dunhor authored Jun 27, 2024
2 parents b43c0bc + 898dc83 commit ab31182
Show file tree
Hide file tree
Showing 13 changed files with 390 additions and 15 deletions.
1 change: 1 addition & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -789,6 +789,7 @@ libarchive_test_EXTRA_DIST=\
libarchive/test/test_read_format_7zip_encryption.7z.uu \
libarchive/test/test_read_format_7zip_encryption_header.7z.uu \
libarchive/test/test_read_format_7zip_encryption_partially.7z.uu \
libarchive/test/test_read_format_7zip_extract_second.7z.uu \
libarchive/test/test_read_format_7zip_lzma1.7z.uu \
libarchive/test/test_read_format_7zip_lzma1_2.7z.uu \
libarchive/test/test_read_format_7zip_lzma1_lzma2.7z.uu \
Expand Down
6 changes: 6 additions & 0 deletions libarchive/archive_entry.c
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,12 @@ archive_entry_hardlink_w(struct archive_entry *entry)
return (NULL);
}

/*
 * Tell the caller whether the AE_SET_HARDLINK flag is raised in the
 * entry's ae_set bitmask.
 *
 * Returns 1 when the flag is present and 0 otherwise.
 */
int
archive_entry_hardlink_is_set(struct archive_entry *entry)
{
	if (entry->ae_set & AE_SET_HARDLINK)
		return (1);
	return (0);
}

int
_archive_entry_hardlink_l(struct archive_entry *entry,
const char **p, size_t *len, struct archive_string_conv *sc)
Expand Down
1 change: 1 addition & 0 deletions libarchive/archive_entry.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ __LA_DECL void archive_entry_set_link_to_hardlink(struct archive_entry *);
__LA_DECL const char *archive_entry_hardlink(struct archive_entry *);
__LA_DECL const char *archive_entry_hardlink_utf8(struct archive_entry *);
__LA_DECL const wchar_t *archive_entry_hardlink_w(struct archive_entry *);
__LA_DECL int archive_entry_hardlink_is_set(struct archive_entry *);
__LA_DECL la_int64_t archive_entry_ino(struct archive_entry *);
__LA_DECL la_int64_t archive_entry_ino64(struct archive_entry *);
__LA_DECL int archive_entry_ino_is_set(struct archive_entry *);
Expand Down
8 changes: 6 additions & 2 deletions libarchive/archive_read_support_format_7zip.c
Original file line number Diff line number Diff line change
Expand Up @@ -1063,7 +1063,7 @@ ppmd_read(void *p)
ssize_t bytes_avail = 0;
const uint8_t* data = __archive_read_ahead(a,
(size_t)zip->ppstream.stream_in+1, &bytes_avail);
if(bytes_avail < zip->ppstream.stream_in+1) {
if(data == NULL || bytes_avail < zip->ppstream.stream_in+1) {
archive_set_error(&a->archive,
ARCHIVE_ERRNO_FILE_FORMAT,
"Truncated 7z file data");
Expand Down Expand Up @@ -3462,7 +3462,7 @@ read_stream(struct archive_read *a, const void **buff, size_t size,
/*
* Skip the bytes we have already skipped in skip_stream().
*/
while (skip_bytes) {
while (1) {
ssize_t skipped;

if (zip->uncompressed_buffer_bytes_remaining == 0) {
Expand All @@ -3482,6 +3482,10 @@ read_stream(struct archive_read *a, const void **buff, size_t size,
return (ARCHIVE_FATAL);
}
}

if (!skip_bytes)
break;

skipped = get_uncompressed_data(
a, buff, (size_t)skip_bytes, 0);
if (skipped < 0)
Expand Down
84 changes: 73 additions & 11 deletions libarchive/archive_string.c
Original file line number Diff line number Diff line change
Expand Up @@ -3874,6 +3874,30 @@ archive_mstring_get_utf8(struct archive *a, struct archive_mstring *aes,
}

*p = NULL;
#if defined(_WIN32) && !defined(__CYGWIN__)
/*
* On Windows, first try converting from WCS because (1) there's no
* guarantee that the conversion to MBS will succeed, e.g. when using
* CP_ACP, and (2) that's more efficient than converting to MBS, just to
* convert back to WCS again before finally converting to UTF-8
*/
if ((aes->aes_set & AES_SET_WCS) != 0) {
sc = archive_string_conversion_to_charset(a, "UTF-8", 1);
if (sc == NULL)
return (-1);/* Couldn't allocate memory for sc. */
archive_string_empty(&(aes->aes_utf8));
r = archive_string_append_from_wcs_in_codepage(&(aes->aes_utf8),
aes->aes_wcs.s, aes->aes_wcs.length, sc);
if (a == NULL)
free_sconv_object(sc);
if (r == 0) {
aes->aes_set |= AES_SET_UTF8;
*p = aes->aes_utf8.s;
return (0);/* success. */
} else
return (-1);/* failure. */
}
#endif
/* Try converting WCS to MBS first if MBS does not exist yet. */
if ((aes->aes_set & AES_SET_MBS) == 0) {
const char *pm; /* unused */
Expand Down Expand Up @@ -3958,6 +3982,32 @@ archive_mstring_get_wcs(struct archive *a, struct archive_mstring *aes,
}

*wp = NULL;
#if defined(_WIN32) && !defined(__CYGWIN__)
/*
* On Windows, prefer converting from UTF-8 directly to WCS because:
* (1) there's no guarantee that the string can be represented in MBS (e.g.
* with CP_ACP), and (2) in order to convert from UTF-8 to MBS, we're going
* to need to convert from UTF-8 to WCS anyway and it's wasteful to throw
* away that intermediate result
*/
if (aes->aes_set & AES_SET_UTF8) {
struct archive_string_conv *sc;

sc = archive_string_conversion_from_charset(a, "UTF-8", 1);
if (sc != NULL) {
archive_wstring_empty((&aes->aes_wcs));
r = archive_wstring_append_from_mbs_in_codepage(&(aes->aes_wcs),
aes->aes_utf8.s, aes->aes_utf8.length, sc);
if (a == NULL)
free_sconv_object(sc);
if (r == 0) {
aes->aes_set |= AES_SET_WCS;
*wp = aes->aes_wcs.s;
return (0);
}
}
}
#endif
/* Try converting UTF8 to MBS first if MBS does not exist yet. */
if ((aes->aes_set & AES_SET_MBS) == 0) {
const char *p; /* unused */
Expand Down Expand Up @@ -4211,21 +4261,31 @@ archive_mstring_update_utf8(struct archive *a, struct archive_mstring *aes,

aes->aes_set = AES_SET_UTF8; /* Only UTF8 is set now. */

/* Try converting UTF-8 to MBS, return false on failure. */
sc = archive_string_conversion_from_charset(a, "UTF-8", 1);
if (sc == NULL)
return (-1);/* Couldn't allocate memory for sc. */
r = archive_strcpy_l(&(aes->aes_mbs), utf8, sc);

#if defined(_WIN32) && !defined(__CYGWIN__)
/* On failure, make an effort to convert UTF8 to WCS as the active code page
* may not be able to represent all characters in the string */
if (r != 0) {
if (archive_wstring_append_from_mbs_in_codepage(&(aes->aes_wcs),
aes->aes_utf8.s, aes->aes_utf8.length, sc) == 0)
aes->aes_set = AES_SET_UTF8 | AES_SET_WCS;
}
#endif
/* On Windows, there's no good way to convert from UTF8 -> MBS directly, so
* prefer to first convert to WCS as (1) it's wasteful to throw away the
* intermediate result, and (2) WCS will still be set even if we fail to
* convert to MBS (e.g. with ACP that can't represent the characters) */
r = archive_wstring_append_from_mbs_in_codepage(&(aes->aes_wcs),
aes->aes_utf8.s, aes->aes_utf8.length, sc);

if (a == NULL)
free_sconv_object(sc);
if (r != 0)
return (-1); /* This will guarantee we can't convert to MBS */
aes->aes_set = AES_SET_UTF8 | AES_SET_WCS; /* Both UTF8 and WCS set. */

/* Try converting WCS to MBS, return false on failure. */
if (archive_string_append_from_wcs(&(aes->aes_mbs), aes->aes_wcs.s,
aes->aes_wcs.length))
return (-1);
#else
/* Try converting UTF-8 to MBS, return false on failure. */
r = archive_strcpy_l(&(aes->aes_mbs), utf8, sc);

if (a == NULL)
free_sconv_object(sc);
Expand All @@ -4237,8 +4297,10 @@ archive_mstring_update_utf8(struct archive *a, struct archive_mstring *aes,
if (archive_wstring_append_from_mbs(&(aes->aes_wcs), aes->aes_mbs.s,
aes->aes_mbs.length))
return (-1);
aes->aes_set = AES_SET_UTF8 | AES_SET_WCS | AES_SET_MBS;
#endif

/* All conversions succeeded. */
aes->aes_set = AES_SET_UTF8 | AES_SET_WCS | AES_SET_MBS;

return (0);
}
7 changes: 6 additions & 1 deletion libarchive/archive_windows.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,12 +292,17 @@ typedef int mbstate_t;
size_t wcrtomb(char *, wchar_t, mbstate_t *);
#endif

#if defined(_MSC_VER) && _MSC_VER < 1300
#if !WINAPI_FAMILY_PARTITION (WINAPI_PARTITION_DESKTOP) && NTDDI_VERSION < NTDDI_WIN10_VB
// not supported in UWP SDK before 20H1
#define GetVolumePathNameW(f, v, c) (0)
#elif defined(_MSC_VER) && _MSC_VER < 1300
WINBASEAPI BOOL WINAPI GetVolumePathNameW(
LPCWSTR lpszFileName,
LPWSTR lpszVolumePathName,
DWORD cchBufferLength
);
#endif
#if defined(_MSC_VER) && _MSC_VER < 1300
# if _WIN32_WINNT < 0x0500 /* windows.h not providing 0x500 API */
typedef struct _FILE_ALLOCATED_RANGE_BUFFER {
LARGE_INTEGER FileOffset;
Expand Down
3 changes: 3 additions & 0 deletions libarchive/archive_write_set_format_cpio_binary.c
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,9 @@ archive_write_binary_close(struct archive_write *a)
struct archive_entry *trailer;

trailer = archive_entry_new2(NULL);
if (trailer == NULL) {
return ARCHIVE_FATAL;
}
/* nlink = 1 here for GNU cpio compat. */
archive_entry_set_nlink(trailer, 1);
archive_entry_set_size(trailer, 0);
Expand Down
3 changes: 3 additions & 0 deletions libarchive/archive_write_set_format_cpio_odc.c
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,9 @@ archive_write_odc_close(struct archive_write *a)
struct archive_entry *trailer;

trailer = archive_entry_new2(NULL);
if (trailer == NULL) {
return ARCHIVE_FATAL;
}
/* nlink = 1 here for GNU cpio compat. */
archive_entry_set_nlink(trailer, 1);
archive_entry_set_size(trailer, 0);
Expand Down
2 changes: 1 addition & 1 deletion libarchive/archive_write_set_format_gnutar.c
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ archive_write_gnutar_header(struct archive_write *a,
goto exit_write_header;
}

if (archive_entry_hardlink(entry) != NULL) {
if (archive_entry_hardlink_is_set(entry)) {
tartype = '1';
} else
switch (archive_entry_filetype(entry)) {
Expand Down
135 changes: 135 additions & 0 deletions libarchive/test/test_archive_string_conversion.c
Original file line number Diff line number Diff line change
Expand Up @@ -882,3 +882,138 @@ DEFINE_TEST(test_archive_string_conversion)
test_archive_string_canonicalization();
test_archive_string_set_get();
}

/*
 * Windows-only check: load an mstring from a wide-character string and
 * verify that requesting its UTF-8 form succeeds and yields the expected
 * byte sequence.  Skipped on every other platform (including Cygwin).
 */
DEFINE_TEST(test_archive_string_conversion_utf16_utf8)
{
#if !defined(_WIN32) || defined(__CYGWIN__)
	skipping("This test is meant to verify unicode string handling on Windows");
#else
	/* The same three characters (U+043F U+0440 U+0438) in both encodings. */
	static const wchar_t wide_input[] = L"\U0000043f\U00000440\U00000438";
	static const char expected_utf8[] = "\xD0\xBF\xD1\x80\xD0\xB8";
	const char *utf8_out;
	struct archive_mstring mstr;

	memset(&mstr, 0, sizeof(mstr));

	/* Seed the mstring with the wide-character form only. */
	assertEqualInt(ARCHIVE_OK,
	    archive_mstring_copy_wcs(&mstr, wide_input));

	/* Conversion from WCS to UTF-8 should always succeed */
	assertEqualInt(ARCHIVE_OK,
	    archive_mstring_get_utf8(NULL, &mstr, &utf8_out));
	assertEqualString(expected_utf8, utf8_out);

	archive_mstring_clean(&mstr);
#endif
}

/*
 * Windows-only check: load an mstring from a UTF-8 string and verify that
 * requesting its wide-character form succeeds and yields the expected
 * characters.  Skipped on every other platform (including Cygwin).
 */
DEFINE_TEST(test_archive_string_conversion_utf8_utf16)
{
#if !defined(_WIN32) || defined(__CYGWIN__)
	skipping("This test is meant to verify unicode string handling on Windows");
#else
	/* The same three characters (U+043F U+0440 U+0438) in both encodings. */
	static const char utf8_input[] = "\xD0\xBF\xD1\x80\xD0\xB8";
	static const wchar_t expected_wide[] = L"\U0000043f\U00000440\U00000438";
	const wchar_t *wide_out;
	struct archive_mstring mstr;

	memset(&mstr, 0, sizeof(mstr));

	/* Seed the mstring with the UTF-8 form; the test expects 6 back. */
	assertEqualInt(6,
	    archive_mstring_copy_utf8(&mstr, utf8_input));

	/* Conversion from UTF-8 to WCS should always succeed */
	assertEqualInt(ARCHIVE_OK,
	    archive_mstring_get_wcs(NULL, &mstr, &wide_out));
	assertEqualWString(expected_wide, wide_out);

	archive_mstring_clean(&mstr);
#endif
}

/*
 * Windows-only check of archive_mstring_update_utf8() when the multibyte
 * conversion is expected to fail: the call must return -1 and leave MBS
 * unset, while still leaving the UTF-8 and WCS forms populated.
 *
 * NOTE(review): the expectation depends on the process running in the "C"
 * locale, but nothing in this test sets it -- confirm the test harness
 * resets the locale before each test.
 */
DEFINE_TEST(test_archive_string_update_utf8_win)
{
#if !defined(_WIN32) || defined(__CYGWIN__)
	skipping("This test is meant to verify unicode string handling on Windows"
	    " with the C locale");
#else
	static const wchar_t wcs_string[] = L"\U0000043f\U00000440\U00000438";
	static const char utf8_string[] = "\xD0\xBF\xD1\x80\xD0\xB8";
	struct archive_mstring mstr;

	memset(&mstr, 0, sizeof(mstr));

	/* On Windows, this should reliably fail with the C locale */
	assertEqualInt(-1,
	    archive_mstring_update_utf8(NULL, &mstr, utf8_string));
	assertEqualInt(0, mstr.aes_set & AES_SET_MBS);

	/*
	 * The remaining checks deliberately look at the mstring internals
	 * to confirm what 'archive_mstring_update_utf8' stored.
	 */

	/* UTF-8 should always be set */
	assertEqualInt(AES_SET_UTF8, mstr.aes_set & AES_SET_UTF8);
	assertEqualString(utf8_string, mstr.aes_utf8.s);

	/* WCS should always be set as well */
	assertEqualInt(AES_SET_WCS, mstr.aes_set & AES_SET_WCS);
	assertEqualWString(wcs_string, mstr.aes_wcs.s);

	archive_mstring_clean(&mstr);
#endif
}

/*
 * Check archive_mstring_update_utf8() under the "en_US.UTF-8" locale:
 * the call must succeed and leave all three forms (MBS, WCS, UTF-8) set,
 * with the MBS bytes identical to the UTF-8 input.  Skipped when that
 * locale cannot be selected.
 */
DEFINE_TEST(test_archive_string_update_utf8_utf8)
{
	static const wchar_t wcs_string[] = L"\U0000043f\U00000440\U00000438";
	static const char utf8_string[] = "\xD0\xBF\xD1\x80\xD0\xB8";
	struct archive_mstring mstr;

	/* Nothing to verify unless the UTF-8 locale is available. */
	if (setlocale(LC_ALL, "en_US.UTF-8") == NULL) {
		skipping("UTF-8 not supported on this system.");
		return;
	}

	memset(&mstr, 0, sizeof(mstr));

	/* All conversions should have succeeded */
	assertEqualInt(0,
	    archive_mstring_update_utf8(NULL, &mstr, utf8_string));
	assertEqualInt(AES_SET_MBS | AES_SET_WCS | AES_SET_UTF8, mstr.aes_set);

	/* Each stored form must match its expected encoding. */
	assertEqualString(utf8_string, mstr.aes_utf8.s);
	assertEqualString(utf8_string, mstr.aes_mbs.s);
	assertEqualWString(wcs_string, mstr.aes_wcs.s);

	archive_mstring_clean(&mstr);
}

/*
 * Check archive_mstring_update_utf8() under the "ru_RU.KOI8-R" locale:
 * the call must succeed, leave all three forms (MBS, WCS, UTF-8) set, and
 * store the KOI8-R encoding of the input as the MBS form.  Skipped when
 * that locale cannot be selected.
 */
DEFINE_TEST(test_archive_string_update_utf8_koi8)
{
	static const wchar_t wcs_string[] = L"\U0000043f\U00000440\U00000438";
	static const char koi8_string[] = "\xD0\xD2\xC9";
	static const char utf8_string[] = "\xD0\xBF\xD1\x80\xD0\xB8";
	struct archive_mstring mstr;

	/* Nothing to verify unless the KOI8-R locale is available. */
	if (setlocale(LC_ALL, "ru_RU.KOI8-R") == NULL) {
		skipping("KOI8-R locale not available on this system.");
		return;
	}

	memset(&mstr, 0, sizeof(mstr));

	/* All conversions should have succeeded */
	assertEqualInt(0,
	    archive_mstring_update_utf8(NULL, &mstr, utf8_string));
	assertEqualInt(AES_SET_MBS | AES_SET_WCS | AES_SET_UTF8, mstr.aes_set);

	/* UTF-8 is stored as given; MBS holds the KOI8-R bytes. */
	assertEqualString(utf8_string, mstr.aes_utf8.s);
	assertEqualString(koi8_string, mstr.aes_mbs.s);
#if defined(_WIN32) && !defined(__CYGWIN__)
	assertEqualWString(wcs_string, mstr.aes_wcs.s);
#else
	/* No guarantee of how WCS strings behave, however this test is
	 * primarily meant for Windows */
	(void)wcs_string;
#endif

	archive_mstring_clean(&mstr);
}
Loading

0 comments on commit ab31182

Please sign in to comment.