From 945dc0b7e73c520561fc08ddc1a3c6f6322ccc99 Mon Sep 17 00:00:00 2001 From: JCash Date: Fri, 29 Nov 2024 12:01:39 +0100 Subject: [PATCH 1/5] Added zip_entry_noallocread_offset --- src/zip.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++- src/zip.h | 25 +++++++++++++++++ test/test_read.c | 38 ++++++++++++++++++++++++++ 3 files changed, 133 insertions(+), 1 deletion(-) diff --git a/src/zip.c b/src/zip.c index f1238231..994244a3 100644 --- a/src/zip.c +++ b/src/zip.c @@ -114,7 +114,7 @@ struct zip_entry_mark_t { size_t lf_length; }; -static const char *const zip_errlist[33] = { +static const char *const zip_errlist[35] = { NULL, "not initialized\0", "invalid entry name\0", @@ -148,6 +148,8 @@ static const char *const zip_errlist[33] = { "cannot initialize reader\0", "cannot initialize writer\0", "cannot initialize writer from reader\0", + "invalid argument\0", + "cannot initialize reader iterator\0", }; const char *zip_strerror(int errnum) { @@ -1654,6 +1656,73 @@ ssize_t zip_entry_noallocread(struct zip_t *zip, void *buf, size_t bufsize) { return (ssize_t)zip->entry.uncomp_size; } +ssize_t zip_entry_noallocread_offset(struct zip_t *zip, + size_t offset, size_t size, void *buf) { + mz_zip_archive *pzip = NULL; + + if (!zip) { + // zip_t handler is not initialized + return (ssize_t)ZIP_ENOINIT; + } + + if (offset > (size_t)zip->entry.uncomp_size) { + return (ssize_t)ZIP_EINVAL; + } + + if ((offset+size) > (size_t)zip->entry.uncomp_size) { + size = (ssize_t)zip->entry.uncomp_size - offset; + } + + pzip = &(zip->archive); + if (pzip->m_zip_mode != MZ_ZIP_MODE_READING || + zip->entry.index < (ssize_t)0) { + // the entry is not found or we do not have read access + return (ssize_t)ZIP_ENOENT; + } + + mz_zip_reader_extract_iter_state* iter = + mz_zip_reader_extract_iter_new(pzip, (mz_uint)zip->entry.index, 0); + if (!iter) { + return (ssize_t)ZIP_ENORITER; + } + + mz_uint8 tmpbuf[ZIP_DEFAULT_ITER_BUF_SIZE]; + size_t tmpbuf_size = sizeof(tmpbuf); + size_t file_offset 
= 0; + size_t write_cursor = 0; + size_t to_read = size; + + // iterate until the requested offset is in range + while (file_offset < zip->entry.uncomp_size && to_read > 0) + { + size_t nread = mz_zip_reader_extract_iter_read(iter, tmpbuf, tmpbuf_size); + + if (nread == 0) + break; + + if (offset < (file_offset+nread)) { + size_t read_cursor = offset - file_offset; + MZ_ASSERT(read_cursor < tmpbuf_size); + size_t read_size = nread - read_cursor; + + if (to_read < read_size) + read_size = to_read; + MZ_ASSERT(read_size <= tmpbuf_size); + + memcpy(&((mz_uint8*)buf)[write_cursor], &tmpbuf[read_cursor], read_size); + + write_cursor += read_size; + offset += read_size; + to_read -= read_size; + } + + file_offset += nread; + } + + mz_zip_reader_extract_iter_free(iter); + return (ssize_t)write_cursor; +} + int zip_entry_fread(struct zip_t *zip, const char *filename) { mz_zip_archive *pzip = NULL; mz_uint idx; diff --git a/src/zip.h b/src/zip.h index dce99ffb..c2992fac 100644 --- a/src/zip.h +++ b/src/zip.h @@ -61,6 +61,11 @@ typedef long ssize_t; /* byte count or error */ */ #define ZIP_DEFAULT_COMPRESSION_LEVEL 6 +/** + * Default zip iterator stack size (in bytes) + */ +#define ZIP_DEFAULT_ITER_BUF_SIZE 32*1024 + /** * Error codes */ @@ -96,6 +101,8 @@ typedef long ssize_t; /* byte count or error */ #define ZIP_ERINIT -30 // cannot initialize reader #define ZIP_EWINIT -31 // cannot initialize writer #define ZIP_EWRINIT -32 // cannot initialize writer from reader +#define ZIP_EINVAL -33 // invalid argument +#define ZIP_ENORITER -34 // cannot initialize reader iterator /** * Looks up the error message string corresponding to an error number. @@ -373,6 +380,24 @@ extern ZIP_EXPORT ssize_t zip_entry_read(struct zip_t *zip, void **buf, extern ZIP_EXPORT ssize_t zip_entry_noallocread(struct zip_t *zip, void *buf, size_t bufsize); +/** + * Extracts the part of the current zip entry into a memory buffer using no memory + * allocation for the buffer. 
+ * + * @param zip zip archive handler. + * @param offset the offset of the entry (in bytes). + * @param size the number of bytes to read. + * @param buf preallocated output buffer. + * + * @note the iterator api uses an allocation to create its state + * @note each call will iterate from the start of the entry + * + * @return the return code - the number of bytes actually read on success. + * Otherwise a negative number (< 0) on error (e.g. offset is too large). + */ +extern ZIP_EXPORT ssize_t zip_entry_noallocread_offset(struct zip_t *zip, + size_t offset, size_t size, void *buf); + /** * Extracts the current zip entry into output file. * diff --git a/test/test_read.c b/test/test_read.c index f3731bda..e5e0d828 100644 --- a/test/test_read.c +++ b/test/test_read.c @@ -129,11 +129,49 @@ MU_TEST(test_noallocread) { zip_close(zip); } + +MU_TEST(test_noallocread_offset) { + size_t expected_size = strlen(TESTDATA2); + char *expected_data = calloc(expected_size, sizeof(char)); + + struct zip_t *zip = zip_open(ZIPNAME, 0, 'r'); + mu_check(zip != NULL); + mu_assert_int_eq(1, zip_is64(zip)); + + mu_assert_int_eq(0, zip_entry_open(zip, "test/test-2.txt")); + zip_entry_noallocread(zip, (void *)expected_data, expected_size); + + // Read the file in different chunk sizes + for (size_t i = 1; i <= expected_size; ++i) { + size_t buflen = i; + char *tmpbuf = calloc(buflen, sizeof(char)); + + size_t offset = 0; + while (offset < expected_size) { + + ssize_t nread = zip_entry_noallocread_offset(zip, offset, buflen, tmpbuf); + + mu_assert(nread <= buflen, "too many bytes read"); + mu_assert(0u != nread, "no bytes read"); + + // check the data + for (ssize_t j = 0; j < nread; ++j) { + mu_assert_int_eq(expected_data[offset + j], tmpbuf[j]); + } + + offset += nread; + } + } + + zip_close(zip); +} + MU_TEST_SUITE(test_read_suite) { MU_SUITE_CONFIGURE(&test_setup, &test_teardown); MU_RUN_TEST(test_read); MU_RUN_TEST(test_noallocread); + MU_RUN_TEST(test_noallocread_offset); } 
#define UNUSED(x) (void)x From 143c4ed4d0a69da206b1748e9f2aec65b1463945 Mon Sep 17 00:00:00 2001 From: JCash Date: Fri, 29 Nov 2024 12:08:32 +0100 Subject: [PATCH 2/5] added doc --- README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/README.md b/README.md index 5940fdc6..67831dfd 100644 --- a/README.md +++ b/README.md @@ -208,6 +208,27 @@ zip_stream_close(zip); free(buf); ``` +* Extract a partial zip entry + +```c +unsigned char buf[16]; +size_t bufsize = sizeof(buf); + +struct zip_t *zip = zip_open("foo.zip", 0, 'r'); +{ + zip_entry_open(zip, "foo-1.txt"); + { + size_t offset = 4; + ssize_t nread = zip_entry_noallocread_offset(zip, offset, bufsize, (void *)buf); + } + + zip_entry_close(zip); +} +zip_close(zip); + +// buf is a stack array, so there is nothing to free +``` + * List of all zip entries ```c From c363a857d795396c6df6e90bb3d98ca2c5052654 Mon Sep 17 00:00:00 2001 From: JCash Date: Fri, 29 Nov 2024 14:23:45 +0100 Subject: [PATCH 3/5] review fixes: removed extraneous memcpy --- README.md | 2 +- src/zip.c | 19 +++++++++++-------- src/zip.h | 7 +------ test/test_read.c | 2 +- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 67831dfd..8d9efe51 100644 --- a/README.md +++ b/README.md @@ -219,7 +219,7 @@ struct zip_t *zip = zip_open("foo.zip", 0, 'r'); zip_entry_open(zip, "foo-1.txt"); { size_t offset = 4; - ssize_t nread = zip_entry_noallocread_offset(zip, offset, bufsize, (void *)buf); + ssize_t nread = zip_entry_noallocreadwithoffset(zip, offset, bufsize, (void *)buf); } zip_entry_close(zip); diff --git a/src/zip.c b/src/zip.c index 994244a3..588421b9 100644 --- a/src/zip.c +++ b/src/zip.c @@ -1656,7 +1656,7 @@ ssize_t zip_entry_noallocread(struct zip_t *zip, void *buf, size_t bufsize) { return (ssize_t)zip->entry.uncomp_size; } -ssize_t zip_entry_noallocread_offset(struct zip_t *zip, +ssize_t zip_entry_noallocreadwithoffset(struct zip_t *zip, size_t offset, size_t size, void *buf) { mz_zip_archive *pzip = NULL; @@ -1665,7 
+1665,7 @@ ssize_t zip_entry_noallocread_offset(struct zip_t *zip, return (ssize_t)ZIP_ENOINIT; } - if (offset > (size_t)zip->entry.uncomp_size) { + if (offset >= (size_t)zip->entry.uncomp_size) { return (ssize_t)ZIP_EINVAL; } @@ -1686,8 +1686,7 @@ ssize_t zip_entry_noallocread_offset(struct zip_t *zip, return (ssize_t)ZIP_ENORITER; } - mz_uint8 tmpbuf[ZIP_DEFAULT_ITER_BUF_SIZE]; - size_t tmpbuf_size = sizeof(tmpbuf); + mz_uint8* writebuf = (mz_uint8*)buf; size_t file_offset = 0; size_t write_cursor = 0; size_t to_read = size; @@ -1695,21 +1694,25 @@ ssize_t zip_entry_noallocread_offset(struct zip_t *zip, // iterate until the requested offset is in range while (file_offset < zip->entry.uncomp_size && to_read > 0) { - size_t nread = mz_zip_reader_extract_iter_read(iter, tmpbuf, tmpbuf_size); + size_t nread = mz_zip_reader_extract_iter_read(iter, (void*)&writebuf[write_cursor], to_read); if (nread == 0) break; if (offset < (file_offset+nread)) { size_t read_cursor = offset - file_offset; - MZ_ASSERT(read_cursor < tmpbuf_size); + MZ_ASSERT(read_cursor < size); size_t read_size = nread - read_cursor; if (to_read < read_size) read_size = to_read; - MZ_ASSERT(read_size <= tmpbuf_size); + MZ_ASSERT(read_size <= size); - memcpy(&((mz_uint8*)buf)[write_cursor], &tmpbuf[read_cursor], read_size); + // If it's an unaligned read (i.e. 
the first one) + if (read_cursor != 0) + { + memmove(&writebuf[write_cursor], &writebuf[read_cursor], read_size); + } write_cursor += read_size; offset += read_size; diff --git a/src/zip.h b/src/zip.h index c2992fac..c70806b9 100644 --- a/src/zip.h +++ b/src/zip.h @@ -61,11 +61,6 @@ typedef long ssize_t; /* byte count or error */ */ #define ZIP_DEFAULT_COMPRESSION_LEVEL 6 -/** - * Default zip iterator stack size (in bytes) - */ -#define ZIP_DEFAULT_ITER_BUF_SIZE 32*1024 - /** * Error codes */ @@ -395,7 +390,7 @@ extern ZIP_EXPORT ssize_t zip_entry_noallocread(struct zip_t *zip, void *buf, * @return the return code - the number of bytes actually read on success. * Otherwise a negative number (< 0) on error (e.g. offset is too large). */ -extern ZIP_EXPORT ssize_t zip_entry_noallocread_offset(struct zip_t *zip, +extern ZIP_EXPORT ssize_t zip_entry_noallocreadwithoffset(struct zip_t *zip, size_t offset, size_t size, void *buf); /** diff --git a/test/test_read.c b/test/test_read.c index e5e0d828..8e7df1eb 100644 --- a/test/test_read.c +++ b/test/test_read.c @@ -149,7 +149,7 @@ MU_TEST(test_noallocread_offset) { size_t offset = 0; while (offset < expected_size) { - ssize_t nread = zip_entry_noallocread_offset(zip, offset, buflen, tmpbuf); + ssize_t nread = zip_entry_noallocreadwithoffset(zip, offset, buflen, tmpbuf); mu_assert(nread <= buflen, "too many bytes read"); mu_assert(0u != nread, "no bytes read"); From 988f074f9d884406e46e71bdcceb9a2f22208c45 Mon Sep 17 00:00:00 2001 From: JCash Date: Fri, 29 Nov 2024 14:33:54 +0100 Subject: [PATCH 4/5] Improved test for the memmove's --- test/test_read.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/test/test_read.c b/test/test_read.c index 8e7df1eb..c33885bd 100644 --- a/test/test_read.c +++ b/test/test_read.c @@ -130,7 +130,7 @@ MU_TEST(test_noallocread) { } -MU_TEST(test_noallocread_offset) { +MU_TEST(test_noallocreadwithoffset) { size_t expected_size = strlen(TESTDATA2); 
char *expected_data = calloc(expected_size, sizeof(char)); @@ -146,20 +146,23 @@ MU_TEST(test_noallocread_offset) { size_t buflen = i; char *tmpbuf = calloc(buflen, sizeof(char)); - size_t offset = 0; - while (offset < expected_size) { + for (size_t j = 0; j < expected_size; ++j) { + // we test starting from different offsets, to make sure we hit the "unaligned" code path + size_t offset = j; + while (offset < expected_size) { - ssize_t nread = zip_entry_noallocreadwithoffset(zip, offset, buflen, tmpbuf); + ssize_t nread = zip_entry_noallocreadwithoffset(zip, offset, buflen, tmpbuf); - mu_assert(nread <= buflen, "too many bytes read"); - mu_assert(0u != nread, "no bytes read"); + mu_assert(nread <= buflen, "too many bytes read"); + mu_assert(0u != nread, "no bytes read"); - // check the data - for (ssize_t j = 0; j < nread; ++j) { - mu_assert_int_eq(expected_data[offset + j], tmpbuf[j]); - } + // check the data + for (ssize_t j = 0; j < nread; ++j) { + mu_assert_int_eq(expected_data[offset + j], tmpbuf[j]); + } - offset += nread; + offset += nread; + } } } @@ -171,7 +174,7 @@ MU_TEST_SUITE(test_read_suite) { MU_RUN_TEST(test_read); MU_RUN_TEST(test_noallocread); - MU_RUN_TEST(test_noallocread_offset); + MU_RUN_TEST(test_noallocreadwithoffset); } #define UNUSED(x) (void)x From 2d8ad2683d3e4ff66f35fb2c0f39b270342b6391 Mon Sep 17 00:00:00 2001 From: JCash Date: Fri, 29 Nov 2024 16:06:36 +0100 Subject: [PATCH 5/5] clang format fix --- src/zip.c | 27 +++++++++++++-------------- src/zip.h | 11 +++++++---- test/test_read.c | 7 ++++--- 3 files changed, 24 insertions(+), 21 deletions(-) diff --git a/src/zip.c b/src/zip.c index 588421b9..d00cece2 100644 --- a/src/zip.c +++ b/src/zip.c @@ -1656,8 +1656,8 @@ ssize_t zip_entry_noallocread(struct zip_t *zip, void *buf, size_t bufsize) { return (ssize_t)zip->entry.uncomp_size; } -ssize_t zip_entry_noallocreadwithoffset(struct zip_t *zip, - size_t offset, size_t size, void *buf) { +ssize_t zip_entry_noallocreadwithoffset(struct 
zip_t *zip, size_t offset, + size_t size, void *buf) { mz_zip_archive *pzip = NULL; if (!zip) { @@ -1669,7 +1669,7 @@ ssize_t zip_entry_noallocreadwithoffset(struct zip_t *zip, return (ssize_t)ZIP_EINVAL; } - if ((offset+size) > (size_t)zip->entry.uncomp_size) { + if ((offset + size) > (size_t)zip->entry.uncomp_size) { size = (ssize_t)zip->entry.uncomp_size - offset; } @@ -1680,26 +1680,26 @@ ssize_t zip_entry_noallocreadwithoffset(struct zip_t *zip, return (ssize_t)ZIP_ENOENT; } - mz_zip_reader_extract_iter_state* iter = + mz_zip_reader_extract_iter_state *iter = mz_zip_reader_extract_iter_new(pzip, (mz_uint)zip->entry.index, 0); if (!iter) { return (ssize_t)ZIP_ENORITER; } - mz_uint8* writebuf = (mz_uint8*)buf; - size_t file_offset = 0; - size_t write_cursor = 0; - size_t to_read = size; + mz_uint8 *writebuf = (mz_uint8 *)buf; + size_t file_offset = 0; + size_t write_cursor = 0; + size_t to_read = size; // iterate until the requested offset is in range - while (file_offset < zip->entry.uncomp_size && to_read > 0) - { - size_t nread = mz_zip_reader_extract_iter_read(iter, (void*)&writebuf[write_cursor], to_read); + while (file_offset < zip->entry.uncomp_size && to_read > 0) { + size_t nread = mz_zip_reader_extract_iter_read( + iter, (void *)&writebuf[write_cursor], to_read); if (nread == 0) break; - if (offset < (file_offset+nread)) { + if (offset < (file_offset + nread)) { size_t read_cursor = offset - file_offset; MZ_ASSERT(read_cursor < size); size_t read_size = nread - read_cursor; @@ -1709,8 +1709,7 @@ ssize_t zip_entry_noallocreadwithoffset(struct zip_t *zip, MZ_ASSERT(read_size <= size); // If it's an unaligned read (i.e. 
the first one) - if (read_cursor != 0) - { + if (read_cursor != 0) { memmove(&writebuf[write_cursor], &writebuf[read_cursor], read_size); } diff --git a/src/zip.h b/src/zip.h index c70806b9..3c0f3c6a 100644 --- a/src/zip.h +++ b/src/zip.h @@ -376,8 +376,8 @@ extern ZIP_EXPORT ssize_t zip_entry_noallocread(struct zip_t *zip, void *buf, size_t bufsize); /** - * Extracts the part of the current zip entry into a memory buffer using no memory - * allocation for the buffer. + * Extracts the part of the current zip entry into a memory buffer using no + * memory allocation for the buffer. * * @param zip zip archive handler. * @param offset the offset of the entry (in bytes). @@ -388,10 +388,13 @@ extern ZIP_EXPORT ssize_t zip_entry_noallocread(struct zip_t *zip, void *buf, * @note each call will iterate from the start of the entry * * @return the return code - the number of bytes actually read on success. - * Otherwise a negative number (< 0) on error (e.g. offset is too large). + * Otherwise a negative number (< 0) on error (e.g. offset is too + * large). */ extern ZIP_EXPORT ssize_t zip_entry_noallocreadwithoffset(struct zip_t *zip, - size_t offset, size_t size, void *buf); + size_t offset, + size_t size, + void *buf); /** * Extracts the current zip entry into output file. 
diff --git a/test/test_read.c b/test/test_read.c index c33885bd..472601c6 100644 --- a/test/test_read.c +++ b/test/test_read.c @@ -129,7 +129,6 @@ MU_TEST(test_noallocread) { zip_close(zip); } - MU_TEST(test_noallocreadwithoffset) { size_t expected_size = strlen(TESTDATA2); char *expected_data = calloc(expected_size, sizeof(char)); @@ -147,11 +146,13 @@ MU_TEST(test_noallocreadwithoffset) { char *tmpbuf = calloc(buflen, sizeof(char)); for (size_t j = 0; j < expected_size; ++j) { - // we test starting from different offsets, to make sure we hit the "unaligned" code path + // we test starting from different offsets, to make sure we hit the + // "unaligned" code path size_t offset = j; while (offset < expected_size) { - ssize_t nread = zip_entry_noallocreadwithoffset(zip, offset, buflen, tmpbuf); + ssize_t nread = + zip_entry_noallocreadwithoffset(zip, offset, buflen, tmpbuf); mu_assert(nread <= buflen, "too many bytes read"); mu_assert(0u != nread, "no bytes read");