From 945dc0b7e73c520561fc08ddc1a3c6f6322ccc99 Mon Sep 17 00:00:00 2001
From: JCash <mwesterdahl76@gmail.com>
Date: Fri, 29 Nov 2024 12:01:39 +0100
Subject: [PATCH 1/5] Added zip_entry_noallocread_offset

---
 src/zip.c        | 71 +++++++++++++++++++++++++++++++++++++++++++++++-
 src/zip.h        | 25 +++++++++++++++++
 test/test_read.c | 38 ++++++++++++++++++++++++++
 3 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/src/zip.c b/src/zip.c
index f1238231..994244a3 100644
--- a/src/zip.c
+++ b/src/zip.c
@@ -114,7 +114,7 @@ struct zip_entry_mark_t {
   size_t lf_length;
 };
 
-static const char *const zip_errlist[33] = {
+static const char *const zip_errlist[35] = {
     NULL,
     "not initialized\0",
     "invalid entry name\0",
@@ -148,6 +148,8 @@ static const char *const zip_errlist[33] = {
     "cannot initialize reader\0",
     "cannot initialize writer\0",
     "cannot initialize writer from reader\0",
+    "invalid argument\0",
+    "cannot initialize reader iterator\0",
 };
 
 const char *zip_strerror(int errnum) {
@@ -1654,6 +1656,73 @@ ssize_t zip_entry_noallocread(struct zip_t *zip, void *buf, size_t bufsize) {
   return (ssize_t)zip->entry.uncomp_size;
 }
 
+ssize_t zip_entry_noallocread_offset(struct zip_t *zip,
+                                    size_t offset, size_t size, void *buf) {
+  mz_zip_archive *pzip = NULL;
+
+  if (!zip) {
+    // zip_t handler is not initialized
+    return (ssize_t)ZIP_ENOINIT;
+  }
+
+  if (offset > (size_t)zip->entry.uncomp_size) {
+    return (ssize_t)ZIP_EINVAL;
+  }
+
+  if ((offset+size) > (size_t)zip->entry.uncomp_size) {
+    size = (ssize_t)zip->entry.uncomp_size - offset;
+  }
+
+  pzip = &(zip->archive);
+  if (pzip->m_zip_mode != MZ_ZIP_MODE_READING ||
+      zip->entry.index < (ssize_t)0) {
+    // the entry is not found or we do not have read access
+    return (ssize_t)ZIP_ENOENT;
+  }
+
+  mz_zip_reader_extract_iter_state* iter =
+      mz_zip_reader_extract_iter_new(pzip, (mz_uint)zip->entry.index, 0);
+  if (!iter) {
+    return (ssize_t)ZIP_ENORITER;
+  }
+
+  mz_uint8  tmpbuf[ZIP_DEFAULT_ITER_BUF_SIZE];
+  size_t    tmpbuf_size = sizeof(tmpbuf);
+  size_t    file_offset = 0;
+  size_t    write_cursor = 0;
+  size_t    to_read = size;
+
+  // iterate until the requested offset is in range
+  while (file_offset < zip->entry.uncomp_size && to_read > 0)
+  {
+    size_t nread = mz_zip_reader_extract_iter_read(iter, tmpbuf, tmpbuf_size);
+
+    if (nread == 0)
+      break;
+
+    if (offset < (file_offset+nread)) {
+      size_t read_cursor = offset - file_offset;
+      MZ_ASSERT(read_cursor < tmpbuf_size);
+      size_t read_size = nread - read_cursor;
+
+      if (to_read < read_size)
+        read_size = to_read;
+      MZ_ASSERT(read_size <= tmpbuf_size);
+
+      memcpy(&((mz_uint8*)buf)[write_cursor], &tmpbuf[read_cursor], read_size);
+
+      write_cursor += read_size;
+      offset += read_size;
+      to_read -= read_size;
+    }
+
+    file_offset += nread;
+  }
+
+  mz_zip_reader_extract_iter_free(iter);
+  return (ssize_t)write_cursor;
+}
+
 int zip_entry_fread(struct zip_t *zip, const char *filename) {
   mz_zip_archive *pzip = NULL;
   mz_uint idx;
diff --git a/src/zip.h b/src/zip.h
index dce99ffb..c2992fac 100644
--- a/src/zip.h
+++ b/src/zip.h
@@ -61,6 +61,11 @@ typedef long ssize_t; /* byte count or error */
  */
 #define ZIP_DEFAULT_COMPRESSION_LEVEL 6
 
+/**
+ * Default zip iterator stack size (in bytes)
+ */
+#define ZIP_DEFAULT_ITER_BUF_SIZE 32*1024
+
 /**
  * Error codes
  */
@@ -96,6 +101,8 @@ typedef long ssize_t; /* byte count or error */
 #define ZIP_ERINIT -30      // cannot initialize reader
 #define ZIP_EWINIT -31      // cannot initialize writer
 #define ZIP_EWRINIT -32     // cannot initialize writer from reader
+#define ZIP_EINVAL -33      // invalid argument
+#define ZIP_ENORITER -34    // cannot initialize reader iterator
 
 /**
  * Looks up the error message string corresponding to an error number.
@@ -373,6 +380,24 @@ extern ZIP_EXPORT ssize_t zip_entry_read(struct zip_t *zip, void **buf,
 extern ZIP_EXPORT ssize_t zip_entry_noallocread(struct zip_t *zip, void *buf,
                                                 size_t bufsize);
 
+/**
+ * Extracts the part of the current zip entry into a memory buffer using no memory
+ * allocation for the buffer.
+ *
+ * @param zip zip archive handler.
+ * @param offset the offset of the entry (in bytes).
+ * @param size maximum number of bytes to read.
+ * @param buf preallocated output buffer of at least size bytes.
+ *
+ * @note the iterator api uses an allocation to create its state
+ * @note each call will iterate from the start of the entry
+ *
+ * @return the return code - the number of bytes actually read on success.
+ *         Otherwise a negative number (< 0) on error (e.g. offset is too large).
+ */
+extern ZIP_EXPORT ssize_t zip_entry_noallocread_offset(struct zip_t *zip,
+                                        size_t offset, size_t size, void *buf);
+
 /**
  * Extracts the current zip entry into output file.
  *
diff --git a/test/test_read.c b/test/test_read.c
index f3731bda..e5e0d828 100644
--- a/test/test_read.c
+++ b/test/test_read.c
@@ -129,11 +129,49 @@ MU_TEST(test_noallocread) {
   zip_close(zip);
 }
 
+
+MU_TEST(test_noallocread_offset) {
+  size_t expected_size = strlen(TESTDATA2);
+  char *expected_data = calloc(expected_size, sizeof(char));
+
+  struct zip_t *zip = zip_open(ZIPNAME, 0, 'r');
+  mu_check(zip != NULL);
+  mu_assert_int_eq(1, zip_is64(zip));
+
+  mu_assert_int_eq(0, zip_entry_open(zip, "test/test-2.txt"));
+  zip_entry_noallocread(zip, (void *)expected_data, expected_size);
+
+  // Read the file in different chunk sizes
+  for (size_t i = 1; i <= expected_size; ++i) {
+    size_t buflen = i;
+    char *tmpbuf = calloc(buflen, sizeof(char));
+
+    size_t offset = 0;
+    while (offset < expected_size) {
+
+      ssize_t nread = zip_entry_noallocread_offset(zip, offset, buflen, tmpbuf);
+
+      mu_assert(nread <= buflen, "too many bytes read");
+      mu_assert(0u != nread, "no bytes read");
+
+      // check the data
+      for (ssize_t j = 0; j < nread; ++j) {
+        mu_assert_int_eq(expected_data[offset + j], tmpbuf[j]);
+      }
+
+      offset += nread;
+    }
+  }
+
+  zip_close(zip);
+}
+
 MU_TEST_SUITE(test_read_suite) {
   MU_SUITE_CONFIGURE(&test_setup, &test_teardown);
 
   MU_RUN_TEST(test_read);
   MU_RUN_TEST(test_noallocread);
+  MU_RUN_TEST(test_noallocread_offset);
 }
 
 #define UNUSED(x) (void)x

From 143c4ed4d0a69da206b1748e9f2aec65b1463945 Mon Sep 17 00:00:00 2001
From: JCash <mwesterdahl76@gmail.com>
Date: Fri, 29 Nov 2024 12:08:32 +0100
Subject: [PATCH 2/5] Added README example for extracting a partial zip entry

---
 README.md | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/README.md b/README.md
index 5940fdc6..67831dfd 100644
--- a/README.md
+++ b/README.md
@@ -208,6 +208,27 @@ zip_stream_close(zip);
 free(buf);
 ```
 
+* Extract a partial zip entry
+
+```c
+unsigned char buf[16];
+size_t bufsize = sizeof(buf);
+
+struct zip_t *zip = zip_open("foo.zip", 0, 'r');
+{
+    zip_entry_open(zip, "foo-1.txt");
+    {
+        size_t offset = 4;
+        ssize_t nread = zip_entry_noallocread_offset(zip, offset, bufsize, (void *)buf);
+    }
+
+    zip_entry_close(zip);
+}
+zip_close(zip);
+
+// buf is a stack array here, so there is nothing to free
+```
+
 * List of all zip entries
 
 ```c

From c363a857d795396c6df6e90bb3d98ca2c5052654 Mon Sep 17 00:00:00 2001
From: JCash <mwesterdahl76@gmail.com>
Date: Fri, 29 Nov 2024 14:23:45 +0100
Subject: [PATCH 3/5] review fixes: renamed to zip_entry_noallocreadwithoffset, removed extraneous memcpy

---
 README.md        |  2 +-
 src/zip.c        | 19 +++++++++++--------
 src/zip.h        |  7 +------
 test/test_read.c |  2 +-
 4 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index 67831dfd..8d9efe51 100644
--- a/README.md
+++ b/README.md
@@ -219,7 +219,7 @@ struct zip_t *zip = zip_open("foo.zip", 0, 'r');
     zip_entry_open(zip, "foo-1.txt");
     {
         size_t offset = 4;
-        ssize_t nread = zip_entry_noallocread_offset(zip, offset, bufsize, (void *)buf);
+        ssize_t nread = zip_entry_noallocreadwithoffset(zip, offset, bufsize, (void *)buf);
     }
 
     zip_entry_close(zip);
diff --git a/src/zip.c b/src/zip.c
index 994244a3..588421b9 100644
--- a/src/zip.c
+++ b/src/zip.c
@@ -1656,7 +1656,7 @@ ssize_t zip_entry_noallocread(struct zip_t *zip, void *buf, size_t bufsize) {
   return (ssize_t)zip->entry.uncomp_size;
 }
 
-ssize_t zip_entry_noallocread_offset(struct zip_t *zip,
+ssize_t zip_entry_noallocreadwithoffset(struct zip_t *zip,
                                     size_t offset, size_t size, void *buf) {
   mz_zip_archive *pzip = NULL;
 
@@ -1665,7 +1665,7 @@ ssize_t zip_entry_noallocread_offset(struct zip_t *zip,
     return (ssize_t)ZIP_ENOINIT;
   }
 
-  if (offset > (size_t)zip->entry.uncomp_size) {
+  if (offset >= (size_t)zip->entry.uncomp_size) {
     return (ssize_t)ZIP_EINVAL;
   }
 
@@ -1686,8 +1686,7 @@ ssize_t zip_entry_noallocread_offset(struct zip_t *zip,
     return (ssize_t)ZIP_ENORITER;
   }
 
-  mz_uint8  tmpbuf[ZIP_DEFAULT_ITER_BUF_SIZE];
-  size_t    tmpbuf_size = sizeof(tmpbuf);
+  mz_uint8* writebuf = (mz_uint8*)buf;
   size_t    file_offset = 0;
   size_t    write_cursor = 0;
   size_t    to_read = size;
@@ -1695,21 +1694,25 @@ ssize_t zip_entry_noallocread_offset(struct zip_t *zip,
   // iterate until the requested offset is in range
   while (file_offset < zip->entry.uncomp_size && to_read > 0)
   {
-    size_t nread = mz_zip_reader_extract_iter_read(iter, tmpbuf, tmpbuf_size);
+    size_t nread = mz_zip_reader_extract_iter_read(iter, (void*)&writebuf[write_cursor], to_read);
 
     if (nread == 0)
       break;
 
     if (offset < (file_offset+nread)) {
       size_t read_cursor = offset - file_offset;
-      MZ_ASSERT(read_cursor < tmpbuf_size);
+      MZ_ASSERT(read_cursor < size);
       size_t read_size = nread - read_cursor;
 
       if (to_read < read_size)
         read_size = to_read;
-      MZ_ASSERT(read_size <= tmpbuf_size);
+      MZ_ASSERT(read_size <= size);
 
-      memcpy(&((mz_uint8*)buf)[write_cursor], &tmpbuf[read_cursor], read_size);
+      // If it's an unaligned read (i.e. the first one)
+      if (read_cursor != 0)
+      {
+        memmove(&writebuf[write_cursor], &writebuf[read_cursor], read_size);
+      }
 
       write_cursor += read_size;
       offset += read_size;
diff --git a/src/zip.h b/src/zip.h
index c2992fac..c70806b9 100644
--- a/src/zip.h
+++ b/src/zip.h
@@ -61,11 +61,6 @@ typedef long ssize_t; /* byte count or error */
  */
 #define ZIP_DEFAULT_COMPRESSION_LEVEL 6
 
-/**
- * Default zip iterator stack size (in bytes)
- */
-#define ZIP_DEFAULT_ITER_BUF_SIZE 32*1024
-
 /**
  * Error codes
  */
@@ -395,7 +390,7 @@ extern ZIP_EXPORT ssize_t zip_entry_noallocread(struct zip_t *zip, void *buf,
  * @return the return code - the number of bytes actually read on success.
  *         Otherwise a negative number (< 0) on error (e.g. offset is too large).
  */
-extern ZIP_EXPORT ssize_t zip_entry_noallocread_offset(struct zip_t *zip,
+extern ZIP_EXPORT ssize_t zip_entry_noallocreadwithoffset(struct zip_t *zip,
                                         size_t offset, size_t size, void *buf);
 
 /**
diff --git a/test/test_read.c b/test/test_read.c
index e5e0d828..8e7df1eb 100644
--- a/test/test_read.c
+++ b/test/test_read.c
@@ -149,7 +149,7 @@ MU_TEST(test_noallocread_offset) {
     size_t offset = 0;
     while (offset < expected_size) {
 
-      ssize_t nread = zip_entry_noallocread_offset(zip, offset, buflen, tmpbuf);
+      ssize_t nread = zip_entry_noallocreadwithoffset(zip, offset, buflen, tmpbuf);
 
       mu_assert(nread <= buflen, "too many bytes read");
       mu_assert(0u != nread, "no bytes read");

From 988f074f9d884406e46e71bdcceb9a2f22208c45 Mon Sep 17 00:00:00 2001
From: JCash <mwesterdahl76@gmail.com>
Date: Fri, 29 Nov 2024 14:33:54 +0100
Subject: [PATCH 4/5] Improved test for the memmove's

---
 test/test_read.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/test/test_read.c b/test/test_read.c
index 8e7df1eb..c33885bd 100644
--- a/test/test_read.c
+++ b/test/test_read.c
@@ -130,7 +130,7 @@ MU_TEST(test_noallocread) {
 }
 
 
-MU_TEST(test_noallocread_offset) {
+MU_TEST(test_noallocreadwithoffset) {
   size_t expected_size = strlen(TESTDATA2);
   char *expected_data = calloc(expected_size, sizeof(char));
 
@@ -146,20 +146,23 @@ MU_TEST(test_noallocread_offset) {
     size_t buflen = i;
     char *tmpbuf = calloc(buflen, sizeof(char));
 
-    size_t offset = 0;
-    while (offset < expected_size) {
+    for (size_t j = 0; j < expected_size; ++j) {
+      // we test starting from different offsets, to make sure we hit the "unaligned" code path
+      size_t offset = j;
+      while (offset < expected_size) {
 
-      ssize_t nread = zip_entry_noallocreadwithoffset(zip, offset, buflen, tmpbuf);
+        ssize_t nread = zip_entry_noallocreadwithoffset(zip, offset, buflen, tmpbuf);
 
-      mu_assert(nread <= buflen, "too many bytes read");
-      mu_assert(0u != nread, "no bytes read");
+        mu_assert(nread <= buflen, "too many bytes read");
+        mu_assert(0u != nread, "no bytes read");
 
-      // check the data
-      for (ssize_t j = 0; j < nread; ++j) {
-        mu_assert_int_eq(expected_data[offset + j], tmpbuf[j]);
-      }
+        // check the data
+        for (ssize_t j = 0; j < nread; ++j) {
+          mu_assert_int_eq(expected_data[offset + j], tmpbuf[j]);
+        }
 
-      offset += nread;
+        offset += nread;
+      }
     }
   }
 
@@ -171,7 +174,7 @@ MU_TEST_SUITE(test_read_suite) {
 
   MU_RUN_TEST(test_read);
   MU_RUN_TEST(test_noallocread);
-  MU_RUN_TEST(test_noallocread_offset);
+  MU_RUN_TEST(test_noallocreadwithoffset);
 }
 
 #define UNUSED(x) (void)x

From 2d8ad2683d3e4ff66f35fb2c0f39b270342b6391 Mon Sep 17 00:00:00 2001
From: JCash <mwesterdahl76@gmail.com>
Date: Fri, 29 Nov 2024 16:06:36 +0100
Subject: [PATCH 5/5] clang format fix

---
 src/zip.c        | 27 +++++++++++++--------------
 src/zip.h        | 11 +++++++----
 test/test_read.c |  7 ++++---
 3 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/src/zip.c b/src/zip.c
index 588421b9..d00cece2 100644
--- a/src/zip.c
+++ b/src/zip.c
@@ -1656,8 +1656,8 @@ ssize_t zip_entry_noallocread(struct zip_t *zip, void *buf, size_t bufsize) {
   return (ssize_t)zip->entry.uncomp_size;
 }
 
-ssize_t zip_entry_noallocreadwithoffset(struct zip_t *zip,
-                                    size_t offset, size_t size, void *buf) {
+ssize_t zip_entry_noallocreadwithoffset(struct zip_t *zip, size_t offset,
+                                        size_t size, void *buf) {
   mz_zip_archive *pzip = NULL;
 
   if (!zip) {
@@ -1669,7 +1669,7 @@ ssize_t zip_entry_noallocreadwithoffset(struct zip_t *zip,
     return (ssize_t)ZIP_EINVAL;
   }
 
-  if ((offset+size) > (size_t)zip->entry.uncomp_size) {
+  if ((offset + size) > (size_t)zip->entry.uncomp_size) {
     size = (ssize_t)zip->entry.uncomp_size - offset;
   }
 
@@ -1680,26 +1680,26 @@ ssize_t zip_entry_noallocreadwithoffset(struct zip_t *zip,
     return (ssize_t)ZIP_ENOENT;
   }
 
-  mz_zip_reader_extract_iter_state* iter =
+  mz_zip_reader_extract_iter_state *iter =
       mz_zip_reader_extract_iter_new(pzip, (mz_uint)zip->entry.index, 0);
   if (!iter) {
     return (ssize_t)ZIP_ENORITER;
   }
 
-  mz_uint8* writebuf = (mz_uint8*)buf;
-  size_t    file_offset = 0;
-  size_t    write_cursor = 0;
-  size_t    to_read = size;
+  mz_uint8 *writebuf = (mz_uint8 *)buf;
+  size_t file_offset = 0;
+  size_t write_cursor = 0;
+  size_t to_read = size;
 
   // iterate until the requested offset is in range
-  while (file_offset < zip->entry.uncomp_size && to_read > 0)
-  {
-    size_t nread = mz_zip_reader_extract_iter_read(iter, (void*)&writebuf[write_cursor], to_read);
+  while (file_offset < zip->entry.uncomp_size && to_read > 0) {
+    size_t nread = mz_zip_reader_extract_iter_read(
+        iter, (void *)&writebuf[write_cursor], to_read);
 
     if (nread == 0)
       break;
 
-    if (offset < (file_offset+nread)) {
+    if (offset < (file_offset + nread)) {
       size_t read_cursor = offset - file_offset;
       MZ_ASSERT(read_cursor < size);
       size_t read_size = nread - read_cursor;
@@ -1709,8 +1709,7 @@ ssize_t zip_entry_noallocreadwithoffset(struct zip_t *zip,
       MZ_ASSERT(read_size <= size);
 
       // If it's an unaligned read (i.e. the first one)
-      if (read_cursor != 0)
-      {
+      if (read_cursor != 0) {
         memmove(&writebuf[write_cursor], &writebuf[read_cursor], read_size);
       }
 
diff --git a/src/zip.h b/src/zip.h
index c70806b9..3c0f3c6a 100644
--- a/src/zip.h
+++ b/src/zip.h
@@ -376,8 +376,8 @@ extern ZIP_EXPORT ssize_t zip_entry_noallocread(struct zip_t *zip, void *buf,
                                                 size_t bufsize);
 
 /**
- * Extracts the part of the current zip entry into a memory buffer using no memory
- * allocation for the buffer.
+ * Extracts the part of the current zip entry into a memory buffer using no
+ * memory allocation for the buffer.
  *
  * @param zip zip archive handler.
  * @param offset the offset of the entry (in bytes).
@@ -388,10 +388,13 @@ extern ZIP_EXPORT ssize_t zip_entry_noallocread(struct zip_t *zip, void *buf,
  * @note each call will iterate from the start of the entry
  *
  * @return the return code - the number of bytes actually read on success.
- *         Otherwise a negative number (< 0) on error (e.g. offset is too large).
+ *         Otherwise a negative number (< 0) on error (e.g. offset is too
+ *         large).
  */
 extern ZIP_EXPORT ssize_t zip_entry_noallocreadwithoffset(struct zip_t *zip,
-                                        size_t offset, size_t size, void *buf);
+                                                          size_t offset,
+                                                          size_t size,
+                                                          void *buf);
 
 /**
  * Extracts the current zip entry into output file.
diff --git a/test/test_read.c b/test/test_read.c
index c33885bd..472601c6 100644
--- a/test/test_read.c
+++ b/test/test_read.c
@@ -129,7 +129,6 @@ MU_TEST(test_noallocread) {
   zip_close(zip);
 }
 
-
 MU_TEST(test_noallocreadwithoffset) {
   size_t expected_size = strlen(TESTDATA2);
   char *expected_data = calloc(expected_size, sizeof(char));
@@ -147,11 +146,13 @@ MU_TEST(test_noallocreadwithoffset) {
     char *tmpbuf = calloc(buflen, sizeof(char));
 
     for (size_t j = 0; j < expected_size; ++j) {
-      // we test starting from different offsets, to make sure we hit the "unaligned" code path
+      // we test starting from different offsets, to make sure we hit the
+      // "unaligned" code path
       size_t offset = j;
       while (offset < expected_size) {
 
-        ssize_t nread = zip_entry_noallocreadwithoffset(zip, offset, buflen, tmpbuf);
+        ssize_t nread =
+            zip_entry_noallocreadwithoffset(zip, offset, buflen, tmpbuf);
 
         mu_assert(nread <= buflen, "too many bytes read");
         mu_assert(0u != nread, "no bytes read");