From e2749c8d0265ff7bc982347c1c9eb9b5ddfbdc91 Mon Sep 17 00:00:00 2001 From: Valentina Karnauhova Date: Sun, 19 Dec 2021 21:22:30 +0300 Subject: [PATCH 1/3] init --- CMakeLists.txt | 12 ++-- include/duckx.hpp | 34 +++++++++- src/duckx.cpp | 158 +++++++++++++++++++++++----------------------- 3 files changed, 119 insertions(+), 85 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3fa1d69..e24ed76 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,13 +21,13 @@ set(HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/include/duckx.hpp" set(SOURCES src/duckx.cpp) set(THIRD_PARTY_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/pugixml/pugixml.hpp" - "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/pugixml/pugiconfig.hpp" - "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zip/zip.h") -set(THIRD_PARTY_SRC "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zip/zip.c") + "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/pugixml/pugiconfig.hpp") +# "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zip/zip.h") +#set(THIRD_PARTY_SRC "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zip/zip.c") include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include" - "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/pugixml" - "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zip") + "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/pugixml") + #"${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zip") if(BUILD_SHARED_LIBS) add_library(duckx SHARED ${SOURCES} ${THIRD_PARTY_SRC}) @@ -37,6 +37,8 @@ endif() add_library(duckx::duckx ALIAS duckx) +target_link_libraries(duckx zip) + target_include_directories(duckx PUBLIC $ $ diff --git a/include/duckx.hpp b/include/duckx.hpp index 6b8f38e..9d47586 100755 --- a/include/duckx.hpp +++ b/include/duckx.hpp @@ -7,6 +7,7 @@ #ifndef DUCKX_H #define DUCKX_H +#include #include #include #include @@ -16,8 +17,19 @@ #include #include +#define PROD + +#ifndef PROD +#define DEBUG(x) cout << "[DEBUG] " << #x << ": " << (x) << endl; +#else +#define DEBUG +#endif + // TODO: Use container-iterator design pattern! +struct xml_string_writer; + + namespace duckx { // Run contains runs in a paragraph class Run { @@ -133,22 +145,42 @@ class Table { TableRow &rows(); }; +enum class MODE { + FILE, + BUFFER, +}; + // Document contains whole the docx file // and stores paragraphs class Document { private: friend class IteratorHelper; std::string directory; + MODE mode; Paragraph paragraph; Table table; pugi::xml_document document; + zip_error_t zipError; + int *errorp; + zip_file_t* docFile = NULL; + zip_t* zip = NULL; + char* buf; + size_t bufLen; + //the only way I know to write to zip is wait for zip_close, + //so we need to keep writers in order to flush it's contents to actual file + std::vector> saveWriters; public: Document(); + ~Document(); Document(std::string); + Document(char*, size_t); void file(std::string); + void buffer(char*, size_t); void open(); - void save() const; + void close(); + void save(const char* dst = NULL); + //void save(const char*) const; Paragraph ¶graphs(); Table &tables(); diff --git a/src/duckx.cpp b/src/duckx.cpp index 550ad9f..9f86462 100644 --- a/src/duckx.cpp +++ b/src/duckx.cpp @@ -235,105 +235,105 @@ duckx::Paragraph::insert_paragraph_after(const std::string &text, return *p; } -duckx::Document::Document() { - // TODO: this function must be removed! - this->directory = ""; -} +// TODO: this function must be removed! +duckx::Document::Document() : mode{MODE::FILE}, directory{""} { } -duckx::Document::Document(std::string directory) { - this->directory = directory; -} +duckx::Document::Document(std::string directory) : mode{MODE::FILE}, directory{directory} { } + +duckx::Document::Document(char* zipBuf, size_t len) : mode{MODE::BUFFER}, buf{zipBuf}, bufLen{len} { } void duckx::Document::file(std::string directory) { + this->mode = MODE::FILE; this->directory = directory; } -void duckx::Document::open() { - void *buf = NULL; - size_t bufsize; - - // Open file and load "xml" content to the document variable - zip_t *zip = - zip_open(this->directory.c_str(), ZIP_DEFAULT_COMPRESSION_LEVEL, 'r'); - - zip_entry_open(zip, "word/document.xml"); - zip_entry_read(zip, &buf, &bufsize); - - zip_entry_close(zip); - zip_close(zip); - - this->document.load_buffer(buf, bufsize); - - free(buf); - - this->paragraph.set_parent(document.child("w:document").child("w:body")); +void duckx::Document::buffer(char* zipBuf, size_t len) { + this->mode = MODE::BUFFER; + this->buf = zipBuf; + this->bufLen = len; } -void duckx::Document::save() const { - // minizip only supports appending or writing to new files - // so we must - // - make a new file - // - write any new files - // - copy the old files - // - delete old docx - // - rename new file to old file - - // Read document buffer - xml_string_writer writer; - this->document.print(writer); - - // Open file and replace "xml" content +duckx::Document::~Document() { + //makes double free or corruption + //if (zip != NULL) zip_close(zip); +} - std::string original_file = this->directory; - std::string temp_file = this->directory + ".tmp"; +void duckx::Document::close() { + if (zip != NULL) zip_close(zip); + zip = NULL; + this->saveWriters.clear(); +} - // Create the new file - zip_t *new_zip = - zip_open(temp_file.c_str(), ZIP_DEFAULT_COMPRESSION_LEVEL, 'w'); +void duckx::Document::open() { + if (this->mode != MODE::FILE && this->mode != MODE::BUFFER) throw "Unknown mode error, specify file or buffer"; - // Write out document.xml - zip_entry_open(new_zip, "word/document.xml"); + // Open file and load "xml" content to the document variable + //if (zip != NULL) zip_close(zip); + zip = this->mode == MODE::FILE ? + zip_open(this->directory.c_str(), ZIP_CREATE, NULL) : /*this->mode == MODE::BUFFER*/ + zip_open_from_source(zip_source_buffer_create(this->buf, this->bufLen, 0, &zipError), 0, &zipError) ; + docFile = zip_fopen(zip, "word/document.xml", 0); + if (!docFile) throw "Something's wrong with the document"; + struct zip_stat sb; + zip_stat(zip, "word/document.xml", 0, &sb); + size_t bufSize = sb.size; + void* docBuf = malloc(bufSize); + zip_fread(docFile, docBuf, bufSize); + this->document.load_buffer(docBuf, bufSize); + + + free(docBuf); + zip_fclose(docFile); - const char *buf = writer.result.c_str(); + this->paragraph.set_parent(document.child("w:document").child("w:body")); - zip_entry_write(new_zip, buf, strlen(buf)); - zip_entry_close(new_zip); +} - // Open the original zip and copy all files which are not replaced by duckX - zip_t *orig_zip = - zip_open(original_file.c_str(), ZIP_DEFAULT_COMPRESSION_LEVEL, 'r'); +void duckx::Document::save(const char* dst) { + using namespace std; + if (this->mode != MODE::FILE && this->mode != MODE::BUFFER) throw "Unknown mode error, specify file or buffer"; - // Loop & copy each relevant entry in the original zip - int orig_zip_entry_ct = zip_total_entries(orig_zip); - for (int i = 0; i < orig_zip_entry_ct; i++) { - zip_entry_openbyindex(orig_zip, i); - const char *name = zip_entry_name(orig_zip); + if (mode == MODE::BUFFER && dst == NULL) throw "Specify destination path"; - // Skip copying the original file - if (std::string(name) != std::string("word/document.xml")) { - // Read the old content - void *entry_buf; - size_t entry_buf_size; - zip_entry_read(orig_zip, &entry_buf, &entry_buf_size); + //see comment in include/duckx.hpp about writers + /*xml_string_writer writer; + this->document.print(writer); + char* newBuf = writer.result.c_str();*/ - // Write into new zip - zip_entry_open(new_zip, name); - zip_entry_write(new_zip, entry_buf, entry_buf_size); - zip_entry_close(new_zip); - free(entry_buf); + if (dst == NULL) { + auto writer = make_shared(); + this->saveWriters.push_back(writer); + this->document.print(*writer); + const char* newBuf = writer->result.c_str(); + auto source = zip_source_buffer_create(newBuf, strlen(newBuf), 0, &zipError); + auto idx = zip_name_locate(zip, "word/document.xml", 0); + zip_replace(zip, idx, source); + } + else { + xml_string_writer writer; + this->document.print(writer); + const char* newBuf = writer.result.c_str(); + zip_t* dstZip = zip_open(dst, ZIP_CREATE /*| ZIP_TRUNCATE*/, errorp); + for (int i = 0; i < zip_get_num_entries(zip, 0); ++i) { + auto name = zip_get_name(zip, i, 0); + if (string(name) != "word/document.xml") { + auto file = zip_fopen_index(zip, i, /*ZIP_FL_COMPRESSED*/ 0); //TODO unnecessary decompression + compression, need to think of workaround + struct zip_stat sb; + zip_stat(zip, name, 0, &sb); + auto fsize = /*sb.comp_size*/ sb.size; + auto tmpBuf = malloc(fsize); + DEBUG(zip_fread(file, tmpBuf, fsize)); + auto source = zip_source_buffer_create(tmpBuf, fsize, 1, &zipError); + DEBUG(source); + DEBUG(zip_file_add(dstZip, name, source, ZIP_FL_OVERWRITE)); + zip_fclose(file); + } } - - zip_entry_close(orig_zip); + auto source = zip_source_buffer_create(newBuf, strlen(newBuf), 0, &zipError); + zip_file_add(dstZip, "word/document.xml", source, ZIP_FL_OVERWRITE); + zip_close(dstZip); } - - // Close both zips - zip_close(orig_zip); - zip_close(new_zip); - - // Remove original zip, rename new to correct name - remove(original_file.c_str()); - rename(temp_file.c_str(), original_file.c_str()); } duckx::Paragraph &duckx::Document::paragraphs() { From 6eaa181c4491d24a56d171e876b5520763a461a9 Mon Sep 17 00:00:00 2001 From: Valentina Karnauhova Date: Sun, 19 Dec 2021 21:31:40 +0300 Subject: [PATCH 2/3] snake case instead of camel case --- .gitignore | 3 ++ include/duckx.hpp | 7 ++--- src/duckx.cpp | 77 ++++++++++++++++++++++++----------------------- 3 files changed, 45 insertions(+), 42 deletions(-) diff --git a/.gitignore b/.gitignore index e808716..78f11f6 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,6 @@ build/* # vim swap files **/*.swp + +# dev script +scr.sh diff --git a/include/duckx.hpp b/include/duckx.hpp index 9d47586..e720d42 100755 --- a/include/duckx.hpp +++ b/include/duckx.hpp @@ -160,15 +160,14 @@ class Document { Paragraph paragraph; Table table; pugi::xml_document document; - zip_error_t zipError; + zip_error_t zip_error; int *errorp; - zip_file_t* docFile = NULL; zip_t* zip = NULL; char* buf; - size_t bufLen; + size_t buf_len; //the only way I know to write to zip is wait for zip_close, //so we need to keep writers in order to flush it's contents to actual file - std::vector> saveWriters; + std::vector> save_writers; public: Document(); diff --git a/src/duckx.cpp b/src/duckx.cpp index 9f86462..c6db869 100644 --- a/src/duckx.cpp +++ b/src/duckx.cpp @@ -240,28 +240,28 @@ duckx::Document::Document() : mode{MODE::FILE}, directory{""} { } duckx::Document::Document(std::string directory) : mode{MODE::FILE}, directory{directory} { } -duckx::Document::Document(char* zipBuf, size_t len) : mode{MODE::BUFFER}, buf{zipBuf}, bufLen{len} { } +duckx::Document::Document(char* zip_buf, size_t len) : mode{MODE::BUFFER}, buf{zip_buf}, buf_len{len} { } void duckx::Document::file(std::string directory) { this->mode = MODE::FILE; this->directory = directory; } -void duckx::Document::buffer(char* zipBuf, size_t len) { +void duckx::Document::buffer(char* zip_buf, size_t len) { this->mode = MODE::BUFFER; - this->buf = zipBuf; - this->bufLen = len; + this->buf = zip_buf; + this->buf_len = len; } duckx::Document::~Document() { //makes double free or corruption - //if (zip != NULL) zip_close(zip); + //if (this->zip != NULL) zip_close(zip); } void duckx::Document::close() { - if (zip != NULL) zip_close(zip); - zip = NULL; - this->saveWriters.clear(); + if (this->zip != NULL) zip_close(this->zip); + this->zip = NULL; + this->save_writers.clear(); } void duckx::Document::open() { @@ -269,21 +269,21 @@ void duckx::Document::open() { // Open file and load "xml" content to the document variable //if (zip != NULL) zip_close(zip); - zip = this->mode == MODE::FILE ? + this->zip = this->mode == MODE::FILE ? zip_open(this->directory.c_str(), ZIP_CREATE, NULL) : /*this->mode == MODE::BUFFER*/ - zip_open_from_source(zip_source_buffer_create(this->buf, this->bufLen, 0, &zipError), 0, &zipError) ; - docFile = zip_fopen(zip, "word/document.xml", 0); - if (!docFile) throw "Something's wrong with the document"; + zip_open_from_source(zip_source_buffer_create(this->buf, this->buf_len, 0, &this->zip_error), 0, &this->zip_error) ; + zip_file_t* doc_file = zip_fopen(this->zip, "word/document.xml", 0); + if (!doc_file) throw "Something's wrong with the document"; struct zip_stat sb; - zip_stat(zip, "word/document.xml", 0, &sb); - size_t bufSize = sb.size; - void* docBuf = malloc(bufSize); - zip_fread(docFile, docBuf, bufSize); - this->document.load_buffer(docBuf, bufSize); + zip_stat(this->zip, "word/document.xml", 0, &sb); + size_t buf_size = sb.size; + void* doc_buf = malloc(buf_size); + zip_fread(doc_file, doc_buf, buf_size); + this->document.load_buffer(doc_buf, buf_size); - free(docBuf); - zip_fclose(docFile); + free(doc_buf); + zip_fclose(doc_file); this->paragraph.set_parent(document.child("w:document").child("w:body")); @@ -298,41 +298,42 @@ void duckx::Document::save(const char* dst) { //see comment in include/duckx.hpp about writers /*xml_string_writer writer; this->document.print(writer); - char* newBuf = writer.result.c_str();*/ + char* new_buf = writer.result.c_str();*/ if (dst == NULL) { auto writer = make_shared(); - this->saveWriters.push_back(writer); + this->save_writers.push_back(writer); this->document.print(*writer); - const char* newBuf = writer->result.c_str(); - auto source = zip_source_buffer_create(newBuf, strlen(newBuf), 0, &zipError); - auto idx = zip_name_locate(zip, "word/document.xml", 0); - zip_replace(zip, idx, source); + const char* new_buf = writer->result.c_str(); + auto source = zip_source_buffer_create(new_buf, strlen(new_buf), 0, &this->zip_error); + auto idx = zip_name_locate(this->zip, "word/document.xml", 0); + zip_replace(this->zip, idx, source); } else { xml_string_writer writer; this->document.print(writer); - const char* newBuf = writer.result.c_str(); - zip_t* dstZip = zip_open(dst, ZIP_CREATE /*| ZIP_TRUNCATE*/, errorp); - for (int i = 0; i < zip_get_num_entries(zip, 0); ++i) { - auto name = zip_get_name(zip, i, 0); + const char* new_buf = writer.result.c_str(); + zip_t* dst_zip = zip_open(dst, ZIP_CREATE /*| ZIP_TRUNCATE*/, errorp); + for (int i = 0; i < zip_get_num_entries(this->zip, 0); ++i) { + auto name = zip_get_name(this->zip, i, 0); if (string(name) != "word/document.xml") { - auto file = zip_fopen_index(zip, i, /*ZIP_FL_COMPRESSED*/ 0); //TODO unnecessary decompression + compression, need to think of workaround + //TODO unnecessary decompression + compression, need to think of workaround + auto file = zip_fopen_index(this->zip, i, /*ZIP_FL_COMPRESSED*/ 0); struct zip_stat sb; - zip_stat(zip, name, 0, &sb); + zip_stat(this->zip, name, 0, &sb); auto fsize = /*sb.comp_size*/ sb.size; - auto tmpBuf = malloc(fsize); - DEBUG(zip_fread(file, tmpBuf, fsize)); - auto source = zip_source_buffer_create(tmpBuf, fsize, 1, &zipError); + auto tmp_buf = malloc(fsize); + DEBUG(zip_fread(file, tmp_buf, fsize)); + auto source = zip_source_buffer_create(tmp_buf, fsize, 1, &this->zip_error); DEBUG(source); - DEBUG(zip_file_add(dstZip, name, source, ZIP_FL_OVERWRITE)); + DEBUG(zip_file_add(dst_zip, name, source, ZIP_FL_OVERWRITE)); zip_fclose(file); } } - auto source = zip_source_buffer_create(newBuf, strlen(newBuf), 0, &zipError); - zip_file_add(dstZip, "word/document.xml", source, ZIP_FL_OVERWRITE); - zip_close(dstZip); + auto source = zip_source_buffer_create(new_buf, strlen(new_buf), 0, &this->zip_error); + zip_file_add(dst_zip, "word/document.xml", source, ZIP_FL_OVERWRITE); + zip_close(dst_zip); } } From 2bfff744aaef7e3126816a8337e4ad64ab818a42 Mon Sep 17 00:00:00 2001 From: Valentina Karnauhova Date: Sun, 19 Dec 2021 21:36:28 +0300 Subject: [PATCH 3/3] readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 53b4f69..ebbc540 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ cmake --build . ## Requirements ## -- [zip](https://github.com/kuba--/zip) +- [libzip](https://github.com/nih-at/libzip) - [pugixml](https://github.com/zeux/pugixml)