Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Libzip buffer #74

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,6 @@ build/*

# vim swap files
**/*.swp

# dev script
scr.sh
12 changes: 7 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ set(HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/include/duckx.hpp"
set(SOURCES src/duckx.cpp)

set(THIRD_PARTY_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/pugixml/pugixml.hpp"
"${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/pugixml/pugiconfig.hpp"
"${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zip/zip.h")
set(THIRD_PARTY_SRC "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zip/zip.c")
"${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/pugixml/pugiconfig.hpp")
# "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zip/zip.h")
#set(THIRD_PARTY_SRC "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zip/zip.c")

include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"
"${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/pugixml"
"${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zip")
"${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/pugixml")
#"${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zip")

if(BUILD_SHARED_LIBS)
add_library(duckx SHARED ${SOURCES} ${THIRD_PARTY_SRC})
Expand All @@ -37,6 +37,8 @@ endif()

add_library(duckx::duckx ALIAS duckx)

target_link_libraries(duckx zip)

target_include_directories(duckx PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src>
$<INSTALL_INTERFACE:include>
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ cmake --build .

## Requirements ##

- [zip](https://github.com/kuba--/zip)
- [libzip](https://github.com/nih-at/libzip)
- [pugixml](https://github.com/zeux/pugixml)


Expand Down
33 changes: 32 additions & 1 deletion include/duckx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#ifndef DUCKX_H
#define DUCKX_H

#include <bits/stdc++.h>
#include <cstdio>
#include <stdlib.h>
#include <string>
Expand All @@ -16,8 +17,19 @@
#include <pugixml.hpp>
#include <zip.h>

// Build-mode switch: while PROD is defined, DEBUG(...) prints nothing.
#define PROD

// DEBUG(expr) -- debug tracing helper.
//  * Without PROD: prints "[DEBUG] <expression text>: <value>" to std::cout.
//    The stream names are std::-qualified so the macro also works at call
//    sites that do not have `using namespace std;` in scope.
//  * With PROD: DEBUG is an empty object-like macro, so `DEBUG(expr);` leaves
//    `(expr);` behind. The expression is therefore still evaluated (calls
//    wrapped in DEBUG(...) keep their side effects); only the printing is
//    dropped.
// The ';' at the end of the non-PROD expansion is kept so existing call sites
// behave exactly as before.
#ifndef PROD
#define DEBUG(x) std::cout << "[DEBUG] " << #x << ": " << (x) << std::endl;
#else
#define DEBUG
#endif

// TODO: Use container-iterator design pattern!

struct xml_string_writer;


namespace duckx {
// Run contains runs in a paragraph
class Run {
Expand Down Expand Up @@ -133,22 +145,41 @@ class Table {
TableRow &rows();
};

// Selects where Document::open() takes the .docx archive from.
enum class MODE {
    FILE = 0,   // archive is opened from the path stored in Document::directory
    BUFFER = 1, // archive is opened from the caller-supplied memory buffer
};

// Document contains the whole docx file
// and stores paragraphs
class Document {
private:
friend class IteratorHelper;
std::string directory;
MODE mode;
Paragraph paragraph;
Table table;
pugi::xml_document document;
zip_error_t zip_error;
int *errorp;
zip_t* zip = NULL;
char* buf;
size_t buf_len;
// The only way I know to write to a zip is to wait for zip_close,
// so we need to keep the writers alive until their contents are flushed to the actual file
std::vector<std::shared_ptr<xml_string_writer>> save_writers;

public:
Document();
~Document();
Document(std::string);
Document(char*, size_t);
void file(std::string);
void buffer(char*, size_t);
void open();
void save() const;
void close();
void save(const char* dst = NULL);
//void save(const char*) const;

Paragraph &paragraphs();
Table &tables();
Expand Down
159 changes: 80 additions & 79 deletions src/duckx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,105 +235,106 @@ duckx::Paragraph::insert_paragraph_after(const std::string &text,
return *p;
}

duckx::Document::Document() {
// TODO: this function must be removed!
this->directory = "";
}
// TODO: this function must be removed!
// Default constructor: file mode with an empty path. A source has to be
// selected with file() or buffer() before open() can do anything useful.
// The initializers follow the member declaration order, and every raw
// pointer / size member gets a defined value: errorp in particular is handed
// to zip_open() by save(), so it must not be left indeterminate.
duckx::Document::Document()
    : directory{""}, mode{MODE::FILE}, zip_error{}, errorp{nullptr},
      buf{nullptr}, buf_len{0} { }

duckx::Document::Document(std::string directory) {
this->directory = directory;
}
// Constructs a document in file mode for the archive at `directory`.
// Nothing is opened here; call open() to load the archive.
// The initializers follow the member declaration order, and every raw
// pointer / size member gets a defined value: errorp in particular is handed
// to zip_open() by save(), so it must not be left indeterminate.
duckx::Document::Document(std::string directory)
    : directory{directory}, mode{MODE::FILE}, zip_error{}, errorp{nullptr},
      buf{nullptr}, buf_len{0} { }

// Constructs a document in buffer mode over `len` bytes at `zip_buf`.
// Only the pointer and the length are stored -- the data is not copied -- so
// the buffer has to stay valid while the document uses it (presumably until
// close(); confirm against libzip's buffer-source lifetime rules).
// errorp and zip_error get defined values as well: errorp is handed to
// zip_open() by save(), so it must not be left indeterminate.
duckx::Document::Document(char* zip_buf, size_t len)
    : mode{MODE::BUFFER}, zip_error{}, errorp{nullptr}, buf{zip_buf},
      buf_len{len} { }

void duckx::Document::file(std::string directory) {
this->mode = MODE::FILE;
this->directory = directory;
}

void duckx::Document::open() {
void *buf = NULL;
size_t bufsize;

// Open file and load "xml" content to the document variable
zip_t *zip =
zip_open(this->directory.c_str(), ZIP_DEFAULT_COMPRESSION_LEVEL, 'r');

zip_entry_open(zip, "word/document.xml");
zip_entry_read(zip, &buf, &bufsize);

zip_entry_close(zip);
zip_close(zip);

this->document.load_buffer(buf, bufsize);

free(buf);

this->paragraph.set_parent(document.child("w:document").child("w:body"));
void duckx::Document::buffer(char* zip_buf, size_t len) {
this->mode = MODE::BUFFER;
this->buf = zip_buf;
this->buf_len = len;
}

void duckx::Document::save() const {
// minizip only supports appending or writing to new files
// so we must
// - make a new file
// - write any new files
// - copy the old files
// - delete old docx
// - rename new file to old file

// Read document buffer
xml_string_writer writer;
this->document.print(writer);

// Open file and replace "xml" content
// Destructor. Deliberately empty: the zip_close() call below was disabled
// because it led to a double free / heap corruption.
// NOTE(review): as written, an archive that is still open when the Document is
// destroyed is not released here -- callers apparently have to call close()
// themselves; confirm that this is the intended contract.
duckx::Document::~Document() {
//makes double free or corruption
//if (this->zip != NULL) zip_close(zip);
}

std::string original_file = this->directory;
std::string temp_file = this->directory + ".tmp";
void duckx::Document::close() {
if (this->zip != NULL) zip_close(this->zip);
this->zip = NULL;
this->save_writers.clear();
}

// Create the new file
zip_t *new_zip =
zip_open(temp_file.c_str(), ZIP_DEFAULT_COMPRESSION_LEVEL, 'w');
// Opens the archive chosen by mode and loads word/document.xml into the
// pugixml document, then points paragraph at the w:document/w:body node.
//  - MODE::FILE:   the archive is opened from the path stored in directory.
//  - MODE::BUFFER: the archive is opened from buf / buf_len.
// Throws a string literal when mode is neither value or when
// word/document.xml cannot be opened. The archive handle is kept in
// this->zip; nothing in this function closes it.
// NOTE(review): this listing also contains zip_entry_* calls on new_zip and a
// use of writer, neither of which is declared in this function; they look like
// leftovers of an older implementation -- confirm against the real file.
void duckx::Document::open() {
if (this->mode != MODE::FILE && this->mode != MODE::BUFFER) throw "Unknown mode error, specify file or buffer";

// Write out document.xml
zip_entry_open(new_zip, "word/document.xml");
// Open file and load "xml" content to the document variable
//if (zip != NULL) zip_close(zip);
// NOTE(review): neither open call below is checked for failure before
// this->zip is passed to zip_fopen.
this->zip = this->mode == MODE::FILE ?
zip_open(this->directory.c_str(), ZIP_CREATE, NULL) : /*this->mode == MODE::BUFFER*/
zip_open_from_source(zip_source_buffer_create(this->buf, this->buf_len, 0, &this->zip_error), 0, &this->zip_error) ;
zip_file_t* doc_file = zip_fopen(this->zip, "word/document.xml", 0);
// Note: this throw leaves this->zip open.
if (!doc_file) throw "Something's wrong with the document";
// sb.size sizes the read buffer (presumably the uncompressed entry size; see
// the libzip zip_stat documentation). The results of zip_stat, malloc and
// zip_fread are not checked.
struct zip_stat sb;
zip_stat(this->zip, "word/document.xml", 0, &sb);
size_t buf_size = sb.size;
void* doc_buf = malloc(buf_size);
zip_fread(doc_file, doc_buf, buf_size);
this->document.load_buffer(doc_buf, buf_size);


// doc_buf is released right after parsing; presumably load_buffer copies the
// data -- confirm against the pugixml documentation.
free(doc_buf);
zip_fclose(doc_file);

const char *buf = writer.result.c_str();
this->paragraph.set_parent(document.child("w:document").child("w:body"));

zip_entry_write(new_zip, buf, strlen(buf));
zip_entry_close(new_zip);
}

// Open the original zip and copy all files which are not replaced by duckX
zip_t *orig_zip =
zip_open(original_file.c_str(), ZIP_DEFAULT_COMPRESSION_LEVEL, 'r');
// Writes the current XML tree back into a .docx archive.
//  - dst == NULL: queues a replacement of word/document.xml in the already
//    open archive (this->zip). The serialized XML is owned by a writer stored
//    in save_writers; per the note in include/duckx.hpp the data is only
//    written out by zip_close, so the writer has to outlive this call.
//  - dst != NULL: opens/creates an archive at dst, copies every entry except
//    word/document.xml from this->zip into it, adds the freshly serialized
//    word/document.xml and closes dst_zip.
// Throws a string literal for an unknown mode, or when mode is MODE::BUFFER
// and no dst is given.
// NOTE(review): the listing interleaves this logic with zip_entry_* calls on
// orig_zip / new_zip and uses of original_file / temp_file, none of which are
// declared in this function, and the braces opened by that older loop are
// never closed; these look like leftovers of a previous implementation --
// confirm against the real file.
void duckx::Document::save(const char* dst) {
using namespace std;
if (this->mode != MODE::FILE && this->mode != MODE::BUFFER) throw "Unknown mode error, specify file or buffer";

// Loop & copy each relevant entry in the original zip
int orig_zip_entry_ct = zip_total_entries(orig_zip);
for (int i = 0; i < orig_zip_entry_ct; i++) {
zip_entry_openbyindex(orig_zip, i);
const char *name = zip_entry_name(orig_zip);
// A buffer-backed document has no path of its own to write to.
if (mode == MODE::BUFFER && dst == NULL) throw "Specify destination path";

// Skip copying the original file
if (std::string(name) != std::string("word/document.xml")) {
// Read the old content
void *entry_buf;
size_t entry_buf_size;
zip_entry_read(orig_zip, &entry_buf, &entry_buf_size);
//see comment in include/duckx.hpp about writers
/*xml_string_writer writer;
this->document.print(writer);
char* new_buf = writer.result.c_str();*/

// Write into new zip
zip_entry_open(new_zip, name);
zip_entry_write(new_zip, entry_buf, entry_buf_size);
zip_entry_close(new_zip);

free(entry_buf);
if (dst == NULL) {
// The writer is stored in save_writers because new_buf points into
// writer->result and is handed to libzip below.
auto writer = make_shared<xml_string_writer>();
this->save_writers.push_back(writer);
this->document.print(*writer);
const char* new_buf = writer->result.c_str();
// NOTE(review): strlen stops at the first NUL byte, and the results of
// zip_source_buffer_create, zip_name_locate and zip_replace are not checked.
auto source = zip_source_buffer_create(new_buf, strlen(new_buf), 0, &this->zip_error);
auto idx = zip_name_locate(this->zip, "word/document.xml", 0);
zip_replace(this->zip, idx, source);
}
else {
// Here the writer is a local: dst_zip is closed before it goes out of scope.
xml_string_writer writer;
this->document.print(writer);
const char* new_buf = writer.result.c_str();
// NOTE(review): errorp is declared as a plain int* member without an
// initializer, so unless a constructor sets it the value passed here is
// indeterminate; the result of zip_open is not checked either.
zip_t* dst_zip = zip_open(dst, ZIP_CREATE /*| ZIP_TRUNCATE*/, errorp);
// zip_get_num_entries is re-evaluated on every iteration.
for (int i = 0; i < zip_get_num_entries(this->zip, 0); ++i) {
auto name = zip_get_name(this->zip, i, 0);
if (string(name) != "word/document.xml") {
//TODO unnecessary decompression + compression, need to think of workaround
auto file = zip_fopen_index(this->zip, i, /*ZIP_FL_COMPRESSED*/ 0);
struct zip_stat sb;
zip_stat(this->zip, name, 0, &sb);
auto fsize = /*sb.comp_size*/ sb.size;
auto tmp_buf = malloc(fsize);
// The calls wrapped in DEBUG(...) do real work (read, add), so they must
// keep executing in every build; see the DEBUG macro definition in
// include/duckx.hpp.
DEBUG(zip_fread(file, tmp_buf, fsize));
// tmp_buf is passed with 1 as the third argument (the libzip freep flag),
// presumably handing ownership to libzip -- confirm; it is not freed here.
auto source = zip_source_buffer_create(tmp_buf, fsize, 1, &this->zip_error);
DEBUG(source);
DEBUG(zip_file_add(dst_zip, name, source, ZIP_FL_OVERWRITE));
zip_fclose(file);
}
}

zip_entry_close(orig_zip);
// The regenerated document.xml is added last, then dst_zip is closed.
auto source = zip_source_buffer_create(new_buf, strlen(new_buf), 0, &this->zip_error);
zip_file_add(dst_zip, "word/document.xml", source, ZIP_FL_OVERWRITE);
zip_close(dst_zip);
}

// Close both zips
zip_close(orig_zip);
zip_close(new_zip);

// Remove original zip, rename new to correct name
remove(original_file.c_str());
rename(temp_file.c_str(), original_file.c_str());
}

duckx::Paragraph &duckx::Document::paragraphs() {
Expand Down