diff --git a/CHANGELOG.md b/CHANGELOG.md index 72c9702..c333884 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,71 +2,100 @@ -## [Unreleased](https://gitlab.com/html-validate/html-validate/compare/Unreleased) () +## [Unreleased](https://github.com/bsorrentino/pdf-tools/releases/tag/Unreleased) () +### Features + * add --pageseparator option ([193abdf18db7c8f](https://github.com/bsorrentino/pdf-tools/commit/193abdf18db7c8f4e9b2f741832eec2d939f5834)) + > issue #6 + + + +### Documentation + + - update changelog template ([16281aca47a4a38](https://github.com/bsorrentino/pdf-tools/commit/16281aca47a4a38d6c33c3aa6ee462be202a3ec2)) + + - **README.md** update ([63e8702a39bfe33](https://github.com/bsorrentino/pdf-tools/commit/63e8702a39bfe3320f2a588e25f20b4368bcbb61)) + > add option to enable page separator + > issue #6 +### ALM + + - **package.json** update commander ([96707880c125e4a](https://github.com/bsorrentino/pdf-tools/commit/96707880c125e4abebe9fc662e834dd8a23a569d)) + > update commander package to 10.0.1 + > issue #6 + + - **package.json** move to next release ([bf4d30b4c46ed7f](https://github.com/bsorrentino/pdf-tools/commit/bf4d30b4c46ed7f5a240d4aabb0fd8c6a43a024c)) + "name: v1.0.2" is a release tag -## [v1.0.2](https://gitlab.com/html-validate/html-validate/compare/v1.0.2) (2022-08-12) +## [v1.0.2](https://github.com/bsorrentino/pdf-tools/releases/tag/v1.0.2) (2022-08-12) ### Documentation - - update changelog template ([f6a41269349d4e0](https://gitlab.com/html-validate/html-validate/commit/f6a41269349d4e01832c8ee800142899f95ca3f7)) - - update readme ([5f17d3e9348a200](https://gitlab.com/html-validate/html-validate/commit/5f17d3e9348a20063d16b8840304b3ee116c281b)) + - update changelog template ([f6a41269349d4e0](https://github.com/bsorrentino/pdf-tools/commit/f6a41269349d4e01832c8ee800142899f95ca3f7)) + + - update readme ([5f17d3e9348a200](https://github.com/bsorrentino/pdf-tools/commit/5f17d3e9348a20063d16b8840304b3ee116c281b)) + ### ALM - - support 
'git-changelog-command-line' to generate changelog ([a0141c23186fd52](https://gitlab.com/html-validate/html-validate/commit/a0141c23186fd5206e4f52fba86163d7c30a6592)) + - support 'git-changelog-command-line' to generate changelog ([a0141c23186fd52](https://github.com/bsorrentino/pdf-tools/commit/a0141c23186fd5206e4f52fba86163d7c30a6592)) + "name: v1.0.1" is a release tag -## [v1.0.1](https://gitlab.com/html-validate/html-validate/compare/v1.0.1) (2022-03-21) +## [v1.0.1](https://github.com/bsorrentino/pdf-tools/releases/tag/v1.0.1) (2022-03-21) ### Documentation - - update readme ([5a09c3f204f34cd](https://gitlab.com/html-validate/html-validate/commit/5a09c3f204f34cd3da39d090b3634d2da9e2304c)) + - update readme ([5a09c3f204f34cd](https://github.com/bsorrentino/pdf-tools/commit/5a09c3f204f34cd3da39d090b3634d2da9e2304c)) + "name: v1.0.0" is a release tag -## [v1.0.0](https://gitlab.com/html-validate/html-validate/compare/v1.0.0) (2022-03-21) +## [v1.0.0](https://github.com/bsorrentino/pdf-tools/releases/tag/v1.0.0) (2022-03-21) ### Documentation - - update changelog ([f665d3ef54917b9](https://gitlab.com/html-validate/html-validate/commit/f665d3ef54917b95dcd57bae0cf250cae9889437)) + - update changelog ([f665d3ef54917b9](https://github.com/bsorrentino/pdf-tools/commit/f665d3ef54917b95dcd57bae0cf250cae9889437)) + ### Refactor - - upgrade pdf-dist version, build on nodejs 16 and test also on macos M1 ([91952e262b18918](https://gitlab.com/html-validate/html-validate/commit/91952e262b189185068c2fe8e3bf46de942ca811)) + - upgrade pdf-dist version, build on nodejs 16 and test also on macos M1 ([91952e262b18918](https://github.com/bsorrentino/pdf-tools/commit/91952e262b189185068c2fe8e3bf46de942ca811)) + ### ALM - - configure standard-version changelog template ([e29d8b6153b9db4](https://gitlab.com/html-validate/html-validate/commit/e29d8b6153b9db47b489585930d89d3167810cf6)) + - configure standard-version changelog template 
([e29d8b6153b9db4](https://github.com/bsorrentino/pdf-tools/commit/e29d8b6153b9db47b489585930d89d3167810cf6)) + "name: v0.5.2" is a release tag -## [v0.5.2](https://gitlab.com/html-validate/html-validate/compare/v0.5.2) (2022-03-16) +## [v0.5.2](https://github.com/bsorrentino/pdf-tools/releases/tag/v0.5.2) (2022-03-16) ### Bug Fixes - - read version from package.json ([5be86eb0a09221a](https://gitlab.com/html-validate/html-validate/commit/5be86eb0a09221ae8f66b53095bcdc82ddfdc55f)) + - read version from package.json ([5be86eb0a09221a](https://github.com/bsorrentino/pdf-tools/commit/5be86eb0a09221ae8f66b53095bcdc82ddfdc55f)) + @@ -74,24 +103,27 @@ "name: v0.5.1" is a release tag -## [v0.5.1](https://gitlab.com/html-validate/html-validate/compare/v0.5.1) (2022-03-16) +## [v0.5.1](https://github.com/bsorrentino/pdf-tools/releases/tag/v0.5.1) (2022-03-16) ### Bug Fixes - - fix typescript warning ([16a1d33e9d2f309](https://gitlab.com/html-validate/html-validate/commit/16a1d33e9d2f3093615212387a4d5adbdaa06b27)) + - fix typescript warning ([16a1d33e9d2f309](https://github.com/bsorrentino/pdf-tools/commit/16a1d33e9d2f3093615212387a4d5adbdaa06b27)) + ### ALM - - add zx support, add clean script, add files specs ([4fbb3de5d072290](https://gitlab.com/html-validate/html-validate/commit/4fbb3de5d0722900538366a0082478864669c6d3)) - - make commitizen compliant ([fd550c91c58ccf3](https://gitlab.com/html-validate/html-validate/commit/fd550c91c58ccf3df60f3e8879ebe371d480cdc3)) + - add zx support, add clean script, add files specs ([4fbb3de5d072290](https://github.com/bsorrentino/pdf-tools/commit/4fbb3de5d0722900538366a0082478864669c6d3)) + + - make commitizen compliant ([fd550c91c58ccf3](https://github.com/bsorrentino/pdf-tools/commit/fd550c91c58ccf3df60f3e8879ebe371d480cdc3)) + "name: v0.5.0" is a release tag -## [v0.5.0](https://gitlab.com/html-validate/html-validate/compare/v0.5.0) (2021-06-09) +## [v0.5.0](https://github.com/bsorrentino/pdf-tools/releases/tag/v0.5.0) (2021-06-09) 
@@ -101,7 +133,7 @@ "name: v0.4.0" is a release tag -## [v0.4.0](https://gitlab.com/html-validate/html-validate/compare/v0.4.0) (2021-04-21) +## [v0.4.0](https://github.com/bsorrentino/pdf-tools/releases/tag/v0.4.0) (2021-04-21) @@ -111,7 +143,7 @@ "name: v0.3.1" is a release tag -## [v0.3.1](https://gitlab.com/html-validate/html-validate/compare/v0.3.1) (2021-04-21) +## [v0.3.1](https://github.com/bsorrentino/pdf-tools/releases/tag/v0.3.1) (2021-04-21) @@ -121,7 +153,7 @@ "name: v0.3.0" is a release tag -## [v0.3.0](https://gitlab.com/html-validate/html-validate/compare/v0.3.0) (2021-04-21) +## [v0.3.0](https://github.com/bsorrentino/pdf-tools/releases/tag/v0.3.0) (2021-04-21) @@ -131,7 +163,7 @@ "name: v0.2.1" is a release tag -## [v0.2.1](https://gitlab.com/html-validate/html-validate/compare/v0.2.1) (2021-04-02) +## [v0.2.1](https://github.com/bsorrentino/pdf-tools/releases/tag/v0.2.1) (2021-04-02) @@ -141,7 +173,7 @@ "name: v0.2.0" is a release tag -## [v0.2.0](https://gitlab.com/html-validate/html-validate/compare/v0.2.0) (2021-02-15) +## [v0.2.0](https://github.com/bsorrentino/pdf-tools/releases/tag/v0.2.0) (2021-02-15) @@ -151,7 +183,7 @@ "name: v0.1.0" is a release tag -## [v0.1.0](https://gitlab.com/html-validate/html-validate/compare/v0.1.0) (2021-02-02) +## [v0.1.0](https://github.com/bsorrentino/pdf-tools/releases/tag/v0.1.0) (2021-02-02) @@ -161,7 +193,7 @@ "name: v0.0.5" is a release tag -## [v0.0.5](https://gitlab.com/html-validate/html-validate/compare/v0.0.5) (2020-12-18) +## [v0.0.5](https://github.com/bsorrentino/pdf-tools/releases/tag/v0.0.5) (2020-12-18) @@ -171,7 +203,7 @@ "name: v0.0.4" is a release tag -## [v0.0.4](https://gitlab.com/html-validate/html-validate/compare/v0.0.4) (2020-11-26) +## [v0.0.4](https://github.com/bsorrentino/pdf-tools/releases/tag/v0.0.4) (2020-11-26) @@ -181,7 +213,7 @@ "name: v0.0.3" is a release tag -## [v0.0.3](https://gitlab.com/html-validate/html-validate/compare/v0.0.3) (2020-11-25) +## 
[v0.0.3](https://github.com/bsorrentino/pdf-tools/releases/tag/v0.0.3) (2020-11-25) @@ -191,7 +223,7 @@ "name: v0.0.2" is a release tag -## [v0.0.2](https://gitlab.com/html-validate/html-validate/compare/v0.0.2) (2020-11-24) +## [v0.0.2](https://github.com/bsorrentino/pdf-tools/releases/tag/v0.0.2) (2020-11-24) diff --git a/README.md b/README.md index 16972a5..129c515 100644 --- a/README.md +++ b/README.md @@ -59,10 +59,11 @@ pdftools pdf2md|p2md [options] **Options:** ``` - --stats print stats information - --debug print debug information -``` - + -ps, --pageseparator [separator] add page separator (default: "---") + --imageurl [url prefix] image url prefix + --stats print stats information + --debug print debug information + ``` ---- ## Conversion to Markdown @@ -73,7 +74,7 @@ pdftools pdf2md|p2md [options] * Detect and extract images * Extract plain text * Extract fonts and allow custom mapping through a generated file `.font.json` - > Supported fonts **bold**, _italic_, `monspace`, **_bold+italic_** + > Supported fonts **bold**, _italic_, `monospace`, **_bold+italic_** * Detect code block ( i.e. ` ``` `) * Detect external link diff --git a/changelog.mustache b/changelog.mustache index dbcaaca..5a4c13c 100644 --- a/changelog.mustache +++ b/changelog.mustache @@ -6,14 +6,18 @@ "{{.}}" is a release tag {{/ifReleaseTag}} -## [{{name}}](https://gitlab.com/html-validate/html-validate/compare/{{name}}) ({{tagDate .}}) +## [{{name}}](https://github.com/bsorrentino/pdf-tools/releases/tag/{{name}}) ({{tagDate .}}) {{#ifContainsType commits type='feat'}} ### Features {{#commits}} {{#ifCommitType .
type='feat'}} - - {{#eachCommitScope .}} **{{.}}** {{/eachCommitScope}} {{{commitDescription .}}} ([{{hash}}](https://gitlab.com/html-validate/html-validate/commit/{{hashFull}})) + * {{#eachCommitScope .}} **{{.}}** {{/eachCommitScope}} {{{commitDescription .}}} ([{{hash}}](https://github.com/bsorrentino/pdf-tools/commit/{{hashFull}})) + {{#messageBodyItems}} + > {{.}} + {{/messageBodyItems}} + {{/ifCommitType}} {{/commits}} {{/ifContainsType}} @@ -23,7 +27,11 @@ {{#commits}} {{#ifCommitType . type='fix'}} - - {{#eachCommitScope .}} **{{.}}** {{/eachCommitScope}} {{{commitDescription .}}} ([{{hash}}](https://gitlab.com/html-validate/html-validate/commit/{{hashFull}})) + - {{#eachCommitScope .}} **{{.}}** {{/eachCommitScope}} {{{commitDescription .}}} ([{{hash}}](https://github.com/bsorrentino/pdf-tools/commit/{{hashFull}})) + {{#messageBodyItems}} + > {{.}} + {{/messageBodyItems}} + {{/ifCommitType}} {{/commits}} {{/ifContainsType}} @@ -33,7 +41,11 @@ {{#commits}} {{#ifCommitType . type='docs'}} - - {{#eachCommitScope .}} **{{.}}** {{/eachCommitScope}} {{{commitDescription .}}} ([{{hash}}](https://gitlab.com/html-validate/html-validate/commit/{{hashFull}})) + - {{#eachCommitScope .}} **{{.}}** {{/eachCommitScope}} {{{commitDescription .}}} ([{{hash}}](https://github.com/bsorrentino/pdf-tools/commit/{{hashFull}})) + {{#messageBodyItems}} + > {{.}} + {{/messageBodyItems}} + {{/ifCommitType}} {{/commits}} {{/ifContainsType}} @@ -43,7 +55,11 @@ {{#commits}} {{#ifCommitType . type='refactor'}} - - {{#eachCommitScope .}} **{{.}}** {{/eachCommitScope}} {{{commitDescription .}}} ([{{hash}}](https://gitlab.com/html-validate/html-validate/commit/{{hashFull}})) + - {{#eachCommitScope .}} **{{.}}** {{/eachCommitScope}} {{{commitDescription .}}} ([{{hash}}](https://github.com/bsorrentino/pdf-tools/commit/{{hashFull}})) + {{#messageBodyItems}} + > {{.}} + {{/messageBodyItems}} + {{/ifCommitType}} {{/commits}} {{/ifContainsType}} @@ -53,7 +69,11 @@ {{#commits}} {{#ifCommitType . 
type='build'}} - - {{#eachCommitScope .}} **{{.}}** {{/eachCommitScope}} {{{commitDescription .}}} ([{{hash}}](https://gitlab.com/html-validate/html-validate/commit/{{hashFull}})) + - {{#eachCommitScope .}} **{{.}}** {{/eachCommitScope}} {{{commitDescription .}}} ([{{hash}}](https://github.com/bsorrentino/pdf-tools/commit/{{hashFull}})) + {{#messageBodyItems}} + > {{.}} + {{/messageBodyItems}} + {{/ifCommitType}} {{/commits}} {{/ifContainsType}} diff --git a/package-lock.json b/package-lock.json index 3d69369..5571154 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,16 +1,16 @@ { "name": "@bsorrentino/pdf-tools", - "version": "1.0.1", + "version": "1.1.0", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@bsorrentino/pdf-tools", - "version": "1.0.1", + "version": "1.1.0", "license": "MIT", "dependencies": { "canvas": "^2.9.1", - "commander": "^9.1.0", + "commander": "^10.0.1", "enumify": "^2.0.0", "jimp": "^0.16.1", "pdfjs-dist": "^2.13.216" @@ -2182,11 +2182,11 @@ } }, "node_modules/commander": { - "version": "9.1.0", - "resolved": "https://registry.npmjs.org/commander/-/commander-9.1.0.tgz", - "integrity": "sha512-i0/MaqBtdbnJ4XQs4Pmyb+oFQl+q0lsAmokVUH92SlSw4fkeAcG3bVon+Qt7hmtF+u3Het6o4VgrcY3qAoEB6w==", + "version": "10.0.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-10.0.1.tgz", + "integrity": "sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug==", "engines": { - "node": "^12.20.0 || >=14" + "node": ">=14" } }, "node_modules/concat-map": { @@ -7662,9 +7662,9 @@ } }, "commander": { - "version": "9.1.0", - "resolved": "https://registry.npmjs.org/commander/-/commander-9.1.0.tgz", - "integrity": "sha512-i0/MaqBtdbnJ4XQs4Pmyb+oFQl+q0lsAmokVUH92SlSw4fkeAcG3bVon+Qt7hmtF+u3Het6o4VgrcY3qAoEB6w==" + "version": "10.0.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-10.0.1.tgz", + "integrity": 
"sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug==" }, "concat-map": { "version": "0.0.1", diff --git a/package.json b/package.json index 5f34966..f06975e 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@bsorrentino/pdf-tools", - "version": "1.0.2", + "version": "1.1.0", "description": "", "main": "index.js", "bin": { @@ -19,7 +19,7 @@ "license": "MIT", "dependencies": { "canvas": "^2.9.1", - "commander": "^9.1.0", + "commander": "^10.0.1", "enumify": "^2.0.0", "jimp": "^0.16.1", "pdfjs-dist": "^2.13.216" @@ -34,50 +34,5 @@ }, "engines": { "node": ">=16" - }, - "standard-version": { - "skip": { - "tag": true - }, - "types": [ - { - "type": "feat", - "section": "Features" - }, - { - "type": "fix", - "section": "Bug Fixes" - }, - { - "type": "chore", - "hidden": true - }, - { - "type": "docs", - "section": "Documentation" - }, - { - "type": "style", - "hidden": true - }, - { - "type": "refactor", - "section": "Refactoring" - }, - { - "type": "perf", - "hidden": true - }, - { - "type": "test", - "hidden": true - }, - { - "type": "build", - "section": "Build" - } - ], - "commitUrlFormat": "https://github.com/mokkapps/changelog-generator-demo/commits/{{hash}}", - "compareUrlFormat": "https://github.com/mokkapps/changelog-generator-demo/compare/{{previousTag}}...{{currentTag}}" } } diff --git a/samples/Example_Presentation.pdf b/samples/Example_Presentation.pdf new file mode 100644 index 0000000..08c9159 Binary files /dev/null and b/samples/Example_Presentation.pdf differ diff --git a/src/index.ts b/src/index.ts index a60c681..c19a733 100644 --- a/src/index.ts +++ b/src/index.ts @@ -167,17 +167,27 @@ export async function run() { program.command('pdf2md ') .description('convert pdf to markdown format.') .alias('p2md') + .option('-ps, --pageseparator [separator]', 'add page separator', '---') .option('--imageurl [url prefix]', 'imgage url prefix') .option('--stats', 'print stats information') 
.option('--debug', 'print debug information') .action(async (pdfPath, cmdobj) => { - + console.debug( cmdobj ) globals.outDir = await createFolderIfDoesntExist(choosePath( pdfPath, cmdobj)) if( cmdobj.imageurl) { globals.imageUrlPrefix = cmdobj.imageurl } + if( cmdobj.pageseparator ) { + if( typeof(cmdobj.pageseparator) === 'string' ) { + globals.pageSeparator = cmdobj.pageseparator + } + else { + globals.pageSeparator = '---' + } + } + globals.options.debug = cmdobj.debug globals.options.stats = cmdobj.stats diff --git a/src/pdf2md.global.ts b/src/pdf2md.global.ts index e2ab9ba..bdfd0d6 100644 --- a/src/pdf2md.global.ts +++ b/src/pdf2md.global.ts @@ -30,9 +30,10 @@ class Globals { private _options = { filler: false, debug: false, - stats: false + stats: false, } + pageSeparator?: string outDir: string get useImageDuplicateDetection() { return true } diff --git a/src/pdf2md.main.ts b/src/pdf2md.main.ts index 9f2ff43..7ce962d 100644 --- a/src/pdf2md.main.ts +++ b/src/pdf2md.main.ts @@ -64,7 +64,8 @@ export async function pdfToMarkdown(pdfPath: string) { } const content = pages.map( page => toMarkdown( page ) ) - .reduce( (result, pageText ) => result.concat(pageText), '') + .map( text => (globals.pageSeparator) ? `${text}\n\n${globals.pageSeparator}\n\n` : text ) + .reduce( (result, pageText ) => result.concat(pageText), '') await writeFile( outFile, content )