From 63c10642ce72fc40a81e48a158ee0ced86786ad4 Mon Sep 17 00:00:00 2001 From: Vitor Mattos Date: Sat, 29 Jan 2022 19:11:46 -0300 Subject: [PATCH 1/7] Bump phpunit --- composer.json | 5 +++-- phpunit.xml | 21 +++++++++++---------- tests/Driver/DefaultDriverTest.php | 2 ++ tests/Driver/Fpdi2DriverTest.php | 2 ++ tests/Driver/TcpdiDriverTest.php | 2 ++ tests/MergerTest.php | 2 ++ 6 files changed, 22 insertions(+), 12 deletions(-) diff --git a/composer.json b/composer.json index 195d0aa..56b69b1 100644 --- a/composer.json +++ b/composer.json @@ -29,7 +29,8 @@ "rafikhaceb/tcpdi": "*" }, "require-dev": { - "phpunit/phpunit": "^7|^8", - "smalot/pdfparser": "~0.13" + "phpunit/phpunit": "^9.5", + "smalot/pdfparser": "~0.13", + "phpspec/prophecy-phpunit": "^2.0" } } diff --git a/phpunit.xml b/phpunit.xml index db44d5a..f296f21 100644 --- a/phpunit.xml +++ b/phpunit.xml @@ -1,15 +1,16 @@ - + - ./tests - - - ./tests/ReadmeIntegration.php + tests - - - ./src - - diff --git a/tests/Driver/DefaultDriverTest.php b/tests/Driver/DefaultDriverTest.php index 7defcad..04db6ee 100644 --- a/tests/Driver/DefaultDriverTest.php +++ b/tests/Driver/DefaultDriverTest.php @@ -5,9 +5,11 @@ namespace iio\libmergepdf\Driver; use iio\libmergepdf\Source\SourceInterface; +use Prophecy\PhpUnit\ProphecyTrait; class DefaultDriverTest extends \PHPUnit\Framework\TestCase { + use ProphecyTrait; public function testMerge() { $wrapped = $this->prophesize(DriverInterface::CLASS); diff --git a/tests/Driver/Fpdi2DriverTest.php b/tests/Driver/Fpdi2DriverTest.php index 9649d0d..711b5f0 100644 --- a/tests/Driver/Fpdi2DriverTest.php +++ b/tests/Driver/Fpdi2DriverTest.php @@ -9,9 +9,11 @@ use iio\libmergepdf\Source\SourceInterface; use setasign\Fpdi\Tcpdf\Fpdi; use Prophecy\Argument; +use Prophecy\PhpUnit\ProphecyTrait; class Fpdi2DriverTest extends \PHPUnit\Framework\TestCase { + use ProphecyTrait; public function testExceptionOnInvalidFpdi() { $this->expectException(\InvalidArgumentException::CLASS); diff --git a/tests/Driver/TcpdiDriverTest.php b/tests/Driver/TcpdiDriverTest.php index aa7b285..88b40fb 100644 --- a/tests/Driver/TcpdiDriverTest.php +++ b/tests/Driver/TcpdiDriverTest.php @@ -7,9 +7,11 @@ use iio\libmergepdf\Source\SourceInterface; use iio\libmergepdf\Exception; use iio\libmergepdf\Pages; +use Prophecy\PhpUnit\ProphecyTrait; class TcpdiDriverTest extends \PHPUnit\Framework\TestCase { + use ProphecyTrait; public function testExceptionOnFailure() { $tcpdi = $this->prophesize(\TCPDI::CLASS); diff --git a/tests/MergerTest.php b/tests/MergerTest.php index a8e7a5a..a57c773 100644 --- a/tests/MergerTest.php +++ b/tests/MergerTest.php @@ -7,9 +7,11 @@ use iio\libmergepdf\Driver\DriverInterface; use iio\libmergepdf\Source\FileSource; use iio\libmergepdf\Source\RawSource; +use Prophecy\PhpUnit\ProphecyTrait; class MergerTest extends \PHPUnit\Framework\TestCase { + use ProphecyTrait; public function testAddRaw() { $pages = $this->createMock(PagesInterface::CLASS); From e0c92faca0fc9e6d34c43c2cf7d624033eda483b Mon Sep 17 00:00:00 2001 From: Vitor Mattos Date: Sat, 29 Jan 2022 19:23:08 -0300 Subject: [PATCH 2/7] Drop support to old versions of PHP to work with new version of PHPUnit --- .travis.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index be3c435..29d9a17 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,8 +6,6 @@ php: - 8.0 - 7.4 - 7.3 - - 7.2 - - 7.1 install: - composer install From 76459795080739a719f557d0408eb7b9e39310e5 Mon Sep 17 00:00:00 2001 From: Vitor Mattos Date: Sat, 29 Jan 2022 20:13:31 -0300 Subject: [PATCH 3/7] Run PHPUnit on github actions --- .github/workflows/phpunit.yml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/workflows/phpunit.yml diff --git a/.github/workflows/phpunit.yml b/.github/workflows/phpunit.yml new file mode 100644 index 0000000..16ff7d1 --- /dev/null +++ b/.github/workflows/phpunit.yml @@ -0,0 +1,34 @@ +name: PHPUnit + +on: + pull_request: + push: + branches: [ master ] + +jobs: + phpunit: + runs-on: ubuntu-latest + strategy: + # do not stop on another job's failure + fail-fast: false + matrix: + coverage: ['xdebug'] + xdebug_mode: ['debug'] + php-versions: ['7.3', '7.4', '8.0'] + name: php${{ matrix.php-versions }} + + steps: + - name: Set up php ${{ matrix.php-versions }} + uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php-versions }} + coverage: ${{ matrix.coverage }} + tools: phpunit + + - name: Set up PHP dependencies + run: composer i + + - name: PHPUnit + env: + XDEBUG_MODE: ${{ matrix.xdebug_mode }} + run: ./vendor/phpunit/phpunit/phpunit -c phpunit.xml From 305e0b7a55cc6fe09e42476eeecc6cf023a6ef5a Mon Sep 17 00:00:00 2001 From: Vitor Mattos Date: Sat, 29 Jan 2022 21:16:49 -0300 Subject: [PATCH 4/7] Move CI from travis to GitHub Actions --- .github/workflows/behat.yml | 38 +++++++++++++++++++++++++++++++++++ .github/workflows/phpunit.yml | 18 ++++++++++++----- .travis.yml | 17 ---------------- composer.json | 3 ++- 4 files changed, 53 insertions(+), 23 deletions(-) create mode 100644 .github/workflows/behat.yml delete mode 100644 .travis.yml diff --git a/.github/workflows/behat.yml b/.github/workflows/behat.yml new file mode 100644 index 0000000..9d51e34 --- /dev/null +++ b/.github/workflows/behat.yml @@ -0,0 +1,38 @@ +name: Behat + +on: + pull_request: + push: + branches: [ master ] + +jobs: + phpunit: + runs-on: ubuntu-latest + strategy: + matrix: + php-versions: ['7.3', '7.4', '8.0'] + name: php${{ matrix.php-versions }} + + steps: + - uses: actions/checkout@v2 + + - name: Set up php ${{ matrix.php-versions }} + uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php-versions }} + coverage: ${{ matrix.coverage }} + + - name: Cache Composer packages + id: composer-cache + uses: actions/cache@v2 + with: + path: vendor + key: ${{ runner.os }}-php-${{ hashFiles('**/composer.lock') }} + restore-keys: | + ${{ runner.os }}-php- + + - name: Set up PHP dependencies + run: composer i + + - name: Behat + run: vendor/bin/behat diff --git a/.github/workflows/phpunit.yml b/.github/workflows/phpunit.yml index 16ff7d1..15dc731 100644 --- a/.github/workflows/phpunit.yml +++ b/.github/workflows/phpunit.yml @@ -9,8 +9,6 @@ jobs: phpunit: runs-on: ubuntu-latest strategy: - # do not stop on another job's failure - fail-fast: false matrix: coverage: ['xdebug'] xdebug_mode: ['debug'] @@ -18,15 +16,25 @@ jobs: name: php${{ matrix.php-versions }} steps: + - uses: actions/checkout@v2 + - name: Set up php ${{ matrix.php-versions }} uses: shivammathur/setup-php@v2 with: php-version: ${{ matrix.php-versions }} coverage: ${{ matrix.coverage }} - tools: phpunit - - name: Set up PHP dependencies - run: composer i + - name: Cache Composer packages + id: composer-cache + uses: actions/cache@v2 + with: + path: vendor + key: ${{ runner.os }}-php-${{ hashFiles('**/composer.lock') }} + restore-keys: | + ${{ runner.os }}-php- + + - name: Install dependencies + run: composer install --prefer-dist --no-progress - name: PHPUnit env: diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 29d9a17..0000000 --- a/.travis.yml +++ /dev/null @@ -1,17 +0,0 @@ -sudo: false - -language: php - -php: - - 8.0 - - 7.4 - - 7.3 - -install: - - composer install - - composer global require behat/behat - - export PATH=$PATH:$HOME/.config/composer/vendor/bin/ - -script: - - vendor/bin/phpunit - - behat diff --git a/composer.json b/composer.json index 56b69b1..c4be533 100644 --- a/composer.json +++ b/composer.json @@ -22,7 +22,8 @@ "require": { "php": "^7.1||^8.0", "tecnickcom/tcpdf": "^6.2.22", - "setasign/fpdi": "^2" + "setasign/fpdi": "^2", + "behat/behat": "^3.10" }, "conflict": { "setasign/fpdf": "*", From 8f14fd75aaf1e28575258ba424ba1f52d05cc394 Mon Sep 17 00:00:00 2001 From: Vitor Mattos Date: Sat, 29 Jan 2022 21:23:14 -0300 Subject: [PATCH 5/7] Run CI on PHP 8.1 --- .github/workflows/behat.yml | 2 +- .github/workflows/phpunit.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/behat.yml b/.github/workflows/behat.yml index 9d51e34..295bba5 100644 --- a/.github/workflows/behat.yml +++ b/.github/workflows/behat.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - php-versions: ['7.3', '7.4', '8.0'] + php-versions: ['7.3', '7.4', '8.0', '8.1'] name: php${{ matrix.php-versions }} steps: diff --git a/.github/workflows/phpunit.yml b/.github/workflows/phpunit.yml index 15dc731..38434de 100644 --- a/.github/workflows/phpunit.yml +++ b/.github/workflows/phpunit.yml @@ -12,7 +12,7 @@ jobs: matrix: coverage: ['xdebug'] xdebug_mode: ['debug'] - php-versions: ['7.3', '7.4', '8.0'] + php-versions: ['7.3', '7.4', '8.0', '8.1'] name: php${{ matrix.php-versions }} steps: From 36cc6a8e013fbe390b71d2388a6e4c8b9d0c684e Mon Sep 17 00:00:00 2001 From: Vitor Mattos Date: Sun, 30 Jan 2022 15:28:11 -0300 Subject: [PATCH 6/7] Moved behat to require-dev --- composer.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/composer.json b/composer.json index c4be533..27e5cc7 100644 --- a/composer.json +++ b/composer.json @@ -22,8 +22,7 @@ "require": { "php": "^7.1||^8.0", "tecnickcom/tcpdf": "^6.2.22", - "setasign/fpdi": "^2", - "behat/behat": "^3.10" + "setasign/fpdi": "^2" }, "conflict": { "setasign/fpdf": "*", @@ -32,6 +31,7 @@ "require-dev": { "phpunit/phpunit": "^9.5", "smalot/pdfparser": "~0.13", - "phpspec/prophecy-phpunit": "^2.0" + "phpspec/prophecy-phpunit": "^2.0", + "behat/behat": "^3.10" } } From 4304a115056e595725e6105a42e47cebe0416ed2 Mon Sep 17 00:00:00 2001 From: Vitor Mattos Date: Sun, 30 Jan 2022 15:52:29 -0300 Subject: [PATCH 7/7] Move tcpdi_parser to package --- composer.json | 3 +- tcpdi/tcpdi.php | 6 +- tcpdi/tcpdi_parser.php | 1450 ---------------------------------------- 3 files changed, 5 insertions(+), 1454 deletions(-) delete mode 100644 tcpdi/tcpdi_parser.php diff --git a/composer.json b/composer.json index 27e5cc7..62d09e8 100644 --- a/composer.json +++ b/composer.json @@ -22,7 +22,8 @@ "require": { "php": "^7.1||^8.0", "tecnickcom/tcpdf": "^6.2.22", - "setasign/fpdi": "^2" + "setasign/fpdi": "^2", + "libresign/tcpdi_parser": "^0.4.0" }, "conflict": { "setasign/fpdf": "*", diff --git a/tcpdi/tcpdi.php b/tcpdi/tcpdi.php index f210d42..3200ddb 100644 --- a/tcpdi/tcpdi.php +++ b/tcpdi/tcpdi.php @@ -1,4 +1,7 @@ . -// -// See LICENSE file for more information. -// ------------------------------------------------------------------- -// -// Description : This is a PHP class for parsing PDF documents. -// -//============================================================+ - -/** - * @file - * This is a PHP class for parsing PDF documents.
- * @author Paul Nicholls - * @author Nicola Asuni - * @version 1.1 - */ - -if (!defined ('PDF_TYPE_NULL')) - define ('PDF_TYPE_NULL', 0); -if (!defined ('PDF_TYPE_NUMERIC')) - define ('PDF_TYPE_NUMERIC', 1); -if (!defined ('PDF_TYPE_TOKEN')) - define ('PDF_TYPE_TOKEN', 2); -if (!defined ('PDF_TYPE_HEX')) - define ('PDF_TYPE_HEX', 3); -if (!defined ('PDF_TYPE_STRING')) - define ('PDF_TYPE_STRING', 4); -if (!defined ('PDF_TYPE_DICTIONARY')) - define ('PDF_TYPE_DICTIONARY', 5); -if (!defined ('PDF_TYPE_ARRAY')) - define ('PDF_TYPE_ARRAY', 6); -if (!defined ('PDF_TYPE_OBJDEC')) - define ('PDF_TYPE_OBJDEC', 7); -if (!defined ('PDF_TYPE_OBJREF')) - define ('PDF_TYPE_OBJREF', 8); -if (!defined ('PDF_TYPE_OBJECT')) - define ('PDF_TYPE_OBJECT', 9); -if (!defined ('PDF_TYPE_STREAM')) - define ('PDF_TYPE_STREAM', 10); -if (!defined ('PDF_TYPE_BOOLEAN')) - define ('PDF_TYPE_BOOLEAN', 11); -if (!defined ('PDF_TYPE_REAL')) - define ('PDF_TYPE_REAL', 12); - -/** - * @class tcpdi_parser - * This is a PHP class for parsing PDF documents.
- * Based on TCPDF_PARSER, part of the TCPDF project by Nicola Asuni. - * @brief This is a PHP class for parsing PDF documents.. - * @version 1.1 - * @author Paul Nicholls - github.com/pauln - * @author Nicola Asuni - info@tecnick.com - */ -class tcpdi_parser { - /** - * Unique parser ID - * @public - */ - public $uniqueid = ''; - - /** - * Raw content of the PDF document. - * @private - */ - private $pdfdata = ''; - - /** - * XREF data. - * @protected - */ - protected $xref = array(); - - /** - * Object streams. - * @protected - */ - protected $objstreams = array(); - - /** - * Objects in objstreams. - * @protected - */ - protected $objstreamobjs = array(); - - /** - * List of seen XREF data locations. - * @protected - */ - protected $xref_seen_offsets = array(); - - /** - * Array of PDF objects. - * @protected - */ - protected $objects = array(); - - /** - * Array of object offsets. - * @private - */ - private $objoffsets = array(); - - /** - * Class object for decoding filters. - * @private - */ - private $FilterDecoders; - - /** - * Pages - * - * @private array - */ - private $pages; - - /** - * Page count - * @private integer - */ - private $page_count; - - /** - * actual page number - * @private integer - */ - private $pageno; - - /** - * PDF version of the loaded document - * @private string - */ - private $pdfVersion; - - /** - * Available BoxTypes - * - * @public array - */ - public $availableBoxes = array('/MediaBox', '/CropBox', '/BleedBox', '/TrimBox', '/ArtBox'); - -// ----------------------------------------------------------------------------- - - /** - * Parse a PDF document an return an array of objects. - * @param $data (string) PDF data to parse. - * @public - * @since 1.0.000 (2011-05-24) - */ - public function __construct($data, $uniqueid) { - if (empty($data)) { - $this->Error('Empty PDF data.'); - } - $this->uniqueid = $uniqueid; - $this->pdfdata = $data; - // get length - $pdflen = strlen($this->pdfdata); - // initialize class for decoding filters - $this->FilterDecoders = new TCPDF_FILTERS(); - // get xref and trailer data - $this->xref = $this->getXrefData(); - $this->findObjectOffsets(); - // parse all document objects - $this->objects = array(); - /*foreach ($this->xref['xref'] as $obj => $offset) { - if (!isset($this->objects[$obj]) AND ($offset > 0)) { - // decode only objects with positive offset - //$this->objects[$obj] = $this->getIndirectObject($obj, $offset, true); - } - }*/ - $this->getPDFVersion(); - $this->readPages(); - } - - /** - * Clean up when done, to free memory etc - */ - public function cleanUp() { - unset($this->pdfdata); - $this->pdfdata = ''; - unset($this->objstreams); - $this->objstreams = array(); - unset($this->objects); - $this->objects = array(); - unset($this->objstreamobjs); - $this->objstreamobjs = array(); - unset($this->xref); - $this->xref = array(); - unset($this->objoffsets); - $this->objoffsets = array(); - unset($this->pages); - $this->pages = array(); - } - - /** - * Return an array of parsed PDF document objects. - * @return (array) Array of parsed PDF document objects. - * @public - * @since 1.0.000 (2011-06-26) - */ - public function getParsedData() { - return array($this->xref, $this->objects, $this->pages); - } - - /** - * Get PDF-Version - * - * And reset the PDF Version used in FPDI if needed - * @public - */ - public function getPDFVersion() { - preg_match('/\d\.\d/', substr($this->pdfdata, 0, 16), $m); - if (isset($m[0])) - $this->pdfVersion = $m[0]; - return $this->pdfVersion; - } - - /** - * Read all /Page(es) - * - */ - function readPages() { - $params = $this->getObjectVal($this->xref['trailer'][1]['/Root']); - $objref = null; - if ($params && $params[1] && is_array($params[1][1])) { - foreach ($params[1][1] as $k=>$v) { - if ($k == '/Pages') { - $objref = $v; - break; - } - } - } - if ($objref == null || $objref[0] !== PDF_TYPE_OBJREF) { - // Offset not found. - return; - } - - $dict = $this->getObjectVal($objref); - if ($dict[0] == PDF_TYPE_OBJECT && $dict[1][0] == PDF_TYPE_DICTIONARY) { - // Dict wrapped in an object - $dict = $dict[1]; - } - - if ($dict[0] !== PDF_TYPE_DICTIONARY) { - return; - } - - $this->pages = array(); - if (isset($dict[1]['/Kids'])) { - $v = $dict[1]['/Kids']; - if ($v[0] == PDF_TYPE_ARRAY) { - foreach ($v[1] as $ref) { - $page = $this->getObjectVal($ref); - $this->readPage($page); - } - } - } - - $this->page_count = count($this->pages); - } - - /** - * Read a single /Page element, recursing through /Kids if necessary - * - */ - private function readPage($page) { - if (isset($page[1][1]['/Kids'])) { - // Nested pages! - foreach ($page[1][1]['/Kids'][1] as $subref) { - $subpage = $this->getObjectVal($subref); - $this->readPage($subpage); - } - } else { - $this->pages[] = $page; - } - } - - /** - * Get pagecount from sourcefile - * - * @return int - */ - function getPageCount() { - return $this->page_count; - } - - /** - * Get Cross-Reference (xref) table and trailer data from PDF document data. - * @param $offset (int) xref offset (if know). - * @param $xref (array) previous xref array (if any). - * @return Array containing xref and trailer data. - * @protected - * @since 1.0.000 (2011-05-24) - */ - protected function getXrefData($offset=0, $xref=array()) { - if ($offset == 0) { - // find last startxref - if (preg_match('/.*[\r\n]startxref[\s\r\n]+([0-9]+)[\s\r\n]+%%EOF/is', $this->pdfdata, $matches) == 0) { - $this->Error('Unable to find startxref'); - } - $startxref = $matches[1]; - } else { - if (preg_match('/([0-9]+[\s][0-9]+[\s]obj)/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) { - // Cross-Reference Stream object - $startxref = $offset; - } elseif (preg_match('/[\r\n]startxref[\s\r\n]+([0-9]+)[\s\r\n]+%%EOF/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) { - // startxref found - $startxref = $matches[1][0]; - } else { - $this->Error('Unable to find startxref'); - } - } - unset($matches); - - // DOMPDF gets the startxref wrong, giving us the linebreak before the xref starts. - $startxref += strspn($this->pdfdata, "\r\n", $startxref); - - // check xref position - if (strpos($this->pdfdata, 'xref', $startxref) == $startxref) { - // Cross-Reference - $xref = $this->decodeXref($startxref, $xref); - } else { - // Cross-Reference Stream - $xref = $this->decodeXrefStream($startxref, $xref); - } - if (empty($xref)) { - $this->Error('Unable to find xref'); - } - - return $xref; - } - - /** - * Decode the Cross-Reference section - * @param $startxref (int) Offset at which the xref section starts. - * @param $xref (array) Previous xref array (if any). - * @return Array containing xref and trailer data. - * @protected - * @since 1.0.000 (2011-06-20) - */ - protected function decodeXref($startxref, $xref=array()) { - $this->xref_seen_offsets[] = $startxref; - if (!isset($xref['xref_location'])) { - $xref['xref_location'] = $startxref; - $xref['max_object'] = 0; - } - // extract xref data (object indexes and offsets) - $xoffset = $startxref + 5; - // initialize object number - $obj_num = 0; - $offset = $xoffset; - while (preg_match('/^([0-9]+)[\s]([0-9]+)[\s]?([nf]?)/im', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) { - $offset = (strlen($matches[0][0]) + $matches[0][1]); - if ($matches[3][0] == 'n') { - // create unique object index: [object number]_[generation number] - $gen_num = intval($matches[2][0]); - $index = $obj_num.'_'.$gen_num; - // check if object already exist - if (!isset($xref['xref'][$obj_num][$gen_num])) { - // store object offset position - $xref['xref'][$obj_num][$gen_num] = intval($matches[1][0]); - } - ++$obj_num; - $offset += 2; - } elseif ($matches[3][0] == 'f') { - ++$obj_num; - $offset += 2; - } else { - // object number (index) - $obj_num = intval($matches[1][0]); - } - } - unset($matches); - $xref['max_object'] = max($xref['max_object'], $obj_num); - // get trailer data - if (preg_match('/trailer[\s]*<<(.*)>>[\s\r\n]+(?:[%].*[\r\n]+)*startxref[\s\r\n]+/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $xoffset) > 0) { - $trailer_data = $matches[1][0]; - if (!isset($xref['trailer']) OR empty($xref['trailer'])) { - // get only the last updated version - $xref['trailer'] = array(); - $xref['trailer'][0] = PDF_TYPE_DICTIONARY; - $xref['trailer'][1] = array(); - // parse trailer_data - if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) { - $xref['trailer'][1]['/Size'] = array(PDF_TYPE_NUMERIC, intval($matches[1])); - } - if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) { - $xref['trailer'][1]['/Root'] = array(PDF_TYPE_OBJREF, intval($matches[1]), intval($matches[2])); - } - if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) { - $xref['trailer'][1]['/Encrypt'] = array(PDF_TYPE_OBJREF, intval($matches[1]), intval($matches[2])); - } - if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) { - $xref['trailer'][1]['/Info'] = array(PDF_TYPE_OBJREF, intval($matches[1]), intval($matches[2])); - } - if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) { - $xref['trailer'][1]['/ID'] = array(PDF_TYPE_ARRAY, array()); - $xref['trailer'][1]['/ID'][1][0] = array(PDF_TYPE_HEX, $matches[1]); - $xref['trailer'][1]['/ID'][1][1] = array(PDF_TYPE_HEX, $matches[2]); - } - } - if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) { - // get previous xref - $prevoffset = intval($matches[1]); - if (!in_array($prevoffset, $this->xref_seen_offsets)) { - $this->xref_seen_offsets[] = $prevoffset; - $xref = $this->getXrefData($prevoffset, $xref); - } - } - unset($matches); - } else { - $this->Error('Unable to find trailer'); - } - return $xref; - } - - /** - * Decode the Cross-Reference Stream section - * @param $startxref (int) Offset at which the xref section starts. - * @param $xref (array) Previous xref array (if any). - * @return Array containing xref and trailer data. - * @protected - * @since 1.0.003 (2013-03-16) - */ - protected function decodeXrefStream($startxref, $xref=array()) { - // try to read Cross-Reference Stream - list($xrefobj, $unused) = $this->getRawObject($startxref); - $xrefcrs = $this->getIndirectObject($xrefobj[1], $startxref, true); - if (!isset($xref['xref_location'])) { - $xref['xref_location'] = $startxref; - $xref['max_object'] = 0; - } - if (!isset($xref['xref'])) { - $xref['xref'] = array(); - } - if (!isset($xref['trailer']) OR empty($xref['trailer'])) { - // get only the last updated version - $xref['trailer'] = array(); - $xref['trailer'][0] = PDF_TYPE_DICTIONARY; - $xref['trailer'][1] = array(); - $filltrailer = true; - } else { - $filltrailer = false; - } - $valid_crs = false; - $sarr = $xrefcrs[0][1]; - $keys = array_keys($sarr); - $columns = 1; // Default as per PDF 32000-1:2008. - $predictor = 1; // Default as per PDF 32000-1:2008. - foreach ($keys as $k=>$key) { - $v = $sarr[$key]; - if (($key == '/Type') AND ($v[0] == PDF_TYPE_TOKEN AND ($v[1] == 'XRef'))) { - $valid_crs = true; - } elseif (($key == '/Index') AND ($v[0] == PDF_TYPE_ARRAY AND count($v[1]) >= 2)) { - // first object number in the subsection - $index_first = intval($v[1][0][1]); - // number of entries in the subsection - $index_entries = intval($v[1][1][1]); - } elseif (($key == '/Prev') AND ($v[0] == PDF_TYPE_NUMERIC)) { - // get previous xref offset - $prevxref = intval($v[1]); - } elseif (($key == '/W') AND ($v[0] == PDF_TYPE_ARRAY)) { - // number of bytes (in the decoded stream) of the corresponding field - $wb = array(); - $wb[0] = intval($v[1][0][1]); - $wb[1] = intval($v[1][1][1]); - $wb[2] = intval($v[1][2][1]); - } elseif (($key == '/DecodeParms') AND ($v[0] == PDF_TYPE_DICTIONARY)) { - $decpar = $v[1]; - foreach ($decpar as $kdc => $vdc) { - if (($kdc == '/Columns') AND ($vdc[0] == PDF_TYPE_NUMERIC)) { - $columns = intval($vdc[1]); - } elseif (($kdc == '/Predictor') AND ($vdc[0] == PDF_TYPE_NUMERIC)) { - $predictor = intval($vdc[1]); - } - } - } elseif ($filltrailer) { - switch($key) { - case '/Size': - case '/Root': - case '/Info': - case '/ID': - $xref['trailer'][1][$key] = $v; - break; - default: - break; - } - } - } - // decode data - $obj_num = 0; - if ($valid_crs AND isset($xrefcrs[1][3][0])) { - // number of bytes in a row - $rowlen = ($columns + 1); - // convert the stream into an array of integers - $sdata = unpack('C*', $xrefcrs[1][3][0]); - // split the rows - $sdata = array_chunk($sdata, $rowlen); - // initialize decoded array - $ddata = array(); - // initialize first row with zeros - $prev_row = array_fill (0, $rowlen, 0); - // for each row apply PNG unpredictor - foreach ($sdata as $k => $row) { - // initialize new row - $ddata[$k] = array(); - // get PNG predictor value - if (empty($predictor)) { - $predictor = (10 + $row[0]); - } - // for each byte on the row - for ($i=1; $i<=$columns; ++$i) { - if (!isset($row[$i])) { - // No more data in this row - we're done here. - break; - } - // new index - $j = ($i - 1); - $row_up = $prev_row[$j]; - if ($i == 1) { - $row_left = 0; - $row_upleft = 0; - } else { - $row_left = $row[($i - 1)]; - $row_upleft = $prev_row[($j - 1)]; - } - switch ($predictor) { - case 1: // No prediction (equivalent to PNG None) - case 10: { // PNG prediction (on encoding, PNG None on all rows) - $ddata[$k][$j] = $row[$i]; - break; - } - case 11: { // PNG prediction (on encoding, PNG Sub on all rows) - $ddata[$k][$j] = (($row[$i] + $row_left) & 0xff); - break; - } - case 12: { // PNG prediction (on encoding, PNG Up on all rows) - $ddata[$k][$j] = (($row[$i] + $row_up) & 0xff); - break; - } - case 13: { // PNG prediction (on encoding, PNG Average on all rows) - $ddata[$k][$j] = (($row[$i] + (($row_left + $row_up) / 2)) & 0xff); - break; - } - case 14: { // PNG prediction (on encoding, PNG Paeth on all rows) - // initial estimate - $p = ($row_left + $row_up - $row_upleft); - // distances - $pa = abs($p - $row_left); - $pb = abs($p - $row_up); - $pc = abs($p - $row_upleft); - $pmin = min($pa, $pb, $pc); - // return minumum distance - switch ($pmin) { - case $pa: { - $ddata[$k][$j] = (($row[$i] + $row_left) & 0xff); - break; - } - case $pb: { - $ddata[$k][$j] = (($row[$i] + $row_up) & 0xff); - break; - } - case $pc: { - $ddata[$k][$j] = (($row[$i] + $row_upleft) & 0xff); - break; - } - } - break; - } - default: { // PNG prediction (on encoding, PNG optimum) - $this->Error("Unknown PNG predictor $predictor"); - break; - } - } - } - $prev_row = $ddata[$k]; - } // end for each row - // complete decoding - unset($sdata); - $sdata = array(); - // for every row - foreach ($ddata as $k => $row) { - // initialize new row - $sdata[$k] = array(0, 0, 0); - if ($wb[0] == 0) { - // default type field - $sdata[$k][0] = 1; - } - $i = 0; // count bytes on the row - // for every column - for ($c = 0; $c < 3; ++$c) { - // for every byte on the column - for ($b = 0; $b < $wb[$c]; ++$b) { - if (isset($row[$i])) { - $sdata[$k][$c] += ($row[$i] << (($wb[$c] - 1 - $b) * 8)); - } - ++$i; - } - } - } - unset($ddata); - // fill xref - if (isset($index_first)) { - $obj_num = $index_first; - } else { - $obj_num = 0; - } - foreach ($sdata as $k => $row) { - switch ($row[0]) { - case 0: { // (f) linked list of free objects - ++$obj_num; - break; - } - case 1: { // (n) objects that are in use but are not compressed - // create unique object index: [object number]_[generation number] - $index = $obj_num.'_'.$row[2]; - // check if object already exist - if (!isset($xref['xref'][$obj_num][$row[2]])) { - // store object offset position - $xref['xref'][$obj_num][$row[2]] = $row[1]; - } - ++$obj_num; - break; - } - case 2: { // compressed objects - // $row[1] = object number of the object stream in which this object is stored - // $row[2] = index of this object within the object stream - /*$index = $row[1].'_0_'.$row[2]; - $xref['xref'][$row[1]][0][$row[2]] = -1;*/ - break; - } - default: { // null objects - break; - } - } - } - } // end decoding data - $xref['max_object'] = max($xref['max_object'], $obj_num); - if (isset($prevxref)) { - // get previous xref - $xref = $this->getXrefData($prevxref, $xref); - } - return $xref; - } - - /** - * Get raw stream data - * @param $offset (int) Stream offset. - * @param $length (int) Stream length. - * @return string Steam content - * @protected - */ - protected function getRawStream($offset, $length) { - $offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset); - $offset += 6; // "stream" - $offset += strspn($this->pdfdata, "\x20", $offset); - $offset += strspn($this->pdfdata, "\r\n", $offset); - - $obj = array(); - $obj[] = PDF_TYPE_STREAM; - $obj[] = substr($this->pdfdata, $offset, $length); - - return array($obj, $offset+$length); - } - - /** - * Get object type, raw value and offset to next object - * @param $offset (int) Object offset. - * @return array containing object type, raw value and offset to next object - * @protected - * @since 1.0.000 (2011-06-20) - */ - protected function getRawObject($offset=0, $data=null) { - if ($data == null) { - $data =& $this->pdfdata; - } - $objtype = ''; // object type to be returned - $objval = ''; // object value to be returned - // skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP) - while (strspn($data[$offset], "\x00\x09\x0a\x0c\x0d\x20") == 1) { - $offset++; - } - // get first char - $char = $data[$offset]; - // get object type - switch ($char) { - case '%': { // \x25 PERCENT SIGN - // skip comment and search for next token - $next = strcspn($data, "\r\n", $offset); - if ($next > 0) { - $offset += $next; - list($obj, $unused) = $this->getRawObject($offset, $data); - return $obj; - } - break; - } - case '/': { // \x2F SOLIDUS - // name object - $objtype = PDF_TYPE_TOKEN; - ++$offset; - $length = strcspn($data, "\x00\x09\x0a\x0c\x0d\x20\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25", $offset); - $objval = substr($data, $offset, $length); - $offset += $length; - break; - } - case '(': // \x28 LEFT PARENTHESIS - case ')': { // \x29 RIGHT PARENTHESIS - // literal string object - $objtype = PDF_TYPE_STRING; - ++$offset; - $strpos = $offset; - if ($char == '(') { - $open_bracket = 1; - while ($open_bracket > 0) { - if (!isset($data[$strpos])) { - break; - } - $ch = $data[$strpos]; - switch ($ch) { - case '\\': { // REVERSE SOLIDUS (5Ch) (Backslash) - // skip next character - ++$strpos; - break; - } - case '(': { // LEFT PARENHESIS (28h) - ++$open_bracket; - break; - } - case ')': { // RIGHT PARENTHESIS (29h) - --$open_bracket; - break; - } - } - ++$strpos; - } - $objval = substr($data, $offset, ($strpos - $offset - 1)); - $offset = $strpos; - } - break; - } - case '[': // \x5B LEFT SQUARE BRACKET - case ']': { // \x5D RIGHT SQUARE BRACKET - // array object - $objtype = PDF_TYPE_ARRAY; - ++$offset; - if ($char == '[') { - // get array content - $objval = array(); - do { - // get element - list($element, $offset) = $this->getRawObject($offset, $data); - $objval[] = $element; - } while ($element[0] !== ']'); - // remove closing delimiter - array_pop($objval); - } else { - $objtype = ']'; - } - break; - } - case '<': // \x3C LESS-THAN SIGN - case '>': { // \x3E GREATER-THAN SIGN - if (isset($data[($offset + 1)]) AND ($data[($offset + 1)] == $char)) { - // dictionary object - $objtype = PDF_TYPE_DICTIONARY; - if ($char == '<') { - list ($objval, $offset) = $this->getDictValue($offset, $data); - } else { - $objtype = '>>'; - $offset += 2; - } - } else { - // hexadecimal string object - $objtype = PDF_TYPE_HEX; - ++$offset; - // The "Panose" entry in the FontDescriptor Style dict seems to have hex bytes separated by spaces. - if (($char == '<') AND (preg_match('/^([0-9A-Fa-f ]+)[>]/iU', substr($data, $offset), $matches) == 1)) { - $objval = $matches[1]; - $offset += strlen($matches[0]); - unset($matches); - } - } - break; - } - default: { - $frag = $data[$offset] . @$data[$offset+1] . @$data[$offset+2] . @$data[$offset+3]; - switch ($frag) { - case 'endo': - // indirect object - $objtype = 'endobj'; - $offset += 6; - break; - case 'stre': - // Streams should always be indirect objects, and thus processed by getRawStream(). - // If we get here, treat it as a null object as something has gone wrong. - case 'null': - // null object - $objtype = PDF_TYPE_NULL; - $offset += 4; - $objval = 'null'; - break; - case 'true': - // boolean true object - $objtype = PDF_TYPE_BOOLEAN; - $offset += 4; - $objval = true; - break; - case 'fals': - // boolean false object - $objtype = PDF_TYPE_BOOLEAN; - $offset += 5; - $objval = false; - break; - case 'ends': - // end stream object - $objtype = 'endstream'; - $offset += 9; - break; - default: - if (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+([Robj]{1,3})/i', substr($data, $offset, 33), $matches) == 1) { - if ($matches[3] == 'R') { - // indirect object reference - $objtype = PDF_TYPE_OBJREF; - $offset += strlen($matches[0]); - $objval = array(intval($matches[1]), intval($matches[2])); - } elseif ($matches[3] == 'obj') { - // object start - $objtype = PDF_TYPE_OBJECT; - $objval = intval($matches[1]).'_'.intval($matches[2]); - $offset += strlen ($matches[0]); - } - } elseif (($numlen = strspn($data, '+-.0123456789', $offset)) > 0) { - // numeric object - $objval = substr($data, $offset, $numlen); - $objtype = (intval($objval) != $objval) ? PDF_TYPE_REAL : PDF_TYPE_NUMERIC; - $offset += $numlen; - } - unset($matches); - break; - } - break; - } - } - $obj = array(); - $obj[] = $objtype; - if ($objtype == PDF_TYPE_OBJREF && is_array($objval)) { - foreach ($objval as $val) { - $obj[] = $val; - } - } else { - $obj[] = $objval; - } - return array($obj, $offset); - } - private function getDictValue($offset, &$data) { - $objval = array(); - - // Extract dict from data. - $i=1; - $dict = ''; - $offset += 2; - do { - if ($data[$offset] == '>' && $data[$offset+1] == '>') { - $i--; - $dict .= '>>'; - $offset += 2; - } else if ($data[$offset] == '<' && $data[$offset+1] == '<') { - $i++; - $dict .= '<<'; - $offset += 2; - } else { - $dict .= $data[$offset]; - $offset++; - } - } while ($i>0); - - // Now that we have just the dict, parse it. - $dictoffset = 0; - do { - // Get dict element. - list($key, $eloffset) = $this->getRawObject($dictoffset, $dict); - if ($key[0] == '>>') { - break; - } - list($element, $dictoffset) = $this->getRawObject($eloffset, $dict); - $objval['/'.$key[1]] = $element; - unset($key); - unset($element); - } while (true); - - return array($objval, $offset); - } - - /** - * Get content of indirect object. - * @param $obj_ref (string) Object number and generation number separated by underscore character. - * @param $offset (int) Object offset. - * @param $decoding (boolean) If true decode streams. - * @return array containing object data. - * @protected - * @since 1.0.000 (2011-05-24) - */ - protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) { - $obj = explode('_', $obj_ref); - if (($obj === false) OR (count($obj) != 2)) { - $this->Error('Invalid object reference: '.$obj); - return; - } - $objref = $obj[0].' '.$obj[1].' obj'; - - if (strpos($this->pdfdata, $objref, $offset) != $offset) { - // an indirect reference to an undefined object shall be considered a reference to the null object - return array('null', 'null', $offset); - } - // starting position of object content - $offset += strlen($objref); - // get array of object content - $objdata = array(); - $i = 0; // object main index - do { - if (($i > 0) AND (isset($objdata[($i - 1)][0])) AND ($objdata[($i - 1)][0] == PDF_TYPE_DICTIONARY) AND array_key_exists('/Length', $objdata[($i - 1)][1])) { - // Stream - get using /Length in stream's dict - $lengthobj = $objdata[($i-1)][1]['/Length']; - if ($lengthobj[0] === PDF_TYPE_OBJREF) { - $lengthobj = $this->getObjectVal($lengthobj); - if ($lengthobj[0] === PDF_TYPE_OBJECT) { - $lengthobj = $lengthobj[1]; - } - } - $streamlength = $lengthobj[1]; - list($element, $offset) = $this->getRawStream($offset, $streamlength); - } else { - // get element - list($element, $offset) = $this->getRawObject($offset); - } - // decode stream using stream's dictionary information - if ($decoding AND ($element[0] == PDF_TYPE_STREAM) AND (isset($objdata[($i - 1)][0])) AND ($objdata[($i - 1)][0] == PDF_TYPE_DICTIONARY)) { - $element[3] = $this->decodeStream($objdata[($i - 1)][1], $element[1]); - } - $objdata[$i] = $element; - ++$i; - } while ($element[0] != 'endobj'); - // remove closing delimiter - array_pop($objdata); - // return raw object content - return $objdata; - } - - /** - * Get the content of object, resolving indect object reference if necessary. - * @param $obj (string) Object value. - * @return array containing object data. - * @public - * @since 1.0.000 (2011-06-26) - */ - public function getObjectVal($obj) { - if ($obj[0] == PDF_TYPE_OBJREF) { - if (strpos($obj[1], '_') !== false) { - $key = explode('_', $obj[1]); - } else { - $key = array($obj[1], $obj[2]); - } - - $ret = array(0=>PDF_TYPE_OBJECT, 'obj'=>$key[0], 'gen'=>$key[1]); - - // reference to indirect object - $object = null; - if (isset($this->objects[$key[0]][$key[1]])) { - // this object has been already parsed - $object = $this->objects[$key[0]][$key[1]]; - } elseif (($offset = $this->findObjectOffset($key)) !== false) { - // parse new object - $this->objects[$key[0]][$key[1]] = $this->getIndirectObject($key[0].'_'.$key[1], $offset, false); - $object = $this->objects[$key[0]][$key[1]]; - } elseif (($key[1] == 0) && isset($this->objstreamobjs[$key[0]])) { - // Object is in an object stream - $streaminfo = $this->objstreamobjs[$key[0]]; - $objs = $streaminfo[0]; - if (!isset($this->objstreams[$objs[0]][$objs[1]])) { - // Fetch and decode object stream - $offset = $this->findObjectOffset($objs);; - $objstream = $this->getObjectVal(array(PDF_TYPE_OBJREF, $objs[0], $objs[1])); - $decoded = $this->decodeStream($objstream[1][1], $objstream[2][1]); - $this->objstreams[$objs[0]][$objs[1]] = $decoded[0]; // Store just the data, in case we need more from this objstream - // Free memory - unset($objstream); - unset($decoded); - } - $this->objects[$key[0]][$key[1]] = $this->getRawObject($streaminfo[1], $this->objstreams[$objs[0]][$objs[1]]); - $object = $this->objects[$key[0]][$key[1]]; - } - if (!is_null($object)) { - $ret[1] = $object[0]; - if (isset($object[1][0]) && $object[1][0] == PDF_TYPE_STREAM) { - $ret[0] = PDF_TYPE_STREAM; - $ret[2] = $object[1]; - } - return $ret; - } - } - return $obj; - } - - /** - * Extract object stream to find out what it contains. - * - */ - function extractObjectStream($key) { - $objref = array(PDF_TYPE_OBJREF, $key[0], $key[1]); - $obj = $this->getObjectVal($objref); - if ($obj[0] !== PDF_TYPE_STREAM || !isset($obj[1][1]['/First'][1])) { - // Not a valid object stream dictionary - skip it. - return; - } - $stream = $this->decodeStream($obj[1][1], $obj[2][1]);// Decode object stream, as we need the first bit - $first = intval($obj[1][1]['/First'][1]); - $ints = preg_split('/\s/', substr($stream[0], 0, $first)); // Get list of object / offset pairs - for ($j=1; $jobjstreamobjs[$ints[$j-1]] = array($key, $ints[$j]+$first); - } - } - - // Free memory - we may not need this at all. - unset($obj); - unset($stream); - } - - /** - * Find all object offsets. Saves having to scour the file multiple times. - * @private - */ - private function findObjectOffsets() { - $this->objoffsets = array(); - if (preg_match_all('/(*ANYCRLF)^[\s]*([0-9]+)[\s]+([0-9]+)[\s]+obj/im', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE) >= 1) { - $i = 0; - $laststreamend = 0; - foreach($matches[0] as $match) { - $offset = $match[1] + strspn($match[0], "\x00\x09\x0a\x0c\x0d\x20"); - if ($offset < $laststreamend) { - // Contained within another stream, skip it. - continue; - } - $this->objoffsets[trim($match[0])] = $offset; - $dictoffset = $match[1] + strlen($match[0]); - $dictfrag = substr($this->pdfdata, $dictoffset, 256); - if (preg_match('|^\s+<<[^>]+/Length\s+(\d+)|', $dictfrag, $lengthmatch, PREG_OFFSET_CAPTURE) == 1) { - $laststreamend += intval($lengthmatch[1][0]); - } - if (preg_match('|^\s+<<[^>]+/ObjStm|', $dictfrag, $objstm) == 1) { - $this->extractObjectStream(array($matches[1][$i][0], $matches[2][$i][0])); - } - $i++; - } - } - unset($lengthmatch); - unset($dictfrag); - unset($matches); - } - - /** - * Get offset of an object. Checks xref first, then offsets found by scouring the file. - * @param $key (array) Object key to find (obj, gen). - * @return int Offset of the object in $this->pdfdata. - * @private - */ - private function findObjectOffset($key) { - $objref = $key[0].' '.$key[1].' obj'; - if (isset($this->xref['xref'][$key[0]][$key[1]])) { - $offset = $this->xref['xref'][$key[0]][$key[1]]; - if (strpos($this->pdfdata, $objref, $offset) === $offset) { - // Offset is in xref table and matches actual position in file - //echo "Offset in XREF is correct, returning
"; - return $this->xref['xref'][$key[0]][$key[1]]; - } - } - if (array_key_exists($objref, $this->objoffsets)) { - //echo "Offset found in internal reftable
"; - return $this->objoffsets[$objref]; - } - return false; - } - - /** - * Decode the specified stream. - * @param $sdic (array) Stream's dictionary array. - * @param $stream (string) Stream to decode. - * @return array containing decoded stream data and remaining filters. - * @protected - * @since 1.0.000 (2011-06-22) - */ - protected function decodeStream($sdic, $stream) { - // get stream lenght and filters - $slength = strlen($stream); - if ($slength <= 0) { - return array('', array()); - } - $filters = array(); - foreach ($sdic as $k => $v) { - if ($v[0] == PDF_TYPE_TOKEN) { - if (($k == '/Length') AND ($v[0] == PDF_TYPE_NUMERIC)) { - // get declared stream lenght - $declength = intval($v[1]); - if ($declength < $slength) { - $stream = substr($stream, 0, $declength); - $slength = $declength; - } - } elseif ($k == '/Filter') { - if ($v[0] == PDF_TYPE_TOKEN) { - // single filter - $filters[] = $v[1]; - } elseif ($v[0] == PDF_TYPE_ARRAY) { - // array of filters - foreach ($v[1] as $flt) { - if ($flt[0] == PDF_TYPE_TOKEN) { - $filters[] = $flt[1]; - } - } - } - } - } - } - // decode the stream - $remaining_filters = array(); - foreach ($filters as $filter) { - if (in_array($filter, $this->FilterDecoders->getAvailableFilters())) { - $stream = $this->FilterDecoders->decodeFilter($filter, $stream); - } else { - // add missing filter to array - $remaining_filters[] = $filter; - } - } - return array($stream, $remaining_filters); - } - - - /** - * Set pageno - * - * @param int $pageno Pagenumber to use - */ - public function setPageno($pageno) { - $pageno = ((int) $pageno) - 1; - - if ($pageno < 0 || $pageno >= $this->getPageCount()) { - $this->error("Pagenumber is wrong! (Requested $pageno, max ".$this->getPageCount().")"); - } - - $this->pageno = $pageno; - } - - /** - * Get page-resources from current page - * - * @return array - */ - public function getPageResources() { - return $this->_getPageResources($this->pages[$this->pageno]); - } - - /** - * Get page-resources from /Page - * - * @param array $obj Array of pdf-data - */ - private function _getPageResources ($obj) { // $obj = /Page - $obj = $this->getObjectVal($obj); - - // If the current object has a resources - // dictionary associated with it, we use - // it. Otherwise, we move back to its - // parent object. - if (isset ($obj[1][1]['/Resources'])) { - $res = $obj[1][1]['/Resources']; - if ($res[0] == PDF_TYPE_OBJECT) - return $res[1]; - return $res; - } else { - if (!isset ($obj[1][1]['/Parent'])) { - return false; - } else { - $res = $this->_getPageResources($obj[1][1]['/Parent']); - if ($res[0] == PDF_TYPE_OBJECT) - return $res[1]; - return $res; - } - } - } - - /** - * Get annotations from current page - * - * @return array - */ - public function getPageAnnotations() { - return $this->_getPageAnnotations($this->pages[$this->pageno]); - } - - /** - * Get annotations from /Page - * - * @param array $obj Array of pdf-data - */ - private function _getPageAnnotations ($obj) { // $obj = /Page - $obj = $this->getObjectVal($obj); - - // If the current object has an annotations - // dictionary associated with it, we use - // it. Otherwise, we move back to its - // parent object. - if (isset ($obj[1][1]['/Annots'])) { - $annots = $obj[1][1]['/Annots']; - } else { - if (!isset ($obj[1][1]['/Parent'])) { - return false; - } else { - $annots = $this->_getPageAnnotations($obj[1][1]['/Parent']); - } - } - - if ($annots[0] == PDF_TYPE_OBJREF) - return $this->getObjectVal($annots); - return $annots; - } - - - /** - * Get content of current page - * - * If more /Contents is an array, the streams are concated - * - * @return string - */ - public function getContent() { - $buffer = ''; - - if (isset($this->pages[$this->pageno][1][1]['/Contents'])) { - $contents = $this->_getPageContent($this->pages[$this->pageno][1][1]['/Contents']); - foreach($contents AS $tmp_content) { - $buffer .= $this->_rebuildContentStream($tmp_content) . ' '; - } - } - - return $buffer; - } - - - /** - * Resolve all content-objects - * - * @param array $content_ref - * @return array - */ - private function _getPageContent($content_ref) { - $contents = array(); - - if ($content_ref[0] == PDF_TYPE_OBJREF) { - $content = $this->getObjectVal($content_ref); - if ($content[1][0] == PDF_TYPE_ARRAY) { - $contents = $this->_getPageContent($content[1]); - } else { - $contents[] = $content; - } - } elseif ($content_ref[0] == PDF_TYPE_ARRAY) { - foreach ($content_ref[1] AS $tmp_content_ref) { - $contents = array_merge($contents,$this->_getPageContent($tmp_content_ref)); - } - } - - return $contents; - } - - - /** - * Rebuild content-streams - * - * @param array $obj - * @return string - */ - private function _rebuildContentStream($obj) { - $filters = array(); - - if (isset($obj[1][1]['/Filter'])) { - $_filter = $obj[1][1]['/Filter']; - - if ($_filter[0] == PDF_TYPE_OBJREF) { - $tmpFilter = $this->getObjectVal($_filter); - $_filter = $tmpFilter[1]; - } - - if ($_filter[0] == PDF_TYPE_TOKEN) { - $filters[] = $_filter; - } elseif ($_filter[0] == PDF_TYPE_ARRAY) { - $filters = $_filter[1]; - } - } - - $stream = $obj[2][1]; - - foreach ($filters AS $_filter) { - $stream = $this->FilterDecoders->decodeFilter($_filter[1], $stream); - } - - return $stream; - } - - - /** - * Get a Box from a page - * Arrayformat is same as used by fpdf_tpl - * - * @param array $page a /Page - * @param string $box_index Type of Box @see $availableBoxes - * @param float Scale factor from user space units to points - * @return array - */ - public function getPageBox($page, $box_index, $k) { - $page = $this->getObjectVal($page); - $box = null; - if (isset($page[1][1][$box_index])) - $box =& $page[1][1][$box_index]; - - if (!is_null($box) && $box[0] == PDF_TYPE_OBJREF) { - $tmp_box = $this->getObjectVal($box); - $box = $tmp_box[1]; - } - - if (!is_null($box) && $box[0] == PDF_TYPE_ARRAY) { - $b =& $box[1]; - return array('x' => $b[0][1] / $k, - 'y' => $b[1][1] / $k, - 'w' => abs($b[0][1] - $b[2][1]) / $k, - 'h' => abs($b[1][1] - $b[3][1]) / $k, - 'llx' => min($b[0][1], $b[2][1]) / $k, - 'lly' => min($b[1][1], $b[3][1]) / $k, - 'urx' => max($b[0][1], $b[2][1]) / $k, - 'ury' => max($b[1][1], $b[3][1]) / $k, - ); - } elseif (!isset ($page[1][1]['/Parent'])) { - return false; - } else { - return $this->getPageBox($this->getObjectVal($page[1][1]['/Parent']), $box_index, $k); - } - } - - /** - * Get all page boxes by page no - * - * @param int The page number - * @param float Scale factor from user space units to points - * @return array - */ - public function getPageBoxes($pageno, $k) { - return $this->_getPageBoxes($this->pages[$pageno - 1], $k); - } - - /** - * Get all boxes from /Page - * - * @param array a /Page - * @return array - */ - private function _getPageBoxes($page, $k) { - $boxes = array(); - - foreach($this->availableBoxes AS $box) { - if ($_box = $this->getPageBox($page, $box, $k)) { - $boxes[$box] = $_box; - } - } - - return $boxes; - } - - /** - * Get the page rotation by pageno - * - * @param integer $pageno - * @return array - */ - public function getPageRotation($pageno) { - return $this->_getPageRotation($this->pages[$pageno - 1]); - } - - private function _getPageRotation($obj) { // $obj = /Page - $obj = $this->getObjectVal($obj); - if (isset ($obj[1][1]['/Rotate'])) { - $res = $this->getObjectVal($obj[1][1]['/Rotate']); - if ($res[0] == PDF_TYPE_OBJECT) - return $res[1]; - return $res; - } else { - if (!isset ($obj[1][1]['/Parent'])) { - return false; - } else { - $res = (array)$this->_getPageRotation($obj[1][1]['/Parent']); - if ($res[0] == PDF_TYPE_OBJECT) - return $res[1]; - return $res; - } - } - } - - /** - * This method is automatically called in case of fatal error; it simply outputs the message and halts the execution. - * @param $msg (string) The error message - * @public - * @since 1.0.000 (2011-05-23) - */ - public function Error($msg) { - // exit program and print error - die("TCPDI_PARSER ERROR [{$this->uniqueid}]: ".$msg); - } - -} // END OF TCPDF_PARSER CLASS - -//============================================================+ -// END OF FILE -//============================================================+