Skip to content

Commit b58c6da

Browse files
committed
Removed magical option array
1 parent e37e8ef commit b58c6da

16 files changed

+273
-386
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2121
- Cleaned up the selector logic.
2222
- Fixed issue with greedy regex for charset detection.
2323
- Fixed bug causing infinite loops in some cases.
24+
- Refactored the way we handle options. Removed the magical option array.
2425

2526
### Removed
2627
- Curl interface and curl implementation have been removed.

composer.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020
"paquettg/string-encode": "~1.0.0",
2121
"php-http/httplug": "^2.1",
2222
"php-http/guzzle6-adapter": "^2.0",
23-
"guzzlehttp/psr7": "^1.6"
23+
"guzzlehttp/psr7": "^1.6",
24+
"myclabs/php-enum": "^1.7"
2425
},
2526
"require-dev": {
2627
"phpunit/phpunit": "^7.5.1",

src/PHPHtmlParser/Content.php

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
namespace PHPHtmlParser;
66

7+
use PHPHtmlParser\Enum\StringToken;
78
use PHPHtmlParser\Exceptions\ContentLengthException;
89
use PHPHtmlParser\Exceptions\LogicalException;
910

@@ -75,11 +76,12 @@ public function char(?int $char = null): string
7576
* Moves the current position forward.
7677
*
7778
* @chainable
79+
*
7880
* @throws ContentLengthException
7981
*/
8082
public function fastForward(int $count): Content
8183
{
82-
if (!$this->canFastForward()) {
84+
if (!$this->canFastForward($count)) {
8385
// trying to go over the content length, throw exception
8486
throw new ContentLengthException('Attempt to fastForward pass the length of the content.');
8587
}
@@ -91,9 +93,9 @@ public function fastForward(int $count): Content
9193
/**
9294
* Checks if we can move the position forward.
9395
*/
94-
public function canFastForward(): bool
96+
public function canFastForward(int $count): bool
9597
{
96-
return \strlen($this->content) > $this->pos;
98+
return \strlen($this->content) >= $this->pos + $count;
9799
}
98100

99101
/**
@@ -175,8 +177,6 @@ public function copyUntil(string $string, bool $char = false, bool $escape = fal
175177
/**
176178
* Copies the content until the string is found and return it
177179
* unless the 'unless' is found in the substring.
178-
*
179-
* @return string
180180
*/
181181
public function copyUntilUnless(string $string, string $unless): string
182182
{
@@ -197,13 +197,11 @@ public function copyUntilUnless(string $string, string $unless): string
197197
/**
198198
* Copies the content until it reaches the token string.
199199
*
200-
* @return string
201-
*
202200
* @uses $this->copyUntil()
203201
*/
204-
public function copyByToken(string $token, bool $char = false, bool $escape = false)
202+
public function copyByToken(StringToken $stringToken, bool $char = false, bool $escape = false): string
205203
{
206-
$string = $this->$token;
204+
$string = $stringToken->getValue();
207205

208206
return $this->copyUntil($string, $char, $escape);
209207
}
@@ -236,13 +234,11 @@ public function skip(string $string, bool $copy = false): string
236234
/**
237235
* Skip a given token of pre-defined characters.
238236
*
239-
* @return Content|string
240-
*
241237
* @uses $this->skip()
242238
*/
243-
public function skipByToken(string $token, bool $copy = false)
239+
public function skipByToken(StringToken $skipToken, bool $copy = false): string
244240
{
245-
$string = $this->$token;
241+
$string = $skipToken->getValue();
246242

247243
return $this->skip($string, $copy);
248244
}

src/PHPHtmlParser/Dom.php

Lines changed: 40 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010
use PHPHtmlParser\Dom\Collection;
1111
use PHPHtmlParser\Dom\HtmlNode;
1212
use PHPHtmlParser\Dom\TextNode;
13+
use PHPHtmlParser\Enum\StringToken;
1314
use PHPHtmlParser\Exceptions\ChildNotFoundException;
1415
use PHPHtmlParser\Exceptions\CircularException;
1516
use PHPHtmlParser\Exceptions\ContentLengthException;
16-
use PHPHtmlParser\Exceptions\CurlException;
1717
use PHPHtmlParser\Exceptions\LogicalException;
1818
use PHPHtmlParser\Exceptions\NotLoadedException;
1919
use PHPHtmlParser\Exceptions\StrictException;
@@ -72,9 +72,9 @@ class Dom
7272
/**
7373
* A global options object to be used by all load calls.
7474
*
75-
* @var array
75+
* @var ?Options
7676
*/
77-
private $globalOptions = [];
77+
private $globalOptions;
7878

7979
/**
8080
* A persistent option object to be used for all options in the
@@ -147,7 +147,7 @@ public function __get($name)
147147
* @throws StrictException
148148
* @throws LogicalException
149149
*/
150-
public function loadFromFile(string $file, array $options = []): Dom
150+
public function loadFromFile(string $file, ?Options $options = null): Dom
151151
{
152152
$content = @\file_get_contents($file);
153153
if ($content === false) {
@@ -168,7 +168,7 @@ public function loadFromFile(string $file, array $options = []): Dom
168168
* @throws StrictException
169169
* @throws \Psr\Http\Client\ClientExceptionInterface
170170
*/
171-
public function loadFromUrl(string $url, array $options = [], ?ClientInterface $client = null, ?RequestInterface $request = null): Dom
171+
public function loadFromUrl(string $url, ?Options $options, ?ClientInterface $client = null, ?RequestInterface $request = null): Dom
172172
{
173173
if ($client === null) {
174174
$client = new Client();
@@ -191,11 +191,15 @@ public function loadFromUrl(string $url, array $options = [], ?ClientInterface $
191191
* @throws CircularException
192192
* @throws StrictException
193193
*/
194-
public function loadStr(string $str, array $option = []): Dom
194+
public function loadStr(string $str, ?Options $options = null): Dom
195195
{
196196
$this->options = new Options();
197-
$this->options->setOptions($this->globalOptions)
198-
->setOptions($option);
197+
if ($this->globalOptions !== null) {
198+
$this->options->setFromOptions($this->globalOptions);
199+
}
200+
if ($options !== null) {
201+
$this->options->setFromOptions($options);
202+
}
199203

200204
$this->rawSize = \strlen($str);
201205
$this->raw = $str;
@@ -216,7 +220,7 @@ public function loadStr(string $str, array $option = []): Dom
216220
*
217221
* @chainable
218222
*/
219-
public function setOptions(array $options): Dom
223+
public function setOptions(Options $options): Dom
220224
{
221225
$this->globalOptions = $options;
222226

@@ -235,9 +239,7 @@ public function find(string $selector, int $nth = null)
235239
{
236240
$this->isLoaded();
237241

238-
$result = $this->root->find($selector, $nth);
239-
240-
return $result;
242+
return $this->root->find($selector, $nth);
241243
}
242244

243245
/**
@@ -463,7 +465,7 @@ private function isLoaded(): void
463465
*/
464466
private function clean(string $str): string
465467
{
466-
if ($this->options->get('cleanupInput') != true) {
468+
if ($this->options->isCleanupInput() != true) {
467469
// skip entire cleanup step
468470
return $str;
469471
}
@@ -488,7 +490,7 @@ private function clean(string $str): string
488490

489491
// clean out the \n\r
490492
$replace = ' ';
491-
if ($this->options->get('preserveLineBreaks')) {
493+
if ($this->options->isPreserveLineBreaks()) {
492494
$replace = '
';
493495
}
494496
$str = \str_replace(["\r\n", "\r", "\n"], $replace, $str);
@@ -515,7 +517,7 @@ private function clean(string $str): string
515517
}
516518

517519
// strip out <script> tags
518-
if ($this->options->get('removeScripts')) {
520+
if ($this->options->isRemoveScripts()) {
519521
$str = \mb_eregi_replace("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>", '', $str);
520522
if ($str === false) {
521523
throw new LogicalException('mb_eregi_replace returned false instead of a string. Error when attempting to remove scripts 1.');
@@ -527,7 +529,7 @@ private function clean(string $str): string
527529
}
528530

529531
// strip out <style> tags
530-
if ($this->options->get('removeStyles')) {
532+
if ($this->options->isRemoveStyles()) {
531533
$str = \mb_eregi_replace("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>", '', $str);
532534
if ($str === false) {
533535
throw new LogicalException('mb_eregi_replace returned false instead of a string. Error when attempting to strip out style tags 1.');
@@ -538,16 +540,8 @@ private function clean(string $str): string
538540
}
539541
}
540542

541-
// strip out server side scripts
542-
if ($this->options->get('serverSideScripts')) {
543-
$str = \mb_eregi_replace("(<\?)(.*?)(\?>)", '', $str);
544-
if ($str === false) {
545-
throw new LogicalException('mb_eregi_replace returned false instead of a string. Error when attempting to strip out service side scripts.');
546-
}
547-
}
548-
549543
// strip smarty scripts
550-
if ($this->options->get('removeSmartyScripts')) {
544+
if ($this->options->isRemoveSmartyScripts()) {
551545
$str = \mb_eregi_replace("(\{\w)(.*?)(\})", '', $str);
552546
if ($str === false) {
553547
throw new LogicalException('mb_eregi_replace returned false instead of a string. Error when attempting to remove smarty scripts.');
@@ -569,11 +563,11 @@ private function parse(): void
569563
{
570564
// add the root node
571565
$this->root = new HtmlNode('root');
572-
$this->root->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);
566+
$this->root->setHtmlSpecialCharsDecode($this->options->isHtmlSpecialCharsDecode());
573567
$activeNode = $this->root;
574568
while ($activeNode !== null) {
575569
if ($activeNode && $activeNode->tag->name() === 'script'
576-
&& $this->options->get('cleanupInput') != true
570+
&& $this->options->isCleanupInput() != true
577571
) {
578572
$str = $this->content->copyUntil('</');
579573
} else {
@@ -618,12 +612,12 @@ private function parse(): void
618612
if (!$node->getTag()->isSelfClosing()) {
619613
$activeNode = $node;
620614
}
621-
} elseif ($this->options->whitespaceTextNode ||
615+
} elseif ($this->options->isWhitespaceTextNode() ||
622616
\trim($str) != ''
623617
) {
624618
// we found text we care about
625-
$textNode = new TextNode($str, $this->options->removeDoubleSpace);
626-
$textNode->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);
619+
$textNode = new TextNode($str, $this->options->isRemoveDoubleSpace());
620+
$textNode->setHtmlSpecialCharsDecode($this->options->isHtmlSpecialCharsDecode());
627621
$activeNode->addChild($textNode);
628622
}
629623
}
@@ -656,7 +650,7 @@ private function parseTag(): array
656650
if ($this->content->char() == '/') {
657651
// end tag
658652
$tag = $this->content->fastForward(1)
659-
->copyByToken('slash', true);
653+
->copyByToken(StringToken::SLASH(), true);
660654
// move to end of tag
661655
$this->content->copyUntil('>');
662656
$this->content->fastForward(1);
@@ -675,20 +669,20 @@ private function parseTag(): array
675669
return $return;
676670
}
677671

678-
$tag = \strtolower($this->content->copyByToken('slash', true));
672+
$tag = \strtolower($this->content->copyByToken(StringToken::SLASH(), true));
679673
if (\trim($tag) == '') {
680674
// no tag found, invalid < found
681675
return $return;
682676
}
683677
$node = new HtmlNode($tag);
684-
$node->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);
678+
$node->setHtmlSpecialCharsDecode($this->options->isHtmlSpecialCharsDecode());
685679

686680
// attributes
687681
while (
688682
$this->content->char() != '>' &&
689683
$this->content->char() != '/'
690684
) {
691-
$space = $this->content->skipByToken('blank', true);
685+
$space = $this->content->skipByToken(StringToken::BLANK(), true);
692686
if (empty($space)) {
693687
try {
694688
$this->content->fastForward(1);
@@ -699,28 +693,28 @@ private function parseTag(): array
699693
continue;
700694
}
701695

702-
$name = $this->content->copyByToken('equal', true);
696+
$name = $this->content->copyByToken(StringToken::EQUAL(), true);
703697
if ($name == '/') {
704698
break;
705699
}
706700

707701
if (empty($name)) {
708-
$this->content->skipByToken('blank');
702+
$this->content->skipByToken(StringToken::BLANK());
709703
continue;
710704
}
711705

712-
$this->content->skipByToken('blank');
706+
$this->content->skipByToken(StringToken::BLANK());
713707
if ($this->content->char() == '=') {
714708
$this->content->fastForward(1)
715-
->skipByToken('blank');
709+
->skipByToken(StringToken::BLANK());
716710
switch ($this->content->char()) {
717711
case '"':
718712
$this->content->fastForward(1);
719713
$string = $this->content->copyUntil('"', true);
720714
do {
721715
$moreString = $this->content->copyUntilUnless('"', '=>');
722716
$string .= $moreString;
723-
} while (strlen($moreString) > 0 && $this->content->getPosition() < $this->size);
717+
} while (\strlen($moreString) > 0 && $this->content->getPosition() < $this->size);
724718
$attr['value'] = $string;
725719
$this->content->fastForward(1);
726720
$node->getTag()->setAttribute($name, $string);
@@ -731,18 +725,18 @@ private function parseTag(): array
731725
do {
732726
$moreString = $this->content->copyUntilUnless("'", '=>');
733727
$string .= $moreString;
734-
} while (strlen($moreString) > 0 && $this->content->getPosition() < $this->size);
728+
} while (\strlen($moreString) > 0 && $this->content->getPosition() < $this->size);
735729
$attr['value'] = $string;
736730
$this->content->fastForward(1);
737731
$node->getTag()->setAttribute($name, $string, false);
738732
break;
739733
default:
740-
$node->getTag()->setAttribute($name, $this->content->copyByToken('attr', true));
734+
$node->getTag()->setAttribute($name, $this->content->copyByToken(StringToken::ATTR(), true));
741735
break;
742736
}
743737
} else {
744738
// no value attribute
745-
if ($this->options->strict) {
739+
if ($this->options->isStrict()) {
746740
// can't have this in strict html
747741
$character = $this->content->getPosition();
748742
throw new StrictException("Tag '$tag' has an attribute '$name' with out a value! (character #$character)");
@@ -754,15 +748,15 @@ private function parseTag(): array
754748
}
755749
}
756750

757-
$this->content->skipByToken('blank');
751+
$this->content->skipByToken(StringToken::BLANK());
758752
$tag = \strtolower($tag);
759753
if ($this->content->char() == '/') {
760754
// self closing tag
761755
$node->getTag()->selfClosing();
762756
$this->content->fastForward(1);
763757
} elseif (\in_array($tag, $this->selfClosing, true)) {
764758
// Should be a self closing tag, check if we are strict
765-
if ($this->options->strict) {
759+
if ($this->options->isStrict()) {
766760
$character = $this->content->getPosition();
767761
throw new StrictException("Tag '$tag' is not self closing! (character #$character)");
768762
}
@@ -776,7 +770,7 @@ private function parseTag(): array
776770
}
777771
}
778772

779-
if ($this->content->canFastForward()) {
773+
if ($this->content->canFastForward(1)) {
780774
$this->content->fastForward(1);
781775
}
782776

@@ -798,7 +792,7 @@ private function detectCharset(): bool
798792
$encode->from($this->defaultCharset);
799793
$encode->to($this->defaultCharset);
800794

801-
$enforceEncoding = $this->options->enforceEncoding;
795+
$enforceEncoding = $this->options->getEnforceEncoding();
802796
if ($enforceEncoding !== null) {
803797
// they want to enforce the given encoding
804798
$encode->from($enforceEncoding);

0 commit comments

Comments
 (0)