10
10
use PHPHtmlParser \Dom \Collection ;
11
11
use PHPHtmlParser \Dom \HtmlNode ;
12
12
use PHPHtmlParser \Dom \TextNode ;
13
+ use PHPHtmlParser \Enum \StringToken ;
13
14
use PHPHtmlParser \Exceptions \ChildNotFoundException ;
14
15
use PHPHtmlParser \Exceptions \CircularException ;
15
16
use PHPHtmlParser \Exceptions \ContentLengthException ;
16
- use PHPHtmlParser \Exceptions \CurlException ;
17
17
use PHPHtmlParser \Exceptions \LogicalException ;
18
18
use PHPHtmlParser \Exceptions \NotLoadedException ;
19
19
use PHPHtmlParser \Exceptions \StrictException ;
@@ -72,9 +72,9 @@ class Dom
72
72
/**
73
73
* A global options object to be used by all load calls.
74
74
*
75
- * @var array
75
+ * @var ?Options
76
76
*/
77
- private $ globalOptions = [] ;
77
+ private $ globalOptions ;
78
78
79
79
/**
80
80
* A persistent option object to be used for all options in the
@@ -147,7 +147,7 @@ public function __get($name)
147
147
* @throws StrictException
148
148
* @throws LogicalException
149
149
*/
150
- public function loadFromFile (string $ file , array $ options = [] ): Dom
150
+ public function loadFromFile (string $ file , ? Options $ options = null ): Dom
151
151
{
152
152
$ content = @\file_get_contents ($ file );
153
153
if ($ content === false ) {
@@ -168,7 +168,7 @@ public function loadFromFile(string $file, array $options = []): Dom
168
168
* @throws StrictException
169
169
* @throws \Psr\Http\Client\ClientExceptionInterface
170
170
*/
171
- public function loadFromUrl (string $ url , array $ options = [] , ?ClientInterface $ client = null , ?RequestInterface $ request = null ): Dom
171
+ public function loadFromUrl(string $url, ?Options $options = null, ?ClientInterface $client = null, ?RequestInterface $request = null): Dom
172
172
{
173
173
if ($ client === null ) {
174
174
$ client = new Client ();
@@ -191,11 +191,15 @@ public function loadFromUrl(string $url, array $options = [], ?ClientInterface $
191
191
* @throws CircularException
192
192
* @throws StrictException
193
193
*/
194
- public function loadStr (string $ str , array $ option = [] ): Dom
194
+ public function loadStr (string $ str , ? Options $ options = null ): Dom
195
195
{
196
196
$ this ->options = new Options ();
197
- $ this ->options ->setOptions ($ this ->globalOptions )
198
- ->setOptions ($ option );
197
+ if ($ this ->globalOptions !== null ) {
198
+ $ this ->options ->setFromOptions ($ this ->globalOptions );
199
+ }
200
+ if ($ options !== null ) {
201
+ $ this ->options ->setFromOptions ($ options );
202
+ }
199
203
200
204
$ this ->rawSize = \strlen ($ str );
201
205
$ this ->raw = $ str ;
@@ -216,7 +220,7 @@ public function loadStr(string $str, array $option = []): Dom
216
220
*
217
221
* @chainable
218
222
*/
219
- public function setOptions (array $ options ): Dom
223
+ public function setOptions (Options $ options ): Dom
220
224
{
221
225
$ this ->globalOptions = $ options ;
222
226
@@ -235,9 +239,7 @@ public function find(string $selector, int $nth = null)
235
239
{
236
240
$ this ->isLoaded ();
237
241
238
- $ result = $ this ->root ->find ($ selector , $ nth );
239
-
240
- return $ result ;
242
+ return $ this ->root ->find ($ selector , $ nth );
241
243
}
242
244
243
245
/**
@@ -463,7 +465,7 @@ private function isLoaded(): void
463
465
*/
464
466
private function clean (string $ str ): string
465
467
{
466
- if ($ this ->options ->get ( ' cleanupInput ' ) != true ) {
468
+ if ($ this ->options ->isCleanupInput ( ) != true ) {
467
469
// skip entire cleanup step
468
470
return $ str ;
469
471
}
@@ -488,7 +490,7 @@ private function clean(string $str): string
488
490
489
491
// clean out the \n\r
490
492
$ replace = ' ' ;
491
- if ($ this ->options ->get ( ' preserveLineBreaks ' )) {
493
+ if ($ this ->options ->isPreserveLineBreaks ( )) {
492
494
$ replace = ' ' ;
493
495
}
494
496
$ str = \str_replace (["\r\n" , "\r" , "\n" ], $ replace , $ str );
@@ -515,7 +517,7 @@ private function clean(string $str): string
515
517
}
516
518
517
519
// strip out <script> tags
518
- if ($ this ->options ->get ( ' removeScripts ' )) {
520
+ if ($ this ->options ->isRemoveScripts ( )) {
519
521
$ str = \mb_eregi_replace ("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*> " , '' , $ str );
520
522
if ($ str === false ) {
521
523
throw new LogicalException ('mb_eregi_replace returned false instead of a string. Error when attempting to remove scripts 1. ' );
@@ -527,7 +529,7 @@ private function clean(string $str): string
527
529
}
528
530
529
531
// strip out <style> tags
530
- if ($ this ->options ->get ( ' removeStyles ' )) {
532
+ if ($ this ->options ->isRemoveStyles ( )) {
531
533
$ str = \mb_eregi_replace ("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*> " , '' , $ str );
532
534
if ($ str === false ) {
533
535
throw new LogicalException ('mb_eregi_replace returned false instead of a string. Error when attempting to strip out style tags 1. ' );
@@ -538,16 +540,8 @@ private function clean(string $str): string
538
540
}
539
541
}
540
542
541
- // strip out server side scripts
542
- if ($ this ->options ->get ('serverSideScripts ' )) {
543
- $ str = \mb_eregi_replace ("(<\?)(.*?)(\?>) " , '' , $ str );
544
- if ($ str === false ) {
545
- throw new LogicalException ('mb_eregi_replace returned false instead of a string. Error when attempting to strip out service side scripts. ' );
546
- }
547
- }
548
-
549
543
// strip smarty scripts
550
- if ($ this ->options ->get ( ' removeSmartyScripts ' )) {
544
+ if ($ this ->options ->isRemoveSmartyScripts ( )) {
551
545
$ str = \mb_eregi_replace ("(\{\w)(.*?)(\}) " , '' , $ str );
552
546
if ($ str === false ) {
553
547
throw new LogicalException ('mb_eregi_replace returned false instead of a string. Error when attempting to remove smarty scripts. ' );
@@ -569,11 +563,11 @@ private function parse(): void
569
563
{
570
564
// add the root node
571
565
$ this ->root = new HtmlNode ('root ' );
572
- $ this ->root ->setHtmlSpecialCharsDecode ($ this ->options ->htmlSpecialCharsDecode );
566
+ $ this ->root ->setHtmlSpecialCharsDecode ($ this ->options ->isHtmlSpecialCharsDecode () );
573
567
$ activeNode = $ this ->root ;
574
568
while ($ activeNode !== null ) {
575
569
if ($ activeNode && $ activeNode ->tag ->name () === 'script '
576
- && $ this ->options ->get ( ' cleanupInput ' ) != true
570
+ && $ this ->options ->isCleanupInput ( ) != true
577
571
) {
578
572
$ str = $ this ->content ->copyUntil ('</ ' );
579
573
} else {
@@ -618,12 +612,12 @@ private function parse(): void
618
612
if (!$ node ->getTag ()->isSelfClosing ()) {
619
613
$ activeNode = $ node ;
620
614
}
621
- } elseif ($ this ->options ->whitespaceTextNode ||
615
+ } elseif ($ this ->options ->isWhitespaceTextNode () ||
622
616
\trim ($ str ) != ''
623
617
) {
624
618
// we found text we care about
625
- $ textNode = new TextNode ($ str , $ this ->options ->removeDoubleSpace );
626
- $ textNode ->setHtmlSpecialCharsDecode ($ this ->options ->htmlSpecialCharsDecode );
619
+ $ textNode = new TextNode ($ str , $ this ->options ->isRemoveDoubleSpace () );
620
+ $ textNode ->setHtmlSpecialCharsDecode ($ this ->options ->isHtmlSpecialCharsDecode () );
627
621
$ activeNode ->addChild ($ textNode );
628
622
}
629
623
}
@@ -656,7 +650,7 @@ private function parseTag(): array
656
650
if ($ this ->content ->char () == '/ ' ) {
657
651
// end tag
658
652
$ tag = $ this ->content ->fastForward (1 )
659
- ->copyByToken (' slash ' , true );
653
+ ->copyByToken (StringToken:: SLASH () , true );
660
654
// move to end of tag
661
655
$ this ->content ->copyUntil ('> ' );
662
656
$ this ->content ->fastForward (1 );
@@ -675,20 +669,20 @@ private function parseTag(): array
675
669
return $ return ;
676
670
}
677
671
678
- $ tag = \strtolower ($ this ->content ->copyByToken (' slash ' , true ));
672
+ $ tag = \strtolower ($ this ->content ->copyByToken (StringToken:: SLASH () , true ));
679
673
if (\trim ($ tag ) == '' ) {
680
674
// no tag found, invalid < found
681
675
return $ return ;
682
676
}
683
677
$ node = new HtmlNode ($ tag );
684
- $ node ->setHtmlSpecialCharsDecode ($ this ->options ->htmlSpecialCharsDecode );
678
+ $ node ->setHtmlSpecialCharsDecode ($ this ->options ->isHtmlSpecialCharsDecode () );
685
679
686
680
// attributes
687
681
while (
688
682
$ this ->content ->char () != '> ' &&
689
683
$ this ->content ->char () != '/ '
690
684
) {
691
- $ space = $ this ->content ->skipByToken (' blank ' , true );
685
+ $ space = $ this ->content ->skipByToken (StringToken:: BLANK () , true );
692
686
if (empty ($ space )) {
693
687
try {
694
688
$ this ->content ->fastForward (1 );
@@ -699,28 +693,28 @@ private function parseTag(): array
699
693
continue ;
700
694
}
701
695
702
- $ name = $ this ->content ->copyByToken (' equal ' , true );
696
+ $ name = $ this ->content ->copyByToken (StringToken:: EQUAL () , true );
703
697
if ($ name == '/ ' ) {
704
698
break ;
705
699
}
706
700
707
701
if (empty ($ name )) {
708
- $ this ->content ->skipByToken (' blank ' );
702
+ $ this ->content ->skipByToken (StringToken:: BLANK () );
709
703
continue ;
710
704
}
711
705
712
- $ this ->content ->skipByToken (' blank ' );
706
+ $ this ->content ->skipByToken (StringToken:: BLANK () );
713
707
if ($ this ->content ->char () == '= ' ) {
714
708
$ this ->content ->fastForward (1 )
715
- ->skipByToken (' blank ' );
709
+ ->skipByToken (StringToken:: BLANK () );
716
710
switch ($ this ->content ->char ()) {
717
711
case '" ' :
718
712
$ this ->content ->fastForward (1 );
719
713
$ string = $ this ->content ->copyUntil ('" ' , true );
720
714
do {
721
715
$ moreString = $ this ->content ->copyUntilUnless ('" ' , '=> ' );
722
716
$ string .= $ moreString ;
723
- } while (strlen ($ moreString ) > 0 && $ this ->content ->getPosition () < $ this ->size );
717
+ } while (\ strlen ($ moreString ) > 0 && $ this ->content ->getPosition () < $ this ->size );
724
718
$ attr ['value ' ] = $ string ;
725
719
$ this ->content ->fastForward (1 );
726
720
$ node ->getTag ()->setAttribute ($ name , $ string );
@@ -731,18 +725,18 @@ private function parseTag(): array
731
725
do {
732
726
$ moreString = $ this ->content ->copyUntilUnless ("' " , '=> ' );
733
727
$ string .= $ moreString ;
734
- } while (strlen ($ moreString ) > 0 && $ this ->content ->getPosition () < $ this ->size );
728
+ } while (\ strlen ($ moreString ) > 0 && $ this ->content ->getPosition () < $ this ->size );
735
729
$ attr ['value ' ] = $ string ;
736
730
$ this ->content ->fastForward (1 );
737
731
$ node ->getTag ()->setAttribute ($ name , $ string , false );
738
732
break ;
739
733
default :
740
- $ node ->getTag ()->setAttribute ($ name , $ this ->content ->copyByToken (' attr ' , true ));
734
+ $ node ->getTag ()->setAttribute ($ name , $ this ->content ->copyByToken (StringToken:: ATTR () , true ));
741
735
break ;
742
736
}
743
737
} else {
744
738
// no value attribute
745
- if ($ this ->options ->strict ) {
739
+ if ($ this ->options ->isStrict () ) {
746
740
// can't have this in strict html
747
741
$ character = $ this ->content ->getPosition ();
748
742
throw new StrictException ("Tag ' $ tag' has an attribute ' $ name' with out a value! (character # $ character) " );
@@ -754,15 +748,15 @@ private function parseTag(): array
754
748
}
755
749
}
756
750
757
- $ this ->content ->skipByToken (' blank ' );
751
+ $ this ->content ->skipByToken (StringToken:: BLANK () );
758
752
$ tag = \strtolower ($ tag );
759
753
if ($ this ->content ->char () == '/ ' ) {
760
754
// self closing tag
761
755
$ node ->getTag ()->selfClosing ();
762
756
$ this ->content ->fastForward (1 );
763
757
} elseif (\in_array ($ tag , $ this ->selfClosing , true )) {
764
758
// Should be a self closing tag, check if we are strict
765
- if ($ this ->options ->strict ) {
759
+ if ($ this ->options ->isStrict () ) {
766
760
$ character = $ this ->content ->getPosition ();
767
761
throw new StrictException ("Tag ' $ tag' is not self closing! (character # $ character) " );
768
762
}
@@ -776,7 +770,7 @@ private function parseTag(): array
776
770
}
777
771
}
778
772
779
- if ($ this ->content ->canFastForward ()) {
773
+ if ($ this ->content ->canFastForward (1 )) {
780
774
$ this ->content ->fastForward (1 );
781
775
}
782
776
@@ -798,7 +792,7 @@ private function detectCharset(): bool
798
792
$ encode ->from ($ this ->defaultCharset );
799
793
$ encode ->to ($ this ->defaultCharset );
800
794
801
- $ enforceEncoding = $ this ->options ->enforceEncoding ;
795
+ $ enforceEncoding = $ this ->options ->getEnforceEncoding () ;
802
796
if ($ enforceEncoding !== null ) {
803
797
// they want to enforce the given encoding
804
798
$ encode ->from ($ enforceEncoding );
0 commit comments