3
3
import java .util .regex .Matcher ;
4
4
import java .util .regex .Pattern ;
5
5
6
+ import static java .lang .String .valueOf ;
7
+
6
8
public class VerbalExpression {
7
9
8
10
private final Pattern pattern ;
@@ -14,10 +16,34 @@ public static class Builder {
14
16
private StringBuilder suffixes = new StringBuilder ();
15
17
private int modifiers = Pattern .MULTILINE ;
16
18
19
/**
 * Creates an empty builder.
 * The constructor is package-private; use {@link #regex()} to obtain a new instance.
 *
 * @since 1.2
 */
Builder() {
    // nothing to initialise here
}
26
+
27
+ /**
28
+ * Escapes any non-word char with two backslashes
29
+ * used by any method, except {@link #add(String)}
30
+ *
31
+ * @param pValue - the string for char escaping
32
+ * @return sanitized string value
33
+ */
17
34
private String sanitize (final String pValue ) {
18
35
return pValue .replaceAll ("[\\ W]" , "\\ \\ $0" );
19
36
}
20
37
38
+ /**
39
+ * Counts occurrences of some substring in whole string
40
+ * Same as org.apache.commons.lang3.StringUtils#countMatches(String, java.lang.String)
41
+ * by effect. Used to count braces for {@link #or(String)} method
42
+ *
43
+ * @param where - where to find
44
+ * @param what - what needs to count matches
45
+ * @return 0 if nothing found, count of occurrences instead
46
+ */
21
47
private int countOccurrencesOf (String where , String what ) {
22
48
return (where .length () - where .replace (what , "" ).length ()) / what .length ();
23
49
}
@@ -40,11 +66,22 @@ public VerbalExpression build() {
40
66
* @param pValue - literal expression, not sanitized
41
67
* @return this builder
42
68
*/
43
- public Builder add (String pValue ) {
69
+ public Builder add (final String pValue ) {
44
70
this .source .append (pValue );
45
71
return this ;
46
72
}
47
73
74
+ /**
75
+ * Append a regex from builder and wrap it with unnamed group (?: ... )
76
+ *
77
+ * @param regex - VerbalExpression.Builder, that not changed
78
+ * @return this builder
79
+ * @since 1.2
80
+ */
81
+ public Builder add (final Builder regex ) {
82
+ return this .group ().add (regex .build ().toString ()).endGr ();
83
+ }
84
+
48
85
/**
49
86
* Enable or disable the expression to start at the beginning of the line
50
87
*
@@ -147,7 +184,7 @@ public Builder anything() {
147
184
* Add expression that matches anything, but not passed argument
148
185
*
149
186
* @param pValue - the string not to match
150
- * @return
187
+ * @return this builder
151
188
*/
152
189
public Builder anythingButNot (final String pValue ) {
153
190
return this .add ("(?:[^" + sanitize (pValue ) + "]*)" );
@@ -273,6 +310,12 @@ public Builder anyOf(final String pValue) {
273
310
return this ;
274
311
}
275
312
313
+ /**
314
+ * Shortcut to {@link #anyOf(String)}
315
+ *
316
+ * @param value - CharSequence every char from can be matched
317
+ * @return this builder
318
+ */
276
319
public Builder any (final String value ) {
277
320
return this .anyOf (value );
278
321
}
@@ -368,6 +411,15 @@ public Builder withAnyCase(final boolean pEnable) {
368
411
return this ;
369
412
}
370
413
414
+ /**
415
+ * Turn ON matching with ignoring case
416
+ * Example:
417
+ * // matches "a"
418
+ * // matches "A"
419
+ * regex().find("a").withAnyCase()
420
+ *
421
+ * @return this builder
422
+ */
371
423
public Builder withAnyCase () {
372
424
return withAnyCase (true );
373
425
}
@@ -381,16 +433,59 @@ public Builder searchOneLine(final boolean pEnable) {
381
433
return this ;
382
434
}
383
435
384
- public Builder multiple (final String pValue ) {
385
- switch (pValue .charAt (0 )) {
386
- case '*' :
387
- case '+' :
388
- return this .add (pValue );
436
+ /**
437
+ * Convenient method to show that string usage count is exact count, range count or simply one or more
438
+ * Usage:
439
+ * regex().multiply("abc") // Produce (?:abc)+
440
+ * regex().multiply("abc", null) // Produce (?:abc)+
441
+ * regex().multiply("abc", (int)from) // Produce (?:abc){from}
442
+ * regex().multiply("abc", (int)from, (int)to) // Produce (?:abc){from, to}
443
+ * regex().multiply("abc", (int)from, (int)to, (int)...) // Produce (?:abc)+
444
+ *
445
+ * @param pValue - the string to be looked for
446
+ * @param count - (optional) if passed one or two numbers, it used to show count or range count
447
+ * @return this builder
448
+ * @see #oneOrMore()
449
+ * @see #then(String)
450
+ * @see #zeroOrMore()
451
+ */
452
+ public Builder multiple (final String pValue , final int ... count ) {
453
+ if (count == null ) {
454
+ return this .then (pValue ).oneOrMore ();
455
+ }
456
+ switch (count .length ) {
457
+ case 1 :
458
+ return this .then (pValue ).count (count [0 ]);
459
+ case 2 :
460
+ return this .then (pValue ).count (count [0 ], count [1 ]);
389
461
default :
390
- return this .add ( this . sanitize ( pValue ) + '+' );
462
+ return this .then ( pValue ). oneOrMore ( );
391
463
}
392
464
}
393
465
466
+ /**
467
+ * Adds "+" char to regexp
468
+ * Same effect as {@link #atLeast(int)} with "1" argument
469
+ * Also, used by {@link #multiple(String, int...)} when second argument is null, or have length more than 2
470
+ *
471
+ * @return this builder
472
+ * @since 1.2
473
+ */
474
+ public Builder oneOrMore () {
475
+ return this .add ("+" );
476
+ }
477
+
478
+ /**
479
+ * Adds "*" char to regexp, means zero or more times repeated
480
+ * Same effect as {@link #atLeast(int)} with "0" argument
481
+ *
482
+ * @return this builder
483
+ * @since 1.2
484
+ */
485
+ public Builder zeroOrMore () {
486
+ return this .add ("*" );
487
+ }
488
+
394
489
/**
395
490
* Add count of previous group
396
491
* for example:
@@ -419,6 +514,22 @@ public Builder count(final int from, final int to) {
419
514
return this ;
420
515
}
421
516
517
+ /**
518
+ * Produce range count with only minimal number of occurrences
519
+ * for example:
520
+ * .find("w").atLeast(1) // produce (?:w){1,}
521
+ *
522
+ * @param from - minimal number of occurrences
523
+ * @return this Builder
524
+ * @see #count(int)
525
+ * @see #oneOrMore()
526
+ * @see #zeroOrMore()
527
+ * @since 1.2
528
+ */
529
+ public Builder atLeast (final int from ) {
530
+ return this .add ("{" ).add (valueOf (from )).add (",}" );
531
+ }
532
+
422
533
/**
423
534
* Add a alternative expression to be matched
424
535
*
@@ -452,6 +563,34 @@ public Builder capture() {
452
563
return this .add ("(" );
453
564
}
454
565
566
+ /**
567
+ * Shortcut for {@link #capture()}
568
+ *
569
+ * @return this builder
570
+ * @since 1.2
571
+ */
572
+ public Builder capt () {
573
+ return this .capture ();
574
+ }
575
+
576
+ /**
577
+ * Same as {@link #capture()}, but don't save result
578
+ * May be used to set count of duplicated captures, without creating a new saved capture
579
+ * Example:
580
+ * // Without group() - count(2) applies only to second capture
581
+ * regex().group()
582
+ * .capt().range("0", "1").endCapt().tab()
583
+ * .capt().digit().count(5).endCapt()
584
+ * .endGr().count(2);
585
+ *
586
+ * @return this builder
587
+ * @since 1.2
588
+ */
589
+ public Builder group () {
590
+ this .suffixes .append (")" );
591
+ return this .add ("(?:" );
592
+ }
593
+
455
594
/**
456
595
* Close brace for previous capture and remove last closed brace from suffixes
457
596
* Can be used to continue build regex after capture or to add multiply captures
@@ -463,12 +602,41 @@ public Builder endCapture() {
463
602
this .suffixes .setLength (suffixes .length () - 1 );
464
603
return this .add (")" );
465
604
} else {
466
- throw new IllegalStateException ("Can't end capture when it not started" );
605
+ throw new IllegalStateException ("Can't end capture (group) when it not started" );
467
606
}
468
607
}
469
- }
470
608
609
+ /**
610
+ * Shortcut for {@link #endCapture()}
611
+ *
612
+ * @return this builder
613
+ * @since 1.2
614
+ */
615
+ public Builder endCapt () {
616
+ return this .endCapture ();
617
+ }
471
618
619
+ /**
620
+ * Closes current unnamed and unmatching group
621
+ * Shortcut for {@link #endCapture()}
622
+ * Use it with {@link #group()} for prettify code
623
+ * Example:
624
+ * regex().group().maybe("word").count(2).endGr()
625
+ *
626
+ * @return this builder
627
+ * @since 1.2
628
+ */
629
+ public Builder endGr () {
630
+ return this .endCapture ();
631
+ }
632
+ }
633
+
634
/**
 * Private constructor: instances are created through the builder, see
 * {@link #regex()} (or {@link #regex(ru.lanwen.verbalregex.VerbalExpression.Builder)}).
 *
 * @param pattern - {@link java.util.regex.Pattern} that constructed by builder
 */
private VerbalExpression(final Pattern pattern) {
    // store the compiled pattern as-is
    this.pattern = pattern;
}
@@ -518,6 +686,7 @@ public String getText(final String toTest) {
518
686
* @param toTest - string to extract from
519
687
* @param group - group to extract
520
688
* @return extracted group
689
+ * @since 1.1
521
690
*/
522
691
public String getText (final String toTest , final int group ) {
523
692
Matcher m = pattern .matcher (toTest );
@@ -539,6 +708,7 @@ public String toString() {
539
708
*
540
709
* @param pBuilder - instance to clone
541
710
* @return new VerbalExpression.Builder copied from passed
711
+ * @since 1.1
542
712
*/
543
713
public static Builder regex (final Builder pBuilder ) {
544
714
Builder builder = new Builder ();
@@ -555,6 +725,7 @@ public static Builder regex(final Builder pBuilder) {
555
725
* Creates new instance of VerbalExpression builder
556
726
*
557
727
* @return new VerbalExpression.Builder
728
+ * @since 1.1
558
729
*/
559
730
public static Builder regex () {
560
731
return new Builder ();
0 commit comments