Skip to content

Commit d25272b

Browse files
committed
Merge pull request #27 from VerbalExpressions/1.2
This PR adds: add - method to add another regex builder to the current regex; add - unnamed group method and shortcuts for capture* methods; rft - non backward compatibility - make Builder constructor package-private; rft - non backward compatibility - change multiply method to be the same as in the original JS implementation; add - oneOrMore, zeroOrMore and atLeast methods. In tests: rft - replace assertTrue and assertFalse with assertThat with matcher; add - one more complex example.
2 parents 88884ab + 65e42b2 commit d25272b

File tree

8 files changed

+574
-99
lines changed

8 files changed

+574
-99
lines changed

src/main/java/ru/lanwen/verbalregex/VerbalExpression.java

+181-10
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import java.util.regex.Matcher;
44
import java.util.regex.Pattern;
55

6+
import static java.lang.String.valueOf;
7+
68
public class VerbalExpression {
79

810
private final Pattern pattern;
@@ -14,10 +16,34 @@ public static class Builder {
1416
private StringBuilder suffixes = new StringBuilder();
1517
private int modifiers = Pattern.MULTILINE;
1618

19+
/**
20+
* Package private. Use {@link #regex()} to build a new one
21+
*
22+
* @since 1.2
23+
*/
24+
Builder() {
    // Intentionally empty: exists only to narrow visibility to package-private.
}
26+
27+
/**
28+
* Escapes every non-word char by prefixing it with a backslash;
29+
* used by any method, except {@link #add(String)}
30+
*
31+
* @param pValue - the string for char escaping
32+
* @return sanitized string value
33+
*/
1734
private String sanitize(final String pValue) {
1835
return pValue.replaceAll("[\\W]", "\\\\$0");
1936
}
2037

38+
/**
39+
* Counts occurrences of some substring in whole string
40+
* Same as org.apache.commons.lang3.StringUtils#countMatches(String, java.lang.String)
41+
* by effect. Used to count braces for {@link #or(String)} method
42+
*
43+
* @param where - where to find
44+
* @param what - what needs to count matches
45+
* @return count of occurrences, or 0 if nothing found
46+
*/
2147
private int countOccurrencesOf(String where, String what) {
2248
return (where.length() - where.replace(what, "").length()) / what.length();
2349
}
@@ -40,11 +66,22 @@ public VerbalExpression build() {
4066
* @param pValue - literal expression, not sanitized
4167
* @return this builder
4268
*/
43-
public Builder add(String pValue) {
69+
public Builder add(final String pValue) {
4470
this.source.append(pValue);
4571
return this;
4672
}
4773

74+
/**
75+
* Append a regex from builder and wrap it with unnamed group (?: ... )
76+
*
77+
* @param regex - VerbalExpression.Builder, that is left unchanged
78+
* @return this builder
79+
* @since 1.2
80+
*/
81+
public Builder add(final Builder regex) {
82+
return this.group().add(regex.build().toString()).endGr();
83+
}
84+
4885
/**
4986
* Enable or disable the expression to start at the beginning of the line
5087
*
@@ -147,7 +184,7 @@ public Builder anything() {
147184
* Add expression that matches anything, but not passed argument
148185
*
149186
* @param pValue - the string not to match
150-
* @return
187+
* @return this builder
151188
*/
152189
public Builder anythingButNot(final String pValue) {
153190
return this.add("(?:[^" + sanitize(pValue) + "]*)");
@@ -273,6 +310,12 @@ public Builder anyOf(final String pValue) {
273310
return this;
274311
}
275312

313+
/**
314+
* Shortcut to {@link #anyOf(String)}
315+
*
316+
* @param value - string whose every char can be matched
317+
* @return this builder
318+
*/
276319
public Builder any(final String value) {
277320
return this.anyOf(value);
278321
}
@@ -368,6 +411,15 @@ public Builder withAnyCase(final boolean pEnable) {
368411
return this;
369412
}
370413

414+
/**
415+
* Turn ON matching with ignoring case
416+
* Example:
417+
* // matches "a"
418+
* // matches "A"
419+
* regex().find("a").withAnyCase()
420+
*
421+
* @return this builder
422+
*/
371423
public Builder withAnyCase() {
372424
return withAnyCase(true);
373425
}
@@ -381,16 +433,59 @@ public Builder searchOneLine(final boolean pEnable) {
381433
return this;
382434
}
383435

384-
public Builder multiple(final String pValue) {
385-
switch (pValue.charAt(0)) {
386-
case '*':
387-
case '+':
388-
return this.add(pValue);
436+
/**
437+
* Convenient method to state that a string is repeated an exact number of times, a range of times, or simply one or more times
438+
* Usage:
439+
* regex().multiple("abc") // Produce (?:abc)+
440+
* regex().multiple("abc", null) // Produce (?:abc)+
441+
* regex().multiple("abc", (int)from) // Produce (?:abc){from}
442+
* regex().multiple("abc", (int)from, (int)to) // Produce (?:abc){from, to}
443+
* regex().multiple("abc", (int)from, (int)to, (int)...) // Produce (?:abc)+
444+
*
445+
* @param pValue - the string to be looked for
446+
* @param count - (optional) one number is used as an exact count, two numbers as a range count
447+
* @return this builder
448+
* @see #oneOrMore()
449+
* @see #then(String)
450+
* @see #zeroOrMore()
451+
*/
452+
public Builder multiple(final String pValue, final int... count) {
453+
if (count == null) {
454+
return this.then(pValue).oneOrMore();
455+
}
456+
switch (count.length) {
457+
case 1:
458+
return this.then(pValue).count(count[0]);
459+
case 2:
460+
return this.then(pValue).count(count[0], count[1]);
389461
default:
390-
return this.add(this.sanitize(pValue) + '+');
462+
return this.then(pValue).oneOrMore();
391463
}
392464
}
393465

466+
/**
467+
* Adds "+" char to regexp
468+
* Same effect as {@link #atLeast(int)} with "1" argument
469+
* Also used by {@link #multiple(String, int...)} when the count argument is null or its length is neither 1 nor 2
470+
*
471+
* @return this builder
472+
* @since 1.2
473+
*/
474+
public Builder oneOrMore() {
475+
return this.add("+");
476+
}
477+
478+
/**
479+
* Adds "*" char to regexp, means zero or more times repeated
480+
* Same effect as {@link #atLeast(int)} with "0" argument
481+
*
482+
* @return this builder
483+
* @since 1.2
484+
*/
485+
public Builder zeroOrMore() {
486+
return this.add("*");
487+
}
488+
394489
/**
395490
* Add count of previous group
396491
* for example:
@@ -419,6 +514,22 @@ public Builder count(final int from, final int to) {
419514
return this;
420515
}
421516

517+
/**
518+
* Produce range count with only minimal number of occurrences
519+
* for example:
520+
* .find("w").atLeast(1) // produce (?:w){1,}
521+
*
522+
* @param from - minimal number of occurrences
523+
* @return this Builder
524+
* @see #count(int)
525+
* @see #oneOrMore()
526+
* @see #zeroOrMore()
527+
* @since 1.2
528+
*/
529+
public Builder atLeast(final int from) {
530+
return this.add("{").add(valueOf(from)).add(",}");
531+
}
532+
422533
/**
423534
* Add a alternative expression to be matched
424535
*
@@ -452,6 +563,34 @@ public Builder capture() {
452563
return this.add("(");
453564
}
454565

566+
/**
567+
* Shortcut for {@link #capture()}
568+
*
569+
* @return this builder
570+
* @since 1.2
571+
*/
572+
public Builder capt() {
573+
return this.capture();
574+
}
575+
576+
/**
577+
* Same as {@link #capture()}, but doesn't save the result
578+
* May be used to set count of duplicated captures, without creating a new saved capture
579+
* Example:
580+
* // Without group() - count(2) applies only to second capture
581+
* regex().group()
582+
* .capt().range("0", "1").endCapt().tab()
583+
* .capt().digit().count(5).endCapt()
584+
* .endGr().count(2);
585+
*
586+
* @return this builder
587+
* @since 1.2
588+
*/
589+
public Builder group() {
590+
this.suffixes.append(")");
591+
return this.add("(?:");
592+
}
593+
455594
/**
456595
* Close brace for previous capture and remove last closed brace from suffixes
457596
* Can be used to continue building the regex after a capture or to add multiple captures
@@ -463,12 +602,41 @@ public Builder endCapture() {
463602
this.suffixes.setLength(suffixes.length() - 1);
464603
return this.add(")");
465604
} else {
466-
throw new IllegalStateException("Can't end capture when it not started");
605+
throw new IllegalStateException("Can't end capture (group) when it not started");
467606
}
468607
}
469-
}
470608

609+
/**
610+
* Shortcut for {@link #endCapture()}
611+
*
612+
* @return this builder
613+
* @since 1.2
614+
*/
615+
public Builder endCapt() {
616+
return this.endCapture();
617+
}
471618

619+
/**
620+
* Closes the current unnamed, non-capturing group
621+
* Shortcut for {@link #endCapture()}
622+
* Use it with {@link #group()} to prettify code
623+
* Example:
624+
* regex().group().maybe("word").count(2).endGr()
625+
*
626+
* @return this builder
627+
* @since 1.2
628+
*/
629+
public Builder endGr() {
630+
return this.endCapture();
631+
}
632+
}
633+
634+
/**
635+
* Use builder {@link #regex()} (or {@link #regex(ru.lanwen.verbalregex.VerbalExpression.Builder)})
636+
* to create new instance of VerbalExpression
637+
*
638+
* @param pattern - {@link java.util.regex.Pattern} constructed by the builder
639+
*/
472640
private VerbalExpression(final Pattern pattern) {
    // Keep the compiled pattern as given; nothing else is done here.
    this.pattern = pattern;
}
@@ -518,6 +686,7 @@ public String getText(final String toTest) {
518686
* @param toTest - string to extract from
519687
* @param group - group to extract
520688
* @return extracted group
689+
* @since 1.1
521690
*/
522691
public String getText(final String toTest, final int group) {
523692
Matcher m = pattern.matcher(toTest);
@@ -539,6 +708,7 @@ public String toString() {
539708
*
540709
* @param pBuilder - instance to clone
541710
* @return new VerbalExpression.Builder copied from passed
711+
* @since 1.1
542712
*/
543713
public static Builder regex(final Builder pBuilder) {
544714
Builder builder = new Builder();
@@ -555,6 +725,7 @@ public static Builder regex(final Builder pBuilder) {
555725
* Creates new instance of VerbalExpression builder
556726
*
557727
* @return new VerbalExpression.Builder
728+
* @since 1.1
558729
*/
559730
public static Builder regex() {
560731
return new Builder();

0 commit comments

Comments
 (0)