Skip to content

Commit 4f4ab88

Browse files
committed
WIP 276 Add excludes to CLI and Maven plugin
1 parent bc02849 commit 4f4ab88

File tree

6 files changed

+61
-4
lines changed

6 files changed

+61
-4
lines changed

htmlSanityCheck-cli/README.adoc

+10
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,16 @@ Default:
110110
include::../htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/AllCheckers.java[tag=checker-classes,indent=0]
111111
----
112112

113+
`--exclude` (optional):: A comma-separated set of regular expressions matching URLs (or hosts) that HSC should not check.
114+
+
115+
Type: Set.
116+
+
117+
Default: Empty set
118+
+
119+
[source,groovy]
120+
----
121+
[]
122+
----
113123

114124
`--httpWarningCodes` (optional):: HTTP response codes treated as warning.
115125
+

htmlSanityCheck-cli/src/main/groovy/org/aim42/htmlsanitycheck/cli/HscCommand.groovy

+4
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,9 @@ class HscCommand implements Runnable {
8888
@Option(names = ["-s", "--suffix"], description = "File name suffixes to investigate", split = ",")
8989
String[] suffixes = ["html", "htm"]
9090

91+
@Option(names = ["-e", "--exclude"], description = "Exclude remote patterns to check", split = ',')
92+
String[] excludes = []
93+
9194
@Parameters(index = "0", arity = "0..1", description = "base directory (default: current directory)")
9295
File srcDir = new File(".").getAbsoluteFile()
9396

@@ -172,6 +175,7 @@ class HscCommand implements Runnable {
172175
.sourceDocuments(srcDocuments as Set)
173176
.checkingResultsDir(resultsDirectory)
174177
.checksToExecute(AllCheckers.CHECKER_CLASSES)
178+
.excludes(hscCommand.excludes as Set)
175179
.build()
176180

177181
// if we have no valid configuration, abort with exception

htmlSanityCheck-cli/src/test/groovy/org/aim42/htmlsanitycheck/cli/HscCommandSpec.groovy

+9-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,13 @@ import spock.lang.Unroll
88

99
class HscCommandSpec extends Specification {
1010

11-
private final static VALID_HTML = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"><html><head></head><body></body><html>"""
11+
private final static VALID_HTML = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN">
12+
<html>
13+
<head></head>
14+
<body>
15+
This <a href="https://tld.invalid/">Invalid TLD</a> should not make a problem!
16+
</body>
17+
<html>"""
1218
private final static INVALID_HTML = """<body><span id="id"/><span id="id"/></body> """
1319

1420
@Rule
@@ -86,7 +92,7 @@ class HscCommandSpec extends Specification {
8692
if (args) {
8793
mainArgs.add(args)
8894
}
89-
mainArgs.add(testProjectDir.root)
95+
mainArgs.add(testProjectDir.root as String)
9096

9197
when:
9298
HscCommand.main(mainArgs as String[])
@@ -112,7 +118,7 @@ class HscCommandSpec extends Specification {
112118
def "test with valid HTML file"() {
113119
given:
114120
htmlFile << VALID_HTML
115-
String[] args = ["-r", testResultsDir.root, testProjectDir.root]
121+
String[] args = ["-e", "^.*\\.invalid.*", "-r", testResultsDir.root, testProjectDir.root]
116122

117123
when:
118124
HscCommand.main(args)

htmlSanityCheck-maven-plugin/README.adoc

+11
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,17 @@ Default: All available checker classes.
130130
include::../htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/AllCheckers.java[tag=checker-classes,indent=0]
131131
----
132132

133+
`excludes` (optional):: A set of regular expressions matching URLs (or hosts) that HSC should not check.
134+
+
135+
Type: Set.
136+
+
137+
Default: Empty set
138+
+
139+
[source,groovy]
140+
----
141+
[]
142+
----
143+
133144
`httpWarningCodes` (optional):: Additional HTTP response codes treated as warning.
134145
+
135146
Type: List.

htmlSanityCheck-maven-plugin/src/main/java/org/aim42/htmlsanitycheck/maven/HtmlSanityCheckMojo.java

+17
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import java.io.File;
1717
import java.io.IOException;
1818
import java.util.Collections;
19+
import java.util.HashSet;
1920
import java.util.List;
2021
import java.util.Set;
2122

@@ -183,6 +184,20 @@ public class HtmlSanityCheckMojo extends AbstractMojo {
183184
@Parameter
184185
private List<Class<? extends Checker>> checkerClasses = AllCheckers.CHECKER_CLASSES;
185186

187+
/**
188+
* (optional)
189+
* Provides a set of patterns to exclude certain URLs (hosts) from being processed.
190+
* <p>
191+
* Patterns are regular expressions and will be matched against the hrefs.
192+
* Useful for excluding hosts that frequently run into problems (temporarily not reachable etc.).
193+
* <p>
194+
* Type: Set of Strings.
195+
* <p>
196+
* Default: Empty set (no exclusions).
197+
*/
198+
@Parameter
199+
private Set<String> excludes = new HashSet<>();
200+
186201
static PerRunResults performChecks(Configuration myConfig) throws MojoExecutionException {
187202
try {
188203
AllChecksRunner allChecksRunner = new AllChecksRunner(myConfig);
@@ -249,10 +264,12 @@ void logBuildParameter(Configuration myConfig) {
249264
getLog().info("Results dir : " + myConfig.getCheckingResultsDir());
250265
getLog().info("JUnit dir : " + myConfig.getJunitResultsDir());
251266
getLog().info("Fail on errors : " + myConfig.getFailOnErrors());
267+
getLog().info("Excludes : " + myConfig.getExcludes());
252268
}
253269

254270
protected Configuration setupConfiguration() {
255271
Configuration result = Configuration.builder()
272+
.excludes(excludes)
256273
.sourceDocuments(sourceDocuments)
257274
.sourceDir(sourceDir)
258275
.checkingResultsDir(checkingResultsDir)

htmlSanityCheck-maven-plugin/src/test/java/org/aim42/htmlsanitycheck/maven/HtmlSanityCheckMojoTest.java

+10-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,13 @@
1919

2020
class HtmlSanityCheckMojoTest {
2121

22-
static final String VALID_HTML = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\"><html><head></head><body></body><html>";
22+
static final String VALID_HTML = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\"> " +
23+
"<html> " +
24+
" <head></head> " +
25+
" <body>" +
26+
" This <a href=\"https://tld.invalid/\">Invalid TLD</a> should not make a problem! " +
27+
" </body> " +
28+
"<html>";
2329

2430
@Test
2531
void setupConfiguration() {
@@ -162,13 +168,16 @@ void execute() throws IOException, MojoExecutionException {
162168
Files.write(sourceFile.toPath(), VALID_HTML.getBytes(StandardCharsets.UTF_8));
163169
Set<File> fileset = new HashSet<>();
164170
fileset.add(sourceFile);
171+
Set<String> excludes = new HashSet<>();
172+
excludes.add("^.*\\.invalid.*");
165173

166174
Configuration myConfig = Configuration.builder()
167175
.checksToExecute(AllCheckers.CHECKER_CLASSES)
168176
.junitResultsDir(junitDir.toFile())
169177
.checkingResultsDir(resultDir.toFile())
170178
.sourceDir(sourceDir.toFile())
171179
.sourceDocuments(fileset)
180+
.excludes(excludes)
172181
.build();
173182
HtmlSanityCheckMojo mojo = new HtmlSanityCheckMojo();
174183

0 commit comments

Comments
 (0)