Skip to content

Commit 52093eb

Browse files
author
Kristinn Sigurdsson
committed
Only copy source tag if not null.
1 parent 308cee8 commit 52093eb

File tree

1 file changed

+4
-5
lines changed

1 file changed

+4
-5
lines changed

modules/src/main/java/org/archive/modules/extractor/ExtractorRobotsTxt.java

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -12,9 +12,6 @@
1212

1313
import org.apache.commons.httpclient.URIException;
1414
import org.archive.modules.CrawlURI;
15-
import org.archive.modules.extractor.ContentExtractor;
16-
import org.archive.modules.extractor.Hop;
17-
import org.archive.modules.extractor.LinkContext;
1815

1916
public class ExtractorRobotsTxt extends ContentExtractor {
2017
private static final Logger LOGGER = Logger
@@ -65,8 +62,10 @@ protected boolean innerExtract(CrawlURI curi) {
6562
curi.getViaContext());
6663

6764
// Also copy the source over:
68-
curiClone.setSourceTag(curi.getSourceTag());
69-
65+
if (curi.getSourceTag() != null) {
66+
curiClone.setSourceTag(curi.getSourceTag());
67+
}
68+
7069
// Parse the robots for the sitemaps.
7170
List<String> links = parseRobotsTxt(
7271
curi.getRecorder()

0 commit comments

Comments (0)