Skip to content

Commit

Permalink
Only copy source tag if not null.
Browse files: browse the repository at this point in the history
  • Loading branch information
Kristinn Sigurdsson committed Apr 20, 2021
1 parent 308cee8 commit 52093eb
Showing 1 changed file with 4 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@

import org.apache.commons.httpclient.URIException;
import org.archive.modules.CrawlURI;
import org.archive.modules.extractor.ContentExtractor;
import org.archive.modules.extractor.Hop;
import org.archive.modules.extractor.LinkContext;

public class ExtractorRobotsTxt extends ContentExtractor {
private static final Logger LOGGER = Logger
Expand Down Expand Up @@ -65,8 +62,10 @@ protected boolean innerExtract(CrawlURI curi) {
curi.getViaContext());

// Also copy the source over:
- curiClone.setSourceTag(curi.getSourceTag());
-
+ if (curi.getSourceTag() != null) {
+ curiClone.setSourceTag(curi.getSourceTag());
+ }
+
// Parse the robots for the sitemaps.
List<String> links = parseRobotsTxt(
curi.getRecorder()
Expand Down

0 comments on commit 52093eb

Please sign in to comment.