Skip to content

Commit

Permalink
Add sitemap extraction to default profile
Browse files Browse the repository at this point in the history
  • Loading branch information
kristinn committed Jul 5, 2019
1 parent ba8f669 commit 308cee8
Showing 1 changed file with 9 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,11 @@ http://example.example/example
</bean>
<!-- Extractor for outlinks found in HTTP response headers. -->
<bean id="extractorHttp" class="org.archive.modules.extractor.ExtractorHTTP"/>
<!-- Extractor for the sitemap URLs announced in robots.txt. -->
<bean id="extractorRobotsTxt" class="org.archive.modules.extractor.ExtractorRobotsTxt"/>
<!-- Extractor for the links listed in sitemaps. -->
<bean id="extractorSitemap" class="org.archive.modules.extractor.ExtractorSitemap"/>

<bean id="extractorHtml" class="org.archive.modules.extractor.ExtractorHTML">
<!-- <property name="extractJavascript" value="true" /> -->
<!-- <property name="extractValueAttributes" value="true" /> -->
Expand Down Expand Up @@ -320,6 +325,10 @@ http://example.example/example
<ref bean="fetchHttp"/>
<!-- ...extract outlinks from HTTP headers... -->
<ref bean="extractorHttp"/>
<!-- ...extract sitemap URLs from robots.txt... -->
<ref bean="extractorRobotsTxt"/>
<!-- ...extract links from sitemaps... -->
<ref bean="extractorSitemap"/>
<!-- ...extract outlinks from HTML content... -->
<ref bean="extractorHtml"/>
<!-- ...extract outlinks from CSS content... -->
Expand Down

0 comments on commit 308cee8

Please sign in to comment.