File tree Expand file tree Collapse file tree 1 file changed +9
-0
lines changed
engine/src/main/resources/org/archive/crawler/restlet Expand file tree Collapse file tree 1 file changed +9
-0
lines changed Original file line number Diff line number Diff line change @@ -288,6 +288,11 @@ http://example.example/example
288
288
</bean >
289
289
<bean id="extractorHttp" class="org.archive.modules.extractor.ExtractorHTTP">
290
290
</bean >
291
+ <bean id="extractorRobotsTxt" class="org.archive.modules.extractor.ExtractorRobotsTxt">
292
+ </bean >
293
+ <bean id="extractorSitemap" class="org.archive.modules.extractor.ExtractorSitemap">
294
+ </bean >
295
+
291
296
<bean id="extractorHtml" class="org.archive.modules.extractor.ExtractorHTML">
292
297
<!-- <property name="extractJavascript" value="true" /> -->
293
298
<!-- <property name="extractValueAttributes" value="true" /> -->
@@ -320,6 +325,10 @@ http://example.example/example
320
325
<ref bean="fetchHttp" />
321
326
<!-- ...extract outlinks from HTTP headers... -->
322
327
<ref bean="extractorHttp" />
328
+ <!-- ...extract sitemap urls from robots.txt... -->
329
+ <ref bean="extractorRobotsTxt" />
330
+ <!-- ...extract links from sitemaps... -->
331
+ <ref bean="extractorSitemap" />
323
332
<!-- ...extract outlinks from HTML content... -->
324
333
<ref bean="extractorHtml" />
325
334
<!-- ...extract outlinks from CSS content... -->
You can’t perform that action at this time.
0 commit comments