diff --git a/src/main/groovy/io/seqera/wave/filter/DenyCrawlerFilter.groovy b/src/main/groovy/io/seqera/wave/filter/DenyCrawlerFilter.groovy
index 715ac4e34..add0e3c39 100644
--- a/src/main/groovy/io/seqera/wave/filter/DenyCrawlerFilter.groovy
+++ b/src/main/groovy/io/seqera/wave/filter/DenyCrawlerFilter.groovy
@@ -63,7 +63,7 @@ class DenyCrawlerFilter implements HttpServerFilter {
     Publisher<MutableHttpResponse<?>> doFilter(HttpRequest<?> request, ServerFilterChain chain) {
         final userAgent = request.getHeaders().get("User-Agent")?.toLowerCase()
         // Check if the request path matches any of the ignored paths
-        if (isCrawler(userAgent)) {
+        if (isCrawler(userAgent) && request.path!='/robots.txt') {
            // Return immediately without processing the request
            log.warn("Request denied [${request.methodName}] ${request.uri}\n- Headers:${RegHelper.dumpHeaders(request)}")
            return Flux.just(HttpResponse.status(HttpStatus.METHOD_NOT_ALLOWED))
diff --git a/src/test/groovy/io/seqera/wave/filter/DenyCrawlerFilterTest.groovy b/src/test/groovy/io/seqera/wave/filter/DenyCrawlerFilterTest.groovy
new file mode 100644
index 000000000..c68b51454
--- /dev/null
+++ b/src/test/groovy/io/seqera/wave/filter/DenyCrawlerFilterTest.groovy
@@ -0,0 +1,58 @@
+/*
+ * Wave, containers provisioning service
+ * Copyright (c) 2023-2024, Seqera Labs
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+package io.seqera.wave.filter
+
+import spock.lang.Specification
+
+import io.micronaut.http.HttpRequest
+import io.micronaut.http.client.HttpClient
+import io.micronaut.http.client.annotation.Client
+import io.micronaut.http.client.exceptions.HttpClientResponseException
+import io.micronaut.test.extensions.spock.annotation.MicronautTest
+import jakarta.inject.Inject
+
+/**
+ *
+ * @author Paolo Di Tommaso
+ */
+@MicronautTest
+class DenyCrawlerFilterTest extends Specification {
+
+    @Inject
+    @Client("/")
+    HttpClient client
+
+    def 'should allow robots.txt' () {
+        when:
+        def request = HttpRequest.GET("/robots.txt").header("User-Agent", "Googlebot")
+        def resp = client.toBlocking().exchange(request, String)
+        then:
+        resp.status.code == 200
+    }
+
+    def 'should disallow anything else' () {
+        when:
+        def request = HttpRequest.GET("/service-info").header("User-Agent", "Googlebot")
+        client.toBlocking().exchange(request, String)
+        then:
+        HttpClientResponseException e = thrown(HttpClientResponseException)
+        e.status.code == 405
+    }
+
+}
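
Note on the change above: exempting `/robots.txt` lets well-behaved crawlers fetch the policy file and learn they are disallowed, rather than receiving a bare 405 on every request. A typical disallow-all policy (assumed here; the actual file served by Wave is not part of this diff) would be:

```
User-agent: *
Disallow: /
```

The `isCrawler` helper is referenced but not shown in this diff. A minimal Groovy sketch of how such a check might work, assuming a hard-coded token list (the real implementation may source its patterns differently):

```groovy
class CrawlerCheck {
    // Illustrative bot tokens; the actual DenyCrawlerFilter may use a different list
    static final List<String> BOT_TOKENS = ['googlebot', 'bingbot', 'crawler', 'spider']

    // The filter lowercases the User-Agent header before calling the check,
    // so plain substring matching is sufficient in this sketch
    static boolean isCrawler(String userAgent) {
        userAgent != null && BOT_TOKENS.any { userAgent.contains(it) }
    }
}

assert CrawlerCheck.isCrawler('googlebot')    // denied, unless requesting /robots.txt
assert !CrawlerCheck.isCrawler('curl/8.0.1')  // passes through the filter
```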