diff --git a/src/main/groovy/io/seqera/wave/controller/ServiceInfoController.groovy b/src/main/groovy/io/seqera/wave/controller/ServiceInfoController.groovy
index 85d43dd4a..80ecd495f 100644
--- a/src/main/groovy/io/seqera/wave/controller/ServiceInfoController.groovy
+++ b/src/main/groovy/io/seqera/wave/controller/ServiceInfoController.groovy
@@ -59,7 +59,7 @@ class ServiceInfoController {
: HttpResponse.badRequest()
}
- @Get(uri = "/openapi")
+ // Permanently redirect the bare "/openapi" path to "/openapi/" (note the trailing slash)
+ @Get("/openapi")
HttpResponse getOpenAPI() {
HttpResponse.redirect(URI.create("/openapi/"))
}
diff --git a/src/main/groovy/io/seqera/wave/filter/DenyCrawlerFilter.groovy b/src/main/groovy/io/seqera/wave/filter/DenyCrawlerFilter.groovy
new file mode 100644
index 000000000..593eedd4b
--- /dev/null
+++ b/src/main/groovy/io/seqera/wave/filter/DenyCrawlerFilter.groovy
@@ -0,0 +1,78 @@
+/*
+ * Wave, containers provisioning service
+ * Copyright (c) 2023-2024, Seqera Labs
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+package io.seqera.wave.filter
+
+import groovy.transform.CompileStatic
+import groovy.util.logging.Slf4j
+import io.micronaut.http.HttpRequest
+import io.micronaut.http.HttpResponse
+import io.micronaut.http.HttpStatus
+import io.micronaut.http.MutableHttpResponse
+import io.micronaut.http.annotation.Filter
+import io.micronaut.http.filter.HttpServerFilter
+import io.micronaut.http.filter.ServerFilterChain
+import org.reactivestreams.Publisher
+import reactor.core.publisher.Flux
+/**
+ * Block the access to known crawler bots
+ *
+ * @author Paolo Di Tommaso
+ */
+@Slf4j
+@CompileStatic
+@Filter("/**")
+class DenyCrawlerFilter implements HttpServerFilter {
+
+    /** Lower-case User-Agent fragments identifying the crawlers to be rejected. */
+    private static final List<String> CRAWLER_AGENTS = Arrays.asList(
+            "googlebot", "bingbot", "yandexbot", "baiduspider", "duckduckbot",
+            "slurp", "facebot", "twitterbot", "mj12bot", "ahrefsbot"
+    )
+
+    /**
+     * Check whether a User-Agent header value contains a known crawler token.
+     * The match is case-insensitive; a null or empty value is never a crawler.
+     */
+    static boolean isCrawler(String userAgent) {
+        if (!userAgent)
+            return false
+        // Fixed locale: the lower-casing must not depend on the JVM default locale
+        final String agent = userAgent.toLowerCase(Locale.ROOT)
+        return CRAWLER_AGENTS.stream().anyMatch(agent::contains)
+    }
+
+    /** Reply 405 to crawler requests, hand any other request to the filter chain. */
+    @Override
+    Publisher<MutableHttpResponse<?>> doFilter(HttpRequest<?> request, ServerFilterChain chain) {
+        // Check if the request was sent by a known crawler user agent
+        if (isCrawler(request.getHeaders().get("User-Agent"))) {
+            // Return immediately without processing the request
+            log.debug("Request denied: ${request}")
+            return Flux.just(HttpResponse.status(HttpStatus.METHOD_NOT_ALLOWED))
+        }
+        // Continue processing the request
+        return chain.proceed(request)
+    }
+
+    /** Position of this filter in the chain, as defined by {@link FilterOrder}. */
+    @Override
+    int getOrder() {
+        return FilterOrder.DENY_CRAWLER
+    }
+}
diff --git a/src/main/groovy/io/seqera/wave/filter/FilterOrder.groovy b/src/main/groovy/io/seqera/wave/filter/FilterOrder.groovy
index 057d4fcca..0c8919690 100644
--- a/src/main/groovy/io/seqera/wave/filter/FilterOrder.groovy
+++ b/src/main/groovy/io/seqera/wave/filter/FilterOrder.groovy
@@ -27,6 +27,7 @@ package io.seqera.wave.filter
*/
interface FilterOrder {
+ final int DENY_CRAWLER = -110
final int DENY_PATHS = -100
final int RATE_LIMITER = -50
final int PULL_METRICS = 10
diff --git a/src/main/groovy/io/seqera/wave/service/data/stream/MessageStream.groovy b/src/main/groovy/io/seqera/wave/service/data/stream/MessageStream.groovy
index cb77741b4..5f0ae3d91 100644
--- a/src/main/groovy/io/seqera/wave/service/data/stream/MessageStream.groovy
+++ b/src/main/groovy/io/seqera/wave/service/data/stream/MessageStream.groovy
@@ -28,7 +28,7 @@ interface MessageStream {
/**
* Initialize the stream with the given Id
*
- * @param streamId The uniqur ID of the stream to be initialized
+ * @param streamId The unique ID of the stream to be initialized
*/
void init(String streamId)
diff --git a/src/test/groovy/io/seqera/wave/controller/InspectControllerTest.groovy b/src/test/groovy/io/seqera/wave/controller/InspectControllerTest.groovy
index 98788cf6c..de1324258 100644
--- a/src/test/groovy/io/seqera/wave/controller/InspectControllerTest.groovy
+++ b/src/test/groovy/io/seqera/wave/controller/InspectControllerTest.groovy
@@ -29,7 +29,6 @@ import io.micronaut.test.annotation.MockBean
import io.micronaut.test.extensions.spock.annotation.MicronautTest
import io.seqera.wave.api.ContainerInspectRequest
import io.seqera.wave.api.ContainerInspectResponse
-import io.seqera.wave.exception.BadRequestException
import io.seqera.wave.service.logs.BuildLogService
import io.seqera.wave.service.logs.BuildLogServiceImpl
import jakarta.inject.Inject
diff --git a/src/test/groovy/io/seqera/wave/controller/ScanControllerTest.groovy b/src/test/groovy/io/seqera/wave/controller/ScanControllerTest.groovy
index 5677bf77a..013132a62 100644
--- a/src/test/groovy/io/seqera/wave/controller/ScanControllerTest.groovy
+++ b/src/test/groovy/io/seqera/wave/controller/ScanControllerTest.groovy
@@ -92,11 +92,10 @@ class ScanControllerTest extends Specification {
res.body().requestId == scan.requestId
}
-
def "should return 404 and null"() {
when:
def req = HttpRequest.GET("/v1alpha1/scans/unknown")
- def res = client.toBlocking().exchange(req, WaveScanRecord)
+ client.toBlocking().exchange(req, WaveScanRecord)
then:
def e = thrown(HttpClientResponseException)
diff --git a/src/test/groovy/io/seqera/wave/controller/ServiceInfoControllerTest.groovy b/src/test/groovy/io/seqera/wave/controller/ServiceInfoControllerTest.groovy
index c79ef7d91..dd3c7795a 100644
--- a/src/test/groovy/io/seqera/wave/controller/ServiceInfoControllerTest.groovy
+++ b/src/test/groovy/io/seqera/wave/controller/ServiceInfoControllerTest.groovy
@@ -20,25 +20,61 @@ package io.seqera.wave.controller
import spock.lang.Specification
-import io.micronaut.http.HttpResponse
+import io.micronaut.http.HttpRequest
import io.micronaut.http.HttpStatus
-
+import io.micronaut.http.client.DefaultHttpClientConfiguration
+import io.micronaut.http.client.HttpClient
+import io.micronaut.http.client.annotation.Client
+import io.micronaut.http.client.exceptions.HttpClientResponseException
+import io.micronaut.runtime.server.EmbeddedServer
+import io.micronaut.test.extensions.spock.annotation.MicronautTest
+import jakarta.inject.Inject
/**
*
* @author Munish Chouhan
*/
+@MicronautTest
 class ServiceInfoControllerTest extends Specification {
+    @Inject
+    @Client("/")
+    HttpClient client
+
+    @Inject
+    EmbeddedServer embeddedServer
+
+    def 'should get service info' () {
+        when:
+        def request = HttpRequest.GET("/service-info")
+        def resp = client.toBlocking().exchange(request, String)
+        then:
+        resp.status.code == 200
+    }
+
+    def 'should deny service info' () {
+        when:
+        def request = HttpRequest.GET("/service-info").header('User-Agent','Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)')
+        client.toBlocking().exchange(request, String)
+        then:
+        def e = thrown(HttpClientResponseException)
+        e.status == HttpStatus.METHOD_NOT_ALLOWED
+    }
+
     def 'should redirect to /openapi/'() {
         given:
-        def controller = new ServiceInfoController()
-
+        // Dedicated client with redirects disabled, so the 301 response itself can be inspected
+        def config = new DefaultHttpClientConfiguration()
+        config.setFollowRedirects(false)
+        def noRedirectClient = HttpClient.create(embeddedServer.getContextURI().toURL(), config)
         when:
-        HttpResponse response = controller.getOpenAPI()
+        def request = HttpRequest.GET("/openapi")
+        def resp = noRedirectClient.toBlocking().exchange(request, String)
         then:
-        response.status == HttpStatus.MOVED_PERMANENTLY
-        response.header('Location') == '/openapi/'
+        resp.status == HttpStatus.MOVED_PERMANENTLY // Expect 301
+        resp.headers.get("Location") == "/openapi/" // Validate redirect location
+        cleanup:
+        noRedirectClient?.close() // manually created clients are not managed by the test context
     }
 }