Skip to content

Commit

Permalink
Add DenyCrawlerFilter (#803)
Browse files Browse the repository at this point in the history
Signed-off-by: Paolo Di Tommaso <[email protected]>
  • Loading branch information
pditommaso authored Feb 28, 2025
1 parent 3aec041 commit edfae00
Show file tree
Hide file tree
Showing 7 changed files with 125 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class ServiceInfoController {
: HttpResponse.badRequest()
}

/**
 * Redirect requests for {@code /openapi} to the {@code /openapi/} location.
 *
 * @return A redirect response whose target is {@code /openapi/}
 */
@Get("/openapi")
HttpResponse getOpenAPI() {
    // a single route mapping is enough; the `uri =` form declared the very same path
    HttpResponse.redirect(URI.create("/openapi/"))
}
Expand Down
78 changes: 78 additions & 0 deletions src/main/groovy/io/seqera/wave/filter/DenyCrawlerFilter.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Wave, containers provisioning service
* Copyright (c) 2023-2024, Seqera Labs
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

package io.seqera.wave.filter

import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import io.micronaut.http.HttpRequest
import io.micronaut.http.HttpResponse
import io.micronaut.http.HttpStatus
import io.micronaut.http.MutableHttpResponse
import io.micronaut.http.annotation.Filter
import io.micronaut.http.filter.HttpServerFilter
import io.micronaut.http.filter.ServerFilterChain
import org.reactivestreams.Publisher
import reactor.core.publisher.Flux
/**
 * Block the access to known crawler bots.
 *
 * The filter is mapped on every path and inspects the {@code User-Agent}
 * request header; when it contains one of the known crawler markers the
 * request is answered immediately and the rest of the filter chain is not invoked.
 *
 * @author Paolo Di Tommaso <[email protected]>
 */
@Slf4j
@CompileStatic
@Filter("/**")
class DenyCrawlerFilter implements HttpServerFilter {

    /**
     * Lower-case fragments identifying crawler user agents. Matching is done
     * by substring against the lower-cased {@code User-Agent} value.
     */
    private static final List<String> CRAWLER_AGENTS = Arrays.asList(
            "googlebot",
            "bingbot",
            "yandexbot",
            "baiduspider",
            "duckduckbot",
            "slurp",
            "facebot",
            "twitterbot",
            "mj12bot",
            "ahrefsbot"
    )

    /**
     * Check whether a user agent string belongs to a known crawler.
     *
     * The comparison is case-insensitive: the value is normalised here, so
     * callers may pass the raw header value.
     *
     * @param userAgent The {@code User-Agent} header value, possibly {@code null} or empty
     * @return {@code true} when the value contains one of the known crawler markers,
     *         {@code false} otherwise (including for {@code null} or empty input)
     */
    static boolean isCrawler(String userAgent) {
        if( !userAgent )
            return false
        // Locale.ROOT keeps the lower-casing independent from the JVM default locale
        // (e.g. a Turkish locale would map 'I' to a dotless 'ı' and miss "bingbot")
        final String normalized = userAgent.toLowerCase(Locale.ROOT)
        return CRAWLER_AGENTS.stream().anyMatch(normalized::contains)
    }

    /**
     * Deny the request when it comes from a known crawler, otherwise let it
     * continue through the filter chain.
     *
     * @param request The incoming HTTP request
     * @param chain The remaining filter chain
     * @return A publisher emitting a {@code 405 Method Not Allowed} response for
     *         crawlers, or the publisher returned by the chain for any other request
     */
    @Override
    Publisher<MutableHttpResponse<?>> doFilter(HttpRequest<?> request, ServerFilterChain chain) {
        final String userAgent = request.getHeaders().get("User-Agent")
        // Check if the user agent matches any of the known crawlers
        if (isCrawler(userAgent)) {
            // Return immediately without processing the request
            log.debug("Request denied: ${request}")
            return Flux.just(HttpResponse.status(HttpStatus.METHOD_NOT_ALLOWED))
        }
        // Continue processing the request
        return chain.proceed(request)
    }

    /**
     * @return The position of this filter in the chain, as defined by {@code FilterOrder.DENY_CRAWLER}
     */
    @Override
    int getOrder() {
        return FilterOrder.DENY_CRAWLER
    }
}
1 change: 1 addition & 0 deletions src/main/groovy/io/seqera/wave/filter/FilterOrder.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ package io.seqera.wave.filter
*/
interface FilterOrder {

final int DENY_CRAWLER = -110
final int DENY_PATHS = -100
final int RATE_LIMITER = -50
final int PULL_METRICS = 10
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ interface MessageStream<M> {
/**
 * Initialize the stream with the given Id
 *
 * @param streamId The unique ID of the stream to be initialized
 */
void init(String streamId)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import io.micronaut.test.annotation.MockBean
import io.micronaut.test.extensions.spock.annotation.MicronautTest
import io.seqera.wave.api.ContainerInspectRequest
import io.seqera.wave.api.ContainerInspectResponse
import io.seqera.wave.exception.BadRequestException
import io.seqera.wave.service.logs.BuildLogService
import io.seqera.wave.service.logs.BuildLogServiceImpl
import jakarta.inject.Inject
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,10 @@ class ScanControllerTest extends Specification {
res.body().requestId == scan.requestId
}


def "should return 404 and null"() {
when:
def req = HttpRequest.GET("/v1alpha1/scans/unknown")
def res = client.toBlocking().exchange(req, WaveScanRecord)
client.toBlocking().exchange(req, WaveScanRecord)

then:
def e = thrown(HttpClientResponseException)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,61 @@ package io.seqera.wave.controller

import spock.lang.Specification

import io.micronaut.http.HttpResponse
import io.micronaut.http.HttpRequest
import io.micronaut.http.HttpStatus

import io.micronaut.http.client.DefaultHttpClientConfiguration
import io.micronaut.http.client.HttpClient
import io.micronaut.http.client.annotation.Client
import io.micronaut.http.client.exceptions.HttpClientResponseException
import io.micronaut.runtime.server.EmbeddedServer
import io.micronaut.test.extensions.spock.annotation.MicronautTest
import jakarta.inject.Inject
/**
 * Tests for {@link ServiceInfoController}, run against the embedded server
 * started by {@code @MicronautTest}.
 *
 * @author Munish Chouhan <[email protected]>
 */
@MicronautTest
class ServiceInfoControllerTest extends Specification {

    // shared client bound to the embedded server root
    @Inject
    @Client("/")
    HttpClient client

    // used to build a dedicated client with a custom configuration
    @Inject
    EmbeddedServer embeddedServer

    def 'should get service info' () {
        when:
        def request = HttpRequest.GET("/service-info")
        def resp = client.toBlocking().exchange(request, String)
        then:
        resp.status.code == 200
    }

    def 'should deny service info' () {
        when:
        // a crawler user agent must be rejected
        def request = HttpRequest.GET("/service-info").header('User-Agent','Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)')
        client.toBlocking().exchange(request, String)
        then:
        def e = thrown(HttpClientResponseException)
        e.status == HttpStatus.METHOD_NOT_ALLOWED
    }

    def 'should redirect to /openapi/'() {
        given:
        def controller = new ServiceInfoController()
        def uri = embeddedServer.getContextURI()
        and:
        // Create a new HttpClient with redirects disabled, so that the redirect
        // response itself can be inspected; named differently from the injected
        // `client` field to avoid shadowing it
        def config = new DefaultHttpClientConfiguration()
        config.setFollowRedirects(false)
        def noRedirectClient = HttpClient.create(uri.toURL(), config)

        when:
        HttpResponse response = controller.getOpenAPI()
        def request = HttpRequest.GET("/openapi")
        def resp = noRedirectClient.toBlocking().exchange(request, String)

        then:
        response.status == HttpStatus.MOVED_PERMANENTLY
        response.header('Location') == '/openapi/'
        resp.status == HttpStatus.MOVED_PERMANENTLY // Expect 301
        resp.headers.get("Location") == "/openapi/" // Validate redirect location

        cleanup:
        // the client created by this feature is not managed by the container: release it explicitly
        noRedirectClient?.close()
    }

}

0 comments on commit edfae00

Please sign in to comment.