Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DenyCrawlerFilter #803

Merged
merged 1 commit into from
Feb 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class ServiceInfoController {
: HttpResponse.badRequest()
}

@Get(uri = "/openapi")
@Get("/openapi")
HttpResponse getOpenAPI() {
HttpResponse.redirect(URI.create("/openapi/"))
}
Expand Down
78 changes: 78 additions & 0 deletions src/main/groovy/io/seqera/wave/filter/DenyCrawlerFilter.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Wave, containers provisioning service
* Copyright (c) 2023-2024, Seqera Labs
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

package io.seqera.wave.filter

import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import io.micronaut.http.HttpRequest
import io.micronaut.http.HttpResponse
import io.micronaut.http.HttpStatus
import io.micronaut.http.MutableHttpResponse
import io.micronaut.http.annotation.Filter
import io.micronaut.http.filter.HttpServerFilter
import io.micronaut.http.filter.ServerFilterChain
import org.reactivestreams.Publisher
import reactor.core.publisher.Flux
/**
 * Block the access to known crawler bots.
 *
 * Every request matched by the filter pattern has its {@code User-Agent}
 * header compared, case-insensitively, against a fixed list of crawler
 * name fragments. When a fragment is found the request is answered right
 * away with {@link HttpStatus#METHOD_NOT_ALLOWED} and the rest of the
 * filter chain is not invoked.
 *
 * @author Paolo Di Tommaso <[email protected]>
 */
@Slf4j
@CompileStatic
@Filter("/**")
class DenyCrawlerFilter implements HttpServerFilter {

    /**
     * Lower-case fragments identifying the denied crawlers. A request is
     * denied when its normalised user agent contains any of these values.
     */
    private static final List<String> CRAWLER_AGENTS = Arrays.asList(
            "googlebot",
            "bingbot",
            "yandexbot",
            "baiduspider",
            "duckduckbot",
            "slurp",
            "facebot",
            "twitterbot",
            "mj12bot",
            "ahrefsbot"
    )

    /**
     * Check whether a user agent string belongs to one of the denied crawlers.
     *
     * The comparison is case-insensitive: the value is lower-cased with
     * {@link Locale#ROOT} before matching, so the outcome does not depend on
     * the JVM default locale (e.g. the Turkish dotless-i mapping of {@code I}).
     *
     * @param userAgent The raw {@code User-Agent} header value, can be {@code null} or empty
     * @return {@code true} when the value contains one of the known crawler
     *         fragments, {@code false} otherwise (including {@code null} and empty values)
     */
    static boolean isCrawler(String userAgent) {
        if (!userAgent) {
            return false
        }
        final String normalized = userAgent.toLowerCase(Locale.ROOT)
        return CRAWLER_AGENTS.stream().anyMatch(normalized::contains)
    }

    /**
     * Deny the request when it is sent by a known crawler, otherwise hand it
     * over to the next element of the filter chain.
     *
     * @param request The incoming HTTP request
     * @param chain The filter chain used to continue the request processing
     * @return A publisher emitting either the {@code 405} denial response or
     *         the response produced by the remaining chain
     */
    @Override
    Publisher<MutableHttpResponse<?>> doFilter(HttpRequest<?> request, ServerFilterChain chain) {
        // the header may be missing; `isCrawler` handles null and case normalisation
        final String userAgent = request.getHeaders().get("User-Agent")
        // Check if the request user agent matches any of the known crawlers
        if (isCrawler(userAgent)) {
            // Return immediately without processing the request
            log.debug("Request denied: ${request}")
            return Flux.just(HttpResponse.status(HttpStatus.METHOD_NOT_ALLOWED))
        }
        // Continue processing the request
        return chain.proceed(request)
    }

    /**
     * @return The position of this filter in the chain, as defined by {@code FilterOrder.DENY_CRAWLER}
     */
    @Override
    int getOrder() {
        return FilterOrder.DENY_CRAWLER
    }
}
1 change: 1 addition & 0 deletions src/main/groovy/io/seqera/wave/filter/FilterOrder.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ package io.seqera.wave.filter
*/
interface FilterOrder {

final int DENY_CRAWLER = -110
final int DENY_PATHS = -100
final int RATE_LIMITER = -50
final int PULL_METRICS = 10
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ interface MessageStream<M> {
/**
* Initialize the stream with the given Id
*
* @param streamId The uniqur ID of the stream to be initialized
* @param streamId The unique ID of the stream to be initialized
*/
void init(String streamId)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import io.micronaut.test.annotation.MockBean
import io.micronaut.test.extensions.spock.annotation.MicronautTest
import io.seqera.wave.api.ContainerInspectRequest
import io.seqera.wave.api.ContainerInspectResponse
import io.seqera.wave.exception.BadRequestException
import io.seqera.wave.service.logs.BuildLogService
import io.seqera.wave.service.logs.BuildLogServiceImpl
import jakarta.inject.Inject
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,10 @@ class ScanControllerTest extends Specification {
res.body().requestId == scan.requestId
}


def "should return 404 and null"() {
when:
def req = HttpRequest.GET("/v1alpha1/scans/unknown")
def res = client.toBlocking().exchange(req, WaveScanRecord)
client.toBlocking().exchange(req, WaveScanRecord)

then:
def e = thrown(HttpClientResponseException)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,61 @@ package io.seqera.wave.controller

import spock.lang.Specification

import io.micronaut.http.HttpResponse
import io.micronaut.http.HttpRequest
import io.micronaut.http.HttpStatus

import io.micronaut.http.client.DefaultHttpClientConfiguration
import io.micronaut.http.client.HttpClient
import io.micronaut.http.client.annotation.Client
import io.micronaut.http.client.exceptions.HttpClientResponseException
import io.micronaut.runtime.server.EmbeddedServer
import io.micronaut.test.extensions.spock.annotation.MicronautTest
import jakarta.inject.Inject
/**
*
* @author Munish Chouhan <[email protected]>
*/
@MicronautTest
class ServiceInfoControllerTest extends Specification {

@Inject
@Client("/")
HttpClient client

@Inject
EmbeddedServer embeddedServer;

def 'should get service info' () {
when:
def request = HttpRequest.GET("/service-info")
def resp = client.toBlocking().exchange(request, String)
then:
resp.status.code == 200
}

def 'should deny service info' () {
when:
def request = HttpRequest.GET("/service-info").header('User-Agent','Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)')
client.toBlocking().exchange(request, String)
then:
def e = thrown(HttpClientResponseException)
e.status == HttpStatus.METHOD_NOT_ALLOWED
}

def 'should redirect to /openapi/'() {
given:
def controller = new ServiceInfoController()

def uri = embeddedServer.getContextURI()
and:
// Create a new HttpClient with redirects disabled
def config = new DefaultHttpClientConfiguration()
config.setFollowRedirects(false)
def client = HttpClient.create(uri.toURL(), config)
when:
HttpResponse response = controller.getOpenAPI()
def request = HttpRequest.GET("/openapi")
def resp = client.toBlocking().exchange(request, String)

then:
response.status == HttpStatus.MOVED_PERMANENTLY
response.header('Location') == '/openapi/'
resp.status == HttpStatus.MOVED_PERMANENTLY // Expect 301
resp.headers.get("Location") == "/openapi/" // Validate redirect location
}

}