-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Paolo Di Tommaso <[email protected]>
- Loading branch information
1 parent
3aec041
commit 4e19ec4
Showing
7 changed files
with
125 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
78 changes: 78 additions & 0 deletions
78
src/main/groovy/io/seqera/wave/filter/DenyCrawlerFilter.groovy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
/* | ||
* Wave, containers provisioning service | ||
* Copyright (c) 2023-2024, Seqera Labs | ||
* | ||
* This program is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Affero General Public License as published by | ||
* the Free Software Foundation, either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU Affero General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Affero General Public License | ||
* along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
package io.seqera.wave.filter | ||
|
||
import groovy.transform.CompileStatic | ||
import groovy.util.logging.Slf4j | ||
import io.micronaut.http.HttpRequest | ||
import io.micronaut.http.HttpResponse | ||
import io.micronaut.http.HttpStatus | ||
import io.micronaut.http.MutableHttpResponse | ||
import io.micronaut.http.annotation.Filter | ||
import io.micronaut.http.filter.HttpServerFilter | ||
import io.micronaut.http.filter.ServerFilterChain | ||
import org.reactivestreams.Publisher | ||
import reactor.core.publisher.Flux | ||
/**
 * HTTP server filter that blocks requests coming from known crawler bots.
 *
 * The filter is applied to every request path. The {@code User-Agent} header of
 * each incoming request is matched, case-insensitively, against the fragments
 * listed in {@link #CRAWLER_AGENTS}; when one of them is found the request is
 * answered straight away with {@code 405 Method Not Allowed} and the rest of the
 * filter chain is not invoked. Any other request is passed on unchanged.
 *
 * @author Paolo Di Tommaso <[email protected]>
 */
@Slf4j
@CompileStatic
@Filter("/**")
class DenyCrawlerFilter implements HttpServerFilter {

    /**
     * Lower-case fragments identifying the crawlers to reject. A request is
     * considered a crawler when its user agent contains any of these values.
     * Wrapped as unmodifiable so the shared constant cannot be altered at runtime.
     */
    private static final List<String> CRAWLER_AGENTS = Collections.unmodifiableList(Arrays.asList(
            "googlebot",
            "bingbot",
            "yandexbot",
            "baiduspider",
            "duckduckbot",
            "slurp",
            "facebot",
            "twitterbot",
            "mj12bot",
            "ahrefsbot"
    ))

    /**
     * Check whether a user agent string belongs to one of the known crawlers.
     *
     * The comparison is case-insensitive: the value is normalised here, so callers
     * do not need to lower-case it beforehand.
     *
     * @param userAgent the value of the {@code User-Agent} header; may be {@code null} or empty
     * @return {@code true} when the user agent contains one of the {@link #CRAWLER_AGENTS}
     *         fragments, {@code false} otherwise (including for a {@code null} or empty value)
     */
    static boolean isCrawler(String userAgent) {
        if( !userAgent )
            return false
        // Locale.ROOT keeps the result independent from the JVM default locale
        // (e.g. with a Turkish locale "BINGBOT" would become "bıngbot" and never match)
        final String normalized = userAgent.toLowerCase(Locale.ROOT)
        return CRAWLER_AGENTS.stream().anyMatch(normalized::contains)
    }

    /**
     * Reject the request when it is issued by a known crawler, otherwise
     * continue with the remaining filters.
     *
     * @param request the incoming HTTP request
     * @param chain the filter chain used to continue the request processing
     * @return a publisher emitting a {@code 405 Method Not Allowed} response for
     *         crawlers, or the publisher returned by {@code chain.proceed(request)}
     */
    @Override
    Publisher<MutableHttpResponse<?>> doFilter(HttpRequest<?> request, ServerFilterChain chain) {
        final userAgent = request.getHeaders().get("User-Agent")
        // Check if the request user agent matches any of the known crawlers
        if (isCrawler(userAgent)) {
            // Return immediately without processing the request.
            // NOTE: the 405 status is asserted by ServiceInfoControllerTest, keep them aligned
            log.debug("Request denied: ${request}")
            return Flux.just(HttpResponse.status(HttpStatus.METHOD_NOT_ALLOWED))
        }
        // Continue processing the request
        return chain.proceed(request)
    }

    /**
     * @return the position of this filter in the chain, as defined by
     *         {@code FilterOrder.DENY_CRAWLER}
     */
    @Override
    int getOrder() {
        return FilterOrder.DENY_CRAWLER
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,25 +20,61 @@ package io.seqera.wave.controller | |
|
||
import spock.lang.Specification | ||
|
||
import io.micronaut.http.HttpResponse | ||
import io.micronaut.http.HttpRequest | ||
import io.micronaut.http.HttpStatus | ||
|
||
import io.micronaut.http.client.DefaultHttpClientConfiguration | ||
import io.micronaut.http.client.HttpClient | ||
import io.micronaut.http.client.annotation.Client | ||
import io.micronaut.http.client.exceptions.HttpClientResponseException | ||
import io.micronaut.runtime.server.EmbeddedServer | ||
import io.micronaut.test.extensions.spock.annotation.MicronautTest | ||
import jakarta.inject.Inject | ||
/**
 * Tests for the {@code /service-info} and {@code /openapi} endpoints, exercised
 * through an HTTP client against the embedded server started by {@code @MicronautTest}.
 *
 * @author Munish Chouhan <[email protected]>
 */
@MicronautTest
class ServiceInfoControllerTest extends Specification {

    @Inject
    @Client("/")
    HttpClient client

    @Inject
    EmbeddedServer embeddedServer

    def 'should get service info' () {
        when:
        def request = HttpRequest.GET("/service-info")
        def resp = client.toBlocking().exchange(request, String)
        then:
        resp.status.code == 200
    }

    def 'should deny service info' () {
        when:
        // a crawler user agent is expected to be rejected before reaching the controller
        def request = HttpRequest.GET("/service-info").header('User-Agent','Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)')
        client.toBlocking().exchange(request, String)
        then:
        def e = thrown(HttpClientResponseException)
        e.status == HttpStatus.METHOD_NOT_ALLOWED
    }

    def 'should redirect to /openapi/'() {
        given:
        def controller = new ServiceInfoController()
        def uri = embeddedServer.getContextURI()
        and:
        // Create a new HttpClient with redirects disabled, so that the 301 response
        // itself can be inspected. It is named differently from the injected `client`
        // field to avoid shadowing it.
        def config = new DefaultHttpClientConfiguration()
        config.setFollowRedirects(false)
        def redirectClient = HttpClient.create(uri.toURL(), config)

        when:
        HttpResponse response = controller.getOpenAPI()
        def request = HttpRequest.GET("/openapi")
        def resp = redirectClient.toBlocking().exchange(request, String)

        then:
        response.status == HttpStatus.MOVED_PERMANENTLY
        response.header('Location') == '/openapi/'
        resp.status == HttpStatus.MOVED_PERMANENTLY // Expect 301
        resp.headers.get("Location") == "/openapi/" // Validate redirect location

        cleanup:
        // the client is created manually (not managed by the container), release it explicitly
        redirectClient?.close()
    }

}