diff --git a/cmd/anubis/CHANGELOG.md b/cmd/anubis/CHANGELOG.md new file mode 100644 index 00000000..612bec1b --- /dev/null +++ b/cmd/anubis/CHANGELOG.md @@ -0,0 +1,5 @@ +# CHANGELOG + +## 2025-01-24 + +- Added support for custom bot policies, allowing administrators to change how Anubis works to meet their needs. diff --git a/cmd/anubis/README.md b/cmd/anubis/README.md index 77495a13..60de1de2 100644 --- a/cmd/anubis/README.md +++ b/cmd/anubis/README.md @@ -12,7 +12,7 @@ Anubis [weighs the soul of your connection](https://en.wikipedia.org/wiki/Weighing_of_souls) using a sha256 proof-of-work challenge in order to protect upstream resources from scraper bots. -Installing and using this will likely result in your website not being indexed by Google or other search engines. This is considered a feature of Anubis, not a bug. +Installing and using this will likely result in your website not being indexed by some search engines. This is considered a feature of Anubis, not a bug. This is a bit of a nuclear response, but AI scraper bots scraping so aggressively have forced my hand. I hate that I have to do this, but this is what we get for the modern Internet because bots don't conform to standards like robots.txt, even when they claim to. @@ -165,6 +165,10 @@ Anubis uses these environment variables for configuration: | `SERVE_ROBOTS_TXT` | `false` | If set `true`, Anubis will serve a default `robots.txt` file that disallows all known AI scrapers by name and then additionally disallows every scraper. This is useful if facts and circumstances make it difficult to change the underlying service to serve such a `robots.txt` file. | | `TARGET` | `http://localhost:3923` | The URL of the service that Anubis should forward valid requests to. | +### Policies + +Anubis has support for custom bot policies, matched by User-Agent string and request path. Check the [bot policy documentation](./docs/policies.md) for more information.
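For example, a policy file that lets Googlebot through while still challenging generic browsers could look like this (a minimal sketch assembled from rules that ship in the bundled `botPolicies.json`):

```json
{
  "bots": [
    {
      "name": "googlebot",
      "user_agent_regex": "\\+http\\:\\/\\/www\\.google\\.com/bot\\.html",
      "action": "ALLOW"
    },
    {
      "name": "generic-browser",
      "user_agent_regex": "Mozilla",
      "action": "CHALLENGE"
    }
  ]
}
```

Point Anubis at a custom file with the `--policy-fname` flag; if it is unset, the built-in policy file is used.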
+ ### Docker compose Add Anubis to your compose file pointed at your service: diff --git a/cmd/anubis/botPolicies.json b/cmd/anubis/botPolicies.json new file mode 100644 index 00000000..8f3d88f1 --- /dev/null +++ b/cmd/anubis/botPolicies.json @@ -0,0 +1,59 @@ +{ + "bots": [ + { + "name": "amazonbot", + "user_agent_regex": "Amazonbot", + "action": "DENY" + }, + { + "name": "googlebot", + "user_agent_regex": "\\+http\\:\\/\\/www\\.google\\.com/bot\\.html", + "action": "ALLOW" + }, + { + "name": "bingbot", + "user_agent_regex": "\\+http\\:\\/\\/www\\.bing\\.com/bingbot\\.htm", + "action": "ALLOW" + }, + { + "name": "well-known", + "path_regex": "^/\\.well-known/.*$", + "action": "ALLOW" + }, + { + "name": "favicon", + "path_regex": "^/favicon\\.ico$", + "action": "ALLOW" + }, + { + "name": "robots-txt", + "path_regex": "^/robots\\.txt$", + "action": "ALLOW" + }, + { + "name": "rss-readers", + "path_regex": ".*\\.(rss|xml|atom|json)$", + "action": "ALLOW" + }, + { + "name": "lightpanda", + "user_agent_regex": "^Lightpanda/.*$", + "action": "DENY" + }, + { + "name": "headless-chrome", + "user_agent_regex": "HeadlessChrome", + "action": "DENY" + }, + { + "name": "headless-chromium", + "user_agent_regex": "HeadlessChromium", + "action": "DENY" + }, + { + "name": "generic-browser", + "user_agent_regex": "Mozilla", + "action": "CHALLENGE" + } + ] +} \ No newline at end of file diff --git a/cmd/anubis/docs/policies.md b/cmd/anubis/docs/policies.md new file mode 100644 index 00000000..1e1b9118 --- /dev/null +++ b/cmd/anubis/docs/policies.md @@ -0,0 +1,77 @@ +# Policies + +Out of the box, Anubis is pretty heavy-handed. It will aggressively challenge everything that might be a browser (usually indicated by having `Mozilla` in its user agent). However, some bots are smart enough to get past the challenge. Some things that look like bots may actually be fine (e.g., RSS readers). Some resources need to be visible no matter what. Some resources and remotes are fine to begin with. + +Bot policies let you customize the rules that Anubis uses to allow, deny, or challenge incoming requests. Currently you can write policies that match requests by the following: + +- Request path +- User agent string + +Here's an example rule that denies [Amazonbot](https://developer.amazon.com/en/amazonbot): + +```json +{ + "name": "amazonbot", + "user_agent_regex": "Amazonbot", + "action": "DENY" +} +``` + +When this rule is evaluated, Anubis will check the `User-Agent` string of the request. If it contains `Amazonbot`, Anubis will send an error page to the user saying that access is denied, but in such a way that scrapers think they have correctly loaded the webpage. + +Right now the only kind of policy you can write is a bot policy. Other forms of policies will be added in the future. + +Here is a minimal policy file that will protect against most scraper bots: + +```json +{ + "bots": [ + { + "name": "well-known", + "path_regex": "^/\\.well-known/.*$", + "action": "ALLOW" + }, + { + "name": "favicon", + "path_regex": "^/favicon\\.ico$", + "action": "ALLOW" + }, + { + "name": "robots-txt", + "path_regex": "^/robots\\.txt$", + "action": "ALLOW" + }, + { + "name": "generic-browser", + "user_agent_regex": "Mozilla", + "action": "CHALLENGE" + } + ] +} +``` + +This allows requests to [`/.well-known`](https://en.wikipedia.org/wiki/Well-known_URI), `/favicon.ico`, `/robots.txt`, and challenges any request that has the word `Mozilla` in its User-Agent string.
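If you also serve RSS or Atom feeds, you can lift the `rss-readers` rule from the bundled `botPolicies.json` so feed fetchers are never challenged:

```json
{
  "name": "rss-readers",
  "path_regex": ".*\\.(rss|xml|atom|json)$",
  "action": "ALLOW"
}
```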
The [default policy file](../botPolicies.json) is a bit more comprehensive, but this should be more than enough for most users. + +If no rules match the request, it is allowed through. + +## Writing your own rules + +There are three actions that can be returned from a rule: + +| Action | Effects | +| :---------- | :-------------------------------------------------------------------------------- | +| `ALLOW` | Bypass all further checks and send the request to the backend. | +| `DENY` | Deny the request and send back an error message that scrapers think is a success. | +| `CHALLENGE` | Show a challenge page and/or validate that clients have passed a challenge. | + +Name your rules in lowercase kebab-case. Rule names will be exposed in Prometheus metrics. + +In case your service needs it for risk calculation, Anubis exposes information about the rule a request matched via a few headers: + +| Header | Explanation | Example | +| :---------------- | :--------------------------------------------------- | :--------------- | +| `X-Anubis-Rule` | The name of the rule that was matched | `bot/lightpanda` | +| `X-Anubis-Action` | The action that Anubis took in response to that rule | `CHALLENGE` | +| `X-Anubis-Status` | Whether the request passed, and how strict the checks were | `PASS-FULL` | + +Policy rules are matched using [Go's standard library regular expressions package](https://pkg.go.dev/regexp). You can mess around with the syntax at [regex101.com](https://regex101.com); make sure to select the Golang option. diff --git a/cmd/anubis/internal/config/config.go b/cmd/anubis/internal/config/config.go new file mode 100644 index 00000000..7f986c1b --- /dev/null +++ b/cmd/anubis/internal/config/config.go @@ -0,0 +1,57 @@ +package config + +import ( + "errors" + "fmt" +) + +type Rule string + +const ( + RuleUnknown Rule = "" + RuleAllow Rule = "ALLOW" + RuleDeny Rule = "DENY" + RuleChallenge Rule = "CHALLENGE" +) + +type Bot struct { + Name string `json:"name"` + UserAgentRegex *string `json:"user_agent_regex"` + PathRegex *string `json:"path_regex"` + Action Rule `json:"action"` +} + +var ( + ErrBotMustHaveName = errors.New("config.Bot: must set name") + ErrBotMustHaveUserAgentPathOrBoth = errors.New("config.Bot: must set either user_agent_regex, path_regex, or both") + ErrUnknownAction = errors.New("config.Bot: unknown action") +) + +func (b Bot) Valid() error { + var err error + + if b.Name == "" { + err = errors.Join(err, ErrBotMustHaveName) + } + + if b.UserAgentRegex == nil && b.PathRegex == nil { + err = errors.Join(err, ErrBotMustHaveUserAgentPathOrBoth) + } + + switch b.Action { + case RuleAllow, RuleChallenge, RuleDeny: + // okay + default: + err = errors.Join(err, fmt.Errorf("%w: %q", ErrUnknownAction, b.Action)) + } + + if err != nil { + return fmt.Errorf("config: bot entry for %q is not valid: %w", b.Name, err) + } + + return nil +} + +type Config struct { + Bots []Bot `json:"bots"` +} diff --git a/cmd/anubis/main.go b/cmd/anubis/main.go index 4de13edc..43d9ab07 100644 --- a/cmd/anubis/main.go +++ b/cmd/anubis/main.go @@ -10,6 +10,7 @@ import ( "encoding/json" "flag" "fmt" + "io" "log" "log/slog" "math" @@ -17,6 +18,7 @@ import ( "net/http" "net/http/httputil" "net/url" + "os" "strconv" "strings" "time" @@ -27,6 +29,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/client_golang/prometheus/promhttp" "within.website/x" + "within.website/x/cmd/anubis/internal/config" "within.website/x/internal" "within.website/x/xess" ) @@ -36,16 +39,12 @@ var (
challengeDifficulty = flag.Int("difficulty", 5, "difficulty of the challenge") metricsBind = flag.String("metrics-bind", ":9090", "TCP port to bind metrics to") robotsTxt = flag.Bool("serve-robots-txt", false, "serve a robots.txt file that disallows all robots") + policyFname = flag.String("policy-fname", "", "full path to anubis policy document (defaults to a sensible built-in policy)") target = flag.String("target", "http://localhost:3923", "target to reverse proxy to") - //go:embed static + //go:embed static botPolicies.json static embed.FS - bypasses = promauto.NewCounter(prometheus.CounterOpts{ - Name: "anubis_bypasses", - Help: "The total number of requests that bypassed challenge validation", - }) - challengesIssued = promauto.NewCounter(prometheus.CounterOpts{ Name: "anubis_challenges_issued", Help: "The total number of challenges issued", @@ -78,7 +77,7 @@ const ( func main() { internal.HandleStartup() - s, err := New(*target) + s, err := New(*target, *policyFname) if err != nil { log.Fatal(err) } @@ -142,7 +141,7 @@ func (s *Server) challengeFor(r *http.Request) string { return result } -func New(target string) (*Server, error) { +func New(target, policyFname string) (*Server, error) { u, err := url.Parse(target) if err != nil { return nil, fmt.Errorf("failed to parse target URL: %w", err) } @@ -155,65 +154,93 @@ func New(target string) (*Server, error) { rp := httputil.NewSingleHostReverseProxy(u) + var fin io.ReadCloser + + if policyFname != "" { + fin, err = os.Open(policyFname) + if err != nil { + return nil, fmt.Errorf("can't open policy file %s: %w", policyFname, err) + } + } else { + policyFname = "(static)/botPolicies.json" + fin, err = static.Open("botPolicies.json") + if err != nil { + return nil, fmt.Errorf("[unexpected] can't open builtin policy file %s: %w", policyFname, err) + } + } + + defer fin.Close() + + policy, err := parseConfig(fin, policyFname) + if err != nil { + return nil, err // parseConfig sets a fancy error for us + } + + return &Server{ - rp: rp, - priv: priv, - pub: pub, + rp: rp, + priv: priv, + pub: pub, + policy: policy, }, nil } type Server struct { - rp *httputil.ReverseProxy - priv ed25519.PrivateKey - pub ed25519.PublicKey + rp *httputil.ReverseProxy + priv ed25519.PrivateKey + pub ed25519.PublicKey + policy *ParsedConfig } func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request) { - switch { - case !strings.Contains(r.UserAgent(), "Mozilla"): - bypasses.Inc() - slog.Debug("non-browser user agent") - s.rp.ServeHTTP(w, r) - return - case strings.HasPrefix(r.URL.Path, "/.well-known/"): - bypasses.Inc() - slog.Debug("well-known path") - s.rp.ServeHTTP(w, r) - return - case strings.HasSuffix(r.URL.Path, ".rss") || strings.HasSuffix(r.URL.Path, ".xml") || strings.HasSuffix(r.URL.Path, ".atom"): - bypasses.Inc() - slog.Debug("rss path") + cr := s.check(r) + r.Header.Add("X-Anubis-Rule", cr.Name) + r.Header.Add("X-Anubis-Action", string(cr.Rule)) + lg := slog.With( + "check_result", cr, + "user_agent", r.UserAgent(), + "accept_language", r.Header.Get("Accept-Language"), + "priority", r.Header.Get("Priority"), + "x-forwarded-for", + r.Header.Get("X-Forwarded-For"), + "x-real-ip", r.Header.Get("X-Real-Ip"), + ) + policyApplications.WithLabelValues(cr.Name, string(cr.Rule)).Add(1) + + switch cr.Rule { + case config.RuleAllow: + lg.Debug("allowing traffic to origin (explicit)") s.rp.ServeHTTP(w, r) return - case r.URL.Path == "/favicon.ico": - bypasses.Inc() - slog.Debug("favicon path") - s.rp.ServeHTTP(w, r) + case config.RuleDeny: +
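// denied requests intentionally get a 200-status error page, so scrapers believe they loaded the page successfully (see docs/policies.md) +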
clearCookie(w) + lg.Info("explicit deny") + templ.Handler(base("Oh noes!", errorPage("Access Denied")), templ.WithStatus(http.StatusOK)).ServeHTTP(w, r) return - case r.URL.Path == "/robots.txt": - bypasses.Inc() - slog.Debug("robots.txt path") - s.rp.ServeHTTP(w, r) + case config.RuleChallenge: + lg.Debug("challenge requested") + default: + clearCookie(w) + templ.Handler(base("Oh noes!", errorPage("Other internal server error (contact the admin)")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r) return } ckie, err := r.Cookie(cookieName) if err != nil { - slog.Debug("cookie not found", "path", r.URL.Path) + lg.Debug("cookie not found", "path", r.URL.Path) clearCookie(w) s.renderIndex(w, r) return } if err := ckie.Valid(); err != nil { - slog.Debug("cookie is invalid", "err", err) + lg.Debug("cookie is invalid", "err", err) clearCookie(w) s.renderIndex(w, r) return } if time.Now().After(ckie.Expires) && !ckie.Expires.IsZero() { - slog.Debug("cookie expired", "path", r.URL.Path) + lg.Debug("cookie expired", "path", r.URL.Path) clearCookie(w) s.renderIndex(w, r) return @@ -224,7 +251,7 @@ func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request) { }) if !token.Valid { - slog.Debug("invalid token", "path", r.URL.Path) + lg.Debug("invalid token", "path", r.URL.Path) clearCookie(w) s.renderIndex(w, r) return @@ -234,27 +261,28 @@ func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request) { exp, ok := claims["exp"].(float64) if !ok { - slog.Debug("exp is not int64", "ok", ok, "typeof(exp)", fmt.Sprintf("%T", exp)) + lg.Debug("exp is not float64", "ok", ok, "typeof(exp)", fmt.Sprintf("%T", exp)) clearCookie(w) s.renderIndex(w, r) return } if exp := time.Unix(int64(exp), 0); time.Now().After(exp) { - slog.Debug("token has expired", "exp", exp.Format(time.RFC3339)) + lg.Debug("token has expired", "exp", exp.Format(time.RFC3339)) clearCookie(w) s.renderIndex(w, r) return } if token.Valid && randomJitter() { - slog.Debug("cookie is not enrolled into secondary screening") + r.Header.Add("X-Anubis-Status", "PASS-BRIEF") + lg.Debug("cookie is not enrolled into secondary screening") s.rp.ServeHTTP(w, r) return } if claims["challenge"] != s.challengeFor(r) { - slog.Debug("invalid challenge", "path", r.URL.Path) + lg.Debug("invalid challenge", "path", r.URL.Path) clearCookie(w) s.renderIndex(w, r) return @@ -269,20 +297,22 @@ func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request) { calcString := fmt.Sprintf("%s%d", s.challengeFor(r), nonce) calculated, err := sha256sum(calcString) if err != nil { - slog.Error("failed to calculate sha256sum", "path", r.URL.Path, "err", err) + lg.Error("failed to calculate sha256sum", "path", r.URL.Path, "err", err) clearCookie(w) http.Error(w, err.Error(), http.StatusInternalServerError) return } if subtle.ConstantTimeCompare([]byte(claims["response"].(string)), []byte(calculated)) != 1 { - slog.Debug("invalid response", "path", r.URL.Path) + lg.Debug("invalid response", "path", r.URL.Path) failedValidations.Inc() clearCookie(w) s.renderIndex(w, r) return } + lg.Debug("all checks passed") + r.Header.Add("X-Anubis-Status", "PASS-FULL") s.rp.ServeHTTP(w, r) } @@ -296,6 +326,8 @@ func (s *Server) makeChallenge(w http.ResponseWriter, r *http.Request) { challenge := s.challengeFor(r) difficulty := *challengeDifficulty + lg := slog.With("user_agent", r.UserAgent(), "accept_language", r.Header.Get("Accept-Language"), "priority", r.Header.Get("Priority"), "x-forwarded-for", r.Header.Get("X-Forwarded-For"),
"x-real-ip", r.Header.Get("X-Real-Ip")) + json.NewEncoder(w).Encode(struct { Challenge string `json:"challenge"` Difficulty int `json:"difficulty"` @@ -303,14 +335,17 @@ func (s *Server) makeChallenge(w http.ResponseWriter, r *http.Request) { Challenge: challenge, Difficulty: difficulty, }) - slog.Debug("made challenge", "challenge", challenge, "difficulty", difficulty) + lg.Debug("made challenge", "challenge", challenge, "difficulty", difficulty) challengesIssued.Inc() } func (s *Server) passChallenge(w http.ResponseWriter, r *http.Request) { + lg := slog.With("user_agent", r.UserAgent(), "accept_language", r.Header.Get("Accept-Language"), "priority", r.Header.Get("Priority"), "x-forwarded-for", r.Header.Get("X-Forwarded-For"), "x-real-ip", r.Header.Get("X-Real-Ip")) + nonceStr := r.FormValue("nonce") if nonceStr == "" { clearCookie(w) + lg.Debug("no nonce") templ.Handler(base("Oh noes!", errorPage("missing nonce")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r) return } @@ -318,6 +353,7 @@ func (s *Server) passChallenge(w http.ResponseWriter, r *http.Request) { elapsedTimeStr := r.FormValue("elapsedTime") if elapsedTimeStr == "" { clearCookie(w) + lg.Debug("no elapsedTime") templ.Handler(base("Oh noes!", errorPage("missing elapsedTime")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r) return } @@ -325,6 +361,7 @@ func (s *Server) passChallenge(w http.ResponseWriter, r *http.Request) { elapsedTime, err := strconv.ParseFloat(elapsedTimeStr, 64) if err != nil { clearCookie(w) + lg.Debug("elapsedTime doesn't parse", "err", err) templ.Handler(base("Oh noes!", errorPage("invalid elapsedTime")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r) return } @@ -332,6 +369,7 @@ func (s *Server) passChallenge(w http.ResponseWriter, r *http.Request) { difficultyStr := r.FormValue("difficulty") if difficultyStr == "" { clearCookie(w) + lg.Debug("no difficulty") templ.Handler(base("Oh noes!", errorPage("missing difficulty")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r) return } @@ -339,11 +377,12 @@ func (s *Server) passChallenge(w http.ResponseWriter, r *http.Request) { difficulty, err := strconv.Atoi(difficultyStr) if err != nil { clearCookie(w) + lg.Debug("difficulty doesn't parse", "err", err) templ.Handler(base("Oh noes!", errorPage("invalid difficulty")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r) return } - slog.Info("challenge took", "elapsedTime", elapsedTime) + lg.Info("challenge took", "elapsedTime", elapsedTime) timeTaken.Observe(elapsedTime) response := r.FormValue("response") @@ -354,6 +393,7 @@ func (s *Server) passChallenge(w http.ResponseWriter, r *http.Request) { nonce, err := strconv.Atoi(nonceStr) if err != nil { clearCookie(w) + lg.Debug("nonce doesn't parse", "err", err) templ.Handler(base("Oh noes!", errorPage("invalid nonce")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r) return } @@ -362,12 +402,14 @@ func (s *Server) passChallenge(w http.ResponseWriter, r *http.Request) { calculated, err := sha256sum(calcString) if err != nil { clearCookie(w) + lg.Debug("can't parse shasum", "err", err) templ.Handler(base("Oh noes!", errorPage("failed to calculate sha256sum")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r) return } if subtle.ConstantTimeCompare([]byte(response), []byte(calculated)) != 1 { clearCookie(w) + lg.Debug("hash does not match", "got", response, "want", calculated) templ.Handler(base("Oh noes!", errorPage("invalid response")), 
templ.WithStatus(http.StatusForbidden)).ServeHTTP(w, r) failedValidations.Inc() return @@ -376,6 +418,7 @@ // compare the leading zeroes if !strings.HasPrefix(response, strings.Repeat("0", difficulty)) { clearCookie(w) + lg.Debug("difficulty check failed", "response", response, "difficulty", difficulty) templ.Handler(base("Oh noes!", errorPage("invalid response")), templ.WithStatus(http.StatusForbidden)).ServeHTTP(w, r) failedValidations.Inc() return @@ -392,7 +435,7 @@ }) tokenString, err := token.SignedString(s.priv) if err != nil { - slog.Error("failed to sign JWT", "err", err) + lg.Error("failed to sign JWT", "err", err) clearCookie(w) templ.Handler(base("Oh noes!", errorPage("failed to sign JWT")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r) return @@ -407,6 +450,7 @@ }) challengesValidated.Inc() + lg.Debug("challenge passed, redirecting to app") http.Redirect(w, r, redir, http.StatusFound) } diff --git a/cmd/anubis/policy.go b/cmd/anubis/policy.go new file mode 100644 index 00000000..7d778c70 --- /dev/null +++ b/cmd/anubis/policy.go @@ -0,0 +1,129 @@ +package main + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "log/slog" + "net/http" + "regexp" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "within.website/x/cmd/anubis/internal/config" +) + +var ( + policyApplications = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "anubis_policy_results", + Help: "The results of each policy rule", + }, []string{"rule", "action"}) +) + +type ParsedConfig struct { + orig config.Config + + Bots []Bot +} + +type Bot struct { + Name string + UserAgent *regexp.Regexp + Path *regexp.Regexp + Action config.Rule +} + +func parseConfig(fin io.Reader, fname string) (*ParsedConfig, error) { + var c config.Config + if err := json.NewDecoder(fin).Decode(&c); err != nil { + return nil, fmt.Errorf("can't parse policy config JSON %s: %w", fname, err) + } + + var err error + + result := &ParsedConfig{ + orig: c, + } + + for _, b := range c.Bots { + if berr := b.Valid(); berr != nil { + err = errors.Join(err, berr) + continue + } + + var botParseErr error + parsedBot := Bot{ + Name: b.Name, + Action: b.Action, + } + + if b.UserAgentRegex != nil { + userAgent, uaErr := regexp.Compile(*b.UserAgentRegex) + if uaErr != nil { + botParseErr = errors.Join(botParseErr, fmt.Errorf("while compiling user agent regexp: %w", uaErr)) + } else { + parsedBot.UserAgent = userAgent + } + } + + if b.PathRegex != nil { + path, pathErr := regexp.Compile(*b.PathRegex) + if pathErr != nil { + botParseErr = errors.Join(botParseErr, fmt.Errorf("while compiling path regexp: %w", pathErr)) + } else { + parsedBot.Path = path + } + } + + if botParseErr != nil { + err = errors.Join(err, botParseErr) + continue + } + + result.Bots = append(result.Bots, parsedBot) + } + + if err != nil { + return nil, fmt.Errorf("errors validating policy config JSON %s: %w", fname, err) + } + + return result, nil +} + +type CheckResult struct { + Name string + Rule config.Rule +} + +func (cr CheckResult) LogValue() slog.Value { + return slog.GroupValue( + slog.String("name", cr.Name), + slog.String("rule", string(cr.Rule))) +} + +func cr(name string, rule config.Rule) CheckResult { + return CheckResult{ + Name: name, + Rule: rule, + } +} + +// check evaluates the bot policy rules in order and returns the result of the first rule that matches.
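+// A rule may set both regexes; it matches when either the User-Agent or the request path matches, and requests that match no rule fall through to "default/allow".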
+func (s *Server) check(r *http.Request) CheckResult { + for _, b := range s.policy.Bots { + if b.UserAgent != nil { + if b.UserAgent.MatchString(r.UserAgent()) { + return cr("bot/"+b.Name, b.Action) + } + } + + if b.Path != nil { + if b.Path.MatchString(r.URL.Path) { + return cr("bot/"+b.Name, b.Action) + } + } + } + + return cr("default/allow", config.RuleAllow) +}
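For reviewers who want to poke at the matching semantics, here is a sketch of a test for the first-match behavior; the test name, sample user agent, and inline policy are illustrative, not code from this change:

```go
package main

import (
	"net/http"
	"net/http/httptest"
	"strings"
	"testing"

	"within.website/x/cmd/anubis/internal/config"
)

// TestCheckFirstMatchWins feeds check a User-Agent that matches two rules
// and asserts that the rule listed first in the policy file wins.
func TestCheckFirstMatchWins(t *testing.T) {
	policyJSON := `{
	  "bots": [
	    { "name": "amazonbot", "user_agent_regex": "Amazonbot", "action": "DENY" },
	    { "name": "generic-browser", "user_agent_regex": "Mozilla", "action": "CHALLENGE" }
	  ]
	}`

	policy, err := parseConfig(strings.NewReader(policyJSON), "inline")
	if err != nil {
		t.Fatal(err)
	}

	s := &Server{policy: policy}

	// This UA contains both "Mozilla" and "Amazonbot"; the deny rule is listed first.
	req := httptest.NewRequest(http.MethodGet, "/", nil)
	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Amazonbot/0.1)")

	if result := s.check(req); result.Rule != config.RuleDeny {
		t.Errorf("want %s, got %s (rule %s)", config.RuleDeny, result.Rule, result.Name)
	}
}
```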