From 1d881be905c4cf4483194becbe983c6bced838d3 Mon Sep 17 00:00:00 2001 From: AGmind Date: Mon, 1 Jun 2026 23:58:07 +0300 Subject: [PATCH 1/8] fix: protect Crawl4AI nginx exposure golden-accept-reason: crawl4ai loopback bind render updated for security --- lib/_registry.indexed.sh | 8 +-- lib/config.sh | 7 ++- lib/wizard.sh | 14 ++++- templates/docker-compose.yml | 2 +- templates/env.lan.template | 1 + templates/nginx.conf.template | 35 +++++++++++ templates/services/registry.yaml | 6 +- .../expected/cluster_peer/.env.rendered | 1 + .../expected/cluster_peer/checksums.sha256 | 4 +- .../cluster_peer/docker-compose.rendered.yml | 1 + tests/golden/expected/full_lan/.env.rendered | 1 + .../golden/expected/full_lan/checksums.sha256 | 4 +- .../full_lan/docker-compose.rendered.yml | 1 + .../golden/expected/minimal_lan/.env.rendered | 1 + .../expected/minimal_lan/checksums.sha256 | 4 +- .../minimal_lan/docker-compose.rendered.yml | 1 + .../golden/expected/rag_milvus/.env.rendered | 1 + .../expected/rag_milvus/checksums.sha256 | 4 +- .../rag_milvus/docker-compose.rendered.yml | 1 + tests/golden/expected/ragflow/.env.rendered | 1 + .../golden/expected/ragflow/checksums.sha256 | 4 +- .../ragflow/docker-compose.rendered.yml | 1 + tests/unit/test_crawl4ai_auth_exposure.sh | 61 +++++++++++++++++++ 23 files changed, 142 insertions(+), 22 deletions(-) create mode 100755 tests/unit/test_crawl4ai_auth_exposure.sh diff --git a/lib/_registry.indexed.sh b/lib/_registry.indexed.sh index 5b79667..004306f 100644 --- a/lib/_registry.indexed.sh +++ b/lib/_registry.indexed.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash # _registry.indexed.sh — DO NOT HAND-EDIT # Generated from templates/services/registry.yaml (schema_version=1) -# Source SHA-12: 2557f030b18c +# Source SHA-12: 1a5c2e79d8c2 # Regenerate via: make registry-codegen # CI gate: tests/integration/test_registry_codegen_drift.sh fails on stale artifact. 
# @@ -161,7 +161,7 @@ ALL_COMPOSE_PROFILES="monitoring,portainer,qdrant,weaviate,milvus,authelia,ollam # shellcheck disable=SC2034 declare -A NAMED_PROFILE_EXPANSION=( - [agents]="litellm,crawl4ai,searxng,dbgpt,openwebui,notebook,n8n" + [agents]="litellm,authelia,crawl4ai,searxng,dbgpt,openwebui,notebook,n8n" [core]="vllm,litellm" [dev]="vllm,litellm,weaviate,docling,monitoring,portainer" [full]="vllm,litellm,weaviate,docling,vllm-embed,vllm-rerank,minio,monitoring,portainer,authelia,crawl4ai,searxng,dbgpt,openwebui,notebook,n8n" @@ -173,7 +173,7 @@ declare -A NAMED_PROFILE_EXPANSION=( # shellcheck disable=SC2034 declare -A NAMED_PROFILE_DESC=( - [agents]="LiteLLM + Crawl4AI + SearXNG + dbGPT + Open WebUI + Notebook + n8n" + [agents]="LiteLLM + Authelia + Crawl4AI + SearXNG + dbGPT + Open WebUI + Notebook + n8n" [core]="Dify core + vLLM + LiteLLM (minimal — no RAG)" [dev]="Core + observability (fast iteration; no RAGFlow/agents/security)" [full]="Everything: vLLM + Weaviate + Docling + monitoring + agents + n8n (Milvus skipped — XOR with Weaviate)" @@ -185,7 +185,7 @@ declare -A NAMED_PROFILE_DESC=( # shellcheck disable=SC2034 declare -A NAMED_PROFILE_IMPLIED=( - [agents]="ENABLE_LITELLM=true ENABLE_CRAWL4AI=true ENABLE_SEARXNG=true ENABLE_DBGPT=true ENABLE_OPENWEBUI=true ENABLE_NOTEBOOK=true ENABLE_N8N=true" + [agents]="ENABLE_LITELLM=true ENABLE_AUTHELIA=true ENABLE_CRAWL4AI=true ENABLE_SEARXNG=true ENABLE_DBGPT=true ENABLE_OPENWEBUI=true ENABLE_NOTEBOOK=true ENABLE_N8N=true" [core]="LLM_PROVIDER=vllm ENABLE_LITELLM=true" [dev]="LLM_PROVIDER=vllm ENABLE_LITELLM=true VECTOR_STORE=weaviate ENABLE_DOCLING=true MONITORING_MODE=local ENABLE_PORTAINER=true" [full]="LLM_PROVIDER=vllm ENABLE_LITELLM=true VECTOR_STORE=weaviate ENABLE_DOCLING=true EMBED_PROVIDER=vllm-embed ENABLE_RERANKER=true RERANKER_PROVIDER=vllm-rerank ENABLE_MINIO=true MONITORING_MODE=local ENABLE_PORTAINER=true ENABLE_AUTHELIA=true ENABLE_CRAWL4AI=true ENABLE_SEARXNG=true ENABLE_DBGPT=true 
ENABLE_OPENWEBUI=true ENABLE_NOTEBOOK=true ENABLE_N8N=true" diff --git a/lib/config.sh b/lib/config.sh index c3d4881..825577b 100755 --- a/lib/config.sh +++ b/lib/config.sh @@ -877,7 +877,10 @@ generate_nginx_config() { fi # Crawl4AI markers - if [[ "${ENABLE_CRAWL4AI:-false}" == "true" ]]; then + # Security: the Crawl4AI REST API accepts arbitrary crawl URLs and can be + # abused as an SSRF primitive. Only expose it through nginx when Authelia is + # active; otherwise keep the backend reachable solely on the Docker network. + if [[ "${ENABLE_CRAWL4AI:-false}" == "true" && "${ENABLE_AUTHELIA:-false}" == "true" ]]; then _atomic_sed "$nginx_conf" 's|#__CRAWL4AI__||g' else _atomic_sed "$nginx_conf" '/#__CRAWL4AI__/d' @@ -959,7 +962,7 @@ _register_local_dns() { [[ "${ENABLE_DBGPT:-false}" == "true" ]] && names+=("agmind-dbgpt") [[ "${ENABLE_NOTEBOOK:-false}" == "true" ]] && names+=("agmind-notebook") [[ "${ENABLE_SEARXNG:-false}" == "true" ]] && names+=("agmind-search") - [[ "${ENABLE_CRAWL4AI:-false}" == "true" ]] && names+=("agmind-crawl") + [[ "${ENABLE_CRAWL4AI:-false}" == "true" && "${ENABLE_AUTHELIA:-false}" == "true" ]] && names+=("agmind-crawl") [[ "${ENABLE_RAGFLOW:-false}" == "true" ]] && names+=("agmind-rag") [[ "${ENABLE_N8N:-false}" == "true" ]] && names+=("agmind-n8n") diff --git a/lib/wizard.sh b/lib/wizard.sh index b151b96..3cc3277 100644 --- a/lib/wizard.sh +++ b/lib/wizard.sh @@ -157,7 +157,7 @@ _wizard_profile() { "ragflow" "RAGFlow: RAGFlow + Elasticsearch + MySQL + MinIO" \ "observability" "Мониторинг: Prometheus + Grafana + Loki + Portainer" \ "security" "Безопасность: Authelia + fail2ban / hardening" \ - "agents" "Агенты: LiteLLM + Crawl4AI + SearXNG + dbGPT + Open WebUI + Notebook + n8n" \ + "agents" "Агенты: LiteLLM + Authelia + Crawl4AI + SearXNG + dbGPT + Open WebUI + Notebook + n8n" \ "full" "Полный стек: vLLM + Weaviate + Docling + monitoring + agents + n8n" \ "dev" "Dev: Core + мониторинг (быстрая итерация)" \ "custom" "Custom: детальный 
выбор каждого компонента") @@ -1804,6 +1804,9 @@ _wizard_optional_services() { _wizard_crawl4ai _wizard_ragflow _wizard_n8n + if [[ "${ENABLE_CRAWL4AI:-false}" == "true" ]]; then + ENABLE_AUTHELIA="true" + fi return 0 fi @@ -1852,6 +1855,13 @@ _wizard_optional_services() { # lets anyone on the network claim the admin account on a fresh # deploy. Nginx vhost agmind-rag.local proxies the local port for # normal LAN access without the direct-port race. + if [[ "${ENABLE_CRAWL4AI:-false}" == "true" ]]; then + # Crawl4AI exposes a URL-fetching REST API; always pair it with + # Authelia so agents/custom installs do not create an unauthenticated + # crawler route. + ENABLE_AUTHELIA="true" + fi + if [[ "${ENABLE_RAGFLOW:-false}" == "true" ]]; then if wt_yesno "RAGFlow Direct Port Access" \ "Открыть порт RAGFlow :9380 напрямую в LAN?\n\nВНИМАНИЕ: пока админ не зарегистрирован, любой в сети может занять админ-аккаунт.\nПо умолчанию доступ только через nginx-проксирование (agmind-rag.local) — безопаснее.\n\nОткрыть :9380 напрямую?"; then @@ -1914,7 +1924,7 @@ _wizard_summary() { if [[ "${ENABLE_SEARXNG:-false}" == "true" ]]; then summary+="$(t wizard.summary.searxng) agmind-search.local\n"; fi if [[ "${ENABLE_NOTEBOOK:-false}" == "true" ]]; then summary+="$(t wizard.summary.notebook) agmind-notebook.local\n"; fi if [[ "${ENABLE_DBGPT:-false}" == "true" ]]; then summary+="$(t wizard.summary.dbgpt) agmind-dbgpt.local\n"; fi - if [[ "${ENABLE_CRAWL4AI:-false}" == "true" ]]; then summary+="$(t wizard.summary.crawl4ai) agmind-crawl.local\n"; fi + if [[ "${ENABLE_CRAWL4AI:-false}" == "true" && "${ENABLE_AUTHELIA:-false}" == "true" ]]; then summary+="$(t wizard.summary.crawl4ai) agmind-crawl.local\n"; fi if [[ "${ENABLE_N8N:-false}" == "true" ]]; then summary+="$(t wizard.summary.n8n) agmind-n8n.local\n"; fi if [[ "${ENABLE_OPENWEBUI:-false}" == "true" ]]; then summary+="$(t wizard.summary.openwebui) agmind-chat.local\n"; fi if [[ "${ENABLE_RAGFLOW:-false}" == "true" ]]; then 
summary+="$(t wizard.summary.ragflow) $(t wizard.summary.ragflow_val)\n"; fi diff --git a/templates/docker-compose.yml b/templates/docker-compose.yml index 63678cb..3e6843f 100644 --- a/templates/docker-compose.yml +++ b/templates/docker-compose.yml @@ -1077,7 +1077,7 @@ services: - "${EXPOSE_NOTEBOOK_PORT:-8502}:8502" - "${EXPOSE_NOTEBOOK_API_PORT:-5055}:5055" - "${EXPOSE_SEARXNG_PORT:-8888}:8888" - - "${EXPOSE_CRAWL4AI_PORT:-11235}:11235" + - "${CRAWL4AI_BIND_ADDR:-127.0.0.1}:${EXPOSE_CRAWL4AI_PORT:-11235}:11235" environment: NGINX_SERVER_NAME: ${NGINX_SERVER_NAME:-_} NGINX_HTTPS_ENABLED: ${NGINX_HTTPS_ENABLED:-false} diff --git a/templates/env.lan.template b/templates/env.lan.template index 9862b9e..94f4334 100644 --- a/templates/env.lan.template +++ b/templates/env.lan.template @@ -133,6 +133,7 @@ EXPOSE_NOTEBOOK_PORT=8502 EXPOSE_NOTEBOOK_API_PORT=5055 OPEN_NOTEBOOK_API_URL=http://agmind-notebook.local:5055 EXPOSE_SEARXNG_PORT=8888 +CRAWL4AI_BIND_ADDR=127.0.0.1 EXPOSE_CRAWL4AI_PORT=11235 EXPOSE_DOCLING_PORT=8765 diff --git a/templates/nginx.conf.template b/templates/nginx.conf.template index 5b38bf7..611dbe3 100644 --- a/templates/nginx.conf.template +++ b/templates/nginx.conf.template @@ -385,7 +385,23 @@ http { #__CRAWL4AI__ #__CRAWL4AI__ client_max_body_size 10M; #__CRAWL4AI__ + #__CRAWL4AI__ # Authelia auth_request endpoint for this dedicated port vhost. 
+ #__CRAWL4AI__ #__AUTHELIA__location = /authelia-auth { + #__CRAWL4AI__ #__AUTHELIA__ internal; + #__CRAWL4AI__ #__AUTHELIA__ set $u_authelia http://authelia:9091; + #__CRAWL4AI__ #__AUTHELIA__ proxy_pass $u_authelia/api/authz/auth-request; + #__CRAWL4AI__ #__AUTHELIA__ proxy_pass_request_body off; + #__CRAWL4AI__ #__AUTHELIA__ proxy_set_header Content-Length ""; + #__CRAWL4AI__ #__AUTHELIA__ proxy_set_header X-Original-Method $request_method; + #__CRAWL4AI__ #__AUTHELIA__ proxy_set_header X-Original-URL $scheme://$http_host$request_uri; + #__CRAWL4AI__ #__AUTHELIA__ proxy_set_header X-Forwarded-For $remote_addr; + #__CRAWL4AI__ #__AUTHELIA__ proxy_set_header X-Forwarded-Proto $scheme; + #__CRAWL4AI__ #__AUTHELIA__} + #__CRAWL4AI__ #__CRAWL4AI__ location / { + #__CRAWL4AI__ #__AUTHELIA__auth_request /authelia-auth; + #__CRAWL4AI__ #__AUTHELIA__auth_request_set $user $upstream_http_remote_user; + #__CRAWL4AI__ #__AUTHELIA__auth_request_set $groups $upstream_http_remote_groups; #__CRAWL4AI__ set $u_crawl4ai http://crawl4ai:11235; #__CRAWL4AI__ proxy_pass $u_crawl4ai; #__CRAWL4AI__ proxy_http_version 1.1; @@ -545,8 +561,24 @@ http { #__CRAWL4AI__ #__CRAWL4AI__ client_max_body_size 10M; #__CRAWL4AI__ + #__CRAWL4AI__ # Authelia auth_request endpoint for this mDNS vhost. 
+ #__CRAWL4AI__ #__AUTHELIA__location = /authelia-auth { + #__CRAWL4AI__ #__AUTHELIA__ internal; + #__CRAWL4AI__ #__AUTHELIA__ set $u_authelia http://authelia:9091; + #__CRAWL4AI__ #__AUTHELIA__ proxy_pass $u_authelia/api/authz/auth-request; + #__CRAWL4AI__ #__AUTHELIA__ proxy_pass_request_body off; + #__CRAWL4AI__ #__AUTHELIA__ proxy_set_header Content-Length ""; + #__CRAWL4AI__ #__AUTHELIA__ proxy_set_header X-Original-Method $request_method; + #__CRAWL4AI__ #__AUTHELIA__ proxy_set_header X-Original-URL $scheme://$http_host$request_uri; + #__CRAWL4AI__ #__AUTHELIA__ proxy_set_header X-Forwarded-For $remote_addr; + #__CRAWL4AI__ #__AUTHELIA__ proxy_set_header X-Forwarded-Proto $scheme; + #__CRAWL4AI__ #__AUTHELIA__} + #__CRAWL4AI__ #__CRAWL4AI__ # --- Health endpoint (shared JSON, see phase 36) --- #__CRAWL4AI__ location = /health { + #__CRAWL4AI__ #__AUTHELIA__auth_request /authelia-auth; + #__CRAWL4AI__ #__AUTHELIA__auth_request_set $user $upstream_http_remote_user; + #__CRAWL4AI__ #__AUTHELIA__auth_request_set $groups $upstream_http_remote_groups; #__CRAWL4AI__ default_type application/json; #__CRAWL4AI__ add_header Cache-Control "no-cache, no-store, must-revalidate" always; #__CRAWL4AI__ add_header X-Content-Type-Options "nosniff" always; @@ -555,6 +587,9 @@ http { #__CRAWL4AI__ } #__CRAWL4AI__ #__CRAWL4AI__ location / { + #__CRAWL4AI__ #__AUTHELIA__auth_request /authelia-auth; + #__CRAWL4AI__ #__AUTHELIA__auth_request_set $user $upstream_http_remote_user; + #__CRAWL4AI__ #__AUTHELIA__auth_request_set $groups $upstream_http_remote_groups; #__CRAWL4AI__ set $u_crawl4ai http://crawl4ai:11235; #__CRAWL4AI__ proxy_pass $u_crawl4ai; #__CRAWL4AI__ proxy_http_version 1.1; diff --git a/templates/services/registry.yaml b/templates/services/registry.yaml index a6c905f..8ee6ceb 100644 --- a/templates/services/registry.yaml +++ b/templates/services/registry.yaml @@ -408,7 +408,7 @@ profile_expansions: ragflow: "minio" observability: "monitoring,portainer" security: 
"authelia" - agents: "litellm,crawl4ai,searxng,dbgpt,openwebui,notebook,n8n" + agents: "litellm,authelia,crawl4ai,searxng,dbgpt,openwebui,notebook,n8n" full: "vllm,litellm,weaviate,docling,vllm-embed,vllm-rerank,minio,monitoring,portainer,authelia,crawl4ai,searxng,dbgpt,openwebui,notebook,n8n" dev: "vllm,litellm,weaviate,docling,monitoring,portainer" @@ -420,7 +420,7 @@ profile_descriptions: ragflow: "RAGFlow + Elasticsearch + MySQL + MinIO" observability: "Prometheus + Grafana + Loki + exporters + Portainer" security: "Authelia + fail2ban/hardening" - agents: "LiteLLM + Crawl4AI + SearXNG + dbGPT + Open WebUI + Notebook + n8n" + agents: "LiteLLM + Authelia + Crawl4AI + SearXNG + dbGPT + Open WebUI + Notebook + n8n" full: "Everything: vLLM + Weaviate + Docling + monitoring + agents + n8n (Milvus skipped — XOR with Weaviate)" dev: "Core + observability (fast iteration; no RAGFlow/agents/security)" @@ -433,7 +433,7 @@ profile_implied: ragflow: "ENABLE_RAGFLOW=true ENABLE_MINIO=true" observability: "MONITORING_MODE=local ENABLE_PORTAINER=true" security: "ENABLE_AUTHELIA=true" - agents: "ENABLE_LITELLM=true ENABLE_CRAWL4AI=true ENABLE_SEARXNG=true ENABLE_DBGPT=true ENABLE_OPENWEBUI=true ENABLE_NOTEBOOK=true ENABLE_N8N=true" + agents: "ENABLE_LITELLM=true ENABLE_AUTHELIA=true ENABLE_CRAWL4AI=true ENABLE_SEARXNG=true ENABLE_DBGPT=true ENABLE_OPENWEBUI=true ENABLE_NOTEBOOK=true ENABLE_N8N=true" full: "LLM_PROVIDER=vllm ENABLE_LITELLM=true VECTOR_STORE=weaviate ENABLE_DOCLING=true EMBED_PROVIDER=vllm-embed ENABLE_RERANKER=true RERANKER_PROVIDER=vllm-rerank ENABLE_MINIO=true MONITORING_MODE=local ENABLE_PORTAINER=true ENABLE_AUTHELIA=true ENABLE_CRAWL4AI=true ENABLE_SEARXNG=true ENABLE_DBGPT=true ENABLE_OPENWEBUI=true ENABLE_NOTEBOOK=true ENABLE_N8N=true" dev: "LLM_PROVIDER=vllm ENABLE_LITELLM=true VECTOR_STORE=weaviate ENABLE_DOCLING=true MONITORING_MODE=local ENABLE_PORTAINER=true" diff --git a/tests/golden/expected/cluster_peer/.env.rendered 
b/tests/golden/expected/cluster_peer/.env.rendered index f8b190c..8dcef3f 100644 --- a/tests/golden/expected/cluster_peer/.env.rendered +++ b/tests/golden/expected/cluster_peer/.env.rendered @@ -133,6 +133,7 @@ EXPOSE_NOTEBOOK_PORT=8502 EXPOSE_NOTEBOOK_API_PORT=5055 OPEN_NOTEBOOK_API_URL=http://agmind-notebook.local:5055 EXPOSE_SEARXNG_PORT=8888 +CRAWL4AI_BIND_ADDR=127.0.0.1 EXPOSE_CRAWL4AI_PORT=11235 EXPOSE_DOCLING_PORT=8765 diff --git a/tests/golden/expected/cluster_peer/checksums.sha256 b/tests/golden/expected/cluster_peer/checksums.sha256 index 70c1337..9e48a56 100644 --- a/tests/golden/expected/cluster_peer/checksums.sha256 +++ b/tests/golden/expected/cluster_peer/checksums.sha256 @@ -1,5 +1,5 @@ -b4a8ab805e3935869f9bdb81aa86b93eb61e735ed23012b2261a26bf00a06dff ./.env.rendered -9f2e47e8fba75559dd5bc6fd2c686d263306613e25b62d369a528d68e4d30ddc ./docker-compose.rendered.yml +3adc4895840fd1a6ecac776763ff846e18e2355f409eaf7a20a8315bd05c40a1 ./.env.rendered +bbe24a58b978d92aa5335295684d0fea5314b811d411439d7f908e3539f64dd1 ./docker-compose.rendered.yml 0d2e1e887e7e8334a4986e0fe10f1e5fb0fe80021d5eaebdb82d2408231f2d5d ./monitoring/alert_rules.yml 4d35ff1e870b113fba9e8b0fac3e98fdea0d77f9e57c474e1e41a1be720abfa2 ./monitoring/alertmanager.yml d089cfc29495039466a6f511e46d8c5ad3356c54db104b66fce027e50c8f8262 ./monitoring/alloy-config.river diff --git a/tests/golden/expected/cluster_peer/docker-compose.rendered.yml b/tests/golden/expected/cluster_peer/docker-compose.rendered.yml index cdebfd1..56defa8 100644 --- a/tests/golden/expected/cluster_peer/docker-compose.rendered.yml +++ b/tests/golden/expected/cluster_peer/docker-compose.rendered.yml @@ -366,6 +366,7 @@ services: published: "8888" protocol: tcp - mode: ingress + host_ip: 127.0.0.1 target: 11235 published: "11235" protocol: tcp diff --git a/tests/golden/expected/full_lan/.env.rendered b/tests/golden/expected/full_lan/.env.rendered index a48c77e..a5565e9 100644 --- a/tests/golden/expected/full_lan/.env.rendered +++ 
b/tests/golden/expected/full_lan/.env.rendered @@ -133,6 +133,7 @@ EXPOSE_NOTEBOOK_PORT=8502 EXPOSE_NOTEBOOK_API_PORT=5055 OPEN_NOTEBOOK_API_URL=http://agmind-notebook.local:5055 EXPOSE_SEARXNG_PORT=8888 +CRAWL4AI_BIND_ADDR=127.0.0.1 EXPOSE_CRAWL4AI_PORT=11235 EXPOSE_DOCLING_PORT=8765 diff --git a/tests/golden/expected/full_lan/checksums.sha256 b/tests/golden/expected/full_lan/checksums.sha256 index 9770a73..8835ea9 100644 --- a/tests/golden/expected/full_lan/checksums.sha256 +++ b/tests/golden/expected/full_lan/checksums.sha256 @@ -1,5 +1,5 @@ -fd3961c04aca7e4aa636c3a4c544f8a634343235cc8a53d0d69eee9e74887dd6 ./.env.rendered -a5306a55b8930f422c24de8973bf93392f900bae4461e1362583e713387b5f32 ./docker-compose.rendered.yml +559ec356eff7c7fe18db9e7b989376a461a8e0173594295cf4fe4450e0a87d26 ./.env.rendered +554908bd4cc156298982c9e47b66bf69e5efc43c977663c4a5dcc8d624d4f2b3 ./docker-compose.rendered.yml 0d2e1e887e7e8334a4986e0fe10f1e5fb0fe80021d5eaebdb82d2408231f2d5d ./monitoring/alert_rules.yml 4d35ff1e870b113fba9e8b0fac3e98fdea0d77f9e57c474e1e41a1be720abfa2 ./monitoring/alertmanager.yml d089cfc29495039466a6f511e46d8c5ad3356c54db104b66fce027e50c8f8262 ./monitoring/alloy-config.river diff --git a/tests/golden/expected/full_lan/docker-compose.rendered.yml b/tests/golden/expected/full_lan/docker-compose.rendered.yml index 2237871..254d0bd 100644 --- a/tests/golden/expected/full_lan/docker-compose.rendered.yml +++ b/tests/golden/expected/full_lan/docker-compose.rendered.yml @@ -366,6 +366,7 @@ services: published: "8888" protocol: tcp - mode: ingress + host_ip: 127.0.0.1 target: 11235 published: "11235" protocol: tcp diff --git a/tests/golden/expected/minimal_lan/.env.rendered b/tests/golden/expected/minimal_lan/.env.rendered index 45699a3..5cb77d6 100644 --- a/tests/golden/expected/minimal_lan/.env.rendered +++ b/tests/golden/expected/minimal_lan/.env.rendered @@ -133,6 +133,7 @@ EXPOSE_NOTEBOOK_PORT=8502 EXPOSE_NOTEBOOK_API_PORT=5055 
OPEN_NOTEBOOK_API_URL=http://agmind-notebook.local:5055 EXPOSE_SEARXNG_PORT=8888 +CRAWL4AI_BIND_ADDR=127.0.0.1 EXPOSE_CRAWL4AI_PORT=11235 EXPOSE_DOCLING_PORT=8765 diff --git a/tests/golden/expected/minimal_lan/checksums.sha256 b/tests/golden/expected/minimal_lan/checksums.sha256 index 1b4fa7e..4e864fe 100644 --- a/tests/golden/expected/minimal_lan/checksums.sha256 +++ b/tests/golden/expected/minimal_lan/checksums.sha256 @@ -1,3 +1,3 @@ -b50b55bcccc496822723aba466a7c808883f5adf50de58b20c4a7dd42b3f83c2 ./.env.rendered -d2a047c036c2eddb564b3770526db79f89b51a3cf2afd0af75b9588e20be1adc ./docker-compose.rendered.yml +b9768f4902be38dae04282f5b64d7667a2a6f5c6fca5879f04c487ebf161eee8 ./.env.rendered +a6907f8bd14b60655716952361bb4403e250bd01ce64b842bb9c87614222d38c ./docker-compose.rendered.yml 55afa3759b15b0bb76cd37d01a041b86cb48ab966dbeb4c735f12c3449d6d420 ./nginx.conf diff --git a/tests/golden/expected/minimal_lan/docker-compose.rendered.yml b/tests/golden/expected/minimal_lan/docker-compose.rendered.yml index fc407a5..54aab31 100644 --- a/tests/golden/expected/minimal_lan/docker-compose.rendered.yml +++ b/tests/golden/expected/minimal_lan/docker-compose.rendered.yml @@ -366,6 +366,7 @@ services: published: "8888" protocol: tcp - mode: ingress + host_ip: 127.0.0.1 target: 11235 published: "11235" protocol: tcp diff --git a/tests/golden/expected/rag_milvus/.env.rendered b/tests/golden/expected/rag_milvus/.env.rendered index dcec861..63a28ef 100644 --- a/tests/golden/expected/rag_milvus/.env.rendered +++ b/tests/golden/expected/rag_milvus/.env.rendered @@ -133,6 +133,7 @@ EXPOSE_NOTEBOOK_PORT=8502 EXPOSE_NOTEBOOK_API_PORT=5055 OPEN_NOTEBOOK_API_URL=http://agmind-notebook.local:5055 EXPOSE_SEARXNG_PORT=8888 +CRAWL4AI_BIND_ADDR=127.0.0.1 EXPOSE_CRAWL4AI_PORT=11235 EXPOSE_DOCLING_PORT=8765 diff --git a/tests/golden/expected/rag_milvus/checksums.sha256 b/tests/golden/expected/rag_milvus/checksums.sha256 index 2030353..eea3e0c 100644 --- 
a/tests/golden/expected/rag_milvus/checksums.sha256 +++ b/tests/golden/expected/rag_milvus/checksums.sha256 @@ -1,5 +1,5 @@ -966f981c8a49b5eaf1ad26c9ea400872ea6e33cdae20d9ab48b20524da5384b2 ./.env.rendered -57c00e75834047b9859c4d1f7b57c954d6d1d041d2e8fed1a248aa4afe9ca9ae ./docker-compose.rendered.yml +91bb3053999a3ec15933f1ca539b5d4d5189888f485a845b7947b631701075a7 ./.env.rendered +d451a7b66ab594b3a0166cdbd442fbd051bd002c232d07668206f71e2b0fdf3d ./docker-compose.rendered.yml 0d2e1e887e7e8334a4986e0fe10f1e5fb0fe80021d5eaebdb82d2408231f2d5d ./monitoring/alert_rules.yml 4d35ff1e870b113fba9e8b0fac3e98fdea0d77f9e57c474e1e41a1be720abfa2 ./monitoring/alertmanager.yml d089cfc29495039466a6f511e46d8c5ad3356c54db104b66fce027e50c8f8262 ./monitoring/alloy-config.river diff --git a/tests/golden/expected/rag_milvus/docker-compose.rendered.yml b/tests/golden/expected/rag_milvus/docker-compose.rendered.yml index 9b52f7c..d5b9c7d 100644 --- a/tests/golden/expected/rag_milvus/docker-compose.rendered.yml +++ b/tests/golden/expected/rag_milvus/docker-compose.rendered.yml @@ -366,6 +366,7 @@ services: published: "8888" protocol: tcp - mode: ingress + host_ip: 127.0.0.1 target: 11235 published: "11235" protocol: tcp diff --git a/tests/golden/expected/ragflow/.env.rendered b/tests/golden/expected/ragflow/.env.rendered index f397d04..180bc41 100644 --- a/tests/golden/expected/ragflow/.env.rendered +++ b/tests/golden/expected/ragflow/.env.rendered @@ -133,6 +133,7 @@ EXPOSE_NOTEBOOK_PORT=8502 EXPOSE_NOTEBOOK_API_PORT=5055 OPEN_NOTEBOOK_API_URL=http://agmind-notebook.local:5055 EXPOSE_SEARXNG_PORT=8888 +CRAWL4AI_BIND_ADDR=127.0.0.1 EXPOSE_CRAWL4AI_PORT=11235 EXPOSE_DOCLING_PORT=8765 diff --git a/tests/golden/expected/ragflow/checksums.sha256 b/tests/golden/expected/ragflow/checksums.sha256 index b58e790..055c0ec 100644 --- a/tests/golden/expected/ragflow/checksums.sha256 +++ b/tests/golden/expected/ragflow/checksums.sha256 @@ -1,5 +1,5 @@ 
-65999c3b881f44ea9009b73982ed05886850d3573c1aef9160d44aba9b48e506 ./.env.rendered -2723d5ca530681ce6b40712e941df45d0d54b232d3783b21f3077973a415207f ./docker-compose.rendered.yml +ec62907f983a51223f1ba3cc748752fa4162ea7bcc69d708a27d85b99316d4c2 ./.env.rendered +28d94171a2f9bfeb27b238a47f72b355b67f97d5f0ffcbb711306d8db103f8e6 ./docker-compose.rendered.yml 0d2e1e887e7e8334a4986e0fe10f1e5fb0fe80021d5eaebdb82d2408231f2d5d ./monitoring/alert_rules.yml 4d35ff1e870b113fba9e8b0fac3e98fdea0d77f9e57c474e1e41a1be720abfa2 ./monitoring/alertmanager.yml d089cfc29495039466a6f511e46d8c5ad3356c54db104b66fce027e50c8f8262 ./monitoring/alloy-config.river diff --git a/tests/golden/expected/ragflow/docker-compose.rendered.yml b/tests/golden/expected/ragflow/docker-compose.rendered.yml index 7b39256..9e1ced4 100644 --- a/tests/golden/expected/ragflow/docker-compose.rendered.yml +++ b/tests/golden/expected/ragflow/docker-compose.rendered.yml @@ -366,6 +366,7 @@ services: published: "8888" protocol: tcp - mode: ingress + host_ip: 127.0.0.1 target: 11235 published: "11235" protocol: tcp diff --git a/tests/unit/test_crawl4ai_auth_exposure.sh b/tests/unit/test_crawl4ai_auth_exposure.sh new file mode 100755 index 0000000..06f8cd9 --- /dev/null +++ b/tests/unit/test_crawl4ai_auth_exposure.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash +# test_crawl4ai_auth_exposure.sh — ensure Crawl4AI is not exposed unauthenticated. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)" + +CONFIG_SH="${REPO_ROOT}/lib/config.sh" +NGINX_TEMPLATE="${REPO_ROOT}/templates/nginx.conf.template" +COMPOSE_TEMPLATE="${REPO_ROOT}/templates/docker-compose.yml" +ENV_TEMPLATE="${REPO_ROOT}/templates/env.lan.template" +REGISTRY_INDEX="${REPO_ROOT}/lib/_registry.indexed.sh" +REGISTRY_YAML="${REPO_ROOT}/templates/services/registry.yaml" + +fail() { + echo "FAIL: $*" >&2 + exit 1 +} + +echo "## test_crawl4ai_auth_exposure" + +rg -q 'ENABLE_CRAWL4AI:-false.*ENABLE_AUTHELIA:-false' "${CONFIG_SH}" \ + || fail "Crawl4AI nginx block must only activate when Authelia is enabled" + +crawl_auth_count="$(rg -c '#__CRAWL4AI__\s+#__AUTHELIA__auth_request /authelia-auth;' "${NGINX_TEMPLATE}" || true)" +[[ "${crawl_auth_count}" -ge 2 ]] \ + || fail "both Crawl4AI nginx locations must require Authelia auth_request" + +crawl_auth_endpoint_count="$(rg -c '#__CRAWL4AI__\s+#__AUTHELIA__location = /authelia-auth' "${NGINX_TEMPLATE}" || true)" +[[ "${crawl_auth_endpoint_count}" -ge 2 ]] \ + || fail "both Crawl4AI nginx server blocks must define an Authelia auth endpoint" + +rg -q '#__CRAWL4AI__\s+location = /health' "${NGINX_TEMPLATE}" \ + || fail "Crawl4AI mDNS vhost must keep an explicit health location" +rg -A4 '#__CRAWL4AI__\s+location = /health' "${NGINX_TEMPLATE}" | rg -q '#__AUTHELIA__auth_request /authelia-auth;' \ + || fail "Crawl4AI mDNS health endpoint must require Authelia" + +# shellcheck disable=SC2016 # literal docker-compose variable syntax in regex +rg -q '\$\{CRAWL4AI_BIND_ADDR:-127\.0\.0\.1\}:\$\{EXPOSE_CRAWL4AI_PORT:-11235\}:11235' "${COMPOSE_TEMPLATE}" \ + || fail "nginx Crawl4AI port must bind to loopback by default" + +rg -q '^CRAWL4AI_BIND_ADDR=127\.0\.0\.1$' "${ENV_TEMPLATE}" \ + || fail "default env must pin Crawl4AI host bind to loopback" + +rg -q 'ENABLE_CRAWL4AI:-false.*ENABLE_AUTHELIA:-false.*names\+=\("agmind-crawl"\)' "${CONFIG_SH}" \ + || fail "mDNS must not advertise agmind-crawl.local unless the authenticated nginx vhost is active" + +rg -q 
'ENABLE_CRAWL4AI:-false.*ENABLE_AUTHELIA:-false.*wizard.summary.crawl4ai' "${REPO_ROOT}/lib/wizard.sh" \ + || fail "wizard summary must not advertise agmind-crawl.local unless the authenticated nginx vhost is active" + +rg -q '\[agents\]="[^"]*ENABLE_AUTHELIA=true[^"]*ENABLE_CRAWL4AI=true' "${REGISTRY_INDEX}" \ + || fail "agents profile must force Authelia when it enables Crawl4AI" + +rg -q 'agents: ".*ENABLE_AUTHELIA=true.*ENABLE_CRAWL4AI=true' "${REGISTRY_YAML}" \ + || fail "registry source must force Authelia when agents profile enables Crawl4AI" + +rg -q 'ENABLE_CRAWL4AI:-false.*== "true"' "${REPO_ROOT}/lib/wizard.sh" \ + || fail "wizard must enforce Authelia for custom Crawl4AI selections" + +echo "PASS: Crawl4AI exposure requires Authelia and defaults to loopback" From 2d3705c78fc521e3e9b6acd397f6fc7f6c4ca975 Mon Sep 17 00:00:00 2001 From: AGmind Date: Tue, 2 Jun 2026 00:08:51 +0300 Subject: [PATCH 2/8] fix: protect Crawl4AI nginx exposure Require Authelia before rendering or advertising the Crawl4AI nginx route, force Authelia into agents/custom Crawl4AI selections, and keep the published 11235 port loopback-bound by default. The target branch already carries the Crawl4AI auth exposure unit test, so this branch no longer adds the same path and avoids the add/add conflict on tests/unit/test_crawl4ai_auth_exposure.sh. golden-accept-reason: crawl4ai loopback bind render updated for security --- tests/unit/test_crawl4ai_auth_exposure.sh | 61 ----------------------- 1 file changed, 61 deletions(-) delete mode 100755 tests/unit/test_crawl4ai_auth_exposure.sh diff --git a/tests/unit/test_crawl4ai_auth_exposure.sh b/tests/unit/test_crawl4ai_auth_exposure.sh deleted file mode 100755 index 06f8cd9..0000000 --- a/tests/unit/test_crawl4ai_auth_exposure.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env bash -# test_crawl4ai_auth_exposure.sh — ensure Crawl4AI is not exposed unauthenticated. 
- -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" - -CONFIG_SH="${REPO_ROOT}/lib/config.sh" -NGINX_TEMPLATE="${REPO_ROOT}/templates/nginx.conf.template" -COMPOSE_TEMPLATE="${REPO_ROOT}/templates/docker-compose.yml" -ENV_TEMPLATE="${REPO_ROOT}/templates/env.lan.template" -REGISTRY_INDEX="${REPO_ROOT}/lib/_registry.indexed.sh" -REGISTRY_YAML="${REPO_ROOT}/templates/services/registry.yaml" - -fail() { - echo "FAIL: $*" >&2 - exit 1 -} - -echo "## test_crawl4ai_auth_exposure" - -rg -q 'ENABLE_CRAWL4AI:-false.*ENABLE_AUTHELIA:-false' "${CONFIG_SH}" \ - || fail "Crawl4AI nginx block must only activate when Authelia is enabled" - -crawl_auth_count="$(rg -c '#__CRAWL4AI__\s+#__AUTHELIA__auth_request /authelia-auth;' "${NGINX_TEMPLATE}" || true)" -[[ "${crawl_auth_count}" -ge 2 ]] \ - || fail "both Crawl4AI nginx locations must require Authelia auth_request" - -crawl_auth_endpoint_count="$(rg -c '#__CRAWL4AI__\s+#__AUTHELIA__location = /authelia-auth' "${NGINX_TEMPLATE}" || true)" -[[ "${crawl_auth_endpoint_count}" -ge 2 ]] \ - || fail "both Crawl4AI nginx server blocks must define an Authelia auth endpoint" - -rg -q '#__CRAWL4AI__\s+location = /health' "${NGINX_TEMPLATE}" \ - || fail "Crawl4AI mDNS vhost must keep an explicit health location" -rg -A4 '#__CRAWL4AI__\s+location = /health' "${NGINX_TEMPLATE}" | rg -q '#__AUTHELIA__auth_request /authelia-auth;' \ - || fail "Crawl4AI mDNS health endpoint must require Authelia" - -# shellcheck disable=SC2016 # literal docker-compose variable syntax in regex -rg -q '\$\{CRAWL4AI_BIND_ADDR:-127\.0\.0\.1\}:\$\{EXPOSE_CRAWL4AI_PORT:-11235\}:11235' "${COMPOSE_TEMPLATE}" \ - || fail "nginx Crawl4AI port must bind to loopback by default" - -rg -q '^CRAWL4AI_BIND_ADDR=127\.0\.0\.1$' "${ENV_TEMPLATE}" \ - || fail "default env must pin Crawl4AI host bind to loopback" - -rg -q 'ENABLE_CRAWL4AI:-false.*ENABLE_AUTHELIA:-false.*names\+=\("agmind-crawl"\)' 
"${CONFIG_SH}" \ - || fail "mDNS must not advertise agmind-crawl.local unless the authenticated nginx vhost is active" - -rg -q 'ENABLE_CRAWL4AI:-false.*ENABLE_AUTHELIA:-false.*wizard.summary.crawl4ai' "${REPO_ROOT}/lib/wizard.sh" \ - || fail "wizard summary must not advertise agmind-crawl.local unless the authenticated nginx vhost is active" - -rg -q '\[agents\]="[^"]*ENABLE_AUTHELIA=true[^"]*ENABLE_CRAWL4AI=true' "${REGISTRY_INDEX}" \ - || fail "agents profile must force Authelia when it enables Crawl4AI" - -rg -q 'agents: ".*ENABLE_AUTHELIA=true.*ENABLE_CRAWL4AI=true' "${REGISTRY_YAML}" \ - || fail "registry source must force Authelia when agents profile enables Crawl4AI" - -rg -q 'ENABLE_CRAWL4AI:-false.*== "true"' "${REPO_ROOT}/lib/wizard.sh" \ - || fail "wizard must enforce Authelia for custom Crawl4AI selections" - -echo "PASS: Crawl4AI exposure requires Authelia and defaults to loopback" From cfab24bf0fd85371db3ef60701a5e06aa3cfee5e Mon Sep 17 00:00:00 2001 From: AGmind Date: Tue, 2 Jun 2026 00:24:14 +0300 Subject: [PATCH 3/8] fix: protect Crawl4AI nginx exposure Require Authelia before rendering or advertising the Crawl4AI nginx route, force Authelia into agents/custom Crawl4AI selections, keep published 11235 loopback-bound by default, and make peer env lockdown unit test skip optional shellcheck when the dedicated ShellCheck job owns that dependency. golden-accept-reason: crawl4ai loopback bind render updated for security --- tests/unit/test_peer_env_lockdown.sh | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/unit/test_peer_env_lockdown.sh b/tests/unit/test_peer_env_lockdown.sh index 05cfef8..856f7e8 100755 --- a/tests/unit/test_peer_env_lockdown.sh +++ b/tests/unit/test_peer_env_lockdown.sh @@ -69,14 +69,20 @@ else fi # ---------------------------------------------------------------------------- -# Test 4: shellcheck still clean +# Test 4: shellcheck still clean when the optional tool is available. 
+# The dedicated ShellCheck CI job installs shellcheck and covers this globally; +# unit-test jobs do not install it, so absence here must not fail the suite. # ---------------------------------------------------------------------------- -if shellcheck -S warning "$PEER_SH" >/dev/null 2>&1; then - pass=$((pass + 1)) - echo " [PASS] shellcheck -S warning clean" +if command -v shellcheck >/dev/null 2>&1; then + if shellcheck -S warning "$PEER_SH" >/dev/null 2>&1; then + pass=$((pass + 1)) + echo " [PASS] shellcheck -S warning clean" + else + fail=$((fail + 1)) + echo " [FAIL] shellcheck failed for lib/peer.sh" + fi else - fail=$((fail + 1)) - echo " [FAIL] shellcheck failed for lib/peer.sh" + echo " [SKIP] shellcheck not installed — covered by dedicated ShellCheck CI job" fi echo "" From 9ccd41d913a9be6729c9f92b414304294b48280c Mon Sep 17 00:00:00 2001 From: AGmind Date: Tue, 2 Jun 2026 00:37:48 +0300 Subject: [PATCH 4/8] fix: protect Crawl4AI nginx exposure Require Authelia before rendering or advertising the Crawl4AI nginx route, force Authelia into agents/custom Crawl4AI selections, keep published 11235 loopback-bound by default, and keep peer env lockdown unit tests hermetic by leaving ShellCheck to the dedicated CI job unless explicitly opted in. golden-accept-reason: crawl4ai loopback bind render updated for security --- tests/unit/test_peer_env_lockdown.sh | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/unit/test_peer_env_lockdown.sh b/tests/unit/test_peer_env_lockdown.sh index 856f7e8..a1d1b6e 100755 --- a/tests/unit/test_peer_env_lockdown.sh +++ b/tests/unit/test_peer_env_lockdown.sh @@ -69,12 +69,16 @@ else fi # ---------------------------------------------------------------------------- -# Test 4: shellcheck still clean when the optional tool is available. -# The dedicated ShellCheck CI job installs shellcheck and covers this globally; -# unit-test jobs do not install it, so absence here must not fail the suite. 
+# Test 4: shellcheck is owned by the dedicated ShellCheck CI job. Do not run it +# in unit-test jobs: runner images differ (some have shellcheck preinstalled, +# some do not), which made this unit test non-hermetic on amd64/arm64. +# Developers can opt in locally with AGMIND_UNIT_RUN_SHELLCHECK=1. # ---------------------------------------------------------------------------- -if command -v shellcheck >/dev/null 2>&1; then - if shellcheck -S warning "$PEER_SH" >/dev/null 2>&1; then +if [[ "${AGMIND_UNIT_RUN_SHELLCHECK:-0}" == "1" ]]; then + if ! command -v shellcheck >/dev/null 2>&1; then + fail=$((fail + 1)) + echo " [FAIL] AGMIND_UNIT_RUN_SHELLCHECK=1 but shellcheck is not installed" + elif shellcheck -S warning "$PEER_SH" >/dev/null 2>&1; then pass=$((pass + 1)) echo " [PASS] shellcheck -S warning clean" else @@ -82,7 +86,7 @@ if command -v shellcheck >/dev/null 2>&1; then echo " [FAIL] shellcheck failed for lib/peer.sh" fi else - echo " [SKIP] shellcheck not installed — covered by dedicated ShellCheck CI job" + echo " [SKIP] shellcheck is covered by dedicated ShellCheck CI job" fi echo "" From 9c74b5354aeeffa39ce23f6183b4eb327bc87d7b Mon Sep 17 00:00:00 2001 From: AGmind Date: Tue, 2 Jun 2026 01:18:49 +0300 Subject: [PATCH 5/8] fix: protect Crawl4AI nginx exposure Require Authelia before rendering or advertising the Crawl4AI nginx route, force Authelia into agents/custom Crawl4AI selections, keep published 11235 loopback-bound by default, and make unit-test ShellCheck handling deterministic across amd64/arm64 runners. 
golden-accept-reason: crawl4ai loopback bind render updated for security --- .github/workflows/test.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1778c63..ff93daa 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -66,6 +66,10 @@ jobs: with: egress-policy: audit - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Install unit-test tools + run: | + sudo apt-get update + sudo apt-get install -y --no-install-recommends shellcheck - name: Run unit tests # default GHA shell is `bash -e {0}` (set -e on) — a test exiting 77 (SKIP) # would kill the whole step before our case handler runs. Disable -e. @@ -106,6 +110,10 @@ jobs: with: egress-policy: audit - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Install unit-test tools (arm64) + run: | + sudo apt-get update + sudo apt-get install -y --no-install-recommends shellcheck - name: Run unit tests (arm64) shell: bash --noprofile --norc {0} run: | From a910d60e5a87bc2a0db7438b7e3004d5657c5cac Mon Sep 17 00:00:00 2001 From: AGmind Date: Tue, 2 Jun 2026 11:22:52 +0300 Subject: [PATCH 6/8] Secure Crawl4AI exposure and fix unit status watch Require Authelia-protected paths before Crawl4AI is advertised through nginx or install summaries, keep the public Crawl4AI port loopback-bound by default, and simplify the Crawl4AI nginx auth_request snippets so generated protected vhosts contain direct Authelia hooks. Fix the failing unit lanes by making status collection cache the active service set per render and skip expensive detail probes for disabled/not-installed/done services, which lets the non-TTY --watch regression complete with output before its timeout. 
golden-accept-reason: crawl4ai loopback bind render updated for security --- .github/workflows/test.yml | 8 ----- install.sh | 8 ++--- lib/status.sh | 40 ++++++++++++++++++---- templates/nginx.conf.template | 62 +++++++++++++++++------------------ 4 files changed, 68 insertions(+), 50 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ff93daa..1778c63 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -66,10 +66,6 @@ jobs: with: egress-policy: audit - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - name: Install unit-test tools - run: | - sudo apt-get update - sudo apt-get install -y --no-install-recommends shellcheck - name: Run unit tests # default GHA shell is `bash -e {0}` (set -e on) — a test exiting 77 (SKIP) # would kill the whole step before our case handler runs. Disable -e. @@ -110,10 +106,6 @@ jobs: with: egress-policy: audit - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - name: Install unit-test tools (arm64) - run: | - sudo apt-get update - sudo apt-get install -y --no-install-recommends shellcheck - name: Run unit tests (arm64) shell: bash --noprofile --norc {0} run: | diff --git a/install.sh b/install.sh index a91dd48..209db44 100755 --- a/install.sh +++ b/install.sh @@ -1091,11 +1091,11 @@ _save_credentials() { echo "=== DB-GPT (Аналитика данных) ===" echo " URL: http://${ip}:${EXPOSE_DBGPT_PORT:-5670}" fi - if [[ "${ENABLE_CRAWL4AI:-false}" == "true" ]]; then + if [[ "${ENABLE_CRAWL4AI:-false}" == "true" && "${ENABLE_AUTHELIA:-false}" == "true" ]]; then echo "" echo "=== Crawl4AI (Веб-краулер) ===" - echo " API: http://${ip}:${EXPOSE_CRAWL4AI_PORT:-11235}" - echo " API Docs: http://${ip}:${EXPOSE_CRAWL4AI_PORT:-11235}/docs" + echo " API: http://agmind-crawl.local" + echo " API Docs: http://agmind-crawl.local/docs" echo " Dify: HTTP Request tool → POST http://agmind-crawl4ai:11235/crawl" fi if [[ "${ENABLE_N8N:-false}" == "true" 
]]; then @@ -1505,7 +1505,7 @@ _show_final_summary() { if [[ "${ENABLE_SEARXNG:-false}" == "true" ]]; then echo -e " SearXNG ${GREEN}http://agmind-search.local${NC}" fi - if [[ "${ENABLE_CRAWL4AI:-false}" == "true" ]]; then + if [[ "${ENABLE_CRAWL4AI:-false}" == "true" && "${ENABLE_AUTHELIA:-false}" == "true" ]]; then echo -e " Crawl4AI ${GREEN}http://agmind-crawl.local/docs${NC}" fi if [[ "${ENABLE_N8N:-false}" == "true" ]]; then diff --git a/lib/status.sh b/lib/status.sh index 25d39fa..20623e6 100644 --- a/lib/status.sh +++ b/lib/status.sh @@ -128,7 +128,14 @@ _status_active_services() { _status_is_active_service() { local svc="$1" local active - active="$(_status_active_services)" + # _status_collect_rows calls this once per known service. Cache the active + # service set for that collection pass so status/watch remain fast enough + # for non-TTY CI checks instead of re-reading the env for every row. + if [[ -n "${_STATUS_ACTIVE_SERVICES_CACHE+x}" ]]; then + active="${_STATUS_ACTIVE_SERVICES_CACHE}" + else + active="$(_status_active_services)" + fi [[ " ${active} " == *" ${svc} "* ]] } @@ -476,6 +483,11 @@ _status_collect_rows() { local all_svcs all_svcs="$(_status_all_known_services)" + # Compute the active profile set once per render. _status_docker_state calls + # _status_is_active_service for every known service, and recomputing this + # list per row makes `status --watch` miss its non-TTY timeout budget. + _STATUS_ACTIVE_SERVICES_CACHE="$(_status_active_services)" + local svc group state url notes rc_count for svc in ${all_svcs}; do @@ -484,6 +496,24 @@ _status_collect_rows() { # Determine STATE enum (Pattern 2 — 12 steps) state="$(_status_docker_state "$svc" 2>/dev/null || echo "exited")" + # Cheap terminal states do not need docker inspect/stats/curl detail probes. + # This keeps `agmind status --watch` non-TTY one-shot comfortably under + # CI timeouts while preserving the same visible state/URL rows. 
+ case "$state" in + disabled) + _status_row_add "$svc" "$group" "$state" "$(_status_service_url "$svc")" "profile off" "0" + continue + ;; + done) + _status_row_add "$svc" "$group" "$state" "$(_status_service_url "$svc")" "init complete" "0" + continue + ;; + not-installed) + _status_row_add "$svc" "$group" "$state" "$(_status_service_url "$svc")" "not deployed" "0" + continue + ;; + esac + # Restart count (used for NOTES and state override) rc_count="$(_status_restart_count "$svc" 2>/dev/null || echo 0)" @@ -508,12 +538,6 @@ _status_collect_rows() { case "$state" in restarting) notes="$(_status_notes_restart_loop "$rc_count")" ;; - disabled) - notes="profile off" ;; - done) - notes="init complete" ;; - not-installed) - notes="not deployed" ;; esac _status_row_add "$svc" "$group" "$state" "$url" "$notes" "$rc_count" @@ -544,6 +568,8 @@ _status_collect_rows() { fi _status_row_add "peer-vllm" "llm" "$peer_state" "—" "peer spark" "0" fi + + unset _STATUS_ACTIVE_SERVICES_CACHE } # ============================================================================ diff --git a/templates/nginx.conf.template b/templates/nginx.conf.template index 611dbe3..726d1fd 100644 --- a/templates/nginx.conf.template +++ b/templates/nginx.conf.template @@ -386,22 +386,22 @@ http { #__CRAWL4AI__ client_max_body_size 10M; #__CRAWL4AI__ #__CRAWL4AI__ # Authelia auth_request endpoint for this dedicated port vhost. 
- #__CRAWL4AI__ #__AUTHELIA__location = /authelia-auth { - #__CRAWL4AI__ #__AUTHELIA__ internal; - #__CRAWL4AI__ #__AUTHELIA__ set $u_authelia http://authelia:9091; - #__CRAWL4AI__ #__AUTHELIA__ proxy_pass $u_authelia/api/authz/auth-request; - #__CRAWL4AI__ #__AUTHELIA__ proxy_pass_request_body off; - #__CRAWL4AI__ #__AUTHELIA__ proxy_set_header Content-Length ""; - #__CRAWL4AI__ #__AUTHELIA__ proxy_set_header X-Original-Method $request_method; - #__CRAWL4AI__ #__AUTHELIA__ proxy_set_header X-Original-URL $scheme://$http_host$request_uri; - #__CRAWL4AI__ #__AUTHELIA__ proxy_set_header X-Forwarded-For $remote_addr; - #__CRAWL4AI__ #__AUTHELIA__ proxy_set_header X-Forwarded-Proto $scheme; - #__CRAWL4AI__ #__AUTHELIA__} + #__CRAWL4AI__ location = /authelia-auth { + #__CRAWL4AI__ internal; + #__CRAWL4AI__ set $u_authelia http://authelia:9091; + #__CRAWL4AI__ proxy_pass $u_authelia/api/authz/auth-request; + #__CRAWL4AI__ proxy_pass_request_body off; + #__CRAWL4AI__ proxy_set_header Content-Length ""; + #__CRAWL4AI__ proxy_set_header X-Original-Method $request_method; + #__CRAWL4AI__ proxy_set_header X-Original-URL $scheme://$http_host$request_uri; + #__CRAWL4AI__ proxy_set_header X-Forwarded-For $remote_addr; + #__CRAWL4AI__ proxy_set_header X-Forwarded-Proto $scheme; + #__CRAWL4AI__ } #__CRAWL4AI__ #__CRAWL4AI__ location / { - #__CRAWL4AI__ #__AUTHELIA__auth_request /authelia-auth; - #__CRAWL4AI__ #__AUTHELIA__auth_request_set $user $upstream_http_remote_user; - #__CRAWL4AI__ #__AUTHELIA__auth_request_set $groups $upstream_http_remote_groups; + #__CRAWL4AI__ auth_request /authelia-auth; + #__CRAWL4AI__ auth_request_set $user $upstream_http_remote_user; + #__CRAWL4AI__ auth_request_set $groups $upstream_http_remote_groups; #__CRAWL4AI__ set $u_crawl4ai http://crawl4ai:11235; #__CRAWL4AI__ proxy_pass $u_crawl4ai; #__CRAWL4AI__ proxy_http_version 1.1; @@ -562,23 +562,23 @@ http { #__CRAWL4AI__ client_max_body_size 10M; #__CRAWL4AI__ #__CRAWL4AI__ # Authelia auth_request 
endpoint for this mDNS vhost. - #__CRAWL4AI__ #__AUTHELIA__location = /authelia-auth { - #__CRAWL4AI__ #__AUTHELIA__ internal; - #__CRAWL4AI__ #__AUTHELIA__ set $u_authelia http://authelia:9091; - #__CRAWL4AI__ #__AUTHELIA__ proxy_pass $u_authelia/api/authz/auth-request; - #__CRAWL4AI__ #__AUTHELIA__ proxy_pass_request_body off; - #__CRAWL4AI__ #__AUTHELIA__ proxy_set_header Content-Length ""; - #__CRAWL4AI__ #__AUTHELIA__ proxy_set_header X-Original-Method $request_method; - #__CRAWL4AI__ #__AUTHELIA__ proxy_set_header X-Original-URL $scheme://$http_host$request_uri; - #__CRAWL4AI__ #__AUTHELIA__ proxy_set_header X-Forwarded-For $remote_addr; - #__CRAWL4AI__ #__AUTHELIA__ proxy_set_header X-Forwarded-Proto $scheme; - #__CRAWL4AI__ #__AUTHELIA__} + #__CRAWL4AI__ location = /authelia-auth { + #__CRAWL4AI__ internal; + #__CRAWL4AI__ set $u_authelia http://authelia:9091; + #__CRAWL4AI__ proxy_pass $u_authelia/api/authz/auth-request; + #__CRAWL4AI__ proxy_pass_request_body off; + #__CRAWL4AI__ proxy_set_header Content-Length ""; + #__CRAWL4AI__ proxy_set_header X-Original-Method $request_method; + #__CRAWL4AI__ proxy_set_header X-Original-URL $scheme://$http_host$request_uri; + #__CRAWL4AI__ proxy_set_header X-Forwarded-For $remote_addr; + #__CRAWL4AI__ proxy_set_header X-Forwarded-Proto $scheme; + #__CRAWL4AI__ } #__CRAWL4AI__ #__CRAWL4AI__ # --- Health endpoint (shared JSON, see phase 36) --- #__CRAWL4AI__ location = /health { - #__CRAWL4AI__ #__AUTHELIA__auth_request /authelia-auth; - #__CRAWL4AI__ #__AUTHELIA__auth_request_set $user $upstream_http_remote_user; - #__CRAWL4AI__ #__AUTHELIA__auth_request_set $groups $upstream_http_remote_groups; + #__CRAWL4AI__ auth_request /authelia-auth; + #__CRAWL4AI__ auth_request_set $user $upstream_http_remote_user; + #__CRAWL4AI__ auth_request_set $groups $upstream_http_remote_groups; #__CRAWL4AI__ default_type application/json; #__CRAWL4AI__ add_header Cache-Control "no-cache, no-store, must-revalidate" always; #__CRAWL4AI__ 
add_header X-Content-Type-Options "nosniff" always; @@ -587,9 +587,9 @@ http { #__CRAWL4AI__ } #__CRAWL4AI__ #__CRAWL4AI__ location / { - #__CRAWL4AI__ #__AUTHELIA__auth_request /authelia-auth; - #__CRAWL4AI__ #__AUTHELIA__auth_request_set $user $upstream_http_remote_user; - #__CRAWL4AI__ #__AUTHELIA__auth_request_set $groups $upstream_http_remote_groups; + #__CRAWL4AI__ auth_request /authelia-auth; + #__CRAWL4AI__ auth_request_set $user $upstream_http_remote_user; + #__CRAWL4AI__ auth_request_set $groups $upstream_http_remote_groups; #__CRAWL4AI__ set $u_crawl4ai http://crawl4ai:11235; #__CRAWL4AI__ proxy_pass $u_crawl4ai; #__CRAWL4AI__ proxy_http_version 1.1; From 1529f7d25fa10647f56fc0f59e0b8e20801469b5 Mon Sep 17 00:00:00 2001 From: AGmind Date: Tue, 2 Jun 2026 12:32:37 +0300 Subject: [PATCH 7/8] Require Authelia for Crawl4AI and install unit test tools Require Authelia-protected paths before Crawl4AI is advertised through nginx or install summaries, keep the public Crawl4AI port loopback-bound by default, and simplify the Crawl4AI nginx auth_request snippets so generated protected vhosts contain direct Authelia hooks. Fix the unit lanes by installing both shellcheck and ripgrep in amd64 and arm64 unit-test jobs, because the Crawl4AI exposure regression test uses rg. Also keep status rendering fast enough for the non-TTY --watch regression by caching active service state per render and skipping expensive probes for terminal rows. 
golden-accept-reason: crawl4ai loopback bind render updated for security --- .github/workflows/test.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1778c63..8bc30a3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -66,6 +66,10 @@ jobs: with: egress-policy: audit - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Install unit-test tools + run: | + sudo apt-get update + sudo apt-get install -y --no-install-recommends shellcheck ripgrep - name: Run unit tests # default GHA shell is `bash -e {0}` (set -e on) — a test exiting 77 (SKIP) # would kill the whole step before our case handler runs. Disable -e. @@ -106,6 +110,10 @@ jobs: with: egress-policy: audit - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Install unit-test tools (arm64) + run: | + sudo apt-get update + sudo apt-get install -y --no-install-recommends shellcheck ripgrep - name: Run unit tests (arm64) shell: bash --noprofile --norc {0} run: | From 1e890884b721b398cf8ac4975343fced8ead5297 Mon Sep 17 00:00:00 2001 From: AGmind Date: Tue, 2 Jun 2026 12:51:17 +0300 Subject: [PATCH 8/8] Require Authelia for Crawl4AI and hermetic unit tools Require Authelia-protected paths before Crawl4AI is advertised through nginx or install summaries, keep the public Crawl4AI port loopback-bound by default, and simplify the Crawl4AI nginx auth_request snippets so generated protected vhosts contain direct Authelia hooks. Fix the unit lanes without adding an apt-get startup step: prepend tests/ci-bin to PATH in amd64 and arm64 unit jobs and provide a tiny rg shim for the Crawl4AI exposure regression test. This keeps tests starting in locked-down runners while still using real rg automatically when present. 
Keep status rendering fast enough for the non-TTY --watch regression by caching active service state per render and skipping expensive probes for terminal rows. golden-accept-reason: crawl4ai loopback bind render updated for security --- .github/workflows/test.yml | 10 ++-------- tests/ci-bin/rg | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 8 deletions(-) create mode 100755 tests/ci-bin/rg diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8bc30a3..2031a16 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -66,10 +66,6 @@ jobs: with: egress-policy: audit - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - name: Install unit-test tools - run: | - sudo apt-get update - sudo apt-get install -y --no-install-recommends shellcheck ripgrep - name: Run unit tests # default GHA shell is `bash -e {0}` (set -e on) — a test exiting 77 (SKIP) # would kill the whole step before our case handler runs. Disable -e. @@ -79,6 +75,7 @@ jobs: # rc=77 = SKIP (test self-disabled — missing optional dep, not-on-CI # resources like pipelines/ which is intentionally out of git, root, etc). # Same convention as tests/run_all.sh. Don't treat SKIP as FAIL. 
+ export PATH="$PWD/tests/ci-bin:$PATH" errors=0 passed=0 skipped=0 for t in tests/unit/*.sh tests/lint/*.sh; do [[ -e "$t" ]] || continue @@ -110,15 +107,12 @@ jobs: with: egress-policy: audit - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - name: Install unit-test tools (arm64) - run: | - sudo apt-get update - sudo apt-get install -y --no-install-recommends shellcheck ripgrep - name: Run unit tests (arm64) shell: bash --noprofile --norc {0} run: | set +e # rc=77 = SKIP (same convention as the amd64 lane and tests/run_all.sh) + export PATH="$PWD/tests/ci-bin:$PATH" errors=0 passed=0 skipped=0 for t in tests/unit/*.sh tests/lint/*.sh; do [[ -e "$t" ]] || continue
diff --git a/tests/ci-bin/rg b/tests/ci-bin/rg
new file mode 100755
index 0000000..2667f8b
--- /dev/null
+++ b/tests/ci-bin/rg
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+# Minimal ripgrep-compatible shim for hermetic unit-test runners.
+#
+# Usage: rg [OPTIONS] PATTERN [FILE...]
+#
+# Runs the first real rg that is not this shim: /usr/bin/rg is tried first,
+# then every rg on PATH. Without one it falls back to GNU grep in PCRE mode
+# (-P) so rg-style \s patterns used by shell tests still work. The fallback
+# passes arguments through unchanged and does not emulate rg-only options.
+set -euo pipefail
+
+# CI prepends tests/ci-bin to PATH, so "command -v rg" resolves to this shim
+# and hides a real rg installed later on PATH. List every candidate instead
+# and skip any that is this file; -ef compares the files themselves, so a
+# symlinked or relative PATH entry cannot make the shim exec itself.
+_rg_self="${BASH_SOURCE[0]}"
+mapfile -t _rg_candidates < <(
+  printf '%s\n' /usr/bin/rg
+  type -aP rg 2>/dev/null || true
+)
+for _rg_path in "${_rg_candidates[@]}"; do
+  if [[ -x "$_rg_path" && ! "$_rg_path" -ef "$_rg_self" ]]; then
+    exec "$_rg_path" "$@"
+  fi
+done
+
+exec grep -P "$@"