|
| 1 | +volumes: |
| 2 | + archivebox-data: |
| 3 | +# Usage: |
| 4 | +# docker compose run archivebox init --setup |
| 5 | +# docker compose up |
| 6 | +# echo "https://example.com" | docker compose run -T archivebox add |
| 7 | +# docker compose run archivebox add --depth=1 https://example.com/some/feed.rss |
| 8 | +# docker compose run archivebox config --set MEDIA_MAX_SIZE=750m |
| 9 | +# docker compose run archivebox help |
| 10 | +# Documentation: |
| 11 | +# https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker-compose |
| 12 | + |
| 13 | +version: '3.9' |
| 14 | + |
| 15 | +services: |
| 16 | + archivebox: |
| 17 | + #image: ${DOCKER_IMAGE:-archivebox/archivebox:dev} |
| 18 | + container_name: archivebox |
| 19 | + image: archivebox/archivebox:dev |
| 20 | + command: server --quick-init 0.0.0.0:8000 |
| 21 | + ports: |
| 22 | + - 8250:8000 |
| 23 | + volumes: |
| 24 | + - archivebox-data:/data |
| 25 | + # - ./etc/crontabs:/var/spool/cron/crontabs # uncomment this and archivebox_scheduler below to set up automatic recurring archive jobs |
| 26 | + # - ./archivebox:/app/archivebox # uncomment this to mount the ArchiveBox source code at runtime (for developers working on archivebox) |
| 27 | + # build: . # uncomment this to build the image from source code at buildtime (for developers working on archivebox) |
| 28 | + environment: |
| 29 | + - ALLOWED_HOSTS=* # restrict this to only accept incoming traffic via specific domain name |
| 30 | + # - PUBLIC_INDEX=True # set to False to prevent anonymous users from viewing snapshot list |
| 31 | + # - PUBLIC_SNAPSHOTS=True # set to False to prevent anonymous users from viewing snapshot content |
| 32 | + # - PUBLIC_ADD_VIEW=False # set to True to allow anonymous users to submit new URLs to archive |
| 33 | + # - ADMIN_USERNAME=admin # create an admin user on first run with the given user/pass combo |
| 34 | + # - ADMIN_PASSWORD=SomeSecretPassword |
| 35 | + # - PUID=911 # set to your host user's UID & GID if you encounter permissions issues |
| 36 | + # - PGID=911 |
| 37 | + # - SEARCH_BACKEND_ENGINE=sonic # uncomment these and sonic container below for better full-text search |
| 38 | + # - SEARCH_BACKEND_HOST_NAME=sonic |
| 39 | + # - SEARCH_BACKEND_PASSWORD=SomeSecretPassword |
| 40 | + # - MEDIA_MAX_SIZE=750m # increase this filesize limit to allow archiving larger audio/video files |
| 41 | + # - TIMEOUT=60 # increase this number to 120+ seconds if you see many slow downloads timing out |
| 42 | + # - CHECK_SSL_VALIDITY=True # set to False to disable strict SSL checking (allows saving URLs w/ broken certs) |
| 43 | + # - SAVE_ARCHIVE_DOT_ORG=True # set to False to disable submitting all URLs to Archive.org when archiving |
| 44 | + # ... |
| 45 | + # add further configuration options from archivebox/config.py as needed (to apply them only to this container) |
| 46 | + # or set using `docker compose run archivebox config --set SOME_KEY=someval` (to persist config across all containers) |
| 47 | + |
| 48 | + # For ad-blocking during archiving, uncomment this section and the pihole service section below |
| 49 | + # networks: |
| 50 | + # - dns |
| 51 | + # dns: |
| 52 | + # - 172.20.0.53 |
| 53 | + |
| 54 | + |
| 55 | + ######## Optional Addons: tweak examples below as needed for your specific use case ######## |
| 56 | + |
| 57 | + ### Example: To run the Sonic full-text search backend, first download the config file to sonic.cfg |
| 58 | + # $ curl -O https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/etc/sonic.cfg |
| 59 | + # After starting, backfill any existing Snapshots into the full-text index: |
| 60 | + # $ docker compose run archivebox update --index-only |
| 61 | + |
| 62 | + # sonic: |
| 63 | + # image: valeriansaliou/sonic:latest |
| 64 | + # expose: |
| 65 | + # - 1491 |
| 66 | + # environment: |
| 67 | + # - SEARCH_BACKEND_PASSWORD=SomeSecretPassword |
| 68 | + # volumes: |
| 69 | + # - ./sonic.cfg:/etc/sonic.cfg:ro |
| 70 | + # - ./data/sonic:/var/lib/sonic/store |
| 71 | + |
| 72 | + |
| 73 | + ### Example: To run pihole in order to block ad/tracker requests during archiving, |
| 74 | + # uncomment this block and set up pihole using its admin interface |
| 75 | + |
| 76 | + # pihole: |
| 77 | + # image: pihole/pihole:latest |
| 78 | + # ports: |
| 79 | + # - 127.0.0.1:8090:80 # uncomment to access the admin HTTP interface on http://localhost:8090 |
| 80 | + # environment: |
| 81 | + # - WEBPASSWORD=SET_THIS_TO_SOME_SECRET_PASSWORD_FOR_ADMIN_DASHBOARD |
| 82 | + # - DNSMASQ_LISTENING=all |
| 83 | + # dns: |
| 84 | + # - 127.0.0.1 |
| 85 | + # - 1.1.1.1 |
| 86 | + # networks: |
| 87 | + # dns: |
| 88 | + # ipv4_address: 172.20.0.53 |
| 89 | + # volumes: |
| 90 | + # - ./etc/pihole:/etc/pihole |
| 91 | + # - ./etc/dnsmasq:/etc/dnsmasq.d |
| 92 | + |
| 93 | + |
| 94 | + ### Example: Enable ability to run regularly scheduled archiving tasks by uncommenting this container |
| 95 | + # $ docker compose run archivebox schedule --every=day --depth=1 'https://example.com/some/rss/feed.xml' |
| 96 | + # then restart the scheduler container to apply the changes to the schedule |
| 97 | + # $ docker compose restart archivebox_scheduler |
| 98 | + |
| 99 | + # archivebox_scheduler: |
| 100 | + # image: ${DOCKER_IMAGE:-archivebox/archivebox:dev} |
| 101 | + # command: schedule --foreground |
| 102 | + # environment: |
| 103 | + # - MEDIA_MAX_SIZE=750m # increase this number to allow archiving larger audio/video files |
| 104 | + # # - TIMEOUT=60 # increase if you see timeouts often during archiving / on slow networks |
| 105 | + # # - ONLY_NEW=True # set to False to retry previously failed URLs when re-adding instead of skipping them |
| 106 | + # # - CHECK_SSL_VALIDITY=True # set to False to allow saving URLs w/ broken SSL certs |
| 107 | + # # - SAVE_ARCHIVE_DOT_ORG=True # set to False to disable submitting URLs to Archive.org when archiving |
| 108 | + # # - PUID=502 # set to your host user's UID & GID if you encounter permissions issues |
| 109 | + # # - PGID=20 |
| 110 | + # volumes: |
| 111 | + # - archivebox-data:/data # same named volume as the archivebox service, so both containers share one archive |
| 112 | + # - ./etc/crontabs:/var/spool/cron/crontabs |
| 113 | + # # cpus: 2 # uncomment / edit these values to limit container resource consumption |
| 114 | + # # mem_limit: 2048m |
| 115 | + # # shm_size: 1024m |
| 116 | + |
| 117 | + |
| 118 | + ### Example: Put Nginx in front of the ArchiveBox server for SSL termination |
| 119 | + |
| 120 | + # nginx: |
| 121 | + # image: nginx:alpine |
| 122 | + # ports: |
| 123 | + # - 443:443 |
| 124 | + # - 80:80 |
| 125 | + # volumes: |
| 126 | + # - ./etc/nginx.conf:/etc/nginx/nginx.conf |
| 127 | + # - ./data:/var/www |
| 128 | + |
| 129 | + |
| 130 | + ### Example: run all your ArchiveBox traffic through a WireGuard VPN tunnel |
| 131 | + |
| 132 | + # wireguard: |
| 133 | + # image: linuxserver/wireguard:latest |
| 134 | + # network_mode: 'service:archivebox' |
| 135 | + # cap_add: |
| 136 | + # - NET_ADMIN |
| 137 | + # - SYS_MODULE |
| 138 | + # sysctls: |
| 139 | + # - net.ipv4.conf.all.rp_filter=2 |
| 140 | + # - net.ipv4.conf.all.src_valid_mark=1 |
| 141 | + # volumes: |
| 142 | + # - /lib/modules:/lib/modules |
| 143 | + # - ./wireguard.conf:/config/wg0.conf:ro |
| 144 | + |
| 145 | + |
| 146 | + ### Example: Run PYWB in parallel and auto-import WARCs from ArchiveBox |
| 147 | + |
| 148 | + # pywb: |
| 149 | + # image: webrecorder/pywb:latest |
| 150 | + # entrypoint: /bin/sh -c '(wb-manager init default || test $$? -eq 2) && wb-manager add default /archivebox/archive/*/warc/*.warc.gz; wayback;' |
| 151 | + # environment: |
| 152 | + # - INIT_COLLECTION=archivebox |
| 153 | + # ports: |
| 154 | + # - 8080:8080 |
| 155 | + # volumes: |
| 156 | + # - archivebox-data:/archivebox # same named volume as the archivebox service, so its WARCs are visible here |
| 157 | + # - ./data/wayback:/webarchive |
| 158 | + |
| 159 | + |
| 160 | +networks: |
| 161 | + |
| 162 | + # network required by the optional pihole container so it can offer DNS resolution (port 53) on a fixed IP to the archivebox container |
| 163 | + dns: |
| 164 | + ipam: |
| 165 | + driver: default |
| 166 | + config: |
| 167 | + - subnet: 172.20.0.0/24 |
| 168 | + |
0 commit comments