Skip to content

Commit

Permalink
Fix issues with Dagster local development (#1479)
Browse files Browse the repository at this point in the history
* fix: Decode stdout bytes from mongodump as text

* fix: Resolve build failures for local docker-compose

Launching the local docker-compose stack fails because the image build tries to install the
ol_orchestrate package. This adds the `--no-root` flag to the Poetry install command to address that error.

* fix: Resolve GitHub token auth for Vault client

The client object in the resource was being recreated on every access. This adds
conditional initialization so that subsequent accesses return the already-initialized object.

* feat: Use Postgres for Dagster storage

* feat: Migrate to UV for faster and cleaner builds

* fix: Resolve errors in mongodump command for legacy edX export

* fix: Set default sensor statuses to stopped

When launching a local Dagster instance we don't want all of the sensors to start automatically,
because that would kick off a lot of compute that isn't needed unless those sensors are what is
being tested.
  • Loading branch information
blarghmatey authored Feb 19, 2025
1 parent 816150e commit 936f917
Show file tree
Hide file tree
Showing 12 changed files with 2,955 additions and 5,732 deletions.
18 changes: 6 additions & 12 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
FROM --platform=linux/amd64 python:3.12-slim AS dagster-base
FROM --platform=linux/amd64 ghcr.io/astral-sh/uv:debian-slim

ENV UV_LINK_MODE=copy
# Docker run launcher example
RUN mkdir -p /opt/dagster/dagster_home /opt/dagster/app
RUN mkdir -p /opt/dagster/dagster_home /opt/dagster/app /opt/dagster/code

# Install necessary dependencies
RUN pip install dagster dagster-docker dagster-webserver dagster-postgres dagster-webserver

RUN mkdir -p /opt/dagster/dagster_home /opt/dagster/app && \
useradd -s /bin/bash -d /opt/dagster/dagster_home/ dagster && \
RUN useradd -s /bin/bash -d /opt/dagster/dagster_home/ dagster && \
chown -R dagster: /opt/dagster

RUN apt update && \
Expand All @@ -16,7 +13,6 @@ RUN apt update && \
apt autoremove -y && \
rm -rf /var/lib/apt/lists/*


# Install packages needed to talk to edxapp mongodb
RUN wget -qO - https://www.mongodb.org/static/pgp/server-5.0.asc | apt-key add - && \
echo "deb http://repo.mongodb.org/apt/debian buster/mongodb-org/5.0 main" > /etc/apt/sources.list.d/mongodb-org-5.0.list && \
Expand All @@ -26,14 +22,12 @@ RUN wget -qO - https://www.mongodb.org/static/pgp/server-5.0.asc | apt-key add -
apt autoremove && \
rm -rf /var/lib/apt/lists/*

RUN pip install poetry

ENV DAGSTER_HOME=/opt/dagster/dagster_home/

USER dagster
EXPOSE 3000
WORKDIR /opt/dagster/code
# Copy poetry project to the $WORKDIR
COPY pyproject.toml /opt/dagster/code
COPY pyproject.toml uv.lock /opt/dagster/code

RUN poetry install --without=dev --no-cache
RUN uv sync --locked --no-dev --no-install-project
7 changes: 0 additions & 7 deletions dagster.yaml

This file was deleted.

92 changes: 80 additions & 12 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -1,47 +1,115 @@
---
# -*- mode: yaml -*-

services:
dagster_init:
  # One-shot service: runs `dagster instance migrate` once Postgres is healthy.
  # The webserver and daemon services wait for this container to complete
  # successfully before they start.
  container_name: dagster_init
  image: ol-dagster
  depends_on:
    postgres:
      condition: service_healthy
  build:
    context: .
    dockerfile: Dockerfile
  command: ["uv", "run", "dagster", "instance", "migrate"]
  environment:
    GITHUB_TOKEN: ${GITHUB_TOKEN}
    # Matches the `image` name above and the value used by the webserver and
    # daemon services.
    DAGSTER_CURRENT_IMAGE: "ol-dagster"
    # Connection settings for the `postgres` service defined in this file.
    DAGSTER_PG_USERNAME: postgres
    DAGSTER_PG_PASSWORD: postgres # pragma: allowlist secret
    DAGSTER_PG_HOST: postgres
    DAGSTER_PG_DB: dagster
  volumes:
    - /var/run/docker.sock:/var/run/docker.sock
    - /tmp/io_manager_storage:/tmp/io_manager_storage
    # Local instance directory, with the dev instance config and workspace
    # file mounted on top of it.
    - ./src/ol_orchestrate/dagster_local/:/opt/dagster/dagster_home/
    - ./src/ol_orchestrate/dagster.dev.yaml:/opt/dagster/dagster_home/dagster.yaml
    - ./src/ol_orchestrate/workspace.yaml:/opt/dagster/dagster_home/workspace.yaml
    # Source trees mounted into the image's working directory.
    - ./src/ol_orchestrate:/opt/dagster/code/ol_orchestrate
    - ./src/ol_dbt:/opt/dagster/code/ol_dbt

# This service runs dagster-webserver, which loads your user code from the user code container.
# Since our instance uses the QueuedRunCoordinator, any runs submitted from the webserver will be put on
# a queue and later dequeued and launched by dagster-daemon.
dagster_webserver:
container_name: dagster_webserver
image: ol-dagster
depends_on:
postgres:
condition: service_healthy
dagster_init:
condition: service_completed_successfully
build:
context: .
dockerfile: Dockerfile
restart: unless-stopped
entrypoint: ["poetry", "run", "dagster-webserver", "-w", "/opt/dagster/app/workspace.yaml",
entrypoint: ["uv", "run", "dagster-webserver", "-w", "/opt/dagster/dagster_home/workspace.yaml",
"-h", "0.0.0.0", "-p", "3000"]
ports:
- "3000:3000"
environment:
- GITHUB_TOKEN=${GITHUB_TOKEN}
- DAGSTER_CURRENT_IMAGE="dagster-webserver"
volumes: # Make docker client accessible so we can terminate containers from the webserver
GITHUB_TOKEN: ${GITHUB_TOKEN}
DAGSTER_CURRENT_IMAGE: "ol-dagster"
DAGSTER_PG_USERNAME: postgres
DAGSTER_PG_PASSWORD: postgres # pragma: allowlist secret
DAGSTER_PG_HOST: postgres
DAGSTER_PG_DB: dagster
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- /tmp/io_manager_storage:/tmp/io_manager_storage
- ./src/ol_orchestrate/workspace.yaml:/opt/dagster/app/workspace.yaml
- ./src/ol_orchestrate/dagster_local/:/opt/dagster/dagster_home/
- ./src/ol_orchestrate/dagster.dev.yaml:/opt/dagster/dagster_home/dagster.yaml
- ./src/ol_orchestrate/workspace.yaml:/opt/dagster/dagster_home/workspace.yaml
- ./src/ol_orchestrate:/opt/dagster/code/ol_orchestrate
- ./src/ol_dbt:/opt/dagster/code/ol_dbt

# This service runs the dagster-daemon process, which is responsible for taking runs
# off of the queue and launching them, as well as creating runs from schedules or sensors.
dagster_daemon:
container_name: dagster_daemon
image: ol-dagster
depends_on:
postgres:
condition: service_healthy
dagster_init:
condition: service_completed_successfully
build:
context: .
dockerfile: Dockerfile
image: dagster-daemon
restart: on-failure
# restart: unless-stopped
entrypoint: ["poetry", "run", "dagster-daemon", "run", "-w", "/opt/dagster/app/workspace.yaml"]
entrypoint: ["uv", "run", "dagster-daemon", "run", "-w", "/opt/dagster/dagster_home/workspace.yaml"]
environment:
- GITHUB_TOKEN=${GITHUB_TOKEN}
- DAGSTER_CURRENT_IMAGE="dagster-daemon"
volumes: # Make docker client accessible so we can terminate containers from the webserver
GITHUB_TOKEN: ${GITHUB_TOKEN}
DAGSTER_CURRENT_IMAGE: "ol-dagster"
DAGSTER_PG_USERNAME: postgres
DAGSTER_PG_PASSWORD: postgres # pragma: allowlist secret
DAGSTER_PG_HOST: postgres
DAGSTER_PG_DB: dagster
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- /tmp/io_manager_storage:/tmp/io_manager_storage
- ./src/ol_orchestrate/workspace.yaml:/opt/dagster/app/workspace.yaml
- ./src/ol_orchestrate/dagster_local/:/opt/dagster/dagster_home/
- ./src/ol_orchestrate/dagster.dev.yaml:/opt/dagster/dagster_home/dagster.yaml
- ./src/ol_orchestrate/workspace.yaml:/opt/dagster/dagster_home/workspace.yaml
- ./src/ol_orchestrate:/opt/dagster/code/ol_orchestrate
- ./src/ol_dbt:/opt/dagster/code/ol_dbt

postgres:
  # Database for local Dagster storage. The Dagster services in this file
  # wait on this healthcheck via `condition: service_healthy`.
  image: postgres
  healthcheck:
    # NOTE(review): PGUSER below is presumably set so pg_isready checks as
    # the postgres user -- confirm.
    test: ["CMD", "pg_isready"]
    interval: 3s
    timeout: 3s
    retries: 10
  ports:
    # Quoted so the HOST:CONTAINER mapping is always read as a string.
    - "5432:5432"
  environment:
    PGUSER: postgres
    POSTGRES_USER: postgres
    POSTGRES_PASSWORD: postgres # pragma: allowlist secret
    POSTGRES_DB: dagster
  volumes:
    # NOTE(review): the image tag is unpinned; confirm that this mount point
    # is where the image version in use keeps its data directory, otherwise
    # the database will not persist in the named volume.
    - pgdata:/var/lib/postgresql

# Named volume mounted by the postgres service.
volumes:
pgdata:
17 changes: 8 additions & 9 deletions dockerfiles/orchestrate/Dockerfile.global
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
FROM --platform=linux/amd64 python:3.12-slim AS dagster-base
FROM --platform=linux/amd64 ghcr.io/astral-sh/uv:debian-slim AS dagster-base
ENV UV_LINK_MODE=copy
RUN mkdir -p /opt/dagster/dagster_home /opt/dagster/app && \
useradd -s /bin/bash -d /opt/dagster/dagster_home/ dagster && \
chown -R dagster: /opt/dagster
Expand All @@ -16,8 +17,6 @@ RUN wget -qO - https://www.mongodb.org/static/pgp/server-5.0.asc | apt-key add -
apt autoremove && \
rm -rf /var/lib/apt/lists/*

RUN pip install poetry

ENV DAGSTER_HOME=/opt/dagster/dagster_home/

# Copy your code and workspace to /opt/dagster/app
Expand All @@ -34,16 +33,16 @@ RUN cp /opt/dagster/code/src/ol_orchestrate/dagster.yaml /opt/dagster/dagster_ho
USER dagster
EXPOSE 3000
WORKDIR /opt/dagster/code
RUN poetry install --without=dev --no-cache
RUN uv sync --locked --no-dev

RUN cd /opt/dbt && \
poetry run -C /opt/dagster/code dbt deps --project-dir /opt/dbt && \
poetry run -C /opt/dagster/code dbt ls --project-dir /opt/dbt --profiles-dir /opt/dbt --target dev
uv run -C /opt/dagster/code dbt deps --project-dir /opt/dbt && \
uv run -C /opt/dagster/code dbt ls --project-dir /opt/dbt --profiles-dir /opt/dbt --target dev

# ENTRYPOINTS
# Dagster Daemon
# ENTRYPOINT ["poetry", "run", "dagster-daemon", "run"]
# ENTRYPOINT ["uv", "run", "dagster-daemon", "run"]
# Dagit
# ENTRYPOINT ["poetry", "run", "dagster-webserver", "-h", "0.0.0.0", "-p", "3000"]
# ENTRYPOINT ["uv", "run", "dagster-webserver", "-h", "0.0.0.0", "-p", "3000"]
# Pipeline
# ENTRYPOINT ["poetry", "run", "dagster", "api", "grpc", "-h", "0.0.0.0", "-p", "4000"]
# ENTRYPOINT ["uv", "run", "dagster", "api", "grpc", "-h", "0.0.0.0", "-p", "4000"]
Loading

0 comments on commit 936f917

Please sign in to comment.