Skip to content

Commit

Permalink
Adding support for "legacy dependencies" from a single codebase
Browse files Browse the repository at this point in the history
  • Loading branch information
alexbarcelo committed Feb 11, 2025
1 parent 4bb864f commit 3acd607
Show file tree
Hide file tree
Showing 17 changed files with 330 additions and 73 deletions.
4 changes: 4 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
# Now un-exclude:
#
!src
!compile-protos.sh
!dataclay-common
!requirements-legacydeps.txt
!requirements-dev.txt
!pyproject.toml
!README.md
!MANIFEST.in
Expand Down
17 changes: 17 additions & 0 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ jobs:

- name: Checkout repository
uses: actions/checkout@v4
with:
submodules: "true"

- name: Set up QEMU
uses: docker/setup-qemu-action@v3
Expand Down Expand Up @@ -75,3 +77,18 @@ jobs:
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max

- name: Build and push Docker image with legacy dependencies
id: build-and-push-legacydeps
uses: docker/build-push-action@v6
with:
context: .
file: Dockerfile.legacy-deps
platforms: linux/amd64,linux/arm64
build-args:
PYTHON_VERSION=${{ matrix.python-version }}-bullseye
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}-legacydeps
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
- uses: wntrblm/nox@main
- id: set-matrix
shell: bash
run: echo session=$(nox --json -l -s tests | jq -c '[.[].session]') | tee --append $GITHUB_OUTPUT
run: echo session=$(nox --json -l -k citests | jq -c '[.[].session]') | tee --append $GITHUB_OUTPUT
checks:
name: Session ${{ matrix.session }}
needs: [generate-jobs]
Expand Down
22 changes: 22 additions & 0 deletions Dockerfile.legacy-deps
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# syntax=docker/dockerfile:1

ARG PYTHON_VERSION=3.10-bookworm

# install dataclay
FROM python:$PYTHON_VERSION
COPY . /app

# Compiling protos inside to ensure correctness in contrast with the non-legacy-deps image
RUN python -m pip install --upgrade pip \
&& python -m pip install "grpcio-tools==1.48.2" \
&& cd /app && ./compile-protos.sh

RUN python -m pip install --upgrade pip \
&& python -m pip install -r /app/requirements-legacydeps.txt \
&& python -m pip install /app --no-deps

# prepare dataclay storage dir
RUN mkdir -p /data/storage;

# set workdir (this image defines no entrypoint)
WORKDIR /workdir
16 changes: 16 additions & 0 deletions Dockerfile.legacy-deps.dev
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# syntax=docker/dockerfile:1

ARG PYTHON_VERSION=3.10-bookworm

# install dataclay
FROM python:$PYTHON_VERSION
COPY . /app
RUN python -m pip install --upgrade pip \
&& python -m pip install -r /app/requirements-dev.txt -r /app/requirements-legacydeps.txt \
&& python -m pip install -e /app --no-deps

# prepare dataclay storage dir
RUN mkdir -p /data/storage;

# set workdir (this image defines no entrypoint)
WORKDIR /workdir
35 changes: 28 additions & 7 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,51 @@
# Define which Python versions to test with
PYPROJECT = nox.project.load_toml("pyproject.toml")
PYTHON_VERSIONS = nox.project.python_versions(PYPROJECT)
LATEST_PYTHON_VERSION = PYTHON_VERSIONS[-1]
DEFAULT_PYTHON = "3.10" # Arbitrary decision, choose a reliable version

# Default sessions (these will be executed in Github Actions)
# Maintain a clear separation between code checking and code altering tasks (don't add format)
nox.options.sessions = ["lint", "tests"]
# nox.options.reuse_existing_virtualenvs = True # TODO: Check if necessary


# Nox session that runs the test suite once per interpreter in PYTHON_VERSIONS.
# NOTE(review): this hunk seems to show both sides of the diff with the +/- markers
# stripped; the decorator without `tags` and the pytest call without `-x` look like
# the removed lines -- confirm against the repository before relying on either.
@nox.session(python=PYTHON_VERSIONS)
# Tagged "citests"; presumably so the CI job that lists sessions with
# `nox --json -l -k citests` picks this session up -- confirm.
@nox.session(python=PYTHON_VERSIONS, tags=["citests"])
def tests(session):
"""Run the test suite."""
# Test tooling goes in first, then the project itself (with its regular dependencies).
session.install("pytest", "pytest-asyncio", "pytest-docker", "pytest-cov")
session.install(".")
# Coverage is reported on the terminal, including the lines that were not hit.
session.run("pytest", "--cov", "--cov-report=term-missing")
# Same invocation plus `-x`, which makes pytest stop at the first failure.
session.run("pytest", "-x", "--cov", "--cov-report=term-missing")


@nox.session(python=LATEST_PYTHON_VERSION)
@nox.session(python=["3.9", "3.10"], tags=["citests"])
def legacy_deps_tests(session):
    """Run the test suite with legacy dependencies."""
    # Test tooling first, then the legacy dependency set together with the
    # pinned grpcio-tools, and finally the project itself without letting pip
    # resolve its regular (non-legacy) dependencies.
    test_tooling = ["pytest", "pytest-asyncio", "pytest-docker", "pytest-cov"]
    session.install(*test_tooling)
    session.install("grpcio-tools==1.48.2", "-r", "requirements-legacydeps.txt")
    session.install(".", "--no-deps")

    # Regenerate the protobuf/gRPC modules with the grpcio-tools installed above.
    # See compile-protos.sh, it should be the same command.
    protoc_invocation = [
        "python3",
        "-m",
        "grpc_tools.protoc",
        "--proto_path=dataclay-common",
        "--python_out=src",
        "--grpc_python_out=src",
    ]
    proto_sources = [
        "dataclay-common/dataclay/proto/common/common.proto",
        "dataclay-common/dataclay/proto/backend/backend.proto",
        "dataclay-common/dataclay/proto/metadata/metadata.proto",
    ]
    session.run(*protoc_invocation, *proto_sources)

    # Only the functional tests are run here, stopping at the first failure.
    pytest_invocation = [
        "pytest",
        "-x",
        "--disable-warnings",
        "--cov",
        "--cov-report=term-missing",
        "--build-legacy-deps",
        "tests/functional",
    ]
    session.run(*pytest_invocation)


@nox.session(python=DEFAULT_PYTHON)
def lint(session):
    """Lint the codebase using flake8."""
    # Paths handed to flake8: the package sources and the test suite.
    lint_targets = ("src/dataclay", "tests")
    session.install("flake8")
    session.run("flake8", *lint_targets)


@nox.session(python=LATEST_PYTHON_VERSION)
@nox.session(python=DEFAULT_PYTHON)
def format(session):
"""Automatically format code with black and isort."""
session.install("black", "isort")
Expand All @@ -35,15 +56,15 @@ def format(session):
session.run("black", ".")


# Nox session that type-checks the package with mypy.
# NOTE(review): two decorators appear because this diff view lost its +/- markers;
# the LATEST_PYTHON_VERSION line looks like the removed side -- confirm.
@nox.session(python=LATEST_PYTHON_VERSION)
@nox.session(python=DEFAULT_PYTHON)
def mypy(session):
"""Run type checks using mypy."""
# The project is installed before mypy; presumably so its dependencies are
# importable during type checking -- confirm.
session.install(".")
session.install("mypy")
# Only the package sources under src/dataclay are checked.
session.run("mypy", "src/dataclay")


@nox.session(python=LATEST_PYTHON_VERSION)
@nox.session(python=DEFAULT_PYTHON)
def safety(session):
"""Check for security vulnerabilities."""
session.install(".")
Expand Down
10 changes: 10 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Used only for the tests on legacy-deps
# (build process on Dockerfile.legacy-deps.dev)

pytest
pytest-asyncio
pytest-docker
grpcio-tools
black
isort
coverage[toml]
26 changes: 26 additions & 0 deletions requirements-legacydeps.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# These requirements should be used for installing dataClay in the "legacy dependencies" flavour.

# You will need to install these requirements before installing dataClay. You can do it with:
#
# $ pip install -r requirements-legacydeps.txt -r requirements.txt
#
# Tweak the previous command as you see fit, assuming requirements.txt contains
# your own requirements. You may want to merge into a single requirements.txt.

# After the requirements are in place, install dataClay without dependencies:
# $ pip install --no-deps "dataclay=={version}"

aiorwlock>=1.4.0
bcrypt>=4.1.1
grpcio>=1.48.2
grpcio-health-checking>=1.48.2
hiredis>=3.0.0
opentelemetry-api>=1.14.0
protobuf<3.20
psutil>=5
pydantic<2
redis>=5.1.1
get-annotations;python_version<"3.10"
PyJWT>=2.9.0
threadpoolctl>=3.5.0

138 changes: 106 additions & 32 deletions src/dataclay/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,18 @@
import uuid
from typing import TYPE_CHECKING, Annotated, Literal, Optional, Union

from pydantic import AliasChoices, Field, StringConstraints
from pydantic_settings import BaseSettings, SettingsConfigDict
try:
import pydantic_settings
except ModuleNotFoundError:
# pydantic<2: there is no pydantic_settings package, BaseSettings is imported from pydantic itself
from pydantic import BaseSettings, Field, constr

LEGACY_DEPS = True
else:
from pydantic import AliasChoices, Field, StringConstraints
from pydantic_settings import BaseSettings, SettingsConfigDict

LEGACY_DEPS = False

if TYPE_CHECKING:
from dataclay.runtime import BackendRuntime, ClientRuntime
Expand All @@ -29,9 +39,20 @@ class BackendSettings(BaseSettings):
For example, to set the :attr:`host` field, you can set the environment variable ``DATACLAY_BACKEND_HOST``.
"""

model_config = SettingsConfigDict(
env_prefix="dataclay_backend_", env_file=".env", secrets_dir="/run/secrets", extra="ignore"
)
if LEGACY_DEPS:

class Config:
env_prefix = "dataclay_backend_"
env_file = ".env"
secrets_dir = "/run/secrets"

else:
model_config = SettingsConfigDict(
env_prefix="dataclay_backend_",
env_file=".env",
secrets_dir="/run/secrets",
extra="ignore",
)
#: The ID for the backend. Will be autogenerated and persisted to disk if not provided.
id: Optional[uuid.UUID] = None
#: A human-readable name for this backend.
Expand All @@ -55,9 +76,20 @@ class MetadataSettings(BaseSettings):
For example, to set the :attr:`host` field, you can set the environment variable ``DATACLAY_METADATA_HOST``.
"""

model_config = SettingsConfigDict(
env_prefix="dataclay_metadata_", env_file=".env", secrets_dir="/run/secrets", extra="ignore"
)
if LEGACY_DEPS:

class Config:
env_prefix = "dataclay_metadata_"
env_file = ".env"
secrets_dir = "/run/secrets"

else:
model_config = SettingsConfigDict(
env_prefix="dataclay_metadata_",
env_file=".env",
secrets_dir="/run/secrets",
extra="ignore",
)
#: Hostname or IP address for this metadata service. This should be reachable by other dataClay services.
#: By default, the hostname of the machine is used.
host: str = socket.gethostbyname(socket.gethostname())
Expand All @@ -76,9 +108,20 @@ class ProxySettings(BaseSettings):
For example, to set the :attr:`mds_host` field, you can set the environment variable ``DATACLAY_PROXY_MDS_HOST``.
"""

model_config = SettingsConfigDict(
env_prefix="dataclay_proxy_", env_file=".env", secrets_dir="/run/secrets", extra="ignore"
)
if LEGACY_DEPS:

class Config:
env_prefix = "dataclay_proxy_"
env_file = ".env"
secrets_dir = "/run/secrets"

else:
model_config = SettingsConfigDict(
env_prefix="dataclay_proxy_",
env_file=".env",
secrets_dir="/run/secrets",
extra="ignore",
)
#: Port for the proxy service. Defaults to 8676.
port: int = 8676
#: Address to listen on. Defaults to 0.0.0.0 (any network).
Expand All @@ -100,9 +143,17 @@ class ClientSettings(BaseSettings):
For example, to set the :attr:`dataset` field, you can set the environment variable ``DC_DATASET``.
"""

model_config = SettingsConfigDict(
env_prefix="dc_", env_file=".env", secrets_dir="/run/secrets", extra="ignore"
)
if LEGACY_DEPS:

class Config:
env_prefix = "dc_"
env_file = ".env"
secrets_dir = "/run/secrets"

else:
model_config = SettingsConfigDict(
env_prefix="dc_", env_file=".env", secrets_dir="/run/secrets", extra="ignore"
)
password: str = "admin"
#: Username to use for the client. Defaults to "admin".
username: str = "admin"
Expand All @@ -113,16 +164,20 @@ class ClientSettings(BaseSettings):
local_backend: Optional[str] = None
#: Hostname or IP address for the metadata service. This should be reachable by other dataClay services.
#: Aliases: ``dc_host``, ``dataclay_metadata_host``, ``dataclay_host``.
dataclay_host: str = Field(
default="localhost",
validation_alias=AliasChoices("dc_host", "dataclay_metadata_host", "dataclay_host"),
)
#: Port for the metadata service. Defaults to 16587.
#: Aliases: ``dc_port``, ``dataclay_metadata_port``, ``dataclay_port``.
dataclay_port: int = Field(
default=16587,
validation_alias=AliasChoices("dc_port", "dataclay_metadata_port", "dataclay_port"),
)
if LEGACY_DEPS:
dataclay_host: str = Field(default="localhost", env="dc_host")
dataclay_port: int = Field(default=16587, env="dc_port")
else:
dataclay_host: str = Field(
default="localhost",
validation_alias=AliasChoices("dc_host", "dataclay_metadata_host", "dataclay_host"),
)
#: Port for the metadata service. Defaults to 16587.
#: Aliases: ``dc_port``, ``dataclay_metadata_port``, ``dataclay_port``.
dataclay_port: int = Field(
default=16587,
validation_alias=AliasChoices("dc_port", "dataclay_metadata_port", "dataclay_port"),
)
#: Enable proxy. Defaults to False. If :attr:`proxy_host` or :attr:`proxy_port` are explicitly set,
#: this flag will be set to True automatically. When proxy is being used, :attr:`dataclay_host` and
#: :attr:`dataclay_port` will be ignored.
Expand All @@ -139,14 +194,27 @@ class ClientSettings(BaseSettings):
class Settings(BaseSettings):
"""Global configuration settings for dataClay."""

model_config = SettingsConfigDict(
env_prefix="dataclay_", env_file=".env", secrets_dir="/run/secrets", extra="ignore"
)
if LEGACY_DEPS:

class Config:
env_prefix = "dataclay_"
env_file = ".env"
secrets_dir = "/run/secrets"

else:
model_config = SettingsConfigDict(
env_prefix="dataclay_", env_file=".env", secrets_dir="/run/secrets", extra="ignore"
)

# Other
dataclay_id: Optional[uuid.UUID] = Field(default=None, alias="dataclay_id")
storage_path: str = "/data/storage/"
loglevel: Annotated[str, StringConstraints(strip_whitespace=True, to_upper=True)] = "INFO"
if LEGACY_DEPS:
dataclay_id: Optional[uuid.UUID] = Field(default=None, env="dataclay_id")
loglevel: constr(strip_whitespace=True, to_upper=True) = "INFO"
else:
dataclay_id: Optional[uuid.UUID] = Field(default=None, alias="dataclay_id")
loglevel: Annotated[str, StringConstraints(strip_whitespace=True, to_upper=True)] = "INFO"

ephemeral: bool = False

# Threads
Expand Down Expand Up @@ -176,9 +244,15 @@ class Settings(BaseSettings):
memory_check_interval: int = 10

# Root account
root_password: str = Field(default="admin", alias="dataclay_password")
root_username: str = Field(default="admin", alias="dataclay_username")
root_dataset: str = Field(default="admin", alias="dataclay_dataset")
if LEGACY_DEPS:
# Legacy flavour: there are naming issues with defaults and aliases, so to play it safe
# these fields are declared under plain names and without any alias.
password: str = Field(default="admin")
username: str = Field(default="admin")
dataset: str = Field(default="admin")
else:
root_password: str = Field(default="admin", alias="dataclay_password")
root_username: str = Field(default="admin", alias="dataclay_username")
root_dataset: str = Field(default="admin", alias="dataclay_dataset")

# Tracing
service_name: Optional[str] = None
Expand Down
Loading

0 comments on commit 3acd607

Please sign in to comment.