Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
dolfinus committed Jan 27, 2025
2 parents bd14cd5 + c3878b7 commit 26379f1
Show file tree
Hide file tree
Showing 63 changed files with 852 additions and 97 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/changelog.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
- master

env:
DEFAULT_PYTHON: '3.12'
DEFAULT_PYTHON: '3.13'

permissions:
contents: read
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/dev-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:
workflow_dispatch:

env:
DEFAULT_PYTHON: '3.12'
DEFAULT_PYTHON: '3.13'

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ on:
- '[0-9]+.[0-9]+.[0-9]+'

env:
DEFAULT_PYTHON: '3.12'
DEFAULT_PYTHON: '3.13'

jobs:
release:
Expand Down Expand Up @@ -110,4 +110,5 @@ jobs:
name: ${{ steps.release-name.outputs.name }}
body_path: changelog.md
files: |
dist/*
dist/*.tar.gz
dist/*.whl
14 changes: 9 additions & 5 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ concurrency:
cancel-in-progress: true

env:
DEFAULT_PYTHON: '3.12'
DEFAULT_PYTHON: '3.13'

jobs:
tests:
Expand All @@ -22,9 +22,13 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ['3.7', '3.12']
pydantic-version: ['1', '2']
os: [ubuntu-latest]
include:
- os: ubuntu-latest
python-version: '3.13'
pydantic-version: '2'
- os: ubuntu-22.04
python-version: '3.7'
pydantic-version: '1'

steps:
- name: Checkout code
Expand Down Expand Up @@ -116,7 +120,7 @@ jobs:
run: ./combine_coverage.sh

- name: Check coverage
uses: codecov/codecov-action@v4
uses: codecov/codecov-action@v5
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: ./reports/coverage.xml
Expand Down
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ repos:
args:
- --license-filepath
- .spdx-license-header.txt
- --use-current-year
- --allow-past-years
- --no-extra-eol

- repo: https://github.com/codespell-project/codespell
Expand Down Expand Up @@ -78,7 +78,7 @@ repos:
- id: text-unicode-replacement-char

- repo: https://github.com/asottile/pyupgrade
rev: v3.19.0
rev: v3.19.1
hooks:
- id: pyupgrade
args: [--py37-plus, --keep-runtime-typing]
Expand All @@ -102,7 +102,7 @@ repos:
- black==24.4.2

- repo: https://github.com/pycqa/bandit
rev: 1.7.10
rev: 1.8.2
hooks:
- id: bandit
args:
Expand Down
5 changes: 4 additions & 1 deletion .readthedocs.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
version: 2

sphinx:
configuration: docs/conf.py

build:
os: ubuntu-22.04
tools:
python: '3.12'
python: '3.13'
jobs:
post_checkout:
- git fetch --unshallow || true
Expand Down
2 changes: 1 addition & 1 deletion .spdx-license-header.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SPDX-FileCopyrightText: 2021-2024 MTS PJSC
SPDX-FileCopyrightText: 2021-2025 MTS PJSC
SPDX-License-Identifier: Apache-2.0
2 changes: 1 addition & 1 deletion LICENSE.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright 2021-2024 MTS PJSC. All rights reserved.
Copyright 2021-2025 MTS PJSC. All rights reserved.

Apache License
Version 2.0, January 2004
Expand Down
26 changes: 19 additions & 7 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,35 @@
ETL Entities
============

|Repo Status| |PyPI| |PyPI License| |PyPI Python Version|
|Documentation| |Build Status| |Coverage| |pre-commit.ci|
|Repo Status| |PyPI Latest Release| |PyPI License| |PyPI Python Version| |PyPI Downloads|
|Documentation| |CI Status| |Test Coverage| |pre-commit.ci Status|

.. |Repo Status| image:: https://www.repostatus.org/badges/latest/active.svg
:alt: Repo status - Active
:target: https://github.com/MobileTeleSystems/etl-entities
.. |PyPI| image:: https://img.shields.io/pypi/v/etl-entities
.. |PyPI Latest Release| image:: https://img.shields.io/pypi/v/etl-entities
:alt: PyPI - Latest Release
:target: https://pypi.org/project/etl-entities/
.. |PyPI License| image:: https://img.shields.io/pypi/l/etl-entities.svg
:alt: PyPI - License
:target: https://github.com/MobileTeleSystems/etl-entities/blob/develop/LICENSE.txt
.. |PyPI Python Version| image:: https://img.shields.io/pypi/pyversions/etl-entities.svg
:target: https://badge.fury.io/py/etl-entities
:alt: PyPI - Python Version
:target: https://pypi.org/project/etl-entities/
.. |PyPI Downloads| image:: https://img.shields.io/pypi/dm/etl-entities
:alt: PyPI - Downloads
:target: https://pypi.org/project/etl-entities/
.. |Documentation| image:: https://readthedocs.org/projects/etl-entities/badge/?version=stable
:alt: Documentation - ReadTheDocs
:target: https://etl-entities.readthedocs.io/
.. |Build Status| image:: https://github.com/MobileTeleSystems/etl-entities/workflows/Tests/badge.svg
.. |CI Status| image:: https://github.com/MobileTeleSystems/etl-entities/workflows/Tests/badge.svg
:alt: GitHub Actions - latest CI build status
:target: https://github.com/MobileTeleSystems/etl-entities/actions
.. |Coverage| image:: https://codecov.io/gh/MobileTeleSystems/etl-entities/branch/develop/graph/badge.svg?token=RIO8URKNZJ
.. |Test Coverage| image:: https://codecov.io/gh/MobileTeleSystems/etl-entities/branch/develop/graph/badge.svg?token=RIO8URKNZJ
:alt: Test coverage - percent
:target: https://codecov.io/gh/MobileTeleSystems/etl-entities
.. |pre-commit.ci| image:: https://results.pre-commit.ci/badge/github/MobileTeleSystems/etl-entities/develop.svg
.. |pre-commit.ci Status| image:: https://results.pre-commit.ci/badge/github/MobileTeleSystems/etl-entities/develop.svg
:alt: pre-commit.ci - status
:target: https://results.pre-commit.ci/latest/github/MobileTeleSystems/etl-entities/develop

What is ETL Entities?
Expand All @@ -35,6 +46,7 @@ Currently implemented:
* ``ColumnDateHWM``
* ``ColumnDateTimeHWM``
* ``FileListHWM``
* ``FileModifiedTimeHWM``
* ``KeyValueIntHWM``

* HWM Store classes:
Expand Down
2 changes: 1 addition & 1 deletion codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ coverage:
status:
project:
default:
target: 94%
target: 92%
threshold: 1%
13 changes: 13 additions & 0 deletions docs/changelog/2.5.0.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
2.5.0 (2025-01-27)
==================

Features
--------

- Implement ``FileModifiedTimeHWM``, HWM based on file modification time. (:github:pull:`109`)


Improvements
------------

- Add compatibility with ``Python 3.13`` (:github:pull:`106`)
1 change: 1 addition & 0 deletions docs/changelog/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
:caption: Changelog

DRAFT
2.5.0
2.4.0
2.3.1
2.3.0
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
# -- Project information -----------------------------------------------------

project = "etl-entities"
copyright = "2021-2024 MTS PJSC"
copyright = "2021-2025 MTS PJSC"
author = "DataOps.ETL"

# The version info for the project you're documenting, acts as replacement for
Expand Down
7 changes: 7 additions & 0 deletions docs/hwm/file/file_mtime_hwm.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
File Modified Time HWM
===========================

.. currentmodule:: etl_entities.hwm.file.file_mtime_hwm

.. autoclass:: FileModifiedTimeHWM
:members: name, set_value, dict, json, copy, covers, update
12 changes: 4 additions & 8 deletions docs/hwm/file/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ File HWM
:caption: HWM classes

file_list_hwm
file_mtime_hwm

What is File HWM?
-----------------
Expand Down Expand Up @@ -40,18 +41,13 @@ This technique is called ``High WaterMark`` or ``HWM`` for short.
It is used by different `strategies <https://onetl.readthedocs.io/en/latest/strategy/index.html#strategy>`_ to implement some complex logic
of filtering source data.


Supported types
---------------

There are several ways to track HWM value:

* Save the entire file list, and then select only files not present in this list
(``file_list``)
* Save max modified time of all files, and then select only files with ``modified_time``
* Save list of file paths, and then select only files not present in this list - :obj:`FileListHWM`
* Save max modified time of all files, and then select only files with modified time (``file.stat().st_mtime``)
higher than this value - :obj:`FileModifiedTimeHWM`
* If file name contains some incrementing value, e.g. id or datetime,
parse it and save max value of all files, then select only files with higher value
* and so on

Currently the only HWM type implemented for files is ``file_list``. Other ones can be implemented on-demand
parse it and save max value of all files, then select only files with higher value - not implemented for now.
2 changes: 1 addition & 1 deletion etl_entities/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.4.0
2.5.0
2 changes: 1 addition & 1 deletion etl_entities/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: 2021-2024 MTS PJSC
# SPDX-FileCopyrightText: 2021-2025 MTS PJSC
# SPDX-License-Identifier: Apache-2.0
import os

Expand Down
2 changes: 1 addition & 1 deletion etl_entities/entity.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: 2021-2024 MTS PJSC
# SPDX-FileCopyrightText: 2021-2025 MTS PJSC
# SPDX-License-Identifier: Apache-2.0
# isort: skip_file

Expand Down
4 changes: 3 additions & 1 deletion etl_entities/hwm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# SPDX-FileCopyrightText: 2021-2024 MTS PJSC
# SPDX-FileCopyrightText: 2021-2025 MTS PJSC
# SPDX-License-Identifier: Apache-2.0
from etl_entities.hwm.column.column_hwm import ColumnHWM
from etl_entities.hwm.column.date_hwm import ColumnDateHWM
from etl_entities.hwm.column.datetime_hwm import ColumnDateTimeHWM
from etl_entities.hwm.column.int_hwm import ColumnIntHWM
from etl_entities.hwm.file.file_hwm import FileHWM
from etl_entities.hwm.file.file_list_hwm import FileListHWM
from etl_entities.hwm.file.file_mtime_hwm import FileModifiedTimeHWM
from etl_entities.hwm.hwm import HWM
from etl_entities.hwm.hwm_type_registry import HWMTypeRegistry, register_hwm_type
from etl_entities.hwm.key_value.key_value_hwm import KeyValueHWM
Expand All @@ -19,6 +20,7 @@
"ColumnIntHWM",
"FileHWM",
"FileListHWM",
"FileModifiedTimeHWM",
"KeyValueHWM",
"KeyValueIntHWM",
"HWMTypeRegistry",
Expand Down
2 changes: 1 addition & 1 deletion etl_entities/hwm/column/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# SPDX-FileCopyrightText: 2021-2024 MTS PJSC
# SPDX-FileCopyrightText: 2021-2025 MTS PJSC
# SPDX-License-Identifier: Apache-2.0
7 changes: 2 additions & 5 deletions etl_entities/hwm/column/column_hwm.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: 2021-2024 MTS PJSC
# SPDX-FileCopyrightText: 2021-2025 MTS PJSC
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

Expand Down Expand Up @@ -164,10 +164,7 @@ def update(self: ColumnHWMType, value: ColumnValueType) -> ColumnHWMType:
2
"""

if self.value is None:
return self.set_value(value)

if self.value < value: # type: ignore[operator]
if self.value is None or self.value < value: # type: ignore[operator]
return self.set_value(value)

return self
Expand Down
4 changes: 2 additions & 2 deletions etl_entities/hwm/column/date_hwm.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: 2021-2024 MTS PJSC
# SPDX-FileCopyrightText: 2021-2025 MTS PJSC
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

Expand All @@ -18,7 +18,7 @@

@register_hwm_type("column_date")
class ColumnDateHWM(ColumnHWM[date]):
"""Date HWM type
"""HWM based on tracking latest column value of type :obj:`datetime.date`.
Parameters
----------
Expand Down
5 changes: 2 additions & 3 deletions etl_entities/hwm/column/datetime_hwm.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: 2021-2024 MTS PJSC
# SPDX-FileCopyrightText: 2021-2025 MTS PJSC
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

Expand All @@ -18,8 +18,7 @@

@register_hwm_type("column_datetime")
class ColumnDateTimeHWM(ColumnHWM[datetime]):
"""DateTime HWM type
"""HWM based on tracking latest column value of type :obj:`datetime.datetime`.
Parameters
----------
Expand Down
4 changes: 2 additions & 2 deletions etl_entities/hwm/column/int_hwm.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: 2021-2024 MTS PJSC
# SPDX-FileCopyrightText: 2021-2025 MTS PJSC
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

Expand All @@ -16,7 +16,7 @@

@register_hwm_type("column_int")
class ColumnIntHWM(ColumnHWM[int]):
"""Integer HWM type
"""HWM based on tracking latest column value of type :obj:`int`.
Parameters
----------
Expand Down
2 changes: 1 addition & 1 deletion etl_entities/hwm/file/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# SPDX-FileCopyrightText: 2021-2024 MTS PJSC
# SPDX-FileCopyrightText: 2021-2025 MTS PJSC
# SPDX-License-Identifier: Apache-2.0
2 changes: 1 addition & 1 deletion etl_entities/hwm/file/file_hwm.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: 2021-2024 MTS PJSC
# SPDX-FileCopyrightText: 2021-2025 MTS PJSC
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

Expand Down
12 changes: 6 additions & 6 deletions etl_entities/hwm/file/file_list_hwm.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: 2021-2024 MTS PJSC
# SPDX-FileCopyrightText: 2021-2025 MTS PJSC
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

Expand All @@ -21,7 +21,7 @@

@register_hwm_type("file_list")
class FileListHWM(FileHWM[FileListType]):
"""File List HWM type
"""HWM based on tracking list of file names.
Parameters
----------
Expand Down Expand Up @@ -72,10 +72,10 @@ def covers(self, value: str | os.PathLike) -> bool: # type: ignore
--------
>>> from etl_entities.hwm import FileListHWM
>>> hwm = FileListHWM(value={"/some/path.py"}, name="my_hwm")
>>> hwm.covers("/some/path.py") # path in HWM
>>> hwm = FileListHWM(value={"/some/old_file.py"}, name="my_hwm")
>>> hwm.covers("/some/old_file.py") # path in HWM
True
>>> hwm.covers("/another/path.py") # path not in HWM
>>> hwm.covers("/some/new_file.py") # path not in HWM
False
"""

Expand All @@ -90,7 +90,7 @@ def update(self: FileListHWMType, value: str | os.PathLike | Iterable[str | os.P
Returns
-------
result : FileHWM
result : FileListHWM
Self
Expand Down
Loading

0 comments on commit 26379f1

Please sign in to comment.