Skip to content

fix: updates json array registration #339

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/sync-repo-settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ branchProtectionRules:
- 'cla/google'
- 'docs'
- 'lint'
- 'unit (3.8)'
- 'unit (3.9)'
- 'unit (3.10)'
- 'unit (3.11)'
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/unittest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-22.04
strategy:
matrix:
python: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
python: ['3.9', '3.10', '3.11', '3.12', '3.13']
steps:
- name: Checkout
uses: actions/checkout@v4
Expand Down Expand Up @@ -103,7 +103,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: "3.8"
python-version: "3.9"
- name: Install coverage
run: |
python -m pip install --upgrade setuptools pip wheel
Expand Down
49 changes: 18 additions & 31 deletions db_dtypes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,14 @@
import warnings

import numpy
import packaging.version
import pandas
import pandas.api.extensions
from pandas.errors import OutOfBoundsDatetime
import pyarrow
import pyarrow.compute

from db_dtypes import core
from db_dtypes.version import __version__
from db_dtypes.json import JSONArray, JSONDtype, JSONArrowType # noqa: F401

from . import _versions_helpers

Expand All @@ -47,15 +46,6 @@
_NP_BOX_DTYPE = "datetime64[us]"


# To use JSONArray and JSONDtype, you'll need Pandas 1.5.0 or later. With the removal
# of Python 3.7 compatibility, the minimum Pandas version will be updated to 1.5.0.
if packaging.version.Version(pandas.__version__) >= packaging.version.Version("1.5.0"):
from db_dtypes.json import JSONArray, JSONArrowType, JSONDtype
else:
JSONArray = None
JSONDtype = None


@pandas.api.extensions.register_extension_dtype
class TimeDtype(core.BaseDatetimeDtype):
"""
Expand Down Expand Up @@ -347,6 +337,22 @@ def __sub__(self, other):
return super().__sub__(other)


def _determine_all(json_array_type, json_dtype_type):
"""Determines the list for __all__ based on JSON type availability."""
base_all = [
"__version__",
"DateArray",
"DateDtype",
"TimeArray",
"TimeDtype",
]
# Check if both JSON types are available (truthy)
if json_array_type and json_dtype_type:
return base_all + ["JSONDtype", "JSONArray", "JSONArrowType"]
else:
return base_all


def _check_python_version():
"""Checks the runtime Python version and issues a warning if needed."""
sys_major, sys_minor, sys_micro = _versions_helpers.extract_runtime_version()
Expand All @@ -364,23 +370,4 @@ def _check_python_version():

_check_python_version()


if not JSONArray or not JSONDtype:
__all__ = [
"__version__",
"DateArray",
"DateDtype",
"TimeArray",
"TimeDtype",
]
else:
__all__ = [
"__version__",
"DateArray",
"DateDtype",
"JSONDtype",
"JSONArray",
"JSONArrowType",
"TimeArray",
"TimeDtype",
]
__all__ = _determine_all(JSONArray, JSONDtype)
9 changes: 7 additions & 2 deletions db_dtypes/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,5 +277,10 @@ def to_pandas_dtype(self):


# Register the type to be included in RecordBatches, sent over IPC and received in
# another Python process.
pa.register_extension_type(JSONArrowType())
# another Python process. Also handle potential pre-registration
try:
pa.register_extension_type(JSONArrowType())
except pa.ArrowKeyError:
# Type 'dbjson' might already be registered if the module is reloaded,
# which is okay.
pass
6 changes: 2 additions & 4 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,9 @@
ISORT_VERSION = "isort==5.11.0"
LINT_PATHS = ["docs", "db_dtypes", "tests", "noxfile.py", "setup.py"]

DEFAULT_PYTHON_VERSION = "3.8"
DEFAULT_PYTHON_VERSION = "3.9"

UNIT_TEST_PYTHON_VERSIONS: List[str] = [
"3.7",
"3.8",
"3.9",
"3.10",
"3.11",
Expand All @@ -56,7 +54,7 @@
UNIT_TEST_EXTRAS: List[str] = []
UNIT_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = {}

SYSTEM_TEST_PYTHON_VERSIONS: List[str] = ["3.8"]
SYSTEM_TEST_PYTHON_VERSIONS: List[str] = ["3.9"]
SYSTEM_TEST_STANDARD_DEPENDENCIES: List[str] = [
"mock",
"pytest",
Expand Down
2 changes: 1 addition & 1 deletion owlbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
# Add templated files
# ----------------------------------------------------------------------------
templated_files = common.py_library(
system_test_python_versions=["3.8"],
system_test_python_versions=["3.9"],
cov_level=100,
intersphinx_dependencies={
"pandas": "https://pandas.pydata.org/pandas-docs/stable/"
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,6 @@ def readme():
],
platforms="Posix; MacOS X; Windows",
install_requires=dependencies,
python_requires=">=3.7",
python_requires=">=3.9",
tests_require=["pytest"],
)
59 changes: 59 additions & 0 deletions tests/unit/test__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,62 @@ def test_check_python_version_does_not_warn_on_supported(mock_version_tuple):

# Assert that warnings.warn was NOT called
mock_warn_call.assert_not_called()


def test_determine_all_includes_json_when_available():
"""
Test that _determine_all includes JSON types when both are truthy.
"""

from db_dtypes import _determine_all

# Simulate available types (can be any truthy object)
mock_json_array = object()
mock_json_dtype = object()

result = _determine_all(mock_json_array, mock_json_dtype)

expected_all = [
"__version__",
"DateArray",
"DateDtype",
"TimeArray",
"TimeDtype",
"JSONDtype",
"JSONArray",
"JSONArrowType",
]
assert set(result) == set(expected_all)
assert "JSONDtype" in result
assert "JSONArray" in result
assert "JSONArrowType" in result


@pytest.mark.parametrize(
"mock_array, mock_dtype",
[
(None, object()), # JSONArray is None
(object(), None), # JSONDtype is None
(None, None), # Both are None
],
)
def test_determine_all_excludes_json_when_unavailable(mock_array, mock_dtype):
"""
Test that _determine_all excludes JSON types if either is falsy.
"""

from db_dtypes import _determine_all

result = _determine_all(mock_array, mock_dtype)

expected_all = [
"__version__",
"DateArray",
"DateDtype",
"TimeArray",
"TimeDtype",
]
assert set(result) == set(expected_all)
assert "JSONDtype" not in result
assert "JSONArray" not in result
assert "JSONArrowType" not in result
74 changes: 74 additions & 0 deletions tests/unit/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,15 @@
# limitations under the License.

import json
import sys

import numpy as np
import pandas as pd
import pyarrow as pa
import pytest

import db_dtypes
import db_dtypes.json

# Check for minimum Pandas version.
pytest.importorskip("pandas", minversion="1.5.0")
Expand Down Expand Up @@ -224,3 +226,75 @@ def test_json_arrow_record_batch():
== '{"null_field":null,"order":{"address":{"city":"Anytown","street":"123 Main St"},"items":["book","pen","computer"],"total":15}}'
)
assert s[6] == "null"


@pytest.fixture
def cleanup_json_module_for_reload():
"""
Fixture to ensure db_dtypes.json is registered and then removed
from sys.modules to allow testing the registration except block via reload.
"""

json_module_name = "db_dtypes.json"
original_module = sys.modules.get(json_module_name)

# Ensure the type is registered initially (usually by the first import)
try:
# Make sure the module is loaded so the type exists
import db_dtypes.json

# Explicitly register just in case it wasn't, or was cleaned up elsewhere.
# This might raise ArrowKeyError itself if already registered, which is fine here.
pa.register_extension_type(db_dtypes.json.JSONArrowType())

except pa.ArrowKeyError:
pass # Already registered is the state we want before the test runs

# Remove the module from sys.modules so importlib.reload re-executes it
if json_module_name in sys.modules:
del sys.modules[json_module_name]

yield # Run the test that uses this fixture

# Cleanup: Put the original module back if it existed
# This helps isolate from other tests that might import db_dtypes.json
if original_module:
sys.modules[json_module_name] = original_module
elif json_module_name in sys.modules:
# If the test re-imported it but it wasn't there originally, remove it
del sys.modules[json_module_name]

# Note: PyArrow doesn't have a public API to unregister types easily,
# thus we are using the testing pattern of module isolation/reloading.


# Test specifically for the fixture's pre-yield removal logic
def test_fixture_removes_module_if_present(cleanup_json_module_for_reload):
"""
Tests that the cleanup_json_module_for_reload fixture removes
db_dtypes.json from sys.modules before yielding to the test.
This specifically targets the 'if json_module_name in sys.modules:' block.
"""
# This test runs *after* the fixture's `yield`.
# The fixture should have removed the module if it was present.

json_module_name = "db_dtypes.json"

assert (
json_module_name not in sys.modules
), f"The fixture cleanup_json_module_for_reload should have removed {json_module_name}"


def test_json_arrow_type_reregistration_is_handled(cleanup_json_module_for_reload):
"""
Verify that attempting to re-register JSONArrowType via module reload
is caught by the except block and does not raise an error.
"""

# Re-importing the module after the fixture removed it from sys.modules
# forces Python to execute the module's top-level code again.
# This includes the pa.register_extension_type call.

import db_dtypes.json # noqa: F401

assert True, "Module re-import completed without error, except block likely worked."
Loading