Skip to content

Commit

Permalink
ADD PRINTS
Browse files Browse the repository at this point in the history
  • Loading branch information
coastalwhite committed Feb 18, 2025
1 parent 045e282 commit 5d3eb9e
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 1 deletion.
8 changes: 8 additions & 0 deletions .github/workflows/test-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,14 @@ jobs:
python tests/docs/run_doctest.py
pytest tests/docs/test_user_guide.py -m docs
- name: Run Multiscan Parametric
if: github.ref_name != 'main'
run: pytest -n auto --dist=loadgroup -m "not release and not benchmark and not docs" tests/unit/io/test_multiscan.py -k test_multiscan_slice_parametric -s

- name: Run Multiscan Slice
if: github.ref_name != 'main'
run: pytest -n auto --dist=loadgroup -m "not release and not benchmark and not docs" tests/unit/io/test_multiscan.py -k test_multiscan_slice_middle -s

- name: Run tests
if: github.ref_name != 'main'
run: pytest -n auto --dist=loadgroup -m "not release and not benchmark and not docs" tests/unit/io/test_multiscan.py -s
Expand Down
35 changes: 34 additions & 1 deletion py-polars/tests/unit/io/test_multiscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
],
)
def test_include_file_paths(tmp_path: Path, scan: Any, write: Any) -> None:
print(f"START: {tmp_path}")

a_path = tmp_path / "a"
b_path = tmp_path / "b"

Expand All @@ -44,6 +46,8 @@ def test_include_file_paths(tmp_path: Path, scan: Any, write: Any) -> None:
),
)

print(f"END: {tmp_path}")


@pytest.mark.parametrize(
("scan", "write", "ext", "supports_missing_columns", "supports_hive_partitioning"),
Expand Down Expand Up @@ -72,7 +76,8 @@ def test_multiscan_projection(
hive: bool,
col: bool,
) -> None:
print(tmp_path)
print(f"START: {tmp_path}")

a = pl.DataFrame({"col": [5, 10, 1996]})
b = pl.DataFrame({"col": [13, 37]})

Expand Down Expand Up @@ -156,6 +161,8 @@ def test_multiscan_projection(
.collect(new_streaming=True), # type: ignore[call-overload]
)

print(f"END: {tmp_path}")


@pytest.mark.parametrize(
("scan", "write", "ext"),
Expand All @@ -172,6 +179,8 @@ def test_multiscan_row_index(
write: Callable[[pl.DataFrame, Path], Any],
ext: str,
) -> None:
print(f"START: {tmp_path}")

a = pl.DataFrame({"col": [5, 10, 1996]})
b = pl.DataFrame({"col": [42]})
c = pl.DataFrame({"col": [13, 37]})
Expand Down Expand Up @@ -228,6 +237,8 @@ def test_multiscan_row_index(
),
)

print(f"END: {tmp_path}")


@pytest.mark.parametrize(
("scan", "write", "ext"),
Expand Down Expand Up @@ -257,6 +268,8 @@ def test_schema_mismatch_type_mismatch(
write: Callable[[pl.DataFrame, Path], Any],
ext: str,
) -> None:
print(f"START: {tmp_path}")

a = pl.DataFrame({"xyz_col": [5, 10, 1996]})
b = pl.DataFrame({"xyz_col": ["a", "b", "c"]})

Expand All @@ -276,6 +289,8 @@ def test_schema_mismatch_type_mismatch(
):
q.collect(new_streaming=True) # type: ignore[call-overload]

print(f"END: {tmp_path}")


@pytest.mark.parametrize(
("scan", "write", "ext"),
Expand Down Expand Up @@ -305,6 +320,8 @@ def test_schema_mismatch_order_mismatch(
write: Callable[[pl.DataFrame, Path], Any],
ext: str,
) -> None:
print(f"START: {tmp_path}")

a = pl.DataFrame({"x": [5, 10, 1996], "y": ["a", "b", "c"]})
b = pl.DataFrame({"y": ["x", "y"], "x": [1, 2]})

Expand All @@ -321,6 +338,8 @@ def test_schema_mismatch_order_mismatch(
with pytest.raises(pl.exceptions.SchemaError):
q.collect(new_streaming=True) # type: ignore[call-overload]

print(f"END: {tmp_path}")


@pytest.mark.parametrize(
("scan", "write"),
Expand All @@ -342,6 +361,8 @@ def test_multiscan_head(
scan: Callable[..., pl.LazyFrame],
write: Callable[[pl.DataFrame, io.BytesIO | Path], Any],
) -> None:
print(f"START: HEAD-{scan}")

a = io.BytesIO()
b = io.BytesIO()
for f in [a, b]:
Expand All @@ -353,6 +374,8 @@ def test_multiscan_head(
pl.Series("c1", range(5)).to_frame(),
)

print(f"END: HEAD-{scan}")


@pytest.mark.parametrize(
("scan", "write"),
Expand All @@ -374,6 +397,8 @@ def test_multiscan_tail(
scan: Callable[..., pl.LazyFrame],
write: Callable[[pl.DataFrame, io.BytesIO | Path], Any],
) -> None:
print(f"START: TAIL-{scan}")

a = io.BytesIO()
b = io.BytesIO()
for f in [a, b]:
Expand All @@ -385,6 +410,8 @@ def test_multiscan_tail(
pl.Series("c1", range(5, 10)).to_frame(),
)

print(f"END: TAIL-{scan}")


@pytest.mark.parametrize(
("scan", "write"),
Expand All @@ -406,6 +433,7 @@ def test_multiscan_slice_middle(
scan: Callable[..., pl.LazyFrame],
write: Callable[[pl.DataFrame, io.BytesIO | Path], Any],
) -> None:
print(f"START: SLICE-{scan}")
fs = [io.BytesIO() for _ in range(13)]
for f in fs:
write(pl.Series("c1", range(7)).to_frame(), f)
Expand Down Expand Up @@ -441,6 +469,7 @@ def test_multiscan_slice_middle(
scan(fs, row_index_name="ri").slice(offset, 17).collect(new_streaming=True), # type: ignore[call-overload]
pl.DataFrame(ri_expected_series),
)
print(f"END: SLICE-{scan}")


@pytest.mark.parametrize(
Expand Down Expand Up @@ -471,6 +500,8 @@ def test_multiscan_slice_parametric(
offset: int,
length: int,
) -> None:
print(f"START: SLICE-{ext}-{offset}-{length}")

# Once CSV negative slicing is implemented this should be removed. If we
# don't do this, this test is flaky.
if ext == "csv":
Expand Down Expand Up @@ -509,3 +540,5 @@ def test_multiscan_slice_parametric(
.slice(offset, length)
.collect(new_streaming=True), # type: ignore[call-overload]
)

print(f"END: SLICE-{ext}-{offset}-{length}")

0 comments on commit 5d3eb9e

Please sign in to comment.