Skip to content

Commit 5764441

Browse files
zogomiigrefrathcmegalinter-botlars-reimann
authored
feat: join (#870)
Closes #745

### Summary of Changes

Implemented a `join` function for `Table`.

<!-- Please provide a summary of changes in this pull request, ensuring all changes are explained. -->

---------

Co-authored-by: grefrathc <[email protected]>
Co-authored-by: megalinter-bot <[email protected]>
Co-authored-by: Lars Reimann <[email protected]>
1 parent 9f5992a commit 5764441

File tree

2 files changed

+150
-0
lines changed

2 files changed

+150
-0
lines changed

src/safeds/data/tabular/containers/_table.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1708,6 +1708,65 @@ def inverse_transform_table(self, fitted_transformer: InvertibleTableTransformer
17081708
"""
17091709
return fitted_transformer.inverse_transform(self)
17101710

1711+
def join(
    self,
    right_table: Table,
    left_names: str | list[str],
    right_names: str | list[str],
    *,
    mode: Literal["inner", "left", "outer"] = "inner",
) -> Table:
    """
    Join a table with the current table and return the result.

    Parameters
    ----------
    right_table:
        The other table which is to be joined to the current table.
    left_names:
        Name or list of names of columns from the current table on which to join right_table.
    right_names:
        Name or list of names of columns from right_table on which to join the current table.
    mode:
        Specify which type of join you want to use. Options are 'inner', 'left', and 'outer'.

    Returns
    -------
    new_table:
        The table with the joined table.

    Raises
    ------
    ColumnNotFoundError
        If a name in left_names is not a column of the current table or a name in right_names is not a column of
        right_table.
    ValueError
        If left_names and right_names do not contain the same number of columns.

    Examples
    --------
    >>> from safeds.data.tabular.containers import Table
    >>> table1 = Table({"a": [1, 2], "b": [3, 4]})
    >>> table2 = Table({"d": [1, 5], "e": [5, 6]})
    >>> table1.join(table2, "a", "d", mode="left")
    +-----+-----+------+
    |   a |   b |    e |
    | --- | --- |  --- |
    | i64 | i64 |  i64 |
    +==================+
    |   1 |   3 |    5 |
    |   2 |   4 | null |
    +-----+-----+------+
    """
    # A bare string names exactly one column. Wrap it in a list so that the length comparison below counts
    # columns instead of characters (otherwise e.g. "id" vs. "key" would be rejected as a 2-vs-3 mismatch).
    left_columns = [left_names] if isinstance(left_names, str) else list(left_names)
    right_columns = [right_names] if isinstance(right_names, str) else list(right_names)

    # Validation
    _check_columns_exist(self, left_columns)
    _check_columns_exist(right_table, right_columns)

    if len(left_columns) != len(right_columns):
        raise ValueError("The number of columns to join on must be the same in both tables.")

    # Implementation
    return self._from_polars_lazy_frame(
        self._lazy_frame.join(
            right_table._lazy_frame,
            left_on=left_columns,
            right_on=right_columns,
            how=mode,
        ),
    )
1769+
17111770
def transform_table(self, fitted_transformer: TableTransformer) -> Table:
17121771
"""
17131772
Return a new table transformed by a **fitted** transformer.
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
from typing import Literal
2+
3+
import pytest
4+
from safeds.data.tabular.containers import Table
5+
from safeds.exceptions import ColumnNotFoundError
6+
7+
8+
@pytest.mark.parametrize(
    ("table_left", "table_right", "left_names", "right_names", "mode", "table_expected"),
    [
        (
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["a"],
            ["d"],
            "outer",
            Table({"a": [1, None, 2], "b": [3, None, 4], "d": [1, 5, None], "e": [5, 6, None]}),
        ),
        (
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["a"],
            ["d"],
            "left",
            Table({"a": [1, 2], "b": [3, 4], "e": [5, None]}),
        ),
        (
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["a"],
            ["d"],
            "inner",
            Table({"a": [1], "b": [3], "e": [5]}),
        ),
        (
            Table({"a": [1, 2], "b": [3, 4], "c": [5, 6]}),
            Table({"d": [1, 5], "e": [5, 6], "g": [7, 9]}),
            ["a", "c"],
            ["d", "e"],
            "inner",
            Table({"a": [1], "b": [3], "c": [5], "g": [7]}),
        ),
        (
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["b"],
            ["e"],
            "inner",
            Table({"a": [], "b": [], "d": []}),
        ),
    ],
    # Explicit ids keep failure reports readable and match the style of the other parametrized test in this file.
    ids=[
        "outer_join",
        "left_join",
        "inner_join",
        "inner_join_on_multiple_columns",
        "inner_join_without_matches",
    ],
)
def test_should_join_two_tables(
    table_left: Table,
    table_right: Table,
    left_names: list[str],
    right_names: list[str],
    mode: Literal["inner", "left", "outer"],
    table_expected: Table,
) -> None:
    """Check that joining two tables on the given columns with the given mode yields the expected table."""
    assert table_left.join(table_right, left_names, right_names, mode=mode) == table_expected
62+
63+
64+
def test_should_raise_if_columns_are_mismatched() -> None:
    """Check that joining on a different number of columns per table raises a ValueError."""
    table_left = Table({"a": [1, 2], "b": [3, 4]})
    table_right = Table({"d": [1, 5], "e": [5, 6]})
    left_names = ["a"]
    right_names = ["d", "e"]
    # `match` is interpreted as a regular expression, so the trailing period is escaped to match it literally
    # instead of matching any character.
    with pytest.raises(ValueError, match=r"The number of columns to join on must be the same in both tables\."):
        table_left.join(table_right, left_names, right_names)
71+
72+
73+
@pytest.mark.parametrize(
    ("table_left", "table_right", "left_names", "right_names"),
    [
        pytest.param(
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["c"],
            ["d"],
            id="wrong_left_name",
        ),
        pytest.param(
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["a"],
            ["f"],
            id="wrong_right_name",
        ),
    ],
)
def test_should_raise_if_columns_are_missing(
    table_left: Table,
    table_right: Table,
    left_names: list[str],
    right_names: list[str],
) -> None:
    """Check that joining on a column name that is absent from either table raises a ColumnNotFoundError."""
    with pytest.raises(ColumnNotFoundError):
        table_left.join(
            table_right,
            left_names=left_names,
            right_names=right_names,
        )

0 commit comments

Comments
 (0)