Skip to content

Commit c5869bb

Browse files
feat: infer schema of row if not passed explicitly (#134)
Closes #15.

### Summary of Changes

The `schema` parameter of the `Row` initializer is now optional. If it is not passed explicitly, the schema is inferred from the data; in that case the column names are set to `column_0`, `column_1`, etc.

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 1419d25 commit c5869bb

File tree

12 files changed

+485
-300
lines changed

12 files changed

+485
-300
lines changed

src/safeds/data/tabular/containers/_column.py

Lines changed: 189 additions & 157 deletions
Large diffs are not rendered by default.

src/safeds/data/tabular/containers/_row.py

Lines changed: 63 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
import typing
2-
from typing import Any
1+
from hashlib import md5
2+
from typing import Any, Iterable, Iterator, Optional
33

44
import pandas as pd
55
from IPython.core.display_functions import DisplayHandle, display
6+
from pandas.core.util.hashing import hash_pandas_object
67
from safeds.data.tabular.typing import ColumnType, Schema
78
from safeds.exceptions import UnknownColumnNameError
89

@@ -13,26 +14,66 @@ class Row:
1314
1415
Parameters
1516
----------
16-
data : typing.Iterable
17+
data : Iterable
1718
The data.
1819
schema : Optional[Schema]
1920
The schema of the row. If not specified, the schema will be inferred from the data.
2021
"""
2122

22-
def __init__(self, data: typing.Iterable, schema: Schema):
23+
# ------------------------------------------------------------------------------------------------------------------
24+
# Dunder methods
25+
# ------------------------------------------------------------------------------------------------------------------
26+
27+
def __init__(self, data: Iterable, schema: Optional[Schema] = None):
2328
self._data: pd.Series = data if isinstance(data, pd.Series) else pd.Series(data)
24-
self.schema: Schema = schema
2529
self._data = self._data.reset_index(drop=True)
2630

31+
self.schema: Schema
32+
if schema is not None:
33+
self.schema = schema
34+
else:
35+
column_names = [f"column_{i}" for i in range(len(self._data))]
36+
dataframe = self._data.to_frame().T
37+
dataframe.columns = column_names
38+
# noinspection PyProtectedMember
39+
self.schema = Schema._from_dataframe(dataframe)
40+
41+
def __eq__(self, other: Any) -> bool:
42+
if not isinstance(other, Row):
43+
return NotImplemented
44+
if self is other:
45+
return True
46+
return self.schema == other.schema and self._data.equals(other._data)
47+
2748
def __getitem__(self, column_name: str) -> Any:
2849
return self.get_value(column_name)
2950

30-
def __iter__(self) -> typing.Iterator[Any]:
51+
def __hash__(self) -> int:
52+
data_hash_string = md5(hash_pandas_object(self._data, index=True).values).hexdigest()
53+
column_names_frozenset = frozenset(self.get_column_names())
54+
55+
return hash((data_hash_string, column_names_frozenset))
56+
57+
def __iter__(self) -> Iterator[Any]:
3158
return iter(self.get_column_names())
3259

3360
def __len__(self) -> int:
3461
return len(self._data)
3562

63+
def __repr__(self) -> str:
64+
tmp = self._data.to_frame().T
65+
tmp.columns = self.get_column_names()
66+
return tmp.__repr__()
67+
68+
def __str__(self) -> str:
69+
tmp = self._data.to_frame().T
70+
tmp.columns = self.get_column_names()
71+
return tmp.__str__()
72+
73+
# ------------------------------------------------------------------------------------------------------------------
74+
# Getters
75+
# ------------------------------------------------------------------------------------------------------------------
76+
3677
def get_value(self, column_name: str) -> Any:
3778
"""
3879
Return the value of a specified column.
@@ -49,19 +90,9 @@ def get_value(self, column_name: str) -> Any:
4990
"""
5091
if not self.schema.has_column(column_name):
5192
raise UnknownColumnNameError([column_name])
93+
# noinspection PyProtectedMember
5294
return self._data[self.schema._get_column_index_by_name(column_name)]
5395

54-
def count(self) -> int:
55-
"""
56-
Return the number of columns in this row.
57-
58-
Returns
59-
-------
60-
count : int
61-
The number of columns.
62-
"""
63-
return len(self._data)
64-
6596
def has_column(self, column_name: str) -> bool:
6697
"""
6798
Return whether the row contains a given column.
@@ -113,25 +144,24 @@ def get_type_of_column(self, column_name: str) -> ColumnType:
113144
"""
114145
return self.schema.get_type_of_column(column_name)
115146

116-
def __eq__(self, other: typing.Any) -> bool:
117-
if not isinstance(other, Row):
118-
return NotImplemented
119-
if self is other:
120-
return True
121-
return self._data.equals(other._data) and self.schema == other.schema
147+
# ------------------------------------------------------------------------------------------------------------------
148+
# Information
149+
# ------------------------------------------------------------------------------------------------------------------
122150

123-
def __hash__(self) -> int:
124-
return hash(self._data)
151+
def count(self) -> int:
152+
"""
153+
Return the number of columns in this row.
125154
126-
def __str__(self) -> str:
127-
tmp = self._data.to_frame().T
128-
tmp.columns = self.get_column_names()
129-
return tmp.__str__()
155+
Returns
156+
-------
157+
count : int
158+
The number of columns.
159+
"""
160+
return len(self._data)
130161

131-
def __repr__(self) -> str:
132-
tmp = self._data.to_frame().T
133-
tmp.columns = self.get_column_names()
134-
return tmp.__repr__()
162+
# ------------------------------------------------------------------------------------------------------------------
163+
# Other
164+
# ------------------------------------------------------------------------------------------------------------------
135165

136166
def _ipython_display_(self) -> DisplayHandle:
137167
"""

src/safeds/data/tabular/containers/_tagged_table.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ class TaggedTable(Table):
2121
The schema of the table. If not specified, the schema will be inferred from the data.
2222
"""
2323

24+
# ------------------------------------------------------------------------------------------------------------------
25+
# Dunder methods
26+
# ------------------------------------------------------------------------------------------------------------------
27+
2428
def __init__(
2529
self,
2630
data: Iterable,
@@ -45,14 +49,6 @@ def __init__(
4549
self._features: Table = self.keep_only_columns(feature_names)
4650
self._target: Column = self.get_column(target_name)
4751

48-
@property
49-
def features(self) -> Table:
50-
return self._features
51-
52-
@property
53-
def target(self) -> Column:
54-
return self._target
55-
5652
def __repr__(self) -> str:
5753
tmp = self._features.add_column(self._target)
5854
header_info = "Target Column is '" + self._target.name + "'\n"
@@ -63,6 +59,22 @@ def __str__(self) -> str:
6359
header_info = "Target Column is '" + self._target.name + "'\n"
6460
return header_info + tmp.__str__()
6561

62+
# ------------------------------------------------------------------------------------------------------------------
63+
# Properties
64+
# ------------------------------------------------------------------------------------------------------------------
65+
66+
@property
67+
def features(self) -> Table:
68+
return self._features
69+
70+
@property
71+
def target(self) -> Column:
72+
return self._target
73+
74+
# ------------------------------------------------------------------------------------------------------------------
75+
# Other
76+
# ------------------------------------------------------------------------------------------------------------------
77+
6678
def _ipython_display_(self) -> DisplayHandle:
6779
"""
6880
Return a display object for the tagged table to be used in Jupyter Notebooks.

tests/safeds/data/tabular/containers/_row/__init__.py

Whitespace-only changes.

tests/safeds/data/tabular/containers/_row/test_count.py

Lines changed: 0 additions & 10 deletions
This file was deleted.

tests/safeds/data/tabular/containers/_row/test_get_column_names.py

Lines changed: 0 additions & 21 deletions
This file was deleted.

tests/safeds/data/tabular/containers/_row/test_get_value.py

Lines changed: 0 additions & 17 deletions
This file was deleted.

tests/safeds/data/tabular/containers/_row/test_getitem.py

Lines changed: 0 additions & 20 deletions
This file was deleted.

tests/safeds/data/tabular/containers/_row/test_has_column.py

Lines changed: 0 additions & 14 deletions
This file was deleted.

tests/safeds/data/tabular/containers/_row/test_iter.py

Lines changed: 0 additions & 10 deletions
This file was deleted.

0 commit comments

Comments
 (0)