Skip to content

Commit

Permalink
Deserializable Plugin (#28)
Browse files Browse the repository at this point in the history
* Deserializable Plugin

* black fix

* version = "0.1.2"

Co-authored-by: Taleb Zeghmi <[email protected]>
  • Loading branch information
talebzeghmi and talebzeghmi authored Aug 20, 2022
1 parent 4f309e6 commit 1686ac6
Show file tree
Hide file tree
Showing 4 changed files with 184 additions and 162 deletions.
8 changes: 5 additions & 3 deletions datasets/metaflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,13 @@ def object_hook(self, obj: dict) -> Union[_DatasetParams, StorageOptions]:
):
return {DatasetPlugin._get_context(k): v for k, v in obj.items()}
else:
mode = obj.get("mode")
params = {k: obj[k] for k in _DatasetParams().__dict__.keys() if k in obj}

mode = params.get("mode")
if mode:
obj["mode"] = mode if isinstance(obj, Mode) else Mode[mode]
params["mode"] = mode if isinstance(mode, Mode) else Mode[mode]

return _DatasetParams(**obj)
return _DatasetParams(**params)


_fallback = json.JSONEncoder().default
Expand Down
19 changes: 19 additions & 0 deletions datasets/tests/test_metaflow.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json

from datasets import Dataset
from datasets.context import Context
from datasets.dataset_plugin import StorageOptions
from datasets.metaflow import (
Expand All @@ -9,9 +10,27 @@
)
from datasets.mode import Mode
from datasets.plugins.batch.batch_base_plugin import BatchOptions
from datasets.plugins.batch.batch_dataset import BatchDataset
from datasets.plugins.batch.hive_dataset import HiveOptions


def test_dataset_dumps_load():
    """Round-trip a Dataset through JSON and check that nothing is lost.

    The dataset is serialized with ``json.dumps`` and rebuilt with
    ``_DatasetTypeClass().convert``. The rebuilt object must keep its
    options and mode, be a ``BatchDataset``, and compare equal to the
    original.
    """
    batch_options = BatchOptions(partition_by="foo")
    original = Dataset(
        name="Example",
        logical_key="my_key",
        mode=Mode.READ_WRITE,
        options=batch_options,
    )

    # Serialize, then parse back; the two trailing convert() arguments
    # are passed as None.
    serialized = json.dumps(original)
    restored = _DatasetTypeClass().convert(serialized, None, None)

    assert restored.options.partition_by == "foo"
    assert restored.mode == Mode.READ_WRITE
    assert isinstance(restored, BatchDataset)
    assert original == restored


def test_dataset_type_class():
json_value = (
'{"name": "HiDataset", "mode": "READ_WRITE", "options":{"type":"HiveOptions", "path":"/foo_hive"}}'
Expand Down
Loading

0 comments on commit 1686ac6

Please sign in to comment.