
Commit 8dca9a4

Merge pull request #3003 from activeloopai/v4.1.0_release

v4.1.0 Release.

2 parents c80089d + c915b87

File tree

11 files changed: +1005 -814 lines

python/deeplake/__init__.py

Lines changed: 126 additions & 19 deletions
@@ -1,12 +1,20 @@
 import os
 from typing import Callable, Any, Dict
 
+try:
+    from tqdm import tqdm as progress_bar
+except ImportError:
+
+    def progress_bar(iterable, *args, **kwargs):
+        return iterable
+
+
 import numpy
 
 import deeplake
 from ._deeplake import *
 
-__version__ = "4.0.3"
+__version__ = "4.1.0"
 
 __all__ = [
     "__version__",
@@ -22,7 +30,6 @@
     "ColumnView",
     "Column",
     "Version",
-    "Prefetcher",
     "DatasetView",
     "Dataset",
     "ReadOnlyDataset",
@@ -34,6 +41,8 @@
     "ColumnAlreadyExistsError",
     "ColumnDoesNotExistError",
     "InvalidColumnValueError",
+    "InvalidPolygonShapeError",
+    "InvalidLinkDataError",
     "PushError",
     "GcsStorageProviderFailed",
     "History",
@@ -42,6 +51,7 @@
     "LogNotexistsError",
     "IncorrectDeeplakePathError",
     "AuthenticationError",
+    "BadRequestError",
     "AuthorizationError",
     "NotFoundError",
     "AgreementError",
@@ -56,13 +66,15 @@
     "InvalidChunkStrategyType",
     "InvalidSequenceOfSequence",
     "InvalidTypeAndFormatPair",
+    "InvalidLinkType",
     "UnknownType",
     "InvalidTextType",
     "UnsupportedPythonType",
     "UnsupportedSampleCompression",
     "UnsupportedChunkCompression",
     "InvalidImageCompression",
-    "InvalidMaskCompression",
+    "InvalidSegmentMaskCompression",
+    "InvalidBinaryMaskCompression",
     "DtypeMismatch",
     "UnspecifiedDtype",
     "DimensionsMismatch",
@@ -90,6 +102,8 @@
     "StorageInternalError",
     "WriteFailedError",
     "QuantizationType",
+    "InvalidCredsKeyAssignmentError",
+    "CredsKeyAlreadyAssignedError",
     "core",
     "create",
     "create_async",
@@ -122,65 +136,158 @@
 
 def _tensorflow(self) -> Any:
     from deeplake._tensorflow import _from_dataset
+
     return _from_dataset(self)
 
 
 def _pytorch(self, transform: Callable[[Any], Any] = None):
     from deeplake._torch import TorchDataset
+
     return TorchDataset(self, transform=transform)
 
 
 DatasetView.pytorch = _pytorch
 DatasetView.tensorflow = _tensorflow
 
+
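These monkey-patched helpers hang the framework integrations off `DatasetView`. A hedged usage sketch (the query path is illustrative, and the optional torch/tensorflow extras are assumed to be installed):

    import deeplake

    view = deeplake.query('select * from "al://org/dataset"')  # hypothetical path

    # Wrap the view for PyTorch; the optional transform runs per sample.
    torch_ds = view.pytorch(transform=lambda sample: sample)

    # Or hand the same view to TensorFlow.
    tf_ds = view.tensorflow()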
 def load(*args, **kwargs):
     """
     .. deprecated:: 4.0.0
     """
-    raise Exception("""
+    raise Exception(
+        """
 The API for Deep Lake 4.0 has changed significantly, including the `load` method being replaced by `open`.
 To continue using Deep Lake 3.x, use `pip install "deeplake<4"`.
 For information on migrating your code, see https://docs.deeplake.ai/latest/details/v3_conversion/
-    """.replace("\n", " ").strip())
+    """.replace(
+        "\n", " "
+    ).strip()
+    )
+
 
 def empty(*args, **kwargs):
     """
     .. deprecated:: 4.0.0
     """
-    raise Exception("""
+    raise Exception(
+        """
 The API for Deep Lake 4.0 has changed significantly, including the `empty` method being replaced by `create`.
 To continue using Deep Lake 3.x, use `pip install "deeplake<4"`.
 For information on migrating your code, see https://docs.deeplake.ai/latest/details/v3_conversion/
-    """.replace("\n", " ").strip())
+    """.replace(
+        "\n", " "
+    ).strip()
+    )
+
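Per the deprecation messages, the v3 entry points map onto the v4 API as `load` → `open` and `empty` → `create`. A migration sketch (dataset paths are illustrative):

    import deeplake

    # v3: ds = deeplake.load("al://org/existing")
    ds = deeplake.open("al://org/existing")

    # v3: ds = deeplake.empty("al://org/new")
    new_ds = deeplake.create("al://org/new")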
 
 def convert(src: str, dst: str, dst_creds: Dict[str, str] = None):
     """
     Copies the v3 dataset at src into a new dataset in the new v4 format.
     """
 
+    def commit_data(dataset, message="Committing data"):
+        dataset.commit()
+
+    def get_raw_columns(source):
+        return [
+            col.name
+            for col in source.schema.columns
+            if not col.dtype.is_link
+            and col.dtype.kind
+            in {
+                deeplake.types.TypeKind.Image,
+                deeplake.types.TypeKind.SegmentMask,
+                deeplake.types.TypeKind.BinaryMask,
+            }
+        ]
+
+    def transfer_non_link_data(source, dest, batch_size):
+        dl = deeplake._deeplake._Prefetcher(
+            source,
+            batch_size=batch_size,
+            adaptive=True,
+            raw_columns=set(get_raw_columns(source)),
+        )
+        for counter, batch in enumerate(progress_bar(dl), start=1):
+            dest.append(batch)
+            if counter % 100 == 0:
+                commit_data(dest)
+        commit_data(dest, "Final commit of non-link data")
+
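The loop above commits every 100 batches so a long transfer never accumulates one giant uncommitted transaction, then issues a final commit for the tail. The same pattern in isolation (all names and the interval are illustrative):

    COMMIT_EVERY = 100  # illustrative interval

    for counter, batch in enumerate(batches, start=1):
        dest.append(batch)
        if counter % COMMIT_EVERY == 0:
            dest.commit()  # checkpoint mid-transfer
    dest.commit()          # capture any trailing partial interval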
+    def transfer_with_links(source, dest, links, column_names, batch_size):
+        iterable_cols = [col for col in column_names if col not in links]
+        link_sample_info = {link: source[link]._links_info() for link in links}
+        dest.set_creds_key(link_sample_info[links[0]]["key"])
+        pref_ds = source.query(f"SELECT {','.join(iterable_cols)}")
+        dl = deeplake._deeplake._Prefetcher(
+            pref_ds,
+            batch_size=batch_size,
+            adaptive=True,
+            raw_columns=set(get_raw_columns(source)),
+        )
+
+        for counter, batch in enumerate(progress_bar(dl), start=1):
+            for link in links:
+                link_data = link_sample_info[link]["data"]
+                start_index = (counter - 1) * batch_size
+                end_index = min(counter * batch_size, len(link_data))
+                batch[link] = link_data[start_index:end_index]
+
+            dest.append(batch)
+            if counter % 100 == 0:
+                commit_data(dest)
+        commit_data(dest, "Final commit of linked data")
+
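The slice arithmetic keeps the pre-materialized link data aligned with each prefetched batch: 1-based batch `counter` covers rows `(counter - 1) * batch_size` through `counter * batch_size - 1`, clamped to the end of the data. A quick worked check with illustrative numbers:

    batch_size = 10000
    link_data_len = 25000  # illustrative total

    for counter in (1, 2, 3):
        start_index = (counter - 1) * batch_size
        end_index = min(counter * batch_size, link_data_len)
        print(counter, start_index, end_index)
    # 1     0 10000
    # 2 10000 20000
    # 3 20000 25000   <- last slice is clamped to the remaining rows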
     source_ds = deeplake.query(f'select * from "{src}"')
     dest_ds = deeplake.like(source_ds, dst, dst_creds)
-    dest_ds.commit("Created dataset")
+    commit_data(dest_ds, "Created dataset")
+
+    column_names = [col.name for col in source_ds.schema.columns]
+    links = [
+        col.name
+        for col in source_ds.schema.columns
+        if source_ds.schema[col.name].dtype.is_link
+    ]
+    batch_size = 10000
 
-    dl = deeplake.Prefetcher(source_ds, batch_size=10000)
-    counter = 0
     print(f"Transferring {len(source_ds)} rows to {dst}...")
-    for b in dl:
-        dest_ds.append(b)
-        counter += 1
-        if counter > 0 and counter % 100 == 0:
-            dest_ds.commit()
-    dest_ds.commit()
-    print(f"Transferring data.... to {dst}... DONE")
+    if not links:
+        transfer_non_link_data(source_ds, dest_ds, batch_size)
+    else:
+        transfer_with_links(source_ds, dest_ds, links, column_names, batch_size)
 
+    for column in column_names:
+        meta = dict(source_ds[column].metadata)
+        if meta:
+            for key, value in meta.items():
+                dest_ds[column].metadata[key] = value
+
+    commit_data(dest_ds, "Final commit of metadata")
+    print(f"Data transfer to {dst} complete.")
 
 
 def __register_at_fork():
     from ._deeplake import __prepare_atfork, __parent_atfork, __child_atfork
 
     UNSAFE_TYPES = (
-        Dataset, DatasetView, ReadOnlyDataset, Column, ColumnView, ColumnDefinition, ColumnDefinitionView, Row, RowView,
-        RowRange, RowRangeView, Schema, SchemaView, Version, History, Prefetcher,Tag,Tags)
+        Dataset,
+        DatasetView,
+        ReadOnlyDataset,
+        Column,
+        ColumnView,
+        ColumnDefinition,
+        ColumnDefinitionView,
+        Row,
+        RowView,
+        RowRange,
+        RowRangeView,
+        Schema,
+        SchemaView,
+        Version,
+        History,
+        Tag,
+        Tags,
+    )
 
     def check_main_globals_for_unsafe_types():
         import inspect
