Skip to content

Commit 96345ff

Browse files
authored
Add read and parse tar demo (#323)
1 parent 8a6c5c0 commit 96345ff

File tree

1 file changed

+41
-0
lines changed

1 file changed

+41
-0
lines changed

examples/tar_streaming_read.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# ByteDance Volcengine EMR, Copyright 2024.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
from tos import EnvCredentialsProvider
18+
19+
from tosfs.core import TosFileSystem
20+
import tarfile
21+
22+
def streaming_read_tar_from_tos(bucket_name, tar_key):
    """Stream a tar archive out of TOS and report the size of each file in it.

    A ``TosFileSystem`` is built from the ``TOS_ENDPOINT`` and ``TOS_REGION``
    environment variables, with credentials supplied by
    ``EnvCredentialsProvider``. The object ``tos://{bucket_name}/{tar_key}``
    is opened in binary mode and handed to ``tarfile`` in ``'r|*'`` mode, so
    the archive is consumed as a forward-only stream of tar blocks with
    transparent compression detection.

    Args:
        bucket_name: Name of the bucket holding the archive.
        tar_key: Key (path inside the bucket) of the tar archive.

    Returns:
        None. One line per regular file is printed to stdout.
    """
    tosfs = TosFileSystem(
        endpoint_url=os.environ.get("TOS_ENDPOINT"),
        region=os.environ.get("TOS_REGION"),
        credentials_provider=EnvCredentialsProvider(),
    )

    with tosfs.open(f'tos://{bucket_name}/{tar_key}', 'rb') as tos_file:
        with tarfile.open(fileobj=tos_file, mode='r|*') as tar:
            # In stream mode members must be consumed in archive order,
            # so each one is read before the iterator advances.
            for member in tar:
                if not member.isfile():
                    # Directories, links and other special entries carry
                    # no payload to read here.
                    continue

                file_obj = tar.extractfile(member)
                if file_obj is None:
                    continue

                # Close the per-member reader as soon as its content has
                # been read instead of leaving it open until garbage
                # collection.
                with file_obj:
                    file_content = file_obj.read()

                print(f'Read file {member.name} with size {len(file_content)} bytes')
37+
38+
if __name__ == "__main__":
    # Both values are empty in this example; presumably they are meant to be
    # filled in with a real bucket name and tar object key before running.
    bucket_name, tar_path = '', ''
    streaming_read_tar_from_tos(bucket_name, tar_path)

0 commit comments

Comments
 (0)