|
| 1 | +import base64 |
| 2 | +import io |
1 | 3 | import os
|
2 | 4 | import pathlib
|
| 5 | +import tarfile |
3 | 6 |
|
4 | 7 | import numpy as np
|
5 | 8 | import pytest
|
@@ -762,3 +765,132 @@ def test_bytes_metadata(self, capfd):
|
762 | 765 | grp.meta.dump()
|
763 | 766 | assert_captured(capfd, "Type: DataType.BLOB")
|
764 | 767 | grp.close()
|
| 768 | + |
def test_group_metadata_backwards_compat(self):
    """Check that group metadata written by TileDB-Py 0.32.3 is still read correctly.

    A group is created with the current TileDB-Py and populated with
    metadata of many Python/NumPy types. A second group, archived as a
    base64-encoded ``.tgz`` that was produced by TileDB-Py 0.32.3 with the
    same sequence of writes, is unpacked next to it. The metadata of both
    groups is then compared (values and types) through
    ``self.assert_metadata_roundtrip``.
    """
    # === The following code creates a group with metadata using the current version of TileDB-Py ===
    path_new = self.path("new_group")
    tiledb.Group.create(path_new)
    group = tiledb.Group(path_new, "w")

    # python primitive types
    group.meta["python_int"] = -1234
    group.meta["python_float"] = 3.14
    group.meta["python_str"] = "hello"
    group.meta["python_bytes"] = b"hello"
    group.meta["python_bool"] = False

    # numpy primitive types
    group.meta["numpy_int"] = np.int64(-93)
    group.meta["numpy_uint"] = np.uint64(42)
    group.meta["numpy_float64"] = np.float64(3.14)
    group.meta["numpy_bytes"] = np.bytes_("hello")
    group.meta["numpy_str"] = np.str_("hello")
    # `np.bool_` rather than `np.bool`: the `np.bool` alias does not exist
    # on NumPy 1.24-1.26, while `np.bool_` is available on every version.
    group.meta["numpy_bool"] = np.bool_(False)

    # lists/tuples
    group.meta["list_int"] = [7]
    group.meta["tuple_int"] = (7,)
    group.meta["list_ints"] = [1, -2, 3]
    group.meta["tuple_ints"] = (1, 2, 3)
    group.meta["list_float"] = [1.1]
    group.meta["tuple_float"] = (1.1,)
    group.meta["list_floats"] = [1.1, 2.2, 3.3]
    group.meta["tuple_floats"] = (1.1, 2.2, 3.3)
    group.meta["list_empty"] = []
    group.meta["tuple_empty"] = ()

    # numpy arrays
    # NOTE: "numpy_int", "numpy_uint", "numpy_str" and "numpy_bool" reuse
    # keys assigned in the scalar section above and therefore overwrite
    # them. The keys are left untouched because the archived 0.32.3 group
    # below was generated with this exact sequence of assignments.
    group.meta["numpy_int"] = np.array([-11], dtype=np.int64)
    group.meta["numpy_ints"] = np.array([1, -2, 3], dtype=np.int64)
    group.meta["numpy_uint"] = np.array([22], dtype=np.uint64)
    group.meta["numpy_uints"] = np.array([1, 2, 3], dtype=np.uint64)
    group.meta["numpy_float"] = np.array([3.14], dtype=np.float64)
    group.meta["numpy_floats"] = np.array([1.1, 2.2, 3.3], dtype=np.float64)
    group.meta["numpy_byte"] = np.array([b"hello"], dtype="S5")
    group.meta["numpy_str"] = np.array(["hello"], dtype="U5")
    group.meta["numpy_bool"] = np.array([True, False, True])

    group.close()
    # === End of the code that creates the group with metadata ===

    # The snippet below (kept for reference only, never executed) was used
    # to generate the base64 encoded string of the group from TileDB-Py
    # 0.32.3, after creating the group with metadata in the exact same way
    # as above.
    #
    #   # Compress the contents of the group folder to tgz
    #   with tarfile.open("test.tar.gz", "w:gz") as tar:
    #       with os.scandir(path_new) as entries:
    #           for entry in entries:
    #               tar.add(entry.path, arcname=entry.name)
    #
    #   # Read the .tgz file and encode it to base64
    #   with open("test.tar.gz", 'rb') as f:
    #       s = base64.encodebytes(f.read())
    #
    #   # Print the base64 encoded string
    #   group_tgz = f"""{s.decode():>32}"""
    #   print(group_tgz)

    # The following base64 encoded string is the contents of the group folder compressed
    # to a tgz file using TileDB-Py 0.32.3. Line breaks inside the literal are
    # harmless: base64.b64decode skips characters outside the base64 alphabet.
    group_tgz = b"""H4sICO/+G2cC/3Rlc3QudGFyANPT19N3CEis8EhNTEktYqAJMIAAXLSBgbEJgg0SNzQwMjRiUKhg
oAMoLS5JLAJazzAygZGFQm5JZm6qraG5kaWFhbmlhbGekaGphbGlJRfDKBj2ID4+N7UkUZ+mdoAy
tbmpKYQ2g9AGRqh53tDE3MDM3Nzc2NQcmP8NDc3NGRRM6Zn/E9Mzi/GpAypLSxt+8a83KMp/Y8zy
33C0/KdL+W+Otfy3NBot/kdS+R8fj4h/YPSj8UxTktOSjQxMjNPMzS0MDCxTjVLNTUwS01IMzMxM
zJMTicj/ZiYmuMp/QwNjM9Ty38jQAFhdKBjQM/+P0PJfDIhfMULYV1khNAsjTFYITDIygAQYQbKM
YBYDQv0xIEcAymdEEqtgbA1x9DtsIBATrJgRpRfwgC18R8GqqqXxD1gDJwZtnTTb5YbtE0YbprhD
8y0KH7SwVJTnps9d9sorMOX8Met7M8+yMHzas+bz0rgbMet7z3b75kqb3mSdtisqonQnu8GrGvHI
6WGxX/Jm+7UW7V45+8/OVSZ3+O+Ic/0Sloo+8OKG6hqutaun9NgfXjqDz9ftBZNBwLvXt6+fX94/
++EfK0X1S2nBpVv5jQ0cut7nS8T3/wn7rOpq5q9/Jn2XW8OhQ/frZTLrkycxHt1evlKvrtbsXeIX
2dw33D0fd0yt5vqe8T/k3d3wtO4UI5Vm8yMvspXTJE+ozFY+13ZA7e+avDertDwP+b1mcjq0JPar
QLS26mvFLQH6D97dDbyZlx1b8X/ZHYmHWpqMjTP6QiVvrZX/3nsqxv3WwofHjtgmbk+YGnhC/U1D
v5+z0SvXZ5YfmXhYiw4Ynmi727rZteXvpZULJ/jvNikQV1/tuiM73XDytc2ZVu6PRcy4NN3Cuze9
0GJc1KHr+mXOAxexJaUFAv/kVgi/K+FaI+2wZfqOxoYWocQPGzNeG9h9edh+3DfBJMYzOKL2l+em
ezc0Hyq98xaQ8eT40PDoxpYX60KKnogs7Ht2d+cf9lm5m9pGy8fhDvRG+/+j/X+M9p+JqYGJ+WgD
cES0/0oyc1JTkuLTi/JLC/RKUpJok//xtP+w9P+NTUD9v9H232j5P1r+D0j5b2ZoYDZa/o+I8h9c
8NN0AJiM8V8TA9PR8d9RMApGwSgYBaNgFIyCUTAKRsEooCYAAP1+F2wAKAAA"""

    # Create a new group by extracting the contents of the tgz file
    path_original = self.path("original_group")
    with tarfile.open(fileobj=io.BytesIO(base64.b64decode(group_tgz))) as tf:
        try:
            # The archive is a fixture embedded in this test, so it is
            # extracted without filtering.
            tf.extractall(path_original, filter="fully_trusted")
        except TypeError:
            # Fallback for interpreters whose extractall() does not accept
            # the `filter` keyword.
            tf.extractall(path_original)

    # Open both the original and the new group and compare the metadata both in values and types
    group_original = tiledb.Group(path_original, "r")
    group_new = tiledb.Group(path_new, "r")
    try:
        self.assert_metadata_roundtrip(group_new.meta, group_original.meta)
    finally:
        # Close both groups even when the comparison fails.
        group_original.close()
        group_new.close()
| 872 | + |
def test_group_metadata_new_types(self):
    """Write metadata kinds unsupported by TileDB-Py <= 0.32.3 and read them back.

    The values are written to a fresh group in one ``meta.update`` call,
    the group is reopened for reading, and the stored metadata is compared
    against the written values with ``self.assert_metadata_roundtrip``.
    """
    # This kind of data was not supported for TileDB-Py <= 0.32.3
    group_uri = self.path("new_group")

    tiledb.Group.create(group_uri)
    writer = tiledb.Group(group_uri, "w")

    first_day = np.datetime64("2021-01-01")
    second_day = np.datetime64("2021-01-02")
    letters = ["a", "b", "c"]
    test_vals = {
        # zero-dimensional numeric/boolean arrays
        "int64": np.array(-1111, dtype=np.int64),
        "uint64": np.array(2, dtype=np.uint64),
        "float64": np.array(3.14, dtype=np.float64),
        "bool": np.array(True, dtype=bool),
        # multi-element string/bytes arrays
        "str": np.array(letters, dtype="S"),
        "unicode": np.array(letters, dtype="U"),
        "bytes": np.array([b"a", b"b", b"c"]),
        # datetime array
        "datetime": np.array([first_day, second_day]),
    }
    writer.meta.update(test_vals)
    writer.close()

    reader = tiledb.Group(group_uri, "r")
    self.assert_metadata_roundtrip(reader.meta, test_vals)
    reader.close()
0 commit comments