7
7
from numpy .testing import assert_array_equal
8
8
import h5py
9
9
10
- from scripts .tsv_to_mrmatrix import coarsen , parse
10
+ from scripts .tsv_to_mrmatrix import coarsen , parse , get_height , get_width
11
11
12
12
13
13
class CoarsenTest (unittest .TestCase ):
@@ -21,7 +21,8 @@ def test_5_layer_pyramid(self):
21
21
g = hdf5 .create_group ('resolutions' )
22
22
g1 = g .create_group ('1' )
23
23
ds = g1 .create_dataset ('values' , (max_width , max_width ),
24
- dtype = 'f4' , compression = 'lzf' , fillvalue = np .nan )
24
+ dtype = 'f4' , compression = 'lzf' ,
25
+ fillvalue = np .nan )
25
26
for y in range (max_width ):
26
27
a = np .array ([float (x ) for x in range (max_width )])
27
28
ds [y , :max_width ] = a
@@ -70,7 +71,8 @@ def test_math(self):
70
71
g = hdf5 .create_group ('resolutions' )
71
72
g1 = g .create_group ('1' )
72
73
ds = g1 .create_dataset ('values' , (max_width , max_width ),
73
- dtype = 'f4' , compression = 'lzf' , fillvalue = np .nan )
74
+ dtype = 'f4' , compression = 'lzf' ,
75
+ fillvalue = np .nan )
74
76
for y in range (max_width ):
75
77
a = np .array ([float (x ) for x in range (max_width )])
76
78
ds [y , :max_width ] = a
@@ -107,7 +109,7 @@ def test_math(self):
107
109
108
110
109
111
class ParseTest (unittest .TestCase ):
110
- def test_parse (self ):
112
+ def test_wide_labelled_square (self ):
111
113
with TemporaryDirectory () as tmp_dir :
112
114
csv_path = tmp_dir + '/tmp.csv'
113
115
with open (csv_path , 'w' , newline = '' ) as csv_file :
@@ -127,7 +129,11 @@ def test_parse(self):
127
129
hdf5_path = tmp_dir + 'tmp.hdf5'
128
130
hdf5_write_handle = h5py .File (hdf5_path , 'w' )
129
131
130
- parse (csv_handle , hdf5_write_handle )
132
+ height = get_height (csv_path )
133
+ width = get_width (csv_path , is_labelled = True )
134
+ parse (csv_handle , hdf5_write_handle , height , width ,
135
+ delimiter = '\t ' , first_n = None , is_square = True ,
136
+ is_labelled = True )
131
137
132
138
hdf5 = h5py .File (hdf5_path , 'r' )
133
139
self .assertEqual (list (hdf5 .keys ()), ['labels' , 'resolutions' ])
@@ -158,3 +164,118 @@ def test_parse(self):
158
164
assert_array_equal (res_2 [4 ], [0 ] * 256 )
159
165
assert_array_equal (res_2 [5 ], [0 ] * 256 )
160
166
assert_array_equal (res_2 [6 ], [0 ] * 256 )
167
+ # TODO: We lose nan at higher aggregations.
168
+ # https://github.com/higlass/clodius/issues/62
169
+
170
def _assert_unlabelled_roundtrip_lt_256(
        self, matrix, delimiter, is_square):
    """Round-trip a small unlabelled matrix through ``parse`` and verify it.

    Writes ``matrix`` to a delimited text file without header row or
    label column, converts it with ``parse``, then asserts that the
    resulting HDF5 file holds a single resolution ('1') whose 'values'
    dataset equals ``matrix`` and whose 'nan_values' dataset is all zeros.

    :param matrix: list of equal-length rows of numbers (must be non-empty,
        since ``matrix[0]`` is used to size the expected 'nan_values').
    :param delimiter: one-character field separator used for both writing
        the file and parsing it.
    :param is_square: forwarded unchanged to ``parse``.
    """
    with TemporaryDirectory() as tmp_dir:
        csv_path = tmp_dir + '/tmp.csv'
        with open(csv_path, 'w', newline='') as csv_file:
            csv.writer(csv_file, delimiter=delimiter).writerows(matrix)

        # The separator matters: without it the file is created *next to*
        # tmp_dir and is not removed when the temporary directory is.
        hdf5_path = tmp_dir + '/tmp.hdf5'

        is_labelled = False
        height = get_height(csv_path, is_labelled=is_labelled)
        width = get_width(csv_path, is_labelled=is_labelled)
        # Context managers guarantee both handles are released even if
        # parse() raises; closing an already-closed h5py.File is harmless.
        with open(csv_path, 'r') as csv_handle, \
                h5py.File(hdf5_path, 'w') as hdf5_write_handle:
            parse(csv_handle, hdf5_write_handle, height, width,
                  first_n=None, is_labelled=is_labelled,
                  delimiter=delimiter, is_square=is_square)

        # Read back inside a ``with`` so the file is closed before the
        # temporary directory is cleaned up.
        with h5py.File(hdf5_path, 'r') as hdf5:
            self.assertEqual(list(hdf5.keys()), ['resolutions'])
            self.assertEqual(list(hdf5['resolutions'].keys()), ['1'])
            self.assertEqual(list(hdf5['resolutions']['1'].keys()),
                             ['nan_values', 'values'])
            assert_array_equal(
                hdf5['resolutions']['1']['nan_values'],
                [[0] * len(matrix[0])] * len(matrix)
            )
            assert_array_equal(
                hdf5['resolutions']['1']['values'],
                matrix
            )
204
+
205
def test_unlabelled_csv_is_square_true(self):
    """A 4x4 comma-separated matrix survives parsing with is_square=True."""
    side = 4
    # Cell value is simply row index + column index.
    grid = [[col + row for col in range(side)] for row in range(side)]
    self._assert_unlabelled_roundtrip_lt_256(
        matrix=grid, delimiter=',', is_square=True)
211
+
212
def test_unlabelled_tsv_is_square_false(self):
    """A 4x4 tab-separated matrix survives parsing with is_square=False."""
    self._assert_unlabelled_roundtrip_lt_256(
        matrix=[[x + y for x in range(4)] for y in range(4)],
        # Must be exactly one character: csv.writer rejects longer
        # delimiters, so a tab followed by a space would raise TypeError.
        delimiter='\t',
        is_square=False
    )
218
+
219
def _assert_unlabelled_roundtrip_1024(
        self, matrix, first_row=None, first_col=None, first_n=None):
    """Round-trip a large unlabelled TSV and check the coarsest resolution.

    Writes ``matrix`` as a tab-separated file without labels, converts it
    with ``parse`` (``is_square=False``), and asserts that the HDF5 output
    has resolutions '1', '2' and '4'. Optionally compares the first row
    and/or the leading entries of the first column of the resolution-'4'
    'values' dataset against expected values.

    :param matrix: list of equal-length rows of numbers.
    :param first_row: expected first row at resolution '4'; the check is
        skipped when this is ``None`` or empty.
    :param first_col: expected leading entries of the first column at
        resolution '4'; the check is skipped when ``None`` or empty.
    :param first_n: forwarded unchanged to ``parse``.
    """
    # csv.writer only accepts a one-character delimiter.
    delimiter = '\t'
    is_square = False
    with TemporaryDirectory() as tmp_dir:
        csv_path = tmp_dir + '/tmp.csv'
        with open(csv_path, 'w', newline='') as csv_file:
            csv.writer(csv_file, delimiter=delimiter).writerows(matrix)

        # The separator matters: without it the file is created *next to*
        # tmp_dir and is not removed when the temporary directory is.
        hdf5_path = tmp_dir + '/tmp.hdf5'

        is_labelled = False
        height = get_height(csv_path, is_labelled=is_labelled)
        width = get_width(csv_path, is_labelled=is_labelled)
        # Context managers guarantee both handles are released even if
        # parse() raises; closing an already-closed h5py.File is harmless.
        with open(csv_path, 'r') as csv_handle, \
                h5py.File(hdf5_path, 'w') as hdf5_write_handle:
            parse(csv_handle, hdf5_write_handle, height, width,
                  first_n=first_n, is_labelled=is_labelled,
                  delimiter=delimiter, is_square=is_square)

        # Read back inside a ``with`` so the file is closed before the
        # temporary directory is cleaned up.
        with h5py.File(hdf5_path, 'r') as hdf5:
            self.assertEqual(list(hdf5.keys()), ['resolutions'])
            self.assertEqual(
                list(hdf5['resolutions'].keys()), ['1', '2', '4'])
            self.assertEqual(list(hdf5['resolutions']['1'].keys()),
                             ['nan_values', 'values'])
            self.assertEqual(list(hdf5['resolutions']['4'].keys()),
                             ['values'])
            res_4 = hdf5['resolutions']['4']['values']
            if first_row:
                assert_array_equal(res_4[0], first_row)
            if first_col:
                # One sliced read of column 0 instead of one dataset
                # access per row.
                assert_array_equal(
                    res_4[:len(first_col), 0],
                    first_col)
256
+
257
def test_unlabelled_tsv_tall(self):
    """1000 rows x 4 columns of ones: check column 0 at resolution '4'."""
    n_rows, n_cols = 1000, 4
    ones = [[1] * n_cols for _ in range(n_rows)]
    # 250 aggregated cells of 16, then 6 zero cells.
    expected_col = [16] * 250 + [0] * 6
    self._assert_unlabelled_roundtrip_1024(
        matrix=ones, first_col=expected_col)
262
+
263
def test_unlabelled_tsv_wide(self):
    """4 rows x 1000 columns of ones: check row 0 at resolution '4'."""
    n_rows, n_cols = 4, 1000
    ones = [[1] * n_cols for _ in range(n_rows)]
    # 250 aggregated cells of 16, then 6 zero cells.
    expected_row = [16] * 250 + [0] * 6
    self._assert_unlabelled_roundtrip_1024(
        matrix=ones, first_row=expected_row)
268
+
269
def test_unlabelled_tsv_tall_first_n(self):
    """Tall matrix of ones with first_n=2: check column 0 at resolution '4'."""
    n_rows, n_cols = 1000, 4
    ones = [[1] * n_cols for _ in range(n_rows)]
    # A single aggregated cell of 8, then 255 zero cells.
    expected_col = [8] + [0] * 255
    self._assert_unlabelled_roundtrip_1024(
        matrix=ones, first_col=expected_col, first_n=2)
275
+
276
def test_unlabelled_tsv_wide_first_n(self):
    """Wide matrix of ones with first_n=2: check row 0 at resolution '4'."""
    n_rows, n_cols = 4, 1000
    ones = [[1] * n_cols for _ in range(n_rows)]
    # 250 aggregated cells of 8, then 6 zero cells.
    expected_row = [8] * 250 + [0] * 6
    self._assert_unlabelled_roundtrip_1024(
        matrix=ones, first_row=expected_row, first_n=2)
0 commit comments