From e6f6f9d4a941e7be9954390dc08ff2d630c58dd4 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Fri, 24 May 2019 01:01:50 -0400 Subject: [PATCH] lint more tests (#86) * better roadmap * Remove unused imports * fix block comments * fix too long * fix "never used"... but it is clear there are tests that do nothing. * "flake8 test/*.py" is clean --- test/bedfile_test.py | 54 +++++++++++++---------- test/bedpe_test.py | 23 +++++----- test/cli_test.py | 85 ++++++++++++++++++------------------ test/fpark_test.py | 5 +-- test/mrmatrix_test.py | 7 ++- test/multivec_test.py | 23 +++++----- test/tile_2d_bedfile_test.py | 7 +-- test/utils.py | 2 +- travis_test.sh | 11 +++-- 9 files changed, 113 insertions(+), 104 deletions(-) diff --git a/test/bedfile_test.py b/test/bedfile_test.py index 0254b2eb..536d9202 100644 --- a/test/bedfile_test.py +++ b/test/bedfile_test.py @@ -6,7 +6,6 @@ import os import os.path as op import sqlite3 -import sys import tempfile testdir = op.realpath(op.dirname(__file__)) @@ -38,7 +37,11 @@ def check_table(filename): ''' rows = c.execute( - 'SELECT * from intervals,position_index where intervals.id=position_index.id and zoomLevel < 1 and rStartPos > 2400000000 and rEndPos < 2500000000') + 'SELECT * from intervals,position_index ' + 'where intervals.id=position_index.id ' + 'and zoomLevel < 1 ' + 'and rStartPos > 2400000000 ' + 'and rEndPos < 2500000000') counter = 0 for row in rows: assert(row[3] > 2400000000) @@ -51,12 +54,13 @@ def check_table(filename): def test_get_tiles(): filename = 'test/sample_data/gene_annotations.short.db' - tiles = cdt.get_tiles(filename, 18, 169283)[169283] - + cdt.get_tiles(filename, 18, 169283)[169283] + # TODO: Make assertions about result # print("tiles:", tiles) - x = int(tiles[0]['xStart']) - - fields = tiles[0]['fields'] + # x = int(tiles[0]['xStart']) + # + # fields = tiles[0]['fields'] + # TODO: Make assertions def test_gene_annotations(): @@ -71,7 +75,7 @@ def test_gene_annotations(): '--delimiter', '\t', '--assembly', 'mm10', '--output-file', f.name]) - import traceback + # import traceback a, b, tb = result.exc_info ''' print("exc_info:", result.exc_info) @@ -106,7 +110,7 @@ def test_random_importance(): '--max-per-tile', '2', '--importance-column', 'random', '--assembly', 'b37', '--has-header', '--output-file', f.name]) - import traceback + # import traceback a, b, tb = result.exc_info ''' print("exc_info:", result.exc_info) @@ -116,17 +120,21 @@ def test_random_importance(): print("Exception:", a,b) ''' - tileset_info = cdt.get_tileset_info(f.name) + cdt.get_tileset_info(f.name) # print("tileset_info:", tileset_info) + # TODO: Make assertions about result - rows = cdt.get_tiles(f.name, 0, 0) - #print("rows:", rows) + cdt.get_tiles(f.name, 0, 0) + # print("rows:", rows) + # TODO: Make assertions about result - rows = list(cdt.get_tiles(f.name, 1, 0).values()) + \ + list(cdt.get_tiles(f.name, 1, 0).values()) + \ list(cdt.get_tiles(f.name, 1, 1).values()) - #print('rows:', rows) + # print('rows:', rows) + # TODO: Make assertions about result - # check to make sure that tiles in the higher zoom levels are all present in lower zoom levels + # check to make sure that tiles in the higher zoom levels + # are all present in lower zoom levels found = {} for row in cdt.get_tiles(f.name, 5, 15).values(): for rect in row: @@ -143,7 +151,7 @@ def test_random_importance(): found[rect['xStart']] = True for key, value in found.items(): - assert(value == True) + assert(value) pass @@ -162,7 +170,7 @@ def test_no_chromosome_limit(): '--assembly', 'hg19', '--output-file', f.name]) - import traceback + # import traceback ''' print("exc_info:", result.exc_info) print("result:", result) @@ -183,7 +191,7 @@ def test_no_chromosome_limit(): foundOther = True break # make sure there's chromosome other than 14 in the output - assert(foundOther == True) + assert(foundOther) os.remove(f.name) pass @@ -196,16 +204,16 @@ def test_chromosome_limit(): input_file = op.join(testdir, 'sample_data', 'geneAnnotationsExonsUnions.short.bed') - result = runner.invoke( + runner.invoke( cca.bedfile, [input_file, '--max-per-tile', '60', '--importance-column', '5', '--assembly', 'hg19', '--chromosome', 'chr14', '--output-file', f.name]) + # TODO: Make assertions about result # print('output:', result.output, result) rows = cdt.get_tiles(f.name, 0, 0)[0] - foundOther = False for row in rows: assert(row['fields'][0] == 'chr14') @@ -220,11 +228,12 @@ def test_float_importance(): runner = clt.CliRunner() input_file = op.join(testdir, 'sample_data', 'test_float_importance.bed') - result = runner.invoke( + runner.invoke( cca.bedfile, [input_file, '--max-per-tile', '2', '--importance-column', '4', '--assembly', 'hg38', '--no-header', '--output-file', f.name]) + # TODO: Make assertions about result """ @@ -279,7 +288,8 @@ def test_tile_ranges(): d1 = cht.get_discrete_data(f, 11, 5) #print("d1:", len(d1)) - #print("dv:", [x for x in d1 if (int(x[1]) < 12000000 and int(x[2]) > 12000000)]) + #print("dv:", [x for x in d1 if (int(x[1]) < 12000000 + # and int(x[2]) > 12000000)]) d3 = cht.get_discrete_data(f, 12, 10) #print("d2:", len(d3)) diff --git a/test/bedpe_test.py b/test/bedpe_test.py index aaf6b4ce..cda1192c 100644 --- a/test/bedpe_test.py +++ b/test/bedpe_test.py @@ -1,10 +1,7 @@ from __future__ import print_function import clodius.db_tiles as cdt -import click.testing as clt import clodius.cli.aggregate as cca -import h5py -import negspy.coordinates as nc import os.path as op import sys @@ -31,21 +28,23 @@ def test_clodius_aggregate_bedpe(): [input_file, '--output-file', output_file, '--importance-column', 'random', - '--has-header', + '--has-header', '--assembly', 'b37']) #print('output:', result.output, result) assert(result.exit_code == 0) """ - entries = cdt.get_2d_tiles(output_file, 0, 0, 0) - #print("entries:", entries) + cdt.get_2d_tiles(output_file, 0, 0, 0) + # print("entries:", entries) - tileset_info = cdt.get_tileset_info(output_file) - #print('tileset_info', tileset_info) + cdt.get_tileset_info(output_file) + # TODO: Make assertions about result + # print('tileset_info', tileset_info) - entries = cdt.get_2d_tiles(output_file, 1, 0, 0, numx=2, numy=2) - #print("entries:", entries) + cdt.get_2d_tiles(output_file, 1, 0, 0, numx=2, numy=2) + # TODO: Make assertions about result + # print("entries:", entries) - tileset_info = cdt.get_tileset_info(output_file) - #print('tileset_info', tileset_info) + cdt.get_tileset_info(output_file) + # TODO: Make assertion diff --git a/test/cli_test.py b/test/cli_test.py index 863a4ec3..edeb649d 100644 --- a/test/cli_test.py +++ b/test/cli_test.py @@ -9,7 +9,6 @@ import numpy as np import os.path as op import sys -import traceback sys.path.append("scripts") @@ -42,7 +41,7 @@ def test_clodius_aggregate_bedfile(): assert(result.exit_code == 0) results = cdt.get_tiles(output_file, 6, 3, num_tiles=1) - #print("results:", results) + # print("results:", results) assert(len(results[3][0]['fields']) == 14) @@ -76,7 +75,7 @@ def test_clodius_aggregate_bedgraph(): cca.bedgraph, [input_file, '--output-file', output_file, - #'--assembly', 'grch37', + # '--assembly', 'grch37', '--chromsizes-filename', assembly_file, '--chromosome-col', '2', '--from-pos-col', '3', @@ -92,7 +91,7 @@ def test_clodius_aggregate_bedgraph(): [input_file, '--output-file', output_file, '--assembly', 'grch37', - #'--chromsizes-filename', assembly_file, + # '--chromsizes-filename', assembly_file, '--chromosome-col', '2', '--from-pos-col', '3', '--to-pos-col', '4', @@ -112,49 +111,51 @@ def test_clodius_aggregate_bedgraph(): assert(result.exit_code == 0) f = h5py.File(output_file) - #print("tile_0_0", d) + # print("tile_0_0", d) - #print("tile:", cht.get_data(f, 22, 0)) + # print("tile:", cht.get_data(f, 22, 0)) # return d = cht.get_data(f, 0, 0) assert(not np.isnan(d[0])) assert(np.isnan(d[-1])) - prev_tile_3_0 = cht.get_data(f, 3, 0) + cht.get_data(f, 3, 0) + # TODO: Make assertions about result - #print("prev_tile_3_0:", prev_tile_3_0) + # print("prev_tile_3_0:", prev_tile_3_0) assert(result.exit_code == 0) - return - assert(sum(prev_tile_3_0) < 0) - - input_file = op.join(testdir, 'sample_data', 'cnvs_hw.tsv.gz') - result = runner.invoke( - cca.bedgraph, - [input_file, - '--output-file', output_file, - '--assembly', 'grch37', - '--chromosome-col', '2', - '--from-pos-col', '3', - '--to-pos-col', '4', - '--value-col', '5', - '--has-header', - '--nan-value', 'NA']) - ''' - import traceback - print("exc_info:", result.exc_info) - a,b,tb = result.exc_info - print("result:", result) - print("result.output", result.output) - print("result.error", traceback.print_tb(tb)) - print("Exception:", a,b) - ''' - - f = h5py.File(output_file) - tile_3_0 = cht.get_data(f, 3, 0) - - assert(sum(tile_3_0) - sum(prev_tile_3_0) < 0.0001) + # TODO: Why are we ignoring these? + # assert(sum(prev_tile_3_0) < 0) + # + # input_file = op.join(testdir, 'sample_data', 'cnvs_hw.tsv.gz') + # result = runner.invoke( + # cca.bedgraph, + # [input_file, + # '--output-file', output_file, + # '--assembly', 'grch37', + # '--chromosome-col', '2', + # '--from-pos-col', '3', + # '--to-pos-col', '4', + # '--value-col', '5', + # '--has-header', + # '--nan-value', 'NA']) + # + # ''' + # import traceback + # print("exc_info:", result.exc_info) + # a,b,tb = result.exc_info + # print("result:", result) + # print("result.output", result.output) + # print("result.error", traceback.print_tb(tb)) + # print("Exception:", a,b) + # ''' + # + # f = h5py.File(output_file) + # tile_3_0 = cht.get_data(f, 3, 0) + # + # assert(sum(tile_3_0) - sum(prev_tile_3_0) < 0.0001) testdir = op.realpath(op.dirname(__file__)) @@ -186,13 +187,9 @@ def test_clodius_aggregate_bedpe(): assert(result.exit_code == 0) tiles = cdt.get_2d_tiles(output_file, 0, 0, 0, numx=1, numy=1) - # print("tiles:", tiles) assert('\n' not in tiles[(0, 0)][0]['fields'][2]) - #import json - # json.dumps(tiles) - testdir = op.realpath(op.dirname(__file__)) @@ -221,7 +218,8 @@ def test_clodius_aggregate_bedgraph1(): # print("result.output", result.output) f = h5py.File('/tmp/dm3_values.hitile') - max_zoom = f['meta'].attrs['max-zoom'] + # max_zoom = f['meta'].attrs['max-zoom'] + # TODO: Make assertions about result values = f['values_0'] import numpy as np @@ -264,7 +262,8 @@ def test_clodius_aggregate_bedgraph1(): # print('output:', result.output, result) f = h5py.File('/tmp/test3chroms_values.hitile') - max_zoom = f['meta'].attrs['max-zoom'] + # f['meta'].attrs['max-zoom'] + # TODO: Make assertions about result # print('max_zoom:', max_zoom) # print("len", len(f['values_0'])) diff --git a/test/fpark_test.py b/test/fpark_test.py index 07fc0ad2..01682641 100644 --- a/test/fpark_test.py +++ b/test/fpark_test.py @@ -1,5 +1,3 @@ -import sys - import clodius.fpark as fp @@ -14,7 +12,8 @@ def test_group_by_key(): a = fp.FakeSparkContext.parallelize( [(1, 2), (1, 3), (1, 4), (2, 5), (2, 6)]) - b = a.groupByKey() + a.groupByKey() + # TODO: Make assertions about result def test_textFile(): diff --git a/test/mrmatrix_test.py b/test/mrmatrix_test.py index 2eed1c87..71cd43fc 100644 --- a/test/mrmatrix_test.py +++ b/test/mrmatrix_test.py @@ -77,7 +77,8 @@ def test_padding(self): '1': { 'values': np.array([[1.0, 2], [3, 4]]) # It's important that there is a float value: - # If there isn't, np.nan will be converted to a large negative integer. + # If there isn't, np.nan will be converted + # to a large negative integer. } } }) @@ -91,7 +92,9 @@ def test_bins(self): tileset = MockHdf5({ 'resolutions': { '1': { - 'values': np.array([[float(x) for x in range(500)] for y in range(500)]) + 'values': np.array( + [[float(x) for x in range(500)] for y in range(500)] + ) } } }) diff --git a/test/multivec_test.py b/test/multivec_test.py index d777404d..12b9a407 100644 --- a/test/multivec_test.py +++ b/test/multivec_test.py @@ -1,13 +1,8 @@ from __future__ import print_function import click.testing as clt -import clodius.cli.aggregate as cca import clodius.cli.convert as ccc -import clodius.db_tiles as cdt -import os import os.path as op -import sqlite3 -import sys import tempfile testdir = op.realpath(op.dirname(__file__)) @@ -16,7 +11,8 @@ def test_bedfile_to_multivec(): runner = clt.CliRunner() input_file = op.join(testdir, 'sample_data', 'sample.bed.gz') - f = tempfile.NamedTemporaryFile(delete=False) + tempfile.NamedTemporaryFile(delete=False) + # TODO: Make assertions about result # print("input_file", input_file) result = runner.invoke( @@ -26,7 +22,7 @@ def test_bedfile_to_multivec(): '--assembly', 'hg38', '--base-resolution', '10']) - import traceback + # import traceback a, b, tb = result.exc_info ''' print("exc_info:", result.exc_info) @@ -38,7 +34,8 @@ def test_bedfile_to_multivec(): def test_load_multivec_tiles(): - input_file = op.join(testdir, 'sample_data', 'sample.bed.multires.mv5') + op.join(testdir, 'sample_data', 'sample.bed.multires.mv5') + # TODO: Make assertions about result def test_states_format_befile_to_multivec(): @@ -47,7 +44,8 @@ def test_states_format_befile_to_multivec(): 'states_format_input_testfile.bed.gz') rows_info_file = op.join(testdir, 'sample_data', 'states_format_test_row_infos.txt') - f = tempfile.NamedTemporaryFile(delete=False) + tempfile.NamedTemporaryFile(delete=False) + # TODO: Make assertions about result # print("input_file", input_file) result = runner.invoke( @@ -59,7 +57,7 @@ def test_states_format_befile_to_multivec(): '--starting-resolution', '200', '--num-rows', '10']) - import traceback + # import traceback a, b, tb = result.exc_info ''' print("exc_info:", result.exc_info) @@ -76,7 +74,8 @@ def test_ignore_bedfile_headers(): '3_header_100_testfile.bed.gz') rows_info_file = op.join(testdir, 'sample_data', '3_header_100_row_infos.txt') - f = tempfile.NamedTemporaryFile(delete=False) + tempfile.NamedTemporaryFile(delete=False) + # TODO: Make assertions about result result = runner.invoke( ccc.bedfile_to_multivec, @@ -87,5 +86,5 @@ def test_ignore_bedfile_headers(): '--starting-resolution', '200', '--num-rows', '15']) - import traceback + # import traceback a, b, tb = result.exc_info diff --git a/test/tile_2d_bedfile_test.py b/test/tile_2d_bedfile_test.py index aa848a20..ea12fbff 100644 --- a/test/tile_2d_bedfile_test.py +++ b/test/tile_2d_bedfile_test.py @@ -1,8 +1,6 @@ from __future__ import print_function import clodius.db_tiles as cdt -import h5py -import sqlite3 def test_get_tileset_info(): @@ -18,6 +16,5 @@ def test_get_tileset_info(): def test_get_tiles(): filename = 'test/sample_data/arrowhead_domains_short.txt.multires.db' - tiles = cdt.get_2d_tiles(filename, 0, 0, 0, numx=1, numy=1) - - #print("tiles:", tiles) + cdt.get_2d_tiles(filename, 0, 0, 0, numx=1, numy=1) + # TODO: Make an assertion diff --git a/test/utils.py b/test/utils.py index 697a2b5b..c4e20eb1 100644 --- a/test/utils.py +++ b/test/utils.py @@ -38,4 +38,4 @@ def get_cooler_info(file_path): 'bins_per_dimension': TILE_SIZE, } - return info \ No newline at end of file + return info diff --git a/travis_test.sh b/travis_test.sh index e146eb81..f80c8d66 100755 --- a/travis_test.sh +++ b/travis_test.sh @@ -8,12 +8,15 @@ die() { set +v; echo "$*" 1>&2 ; sleep 1; exit 1; } # https://github.com/travis-ci/travis-ci/issues/6018 start flake8 -# TODO: -# - Get more files to lint cleanly. -# - Reduce the number of errors which are ignored everywhere else. +# TODO: Remove the special cases from this file: flake8 --config=.flake8-ignore +# TODO: Add more files to this list: flake8 test/tsv_to_mrmatrix_test.py \ - scripts/tsv_to_mrmatrix.py + scripts/tsv_to_mrmatrix.py \ + test/*.py +# TODO: When everything is covered, +# just lint the whole directory once, +# instead of listing special cases. end flake8 start download