From e6f6f9d4a941e7be9954390dc08ff2d630c58dd4 Mon Sep 17 00:00:00 2001
From: Chuck McCallum <mccalluc@users.noreply.github.com>
Date: Fri, 24 May 2019 01:01:50 -0400
Subject: [PATCH] lint more tests (#86)

* better roadmap

* Remove unused imports

* fix block comments

* fix lines that are too long

* fix "never used" warnings... but it is clear there are tests that assert nothing.

* "flake8 test/*.py" is clean
---
 test/bedfile_test.py         | 54 +++++++++++++----------
 test/bedpe_test.py           | 23 +++++-----
 test/cli_test.py             | 85 ++++++++++++++++++------------------
 test/fpark_test.py           |  5 +--
 test/mrmatrix_test.py        |  7 ++-
 test/multivec_test.py        | 23 +++++-----
 test/tile_2d_bedfile_test.py |  7 +--
 test/utils.py                |  2 +-
 travis_test.sh               | 11 +++--
 9 files changed, 113 insertions(+), 104 deletions(-)

diff --git a/test/bedfile_test.py b/test/bedfile_test.py
index 0254b2eb..536d9202 100644
--- a/test/bedfile_test.py
+++ b/test/bedfile_test.py
@@ -6,7 +6,6 @@
 import os
 import os.path as op
 import sqlite3
-import sys
 import tempfile
 
 testdir = op.realpath(op.dirname(__file__))
@@ -38,7 +37,11 @@ def check_table(filename):
     '''
 
     rows = c.execute(
-        'SELECT * from intervals,position_index where intervals.id=position_index.id and zoomLevel < 1 and rStartPos > 2400000000 and rEndPos < 2500000000')
+        'SELECT * from intervals,position_index '
+        'where intervals.id=position_index.id '
+        'and zoomLevel < 1 '
+        'and rStartPos > 2400000000 '
+        'and rEndPos < 2500000000')
     counter = 0
     for row in rows:
         assert(row[3] > 2400000000)
@@ -51,12 +54,13 @@ def check_table(filename):
 def test_get_tiles():
     filename = 'test/sample_data/gene_annotations.short.db'
 
-    tiles = cdt.get_tiles(filename, 18, 169283)[169283]
-
+    cdt.get_tiles(filename, 18, 169283)[169283]
+    # TODO: Make assertions about result
     # print("tiles:", tiles)
-    x = int(tiles[0]['xStart'])
-
-    fields = tiles[0]['fields']
+    # x = int(tiles[0]['xStart'])
+    #
+    # fields = tiles[0]['fields']
+    # TODO: Make assertions about xStart and fields
 
 
 def test_gene_annotations():
@@ -71,7 +75,7 @@ def test_gene_annotations():
          '--delimiter', '\t',
          '--assembly', 'mm10', '--output-file', f.name])
 
-    import traceback
+    # import traceback
     a, b, tb = result.exc_info
     '''
     print("exc_info:", result.exc_info)
@@ -106,7 +110,7 @@ def test_random_importance():
          '--max-per-tile', '2', '--importance-column', 'random',
          '--assembly', 'b37', '--has-header', '--output-file', f.name])
 
-    import traceback
+    # import traceback
     a, b, tb = result.exc_info
     '''
     print("exc_info:", result.exc_info)
@@ -116,17 +120,21 @@ def test_random_importance():
     print("Exception:", a,b)
     '''
 
-    tileset_info = cdt.get_tileset_info(f.name)
+    cdt.get_tileset_info(f.name)
     # print("tileset_info:", tileset_info)
+    # TODO: Make assertions about result
 
-    rows = cdt.get_tiles(f.name, 0, 0)
-    #print("rows:", rows)
+    cdt.get_tiles(f.name, 0, 0)
+    # print("rows:", rows)
+    # TODO: Make assertions about result
 
-    rows = list(cdt.get_tiles(f.name, 1, 0).values()) + \
+    list(cdt.get_tiles(f.name, 1, 0).values()) + \
         list(cdt.get_tiles(f.name, 1, 1).values())
-    #print('rows:', rows)
+    # print('rows:', rows)
+    # TODO: Make assertions about result
 
-    # check to make sure that tiles in the higher zoom levels are all present in lower zoom levels
+    # check to make sure that tiles in the higher zoom levels
+    # are all present in lower zoom levels
     found = {}
     for row in cdt.get_tiles(f.name, 5, 15).values():
         for rect in row:
@@ -143,7 +151,7 @@ def test_random_importance():
                 found[rect['xStart']] = True
 
     for key, value in found.items():
-        assert(value == True)
+        assert(value)
 
     pass
 
@@ -162,7 +170,7 @@ def test_no_chromosome_limit():
          '--assembly', 'hg19',
          '--output-file', f.name])
 
-    import traceback
+    # import traceback
     '''
     print("exc_info:", result.exc_info)
     print("result:", result)
@@ -183,7 +191,7 @@ def test_no_chromosome_limit():
             foundOther = True
         break
     # make sure there's chromosome other than 14 in the output
-    assert(foundOther == True)
+    assert(foundOther)
 
     os.remove(f.name)
     pass
@@ -196,16 +204,16 @@ def test_chromosome_limit():
     input_file = op.join(testdir, 'sample_data',
                          'geneAnnotationsExonsUnions.short.bed')
 
-    result = runner.invoke(
+    runner.invoke(
         cca.bedfile,
         [input_file,
          '--max-per-tile', '60', '--importance-column', '5',
          '--assembly', 'hg19', '--chromosome', 'chr14',
          '--output-file', f.name])
+    # TODO: Make assertions about result
 
     # print('output:', result.output, result)
     rows = cdt.get_tiles(f.name, 0, 0)[0]
-    foundOther = False
 
     for row in rows:
         assert(row['fields'][0] == 'chr14')
@@ -220,11 +228,12 @@ def test_float_importance():
     runner = clt.CliRunner()
     input_file = op.join(testdir, 'sample_data', 'test_float_importance.bed')
 
-    result = runner.invoke(
+    runner.invoke(
         cca.bedfile,
         [input_file,
          '--max-per-tile', '2', '--importance-column', '4',
          '--assembly', 'hg38', '--no-header', '--output-file', f.name])
+    # TODO: Make assertions about result
 
 
 """
@@ -279,7 +288,8 @@ def test_tile_ranges():
 
     d1 = cht.get_discrete_data(f, 11, 5)
     #print("d1:", len(d1))
-    #print("dv:", [x for x in d1 if (int(x[1]) < 12000000 and int(x[2]) > 12000000)])
+    #print("dv:", [x for x in d1 if (int(x[1]) < 12000000
+    #              and int(x[2]) > 12000000)])
 
     d3 = cht.get_discrete_data(f, 12, 10)
     #print("d2:", len(d3))
diff --git a/test/bedpe_test.py b/test/bedpe_test.py
index aaf6b4ce..cda1192c 100644
--- a/test/bedpe_test.py
+++ b/test/bedpe_test.py
@@ -1,10 +1,7 @@
 from __future__ import print_function
 
 import clodius.db_tiles as cdt
-import click.testing as clt
 import clodius.cli.aggregate as cca
-import h5py
-import negspy.coordinates as nc
 import os.path as op
 import sys
 
@@ -31,21 +28,23 @@ def test_clodius_aggregate_bedpe():
             [input_file,
             '--output-file', output_file,
             '--importance-column', 'random',
-            '--has-header', 
+            '--has-header',
             '--assembly', 'b37'])
 
     #print('output:', result.output, result)
     assert(result.exit_code == 0)
     """
 
-    entries = cdt.get_2d_tiles(output_file, 0, 0, 0)
-    #print("entries:", entries)
+    cdt.get_2d_tiles(output_file, 0, 0, 0)
+    # print("entries:", entries)
 
-    tileset_info = cdt.get_tileset_info(output_file)
-    #print('tileset_info', tileset_info)
+    cdt.get_tileset_info(output_file)
+    # TODO: Make assertions about result
+    # print('tileset_info', tileset_info)
 
-    entries = cdt.get_2d_tiles(output_file, 1, 0, 0, numx=2, numy=2)
-    #print("entries:", entries)
+    cdt.get_2d_tiles(output_file, 1, 0, 0, numx=2, numy=2)
+    # TODO: Make assertions about result
+    # print("entries:", entries)
 
-    tileset_info = cdt.get_tileset_info(output_file)
-    #print('tileset_info', tileset_info)
+    cdt.get_tileset_info(output_file)
+    # TODO: Make assertions about result
diff --git a/test/cli_test.py b/test/cli_test.py
index 863a4ec3..edeb649d 100644
--- a/test/cli_test.py
+++ b/test/cli_test.py
@@ -9,7 +9,6 @@
 import numpy as np
 import os.path as op
 import sys
-import traceback
 
 sys.path.append("scripts")
 
@@ -42,7 +41,7 @@ def test_clodius_aggregate_bedfile():
     assert(result.exit_code == 0)
 
     results = cdt.get_tiles(output_file, 6, 3, num_tiles=1)
-    #print("results:", results)
+    # print("results:", results)
 
     assert(len(results[3][0]['fields']) == 14)
 
@@ -76,7 +75,7 @@ def test_clodius_aggregate_bedgraph():
         cca.bedgraph,
         [input_file,
          '--output-file', output_file,
-         #'--assembly', 'grch37',
+         # '--assembly', 'grch37',
          '--chromsizes-filename', assembly_file,
          '--chromosome-col', '2',
          '--from-pos-col', '3',
@@ -92,7 +91,7 @@ def test_clodius_aggregate_bedgraph():
         [input_file,
          '--output-file', output_file,
          '--assembly', 'grch37',
-         #'--chromsizes-filename', assembly_file,
+         # '--chromsizes-filename', assembly_file,
          '--chromosome-col', '2',
          '--from-pos-col', '3',
          '--to-pos-col', '4',
@@ -112,49 +111,51 @@ def test_clodius_aggregate_bedgraph():
 
     assert(result.exit_code == 0)
     f = h5py.File(output_file)
-    #print("tile_0_0", d)
+    # print("tile_0_0", d)
 
-    #print("tile:", cht.get_data(f, 22, 0))
+    # print("tile:", cht.get_data(f, 22, 0))
     # return
     d = cht.get_data(f, 0, 0)
 
     assert(not np.isnan(d[0]))
     assert(np.isnan(d[-1]))
-    prev_tile_3_0 = cht.get_data(f, 3, 0)
+    cht.get_data(f, 3, 0)
+    # TODO: Make assertions about result
 
-    #print("prev_tile_3_0:", prev_tile_3_0)
+    # print("prev_tile_3_0:", prev_tile_3_0)
 
     assert(result.exit_code == 0)
-    return
-    assert(sum(prev_tile_3_0) < 0)
-
-    input_file = op.join(testdir, 'sample_data', 'cnvs_hw.tsv.gz')
-    result = runner.invoke(
-        cca.bedgraph,
-        [input_file,
-         '--output-file', output_file,
-         '--assembly', 'grch37',
-         '--chromosome-col', '2',
-         '--from-pos-col', '3',
-         '--to-pos-col', '4',
-         '--value-col', '5',
-         '--has-header',
-         '--nan-value', 'NA'])
 
-    '''
-    import traceback
-    print("exc_info:", result.exc_info)
-    a,b,tb = result.exc_info
-    print("result:", result)
-    print("result.output", result.output)
-    print("result.error", traceback.print_tb(tb))
-    print("Exception:", a,b)
-    '''
-
-    f = h5py.File(output_file)
-    tile_3_0 = cht.get_data(f, 3, 0)
-
-    assert(sum(tile_3_0) - sum(prev_tile_3_0) < 0.0001)
+    # TODO: Why are we ignoring these?
+    # assert(sum(prev_tile_3_0) < 0)
+    #
+    # input_file = op.join(testdir, 'sample_data', 'cnvs_hw.tsv.gz')
+    # result = runner.invoke(
+    #     cca.bedgraph,
+    #     [input_file,
+    #      '--output-file', output_file,
+    #      '--assembly', 'grch37',
+    #      '--chromosome-col', '2',
+    #      '--from-pos-col', '3',
+    #      '--to-pos-col', '4',
+    #      '--value-col', '5',
+    #      '--has-header',
+    #      '--nan-value', 'NA'])
+    #
+    # '''
+    # import traceback
+    # print("exc_info:", result.exc_info)
+    # a,b,tb = result.exc_info
+    # print("result:", result)
+    # print("result.output", result.output)
+    # print("result.error", traceback.print_tb(tb))
+    # print("Exception:", a,b)
+    # '''
+    #
+    # f = h5py.File(output_file)
+    # tile_3_0 = cht.get_data(f, 3, 0)
+    #
+    # assert(sum(tile_3_0) - sum(prev_tile_3_0) < 0.0001)
 
 
 testdir = op.realpath(op.dirname(__file__))
@@ -186,13 +187,9 @@ def test_clodius_aggregate_bedpe():
     assert(result.exit_code == 0)
 
     tiles = cdt.get_2d_tiles(output_file, 0, 0, 0, numx=1, numy=1)
-    # print("tiles:", tiles)
 
     assert('\n' not in tiles[(0, 0)][0]['fields'][2])
 
-    #import json
-    # json.dumps(tiles)
-
 
 testdir = op.realpath(op.dirname(__file__))
 
@@ -221,7 +218,8 @@ def test_clodius_aggregate_bedgraph1():
     # print("result.output", result.output)
 
     f = h5py.File('/tmp/dm3_values.hitile')
-    max_zoom = f['meta'].attrs['max-zoom']
+    # max_zoom = f['meta'].attrs['max-zoom']
+    # TODO: Make assertions about result
     values = f['values_0']
 
     import numpy as np
@@ -264,7 +262,8 @@ def test_clodius_aggregate_bedgraph1():
     # print('output:', result.output, result)
 
     f = h5py.File('/tmp/test3chroms_values.hitile')
-    max_zoom = f['meta'].attrs['max-zoom']
+    # max_zoom = f['meta'].attrs['max-zoom']
+    # TODO: Make assertions about result
 
     # print('max_zoom:', max_zoom)
     # print("len", len(f['values_0']))
diff --git a/test/fpark_test.py b/test/fpark_test.py
index 07fc0ad2..01682641 100644
--- a/test/fpark_test.py
+++ b/test/fpark_test.py
@@ -1,5 +1,3 @@
-import sys
-
 import clodius.fpark as fp
 
 
@@ -14,7 +12,8 @@ def test_group_by_key():
     a = fp.FakeSparkContext.parallelize(
         [(1, 2), (1, 3), (1, 4), (2, 5), (2, 6)])
 
-    b = a.groupByKey()
+    a.groupByKey()
+    # TODO: Make assertions about result
 
 
 def test_textFile():
diff --git a/test/mrmatrix_test.py b/test/mrmatrix_test.py
index 2eed1c87..71cd43fc 100644
--- a/test/mrmatrix_test.py
+++ b/test/mrmatrix_test.py
@@ -77,7 +77,8 @@ def test_padding(self):
                 '1': {
                     'values': np.array([[1.0, 2], [3, 4]])
                     # It's important that there is a float value:
-                    # If there isn't, np.nan will be converted to a large negative integer.
+                    # If there isn't, np.nan will be converted
+                    # to a large negative integer.
                 }
             }
         })
@@ -91,7 +92,9 @@ def test_bins(self):
         tileset = MockHdf5({
             'resolutions': {
                 '1': {
-                    'values': np.array([[float(x) for x in range(500)] for y in range(500)])
+                    'values': np.array(
+                        [[float(x) for x in range(500)] for y in range(500)]
+                    )
                 }
             }
         })
diff --git a/test/multivec_test.py b/test/multivec_test.py
index d777404d..12b9a407 100644
--- a/test/multivec_test.py
+++ b/test/multivec_test.py
@@ -1,13 +1,8 @@
 from __future__ import print_function
 
 import click.testing as clt
-import clodius.cli.aggregate as cca
 import clodius.cli.convert as ccc
-import clodius.db_tiles as cdt
-import os
 import os.path as op
-import sqlite3
-import sys
 import tempfile
 
 testdir = op.realpath(op.dirname(__file__))
@@ -16,7 +11,8 @@
 def test_bedfile_to_multivec():
     runner = clt.CliRunner()
     input_file = op.join(testdir, 'sample_data', 'sample.bed.gz')
-    f = tempfile.NamedTemporaryFile(delete=False)
+    tempfile.NamedTemporaryFile(delete=False)
+    # TODO: This temp file appears unused; use it or remove it
     # print("input_file", input_file)
 
     result = runner.invoke(
@@ -26,7 +22,7 @@ def test_bedfile_to_multivec():
          '--assembly', 'hg38',
          '--base-resolution', '10'])
 
-    import traceback
+    # import traceback
     a, b, tb = result.exc_info
     '''
     print("exc_info:", result.exc_info)
@@ -38,7 +34,8 @@ def test_bedfile_to_multivec():
 
 
 def test_load_multivec_tiles():
-    input_file = op.join(testdir, 'sample_data', 'sample.bed.multires.mv5')
+    op.join(testdir, 'sample_data', 'sample.bed.multires.mv5')
+    # TODO: Make assertions about result
 
 
 def test_states_format_befile_to_multivec():
@@ -47,7 +44,8 @@ def test_states_format_befile_to_multivec():
                          'states_format_input_testfile.bed.gz')
     rows_info_file = op.join(testdir, 'sample_data',
                              'states_format_test_row_infos.txt')
-    f = tempfile.NamedTemporaryFile(delete=False)
+    tempfile.NamedTemporaryFile(delete=False)
+    # TODO: This temp file appears unused; use it or remove it
     # print("input_file", input_file)
 
     result = runner.invoke(
@@ -59,7 +57,7 @@ def test_states_format_befile_to_multivec():
          '--starting-resolution', '200',
          '--num-rows', '10'])
 
-    import traceback
+    # import traceback
     a, b, tb = result.exc_info
     '''
     print("exc_info:", result.exc_info)
@@ -76,7 +74,8 @@ def test_ignore_bedfile_headers():
                          '3_header_100_testfile.bed.gz')
     rows_info_file = op.join(testdir, 'sample_data',
                              '3_header_100_row_infos.txt')
-    f = tempfile.NamedTemporaryFile(delete=False)
+    tempfile.NamedTemporaryFile(delete=False)
+    # TODO: This temp file appears unused; use it or remove it
 
     result = runner.invoke(
         ccc.bedfile_to_multivec,
@@ -87,5 +86,5 @@ def test_ignore_bedfile_headers():
          '--starting-resolution', '200',
          '--num-rows', '15'])
 
-    import traceback
+    # import traceback
     a, b, tb = result.exc_info
diff --git a/test/tile_2d_bedfile_test.py b/test/tile_2d_bedfile_test.py
index aa848a20..ea12fbff 100644
--- a/test/tile_2d_bedfile_test.py
+++ b/test/tile_2d_bedfile_test.py
@@ -1,8 +1,6 @@
 from __future__ import print_function
 
 import clodius.db_tiles as cdt
-import h5py
-import sqlite3
 
 
 def test_get_tileset_info():
@@ -18,6 +16,5 @@ def test_get_tileset_info():
 def test_get_tiles():
     filename = 'test/sample_data/arrowhead_domains_short.txt.multires.db'
 
-    tiles = cdt.get_2d_tiles(filename, 0, 0, 0, numx=1, numy=1)
-
-    #print("tiles:", tiles)
+    cdt.get_2d_tiles(filename, 0, 0, 0, numx=1, numy=1)
+    # TODO: Make assertions about result
diff --git a/test/utils.py b/test/utils.py
index 697a2b5b..c4e20eb1 100644
--- a/test/utils.py
+++ b/test/utils.py
@@ -38,4 +38,4 @@ def get_cooler_info(file_path):
             'bins_per_dimension': TILE_SIZE,
         }
 
-        return info
\ No newline at end of file
+        return info
diff --git a/travis_test.sh b/travis_test.sh
index e146eb81..f80c8d66 100755
--- a/travis_test.sh
+++ b/travis_test.sh
@@ -8,12 +8,15 @@ die() { set +v; echo "$*" 1>&2 ; sleep 1; exit 1; }
 # https://github.com/travis-ci/travis-ci/issues/6018
 
 start flake8
-# TODO:
-# - Get more files to lint cleanly.
-# - Reduce the number of errors which are ignored everywhere else.
+# TODO: Remove the special cases from this file:
 flake8 --config=.flake8-ignore
+# TODO: Add more files to this list:
 flake8 test/tsv_to_mrmatrix_test.py \
-       scripts/tsv_to_mrmatrix.py
+       scripts/tsv_to_mrmatrix.py \
+       test/*.py
+# TODO: When everything is covered,
+# just lint the whole directory once,
+# instead of listing special cases.
 end flake8
 
 start download