Merge pull request #21 from marbl/dev_alt

alexsweeten · web-flow · commit d38918f4f310 · 2024-04-17T14:59:18.000-04:00
0.8.1 release
diff --git a/README.md b/README.md
@@ -1,3 +1,4 @@
+[![Documentation Status](https://readthedocs.org/projects/moddotplot/badge/?version=latest)](https://moddotplot.readthedocs.io/en/latest/?badge=latest)
 ![](images/logo.png)
 
 - [About](#about)
@@ -6,6 +7,7 @@
   - [Interactive Mode](#interactive-mode)
   - [Static Mode](#static-mode)
   - [Standard arguments](#standard-arguments)
+- [Save matrix to file, but don't render plots. In interactive mode, this must be utlized with the `--save` flag.](#save-matrix-to-file-but-dont-render-plots-in-interactive-mode-this-must-be-utlized-with-the---save-flag)
   - [Interactive Mode Commands](#interactive-mode-commands)
   - [Static Mode Commands](#static-mode-commands)
   - [Sample run - Interactive Mode](#sample-run---interactive-mode)
@@ -141,6 +143,8 @@ If set when 2 or more sequences are input into ModDotPlot, this will show an a v
 `--ambiguous <bool>`
 By default, k-mers that are homopolymers of ambiguous IUPAC codes (eg. NNNNNNNNNNN’s) are excluded from identity estimation. This results in gaps along the central diagonal for these regions.  If desired, these can be kept by setting the `--ambiguous` flag in both interactive and static mode.  
 
+`--no-plot <bool>`
+Save matrix to file, but don't render plots. In interactive mode, this must be utilized with the `--save` flag. 
 --- 
 
 ### Interactive Mode Commands
@@ -151,7 +155,7 @@ Port to display ModDotPlot on. Default is 8050, this can be changed to any accep
 
 `-w / --window <int>`
 
-Minimum window size. By default, interactive mode sets a minimum window size based on the sequence length `n/2000` (eg. a 3Mbp sequence will have a 1500bp window). The maximum window size will always be set to `n/1000` (3000bp under the same example). This means that 2 matrices will be created. Creating more matrices will mean 
+Minimum window size. By default, interactive mode sets a minimum window size based on the sequence length `n/2000` (eg. a 3Mbp sequence will have a 1500bp window). The maximum window size will always be set to `n/1000` (3000bp under the same example). This means that 2 matrices will be created.
 
 `-q / --quick <bool>`
 
@@ -185,10 +189,6 @@ Window size. Unlike interactive mode, only one matrix will be created, so this r
 
 Skip output of bed file.
 
-`--no-plot <bool>`
-
-Skip output of pdf and png image files.
-
 `--no-hist <bool>`
 
 Skip output of histogram legend.
@@ -209,6 +209,10 @@ List of accepted palettes can be found [here](https://jiffyclub.github.io/palett
 
 Flip sequential order of color palette. Set to `-` by default for divergent palettes. 
 
+`--color <list of hexcodes>`
+
+List of custom colors in hexcode format can be entered sequentially, mapped from low to high identity. 
+
 `--breakpoints <list of ints>`
 
 Add custom identity threshold breakpoints. Note that the number of breakpoints must be equal to the number of colors + 1.
@@ -278,7 +282,7 @@ ssh -N -f -L <LOCAL_PORT_NUMBER>:127.0.0.1:<HPC_PORT_NUMBER> HPC@LOGIN.CREDENTIA
 
 You should now be able to view interactive mode using `http://127.0.0.1:<LOCAL_PORT_NUMBER>`. Note that your own HPC environment may have specific instructions and/or restrictions for setting up port forwarding.
 
-VSCode now has automatic port forwarding built into the terminal menu. See [VSCode documentation](https://code.visualstudio.com/docs/editor/port-forwarding) for fruther details 
+VSCode now has automatic port forwarding built into the terminal menu. See [VSCode documentation](https://code.visualstudio.com/docs/editor/port-forwarding) for further details 
 
 ![](images/portforwarding.png)
 
@@ -319,7 +323,7 @@ $ moddotplot static -c config/config.json
  | |  | | (_) | (_| | | |__| | (_) | |_  | |    | | (_) | |_ 
  |_|  |_|\___/ \__,_| |_____/ \___/ \__| |_|    |_|\___/ \__|
 
-v0.8.0 
+v0.8.1
 
 Running ModDotPlot in static mode
 
@@ -371,18 +375,17 @@ For bug reports or general usage questions, please raise a GitHub issue, or emai
 
 ## Known Issues
 
-Plot width and xlim (limiting the x axis to a different amount) currently do not work. I plan to have those working in v0.9.0.
-
-Mac users might encounter the following unexpected command line output:
+- Plot width and xlim (limiting the x axis to a different amount) currently do not work. I plan to have those working in v0.9.0.
 
-`/bin/sh: lscpu: command not found`
-
-This is a known issue with Plotnine, the Python plotting library used by ModDotPlot. This can be safely ignored.
+- Mac users might encounter the following unexpected command line output: `/bin/sh: lscpu: command not found`. This is a known issue with Plotnine, the Python plotting library used by ModDotPlot. This can be safely ignored.
 
+- If you encounter an error with the following traceback: `rv = reductor(4) TypeError: cannot pickle 'generator' object`, this means that you have a newer version of Plotnine that is incompatible with ModDotPlot. Please uninstall plotnine and reinstall version 0.12.4 `pip install plotnine==0.12.4`. 
 
+- In interactive mode, comparing sequences of two sizes will lead to errors in zooming for the larger sequence. I plan to fix this in v0.9.0.
+  
 ---
 
 
 ## Cite
 
-Publication in progress!
+Publication in progress! (almost there :D)
diff --git a/images/logo.png b/images/logo.png
diff --git a/pyproject.toml b/pyproject.toml
@@ -11,11 +11,13 @@ dependencies = [
   "pandas",
   "plotly",
   "dash",
-  "plotnine",
+  "plotnine==0.12.4",
   "palettable",
   "mmh3",
   "tk",
-  "setproctitle"
+  "setproctitle",
+  "sphinx_rtd_theme",
+  "numpy==1.26.4"
 ]
 authors = [
   {name = "Alex Sweeten", email = "alex.sweeten@nih.gov"},
diff --git a/src/moddotplot/const.py b/src/moddotplot/const.py
@@ -1,4 +1,4 @@
-VERSION = "0.8.0"
+VERSION = "0.8.1"
 COLS = [
     "#query_name",
     "query_start",
diff --git a/src/moddotplot/estimate_identity.py b/src/moddotplot/estimate_identity.py
@@ -510,6 +510,7 @@ def findValueInRange(integer: int, range_dict: dict) -> int:
         if key[0] >= integer >= key[1]:
             return value
     return highest_value
+    
 
 
 def setZoomLevels(axis_length, sparsity_layers):
diff --git a/src/moddotplot/interactive.py b/src/moddotplot/interactive.py
@@ -77,17 +77,29 @@ def run_dash(matrices, metadata, axes, sparsity, identity, port_number, output_d
     for i in range(len(metadata)):
         titles.append(metadata[i]["title"])
     # Get zooming thresholds, adjust sparsity respectively.
-    mod_ranges = verifyModimizers(sparsity, len(image_pyramid))
-    # TODO: This is probably wrong, look into it later
-    mod_thresholds_list = setZoomLevels(round(metadata[0]["x_size"]), mod_ranges)
-
+    def halving_sequence(size, start):
+        sequence = [start]
+        for _ in range(1, size):
+            start /= 2
+            sequence.append(start)
+        return sequence
+    #print(current_metadata)
+    mod_thresholds_list = halving_sequence(len(current_metadata["sparsities"]), current_metadata["x_size"])
+    #print(mod_thresholds_list)
+    
+    #print(current_metadata["min_window_size"]* current_metadata["resolution"])
+    #print(current_metadata["max_window_size"])
+    numo = round(math.log2(current_metadata['max_window_size']/current_metadata['min_window_size']) + 1)
+    #print(numo)
     important = generateDictionaryFromList(mod_thresholds_list)
+    #print(f"this is imprtant: {important}")
 
     main_level = image_pyramid[0]
     main_x_axis = axes[0][0]
     main_y_axis = axes[0][1]
     main_x_axis_np = np.array(main_x_axis)
 
+    #TODO: modify value here
     main_x_axis_np += 3000
 
     # Modify text so that hover shows interval format
@@ -1529,7 +1541,10 @@ def update_dotplot(
                     amount = x_end - x_begin
 
                     # This function finds the correct level in the image pyramid
-                    zoom_factor = findValueInRange(amount, important)
+                    try:
+                        zoom_factor = findValueInRange(amount, important)
+                    except ValueError as err:
+                        zoom_factor = 0
                     # If zoom_factor is less than current_zoom, base tmp factors on the older amount
 
                     if zoom_factor > len(image_pyramid) - 1:
diff --git a/src/moddotplot/moddotplot.py b/src/moddotplot/moddotplot.py
@@ -159,6 +159,12 @@ def get_parser():
         help="Launch a quick, non-interactive version of interactive mode.",
     )
 
+    interactive_parser.add_argument(
+        "--no-plot",
+        action="store_true",
+        help="Prevent launching dash after saving. Must be used in combination with --save.",
+    )
+
     # -----------STATIC MODE SUBCOMMANDS-----------
     static_input_group = static_parser.add_mutually_exclusive_group(required=True)
     static_input_group.add_argument(
@@ -533,7 +539,15 @@ def main():
             if args.window and args.quick:
                 print(f"Conflict with `--quick` argument.")
         max_window_size = math.ceil(hgi / args.resolution)
-
+        # If the sequence is too small for the minimum window size, report an error and exit.
+        if max_window_size < 10:
+            print(
+                    f"Error: sequence too small for analysis.\n"
+                )
+            print(
+                f"ModDotPlot requires a minimum window size of 10. Sequences less than 10Kbp will not work with ModDotPlot under normal resolution. We recommend rerunning ModDotPlot with --r {math.ceil(hgi / 10)}.\n"
+            )
+            sys.exit(0)
         while min_window_size <= max_window_size:
             window_lengths.append(min_window_size)
             min_window_size = min_window_size * 2
@@ -553,18 +567,20 @@ def main():
 
         # Set sparsity to be the closest power of 2
         sparsities = []
-        if window_lengths[0] < args.modimizer:
-            raise ValueError(
-                "Minimum window size must be greater than or equal to the modimizer sketch size"
-            )
-        sparsities.append(round(window_lengths[0] / args.modimizer))
+        if window_lengths[0] < 1000:
+            sparsities.append(1)
+        else:
+            sparsities.append(round(window_lengths[0] / args.modimizer))
         if sparsities[0] <= args.modimizer:
             sparsities[0] = 2 ** int(math.log2(sparsities[0]))
         else:
             sparsities[0] = 2 ** (int(math.log2(sparsities[0] - 1)) + 1)
         # expectation = round(win/seq_sparsity)
         for i in range(1, len(window_lengths)):
-            sparsities.append(sparsities[-1] * 2)
+            if window_lengths[i] > 1000:
+                sparsities.append(sparsities[-1] * 2)
+            else:
+                sparsities.append(1)
         expectation = round(window_lengths[-1] / sparsities[-1])
         matrices = []
         metadata = []
@@ -632,6 +648,7 @@ def main():
                         "resolution": args.resolution,
                         "kmer_length": args.kmer,
                         "title": f"{seq_list[j]}",
+                        "sparsities": sparsities,
                     }
                 )
         # -----------BUILD IMAGE PYRAMID FOR COMPARATIVE MATRICES-----------
@@ -653,11 +670,11 @@ def main():
                 smaller_seq = k_list[0]
             if args.quick:
                 print(
-                    f"Quickly building pairwise matrices for {seq_list[i]} and {seq_list[j]}, using a window size of {window_lengths[0]}.... \n"
+                    f"Quickly building pairwise matrices for {seq_list[0]} and {seq_list[1]}, using a window size of {window_lengths[0]}.... \n"
                 )
             else:
                 print(
-                    f"Building pairwise matrices for {seq_list[i]} and {seq_list[j]}, using a minimum window size of {window_lengths[0]}.... \n"
+                    f"Building pairwise matrices for {seq_list[0]} and {seq_list[1]}, using a minimum window size of {window_lengths[0]}.... \n"
                 )
             image_pyramid = []
             for i in range(len(window_lengths)):
@@ -725,6 +742,7 @@ def main():
                     "resolution": args.resolution,
                     "kmer_length": args.kmer,
                     "title": f"{larger_name}-{smaller_name}",
+                    "sparsities": sparsities,
                 }
             )
 
@@ -752,7 +770,11 @@ def main():
             # Save the dictionary as a pickle file
             with open(pickle_path, "wb") as f:
                 pickle.dump(metadata, f)
-            # Finally gzip the folder
+            # Check if no plot arg is used
+            if args.no_plot:
+                print(f"Saved matrices to {folder_path}. Thank you for using ModDotPlot!\n")
+                sys.exit(0)
+
 
         # Before running dash, change into intervals...
         axes = []

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-VERSION = "0.8.0"`
	`1`	`+VERSION = "0.8.1"`
`2`	`2`	`COLS = [`
`3`	`3`	`"#query_name",`
`4`	`4`	`"query_start",`