From 4b9c4dff3928ed9657bc35f0871834e9a7334cbf Mon Sep 17 00:00:00 2001
From: hanjunlee21 <67846757+hanjunlee21@users.noreply.github.com>
Date: Wed, 9 Feb 2022 09:18:45 -0500
Subject: [PATCH] Add files via upload

---
 ComponentAnalysis/ComponentAnalysis_sim.sh |  22 ++
 ComponentAnalysis/sim_backup.py            | 414 +++++++++++++++++++++
 ComponentAnalysis/sim_foldchange.py        | 414 +++++++++++++++++++++
 ComponentAnalysis/sim_variance.py          | 414 +++++++++++++++++++++
 4 files changed, 1264 insertions(+)
 create mode 100644 ComponentAnalysis/ComponentAnalysis_sim.sh
 create mode 100644 ComponentAnalysis/sim_backup.py
 create mode 100644 ComponentAnalysis/sim_foldchange.py
 create mode 100644 ComponentAnalysis/sim_variance.py

diff --git a/ComponentAnalysis/ComponentAnalysis_sim.sh b/ComponentAnalysis/ComponentAnalysis_sim.sh
new file mode 100644
index 0000000..cba5645
--- /dev/null
+++ b/ComponentAnalysis/ComponentAnalysis_sim.sh
@@ -0,0 +1,22 @@
+R="NORMAL"
+Q="DLBCL"
+O="DLBCLvsNORMAL"
+PYTHONVERSION="python3.7"
+
+# backup
+cp ~/miniconda3/lib/$PYTHONVERSION/site-packages/chess/sim.py sim_backup.py
+
+# inverse of Fano factor ("SN")
+chess sim -p 4 ../COOL/$R.chr2.25kb.KR.cool ../COOL/$Q.chr2.25kb.KR.cool ../Pairs/hg19.chr2.2Mb.500kb.txt ../CHESS/chr2.2Mb.500kb/$O.tsv
+
+# foldchange
+cp sim_foldchange.py ~/miniconda3/lib/$PYTHONVERSION/site-packages/chess/sim.py
+chess sim -p 4 ../COOL/$R.chr2.25kb.KR.cool ../COOL/$Q.chr2.25kb.KR.cool ../Pairs/hg19.chr2.2Mb.500kb.txt ../CHESS/chr2.2Mb.500kb/$O.foldchange.tsv
+
+# variance
+cp sim_variance.py ~/miniconda3/lib/$PYTHONVERSION/site-packages/chess/sim.py
+chess sim -p 4 ../COOL/$R.chr2.25kb.KR.cool ../COOL/$Q.chr2.25kb.KR.cool ../Pairs/hg19.chr2.2Mb.500kb.txt ../CHESS/chr2.2Mb.500kb/$O.variance.tsv
+
+# recovery
+cp sim_backup.py ~/miniconda3/lib/$PYTHONVERSION/site-packages/chess/sim.py
+
diff --git a/ComponentAnalysis/sim_backup.py b/ComponentAnalysis/sim_backup.py
new file mode 100644
index 0000000..e7296b1
--- /dev/null
+++ b/ComponentAnalysis/sim_backup.py
@@ -0,0 +1,414 @@
+#!/usr/bin/env python
+
+import logging
+import numpy as np
+
+from skimage.transform import resize
+from skimage.metrics import structural_similarity
+from numpy.lib.stride_tricks import as_strided
+from .helpers import sub_matrix_from_edges_dict
+import uuid
+
+logger = logging.getLogger(__name__)
+
+
+def SN(M1, M2, size=7):
+    """Compute average of local signal to noise ratios in M1 - M2.
+
+    Slide a window of dimensions size^2 over M1 - M2, compute
+    abs(mean(window)) / var(window) in each and take mean of all windows.
+    :param M1: Numpy Matrix.
+    :param M2: Numpy Matrix.
+    :param size: edge length of sliding window (must be odd), defaults to 7
+    :returns: float, the average of the local signal to noise ratios.
+    """
+    M = M1 - M2
+    uM = filter_uniform_filter(M, size=size)
+    vM = filter_uniform_filter(M, size=size, func=np.nanvar)
+    SN = np.nanmean(np.abs(uM) / vM)
+
+    return float(SN)
+
+
+def filter_uniform_filter(M, size=7, no_data_val=None,
+                          func=np.nanmean):
+    """Apply func to sliding window on input matrix.
+
+    Adaption from:
+    www.programcreek.com/python/example/93943/scipy.ndimage.uniform_filter
+    This is equivalent to scipy.ndimage.uniform_filter, but can handle nan's,
+    and can use numpy nanmean/median/max/min functions.
+    :param M: input data, numpy matrix.
+    :param size: edge length of sliding window (must be odd), defaults to 7
+    :type size: number, optional
+    :param no_data_val: value in matrix that is treated as no data value,
+                        defaults to None
+    :param func: [description], defaults to np.nanmean
+    :returns: func of the matrix A filtered by a uniform kernel of size=size
+    """
+
+    assert size % 2 == 1, 'Please supply an odd size'
+    assert M.dtype == np.dtype('float64'), 'Input must have dtype(float64)'
+    rows, cols = M.shape
+
+    padded_A = np.empty(shape=(rows + size-1,
+                               cols + size-1),
+                        dtype=M.dtype)
+    padded_A[:] = np.nan
+    rows_pad, cols_pad = padded_A.shape
+
+    if no_data_val:
+        mask = M == no_data_val
+        M[mask] = np.nan
+
+    upleft_data_start = int((size - 1) / 2)
+    rl, cl = rows_pad - upleft_data_start, cols_pad - upleft_data_start
+    padded_A[upleft_data_start: rl, upleft_data_start: cl] = M.copy()
+
+    n, m = M.shape
+    strided_data = as_strided(padded_A, (n, m, size, size),
+                              padded_A.strides+padded_A.strides)
+    strided_data = strided_data.copy().reshape((n, m, size**2))
+
+    return func(strided_data, axis=2)
+
+
+def find_masked_rows(m, masking_value=0):
+    """Find rows in m filled with masking value.
+
+    :param m: Numpy matrix to be searched for masked rows.
+    :param masking_value: Value that corresponds to missing contacts,
+                          defaults to 0
+    :returns: Indices of masked rows.
+    """
+    s = np.sum(m, 0)
+    n_bins = m.shape[0]
+    cutoff = n_bins * masking_value
+    idxs = np.where(s == cutoff)[0]
+
+    return idxs
+
+
+def remove_rows(m, target_rows):
+    """Remove target bins from m.
+
+    Remove rows and columns with indices indicated in target rows form m.
+    :param m: Numpy matrix.
+    :param target_rows: List of indices of bins to be removed from m.
+    :returns: Truncated numpy matrix.
+    """
+    idxs = target_rows
+    m_removed = np.delete(m, idxs, 0)
+    m_removed = np.delete(m_removed, idxs, 1)
+
+    return m_removed
+
+
+def comparison_worker(input_queue, output_queue,
+                      reference_edges, reference_regions,
+                      query_edges, query_regions,
+                      min_bins=20,
+                      keep_unmappable_bins=False,
+                      absolute_window_size=None,
+                      relative_window_size=1.,
+                      mappability_cutoff=0.1,
+                      default_value=1):
+    worker_id = uuid.uuid4()
+    try:
+        while True:
+            data = input_queue.get(block=True)
+            if data is None:
+                break
+            pair, compute_sn = data
+
+            pair_ix, ssim, sn = compare_pair(pair,
+                                             reference_edges, reference_regions,
+                                             query_edges, query_regions,
+                                             min_bins=min_bins,
+                                             keep_unmappable_bins=keep_unmappable_bins,
+                                             absolute_window_size=absolute_window_size,
+                                             relative_window_size=relative_window_size,
+                                             mappability_cutoff=mappability_cutoff,
+                                             default_value=default_value,
+                                             compute_sn=compute_sn)
+            output_queue.put((pair_ix, ssim, sn))
+    except Exception as e:
+        logger.error("Worker {} encountered a problem: {}".format(worker_id, e))
+        output_queue.put(e)
+
+    logger.info("Worker {} exited gracefully".format(worker_id))
+
+
+def chunk_comparison_worker(input_queue, output_queue,
+                            reference_edges, reference_regions,
+                            query_edges, query_regions,
+                            min_bins=20,
+                            keep_unmappable_bins=False,
+                            absolute_window_size=None,
+                            relative_window_size=1.,
+                            mappability_cutoff=0.1,
+                            default_value=1):
+    worker_id = uuid.uuid4()
+    previous_reference = [None, (None, None)]
+    previous_query = [None, (None, None)]
+    try:
+        while True:
+            data = input_queue.get(block=True)
+            if data is None:
+                break
+            chunk, compute_sn = data
+
+            results = []
+            for pair in chunk:
+                pair_ix, reference_region, query_region = pair
+                try:
+                    if previous_reference[0] is not None and reference_region == previous_reference[0]:
+                        reference, ref_rs = previous_reference[1]
+                    else:
+                        reference, ref_rs = sub_matrix_from_edges_dict(reference_edges,
+                                                                       reference_regions,
+                                                                       reference_region,
+                                                                       default_weight=default_value)
+                        previous_reference = [reference_region, (reference, ref_rs)]
+
+                    if previous_query[0] is not None and query_region == previous_query[0]:
+                        query, qry_rs = previous_query[1]
+                    else:
+                        query, qry_rs = sub_matrix_from_edges_dict(query_edges,
+                                                                   query_regions,
+                                                                   query_region,
+                                                                   default_weight=default_value)
+                        previous_query = [query_region, (query, qry_rs)]
+                except ValueError:
+                    results.append([pair_ix, np.nan, np.nan])
+                    continue
+
+                pair_ix, ssim, sn = compare_matrix_pair(reference, reference_region,
+                                                        query, query_region,
+                                                        pair_ix=pair_ix,
+                                                        min_bins=min_bins,
+                                                        keep_unmappable_bins=keep_unmappable_bins,
+                                                        absolute_window_size=absolute_window_size,
+                                                        relative_window_size=relative_window_size,
+                                                        mappability_cutoff=mappability_cutoff,
+                                                        default_value=default_value,
+                                                        compute_sn=compute_sn)
+                results.append([pair_ix, ssim, sn])
+            output_queue.put(results)
+    except Exception as e:
+        logger.error("Worker {} encountered a problem: {}".format(worker_id, e))
+        output_queue.put(e)
+
+    logger.debug("Worker {} exited gracefully".format(worker_id))
+
+
+def compare_pair(pair, reference_edges, reference_regions,
+                 query_edges, query_regions,
+                 min_bins=20,
+                 keep_unmappable_bins=False,
+                 absolute_window_size=None,
+                 relative_window_size=1.,
+                 mappability_cutoff=0.1,
+                 default_value=1,
+                 compute_sn=True
+                 ):
+    """Run comparison of given Hi-C matrices without any background model.
+
+    Compare given submatrices of the full Hi-C matrices in sampleID2hic.
+    All submatrices (specified in pairs) have to be of the same size.
+    The assignment of reference and query samples is arbitrary in this case,
+    but has to be consistent.
+    :param edges: edges dictionary
+    :param regions: regions intervaltree
+
+    :param worker_ID: String or int used as ID for the process in which this
+                      function is run.
+    :param pairs: Dictionary indicating the submatrix pairs that
+                  will be compared.
+                  Must have form:
+                  {
+                    pair_id: (reference_region, query_region)
+                  },
+                  where reference_region and query_region
+                  are :class:`~GenomicRegion` objects.
+    :param min_bins: Minimum size of matrix to be considered for comparison,
+                     defaults to 20
+    :param keep_unmappable_bins: If True, disables removal of unmappable bins
+                                 from matrices prior to comparison,
+                                 defaults to False
+    :param absolute_windowsize: Absolute window size used in the
+                                structural similarity function. Overwrites
+                                relative_windowsize, defaults to None
+    :param relative_windowsize: Window size relative to the size
+                                of the compared matrices,
+                                used in the structural similarity function,
+                                defaults to 1.
+    :param mappability_cutoff: Maximum fraction of unmappable bins allowed
+                               in a matrix to be considered for comparison,
+                               defaults to 0.1
+
+    :returns: results dictionary of form:
+              {
+                pair_id: (similarity_score, signal_to_noise_value)
+              }
+    """
+    pair_ix, reference_region, query_region = pair
+    try:
+        reference, ref_rs = sub_matrix_from_edges_dict(reference_edges, reference_regions,
+                                                       reference_region, default_weight=default_value)
+        query, qry_rs = sub_matrix_from_edges_dict(query_edges, query_regions,
+                                                   query_region, default_weight=default_value)
+    except ValueError:
+        return pair_ix, np.nan, np.nan
+
+    return compare_matrix_pair(reference, reference_region,
+                               query, query_region,
+                               pair_ix=pair_ix,
+                               min_bins=min_bins,
+                               keep_unmappable_bins=keep_unmappable_bins,
+                               absolute_window_size=absolute_window_size,
+                               relative_window_size=relative_window_size,
+                               mappability_cutoff=mappability_cutoff,
+                               default_value=default_value,
+                               compute_sn=compute_sn)
+
+
+def compare_matrix_pair(reference, reference_region,
+                        query, query_region, pair_ix=None,
+                        min_bins=20,
+                        keep_unmappable_bins=False,
+                        absolute_window_size=None,
+                        relative_window_size=1.,
+                        mappability_cutoff=0.1,
+                        default_value=1,
+                        compute_sn=True):
+    # check size criteria
+    if query.shape[0] < min_bins or reference.shape[0] < min_bins:
+        return pair_ix, np.nan, np.nan
+
+    # resize matrices if needed
+    size_factor = len(reference) / len(query)
+    if size_factor < 1:
+        reference = resize(reference, np.shape(query), order=0,
+                           mode='reflect').astype(np.float64)
+    elif size_factor > 1:
+        query = resize(query, np.shape(reference), order=0,
+                       mode='reflect').astype(np.float64)
+
+    # check if either matrix needs to be flipped
+    if reference_region.strand != query_region.strand:
+        query = np.fliplr(np.flipud(query))
+
+    # check content in unmappable bins in input matrices
+    idxs = []
+    init_filter_pass = True
+    if mappability_cutoff > 0:
+        ms = [reference, query]
+        for m in ms:
+            m = np.array(m)
+            idx = find_masked_rows(m, masking_value=default_value)
+            idxs.append(idx)
+            if len(idx) / np.shape(m)[0] > mappability_cutoff:
+                init_filter_pass = False
+    if not init_filter_pass:
+        return pair_ix, np.nan, np.nan
+
+    # remove masked bins
+    if not keep_unmappable_bins:
+        target_rows = np.union1d(*idxs)
+        reference = remove_rows(reference, target_rows)
+        query = remove_rows(query, target_rows)
+
+    # compare!
+    if absolute_window_size:
+        window_size = absolute_window_size
+    else:
+        window_size = int(np.shape(reference)[0] * relative_window_size)
+    if window_size % 2 == 0:
+        window_size -= 1
+
+    # prevent from going lower than three.
+    # breaks at ws = 1 because of 0 division, and 2 is not odd.
+    window_size = max(window_size, 3)
+
+    # actual comparison
+    curr_ssim = structural_similarity(reference, query, win_size=window_size)
+    if compute_sn:
+        curr_sn = SN(reference, query)
+    else:
+        curr_sn = None
+
+    return pair_ix, curr_ssim, curr_sn
+
+
+def compare_structures(reference_edges, reference_regions,
+                       query_edges, query_regions,
+                       pairs, worker_ID,
+                       min_bins=20,
+                       keep_unmappable_bins=False,
+                       absolute_window_size=None,
+                       relative_window_size=1.,
+                       mappability_cutoff=0.1,
+                       default_value=1):
+    """Run comparison of given Hi-C matrices without any background model.
+
+    Compare given submatrices of the full Hi-C matrices in sampleID2hic.
+    All submatrices (specified in pairs) have to be of the same size.
+    The assignment of reference and query samples is arbitrary in this case,
+    but has to be consistent.
+    :param edges: edges dictionary
+    :param regions: regions intervaltree
+
+    :param worker_ID: String or int used as ID for the process in which this
+                      function is run.
+    :param pairs: Dictionary indicating the submatrix pairs that
+                  will be compared.
+                  Must have form:
+                  {
+                    pair_id: (reference_region, query_region)
+                  },
+                  where reference_region and query_region
+                  are :class:`~GenomicRegion` objects.
+    :param min_bins: Minimum size of matrix to be considered for comparison,
+                     defaults to 20
+    :param keep_unmappable_bins: If True, disables removal of unmappable bins
+                                 from matrices prior to comparison,
+                                 defaults to False
+    :param absolute_windowsize: Absolute window size used in the
+                                structural similarity function. Overwrites
+                                relative_windowsize, defaults to None
+    :param relative_windowsize: Window size relative to the size
+                                of the compared matrices,
+                                used in the structural similarity function,
+                                defaults to 1.
+    :param mappability_cutoff: Maximum fraction of unmappable bins allowed
+                               in a matrix to be considered for comparison,
+                               defaults to 0.1
+    :returns: results dictionary of form:
+              {
+                pair_id: (similarity_score, signal_to_noise_value)
+              }
+    """
+
+    results = {}  # store the ssim in here
+    no_pass = set()
+
+    for pair in pairs:
+        pair_ix, ssim, sn = compare_pair(pair,
+                                         reference_edges, reference_regions,
+                                         query_edges, query_regions,
+                                         min_bins=min_bins,
+                                         keep_unmappable_bins=keep_unmappable_bins,
+                                         absolute_window_size=absolute_window_size,
+                                         relative_window_size=relative_window_size,
+                                         mappability_cutoff=mappability_cutoff,
+                                         default_value=default_value)
+        if np.isnan(ssim):
+            no_pass.add(pair_ix)
+            results[pair_ix] = (np.nan, np.nan)
+        else:
+            results[pair_ix] = (ssim, sn)
+
+    logger.debug('[WORKER #{}]: Done!'.format(worker_ID))
+
+    return results
diff --git a/ComponentAnalysis/sim_foldchange.py b/ComponentAnalysis/sim_foldchange.py
new file mode 100644
index 0000000..4a7e88a
--- /dev/null
+++ b/ComponentAnalysis/sim_foldchange.py
@@ -0,0 +1,414 @@
+#!/usr/bin/env python
+
+import logging
+import numpy as np
+
+from skimage.transform import resize
+from skimage.metrics import structural_similarity
+from numpy.lib.stride_tricks import as_strided
+from .helpers import sub_matrix_from_edges_dict
+import uuid
+
+logger = logging.getLogger(__name__)
+
+
+def SN(M1, M2, size=7):
+    """Compute average of local signal to noise ratios in M1 - M2.
+
+    Slide a window of dimensions size^2 over M1 - M2, compute
+    abs(mean(window)) / var(window) in each and take mean of all windows.
+    :param M1: Numpy Matrix.
+    :param M2: Numpy Matrix.
+    :param size: edge length of sliding window (must be odd), defaults to 7
+    :returns: float, the average of the local signal to noise ratios.
+    """
+    M = M1 - M2
+    uM = filter_uniform_filter(M, size=size)
+    vM = filter_uniform_filter(M, size=size, func=np.nanvar)
+    SN = np.nanmean(np.abs(uM))
+
+    return float(SN)
+
+
+def filter_uniform_filter(M, size=7, no_data_val=None,
+                          func=np.nanmean):
+    """Apply func to sliding window on input matrix.
+
+    Adaption from:
+    www.programcreek.com/python/example/93943/scipy.ndimage.uniform_filter
+    This is equivalent to scipy.ndimage.uniform_filter, but can handle nan's,
+    and can use numpy nanmean/median/max/min functions.
+    :param M: input data, numpy matrix.
+    :param size: edge length of sliding window (must be odd), defaults to 7
+    :type size: number, optional
+    :param no_data_val: value in matrix that is treated as no data value,
+                        defaults to None
+    :param func: [description], defaults to np.nanmean
+    :returns: func of the matrix A filtered by a uniform kernel of size=size
+    """
+
+    assert size % 2 == 1, 'Please supply an odd size'
+    assert M.dtype == np.dtype('float64'), 'Input must have dtype(float64)'
+    rows, cols = M.shape
+
+    padded_A = np.empty(shape=(rows + size-1,
+                               cols + size-1),
+                        dtype=M.dtype)
+    padded_A[:] = np.nan
+    rows_pad, cols_pad = padded_A.shape
+
+    if no_data_val:
+        mask = M == no_data_val
+        M[mask] = np.nan
+
+    upleft_data_start = int((size - 1) / 2)
+    rl, cl = rows_pad - upleft_data_start, cols_pad - upleft_data_start
+    padded_A[upleft_data_start: rl, upleft_data_start: cl] = M.copy()
+
+    n, m = M.shape
+    strided_data = as_strided(padded_A, (n, m, size, size),
+                              padded_A.strides+padded_A.strides)
+    strided_data = strided_data.copy().reshape((n, m, size**2))
+
+    return func(strided_data, axis=2)
+
+
+def find_masked_rows(m, masking_value=0):
+    """Find rows in m filled with masking value.
+
+    :param m: Numpy matrix to be searched for masked rows.
+    :param masking_value: Value that corresponds to missing contacts,
+                          defaults to 0
+    :returns: Indices of masked rows.
+    """
+    s = np.sum(m, 0)
+    n_bins = m.shape[0]
+    cutoff = n_bins * masking_value
+    idxs = np.where(s == cutoff)[0]
+
+    return idxs
+
+
+def remove_rows(m, target_rows):
+    """Remove target bins from m.
+
+    Remove rows and columns with indices indicated in target rows form m.
+    :param m: Numpy matrix.
+    :param target_rows: List of indices of bins to be removed from m.
+    :returns: Truncated numpy matrix.
+    """
+    idxs = target_rows
+    m_removed = np.delete(m, idxs, 0)
+    m_removed = np.delete(m_removed, idxs, 1)
+
+    return m_removed
+
+
+def comparison_worker(input_queue, output_queue,
+                      reference_edges, reference_regions,
+                      query_edges, query_regions,
+                      min_bins=20,
+                      keep_unmappable_bins=False,
+                      absolute_window_size=None,
+                      relative_window_size=1.,
+                      mappability_cutoff=0.1,
+                      default_value=1):
+    worker_id = uuid.uuid4()
+    try:
+        while True:
+            data = input_queue.get(block=True)
+            if data is None:
+                break
+            pair, compute_sn = data
+
+            pair_ix, ssim, sn = compare_pair(pair,
+                                             reference_edges, reference_regions,
+                                             query_edges, query_regions,
+                                             min_bins=min_bins,
+                                             keep_unmappable_bins=keep_unmappable_bins,
+                                             absolute_window_size=absolute_window_size,
+                                             relative_window_size=relative_window_size,
+                                             mappability_cutoff=mappability_cutoff,
+                                             default_value=default_value,
+                                             compute_sn=compute_sn)
+            output_queue.put((pair_ix, ssim, sn))
+    except Exception as e:
+        logger.error("Worker {} encountered a problem: {}".format(worker_id, e))
+        output_queue.put(e)
+
+    logger.info("Worker {} exited gracefully".format(worker_id))
+
+
+def chunk_comparison_worker(input_queue, output_queue,
+                            reference_edges, reference_regions,
+                            query_edges, query_regions,
+                            min_bins=20,
+                            keep_unmappable_bins=False,
+                            absolute_window_size=None,
+                            relative_window_size=1.,
+                            mappability_cutoff=0.1,
+                            default_value=1):
+    worker_id = uuid.uuid4()
+    previous_reference = [None, (None, None)]
+    previous_query = [None, (None, None)]
+    try:
+        while True:
+            data = input_queue.get(block=True)
+            if data is None:
+                break
+            chunk, compute_sn = data
+
+            results = []
+            for pair in chunk:
+                pair_ix, reference_region, query_region = pair
+                try:
+                    if previous_reference[0] is not None and reference_region == previous_reference[0]:
+                        reference, ref_rs = previous_reference[1]
+                    else:
+                        reference, ref_rs = sub_matrix_from_edges_dict(reference_edges,
+                                                                       reference_regions,
+                                                                       reference_region,
+                                                                       default_weight=default_value)
+                        previous_reference = [reference_region, (reference, ref_rs)]
+
+                    if previous_query[0] is not None and query_region == previous_query[0]:
+                        query, qry_rs = previous_query[1]
+                    else:
+                        query, qry_rs = sub_matrix_from_edges_dict(query_edges,
+                                                                   query_regions,
+                                                                   query_region,
+                                                                   default_weight=default_value)
+                        previous_query = [query_region, (query, qry_rs)]
+                except ValueError:
+                    results.append([pair_ix, np.nan, np.nan])
+                    continue
+
+                pair_ix, ssim, sn = compare_matrix_pair(reference, reference_region,
+                                                        query, query_region,
+                                                        pair_ix=pair_ix,
+                                                        min_bins=min_bins,
+                                                        keep_unmappable_bins=keep_unmappable_bins,
+                                                        absolute_window_size=absolute_window_size,
+                                                        relative_window_size=relative_window_size,
+                                                        mappability_cutoff=mappability_cutoff,
+                                                        default_value=default_value,
+                                                        compute_sn=compute_sn)
+                results.append([pair_ix, ssim, sn])
+            output_queue.put(results)
+    except Exception as e:
+        logger.error("Worker {} encountered a problem: {}".format(worker_id, e))
+        output_queue.put(e)
+
+    logger.debug("Worker {} exited gracefully".format(worker_id))
+
+
+def compare_pair(pair, reference_edges, reference_regions,
+                 query_edges, query_regions,
+                 min_bins=20,
+                 keep_unmappable_bins=False,
+                 absolute_window_size=None,
+                 relative_window_size=1.,
+                 mappability_cutoff=0.1,
+                 default_value=1,
+                 compute_sn=True
+                 ):
+    """Run comparison of given Hi-C matrices without any background model.
+
+    Compare given submatrices of the full Hi-C matrices in sampleID2hic.
+    All submatrices (specified in pairs) have to be of the same size.
+    The assignment of reference and query samples is arbitrary in this case,
+    but has to be consistent.
+    :param edges: edges dictionary
+    :param regions: regions intervaltree
+
+    :param worker_ID: String or int used as ID for the process in which this
+                      function is run.
+    :param pairs: Dictionary indicating the submatrix pairs that
+                  will be compared.
+                  Must have form:
+                  {
+                    pair_id: (reference_region, query_region)
+                  },
+                  where reference_region and query_region
+                  are :class:`~GenomicRegion` objects.
+    :param min_bins: Minimum size of matrix to be considered for comparison,
+                     defaults to 20
+    :param keep_unmappable_bins: If True, disables removal of unmappable bins
+                                 from matrices prior to comparison,
+                                 defaults to False
+    :param absolute_windowsize: Absolute window size used in the
+                                structural similarity function. Overwrites
+                                relative_windowsize, defaults to None
+    :param relative_windowsize: Window size relative to the size
+                                of the compared matrices,
+                                used in the structural similarity function,
+                                defaults to 1.
+    :param mappability_cutoff: Maximum fraction of unmappable bins allowed
+                               in a matrix to be considered for comparison,
+                               defaults to 0.1
+
+    :returns: results dictionary of form:
+              {
+                pair_id: (similarity_score, signal_to_noise_value)
+              }
+    """
+    pair_ix, reference_region, query_region = pair
+    try:
+        reference, ref_rs = sub_matrix_from_edges_dict(reference_edges, reference_regions,
+                                                       reference_region, default_weight=default_value)
+        query, qry_rs = sub_matrix_from_edges_dict(query_edges, query_regions,
+                                                   query_region, default_weight=default_value)
+    except ValueError:
+        return pair_ix, np.nan, np.nan
+
+    return compare_matrix_pair(reference, reference_region,
+                               query, query_region,
+                               pair_ix=pair_ix,
+                               min_bins=min_bins,
+                               keep_unmappable_bins=keep_unmappable_bins,
+                               absolute_window_size=absolute_window_size,
+                               relative_window_size=relative_window_size,
+                               mappability_cutoff=mappability_cutoff,
+                               default_value=default_value,
+                               compute_sn=compute_sn)
+
+
+def compare_matrix_pair(reference, reference_region,
+                        query, query_region, pair_ix=None,
+                        min_bins=20,
+                        keep_unmappable_bins=False,
+                        absolute_window_size=None,
+                        relative_window_size=1.,
+                        mappability_cutoff=0.1,
+                        default_value=1,
+                        compute_sn=True):
+    # check size criteria
+    if query.shape[0] < min_bins or reference.shape[0] < min_bins:
+        return pair_ix, np.nan, np.nan
+
+    # resize matrices if needed
+    size_factor = len(reference) / len(query)
+    if size_factor < 1:
+        reference = resize(reference, np.shape(query), order=0,
+                           mode='reflect').astype(np.float64)
+    elif size_factor > 1:
+        query = resize(query, np.shape(reference), order=0,
+                       mode='reflect').astype(np.float64)
+
+    # check if either matrix needs to be flipped
+    if reference_region.strand != query_region.strand:
+        query = np.fliplr(np.flipud(query))
+
+    # check content in unmappable bins in input matrices
+    idxs = []
+    init_filter_pass = True
+    if mappability_cutoff > 0:
+        ms = [reference, query]
+        for m in ms:
+            m = np.array(m)
+            idx = find_masked_rows(m, masking_value=default_value)
+            idxs.append(idx)
+            if len(idx) / np.shape(m)[0] > mappability_cutoff:
+                init_filter_pass = False
+    if not init_filter_pass:
+        return pair_ix, np.nan, np.nan
+
+    # remove masked bins
+    if not keep_unmappable_bins:
+        target_rows = np.union1d(*idxs)
+        reference = remove_rows(reference, target_rows)
+        query = remove_rows(query, target_rows)
+
+    # compare!
+    if absolute_window_size:
+        window_size = absolute_window_size
+    else:
+        window_size = int(np.shape(reference)[0] * relative_window_size)
+    if window_size % 2 == 0:
+        window_size -= 1
+
+    # prevent from going lower than three.
+    # breaks at ws = 1 because of 0 division, and 2 is not odd.
+    window_size = max(window_size, 3)
+
+    # actual comparison
+    curr_ssim = structural_similarity(reference, query, win_size=window_size)
+    if compute_sn:
+        curr_sn = SN(reference, query)
+    else:
+        curr_sn = None
+
+    return pair_ix, curr_ssim, curr_sn
+
+
+def compare_structures(reference_edges, reference_regions,
+                       query_edges, query_regions,
+                       pairs, worker_ID,
+                       min_bins=20,
+                       keep_unmappable_bins=False,
+                       absolute_window_size=None,
+                       relative_window_size=1.,
+                       mappability_cutoff=0.1,
+                       default_value=1):
+    """Run comparison of given Hi-C matrices without any background model.
+
+    Compare given submatrices of the full Hi-C matrices in sampleID2hic.
+    All submatrices (specified in pairs) have to be of the same size.
+    The assignment of reference and query samples is arbitrary in this case,
+    but has to be consistent.
+    :param edges: edges dictionary
+    :param regions: regions intervaltree
+
+    :param worker_ID: String or int used as ID for the process in which this
+                      function is run.
+    :param pairs: Dictionary indicating the submatrix pairs that
+                  will be compared.
+                  Must have form:
+                  {
+                    pair_id: (reference_region, query_region)
+                  },
+                  where reference_region and query_region
+                  are :class:`~GenomicRegion` objects.
+    :param min_bins: Minimum size of matrix to be considered for comparison,
+                     defaults to 20
+    :param keep_unmappable_bins: If True, disables removal of unmappable bins
+                                 from matrices prior to comparison,
+                                 defaults to False
+    :param absolute_windowsize: Absolute window size used in the
+                                structural similarity function. Overwrites
+                                relative_windowsize, defaults to None
+    :param relative_windowsize: Window size relative to the size
+                                of the compared matrices,
+                                used in the structural similarity function,
+                                defaults to 1.
+    :param mappability_cutoff: Maximum fraction of unmappable bins allowed
+                               in a matrix to be considered for comparison,
+                               defaults to 0.1
+    :returns: results dictionary of form:
+              {
+                pair_id: (similarity_score, signal_to_noise_value)
+              }
+    """
+
+    results = {}  # store the ssim in here
+    no_pass = set()
+
+    for pair in pairs:
+        pair_ix, ssim, sn = compare_pair(pair,
+                                         reference_edges, reference_regions,
+                                         query_edges, query_regions,
+                                         min_bins=min_bins,
+                                         keep_unmappable_bins=keep_unmappable_bins,
+                                         absolute_window_size=absolute_window_size,
+                                         relative_window_size=relative_window_size,
+                                         mappability_cutoff=mappability_cutoff,
+                                         default_value=default_value)
+        if np.isnan(ssim):
+            no_pass.add(pair_ix)
+            results[pair_ix] = (np.nan, np.nan)
+        else:
+            results[pair_ix] = (ssim, sn)
+
+    logger.debug('[WORKER #{}]: Done!'.format(worker_ID))
+
+    return results
diff --git a/ComponentAnalysis/sim_variance.py b/ComponentAnalysis/sim_variance.py
new file mode 100644
index 0000000..2483fdf
--- /dev/null
+++ b/ComponentAnalysis/sim_variance.py
@@ -0,0 +1,414 @@
+#!/usr/bin/env python
+
+import logging
+import numpy as np
+
+from skimage.transform import resize
+from skimage.metrics import structural_similarity
+from numpy.lib.stride_tricks import as_strided
+from .helpers import sub_matrix_from_edges_dict
+import uuid
+
+logger = logging.getLogger(__name__)
+
+
+def SN(M1, M2, size=7):
+    """Compute average of local signal to noise ratios in M1 - M2.
+
+    Slide a window of dimensions size^2 over M1 - M2, compute
+    abs(mean(window)) / var(window) in each and take mean of all windows.
+    :param M1: Numpy Matrix.
+    :param M2: Numpy Matrix.
+    :param size: edge length of sliding window (must be odd), defaults to 7
+    :returns: float, the average of the local signal to noise ratios.
+    """
+    M = M1 - M2
+    uM = filter_uniform_filter(M, size=size)
+    vM = filter_uniform_filter(M, size=size, func=np.nanvar)
+    SN = np.nanmean(vM)
+
+    return float(SN)
+
+
+def filter_uniform_filter(M, size=7, no_data_val=None,
+                          func=np.nanmean):
+    """Apply func to sliding window on input matrix.
+
+    Adaption from:
+    www.programcreek.com/python/example/93943/scipy.ndimage.uniform_filter
+    This is equivalent to scipy.ndimage.uniform_filter, but can handle nan's,
+    and can use numpy nanmean/median/max/min functions.
+    :param M: input data, numpy matrix.
+    :param size: edge length of sliding window (must be odd), defaults to 7
+    :type size: number, optional
+    :param no_data_val: value in matrix that is treated as no data value,
+                        defaults to None
+    :param func: [description], defaults to np.nanmean
+    :returns: func of the matrix A filtered by a uniform kernel of size=size
+    """
+
+    assert size % 2 == 1, 'Please supply an odd size'
+    assert M.dtype == np.dtype('float64'), 'Input must have dtype(float64)'
+    rows, cols = M.shape
+
+    padded_A = np.empty(shape=(rows + size-1,
+                               cols + size-1),
+                        dtype=M.dtype)
+    padded_A[:] = np.nan
+    rows_pad, cols_pad = padded_A.shape
+
+    if no_data_val:
+        mask = M == no_data_val
+        M[mask] = np.nan
+
+    upleft_data_start = int((size - 1) / 2)
+    rl, cl = rows_pad - upleft_data_start, cols_pad - upleft_data_start
+    padded_A[upleft_data_start: rl, upleft_data_start: cl] = M.copy()
+
+    n, m = M.shape
+    strided_data = as_strided(padded_A, (n, m, size, size),
+                              padded_A.strides+padded_A.strides)
+    strided_data = strided_data.copy().reshape((n, m, size**2))
+
+    return func(strided_data, axis=2)
+
+
+def find_masked_rows(m, masking_value=0):
+    """Find rows in m filled with masking value.
+
+    :param m: Numpy matrix to be searched for masked rows.
+    :param masking_value: Value that corresponds to missing contacts,
+                          defaults to 0
+    :returns: Indices of masked rows.
+    """
+    s = np.sum(m, 0)
+    n_bins = m.shape[0]
+    cutoff = n_bins * masking_value
+    idxs = np.where(s == cutoff)[0]
+
+    return idxs
+
+
+def remove_rows(m, target_rows):
+    """Remove target bins from m.
+
+    Remove rows and columns with indices indicated in target rows form m.
+    :param m: Numpy matrix.
+    :param target_rows: List of indices of bins to be removed from m.
+    :returns: Truncated numpy matrix.
+    """
+    idxs = target_rows
+    m_removed = np.delete(m, idxs, 0)
+    m_removed = np.delete(m_removed, idxs, 1)
+
+    return m_removed
+
+
+def comparison_worker(input_queue, output_queue,
+                      reference_edges, reference_regions,
+                      query_edges, query_regions,
+                      min_bins=20,
+                      keep_unmappable_bins=False,
+                      absolute_window_size=None,
+                      relative_window_size=1.,
+                      mappability_cutoff=0.1,
+                      default_value=1):
+    worker_id = uuid.uuid4()
+    try:
+        while True:
+            data = input_queue.get(block=True)
+            if data is None:
+                break
+            pair, compute_sn = data
+
+            pair_ix, ssim, sn = compare_pair(pair,
+                                             reference_edges, reference_regions,
+                                             query_edges, query_regions,
+                                             min_bins=min_bins,
+                                             keep_unmappable_bins=keep_unmappable_bins,
+                                             absolute_window_size=absolute_window_size,
+                                             relative_window_size=relative_window_size,
+                                             mappability_cutoff=mappability_cutoff,
+                                             default_value=default_value,
+                                             compute_sn=compute_sn)
+            output_queue.put((pair_ix, ssim, sn))
+    except Exception as e:
+        logger.error("Worker {} encountered a problem: {}".format(worker_id, e))
+        output_queue.put(e)
+
+    logger.info("Worker {} exited gracefully".format(worker_id))
+
+
+def chunk_comparison_worker(input_queue, output_queue,
+                            reference_edges, reference_regions,
+                            query_edges, query_regions,
+                            min_bins=20,
+                            keep_unmappable_bins=False,
+                            absolute_window_size=None,
+                            relative_window_size=1.,
+                            mappability_cutoff=0.1,
+                            default_value=1):
+    worker_id = uuid.uuid4()
+    previous_reference = [None, (None, None)]
+    previous_query = [None, (None, None)]
+    try:
+        while True:
+            data = input_queue.get(block=True)
+            if data is None:
+                break
+            chunk, compute_sn = data
+
+            results = []
+            for pair in chunk:
+                pair_ix, reference_region, query_region = pair
+                try:
+                    if previous_reference[0] is not None and reference_region == previous_reference[0]:
+                        reference, ref_rs = previous_reference[1]
+                    else:
+                        reference, ref_rs = sub_matrix_from_edges_dict(reference_edges,
+                                                                       reference_regions,
+                                                                       reference_region,
+                                                                       default_weight=default_value)
+                        previous_reference = [reference_region, (reference, ref_rs)]
+
+                    if previous_query[0] is not None and query_region == previous_query[0]:
+                        query, qry_rs = previous_query[1]
+                    else:
+                        query, qry_rs = sub_matrix_from_edges_dict(query_edges,
+                                                                   query_regions,
+                                                                   query_region,
+                                                                   default_weight=default_value)
+                        previous_query = [query_region, (query, qry_rs)]
+                except ValueError:
+                    results.append([pair_ix, np.nan, np.nan])
+                    continue
+
+                pair_ix, ssim, sn = compare_matrix_pair(reference, reference_region,
+                                                        query, query_region,
+                                                        pair_ix=pair_ix,
+                                                        min_bins=min_bins,
+                                                        keep_unmappable_bins=keep_unmappable_bins,
+                                                        absolute_window_size=absolute_window_size,
+                                                        relative_window_size=relative_window_size,
+                                                        mappability_cutoff=mappability_cutoff,
+                                                        default_value=default_value,
+                                                        compute_sn=compute_sn)
+                results.append([pair_ix, ssim, sn])
+            output_queue.put(results)
+    except Exception as e:
+        logger.error("Worker {} encountered a problem: {}".format(worker_id, e))
+        output_queue.put(e)
+
+    logger.debug("Worker {} exited gracefully".format(worker_id))
+
+
+def compare_pair(pair, reference_edges, reference_regions,
+                 query_edges, query_regions,
+                 min_bins=20,
+                 keep_unmappable_bins=False,
+                 absolute_window_size=None,
+                 relative_window_size=1.,
+                 mappability_cutoff=0.1,
+                 default_value=1,
+                 compute_sn=True
+                 ):
+    """Run comparison of given Hi-C matrices without any background model.
+
+    Compare given submatrices of the full Hi-C matrices in sampleID2hic.
+    All submatrices (specified in pairs) have to be of the same size.
+    The assignment of reference and query samples is arbitrary in this case,
+    but has to be consistent.
+    :param edges: edges dictionary
+    :param regions: regions intervaltree
+
+    :param worker_ID: String or int used as ID for the process in which this
+                      function is run.
+    :param pairs: Dictionary indicating the submatrix pairs that
+                  will be compared.
+                  Must have form:
+                  {
+                    pair_id: (reference_region, query_region)
+                  },
+                  where reference_region and query_region
+                  are :class:`~GenomicRegion` objects.
+    :param min_bins: Minimum size of matrix to be considered for comparison,
+                     defaults to 20
+    :param keep_unmappable_bins: If True, disables removal of unmappable bins
+                                 from matrices prior to comparison,
+                                 defaults to False
+    :param absolute_windowsize: Absolute window size used in the
+                                structural similarity function. Overwrites
+                                relative_windowsize, defaults to None
+    :param relative_windowsize: Window size relative to the size
+                                of the compared matrices,
+                                used in the structural similarity function,
+                                defaults to 1.
+    :param mappability_cutoff: Maximum fraction of unmappable bins allowed
+                               in a matrix to be considered for comparison,
+                               defaults to 0.1
+
+    :returns: results dictionary of form:
+              {
+                pair_id: (similarity_score, signal_to_noise_value)
+              }
+    """
+    pair_ix, reference_region, query_region = pair
+    try:
+        reference, ref_rs = sub_matrix_from_edges_dict(reference_edges, reference_regions,
+                                                       reference_region, default_weight=default_value)
+        query, qry_rs = sub_matrix_from_edges_dict(query_edges, query_regions,
+                                                   query_region, default_weight=default_value)
+    except ValueError:
+        return pair_ix, np.nan, np.nan
+
+    return compare_matrix_pair(reference, reference_region,
+                               query, query_region,
+                               pair_ix=pair_ix,
+                               min_bins=min_bins,
+                               keep_unmappable_bins=keep_unmappable_bins,
+                               absolute_window_size=absolute_window_size,
+                               relative_window_size=relative_window_size,
+                               mappability_cutoff=mappability_cutoff,
+                               default_value=default_value,
+                               compute_sn=compute_sn)
+
+
+def compare_matrix_pair(reference, reference_region,
+                        query, query_region, pair_ix=None,
+                        min_bins=20,
+                        keep_unmappable_bins=False,
+                        absolute_window_size=None,
+                        relative_window_size=1.,
+                        mappability_cutoff=0.1,
+                        default_value=1,
+                        compute_sn=True):
+    # check size criteria
+    if query.shape[0] < min_bins or reference.shape[0] < min_bins:
+        return pair_ix, np.nan, np.nan
+
+    # resize matrices if needed
+    size_factor = len(reference) / len(query)
+    if size_factor < 1:
+        reference = resize(reference, np.shape(query), order=0,
+                           mode='reflect').astype(np.float64)
+    elif size_factor > 1:
+        query = resize(query, np.shape(reference), order=0,
+                       mode='reflect').astype(np.float64)
+
+    # check if either matrix needs to be flipped
+    if reference_region.strand != query_region.strand:
+        query = np.fliplr(np.flipud(query))
+
+    # check content in unmappable bins in input matrices
+    idxs = []
+    init_filter_pass = True
+    if mappability_cutoff > 0:
+        ms = [reference, query]
+        for m in ms:
+            m = np.array(m)
+            idx = find_masked_rows(m, masking_value=default_value)
+            idxs.append(idx)
+            if len(idx) / np.shape(m)[0] > mappability_cutoff:
+                init_filter_pass = False
+    if not init_filter_pass:
+        return pair_ix, np.nan, np.nan
+
+    # remove masked bins
+    if not keep_unmappable_bins:
+        target_rows = np.union1d(*idxs)
+        reference = remove_rows(reference, target_rows)
+        query = remove_rows(query, target_rows)
+
+    # compare!
+    if absolute_window_size:
+        window_size = absolute_window_size
+    else:
+        window_size = int(np.shape(reference)[0] * relative_window_size)
+    if window_size % 2 == 0:
+        window_size -= 1
+
+    # prevent from going lower than three.
+    # breaks at ws = 1 because of 0 division, and 2 is not odd.
+    window_size = max(window_size, 3)
+
+    # actual comparison
+    curr_ssim = structural_similarity(reference, query, win_size=window_size)
+    if compute_sn:
+        curr_sn = SN(reference, query)
+    else:
+        curr_sn = None
+
+    return pair_ix, curr_ssim, curr_sn
+
+
+def compare_structures(reference_edges, reference_regions,
+                       query_edges, query_regions,
+                       pairs, worker_ID,
+                       min_bins=20,
+                       keep_unmappable_bins=False,
+                       absolute_window_size=None,
+                       relative_window_size=1.,
+                       mappability_cutoff=0.1,
+                       default_value=1):
+    """Run comparison of given Hi-C matrices without any background model.
+
+    Compare given submatrices of the full Hi-C matrices in sampleID2hic.
+    All submatrices (specified in pairs) have to be of the same size.
+    The assignment of reference and query samples is arbitrary in this case,
+    but has to be consistent.
+    :param edges: edges dictionary
+    :param regions: regions intervaltree
+
+    :param worker_ID: String or int used as ID for the process in which this
+                      function is run.
+    :param pairs: Dictionary indicating the submatrix pairs that
+                  will be compared.
+                  Must have form:
+                  {
+                    pair_id: (reference_region, query_region)
+                  },
+                  where reference_region and query_region
+                  are :class:`~GenomicRegion` objects.
+    :param min_bins: Minimum size of matrix to be considered for comparison,
+                     defaults to 20
+    :param keep_unmappable_bins: If True, disables removal of unmappable bins
+                                 from matrices prior to comparison,
+                                 defaults to False
+    :param absolute_windowsize: Absolute window size used in the
+                                structural similarity function. Overwrites
+                                relative_windowsize, defaults to None
+    :param relative_windowsize: Window size relative to the size
+                                of the compared matrices,
+                                used in the structural similarity function,
+                                defaults to 1.
+    :param mappability_cutoff: Maximum fraction of unmappable bins allowed
+                               in a matrix to be considered for comparison,
+                               defaults to 0.1
+    :returns: results dictionary of form:
+              {
+                pair_id: (similarity_score, signal_to_noise_value)
+              }
+    """
+
+    results = {}  # store the ssim in here
+    no_pass = set()
+
+    for pair in pairs:
+        pair_ix, ssim, sn = compare_pair(pair,
+                                         reference_edges, reference_regions,
+                                         query_edges, query_regions,
+                                         min_bins=min_bins,
+                                         keep_unmappable_bins=keep_unmappable_bins,
+                                         absolute_window_size=absolute_window_size,
+                                         relative_window_size=relative_window_size,
+                                         mappability_cutoff=mappability_cutoff,
+                                         default_value=default_value)
+        if np.isnan(ssim):
+            no_pass.add(pair_ix)
+            results[pair_ix] = (np.nan, np.nan)
+        else:
+            results[pair_ix] = (ssim, sn)
+
+    logger.debug('[WORKER #{}]: Done!'.format(worker_ID))
+
+    return results