Skip to content

Commit

Permalink
conditional usage of correlation computation based on presence of NaN…
Browse files Browse the repository at this point in the history
…s (1.3.6)
  • Loading branch information
Andrew Schonfeld committed Nov 8, 2019
1 parent 9a5a7a0 commit 2871b9b
Show file tree
Hide file tree
Showing 11 changed files with 134 additions and 53 deletions.
6 changes: 6 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,9 @@ Changelog
* Bug fixes for:
* duplicate loading of histogram data
* string serialization failing when mixing `future.str` & `str` in scatter function

### 1.3.6 (2019-11-08)

* Bug fixes for:
* choose between `pandas.DataFrame.corr` & `numpy.corrcoef` depending on presence of NaNs (`pandas` when the numeric columns contain NaNs, otherwise the faster `numpy.corrcoef`)
* hide timeseries correlations when date columns only contain one day
4 changes: 2 additions & 2 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@
# built documents.
#
# The short X.Y version.
version = u'1.3.5'
version = u'1.3.6'
# The full version, including alpha/beta/rc tags.
release = u'1.3.5'
release = u'1.3.6'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
28 changes: 20 additions & 8 deletions dtale/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

from dtale import dtale
from dtale.cli.clickutils import retrieve_meta_info_and_version
from dtale.utils import (build_shutdown_url, build_url, dict_merge,
filter_df_for_grid, find_dtype_formatter,
from dtale.utils import (build_shutdown_url, build_url, classify_type,
dict_merge, filter_df_for_grid, find_dtype_formatter,
find_selected_column, get_dtypes, get_int_arg,
get_str_arg, grid_columns, grid_formatter, json_date,
json_float, json_int, json_timestamp, jsonify,
Expand Down Expand Up @@ -469,17 +469,29 @@ def get_correlations():
data = DATA[port]
data = data.query(query) if query is not None else data

# using pandas.corr proved to be quite slow on large datasets so I moved to numpy:
# https://stackoverflow.com/questions/48270953/pandas-corr-and-corrwith-very-slow
valid_corr_cols = [c['name'] for c in DTYPES[port] if any((c['dtype'].startswith(s) for s in ['int', 'float']))]
data = np.corrcoef(data[valid_corr_cols].values, rowvar=False)
data = pd.DataFrame(data, columns=valid_corr_cols, index=valid_corr_cols)
valid_corr_cols = []
valid_date_cols = []
for col_info in DTYPES[port]:
name, dtype = map(col_info.get, ['name', 'dtype'])
dtype = classify_type(dtype)
if dtype in ['I', 'F']:
valid_corr_cols.append(name)
elif dtype == 'D' and len(data[name].dropna().unique()) > 1:
valid_date_cols.append(name)

if data[valid_corr_cols].isnull().values.any():
data = data.corr(method='pearson')
else:
# using pandas.corr proved to be quite slow on large datasets so I moved to numpy:
# https://stackoverflow.com/questions/48270953/pandas-corr-and-corrwith-very-slow
data = np.corrcoef(data[valid_corr_cols].values, rowvar=False)
data = pd.DataFrame(data, columns=valid_corr_cols, index=valid_corr_cols)

data.index.name = str('column')
data = data.reset_index()
col_types = grid_columns(data)
f = grid_formatter(col_types, nan_display=None)
return jsonify(data=f.format_dicts(data.itertuples()))
return jsonify(data=f.format_dicts(data.itertuples()), dates=valid_date_cols)
except BaseException as e:
return jsonify(dict(error=str(e), traceback=str(traceback.format_exc())))

Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "dtale",
"version": "1.3.5",
"version": "1.3.6",
"description": "All-purpose Data Viewer",
"main": "main.js",
"directories": {
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def run_tests(self):

setup(
name="dtale",
version="1.3.5",
version="1.3.6",
author="MAN Alpha Technology",
author_email="[email protected]",
description="Web Client for Visualizing Pandas Objects",
Expand Down
3 changes: 2 additions & 1 deletion static/__tests__/data/correlations.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,6 @@
"col4": 1.0,
"column": "col4"
}
]
],
"dates": ["col4", "col5"]
}
64 changes: 55 additions & 9 deletions static/__tests__/popups/Correlations-test.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import _ from "lodash";
import React from "react";

import mockPopsicle from "../MockPopsicle";
import correlationsData from "../data/correlations";
import * as t from "../jest-assertions";
import { buildInnerHTML, withGlobalJquery } from "../test-utils";

Expand All @@ -13,14 +14,6 @@ const chartData = {
type: "correlations",
title: "Correlations Test",
query: "col == 3",
columns: [
{ name: "dtale_index", dtype: "int64" },
{ name: "col1", dtype: "int64" },
{ name: "col2", dtype: "float64" },
{ name: "col3", dtype: "object" },
{ name: "col4", dtype: "datetime64[ns]" },
{ name: "col5", dtype: "datetime64[ns]" },
],
};

const originalOffsetHeight = Object.getOwnPropertyDescriptor(HTMLElement.prototype, "offsetHeight");
Expand All @@ -35,9 +28,15 @@ describe("Correlations tests", () => {
mockPopsicle.mock(url => {
if (url.startsWith("/dtale/correlations")) {
const query = qs.parse(url.split("?")[1]).query;
if (query == "null") {
if (url.startsWith("/dtale/correlations?") && query == "null") {
return { error: "No data found." };
}
if (url.startsWith("/dtale/correlations?") && query == "one-date") {
return { data: correlationsData.data, dates: ["col4"] };
}
if (url.startsWith("/dtale/correlations?") && query == "no-date") {
return { data: correlationsData.data, dates: [] };
}
}
const { urlFetcher } = require("../redux-test-utils").default;
return urlFetcher(url);
Expand Down Expand Up @@ -100,6 +99,53 @@ describe("Correlations tests", () => {
}, 200);
});

test("Correlations rendering data w/ one date column", done => {
const Correlations = require("../../popups/Correlations").ReactCorrelations;
const TimeseriesChartBody = require("../../popups/TimeseriesChartBody").TimeseriesChartBody;
buildInnerHTML("");
const result = mount(<Correlations chartData={_.assign({}, chartData, { query: "one-date" })} />, {
attachTo: document.getElementById("content"),
});
result.update();
setTimeout(() => {
result.update();
const corrGrid = result.first().find("div.ReactVirtualized__Grid__innerScrollContainer");
corrGrid
.find("div.cell")
.at(1)
.simulate("click");
setTimeout(() => {
result.update();
t.equal(result.find(TimeseriesChartBody).length, 1, "should show correlation timeseries");
t.ok(result.find("select.custom-select").length == 0, "should not render date options for timeseries");
t.ok((result.state().selectedDate = "col5"), "should change timeseries date");
done();
}, 200);
}, 200);
});

// Verifies that when the correlations response carries no date columns,
// clicking a correlation cell renders the scatter chart.
test("Correlations rendering data w/ no date columns", done => {
const Correlations = require("../../popups/Correlations").ReactCorrelations;
buildInnerHTML("");
// The "no-date" query is answered by the mocked fetcher with `dates: []`.
const result = mount(<Correlations chartData={_.assign({}, chartData, { query: "no-date" })} />, {
attachTo: document.getElementById("content"),
});
result.update();
// Presumably waits for the mocked correlations request to resolve before interacting.
setTimeout(() => {
result.update();
const corrGrid = result.first().find("div.ReactVirtualized__Grid__innerScrollContainer");
// Click the second rendered grid cell (index 1).
corrGrid
.find("div.cell")
.at(1)
.simulate("click");
// Presumably waits for the click handler's follow-up rendering to finish.
setTimeout(() => {
result.update();
t.equal(result.find("#rawScatterChart").length, 1, "should show scatter chart");
done();
}, 200);
}, 200);
});

test("Correlations missing data", done => {
const Correlations = require("../../popups/Correlations").ReactCorrelations;
const result = mount(<Correlations chartData={_.assign({}, chartData, { query: "null" })} />);
Expand Down
4 changes: 2 additions & 2 deletions static/dtale/DataViewerMenu.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class ReactDataViewerMenu extends React.Component {
const col = _.head(this.props.selectedCols);
this.props.openChart(_.assignIn({ type: "histogram", col, title: col }, this.props));
};
const openScatter = () => {
const openCorrelations = () => {
this.props.openChart(_.assignIn({ type: "correlations", title: "Correlations" }, this.props));
};
const openCoverage = () => {
Expand Down Expand Up @@ -177,7 +177,7 @@ class ReactDataViewerMenu extends React.Component {
</ConditionalRender>
<li>
<span className="toggler-action">
<button className="btn btn-plain" onClick={openScatter}>
<button className="btn btn-plain" onClick={openCorrelations}>
<i className="ico-bubble-chart" />
<span className="font-weight-bold">Correlations</span>
</button>
Expand Down
19 changes: 16 additions & 3 deletions static/popups/Correlations.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,22 @@ const BASE_SCATTER_URL = "/dtale/scatter?";
const BASE_CORRELATIONS_URL = "/dtale/correlations?";
const BASE_CORRELATIONS_TS_URL = "/dtale/correlations-ts?";

/**
 * Build the initial state for the correlations popup.
 *
 * Every key the component later destructures from `this.state` is given an
 * explicit empty default here, including `dates` and `hasDate`, which are
 * only filled in once the correlations response has been received.
 *
 * @returns {object} a fresh state object (new array instances on each call)
 */
function buildState() {
  return {
    chart: null,
    error: null,
    scatterError: null,
    correlations: null,
    selectedCols: [],
    tsUrl: null,
    // Date information comes from the server response; start empty so the
    // state has a consistent shape before loading completes.
    dates: [],
    hasDate: false,
    selectedDate: null,
  };
}

class ReactCorrelations extends React.Component {
constructor(props) {
super(props);
this.state = corrUtils.buildState(props);
this.state = buildState();
_.forEach(
["buildTs", "buildScatter", "viewScatter", "_cellRenderer", "viewScatterRow", "changeDate"],
f => (this[f] = this[f].bind(this))
Expand Down Expand Up @@ -65,7 +77,8 @@ class ReactCorrelations extends React.Component {
this.setState({ error: <RemovableError {...gridData} /> });
return;
}
this.setState({ correlations: gridData.data });
const { data, dates } = gridData;
this.setState({ correlations: data, dates, hasDate: _.size(dates) > 0, selectedDate: _.get(dates, 0, null) });
});
}

Expand Down Expand Up @@ -151,7 +164,7 @@ class ReactCorrelations extends React.Component {
</div>
);
}
const { correlations, selectedCols, tsUrl, hasDate, selectedDate, dates } = this.state;
const { correlations, selectedCols, tsUrl, selectedDate, hasDate, dates } = this.state;
return (
<div key="body" className="modal-body scatter-body">
<BouncerWrapper showBouncer={_.isEmpty(correlations)}>
Expand Down
20 changes: 0 additions & 20 deletions static/popups/correlations/correlationsUtils.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ function toggleBouncer() {
$("#rawScatterChart").toggle();
}

const findDateCols = columns => _.map(_.filter(columns, ({ dtype }) => dtype.startsWith("datetime")), "name");
const pointFormatter = (xProp, yProp) => point => ({ x: point[xProp], y: point[yProp], index: point.index });
const colorScale = chroma.scale(["red", "yellow", "green"]).domain([-1, 0, 1]);
const percent = num => (num === "N/A" ? num : `${_.round(num * 100, 2)}%`);
Expand Down Expand Up @@ -63,29 +62,10 @@ function createScatter(ctx, chartData, xProp, yProp, label, onClick) {
return chart;
}

/**
 * Derive the initial correlations state from the chart's column metadata.
 *
 * The date columns are looked up via `findDateCols`; when at least one
 * exists, the first becomes the selected date, otherwise it is `null`.
 *
 * @param {object} props - component props; `props.chartData.columns` is read
 * @returns {object} the initial state object
 */
function buildState(props) {
  const dateColumns = findDateCols(props.chartData.columns);
  const anyDates = _.size(dateColumns) > 0;
  let firstDate = null;
  if (anyDates) {
    firstDate = _.head(dateColumns);
  }
  return {
    chart: null,
    error: null,
    scatterError: null,
    correlations: null,
    selectedCols: [],
    tsUrl: null,
    dates: dateColumns,
    hasDate: anyDates,
    selectedDate: firstDate,
  };
}

export default {
toggleBouncer,
colorScale,
createScatter,
buildState,
percent,
pointFormatter,
};
35 changes: 29 additions & 6 deletions tests/dtale/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from builtins import str

import mock
import numpy as np
import pandas as pd
import pandas.util.testing as pdt
import pytest
Expand Down Expand Up @@ -359,16 +360,18 @@ def test_get_correlations(unittest, test_data):
stack.enter_context(mock.patch('dtale.views.DTYPES', {c.port: views.build_dtypes_state(test_data)}))
response = c.get('/dtale/correlations')
response_data = json.loads(response.data)
expected = dict(data=[
dict(column='security_id', security_id=1.0, foo=None, bar=None),
dict(column='foo', security_id=None, foo=None, bar=None),
dict(column='bar', security_id=None, foo=None, bar=None)
])
expected = dict(
data=[
dict(column='security_id', security_id=1.0, foo=None, bar=None),
dict(column='foo', security_id=None, foo=None, bar=None),
dict(column='bar', security_id=None, foo=None, bar=None)
],
dates=[]
)
unittest.assertEqual(response_data, expected, 'should return correlations')

with app.test_client() as c:
with ExitStack() as stack:
test_data, _ = views.format_data(test_data)
stack.enter_context(mock.patch('dtale.views.DATA', {c.port: test_data}))
stack.enter_context(mock.patch('dtale.views.DTYPES', {c.port: views.build_dtypes_state(test_data)}))
response = c.get('/dtale/correlations', query_string=dict(query="missing_col == 'blah'"))
Expand All @@ -377,6 +380,26 @@ def test_get_correlations(unittest, test_data):
response_data['error'], "name 'missing_col' is not defined", 'should handle correlations exception'
)

with app.test_client() as c:
with ExitStack() as stack:
test_data.loc[test_data.security_id == 1, 'bar'] = np.nan
test_data2 = test_data.copy()
test_data2.loc[:, 'date'] = pd.Timestamp('20000102')
test_data = pd.concat([test_data, test_data2], ignore_index=True)
stack.enter_context(mock.patch('dtale.views.DATA', {c.port: test_data}))
stack.enter_context(mock.patch('dtale.views.DTYPES', {c.port: views.build_dtypes_state(test_data)}))
response = c.get('/dtale/correlations')
response_data = json.loads(response.data)
expected = expected = dict(
data=[
dict(column='security_id', security_id=1.0, foo=None, bar=None),
dict(column='foo', security_id=None, foo=None, bar=None),
dict(column='bar', security_id=None, foo=None, bar=None)
],
dates=['date']
)
unittest.assertEqual(response_data, expected, 'should return correlations')


def build_ts_data(size=5, days=5):
start = pd.Timestamp('20000101')
Expand Down

0 comments on commit 2871b9b

Please sign in to comment.