Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PX ECDF and KDE #3011

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions packages/python/plotly/plotly/express/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
box,
strip,
histogram,
ecdf,
kde,
scatter_matrix,
parallel_coordinates,
parallel_categories,
Expand Down Expand Up @@ -88,6 +90,8 @@
"box",
"strip",
"histogram",
"ecdf",
"kde",
"choropleth",
"choropleth_mapbox",
"pie",
Expand Down
116 changes: 116 additions & 0 deletions packages/python/plotly/plotly/express/_chart_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,122 @@ def histogram(
)


def ecdf(
    data_frame=None,
    x=None,
    y=None,
    color=None,
    text=None,
    line_dash=None,
    facet_row=None,
    facet_col=None,
    facet_col_wrap=0,
    facet_row_spacing=None,
    facet_col_spacing=None,
    hover_name=None,
    hover_data=None,
    animation_frame=None,
    animation_group=None,
    category_orders=None,
    labels=None,
    color_discrete_sequence=None,
    color_discrete_map=None,
    line_dash_sequence=None,
    line_dash_map=None,
    marginal=None,
    opacity=None,
    orientation=None,
    line_shape="hv",
    # Accepted values are None, 'percent' or 'probability'; anything else is
    # rejected with a ValueError during figure configuration.
    norm="probability",
    # When true, the plotted value is the group total minus the running sum.
    complementary=False,
    log_x=False,
    log_y=False,
    range_x=None,
    range_y=None,
    title=None,
    template=None,
    width=None,
    height=None,
):
    """
    In an Empirical Cumulative Distribution Function (ECDF) plot, rows of `data_frame`
    are sorted by the value `x` (or `y` if `orientation` is `'h'`) and their cumulative
    count (or the cumulative sum of `y` if supplied and `orientation` is `'v'`) is drawn
    as a line.
    """
    # locals() is evaluated while only the keyword arguments above are bound,
    # so make_figure receives exactly the caller's arguments. Do not bind any
    # other local name before this call.
    return make_figure(args=locals(), constructor=go.Scatter)


# Replace the hand-written summary with the generated docstring: the summary
# is kept and a parameter section is appended, with extra orientation notes
# attached to `x` and `y`.
ecdf.__doc__ = make_docstring(
    ecdf,
    append_dict={
        "x": [
            "If `orientation` is `'h'`, the cumulative sum of this argument is plotted rather than the cumulative count."
        ]
        + _wide_mode_xy_append,
        "y": [
            "If `orientation` is `'v'`, the cumulative sum of this argument is plotted rather than the cumulative count."
        ]
        + _wide_mode_xy_append,
    },
)


def kde(
    data_frame=None,
    x=None,
    y=None,
    color=None,
    line_dash=None,
    facet_row=None,
    facet_col=None,
    facet_col_wrap=0,
    facet_row_spacing=None,
    facet_col_spacing=None,
    hover_name=None,
    hover_data=None,
    animation_frame=None,
    animation_group=None,
    category_orders=None,
    labels=None,
    color_discrete_sequence=None,
    color_discrete_map=None,
    line_dash_sequence=None,
    line_dash_map=None,
    marginal=None,
    opacity=None,
    orientation=None,
    norm=None,  # TODO: use this
    kernel=None,  # TODO: use this
    bw_method=None,  # TODO: use this
    bw_adjust=None,  # TODO: use this
    log_x=False,
    log_y=False,
    range_x=None,
    range_y=None,
    title=None,
    template=None,
    width=None,
    height=None,
):
    """
    In a Kernel Density Estimation (KDE) plot, rows of `data_frame`
    are used as inputs to a KDE smoothing function which is rendered as a line.
    """
    # NOTE(review): the KDE-specific arguments are still marked TODO in the
    # signature and no smoothing step is visible in this change; the call is
    # forwarded to make_figure with a go.Scatter constructor. Confirm the
    # density computation exists before relying on the docstring above.
    #
    # locals() is evaluated while only the keyword arguments above are bound,
    # so make_figure receives exactly the caller's arguments. Do not bind any
    # other local name before this call.
    return make_figure(args=locals(), constructor=go.Scatter)


# Replace the hand-written summary with the generated docstring: the summary
# is kept and a parameter section is appended, with extra notes on how `x`
# and `y` act as KDE weights depending on orientation.
kde.__doc__ = make_docstring(
    kde,
    append_dict={
        "x": ["If `orientation` is `'h'`, this argument is used as KDE weights."]
        + _wide_mode_xy_append,
        "y": ["If `orientation` is `'v'`, this argument is used as KDE weights."]
        + _wide_mode_xy_append,
    },
)


def violin(
data_frame=None,
x=None,
Expand Down
69 changes: 57 additions & 12 deletions packages/python/plotly/plotly/express/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1285,6 +1285,9 @@ def build_dataframe(args, constructor):
wide_cross_name = None # will likely be "index" in wide_mode
value_name = None # will likely be "value" in wide_mode
hist2d_types = [go.Histogram2d, go.Histogram2dContour]
hist1d_orientation = (
constructor == go.Histogram or "complementary" in args or "kernel" in args
)
if constructor in cartesians:
if wide_x and wide_y:
raise ValueError(
Expand Down Expand Up @@ -1319,7 +1322,7 @@ def build_dataframe(args, constructor):
df_provided and var_name in df_input
):
var_name = "variable"
if constructor == go.Histogram:
if hist1d_orientation:
wide_orientation = "v" if wide_x else "h"
else:
wide_orientation = "v" if wide_y else "h"
Expand All @@ -1333,7 +1336,10 @@ def build_dataframe(args, constructor):
var_name = _escape_col_name(df_input, var_name, [])

missing_bar_dim = None
if constructor in [go.Scatter, go.Bar, go.Funnel] + hist2d_types:
if (
constructor in [go.Scatter, go.Bar, go.Funnel] + hist2d_types
and not hist1d_orientation
):
if not wide_mode and (no_x != no_y):
for ax in ["x", "y"]:
if args.get(ax, None) is None:
Expand Down Expand Up @@ -1430,14 +1436,18 @@ def build_dataframe(args, constructor):
df_output[var_name] = df_output[var_name].astype(str)
orient_v = wide_orientation == "v"

if constructor in [go.Scatter, go.Funnel] + hist2d_types:
if hist1d_orientation:
args["x" if orient_v else "y"] = value_name
args["y" if orient_v else "x"] = wide_cross_name
args["color"] = args["color"] or var_name
elif constructor in [go.Scatter, go.Funnel] + hist2d_types:
args["x" if orient_v else "y"] = wide_cross_name
args["y" if orient_v else "x"] = value_name
if constructor != go.Histogram2d:
args["color"] = args["color"] or var_name
if "line_group" in args:
args["line_group"] = args["line_group"] or var_name
if constructor == go.Bar:
elif constructor == go.Bar:
if _is_continuous(df_output, value_name):
args["x" if orient_v else "y"] = wide_cross_name
args["y" if orient_v else "x"] = value_name
Expand All @@ -1447,13 +1457,24 @@ def build_dataframe(args, constructor):
args["y" if orient_v else "x"] = count_name
df_output[count_name] = 1
args["color"] = args["color"] or var_name
if constructor in [go.Violin, go.Box]:
elif constructor in [go.Violin, go.Box]:
args["x" if orient_v else "y"] = wide_cross_name or var_name
args["y" if orient_v else "x"] = value_name
if constructor == go.Histogram:
args["x" if orient_v else "y"] = value_name
args["y" if orient_v else "x"] = wide_cross_name
args["color"] = args["color"] or var_name

if hist1d_orientation and constructor == go.Scatter:
if args["x"] is not None and args["y"] is not None:
args["histfunc"] = "sum"
elif args["x"] is None:
args["histfunc"] = None
args["orientation"] = "h"
args["x"] = count_name
df_output[count_name] = 1
else:
args["histfunc"] = None
args["orientation"] = "v"
args["y"] = count_name
df_output[count_name] = 1

if no_color:
args["color"] = None
args["data_frame"] = df_output
Expand Down Expand Up @@ -1750,8 +1771,10 @@ def infer_config(args, constructor, trace_patch, layout_patch):
trace_patch["opacity"] = args["opacity"]
else:
trace_patch["marker"] = dict(opacity=args["opacity"])
if "line_group" in args:
trace_patch["mode"] = "lines" + ("+markers+text" if args["text"] else "")
if "line_group" in args or "line_dash" in args:
trace_patch["mode"] = "lines" + (
"+markers+text" if args.get("text", None) is not None else ""
)
elif constructor != go.Splom and (
"symbol" in args or constructor == go.Scattermapbox
):
Expand Down Expand Up @@ -1790,6 +1813,14 @@ def infer_config(args, constructor, trace_patch, layout_patch):
):
args["facet_col_wrap"] = 0

if "norm" in args:
if args.get("norm", None) not in [None, "percent", "probability"]:
raise ValueError(
"`norm` must be one of None, 'percent' or 'probability'. "
+ "'%s' was provided." % args["norm"]
)
args["histnorm"] = args["norm"]

# Compute applicable grouping attributes
for k in group_attrables:
if k in args:
Expand Down Expand Up @@ -1943,7 +1974,7 @@ def make_figure(args, constructor, trace_patch=None, layout_patch=None):
if (
trace_spec != trace_specs[0]
and trace_spec.constructor in [go.Violin, go.Box, go.Histogram]
and m.variable == "symbol"
and m.variable in ["symbol", "dash"]
):
pass
elif (
Expand Down Expand Up @@ -2004,6 +2035,20 @@ def make_figure(args, constructor, trace_patch=None, layout_patch=None):
):
trace.update(marker=dict(color=trace.line.color))

if "complementary" in args: # ECDF
base = args["x"] if args["orientation"] == "v" else args["y"]
var = args["x"] if args["orientation"] == "h" else args["y"]
group = group.sort_values(by=base)
group_sum = group[var].sum()
group[var] = group[var].cumsum()
if args["complementary"]:
group[var] = group_sum - group[var]

if args["norm"] == "probability":
group[var] = group[var] / group_sum
elif args["norm"] == "percent":
group[var] = 100.0 * group[var] / group_sum

patch, fit_results = make_trace_kwargs(
args, trace_spec, group, mapping_labels.copy(), sizeref
)
Expand Down
9 changes: 8 additions & 1 deletion packages/python/plotly/plotly/express/_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,10 +541,17 @@
"Sets the number of rendered sectors from any given `level`. Set `maxdepth` to -1 to render all the"
"levels in the hierarchy.",
],
norm=["TODO"],
complementary=["TODO"],
kernel=["TODO"],
bw_method=["TODO"],
bw_adjust=["TODO"],
)


def make_docstring(fn, override_dict={}, append_dict={}):
def make_docstring(fn, override_dict=None, append_dict=None):
override_dict = {} if override_dict is None else override_dict
append_dict = {} if append_dict is None else append_dict
tw = TextWrapper(width=75, initial_indent=" ", subsequent_indent=" ")
result = (fn.__doc__ or "") + "\nParameters\n----------\n"
for param in getfullargspec(fn)[0]:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import plotly
import pandas as pd
import plotly.express as px
from pytest import approx
Expand Down Expand Up @@ -112,25 +111,21 @@ def bad_facet_spacing_df():
def test_bad_facet_spacing_eror(bad_facet_spacing_df):
df = bad_facet_spacing_df
with pytest.raises(
ValueError, match="Use the facet_row_spacing argument to adjust this spacing\."
ValueError, match="Use the facet_row_spacing argument to adjust this spacing."
):
fig = px.scatter(
df, x="x", y="y", facet_row="category", facet_row_spacing=0.01001
)
px.scatter(df, x="x", y="y", facet_row="category", facet_row_spacing=0.01001)
with pytest.raises(
ValueError, match="Use the facet_col_spacing argument to adjust this spacing\."
ValueError, match="Use the facet_col_spacing argument to adjust this spacing."
):
fig = px.scatter(
df, x="x", y="y", facet_col="category", facet_col_spacing=0.01001
)
px.scatter(df, x="x", y="y", facet_col="category", facet_col_spacing=0.01001)
# Check error is not raised when the spacing is OK
try:
fig = px.scatter(df, x="x", y="y", facet_row="category", facet_row_spacing=0.01)
px.scatter(df, x="x", y="y", facet_row="category", facet_row_spacing=0.01)
except ValueError:
# Error shouldn't be raised, so fail if it is
assert False
try:
fig = px.scatter(df, x="x", y="y", facet_col="category", facet_col_spacing=0.01)
px.scatter(df, x="x", y="y", facet_col="category", facet_col_spacing=0.01)
except ValueError:
# Error shouldn't be raised, so fail if it is
assert False
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def test_xy_marginals(px_fn, marginal_x, marginal_y):
assert len(fig.data) == 1 + (marginal_x is not None) + (marginal_y is not None)


@pytest.mark.parametrize("px_fn", [px.histogram])
@pytest.mark.parametrize("px_fn", [px.histogram, px.ecdf, px.kde])
@pytest.mark.parametrize("marginal", [None, "rug", "histogram", "box", "violin"])
@pytest.mark.parametrize("orientation", ["h", "v"])
def test_single_marginals(px_fn, marginal, orientation):
Expand Down