diff --git a/packages/python/plotly/plotly/express/__init__.py b/packages/python/plotly/plotly/express/__init__.py index 4bffa25d784..fc6b439c592 100644 --- a/packages/python/plotly/plotly/express/__init__.py +++ b/packages/python/plotly/plotly/express/__init__.py @@ -34,6 +34,8 @@ box, strip, histogram, + ecdf, + kde, scatter_matrix, parallel_coordinates, parallel_categories, @@ -88,6 +90,8 @@ "box", "strip", "histogram", + "ecdf", + "kde", "choropleth", "choropleth_mapbox", "pie", diff --git a/packages/python/plotly/plotly/express/_chart_types.py b/packages/python/plotly/plotly/express/_chart_types.py index 7ed26491afe..feee3a66f82 100644 --- a/packages/python/plotly/plotly/express/_chart_types.py +++ b/packages/python/plotly/plotly/express/_chart_types.py @@ -471,6 +471,122 @@ def histogram( ) +def ecdf( + data_frame=None, + x=None, + y=None, + color=None, + text=None, + line_dash=None, + facet_row=None, + facet_col=None, + facet_col_wrap=0, + facet_row_spacing=None, + facet_col_spacing=None, + hover_name=None, + hover_data=None, + animation_frame=None, + animation_group=None, + category_orders=None, + labels=None, + color_discrete_sequence=None, + color_discrete_map=None, + line_dash_sequence=None, + line_dash_map=None, + marginal=None, + opacity=None, + orientation=None, + line_shape="hv", + norm="probability", + complementary=False, + log_x=False, + log_y=False, + range_x=None, + range_y=None, + title=None, + template=None, + width=None, + height=None, +): + """ + In an Empirical Cumulative Distribution Function (ECDF) plot, rows of `data_frame` + are sorted by the value `x` (or `y` if `orientation` is `'h'`) and their cumulative + count (or the cumulative sum of `y` if supplied and `orientation` is `'v'`) is drawn + as a line. 
+ """ + return make_figure(args=locals(), constructor=go.Scatter) + + +ecdf.__doc__ = make_docstring( + ecdf, + append_dict=dict( + x=[ + "If `orientation` is `'h'`, the cumulative sum of this argument is plotted rather than the cumulative count." + ] + + _wide_mode_xy_append, + y=[ + "If `orientation` is `'v'`, the cumulative sum of this argument is plotted rather than the cumulative count." + ] + + _wide_mode_xy_append, + ), +) + + +def kde( + data_frame=None, + x=None, + y=None, + color=None, + line_dash=None, + facet_row=None, + facet_col=None, + facet_col_wrap=0, + facet_row_spacing=None, + facet_col_spacing=None, + hover_name=None, + hover_data=None, + animation_frame=None, + animation_group=None, + category_orders=None, + labels=None, + color_discrete_sequence=None, + color_discrete_map=None, + line_dash_sequence=None, + line_dash_map=None, + marginal=None, + opacity=None, + orientation=None, + norm=None, # TODO use this + kernel=None, # TODO use this + bw_method=None, # TODO use this + bw_adjust=None, # TODO use this + log_x=False, + log_y=False, + range_x=None, + range_y=None, + title=None, + template=None, + width=None, + height=None, +): + """ + In a Kernel Density Estimation (KDE) plot, rows of `data_frame` + are used as inputs to a KDE smoothing function which is rendered as a line. 
+ """ + return make_figure(args=locals(), constructor=go.Scatter) + + +kde.__doc__ = make_docstring( + kde, + append_dict=dict( + x=["If `orientation` is `'h'`, this argument is used as KDE weights."] + + _wide_mode_xy_append, + y=["If `orientation` is `'v'`, this argument is used as KDE weights."] + + _wide_mode_xy_append, + ), +) + + def violin( data_frame=None, x=None, diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index d29b90b46c3..8f6c20a17d8 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1285,6 +1285,9 @@ def build_dataframe(args, constructor): wide_cross_name = None # will likely be "index" in wide_mode value_name = None # will likely be "value" in wide_mode hist2d_types = [go.Histogram2d, go.Histogram2dContour] + hist1d_orientation = ( + constructor == go.Histogram or "complementary" in args or "kernel" in args + ) if constructor in cartesians: if wide_x and wide_y: raise ValueError( @@ -1319,7 +1322,7 @@ def build_dataframe(args, constructor): df_provided and var_name in df_input ): var_name = "variable" - if constructor == go.Histogram: + if hist1d_orientation: wide_orientation = "v" if wide_x else "h" else: wide_orientation = "v" if wide_y else "h" @@ -1333,7 +1336,10 @@ def build_dataframe(args, constructor): var_name = _escape_col_name(df_input, var_name, []) missing_bar_dim = None - if constructor in [go.Scatter, go.Bar, go.Funnel] + hist2d_types: + if ( + constructor in [go.Scatter, go.Bar, go.Funnel] + hist2d_types + and not hist1d_orientation + ): if not wide_mode and (no_x != no_y): for ax in ["x", "y"]: if args.get(ax, None) is None: @@ -1430,14 +1436,18 @@ def build_dataframe(args, constructor): df_output[var_name] = df_output[var_name].astype(str) orient_v = wide_orientation == "v" - if constructor in [go.Scatter, go.Funnel] + hist2d_types: + if hist1d_orientation: + args["x" if orient_v else "y"] = value_name + 
args["y" if orient_v else "x"] = wide_cross_name + args["color"] = args["color"] or var_name + elif constructor in [go.Scatter, go.Funnel] + hist2d_types: args["x" if orient_v else "y"] = wide_cross_name args["y" if orient_v else "x"] = value_name if constructor != go.Histogram2d: args["color"] = args["color"] or var_name if "line_group" in args: args["line_group"] = args["line_group"] or var_name - if constructor == go.Bar: + elif constructor == go.Bar: if _is_continuous(df_output, value_name): args["x" if orient_v else "y"] = wide_cross_name args["y" if orient_v else "x"] = value_name @@ -1447,13 +1457,24 @@ def build_dataframe(args, constructor): args["y" if orient_v else "x"] = count_name df_output[count_name] = 1 args["color"] = args["color"] or var_name - if constructor in [go.Violin, go.Box]: + elif constructor in [go.Violin, go.Box]: args["x" if orient_v else "y"] = wide_cross_name or var_name args["y" if orient_v else "x"] = value_name - if constructor == go.Histogram: - args["x" if orient_v else "y"] = value_name - args["y" if orient_v else "x"] = wide_cross_name - args["color"] = args["color"] or var_name + + if hist1d_orientation and constructor == go.Scatter: + if args["x"] is not None and args["y"] is not None: + args["histfunc"] = "sum" + elif args["x"] is None: + args["histfunc"] = None + args["orientation"] = "h" + args["x"] = count_name + df_output[count_name] = 1 + else: + args["histfunc"] = None + args["orientation"] = "v" + args["y"] = count_name + df_output[count_name] = 1 + if no_color: args["color"] = None args["data_frame"] = df_output @@ -1750,8 +1771,10 @@ def infer_config(args, constructor, trace_patch, layout_patch): trace_patch["opacity"] = args["opacity"] else: trace_patch["marker"] = dict(opacity=args["opacity"]) - if "line_group" in args: - trace_patch["mode"] = "lines" + ("+markers+text" if args["text"] else "") + if "line_group" in args or "line_dash" in args: + trace_patch["mode"] = "lines" + ( + "+markers+text" if 
args.get("text", None) is not None else "" + ) elif constructor != go.Splom and ( "symbol" in args or constructor == go.Scattermapbox ): @@ -1790,6 +1813,14 @@ def infer_config(args, constructor, trace_patch, layout_patch): ): args["facet_col_wrap"] = 0 + if "norm" in args: + if args.get("norm", None) not in [None, "percent", "probability"]: + raise ValueError( + "`norm` must be one of None, 'percent' or 'probability'. " + + "'%s' was provided." % args["norm"] + ) + args["histnorm"] = args["norm"] + # Compute applicable grouping attributes for k in group_attrables: if k in args: @@ -1943,7 +1974,7 @@ def make_figure(args, constructor, trace_patch=None, layout_patch=None): if ( trace_spec != trace_specs[0] and trace_spec.constructor in [go.Violin, go.Box, go.Histogram] - and m.variable == "symbol" + and m.variable in ["symbol", "dash"] ): pass elif ( @@ -2004,6 +2035,20 @@ def make_figure(args, constructor, trace_patch=None, layout_patch=None): ): trace.update(marker=dict(color=trace.line.color)) + if "complementary" in args: # ECDF + base = args["x"] if args["orientation"] == "v" else args["y"] + var = args["x"] if args["orientation"] == "h" else args["y"] + group = group.sort_values(by=base) + group_sum = group[var].sum() + group[var] = group[var].cumsum() + if args["complementary"]: + group[var] = group_sum - group[var] + + if args["norm"] == "probability": + group[var] = group[var] / group_sum + elif args["norm"] == "percent": + group[var] = 100.0 * group[var] / group_sum + patch, fit_results = make_trace_kwargs( args, trace_spec, group, mapping_labels.copy(), sizeref ) diff --git a/packages/python/plotly/plotly/express/_doc.py b/packages/python/plotly/plotly/express/_doc.py index 5d0b2b921db..8cab04e706a 100644 --- a/packages/python/plotly/plotly/express/_doc.py +++ b/packages/python/plotly/plotly/express/_doc.py @@ -541,10 +541,17 @@ "Sets the number of rendered sectors from any given `level`. 
Set `maxdepth` to -1 to render all the" "levels in the hierarchy.", ], + norm=["TODO"], + complementary=["TODO"], + kernel=["TODO"], + bw_method=["TODO"], + bw_adjust=["TODO"], ) -def make_docstring(fn, override_dict={}, append_dict={}): +def make_docstring(fn, override_dict=None, append_dict=None): + override_dict = {} if override_dict is None else override_dict + append_dict = {} if append_dict is None else append_dict tw = TextWrapper(width=75, initial_indent=" ", subsequent_indent=" ") result = (fn.__doc__ or "") + "\nParameters\n----------\n" for param in getfullargspec(fn)[0]: diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_facets.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_facets.py index 6598599fb94..c1db2afe775 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_facets.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_facets.py @@ -1,4 +1,3 @@ -import plotly import pandas as pd import plotly.express as px from pytest import approx @@ -112,25 +111,21 @@ def bad_facet_spacing_df(): def test_bad_facet_spacing_eror(bad_facet_spacing_df): df = bad_facet_spacing_df with pytest.raises( - ValueError, match="Use the facet_row_spacing argument to adjust this spacing\." + ValueError, match="Use the facet_row_spacing argument to adjust this spacing." ): - fig = px.scatter( - df, x="x", y="y", facet_row="category", facet_row_spacing=0.01001 - ) + px.scatter(df, x="x", y="y", facet_row="category", facet_row_spacing=0.01001) with pytest.raises( - ValueError, match="Use the facet_col_spacing argument to adjust this spacing\." + ValueError, match="Use the facet_col_spacing argument to adjust this spacing." 
): - fig = px.scatter( - df, x="x", y="y", facet_col="category", facet_col_spacing=0.01001 - ) + px.scatter(df, x="x", y="y", facet_col="category", facet_col_spacing=0.01001) # Check error is not raised when the spacing is OK try: - fig = px.scatter(df, x="x", y="y", facet_row="category", facet_row_spacing=0.01) + px.scatter(df, x="x", y="y", facet_row="category", facet_row_spacing=0.01) except ValueError: # Error shouldn't be raised, so fail if it is assert False try: - fig = px.scatter(df, x="x", y="y", facet_col="category", facet_col_spacing=0.01) + px.scatter(df, x="x", y="y", facet_col="category", facet_col_spacing=0.01) except ValueError: # Error shouldn't be raised, so fail if it is assert False diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_marginals.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_marginals.py index ecb7927d62f..a9277986f0d 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_marginals.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_marginals.py @@ -14,7 +14,7 @@ def test_xy_marginals(px_fn, marginal_x, marginal_y): assert len(fig.data) == 1 + (marginal_x is not None) + (marginal_y is not None) -@pytest.mark.parametrize("px_fn", [px.histogram]) +@pytest.mark.parametrize("px_fn", [px.histogram, px.ecdf, px.kde]) @pytest.mark.parametrize("marginal", [None, "rug", "histogram", "box", "violin"]) @pytest.mark.parametrize("orientation", ["h", "v"]) def test_single_marginals(px_fn, marginal, orientation):