@@ -4892,30 +4892,33 @@ def filter(
48924892 ** constraints : Any ,
48934893 ) -> DataFrame :
48944894 """
4895- Filter the rows in the DataFrame based on one or more predicate expressions .
4895+ Filter rows, retaining those that match the given predicate expression(s) .
48964896
48974897 The original order of the remaining rows is preserved.
48984898
4899- Rows where the filter does not evaluate to True are discarded, including nulls.
4899+ Only rows where the predicate resolves as True are retained; when the
4900+ predicate result is False (or null), the row is discarded.
49004901
49014902 Parameters
49024903 ----------
49034904 predicates
4904- Expression(s) that evaluates to a boolean Series.
4905+ Expression(s) that evaluate to a boolean Series.
49054906 constraints
49064907 Column filters; use `name = value` to filter columns by the supplied value.
49074908 Each constraint will behave the same as `pl.col(name).eq(value)`, and
4908- will be implicitly joined with the other filter conditions using `&`.
4909+ be implicitly joined with the other filter conditions using `&`.
49094910
49104911 Notes
49114912 -----
4912- If you are transitioning from pandas and performing filter operations based on
4913- the comparison of two or more columns, please note that in Polars,
4914- any comparison involving null values will always result in null.
4915- As a result, these rows will be filtered out.
4916- Ensure to handle null values appropriately to avoid unintended filtering
4917- (See examples below).
4913+ If you are transitioning from pandas and performing filter operations based on
4914+ the comparison of two or more columns, please note that in Polars any comparison
4915+ involving `null` values will result in a `null` result, *not* boolean True or
4916+ False. As a result, these rows will not be retained. Ensure that null values
4917+ are handled appropriately to avoid unexpected behavior (see examples below).
49184918
4919+ See Also
4920+ --------
4921+ remove
49194922
49204923 Examples
49214924 --------
@@ -4927,7 +4930,7 @@ def filter(
49274930 ... }
49284931 ... )
49294932
4930- Filter on one condition:
4933+ Filter rows matching a condition:
49314934
49324935 >>> df.filter(pl.col("foo") > 1)
49334936 shape: (3, 3)
@@ -4943,7 +4946,9 @@ def filter(
49434946
49444947 Filter on multiple conditions, combined with and/or operators:
49454948
4946- >>> df.filter((pl.col("foo") < 3) & (pl.col("ham") == "a"))
4949+ >>> df.filter(
4950+ ... (pl.col("foo") < 3) & (pl.col("ham") == "a"),
4951+ ... )
49474952 shape: (1, 3)
49484953 ┌─────┬─────┬─────┐
49494954 │ foo ┆ bar ┆ ham │
@@ -4953,7 +4958,9 @@ def filter(
49534958 │ 1 ┆ 6 ┆ a │
49544959 └─────┴─────┴─────┘
49554960
4956- >>> df.filter((pl.col("foo") == 1) | (pl.col("ham") == "c"))
4961+ >>> df.filter(
4962+ ... (pl.col("foo") == 1) | (pl.col("ham") == "c"),
4963+ ... )
49574964 shape: (2, 3)
49584965 ┌─────┬─────┬─────┐
49594966 │ foo ┆ bar ┆ ham │
@@ -4992,9 +4999,11 @@ def filter(
49924999 │ 2 ┆ 7 ┆ b │
49935000 └─────┴─────┴─────┘
49945001
4995- Filter by comparing two columns against each other
5002+ Filter by comparing two columns against each other:
49965003
4997- >>> df.filter(pl.col("foo") == pl.col("bar"))
5004+ >>> df.filter(
5005+ ... pl.col("foo") == pl.col("bar"),
5006+ ... )
49985007 shape: (1, 3)
49995008 ┌─────┬─────┬─────┐
50005009 │ foo ┆ bar ┆ ham │
@@ -5004,7 +5013,9 @@ def filter(
50045013 │ 0 ┆ 0 ┆ f │
50055014 └─────┴─────┴─────┘
50065015
5007- >>> df.filter(pl.col("foo") != pl.col("bar"))
5016+ >>> df.filter(
5017+ ... pl.col("foo") != pl.col("bar"),
5018+ ... )
50085019 shape: (3, 3)
50095020 ┌─────┬─────┬─────┐
50105021 │ foo ┆ bar ┆ ham │
@@ -5019,7 +5030,9 @@ def filter(
50195030 Notice how the row with `None` values is filtered out. In order to keep the
50205031 same behavior as pandas, use:
50215032
5022- >>> df.filter(pl.col("foo").ne_missing(pl.col("bar")))
5033+ >>> df.filter(
5034+ ... pl.col("foo").ne_missing(pl.col("bar")),
5035+ ... )
50235036 shape: (5, 3)
50245037 ┌──────┬──────┬─────┐
50255038 │ foo ┆ bar ┆ ham │
@@ -5032,10 +5045,148 @@ def filter(
50325045 │ 4 ┆ null ┆ d │
50335046 │ null ┆ 9 ┆ e │
50345047 └──────┴──────┴─────┘
5035-
50365048 """
50375049 return self .lazy ().filter (* predicates , ** constraints ).collect (_eager = True )
50385050
5051+ def remove (
5052+ self ,
5053+ * predicates : (
5054+ IntoExprColumn
5055+ | Iterable [IntoExprColumn ]
5056+ | bool
5057+ | list [bool ]
5058+ | np .ndarray [Any , Any ]
5059+ ),
5060+ ** constraints : Any ,
5061+ ) -> DataFrame :
5062+ """
5063+ Remove rows, dropping those that match the given predicate expression(s).
5064+
5065+ The original order of the remaining rows is preserved.
5066+
5067+ Rows where the filter predicate does not evaluate to True are retained
5068+ (this includes rows where the predicate evaluates as `null`).
5069+
5070+ Parameters
5071+ ----------
5072+ predicates
5073+ Expression(s) that evaluate to a boolean Series.
5074+ constraints
5075+ Column filters; use `name = value` to filter columns using the supplied
5076+ value. Each constraint behaves the same as `pl.col(name).eq(value)`,
5077+ and is implicitly joined with the other filter conditions using `&`.
5078+
5079+ Notes
5080+ -----
5081+ If you are transitioning from pandas and performing filter operations based on
5082+ the comparison of two or more columns, please note that in Polars any comparison
5083+ involving `null` values will result in a `null` result, *not* boolean True or
5084+ False. As a result, these rows will not be removed. Ensure that null values
5085+ are handled appropriately to avoid unexpected behavior (see examples below).
5086+
5087+ See Also
5088+ --------
5089+ filter
5090+
5091+ Examples
5092+ --------
5093+ >>> df = pl.DataFrame(
5094+ ... {
5095+ ... "foo": [2, 3, None, 4, 0],
5096+ ... "bar": [5, 6, None, None, 0],
5097+ ... "ham": ["a", "b", None, "c", "d"],
5098+ ... }
5099+ ... )
5100+
5101+ Remove rows matching a condition:
5102+
5103+ >>> df.remove(pl.col("bar") >= 5)
5104+ shape: (3, 3)
5105+ ┌──────┬──────┬──────┐
5106+ │ foo ┆ bar ┆ ham │
5107+ │ --- ┆ --- ┆ --- │
5108+ │ i64 ┆ i64 ┆ str │
5109+ ╞══════╪══════╪══════╡
5110+ │ null ┆ null ┆ null │
5111+ │ 4 ┆ null ┆ c │
5112+ │ 0 ┆ 0 ┆ d │
5113+ └──────┴──────┴──────┘
5114+
5115+ Discard rows based on multiple conditions, combined with and/or operators:
5116+
5117+ >>> df.remove(
5118+ ... (pl.col("foo") >= 0) & (pl.col("bar") >= 0),
5119+ ... )
5120+ shape: (2, 3)
5121+ ┌──────┬──────┬──────┐
5122+ │ foo ┆ bar ┆ ham │
5123+ │ --- ┆ --- ┆ --- │
5124+ │ i64 ┆ i64 ┆ str │
5125+ ╞══════╪══════╪══════╡
5126+ │ null ┆ null ┆ null │
5127+ │ 4 ┆ null ┆ c │
5128+ └──────┴──────┴──────┘
5129+
5130+ >>> df.remove(
5131+ ... (pl.col("foo") >= 0) | (pl.col("bar") >= 0),
5132+ ... )
5133+ shape: (1, 3)
5134+ ┌──────┬──────┬──────┐
5135+ │ foo ┆ bar ┆ ham │
5136+ │ --- ┆ --- ┆ --- │
5137+ │ i64 ┆ i64 ┆ str │
5138+ ╞══════╪══════╪══════╡
5139+ │ null ┆ null ┆ null │
5140+ └──────┴──────┴──────┘
5141+
5142+ Provide multiple constraints using `*args` syntax:
5143+
5144+ >>> df.remove(
5145+ ... pl.col("ham").is_not_null(),
5146+ ... pl.col("bar") >= 0,
5147+ ... )
5148+ shape: (2, 3)
5149+ ┌──────┬──────┬──────┐
5150+ │ foo ┆ bar ┆ ham │
5151+ │ --- ┆ --- ┆ --- │
5152+ │ i64 ┆ i64 ┆ str │
5153+ ╞══════╪══════╪══════╡
5154+ │ null ┆ null ┆ null │
5155+ │ 4 ┆ null ┆ c │
5156+ └──────┴──────┴──────┘
5157+
5158+ Provide constraint(s) using `**kwargs` syntax:
5159+
5160+ >>> df.remove(foo=0, bar=0)
5161+ shape: (4, 3)
5162+ ┌──────┬──────┬──────┐
5163+ │ foo ┆ bar ┆ ham │
5164+ │ --- ┆ --- ┆ --- │
5165+ │ i64 ┆ i64 ┆ str │
5166+ ╞══════╪══════╪══════╡
5167+ │ 2 ┆ 5 ┆ a │
5168+ │ 3 ┆ 6 ┆ b │
5169+ │ null ┆ null ┆ null │
5170+ │ 4 ┆ null ┆ c │
5171+ └──────┴──────┴──────┘
5172+
5173+ Remove rows by comparing two columns against each other:
5174+
5175+ >>> df.remove(
5176+ ... pl.col("foo").ne_missing(pl.col("bar")),
5177+ ... )
5178+ shape: (2, 3)
5179+ ┌──────┬──────┬──────┐
5180+ │ foo ┆ bar ┆ ham │
5181+ │ --- ┆ --- ┆ --- │
5182+ │ i64 ┆ i64 ┆ str │
5183+ ╞══════╪══════╪══════╡
5184+ │ null ┆ null ┆ null │
5185+ │ 0 ┆ 0 ┆ d │
5186+ └──────┴──────┴──────┘
5187+ """
5188+ return self .lazy ().remove (* predicates , ** constraints ).collect (_eager = True )
5189+
50395190 @overload
50405191 def glimpse (
50415192 self ,
@@ -7293,7 +7444,6 @@ def join_asof(
72937444 │ Netherlands ┆ 2018-08-01 ┆ 17.32 ┆ 910 │
72947445 │ Netherlands ┆ 2019-01-01 ┆ 17.4 ┆ 910 │
72957446 └─────────────┴────────────┴────────────┴──────┘
7296-
72977447 """
72987448 if not isinstance (other , DataFrame ):
72997449 msg = f"expected `other` join table to be a DataFrame, got { type (other ).__name__ !r} "
@@ -7628,7 +7778,6 @@ def join_where(
76287778 │ 101 ┆ 140 ┆ 14 ┆ 8 ┆ 676 ┆ 150 ┆ 15 ┆ 1 │
76297779 │ 101 ┆ 140 ┆ 14 ┆ 8 ┆ 742 ┆ 170 ┆ 16 ┆ 4 │
76307780 └─────┴─────┴─────┴───────┴──────┴──────┴──────┴─────────────┘
7631-
76327781 """
76337782 if not isinstance (other , DataFrame ):
76347783 msg = f"expected `other` join table to be a DataFrame, got { type (other ).__name__ !r} "
0 commit comments