diff --git a/pandas_checks/DataFrameChecks.py b/pandas_checks/DataFrameChecks.py index f9efca7..40a8cdd 100644 --- a/pandas_checks/DataFrameChecks.py +++ b/pandas_checks/DataFrameChecks.py @@ -64,14 +64,16 @@ def assert_all_nulls( """Tests whether Dataframe or subset of columns has all nulls. Optionally raises an exception. Does not modify the DataFrame itself. Example: - ( - iris - .check.assert_all_nulls(subset=["sepal_length"]) - ) + ```python + ( + iris + .check.assert_all_nulls(subset=["sepal_length"]) + ) - # Will raise an exception "ㄨ Assert all nulls failed" + # Will raise an exception "ㄨ Assert all nulls failed" - # See docs for .check.assert_data() for examples of how to customize assertions + ``` + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: fail_message: Message to display if the condition fails. @@ -111,18 +113,29 @@ def assert_data( """Tests whether Dataframe meets condition. Optionally raises an exception. Does not modify the DataFrame itself. Example: - # Validate that the Dataframe has at least 2 rows - - ( - iris - .check.assert_data(lambda df: df.shape[0]>1) - - # Or customize the message displayed when alert fails - .check.assert_data(lambda df: df.shape[0]>1, "Assertion failed, DataFrame has no rows!") - - # Or show a warning instead of raising an exception - .check.assert_data(lambda df: s.shape[0]>1, "FYI Series has no rows", raise_exception=False) - ) + ```python + # Validate that the Dataframe has at least 2 rows + + ( + iris + .check.assert_data(lambda df: df.shape[0]>1) + + # Or customize the message displayed when alert fails + .check.assert_data(lambda df: df.shape[0]>1, "Assertion failed, DataFrame has no rows!") + + # Or show a warning instead of raising an exception + .check.assert_data(lambda df: df.shape[0]>1, "FYI DataFrame has no rows", raise_exception=False) + + # Or show a message if it passes, and raise a specific exception (ValueError) if it fails.
+ .check.assert_data( + lambda df: df.shape[0]>1, + fail_message="FYI DataFrame has no rows", + pass_message="DataFrame has rows!", + exception_to_raise=ValueError, + verbose=True # To show pass_message when assertion passes + ) + ) + ``` Args: condition: Assertion criteria in the form of a lambda function, such as `lambda df: df.shape[0]>10`. @@ -220,12 +233,14 @@ def assert_datetime( """Tests whether Dataframe or subset of columns is datetime or timestamp. Optionally raises an exception. Does not modify the DataFrame itself. Example: - ( - df - .check.assert_datetime(subset="datetime_col") - ) + ```python + ( + df + .check.assert_datetime(subset="datetime_col") + ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: subset: Optional, which column or columns to check the condition against. @@ -262,12 +277,15 @@ def assert_float( """Tests whether Dataframe or subset of columns is floats. Optionally raises an exception. Does not modify the DataFrame itself. Example: - ( - df - .check.assert_float(subset="float_col") - ) + ```python + ( + df + .check.assert_float(subset="float_col") + ) - # See docs for .check.assert_data() for examples of how to customize assertions + ``` + + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: fail_message: Message to display if the condition fails.
@@ -307,16 +325,19 @@ def assert_greater_than( Example: - ( - iris - # Validate that sepal_length is always greater than 0.1 - .check.assert_greater_than(0.1, subset="sepal_length") + ```python + ( + iris + # Validate that sepal_length is always greater than 0.1 + .check.assert_greater_than(0.1, subset="sepal_length") + + # Validate that two columns are each always greater than or equal to 0.1 + .check.assert_greater_than(0.1, subset=["sepal_length", "petal_length"], or_equal_to=True) + ) - # Validate that two columns are each always greater than or equal to 0.1 - .check.assert_greater_than(0.1, subset=["sepal_length", "petal_length"], or_equal_to=True) - ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: min: the minimum value to compare DataFrame to. Accepts any type that can be used in >, such as int, float, str, datetime @@ -360,12 +381,14 @@ def assert_int( """Tests whether Dataframe or subset of columns is integers. Optionally raises an exception. Does not modify the DataFrame itself. Example: - ( - df - .check.assert_int(subset="int_col") - ) + ```python + ( + df + .check.assert_int(subset="int_col") + ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: fail_message: Message to display if the condition fails. @@ -404,17 +427,19 @@ def assert_less_than( """Tests whether all values in a Dataframe or subset of columns is < or <= a maximum threshold. Optionally raises an exception. Does not modify the DataFrame itself. 
Example: - ( - iris + ```python + ( + iris - # Validate that sepal_length is always < 1000 - .check.assert_less_than(1000, subset="sepal_length") + # Validate that sepal_length is always < 1000 + .check.assert_less_than(1000, subset="sepal_length") - # Validate that two columns are each always less than or equal too 100 - .check.assert_less_than(1000, subset=["sepal_length", "petal_length"], or_equal_to=True) - ) + # Validate that two columns are each always less than or equal to 1000 + .check.assert_less_than(1000, subset=["sepal_length", "petal_length"], or_equal_to=True) + ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: max: the max value to compare DataFrame to. Accepts any type that can be used in <, such as int, float, str, datetime @@ -459,12 +484,13 @@ def assert_negative( """Tests whether Dataframe or subset of columns has all negative values. Optionally raises an exception. Does not modify the DataFrame itself. Example: - ( - df - .check.assert_negative(subset="column_name") - ) - - # See docs for .check.assert_data() for examples of how to customize assertions + ```python + ( + df + .check.assert_negative(subset="column_name") + ) + ``` + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: fail_message: Message to display if the condition fails. @@ -513,12 +539,14 @@ def assert_no_nulls( """Tests whether Dataframe or subset of columns has no nulls. Optionally raises an exception. Does not modify the DataFrame itself. Example: - ( - iris - .check.assert_no_nulls(subset=["sepal_length"]) - ) + ```python + ( + iris + .check.assert_no_nulls(subset=["sepal_length"]) + ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions.
Args: fail_message: Message to display if the condition fails. @@ -556,12 +584,14 @@ def assert_nrows( """Tests whether Dataframe has a given number of rows. Optionally raises an exception. Does not modify the DataFrame itself. Example: - ( - iris - .check.assert_nrows(20) - ) + ```python + ( + iris + .check.assert_nrows(20) + ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: nrows: The expected number of rows @@ -599,12 +629,14 @@ def assert_positive( """Tests whether Dataframe or subset of columns has all positive values. Optionally raises an exception. Does not modify the DataFrame itself. Example: - ( - iris - .check.assert_positive(subset=["sepal_length"]) - ) + ```python + ( + iris + .check.assert_positive(subset=["sepal_length"]) + ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: fail_message: Message to display if the condition fails. @@ -654,20 +686,21 @@ def assert_same_nrows( Optionally raises an exception. Does not modify the DataFrame itself. Example: - # Validate that an expected one-to-one join didn't add rows due to duplicate keys in the right table. - ( - transactions_df - .merge(how="left", right=products_df, on="product_id") - .check.assert_same_nrows(transactions_df, "Left join changed row count! Check for duplicate `product_id` keys in product_df.") - ) + ```python + # Validate that an expected one-to-one join didn't add rows due to duplicate keys in the right table. + ( + transactions_df + .merge(how="left", right=products_df, on="product_id") + .check.assert_same_nrows(transactions_df, "Left join changed row count! 
Check for duplicate `product_id` keys in product_df.") + ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: other: The DataFrame or Series that we expect to have the same # of rows as fail_message: Message to display if the condition fails. pass_message: Message to display if the condition passes. - subset: Optional, which column or columns to check the condition against. raise_exception: Whether to raise an exception if the condition fails. exception_to_raise: The exception to raise if the condition fails and raise_exception is True. verbose: Whether to display the pass message if the condition passes. @@ -699,12 +732,14 @@ def assert_str( """Tests whether Dataframe or subset of columns is strings. Optionally raises an exception. Does not modify the DataFrame itself. Example: - ( - iris - .check.assert_str(subset=["species", "another_string_column"]) - ) + ```python + ( + iris + .check.assert_str(subset=["species", "another_string_column"]) + ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: fail_message: Message to display if the condition fails. If None, will report expected vs observed type. @@ -741,12 +776,14 @@ def assert_timedelta( """Tests whether Dataframe or subset of columns is of type timedelta. Optionally raises an exception. Does not modify the DataFrame itself. Example: - ( - df - .check.assert_timedelta(subset=["timedelta_col"]) - ) + ```python + ( + df + .check.assert_timedelta(subset=["timedelta_col"]) + ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: fail_message: Message to display if the condition fails. If None, will report expected vs observed type. 
@@ -784,13 +821,15 @@ def assert_type( """Tests whether Dataframe or subset of columns meets type assumption. Optionally raises an exception. Does not modify the DataFrame itself. Example: - # Validate that a column of mixed types has overall type `object` - ( - iris - .check.assert_type(object, subset="column_with_mixed_types") - ) + ```python + # Validate that a column of mixed types has overall type `object` + ( + iris + .check.assert_type(object, subset="column_with_mixed_types") + ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: dtype: The required variable type @@ -846,16 +885,18 @@ def assert_unique( """Validates that a subset of columns have no duplicate values, or validates that a DataFrame has no duplicate rows. Optionally raises an exception. Does not modify the DataFrame itself. Example: - ( - df - # Validate that a column has no duplicate values - .check.assert_unique(subset="id_column") - - # Validate that a DataFrame has no duplicate rows - .check.assert_unique() - ) + ```python + ( + df + # Validate that a column has no duplicate values + .check.assert_unique(subset="id_column") + + # Validate that a DataFrame has no duplicate rows + .check.assert_unique() + ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: fail_message: Message to display if the condition fails. @@ -890,10 +931,12 @@ def columns( """Prints the column names of a DataFrame, without modifying the DataFrame itself. Example: - ( - df - .check.columns() - ) + ```python + ( + df + .check.columns() + ) + ``` Args: fn: An optional lambda function to apply to the DataFrame before printing columns. Example: `lambda df: df.shape[0]>10`. Applied before subset. 
@@ -921,13 +964,15 @@ def describe( ) -> pd.DataFrame: """Displays descriptive statistics about a DataFrame without modifying the DataFrame itself. - See Pandas docs for describe() for additional usage information, including more configuration options you can pass to this Pandas Checks method. + See Pandas docs for [describe()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.describe.html) for additional usage information, including more configuration options you can pass to this Pandas Checks method. Example: - ( - df - .check.describe() - ) + ```python + ( + df + .check.describe() + ) + ``` Args: fn: An optional lambda function to apply to the DataFrame before running Pandas describe(). Example: `lambda df: df.shape[0]>10`. Applied before subset. @@ -951,12 +996,14 @@ def disable_checks(self, enable_asserts: bool = True) -> pd.DataFrame: """Turns off Pandas Checks globally, such as in production mode. Calls to .check functions will not be run. Does not modify the DataFrame itself. Example: - ( - iris - .check.disable_checks() - .check.assert_data(lambda df: df.shape[0]>10) # This check will NOT be run - .check.enable_checks() # Subsequent calls to .check will be run - ) + ```python + ( + iris + .check.disable_checks() + .check.assert_data(lambda df: df.shape[0]>10) # This check will NOT be run + .check.enable_checks() # Subsequent calls to .check will be run + ) + ``` Args enable_assert: Optionally, whether to also enable or disable assert statements @@ -975,13 +1022,13 @@ def dtypes( ) -> pd.DataFrame: """Displays the data types of a DataFrame's columns without modifying the DataFrame itself. - See Pandas docs for dtypes for additional usage information. - Example: - ( - iris - .check.dtypes() - ) + ```python + ( + iris + .check.dtypes() + ) + ``` Args: fn: An optional lambda function to apply to the DataFrame before running Pandas dtypes. Example: `lambda df: df.shape[0]>10`. Applied before subset. 
@@ -1004,13 +1051,15 @@ def enable_checks(self, enable_asserts: bool = True) -> pd.DataFrame: """Globally enables Pandas Checks. Subequent calls to .check methods will be run. Does not modify the DataFrame itself. Example: - ( - iris - ["sepal_length"] - .check.disable_checks() - .check.assert_data(lambda s: s.shape[0]>10) # This check will NOT be run - .check.enable_checks() # Subsequent calls to .check will be run - ) + ```python + ( + iris + ["sepal_length"] + .check.disable_checks() + .check.assert_data(lambda s: s.shape[0]>10) # This check will NOT be run + .check.enable_checks() # Subsequent calls to .check will be run + ) + ``` Args: enable_asserts: Optionally, whether to globally enable or disable calls to .check.assert_data(). @@ -1030,9 +1079,13 @@ def function( """Applies an arbitrary function on a DataFrame and shows the result, without modifying the DataFrame itself. Example: - .check.function(fn=lambda df: df.shape[0]>10, check_name='Has at least 10 rows?') - - # Will return either 'True' or 'False' + ```python + ( + iris + .check.function(fn=lambda df: df.shape[0]>10, check_name='Has at least 10 rows?') + ) + # Will return either 'True' or 'False' + ``` Args: fn: A lambda function to apply to the DataFrame. Example: `lambda df: df.shape[0]>10`. Applied before subset. @@ -1051,14 +1104,15 @@ def get_mode( """Displays the current values of Pandas Checks global options enable_checks and enable_asserts. Does not modify the DataFrame itself. Example: - ( - iris - .check.get_mode() - ) - - # The check will print: - # "🐼🩺 Pandas Checks mode: {'enable_checks': True, 'enable_asserts': True}" + ```python + ( + iris + .check.get_mode() + ) + # The check will print: + # "🐼🩺 Pandas Checks mode: {'enable_checks': True, 'enable_asserts': True}" + ``` Args: check_name: An optional name for the check. Will be used as a preface the printed result. 
@@ -1078,13 +1132,15 @@ def head( ) -> pd.DataFrame: """Displays the first n rows of a DataFrame, without modifying the DataFrame itself. - See Pandas docs for head() for additional usage information. + See Pandas docs for [head()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.head.html) for additional usage information. Example: - ( - iris - .check.head(10) - ) + ```python + ( + iris + .check.head(10) + ) + ``` Args: n: The number of rows to display. @@ -1113,13 +1169,15 @@ def hist( ) -> pd.DataFrame: """Displays a histogram for the DataFrame, without modifying the DataFrame itself. - See Pandas docs for hist() for additional usage information, including more configuration options you can pass to this Pandas Checks method. + See Pandas docs for [hist()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.hist.html) for additional usage information, including more configuration options you can pass to this Pandas Checks method. Example: - ( - iris - .check.hist(subset=["sepal_length", "sepal_width"]) - ) + ```python + ( + iris + .check.hist(subset=["sepal_length", "sepal_width"]) + ) + ``` Args: fn: An optional lambda function to apply to the DataFrame before running Pandas hist(). Example: `lambda df: df.shape[0]>10`. Applied before subset. @@ -1131,9 +1189,9 @@ def hist( The original DataFrame, unchanged. Note: - If more than one column is passed, displays a grid of histograms + If more than one column is passed, displays a grid of histograms. - Only renders in interactive mode (IPython/Jupyter), not in terminal + Only renders in interactive mode (IPython/Jupyter), not in terminal. """ if ( get_mode()["enable_checks"] and not pd.core.config_init.is_terminal() @@ -1158,13 +1216,15 @@ def info( ) -> pd.DataFrame: """Displays summary information about a DataFrame, without modifying the DataFrame itself. 
- See Pandas docs for info() for additional usage information, including more configuration options you can pass to this Pandas Checks method. + See Pandas docs for [info()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.info.html) for additional usage information, including more configuration options you can pass to this Pandas Checks method. Example: - ( - iris - .check.info() - ) + ```python + ( + iris + .check.info() + ) + ``` Args: fn: An optional lambda function to apply to the DataFrame before running Pandas info(). Example: `lambda df: df.shape[0]>10`. Applied before subset. @@ -1190,13 +1250,15 @@ def memory_usage( ) -> pd.DataFrame: """Displays the memory footprint of a DataFrame, without modifying the DataFrame itself. - See Pandas docs for memory_usage() for additional usage information, including more configuration options you can pass to this Pandas Checks method. + See Pandas docs for [memory_usage()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.memory_usage.html) for additional usage information, including more configuration options you can pass to this Pandas Checks method. Example: - ( - iris - .check.memory_usage() - ) + ```python + ( + iris + .check.memory_usage() + ) + ``` Args: fn: An optional lambda function to apply to the DataFrame before running Pandas memory_usage(). Example: `lambda df: df.shape[0]>10`. Applied before subset. @@ -1208,7 +1270,7 @@ def memory_usage( The original DataFrame, unchanged. Note: - Include argument `deep=True` to get further memory usage of object dtypes in the DataFrame. See Pandas docs for memory_usage() for more info. + Include argument `deep=True` to get further memory usage of object dtypes in the DataFrame. See Pandas docs for [memory_usage()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.memory_usage.html) for more info. 
""" _check_data( self._obj, @@ -1228,10 +1290,12 @@ def ncols( """Displays the number of columns in a DataFrame, without modifying the DataFrame itself. Example: - ( - iris - .check.ncols() - ) + ```python + ( + iris + .check.ncols() + ) + ``` Args: fn: An optional lambda function to apply to the DataFrame before counting the number of columns. Example: `lambda df: df.shape[0]>10`. Applied before subset. @@ -1259,14 +1323,16 @@ def ndups( ) -> pd.DataFrame: """Displays the number of duplicated rows in a DataFrame, without modifying the DataFrame itself. - See Pandas docs for duplicated() for additional usage information, including more configuration options you can pass to this Pandas Checks method. + See Pandas docs for [duplicated()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.duplicated.html) for additional usage information, including more configuration options you can pass to this Pandas Checks method. Example: - # Count the rows with duplicate pairs of values in two columns - ( - iris - .check.ndups(subset=["sepal_length", "sepal_width"]) - ) + ```python + # Count the number of rows with duplicate pairs of values across two columns: + ( + iris + .check.ndups(subset=["sepal_length", "sepal_width"]) + ) + ``` Args: fn: An optional lambda function to apply to the DataFrame before counting the number of duplicates. Example: `lambda df: df.shape[0]>10`. Applied before subset. @@ -1299,20 +1365,22 @@ def nnulls( ) -> pd.DataFrame: """Displays the number of rows with null values in a DataFrame, without modifying the DataFrame itself. - See Pandas docs for isna() for additional usage information. + See Pandas docs for [isna()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.isna.html) for additional usage information. 
Example: - # Count the number of rows that have any nulls, one count per column - ( - iris - .check.nnulls() - ) + ```python + # Count the number of rows that have any nulls, one count per column + ( + iris + .check.nnulls() + ) - # Count the number of rows in the DataFrame that have a null in any column - ( - iris - .check.nnulls(by_column=False) - ) + # Count the number of rows in the DataFrame that have a null in any column + ( + iris + .check.nnulls(by_column=False) + ) + ``` Args: fn: An optional lambda function to apply to the DataFrame before counting the number of rows with a null. Example: `lambda df: df.shape[0]>10`. Applied before subset. @@ -1359,10 +1427,12 @@ def nrows( """Displays the number of rows in a DataFrame, without modifying the DataFrame itself. Example: - ( - iris - .check.nrows() - ) + ```python + ( + iris + .check.nrows() + ) + ``` Args: fn: An optional lambda function to apply to the DataFrame before counting the number of rows. Example: `lambda df: df.shape[0]>10`. Applied before subset. @@ -1390,13 +1460,15 @@ def nunique( ) -> pd.DataFrame: """Displays the number of unique rows in a single column, without modifying the DataFrame itself. - See Pandas docs for nunique() for additional usage information, including more configuration options you can pass to this Pandas Checks method. + See Pandas docs for [nunique()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.nunique.html) for additional usage information, including more configuration options you can pass to this Pandas Checks method. Example: - ( - iris - .check.nunique(column="sepal_width") - ) + ```python + ( + iris + .check.nunique(column="sepal_width") + ) + ``` Args: column: The name of a column to count uniques in. Applied after fn. @@ -1429,14 +1501,15 @@ def plot( ) -> pd.DataFrame: """Displays a plot of the DataFrame, without modifying the DataFrame itself. 
- See Pandas docs for plot() for additional usage information, including more configuration options you can pass to this Pandas Checks method. - + See Pandas docs for [plot()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.html) for additional usage information, including more configuration options you can pass to this Pandas Checks method. Example: - ( - iris - .check.plot(kind="scatter", x="sepal_width", y="sepal_length", title="Sepal width vs sepal length") - ) + ```python + ( + iris + .check.plot(kind="scatter", x="sepal_width", y="sepal_length", title="Sepal width vs sepal length") + ) + ``` Args: fn: An optional lambda function to apply to the DataFrame before running Pandas plot(). Example: `lambda df: df.shape[0]>10`. Applied before subset. @@ -1473,19 +1546,21 @@ def print( """Displays text, another object, or (by default) the current DataFrame's head. Does not modify the DataFrame itself. Example: - # Print messages and milestones - ( - iris - .check.print("Starting data cleaning..."") - ... - ) + ```python + # Print messages and milestones + ( + iris + .check.print("Starting data cleaning...") + ... + ) - # Inspect a DataFrame, such as the interim result of data processing - ( - iris - ... - .check.print(fn=lambda df: df.query("sepal_width<0"), check_name="Rows with negative sepal_width") - ) + # Inspect a DataFrame, such as the interim result of data processing + ( + iris + ... + .check.print(fn=lambda df: df.query("sepal_width<0"), check_name="Rows with negative sepal_width") + ) + ``` Args: object: Object to print. Can be anything printable: str, int, list, another DataFrame, etc. If None, print the DataFrame's head (with `max_rows` rows). @@ -1515,22 +1590,25 @@ def print_time_elapsed( """Displays the time elapsed since start_time. Example: - import pandas_checks as pdc + ```python - start_time = pdc.start_timer() + import pandas_checks as pdc - ( - iris - ...
# Do some data processing - .check.print_time_elapsed(start_time, "Cleaning took") + start_time = pdc.start_timer() - ... # Do more - .check.print_time_elapsed(start_time, "Processing total time", units="seconds") # Force units to stay in seconds + ( + iris + ... # Do some data processing + .check.print_time_elapsed(start_time, "Cleaning took") - ) + ... # Do more + .check.print_time_elapsed(start_time, "Processing total time", units="seconds") # Force units to stay in seconds + + ) - # Result: "Cleaning took: 17.298324584960938 seconds - # "Processing total time: 71.0400543212890625 seconds + # Result: "Cleaning took: 17.298324584960938 seconds" + # "Processing total time: 71.0400543212890625 seconds" + ``` Args: start_time: The index time when the stopwatch started, which comes from the Pandas Checks start_timer() @@ -1552,15 +1630,17 @@ def reset_format(self) -> pd.DataFrame: """Globally restores all Pandas Checks formatting options to their default "factory" settings. Does not modify the DataFrame itself. Example: - ( - iris - .check.set_format(precision=9, use_emojis=False) + ```python + ( + iris + .check.set_format(precision=9, use_emojis=False) - # Print DF summary stats with precision 9 digits and no Pandas Checks emojis - .check.describe() + # Print DF summary stats with precision 9 digits and no Pandas Checks emojis + .check.describe() - .check.reset_format() # Go back to default precision and emojis πŸ₯³ - ) + .check.reset_format() # Go back to default precision and emojis 🥳 + ) + ``` Returns: The original DataFrame, unchanged. @@ -1574,15 +1654,17 @@ def set_format(self, **kwargs: Any) -> pd.DataFrame: Run pandas_checks.describe_options() to see a list of available options.
Example: - ( - iris - .check.set_format(precision=9, use_emojis=False) + ```python + ( + iris + .check.set_format(precision=9, use_emojis=False) - # Print DF summary stats with precision 9 digits and no Pandas Checks emojis - .check.describe() + # Print DF summary stats with precision 9 digits and no Pandas Checks emojis + .check.describe() - .check.reset_format() # Go back to default precision and emojis πŸ₯³ - ) + .check.reset_format() # Go back to default precision and emojis 🥳 + ) + ``` Args: **kwargs: Pairs of setting name and its new value. @@ -1597,21 +1679,22 @@ def set_mode(self, enable_checks: bool, enable_asserts: bool) -> pd.DataFrame: """Configures the operation mode for Pandas Checks globally. Does not modify the DataFrame itself. Example: + ```python + + # Disable checks except keep running assertions. Same as using `.check.disable_checks()`: + ( + iris + .check.set_mode(enable_checks=False, enable_asserts=True) + .check.describe() # This check will not be run + .check.assert_data(lambda s: s.shape[0]>10) # This check will still be run + ) - # Disable checks except keep running assertions - # Same as using .check.disable_checks() - ( - iris - .check.set_mode(enable_checks=False) - .check.describe() # This check will not be run - .check.assert_data(lambda s: s.shape[0]>10) # This check will still be run - ) - - # Disable checks and assertions - ( - iris - .check.set_mode(enable_checks=False, enable_asserts=False) - ) + # Disable checks _and_ assertions + ( + iris + .check.set_mode(enable_checks=False, enable_asserts=False) + ) + ``` Args: enable_checks: Whether to run any Pandas Checks methods globally. Does not affect .check.assert_*(). @@ -1631,14 +1714,14 @@ def shape( ) -> pd.DataFrame: """Displays the Dataframe's dimensions, without modifying the DataFrame itself. - See Pandas docs for shape for additional usage information.
- Example: - ( - iris - .check.shape() - .check.shape(fn=lambda df: df.query("sepal_length<5"), check_name="Shape of DataFrame subgroup with sepal_length<5") - ) + ```python + ( + iris + .check.shape() + .check.shape(fn=lambda df: df.query("sepal_length<5"), check_name="Shape of DataFrame subgroup with sepal_length<5") + ) + ``` Args: fn: An optional lambda function to apply to the DataFrame before running Pandas `shape`. Example: `lambda df: df.shape[0]>10`. Applied before subset. @@ -1669,13 +1752,15 @@ def tail( ) -> pd.DataFrame: """Displays the last n rows of the DataFrame, without modifying the DataFrame itself. - See Pandas docs for tail() for additional usage information, including more configuration options you can pass to this Pandas Checks method. + See Pandas docs for [tail()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.tail.html) for additional usage information, including more configuration options you can pass to this Pandas Checks method. Example: - ( - iris - .check.tail(10) - ) + ```python + ( + iris + .check.tail(10) + ) + ``` Args: n: Number of rows to show. @@ -1703,15 +1788,16 @@ def unique( ) -> pd.DataFrame: """Displays the unique values in a column, without modifying the DataFrame itself. - See Pandas docs for unique() for additional usage information, including more configuration options you can pass to this Pandas Checks method. + See Pandas docs for [unique()](https://pandas.pydata.org/docs/reference/api/pandas.Series.unique.html) for additional usage information, including more configuration options you can pass to this Pandas Checks method. Example: - ( - iris - .check.unique("species") - ) - # The check will print: - # 🌟 Unique values of species: ['setosa', 'versicolor', 'virginica'] + ```python + ( + iris + .check.unique("species") + ) + # The check will print: "🌟 Unique values of species: ['setosa', 'versicolor', 'virginica']" + ``` Args: column: Column to check for unique values.
@@ -1746,13 +1832,15 @@ def value_counts( ) -> pd.DataFrame: """Displays the value counts for a column, without modifying the DataFrame itself. - See Pandas docs for value_counts() for additional usage information, including more configuration options you can pass to this Pandas Checks method. + See Pandas docs for [value_counts()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.value_counts.html) for additional usage information, including more configuration options you can pass to this Pandas Checks method. Example: - ( - iris - .check.value_counts("sepal_length") - ) + ```python + ( + iris + .check.value_counts("sepal_length") + ) + ``` Args: column: Column to check for value counts. @@ -1799,21 +1887,23 @@ def write( - .tsv # Tab-separated data file - .xlsx - This functions uses the corresponding Pandas export function such as to_csv(). See Pandas docs for those functions for additional usage information, including more configuration options you can pass to this Pandas Checks method. + This function uses the corresponding Pandas export function, such as `to_csv()` and `to_feather()`. See [Pandas docs for those corresponding export functions](https://pandas.pydata.org/docs/reference/io.html) for additional usage information, including more configuration options you can pass to this Pandas Checks method. Example: - ( - iris + ```python + ( + iris - # Process data - ... + # Process data + ... - # Export the interim data for inspection - .check.write("iris_interim.xlsx") + # Export the interim data for inspection + .check.write("iris_interim.xlsx") - # Continue processing - ... - ) + # Continue processing + ... + ) + ``` Args: path: Path to write the file to. @@ -1821,7 +1911,7 @@ def write( fn: An optional lambda function to apply to the DataFrame before exporting. Example: `lambda df: df.shape[0]>10`. Applied before subset. 
subset: An optional list of column names or a string name of one column to limit which columns are exported. Applied after fn. verbose: Whether to print a message when the file is written. - **kwargs: Optional, additional keyword arguments to pass to the Pandas export function (.to_csv). + **kwargs: Optional, additional keyword arguments to pass to the Pandas export function (e.g. `.to_csv()`). Returns: The original DataFrame, unchanged. diff --git a/pandas_checks/SeriesChecks.py b/pandas_checks/SeriesChecks.py index 79374c1..0080bd3 100644 --- a/pandas_checks/SeriesChecks.py +++ b/pandas_checks/SeriesChecks.py @@ -58,6 +58,7 @@ def assert_all_nulls( """Tests whether Series has all nulls. Optionally raises an exception. Does not modify the Series itself. Example: + ```python ( iris ["sepal_length"] @@ -65,8 +66,9 @@ def assert_all_nulls( ) # Will raise an exception, "ㄨ Assert all nulls failed" + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: fail_message: Message to display if the condition fails. @@ -103,11 +105,12 @@ def assert_data( """Tests whether Series meets condition. Optionally raises an exception. Does not modify the Series itself. Example: - # Validate that the Series has at least 2 rows - + ```python ( iris ["sepal_length"] + + # Validate that a Series has at least 2 rows: .check.assert_data(lambda s: s.shape[0]>1) # Or customize the message displayed when alert fails @@ -116,6 +119,7 @@ def assert_data( # Or show a warning instead of raising an exception .check.assert_data(lambda df: s.shape[0]>1, "FYI Series has no rows", raise_exception=False) ) + ``` Args: condition: Assertion criteria in the form of a lambda function, such as `lambda s: s.shape[0]>10`. @@ -208,13 +212,15 @@ def assert_datetime( """Tests whether Series is datetime or timestamp. Optionally raises an exception. Does not modify the Series itself. 
Example: + ```python ( df ["datetime_col"] .check.assert_datetime() ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: fail_message: Message to display if the condition fails. If None, will report expected vs observed type. @@ -248,13 +254,15 @@ def assert_float( """Tests whether Series is floats. Optionally raises an exception. Does not modify the Series itself. Example: + ```python ( df ["float_col"] .check.assert_float() ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: fail_message: Message to display if the condition fails. If None, will report expected vs observed type. @@ -290,14 +298,16 @@ def assert_greater_than( """Tests whether Series is > or >= a minimum threshold. Optionally raises an exception. Does not modify the Series itself. Example: + ```python ( iris ["sepal_length"] # Validate that the Series is always >= 0 .check.assert_greater_than(0, or_equal_to=True) ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: min: the minimum value to compare Series to. Accepts any type that can be used in >, such as int, float, str, datetime @@ -338,13 +348,15 @@ def assert_int( """Tests whether Series is integers. Optionally raises an exception. Does not modify the Series itself. Example: + ```python ( df ["int_col"] .check.assert_int() ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: fail_message: Message to display if the condition fails. If None, will report expected vs observed type. 
@@ -380,6 +392,7 @@ def assert_less_than( """Tests whether all values in Series are < or <= a maximum threshold. Optionally raises an exception. Does not modify the Series itself. Example: + ```python ( iris ["sepal_length"] @@ -390,8 +403,9 @@ def assert_less_than( # Validate that it's always <= 1000 .check.assert_less_than(1000, or_equal_to=True) ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: max: the max value to compare Series to. Accepts any type that can be used in <, such as int, float, str, datetime @@ -433,13 +447,15 @@ def assert_negative( """Tests whether Series has all negative values. Optionally raises an exception. Does not modify the Series itself. Example: + ```python ( df ["column_name"] .check.assert_negative() ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: fail_message: Message to display if the condition fails. @@ -485,12 +501,14 @@ def assert_no_nulls( """Tests whether Series has no nulls. Optionally raises an exception. Does not modify the Series itself. Example: + ```python ( ["sepal_length"] .check.assert_no_nulls() ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: fail_message: Message to display if the condition fails. @@ -526,13 +544,15 @@ def assert_nrows( """Tests whether Series has a given number of rows. Optionally raises an exception. Does not modify the Series itself. Example: + ```python ( iris ["species"] .check.assert_nrows(20) ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. 
Args: nrows: The expected number of rows @@ -569,13 +589,15 @@ def assert_positive( """Tests whether Series has all positive values. Optionally raises an exception. Does not modify the Series itself. Example: + ```python ( iris ["sepal_length"] .check.assert_positive() ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: fail_message: Message to display if the condition fails. @@ -623,19 +645,20 @@ def assert_same_nrows( Optionally raises an exception. Does not modify the Series itself. Example: + ```python ( df1 ["column"] .check.assert_same_nrows(df2) ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: other: The DataFrame or Series that we expect to have the same # of rows as fail_message: Message to display if the condition fails. pass_message: Message to display if the condition passes. - subset: Optional, which column or columns to check the condition against. raise_exception: Whether to raise an exception if the condition fails. exception_to_raise: The exception to raise if the condition fails and raise_exception is True. verbose: Whether to display the pass message if the condition passes. @@ -666,13 +689,15 @@ def assert_str( """Tests whether Series is strings. Optionally raises an exception. Does not modify the Series itself. Example: + ```python ( iris ["species"] .check.assert_str() ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: fail_message: Message to display if the condition fails. If None, will report expected vs observed type. @@ -706,12 +731,14 @@ def assert_timedelta( """Tests whether Series is of type timedelta. Optionally raises an exception. 
Does not modify the Series itself. Example: + ```python ( df .check.assert_timedelta(subset=["timedelta_col"]) ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: fail_message: Message to display if the condition fails. If None, will report expected vs observed type. @@ -746,14 +773,16 @@ def assert_type( """Tests whether Series meets type assumption. Optionally raises an exception. Does not modify the Series itself. Example: - # Validate that a column of mixed types has overall type `object` + ```python + # Validate that a column of mixed types has overall type `object`: ( iris ["column_with_mixed_types"] .check.assert_type(object) ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: dtype: The required variable type @@ -797,13 +826,15 @@ def assert_unique( """Validates that a Series has no duplicate values. Optionally raises an exception. Does not modify the Series itself. Example: + ```python ( df ["id_column"] .check.assert_unique() ) + ``` - # See docs for .check.assert_data() for examples of how to customize assertions + See docs for `.check.assert_data()` for examples of how to customize assertions. Args: fail_message: Message to display if the condition fails. @@ -835,14 +866,16 @@ def describe( ) -> pd.Series: """Displays descriptive statistics about a Series, without modifying the Series itself. - See Pandas docs for describe() for additional usage information, including more configuration options you can pass to this Pandas Checks method. + See Pandas docs for [describe()](https://pandas.pydata.org/docs/reference/api/pandas.Series.describe.html) for additional usage information, including more configuration options you can pass to this Pandas Checks method. 
Example: + ```python ( iris ["sepal_length"] .check.describe() ) + ``` Args: fn: An optional lambda function to apply to the Series before running Pandas describe(). Example: `lambda s: s.dropna()`. @@ -861,6 +894,7 @@ def disable_checks(self, enable_asserts: bool = True) -> pd.Series: """Turns off Pandas Checks globally, such as in production mode. Calls to .check functions will not be run. Does not modify the Series itself. Example: + ```python ( iris ["sepal_length"] @@ -868,6 +902,7 @@ def disable_checks(self, enable_asserts: bool = True) -> pd.Series: .check.assert_data(lambda s: s.shape[0]>10) # This check will NOT be run .check.enable_checks() # Subsequent calls to .check will be run ) + ``` Args enable_assert: Optionally, whether to also enable or disable assert statements @@ -885,14 +920,14 @@ def dtype( ) -> pd.Series: """Displays the data type of a Series, without modifying the Series itself. - See Pandas docs for .dtype for additional usage information. - Example: + ```python ( iris ["sepal_length"] .check.dtype() ) + ``` Args: fn: An optional lambda function to apply to the Series before running Pandas dtype. Example: `lambda s: s.dropna()`. @@ -913,6 +948,7 @@ def enable_checks(self, enable_asserts: bool = True) -> pd.Series: """Globally enables Pandas Checks. Subequent calls to .check methods will be run. Does not modify the Series itself. Example: + ```python ( iris ["sepal_length"] @@ -920,6 +956,7 @@ def enable_checks(self, enable_asserts: bool = True) -> pd.Series: .check.assert_data(lambda s: s.shape[0]>10) # This check will NOT be run .check.enable_checks() # Subsequent calls to .check will be run ) + ``` Args: enable_asserts: Optionally, whether to globally enable or disable calls to .check.assert_data(). @@ -938,9 +975,13 @@ def function( """Applies an arbitrary function on a Series and shows the result, without modifying the Series itself. 
Example: - .check.function(fn=lambda s: s.shape[0]>10, check_name='Has at least 10 rows?') - - # Will return either 'True' or 'False' + ```python + ( + iris + .check.function(fn=lambda s: s.shape[0]>10, check_name='Has at least 10 rows?') + ) + # Will return "True" + ``` Args: fn: The lambda function to apply to the Series. Example: `lambda s: s.dropna()`. @@ -958,15 +999,15 @@ def get_mode( """Displays the current values of Pandas Checks global options enable_checks and enable_asserts. Does not modify the Series itself. Example: + ```python ( iris ["sepal_length"] .check.get_mode() ) - # The check will print: - # "🐼🩺 Pandas Checks mode: {'enable_checks': True, 'enable_asserts': True}" - + # The check will print: "🐼🩺 Pandas Checks mode: {'enable_checks': True, 'enable_asserts': True}" + ``` Args: check_name: An optional name for the check. Will be used as a preface the printed result. @@ -985,14 +1026,16 @@ def head( ) -> pd.Series: """Displays the first n rows of a Series, without modifying the Series itself. - See Pandas docs for head() for additional usage information. + See Pandas docs for [head()](https://pandas.pydata.org/docs/reference/api/pandas.Series.head.html) for additional usage information. Example: + ```python ( iris ["sepal_length"] .check.head(10) ) + ``` Args: n: The number of rows to display. @@ -1015,14 +1058,16 @@ def hist( ) -> pd.Series: """Displays a histogram for the Series's distribution, without modifying the Series itself. - See Pandas docs for hist() for additional usage information, including more configuration options you can pass to this Pandas Checks method. + See Pandas docs for [hist()](https://pandas.pydata.org/docs/reference/api/pandas.Series.hist.html) for additional usage information, including more configuration options you can pass to this Pandas Checks method. Example: + ```python ( iris ["sepal_length"] .check.hist() ) + ``` Args: fn: An optional lambda function to apply to the Series before running Pandas head(). 
Example: `lambda s: s.dropna()`. @@ -1048,14 +1093,16 @@ def info( ) -> pd.Series: """Displays summary information about a Series, without modifying the Series itself. - See Pandas docs for info() for additional usage information, including more configuration options you can pass to this Pandas Checks method. + See Pandas docs for [info()](https://pandas.pydata.org/docs/reference/api/pandas.Series.info.html) for additional usage information, including more configuration options you can pass to this Pandas Checks method. Example: + ```python ( iris ["sepal_length"] .check.info() ) + ``` Args: fn: An optional lambda function to apply to the Series before running Pandas info(). Example: `lambda s: s.dropna()`. @@ -1079,14 +1126,16 @@ def memory_usage( ) -> pd.Series: """Displays the memory footprint of a Series, without modifying the Series itself. - See Pandas docs for memory_usage() for additional usage information, including more configuration options you can pass to this Pandas Checks method. + See Pandas docs for [memory_usage()](https://pandas.pydata.org/docs/reference/api/pandas.Series.memory_usage.html) for additional usage information, including more configuration options you can pass to this Pandas Checks method. Example: + ```python ( iris ["sepal_length"] .check.memory_usage() ) + ``` Args: fn: An optional lambda function to apply to the Series before running Pandas memory_usage(). Example: `lambda s: s.dropna()`. @@ -1112,14 +1161,16 @@ def ndups( ) -> pd.Series: """Displays the number of duplicated rows in the Series, without modifying the Series itself. - See Pandas docs for duplicated() for additional usage information, including more configuration options you can pass to this Pandas Checks method. + See Pandas docs for [duplicated()](https://pandas.pydata.org/docs/reference/api/pandas.Series.duplicated.html) for additional usage information, including more configuration options (the `keep` argument) you can pass to this Pandas Checks method. 
Example: + ```python ( iris ["sepal_length"] .check.ndups() ) + ``` Args: fn: An optional lambda function to apply to the Series before counting the number of duplicates. Example: `lambda s: s.dropna()`. @@ -1141,14 +1192,16 @@ def nnulls( ) -> pd.Series: """Displays the number of rows with null values in the Series, without modifying the Series itself. - See Pandas docs for isna() for additional usage information. + See Pandas docs for [isna()](https://pandas.pydata.org/docs/reference/api/pandas.Series.isna.html) for additional usage information. Example: + ```python ( iris ["sepal_length"] .check.nnulls() ) + ``` Args: fn: An optional lambda function to apply to the Series before counting rows with nulls. Example: `lambda s: s.dropna()`. @@ -1170,11 +1223,13 @@ def nrows( """Displays the number of rows in a Series, without modifying the Series itself. Example: + ```python ( iris ["sepal_width"] .check.nrows() ) + ``` Args: fn: An optional lambda function to apply to the Series before counting the number of rows. Example: `lambda s: s.dropna()`. @@ -1196,14 +1251,16 @@ def nunique( ) -> pd.Series: """Displays the number of unique rows in a Series, without modifying the Series itself. - See Pandas docs for nunique() for additional usage information, including more configuration options you can pass to this Pandas Checks method. + See Pandas docs for [nunique()](https://pandas.pydata.org/docs/reference/api/pandas.Series.nunique.html) for additional usage information, including more configuration options (the `dropna` argument) you can pass to this Pandas Checks method. Example: + ```python ( iris ["sepal_width"] .check.nunique() ) + ``` Args: fn: An optional lambda function to apply to the Series before running Pandas nunique(). Example: `lambda s: s.dropna()`. @@ -1231,16 +1288,17 @@ def plot( ) -> pd.Series: """Displays a plot of the Series, without modifying the Series itself. 
- See Pandas docs for plot() for additional usage information, including more configuration options you can pass to this Pandas Checks method. + See Pandas docs for [plot()](https://pandas.pydata.org/docs/reference/api/pandas.Series.plot.html) for additional usage information, including more configuration options you can pass to this Pandas Checks method. Example: - # Show a box plot of the Series distribution + ```python + # Visualize the distribution of a Series with a box plot: ( iris ["sepal_width"] .check.plot(kind="box", title="Distribution of sepal width") ) - + ``` Args: fn: An optional lambda function to apply to the Series before running Pandas plot(). Example: `lambda s: s.dropna()`. @@ -1270,20 +1328,19 @@ def print( """Displays text, another object, or (by default) the current DataFrame's head. Does not modify the Series itself. Example: - # Print messages and milestones + ```python ( iris ["sepal_width"] + + # Print messages such as milestones .check.print("Starting data cleaning..."") ... - ) - # Inspect a Series, such as the interim result of data processing - ( - iris - ... - .check.print(fn=lambda s: s[s<0], check_name="Negative values of sepal_width") + # Inspect a Series, such as the interim result of data processing + .check.print(fn=lambda s: s[s<0], check_name="Negative values of sepal_width") # Will print those values if they exist ) + ``` Args: object: Object to print. Can be anything printable: str, int, list, another DataFrame, etc. If None, print the Series's head (with `max_rows` rows). @@ -1308,6 +1365,7 @@ def print_time_elapsed( """Displays the time elapsed since start_time. 
Example: + ```python import pandas_checks as pdc start_time = pdc.start_timer() @@ -1325,6 +1383,7 @@ def print_time_elapsed( # Result: "Cleaning took: 17.298324584960938 seconds # "Processing total time: 71.0400543212890625 seconds + ``` Args: start_time: The index time when the stopwatch started, which comes from the Pandas Checks start_timer() @@ -1346,6 +1405,7 @@ def reset_format(self) -> pd.Series: """Globally restores all Pandas Checks formatting options to their default "factory" settings. Does not modify the Series itself. Example: + ```python ( iris ["sepal_width"] @@ -1356,6 +1416,7 @@ def reset_format(self) -> pd.Series: .check.reset_format() # Go back to default precision and emojis πŸ₯³ ) + ``` Returns: The original Series, unchanged. @@ -1371,6 +1432,7 @@ def set_format(self, **kwargs: Any) -> pd.Series: See .check.reset_format() to restore defaults. Example: + ```python ( iris ["sepal_width"] @@ -1381,6 +1443,7 @@ def set_format(self, **kwargs: Any) -> pd.Series: .check.reset_format() # Go back to default precision and emojis πŸ₯³ ) + ``` Args: **kwargs: Pairs of setting name and its new value. @@ -1395,23 +1458,20 @@ def set_mode(self, enable_checks: bool, enable_asserts: bool) -> pd.Series: """Configures the operation mode for Pandas Checks globally. Does not modify the Series itself. Example: - - # Disable checks except keep running assertions - # Same as using .check.disable_checks() + ```python ( iris ["sepal_width"] + + # Disable checks except keep running assertions. Same as using `.check.disable_checks()`: .check.set_mode(enable_checks=False) .check.describe() # This check will not be run .check.assert_data(lambda s: s.shape[0]>10) # This check will still be run - ) - # Disable checks and assertions - ( - iris - ["sepal_width"] + # Disable checks _and_ assertions .check.set_mode(enable_checks=False, enable_asserts=False) ) + ``` Args: enable_checks: Whether to run any Pandas Checks methods globally. Does not affect .check.assert_*() calls. 
@@ -1430,15 +1490,15 @@ def shape( ) -> pd.Series: """Displays the Series's dimensions, without modifying the Series itself. - See Pandas docs for `shape` for additional usage information. - Example: + ```python ( iris ["sepal_width"] .check.shape() .check.shape(fn=lambda s: s[s<5]), check_name="Shape of sepal_width series with values <5") ) + ``` Args: fn: An optional lambda function to apply to the Series before running Pandas `shape`. Example: `lambda s: s.dropna()`. @@ -1467,13 +1527,15 @@ def tail( ) -> pd.Series: """Displays the last n rows of the Series, without modifying the Series itself. - See Pandas docs for tail() for additional usage information. + See Pandas docs for [tail()](https://pandas.pydata.org/docs/reference/api/pandas.Series.tail.html) for additional usage information. Example: + ```python ( iris .check.tail(10) ) + ``` Args: n: Number of rows to show. @@ -1495,16 +1557,17 @@ def unique( ) -> pd.Series: """Displays the unique values in a Series, without modifying the Series itself. - See Pandas docs for unique() for additional usage information. + See Pandas docs for [unique()](https://pandas.pydata.org/docs/reference/api/pandas.Series.unique.html) for additional usage information. Example: + ```python ( iris ["species"] .check.unique() ) - # The check will print: - # 🌟 Unique values of species: ['setosa', 'versicolor', 'virginica'] + # The check will print: "🌟 Unique values of species: ['setosa', 'versicolor', 'virginica']" + ``` Args: fn: An optional lambda function to apply to the Series before running Pandas unique(). Example: `lambda s: s.dropna()`. @@ -1532,14 +1595,16 @@ def value_counts( ) -> pd.Series: """Displays the value counts for a Series, without modifying the Series itself. - See Pandas docs for value_counts() for additional usage information, including more configuration options you can pass to this Pandas Checks method. 
+ See Pandas docs for [value_counts()](https://pandas.pydata.org/docs/reference/api/pandas.Series.value_counts.html) for additional usage information, including more configuration options you can pass to this Pandas Checks method. Example: + ```python ( iris ["sepal_length"] .check.value_counts() ) + ``` Args: max_rows: Maximum number of rows to show in the value counts. @@ -1580,12 +1645,13 @@ def write( - .tsv # Tab-separated data file - .xlsx - This functions uses the corresponding Pandas export function such as to_csv(). See Pandas docs for those functions for additional usage information, including more configuration options you can pass to this Pandas Checks method. + This function uses the corresponding Pandas export function, such as `to_csv()` and `to_feather()`. See [Pandas docs for those corresponding export functions](https://pandas.pydata.org/docs/reference/io.html) for additional usage information, including more configuration options you can pass to this Pandas Checks method. Note: Exporting to some formats such as Excel, Feather, and Parquet may require you to install additional packages. Example: + ```python ( iris ["sepal_length"] @@ -1599,13 +1665,14 @@ def write( # Continue processing ... ) + ``` Args: path: Path to write the file to. format: Optional file format to force for the export. If None, format is inferred from the file's extension in `path`. fn: An optional lambda function to apply to the Series before exporting. Example: `lambda s: s.dropna()`. verbose: Whether to print a message when the file is written. - **kwargs: Optional, additional keyword arguments to pass to the Pandas export function (.to_csv). + **kwargs: Optional, additional keyword arguments to pass to the Pandas export function (e.g. `.to_csv()`). Returns: The original Series, unchanged. 
diff --git a/pyproject.toml b/pyproject.toml index a128d7e..064d938 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pandas-checks" -version = "0.3.0" +version = "0.3.1" description = "Non-invasive health checks for Pandas method chains" authors = ["Chad Parmet "] readme = "README.md"