Skip to content

Commit 911ab1a

Browse files
committed
Update df_path parameter so that it accepts either a path to a CSV file or a pandas DataFrame.
1 parent 736a7d3 commit 911ab1a

File tree

3 files changed

+22
-17
lines changed

3 files changed

+22
-17
lines changed

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "structured-profiling"
3-
version = "0.3.10"
3+
version = "0.3.11"
44
description = "A Python library to check for data quality and automatically generate data tests. "
55
authors = ["Clearbox AI <[email protected]>"]
66
license = "GPL"

structured_data_profiling/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.3.10"
1+
__version__ = "0.3.11"

structured_data_profiling/profiler/profiler.py

+20-15
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import copy
22
import pickle
3-
from typing import List
3+
from typing import List, Union
44

55
import numpy as np
66
import pandas as pd
@@ -42,7 +42,7 @@ class DatasetProfiler:
4242

4343
def __init__(
4444
self,
45-
df_path: str,
45+
df_path: Union[str, pd.DataFrame],
4646
primary_key: str = None,
4747
sequence_index: str = None,
4848
target: str = None,
@@ -60,8 +60,8 @@ def __init__(
6060
6161
Parameters
6262
----------
63-
df_path : str
64-
path of the CSV file to be profiled.
63+
df_path : Union[str, pd.DataFrame]
64+
path of the CSV file to be profiled, or a pandas DataFrame to profile directly.
6565
primary_key : str, optional
6666
name of the column defining the CSV primary key (composite).
6767
sequence_index : str, optional
@@ -73,14 +73,19 @@ def __init__(
7373
Description of `param3`.
7474
7575
"""
76-
df = pd.read_csv(
77-
df_path,
78-
compression=compression,
79-
sep=separator,
80-
decimal=decimals,
81-
thousands=thousands,
82-
encoding=encoding,
83-
)
76+
if isinstance(df_path, str):
77+
df = pd.read_csv(
78+
df_path,
79+
compression=compression,
80+
sep=separator,
81+
decimal=decimals,
82+
thousands=thousands,
83+
encoding=encoding,
84+
)
85+
self.path = df_path
86+
elif isinstance(df_path, pd.DataFrame):
87+
df = df_path
88+
self.path = None
8489

8590
self.io_meta = {
8691
"compression": compression,
@@ -90,8 +95,6 @@ def __init__(
9095
"encoding": encoding,
9196
}
9297

93-
self.path = df_path
94-
9598
# Assigns primary key and makes sure key does not contain missing values
9699
if primary_key is not None:
97100
if type(primary_key) != list:
@@ -626,7 +629,9 @@ def generate_expectations(self, docs=True, suite_name=None):
626629
import great_expectations as ge
627630

628631
if suite_name is None:
629-
suite_name = self.path.split("/")[-1]
632+
suite_name = (
633+
self.path.split("/")[-1] if self.path else "dataset_profiler_results"
634+
)
630635
data_context = ge.data_context.DataContext()
631636
suite = data_context.create_expectation_suite(
632637
suite_name,

0 commit comments

Comments
 (0)