1
1
import copy
2
2
import pickle
3
- from typing import List
3
+ from typing import List , Union
4
4
5
5
import numpy as np
6
6
import pandas as pd
@@ -42,7 +42,7 @@ class DatasetProfiler:
42
42
43
43
def __init__ (
44
44
self ,
45
- df_path : str ,
45
+ df_path : Union [ str , pd . DataFrame ] ,
46
46
primary_key : str = None ,
47
47
sequence_index : str = None ,
48
48
target : str = None ,
@@ -60,8 +60,8 @@ def __init__(
60
60
61
61
Parameters
62
62
----------
63
- df_path : str
64
- path of the CSV file to be profiled.
63
+ df_path : Union[ str, pd.DataFrame]
64
+ path of the CSV file to be profiled, or a pandas DataFrame to profile directly.
65
65
primary_key : str, optional
66
66
name of the column defining the CSV primary key (composite).
67
67
sequence_index : str, optional
@@ -73,14 +73,19 @@ def __init__(
73
73
Description of `param3`.
74
74
75
75
"""
76
- df = pd .read_csv (
77
- df_path ,
78
- compression = compression ,
79
- sep = separator ,
80
- decimal = decimals ,
81
- thousands = thousands ,
82
- encoding = encoding ,
83
- )
76
+ if isinstance (df_path , str ):
77
+ df = pd .read_csv (
78
+ df_path ,
79
+ compression = compression ,
80
+ sep = separator ,
81
+ decimal = decimals ,
82
+ thousands = thousands ,
83
+ encoding = encoding ,
84
+ )
85
+ self .path = df_path
86
+ elif isinstance (df_path , pd .DataFrame ):
87
+ df = df_path
88
+ self .path = None
84
89
85
90
self .io_meta = {
86
91
"compression" : compression ,
@@ -90,8 +95,6 @@ def __init__(
90
95
"encoding" : encoding ,
91
96
}
92
97
93
- self .path = df_path
94
-
95
98
# Assigns primary key and makes sure key does not contain missing values
96
99
if primary_key is not None :
97
100
if type (primary_key ) != list :
@@ -626,7 +629,9 @@ def generate_expectations(self, docs=True, suite_name=None):
626
629
import great_expectations as ge
627
630
628
631
if suite_name is None :
629
- suite_name = self .path .split ("/" )[- 1 ]
632
+ suite_name = (
633
+ self .path .split ("/" )[- 1 ] if self .path else "dataset_profiler_results"
634
+ )
630
635
data_context = ge .data_context .DataContext ()
631
636
suite = data_context .create_expectation_suite (
632
637
suite_name ,
0 commit comments