-
Notifications
You must be signed in to change notification settings - Fork 233
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
d0437b7
commit 5163cec
Showing
41 changed files
with
1,091 additions
and
40 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
119 changes: 119 additions & 0 deletions
119
mlops/presenters/pipelines/sklearn_training/dashboard/block_layout.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
# Mage AI dashboard layout for the `sklearn_training` pipeline.
# `blocks` declares one chart widget per key (key == `uuid`); `layout` arranges
# them into rows — each `- -` entry starts a new row, and `width` is the
# relative column span of a chart within its row.
blocks:
  # Daily count of completed pipeline runs (line chart over execution_date).
  completed_pipeline_runs_daily_sklearn_training_a3:
    configuration:
      chart_type: time series line chart
      group_by:
      - execution_date
      metrics:
      - aggregation: count_distinct
        column: id
      time_interval: day
    data_source:
      pipeline_uuid: sklearn_training
      type: pipeline_runs
    name: Completed pipeline runs daily
    type: chart
    uuid: completed_pipeline_runs_daily_sklearn_training_a3
  # Daily count of failed pipeline runs (line chart over execution_date).
  failed_pipeline_runs_daily_sklearn_training_o5:
    configuration:
      chart_type: time series line chart
      group_by:
      - execution_date
      metrics:
      - aggregation: count_distinct
        column: id
      time_interval: day
    data_source:
      pipeline_uuid: sklearn_training
      type: pipeline_runs
    name: Failed pipeline runs daily
    type: chart
    uuid: failed_pipeline_runs_daily_sklearn_training_o5
  # Horizontal bar chart of run counts grouped by run status.
  pipeline_run_status_sklearn_training_n3:
    configuration:
      chart_style: horizontal
      chart_type: bar chart
      group_by:
      - status
      metrics:
      - aggregation: count_distinct
        column: id
      y_sort_order: descending
    data_source:
      pipeline_uuid: sklearn_training
      type: pipeline_runs
    name: Pipeline run status
    type: chart
    uuid: pipeline_run_status_sklearn_training_n3
  # Daily count of all pipeline runs regardless of status.
  pipeline_runs_daily_sklearn_training_p1:
    configuration:
      chart_type: time series line chart
      group_by:
      - execution_date
      metrics:
      - aggregation: count_distinct
        column: id
      time_interval: day
    data_source:
      pipeline_uuid: sklearn_training
      type: pipeline_runs
    name: Pipeline runs daily
    type: chart
    uuid: pipeline_runs_daily_sklearn_training_p1
  # Bar chart of trigger (schedule) counts grouped by active status.
  # NOTE: this and the following two blocks read `pipeline_schedules`,
  # not `pipeline_runs`.
  trigger_active_status_sklearn_training_r5:
    configuration:
      chart_type: bar chart
      group_by:
      - status
      metrics:
      - aggregation: count_distinct
        column: id
      y_sort_order: descending
    data_source:
      pipeline_uuid: sklearn_training
      type: pipeline_schedules
    name: Trigger active status
    type: chart
    uuid: trigger_active_status_sklearn_training_r5
  # Horizontal bar chart of trigger counts grouped by schedule interval.
  trigger_frequency_sklearn_training_o8:
    configuration:
      chart_style: horizontal
      chart_type: bar chart
      group_by:
      - schedule_interval
      metrics:
      - aggregation: count_distinct
        column: id
      y_sort_order: descending
    data_source:
      pipeline_uuid: sklearn_training
      type: pipeline_schedules
    name: Trigger frequency
    type: chart
    uuid: trigger_frequency_sklearn_training_o8
  # Pie chart of trigger counts grouped by schedule type.
  trigger_types_sklearn_training_s4:
    configuration:
      chart_type: pie chart
      group_by:
      - schedule_type
    data_source:
      pipeline_uuid: sklearn_training
      type: pipeline_schedules
    name: Trigger types
    type: chart
    uuid: trigger_types_sklearn_training_s4
# Row 1: trigger charts; row 2: run status + daily runs; row 3: completed/failed.
layout:
- - block_uuid: trigger_active_status_sklearn_training_r5
    width: 1
  - block_uuid: trigger_types_sklearn_training_s4
    width: 1
  - block_uuid: trigger_frequency_sklearn_training_o8
    width: 2
- - block_uuid: pipeline_run_status_sklearn_training_n3
    width: 1
  - block_uuid: pipeline_runs_daily_sklearn_training_p1
    width: 2
- - block_uuid: completed_pipeline_runs_daily_sklearn_training_a3
    width: 1
  - block_uuid: failed_pipeline_runs_daily_sklearn_training_o5
    width: 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
4 changes: 4 additions & 0 deletions
4
mlops/unit_3_observability/charts/completed_pipeline_runs_daily_sklearn_training_a3.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
|
||
@data_source
def d(df):
    """Keep only the pipeline runs whose status is 'completed'.

    `df` is the pipeline-runs frame supplied by the chart's data source —
    presumably it always carries a 'status' column; TODO confirm upstream.
    """
    is_completed = df['status'] == 'completed'
    return df.loc[is_completed]
4 changes: 4 additions & 0 deletions
4
mlops/unit_3_observability/charts/failed_pipeline_runs_daily_sklearn_training_o5.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
|
||
@data_source
def d(df):
    """Keep only the pipeline runs whose status is 'failed'.

    `df` is the pipeline-runs frame supplied by the chart's data source —
    presumably it always carries a 'status' column; TODO confirm upstream.
    """
    is_failed = df['status'] == 'failed'
    return df.loc[is_failed]
61 changes: 61 additions & 0 deletions
61
mlops/unit_3_observability/charts/feature_profiles_for_ingest.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# Build a per-column feature profile table for the chart renderer.
# `df_1` is injected by the Mage chart runtime before this script runs —
# TODO confirm; it is never defined here. The module-level `columns`, `stats`
# and `rows` presumably feed the table chart; do not rename them.
# NOTE(review): `statistics` is imported but never used below.
import statistics
from mage_ai.data_cleaner.column_types.column_type_detector import infer_column_types
from mage_ai.data_preparation.models.constants import DATAFRAME_ANALYSIS_MAX_COLUMNS
from mage_ai.shared.parsers import convert_matrix_to_dataframe


# Normalize the input to a DataFrame and cap the number of profiled columns.
df_1 = convert_matrix_to_dataframe(df_1)
df_1 = df_1.iloc[:, :DATAFRAME_ANALYSIS_MAX_COLUMNS]
columns_and_types = infer_column_types(df_1).items()
columns = [t[0] for t in columns_and_types]
stats = ['Type', 'Missing values', 'Unique values', 'Min', 'Max', 'Mean', 'Median', 'Mode']
# One row per statistic in `stats`; each row gets one value per column.
rows = [[] for _ in stats]

for col, col_type in columns_and_types:
    series = df_1[col]

    min_value = None
    max_value = None
    mean = None
    median = None

    not_null = series[series.notnull()]

    # Columns that are entirely null are skipped outright, which also leaves
    # their slots out of `rows` (rows become shorter than `columns`).
    if len(not_null) == 0:
        continue

    if col_type.value in ['number', 'number_with_decimals']:
        # Numeric columns stored as object dtype are coerced before math.
        if str(series.dtype) == 'object':
            if col_type.value == 'number_with_decimals':
                series = series.astype('float64')
                not_null = not_null.astype('float64')
            else:
                series = series.astype('int64')
                not_null = not_null.astype('int64')

        count = len(not_null.index)
        if count >= 1:
            mean = round(not_null.sum() / count, 2)
            # NOTE(review): this is the upper median for even-length data
            # (element at index count//2), not the averaged median.
            median = sorted(not_null)[int(count / 2)]
            min_value = round(series.min(), 2)
            max_value = round(series.max(), 2)
    else:
        # Non-numeric columns: min/max are lexicographic over string form.
        min_value = not_null.astype(str).min()
        max_value = not_null.astype(str).max()

    # Mode: highest count wins; ties broken by the larger value (reverse sort
    # compares (count, value) tuples).
    _, mode = sorted(
        [(v, k) for k, v in not_null.value_counts().items()],
        reverse=True,
    )[0]

    # Append this column's value for each statistic, in `stats` order.
    for idx, value in enumerate([
        col_type.value,
        len(series[series.isna()].index),
        len(series.unique()),
        min_value,
        max_value,
        mean,
        median,
        mode,
    ]):
        rows[idx].append(value)
Empty file.
8 changes: 8 additions & 0 deletions
8
mlops/unit_3_observability/charts/missing_values_for_ingest.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Per-column missing-value percentages for the chart renderer.
# `df_1` is injected by the Mage chart runtime — TODO confirm; the two list
# variables below are presumably read by the chart config, so their names
# (including the 'mising' typo) must stay exactly as-is.
number_of_rows = len(df_1.index)
columns_with_mising_values = []
percentage_of_missing_values = []
null_counts = df_1.isna().sum()
for col, missing in null_counts.items():
    if missing > 0:
        columns_with_mising_values.append(col)
        percentage_of_missing_values.append(100 * missing / number_of_rows)
21 changes: 21 additions & 0 deletions
21
mlops/unit_3_observability/charts/most_frequent_values_for_ingest.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Most-frequent (mode) value per column, with its count and share, for the
# chart renderer. `df_1` is injected by the Mage chart runtime — TODO confirm.
# `columns`, `column_index` and `rows` are presumably read by the table chart;
# do not rename them.
from mage_ai.data_preparation.models.constants import DATAFRAME_ANALYSIS_MAX_COLUMNS
from mage_ai.shared.parsers import convert_matrix_to_dataframe


df_1 = convert_matrix_to_dataframe(df_1)
columns = ['mode value', 'frequency', '% of values']
column_index = []
rows = []
for col in df_1.columns[:DATAFRAME_ANALYSIS_MAX_COLUMNS]:
    value_counts = df_1[col].value_counts()
    # Columns with no non-null values have nothing to report.
    if len(value_counts.index) == 0:
        continue
    # value_counts is sorted descending, so index[0] is the mode.
    column_value = value_counts.index[0]
    value = value_counts[column_value]
    # count() excludes nulls, so the percentage is relative to non-null rows.
    number_of_rows = df_1[col].count()
    column_index.append(col)
    # BUG FIX: the row order must match `columns` — previously the percentage
    # string was placed in the 'frequency' slot and the raw count in the
    # '% of values' slot, so the chart displayed the two swapped.
    rows.append([
        column_value,
        value,
        f'{round(100 * value / number_of_rows, 2)}%',
    ])
Oops, something went wrong.