From 8df703e6ef7dddab99b39e285e4144a876774c4c Mon Sep 17 00:00:00 2001 From: Stuart Quin Date: Fri, 16 Sep 2022 09:39:34 +0100 Subject: [PATCH 1/2] Fix bug with regression type checking --- dataqa/infer_schema.py | 16 +++++++++------- pyproject.toml | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/dataqa/infer_schema.py b/dataqa/infer_schema.py index 49bbbb5..1c3769b 100644 --- a/dataqa/infer_schema.py +++ b/dataqa/infer_schema.py @@ -1,5 +1,5 @@ from collections import defaultdict -from typing import Any, Dict, Optional, Union, List +from typing import Any, Dict, Optional, Tuple, Union, List import numpy as np import pandas as pd @@ -211,7 +211,7 @@ def is_subset(list1: List[Any], list2: List[Any]) -> bool: def check_prediction_columns( column_mapping: ColumnMapping, column_to_categories: Dict[str, List[Union[str, np.number]]], -) -> dict: +) -> Dict: schema_dict = dict( (column, {"type": ColumnType.CATEGORICAL}) for column in column_mapping.categorical_columns @@ -279,8 +279,10 @@ def check_prediction_columns( ) if task == PredictionTask.REGRESSION: - if schema_dict[prediction_column] != ColumnType.NUMERICAL: - raise Exception(f"Regression tasks only valid with numerical columns.") + if schema_dict[prediction_column]["type"] != ColumnType.NUMERICAL: + raise Exception( + f"Regression tasks only valid with numerical columns {prediction_column}" + ) if task == PredictionTask.CLASSIFICATION: if not schema_dict[prediction_column]["type"] in [ @@ -299,7 +301,7 @@ def format_validated_schema( schema_dict: dict, prediction_columns: List[PredictionColumn], column_to_categories: Dict[str, List[Union[str, np.number]]], -) -> dict: +) -> List: new_schema = [] prediction_columns_dict = { column.prediction_column: column for column in prediction_columns @@ -320,14 +322,14 @@ def format_validated_schema( column_row["ground_truth"] = prediction_columns_dict[ column ].ground_truth_column - new_schema.append(column_row) + new_schema.append(column_row) return 
new_schema def validate_schema( df: pd.DataFrame, column_mapping: ColumnMapping -) -> [ColumnMapping, pd.DataFrame]: +) -> Tuple[ColumnMapping, pd.DataFrame]: categorical_columns = column_mapping.categorical_columns or [] numerical_columns = column_mapping.numerical_columns or [] text_columns = column_mapping.text_columns or [] diff --git a/pyproject.toml b/pyproject.toml index 7012207..87f9f10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dataqa" -version = "2.0.3" +version = "2.0.4" description = "Python Client library for DataQA" authors = ["Maria Mestre ","Stuart Quin "] readme = "README.md" From 3f2e7f168023a49751fd2b7b3fa12b207f494825 Mon Sep 17 00:00:00 2001 From: Stuart Quin Date: Fri, 16 Sep 2022 13:10:51 +0100 Subject: [PATCH 2/2] Surface API errors to users - In a future release we may want to translate the errors into something more user-friendly --- dataqa/publish.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dataqa/publish.py b/dataqa/publish.py index 86d9305..2b344c0 100644 --- a/dataqa/publish.py +++ b/dataqa/publish.py @@ -28,6 +28,9 @@ def login(self): data=json.dumps({"username": username, "password": password}), ) + if not response.ok: + raise Exception(response.content) + self.auth_token = response.json()["token"] def create_release(self, project_id: str, column_mapping: List[Dict]) -> str: @@ -38,19 +41,23 @@ def create_release(self, project_id: str, column_mapping: List[Dict]) -> str: }, json={"project": project_id, "column_mapping": column_mapping}, ) + if not response.ok: + raise Exception(response.content) release_id = response.json()["id"] return release_id def publish_data(self, df: pd.DataFrame, release_id: str): row_list = df.values.tolist() - _ = requests.post( + response = requests.post( self.api_url + "/api/v1/releasedata/", headers={ "Authorization": f"Token {self.auth_token}", }, json={"release": release_id, "published_data": row_list}, ) + if not response.ok: + 
raise Exception(response.content) def publish( self,