diff --git a/doc/readme-screenshot.png b/doc/readme-screenshot.png index 53c071a..0b294de 100644 Binary files a/doc/readme-screenshot.png and b/doc/readme-screenshot.png differ diff --git a/doc/widgets/images/adversarial-debiasing-example.png b/doc/widgets/images/adversarial-debiasing-example.png index c4ddbb0..2191d02 100644 Binary files a/doc/widgets/images/adversarial-debiasing-example.png and b/doc/widgets/images/adversarial-debiasing-example.png differ diff --git a/doc/widgets/images/adversarial-debiasing-no-tensorflow.png b/doc/widgets/images/adversarial-debiasing-no-tensorflow.png index def630d..3dc4895 100644 Binary files a/doc/widgets/images/adversarial-debiasing-no-tensorflow.png and b/doc/widgets/images/adversarial-debiasing-no-tensorflow.png differ diff --git a/doc/widgets/images/adversarial-debiasing.png b/doc/widgets/images/adversarial-debiasing.png index 070924d..c51aca4 100644 Binary files a/doc/widgets/images/adversarial-debiasing.png and b/doc/widgets/images/adversarial-debiasing.png differ diff --git a/doc/widgets/images/as-fairness-data-example.png b/doc/widgets/images/as-fairness-data-example.png index b28d2d7..4ddd65d 100644 Binary files a/doc/widgets/images/as-fairness-data-example.png and b/doc/widgets/images/as-fairness-data-example.png differ diff --git a/doc/widgets/images/as-fairness-data.png b/doc/widgets/images/as-fairness-data.png index 9be8046..e8c3c66 100644 Binary files a/doc/widgets/images/as-fairness-data.png and b/doc/widgets/images/as-fairness-data.png differ diff --git a/doc/widgets/images/dataset-bias-example.png b/doc/widgets/images/dataset-bias-example.png index cb7daaa..e882b74 100644 Binary files a/doc/widgets/images/dataset-bias-example.png and b/doc/widgets/images/dataset-bias-example.png differ diff --git a/doc/widgets/images/dataset-bias.png b/doc/widgets/images/dataset-bias.png index a4ee989..5ee3511 100644 Binary files a/doc/widgets/images/dataset-bias.png and b/doc/widgets/images/dataset-bias.png differ diff --git a/doc/widgets/images/equal-odds-postprocessing-example.png b/doc/widgets/images/equal-odds-postprocessing-example.png index 1257d86..5e6896b 100644 Binary files a/doc/widgets/images/equal-odds-postprocessing-example.png and b/doc/widgets/images/equal-odds-postprocessing-example.png differ diff --git a/doc/widgets/images/reweighing-dataset-example.png b/doc/widgets/images/reweighing-dataset-example.png index cdae4a6..2d160b8 100644 Binary files a/doc/widgets/images/reweighing-dataset-example.png and b/doc/widgets/images/reweighing-dataset-example.png differ diff --git a/doc/widgets/images/reweighing-preprocessor-example.png b/doc/widgets/images/reweighing-preprocessor-example.png index f968d9a..a50967d 100644 Binary files a/doc/widgets/images/reweighing-preprocessor-example.png and b/doc/widgets/images/reweighing-preprocessor-example.png differ diff --git a/orangecontrib/fairness/evaluation/scoring.py b/orangecontrib/fairness/evaluation/scoring.py index 119f2c9..93b4d00 100644 --- a/orangecontrib/fairness/evaluation/scoring.py +++ b/orangecontrib/fairness/evaluation/scoring.py @@ -1,3 +1,13 @@ +""" +This module contains classes for computing fairness scores. 
+ +Classes: +- StatisticalParityDifference +- EqualOpportunityDifference +- AverageOddsDifference +- DisparateImpact +""" + from abc import abstractmethod from Orange.data import DiscreteVariable, ContinuousVariable, Domain from Orange.evaluation.scoring import Score @@ -19,7 +29,12 @@ class FairnessScorer(Score, abstract=True): - """Abstract class which will allow fairness scores to be calculated and displayed in certain widgets""" + """ + Abstract class for computing fairness scores. + + Abstract class which will allow fairness scores to be calculated and displayed. + Subclasses need to implement the metric method which will return the fairness score. + """ class_types = ( DiscreteVariable, @@ -28,17 +43,28 @@ class FairnessScorer(Score, abstract=True): @staticmethod def is_compatible(domain: Domain) -> bool: - """Checks if the scorer is compatible with the domain of the data. If not the scores will not be computed.""" + """ + Checks if the scorer is compatible with the domain of the data. + If not the scores will not be computed. + + Args: + domain (Domain): The domain of the data. + """ return contains_fairness_attributes(domain) def compute_score(self, results): - """Method that creates a ClassificationMetric object used to compute fairness scores""" + """ + Creates a ClassificationMetric object used to compute fairness scores + + Args: + results (Results): The results of the model. + """ dataset, privileged_groups, unprivileged_groups = table_to_standard_dataset( results.data ) - # We need to subset the created dataset so that it will match the shape/order + # We need to subset the created dataset so that it will match the shape/order # This is needed when/if some of the rows in the data were used multiple times dataset = dataset.subset(results.row_indices) dataset_pred = dataset.copy() @@ -54,12 +80,22 @@ def compute_score(self, results): @abstractmethod def metric(self, classification_metric): - """Method that needs to be implemented by the subclasses of the FairnessScorer.""" + """ + Abstract method that needs to be implemented by subclasses. + + It should return the fairness score. + + Args: + classification_metric (ClassificationMetric): + The ClassificationMetric object used to compute fairness scores. + """ pass class StatisticalParityDifference(FairnessScorer): - """Class for Statistical Parity Difference fairness scoring.""" + """ + A class for computing the Statistical Parity Difference fairness score. + """ name = "SPD" long_name = str( @@ -76,7 +112,9 @@ def metric(self, classification_metric): class EqualOpportunityDifference(FairnessScorer): - """Class for Equal Opportunity Difference fairness scoring.""" + """ + A class for computing the Equal Opportunity Difference fairness score. + """ name = "EOD" long_name = str( @@ -94,7 +132,9 @@ def metric(self, classification_metric): class AverageOddsDifference(FairnessScorer): - """Class for Average Odds Difference fairness scoring.""" + """ + A class for computing the Average Odds Difference fairness score. + """ name = "AOD" long_name = str( @@ -114,7 +154,9 @@ def metric(self, classification_metric): class DisparateImpact(FairnessScorer): - """Class for Disparate Impact fairness scoring.""" + """ + A class for computing the Disparate Impact fairness score. 
+ """ name = "DI" long_name = str( @@ -130,8 +172,8 @@ class DisparateImpact(FairnessScorer): ) # TODO: When using randomize, models sometimes predict the same class for all instances - # This can lead to division by zero in the Disparate Impact score (and untrue results for the other scores) + # This can lead to division by zero in the Disparate Impact score + # (and untrue results for the other scores) # What is the best way to handle this? def metric(self, classification_metric): return classification_metric.disparate_impact() - \ No newline at end of file diff --git a/orangecontrib/fairness/modeling/adversarial.py b/orangecontrib/fairness/modeling/adversarial.py index c0d3959..637e83c 100644 --- a/orangecontrib/fairness/modeling/adversarial.py +++ b/orangecontrib/fairness/modeling/adversarial.py @@ -1,3 +1,8 @@ +""" +This module contains the AdversarialDebiasingLearner and AdversarialDebiasingModel classes +which are used to create and fit the AdversarialDebiasing model from the aif360 library. +""" + import numpy as np from Orange.base import Learner, Model @@ -19,25 +24,32 @@ tf = None - # This gets called after the model is created and fitted # It is stored so we can use it to predict on new data class AdversarialDebiasingModel(Model): - """Model created and fitted by the AdversarialDebiasingLearner, which is used to predict on new data""" + """ + Model created and fitted by the AdversarialDebiasingLearner, used to predict on new data. + """ def __init__(self, model): super().__init__() self._model = model def predict(self, data): - """Function used to predict on new data""" + """ + Method used to 'preprocess', predict on new data and 'postprocess' the predictions. + + Args: + data (Table): The data to predict on. + """ if isinstance(data, Table): standard_dataset, _, _ = table_to_standard_dataset(data) predictions = self._model.predict(standard_dataset) # Array of scores with a column of scores for each class # The scores given by the model are always for the favorable class - # If the favorable class is 1 then the scores need to be flipped or else the AUC will be "reversed" + # If the favorable class is 1 then the scores need to be flipped or + # else the AUC will be "reversed" # (the first column is 1 - scores and the second column is scores) if standard_dataset.favorable_label == 0: scores = np.hstack( @@ -62,18 +74,41 @@ def predict_storage(self, data): def __call__(self, data, ret=Model.Value): return super().__call__(data, ret) + if is_tensorflow_installed(): + class AdversarialDebiasingLearner(Learner): - """Learner subclass used to create and fit the AdversarialDebiasingModel""" + """ + Learner subclass used to create and fit the AdversarialDebiasingModel + + Attributes: + preprocessors (list): List of preprocessors, applied when __call__ function is called + callback (function): Callback function used to track the progress of the model fitting + + Args: + preprocessors (list): List of preprocessors to apply to the data before fitting a model + classifier_num_hidden_units (int): Number of hidden units in the classifier + num_epochs (int): Number of epochs to train the model + batch_size (int): Batch size used to train the model + debias (bool): Whether to debias the model + adversary_loss_weight (float): Weight of the adversary loss + seed (int): Seed used to initialize the model + """ __returns__ = AdversarialDebiasingModel - # List of preprocessors, these get applied when the __call__ function is called preprocessors = [Normalize()] callback = None - def __init__(self, 
preprocessors=None, classifier_num_hidden_units=100,
-                 num_epochs=50, batch_size=128, debias=True,
-                 adversary_loss_weight=0.1, seed=-1):
+    def __init__(
+        self,
+        preprocessors=None,
+        classifier_num_hidden_units=100,
+        num_epochs=50,
+        batch_size=128,
+        debias=True,
+        adversary_loss_weight=0.1,
+        seed=-1,
+    ):
         super().__init__(preprocessors=preprocessors)
         self.params = vars()
@@ -83,12 +118,15 @@ def __init__(self, preprocessors=None, classifier_num_hidden_units=100,
             "batch_size": batch_size,
             "debias": debias,
             "adversary_loss_weight": adversary_loss_weight,
-            **({"seed": seed} if seed != -1 else {})
+            **({"seed": seed} if seed != -1 else {}),
         }

     def _calculate_total_runs(self, data):
-        """Function used to calculate the total number of runs the learner will perform on the data"""
-        # This is need to calculate and display the progress of the training
+        """
+        Method for calculating the total number of runs the learner will perform on the data.
+
+        Used to calculate and display the progress of the training.
+        """
         num_epochs = self.params["num_epochs"]
         batch_size = self.params["batch_size"]
         num_instances = len(data)
@@ -97,7 +135,11 @@
     def incompatibility_reason(self, domain):
-        """Function used to check if the domain is compatible with the learner (contains fairness attributes)"""
+        """
+        Method used to check if the domain is compatible with the learner.
+
+        The domain is compatible if it contains the fairness attributes.
+        """
         if not contains_fairness_attributes(domain):
             return MISSING_FAIRNESS_ATTRIBUTES
@@ -142,16 +184,32 @@ def fit(self, data: Table) -> AdversarialDebiasingModel:
         return AdversarialDebiasingModel(model=model)

     def __call__(self, data, progress_callback=None):
-        """Call method for AdversarialDebiasingLearner, in the superclass it calls the _fit_model function (and other things)"""
+        """
+        Call method for AdversarialDebiasingLearner.
+
+        In the superclass it calls the _fit_model function (along with other setup).
+        """
        self.callback = progress_callback
         model = super().__call__(data, progress_callback)
         model.params = self.params
         return model
-
+
 class CallbackSession(tf.Session):
-    """Subclass of tensorflow session with callback functionality for progress tracking and displaying"""
+    """
+    Subclass of tensorflow session.
+
+    It adds callback functionality for progress tracking and displaying.
-    def __init__(self, target="", graph=None, config=None, callback=None, total_runs=0):
+
+    Attributes:
+        callback (function): Callback function used to track the progress of the model fitting
+        run_count (int): Number of times the run function has been called
+        callback_enabled (bool): Flag to enable or disable the callback function
+        total_runs (int): Total number of runs the session will perform
+    """
+
+    def __init__(
+        self, target="", graph=None, config=None, callback=None, total_runs=0
+    ):
         super().__init__(target=target, graph=graph, config=config)
         self.callback = callback
         self.run_count = 0
@@ -159,9 +217,13 @@ def __init__(self, target="", graph=None, config=None, callback=None, total_runs
         self.total_runs = total_runs

     def run(self, fetches, feed_dict=None, options=None, run_metadata=None):
-        """A overridden run function which calls the callback function and calculates the progress"""
-        # To calculate the progress using these ways we need to know the number of expected
-        # calls to the callback function and count how many times it has been called
+        """
+        An overridden run function which calls the callback function and calculates the progress.
+
+        To calculate the progress this way we need to know the expected number of
+        calls to the callback function and count how many times it has been called.
+        """
+
         self.run_count += 1
         progress = (self.run_count / self.total_runs) * 100
         if self.callback_enabled and self.callback:
@@ -179,9 +241,9 @@
     def disable_callback(self):
         """Disable callback method for the model prediction fase"""
         self.callback_enabled = False

-else:
+
 class AdversarialDebiasingLearner(Learner):
     """Dummy class used if tensorflow is not installed"""
+
     __returns__ = Model
-
\ No newline at end of file
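Editor's note, not part of the changeset: the learner above is driven like any other Orange learner. A minimal sketch, assuming TensorFlow is installed and that `data` is an Orange Table already annotated by the As Fairness Data widget (parameter values mirror the defaults in `__init__`):

# Editor's sketch, not part of this diff.
from orangecontrib.fairness.modeling.adversarial import AdversarialDebiasingLearner

learner = AdversarialDebiasingLearner(
    classifier_num_hidden_units=100,
    num_epochs=50,
    batch_size=128,
    debias=True,
    adversary_loss_weight=0.1,
    seed=42,              # pass -1 (the default) for a non-deterministic run
)
model = learner(data)     # fits aif360's AdversarialDebiasing under the hood
values = model(data)      # predicted class values for the same table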
diff --git a/orangecontrib/fairness/modeling/postprocessing.py b/orangecontrib/fairness/modeling/postprocessing.py
index 1f41d72..afc99c4 100644
--- a/orangecontrib/fairness/modeling/postprocessing.py
+++ b/orangecontrib/fairness/modeling/postprocessing.py
@@ -1,3 +1,9 @@
+"""
+This module contains the PostprocessingModel and PostprocessingLearner classes.
+
+These are used to create and fit the model and postprocessor and create the PostprocessingModel.
+"""
+
 import numpy as np

 from Orange.base import Learner, Model
@@ -14,7 +20,16 @@

 class PostprocessingModel(Model):
-    """Model created and fitted by the PostprocessingLearner, which is used to predict on new data and to postprocess the predictions"""
+    """
+    Model created and fitted by the PostprocessingLearner.
+
+    Used to predict on new data and to postprocess the predictions.
+
+    Attributes:
+    - model (Model): The model used to make predictions
+    - postprocessor (EqOddsPostprocessing): The postprocessor used to postprocess the predictions
+    """
+
     def __init__(self, model, postprocessor):
         super().__init__()
         self.model = model
@@ -22,21 +37,26 @@ def __init__(self, model, postprocessor):
         self.params = vars()

     def predict(self, data):
-        """Function used to preprocess, predict and postprocess on new data"""
+        """
+        Method used to preprocess, predict and postprocess on new data.
+
+        First we get the predictions from the model, then we use the
+        postprocessor on the predictions, and finally we create dummy scores
+        which are used to approximate the scores of the postprocessed predictions.
+ """ if isinstance(data, Table): - # Get the predictions and scores from the model (we don't need the scores because they are irrelevant after postprocessing) predictions, _ = self.model(data, ret=Model.ValueProbs) standard_dataset, _, _ = table_to_standard_dataset(data) standard_dataset_pred = standard_dataset.copy(deepcopy=True) standard_dataset_pred.labels = predictions.reshape(-1, 1) - # Postprocess the predictions standard_dataset_pred_transf = self.postprocessor.predict( standard_dataset_pred ) - # Create dummy scores from predictions (if the predictions are 0 or 1, the scores will be 0 or 1) + # Create dummy scores from predictions + # (if the predictions are 0 or 1, the scores will be 0 or 1) scores = np.zeros((len(standard_dataset_pred_transf.labels), 2)) scores[:, 1] = standard_dataset_pred_transf.labels.ravel() scores[:, 0] = 1 - standard_dataset_pred_transf.labels.ravel() @@ -54,7 +74,18 @@ def __call__(self, data, ret=Model.Value): class PostprocessingLearner(Learner): - """Learner subclass used to create and fit the model and postprocessor and create the PostprocessingModel""" + """ + Subclass used to create and fit the model and postprocessor and create the PostprocessingModel + + Attributes: + - learner (Learner): The learner used to create the model + - preprocessors (list): The preprocessors used to preprocess the data + - repeatable (bool): If the model should be repeatable + - callback (function): The callback used to interrupt the widget + - seed (int): The seed used to make the model repeatable + - params (dict): The parameters used in the __call__ method + """ + __returns__ = PostprocessingModel def __init__(self, learner, preprocessors=None, repeatable=None): @@ -65,7 +96,9 @@ def __init__(self, learner, preprocessors=None, repeatable=None): self.params = vars() def incompatibility_reason(self, domain): - """Function used to check if the domain contains the fairness attributes""" + """ + Method used to check if the domain contains the fairness attributes + """ if not contains_fairness_attributes(domain): return MISSING_FAIRNESS_ATTRIBUTES @@ -85,21 +118,24 @@ def _fit_model(self, data): return self.fit(data) def fit(self, data): - """Function used to preprocess the data, fit the model and the postprocessor""" + """ + Method used to preprocess the data, fit the model and the postprocessor + """ if isinstance(data, Table): if not contains_fairness_attributes(data.domain): raise ValueError(MISSING_FAIRNESS_ATTRIBUTES) - + # Fit the model to the data - # the callback is currently not used for progress but to allow the user to interrupt the widget while the model is training + # the callback is currently not used for progress but to allow + # the user to interrupt the widget while the model is training model = self.learner(data, self.callback) - # Use cross validation to get the predictions, we do this to avoid having to use + # Use cross validation to get the predictions, we do this to avoid having to use # a train/validation split to get the predictions required to fit the postprocessor cv = CrossValidation(k=5) - # Including the callback in the cross validation will allow the user to + # Including the callback in the cross validation will allow the user to # interrupt the widget when it's in the cross validation phase - res = cv(data, [self.learner], callback=self.callback) + res = cv(data, [self.learner], callback=self.callback) predictions = res.predicted[0] row_indices = res.row_indices predictions = predictions[np.argsort(row_indices)] @@ -126,7 +162,9 @@ def 
fit(self, data):

     def __call__(self, data, progress_callback=None):
         self.callback = progress_callback
-        # self.learner.callback = progress_callback by adding this line, the progress will "work" for learners with the .callback attribute but it may cause problems for learners without it
+        # By adding this line, the progress will "work" for learners with the
+        # .callback attribute but it may cause problems for learners without it
+        # self.learner.callback = progress_callback
         model = super().__call__(data)
         model.params = self.params
         return model
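Editor's note, not part of the changeset: a minimal sketch of how this learner is meant to be driven from code, assuming `data` is a Table carrying the As Fairness Data annotations:

# Editor's sketch, not part of this diff. The learner wraps a base learner and
# fits aif360's EqOddsPostprocessing on 5-fold cross-validated predictions.
from Orange.classification import LogisticRegressionLearner
from orangecontrib.fairness.modeling.postprocessing import PostprocessingLearner

learner = PostprocessingLearner(LogisticRegressionLearner(), repeatable=True)
model = learner(data)    # fits the base model, then fits the postprocessor
adjusted = model(data)   # predictions adjusted to better satisfy equalized odds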
diff --git a/orangecontrib/fairness/widgets/icons/adversarial_debiasing.svg b/orangecontrib/fairness/widgets/icons/adversarial_debiasing.svg
index 50bfaa2..6801472 100644
--- a/orangecontrib/fairness/widgets/icons/adversarial_debiasing.svg
+++ b/orangecontrib/fairness/widgets/icons/adversarial_debiasing.svg
@@ -1,56 +1,54 @@
(SVG markup rewritten; tag content was stripped during extraction and is not reproduced here)
diff --git a/orangecontrib/fairness/widgets/icons/as_fairness_data.svg b/orangecontrib/fairness/widgets/icons/as_fairness_data.svg
index d6b6b77..b26d3ce 100644
--- a/orangecontrib/fairness/widgets/icons/as_fairness_data.svg
+++ b/orangecontrib/fairness/widgets/icons/as_fairness_data.svg
@@ -1,2 +1,24 @@
(SVG markup rewritten; tag content was stripped during extraction and is not reproduced here)
diff --git a/orangecontrib/fairness/widgets/icons/category.svg b/orangecontrib/fairness/widgets/icons/category.svg
index d6b6b77..b26d3ce 100644
--- a/orangecontrib/fairness/widgets/icons/category.svg
+++ b/orangecontrib/fairness/widgets/icons/category.svg
@@ -1,2 +1,24 @@
(SVG markup rewritten; tag content was stripped during extraction and is not reproduced here)
diff --git a/orangecontrib/fairness/widgets/icons/combine_preprocessors.svg b/orangecontrib/fairness/widgets/icons/combine_preprocessors.svg
index abf42ba..2e62ee4 100644
--- a/orangecontrib/fairness/widgets/icons/combine_preprocessors.svg
+++ b/orangecontrib/fairness/widgets/icons/combine_preprocessors.svg
@@ -1,12 +1,29 @@
(SVG markup rewritten; tag content was stripped during extraction and is not reproduced here)
diff --git a/orangecontrib/fairness/widgets/icons/dataset_bias.svg b/orangecontrib/fairness/widgets/icons/dataset_bias.svg
index c663524..e3d21b0 100644
--- a/orangecontrib/fairness/widgets/icons/dataset_bias.svg
+++ b/orangecontrib/fairness/widgets/icons/dataset_bias.svg
@@ -1,2 +1,32 @@
(SVG markup rewritten; tag content was stripped during extraction and is not reproduced here)
diff --git a/orangecontrib/fairness/widgets/icons/eq_odds_postprocessing.svg b/orangecontrib/fairness/widgets/icons/eq_odds_postprocessing.svg
index aaa6951..e904da0 100644
--- a/orangecontrib/fairness/widgets/icons/eq_odds_postprocessing.svg
+++ b/orangecontrib/fairness/widgets/icons/eq_odds_postprocessing.svg
@@ -1,19 +1,13 @@
(SVG markup rewritten; removes the old icon's Sketch metadata: title "stats [#1367]", "Created with Sketch."; remaining tag content was stripped during extraction)
diff --git a/orangecontrib/fairness/widgets/icons/reweighing.svg b/orangecontrib/fairness/widgets/icons/reweighing.svg
index 94d31ea..5459f7c 100644
--- a/orangecontrib/fairness/widgets/icons/reweighing.svg
+++ b/orangecontrib/fairness/widgets/icons/reweighing.svg
@@ -1,2 +1,4 @@
(SVG markup rewritten; tag content was stripped during extraction and is not reproduced here)
diff --git a/orangecontrib/fairness/widgets/icons/weighted_log_reg.svg b/orangecontrib/fairness/widgets/icons/weighted_log_reg.svg
index c150123..631e629 100644
--- a/orangecontrib/fairness/widgets/icons/weighted_log_reg.svg
+++ b/orangecontrib/fairness/widgets/icons/weighted_log_reg.svg
@@ -1,2 +1,23 @@
(SVG markup rewritten; tag content was stripped during extraction and is not reproduced here)
diff --git a/orangecontrib/fairness/widgets/owadversarialdebiasing.py b/orangecontrib/fairness/widgets/owadversarialdebiasing.py
index 9bd2ad9..e3e9b31 100644
--- a/orangecontrib/fairness/widgets/owadversarialdebiasing.py
+++ b/orangecontrib/fairness/widgets/owadversarialdebiasing.py
@@ -1,3 +1,7 @@
+"""
+This module contains the OWAdversarialDebiasing widget.
+"""
+
 from itertools import chain

 from Orange.widgets import gui
@@ -25,21 +29,26 @@
 )

-
-
-
 class InterruptException(Exception):
     """A dummy exception used to interrupt the training process."""
+
     pass


 class AdversarialDebiasingRunner:
-    """A class used to run the AdversarialDebiasingLearner in a separate thread and display progress using the callback."""
+    """
+    A class used to run the AdversarialDebiasingLearner in a separate
+    thread and display progress using the callback.
+    """

     @staticmethod
     def run(
         learner: AdversarialDebiasingLearner, data: Table, state: TaskState
     ) -> Model:
+        """
+        Method used to run the AdversarialDebiasingLearner in a separate
+        thread and display progress using the callback.
+        """
         if data is None:
             return None
@@ -56,7 +65,10 @@
 class OWAdversarialDebiasing(ConcurrentWidgetMixin, OWBaseLearner):
     """A widget used to customize and create the AdversarialDebiasing Learner and/or Model"""

     name = "Adversarial Debiasing"
-    description = "Adversarial Debiasing classification algorithm with or without fairness constraints."
+    description = (
+        "Adversarial Debiasing classification algorithm "
+        "with or without fairness constraints."
+    )
     icon = "icons/adversarial_debiasing.svg"
     priority = 30
@@ -76,7 +88,8 @@ class Information(OWBaseLearner.Information):
         # This was slightly changed from the original to fit the new widget better
         ignored_preprocessors = Msg(
             "Ignoring default preprocessing. \n"
-            "Default preprocessing (scailing), has been replaced with user-specified preprocessors. \n"
+            "Default preprocessing (scaling) has been "
+            "replaced with user-specified preprocessors. \n"
             "Problems may occur if these are inadequate for the given data."
         )
@@ -209,36 +222,74 @@ def no_tensorflow_layout(self):
         layout = QVBoxLayout()
         label = QLabel(
-            'The Adversarial Debiasing widget requires TensorFlow, which is not installed.\n'
+            "The Adversarial Debiasing widget requires TensorFlow, which is not installed.\n"
            'You can install it by clicking the "Install TensorFlow" button below, selecting \n'
            'the checkbox next to the "tensorflow" text and clicking the "Ok" button.\n'
-            'After that, you will need to restart Orange.'
+            "After that, you will need to restart Orange."
) label.setWordWrap(True) layout.addWidget(label) button = QPushButton("Install TensorFlow") button.clicked.connect(self.install_tensorflow) layout.addWidget(button) - + box = gui.widgetBox(self.controlArea, True, orientation=layout) - + self.Error.add_message("no_tensorflow", TENSORFLOW_NOT_INSTALLED) self.Error.no_tensorflow() - + def install_tensorflow(self): """ Installs tensorflow """ installable = Installable( - name='tensorflow', - version='2.15.0', - summary='TensorFlow is an open source machine learning framework for everyone.', + name="tensorflow", + version="2.15.0", + summary="TensorFlow is an open source machine learning framework for everyone.", description="[![Python](https://img.shields.io/pypi/pyversions/tensorflow.svg?style=plastic)](https://badge.fury.io/py/tensorflow)\n[![PyPI](https://badge.fury.io/py/tensorflow.svg)](https://badge.fury.io/py/tensorflow)\n\nTensorFlow is an open source software library for high performance numerical\ncomputation. Its flexible architecture allows easy deployment of computation\nacross a variety of platforms (CPUs, GPUs, TPUs), and from desktops to clusters\nof servers to mobile and edge devices.\n\nOriginally developed by researchers and engineers from the Google Brain team\nwithin Google's AI organization, it comes with strong support for machine\nlearning and deep learning and the flexible numerical computation core is used\nacross many other scientific domains. TensorFlow is licensed under [Apache\n2.0](https://github.com/tensorflow/tensorflow/blob/master/LICENSE).\n", - package_url='https://pypi.org/project/tensorflow/', + package_url="https://pypi.org/project/tensorflow/", release_urls=[], - requirements = ["absl-py (>=1.0.0)","astunparse (>=1.6.0)","flatbuffers (>=23.5.26)","gast (!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1)","google-pasta (>=0.1.1)","h5py (>=2.9.0)","libclang (>=13.0.0)","ml-dtypes (~=0.2.0)","numpy (<2.0.0,>=1.23.5)","opt-einsum (>=2.3.2)","packaging","protobuf (!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3)","setuptools","six (>=1.12.0)","termcolor (>=1.1.0)","typing-extensions (>=3.6.6)","wrapt (<1.15,>=1.11.0)","tensorflow-io-gcs-filesystem (>=0.23.1)","grpcio (<2.0,>=1.24.3)","tensorboard (<2.16,>=2.15)","tensorflow-estimator (<2.16,>=2.15.0)","keras (<2.16,>=2.15.0)","nvidia-cublas-cu12 (==12.2.5.6) ; extra == 'and-cuda'","nvidia-cuda-cupti-cu12 (==12.2.142) ; extra == 'and-cuda'","nvidia-cuda-nvcc-cu12 (==12.2.140) ; extra == 'and-cuda'","nvidia-cuda-nvrtc-cu12 (==12.2.140) ; extra == 'and-cuda'","nvidia-cuda-runtime-cu12 (==12.2.140) ; extra == 'and-cuda'","nvidia-cudnn-cu12 (==8.9.4.25) ; extra == 'and-cuda'","nvidia-cufft-cu12 (==11.0.8.103) ; extra == 'and-cuda'","nvidia-curand-cu12 (==10.3.3.141) ; extra == 'and-cuda'","nvidia-cusolver-cu12 (==11.5.2.141) ; extra == 'and-cuda'","nvidia-cusparse-cu12 (==12.1.2.141) ; extra == 'and-cuda'","nvidia-nccl-cu12 (==2.16.5) ; extra == 'and-cuda'","nvidia-nvjitlink-cu12 (==12.2.140) ; extra == 'and-cuda'","tensorrt (==8.6.1.post1) ; extra == 'and-cuda'","tensorrt-bindings (==8.6.1) ; extra == 'and-cuda'","tensorrt-libs (==8.6.1) ; extra == 'and-cuda'"], - description_content_type='text/markdown', + requirements=[ + "absl-py (>=1.0.0)", + "astunparse (>=1.6.0)", + "flatbuffers (>=23.5.26)", + "gast (!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1)", + "google-pasta (>=0.1.1)", + "h5py (>=2.9.0)", + "libclang (>=13.0.0)", + "ml-dtypes (~=0.2.0)", + "numpy (<2.0.0,>=1.23.5)", + "opt-einsum (>=2.3.2)", + "packaging", + "protobuf 
(!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3)", + "setuptools", + "six (>=1.12.0)", + "termcolor (>=1.1.0)", + "typing-extensions (>=3.6.6)", + "wrapt (<1.15,>=1.11.0)", + "tensorflow-io-gcs-filesystem (>=0.23.1)", + "grpcio (<2.0,>=1.24.3)", + "tensorboard (<2.16,>=2.15)", + "tensorflow-estimator (<2.16,>=2.15.0)", + "keras (<2.16,>=2.15.0)", + "nvidia-cublas-cu12 (==12.2.5.6) ; extra == 'and-cuda'", + "nvidia-cuda-cupti-cu12 (==12.2.142) ; extra == 'and-cuda'", + "nvidia-cuda-nvcc-cu12 (==12.2.140) ; extra == 'and-cuda'", + "nvidia-cuda-nvrtc-cu12 (==12.2.140) ; extra == 'and-cuda'", + "nvidia-cuda-runtime-cu12 (==12.2.140) ; extra == 'and-cuda'", + "nvidia-cudnn-cu12 (==8.9.4.25) ; extra == 'and-cuda'", + "nvidia-cufft-cu12 (==11.0.8.103) ; extra == 'and-cuda'", + "nvidia-curand-cu12 (==10.3.3.141) ; extra == 'and-cuda'", + "nvidia-cusolver-cu12 (==11.5.2.141) ; extra == 'and-cuda'", + "nvidia-cusparse-cu12 (==12.1.2.141) ; extra == 'and-cuda'", + "nvidia-nccl-cu12 (==2.16.5) ; extra == 'and-cuda'", + "nvidia-nvjitlink-cu12 (==12.2.140) ; extra == 'and-cuda'", + "tensorrt (==8.6.1.post1) ; extra == 'and-cuda'", + "tensorrt-bindings (==8.6.1) ; extra == 'and-cuda'", + "tensorrt-libs (==8.6.1) ; extra == 'and-cuda'", + ], + description_content_type="text/markdown", ) manager = AddonManagerDialog(self) @@ -248,7 +299,7 @@ def install_tensorflow(self): def add_main_layout(self): if is_tensorflow_installed(): self.tensorflow_layout() - else: + else: self.no_tensorflow_layout() # ---------Methods related to UI------------ @@ -303,18 +354,23 @@ def create_learner(self): """ if is_tensorflow_installed(): return self.LEARNER( - preprocessors=self.preprocessors, + preprocessors=self.preprocessors, seed=42 if self.repeatable else -1, - classifier_num_hidden_units=self.hidden_layers_neurons, - num_epochs=self.number_of_epochs, - batch_size=self.batch_size, - debias=self.debias, - adversary_loss_weight=self.selected_lambda if self.debias else 0 + classifier_num_hidden_units=self.hidden_layers_neurons, + num_epochs=self.number_of_epochs, + batch_size=self.batch_size, + debias=self.debias, + adversary_loss_weight=self.selected_lambda if self.debias else 0, ) def update_model(self): - """Responsible for starting a new thread, fitting the learner and sending the created model to the output""" - # This method is called along with the update_learner method in the apply method of the superclass + """ + Responsible for starting a new thread, fitting the + learner and sending the created model to the output + + This method is called along with the update_learner + method in the apply method of the superclass + """ self.cancel() if self.data is not None: @@ -355,4 +411,5 @@ def onDeleteWidget(self): if __name__ == "__main__": from Orange.widgets.utils.widgetpreview import WidgetPreview + WidgetPreview(OWAdversarialDebiasing).run() diff --git a/orangecontrib/fairness/widgets/owasfairness.py b/orangecontrib/fairness/widgets/owasfairness.py index d7285ab..cccd500 100644 --- a/orangecontrib/fairness/widgets/owasfairness.py +++ b/orangecontrib/fairness/widgets/owasfairness.py @@ -1,3 +1,10 @@ +""" +This module contains the AsFairness widget which is used to add fairness attributes to the data. + +The fairness attributes are the favorable class value, +protected attribute and privileged protected attribute values. 
+"""

 from typing import Optional

 from AnyQt.QtCore import QItemSelectionModel
@@ -13,12 +20,16 @@

 class OWAsFairness(OWWidget):
     """
-    Converts a dataset to a fairness dataset with marked favorable class values,
+    Converts a dataset to a fairness dataset with marked favorable class values,
     protected attributes and priviliged protected attribute values.
     """
+
     name = "As Fairness Data"
-    description = "Converts a dataset to a fairness dataset with marked favorable class values, protected attributes and priviliged protected attribute values."
-    icon = 'icons/as_fairness_data.svg'
+    description = (
+        "Converts a dataset to a fairness dataset with marked favorable class values, "
+        "protected attributes and privileged protected attribute values."
+    )
+    icon = "icons/as_fairness_data.svg"
     priority = 0

     want_main_area = False
@@ -26,14 +37,18 @@

     class Inputs:
         """Define the inputs to the widgets"""
+
         data = Input("Data", Table)

     class Outputs:
         """Define the outputs to the widgets"""
+
         data = Output("Data", Table)

-    # Settings: The favorable_class, protected_attribute, and privileged_values are instance variables that are declared as ContextSetting.
-    # A ContextSetting is a special type of Setting that Orange remembers for each different context (i.e., input data domain).
+    # Settings: The favorable_class, protected_attribute, and privileged_values
+    # are instance variables that are declared as ContextSetting.
+    # A ContextSetting is a special type of Setting that Orange remembers for
+    # each different context (i.e., input data domain).
     settingsHandler = DomainContextHandler(
         match_values=DomainContextHandler.MATCH_VALUES_ALL
     )
@@ -45,7 +60,8 @@ class Outputs:
     )  # schema_only -> The setting is saved within the workflow but the default never changes.
     keep_default: bool = Setting(
         True, schema_only=True
-    )  # This setting will be used to keep the original fairness attributes when first using the widget, If the user changes the attributes from the default ones, we want to use the context
+    )  # This setting will be used to keep the original fairness attributes when first using the
+    # widget. If the user changes the attributes from the default ones, we want to use the context.

     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -56,13 +72,17 @@ def __init__(self, *args, **kwargs):

         favorable_class_items_model = PyListModel(
             iterable=[]
-        )  # Here we don't want to display the different attributes/features but the values of the attribute so I couldn't use the DomainModel which stores the features, so I used the PyListModel which can store any type in an iterable
+        )  # Here we don't want to display the different attributes/features but the values of the
+        # attribute, so I couldn't use the DomainModel which stores the features;
+        # I used the PyListModel instead, which can store any type in an iterable.
         protected_attribute_model = DomainModel(
             valid_types=(DiscreteVariable,)
-        )  # DomainModel stores the domain of the input data and allows us to select a attribute from it. It is tied to the gui.comboBox widget. And Can be accesed by self.controls.protected_attribute.model()
+        )  # DomainModel stores the domain of the input data and allows us to select an attribute
+        # from it. It is tied to the gui.comboBox widget and can be accessed by
+        # self.controls.protected_attribute.model()
         privileged_pa_values_model = PyListModel(iterable=[])

-        # Create a box for each of the three variables and populate them with comboboxes and listboxes.
+        # Create a box for each of the three variables, populate them with comboboxes and listboxes.
         box = gui.vBox(self.controlArea, "Favorable Class Value", margin=0)
         gui.comboBox(
             box,
@@ -95,7 +115,8 @@
         )
         var_list.setSelectionMode(var_list.ExtendedSelection)

-        # A Commit/AutoCommit button which controls if a new signal is sent whenever the user changes the value of a variable or only when the user commits the changes
+        # A Commit/AutoCommit button which controls if a new signal is sent whenever the user
+        # changes the value of a variable or only when the user commits the changes
         self.commit_button = gui.auto_commit(
             self.controlArea, self, "auto_commit", "&Commit", box=False
         )
@@ -104,7 +125,7 @@
     @check_data_structure
     def set_data(self, data: Table) -> None:
         """
-        Function called when new data is received on the input. It is responsible for filling
+        Function called when new data is received on the input. It is responsible for filling
         the widget comboboxes and listboxes with the values contained in the input data.
         """
         self.closeContext()
@@ -117,8 +138,9 @@ def set_data(self, data: Table) -> None:
         # Copy the attributes from the original data to the new data
         self._data.attributes = data.attributes.copy()

-        # A new Domain object is created from the attributes and class variables of the input data.
-        # This Domain object represents the structure of the input data (i.e., the variables it contains).
+        # A new Domain object is created from the attributes and class variables of the
+        # input data. This Domain object represents the structure of the input data
+        # (i.e., the variables it contains).
         domain = Domain(data.domain.attributes, data.domain.class_vars)

         # Find the original fairness attributes in the input data and save them.
@@ -128,46 +150,51 @@ def set_data(self, data: Table) -> None:
         self._clear_and_fill(domain)

         # Open the context for the widget using the new domain
-        # This means that these settings will be remembered the next time the widget receives the same input data.
-        # If the input data allready has a known domain, then the saved settings will be used.
+        # This means that these settings will be remembered the next time the widget receives
+        # the same input data. If the input data already has a known domain, then the saved
+        # settings will be used.
         self.openContext(domain)

         # Apply the changes and send the data to the output
-        # Here we have commit.now() instead of a commit.deferred() because we want to apply the changes as soon as the input data changes.
+        # Here we have commit.now() instead of a commit.deferred() because
+        # we want to apply the changes as soon as the input data changes.
         self.commit.now()

-
     def _clear_and_fill(self, domain):
         """
         This function is used to first clear the comboboxes and listboxes of their values
         and then fill them with the values of the input data.
         """
-        # Changes the domain of the comboboxes to the new domain, thus updating the list of variables that can be selected.
+        # Changes the domain of the comboboxes to the new domain,
+        # thus updating the list of variables that can be selected.
         self.controls.protected_attribute.model().set_domain(Domain(domain.attributes))

         # Clear the old values of the favorable_class_items_model PyListModel
         if self.controls.favorable_class_value.model():
             self.controls.favorable_class_value.model().clear()
-        # Fill the favorable_class_items_model PyListModel with the values of the class variable of the input data.
+        # Fill the favorable_class_items_model PyListModel with the
+        # values of the class variable of the input data.
         for value in domain.class_vars[0].values:
             self.controls.favorable_class_value.model().append(value)

         # Clear the old values of the privileged_pa_values_model PyListModel
         if self.controls.privileged_pa_values.model():
             self.controls.privileged_pa_values.model().clear()
-        # Fill the privileged_pa_values_model PyListModel with the values of the protected attribute variable of the input data.
+        # Fill the privileged_pa_values_model PyListModel with the values
+        # of the protected attribute variable of the input data.
         for value in self.controls.protected_attribute.model()[0].values:
             self.controls.privileged_pa_values.model().append(value)

         # Select the first values of the comboboxes and listboxes.
-        self._select_values(domain)
+        self._select_values()

-    def _select_values(self, domain):
+    def _select_values(self):
         """
-        This function is used to set the values of the comboboxes and listboxes to the first value of the list of variables.
-        This is needed so that the user doesn't have to select every value manually even if it is the first value of the list
-        and it is already shown as the selected value in the dropdown menu causing confusion.
+        This function is used to set the values of the comboboxes and listboxes to the first
+        value of the list of variables. This is needed so that the user doesn't have to select
+        every value manually, even if it is the first value of the list and is already shown
+        as the selected value in the dropdown menu, which could cause confusion.
         """

         self.protected_attribute = (
@@ -186,15 +213,15 @@ def _select_values(self):
             else []
         )

-
     def find_and_read_fainress_attributes(self, domain):
         """
-        This function is used to find the original fairness attributes in the input data and save them
+        This function finds the original fairness attributes in the input data and saves them
         as instance variables. This is needed because the openContext will overwrite the fairness
         attributes when it is called and sometimes we want to keep the original ones.
         """
-
-        # Check if the data contains fairness attributes if so then set the selected variables to the values of the fairness attributes
+
+        # Check if the data contains fairness attributes; if so, then set
+        # the selected variables to the values of the fairness attributes
         if "favorable_class_value" in domain.class_var.attributes:
             # Set the selected favorable class value to the value of the key "favorable_class_value"
             self.original_favorable_class_value = domain.class_var.attributes[
                 "favorable_class_value"
             ]

         for var in domain.attributes:
             if "privileged_pa_values" in var.attributes:
                 self.original_protected_attribute = var
-                self.original_privileged_pa_values = var.attributes["privileged_pa_values"]
+                self.original_privileged_pa_values = var.attributes[
+                    "privileged_pa_values"
+                ]
                 break

     def openContext(self, *a):
         """
@@ -214,40 +243,45 @@ def openContext(self, *a):
         """
         super().openContext(*a)

-        # If the user is loading the dataset for the first time and the dataset has some default fairness attributes,
-        # then we want to keep the default fairness attributes and not overwrite them with openContext.
+        # If the user is loading the dataset for the first time and the dataset has some
+        # default fairness attributes, then we want to keep the default fairness attributes
+        # and not overwrite them with openContext.
        if (
-            self.keep_default
-            and self.original_favorable_class_value
-            and self.original_protected_attribute
+            self.keep_default
+            and self.original_favorable_class_value
+            and self.original_protected_attribute
             and self.original_privileged_pa_values
         ):
             self.favorable_class_value = self.original_favorable_class_value
             self.protected_attribute = self.original_protected_attribute
             self.privileged_pa_values = self.original_privileged_pa_values

-
         # Check if the privileged_pa_values match the values of the protected attribute variable
-        # This is needed because when loading a old workflow, the openContext sometimes doesn't change the protected_attribute
-        if not set(self.privileged_pa_values).issubset(set(self.protected_attribute.values)):
+        # This is needed because when loading an old workflow, the openContext sometimes doesn't
+        # change the protected_attribute
+        if not set(self.privileged_pa_values).issubset(
+            set(self.protected_attribute.values)
+        ):
             self.change_values(clear_pa_values=True)

-        # Check if the values in self.controls.privileged_pa_values.model matches the values of the protected attribute variable
-        # This is needed when loading an old workflow the displayed values might not match the values of the protected attribute variable
+        # Check if the values in self.controls.privileged_pa_values.model match the values of
+        # the protected attribute variable. This is needed because, when loading an old workflow,
+        # the displayed values might not match the values of the protected attribute variable.
         elif not set(self.controls.privileged_pa_values.model()).issubset(
             set(self.protected_attribute.values)
         ):
             self.change_values(clear_pa_values=False)

-
     def change_values(self, clear_pa_values=True) -> None:
         """
         This function is normally called when the user changes the protected attribute variable
-        It changes the values of the privileged_pa_values_model PyListModel (the list of displayed privileged PA values)
-        and the selected privileged PA values (self.privileged_pa_values) to match the values of the new protected attribute variable.
+        It changes the values of the privileged_pa_values_model PyListModel (the list of displayed
+        privileged PA values) and the selected privileged PA values (self.privileged_pa_values)
+        to match the values of the new protected attribute variable.
""" - # Context is now needed to use the selected fairness attributes instead of the default ones if the user saves and loads the workflow + # Context is now needed to use the selected fairness attributes instead + # of the default ones if the user saves and loads the workflow self.keep_default = False # Change the list of displayed privileged PA values @@ -263,9 +297,9 @@ def change_values(self, clear_pa_values=True) -> None: if len(self.controls.privileged_pa_values.model()) else [] ) - # If we don't want to clear the selected privileged PA values, + # If we don't want to clear the selected privileged PA values, # then we need to "select" them to make them highlighted in the listbox - else: + else: list_view = self.controls.privileged_pa_values model = list_view.model() selection_model = list_view.selectionModel() @@ -278,14 +312,15 @@ def change_values(self, clear_pa_values=True) -> None: index = model.indexOf(value) if index != -1: # -1 means the value was not found in the model selection_model.select( - model.index(index), - QItemSelectionModel.Select + model.index(index), QItemSelectionModel.Select ) - # Adding the protected attribute and favorable class value as attributes to the data domain def as_fairness_data(self, data: Table) -> Optional[Table]: - """This function adds the protected attribute and favorable class value as attributes to the data domain""" + """ + This function adds the protected attribute and favorable + class value as attributes to the data domain. + """ if ( not self.protected_attribute or not self.favorable_class_value @@ -299,11 +334,12 @@ def as_fairness_data(self, data: Table) -> Optional[Table]: # Create the new attribute or copy the old ones new_attributes = [] for attribute in old_domain.attributes: - # If the attribute is the protected attribute, then create a new attribute with the same values but add the privileged_pa_values attribute + # If the attribute is the protected attribute, then create a new attribute with + # the same values but add the privileged_pa_values attribute. if attribute.name == self.protected_attribute.name: new_attr = attribute.copy() new_attr.attributes["privileged_pa_values"] = self.privileged_pa_values - # Else just copy the attribute and remove the privileged_pa_values attribute if it exists + # Else copy the attribute and remove the privileged_pa_values attribute if it exists else: new_attr = attribute.copy() if "privileged_pa_values" in new_attr.attributes: @@ -321,7 +357,9 @@ def as_fairness_data(self, data: Table) -> Optional[Table]: new_data = data.transform(new_domain) return new_data - @gui.deferred # The defered allows us to only call the function once the user has stopped changing the values of the comboboxes or listboxes and "Applies" the changes + # The defered allows us to only call the function once the user has stopped + # changing the values of the comboboxes or listboxes and "Applies" the changes + @gui.deferred def commit(self) -> None: """ This function is called when the user changes the value of the comboboxes or listboxes. 
diff --git a/orangecontrib/fairness/widgets/owcombinepreprocessors.py b/orangecontrib/fairness/widgets/owcombinepreprocessors.py
index 18e8ed2..12d306a 100644
--- a/orangecontrib/fairness/widgets/owcombinepreprocessors.py
+++ b/orangecontrib/fairness/widgets/owcombinepreprocessors.py
@@ -1,3 +1,10 @@
+"""
+This module contains the implementation of the Combine Preprocessors widget.
+
+This widget allows the user to combine two preprocessors into
+one so it can be used as input for other widgets.
+"""
+
 from typing import Optional

 from Orange.widgets import gui
@@ -6,7 +13,9 @@

 class OWCombinePreprocessors(OWWidget):
-    """Widget for combining 2 preprocessors into one so it can be used as input for other widgets."""
+    """
+    Widget for combining two preprocessors into one so it can be used as input for other widgets.
+    """

     name = "Combine Preprocessors"
     description = "Combine multiple preprocessors into one."
@@ -37,7 +46,10 @@ def __init__(self):
         box = gui.vBox(self.mainArea, "Info")
         gui.widgetLabel(
             box,
-            "This widgets allows you to combine two preprocessors into one and use it as input for other widgets.",
+            (
+                "This widget allows you to combine two preprocessors "
+                "into one and use it as input for other widgets."
+            ),
         )

     @Inputs.first_preprocessor
diff --git a/orangecontrib/fairness/widgets/owdatasetbias.py b/orangecontrib/fairness/widgets/owdatasetbias.py
index 3dd4fd0..3596b83 100644
--- a/orangecontrib/fairness/widgets/owdatasetbias.py
+++ b/orangecontrib/fairness/widgets/owdatasetbias.py
@@ -1,3 +1,10 @@
+"""
+This module contains the implementation of the Dataset Bias widget.
+
+This widget computes the bias of a dataset. More specifically, it computes
+the disparate impact and statistical parity difference metrics for the dataset.
+"""
+
 from typing import Optional

 from Orange.widgets import gui
@@ -6,16 +13,25 @@

 from aif360.metrics import BinaryLabelDatasetMetric

-from orangecontrib.fairness.widgets.utils import table_to_standard_dataset, check_fairness_data, check_for_missing_values
+from orangecontrib.fairness.widgets.utils import (
+    table_to_standard_dataset,
+    check_fairness_data,
+    check_for_missing_values,
+)


 class OWDatasetBias(OWWidget):
     """
     Widget for computing the fairness metrics (bias) of a dataset.
-    More specifically, it computes the disparate impact and statistical parity difference metrics for the dataset.
+    More specifically, it computes the disparate impact and statistical
+    parity difference metrics for the dataset.
     """
+
     name = "Dataset Bias"
-    description = "Computes the bias of a dataset. More specifically, it computes the disparate impact and statistical parity difference metrics for the dataset."
+    description = (
+        "Computes the bias of a dataset. More specifically, it computes the disparate "
+        "impact and statistical parity difference metrics for the dataset."
+ ) icon = "icons/dataset_bias.svg" priority = 10 @@ -24,6 +40,7 @@ class OWDatasetBias(OWWidget): class Inputs: """Input for the widget - dataset.""" + data = Input("Data", Table) def __init__(self, *args, **kwargs): @@ -38,9 +55,7 @@ def __init__(self, *args, **kwargs): @check_for_missing_values def set_data(self, data: Optional[Table]) -> None: """Computes the bias of the dataset and displays it on the widget.""" - if ( - not data - ): + if not data: self.disparate_impact_label.setText("No data detected.") self.disparate_impact_label.setToolTip("") self.statistical_parity_difference_label.setText("") @@ -48,28 +63,37 @@ def set_data(self, data: Optional[Table]) -> None: return # Convert Orange data to aif360 StandardDataset - standard_dataset, privileged_groups, unprivileged_groups = table_to_standard_dataset(data) + standard_dataset, privileged_groups, unprivileged_groups = ( + table_to_standard_dataset(data) + ) # Compute the bias of the dataset (disparate impact and statistical parity difference) - dataset_metric = BinaryLabelDatasetMetric(standard_dataset, unprivileged_groups, privileged_groups) + dataset_metric = BinaryLabelDatasetMetric( + standard_dataset, unprivileged_groups, privileged_groups + ) disparate_impact = dataset_metric.disparate_impact() statistical_parity_difference = dataset_metric.statistical_parity_difference() - self.disparate_impact_label.setText(f"Disparate Impact (ideal = 1): {round(disparate_impact, 3)}") + self.disparate_impact_label.setText( + f"Disparate Impact (ideal = 1): {round(disparate_impact, 3)}" + ) self.disparate_impact_label.setToolTip( - "
Disparate Impact (DI): Measures the ratio of the ratios of favorable class values for an "
-            "unprivileged group to that of the privileged group. An ideal value of 1.0 means the ratio of "
-            "favorable class values is the same for both groups."
+            "Disparate Impact (DI): Measures the ratio of the ratios of favorable class "
+            "values for an unprivileged group to that of the privileged group. An ideal value "
+            "of 1.0 means the ratio of favorable class values is the same for both groups."
         )

-        self.statistical_parity_difference_label.setText(f"Statistical Parity Difference (ideal = 0): {round(statistical_parity_difference, 3)}")
+        self.statistical_parity_difference_label.setText(
+            f"Statistical Parity Difference (ideal = 0): {round(statistical_parity_difference, 3)}"
+        )
         self.statistical_parity_difference_label.setToolTip(
-            "Statistical Parity Difference (SPD): Measures the difference in ratios of favorable class values "
-            "between the unprivileged and the privileged groups. An ideal value for this metric is 0."
+            "Statistical Parity Difference (SPD): Measures the difference in ratios of "
+            "favorable class values between the unprivileged and the privileged groups. An "
+            "ideal value for this metric is 0."
-        )
\ No newline at end of file
+        )
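Editor's note, not part of the changeset: the widget's computation, reduced to a standalone snippet. It mirrors the calls in set_data above and assumes `data` is a Table carrying the As Fairness Data annotations:

# Editor's sketch, not part of this diff.
from aif360.metrics import BinaryLabelDatasetMetric
from orangecontrib.fairness.widgets.utils import table_to_standard_dataset

dataset, privileged, unprivileged = table_to_standard_dataset(data)
metric = BinaryLabelDatasetMetric(dataset, unprivileged, privileged)
print(f"Disparate Impact (ideal = 1): {metric.disparate_impact():.3f}")
print(f"Statistical Parity Difference (ideal = 0): "
      f"{metric.statistical_parity_difference():.3f}")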
" "" - ) \ No newline at end of file + ) diff --git a/orangecontrib/fairness/widgets/owequalizedodds.py b/orangecontrib/fairness/widgets/owequalizedodds.py index 1bf849f..96608ee 100644 --- a/orangecontrib/fairness/widgets/owequalizedodds.py +++ b/orangecontrib/fairness/widgets/owequalizedodds.py @@ -1,3 +1,10 @@ +""" +File which contains the OWEqualizedOdds widget. + +This widget is used to postprocess a model predictions +using a fairness algorithm to satisfy equalized odds. +""" + from Orange.base import Learner from Orange.data import Table from Orange.widgets.settings import Setting @@ -11,19 +18,30 @@ from AnyQt.QtCore import Qt from orangecontrib.fairness.modeling.postprocessing import PostprocessingLearner -from orangecontrib.fairness.widgets.utils import check_fairness_data, check_for_missing_values +from orangecontrib.fairness.widgets.utils import ( + check_fairness_data, + check_for_missing_values, +) class InterruptException(Exception): """A dummy exception used to interrupt the training process.""" + pass + class EqualizedOddsRunner: - """A class used to run the EqualizedOddsLearner in a separate thread and display progress using the callback.""" + """ + A class used to run the EqualizedOddsLearner in a separate + thread and display progress using the callback. + """ + @staticmethod - def run( - learner: Learner, data: Table, state: TaskState - ) -> Model: + def run(learner: Learner, data: Table, state: TaskState) -> Model: + """ + Function used to run the EqualizedOddsLearner in a separate + thread and display progress using the callback. + """ if data is None: return None @@ -37,14 +55,17 @@ def callback(progress: float, msg: str = None) -> bool: return model - - class OWEqualizedOdds(ConcurrentWidgetMixin, OWBaseLearner): """ - Widget for postprocessing a model predictions using a fairness algorithm to satisfy equalized odds. + Widget for postprocessing a model predictions using a + fairness algorithm to satisfy equalized odds. """ + name = "Equalized Odds Postprocessing" - description = "Postprocessing fairness algorithm which changes the predictions of a classifier to satisfy equalized odds." + description = ( + "Postprocessing fairness algorithm which changes the " + "predictions of a classifier to satisfy equalized odds." + ) icon = "icons/eq_odds_postprocessing.svg" priority = 40 @@ -52,7 +73,11 @@ class OWEqualizedOdds(ConcurrentWidgetMixin, OWBaseLearner): repeatable = Setting(True) class Inputs(OWBaseLearner.Inputs): - """Inputs for the widget, which are the same as the inputs for the super class plus a learner input.""" + """ + Inputs for the widget, which are the same as the inputs + for the super class plus a learner input. + """ + input_learner = Input("Learner", Learner) def __init__(self): @@ -88,11 +113,10 @@ def set_data(self, data: Table): self.cancel() super().set_data(data) - @Inputs.input_learner def set_learner(self, input_learner: Learner): """ - Function which handles the learner input by first canceling the current taks, + Function which handles the learner input by first canceling the current taks, storing the learneras a class variable and updating the widget name. """ self.cancel() @@ -109,10 +133,10 @@ def set_preprocessor(self, preprocessor): self.cancel() super().set_preprocessor(preprocessor) - def create_learner(self): """ - Responsible for creating the postprocessed learner with the input_learner and the preprocessors. + Responsible for creating the postprocessed learner with + the input_learner and the preprocessors. 
""" if not self.input_learner: return None @@ -125,12 +149,13 @@ def create_learner(self): def handleNewSignals(self): if not self.input_learner: return - self.update_learner() + self.update_learner() if self.data is not None: self.update_model() def update_model(self): - """Responsible for starting a new thread, fitting the learner and sending the created model to the output""" + """Responsible for starting a new thread, fitting the learner + and sending the created model to the output""" self.cancel() if self.data is not None and self.input_learner is not None: self.start(EqualizedOddsRunner.run, self.learner, self.data) diff --git a/orangecontrib/fairness/widgets/owreweighing.py b/orangecontrib/fairness/widgets/owreweighing.py index b239083..bafd96f 100644 --- a/orangecontrib/fairness/widgets/owreweighing.py +++ b/orangecontrib/fairness/widgets/owreweighing.py @@ -1,3 +1,9 @@ +""" +This module contains the implementation of the Reweighing widget. + +This widget applies the reweighing algorithm to a dataset, which adjusts the weights of rows. +""" + from typing import Optional from Orange.widgets import gui @@ -10,21 +16,25 @@ from orangecontrib.fairness.widgets.utils import ( table_to_standard_dataset, check_fairness_data, - check_for_missing_values + check_for_missing_values, ) class MzCom: - """A class used to compute the weights of the rows of a dataset using a allready fitted reweighing algorithm""" + """ + A class used to compute the weights of the rows of a + dataset using a already fitted reweighing algorithm + """ def __init__(self, model, original_domain=None): self.original_domain = original_domain self.model = model def __call__(self, data): - # For creating the standard dataset we need to know the encoding the table uses for the class variable - # This can be found in the domain and is the same as the order of values of the class variable in the domain - # This is why we need to add it back to the domain if it was removed + # For creating the standard dataset we need to know the encoding the table uses for + # the class variable. This can be found in the domain and is the same as the order + # of values of the class variable in the domain. This is why we need to add it back + # to the domain if it was removed. if not data.domain.class_var: data.domain.class_var = self.original_domain.class_var data, _, _ = table_to_standard_dataset(data) @@ -36,7 +46,9 @@ def __call__(self, data): class ReweighingModel: - """A class used to create a ReweighingAlgoritm instance, fitting it to the data and returning it""" + """ + A class used to create a ReweighingAlgoritm instance, fitting it to the data and returning it. + """ def __call__(self, data): ( @@ -51,8 +63,9 @@ def __call__(self, data): class ReweighingTransform(preprocess.Preprocess): """ - A class used to add a new column/variable to the data with the weights of the rows of the data computed - by the fitted reweighing algorithm stored in the MzCom class instance as a compute_value function + A class used to add a new column/variable to the data with the weights of + the rows of the data computed by the fitted reweighing algorithm stored in + the MzCom class instance as a compute_value function. 
""" def __call__(self, data): @@ -60,7 +73,8 @@ def __call__(self, data): weights = ContinuousVariable( "weights", compute_value=MzCom(model, original_domain=data.domain) ) - # Alternative for the compute_value: compute_value=lambda data, model=model: transf(data, model) + # Alternative for the compute_value: + # compute_value=lambda data, model=model: transf(data, model) # Add the variable "weights" to the domain of the data new_data = data.transform( @@ -80,7 +94,10 @@ class OWReweighing(OWWidget): """ name = "Reweighing" - description = "Applies the reweighing algorithm to a dataset, which adjusts the weights of rows." + description = ( + "Applies the reweighing algorithm to a dataset, " + "which adjusts the weights of rows." + ) icon = "icons/reweighing.svg" priority = 20 @@ -106,7 +123,11 @@ def __init__(self, *args, **kwargs): box = gui.vBox(self.mainArea, "Info") gui.widgetLabel( box, - "This widget applies the reweighing algorithm to a dataset, which adjusts the weights of rows.\nThe input data must have the additional 'AsFairness' attributes and be without any missing values.", + ( + "This widget applies the reweighing algorithm to a dataset, " + "which adjusts the weights of rows.\nThe input data must have " + "the additional 'AsFairness' attributes and be without any missing values." + ), ) self._data: Optional[Table] = None @@ -126,7 +147,10 @@ def handleNewSignals(self): self.apply() def apply(self): - """Fitting the reweighing algorithm to the data and sending the preprocessed data and the preprocessor to the output""" + """ + Fitting the reweighing algorithm to the data and sending the + preprocessed data and the preprocessor to the output. + """ if self._data is None: return diff --git a/orangecontrib/fairness/widgets/owweightedlogisticregression.py b/orangecontrib/fairness/widgets/owweightedlogisticregression.py index c1314e2..c340090 100644 --- a/orangecontrib/fairness/widgets/owweightedlogisticregression.py +++ b/orangecontrib/fairness/widgets/owweightedlogisticregression.py @@ -1,3 +1,10 @@ +""" +This module contains the implementation of the OWWeightedLogisticRegression. + +This widget is used to create a logistic regression model which +can use instance weights in the training process. 
+""" + import Orange.widgets.model.owlogisticregression from Orange.base import Learner @@ -6,6 +13,7 @@ from orangecontrib.fairness.widgets.utils import check_for_missing_values + class WeightedLogisticRegressionLearner(LogisticRegressionLearner): """ A class used to create a LogisticRegressionLearner which can @@ -26,7 +34,9 @@ class which allows the use of instance weights return self.fit(X, Y, W) -class OWWeightedLogisticRegression(Orange.widgets.model.owlogisticregression.OWLogisticRegression): +class OWWeightedLogisticRegression( + Orange.widgets.model.owlogisticregression.OWLogisticRegression +): """A class used to create a widget which uses the WeightedLogisticRegressionLearner""" name = "Weighted Logistic Regression" @@ -42,17 +52,18 @@ class OWWeightedLogisticRegression(Orange.widgets.model.owlogisticregression.OWL LEARNER = WeightedLogisticRegressionLearner - class Inputs(Orange.widgets.model.owlogisticregression.OWLogisticRegression.Inputs): """The inputs of the widget - the dataset""" + pass @Inputs.data @check_for_missing_values def set_data(self, data=None): - """Handling input data by first imputing missing values if any and then calling the super class""" + """ + Handling input data by first imputing missing values if any and then calling the super class + """ if data is not None: if data.has_missing(): data = Impute()(data) super().set_data(data) - diff --git a/orangecontrib/fairness/widgets/tests/test_owadversarialdebiasing.py b/orangecontrib/fairness/widgets/tests/test_owadversarialdebiasing.py index 9c78ef0..4e79245 100644 --- a/orangecontrib/fairness/widgets/tests/test_owadversarialdebiasing.py +++ b/orangecontrib/fairness/widgets/tests/test_owadversarialdebiasing.py @@ -1,3 +1,7 @@ +""" +This file contains the tests for the OWAdversarialDebiasing widget. +""" + import unittest from Orange.evaluation import CrossValidation, AUC, CA @@ -8,10 +12,19 @@ from orangecontrib.fairness.widgets.owadversarialdebiasing import OWAdversarialDebiasing from orangecontrib.fairness.modeling.adversarial import AdversarialDebiasingLearner + class TestOWAdversarialDebiasing(WidgetTest): + """ + Test class for the OWAdversarialDebiasing widget. + """ + def setUp(self): - self.data_path_compas = "https://datasets.biolab.si/core/compas-scores-two-years.tab" - self.incorrect_input_data_path = "https://datasets.biolab.si/core/breast-cancer.tab" + self.data_path_compas = ( + "https://datasets.biolab.si/core/compas-scores-two-years.tab" + ) + self.incorrect_input_data_path = ( + "https://datasets.biolab.si/core/breast-cancer.tab" + ) self.widget = self.create_widget(OWAdversarialDebiasing) def test_no_data(self): @@ -35,7 +48,10 @@ def test_parameters(self): self.assertEqual(self.widget.repeatable, True) def test_incorrect_input_data(self): - """Check that the widget displays an error message when the input data does not have the 'AsFairness' attributes""" + """ + Check that the widget displays an error message when the + input data does not have the 'AsFairness' attributes + """ test_data = Table(self.incorrect_input_data_path) self.send_signal(self.widget.Inputs.data, test_data) self.assertTrue(self.widget.Error.missing_fairness_data.is_shown()) @@ -58,8 +74,11 @@ def test_model_output(self): self.assertIsNotNone(model) - class TestAdversarialDebiasing(unittest.TestCase): + """ + Test class for the AdversarialDebiasingLearner and AdversarialDebiasingModel. 
+ """ + def setUp(self): self.data_path_german = "https://datasets.biolab.si/core/german-credit-data.tab" @@ -78,24 +97,26 @@ def test_adversarial_model(self): """Check if the adversarial model works""" learner = AdversarialDebiasingLearner(num_epochs=20, seed=42) data = Table(self.data_path_german) - model = learner(data[:len(data) // 2]) + model = learner(data[: len(data) // 2]) self.assertIsNotNone(model) - predictions = model(data[len(data) // 2:], ret=Model.ValueProbs ) + predictions = model(data[len(data) // 2 :], ret=Model.ValueProbs) self.assertIsNotNone(predictions) labels, scores = predictions self.assertEqual(len(labels), len(scores)) - self.assertEqual(len(labels), len(data[len(data) // 2:])) + self.assertEqual(len(labels), len(data[len(data) // 2 :])) self.assertLess(abs(scores.sum(axis=1) - 1).all(), 1e-6) self.assertTrue(all(label in [0, 1] for label in labels)) class TestCallbackSession(unittest.TestCase): """ - In the adversarial.py file create a Subclass of tensorflow session with callback functionality for progress tracking and displaying. - This class should be tested to ensure that the tf.Session has not been modified in a way that breaks the functionality of the widget. + In the adversarial.py file create a Subclass of tensorflow session with callback + functionality for progress tracking and displaying. This class should be tested to + ensure that the tf.Session has not been modified in a way that breaks the functionality + of the widget. """ def setUp(self): @@ -110,6 +131,7 @@ def callback_function(self, progress, msg=""): self.last_received_progress = progress def test_callback_with_learner(self): + """Test the callback function with the AdversarialDebiasingLearner.""" # Define the learner learner = AdversarialDebiasingLearner(num_epochs=20, batch_size=128) expected_total_runs = learner._calculate_total_runs(self.data) @@ -125,4 +147,4 @@ def test_callback_with_learner(self): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/orangecontrib/fairness/widgets/tests/test_owasfairness.py b/orangecontrib/fairness/widgets/tests/test_owasfairness.py index 25fd7a9..527dcd2 100644 --- a/orangecontrib/fairness/widgets/tests/test_owasfairness.py +++ b/orangecontrib/fairness/widgets/tests/test_owasfairness.py @@ -1,3 +1,7 @@ +""" +This file contains the tests for the OWAsFairness widget. +""" + import unittest from Orange.data.table import Table @@ -9,8 +13,11 @@ from orangecontrib.fairness.widgets.tests.utils import fairness_attributes - class TestOWAsFairness(WidgetTest): + """ + Test class for the OWAsFairness widget. 
+ """ + def setUp(self) -> None: self.widget = self.create_widget(OWAsFairness) self.data_path_adult = "https://datasets.biolab.si/core/adult.tab" @@ -36,38 +43,55 @@ def test_input_data(self): ) def test_display_default(self): - """Check that the widget automatically displays the default fairness attributes if the input data contains them""" + """ + Check that the widget automatically displays the default + fairness attributes if the input data contains them + """ test_data = Table(self.data_path_adult) self.send_signal( self.widget.Inputs.data, test_data, ) - favorable_class_value, protected_attribute, privileged_pa_values = fairness_attributes(test_data.domain) + favorable_class_value, protected_attribute, privileged_pa_values = ( + fairness_attributes(test_data.domain) + ) + + self.assertEqual( + self.widget.controls.favorable_class_value.currentText(), + favorable_class_value, + ) - self.assertEqual(self.widget.controls.favorable_class_value.currentText(), favorable_class_value) - - self.assertEqual(self.widget.controls.protected_attribute.currentText(), protected_attribute.name) + self.assertEqual( + self.widget.controls.protected_attribute.currentText(), + protected_attribute.name, + ) - selected_indexes = self.widget.controls.privileged_pa_values.selectionModel().selectedRows() + selected_indexes = ( + self.widget.controls.privileged_pa_values.selectionModel().selectedRows() + ) model = self.widget.controls.privileged_pa_values.model() selected_values = [model.data(index) for index in selected_indexes] self.assertEqual(selected_values, privileged_pa_values) def test_select_default(self): - """Check that the widget automatically selects the default fairness attributes if the input data contains them""" + """ + Check that the widget automatically selects the default + fairness attributes if the input data contains them + """ test_data = Table(self.data_path_adult) self.send_signal( self.widget.Inputs.data, test_data, ) - favorable_class_value, protected_attribute, privileged_pa_values = fairness_attributes(test_data.domain) + favorable_class_value, protected_attribute, privileged_pa_values = ( + fairness_attributes(test_data.domain) + ) self.assertEqual(self.widget.favorable_class_value, favorable_class_value) self.assertEqual(self.widget.protected_attribute.name, protected_attribute.name) self.assertEqual(self.widget.privileged_pa_values, privileged_pa_values) - def test_selection(self): """Check that the selection of fairness attributes works properly""" test_data = Table(self.data_path_adult) @@ -78,15 +102,23 @@ def test_selection(self): # Test that the selection of favorable class value works simulate.combobox_activate_index(self.widget.controls.favorable_class_value, 0) - self.assertEqual(self.widget.favorable_class_value, self.widget.controls.favorable_class_value.currentText()) + self.assertEqual( + self.widget.favorable_class_value, + self.widget.controls.favorable_class_value.currentText(), + ) # Test that the selection of protected attribute works simulate.combobox_activate_index(self.widget.controls.protected_attribute, 0) - self.assertEqual(self.widget.protected_attribute.name, self.widget.controls.protected_attribute.currentText()) + self.assertEqual( + self.widget.protected_attribute.name, + self.widget.controls.protected_attribute.currentText(), + ) # Test that the selection of privileged protected attribute values works select_rows(self.widget.controls.privileged_pa_values, [1]) - selected_indexes = 
self.widget.controls.privileged_pa_values.selectionModel().selectedRows() + selected_indexes = ( + self.widget.controls.privileged_pa_values.selectionModel().selectedRows() + ) model = self.widget.controls.privileged_pa_values.model() selected_values = [model.data(index) for index in selected_indexes] @@ -110,11 +142,13 @@ def test_output(self): output_data = self.get_output(self.widget.Outputs.data) self.assertTrue(output_data is not None) - self.assertTrue("favorable_class_value" in output_data.domain.class_var.attributes) + self.assertTrue( + "favorable_class_value" in output_data.domain.class_var.attributes + ) contains_pa_values = False for attr in output_data.domain.attributes: if "privileged_pa_values" in attr.attributes: - contains_pa_values = True + contains_pa_values = True self.assertTrue(contains_pa_values) diff --git a/orangecontrib/fairness/widgets/tests/test_owdatasetbias.py b/orangecontrib/fairness/widgets/tests/test_owdatasetbias.py index 4438d7f..e097737 100644 --- a/orangecontrib/fairness/widgets/tests/test_owdatasetbias.py +++ b/orangecontrib/fairness/widgets/tests/test_owdatasetbias.py @@ -1,3 +1,7 @@ +""" +This file contains the tests for the OWDatasetBias widget. +""" + import unittest from Orange.data.table import Table @@ -7,9 +11,15 @@ class TestOWDatasetBias(WidgetTest): + """ + Test class for the OWDatasetBias widget. + """ + def setUp(self) -> None: self.data_path_adult = "https://datasets.biolab.si/core/adult.tab" - self.incorrect_input_data_path = "https://datasets.biolab.si/core/breast-cancer.tab" + self.incorrect_input_data_path = ( + "https://datasets.biolab.si/core/breast-cancer.tab" + ) self.widget = self.create_widget(OWDatasetBias) self.assertEqual(self.widget.disparate_impact_label.text(), "No data detected.") @@ -22,7 +32,10 @@ def test_no_data(self): self.assertEqual(self.widget.statistical_parity_difference_label.text(), "") def test_incorrect_input_data(self): - """Check that the widget displays an error message when the input data does not have the 'AsFairness' attributes""" + """ + Check that the widget displays an error message when + the input data does not have the 'AsFairness' attributes + """ test_data = Table(self.incorrect_input_data_path) self.send_signal(self.widget.Inputs.data, test_data) self.assertTrue(self.widget.Error.missing_fairness_data.is_shown()) diff --git a/orangecontrib/fairness/widgets/tests/test_owequalizedodds.py b/orangecontrib/fairness/widgets/tests/test_owequalizedodds.py index 3fe1ced..f466d25 100644 --- a/orangecontrib/fairness/widgets/tests/test_owequalizedodds.py +++ b/orangecontrib/fairness/widgets/tests/test_owequalizedodds.py @@ -1,3 +1,5 @@ +"""This file contains the tests for the OWEqualizedOdds widget.""" + import unittest from Orange.widgets.tests.base import WidgetTest @@ -14,9 +16,15 @@ class TestOWEqualizedOdds(WidgetTest): + """ + Test class for the OWEqualizedOdds widget. 
+ """ + def setUp(self) -> None: self.data_path_adult = "https://datasets.biolab.si/core/german-credit-data.tab" - self.incorrect_input_data_path = "https://datasets.biolab.si/core/breast-cancer.tab" + self.incorrect_input_data_path = ( + "https://datasets.biolab.si/core/breast-cancer.tab" + ) self.widget = self.create_widget(OWEqualizedOdds) self.predictions = self.create_widget(OWPredictions) self.test_and_score = self.create_widget(OWTestAndScore) @@ -26,7 +34,10 @@ def test_no_data(self): self.send_signal(self.widget.Inputs.data, None) def test_incorrect_input_data(self): - """Check that the widget displays an error message when the input data does not have the 'AsFairness' attributes""" + """ + Check that the widget displays an error message when + the input data does not have the 'AsFairness' attributes + """ test_data = Table(self.incorrect_input_data_path) self.send_signal(self.widget.Inputs.data, test_data) self.assertTrue(self.widget.Error.missing_fairness_data.is_shown()) @@ -155,7 +166,12 @@ def test_repeatable_parameter(self): self.widget.repeatable = False self.assertFalse(self.widget.repeatable) + class TestEqualizedOddsPostprocessing(unittest.TestCase): + """ + Test class for the PostprocessingLearner and PostprocessingModel. + """ + def setUp(self): self.data_path_adult = "https://datasets.biolab.si/core/adult.tab" @@ -174,20 +190,19 @@ def test_adversarial_model(self): """Check if the adversarial model works""" learner = PostprocessingLearner(LogisticRegressionLearner()) data = Table(self.data_path_adult) - model = learner(data[:len(data) // 2]) + model = learner(data[: len(data) // 2]) self.assertIsNotNone(model) - predictions = model(data[len(data) // 2:], ret=Model.ValueProbs ) + predictions = model(data[len(data) // 2 :], ret=Model.ValueProbs) self.assertIsNotNone(predictions) labels, scores = predictions self.assertEqual(len(labels), len(scores)) - self.assertEqual(len(labels), len(data[len(data) // 2:])) + self.assertEqual(len(labels), len(data[len(data) // 2 :])) self.assertLess(abs(scores.sum(axis=1) - 1).all(), 1e-6) self.assertTrue(all(label in [0, 1] for label in labels)) - if __name__ == "__main__": unittest.main() diff --git a/orangecontrib/fairness/widgets/tests/test_owreweighing.py b/orangecontrib/fairness/widgets/tests/test_owreweighing.py index c7f1467..ebc6739 100644 --- a/orangecontrib/fairness/widgets/tests/test_owreweighing.py +++ b/orangecontrib/fairness/widgets/tests/test_owreweighing.py @@ -1,3 +1,7 @@ +""" +This file contains the tests for the OWReweighing widget. +""" + import unittest import numpy as np @@ -6,18 +10,29 @@ from Orange.data import Table from orangecontrib.fairness.widgets.owreweighing import OWReweighing -from orangecontrib.fairness.widgets.owweightedlogisticregression import OWWeightedLogisticRegression +from orangecontrib.fairness.widgets.owweightedlogisticregression import ( + OWWeightedLogisticRegression, +) from orangecontrib.fairness.widgets.owcombinepreprocessors import OWCombinePreprocessors - class TestOWReweighing(WidgetTest): + """ + Test class for the OWReweighing widget. 
+ """ + def setUp(self) -> None: - self.data_path_adult = "https://datasets.biolab.si/core/compas-scores-two-years.tab" - self.incorrect_input_data_path = "https://datasets.biolab.si/core/breast-cancer.tab" + self.data_path_adult = ( + "https://datasets.biolab.si/core/compas-scores-two-years.tab" + ) + self.incorrect_input_data_path = ( + "https://datasets.biolab.si/core/breast-cancer.tab" + ) self.widget = self.create_widget(OWReweighing) self.combine_preprocessors = self.create_widget(OWCombinePreprocessors) - self.weighted_logistic_regression = self.create_widget(OWWeightedLogisticRegression) + self.weighted_logistic_regression = self.create_widget( + OWWeightedLogisticRegression + ) def test_no_data(self): """Check that the widget doesn't crash on empty data""" @@ -26,7 +41,10 @@ def test_no_data(self): self.assertIsNotNone(self.get_output(self.widget.Outputs.preprocessor)) def test_incorrect_input_data(self): - """Check that the widget displays an error message when the input data does not have the fairness attributes""" + """ + Check that the widget displays an error message when + the input data does not have the fairness attributes + """ test_data = Table(self.incorrect_input_data_path) self.send_signal(self.widget.Inputs.data, test_data) self.assertTrue(self.widget.Error.missing_fairness_data.is_shown()) @@ -53,10 +71,16 @@ def test_combine_preprocessors(self): first_preprocessor = self.get_output(self.widget.Outputs.preprocessor) second_preprocessor = self.get_output(self.widget.Outputs.preprocessor) - self.send_signal(self.combine_preprocessors.Inputs.first_preprocessor, first_preprocessor) - self.send_signal(self.combine_preprocessors.Inputs.second_preprocessor, second_preprocessor) + self.send_signal( + self.combine_preprocessors.Inputs.first_preprocessor, first_preprocessor + ) + self.send_signal( + self.combine_preprocessors.Inputs.second_preprocessor, second_preprocessor + ) - combined_preprocessor = self.get_output(self.combine_preprocessors.Outputs.preprocessor) + combined_preprocessor = self.get_output( + self.combine_preprocessors.Outputs.preprocessor + ) # Check that the output is not None self.assertIsNotNone(combined_preprocessor) @@ -67,9 +91,11 @@ def test_combine_preprocessors(self): # Check that there are two preprocessors in the list self.assertEqual(len(combined_preprocessor.preprocessors), 2) - def test_with_weighted_logistic_regression(self): - """Check that the predictions of logistic regression on the original data and the preprocessed data are different""" + """ + Check that the predictions of logistic regression on + the original data and the preprocessed data are different + """ test_data = Table(self.data_path_adult) self.send_signal(self.widget.Inputs.data, test_data) @@ -83,15 +109,18 @@ def test_with_weighted_logistic_regression(self): normal_model = self.get_output(self.weighted_logistic_regression.Outputs.model) # Train a model on the preprocessed data - self.send_signal(self.weighted_logistic_regression.Inputs.data, preprocessed_data) + self.send_signal( + self.weighted_logistic_regression.Inputs.data, preprocessed_data + ) self.wait_until_finished(self.weighted_logistic_regression) - preprocessed_model = self.get_output(self.weighted_logistic_regression.Outputs.model) + preprocessed_model = self.get_output( + self.weighted_logistic_regression.Outputs.model + ) # Check that the predictions of the two models are different self.assertFalse( np.array_equal( - normal_model(test_data), - preprocessed_model(preprocessed_data) + normal_model(test_data), 
preprocessed_model(preprocessed_data) ), "Preprocessed predictions should not equal normal predictions", ) diff --git a/orangecontrib/fairness/widgets/tests/utils.py b/orangecontrib/fairness/widgets/tests/utils.py index 7dbb37a..1a0ef0f 100644 --- a/orangecontrib/fairness/widgets/tests/utils.py +++ b/orangecontrib/fairness/widgets/tests/utils.py @@ -1,9 +1,23 @@ +""" +Utility functions for testing. +""" + from Orange.evaluation import scoring from orangecontrib.fairness.evaluation import scoring as bias_scoring def print_metrics(results, bias=True): + """ + Print the metrics of the results. + + This function prints the metrics of the results. + If bias is True, it will also print the bias metrics. + + Args: + results (Results): The results of the model. + bias (bool): If True, print the bias metrics. + """ print(f"ROC AUC: {scoring.AUC(results)}") print(f"CA: {scoring.CA(results)}") print(f"F1: {scoring.F1(results)}") @@ -15,14 +29,22 @@ def print_metrics(results, bias=True): print(f"AOD: {bias_scoring.AverageOddsDifference(results)}") print(f"DI: {bias_scoring.DisparateImpact(results)}") + def fairness_attributes(domain): + """ + Get the fairness attributes from the domain. + + This function gets the favorable class value, protected attribute and + privileged protected attribute values from the domain. + + Args: + domain (Domain): The domain of the data. + """ favorable_class_value = None protected_attribute = None privileged_pa_values = None if "favorable_class_value" in domain.class_var.attributes: - favorable_class_value = domain.class_var.attributes[ - "favorable_class_value" - ] + favorable_class_value = domain.class_var.attributes["favorable_class_value"] for var in domain.attributes: if "privileged_pa_values" in var.attributes: protected_attribute = var diff --git a/orangecontrib/fairness/widgets/utils.py b/orangecontrib/fairness/widgets/utils.py index 9e0b6b5..72a6f93 100644 --- a/orangecontrib/fairness/widgets/utils.py +++ b/orangecontrib/fairness/widgets/utils.py @@ -1,3 +1,7 @@ +""" +This module contains utility functions and decorators used by the fairness widgets. +""" + import importlib.util from functools import wraps @@ -14,13 +18,14 @@ ) MISSING_VALUES: str = ( - "Missing values detected in the data. \n" + "Missing values detected in the data. \n" "They will automatically be imputed with the average or most frequent value." ) MISSING_CLASS_VARIABLE: str = ( "The dataset does not contain a class variable. \n" - "The fairness metrics can only be used with datasets containing a (categorical) class variable. \n" + "The fairness metrics can only be used with datasets " + "containing a (categorical) class variable. \n" ) NUMERICAL_CLASS_VARIABLE: str = ( @@ -37,7 +42,8 @@ REWEIGHING_PREPROCESSOR: str = ( "This widget is not compatible with the reweighing preprocessor. \n" - "The custom preprocessing is therefore being ignored and the default preprocessing is being used instead." + "The custom preprocessing is therefore being ignored and " + "the default preprocessing is being used instead." 
) REWEIGHTED_DATA: str = ( @@ -57,9 +63,9 @@ def is_tensorflow_installed(): def check_for_tensorflow(f): """A function which checks if tensorflow is installed.""" - + @wraps(f) - def wrapper(widget, input, *args, **kwargs): + def wrapper(widget, input, *args, **kwargs): if not is_tensorflow_installed(): input = None @@ -68,7 +74,6 @@ def wrapper(widget, input, *args, **kwargs): return wrapper - def check_for_reweighing_preprocessor(f): """A function which checks if the input to a widget is a reweighing preprocessor.""" from orangecontrib.fairness.widgets.owreweighing import ReweighingTransform @@ -102,15 +107,14 @@ def wrapper(widget, input, *args, **kwargs): return wrapper + def check_for_reweighted_data(f): """A function which checks if the input data was reweighted by a reweighing preprocessor.""" @wraps(f) def wrapper(widget, input, *args, **kwargs): # Add the preprocessor error message - widget.Error.add_message( - "reweighinghted_data", UnboundMsg(REWEIGHTED_DATA) - ) + widget.Error.add_message("reweighinghted_data", UnboundMsg(REWEIGHTED_DATA)) widget.Error.reweighinghted_data.clear() if isinstance(input, Table): @@ -167,7 +171,9 @@ def check_for_missing_values(f): @wraps(f) def wrapper(widget, data: Table, *args, **kwargs): - widget.Warning.add_message("missing_values_detected", UnboundMsg(MISSING_VALUES)) + widget.Warning.add_message( + "missing_values_detected", UnboundMsg(MISSING_VALUES) + ) widget.Warning.missing_values_detected.clear() if data is not None and isinstance(data, Table): @@ -180,7 +186,9 @@ def wrapper(widget, data: Table, *args, **kwargs): def check_data_structure(f): - """Check if the data has a (categorical) class variable and more than one categorical attribute.""" + """ + Check if the data has a (categorical) class variable and at least one categorical attribute.
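+
+    Like the other check_* helpers in this module, it is meant to decorate a
+    widget's input handler, for example (illustrative):
+
+        @Inputs.data
+        @check_data_structure
+        def set_data(self, data):
+            ...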
+ """ @wraps(f) def wrapper(widget, data: Table, *args, **kwargs): @@ -193,7 +201,7 @@ def wrapper(widget, data: Table, *args, **kwargs): widget.Error.add_message( "no_categorical_attributes", UnboundMsg(NO_CATEGORICAL_ATTRIBUTES) ) - + widget.Error.missing_class_variable.clear() widget.Error.numerical_class_variable.clear() widget.Error.no_categorical_attributes.clear() @@ -206,7 +214,7 @@ def wrapper(widget, data: Table, *args, **kwargs): elif not data.domain.class_var.is_discrete: widget.Error.numerical_class_variable() data = None - + if data is not None: categorical = False for attribute in data.domain.attributes: @@ -217,13 +225,12 @@ def wrapper(widget, data: Table, *args, **kwargs): if not categorical: widget.Error.no_categorical_attributes() data = None - + return f(widget, data, *args, **kwargs) return wrapper - ############################################################# # Functions related to the table_to_standard_dataset function ############################################################# @@ -246,14 +253,15 @@ def _get_index_attribute_encoding( data, protected_attribute, favorable_class_value, privileged_pa_values ): """ - Convert the favorable_class_value and privileged_pa_values + Convert the favorable_class_value and privileged_pa_values from their string representation to their index representation """ # Get the values for the attributes class_values = data.domain.class_var.values protected_attribute_values = data.domain[protected_attribute].values - # Get the index representation of the favorable_class_value and privileged_pa_values, this is their index in the list of values + # Get the index representation of the favorable_class_value and privileged_pa_values, + # this is their index in the list of values. favorable_class_value_indexes = class_values.index(favorable_class_value) privileged_pa_values_indexes = [ protected_attribute_values.index(value) for value in privileged_pa_values @@ -281,12 +289,14 @@ def _add_dummy_class_column(data, df): def _correct_standard_dataset(standard_dataset, favorable_class_value_indexes): """ - Check if the favorable_label in the standard_dataset matches the + Check if the favorable_label in the standard_dataset matches the favorable_class_value_indexes (the expected favorable label value) and correct it if needed. 
""" if standard_dataset.favorable_label != favorable_class_value_indexes: - standard_dataset.favorable_label, standard_dataset.unfavorable_label = \ - standard_dataset.unfavorable_label, standard_dataset.favorable_label + standard_dataset.favorable_label, standard_dataset.unfavorable_label = ( + standard_dataset.unfavorable_label, + standard_dataset.favorable_label, + ) def table_to_standard_dataset(data) -> None: @@ -294,24 +304,27 @@ def table_to_standard_dataset(data) -> None: if not contains_fairness_attributes(data.domain): raise ValueError(MISSING_FAIRNESS_ATTRIBUTES) - + if data.has_missing(): data = Impute()(data) xdf, ydf, mdf = data.to_pandas_dfs() # Merge xdf and ydf TODO: Check if I need to merge mdf - # This dataframe consists of all the data, the categorical variables values are represented with the index of the value in domain[attribute].values + # This dataframe consists of all the data, the categorical variables values are + # represented with the index of the value in domain[attribute].values df = ydf.merge(xdf, left_index=True, right_index=True) - # Read the fairness attributes from the domain of the data, which will be used to get the index representations + # Read the fairness attributes from the domain of the data, + # which will be used to get the index representations ( favorable_class_value, protected_attribute, privileged_pa_values, ) = _get_fairness_attributes(data) - # Convert the favorable_class_value and privileged_pa_values from their string representation to their index representation - # We need to do this because when we convert the Orange table to a pandas dataframe all categorical variables are encoded + # Convert the favorable_class_value and privileged_pa_values from their string + # representation to their index representation. We need to do this because when + # we convert the Orange table to a pandas dataframe all categorical variables are encoded. ( favorable_class_value_indexes, privileged_pa_values_indexes, @@ -320,21 +333,24 @@ def table_to_standard_dataset(data) -> None: data, protected_attribute, favorable_class_value, privileged_pa_values ) - # If the data is from a "predict" function call and does not contain the class variable we need to add it and fill it with dummy values - # The dummy values need to contain all the possible values of the class variable (in its index representation) - # This is because the aif360 StandardDataset requires the class variable to be present in the dataframe with all the possible values - if data.domain.class_var.name not in df.columns or df[data.domain.class_var.name].isnull().any(): + # If the data is from a "predict" function call and does not contain the class + # variable we need to add it and fill it with dummy values. The dummy values need + # to contain all the possible values of the class variable (in its index representation). + # This is because the aif360 StandardDataset requires the class variable to be present in + # the dataframe with all the possible values. + if ( + data.domain.class_var.name not in df.columns + or df[data.domain.class_var.name].isnull().any() + ): _add_dummy_class_column(data, df) - # Map the protected_attribute privileged values to 1 and the unprivileged values to 0 - # This is so AdversarialDebiasing can work when the protected attribute has more than two unique values - # It does not affect the performance of any other algorithm + # This is so AdversarialDebiasing can work when the protected attribute has more than + # two unique values. 
It does not affect the performance of any other algorithm. df[protected_attribute] = df[protected_attribute].map( lambda x: 1 if x in privileged_pa_values_indexes else 0 ) - # Create the StandardDataset, this is the dataset that aif360 uses standard_dataset = StandardDataset( df=df, # df: a pandas dataframe containing all the data @@ -347,29 +363,29 @@ ], # protected_attribute_names: the name of the protected attribute privileged_classes=[ [1] - ], # privileged_classes: the values of the protected attribute that are considered privileged (in this case they are index encoded) + ], # privileged_classes: the values of the protected attribute that are considered + # privileged (in this case they are index encoded). # categorical_features = discrete_variables, ) - # Adversarial debiasing bug fix (in the prediction phase when using Average Impute and Predictions - # widget all labels are set to the same value for some reason. This messes with the standard dataset - # because it thinks the label is not binary and binarizes it on its own, and selects its own favorable_label - # which is not what we want or expect so in case that happens we want to set the favorable_label back to what it should be) - # (This could also apply to some other scenarios so it is better to have it here as a precaution) + # Adversarial debiasing bug fix: in the prediction phase, when the Average Impute + # and Predictions widgets are used, all labels can end up set to the same value. + # The StandardDataset then assumes the label is not binary, binarizes it on its + # own and selects its own favorable_label, which is not what we want or expect, + # so if that happens we set the favorable_label back to what it should be. (This + # could also apply to other scenarios, so it is kept here as a precaution.) _correct_standard_dataset(standard_dataset, favorable_class_value_indexes) if "weights" in mdf: standard_dataset.instance_weights = mdf["weights"].to_numpy() # Create the privileged and unprivileged groups - # The format was a list of dictionaries, each dictionary contains the name of the protected attribute and the index value of the privileged/unprivileged group - # Because AdversaryDebiasing can only handle one protected attribute, we converted all privileged values to 1 and unprivileged to 0 and now only need one dictionary (the result is the same) - privileged_groups = [ - {protected_attribute: 1} - ] - unprivileged_groups = [ - {protected_attribute: 0} - ] - - - return standard_dataset, privileged_groups, unprivileged_groups \ No newline at end of file + # The format was a list of dictionaries, each dictionary contains the name of the protected + # attribute and the index value of the privileged/unprivileged group. + # AdversaryDebiasing can only handle one protected attribute, so we converted all privileged + # values to 1 and unprivileged to 0 and now only need one dictionary (the result is the same). + privileged_groups = [{protected_attribute: 1}] + unprivileged_groups = [{protected_attribute: 0}] + + return standard_dataset, privileged_groups, unprivileged_groups diff --git a/setup.py b/setup.py index 2168567..a1cfadd 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages from os import path, walk -VERSION = "0.2.0" +VERSION = "0.2.1" try: LONG_DESCRIPTION = open(