Internal change

achoum · copybara-github · commit 48faf05776ef · 2021-05-25T10:09:25.000-07:00
PiperOrigin-RevId: 375732248
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
 
+## 0.1.5 - ????
+
+### Bug fix
+
+-   Fix failure when an input feature name contains commas.
+
+
 ## 0.1.4 - 2021-05-21
 
 ### Features
diff --git a/tensorflow_decision_forests/keras/core.py b/tensorflow_decision_forests/keras/core.py
@@ -234,8 +234,8 @@ class AdvancedArguments(NamedTuple):
     yggdrasil_training_config: Yggdrasil Decision Forests training
       configuration. Expose a few extra hyper-parameters.
       yggdrasil_deployment_config: Configuration of the computing resources used
-      to train the model e.g. number of threads. Does not impact the model
-      quality.
+        to train the model e.g. number of threads. Does not impact the model
+        quality.
   """
 
   infer_prediction_signature: Optional[bool] = True
@@ -368,7 +368,7 @@ def __init__(self,
 
     if self._temp_directory is None:
       self._temp_directory = tempfile.mkdtemp()
-      logging.info("Using %s are temporary training directory",
+      logging.info("Using %s as temporary training directory",
                    self._temp_directory)
 
     if (self._task == Task.RANKING) != (ranking_group is not None):
@@ -745,7 +745,7 @@ def fit(self,
 
     if "epochs" in kwargs:
       if kwargs["epochs"] != 1:
-        raise ValueError("all decision forests algorithms train with only 1 "+
+        raise ValueError("all decision forests algorithms train with only 1 " +
                          "epoch, epochs={} given".format(kwargs["epochs"]))
       del kwargs["epochs"]  # Not needed since we force it to 1 below.
 
@@ -774,11 +774,10 @@ def evaluate(self, *args, **kwargs):
 
     Args:
       *args: Passed to `keras.Model.evaluate`.
-      **kwargs: Passed to `keras.Model.evaluate`.
-
-    Scalar test loss (if the model has a single output and no metrics) or list
-    of scalars (if the model has multiple outputs and/or metrics). See details
-    in `keras.Model.evaluate`.
+      **kwargs: Passed to `keras.Model.evaluate`.  Scalar test loss (if the
+        model has a single output and no metrics) or list of scalars (if the
+        model has multiple outputs and/or metrics). See details in
+        `keras.Model.evaluate`.
     """
     if self._train_on_evaluate:
       if not self._is_trained.numpy():
diff --git a/tensorflow_decision_forests/keras/keras_test.py b/tensorflow_decision_forests/keras/keras_test.py
@@ -382,7 +382,7 @@ def build_model(signature: Signature, dataset: Dataset, **args) -> models.Model:
   return model
 
 
-class TFDFInKerasTest(parameterized.TestCase, tf.test.TestCase):
+class TFDFTest(parameterized.TestCase, tf.test.TestCase):
 
   def _check_adult_model(self,
                          model,
@@ -984,10 +984,10 @@ def on_epoch_end(self, epoch, logs=None):
     test_evaluation = model.evaluate(test_dataset)
     logging.info("Test evaluation: %s", test_evaluation)
     val_evaluation = [history.history[key][0] for key in val_keys]
-    logging.info("Validation evaluation in training "
-                 "(validation_data=test_dataset): %s", val_evaluation)
-    logging.info("Callback evaluation (test_dataset): %s",
-                 callback.evaluation)
+    logging.info(
+        "Validation evaluation in training "
+        "(validation_data=test_dataset): %s", val_evaluation)
+    logging.info("Callback evaluation (test_dataset): %s", callback.evaluation)
 
     # The training evaluation is capped by the ratio of missing value (5%).
     if compare is not None:
@@ -1193,6 +1193,11 @@ def processor(x):
   def test_get_all_models(self):
     print(keras.get_all_models())
 
+  def test_feature_with_comma(self):
+    model = keras.GradientBoostedTreesModel()
+    dataset = pd.DataFrame({"a,b": [0, 1, 2], "label": [0, 1, 2]})
+    model.fit(keras.pd_dataframe_to_tf_dataset(dataset, label="label"))
+
 
 if __name__ == "__main__":
   tf.test.main()
diff --git a/tensorflow_decision_forests/tensorflow/core.py b/tensorflow_decision_forests/tensorflow/core.py
@@ -508,7 +508,15 @@ def train(input_ids: List[str],
 def _input_key_to_id(model_id: str, key: str) -> str:
   """Gets the name of the feature accumulator resource."""
 
-  return model_id + "_" + key
+  # Escape the commas, since commas are used to separate the column resource
+  # ids. Those ids have no impact on the final model, but they should be unique
+  # and must not contain commas.
+  #
+  # The character '|' is the escape symbol: '|' -> '||' and ',' -> '|c'.
+  input_id = model_id + "_" + key.replace("|", "||").replace(",", "|c")
+  if "," in input_id:
+    raise ValueError(f"Internal error: Found comma in input_id {input_id}")
+  return input_id
 
 
 def combine_tensors_and_semantics(