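Internal change: accept labels of shape [batch_size, 1] in train_step by squeezing them to [batch_size], and add tests for the squeezed and invalid label shapes.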

arvnds · copybara-github · commit aaa6ba2e80fc · 2021-05-25T06:19:37.000-07:00
PiperOrigin-RevId: 375688815
diff --git a/tensorflow_decision_forests/keras/core.py b/tensorflow_decision_forests/keras/core.py
@@ -588,10 +588,6 @@ def train_step(self, data):
       logging.info("Collect training examples.\nFeatures: %s\nLabel: %s",
                    train_x, train_y)
 
-    if len(train_y.shape) != 1:
-      raise ValueError(
-          "Expecting label of rank 1. Got {} instead.".format(train_y))
-
     if self._preprocessing is not None:
       train_x = self._preprocessing(train_x)
       if self._verbose:
@@ -621,6 +617,16 @@ def train_step(self, data):
           f"The training label tensor is expected to be a tensor. Got {train_y}"
           " instead.")
 
+    if len(train_y.shape) != 1:
+      if self._verbose:
+        logging.info("Squeezing labels to [batch_size] from [batch_size, 1].")
+      train_y = tf.squeeze(train_y, axis=1)
+
+    if len(train_y.shape) != 1:
+      raise ValueError(
+          "Labels can either be passed in as [batch_size, 1] or [batch_size]. "
+          "Invalid shape %s." % train_y.shape)
+
     # List the input features and their semantics.
     assert self._semantics is None, "The model is already trained"
     self._semantics = tf_core.infer_semantic(
@@ -750,6 +756,7 @@ def fit(self,
     # end of the epoch. This may fail in case any of the `on_train_batch_*`
     # callbacks calls `evaluate()` before the end of the 1st epoch.
     self._train_on_evaluate = True
+
     try:
       history = super(CoreModel, self).fit(
           x=x, y=y, epochs=1, callbacks=callbacks, **kwargs)
diff --git a/tensorflow_decision_forests/keras/keras_test.py b/tensorflow_decision_forests/keras/keras_test.py
@@ -22,7 +22,7 @@
 import os
 import shutil
 import subprocess
-from typing import List, Tuple, Any, Optional
+from typing import List, Tuple, Any, Optional, Type
 
 from absl import flags
 from absl import logging
@@ -830,7 +830,9 @@ def _synthetic_train_and_test(
       test_numerical: Optional[bool] = False,
       test_multidimensional_numerical: Optional[bool] = False,
       test_categorical: Optional[bool] = False,
-      test_categorical_set: Optional[bool] = False):
+      test_categorical_set: Optional[bool] = False,
+      label_shape: Optional[int] = None,
+      fit_raises: Optional[Type[Exception]] = None):
     """Trains a model on a synthetic dataset."""
 
     train_path = os.path.join(self.get_temp_dir(), "train.rio.gz")
@@ -868,12 +870,13 @@ def _synthetic_train_and_test(
     popen.wait()
 
     feature_spec = {}
+    label_shape = [label_shape] if label_shape else []
     if task == keras.Task.CLASSIFICATION:
-      feature_spec["LABEL"] = tf.io.FixedLenFeature([], tf.int64)
+      feature_spec["LABEL"] = tf.io.FixedLenFeature(label_shape, tf.int64)
     elif task == keras.Task.REGRESSION:
-      feature_spec["LABEL"] = tf.io.FixedLenFeature([], tf.float32)
+      feature_spec["LABEL"] = tf.io.FixedLenFeature(label_shape, tf.float32)
     elif task == keras.Task.RANKING:
-      feature_spec["LABEL"] = tf.io.FixedLenFeature([], tf.float32)
+      feature_spec["LABEL"] = tf.io.FixedLenFeature(label_shape, tf.float32)
       feature_spec["GROUP"] = tf.io.FixedLenFeature([], tf.string)
     else:
       assert False
@@ -964,8 +967,16 @@ def on_epoch_end(self, epoch, logs=None):
         self.evaluation = model.evaluate(test_dataset)
 
     callback = _TestEvalCallback()
-    history = model.fit(train_dataset, validation_data=test_dataset,
-                        callbacks=[callback])
+    history = None
+    if fit_raises is not None:
+      with self.assertRaises(fit_raises):
+        model.fit(
+            train_dataset, validation_data=test_dataset, callbacks=[callback])
+    else:
+      history = model.fit(
+          train_dataset, validation_data=test_dataset, callbacks=[callback])
+    if history is None:
+      return
     model.summary()
 
     train_evaluation = model.evaluate(train_dataset)
@@ -991,6 +1002,23 @@ def test_synthetic_classification_numerical(self):
     self._synthetic_train_and_test(
         keras.Task.CLASSIFICATION, 0.8, 0.72, test_numerical=True)
 
+  def test_synthetic_classification_squeeze_label(self):
+    self._synthetic_train_and_test(
+        keras.Task.CLASSIFICATION,
+        0.8,
+        0.72,
+        test_numerical=True,
+        label_shape=1)
+
+  def test_synthetic_classification_squeeze_label_invalid_shape(self):
+    self._synthetic_train_and_test(
+        keras.Task.CLASSIFICATION,
+        0.8,
+        0.72,
+        test_numerical=True,
+        label_shape=2,
+        fit_raises=ValueError)
+
   def test_synthetic_classification_categorical(self):
     self._synthetic_train_and_test(
         keras.Task.CLASSIFICATION, 0.95, 0.70, test_categorical=True)