Skip to content

Commit

Permalink
Internal change
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 375732248
  • Loading branch information
achoum authored and copybara-github committed May 25, 2021
1 parent aaa6ba2 commit 48faf05
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 15 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

## 0.1.5 - ????

### Bug fix

- Fix failure when the name of an input feature contains a comma.


## 0.1.4 - 2021-05-21

### Features
Expand Down
17 changes: 8 additions & 9 deletions tensorflow_decision_forests/keras/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,8 +234,8 @@ class AdvancedArguments(NamedTuple):
yggdrasil_training_config: Yggdrasil Decision Forests training
configuration. Expose a few extra hyper-parameters.
yggdrasil_deployment_config: Configuration of the computing resources used
to train the model e.g. number of threads. Does not impact the model
quality.
to train the model e.g. number of threads. Does not impact the model
quality.
"""

infer_prediction_signature: Optional[bool] = True
Expand Down Expand Up @@ -368,7 +368,7 @@ def __init__(self,

if self._temp_directory is None:
self._temp_directory = tempfile.mkdtemp()
logging.info("Using %s are temporary training directory",
logging.info("Using %s as temporary training directory",
self._temp_directory)

if (self._task == Task.RANKING) != (ranking_group is not None):
Expand Down Expand Up @@ -745,7 +745,7 @@ def fit(self,

if "epochs" in kwargs:
if kwargs["epochs"] != 1:
raise ValueError("all decision forests algorithms train with only 1 "+
raise ValueError("all decision forests algorithms train with only 1 " +
"epoch, epochs={} given".format(kwargs["epochs"]))
del kwargs["epochs"] # Not needed since we force it to 1 below.

Expand Down Expand Up @@ -774,11 +774,10 @@ def evaluate(self, *args, **kwargs):
Args:
*args: Passed to `keras.Model.evaluate`.
**kwargs: Passed to `keras.Model.evaluate`.
Scalar test loss (if the model has a single output and no metrics) or list
of scalars (if the model has multiple outputs and/or metrics). See details
in `keras.Model.evaluate`.
**kwargs: Passed to `keras.Model.evaluate`.

Returns:
Scalar test loss (if the model has a single output and no metrics) or list
of scalars (if the model has multiple outputs and/or metrics). See details
in `keras.Model.evaluate`.
"""
if self._train_on_evaluate:
if not self._is_trained.numpy():
Expand Down
15 changes: 10 additions & 5 deletions tensorflow_decision_forests/keras/keras_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ def build_model(signature: Signature, dataset: Dataset, **args) -> models.Model:
return model


class TFDFInKerasTest(parameterized.TestCase, tf.test.TestCase):
class TFDFTest(parameterized.TestCase, tf.test.TestCase):

def _check_adult_model(self,
model,
Expand Down Expand Up @@ -984,10 +984,10 @@ def on_epoch_end(self, epoch, logs=None):
test_evaluation = model.evaluate(test_dataset)
logging.info("Test evaluation: %s", test_evaluation)
val_evaluation = [history.history[key][0] for key in val_keys]
logging.info("Validation evaluation in training "
"(validation_data=test_dataset): %s", val_evaluation)
logging.info("Callback evaluation (test_dataset): %s",
callback.evaluation)
logging.info(
"Validation evaluation in training "
"(validation_data=test_dataset): %s", val_evaluation)
logging.info("Callback evaluation (test_dataset): %s", callback.evaluation)

# The training evaluation is capped by the ratio of missing value (5%).
if compare is not None:
Expand Down Expand Up @@ -1193,6 +1193,11 @@ def processor(x):
def test_get_all_models(self):
print(keras.get_all_models())

def test_feature_with_comma(self):
model = keras.GradientBoostedTreesModel()
dataset = pd.DataFrame({"a,b": [0, 1, 2], "label": [0, 1, 2]})
model.fit(keras.pd_dataframe_to_tf_dataset(dataset, label="label"))


if __name__ == "__main__":
tf.test.main()
10 changes: 9 additions & 1 deletion tensorflow_decision_forests/tensorflow/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,15 @@ def train(input_ids: List[str],
def _input_key_to_id(model_id: str, key: str) -> str:
"""Gets the name of the feature accumulator resource."""

return model_id + "_" + key
# Escape the commas in the key, since commas are used to separate the column
# resource ids. Those ids have no impact on the final model, but they must be
# unique and must not contain commas.
#
# The character '|' is used as the escape symbol: '|' becomes '||' and ','
# becomes '|c'.
input_id = model_id + "_" + key.replace("|", "||").replace(",", "|c")
if "," in input_id:
raise ValueError(f"Internal error: Found comma in input_id {input_id}")
return input_id


def combine_tensors_and_semantics(
Expand Down

0 comments on commit 48faf05

Please sign in to comment.