From 81cacc91c5ebbdef9d2644cad85a86cc14bd9049 Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 27 Feb 2023 11:12:38 -0500 Subject: [PATCH] Modified based on feedback - removing pandas: removing the pandas requirement and using PySpark's DataFrame.fillna directly --- .../CustomTransform_FillEmptyStringsInAColumn.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/examples/transforms/CustomTransform_FillEmptyStringsInAColumn.py b/examples/transforms/CustomTransform_FillEmptyStringsInAColumn.py index 0dda1df..31a503c 100644 --- a/examples/transforms/CustomTransform_FillEmptyStringsInAColumn.py +++ b/examples/transforms/CustomTransform_FillEmptyStringsInAColumn.py @@ -3,7 +3,6 @@ from pyspark.sql.dataframe import DataFrame from pyspark.sql.types import StructType from pyspark.context import SparkContext -import pandas as pd from pyspark.sql.functions import col,isnan, when, count, regexp_replace def fill_empty_null_values_txn( @@ -16,12 +15,9 @@ def fill_empty_null_values_txn( _dyf = DynamicFrame.fromDF(modifiedDF, self.glue_ctx, self.name) return _dyf elif _df.filter(col(columnName).isNull()).count() > 0: - _pdf = _df.toPandas() - _pdf[columnName] = _pdf[columnName].fillna(newValue) - modifiedDF = gluectx.spark_session.createDataFrame(_pdf) - _dyf = DynamicFrame.fromDF(modifiedDF, self.glue_ctx, self.name) + _df = _df.fillna(value=newValue, subset=[columnName]) + _dyf = DynamicFrame.fromDF(_df, self.glue_ctx, self.name) return _dyf return self - -DynamicFrame.fill_empty_null_values_txn = fill_empty_null_values_txn +DynamicFrame.fill_empty_null_values_txn = fill_empty_null_values_txn \ No newline at end of file