
Commit a89006a

beliefer authored and HyukjinKwon committed
[SPARK-31393][SQL] Show the correct alias in schema for expression
### What changes were proposed in this pull request?

Some aliases of expressions are not displayed correctly in the schema. This PR fixes them for:

- `TimeWindow`
- `MaxBy`
- `MinBy`
- `UnaryMinus`
- `BitwiseCount`

This PR also fixes a typo; see https://github.com/apache/spark/blob/b7cde42b04b21c9bfee6535199cf385855c15853/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala#L142

Note:
1. `MaxBy` and `MinBy` extend `MaxMinBy`, and the latter adds a `funcName` method that is not needed. We can reuse `prettyName` instead of `funcName`.
2. Some Spark SQL functions have inelegant implementations. For example, `BitwiseCount` overrides the `sql` method as `override def sql: String = s"bit_count(${child.sql})"`. This is not elegant enough, because `Expression` already provides the following definition:

```
def sql: String = {
  val childrenSQL = children.map(_.sql).mkString(", ")
  s"$prettyName($childrenSQL)"
}
```

By this definition, `BitwiseCount` should override the `prettyName` method instead.

### Why are the changes needed?

Improve the implementation of some expressions.

### Does this PR introduce any user-facing change?

Yes. This PR lets users see the correct alias in the schema.

### How was this patch tested?

Jenkins tests.

Closes apache#28164 from beliefer/elegant-pretty-name-for-function.

Lead-authored-by: beliefer <[email protected]>
Co-authored-by: gengjiaan <[email protected]>
Signed-off-by: HyukjinKwon <[email protected]>
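To make the pattern concrete, here is a minimal, self-contained Scala sketch of the `prettyName`/`sql` contract described above (`Expr`, `Lit`, and `BitCount` are hypothetical stand-ins, not Spark's actual classes):

```
// Sketch only: mirrors the Expression.sql default quoted above.
trait Expr {
  def children: Seq[Expr]
  // Display name; Spark derives a default from the class name.
  def prettyName: String = getClass.getSimpleName.toLowerCase
  // Default SQL rendering: prettyName applied to the children's SQL.
  def sql: String = children.map(_.sql).mkString(s"$prettyName(", ", ", ")")
}

case class Lit(value: Int) extends Expr {
  def children: Seq[Expr] = Nil
  override def sql: String = value.toString
}

// Overriding prettyName alone is enough; the inherited sql picks it up,
// so there is no need to override sql as the old BitwiseCount did.
case class BitCount(child: Expr) extends Expr {
  def children: Seq[Expr] = Seq(child)
  override def prettyName: String = "bit_count"
}

object Demo extends App {
  println(BitCount(Lit(7)).sql) // prints: bit_count(7)
}
```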
1 parent 842b1dc · commit a89006a

File tree: 8 files changed (+21, −14 lines)


sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala

Lines changed: 1 addition & 1 deletion
@@ -253,7 +253,7 @@ object FunctionRegistry {
     expression[Log2]("log2"),
     expression[Log]("ln"),
     expression[Remainder]("mod", true),
-    expression[UnaryMinus]("negative"),
+    expression[UnaryMinus]("negative", true),
     expression[Pi]("pi"),
     expression[Pmod]("pmod"),
     expression[UnaryPositive]("positive"),
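Registering `negative` with the extra `true` flag marks it as a function alias; the registered name is recorded on the expression (as the `FUNC_ALIAS` tag read by `UnaryMinus.sql` in the arithmetic.scala hunk below), so the schema shows `negative(1)` instead of `(- 1)`. A quick check in spark-shell, assuming a build that includes this patch:

```
// Expected per the sql-expression-schema.md change later in this commit.
spark.sql("SELECT negative(1)").printSchema()
// root
//  |-- negative(1): integer (nullable = false)
```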

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala

Lines changed: 1 addition & 0 deletions
@@ -63,6 +63,7 @@ case class TimeWindow(
   override def dataType: DataType = new StructType()
     .add(StructField("start", TimestampType))
     .add(StructField("end", TimestampType))
+  override def prettyName: String = "window"

   // This expression is replaced in the analyzer.
   override lazy val resolved = false

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/MaxByAndMinBy.scala

Lines changed: 5 additions & 4 deletions
@@ -31,7 +31,6 @@ abstract class MaxMinBy extends DeclarativeAggregate {
   def valueExpr: Expression
   def orderingExpr: Expression

-  protected def funcName: String
   // The predicate compares two ordering values.
   protected def predicate(oldExpr: Expression, newExpr: Expression): Expression
   // The arithmetic expression returns greatest/least value of all parameters.
@@ -46,7 +45,7 @@ abstract class MaxMinBy extends DeclarativeAggregate {
   override def dataType: DataType = valueExpr.dataType

   override def checkInputDataTypes(): TypeCheckResult =
-    TypeUtils.checkForOrderingExpr(orderingExpr.dataType, s"function $funcName")
+    TypeUtils.checkForOrderingExpr(orderingExpr.dataType, s"function $prettyName")

   // The attributes used to keep extremum (max or min) and associated aggregated values.
   private lazy val extremumOrdering =
@@ -101,7 +100,8 @@ abstract class MaxMinBy extends DeclarativeAggregate {
   group = "agg_funcs",
   since = "3.0.0")
 case class MaxBy(valueExpr: Expression, orderingExpr: Expression) extends MaxMinBy {
-  override protected def funcName: String = "max_by"
+
+  override def prettyName: String = "max_by"

   override protected def predicate(oldExpr: Expression, newExpr: Expression): Expression =
     oldExpr > newExpr
@@ -120,7 +120,8 @@ case class MaxBy(valueExpr: Expression, orderingExpr: Expression) extends MaxMinBy {
   group = "agg_funcs",
   since = "3.0.0")
 case class MinBy(valueExpr: Expression, orderingExpr: Expression) extends MaxMinBy {
-  override protected def funcName: String = "min_by"
+
+  override def prettyName: String = "min_by"

   override protected def predicate(oldExpr: Expression, newExpr: Expression): Expression =
     oldExpr < newExpr
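Because `prettyName` now feeds the inherited `Expression.sql`, the generated column names become `max_by(x, y)` and `min_by(x, y)` rather than `maxby(x, y)` and `minby(x, y)` (see the sql-expression-schema.md diff below). A quick check in spark-shell, assuming a build with this patch:

```
spark.sql(
  "SELECT max_by(x, y) FROM VALUES ('a', 10), ('b', 50), ('c', 20) AS tab(x, y)"
).printSchema()
// root
//  |-- max_by(x, y): string (nullable = true)
```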

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala

Lines changed: 6 additions & 1 deletion
@@ -86,7 +86,12 @@ case class UnaryMinus(child: Expression) extends UnaryExpression
     case _ => numeric.negate(input)
   }

-  override def sql: String = s"(- ${child.sql})"
+  override def sql: String = {
+    getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("-") match {
+      case "-" => s"(- ${child.sql})"
+      case funcName => s"$funcName(${child.sql})"
+    }
+  }
 }

 @ExpressionDescription(
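The match above keeps the bare `(- x)` rendering when `-` is used as an operator and switches to the function form when a registered alias is present. A self-contained sketch of that dispatch (`Neg` and `aliasTag` are illustrative stand-ins for Spark's `TreeNode` tag machinery):

```
// Hypothetical stand-in for UnaryMinus.sql's alias-tag dispatch.
case class Neg(childSql: String, aliasTag: Option[String] = None) {
  def sql: String = aliasTag.getOrElse("-") match {
    case "-"      => s"(- $childSql)"        // no alias: operator form
    case funcName => s"$funcName($childSql)" // registered alias, e.g. "negative"
  }
}

object NegDemo extends App {
  println(Neg("1").sql)                    // prints: (- 1)
  println(Neg("1", Some("negative")).sql)  // prints: negative(1)
}
```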

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala

Lines changed: 2 additions & 2 deletions
@@ -172,6 +172,8 @@ case class BitwiseCount(child: Expression) extends UnaryExpression with ExpectsI

   override def toString: String = s"bit_count($child)"

+  override def prettyName: String = "bit_count"
+
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = child.dataType match {
     case BooleanType => defineCodeGen(ctx, ev, c => s"if ($c) 1 else 0")
     case _ => defineCodeGen(ctx, ev, c => s"java.lang.Long.bitCount($c)")
@@ -184,6 +186,4 @@ case class BitwiseCount(child: Expression) extends UnaryExpression with ExpectsI
     case IntegerType => java.lang.Long.bitCount(input.asInstanceOf[Int])
     case LongType => java.lang.Long.bitCount(input.asInstanceOf[Long])
   }
-
-  override def sql: String = s"bit_count(${child.sql})"
 }

sql/core/src/test/resources/sql-functions/sql-expression-schema.md

Lines changed: 4 additions & 4 deletions
@@ -1,4 +1,4 @@
-<!-- Automatically generated byExpressionsSchemaSuite -->
+<!-- Automatically generated by ExpressionsSchemaSuite -->
 ## Summary
   - Number of queries: 333
   - Number of expressions that missing example: 34
@@ -278,7 +278,7 @@
 | org.apache.spark.sql.catalyst.expressions.TruncTimestamp | date_trunc | SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359') | struct<date_trunc(YEAR, CAST(2015-03-05T09:32:05.359 AS TIMESTAMP)):timestamp> |
 | org.apache.spark.sql.catalyst.expressions.TypeOf | typeof | SELECT typeof(1) | struct<typeof(1):string> |
 | org.apache.spark.sql.catalyst.expressions.UnBase64 | unbase64 | SELECT unbase64('U3BhcmsgU1FM') | struct<unbase64(U3BhcmsgU1FM):binary> |
-| org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct<(- 1):int> |
+| org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct<negative(1):int> |
 | org.apache.spark.sql.catalyst.expressions.UnaryPositive | positive | N/A | N/A |
 | org.apache.spark.sql.catalyst.expressions.Unhex | unhex | SELECT decode(unhex('537061726B2053514C'), 'UTF-8') | struct<decode(unhex(537061726B2053514C), UTF-8):string> |
 | org.apache.spark.sql.catalyst.expressions.UnixTimestamp | unix_timestamp | SELECT unix_timestamp() | struct<unix_timestamp(current_timestamp(), yyyy-MM-dd HH:mm:ss):bigint> |
@@ -317,9 +317,9 @@
 | org.apache.spark.sql.catalyst.expressions.aggregate.Last | last_value | SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col) | struct<last_value(col, false):int> |
 | org.apache.spark.sql.catalyst.expressions.aggregate.Last | last | SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col) | struct<last(col, false):int> |
 | org.apache.spark.sql.catalyst.expressions.aggregate.Max | max | SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col) | struct<max(col):int> |
-| org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy | max_by | SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<maxby(x, y):string> |
+| org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy | max_by | SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<max_by(x, y):string> |
 | org.apache.spark.sql.catalyst.expressions.aggregate.Min | min | SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col) | struct<min(col):int> |
-| org.apache.spark.sql.catalyst.expressions.aggregate.MinBy | min_by | SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<minby(x, y):string> |
+| org.apache.spark.sql.catalyst.expressions.aggregate.MinBy | min_by | SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<min_by(x, y):string> |
 | org.apache.spark.sql.catalyst.expressions.aggregate.Percentile | percentile | SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col) | struct<percentile(col, CAST(0.3 AS DOUBLE), 1):double> |
 | org.apache.spark.sql.catalyst.expressions.aggregate.Skewness | skewness | SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) | struct<skewness(CAST(col AS DOUBLE)):double> |
 | org.apache.spark.sql.catalyst.expressions.aggregate.StddevPop | stddev_pop | SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col) | struct<stddev_pop(CAST(col AS DOUBLE)):double> |

sql/core/src/test/resources/sql-tests/results/operators.sql.out

Lines changed: 1 addition & 1 deletion
@@ -437,7 +437,7 @@ struct<abs(-3.13):decimal(3,2),abs(CAST(-2.19 AS DOUBLE)):double>
 -- !query
 select positive('-1.11'), positive(-1.11), negative('-1.11'), negative(-1.11)
 -- !query schema
-struct<(+ CAST(-1.11 AS DOUBLE)):double,(+ -1.11):decimal(3,2),(- CAST(-1.11 AS DOUBLE)):double,(- -1.11):decimal(3,2)>
+struct<(+ CAST(-1.11 AS DOUBLE)):double,(+ -1.11):decimal(3,2),negative(CAST(-1.11 AS DOUBLE)):double,negative(-1.11):decimal(3,2)>
 -- !query output
 -1.11	-1.11	1.11	1.11

sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala

Lines changed: 1 addition & 1 deletion
@@ -137,7 +137,7 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession {
   }

   val header = Seq(
-    s"<!-- Automatically generated by${getClass.getSimpleName} -->",
+    s"<!-- Automatically generated by ${getClass.getSimpleName} -->",
     "## Summary",
     s" - Number of queries: ${outputs.size}",
     s" - Number of expressions that missing example: ${missingExamples.size}",
