|
1 |
| -/* |
2 |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
3 |
| - * contributor license agreements. See the NOTICE file distributed with |
4 |
| - * this work for additional information regarding copyright ownership. |
5 |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
6 |
| - * (the "License"); you may not use this file except in compliance with |
7 |
| - * the License. You may obtain a copy of the License at |
8 |
| - * |
9 |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
10 |
| - * |
11 |
| - * Unless required by applicable law or agreed to in writing, software |
12 |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
13 |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 |
| - * See the License for the specific language governing permissions and |
15 |
| - * limitations under the License. |
16 |
| - */ |
17 |
| - |
18 |
| -package org.apache.spark.sql.catalyst.optimizer |
19 |
| - |
20 |
| -import org.apache.spark.sql.catalyst.expressions._ |
21 |
| -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan} |
22 |
| -import org.apache.spark.sql.catalyst.rules.Rule |
23 |
| - |
24 |
| -/** |
25 |
| - * Simplify extractions from freshly built [[CreateNamedStruct]], [[CreateArray]] and [[CreateMap]] expressions. |
26 |
| - */ |
27 |
| -object SimplifyExtractValueOps extends Rule[LogicalPlan] { |
28 |
| - override def apply(plan: LogicalPlan): LogicalPlan = plan transform { |
29 |
| - // One place where this optimization is invalid is an aggregation where the select |
30 |
| - // list expression is a function of a grouping expression: |
31 |
| - // |
32 |
| - // SELECT struct(a,b).a FROM tbl GROUP BY struct(a,b) |
33 |
| - // |
34 |
| - // cannot be simplified to SELECT a FROM tbl GROUP BY struct(a,b). So just skip this |
35 |
| - // optimization for Aggregates (although this misses some cases where the optimization |
36 |
| - // can be made). |
37 |
| - case a: Aggregate => a |
38 |
| - case p => p.transformExpressionsUp { |
39 |
| - // Remove redundant field extraction. |
40 |
| - case GetStructField(createNamedStruct: CreateNamedStruct, ordinal, _) => |
41 |
| - createNamedStruct.valExprs(ordinal) |
42 |
| - |
43 |
| - // Push struct field extraction down into the array elements. |
44 |
| - case GetArrayStructFields(CreateArray(elems), field, ordinal, _, _) => |
45 |
| - // Instead of selecting the field on the entire array, select it from each member |
46 |
| - // of the array. Pushing down the operation this way may open other optimization |
47 |
| - // opportunities (e.g. struct(...,x,...).x) |
48 |
| - CreateArray(elems.map(GetStructField(_, ordinal, Some(field.name)))) |
49 |
| - |
50 |
| - // Remove redundant array indexing; the final case below does the same for map lookup. |
51 |
| - case ga @ GetArrayItem(CreateArray(elems), IntegerLiteral(idx)) => |
52 |
| - // Instead of creating the array and then selecting one element, remove array creation |
53 |
| - // altogether. |
54 |
| - if (idx >= 0 && idx < elems.size) { |
55 |
| - // valid index |
56 |
| - elems(idx) |
57 |
| - } else { |
58 |
| - // out of bounds, mimic the runtime behavior and return null |
59 |
| - Literal(null, ga.dataType) |
60 |
| - } |
61 |
| - case GetMapValue(CreateMap(elems), key) => CaseKeyWhen(key, elems) |
62 |
| - } |
63 |
| - } |
64 |
| -} |
| 1 | +/* |
| 2 | + * Licensed to the Apache Software Foundation (ASF) under one or more |
| 3 | + * contributor license agreements. See the NOTICE file distributed with |
| 4 | + * this work for additional information regarding copyright ownership. |
| 5 | + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| 6 | + * (the "License"); you may not use this file except in compliance with |
| 7 | + * the License. You may obtain a copy of the License at |
| 8 | + * |
| 9 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | + * |
| 11 | + * Unless required by applicable law or agreed to in writing, software |
| 12 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | + * See the License for the specific language governing permissions and |
| 15 | + * limitations under the License. |
| 16 | + */ |
| 17 | + |
| 18 | +package org.apache.spark.sql.catalyst.optimizer |
| 19 | + |
| 20 | +import org.apache.spark.sql.catalyst.expressions._ |
| 21 | +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan} |
| 22 | +import org.apache.spark.sql.catalyst.rules.Rule |
| 23 | + |
| 24 | +/**
| 25 | + * Simplify extractions from freshly built [[CreateNamedStruct]], [[CreateArray]] and [[CreateMap]] expressions. |
| 26 | + */ |
| 27 | +object SimplifyExtractValueOps extends Rule[LogicalPlan] { |
| 28 | + override def apply(plan: LogicalPlan): LogicalPlan = plan transform { |
| 29 | + // One place where this optimization is invalid is an aggregation where the select |
| 30 | + // list expression is a function of a grouping expression: |
| 31 | + // |
| 32 | + // SELECT struct(a,b).a FROM tbl GROUP BY struct(a,b) |
| 33 | + // |
| 34 | + // cannot be simplified to SELECT a FROM tbl GROUP BY struct(a,b). So just skip this |
| 35 | + // optimization for Aggregates (although this misses some cases where the optimization |
| 36 | + // can be made). |
| 37 | + case a: Aggregate => a |
| 38 | + case p => p.transformExpressionsUp { |
| 39 | + // Remove redundant field extraction. |
| 40 | + case GetStructField(createNamedStruct: CreateNamedStruct, ordinal, _) => |
| 41 | + createNamedStruct.valExprs(ordinal) |
| 42 | + |
| 43 | + // Push struct field extraction down into the array elements. |
| 44 | + case GetArrayStructFields(CreateArray(elems), field, ordinal, _, _) => |
| 45 | + // Instead of selecting the field on the entire array, select it from each member |
| 46 | + // of the array. Pushing down the operation this way may open other optimization |
| 47 | + // opportunities (e.g. struct(...,x,...).x) |
| 48 | + CreateArray(elems.map(GetStructField(_, ordinal, Some(field.name)))) |
| 49 | + |
| 50 | + // Remove redundant array indexing; the final case below does the same for map lookup. |
| 51 | + case ga @ GetArrayItem(CreateArray(elems), IntegerLiteral(idx)) => |
| 52 | + // Instead of creating the array and then selecting one element, remove array creation |
| 53 | + // altogether. |
| 54 | + if (idx >= 0 && idx < elems.size) { |
| 55 | + // valid index |
| 56 | + elems(idx) |
| 57 | + } else { |
| 58 | + // out of bounds, mimic the runtime behavior and return null |
| 59 | + Literal(null, ga.dataType) |
| 60 | + } |
| 61 | + case GetMapValue(CreateMap(elems), key) => CaseKeyWhen(key, elems) |
| 62 | + } |
| 63 | + } |
| 64 | +} |
0 commit comments