Skip to content

Commit 0436b3d

Browse files
HeartSaVioR authored and dongjoon-hyun committed
[SPARK-30653][INFRA][SQL] EOL character enforcement for java/scala/xml/py/R files
### What changes were proposed in this pull request?

This patch converts CR/LF into LF in 3 source files, whereas most files use only LF. This patch also adds rules to enforce LF as the EOL for all java, scala, xml, py, and R files.

### Why are the changes needed?

The majority of source code files use LF, and only three files use CR/LF. While an IDE usually lets us ignore the difference, there is still a chance of producing unnecessary diffs if a file is modified with an editor that doesn't handle line endings automatically.

### Does this PR introduce any user-facing change?

No

### How was this patch tested?

```
grep -IUrl --color "^M" . | grep "\.java\|\.scala\|\.xml\|\.py\|\.R" | grep -v "/target/" | grep -v "/build/" | grep -v "/dist/" | grep -v "dependency-reduced-pom.xml" | grep -v ".pyc"
```

(Please note you'll need to type CTRL+V -> CTRL+M in a bash shell to get `^M`, because it represents the CR control character of a CR/LF line ending, not a combination of `^` and `M`.)

Before the patch, the result is:

```
./sql/core/src/main/java/org/apache/spark/sql/execution/columnar/ColumnDictionary.java
./sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala
./sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala
```

and after the patch, the result is empty.

Git also shows a warning message if the EOL of any source file of the given types is changed to CR/LF, like below:

```
warning: CRLF will be replaced by LF in sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala.
The file will have its original line endings in your working directory.
```

Closes apache#27365 from HeartSaVioR/MINOR-remove-CRLF-in-source-codes.

Authored-by: Jungtaek Lim (HeartSaVioR) <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 8a24cf2 commit 0436b3d

File tree

4 files changed

+582
-577
lines changed

4 files changed

+582
-577
lines changed

.gitattributes

+5
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,7 @@
11
*.bat text eol=crlf
22
*.cmd text eol=crlf
3+
*.java text eol=lf
4+
*.scala text eol=lf
5+
*.xml text eol=lf
6+
*.py text eol=lf
7+
*.R text eol=lf
Original file line number | Diff line number | Diff line change
@@ -1,64 +1,64 @@
1-
/*
2-
* Licensed to the Apache Software Foundation (ASF) under one or more
3-
* contributor license agreements. See the NOTICE file distributed with
4-
* this work for additional information regarding copyright ownership.
5-
* The ASF licenses this file to You under the Apache License, Version 2.0
6-
* (the "License"); you may not use this file except in compliance with
7-
* the License. You may obtain a copy of the License at
8-
*
9-
* http://www.apache.org/licenses/LICENSE-2.0
10-
*
11-
* Unless required by applicable law or agreed to in writing, software
12-
* distributed under the License is distributed on an "AS IS" BASIS,
13-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14-
* See the License for the specific language governing permissions and
15-
* limitations under the License.
16-
*/
17-
18-
package org.apache.spark.sql.catalyst.optimizer
19-
20-
import org.apache.spark.sql.catalyst.expressions._
21-
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan}
22-
import org.apache.spark.sql.catalyst.rules.Rule
23-
24-
/**
25-
* Simplify redundant [[CreateNamedStruct]], [[CreateArray]] and [[CreateMap]] expressions.
26-
*/
27-
object SimplifyExtractValueOps extends Rule[LogicalPlan] {
28-
override def apply(plan: LogicalPlan): LogicalPlan = plan transform {
29-
// One place where this optimization is invalid is an aggregation where the select
30-
// list expression is a function of a grouping expression:
31-
//
32-
// SELECT struct(a,b).a FROM tbl GROUP BY struct(a,b)
33-
//
34-
// cannot be simplified to SELECT a FROM tbl GROUP BY struct(a,b). So just skip this
35-
// optimization for Aggregates (although this misses some cases where the optimization
36-
// can be made).
37-
case a: Aggregate => a
38-
case p => p.transformExpressionsUp {
39-
// Remove redundant field extraction.
40-
case GetStructField(createNamedStruct: CreateNamedStruct, ordinal, _) =>
41-
createNamedStruct.valExprs(ordinal)
42-
43-
// Remove redundant array indexing.
44-
case GetArrayStructFields(CreateArray(elems), field, ordinal, _, _) =>
45-
// Instead of selecting the field on the entire array, select it from each member
46-
// of the array. Pushing down the operation this way may open other optimizations
47-
// opportunities (i.e. struct(...,x,...).x)
48-
CreateArray(elems.map(GetStructField(_, ordinal, Some(field.name))))
49-
50-
// Remove redundant map lookup.
51-
case ga @ GetArrayItem(CreateArray(elems), IntegerLiteral(idx)) =>
52-
// Instead of creating the array and then selecting one row, remove array creation
53-
// altogether.
54-
if (idx >= 0 && idx < elems.size) {
55-
// valid index
56-
elems(idx)
57-
} else {
58-
// out of bounds, mimic the runtime behavior and return null
59-
Literal(null, ga.dataType)
60-
}
61-
case GetMapValue(CreateMap(elems), key) => CaseKeyWhen(key, elems)
62-
}
63-
}
64-
}
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.catalyst.optimizer
19+
20+
import org.apache.spark.sql.catalyst.expressions._
21+
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan}
22+
import org.apache.spark.sql.catalyst.rules.Rule
23+
24+
/**
25+
* Simplify redundant [[CreateNamedStruct]], [[CreateArray]] and [[CreateMap]] expressions.
26+
*/
27+
object SimplifyExtractValueOps extends Rule[LogicalPlan] {
28+
override def apply(plan: LogicalPlan): LogicalPlan = plan transform {
29+
// One place where this optimization is invalid is an aggregation where the select
30+
// list expression is a function of a grouping expression:
31+
//
32+
// SELECT struct(a,b).a FROM tbl GROUP BY struct(a,b)
33+
//
34+
// cannot be simplified to SELECT a FROM tbl GROUP BY struct(a,b). So just skip this
35+
// optimization for Aggregates (although this misses some cases where the optimization
36+
// can be made).
37+
case a: Aggregate => a
38+
case p => p.transformExpressionsUp {
39+
// Remove redundant field extraction.
40+
case GetStructField(createNamedStruct: CreateNamedStruct, ordinal, _) =>
41+
createNamedStruct.valExprs(ordinal)
42+
43+
// Remove redundant array indexing.
44+
case GetArrayStructFields(CreateArray(elems), field, ordinal, _, _) =>
45+
// Instead of selecting the field on the entire array, select it from each member
46+
// of the array. Pushing down the operation this way may open other optimizations
47+
// opportunities (i.e. struct(...,x,...).x)
48+
CreateArray(elems.map(GetStructField(_, ordinal, Some(field.name))))
49+
50+
// Remove redundant map lookup.
51+
case ga @ GetArrayItem(CreateArray(elems), IntegerLiteral(idx)) =>
52+
// Instead of creating the array and then selecting one row, remove array creation
53+
// altogether.
54+
if (idx >= 0 && idx < elems.size) {
55+
// valid index
56+
elems(idx)
57+
} else {
58+
// out of bounds, mimic the runtime behavior and return null
59+
Literal(null, ga.dataType)
60+
}
61+
case GetMapValue(CreateMap(elems), key) => CaseKeyWhen(key, elems)
62+
}
63+
}
64+
}

0 commit comments

Comments
 (0)