Skip to content

Commit dbab6bb

Browse files
authored
cp to 2.0 'fix a bug in shuffle which cause panic' (#19694)
fix a bug in shuffle which cause panic Approved by: @ouyuanning, @sukki37, @aunjgr
1 parent cac1c7b commit dbab6bb

File tree

1 file changed

+41
-28
lines changed

1 file changed

+41
-28
lines changed

pkg/sql/plan/shuffle.go

Lines changed: 41 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -247,16 +247,16 @@ func maybeSorted(n *plan.Node, builder *QueryBuilder, tag int32) bool {
247247
return false
248248
}
249249

250-
func determinShuffleType(col *plan.ColRef, n *plan.Node, builder *QueryBuilder) {
250+
func determinShuffleType(col *plan.ColRef, node *plan.Node, builder *QueryBuilder) {
251251
// hash by default
252-
n.Stats.HashmapStats.ShuffleType = plan.ShuffleType_Hash
252+
node.Stats.HashmapStats.ShuffleType = plan.ShuffleType_Hash
253253

254254
if builder == nil {
255255
return
256256
}
257257
tableDef, ok := builder.tag2Table[col.RelPos]
258258
if !ok {
259-
child := builder.qry.Nodes[n.Children[0]]
259+
child := builder.qry.Nodes[node.Children[0]]
260260
if child.NodeType == plan.Node_AGG && child.Stats.HashmapStats.Shuffle && col.RelPos == child.BindingTags[0] {
261261
col = child.GroupBy[col.ColPos].GetCol()
262262
if col == nil {
@@ -266,38 +266,38 @@ func determinShuffleType(col *plan.ColRef, n *plan.Node, builder *QueryBuilder)
266266
if !ok {
267267
return
268268
}
269-
n.Stats.HashmapStats.ShuffleMethod = plan.ShuffleMethod_Reuse
270-
n.Stats.HashmapStats.ShuffleType = plan.ShuffleType_Range
271-
n.Stats.HashmapStats.HashmapSize = child.Stats.HashmapStats.HashmapSize
272-
n.Stats.HashmapStats.ShuffleColMin = child.Stats.HashmapStats.ShuffleColMin
273-
n.Stats.HashmapStats.ShuffleColMax = child.Stats.HashmapStats.ShuffleColMax
274-
n.Stats.HashmapStats.Ranges = child.Stats.HashmapStats.Ranges
275-
n.Stats.HashmapStats.Nullcnt = child.Stats.HashmapStats.Nullcnt
269+
node.Stats.HashmapStats.ShuffleMethod = plan.ShuffleMethod_Reuse
270+
node.Stats.HashmapStats.ShuffleType = plan.ShuffleType_Range
271+
node.Stats.HashmapStats.HashmapSize = child.Stats.HashmapStats.HashmapSize
272+
node.Stats.HashmapStats.ShuffleColMin = child.Stats.HashmapStats.ShuffleColMin
273+
node.Stats.HashmapStats.ShuffleColMax = child.Stats.HashmapStats.ShuffleColMax
274+
node.Stats.HashmapStats.Ranges = child.Stats.HashmapStats.Ranges
275+
node.Stats.HashmapStats.Nullcnt = child.Stats.HashmapStats.Nullcnt
276276
}
277277
return
278278
}
279279
colName := tableDef.Cols[col.ColPos].Name
280280

281281
// for shuffle join, if left child is not sorted, the cost will be very high
282282
// should use complex shuffle type
283-
if n.NodeType == plan.Node_JOIN {
283+
if node.NodeType == plan.Node_JOIN {
284284
leftSorted := true
285285
if GetSortOrder(tableDef, col.ColPos) != 0 {
286286
leftSorted = false
287287
}
288-
if !maybeSorted(builder.qry.Nodes[n.Children[0]], builder, col.RelPos) {
288+
if !maybeSorted(builder.qry.Nodes[node.Children[0]], builder, col.RelPos) {
289289
leftSorted = false
290290
}
291291
if !leftSorted {
292-
leftCost := builder.qry.Nodes[n.Children[0]].Stats.Outcnt
293-
rightCost := builder.qry.Nodes[n.Children[1]].Stats.Outcnt
294-
if n.BuildOnLeft {
292+
leftCost := builder.qry.Nodes[node.Children[0]].Stats.Outcnt
293+
rightCost := builder.qry.Nodes[node.Children[1]].Stats.Outcnt
294+
if node.BuildOnLeft {
295295
// its better for right join to go shuffle, but can not go complex shuffle
296-
if n.BuildOnLeft && leftCost > ShuffleTypeThreshHoldUpperLimit*rightCost {
296+
if node.BuildOnLeft && leftCost > ShuffleTypeThreshHoldUpperLimit*rightCost {
297297
return
298298
}
299299
} else if leftCost > ShuffleTypeThreshHoldLowerLimit*rightCost {
300-
n.Stats.HashmapStats.ShuffleTypeForMultiCN = plan.ShuffleTypeForMultiCN_Hybrid
300+
node.Stats.HashmapStats.ShuffleTypeForMultiCN = plan.ShuffleTypeForMultiCN_Hybrid
301301
}
302302
}
303303
}
@@ -306,16 +306,16 @@ func determinShuffleType(col *plan.ColRef, n *plan.Node, builder *QueryBuilder)
306306
if s == nil {
307307
return
308308
}
309-
if n.NodeType == plan.Node_AGG {
309+
if node.NodeType == plan.Node_AGG {
310310
if shouldUseHashShuffle(s.ShuffleRangeMap[colName]) {
311311
return
312312
}
313313
}
314-
n.Stats.HashmapStats.ShuffleType = plan.ShuffleType_Range
315-
n.Stats.HashmapStats.ShuffleColMin = int64(s.MinValMap[colName])
316-
n.Stats.HashmapStats.ShuffleColMax = int64(s.MaxValMap[colName])
317-
n.Stats.HashmapStats.Ranges = shouldUseShuffleRanges(s.ShuffleRangeMap[colName], colName)
318-
n.Stats.HashmapStats.Nullcnt = int64(s.NullCntMap[colName])
314+
node.Stats.HashmapStats.ShuffleType = plan.ShuffleType_Range
315+
node.Stats.HashmapStats.ShuffleColMin = int64(s.MinValMap[colName])
316+
node.Stats.HashmapStats.ShuffleColMax = int64(s.MaxValMap[colName])
317+
node.Stats.HashmapStats.Ranges = shouldUseShuffleRanges(s.ShuffleRangeMap[colName], colName)
318+
node.Stats.HashmapStats.Nullcnt = int64(s.NullCntMap[colName])
319319
}
320320

321321
// to determine if join need to go shuffle
@@ -378,23 +378,36 @@ func determinShuffleForJoin(n *plan.Node, builder *QueryBuilder) {
378378
expr1 = condImpl.F.Args[1]
379379
}
380380

381-
hashCol0, typ := GetHashColumn(expr0)
382-
if hashCol0 == nil {
381+
leftHashCol, typ := GetHashColumn(expr0)
382+
if leftHashCol == nil {
383383
return
384384
}
385-
hashCol1, _ := GetHashColumn(expr1)
386-
if hashCol1 == nil {
385+
rightHashCol, _ := GetHashColumn(expr1)
386+
if rightHashCol == nil {
387387
return
388388
}
389389
//for now ,only support integer and string type
390390
switch types.T(typ) {
391391
case types.T_int64, types.T_int32, types.T_int16, types.T_uint64, types.T_uint32, types.T_uint16, types.T_varchar, types.T_char, types.T_text:
392392
n.Stats.HashmapStats.ShuffleColIdx = int32(idx)
393393
n.Stats.HashmapStats.Shuffle = true
394-
determinShuffleType(hashCol0, n, builder)
394+
determinShuffleType(leftHashCol, n, builder)
395+
}
396+
397+
//recheck shuffle plan
398+
if n.Stats.HashmapStats.Shuffle {
395399
if n.Stats.HashmapStats.ShuffleType == plan.ShuffleType_Hash && n.Stats.HashmapStats.HashmapSize < threshHoldForHashShuffle {
396400
n.Stats.HashmapStats.Shuffle = false
397401
}
402+
if n.Stats.HashmapStats.ShuffleType == plan.ShuffleType_Range && n.Stats.HashmapStats.Ranges == nil && n.Stats.HashmapStats.ShuffleColMax-n.Stats.HashmapStats.ShuffleColMin < 100000 {
403+
n.Stats.HashmapStats.Shuffle = false
404+
}
405+
if n.Stats.HashmapStats.ShuffleMethod != plan.ShuffleMethod_Reuse {
406+
highestNDV := n.OnList[idx].Ndv
407+
if highestNDV < ShuffleThreshHoldOfNDV {
408+
n.Stats.HashmapStats.Shuffle = false
409+
}
410+
}
398411
}
399412
}
400413

0 commit comments

Comments
 (0)