@@ -247,16 +247,16 @@ func maybeSorted(n *plan.Node, builder *QueryBuilder, tag int32) bool {
247
247
return false
248
248
}
249
249
250
- func determinShuffleType (col * plan.ColRef , n * plan.Node , builder * QueryBuilder ) {
250
+ func determinShuffleType (col * plan.ColRef , node * plan.Node , builder * QueryBuilder ) {
251
251
// hash by default
252
- n .Stats .HashmapStats .ShuffleType = plan .ShuffleType_Hash
252
+ node .Stats .HashmapStats .ShuffleType = plan .ShuffleType_Hash
253
253
254
254
if builder == nil {
255
255
return
256
256
}
257
257
tableDef , ok := builder .tag2Table [col .RelPos ]
258
258
if ! ok {
259
- child := builder .qry .Nodes [n .Children [0 ]]
259
+ child := builder .qry .Nodes [node .Children [0 ]]
260
260
if child .NodeType == plan .Node_AGG && child .Stats .HashmapStats .Shuffle && col .RelPos == child .BindingTags [0 ] {
261
261
col = child .GroupBy [col .ColPos ].GetCol ()
262
262
if col == nil {
@@ -266,38 +266,38 @@ func determinShuffleType(col *plan.ColRef, n *plan.Node, builder *QueryBuilder)
266
266
if ! ok {
267
267
return
268
268
}
269
- n .Stats .HashmapStats .ShuffleMethod = plan .ShuffleMethod_Reuse
270
- n .Stats .HashmapStats .ShuffleType = plan .ShuffleType_Range
271
- n .Stats .HashmapStats .HashmapSize = child .Stats .HashmapStats .HashmapSize
272
- n .Stats .HashmapStats .ShuffleColMin = child .Stats .HashmapStats .ShuffleColMin
273
- n .Stats .HashmapStats .ShuffleColMax = child .Stats .HashmapStats .ShuffleColMax
274
- n .Stats .HashmapStats .Ranges = child .Stats .HashmapStats .Ranges
275
- n .Stats .HashmapStats .Nullcnt = child .Stats .HashmapStats .Nullcnt
269
+ node .Stats .HashmapStats .ShuffleMethod = plan .ShuffleMethod_Reuse
270
+ node .Stats .HashmapStats .ShuffleType = plan .ShuffleType_Range
271
+ node .Stats .HashmapStats .HashmapSize = child .Stats .HashmapStats .HashmapSize
272
+ node .Stats .HashmapStats .ShuffleColMin = child .Stats .HashmapStats .ShuffleColMin
273
+ node .Stats .HashmapStats .ShuffleColMax = child .Stats .HashmapStats .ShuffleColMax
274
+ node .Stats .HashmapStats .Ranges = child .Stats .HashmapStats .Ranges
275
+ node .Stats .HashmapStats .Nullcnt = child .Stats .HashmapStats .Nullcnt
276
276
}
277
277
return
278
278
}
279
279
colName := tableDef .Cols [col .ColPos ].Name
280
280
281
281
// for shuffle join, if left child is not sorted, the cost will be very high
282
282
// should use complex shuffle type
283
- if n .NodeType == plan .Node_JOIN {
283
+ if node .NodeType == plan .Node_JOIN {
284
284
leftSorted := true
285
285
if GetSortOrder (tableDef , col .ColPos ) != 0 {
286
286
leftSorted = false
287
287
}
288
- if ! maybeSorted (builder .qry .Nodes [n .Children [0 ]], builder , col .RelPos ) {
288
+ if ! maybeSorted (builder .qry .Nodes [node .Children [0 ]], builder , col .RelPos ) {
289
289
leftSorted = false
290
290
}
291
291
if ! leftSorted {
292
- leftCost := builder .qry .Nodes [n .Children [0 ]].Stats .Outcnt
293
- rightCost := builder .qry .Nodes [n .Children [1 ]].Stats .Outcnt
294
- if n .BuildOnLeft {
292
+ leftCost := builder .qry .Nodes [node .Children [0 ]].Stats .Outcnt
293
+ rightCost := builder .qry .Nodes [node .Children [1 ]].Stats .Outcnt
294
+ if node .BuildOnLeft {
295
295
// its better for right join to go shuffle, but can not go complex shuffle
296
- if n .BuildOnLeft && leftCost > ShuffleTypeThreshHoldUpperLimit * rightCost {
296
+ if node .BuildOnLeft && leftCost > ShuffleTypeThreshHoldUpperLimit * rightCost {
297
297
return
298
298
}
299
299
} else if leftCost > ShuffleTypeThreshHoldLowerLimit * rightCost {
300
- n .Stats .HashmapStats .ShuffleTypeForMultiCN = plan .ShuffleTypeForMultiCN_Hybrid
300
+ node .Stats .HashmapStats .ShuffleTypeForMultiCN = plan .ShuffleTypeForMultiCN_Hybrid
301
301
}
302
302
}
303
303
}
@@ -306,16 +306,16 @@ func determinShuffleType(col *plan.ColRef, n *plan.Node, builder *QueryBuilder)
306
306
if s == nil {
307
307
return
308
308
}
309
- if n .NodeType == plan .Node_AGG {
309
+ if node .NodeType == plan .Node_AGG {
310
310
if shouldUseHashShuffle (s .ShuffleRangeMap [colName ]) {
311
311
return
312
312
}
313
313
}
314
- n .Stats .HashmapStats .ShuffleType = plan .ShuffleType_Range
315
- n .Stats .HashmapStats .ShuffleColMin = int64 (s .MinValMap [colName ])
316
- n .Stats .HashmapStats .ShuffleColMax = int64 (s .MaxValMap [colName ])
317
- n .Stats .HashmapStats .Ranges = shouldUseShuffleRanges (s .ShuffleRangeMap [colName ], colName )
318
- n .Stats .HashmapStats .Nullcnt = int64 (s .NullCntMap [colName ])
314
+ node .Stats .HashmapStats .ShuffleType = plan .ShuffleType_Range
315
+ node .Stats .HashmapStats .ShuffleColMin = int64 (s .MinValMap [colName ])
316
+ node .Stats .HashmapStats .ShuffleColMax = int64 (s .MaxValMap [colName ])
317
+ node .Stats .HashmapStats .Ranges = shouldUseShuffleRanges (s .ShuffleRangeMap [colName ], colName )
318
+ node .Stats .HashmapStats .Nullcnt = int64 (s .NullCntMap [colName ])
319
319
}
320
320
321
321
// to determine if join need to go shuffle
@@ -378,23 +378,36 @@ func determinShuffleForJoin(n *plan.Node, builder *QueryBuilder) {
378
378
expr1 = condImpl .F .Args [1 ]
379
379
}
380
380
381
- hashCol0 , typ := GetHashColumn (expr0 )
382
- if hashCol0 == nil {
381
+ leftHashCol , typ := GetHashColumn (expr0 )
382
+ if leftHashCol == nil {
383
383
return
384
384
}
385
- hashCol1 , _ := GetHashColumn (expr1 )
386
- if hashCol1 == nil {
385
+ rightHashCol , _ := GetHashColumn (expr1 )
386
+ if rightHashCol == nil {
387
387
return
388
388
}
389
389
//for now ,only support integer and string type
390
390
switch types .T (typ ) {
391
391
case types .T_int64 , types .T_int32 , types .T_int16 , types .T_uint64 , types .T_uint32 , types .T_uint16 , types .T_varchar , types .T_char , types .T_text :
392
392
n .Stats .HashmapStats .ShuffleColIdx = int32 (idx )
393
393
n .Stats .HashmapStats .Shuffle = true
394
- determinShuffleType (hashCol0 , n , builder )
394
+ determinShuffleType (leftHashCol , n , builder )
395
+ }
396
+
397
+ //recheck shuffle plan
398
+ if n .Stats .HashmapStats .Shuffle {
395
399
if n .Stats .HashmapStats .ShuffleType == plan .ShuffleType_Hash && n .Stats .HashmapStats .HashmapSize < threshHoldForHashShuffle {
396
400
n .Stats .HashmapStats .Shuffle = false
397
401
}
402
+ if n .Stats .HashmapStats .ShuffleType == plan .ShuffleType_Range && n .Stats .HashmapStats .Ranges == nil && n .Stats .HashmapStats .ShuffleColMax - n .Stats .HashmapStats .ShuffleColMin < 100000 {
403
+ n .Stats .HashmapStats .Shuffle = false
404
+ }
405
+ if n .Stats .HashmapStats .ShuffleMethod != plan .ShuffleMethod_Reuse {
406
+ highestNDV := n .OnList [idx ].Ndv
407
+ if highestNDV < ShuffleThreshHoldOfNDV {
408
+ n .Stats .HashmapStats .Shuffle = false
409
+ }
410
+ }
398
411
}
399
412
}
400
413
0 commit comments