Skip to content

Commit 0d5ec27

Browse files
author
Abduqodiri Qurbonzoda
committed
toHashSet is suboptimal for inputs with a lot of duplicates #KT-23142
1 parent 2c4fceb commit 0d5ec27

File tree

6 files changed, with 63 additions and 64 deletions.

libraries/stdlib/common/src/generated/_Arrays.kt

Lines changed: 20 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -8686,7 +8686,7 @@ public inline fun <K, V, M : MutableMap<in K, in V>> CharArray.associateTo(desti
86868686
@SinceKotlin("1.4")
86878687
@ExperimentalStdlibApi
86888688
public inline fun <K, V> Array<out K>.associateWith(valueSelector: (K) -> V): Map<K, V> {
8689-
val result = LinkedHashMap<K, V>()
8689+
val result = LinkedHashMap<K, V>(mapCapacity(size).coerceAtLeast(16))
86908690
return associateWithTo(result, valueSelector)
86918691
}
86928692

@@ -8704,7 +8704,7 @@ public inline fun <K, V> Array<out K>.associateWith(valueSelector: (K) -> V): Ma
87048704
@ExperimentalStdlibApi
87058705
@kotlin.internal.InlineOnly
87068706
public inline fun <V> ByteArray.associateWith(valueSelector: (Byte) -> V): Map<Byte, V> {
8707-
val result = LinkedHashMap<Byte, V>()
8707+
val result = LinkedHashMap<Byte, V>(mapCapacity(size).coerceAtLeast(16))
87088708
return associateWithTo(result, valueSelector)
87098709
}
87108710

@@ -8722,7 +8722,7 @@ public inline fun <V> ByteArray.associateWith(valueSelector: (Byte) -> V): Map<B
87228722
@ExperimentalStdlibApi
87238723
@kotlin.internal.InlineOnly
87248724
public inline fun <V> ShortArray.associateWith(valueSelector: (Short) -> V): Map<Short, V> {
8725-
val result = LinkedHashMap<Short, V>()
8725+
val result = LinkedHashMap<Short, V>(mapCapacity(size).coerceAtLeast(16))
87268726
return associateWithTo(result, valueSelector)
87278727
}
87288728

@@ -8740,7 +8740,7 @@ public inline fun <V> ShortArray.associateWith(valueSelector: (Short) -> V): Map
87408740
@ExperimentalStdlibApi
87418741
@kotlin.internal.InlineOnly
87428742
public inline fun <V> IntArray.associateWith(valueSelector: (Int) -> V): Map<Int, V> {
8743-
val result = LinkedHashMap<Int, V>()
8743+
val result = LinkedHashMap<Int, V>(mapCapacity(size).coerceAtLeast(16))
87448744
return associateWithTo(result, valueSelector)
87458745
}
87468746

@@ -8758,7 +8758,7 @@ public inline fun <V> IntArray.associateWith(valueSelector: (Int) -> V): Map<Int
87588758
@ExperimentalStdlibApi
87598759
@kotlin.internal.InlineOnly
87608760
public inline fun <V> LongArray.associateWith(valueSelector: (Long) -> V): Map<Long, V> {
8761-
val result = LinkedHashMap<Long, V>()
8761+
val result = LinkedHashMap<Long, V>(mapCapacity(size).coerceAtLeast(16))
87628762
return associateWithTo(result, valueSelector)
87638763
}
87648764

@@ -8776,7 +8776,7 @@ public inline fun <V> LongArray.associateWith(valueSelector: (Long) -> V): Map<L
87768776
@ExperimentalStdlibApi
87778777
@kotlin.internal.InlineOnly
87788778
public inline fun <V> FloatArray.associateWith(valueSelector: (Float) -> V): Map<Float, V> {
8779-
val result = LinkedHashMap<Float, V>()
8779+
val result = LinkedHashMap<Float, V>(mapCapacity(size).coerceAtLeast(16))
87808780
return associateWithTo(result, valueSelector)
87818781
}
87828782

@@ -8794,7 +8794,7 @@ public inline fun <V> FloatArray.associateWith(valueSelector: (Float) -> V): Map
87948794
@ExperimentalStdlibApi
87958795
@kotlin.internal.InlineOnly
87968796
public inline fun <V> DoubleArray.associateWith(valueSelector: (Double) -> V): Map<Double, V> {
8797-
val result = LinkedHashMap<Double, V>()
8797+
val result = LinkedHashMap<Double, V>(mapCapacity(size).coerceAtLeast(16))
87988798
return associateWithTo(result, valueSelector)
87998799
}
88008800

@@ -8812,7 +8812,7 @@ public inline fun <V> DoubleArray.associateWith(valueSelector: (Double) -> V): M
88128812
@ExperimentalStdlibApi
88138813
@kotlin.internal.InlineOnly
88148814
public inline fun <V> BooleanArray.associateWith(valueSelector: (Boolean) -> V): Map<Boolean, V> {
8815-
val result = LinkedHashMap<Boolean, V>()
8815+
val result = LinkedHashMap<Boolean, V>(mapCapacity(size).coerceAtLeast(16))
88168816
return associateWithTo(result, valueSelector)
88178817
}
88188818

@@ -8830,7 +8830,7 @@ public inline fun <V> BooleanArray.associateWith(valueSelector: (Boolean) -> V):
88308830
@ExperimentalStdlibApi
88318831
@kotlin.internal.InlineOnly
88328832
public inline fun <V> CharArray.associateWith(valueSelector: (Char) -> V): Map<Char, V> {
8833-
val result = LinkedHashMap<Char, V>()
8833+
val result = LinkedHashMap<Char, V>(mapCapacity(size.coerceAtMost(128)).coerceAtLeast(16))
88348834
return associateWithTo(result, valueSelector)
88358835
}
88368836

@@ -9145,7 +9145,7 @@ public fun BooleanArray.toHashSet(): HashSet<Boolean> {
91459145
* Returns a new [HashSet] of all elements.
91469146
*/
91479147
public fun CharArray.toHashSet(): HashSet<Char> {
9148-
return toCollection(HashSet<Char>(mapCapacity(size)))
9148+
return toCollection(HashSet<Char>(mapCapacity(size.coerceAtMost(128))))
91499149
}
91509150

91519151
/**
@@ -9439,7 +9439,7 @@ public fun CharArray.toSet(): Set<Char> {
94399439
return when (size) {
94409440
0 -> emptySet()
94419441
1 -> setOf(this[0])
9442-
else -> toCollection(LinkedHashSet<Char>(mapCapacity(size)))
9442+
else -> toCollection(LinkedHashSet<Char>(mapCapacity(size.coerceAtMost(128))))
94439443
}
94449444
}
94459445

@@ -11168,9 +11168,7 @@ public infix fun CharArray.subtract(other: Iterable<Char>): Set<Char> {
1116811168
* The returned set preserves the element iteration order of the original array.
1116911169
*/
1117011170
public fun <T> Array<out T>.toMutableSet(): MutableSet<T> {
11171-
val set = LinkedHashSet<T>(mapCapacity(size))
11172-
for (item in this) set.add(item)
11173-
return set
11171+
return toCollection(LinkedHashSet<T>(mapCapacity(size)))
1117411172
}
1117511173

1117611174
/**
@@ -11179,9 +11177,7 @@ public fun <T> Array<out T>.toMutableSet(): MutableSet<T> {
1117911177
* The returned set preserves the element iteration order of the original array.
1118011178
*/
1118111179
public fun ByteArray.toMutableSet(): MutableSet<Byte> {
11182-
val set = LinkedHashSet<Byte>(mapCapacity(size))
11183-
for (item in this) set.add(item)
11184-
return set
11180+
return toCollection(LinkedHashSet<Byte>(mapCapacity(size)))
1118511181
}
1118611182

1118711183
/**
@@ -11190,9 +11186,7 @@ public fun ByteArray.toMutableSet(): MutableSet<Byte> {
1119011186
* The returned set preserves the element iteration order of the original array.
1119111187
*/
1119211188
public fun ShortArray.toMutableSet(): MutableSet<Short> {
11193-
val set = LinkedHashSet<Short>(mapCapacity(size))
11194-
for (item in this) set.add(item)
11195-
return set
11189+
return toCollection(LinkedHashSet<Short>(mapCapacity(size)))
1119611190
}
1119711191

1119811192
/**
@@ -11201,9 +11195,7 @@ public fun ShortArray.toMutableSet(): MutableSet<Short> {
1120111195
* The returned set preserves the element iteration order of the original array.
1120211196
*/
1120311197
public fun IntArray.toMutableSet(): MutableSet<Int> {
11204-
val set = LinkedHashSet<Int>(mapCapacity(size))
11205-
for (item in this) set.add(item)
11206-
return set
11198+
return toCollection(LinkedHashSet<Int>(mapCapacity(size)))
1120711199
}
1120811200

1120911201
/**
@@ -11212,9 +11204,7 @@ public fun IntArray.toMutableSet(): MutableSet<Int> {
1121211204
* The returned set preserves the element iteration order of the original array.
1121311205
*/
1121411206
public fun LongArray.toMutableSet(): MutableSet<Long> {
11215-
val set = LinkedHashSet<Long>(mapCapacity(size))
11216-
for (item in this) set.add(item)
11217-
return set
11207+
return toCollection(LinkedHashSet<Long>(mapCapacity(size)))
1121811208
}
1121911209

1122011210
/**
@@ -11223,9 +11213,7 @@ public fun LongArray.toMutableSet(): MutableSet<Long> {
1122311213
* The returned set preserves the element iteration order of the original array.
1122411214
*/
1122511215
public fun FloatArray.toMutableSet(): MutableSet<Float> {
11226-
val set = LinkedHashSet<Float>(mapCapacity(size))
11227-
for (item in this) set.add(item)
11228-
return set
11216+
return toCollection(LinkedHashSet<Float>(mapCapacity(size)))
1122911217
}
1123011218

1123111219
/**
@@ -11234,9 +11222,7 @@ public fun FloatArray.toMutableSet(): MutableSet<Float> {
1123411222
* The returned set preserves the element iteration order of the original array.
1123511223
*/
1123611224
public fun DoubleArray.toMutableSet(): MutableSet<Double> {
11237-
val set = LinkedHashSet<Double>(mapCapacity(size))
11238-
for (item in this) set.add(item)
11239-
return set
11225+
return toCollection(LinkedHashSet<Double>(mapCapacity(size)))
1124011226
}
1124111227

1124211228
/**
@@ -11245,9 +11231,7 @@ public fun DoubleArray.toMutableSet(): MutableSet<Double> {
1124511231
* The returned set preserves the element iteration order of the original array.
1124611232
*/
1124711233
public fun BooleanArray.toMutableSet(): MutableSet<Boolean> {
11248-
val set = LinkedHashSet<Boolean>(mapCapacity(size))
11249-
for (item in this) set.add(item)
11250-
return set
11234+
return toCollection(LinkedHashSet<Boolean>(mapCapacity(size)))
1125111235
}
1125211236

1125311237
/**
@@ -11256,9 +11240,7 @@ public fun BooleanArray.toMutableSet(): MutableSet<Boolean> {
1125611240
* The returned set preserves the element iteration order of the original array.
1125711241
*/
1125811242
public fun CharArray.toMutableSet(): MutableSet<Char> {
11259-
val set = LinkedHashSet<Char>(mapCapacity(size))
11260-
for (item in this) set.add(item)
11261-
return set
11243+
return toCollection(LinkedHashSet<Char>(mapCapacity(size.coerceAtMost(128))))
1126211244
}
1126311245

1126411246
/**

libraries/stdlib/common/src/generated/_Strings.kt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -708,7 +708,7 @@ public inline fun <K, V, M : MutableMap<in K, in V>> CharSequence.associateTo(de
708708
*/
709709
@SinceKotlin("1.3")
710710
public inline fun <V> CharSequence.associateWith(valueSelector: (Char) -> V): Map<Char, V> {
711-
val result = LinkedHashMap<Char, V>(mapCapacity(length).coerceAtLeast(16))
711+
val result = LinkedHashMap<Char, V>(mapCapacity(length.coerceAtMost(128)).coerceAtLeast(16))
712712
return associateWithTo(result, valueSelector)
713713
}
714714

@@ -742,7 +742,7 @@ public fun <C : MutableCollection<in Char>> CharSequence.toCollection(destinatio
742742
* Returns a new [HashSet] of all characters.
743743
*/
744744
public fun CharSequence.toHashSet(): HashSet<Char> {
745-
return toCollection(HashSet<Char>(mapCapacity(length)))
745+
return toCollection(HashSet<Char>(mapCapacity(length.coerceAtMost(128))))
746746
}
747747

748748
/**
@@ -772,7 +772,7 @@ public fun CharSequence.toSet(): Set<Char> {
772772
return when (length) {
773773
0 -> emptySet()
774774
1 -> setOf(this[0])
775-
else -> toCollection(LinkedHashSet<Char>(mapCapacity(length)))
775+
else -> toCollection(LinkedHashSet<Char>(mapCapacity(length.coerceAtMost(128))))
776776
}
777777
}
778778

libraries/stdlib/common/src/generated/_UArrays.kt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4101,7 +4101,7 @@ public inline fun ShortArray.toUShortArray(): UShortArray {
41014101
@ExperimentalUnsignedTypes
41024102
@kotlin.internal.InlineOnly
41034103
public inline fun <V> UIntArray.associateWith(valueSelector: (UInt) -> V): Map<UInt, V> {
4104-
val result = LinkedHashMap<UInt, V>()
4104+
val result = LinkedHashMap<UInt, V>(mapCapacity(size).coerceAtLeast(16))
41054105
return associateWithTo(result, valueSelector)
41064106
}
41074107

@@ -4120,7 +4120,7 @@ public inline fun <V> UIntArray.associateWith(valueSelector: (UInt) -> V): Map<U
41204120
@ExperimentalUnsignedTypes
41214121
@kotlin.internal.InlineOnly
41224122
public inline fun <V> ULongArray.associateWith(valueSelector: (ULong) -> V): Map<ULong, V> {
4123-
val result = LinkedHashMap<ULong, V>()
4123+
val result = LinkedHashMap<ULong, V>(mapCapacity(size).coerceAtLeast(16))
41244124
return associateWithTo(result, valueSelector)
41254125
}
41264126

@@ -4139,7 +4139,7 @@ public inline fun <V> ULongArray.associateWith(valueSelector: (ULong) -> V): Map
41394139
@ExperimentalUnsignedTypes
41404140
@kotlin.internal.InlineOnly
41414141
public inline fun <V> UByteArray.associateWith(valueSelector: (UByte) -> V): Map<UByte, V> {
4142-
val result = LinkedHashMap<UByte, V>()
4142+
val result = LinkedHashMap<UByte, V>(mapCapacity(size).coerceAtLeast(16))
41434143
return associateWithTo(result, valueSelector)
41444144
}
41454145

@@ -4158,7 +4158,7 @@ public inline fun <V> UByteArray.associateWith(valueSelector: (UByte) -> V): Map
41584158
@ExperimentalUnsignedTypes
41594159
@kotlin.internal.InlineOnly
41604160
public inline fun <V> UShortArray.associateWith(valueSelector: (UShort) -> V): Map<UShort, V> {
4161-
val result = LinkedHashMap<UShort, V>()
4161+
val result = LinkedHashMap<UShort, V>(mapCapacity(size).coerceAtLeast(16))
41624162
return associateWithTo(result, valueSelector)
41634163
}
41644164

libraries/stdlib/test/text/StringTest.kt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package test.text
88
import kotlin.test.*
99
import test.*
1010
import test.collections.behaviors.iteratorBehavior
11+
import test.collections.behaviors.setBehavior
1112
import test.collections.compare
1213
import kotlin.math.sign
1314
import kotlin.random.Random
@@ -1651,4 +1652,19 @@ ${" "}
16511652
assertFailsWith<IndexOutOfBoundsException> { "".elementAt(0) }
16521653
assertFailsWith<IndexOutOfBoundsException> { "a c".elementAt(-1) }
16531654
}
1655+
1656+
@Test
1657+
fun toHashSet() {
1658+
compare(hashSetOf('A', 'B', 'C'), "ACAABBAC".toHashSet()) { setBehavior() }
1659+
1660+
buildString {
1661+
repeat(100) { append('1') }
1662+
append('2')
1663+
repeat(100) { append('3') }
1664+
append('4')
1665+
repeat(100) { append('5') }
1666+
}.let {
1667+
compare(hashSetOf('1', '2', '3', '4', '5'), it.toHashSet()) { setBehavior() }
1668+
}
1669+
}
16541670
}

libraries/tools/kotlin-stdlib-gen/src/templates/Sets.kt

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,6 @@
55

66
package templates
77

8-
import templates.DocExtensions.collection
9-
import templates.DocExtensions.element
10-
import templates.DocExtensions.mapResult
11-
import templates.DocExtensions.pluralize
128
import templates.Family.*
139
import templates.SequenceClass.*
1410

@@ -35,11 +31,8 @@ object SetOps : TemplateGroupBase() {
3531
"""
3632
}
3733
body(ArraysOfObjects, ArraysOfPrimitives) {
38-
"""
39-
val set = LinkedHashSet<T>(mapCapacity(size))
40-
for (item in this) set.add(item)
41-
return set
42-
"""
34+
val capacity = "size" + if (primitive == PrimitiveType.Char) ".coerceAtMost(128)" else ""
35+
"return toCollection(LinkedHashSet<T>(mapCapacity($capacity)))"
4336
}
4437
body(Sequences) {
4538
"""

libraries/tools/kotlin-stdlib-gen/src/templates/Snapshots.kt

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -55,20 +55,20 @@ object Snapshots : TemplateGroupBase() {
5555
1 -> setOf(if (this is List) this[0] else iterator().next())
5656
else -> toCollection(LinkedHashSet<T>(mapCapacity(size)))
5757
}
58-
5958
}
6059
return toCollection(LinkedHashSet<T>()).optimizeReadOnlySet()
6160
"""
6261
}
6362
body(Sequences) { "return toCollection(LinkedHashSet<T>()).optimizeReadOnlySet()" }
6463

6564
body(CharSequences, ArraysOfObjects, ArraysOfPrimitives) {
66-
val size = if (f == CharSequences) "length" else "size"
65+
val size = f.code.size
66+
val capacity = if (f == CharSequences || primitive == PrimitiveType.Char) "$size.coerceAtMost(128)" else size
6767
"""
6868
return when ($size) {
6969
0 -> emptySet()
7070
1 -> setOf(this[0])
71-
else -> toCollection(LinkedHashSet<T>(mapCapacity($size)))
71+
else -> toCollection(LinkedHashSet<T>(mapCapacity($capacity)))
7272
}
7373
"""
7474
}
@@ -82,8 +82,11 @@ object Snapshots : TemplateGroupBase() {
8282
returns("HashSet<T>")
8383
body { "return toCollection(HashSet<T>(mapCapacity(collectionSizeOrDefault(12))))" }
8484
body(Sequences) { "return toCollection(HashSet<T>())" }
85-
body(CharSequences) { "return toCollection(HashSet<T>(mapCapacity(length)))" }
86-
body(ArraysOfObjects, ArraysOfPrimitives) { "return toCollection(HashSet<T>(mapCapacity(size)))" }
85+
body(CharSequences, ArraysOfObjects, ArraysOfPrimitives) {
86+
val size = f.code.size
87+
val capacity = if (f == CharSequences || primitive == PrimitiveType.Char) "$size.coerceAtMost(128)" else size
88+
"return toCollection(HashSet<T>(mapCapacity($capacity)))"
89+
}
8790
}
8891

8992
val f_toSortedSet = fn("toSortedSet()") {
@@ -467,13 +470,18 @@ object Snapshots : TemplateGroupBase() {
467470
else -> "samples.collections.Collections.Transformations.associateWith"
468471
})
469472
body {
470-
val resultMap = when (family) {
471-
Iterables -> "LinkedHashMap<K, V>(mapCapacity(collectionSizeOrDefault(10)).coerceAtLeast(16))"
472-
CharSequences -> "LinkedHashMap<K, V>(mapCapacity(length).coerceAtLeast(16))"
473-
else -> "LinkedHashMap<K, V>()"
473+
val capacity = when (family) {
474+
Iterables -> "mapCapacity(collectionSizeOrDefault(10)).coerceAtLeast(16)"
475+
CharSequences -> "mapCapacity(length.coerceAtMost(128)).coerceAtLeast(16)"
476+
ArraysOfObjects, ArraysOfPrimitives, ArraysOfUnsigned -> if (primitive == PrimitiveType.Char) {
477+
"mapCapacity(size.coerceAtMost(128)).coerceAtLeast(16)"
478+
} else {
479+
"mapCapacity(size).coerceAtLeast(16)"
480+
}
481+
else -> ""
474482
}
475483
"""
476-
val result = $resultMap
484+
val result = LinkedHashMap<K, V>($capacity)
477485
return associateWithTo(result, valueSelector)
478486
"""
479487
}

0 commit comments

Comments
 (0)