Skip to content

Commit ef07c4f

Browse files
vseanreesermsftMirroringEirik George Tsarpalisericstjbartonjs
authored
Merging internal commits for release/8.0 (#108679)
* Fix performance issue when deserializing large payloads in JsonObject extension data. Bug fix to address performance issues when deserializing large payloads in `JsonObject` extension data. Mitigates performance issues by optimizing the deserialization process for large `JsonObject` extension data properties. - Added `LargeJsonObjectExtensionDataSerializationState` class in `System.Text.Json.Serialization.Converters` to handle large payloads efficiently. - Updated `JsonObjectConverter` to use the new state class for large objects. - Modified `ObjectDefaultConverter`, `ObjectWithParameterizedConstructorConverter`, and `ReadStackFrame` to complete deserialization using the new state class. - Added tests in `JsonObjectTests` to validate the performance improvements with large payloads. * ZipArchive: Improve performance of removing extra fields This improves the performance of removing extra fields in the ZipArchive by optimizing the removal process. `src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs`: Replaced multiple iterations and list operations with a single `RemoveAll` call to enhance efficiency. * Packaging: Port fix for GetParts from .NETFramework This ports a fix for the `GetParts` method to improve part URI handling and collision detection. `src/libraries/System.IO.Packaging/src/System/IO/Packaging/Package.cs`: Added sorting of parts array to ensure proper order, introduced a dictionary and list to track parts and detect collisions, and implemented a new method `CopyPartDictionaryToPartList` to copy parts to `_partList`. * Merged PR 41316: Use Marvin32 instead of xxHash32 in COSE hash codes * Microsoft.Extensions.Caching.Memory: use Marvin for keys on down-level TFMs Note that Microsoft.Extensions.Caching.Memory is deployed OOB via NuGet, and multi-targets including support for netfx and ns2.0 Marvin is the string hashing used in modern .NET (https://github.com/dotnet/runtime/blob/main/src/libraries/System.Private.CoreLib/src/System/Marvin.cs); this change extends this behaviour to `string` keys when used with `MemoryCache` - noting that in many scenarios we *expect* the key to be a `string` (even though other types are allowed) To do this we: - ~~add a custom key equality comparer for use with `ConcurrentDictionary<object, CacheEntry>` (we do this on all TFMs, replacing the need for the dynamic lookup via `EqualityComparer<TKey>.Default`)~~ - split the internal dictionary into 2 - one for `string`, one for everything else - in down-level TFMs only, provide a custom `string` key comparer with `MarvinHash` enabled (local snapshot since not available) This gives us Marvin everywhere, and Marvin+rehash on netcore TFMs (rehash requires `TKey` === `string`) --------- Co-authored-by: Mirroring <[email protected]> Co-authored-by: Eirik George Tsarpalis <[email protected]> Co-authored-by: Eric St. John <[email protected]> Co-authored-by: Jeremy Barton <[email protected]> Co-authored-by: Marc Gravell <[email protected]>
1 parent 11046b2 commit ef07c4f

File tree

16 files changed

+341
-78
lines changed

16 files changed

+341
-78
lines changed

src/libraries/Common/src/System/HashCodeRandomization.cs

+23-26
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

4+
using System.Runtime.InteropServices;
5+
using System.Security.Cryptography;
6+
47
namespace System
58
{
69
// Contains helpers for calculating randomized hash codes of common types.
@@ -14,39 +17,33 @@ namespace System
1417
// rather than a global seed for the entire AppDomain.
1518
internal static class HashCodeRandomization
1619
{
20+
#if !NET
21+
private static readonly ulong s_seed = GenerateSeed();
22+
23+
private static ulong GenerateSeed()
24+
{
25+
using (RandomNumberGenerator rng = RandomNumberGenerator.Create())
26+
{
27+
byte[] rand = new byte[sizeof(ulong)];
28+
rng.GetBytes(rand);
29+
30+
return BitConverter.ToUInt64(rand, 0);
31+
}
32+
}
33+
#endif
34+
1735
public static int GetRandomizedOrdinalHashCode(this string value)
1836
{
1937
#if NETCOREAPP
2038
// In .NET Core, string hash codes are already randomized.
2139

2240
return value.GetHashCode();
2341
#else
24-
// Downlevel, we need to perform randomization ourselves. There's still
25-
// the potential for limited collisions ("Hello!" and "Hello!\0"), but
26-
// this shouldn't be a problem in practice. If we need to address it,
27-
// we can mix the string length into the accumulator before running the
28-
// string contents through.
29-
//
30-
// We'll pull out pairs of chars and write 32 bits at a time.
31-
32-
HashCode hashCode = default;
33-
int pair = 0;
34-
for (int i = 0; i < value.Length; i++)
35-
{
36-
int ch = value[i];
37-
if ((i & 1) == 0)
38-
{
39-
pair = ch << 16; // first member of pair
40-
}
41-
else
42-
{
43-
pair |= ch; // second member of pair
44-
hashCode.Add(pair); // write pair as single unit
45-
pair = 0;
46-
}
47-
}
48-
hashCode.Add(pair); // flush any leftover data (could be 0 or 1 chars)
49-
return hashCode.ToHashCode();
42+
// Downlevel, we need to perform randomization ourselves.
43+
44+
ReadOnlySpan<char> charSpan = value.AsSpan();
45+
ReadOnlySpan<byte> byteSpan = MemoryMarshal.AsBytes(charSpan);
46+
return Marvin.ComputeHash32(byteSpan, s_seed);
5047
#endif
5148
}
5249

src/libraries/Microsoft.Extensions.Caching.Memory/src/MemoryCache.cs

+89-18
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33

44
using System;
5+
using System.Collections;
56
using System.Collections.Concurrent;
67
using System.Collections.Generic;
78
using System.Diagnostics.CodeAnalysis;
89
using System.Runtime.CompilerServices;
10+
using System.Runtime.InteropServices;
911
using System.Threading;
1012
using System.Threading.Tasks;
1113
using Microsoft.Extensions.Logging;
@@ -126,7 +128,7 @@ internal void SetEntry(CacheEntry entry)
126128
entry.LastAccessed = utcNow;
127129

128130
CoherentState coherentState = _coherentState; // Clear() can update the reference in the meantime
129-
if (coherentState._entries.TryGetValue(entry.Key, out CacheEntry? priorEntry))
131+
if (coherentState.TryGetValue(entry.Key, out CacheEntry? priorEntry))
130132
{
131133
priorEntry.SetExpired(EvictionReason.Replaced);
132134
}
@@ -145,12 +147,12 @@ internal void SetEntry(CacheEntry entry)
145147
if (priorEntry == null)
146148
{
147149
// Try to add the new entry if no previous entries exist.
148-
entryAdded = coherentState._entries.TryAdd(entry.Key, entry);
150+
entryAdded = coherentState.TryAdd(entry.Key, entry);
149151
}
150152
else
151153
{
152154
// Try to update with the new entry if a previous entries exist.
153-
entryAdded = coherentState._entries.TryUpdate(entry.Key, entry, priorEntry);
155+
entryAdded = coherentState.TryUpdate(entry.Key, entry, priorEntry);
154156

155157
if (entryAdded)
156158
{
@@ -165,7 +167,7 @@ internal void SetEntry(CacheEntry entry)
165167
// The update will fail if the previous entry was removed after retrieval.
166168
// Adding the new entry will succeed only if no entry has been added since.
167169
// This guarantees removing an old entry does not prevent adding a new entry.
168-
entryAdded = coherentState._entries.TryAdd(entry.Key, entry);
170+
entryAdded = coherentState.TryAdd(entry.Key, entry);
169171
}
170172
}
171173

@@ -210,7 +212,7 @@ public bool TryGetValue(object key, out object? result)
210212
DateTime utcNow = UtcNow;
211213

212214
CoherentState coherentState = _coherentState; // Clear() can update the reference in the meantime
213-
if (coherentState._entries.TryGetValue(key, out CacheEntry? tmp))
215+
if (coherentState.TryGetValue(key, out CacheEntry? tmp))
214216
{
215217
CacheEntry entry = tmp;
216218
// Check if expired due to expiration tokens, timers, etc. and if so, remove it.
@@ -269,7 +271,8 @@ public void Remove(object key)
269271
CheckDisposed();
270272

271273
CoherentState coherentState = _coherentState; // Clear() can update the reference in the meantime
272-
if (coherentState._entries.TryRemove(key, out CacheEntry? entry))
274+
275+
if (coherentState.TryRemove(key, out CacheEntry? entry))
273276
{
274277
if (_options.HasSizeLimit)
275278
{
@@ -291,10 +294,10 @@ public void Clear()
291294
CheckDisposed();
292295

293296
CoherentState oldState = Interlocked.Exchange(ref _coherentState, new CoherentState());
294-
foreach (KeyValuePair<object, CacheEntry> entry in oldState._entries)
297+
foreach (CacheEntry entry in oldState.GetAllValues())
295298
{
296-
entry.Value.SetExpired(EvictionReason.Removed);
297-
entry.Value.InvokeEvictionCallbacks();
299+
entry.SetExpired(EvictionReason.Removed);
300+
entry.InvokeEvictionCallbacks();
298301
}
299302
}
300303

@@ -415,10 +418,9 @@ private void ScanForExpiredItems()
415418
DateTime utcNow = _lastExpirationScan = UtcNow;
416419

417420
CoherentState coherentState = _coherentState; // Clear() can update the reference in the meantime
418-
foreach (KeyValuePair<object, CacheEntry> item in coherentState._entries)
419-
{
420-
CacheEntry entry = item.Value;
421421

422+
foreach (CacheEntry entry in coherentState.GetAllValues())
423+
{
422424
if (entry.CheckExpired(utcNow))
423425
{
424426
coherentState.RemoveEntry(entry, _options);
@@ -516,9 +518,8 @@ private void Compact(long removalSizeTarget, Func<CacheEntry, long> computeEntry
516518

517519
// Sort items by expired & priority status
518520
DateTime utcNow = UtcNow;
519-
foreach (KeyValuePair<object, CacheEntry> item in coherentState._entries)
521+
foreach (CacheEntry entry in coherentState.GetAllValues())
520522
{
521-
CacheEntry entry = item.Value;
522523
if (entry.CheckExpired(utcNow))
523524
{
524525
entriesToRemove.Add(entry);
@@ -645,18 +646,59 @@ private static void ValidateCacheKey(object key)
645646
/// </summary>
646647
private sealed class CoherentState
647648
{
648-
internal ConcurrentDictionary<object, CacheEntry> _entries = new ConcurrentDictionary<object, CacheEntry>();
649+
private readonly ConcurrentDictionary<string, CacheEntry> _stringEntries = new ConcurrentDictionary<string, CacheEntry>(StringKeyComparer.Instance);
650+
private readonly ConcurrentDictionary<object, CacheEntry> _nonStringEntries = new ConcurrentDictionary<object, CacheEntry>();
649651
internal long _cacheSize;
650652

651-
private ICollection<KeyValuePair<object, CacheEntry>> EntriesCollection => _entries;
653+
internal bool TryGetValue(object key, [NotNullWhen(true)] out CacheEntry? entry)
654+
=> key is string s ? _stringEntries.TryGetValue(s, out entry) : _nonStringEntries.TryGetValue(key, out entry);
655+
656+
internal bool TryRemove(object key, [NotNullWhen(true)] out CacheEntry? entry)
657+
=> key is string s ? _stringEntries.TryRemove(s, out entry) : _nonStringEntries.TryRemove(key, out entry);
658+
659+
internal bool TryAdd(object key, CacheEntry entry)
660+
=> key is string s ? _stringEntries.TryAdd(s, entry) : _nonStringEntries.TryAdd(key, entry);
661+
662+
internal bool TryUpdate(object key, CacheEntry entry, CacheEntry comparison)
663+
=> key is string s ? _stringEntries.TryUpdate(s, entry, comparison) : _nonStringEntries.TryUpdate(key, entry, comparison);
664+
665+
public IEnumerable<CacheEntry> GetAllValues()
666+
{
667+
// note this mimics the outgoing code in that we don't just access
668+
// .Values, which has additional overheads; this is only used for rare
669+
// calls - compaction, clear, etc - so the additional overhead of a
670+
// generated enumerator is not alarming
671+
foreach (KeyValuePair<string, CacheEntry> entry in _stringEntries)
672+
{
673+
yield return entry.Value;
674+
}
675+
foreach (KeyValuePair<object, CacheEntry> entry in _nonStringEntries)
676+
{
677+
yield return entry.Value;
678+
}
679+
}
680+
681+
private ICollection<KeyValuePair<string, CacheEntry>> StringEntriesCollection => _stringEntries;
682+
private ICollection<KeyValuePair<object, CacheEntry>> NonStringEntriesCollection => _nonStringEntries;
652683

653-
internal int Count => _entries.Count;
684+
internal int Count => _stringEntries.Count + _nonStringEntries.Count;
654685

655686
internal long Size => Volatile.Read(ref _cacheSize);
656687

657688
internal void RemoveEntry(CacheEntry entry, MemoryCacheOptions options)
658689
{
659-
if (EntriesCollection.Remove(new KeyValuePair<object, CacheEntry>(entry.Key, entry)))
690+
if (entry.Key is string s)
691+
{
692+
if (StringEntriesCollection.Remove(new KeyValuePair<string, CacheEntry>(s, entry)))
693+
{
694+
if (options.SizeLimit.HasValue)
695+
{
696+
Interlocked.Add(ref _cacheSize, -entry.Size);
697+
}
698+
entry.InvokeEvictionCallbacks();
699+
}
700+
}
701+
else if (NonStringEntriesCollection.Remove(new KeyValuePair<object, CacheEntry>(entry.Key, entry)))
660702
{
661703
if (options.SizeLimit.HasValue)
662704
{
@@ -665,6 +707,35 @@ internal void RemoveEntry(CacheEntry entry, MemoryCacheOptions options)
665707
entry.InvokeEvictionCallbacks();
666708
}
667709
}
710+
711+
#if NETCOREAPP
712+
// on .NET Core, the inbuilt comparer has Marvin built in; no need to intercept
713+
private static class StringKeyComparer
714+
{
715+
internal static IEqualityComparer<string> Instance => EqualityComparer<string>.Default;
716+
}
717+
#else
718+
// otherwise, we need a custom comparer that manually implements Marvin
719+
private sealed class StringKeyComparer : IEqualityComparer<string>, IEqualityComparer
720+
{
721+
private StringKeyComparer() { }
722+
723+
internal static readonly IEqualityComparer<string> Instance = new StringKeyComparer();
724+
725+
// special-case string keys and use Marvin hashing
726+
public int GetHashCode(string? s) => s is null ? 0
727+
: Marvin.ComputeHash32(MemoryMarshal.AsBytes(s.AsSpan()), Marvin.DefaultSeed);
728+
729+
public bool Equals(string? x, string? y)
730+
=> string.Equals(x, y);
731+
732+
bool IEqualityComparer.Equals(object x, object y)
733+
=> object.Equals(x, y);
734+
735+
int IEqualityComparer.GetHashCode(object obj)
736+
=> obj is string s ? GetHashCode(s) : 0;
737+
}
738+
#endif
668739
}
669740
}
670741
}

src/libraries/Microsoft.Extensions.Caching.Memory/src/Microsoft.Extensions.Caching.Memory.csproj

+7
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
88
<ServicingVersion>1</ServicingVersion>
99
<PackageDescription>In-memory cache implementation of Microsoft.Extensions.Caching.Memory.IMemoryCache.</PackageDescription>
10+
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
11+
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
12+
<ServicingVersion>1</ServicingVersion>
1013
</PropertyGroup>
1114

1215
<ItemGroup>
@@ -23,4 +26,8 @@
2326
<PackageReference Include="System.ValueTuple" Version="$(SystemValueTupleVersion)" />
2427
</ItemGroup>
2528

29+
<ItemGroup Condition="!$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'netstandard2.1'))">
30+
<Compile Include="$(CoreLibSharedDir)System\Marvin.cs" Link="Common\System\Marvin.cs" />
31+
</ItemGroup>
32+
2633
</Project>

src/libraries/Microsoft.Extensions.Caching.Memory/tests/MemoryCacheSetAndRemoveTests.cs

+15
Original file line numberDiff line numberDiff line change
@@ -794,6 +794,21 @@ public async Task GetOrCreateAsyncFromCacheWithNullKeyThrows()
794794
await Assert.ThrowsAsync<ArgumentNullException>(async () => await cache.GetOrCreateAsync<object>(null, null));
795795
}
796796

797+
[Fact]
798+
public void MixedKeysUsage()
799+
{
800+
// keys are split internally into 2 separate chunks
801+
var cache = CreateCache();
802+
var typed = Assert.IsType<MemoryCache>(cache);
803+
object key0 = 123.45M, key1 = "123.45";
804+
cache.Set(key0, "string value");
805+
cache.Set(key1, "decimal value");
806+
807+
Assert.Equal(2, typed.Count);
808+
Assert.Equal("string value", cache.Get(key0));
809+
Assert.Equal("decimal value", cache.Get(key1));
810+
}
811+
797812
private class TestKey
798813
{
799814
public override bool Equals(object obj) => true;

src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs

+5-13
Original file line numberDiff line numberDiff line change
@@ -269,14 +269,12 @@ public static Zip64ExtraField GetAndRemoveZip64Block(List<ZipGenericExtraField>
269269
zip64Field._localHeaderOffset = null;
270270
zip64Field._startDiskNumber = null;
271271

272-
List<ZipGenericExtraField> markedForDelete = new List<ZipGenericExtraField>();
273272
bool zip64FieldFound = false;
274273

275-
foreach (ZipGenericExtraField ef in extraFields)
274+
extraFields.RemoveAll(ef =>
276275
{
277276
if (ef.Tag == TagConstant)
278277
{
279-
markedForDelete.Add(ef);
280278
if (!zip64FieldFound)
281279
{
282280
if (TryGetZip64BlockFromGenericExtraField(ef, readUncompressedSize, readCompressedSize,
@@ -285,24 +283,18 @@ public static Zip64ExtraField GetAndRemoveZip64Block(List<ZipGenericExtraField>
285283
zip64FieldFound = true;
286284
}
287285
}
286+
return true;
288287
}
289-
}
290288

291-
foreach (ZipGenericExtraField ef in markedForDelete)
292-
extraFields.Remove(ef);
289+
return false;
290+
});
293291

294292
return zip64Field;
295293
}
296294

297295
public static void RemoveZip64Blocks(List<ZipGenericExtraField> extraFields)
298296
{
299-
List<ZipGenericExtraField> markedForDelete = new List<ZipGenericExtraField>();
300-
foreach (ZipGenericExtraField field in extraFields)
301-
if (field.Tag == TagConstant)
302-
markedForDelete.Add(field);
303-
304-
foreach (ZipGenericExtraField field in markedForDelete)
305-
extraFields.Remove(field);
297+
extraFields.RemoveAll(field => field.Tag == TagConstant);
306298
}
307299

308300
public void WriteBlock(Stream stream)

src/libraries/System.IO.Packaging/src/System.IO.Packaging.csproj

+2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
<TargetFrameworks>$(NetCoreAppCurrent);$(NetCoreAppPrevious);$(NetCoreAppMinimum);netstandard2.0;$(NetFrameworkMinimum)</TargetFrameworks>
44
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
55
<IsPackable>true</IsPackable>
6+
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
7+
<ServicingVersion>1</ServicingVersion>
68
<PackageDescription>Provides classes that support storage of multiple data objects in a single container.</PackageDescription>
79
</PropertyGroup>
810

0 commit comments

Comments
 (0)