diff --git a/Directory.Build.props b/Directory.Build.props index 83ec4b64b..fba690613 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -2,7 +2,7 @@ - 0.73.0 + 0.74.0 12 diff --git a/Directory.Packages.props b/Directory.Packages.props index 1a5ae16dd..baa27d297 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -4,15 +4,17 @@ - + - - + + + - + + @@ -33,18 +35,18 @@ - - - + + + - + - + @@ -52,26 +54,12 @@ - - - - - - - - - - - + + + + @@ -89,11 +77,11 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + all runtime; build; native; contentfiles; analyzers; buildtransitive - + all runtime; build; native; contentfiles; analyzers; buildtransitive @@ -112,7 +100,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + \ No newline at end of file diff --git a/extensions/OpenAI/OpenAI/Tokenizers/DefaultGPTTokenizer.cs b/extensions/OpenAI/OpenAI/Tokenizers/DefaultGPTTokenizer.cs index a4f52da57..18d55447f 100644 --- a/extensions/OpenAI/OpenAI/Tokenizers/DefaultGPTTokenizer.cs +++ b/extensions/OpenAI/OpenAI/Tokenizers/DefaultGPTTokenizer.cs @@ -9,7 +9,7 @@ namespace Microsoft.KernelMemory.AI.OpenAI; public static class DefaultGPTTokenizer { - private static readonly Tokenizer s_tokenizer = Tokenizer.CreateTiktokenForModel( + private static readonly Tokenizer s_tokenizer = TiktokenTokenizer.CreateForModel( "gpt-4", new Dictionary { { "<|im_start|>", 100264 }, { "<|im_end|>", 100265 } }); public static int StaticCountTokens(string text) diff --git a/extensions/OpenAI/OpenAI/Tokenizers/GPT2Tokenizer.cs b/extensions/OpenAI/OpenAI/Tokenizers/GPT2Tokenizer.cs index 6e6e5ba78..8b3df3559 100644 --- a/extensions/OpenAI/OpenAI/Tokenizers/GPT2Tokenizer.cs +++ b/extensions/OpenAI/OpenAI/Tokenizers/GPT2Tokenizer.cs @@ -13,7 +13,7 @@ namespace Microsoft.KernelMemory.AI.OpenAI; /// public sealed class GPT2Tokenizer : ITextTokenizer { - private static readonly Tokenizer s_tokenizer = Tokenizer.CreateTiktokenForModel("gpt2"); + private static readonly Tokenizer s_tokenizer = TiktokenTokenizer.CreateForModel("gpt2"); /// public int CountTokens(string text) @@ -24,6 +24,6 @@ public int CountTokens(string text) /// public IReadOnlyList GetTokens(string text) { - return s_tokenizer.Encode(text, out string? _).Select(t => t.Value).ToList(); + return s_tokenizer.EncodeToTokens(text, out string? _).Select(t => t.Value).ToList(); } } diff --git a/extensions/OpenAI/OpenAI/Tokenizers/GPT3Tokenizer.cs b/extensions/OpenAI/OpenAI/Tokenizers/GPT3Tokenizer.cs index b7ca2dee1..e7d03d721 100644 --- a/extensions/OpenAI/OpenAI/Tokenizers/GPT3Tokenizer.cs +++ b/extensions/OpenAI/OpenAI/Tokenizers/GPT3Tokenizer.cs @@ -13,7 +13,7 @@ namespace Microsoft.KernelMemory.AI.OpenAI; /// public sealed class GPT3Tokenizer : ITextTokenizer { - private static readonly Tokenizer s_tokenizer = Tokenizer.CreateTiktokenForModel("text-davinci-003"); + private static readonly Tokenizer s_tokenizer = TiktokenTokenizer.CreateForModel("text-davinci-003"); /// public int CountTokens(string text) @@ -24,6 +24,6 @@ public int CountTokens(string text) /// public IReadOnlyList GetTokens(string text) { - return s_tokenizer.Encode(text, out string? _).Select(t => t.Value).ToList(); + return s_tokenizer.EncodeToTokens(text, out string? _).Select(t => t.Value).ToList(); } } diff --git a/extensions/OpenAI/OpenAI/Tokenizers/GPT4Tokenizer.cs b/extensions/OpenAI/OpenAI/Tokenizers/GPT4Tokenizer.cs index 0f97a13b7..5cef0f5cc 100644 --- a/extensions/OpenAI/OpenAI/Tokenizers/GPT4Tokenizer.cs +++ b/extensions/OpenAI/OpenAI/Tokenizers/GPT4Tokenizer.cs @@ -13,7 +13,7 @@ namespace Microsoft.KernelMemory.AI.OpenAI; /// public sealed class GPT4Tokenizer : ITextTokenizer { - private static readonly Tokenizer s_tokenizer = Tokenizer.CreateTiktokenForModel("gpt-4", + private static readonly Tokenizer s_tokenizer = TiktokenTokenizer.CreateForModel("gpt-4", new Dictionary { { "<|im_start|>", 100264 }, { "<|im_end|>", 100265 } }); /// @@ -25,6 +25,6 @@ public int CountTokens(string text) /// public IReadOnlyList GetTokens(string text) { - return s_tokenizer.Encode(text, out string? _).Select(t => t.Value).ToList(); + return s_tokenizer.EncodeToTokens(text, out string? _).Select(t => t.Value).ToList(); } } diff --git a/extensions/OpenAI/OpenAI/Tokenizers/GPT4oTokenizer.cs b/extensions/OpenAI/OpenAI/Tokenizers/GPT4oTokenizer.cs index 7609d3aec..a0052c803 100644 --- a/extensions/OpenAI/OpenAI/Tokenizers/GPT4oTokenizer.cs +++ b/extensions/OpenAI/OpenAI/Tokenizers/GPT4oTokenizer.cs @@ -14,7 +14,7 @@ namespace Microsoft.KernelMemory.AI.OpenAI; // ReSharper disable once InconsistentNaming public sealed class GPT4oTokenizer : ITextTokenizer { - private static readonly Tokenizer s_tokenizer = Tokenizer.CreateTiktokenForModel("gpt-4o", + private static readonly Tokenizer s_tokenizer = TiktokenTokenizer.CreateForModel("gpt-4o", new Dictionary { { "<|im_start|>", 100264 }, { "<|im_end|>", 100265 } }); /// @@ -26,6 +26,6 @@ public int CountTokens(string text) /// public IReadOnlyList GetTokens(string text) { - return s_tokenizer.Encode(text, out string? _).Select(t => t.Value).ToList(); + return s_tokenizer.EncodeToTokens(text, out string? _).Select(t => t.Value).ToList(); } } diff --git a/service/Core/Core.csproj b/service/Core/Core.csproj index e60ba40a7..ad64da9a3 100644 --- a/service/Core/Core.csproj +++ b/service/Core/Core.csproj @@ -5,7 +5,7 @@ LatestMajor Microsoft.KernelMemory.Core Microsoft.KernelMemory - $(NoWarn);KMEXP00;KMEXP01;KMEXP02;KMEXP03;KMEXP04;SKEXP0001;SKEXP0011;CA2208;CA1308;CA1724; + $(NoWarn);KMEXP00;KMEXP01;KMEXP02;KMEXP03;KMEXP04;SKEXP0001;SKEXP0011;CA2208;CA1308;CA1724;NU5104;