Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experimental Discord connector #463

Merged
merged 2 commits into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
<ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally>
</PropertyGroup>
<ItemGroup>
<PackageVersion Include="Aspire.Npgsql.EntityFrameworkCore.PostgreSQL" Version="8.0.0-preview.7.24251.11" />
<PackageVersion Include="Azure.AI.FormRecognizer" Version="4.1.0" />
<PackageVersion Include="Azure.Identity" Version="1.11.2" />
<PackageVersion Include="Azure.Search.Documents" Version="11.5.1" />
<PackageVersion Include="Azure.Storage.Blobs" Version="12.19.1" />
<PackageVersion Include="Azure.Storage.Queues" Version="12.17.1" />
<PackageVersion Include="Elastic.Clients.Elasticsearch" Version="8.11.0" />
<PackageVersion Include="Discord.Net" Version="3.14.1" />
<PackageVersion Include="HtmlAgilityPack" Version="1.11.61" />
<PackageVersion Include="LLamaSharp" Version="0.11.2" />
<PackageVersion Include="LLamaSharp.Backend.Cpu" Version="0.11.2" />
Expand All @@ -21,6 +23,7 @@
<PackageVersion Include="Microsoft.Extensions.DependencyInjection" Version="8.0.0" />
<PackageVersion Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="8.0.1" />
<PackageVersion Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
<PackageVersion Include="Microsoft.Extensions.Hosting.Abstractions" Version="8.0.0" />
<PackageVersion Include="Microsoft.Extensions.Http" Version="8.0.0" />
<PackageVersion Include="Microsoft.Extensions.Logging" Version="8.0.0" />
<PackageVersion Include="Microsoft.Extensions.Logging.Abstractions" Version="8.0.1" />
Expand Down
13 changes: 13 additions & 0 deletions KernelMemory.sln
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Elasticsearch", "extensions
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Elasticsearch.UnitTests", "extensions\Elasticsearch\Elasticsearch.FunctionalTests\Elasticsearch.FunctionalTests.csproj", "{C5E6B28C-F54D-423D-954D-A9EAEFB89732}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Discord", "extensions\Discord\Discord\Discord.csproj", "{43877864-6AE8-4B03-BEDA-6B6FA8BB1D8B}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "301-discord-test-application", "examples\301-discord-test-application\301-discord-test-application.csproj", "{FAE4C6B8-38B2-43E7-8881-99693C9CEDC6}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -500,6 +504,13 @@ Global
{C5E6B28C-F54D-423D-954D-A9EAEFB89732}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{C5E6B28C-F54D-423D-954D-A9EAEFB89732}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C5E6B28C-F54D-423D-954D-A9EAEFB89732}.Release|Any CPU.ActiveCfg = Release|Any CPU
{43877864-6AE8-4B03-BEDA-6B6FA8BB1D8B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{43877864-6AE8-4B03-BEDA-6B6FA8BB1D8B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{43877864-6AE8-4B03-BEDA-6B6FA8BB1D8B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{43877864-6AE8-4B03-BEDA-6B6FA8BB1D8B}.Release|Any CPU.Build.0 = Release|Any CPU
{FAE4C6B8-38B2-43E7-8881-99693C9CEDC6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{FAE4C6B8-38B2-43E7-8881-99693C9CEDC6}.Debug|Any CPU.Build.0 = Debug|Any CPU
{FAE4C6B8-38B2-43E7-8881-99693C9CEDC6}.Release|Any CPU.ActiveCfg = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -580,6 +591,8 @@ Global
{B9BE1099-F78F-4A5F-A897-BF2C75E19C57} = {155DA079-E267-49AF-973A-D1D44681970F}
{2E10420F-BF96-411C-8FE0-F6268F2EEB67} = {155DA079-E267-49AF-973A-D1D44681970F}
{C5E6B28C-F54D-423D-954D-A9EAEFB89732} = {3C17F42B-CFC8-4900-8CFB-88936311E919}
{43877864-6AE8-4B03-BEDA-6B6FA8BB1D8B} = {155DA079-E267-49AF-973A-D1D44681970F}
{FAE4C6B8-38B2-43E7-8881-99693C9CEDC6} = {0A43C65C-6007-4BB4-B3FE-8D439FC91841}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {CC136C62-115C-41D1-B414-F9473EFF6EA8}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<Project Sdk="Microsoft.NET.Sdk.Web">

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<NoWarn>$(NoWarn);CA1303;CA1031;</NoWarn>
</PropertyGroup>

<ItemGroup>
<ProjectReference Include="..\..\extensions\Discord\Discord\Discord.csproj" />
<ProjectReference Include="..\..\service\Service.AspNetCore\Service.AspNetCore.csproj" />
</ItemGroup>

<ItemGroup>
<PackageReference Include="Aspire.Npgsql.EntityFrameworkCore.PostgreSQL" />
</ItemGroup>

</Project>
17 changes: 17 additions & 0 deletions examples/301-discord-test-application/DiscordDbContext.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Copyright (c) Microsoft. All rights reserved.

using Microsoft.EntityFrameworkCore;

namespace Microsoft.Discord.TestApplication;

public class DiscordDbContext : DbContext
{
public DbContextOptions<DiscordDbContext> Options { get; }

public DbSet<DiscordDbMessage> Messages { get; set; }

public DiscordDbContext(DbContextOptions<DiscordDbContext> options) : base(options)
{
this.Options = options;
}
}
22 changes: 22 additions & 0 deletions examples/301-discord-test-application/DiscordDbMessage.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Copyright (c) Microsoft. All rights reserved.

using System.ComponentModel.DataAnnotations;
using Microsoft.KernelMemory.Sources.DiscordBot;

namespace Microsoft.Discord.TestApplication;

public class DiscordDbMessage : DiscordMessage
{
[Key]
public string Id
{
get
{
return this.MessageId;
}
set
{
this.MessageId = value;
}
}
}
119 changes: 119 additions & 0 deletions examples/301-discord-test-application/DiscordMessageHandler.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Text.Json;
using Microsoft.KernelMemory;
using Microsoft.KernelMemory.Diagnostics;
using Microsoft.KernelMemory.Pipeline;
using Microsoft.KernelMemory.Sources.DiscordBot;

namespace Microsoft.Discord.TestApplication;

/// <summary>
/// KM pipeline handler fetching discord data files from content storage
/// and storing messages in Postgres.
/// </summary>
public sealed class DiscordMessageHandler : IPipelineStepHandler, IDisposable, IAsyncDisposable
{
// Name of the file where to store Discord data
private readonly string _filename;

// KM pipelines orchestrator
private readonly IPipelineOrchestrator _orchestrator;

// .NET service provider, used to get thread safe instances of EF DbContext
private readonly IServiceProvider _serviceProvider;

// EF DbContext used to create the database
private DiscordDbContext? _firstInvokeDb;

// .NET logger
private readonly ILogger<DiscordMessageHandler> _log;

public string StepName { get; } = string.Empty;

public DiscordMessageHandler(
string stepName,
IPipelineOrchestrator orchestrator,
DiscordConnectorConfig config,
IServiceProvider serviceProvider,
ILoggerFactory? loggerFactory = null)
{
this.StepName = stepName;
this._log = loggerFactory?.CreateLogger<DiscordMessageHandler>() ?? DefaultLogger<DiscordMessageHandler>.Instance;

this._orchestrator = orchestrator;
this._serviceProvider = serviceProvider;
this._filename = config.FileName;

// This DbContext instance is used only to create the database
this._firstInvokeDb = serviceProvider.GetService<DiscordDbContext>() ?? throw new ConfigurationException("Discord DB Content is not defined");
}

public async Task<(bool success, DataPipeline updatedPipeline)> InvokeAsync(DataPipeline pipeline, CancellationToken cancellationToken = default)
{
this.OnFirstInvoke();

// Note: use a new DbContext instance each time, because DbContext is not thread safe and would throw the following
// exception: System.InvalidOperationException: a second operation was started on this context instance before a previous
// operation completed. This is usually caused by different threads concurrently using the same instance of DbContext.
// For more information on how to avoid threading issues with DbContext, see https://go.microsoft.com/fwlink/?linkid=2097913.
await using (var db = (this._serviceProvider.GetService<DiscordDbContext>())!)
{
foreach (DataPipeline.FileDetails uploadedFile in pipeline.Files)
{
// Process only the file containing the discord data
if (uploadedFile.Name != this._filename) { continue; }

string fileContent = await this._orchestrator.ReadTextFileAsync(pipeline, uploadedFile.Name, cancellationToken).ConfigureAwait(false);

DiscordDbMessage? data;
try
{
data = JsonSerializer.Deserialize<DiscordDbMessage>(fileContent);
if (data == null)
{
this._log.LogError("Failed to deserialize Discord data file, result is NULL");
return (true, pipeline);
}
}
catch (Exception e)
{
this._log.LogError(e, "Failed to deserialize Discord data file");
return (true, pipeline);
}

await db.Messages.AddAsync(data, cancellationToken).ConfigureAwait(false);
}

await db.SaveChangesAsync(cancellationToken).ConfigureAwait(false);
}

return (true, pipeline);
}

public void Dispose()
{
this._firstInvokeDb?.Dispose();
this._firstInvokeDb = null;
}

public async ValueTask DisposeAsync()
{
if (this._firstInvokeDb != null) { await this._firstInvokeDb.DisposeAsync(); }

this._firstInvokeDb = null;
}

private void OnFirstInvoke()
{
if (this._firstInvokeDb == null) { return; }

lock (this._firstInvokeDb)
{
// Create DB / Tables if needed
this._firstInvokeDb.Database.EnsureCreated();
this._firstInvokeDb.Dispose();
this._firstInvokeDb = null;
}
}
}
103 changes: 103 additions & 0 deletions examples/301-discord-test-application/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Copyright (c) Microsoft. All rights reserved.

using Microsoft.KernelMemory;
using Microsoft.KernelMemory.ContentStorage.DevTools;
using Microsoft.KernelMemory.Sources.DiscordBot;

namespace Microsoft.Discord.TestApplication;

/* Example: Listen for new messages in Discord, and save them in a table in Postgres.
*
* Use ASP.NET hosted services to host a Discord Bot. The discord bot logic is based
* on DiscordConnector class.
*
* While the Discord bot is running, every time there is a new message, DiscordConnector
* invokes KM.ImportDocument API, uploading a JSON file that contains details about the
* Discord message, including server ID, channel ID, author ID, message content, etc.
*
* The call to KM.ImportDocument API asks to process the JSON file uploaded using
* DiscordMessageHandler, included in this project. No other handlers are used.
*
* DiscordMessageHandler, loads the uploaded file, deserializes its content, and
* save each Discord message into a table in Postgres, using Entity Framework.
*/

internal static class Program
{
public static void Main(string[] args)
{
WebApplicationBuilder appBuilder = WebApplication.CreateBuilder();

appBuilder.Configuration
.AddJsonFile("appsettings.json")
.AddJsonFile("appsettings.Development.json", optional: true)
.AddEnvironmentVariables()
.AddCommandLine(args);

// Discord setup
// Use DiscordConnector to connect to Discord and listen for messages.
// The Discord connection can listen from multiple servers and channels.
// For each message, DiscordConnector will send a file to Kernel Memory to process.
// Files sent to Kernel Memory are processed by DiscordMessageHandler (in this project)
var discordCfg = appBuilder.Configuration.GetSection("Discord").Get<DiscordConnectorConfig>();
ArgumentNullExceptionEx.ThrowIfNull(discordCfg, nameof(discordCfg), "Discord config is NULL");
appBuilder.Services.AddSingleton<DiscordConnectorConfig>(discordCfg);
appBuilder.Services.AddHostedService<DiscordConnector>();

// Postgres with Entity Framework
// DiscordMessageHandler reads files received by Kernel Memory and store each message in a table in Postgres.
// See DiscordDbMessage for the table schema.
appBuilder.AddNpgsqlDbContext<DiscordDbContext>("postgresDb");

// Run Kernel Memory and DiscordMessageHandler
// var kmApp = BuildAsynchronousKernelMemoryApp(appBuilder, discordConfig);
var kmApp = BuildSynchronousKernelMemoryApp(appBuilder, discordCfg);

Console.WriteLine("Starting KM application...\n");
kmApp.Run();
Console.WriteLine("\n... KM application stopped.");
}

private static WebApplication BuildSynchronousKernelMemoryApp(WebApplicationBuilder appBuilder, DiscordConnectorConfig discordConfig)
{
appBuilder.AddKernelMemory(kmb =>
{
// Note: there's no queue system, so the memory instance will be synchronous (ie MemoryServerless)

// Store files on disk
kmb.WithSimpleFileStorage(SimpleFileStorageConfig.Persistent);

// Disable AI, not needed for this example
kmb.WithoutEmbeddingGenerator();
kmb.WithoutTextGenerator();
});

WebApplication app = appBuilder.Build();

// In synchronous apps, handlers are added to the serverless memory orchestrator
(app.Services.GetService<IKernelMemory>() as MemoryServerless)!
.Orchestrator
.AddHandler<DiscordMessageHandler>(discordConfig.Steps[0]);

return app;
}

private static WebApplication BuildAsynchronousKernelMemoryApp(WebApplicationBuilder appBuilder, DiscordConnectorConfig discordConfig)
{
appBuilder.Services.AddHandlerAsHostedService<DiscordMessageHandler>(discordConfig.Steps[0]);
appBuilder.AddKernelMemory(kmb =>
{
// Note: because of this the memory instance will be asynchronous (ie MemoryService)
kmb.WithSimpleQueuesPipeline();

// Store files on disk
kmb.WithSimpleFileStorage(SimpleFileStorageConfig.Persistent);

// Disable AI, not needed for this example
kmb.WithoutEmbeddingGenerator();
kmb.WithoutTextGenerator();
});

return appBuilder.Build();
}
}
38 changes: 38 additions & 0 deletions examples/301-discord-test-application/appsettings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"Discord": {
// Discord bot authentication token
// See https://discord.com/developers
"DiscordToken": "",
// Index where to store files, e.g. disk folder, Azure blobs folder, etc.
"Index": "discord",
// File name used when uploading a message to content storage.
"FileName": "discord-msg.json",
// Handlers processing the incoming Discord events
"Steps": [
"store_discord_message"
]
},
"ConnectionStrings": {
// Db where Discord messages are stored, e.g.
// "Host=contoso.postgres.database.azure.com;Port=5432;Username=adminuser;Password=mypassword;Database=discorddata;SSL Mode=VerifyFull"
"postgresDb": "Host=localhost;Port=5432;Username=;Password="
},
"Logging": {
"LogLevel": {
"Default": "Warning"
},
"Console": {
"LogToStandardErrorThreshold": "Critical",
"FormatterName": "simple",
"FormatterOptions": {
"TimestampFormat": "[HH:mm:ss.fff] ",
"SingleLine": true,
"UseUtcTimestamp": false,
"IncludeScopes": false,
"JsonWriterOptions": {
"Indented": true
}
}
}
}
}
Loading
Loading