Skip to content

Commit 0b8e4cc

Browse files
authored
Pass Tags using an explicit form field (#227)
## Motivation and Context (Why the change? What's the scenario?) Improve API schema used to specify document tags. ## High level description (Approach, Design) Use a specific form field name for tags.
1 parent 3b3b305 commit 0b8e4cc

File tree

7 files changed

+89
-39
lines changed

7 files changed

+89
-39
lines changed

clients/dotnet/WebClient/MemoryWebClient.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -320,8 +320,8 @@ private async Task<string> ImportInternalAsync(
320320
using (StringContent documentIdContent = new(uploadRequest.DocumentId))
321321
{
322322
List<IDisposable> disposables = new();
323-
formData.Add(documentIdContent, Constants.WebServiceDocumentIdField);
324323
formData.Add(indexContent, Constants.WebServiceIndexField);
324+
formData.Add(documentIdContent, Constants.WebServiceDocumentIdField);
325325

326326
// Add steps to the form
327327
foreach (string? step in uploadRequest.Steps)
@@ -336,9 +336,9 @@ private async Task<string> ImportInternalAsync(
336336
// Add tags to the form
337337
foreach (KeyValuePair<string, string?> tag in uploadRequest.Tags.Pairs)
338338
{
339-
var tagContent = new StringContent(tag.Value);
339+
var tagContent = new StringContent($"{tag.Key}{Constants.ReservedEqualsChar}{tag.Value}");
340340
disposables.Add(tagContent);
341-
formData.Add(tagContent, tag.Key);
341+
formData.Add(tagContent, Constants.WebServiceTagsField);
342342
}
343343

344344
// Add files to the form
@@ -360,7 +360,6 @@ private async Task<string> ImportInternalAsync(
360360
try
361361
{
362362
HttpResponseMessage? response = await this._client.PostAsync("/upload", formData, cancellationToken).ConfigureAwait(false);
363-
formData.Dispose();
364363
response.EnsureSuccessStatusCode();
365364
}
366365
catch (HttpRequestException e) when (e.Data.Contains("StatusCode"))
@@ -373,6 +372,7 @@ private async Task<string> ImportInternalAsync(
373372
}
374373
finally
375374
{
375+
formData.Dispose();
376376
foreach (var disposable in disposables)
377377
{
378378
disposable.Dispose();

nuget-package.props

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<Project>
22
<PropertyGroup>
33
<!-- Central version prefix - applies to all nuget packages. -->
4-
<Version>0.24.0</Version>
4+
<Version>0.25.0</Version>
55

66
<!-- These are set at the project level-->
77
<IsPackable>false</IsPackable>

service/Abstractions/Constants.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ public static class Constants
1313
// Form field containing the Document ID
1414
public const string WebServiceDocumentIdField = "documentId";
1515

16+
// Form field containing the list of tags
17+
public const string WebServiceTagsField = "tags";
18+
1619
// Form field containing the list of pipeline steps
1720
public const string WebServiceStepsField = "steps";
1821

service/Abstractions/Models/TagCollection.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,5 +155,11 @@ private static void ValidateKey(string key)
155155
{
156156
throw new KernelMemoryException("A tag name cannot contain the '=' char");
157157
}
158+
159+
// ':' is reserved for backward/forward compatibility
160+
if (key.Contains(':'))
161+
{
162+
throw new KernelMemoryException("A tag name cannot contain the ':' char");
163+
}
158164
}
159165
}

service/Core/WebService/HttpDocumentUploadRequest.cs

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,6 @@ public class HttpDocumentUploadRequest
2929
public static async Task<(HttpDocumentUploadRequest model, bool isValid, string errMsg)> BindHttpRequestAsync(
3030
HttpRequest httpRequest, CancellationToken cancellationToken = default)
3131
{
32-
const string IndexField = Constants.WebServiceIndexField;
33-
const string DocumentIdField = Constants.WebServiceDocumentIdField;
34-
const string StepsField = Constants.WebServiceStepsField;
35-
3632
var result = new HttpDocumentUploadRequest();
3733

3834
// Content format validation
@@ -50,71 +46,75 @@ public class HttpDocumentUploadRequest
5046
return (result, false, "No file was uploaded");
5147
}
5248

53-
if (form.TryGetValue(IndexField, out StringValues indexes) && indexes.Count > 1)
49+
// Only one index can be defined
50+
if (form.TryGetValue(Constants.WebServiceIndexField, out StringValues indexes) && indexes.Count > 1)
5451
{
55-
return (result, false, $"Invalid index name, '{IndexField}', multiple values provided");
52+
return (result, false, $"Invalid index name, '{Constants.WebServiceIndexField}', multiple values provided");
5653
}
5754

58-
if (form.TryGetValue(DocumentIdField, out StringValues documentIds) && documentIds.Count > 1)
55+
// Only one document ID can be defined
56+
if (form.TryGetValue(Constants.WebServiceDocumentIdField, out StringValues documentIds) && documentIds.Count > 1)
5957
{
60-
return (result, false, $"Invalid document ID, '{DocumentIdField}' must be a single value, not a list");
58+
return (result, false, $"Invalid document ID, '{Constants.WebServiceDocumentIdField}' must be a single value, not a list");
6159
}
6260

63-
// Document Id is optional, e.g. used if the client wants to retry the same upload, otherwise we generate a random/unique one
64-
var documentId = documentIds.FirstOrDefault();
61+
// Document Id is optional, e.g. used if the client wants to retry the same upload, otherwise a random/unique one is generated
62+
string? documentId = documentIds.FirstOrDefault();
6563
if (string.IsNullOrWhiteSpace(documentId))
6664
{
6765
documentId = DateTimeOffset.Now.ToString("yyyyMMdd.HHmmss.", CultureInfo.InvariantCulture) + Guid.NewGuid().ToString("N");
6866
}
6967

68+
// Optional document tags. Tags are passed in as "key:value", where a key can have multiple values. See TagCollection.
69+
if (form.TryGetValue(Constants.WebServiceTagsField, out StringValues tags))
70+
{
71+
foreach (string? tag in tags)
72+
{
73+
if (tag == null) { continue; }
74+
75+
var keyValue = tag.Split(Constants.ReservedEqualsChar, 2);
76+
string key = keyValue[0];
77+
ValidateTagName(key);
78+
string? value = keyValue.Length == 1 ? null : keyValue[1];
79+
result.Tags.Add(key, value);
80+
}
81+
}
82+
7083
// Optional pipeline steps. The user can pass a custom list or leave it to the system to use the default.
71-
if (form.TryGetValue(StepsField, out StringValues steps))
84+
if (form.TryGetValue(Constants.WebServiceStepsField, out StringValues steps))
7285
{
7386
foreach (string? step in steps)
7487
{
7588
if (string.IsNullOrWhiteSpace(step)) { continue; }
7689

90+
// Allow step names to be separated by space, comma, semicolon
7791
var list = step.Replace(' ', ';').Replace(',', ';').Split(';');
7892
result.Steps.AddRange(from s in list where !string.IsNullOrWhiteSpace(s) select s.Trim());
7993
}
8094
}
8195

82-
result.DocumentId = documentId;
8396
result.Index = indexes[0]!;
97+
result.DocumentId = documentId;
8498
result.Files = form.Files;
8599

86-
// Store any extra field as a tag
87-
foreach (string key in form.Keys)
88-
{
89-
if (key == DocumentIdField
90-
|| key == IndexField
91-
|| key == StepsField
92-
|| !form.TryGetValue(key, out StringValues values)) { continue; }
93-
94-
ValidateTagName(key);
95-
foreach (string? x in values)
96-
{
97-
result.Tags.Add(key, x);
98-
}
99-
}
100-
101100
return (result, true, string.Empty);
102101
}
103102

104-
private static void ValidateTagName(string key)
103+
private static void ValidateTagName(string tagName)
105104
{
106-
if (key.Contains('=', StringComparison.Ordinal))
105+
if (tagName.StartsWith(Constants.ReservedTagsPrefix, StringComparison.Ordinal))
107106
{
108-
throw new KernelMemoryException("A tag name cannot contain the '=' char");
107+
throw new KernelMemoryException(
108+
$"The tag name prefix '{Constants.ReservedTagsPrefix}' is reserved for internal use.");
109109
}
110110

111-
if (key is
112-
Constants.ReservedDocumentIdTag
111+
if (tagName is Constants.ReservedDocumentIdTag
113112
or Constants.ReservedFileIdTag
114113
or Constants.ReservedFilePartitionTag
115-
or Constants.ReservedFileTypeTag)
114+
or Constants.ReservedFileTypeTag
115+
or Constants.ReservedSyntheticTypeTag)
116116
{
117-
throw new KernelMemoryException($"The tag name '{key}' is reserved for internal use.");
117+
throw new KernelMemoryException($"The tag name '{tagName}' is reserved for internal use.");
118118
}
119119
}
120120
}

service/Service/Service.csproj

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,13 @@
3636
<PackageReference Include="Microsoft.KernelMemory.MemoryDb.Postgres" Version="0.24.231228.5" />
3737
</ItemGroup>
3838

39+
<!-- <ItemGroup>-->
40+
<!-- <ProjectReference Include="..\Core\Core.csproj"/>-->
41+
<!-- <ProjectReference Include="..\..\extensions\LlamaSharp\LlamaSharp.csproj"/>-->
42+
<!-- <ProjectReference Include="..\..\extensions\Postgres\Postgres\Postgres.csproj"/>-->
43+
<!-- <ProjectReference Include="..\..\extensions\RabbitMQ\RabbitMQ.csproj"/>-->
44+
<!-- </ItemGroup>-->
45+
3946
<!-- Code Analysis -->
4047
<ItemGroup>
4148
<PackageReference Include="Microsoft.CodeAnalysis.Analyzers" Version="3.3.4">

service/tests/ServiceFunctionalTests/DocumentUploadTest.cs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,4 +46,38 @@ await this._memory.ImportDocumentAsync(
4646
this.Log("Deleting memories extracted from the document");
4747
await this._memory.DeleteDocumentAsync(Id);
4848
}
49+
50+
[Fact]
51+
[Trait("Category", "ServiceFunctionalTest")]
52+
public async Task ItSupportTags()
53+
{
54+
// Arrange
55+
const string Id = "ItSupportTags-file1-NASA-news.pdf";
56+
await this._memory.ImportDocumentAsync(
57+
"file1-NASA-news.pdf",
58+
documentId: Id,
59+
tags: new TagCollection
60+
{
61+
{ "type", "news" },
62+
{ "type", "test" },
63+
{ "ext", "pdf" }
64+
},
65+
steps: Constants.PipelineWithoutSummary);
66+
67+
// Act
68+
var answer1 = await this._memory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("type", "news"));
69+
this.Log(answer1.Result);
70+
var answer2 = await this._memory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("type", "test"));
71+
this.Log(answer2.Result);
72+
var answer3 = await this._memory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("ext", "pdf"));
73+
this.Log(answer3.Result);
74+
var answer4 = await this._memory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("foo", "bar"));
75+
this.Log(answer4.Result);
76+
77+
// Assert
78+
Assert.Contains("spacecraft", answer1.Result, StringComparison.OrdinalIgnoreCase);
79+
Assert.Contains("spacecraft", answer2.Result, StringComparison.OrdinalIgnoreCase);
80+
Assert.Contains("spacecraft", answer3.Result, StringComparison.OrdinalIgnoreCase);
81+
Assert.Contains("NOT FOUND", answer4.Result, StringComparison.OrdinalIgnoreCase);
82+
}
4983
}

0 commit comments

Comments
 (0)