-
Notifications
You must be signed in to change notification settings - Fork 336
Commit
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
namespace Microsoft.KernelMemory.Text; | ||
|
||
/// <summary>
/// String helpers for text normalization.
/// </summary>
public static class StringExtensions
{
    /// <summary>
    /// Replaces Windows (<c>\r\n</c>) and old Mac (<c>\r</c>) line endings with <c>\n</c>,
    /// optionally removing leading and trailing whitespace in the same pass.
    /// </summary>
    /// <param name="text">Text to normalize. Null and empty values are returned unchanged.</param>
    /// <param name="trim">
    /// When true, leading and trailing whitespace (as classified by
    /// <see cref="char.IsWhiteSpace(char)"/>) is removed from the result.
    /// </param>
    /// <returns>
    /// The normalized text. Returns the input itself when it is null or empty,
    /// and an empty string when <paramref name="trim"/> is true and the text is all whitespace.
    /// </returns>
    [return: System.Diagnostics.CodeAnalysis.NotNullIfNotNull("text")]
    public static string? NormalizeNewlines(this string? text, bool trim = false)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        // The output can only shrink (\r\n collapses to \n), so the input length is enough
        char[] buffer = new char[text.Length];
        int bufferPos = 0;

        // Skip leading whitespace if trimming
        int i = 0;
        if (trim)
        {
            while (i < text.Length && char.IsWhiteSpace(text[i]))
            {
                i++;
            }
        }

        // Index in buffer of the last char that must be kept in the result.
        // Stays -1 when nothing is written (e.g. trimming an all-whitespace string).
        int lastKeptPos = -1;

        // Single pass: replace \r\n or \r with \n and track the last char to keep
        for (; i < text.Length; i++)
        {
            char c = text[i];

            if (c == '\r')
            {
                // If \r\n then skip the \n, so the pair produces a single newline
                if (i + 1 < text.Length && text[i + 1] == '\n')
                {
                    i++;
                }

                c = '\n';
            }

            buffer[bufferPos] = c;

            // When trimming, only non-whitespace chars extend the kept range, so that
            // trailing whitespace is cut off. When not trimming, every char is kept.
            if (!trim || !char.IsWhiteSpace(c))
            {
                lastKeptPos = bufferPos;
            }

            bufferPos++;
        }

        // Not trimming: lastKeptPos == bufferPos - 1, so the whole buffer is used.
        // Trimming: everything after the last non-whitespace char is dropped,
        // and an all-whitespace input yields -1 + 1 == 0, i.e. an empty string.
        return new string(buffer, 0, lastKeptPos + 1);
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using Microsoft.KernelMemory.Text; | ||
|
||
namespace Microsoft.KM.Abstractions.UnitTests.Text; | ||
|
||
/// <summary>
/// Unit tests for <c>StringExtensions.NormalizeNewlines</c>, with and without trimming.
/// </summary>
public class StringExtensionsTest
{
    /// <summary>
    /// Without trimming, only line endings change: \r\n and \r become \n,
    /// all other characters (including surrounding whitespace) are preserved.
    /// </summary>
    [Theory]
    [Trait("Category", "UnitTest")]
    [InlineData(null, null)]
    [InlineData("", "")]
    [InlineData(" ", " ")]
    [InlineData("\n", "\n")]
    [InlineData("\r", "\n")] // Old Mac
    [InlineData("\r\n", "\n")] // Windows
    [InlineData("\n\r", "\n\n")] // Not standard, that's 2 line endings
    [InlineData("\n\n\n", "\n\n\n")]
    [InlineData("\r\r\r", "\n\n\n")]
    [InlineData("\r\r\n\r", "\n\n\n")]
    [InlineData("\n\r\n\r", "\n\n\n")]
    [InlineData("ciao", "ciao")]
    [InlineData("ciao ", "ciao ")]
    [InlineData(" ciao ", " ciao ")]
    [InlineData("\r ciao ", "\n ciao ")]
    [InlineData(" \rciao ", " \nciao ")]
    [InlineData(" \r\nciao ", " \nciao ")]
    [InlineData(" \r\nciao\n ", " \nciao\n ")]
    [InlineData(" \r\nciao \n", " \nciao \n")]
    [InlineData(" \r\nciao \r", " \nciao \n")]
    [InlineData(" \r\nciao \rn", " \nciao \nn")]
    public void ItNormalizesLineEndings(string? input, string? expected)
    {
        // Act
        // Null is a deliberate test input: the extension method returns null/empty values as-is.
        // The null-forgiving operator only silences the nullable-argument warning at the call site.
        string? actual = input!.NormalizeNewlines();

        // Assert
        Assert.Equal(expected, actual);
    }

    /// <summary>
    /// With trimming, line endings are normalized and leading/trailing whitespace
    /// (newlines included) is removed; inner whitespace is preserved.
    /// </summary>
    [Theory]
    [Trait("Category", "UnitTest")]
    [InlineData(null, null)]
    [InlineData("", "")]
    [InlineData(" ", "")]
    [InlineData("\n", "")]
    [InlineData("\r", "")]
    [InlineData("\r\n", "")]
    [InlineData("\n\r", "")]
    [InlineData("\n\n\n", "")]
    [InlineData("\r\r\r", "")]
    [InlineData("\r\r\n\r", "")]
    [InlineData("\n\r\n\r", "")]
    [InlineData("ciao", "ciao")]
    [InlineData("ciao ", "ciao")]
    [InlineData(" ciao ", "ciao")]
    [InlineData("\r ciao ", "ciao")]
    [InlineData(" \rciao ", "ciao")]
    [InlineData(" \r\nciao ", "ciao")]
    [InlineData(" \r\nciao\n ", "ciao")]
    [InlineData(" \r\nciao \n", "ciao")]
    [InlineData(" \r\nciao \r", "ciao")]
    [InlineData(" \r\nciao \rn", "ciao \nn")]
    [InlineData(" \r\nc\ri\ra\no \r", "c\ni\na\no")]
    [InlineData(" \r\nc\r\ni\n\na\r\ro \r", "c\ni\n\na\n\no")]
    [InlineData(" \r\nccc\r\ni\n\naaa\r\ro \r", "ccc\ni\n\naaa\n\no")]
    public void ItCanTrimWhileNormalizingLineEndings(string? input, string? expected)
    {
        // Act
        // Null is a deliberate test input: the extension method returns null/empty values as-is.
        // The null-forgiving operator only silences the nullable-argument warning at the call site.
        string? actual = input!.NormalizeNewlines(true);

        // Assert
        Assert.Equal(expected, actual);
    }
}