From 8bc030f474c401a47f0364990e625ef9cbe2af38 Mon Sep 17 00:00:00 2001 From: Stephen Griffin Date: Mon, 6 Jan 2025 09:39:41 -0500 Subject: [PATCH] U/sgriffin/fuzz (#805) * Simplify fuzzing * make fuzzing faster * Ensure artifacts dir exists as part of setup * fix test instance name --- README.md | 10 +++-- fuzz/Build-FuzzingCorpus.ps1 | 78 ++++++++++++++++++++++++++++++++++++ fuzz/fuzz.cpp | 50 +++-------------------- 3 files changed, 91 insertions(+), 47 deletions(-) create mode 100644 fuzz/Build-FuzzingCorpus.ps1 diff --git a/README.md b/README.md index 7a6bf9f7e..2f922ed06 100644 --- a/README.md +++ b/README.md @@ -12,16 +12,20 @@ MFCMAPI depends on the [MAPI Stub Library](https://github.com/microsoft/MAPIStub ## Fuzzing -MFCMAPI supports fuzzing with [libFuzzer](https://llvm.org/docs/LibFuzzer.html) and the [fsanitize](https://learn.microsoft.com/en-us/cpp/build/reference/fsanitize?view=msvc-170) switch in Visual Studio. See [fuzz.cpp](https://github.com/microsoft/mfcmapi/blob/main/fuzz/fuzz.cpp) for details. +MFCMAPI supports fuzzing with [libFuzzer](https://llvm.org/docs/LibFuzzer.html) and the [fsanitize](https://learn.microsoft.com/en-us/cpp/build/reference/fsanitize?view=msvc-170) switch in Visual Studio. See [fuzz.cpp](fuzz/fuzz.cpp) for details. To run fuzzing for this project, follow these steps: +1. **Build Fuzzing Corpus**: + - Open a PowerShell prompt + - Run [fuzz\Build-FuzzingCorpus.ps1](fuzz/Build-FuzzingCorpus.ps1) to generate a fuzzing corpus in [fuzz/corpus](fuzz/corpus) from Smart View unit test data. + 1. **Switch Solution Configuration**: - Open MFCMAPI.sln in Visual Studio. - In the toolbar, locate the **Solution Configurations** dropdown. - Select **Fuzz** from the list of configurations. -2. **Debug Command Line Parameters**: +1. 
**Debug Command Line Parameters**: - When running the fuzzing tests, use the following command line parameters: -`$(ProjectDir)fuzz\corpus $(ProjectDir)UnitTest\SmartViewTestData\In -artifact_prefix=fuzz\artifacts\` +`$(ProjectDir)fuzz\corpus -artifact_prefix=fuzz\artifacts\` ## Help/Feedback diff --git a/fuzz/Build-FuzzingCorpus.ps1 b/fuzz/Build-FuzzingCorpus.ps1 new file mode 100644 index 000000000..9910bd752 --- /dev/null +++ b/fuzz/Build-FuzzingCorpus.ps1 @@ -0,0 +1,78 @@ + +function Build-FuzzingCorpus { + param ( + [string]$InputDir, + [string]$OutputDir + ) + + # Ensure the output directory exists + if (-not (Test-Path -Path $OutputDir)) { + New-Item -ItemType Directory -Path $OutputDir + } + + # Function to convert hex string to byte array + function Convert-HexStringToByteArray { + param ( + [string]$hexString + ) + if ($null -eq $hexString) { + return @() + } + + # remove L"\r\n\t -.,\\/'{}`\"" and whitespace from the hex string + # this is the same set of characters checked in IsFilteredHex + $hexString = $hexString -replace "[\r\n\t -.,\\/'{}`"\""]", "" -replace "\s", "" + if ($hexString.Length -eq 0) { + return @() + } + + $byteArray = @() + for ($i = 0; $i -lt $hexString.Length; $i += 2) { + try { + $byteArray += [Convert]::ToByte($hexString.Substring($i, 2), 16) + } catch { + Write-Host "Error converting hex string to byte array: $($_.Exception.Message)" + Write-Host "hexString: $hexString" + Write-Host "i: $i" + Write-Host "hexString.Length: $($hexString.Length)" + # Write the (up to) 8 characters before the error and up to 8 after + $start = [Math]::Max(0, $i - 8) + $end = [Math]::Min($hexString.Length, $i + 8) + Write-Host "hexString.Substring($i, 2): $($hexString.Substring($i, 2))" + Write-Host "hexString.Substring($start, $end - $start): $($hexString.Substring($start, $end - $start))" + break + } + } + return $byteArray + } + + # Iterate over all .dat files in the input directory + Get-ChildItem -Path $InputDir -Filter *.dat | ForEach-Object { + 
$inputFilePath = $_.FullName + $outputFilePath = Join-Path -Path $OutputDir -ChildPath ($_.BaseName + ".bin") + + # Read the hex data from the input file + $hexData = Get-Content -Path $inputFilePath -Raw + + Write-Host "Converting $inputFilePath to $outputFilePath" + # Write-Host "Hex data length: $($hexData.Length)" + # Write-Host "hexData: $hexData" + + # Convert the hex data to binary data + $binaryData = Convert-HexStringToByteArray -hexString $hexData + if ($null -eq $binaryData) { + $binaryData = @() + } + # Write the binary data to the output file + [System.IO.File]::WriteAllBytes($outputFilePath, $binaryData) + } +} + +# Example usage +$inputDirectory = "$PSScriptRoot\..\UnitTest\SmartViewTestData\In" +$outputDirectory = "$PSScriptRoot\corpus" +$artifactsDirectory = "$PSScriptRoot\artifacts" +if (-not (Test-Path -Path $artifactsDirectory)) { + New-Item -ItemType Directory -Path $artifactsDirectory +} +Build-FuzzingCorpus -InputDir $inputDirectory -OutputDir $outputDirectory diff --git a/fuzz/fuzz.cpp b/fuzz/fuzz.cpp index 9935e90ff..55a358acd 100644 --- a/fuzz/fuzz.cpp +++ b/fuzz/fuzz.cpp @@ -14,40 +14,19 @@ void EnsureInit() registry::useGetPropList = true; registry::parseNamedProps = true; registry::cacheNamedProps = true; - strings::setTestInstance(GetModuleHandleW(L"fuzz.exe")); + strings::setTestInstance(GetModuleHandleW(L"mfcmapi.exe")); } -void test(std::vector hex) +void test(const SBinary hex) { for (const auto parser : SmartViewParserTypeArray) { if (parser.type == parserType::NOPARSING) continue; //wprintf(L"Testing %ws\r\n", addin::AddInStructTypeToString(parser.type).c_str()); - (void) smartview::InterpretBinary({static_cast(hex.size()), hex.data()}, parser.type, nullptr); + (void) smartview::InterpretBinary(hex, parser.type, nullptr); } } -std::wstring LoadDataToString(const uint8_t* Data, size_t Size) -{ - const auto cb = Size; - const LPVOID bytes = (LPVOID) Data; - const auto data = static_cast(bytes); - - // UTF 16 LE - // In Notepad++, 
this is UCS-2 LE BOM encoding - // WARNING: Editing files in Visual Studio Code can alter this encoding - if (cb >= 2 && data[0] == 0xff && data[1] == 0xfe) - { - // Skip the byte order mark - const auto wstr = static_cast(bytes); - const auto cch = cb / sizeof(wchar_t); - return std::wstring(wstr + 1, cch - 1); - } - - const auto str = std::string(static_cast(bytes), cb); - return strings::stringTowstring(str); -} - #ifdef __cplusplus #define FUZZ_EXPORT extern "C" __declspec(dllexport) #else @@ -56,26 +35,9 @@ std::wstring LoadDataToString(const uint8_t* Data, size_t Size) FUZZ_EXPORT int __cdecl LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { std::call_once(_initFlag, EnsureInit); - // convert data to vector byte - - const auto inputVector = std::vector(data, data + size); - const auto input = LoadDataToString(data, size); - if (input.empty()) - { - // Print hex encoding of input so we can see what was wrong with it - //wprintf(L"Invalid input: %ws\r\n", strings::BinToHexString(inputVector, true).c_str()); - return -1; // ignore invalid hex strings - } - - auto hex = strings::HexStringToBin(input); - if (hex.empty()) - { - //wprintf(L"Invalid hex: %ws\r\n", input.c_str()); - return -1; // ignore invalid hex strings - } - - //wprintf(L"Fuzzing: %ws\r\n", input.c_str()); - test(hex); + const SBinary input = {static_cast(size), (LPBYTE) (data)}; + //wprintf(L"Fuzzing: %ws\r\n", strings::BinToHexString(&input, true).c_str()); + test(input); return 0; } #endif // FUZZ \ No newline at end of file