Skip to content

Commit bebb637

Browse files
1 parent 2a01640 commit bebb637

File tree

4 files changed

+249
-3
lines changed

4 files changed

+249
-3
lines changed

src/Makefile.test.include

+2-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@ FUZZ_SUITE = \
5959
test/setup_common.h \
6060
test/setup_common.cpp \
6161
test/fuzz/fuzz.cpp \
62-
test/fuzz/fuzz.h
62+
test/fuzz/fuzz.h \
63+
test/fuzz/FuzzedDataProvider.h
6364

6465
FUZZ_SUITE_LD_COMMON = \
6566
$(LIBBITCOIN_SERVER) \

src/test/fuzz/FuzzedDataProvider.h

+245
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
//===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// A single header library providing a utility class to break up an array of
9+
// bytes. Whenever run on the same input, provides the same output, as long as
10+
// its methods are called in the same order, with the same arguments.
11+
//===----------------------------------------------------------------------===//
12+
13+
#ifndef LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
14+
#define LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
15+
16+
#include <limits.h>
#include <stddef.h>
#include <stdint.h>

#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <initializer_list>
#include <limits>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
#include <vector>
27+
28+
// Splits a fuzzer-supplied byte array into typed values. Given the same input
// and the same sequence of calls (with the same arguments), the values returned
// are always the same. Byte/string consumers read from the front of the buffer,
// while the integral consumers read from the back.
class FuzzedDataProvider {
public:
  // |data| is an array of length |size| that the FuzzedDataProvider wraps to
  // provide more granular access. |data| must outlive the FuzzedDataProvider.
  FuzzedDataProvider(const uint8_t *data, size_t size)
      : data_ptr_(data), remaining_bytes_(size) {}
  ~FuzzedDataProvider() = default;

  // Returns a std::vector containing |num_bytes| of input data. If fewer than
  // |num_bytes| of data remain, returns a shorter std::vector containing all
  // of the data that's left. Can be used with any byte sized type, such as
  // char, unsigned char, uint8_t, etc.
  template <typename T> std::vector<T> ConsumeBytes(size_t num_bytes) {
    num_bytes = std::min(num_bytes, remaining_bytes_);
    return ConsumeBytes<T>(num_bytes, num_bytes);
  }

  // Similar to |ConsumeBytes|, but also appends the terminator value at the end
  // of the resulting vector, so the result always has at least one element.
  // Useful when a mutable null-terminated C-string is needed, for example. But
  // that is a rare case. Better avoid it, if possible, and prefer using
  // |ConsumeBytes| or |ConsumeBytesAsString| methods.
  template <typename T>
  std::vector<T> ConsumeBytesWithTerminator(size_t num_bytes,
                                            T terminator = 0) {
    num_bytes = std::min(num_bytes, remaining_bytes_);
    // One extra slot is allocated for the terminator; only |num_bytes| are
    // actually taken from the input.
    std::vector<T> result = ConsumeBytes<T>(num_bytes + 1, num_bytes);
    result.back() = terminator;
    return result;
  }

  // Returns a std::string containing |num_bytes| of input data. Using this and
  // |.c_str()| on the resulting string is the best way to get an immutable
  // null-terminated C string. If fewer than |num_bytes| of data remain, returns
  // a shorter std::string containing all of the data that's left.
  std::string ConsumeBytesAsString(size_t num_bytes) {
    static_assert(sizeof(std::string::value_type) == sizeof(uint8_t),
                  "ConsumeBytesAsString cannot convert the data to a string.");

    num_bytes = std::min(num_bytes, remaining_bytes_);
    std::string result(
        reinterpret_cast<const std::string::value_type *>(data_ptr_),
        num_bytes);
    Advance(num_bytes);
    return result;
  }

  // Returns a number in the range [min, max] by consuming bytes from the
  // input data. The value might not be uniformly distributed in the given
  // range. If there's no input data left, always returns |min|. |min| must
  // be less than or equal to |max|; otherwise the process is aborted.
  template <typename T> T ConsumeIntegralInRange(T min, T max) {
    static_assert(std::is_integral<T>::value, "An integral type is required.");
    static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type.");

    if (min > max)
      std::abort();

    // Use the biggest type possible to hold the range and the result. Both
    // bounds are converted explicitly so the subtraction is plain modular
    // unsigned arithmetic, even for signed |T|.
    uint64_t range = static_cast<uint64_t>(max) - static_cast<uint64_t>(min);
    uint64_t result = 0;
    size_t offset = 0;

    while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 &&
           remaining_bytes_ != 0) {
      // Pull bytes off the end of the seed data. Experimentally, this seems to
      // allow the fuzzer to more easily explore the input space. This makes
      // sense, since it works by modifying inputs that caused new code to run,
      // and this data is often used to encode length of data read by
      // |ConsumeBytes|. Separating out read lengths makes it easier to modify
      // the contents of the data that is actually read.
      --remaining_bytes_;
      result = (result << CHAR_BIT) | data_ptr_[remaining_bytes_];
      offset += CHAR_BIT;
    }

    // Avoid division by 0, in case |range + 1| results in overflow.
    if (range != std::numeric_limits<decltype(range)>::max())
      result = result % (range + 1);

    return static_cast<T>(static_cast<uint64_t>(min) + result);
  }

  // Returns a std::string of length from 0 to |max_length|. When it runs out of
  // input data, returns what remains of the input. Designed to be more stable
  // with respect to a fuzzer inserting characters than just picking a random
  // length and then consuming that many bytes with |ConsumeBytes|.
  std::string ConsumeRandomLengthString(size_t max_length) {
    // Reads bytes from the start of |data_ptr_|. Maps "\\" to "\", and maps "\"
    // followed by anything else to the end of the string. As a result of this
    // logic, a fuzzer can insert characters into the string, and the string
    // will be lengthened to include those new characters, resulting in a more
    // stable fuzzer than picking the length of a string independently from
    // picking its contents.
    std::string result;

    // Reserve the anticipated capacity to prevent several reallocations.
    result.reserve(std::min(max_length, remaining_bytes_));
    for (size_t i = 0; i < max_length && remaining_bytes_ != 0; ++i) {
      char next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
      Advance(1);
      if (next == '\\' && remaining_bytes_ != 0) {
        next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
        Advance(1);
        if (next != '\\')
          break;
      }
      result += next;
    }

    result.shrink_to_fit();
    return result;
  }

  // Returns a std::vector containing all remaining bytes of the input data.
  template <typename T> std::vector<T> ConsumeRemainingBytes() {
    return ConsumeBytes<T>(remaining_bytes_);
  }

  // Returns a std::string containing all remaining bytes of the input data.
  // Prefer using |ConsumeRemainingBytes| unless you actually need a std::string
  // object.
  std::string ConsumeRemainingBytesAsString() {
    return ConsumeBytesAsString(remaining_bytes_);
  }

  // Returns a number in the range [Type's min, Type's max]. The value might
  // not be uniformly distributed in the given range. If there's no input data
  // left, always returns the type's minimum value.
  template <typename T> T ConsumeIntegral() {
    return ConsumeIntegralInRange(std::numeric_limits<T>::min(),
                                  std::numeric_limits<T>::max());
  }

  // Reads one byte and returns a bool, or false when no data remains.
  bool ConsumeBool() { return 1 & ConsumeIntegral<uint8_t>(); }

  // Returns a copy of a value selected from a fixed-size |array|.
  template <typename T, size_t size>
  T PickValueInArray(const T (&array)[size]) {
    static_assert(size > 0, "The array must be non empty.");
    return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
  }

  // Returns a copy of a value selected from |list|. The list must not be
  // empty; its size is only known at run time, so an empty list aborts instead
  // of letting |size() - 1| wrap around and index out of bounds.
  template <typename T>
  T PickValueInArray(std::initializer_list<const T> list) {
    if (list.size() == 0)
      std::abort();

    return *(list.begin() + ConsumeIntegralInRange<size_t>(0, list.size() - 1));
  }

  // Return an enum value. The enum must start at 0 and be contiguous. It must
  // also contain |kMaxValue| aliased to its largest (inclusive) value. Such as:
  // enum class Foo { SomeValue, OtherValue, kMaxValue = OtherValue };
  template <typename T> T ConsumeEnum() {
    static_assert(std::is_enum<T>::value, "|T| must be an enum type.");
    return static_cast<T>(ConsumeIntegralInRange<uint32_t>(
        0, static_cast<uint32_t>(T::kMaxValue)));
  }

  // Reports the remaining bytes available for fuzzed input.
  size_t remaining_bytes() const { return remaining_bytes_; }

private:
  // Non-copyable: the provider only holds a cursor into the caller's buffer.
  FuzzedDataProvider(const FuzzedDataProvider &) = delete;
  FuzzedDataProvider &operator=(const FuzzedDataProvider &) = delete;

  // Moves the read cursor forward by |num_bytes|. Aborts if that would run
  // past the end of the input.
  void Advance(size_t num_bytes) {
    if (num_bytes > remaining_bytes_)
      std::abort();

    data_ptr_ += num_bytes;
    remaining_bytes_ -= num_bytes;
  }

  // Returns a vector of |size| value-initialized elements whose first
  // |num_bytes_to_consume| elements are copied from the front of the input.
  // Aborts if asked to copy more bytes than the vector can hold.
  template <typename T>
  std::vector<T> ConsumeBytes(size_t size, size_t num_bytes_to_consume) {
    static_assert(sizeof(T) == sizeof(uint8_t), "Incompatible data type.");

    if (num_bytes_to_consume > size)
      std::abort();

    // The point of using the size-based constructor below is to increase the
    // odds of having a vector object with capacity being equal to the length.
    // That part is always implementation specific, but at least both libc++ and
    // libstdc++ allocate the requested number of bytes in that constructor,
    // which seems to be a natural choice for other implementations as well.
    // To increase the odds even more, we also call |shrink_to_fit| below.
    std::vector<T> result(size);

    // memcpy requires valid pointers even for a zero-length copy, and both
    // |result.data()| and |data_ptr_| may be null when nothing is consumed.
    if (num_bytes_to_consume != 0) {
      std::memcpy(result.data(), data_ptr_, num_bytes_to_consume);
      Advance(num_bytes_to_consume);
    }

    // Even though |shrink_to_fit| is also implementation specific, we expect it
    // to provide an additional assurance in case vector's constructor allocated
    // a buffer which is larger than the actual amount of data we put inside it.
    result.shrink_to_fit();
    return result;
  }

  // Reinterprets the unsigned |value| as the same-width type |TS| with
  // two's-complement semantics, without relying on an out-of-range
  // unsigned-to-signed conversion.
  template <typename TS, typename TU> TS ConvertUnsignedToSigned(TU value) {
    static_assert(sizeof(TS) == sizeof(TU), "Incompatible data types.");
    static_assert(!std::numeric_limits<TU>::is_signed,
                  "Source type must be unsigned.");

    // TODO(Dor1s): change to `if constexpr` once C++17 becomes mainstream.
    if (std::numeric_limits<TS>::is_modulo)
      return static_cast<TS>(value);

    // Avoid using implementation-defined unsigned to signed conversions.
    // To learn more, see https://stackoverflow.com/questions/13150449.
    if (value <= std::numeric_limits<TS>::max())
      return static_cast<TS>(value);

    // Here |value| > TS max. Subtracting |TS_min| reinterpreted as unsigned
    // (i.e. 2^(N-1)) yields an offset in [0, TS max], which converts to |TS|
    // without loss; adding it back to |TS_min| gives the wrapped value.
    constexpr auto TS_min = std::numeric_limits<TS>::min();
    const TU offset = static_cast<TU>(value - static_cast<TU>(TS_min));
    return static_cast<TS>(TS_min + static_cast<TS>(offset));
  }

  const uint8_t *data_ptr_;
  size_t remaining_bytes_;
};
244+
245+
#endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_

test/lint/lint-filenames.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ export LC_ALL=C
1212
EXIT_CODE=0
1313
OUTPUT=$(git ls-files --full-name -- "*.[cC][pP][pP]" "*.[hH]" "*.[pP][yY]" "*.[sS][hH]" | \
1414
grep -vE '^[a-z0-9_./-]+$' | \
15-
grep -vE '^src/(secp256k1|univalue)/')
15+
grep -vE '^src/(secp256k1/|univalue/|test/fuzz/FuzzedDataProvider.h)')
1616

1717
if [[ ${OUTPUT} != "" ]]; then
1818
echo "Use only lowercase alphanumerics (a-z0-9), underscores (_), hyphens (-) and dots (.)"

test/lint/lint-include-guards.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ export LC_ALL=C
1010
HEADER_ID_PREFIX="BITCOIN_"
1111
HEADER_ID_SUFFIX="_H"
1212

13-
REGEXP_EXCLUDE_FILES_WITH_PREFIX="src/(crypto/ctaes/|leveldb/|secp256k1/|tinyformat.h|univalue/)"
13+
REGEXP_EXCLUDE_FILES_WITH_PREFIX="src/(crypto/ctaes/|leveldb/|secp256k1/|test/fuzz/FuzzedDataProvider.h|tinyformat.h|univalue/)"
1414

1515
EXIT_CODE=0
1616
for HEADER_FILE in $(git ls-files -- "*.h" | grep -vE "^${REGEXP_EXCLUDE_FILES_WITH_PREFIX}")

0 commit comments

Comments
 (0)