|
1 //===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===// |
|
2 // |
|
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
|
4 // See https://llvm.org/LICENSE.txt for license information. |
|
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
|
6 // |
|
7 //===----------------------------------------------------------------------===// |
|
8 // A single header library providing an utility class to break up an array of |
|
9 // bytes. Whenever run on the same input, provides the same output, as long as |
|
10 // its methods are called in the same order, with the same arguments. |
|
11 //===----------------------------------------------------------------------===// |
|
12 |
|
13 #ifndef LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_ |
|
14 #define LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_ |
|
15 |
|
16 #include <algorithm> |
|
17 #include <climits> |
|
18 #include <cstddef> |
|
19 #include <cstdint> |
|
20 #include <cstring> |
|
21 #include <initializer_list> |
|
22 #include <string> |
|
23 #include <type_traits> |
|
24 #include <utility> |
|
25 #include <vector> |
|
26 |
|
27 // In addition to the comments below, the API is also briefly documented at |
|
28 // https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#fuzzed-data-provider |
|
29 class FuzzedDataProvider |
|
30 { |
|
31 public: |
|
32 // |data| is an array of length |size| that the FuzzedDataProvider wraps |
|
33 // to provide more granular access. |data| must outlive the |
|
34 // FuzzedDataProvider. |
|
35 FuzzedDataProvider(const uint8_t *data, size_t size) |
|
36 : data_ptr_(data), remaining_bytes_(size) |
|
37 { |
|
38 } |
|
39 ~FuzzedDataProvider() = default; |
|
40 |
|
41 // Returns a std::vector containing |num_bytes| of input data. If fewer |
|
42 // than |num_bytes| of data remain, returns a shorter std::vector |
|
43 // containing all of the data that's left. Can be used with any byte |
|
44 // sized type, such as char, unsigned char, uint8_t, etc. |
|
45 template <typename T> std::vector<T> ConsumeBytes(size_t num_bytes) |
|
46 { |
|
47 num_bytes = std::min(num_bytes, remaining_bytes_); |
|
48 return ConsumeBytes<T>(num_bytes, num_bytes); |
|
49 } |
|
50 |
|
51 // Similar to |ConsumeBytes|, but also appends the terminator value at |
|
52 // the end of the resulting vector. Useful, when a mutable |
|
53 // null-terminated C-string is needed, for example. But that is a rare |
|
54 // case. Better avoid it, if possible, and prefer using |ConsumeBytes| |
|
55 // or |ConsumeBytesAsString| methods. |
|
56 template <typename T> |
|
57 std::vector<T> ConsumeBytesWithTerminator(size_t num_bytes, |
|
58 T terminator = 0) |
|
59 { |
|
60 num_bytes = std::min(num_bytes, remaining_bytes_); |
|
61 std::vector<T> result = |
|
62 ConsumeBytes<T>(num_bytes + 1, num_bytes); |
|
63 result.back() = terminator; |
|
64 return result; |
|
65 } |
|
66 |
|
67 // Returns a std::string containing |num_bytes| of input data. Using |
|
68 // this and |
|
69 // |.c_str()| on the resulting string is the best way to get an |
|
70 // immutable null-terminated C string. If fewer than |num_bytes| of data |
|
71 // remain, returns a shorter std::string containing all of the data |
|
72 // that's left. |
|
73 std::string ConsumeBytesAsString(size_t num_bytes) |
|
74 { |
|
75 static_assert(sizeof(std::string::value_type) == |
|
76 sizeof(uint8_t), |
|
77 "ConsumeBytesAsString cannot convert the data to " |
|
78 "a string."); |
|
79 |
|
80 num_bytes = std::min(num_bytes, remaining_bytes_); |
|
81 std::string result( |
|
82 reinterpret_cast<const std::string::value_type *>( |
|
83 data_ptr_), |
|
84 num_bytes); |
|
85 Advance(num_bytes); |
|
86 return result; |
|
87 } |
|
88 |
|
89 // Returns a number in the range [min, max] by consuming bytes from the |
|
90 // input data. The value might not be uniformly distributed in the given |
|
91 // range. If there's no input data left, always returns |min|. |min| |
|
92 // must be less than or equal to |max|. |
|
93 template <typename T> T ConsumeIntegralInRange(T min, T max) |
|
94 { |
|
95 static_assert(std::is_integral<T>::value, |
|
96 "An integral type is required."); |
|
97 static_assert(sizeof(T) <= sizeof(uint64_t), |
|
98 "Unsupported integral type."); |
|
99 |
|
100 if (min > max) |
|
101 abort(); |
|
102 |
|
103 // Use the biggest type possible to hold the range and the |
|
104 // result. |
|
105 uint64_t range = static_cast<uint64_t>(max) - min; |
|
106 uint64_t result = 0; |
|
107 size_t offset = 0; |
|
108 |
|
109 while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 && |
|
110 remaining_bytes_ != 0) { |
|
111 // Pull bytes off the end of the seed data. |
|
112 // Experimentally, this seems to allow the fuzzer to |
|
113 // more easily explore the input space. This makes |
|
114 // sense, since it works by modifying inputs that caused |
|
115 // new code to run, and this data is often used to |
|
116 // encode length of data read by |ConsumeBytes|. |
|
117 // Separating out read lengths makes it easier modify |
|
118 // the contents of the data that is actually read. |
|
119 --remaining_bytes_; |
|
120 result = |
|
121 (result << CHAR_BIT) | data_ptr_[remaining_bytes_]; |
|
122 offset += CHAR_BIT; |
|
123 } |
|
124 |
|
125 // Avoid division by 0, in case |range + 1| results in overflow. |
|
126 if (range != std::numeric_limits<decltype(range)>::max()) |
|
127 result = result % (range + 1); |
|
128 |
|
129 return static_cast<T>(min + result); |
|
130 } |
|
131 |
|
132 // Returns a std::string of length from 0 to |max_length|. When it runs |
|
133 // out of input data, returns what remains of the input. Designed to be |
|
134 // more stable with respect to a fuzzer inserting characters than just |
|
135 // picking a random length and then consuming that many bytes with |
|
136 // |ConsumeBytes|. |
|
137 std::string ConsumeRandomLengthString(size_t max_length) |
|
138 { |
|
139 // Reads bytes from the start of |data_ptr_|. Maps "\\" to "\", |
|
140 // and maps "\" followed by anything else to the end of the |
|
141 // string. As a result of this logic, a fuzzer can insert |
|
142 // characters into the string, and the string will be lengthened |
|
143 // to include those new characters, resulting in a more stable |
|
144 // fuzzer than picking the length of a string independently from |
|
145 // picking its contents. |
|
146 std::string result; |
|
147 |
|
148 // Reserve the anticipated capaticity to prevent several |
|
149 // reallocations. |
|
150 result.reserve(std::min(max_length, remaining_bytes_)); |
|
151 for (size_t i = 0; i < max_length && remaining_bytes_ != 0; |
|
152 ++i) { |
|
153 char next = ConvertUnsignedToSigned<char>(data_ptr_[0]); |
|
154 Advance(1); |
|
155 if (next == '\\' && remaining_bytes_ != 0) { |
|
156 next = |
|
157 ConvertUnsignedToSigned<char>(data_ptr_[0]); |
|
158 Advance(1); |
|
159 if (next != '\\') |
|
160 break; |
|
161 } |
|
162 result += next; |
|
163 } |
|
164 |
|
165 result.shrink_to_fit(); |
|
166 return result; |
|
167 } |
|
168 |
|
169 // Returns a std::vector containing all remaining bytes of the input |
|
170 // data. |
|
171 template <typename T> std::vector<T> ConsumeRemainingBytes() |
|
172 { |
|
173 return ConsumeBytes<T>(remaining_bytes_); |
|
174 } |
|
175 |
|
176 // Returns a std::string containing all remaining bytes of the input |
|
177 // data. Prefer using |ConsumeRemainingBytes| unless you actually need a |
|
178 // std::string object. |
|
179 std::string ConsumeRemainingBytesAsString() |
|
180 { |
|
181 return ConsumeBytesAsString(remaining_bytes_); |
|
182 } |
|
183 |
|
184 // Returns a number in the range [Type's min, Type's max]. The value |
|
185 // might not be uniformly distributed in the given range. If there's no |
|
186 // input data left, always returns |min|. |
|
187 template <typename T> T ConsumeIntegral() |
|
188 { |
|
189 return ConsumeIntegralInRange(std::numeric_limits<T>::min(), |
|
190 std::numeric_limits<T>::max()); |
|
191 } |
|
192 |
|
193 // Reads one byte and returns a bool, or false when no data remains. |
|
194 bool ConsumeBool() |
|
195 { |
|
196 return 1 & ConsumeIntegral<uint8_t>(); |
|
197 } |
|
198 |
|
199 // Returns a copy of the value selected from the given fixed-size |
|
200 // |array|. |
|
201 template <typename T, size_t size> |
|
202 T PickValueInArray(const T (&array)[size]) |
|
203 { |
|
204 static_assert(size > 0, "The array must be non empty."); |
|
205 return array[ConsumeIntegralInRange<size_t>(0, size - 1)]; |
|
206 } |
|
207 |
|
208 template <typename T> |
|
209 T PickValueInArray(std::initializer_list<const T> list) |
|
210 { |
|
211 // TODO(Dor1s): switch to static_assert once C++14 is allowed. |
|
212 if (!list.size()) |
|
213 abort(); |
|
214 |
|
215 return *(list.begin() + |
|
216 ConsumeIntegralInRange<size_t>(0, list.size() - 1)); |
|
217 } |
|
218 |
|
219 // Returns an enum value. The enum must start at 0 and be contiguous. It |
|
220 // must also contain |kMaxValue| aliased to its largest (inclusive) |
|
221 // value. Such as: enum class Foo { SomeValue, OtherValue, kMaxValue = |
|
222 // OtherValue }; |
|
223 template <typename T> T ConsumeEnum() |
|
224 { |
|
225 static_assert(std::is_enum<T>::value, |
|
226 "|T| must be an enum type."); |
|
227 return static_cast<T>(ConsumeIntegralInRange<uint32_t>( |
|
228 0, static_cast<uint32_t>(T::kMaxValue))); |
|
229 } |
|
230 |
|
231 // Returns a floating point number in the range [0.0, 1.0]. If there's |
|
232 // no input data left, always returns 0. |
|
233 template <typename T> T ConsumeProbability() |
|
234 { |
|
235 static_assert(std::is_floating_point<T>::value, |
|
236 "A floating point type is required."); |
|
237 |
|
238 // Use different integral types for different floating point |
|
239 // types in order to provide better density of the resulting |
|
240 // values. |
|
241 using IntegralType = |
|
242 typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), |
|
243 uint32_t, uint64_t>::type; |
|
244 |
|
245 T result = static_cast<T>(ConsumeIntegral<IntegralType>()); |
|
246 result /= |
|
247 static_cast<T>(std::numeric_limits<IntegralType>::max()); |
|
248 return result; |
|
249 } |
|
250 |
|
251 // Returns a floating point value in the range [Type's lowest, Type's |
|
252 // max] by consuming bytes from the input data. If there's no input data |
|
253 // left, always returns approximately 0. |
|
254 template <typename T> T ConsumeFloatingPoint() |
|
255 { |
|
256 return ConsumeFloatingPointInRange<T>( |
|
257 std::numeric_limits<T>::lowest(), |
|
258 std::numeric_limits<T>::max()); |
|
259 } |
|
260 |
|
261 // Returns a floating point value in the given range by consuming bytes |
|
262 // from the input data. If there's no input data left, returns |min|. |
|
263 // Note that |min| must be less than or equal to |max|. |
|
264 template <typename T> T ConsumeFloatingPointInRange(T min, T max) |
|
265 { |
|
266 if (min > max) |
|
267 abort(); |
|
268 |
|
269 T range = .0; |
|
270 T result = min; |
|
271 constexpr T zero(.0); |
|
272 if (max > zero && min < zero && |
|
273 max > min + std::numeric_limits<T>::max()) { |
|
274 // The diff |max - min| would overflow the given |
|
275 // floating point type. Use the half of the diff as the |
|
276 // range and consume a bool to decide whether the result |
|
277 // is in the first of the second part of the diff. |
|
278 range = (max / 2.0) - (min / 2.0); |
|
279 if (ConsumeBool()) { |
|
280 result += range; |
|
281 } |
|
282 } else { |
|
283 range = max - min; |
|
284 } |
|
285 |
|
286 return result + range * ConsumeProbability<T>(); |
|
287 } |
|
288 |
|
289 // Reports the remaining bytes available for fuzzed input. |
|
290 size_t remaining_bytes() |
|
291 { |
|
292 return remaining_bytes_; |
|
293 } |
|
294 |
|
295 private: |
|
296 FuzzedDataProvider(const FuzzedDataProvider &) = delete; |
|
297 FuzzedDataProvider &operator=(const FuzzedDataProvider &) = delete; |
|
298 |
|
299 void Advance(size_t num_bytes) |
|
300 { |
|
301 if (num_bytes > remaining_bytes_) |
|
302 abort(); |
|
303 |
|
304 data_ptr_ += num_bytes; |
|
305 remaining_bytes_ -= num_bytes; |
|
306 } |
|
307 |
|
308 template <typename T> |
|
309 std::vector<T> ConsumeBytes(size_t size, size_t num_bytes_to_consume) |
|
310 { |
|
311 static_assert(sizeof(T) == sizeof(uint8_t), |
|
312 "Incompatible data type."); |
|
313 |
|
314 // The point of using the size-based constructor below is to |
|
315 // increase the odds of having a vector object with capacity |
|
316 // being equal to the length. That part is always implementation |
|
317 // specific, but at least both libc++ and libstdc++ allocate the |
|
318 // requested number of bytes in that constructor, which seems to |
|
319 // be a natural choice for other implementations as well. To |
|
320 // increase the odds even more, we also call |shrink_to_fit| |
|
321 // below. |
|
322 std::vector<T> result(size); |
|
323 if (size == 0) { |
|
324 if (num_bytes_to_consume != 0) |
|
325 abort(); |
|
326 return result; |
|
327 } |
|
328 |
|
329 std::memcpy(result.data(), data_ptr_, num_bytes_to_consume); |
|
330 Advance(num_bytes_to_consume); |
|
331 |
|
332 // Even though |shrink_to_fit| is also implementation specific, |
|
333 // we expect it to provide an additional assurance in case |
|
334 // vector's constructor allocated a buffer which is larger than |
|
335 // the actual amount of data we put inside it. |
|
336 result.shrink_to_fit(); |
|
337 return result; |
|
338 } |
|
339 |
|
340 template <typename TS, typename TU> TS ConvertUnsignedToSigned(TU value) |
|
341 { |
|
342 static_assert(sizeof(TS) == sizeof(TU), |
|
343 "Incompatible data types."); |
|
344 static_assert(!std::numeric_limits<TU>::is_signed, |
|
345 "Source type must be unsigned."); |
|
346 |
|
347 // TODO(Dor1s): change to `if constexpr` once C++17 becomes |
|
348 // mainstream. |
|
349 if (std::numeric_limits<TS>::is_modulo) |
|
350 return static_cast<TS>(value); |
|
351 |
|
352 // Avoid using implementation-defined unsigned to signer |
|
353 // conversions. To learn more, see |
|
354 // https://stackoverflow.com/questions/13150449. |
|
355 if (value <= std::numeric_limits<TS>::max()) { |
|
356 return static_cast<TS>(value); |
|
357 } else { |
|
358 constexpr auto TS_min = std::numeric_limits<TS>::min(); |
|
359 return TS_min + static_cast<char>(value - TS_min); |
|
360 } |
|
361 } |
|
362 |
|
363 const uint8_t *data_ptr_; |
|
364 size_t remaining_bytes_; |
|
365 }; |
|
366 |
|
367 #endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_ |
|
368 // no-check-code since this is from a third party |