contrib/fuzz/manifest.cc
author Manuel Jacob <me@manueljacob.de>
Mon, 11 Jul 2022 01:51:20 +0200
branchstable
changeset 49378 094a5fa3cf52
parent 44986 0ff59434af72
permissions -rw-r--r--
procutil: make stream detection in make_line_buffered more correct and strict In make_line_buffered(), we don’t want to wrap the stream if we know that lines get flushed to the underlying raw stream already. Previously, the heuristic was too optimistic. It assumed that any stream which is not an instance of io.BufferedIOBase doesn’t need wrapping. However, there are buffered streams that aren’t instances of io.BufferedIOBase, like Mercurial’s own winstdout. The new logic is different in two ways: First, only for the check, if unwraps any combination of WriteAllWrapper and winstdout. Second, it skips wrapping the stream only if it is an instance of io.RawIOBase (or already wrapped). If it is an instance of io.BufferedIOBase, it gets wrapped. In any other case, the function raises an exception. This ensures that, if an unknown stream is passed or we add another wrapper in the future, we don’t wrap the stream if it’s already line buffered or not wrap the stream if it’s not line buffered. In fact, this was already helpful during development of this change. Without it, I possibly would have forgot that WriteAllWrapper needs to be ignored for the check, leading to unnecessary wrapping if stdout is unbuffered. The alternative would have been to always wrap unknown streams. However, I don’t think that anyone would benefit from being less strict. We can expect streams from the standard library to be subclassing either io.RawIOBase or io.BufferedIOBase, so running Mercurial in the standard way should not regress by this change. Py2exe might replace sys.stdout and sys.stderr, but that currently breaks Mercurial anyway and also these streams don’t claim to be interactive, so this function is not called for them.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
40053
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
     1
#include <Python.h>
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
     2
#include <assert.h>
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
     3
#include <stdlib.h>
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
     4
#include <unistd.h>
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
     5
44986
0ff59434af72 fuzz: tell manifest fuzzer about longer node hashes
Augie Fackler <augie@google.com>
parents: 43859
diff changeset
     6
#include "FuzzedDataProvider.h"
41013
ef103c96ed33 fuzz: extract Python initialization to utility package
Augie Fackler <augie@google.com>
parents: 40373
diff changeset
     7
#include "pyutil.h"
ef103c96ed33 fuzz: extract Python initialization to utility package
Augie Fackler <augie@google.com>
parents: 40373
diff changeset
     8
40053
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
     9
#include <string>
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    10
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    11
extern "C" {
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    12
43859
8766728dbce6 fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents: 41312
diff changeset
    13
static PYCODETYPE *code;
40373
c3ab0a89331d fuzz: move many initialization steps into LLVMFuzzerInitialize
Augie Fackler <augie@google.com>
parents: 40280
diff changeset
    14
40053
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    15
extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    16
{
41013
ef103c96ed33 fuzz: extract Python initialization to utility package
Augie Fackler <augie@google.com>
parents: 40373
diff changeset
    17
	contrib::initpy(*argv[0]);
43859
8766728dbce6 fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents: 41312
diff changeset
    18
	code = (PYCODETYPE *)Py_CompileString(R"py(
40053
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    19
try:
43859
8766728dbce6 fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents: 41312
diff changeset
    20
  lm = parsers.lazymanifest(mdata)
40053
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    21
  # iterate the whole thing, which causes the code to fully parse
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    22
  # every line in the manifest
41312
d60bd5c71cbb fuzz: exercise more of the lazymanifest code
Augie Fackler <raf@durin42.com>
parents: 41311
diff changeset
    23
  for e, _, _ in lm.iterentries():
d60bd5c71cbb fuzz: exercise more of the lazymanifest code
Augie Fackler <raf@durin42.com>
parents: 41311
diff changeset
    24
      # also exercise __getitem__ et al
d60bd5c71cbb fuzz: exercise more of the lazymanifest code
Augie Fackler <raf@durin42.com>
parents: 41311
diff changeset
    25
      lm[e]
d60bd5c71cbb fuzz: exercise more of the lazymanifest code
Augie Fackler <raf@durin42.com>
parents: 41311
diff changeset
    26
      e in lm
d60bd5c71cbb fuzz: exercise more of the lazymanifest code
Augie Fackler <raf@durin42.com>
parents: 41311
diff changeset
    27
      (e + 'nope') in lm
44986
0ff59434af72 fuzz: tell manifest fuzzer about longer node hashes
Augie Fackler <augie@google.com>
parents: 43859
diff changeset
    28
  lm[b'xyzzy'] = (b'\0' * nlen, 'x')
40053
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    29
  # do an insert, text should change
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    30
  assert lm.text() != mdata, "insert should change text and didn't: %r %r" % (lm.text(), mdata)
41312
d60bd5c71cbb fuzz: exercise more of the lazymanifest code
Augie Fackler <raf@durin42.com>
parents: 41311
diff changeset
    31
  cloned = lm.filtercopy(lambda x: x != 'xyzzy')
d60bd5c71cbb fuzz: exercise more of the lazymanifest code
Augie Fackler <raf@durin42.com>
parents: 41311
diff changeset
    32
  assert cloned.text() == mdata, 'cloned text should equal mdata'
d60bd5c71cbb fuzz: exercise more of the lazymanifest code
Augie Fackler <raf@durin42.com>
parents: 41311
diff changeset
    33
  cloned.diff(lm)
40053
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    34
  del lm[b'xyzzy']
41312
d60bd5c71cbb fuzz: exercise more of the lazymanifest code
Augie Fackler <raf@durin42.com>
parents: 41311
diff changeset
    35
  cloned.diff(lm)
40053
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    36
  # should be back to the same
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    37
  assert lm.text() == mdata, "delete should have restored text but didn't: %r %r" % (lm.text(), mdata)
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    38
except Exception as e:
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    39
  pass
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    40
  # uncomment this print if you're editing this Python code
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    41
  # to debug failures.
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    42
  # print e
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    43
)py",
43859
8766728dbce6 fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents: 41312
diff changeset
    44
	                                      "fuzzer", Py_file_input);
40373
c3ab0a89331d fuzz: move many initialization steps into LLVMFuzzerInitialize
Augie Fackler <augie@google.com>
parents: 40280
diff changeset
    45
	return 0;
c3ab0a89331d fuzz: move many initialization steps into LLVMFuzzerInitialize
Augie Fackler <augie@google.com>
parents: 40280
diff changeset
    46
}
c3ab0a89331d fuzz: move many initialization steps into LLVMFuzzerInitialize
Augie Fackler <augie@google.com>
parents: 40280
diff changeset
    47
c3ab0a89331d fuzz: move many initialization steps into LLVMFuzzerInitialize
Augie Fackler <augie@google.com>
parents: 40280
diff changeset
    48
int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
c3ab0a89331d fuzz: move many initialization steps into LLVMFuzzerInitialize
Augie Fackler <augie@google.com>
parents: 40280
diff changeset
    49
{
41311
44cd432aed9f fuzz: restrict manifest input size
Augie Fackler <raf@durin42.com>
parents: 41013
diff changeset
    50
	// Don't allow fuzzer inputs larger than 100k, since we'll just bog
44cd432aed9f fuzz: restrict manifest input size
Augie Fackler <raf@durin42.com>
parents: 41013
diff changeset
    51
	// down and not accomplish much.
44cd432aed9f fuzz: restrict manifest input size
Augie Fackler <raf@durin42.com>
parents: 41013
diff changeset
    52
	if (Size > 100000) {
44cd432aed9f fuzz: restrict manifest input size
Augie Fackler <raf@durin42.com>
parents: 41013
diff changeset
    53
		return 0;
44cd432aed9f fuzz: restrict manifest input size
Augie Fackler <raf@durin42.com>
parents: 41013
diff changeset
    54
	}
44986
0ff59434af72 fuzz: tell manifest fuzzer about longer node hashes
Augie Fackler <augie@google.com>
parents: 43859
diff changeset
    55
	FuzzedDataProvider provider(Data, Size);
0ff59434af72 fuzz: tell manifest fuzzer about longer node hashes
Augie Fackler <augie@google.com>
parents: 43859
diff changeset
    56
	Py_ssize_t nodelength = provider.ConsumeBool() ? 20 : 32;
0ff59434af72 fuzz: tell manifest fuzzer about longer node hashes
Augie Fackler <augie@google.com>
parents: 43859
diff changeset
    57
	PyObject *nlen = PyLong_FromSsize_t(nodelength);
40373
c3ab0a89331d fuzz: move many initialization steps into LLVMFuzzerInitialize
Augie Fackler <augie@google.com>
parents: 40280
diff changeset
    58
	PyObject *mtext =
c3ab0a89331d fuzz: move many initialization steps into LLVMFuzzerInitialize
Augie Fackler <augie@google.com>
parents: 40280
diff changeset
    59
	    PyBytes_FromStringAndSize((const char *)Data, (Py_ssize_t)Size);
c3ab0a89331d fuzz: move many initialization steps into LLVMFuzzerInitialize
Augie Fackler <augie@google.com>
parents: 40280
diff changeset
    60
	PyObject *locals = PyDict_New();
c3ab0a89331d fuzz: move many initialization steps into LLVMFuzzerInitialize
Augie Fackler <augie@google.com>
parents: 40280
diff changeset
    61
	PyDict_SetItemString(locals, "mdata", mtext);
44986
0ff59434af72 fuzz: tell manifest fuzzer about longer node hashes
Augie Fackler <augie@google.com>
parents: 43859
diff changeset
    62
	PyDict_SetItemString(locals, "nlen", nlen);
41013
ef103c96ed33 fuzz: extract Python initialization to utility package
Augie Fackler <augie@google.com>
parents: 40373
diff changeset
    63
	PyObject *res = PyEval_EvalCode(code, contrib::pyglobals(), locals);
40100
ca4a32d0a4d6 fuzz: report error if Python code raised exception
Yuya Nishihara <yuya@tcha.org>
parents: 40089
diff changeset
    64
	if (!res) {
ca4a32d0a4d6 fuzz: report error if Python code raised exception
Yuya Nishihara <yuya@tcha.org>
parents: 40089
diff changeset
    65
		PyErr_Print();
ca4a32d0a4d6 fuzz: report error if Python code raised exception
Yuya Nishihara <yuya@tcha.org>
parents: 40089
diff changeset
    66
	}
ca4a32d0a4d6 fuzz: report error if Python code raised exception
Yuya Nishihara <yuya@tcha.org>
parents: 40089
diff changeset
    67
	Py_XDECREF(res);
40053
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    68
	Py_DECREF(locals);
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    69
	Py_DECREF(mtext);
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    70
	return 0; // Non-zero return values are reserved for future use.
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    71
}
8c692a6b5ad1 fuzz: new fuzzer for cext/manifest.c
Augie Fackler <augie@google.com>
parents:
diff changeset
    72
}