diff --git a/module-fuzzers/fuzz_helpers.h b/module-fuzzers/fuzz_helpers.h
new file mode 100644
index 0000000..7d089a4
--- /dev/null
+++ b/module-fuzzers/fuzz_helpers.h
@@ -0,0 +1,156 @@
+// fuzz_helpers.h — Shared infrastructure for CPython fuzz targets.
+//
+// Each CPython fuzzer binary (.cpp) includes this header. Since each binary
+// compiles exactly one .cpp file, all definitions here are safe (no ODR
+// issues across translation units).
+
+#ifndef FUZZ_HELPERS_H_
+#define FUZZ_HELPERS_H_
+
+// Python.h is included ahead of the standard headers.
+#include <Python.h>
+#include <fuzzer/FuzzedDataProvider.h>
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <string>
+
+// ---------------------------------------------------------------------------
+// LibFuzzer hooks
+// ---------------------------------------------------------------------------
+
+// Disable LeakSanitizer. CPython's pymalloc allocator uses custom freelists
+// and arenas that LSAN cannot track, causing thousands of false-positive leak
+// reports on every fuzzer iteration.
+extern "C" int __lsan_is_turned_off(void) { return 1; }
+
+// Initialize the CPython interpreter. Called once by libFuzzer before the
+// main fuzzing loop begins.
+//
+// Signal handlers are not installed and int<->str conversion is capped at
+// 8086 digits. The program name is taken from the first element of *argv.
+// On any configuration or initialization failure the process exits through
+// Py_ExitStatusException(); otherwise returns 0.
+extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) {
+  PyConfig config;
+  PyConfig_InitPythonConfig(&config);
+  config.install_signal_handlers = 0;
+  config.int_max_str_digits = 8086;
+
+  PyStatus status =
+      PyConfig_SetBytesString(&config, &config.program_name, (*argv)[0]);
+  if (!PyStatus_Exception(status)) {
+    status = Py_InitializeFromConfig(&config);
+  }
+  // The config is released on both the success and the failure path.
+  PyConfig_Clear(&config);
+  if (PyStatus_Exception(status)) {
+    Py_ExitStatusException(status);  // does not return
+  }
+
+  // Suppress Python warnings globally — all fuzzers want this.
+  PyRun_SimpleString("import warnings; warnings.filterwarnings('ignore')");
+
+  return 0;
+}
+
+// ---------------------------------------------------------------------------
+// RAII wrapper and macros
+// ---------------------------------------------------------------------------
+
+// RAII wrapper for PyObject*. Prevents reference leaks by calling Py_XDECREF
+// in the destructor. Non-copyable, move-enabled.
+//
+// The converting constructor is intentionally implicit so that
+// `PyRef r = PyObject_Call...(...)` adopts the new reference directly.
+struct PyRef {
+  PyObject *p;
+  PyRef(PyObject *o = nullptr) : p(o) {}
+  ~PyRef() { Py_XDECREF(p); }
+  operator PyObject *() const { return p; }
+  explicit operator bool() const { return p != nullptr; }
+
+  PyRef(const PyRef &) = delete;
+  PyRef &operator=(const PyRef &) = delete;
+  PyRef(PyRef &&o) noexcept : p(o.p) { o.p = nullptr; }
+  PyRef &operator=(PyRef &&o) noexcept {
+    if (this != &o) {
+      // Repoint first, release last: Py_XDECREF may run arbitrary code, and
+      // by then this wrapper must already be in its final state.
+      PyObject *old = p;
+      p = o.p;
+      o.p = nullptr;
+      Py_XDECREF(old);
+    }
+    return *this;
+  }
+};
+
+// Bail out of the current operation if a Python call returns NULL/false.
+// Clears the pending Python exception so the next iteration starts clean.
+// Only usable inside functions returning void.
+#define CHECK(x)     \
+  do {               \
+    if (!(x)) {      \
+      PyErr_Clear(); \
+      return;        \
+    }                \
+  } while (0)
+
+// Expand a std::string into (const char*, Py_ssize_t) for "y#" format codes.
+#define Y(s) (s).data(), (Py_ssize_t)(s).size()
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+// Import mod.attr and return a new reference. Aborts on failure — called only
+// during one-time init, so missing modules indicate a broken build.
+static PyObject *import_attr(const char *mod, const char *attr) {
+  PyObject *m = PyImport_ImportModule(mod);
+  if (!m) {
+    PyErr_Print();
+    abort();
+  }
+  PyObject *a = PyObject_GetAttrString(m, attr);
+  Py_DECREF(m);
+  if (!a) {
+    PyErr_Print();
+    abort();
+  }
+  return a;
+}
+
+// Convert raw fuzz bytes to a Python str using a fuzz-chosen decoding.
+// Different decodings give the fuzzer control over different codepoint ranges:
+//   0 — Latin-1: lossless 1:1 byte-to-codepoint (U+0000-U+00FF)
+//   1 — UTF-8: variable-width, full Unicode (invalid bytes -> U+FFFD)
+//   2 — UTF-16-LE: 2 bytes per codepoint, covers BMP including CJK ranges
+//   3 — UTF-32-LE: 4 bytes per codepoint, full Unicode incl. supplementary
+//
+// Only the low two bits of `method` are used. Returns a new reference, or
+// NULL if the decoder fails.
+static PyObject *fuzz_bytes_to_str(const std::string &data, int method) {
+  switch (method & 3) {
+    case 0:
+      return PyUnicode_DecodeLatin1(Y(data), NULL);
+    case 1:
+      return PyUnicode_DecodeUTF8(Y(data), "replace");
+    case 2: {
+      int order = -1;  // little-endian
+      // Y() supplies the length as Py_ssize_t, matching the other decoders.
+      return PyUnicode_DecodeUTF16(Y(data), "replace", &order);
+    }
+    default: {
+      int order = -1;  // little-endian
+      return PyUnicode_DecodeUTF32(Y(data), "replace", &order);
+    }
+  }
+}
+
+// Run a Python code string and extract a named attribute from the resulting
+// globals dict. Returns a new reference. Aborts on failure — called only
+// during one-time init.
+static PyObject *run_python_and_get(const char *code, const char *name) {
+  PyObject *globals = PyDict_New();
+  if (!globals) {
+    PyErr_Print();
+    abort();
+  }
+  if (PyDict_SetItemString(globals, "__builtins__",
+                           PyEval_GetBuiltins()) < 0) {
+    PyErr_Print();
+    abort();
+  }
+  PyObject *r = PyRun_String(code, Py_file_input, globals, globals);
+  if (!r) {
+    PyErr_Print();
+    abort();
+  }
+  Py_DECREF(r);
+  PyObject *attr = PyDict_GetItemString(globals, name);  // borrowed
+  if (!attr) {
+    // A missing key is reported as NULL *without* an exception being set,
+    // so PyErr_Print() has nothing to show; say what went wrong instead.
+    fprintf(stderr, "run_python_and_get: init code did not define '%s'\n",
+            name);
+    abort();
+  }
+  Py_INCREF(attr);
+  Py_DECREF(globals);
+  return attr;
+}
+
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+
+// Maximum fuzz input size (1 MB).
+static constexpr size_t kMaxInputSize = 0x100000;
+
+#endif  // FUZZ_HELPERS_H_
diff --git a/module-fuzzers/fuzz_locale.cpp b/module-fuzzers/fuzz_locale.cpp
new file mode 100644
index 0000000..62819d7
--- /dev/null
+++ b/module-fuzzers/fuzz_locale.cpp
@@ -0,0 +1,73 @@
+// fuzz_locale.cpp — Fuzzer for CPython's _locale C extension module.
+//
+// This fuzzer exercises the following CPython C extension module via
+// its Python API, called through the Python C API from C++:
+//
+//   _locale — strxfrm, strcoll
+//
+// All module functions are imported once during init and cached as static
+// PyObject* pointers. PyRef (RAII) prevents reference leaks.
+// Max input size: 64 KB.
+
+#include "fuzz_helpers.h"
+
+static PyObject *locale_strxfrm, *locale_strcoll;
+
+static int initialized = 0;
+
+// Import locale.strxfrm and locale.strcoll once; later calls are no-ops.
+static void init_locale(void) {
+  if (initialized) return;
+
+  locale_strxfrm = import_attr("locale", "strxfrm");
+  locale_strcoll = import_attr("locale", "strcoll");
+  assert(!PyErr_Occurred());
+  initialized = 1;
+}
+
+// op_locale: fuzz data selects target — strxfrm or strcoll.
+// Exercises the _locale C module.
+//
+// Consumption order: string encoding selector, target selector, then the
+// target-specific payload. Any Python exception is cleared before returning.
+static void op_locale(FuzzedDataProvider &fdp) {
+  int str_enc = fdp.ConsumeIntegralInRange<int>(0, 3);
+
+  enum { STRXFRM, STRCOLL, NUM_TARGETS };
+  int target_fn = fdp.ConsumeIntegralInRange<int>(0, NUM_TARGETS - 1);
+
+  switch (target_fn) {
+    case STRXFRM: {
+      // strxfrm: transform a string for locale-aware comparison.
+      std::string data = fdp.ConsumeRemainingBytesAsString();
+      PyRef pystr(fuzz_bytes_to_str(data, str_enc));
+      CHECK(pystr);
+      PyRef r = PyObject_CallFunction(locale_strxfrm, "O", (PyObject *)pystr);
+      break;
+    }
+    case STRCOLL: {
+      // strcoll: compare two substrings using locale collation rules.
+      // Both operands are independently produced from fuzz data.
+      int str_enc2 = fdp.ConsumeIntegralInRange<int>(0, 3);
+      // Explicit <size_t>: the bounds are an int literal and a size_t, so
+      // the template argument cannot be deduced.
+      size_t split =
+          fdp.ConsumeIntegralInRange<size_t>(0, fdp.remaining_bytes());
+      std::string data1 = fdp.ConsumeBytesAsString(split);
+      std::string data2 = fdp.ConsumeRemainingBytesAsString();
+      PyRef pystr1(fuzz_bytes_to_str(data1, str_enc));
+      CHECK(pystr1);
+      PyRef pystr2(fuzz_bytes_to_str(data2, str_enc2));
+      CHECK(pystr2);
+      PyRef r = PyObject_CallFunction(locale_strcoll, "OO",
+                                      (PyObject *)pystr1, (PyObject *)pystr2);
+      break;
+    }
+  }
+  if (PyErr_Occurred()) PyErr_Clear();
+}
+
+// libFuzzer entry point: inputs that are empty or larger than 64 KB are
+// skipped; everything else is handed to op_locale().
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  assert(Py_IsInitialized());
+  init_locale();
+  if (size < 1 || size > 0x10000) return 0;
+  if (PyErr_Occurred()) PyErr_Clear();
+
+  FuzzedDataProvider fdp(data, size);
+  op_locale(fdp);
+
+  return 0;
+}
diff --git a/module-fuzzers/fuzz_mmap.cpp b/module-fuzzers/fuzz_mmap.cpp
new file mode 100644
index 0000000..e813170
--- /dev/null
+++ b/module-fuzzers/fuzz_mmap.cpp
@@ -0,0 +1,194 @@
+// fuzz_mmap.cpp — Fuzzer for CPython's mmap C extension module.
+//
+// This fuzzer exercises the following CPython C extension module via
+// its Python API, called through the Python C API from C++:
+//
+//   mmap — anonymous mmap: write, find, rfind, read, readline,
+//          seek, resize, move, getitem, setitem, flush, size,
+//          tell, close
+//
+// All module functions are imported once during init and cached as static
+// PyObject* pointers. PyRef (RAII) prevents reference leaks.
+// Max input size: 64 KB.
+
+#include "fuzz_helpers.h"
+
+static PyObject *mmap_mmap;
+
+static int initialized = 0;
+
+// Import mmap.mmap once; later calls are no-ops.
+static void init_mmap(void) {
+  if (initialized) return;
+
+  mmap_mmap = import_attr("mmap", "mmap");
+  assert(!PyErr_Occurred());
+  initialized = 1;
+}
+
+// op_mmap: Create anonymous mmap, write data.
+// The fuzzer then selects the target.
+// Exercises the mmap C module's core operations.
+static void op_mmap(FuzzedDataProvider &fdp) {
+  enum {
+    FIND_RFIND,
+    READ_READLINE,
+    RESIZE_MOVE,
+    GETITEM_SETITEM,
+    FLUSH_SIZE_TELL,
+    READ_ALL,
+    NUM_TARGETS
+  };
+  int target_fn = fdp.ConsumeIntegralInRange<int>(0, NUM_TARGETS - 1);
+  if (fdp.remaining_bytes() == 0) return;
+  // Explicit template arguments throughout: the bounds mix int literals with
+  // size_t / Py_ssize_t values, so they cannot be deduced.
+  size_t data_len = fdp.ConsumeIntegralInRange<size_t>(
+      1, std::min(fdp.remaining_bytes(), (size_t)10000));
+  std::string data = fdp.ConsumeBytesAsString(data_len);
+  if (data.empty()) data.push_back('\0');
+
+  // mmap(-1, size)
+  Py_ssize_t map_size = data.size();
+  PyRef mm = PyObject_CallFunction(mmap_mmap, "in", -1, map_size);
+  CHECK(mm);
+
+  // Call mm.close() on every exit path, including CHECK() early returns.
+  // Declared after `mm`, so it runs before the mapping's reference is dropped.
+  // Every path below clears any pending exception before leaving.
+  struct MmapCloser {
+    PyObject *map;
+    ~MmapCloser() {
+      PyRef r = PyObject_CallMethod(map, "close", NULL);
+      if (PyErr_Occurred()) PyErr_Clear();
+    }
+  } closer{mm};
+
+  // First write data and seek to 0.
+  PyRef pydata = PyBytes_FromStringAndSize(Y(data));
+  CHECK(pydata);
+  {
+    PyRef r = PyObject_CallMethod(mm, "write", "O", (PyObject *)pydata);
+    CHECK(r);
+  }
+  {
+    PyRef r = PyObject_CallMethod(mm, "seek", "i", 0);
+    CHECK(r);
+  }
+
+  // Now call the target method.
+  switch (target_fn) {
+    case FIND_RFIND: {
+      // find + rfind with fuzz-driven pattern and offsets
+      long start = fdp.ConsumeIntegralInRange<long>(0, map_size);
+      long end = fdp.ConsumeIntegralInRange<long>(start, map_size);
+      std::string pat_str = fdp.ConsumeRemainingBytesAsString();
+      PyRef pat = PyBytes_FromStringAndSize(Y(pat_str));
+      CHECK(pat);
+      {
+        PyRef r = PyObject_CallMethod(mm, "find", "Oll",
+                                      (PyObject *)pat, start, end);
+        if (PyErr_Occurred()) PyErr_Clear();
+      }
+      {
+        PyRef r = PyObject_CallMethod(mm, "rfind", "Oll",
+                                      (PyObject *)pat, start, end);
+        if (PyErr_Occurred()) PyErr_Clear();
+      }
+      break;
+    }
+    case READ_READLINE: {
+      // read + readline with fuzz-driven count and seek position
+      {
+        long n = fdp.ConsumeIntegralInRange<long>(0, map_size);
+        PyRef r = PyObject_CallMethod(mm, "read", "l", n);
+        if (PyErr_Occurred()) PyErr_Clear();
+      }
+      {
+        long pos = fdp.ConsumeIntegralInRange<long>(0, map_size);
+        PyRef sk = PyObject_CallMethod(mm, "seek", "l", pos);
+        if (PyErr_Occurred()) PyErr_Clear();
+        PyRef r = PyObject_CallMethod(mm, "readline", NULL);
+        if (PyErr_Occurred()) PyErr_Clear();
+      }
+      break;
+    }
+    case RESIZE_MOVE: {
+      // resize + move with fuzz-driven sizes and offsets
+      long new_size =
+          fdp.ConsumeIntegralInRange<long>(1, map_size * 4 + 1);
+      {
+        PyRef r = PyObject_CallMethod(mm, "resize", "l", new_size);
+        if (PyErr_Occurred()) PyErr_Clear();
+      }
+      {
+        // After resize, effective size is new_size (if resize succeeded)
+        // or map_size (if it failed). Use new_size as upper bound;
+        // mmap.move() will raise on out-of-bounds anyway.
+        long dest = fdp.ConsumeIntegralInRange<long>(0, new_size - 1);
+        long src = fdp.ConsumeIntegralInRange<long>(0, new_size - 1);
+        long max_count = new_size - std::max(dest, src);
+        long count = max_count > 0
+                         ? fdp.ConsumeIntegralInRange<long>(0, max_count)
+                         : 0;
+        PyRef r = PyObject_CallMethod(mm, "move", "lll", dest, src, count);
+        if (PyErr_Occurred()) PyErr_Clear();
+      }
+      break;
+    }
+    case GETITEM_SETITEM: {
+      // getitem + setitem
+      {
+        PyRef idx = PyLong_FromLong(0);
+        CHECK(idx);
+        PyRef r = PyObject_GetItem(mm, idx);
+        if (PyErr_Occurred()) PyErr_Clear();
+      }
+      {
+        Py_ssize_t n = map_size < 4 ? map_size : 4;
+        PyRef start = PyLong_FromLong(0);
+        CHECK(start);
+        PyRef stop = PyLong_FromLong(n);
+        CHECK(stop);
+        PyRef sl = PySlice_New(start, stop, NULL);
+        CHECK(sl);
+        PyRef r = PyObject_GetItem(mm, sl);
+        if (PyErr_Occurred()) PyErr_Clear();
+      }
+      {
+        // `data` is never empty here (see the push_back guard above).
+        PyRef idx = PyLong_FromLong(0);
+        CHECK(idx);
+        PyRef val = PyLong_FromLong((unsigned char)data[0]);
+        CHECK(val);
+        PyObject_SetItem(mm, idx, val);
+        if (PyErr_Occurred()) PyErr_Clear();
+      }
+      break;
+    }
+    case FLUSH_SIZE_TELL: {
+      // flush + size + tell
+      {
+        PyRef r = PyObject_CallMethod(mm, "flush", NULL);
+        if (PyErr_Occurred()) PyErr_Clear();
+      }
+      {
+        PyRef r = PyObject_CallMethod(mm, "size", NULL);
+        if (PyErr_Occurred()) PyErr_Clear();
+      }
+      {
+        PyRef r = PyObject_CallMethod(mm, "tell", NULL);
+        if (PyErr_Occurred()) PyErr_Clear();
+      }
+      break;
+    }
+    case READ_ALL: {
+      // read all
+      PyRef r = PyObject_CallMethod(mm, "read", NULL);
+      if (PyErr_Occurred()) PyErr_Clear();
+      break;
+    }
+  }
+  // `closer` calls mm.close() here.
+}
+
+// libFuzzer entry point: inputs that are empty or larger than 64 KB are
+// skipped; everything else is handed to op_mmap().
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  assert(Py_IsInitialized());
+  init_mmap();
+  if (size < 1 || size > 0x10000) return 0;
+  if (PyErr_Occurred()) PyErr_Clear();
+
+  FuzzedDataProvider fdp(data, size);
+  op_mmap(fdp);
+
+  return 0;
+}
diff --git a/module-fuzzers/fuzz_operator.cpp b/module-fuzzers/fuzz_operator.cpp
new file mode 100644
index 0000000..e3a003a
--- /dev/null
+++ b/module-fuzzers/fuzz_operator.cpp
@@ -0,0 +1,190 @@
+// fuzz_operator.cpp — Fuzzer for CPython's _operator C
extension module.
+//
+// This fuzzer exercises the following CPython C extension module via
+// its Python API, called through the Python C API from C++:
+//
+//   _operator — lt, gt, eq, ne, contains, countOf, indexOf,
+//               length_hint, concat, getitem, methodcaller
+//
+// All module functions are imported once during init and cached as static
+// PyObject* pointers. PyRef (RAII) prevents reference leaks.
+// Max input size: 64 KB.
+
+#include "fuzz_helpers.h"
+
+static PyObject *op_lt, *op_gt, *op_eq, *op_ne;
+static PyObject *op_contains, *op_countOf, *op_indexOf, *op_length_hint;
+static PyObject *op_concat, *op_getitem, *op_methodcaller;
+
+static int initialized = 0;
+
+// Resolve every cached `operator` callable exactly once. Each lookup goes
+// through import_attr(), which aborts the process if it fails.
+static void init_operator(void) {
+  if (initialized) return;
+
+  // Destination slot paired with the attribute name to fetch, in the order
+  // the lookups are performed.
+  const struct {
+    PyObject **slot;
+    const char *attr;
+  } kOperatorAttrs[] = {
+      {&op_lt, "lt"},
+      {&op_gt, "gt"},
+      {&op_eq, "eq"},
+      {&op_ne, "ne"},
+      {&op_contains, "contains"},
+      {&op_countOf, "countOf"},
+      {&op_indexOf, "indexOf"},
+      {&op_length_hint, "length_hint"},
+      {&op_concat, "concat"},
+      {&op_getitem, "getitem"},
+      {&op_methodcaller, "methodcaller"},
+  };
+  for (const auto &entry : kOperatorAttrs) {
+    *entry.slot = import_attr("operator", entry.attr);
+  }
+  assert(!PyErr_Occurred());
+  initialized = 1;
+}
+
+// op_operator: fuzzer selects operator variant — comparisons, sequence ops,
+// concat, getitem, methodcaller. Exercises the _operator C module.
+static void op_operator(FuzzedDataProvider &fdp) {
+  enum {
+    COMPARISONS,
+    SEQUENCE_OPS,
+    CONCAT,
+    GETITEM,
+    METHODCALLER,
+    CONTAINS,
+    NUM_TARGETS
+  };
+  int target_fn = fdp.ConsumeIntegralInRange<int>(0, NUM_TARGETS - 1);
+  if (fdp.remaining_bytes() == 0) return;
+  // Explicit <size_t>: the bounds are an int literal and a size_t, so the
+  // template argument cannot be deduced.
+  size_t data_len = fdp.ConsumeIntegralInRange<size_t>(
+      1, std::min(fdp.remaining_bytes(), (size_t)10000));
+  std::string data = fdp.ConsumeBytesAsString(data_len);
+
+  PyRef pydata = PyBytes_FromStringAndSize(Y(data));
+  CHECK(pydata);
+
+  // Call fn(a) / fn(a, b), discarding the result and any exception raised.
+  auto call1 = [](PyObject *fn, PyObject *a) {
+    PyRef r = PyObject_CallFunction(fn, "O", a);
+    if (PyErr_Occurred()) PyErr_Clear();
+  };
+  auto call2 = [](PyObject *fn, PyObject *a, PyObject *b) {
+    PyRef r = PyObject_CallFunction(fn, "OO", a, b);
+    if (PyErr_Occurred()) PyErr_Clear();
+  };
+
+  switch (target_fn) {
+    case COMPARISONS: {
+      // Comparisons: lt/gt/eq/ne(data, other)
+      std::string other = fdp.ConsumeRemainingBytesAsString();
+      PyRef pyother = PyBytes_FromStringAndSize(Y(other));
+      CHECK(pyother);
+      call2(op_lt, pydata, pyother);
+      call2(op_gt, pydata, pyother);
+      call2(op_eq, pydata, pyother);
+      call2(op_ne, pydata, pyother);
+      break;
+    }
+    case SEQUENCE_OPS: {
+      // Sequence ops: contains, countOf, indexOf, length_hint
+      if (data.empty()) break;
+      int byte = fdp.ConsumeIntegralInRange<int>(0, 255);
+      PyRef byte_val = PyLong_FromLong(byte);
+      CHECK(byte_val);
+      call2(op_contains, pydata, byte_val);
+      call2(op_countOf, pydata, byte_val);
+      call2(op_indexOf, pydata, byte_val);
+      call1(op_length_hint, pydata);
+      break;
+    }
+    case CONCAT: {
+      // concat(data, other)
+      std::string other = fdp.ConsumeRemainingBytesAsString();
+      PyRef pyother = PyBytes_FromStringAndSize(Y(other));
+      CHECK(pyother);
+      call2(op_concat, pydata, pyother);
+      break;
+    }
+    case GETITEM: {
+      // getitem(data, idx) + getitem(data, slice)
+      if (data.empty()) break;
+      long idx = fdp.ConsumeIntegralInRange<long>(0, data.size() - 1);
+      PyRef pyidx = PyLong_FromLong(idx);
+      CHECK(pyidx);
+      call2(op_getitem, pydata, pyidx);
+      {
+        long stop = fdp.ConsumeIntegralInRange<long>(0, data.size());
+        PyRef pystop = PyLong_FromLong(stop);
+        CHECK(pystop);
+        PyRef zero = PyLong_FromLong(0);
+        CHECK(zero);
+        PyRef sl = PySlice_New(zero, pystop, NULL);
+        CHECK(sl);
+        call2(op_getitem, pydata, sl);
+      }
+      break;
+    }
+    case METHODCALLER: {
+      // methodcaller('upper')(str) + methodcaller('encode', 'utf-8')(str)
+      int str_enc = fdp.ConsumeIntegralInRange<int>(0, 3);
+      std::string str_data = fdp.ConsumeRemainingBytesAsString();
+      PyRef pystr(fuzz_bytes_to_str(str_data, str_enc));
+      CHECK(pystr);
+      {
+        PyRef mc = PyObject_CallFunction(op_methodcaller, "s", "upper");
+        CHECK(mc);
+        call1(mc, pystr);
+      }
+      {
+        PyRef mc = PyObject_CallFunction(op_methodcaller, "ss",
+                                         "encode", "utf-8");
+        CHECK(mc);
+        call1(mc, pystr);
+      }
+      break;
+    }
+    case CONTAINS: {
+      // contains on bytes with fuzz-driven needle
+      std::string needle = fdp.ConsumeRemainingBytesAsString();
+      PyRef pyneedle = PyBytes_FromStringAndSize(Y(needle));
+      CHECK(pyneedle);
+      call2(op_contains, pydata, pyneedle);
+      break;
+    }
+  }
+}
+
+// libFuzzer entry point: inputs that are empty or larger than 64 KB are
+// skipped; everything else is handed to op_operator().
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  assert(Py_IsInitialized());
+  init_operator();
+  if (size < 1 || size > 0x10000) return 0;
+  if (PyErr_Occurred()) PyErr_Clear();
+
+  FuzzedDataProvider fdp(data, size);
+  op_operator(fdp);
+
+  return 0;
+}
diff --git a/module-fuzzers/fuzz_pickle.cpp b/module-fuzzers/fuzz_pickle.cpp
new file mode 100644
index 0000000..ebd218b
--- /dev/null
+++ b/module-fuzzers/fuzz_pickle.cpp
@@ -0,0 +1,491 @@
+// fuzz_pickle.cpp — Fuzzer for CPython's pickle C extension module.
+//
+// This fuzzer exercises the following CPython C extension module via
+// its Python API, called through the Python C API from C++:
+//
+//   _pickle — pickle.dumps() with 8 container types (bytes, str,
+//             list, tuple, set, frozenset, bytearray, dict) across
+//             protocols 0-5 and fix_imports flag.
+//             pickle.loads() via RestrictedUnpickler (blocks
+//             find_class), PersistentUnpickler (handles PERSID/
+//             BINPERSID), and RestrictedUnpickler with
+//             encoding='bytes'.
+//             Pickler chain: dump, clear_memo, dump, getvalue.
+//             Round-trip: dumps then loads.
+//
+// FDP selects one of 4 operation types. Each operation consumes further
+// bytes via FuzzedDataProvider to parameterize the call (protocol number,
+// container type, boolean flags).
+//
+// All module functions are imported once during init and cached as static
+// PyObject* pointers. Two pickle Unpickler subclasses (RestrictedUnpickler,
+// PersistentUnpickler) are defined via PyRun_String at init time.
+// PyRef (RAII) prevents reference leaks. Max input size: 1 MB.
+
+#include "fuzz_helpers.h"
+
+static PyObject *pickle_dumps, *pickle_loads;
+static PyObject *pickle_Pickler;
+static PyObject *bytesio_ctor;
+static PyObject *RestrictedUnpickler_cls, *PersistentUnpickler_cls;
+
+static int initialized = 0;
+
+// Resolve the cached pickle/io callables and define the two Unpickler
+// subclasses. Runs its body once; import_attr() and run_python_and_get()
+// abort the process on failure.
+static void init_pickle(void) {
+  if (initialized) return;
+
+  pickle_dumps = import_attr("pickle", "dumps");
+  pickle_loads = import_attr("pickle", "loads");
+  pickle_Pickler = import_attr("pickle", "Pickler");
+  bytesio_ctor = import_attr("io", "BytesIO");
+  // Python source for the helper classes. Method bodies must be indented
+  // deeper than their `def` line or the source fails to compile.
+  static const char *kPickleHelpers =
+      "import pickle, io\n"
+      "class RestrictedUnpickler(pickle.Unpickler):\n"
+      "    def find_class(self, module, name):\n"
+      "        raise pickle.UnpicklingError('restricted')\n"
+      "class PersistentUnpickler(pickle.Unpickler):\n"
+      "    def persistent_load(self, pid): return pid\n"
+      "    def find_class(self, module, name):\n"
+      "        raise pickle.UnpicklingError('restricted')\n";
+  // Each call executes the source in a fresh globals dict and pulls out one
+  // class.
+  RestrictedUnpickler_cls = run_python_and_get(kPickleHelpers,
+                                               "RestrictedUnpickler");
+  PersistentUnpickler_cls = run_python_and_get(kPickleHelpers,
+                                               "PersistentUnpickler");
+
+  assert(!PyErr_Occurred());
+  initialized = 1;
+}
+
+// Container types for build_pickle_container.
+enum ContainerType {
+  CT_RAW_BYTES, CT_STR, CT_LIST, CT_TUPLE,
+  CT_SET, CT_FROZENSET, CT_BYTEARRAY, CT_DICT,
+  NUM_CONTAINER_TYPES
+};
+
+// Hashable types that can be used as dict keys.
+// list, set, bytearray, and dict are unhashable.
+enum HashableType {
+  HT_RAW_BYTES, HT_STR, HT_INT, HT_FLOAT, HT_TUPLE, HT_FROZENSET,
+  NUM_HASHABLE_TYPES
+};
+
+// Build a single hashable Python value from fdp, suitable for dict keys.
+// The first consumed value selects the type; the payload follows. Returns a
+// new reference, or NULL if a Python allocation/decoding call fails.
+static PyObject *build_hashable_value(FuzzedDataProvider &fdp) {
+  // Template arguments are spelled out on every Consume* call:
+  // ConsumeIntegral/ConsumeFloatingPoint take no arguments to deduce from,
+  // and the ranged calls mix int literals with size_t bounds.
+  int t = fdp.ConsumeIntegralInRange<int>(0, NUM_HASHABLE_TYPES - 1);
+  switch (t) {
+    case HT_RAW_BYTES: {
+      size_t len = fdp.ConsumeIntegralInRange<size_t>(
+          0, std::min(fdp.remaining_bytes(), (size_t)10000));
+      std::string s = fdp.ConsumeBytesAsString(len);
+      return PyBytes_FromStringAndSize(Y(s));
+    }
+    case HT_STR: {
+      int str_enc = fdp.ConsumeIntegralInRange<int>(0, 3);
+      size_t len = fdp.ConsumeIntegralInRange<size_t>(
+          0, std::min(fdp.remaining_bytes(), (size_t)10000));
+      std::string s = fdp.ConsumeBytesAsString(len);
+      return fuzz_bytes_to_str(s, str_enc);
+    }
+    case HT_INT: {
+      long v = fdp.ConsumeIntegral<long>();
+      return PyLong_FromLong(v);
+    }
+    case HT_FLOAT: {
+      double v = fdp.ConsumeFloatingPoint<double>();
+      return PyFloat_FromDouble(v);
+    }
+    case HT_TUPLE: {
+      // Tuple of up to 200 ints.
+      size_t n = fdp.ConsumeIntegralInRange<size_t>(0, 200);
+      PyObject *tup = PyTuple_New((Py_ssize_t)n);
+      if (!tup) return NULL;
+      for (size_t i = 0; i < n; i++) {
+        long v = fdp.ConsumeIntegral<long>();
+        PyObject *item = PyLong_FromLong(v);
+        if (!item) { Py_DECREF(tup); return NULL; }
+        PyTuple_SET_ITEM(tup, i, item);  // steals the reference to item
+      }
+      return tup;
+    }
+    case HT_FROZENSET: {
+      // Frozenset built from a temporary list of up to 200 ints.
+      size_t n = fdp.ConsumeIntegralInRange<size_t>(0, 200);
+      PyObject *lst = PyList_New((Py_ssize_t)n);
+      if (!lst) return NULL;
+      for (size_t i = 0; i < n; i++) {
+        long v = fdp.ConsumeIntegral<long>();
+        PyObject *item = PyLong_FromLong(v);
+        if (!item) { Py_DECREF(lst); return NULL; }
+        PyList_SET_ITEM(lst, i, item);  // steals the reference to item
+      }
+      PyObject *fs = PyFrozenSet_New(lst);
+      Py_DECREF(lst);
+      return fs;
+    }
+    default:
+      return PyLong_FromLong(0);
+  }
+}
+
+// Value types for dict values (any picklable type, no recursion into dict).
+enum ValueType {
+  VT_RAW_BYTES, VT_STR, VT_INT, VT_FLOAT, VT_LIST, VT_TUPLE,
+  VT_SET, VT_FROZENSET, VT_BYTEARRAY, VT_NONE,
+  NUM_VALUE_TYPES
+};
+
+// Build a single Python value from fdp, suitable for dict values.
+//
+// Helpers shared by build_any_value() and build_pickle_container() come
+// first; the two entry points follow.
+
+// Consume an element count in [0, 200] and then that many longs from fdp.
+// Returns them as a new list, or NULL on allocation failure.
+static PyObject *consume_long_list(FuzzedDataProvider &fdp) {
+  size_t n = fdp.ConsumeIntegralInRange<size_t>(0, 200);
+  PyObject *lst = PyList_New((Py_ssize_t)n);
+  if (!lst) return NULL;
+  for (size_t i = 0; i < n; i++) {
+    long v = fdp.ConsumeIntegral<long>();
+    PyObject *item = PyLong_FromLong(v);
+    if (!item) { Py_DECREF(lst); return NULL; }
+    PyList_SET_ITEM(lst, i, item);  // steals the reference to item
+  }
+  return lst;
+}
+
+// Build a new list holding one int per byte of buf[0..len).
+// Returns NULL on allocation failure.
+static PyObject *byte_values_to_list(const uint8_t *buf, size_t len) {
+  PyObject *lst = PyList_New((Py_ssize_t)len);
+  if (!lst) return NULL;
+  for (size_t i = 0; i < len; i++) {
+    PyObject *v = PyLong_FromLong(buf[i]);
+    if (!v) { Py_DECREF(lst); return NULL; }
+    PyList_SET_ITEM(lst, i, v);  // steals the reference to v
+  }
+  return lst;
+}
+
+// Pass `lst` to `ctor` (PySet_New / PyFrozenSet_New) and release the list.
+// Takes ownership of `lst`; a NULL list is forwarded as NULL.
+static PyObject *list_into(PyObject *lst, PyObject *(*ctor)(PyObject *)) {
+  if (!lst) return NULL;
+  PyObject *out = ctor(lst);
+  Py_DECREF(lst);
+  return out;
+}
+
+// Build a single Python value from fdp. The first consumed value selects the
+// type; the payload follows. Returns a new reference, or NULL if a Python
+// allocation/decoding call fails.
+static PyObject *build_any_value(FuzzedDataProvider &fdp) {
+  int t = fdp.ConsumeIntegralInRange<int>(0, NUM_VALUE_TYPES - 1);
+  switch (t) {
+    case VT_RAW_BYTES: {
+      size_t len = fdp.ConsumeIntegralInRange<size_t>(
+          0, std::min(fdp.remaining_bytes(), (size_t)10000));
+      std::string s = fdp.ConsumeBytesAsString(len);
+      return PyBytes_FromStringAndSize(Y(s));
+    }
+    case VT_STR: {
+      int str_enc = fdp.ConsumeIntegralInRange<int>(0, 3);
+      size_t len = fdp.ConsumeIntegralInRange<size_t>(
+          0, std::min(fdp.remaining_bytes(), (size_t)10000));
+      std::string s = fdp.ConsumeBytesAsString(len);
+      return fuzz_bytes_to_str(s, str_enc);
+    }
+    case VT_INT: {
+      long v = fdp.ConsumeIntegral<long>();
+      return PyLong_FromLong(v);
+    }
+    case VT_FLOAT: {
+      double v = fdp.ConsumeFloatingPoint<double>();
+      return PyFloat_FromDouble(v);
+    }
+    case VT_LIST:
+      return consume_long_list(fdp);
+    case VT_TUPLE: {
+      size_t n = fdp.ConsumeIntegralInRange<size_t>(0, 200);
+      PyObject *tup = PyTuple_New((Py_ssize_t)n);
+      if (!tup) return NULL;
+      for (size_t i = 0; i < n; i++) {
+        long v = fdp.ConsumeIntegral<long>();
+        PyObject *item = PyLong_FromLong(v);
+        if (!item) { Py_DECREF(tup); return NULL; }
+        PyTuple_SET_ITEM(tup, i, item);  // steals the reference to item
+      }
+      return tup;
+    }
+    case VT_SET:
+      return list_into(consume_long_list(fdp), PySet_New);
+    case VT_FROZENSET:
+      return list_into(consume_long_list(fdp), PyFrozenSet_New);
+    case VT_BYTEARRAY: {
+      size_t len = fdp.ConsumeIntegralInRange<size_t>(
+          0, std::min(fdp.remaining_bytes(), (size_t)10000));
+      std::string s = fdp.ConsumeBytesAsString(len);
+      return PyByteArray_FromStringAndSize(Y(s));
+    }
+    case VT_NONE:
+    default:
+      Py_INCREF(Py_None);
+      return Py_None;
+  }
+}
+
+// Build a Python container from fuzz bytes for pickle.dumps operations.
+// Capped at 10000 elements to keep serialization fast.
+// str_enc selects the byte-to-str decoding (see fuzz_bytes_to_str).
+// For CT_DICT, keys and values are consumed directly from fdp.
+// Returns a new reference, or NULL with a Python exception set.
+static PyObject *build_pickle_container(FuzzedDataProvider &fdp,
+                                        int type, const uint8_t *buf,
+                                        size_t len, int str_enc) {
+  if (len > 10000) len = 10000;
+  switch (type) {
+    case CT_RAW_BYTES:
+      return PyBytes_FromStringAndSize((const char *)buf, len);
+    case CT_STR: {
+      std::string s((const char *)buf, len);
+      return fuzz_bytes_to_str(s, str_enc);
+    }
+    case CT_LIST:
+      return byte_values_to_list(buf, len);
+    case CT_TUPLE: {
+      PyObject *tup = PyTuple_New((Py_ssize_t)len);
+      if (!tup) return NULL;
+      for (size_t i = 0; i < len; i++) {
+        PyObject *v = PyLong_FromLong(buf[i]);
+        if (!v) { Py_DECREF(tup); return NULL; }
+        PyTuple_SET_ITEM(tup, i, v);  // steals the reference to v
+      }
+      return tup;
+    }
+    case CT_SET:
+      return list_into(byte_values_to_list(buf, len), PySet_New);
+    case CT_FROZENSET:
+      return list_into(byte_values_to_list(buf, len), PyFrozenSet_New);
+    case CT_BYTEARRAY:
+      return PyByteArray_FromStringAndSize((const char *)buf, len);
+    case CT_DICT: {
+      // Build a dict with fuzzer-chosen types for each key and value.
+      // Keys use hashable types only; values can be any picklable type.
+      size_t n_entries = fdp.ConsumeIntegralInRange<size_t>(0, 64);
+      PyObject *d = PyDict_New();
+      if (!d) return NULL;
+      for (size_t i = 0; i < n_entries && fdp.remaining_bytes() > 0; i++) {
+        PyRef key(build_hashable_value(fdp));
+        if (!key) { Py_DECREF(d); return NULL; }
+        PyRef val(build_any_value(fdp));
+        if (!val) { Py_DECREF(d); return NULL; }
+        // A failed insert leaves an exception pending; report it as a
+        // failure instead of handing back a dict with an error still set.
+        if (PyDict_SetItem(d, key, val) < 0) { Py_DECREF(d); return NULL; }
+      }
+      return d;
+    }
+    default:
+      return PyBytes_FromStringAndSize((const char *)buf, len);
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Operations (4 ops)
+// ---------------------------------------------------------------------------
+
+// OP_PICKLE_DUMPS: Build a fuzz-chosen container type, then call
+// pickle.dumps(obj, protocol=N, fix_imports=bool). Protocol is fuzz-chosen
+// 0-5, exercising all pickle opcodes.
+// Consumption order: container type, protocol, fix_imports, string encoding,
+// payload length, payload. Any Python exception is cleared before returning.
+static void op_pickle_dumps(FuzzedDataProvider &fdp) {
+  int container_type =
+      fdp.ConsumeIntegralInRange<int>(0, NUM_CONTAINER_TYPES - 1);
+  int protocol = fdp.ConsumeIntegralInRange<int>(0, 5);
+  bool fix_imports = fdp.ConsumeBool();
+  int str_enc = fdp.ConsumeIntegralInRange<int>(0, 3);
+  if (fdp.remaining_bytes() == 0) return;
+  // Explicit <size_t>: the bounds are an int literal and a size_t, so the
+  // template argument cannot be deduced.
+  size_t data_len = fdp.ConsumeIntegralInRange<size_t>(
+      1, std::min(fdp.remaining_bytes(), (size_t)10000));
+  std::string data = fdp.ConsumeBytesAsString(data_len);
+
+  PyRef obj(build_pickle_container(
+      fdp, container_type, (const uint8_t *)data.data(), data.size(),
+      str_enc));
+  CHECK(obj);
+
+  PyRef kwargs = PyDict_New();
+  CHECK(kwargs);
+  PyRef proto = PyLong_FromLong(protocol);
+  CHECK(proto);
+  // Bail out (clearing the error) if either keyword cannot be stored, rather
+  // than calling dumps() with an exception pending.
+  CHECK(PyDict_SetItemString(kwargs, "protocol", proto) == 0);
+  CHECK(PyDict_SetItemString(kwargs, "fix_imports",
+                             fix_imports ? Py_True : Py_False) == 0);
+  // An explicit 1-tuple keeps obj as a single positional argument even when
+  // obj is itself a tuple.
+  PyRef args = PyTuple_Pack(1, (PyObject *)obj);
+  CHECK(args);
+  PyRef r = PyObject_Call(pickle_dumps, args, kwargs);
+  if (PyErr_Occurred()) PyErr_Clear();
+}
+
+// OP_PICKLE_LOADS: Wrap fuzz data in BytesIO, then unpickle via one of 3
+// Unpickler subclass variants (fuzz-chosen).
+//   0 — RestrictedUnpickler(bio)
+//   1 — PersistentUnpickler(bio)
+//   2 — RestrictedUnpickler(bio, fix_imports=True, encoding='bytes')
+static void op_pickle_loads(FuzzedDataProvider &fdp) {
+  int variant = fdp.ConsumeIntegralInRange<int>(0, 2);
+  std::string data = fdp.ConsumeRemainingBytesAsString();
+  PyRef pydata = PyBytes_FromStringAndSize(Y(data));
+  CHECK(pydata);
+  PyRef bio = PyObject_CallFunction(bytesio_ctor, "O",
+                                    (PyObject *)pydata);
+  CHECK(bio);
+
+  PyObject *cls = nullptr;  // borrowed: points at one of the cached classes
+  PyRef kwargs_ref;         // stays empty unless the variant needs kwargs
+  switch (variant) {
+    case 0:
+      cls = RestrictedUnpickler_cls;
+      break;
+    case 1:
+      cls = PersistentUnpickler_cls;
+      break;
+    case 2: {
+      cls = RestrictedUnpickler_cls;
+      kwargs_ref = PyRef(PyDict_New());
+      CHECK(kwargs_ref);
+      // Bail out (clearing the error) if a keyword cannot be stored.
+      CHECK(PyDict_SetItemString(kwargs_ref, "fix_imports", Py_True) == 0);
+      PyRef enc = PyUnicode_FromString("bytes");
+      CHECK(enc);
+      CHECK(PyDict_SetItemString(kwargs_ref, "encoding", enc) == 0);
+      break;
+    }
+  }
+
+  PyRef args = PyTuple_Pack(1, (PyObject *)bio);
+  CHECK(args);
+  PyRef unpickler = PyObject_Call(
+      cls, args, kwargs_ref.p ? (PyObject *)kwargs_ref : NULL);
+  CHECK(unpickler);
+  PyRef r = PyObject_CallMethod(unpickler, "load", NULL);
+  if (PyErr_Occurred()) PyErr_Clear();
+}
+
+// OP_PICKLE_PICKLER: Create pickle.Pickler(BytesIO, protocol=N), then chain:
+// .dump(list_of_ints), .clear_memo(), .dump(str), .getvalue().
+static void op_pickle_pickler(FuzzedDataProvider &fdp) {
+  int protocol = fdp.ConsumeIntegralInRange<int>(0, 5);
+  int str_enc = fdp.ConsumeIntegralInRange<int>(0, 3);
+  if (fdp.remaining_bytes() == 0) return;
+  // Explicit <size_t>: the bounds are an int literal and a size_t, so the
+  // template argument cannot be deduced.
+  size_t data_len = fdp.ConsumeIntegralInRange<size_t>(
+      1, std::min(fdp.remaining_bytes(), (size_t)10000));
+  std::string data = fdp.ConsumeBytesAsString(data_len);
+  std::string data2 = fdp.ConsumeRemainingBytesAsString();
+
+  // Create an in-memory BytesIO buffer for the Pickler to write into.
+  PyRef bio = PyObject_CallFunction(bytesio_ctor, NULL);
+  CHECK(bio);
+
+  // Construct a Pickler targeting the buffer with a fuzz-chosen protocol
+  // (0-5). Different protocols use different opcodes internally.
+  PyRef pickler = PyObject_CallFunction(pickle_Pickler, "Oi",
+                                        (PyObject *)bio, protocol);
+  CHECK(pickler);
+
+  // Build a list-of-ints container from the first fuzz string and dump it.
+  // Exercises the Pickler's serialization of sequences.
+  PyRef obj1(build_pickle_container(
+      fdp, CT_LIST, (const uint8_t *)data.data(), data.size(), str_enc));
+  CHECK(obj1);
+
+  PyRef r1 = PyObject_CallMethod(pickler, "dump", "O", (PyObject *)obj1);
+  CHECK(r1);
+
+  // Clear the memo table between dumps. Exercises the memo-reset path
+  // so the second dump re-encodes objects from scratch.
+  PyRef cm = PyObject_CallMethod(pickler, "clear_memo", NULL);
+  if (PyErr_Occurred()) PyErr_Clear();
+
+  // Build a str from a second independent fuzz string and dump it.
+  // Exercises string serialization after a memo clear, using a different
+  // object type than the first dump.
+  PyRef obj2(fuzz_bytes_to_str(data2, str_enc));
+  CHECK(obj2);
+  PyRef r2 = PyObject_CallMethod(pickler, "dump", "O", (PyObject *)obj2);
+  if (PyErr_Occurred()) PyErr_Clear();
+
+  // Retrieve the full serialized output from the BytesIO buffer.
+  // Exercises the buffer-readback path after multiple dumps.
+  PyRef val = PyObject_CallMethod(bio, "getvalue", NULL);
+  if (PyErr_Occurred()) PyErr_Clear();
+}
+
+// OP_PICKLE_ROUNDTRIP: Build a fuzz-chosen container, pickle.dumps() it,
+// then pickle.loads() the result.
+static void op_pickle_roundtrip(FuzzedDataProvider &fdp) { + int container_type = fdp.ConsumeIntegralInRange(0, NUM_CONTAINER_TYPES - 1); + int str_enc = fdp.ConsumeIntegralInRange(0, 3); + if (fdp.remaining_bytes() == 0) return; + size_t data_len = fdp.ConsumeIntegralInRange( + 1, std::min(fdp.remaining_bytes(), (size_t)10000)); + std::string data = fdp.ConsumeBytesAsString(data_len); + + PyRef obj(build_pickle_container( + fdp, container_type, (const uint8_t *)data.data(), data.size(), str_enc)); + CHECK(obj); + + PyRef dumped = PyObject_CallFunction(pickle_dumps, "O", (PyObject *)obj); + if (!dumped) { + PyErr_Clear(); + return; + } + PyRef loaded = PyObject_CallFunction(pickle_loads, "O", + (PyObject *)dumped); + if (PyErr_Occurred()) PyErr_Clear(); +} + +enum Op { + OP_PICKLE_DUMPS, + OP_PICKLE_LOADS, + OP_PICKLE_PICKLER, + OP_PICKLE_ROUNDTRIP, + NUM_OPS +}; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + assert(Py_IsInitialized()); + init_pickle(); + if (size < 1 || size > kMaxInputSize) return 0; + if (PyErr_Occurred()) PyErr_Clear(); + + FuzzedDataProvider fdp(data, size); + switch (fdp.ConsumeIntegralInRange(0, NUM_OPS - 1)) { + case OP_PICKLE_DUMPS: + op_pickle_dumps(fdp); + break; + case OP_PICKLE_LOADS: + op_pickle_loads(fdp); + break; + case OP_PICKLE_PICKLER: + op_pickle_pickler(fdp); + break; + case OP_PICKLE_ROUNDTRIP: + op_pickle_roundtrip(fdp); + break; + } + + return 0; +} diff --git a/module-fuzzers/fuzz_sqlite3.cpp b/module-fuzzers/fuzz_sqlite3.cpp new file mode 100644 index 0000000..aef74da --- /dev/null +++ b/module-fuzzers/fuzz_sqlite3.cpp @@ -0,0 +1,530 @@ +// fuzz_sqlite3.cpp — Fuzzer for CPython's _sqlite3 C extension module. 
+// +// This fuzzer exercises the following CPython C extension module via +// its Python API, called through the Python C API from C++: +// +// _sqlite3 — connect(':memory:'), execute, executemany, +// executescript, complete_statement, create_function, +// create_aggregate, set_authorizer, create_collation, +// Row factory, blobopen, register_adapter +// +// Exercises the _sqlite3 C extension module wrapping the third-party +// SQLite library. +// +// The fuzzer loops 1-6 times (fdp-driven) on a single ':memory:' +// connection. Each iteration picks one of 25 single-call targets +// covering table creation, insert, select, executemany, executescript, +// complete_statement, create_function, create_aggregate, set_authorizer, +// create_collation, Row factory, blobopen, and register_adapter. +// Because iterations share one connection, the fuzzer naturally chains +// operations (e.g. CREATE_TABLE → INSERT → SET_AUTHORIZER → SELECT). +// +// All module functions and class constructors are imported once during init +// and cached as static PyObject* pointers. An Aggregate helper class is +// defined via PyRun_String at init time. +// PyRef (RAII) prevents reference leaks. Max input size: 64 KB. + +#include "fuzz_helpers.h" + +static PyObject *sqlite3_connect, *sqlite3_complete_statement; +static PyObject *sqlite3_register_adapter, *sqlite3_Row; +static long sqlite3_SQLITE_OK_val; +static PyObject *sqlite3_Aggregate_cls; + +// Cached Python lambdas and helper classes used by op_sqlite3. +// These are created once in init_sqlite3() via run_python_and_get() and +// reused across every fuzzer iteration. Caching avoids the overhead of +// calling PyRun_String to rebuild them on each invocation, and ensures +// the fuzzer spends its time in _sqlite3 code rather than the compiler. 
+static PyObject *sqlite3_identity_fn; // lambda x: x +static PyObject *sqlite3_auth_fn; // lambda *a: SQLITE_OK +static PyObject *sqlite3_collation_fn; // lambda a, b: (a > b) - (a < b) +static PyObject *sqlite3_adapt_cls; // class _AdaptMe +static PyObject *sqlite3_adapter_fn; // lambda a: str(a.v) + +static int initialized = 0; + +// init_sqlite3 is a one-time initialization for the sqlite3 fuzzer. +// +// Imports the Python-level sqlite3 module attributes (connect, +// complete_statement, register_adapter, Row, SQLITE_OK) and caches +// them as static PyObject* pointers so they don't need to be looked +// up on every fuzzer iteration. +// +// Also creates the helper Python objects used by op_sqlite3: +// - _Agg class (aggregate step/finalize) +// - identity lambda (scalar function callback) +// - auth lambda (authorizer callback, returns SQLITE_OK) +// - collation lambda (three-way string comparison) +// - _AdaptMe class (type adaptation target) +// - adapter lambda (converts _AdaptMe -> str) +// +// These are built once via run_python_and_get() and held for the +// lifetime of the process. Called from LLVMFuzzerTestOneInput on +// the first invocation; the `initialized` guard makes it a no-op +// on subsequent calls. +static void init_sqlite3(void) { + if (initialized) return; + + sqlite3_connect = import_attr("sqlite3", "connect"); + sqlite3_complete_statement = import_attr("sqlite3", "complete_statement"); + sqlite3_register_adapter = import_attr("sqlite3", "register_adapter"); + sqlite3_Row = import_attr("sqlite3", "Row"); + // Fetch the integer value of sqlite3.SQLITE_OK (typically 0). + // Used to build the authorizer callback that always permits operations. + { + PyObject *v = import_attr("sqlite3", "SQLITE_OK"); + sqlite3_SQLITE_OK_val = PyLong_AsLong(v); + Py_DECREF(v); + } + + // Aggregate class for conn.create_aggregate("fuzzagg", 1, _Agg). + // step() collects values; finalize() returns the count. 
+ // Exercises the _sqlite3 aggregate callback dispatch path. + sqlite3_Aggregate_cls = run_python_and_get( + "class _Agg:\n" + " def __init__(self): self.vals = []\n" + " def step(self, v): self.vals.append(v)\n" + " def finalize(self): return len(self.vals)\n", + "_Agg"); + + // Identity function for conn.create_function("fuzzfn", 1, f). + // Simply returns its argument unchanged. + // Exercises the _sqlite3 scalar function callback dispatch path. + sqlite3_identity_fn = run_python_and_get("_f = lambda x: x\n", "_f"); + + // Authorizer callback for conn.set_authorizer(f). + // Accepts any number of args and always returns SQLITE_OK (permit). + // Exercises the _sqlite3 authorizer callback dispatch path without + // blocking any SQL operations. + { + char buf[80]; + snprintf(buf, sizeof(buf), "_f = lambda *a: %ld\n", sqlite3_SQLITE_OK_val); + sqlite3_auth_fn = run_python_and_get(buf, "_f"); + } + + // Collation function for conn.create_collation("fuzz", f). + // Standard three-way comparison: returns -1, 0, or 1. + // Exercises the _sqlite3 collation callback dispatch path used by + // ORDER BY ... COLLATE fuzz. + sqlite3_collation_fn = run_python_and_get( + "_f = lambda a, b: (a > b) - (a < b)\n", "_f"); + + // Adapter class for sqlite3.register_adapter(_AdaptMe, f). + // A simple wrapper holding a single value .v. + // Exercises the _sqlite3 type adaptation path: when an _AdaptMe instance + // is passed as a query parameter, SQLite calls the registered adapter. + sqlite3_adapt_cls = run_python_and_get( + "class _AdaptMe:\n" + " def __init__(self, v): self.v = v\n", + "_AdaptMe"); + + // Adapter function for sqlite3.register_adapter(_AdaptMe, f). + // Converts an _AdaptMe instance to a string via str(a.v). + // Exercises the _sqlite3 adapter lookup and conversion path. 
+ sqlite3_adapter_fn = run_python_and_get("_f = lambda a: str(a.v)\n", "_f"); + + assert(!PyErr_Occurred()); + initialized = 1; +} + +// Helper: Create a memory connection with PRAGMA max_page_count=100. +static PyObject *make_sqlite_conn() { + PyObject *conn = PyObject_CallFunction(sqlite3_connect, "s", ":memory:"); + if (!conn) return NULL; + PyRef r = PyObject_CallMethod(conn, "execute", "s", + "PRAGMA max_page_count=100"); + if (!r) { + PyErr_Clear(); + Py_DECREF(conn); + return NULL; + } + return conn; +} + +// op_sqlite3: connect(':memory:'), then loop 1-6 times (fdp-driven). +// Each iteration FDP selects one of many single-call targets. Since +// iterations share the same connection, the fuzzer naturally chains +// operations (e.g. CREATE_TABLE → INSERT_PARAM → SELECT_LIKE across +// three iterations). Each case makes exactly one SQL/API call. +static void op_sqlite3(FuzzedDataProvider &fdp) { + enum { + // Basic SQL execution + EXECUTE, // conn.execute(fuzz_sql) + EXECUTESCRIPT, // conn.executescript(fuzz_sql) + COMPLETE_STMT, // sqlite3.complete_statement(fuzz_sql) + // Table setup + CREATE_TABLE_TEXT, // CREATE TABLE t(a TEXT) + CREATE_TABLE_BLOB, // CREATE TABLE t(a BLOB) + CREATE_TABLE_INT, // CREATE TABLE t(v INTEGER) + CREATE_TABLE_MULTI, // CREATE TABLE t(a TEXT, b BLOB) + CREATE_TABLE_TEXT_INT, // CREATE TABLE t(a TEXT, b INTEGER) + // Insert operations + INSERT_TEXT, // INSERT INTO t VALUES(?) with fuzz text + INSERT_PARAM, // INSERT INTO t VALUES(?,?) with text + blob + INSERT_ADAPTED, // INSERT INTO t VALUES(?) with _AdaptMe obj + // Bulk insert + EXECUTEMANY_INT, // conn.executemany("INSERT INTO t VALUES(?)", rows) + // Select operations + SELECT_ALL, // SELECT * FROM t + SELECT_LIKE, // SELECT * FROM t WHERE a LIKE ? 
+ SELECT_AGGREGATES, // SELECT count(*), sum(v), avg(v), min(v), max(v) FROM t + SELECT_ORDERED, // SELECT * FROM t ORDER BY a COLLATE + SELECT_VIA_FUNC, // SELECT (a) FROM t + SELECT_VIA_AGG, // SELECT (v) FROM t + // Feature registration + CREATE_FUNCTION, // conn.create_function(fuzz_name, fuzz_narg, ...) + CREATE_AGGREGATE, // conn.create_aggregate(fuzz_name, fuzz_narg, ...) + SET_AUTHORIZER, // conn.set_authorizer(auth_fn) + CREATE_COLLATION, // conn.create_collation(fuzz_name, ...) + SET_ROW_FACTORY, // conn.row_factory = sqlite3.Row + REGISTER_ADAPTER, // sqlite3.register_adapter(_AdaptMe, ...) + // Blob I/O + BLOBOPEN, // conn.blobopen("main","t","a", rowid) + NUM_TARGETS + }; + + PyRef conn(make_sqlite_conn()); + CHECK(conn); + + // Track the last-registered fuzz-derived names so that SELECT_VIA_FUNC, + // SELECT_VIA_AGG, and SELECT_ORDERED can reference whatever was registered + // by an earlier iteration's CREATE_FUNCTION / CREATE_AGGREGATE / + // CREATE_COLLATION case. + std::string func_name, agg_name, collation_name; + + // Helper: consume a fuzz-derived string from fdp. Only called by cases + // that actually need string data, so cases like CREATE_TABLE_* and + // SET_AUTHORIZER don't waste fuzz bytes. + auto consume_pystr = [&fdp]() -> std::pair { + int str_enc = fdp.ConsumeIntegralInRange(0, 3); + if (fdp.remaining_bytes() == 0) + return {std::string(), PyRef(nullptr)}; + size_t data_len = fdp.ConsumeIntegralInRange( + 1, std::min(fdp.remaining_bytes(), (size_t)10000)); + std::string data = fdp.ConsumeBytesAsString(data_len); + PyRef pystr(fuzz_bytes_to_str(data, str_enc)); + return {std::move(data), std::move(pystr)}; + }; + + // Loop so the fuzzer can chain single-call operations on the same + // connection across iterations (e.g. CREATE_TABLE_TEXT → INSERT_TEXT + // → SET_AUTHORIZER → SELECT_ALL). FDP picks the iteration count (1-6). 
+ int num_iters = fdp.ConsumeIntegralInRange(1, 6); + for (int iter = 0; iter < num_iters && fdp.remaining_bytes() > 0; iter++) { + int target_fn = fdp.ConsumeIntegralInRange(0, NUM_TARGETS - 1); + + switch (target_fn) { + case EXECUTE: { + // conn.execute(fuzz_sql) — arbitrary SQL on the live connection. + auto [data, pystr] = consume_pystr(); + if (!pystr) { PyErr_Clear(); break; } + PyRef r = PyObject_CallMethod(conn, "execute", "O", (PyObject *)pystr); + break; + } + case EXECUTESCRIPT: { + // conn.executescript(fuzz_sql) — multi-statement SQL in autocommit. + auto [data, pystr] = consume_pystr(); + if (!pystr) { PyErr_Clear(); break; } + Py_ssize_t slen = PyUnicode_GET_LENGTH(pystr); + if (slen > 0) { + PyRef r = PyObject_CallMethod(conn, "executescript", "O", + (PyObject *)pystr); + } else { + PyRef def = PyUnicode_FromString("SELECT 1;"); + PyRef r = PyObject_CallMethod(conn, "executescript", "O", + (PyObject *)def); + } + break; + } + case COMPLETE_STMT: { + // sqlite3.complete_statement(fuzz_sql) — checks for trailing ";". + auto [data, pystr] = consume_pystr(); + if (!pystr) { PyErr_Clear(); break; } + Py_ssize_t slen = PyUnicode_GET_LENGTH(pystr); + if (slen > 0) { + PyRef r = PyObject_CallFunction(sqlite3_complete_statement, "O", + (PyObject *)pystr); + } else { + PyRef def = PyUnicode_FromString("SELECT 1;"); + PyRef r = PyObject_CallFunction(sqlite3_complete_statement, "O", + (PyObject *)def); + } + break; + } + case CREATE_TABLE_TEXT: { + // CREATE TABLE t(a TEXT) — single text column. + PyRef r = PyObject_CallMethod(conn, "execute", "s", + "CREATE TABLE t(a TEXT)"); + break; + } + case CREATE_TABLE_BLOB: { + // CREATE TABLE t(a BLOB) — single blob column. + PyRef r = PyObject_CallMethod(conn, "execute", "s", + "CREATE TABLE t(a BLOB)"); + break; + } + case CREATE_TABLE_INT: { + // CREATE TABLE t(v INTEGER) — single integer column. 
+ PyRef r = PyObject_CallMethod(conn, "execute", "s", + "CREATE TABLE t(v INTEGER)"); + break; + } + case CREATE_TABLE_MULTI: { + // CREATE TABLE t(a TEXT, b BLOB) — text + blob columns. + PyRef r = PyObject_CallMethod(conn, "execute", "s", + "CREATE TABLE t(a TEXT, b BLOB)"); + break; + } + case CREATE_TABLE_TEXT_INT: { + // CREATE TABLE t(a TEXT, b INTEGER) — text + integer columns. + PyRef r = PyObject_CallMethod(conn, "execute", "s", + "CREATE TABLE t(a TEXT, b INTEGER)"); + break; + } + case INSERT_TEXT: { + // INSERT INTO t VALUES(?) — bind fuzz text as a parameter. + auto [data, pystr] = consume_pystr(); + if (!pystr) { PyErr_Clear(); break; } + PyRef params = PyTuple_Pack(1, (PyObject *)pystr); + CHECK(params); + PyRef r = PyObject_CallMethod(conn, "execute", "sO", + "INSERT INTO t VALUES(?)", + (PyObject *)params); + break; + } + case INSERT_PARAM: { + // INSERT INTO t VALUES(?,?) — bind fuzz text + fuzz bytes. + auto [data, pystr] = consume_pystr(); + if (!pystr) { PyErr_Clear(); break; } + PyRef pydata = PyBytes_FromStringAndSize(data.data(), data.size()); + CHECK(pydata); + PyRef params = PyTuple_Pack(2, (PyObject *)pystr, (PyObject *)pydata); + CHECK(params); + PyRef r = PyObject_CallMethod(conn, "execute", "sO", + "INSERT INTO t VALUES(?, ?)", + (PyObject *)params); + break; + } + case INSERT_ADAPTED: { + // INSERT INTO t VALUES(?) — bind an _AdaptMe(fuzz_substr) object. + // Requires REGISTER_ADAPTER to have run earlier on this connection + // for the adapter to fire; otherwise sqlite3 raises an error. 
+ auto [data, pystr] = consume_pystr(); + if (!pystr) { PyErr_Clear(); break; } + Py_ssize_t sub_len = fdp.ConsumeIntegralInRange( + 0, PyUnicode_GET_LENGTH(pystr)); + PyRef sub = PyUnicode_Substring(pystr, 0, sub_len); + if (!sub) { PyErr_Clear(); break; } + PyRef obj = PyObject_CallFunction(sqlite3_adapt_cls, "O", + (PyObject *)sub); + if (!obj) { PyErr_Clear(); break; } + PyRef params = PyTuple_Pack(1, (PyObject *)obj); + CHECK(params); + PyRef r = PyObject_CallMethod(conn, "execute", "sO", + "INSERT INTO t VALUES(?)", + (PyObject *)params); + break; + } + case EXECUTEMANY_INT: { + // conn.executemany("INSERT INTO t VALUES(?)", rows) — bulk insert + // of integer tuples built from fuzz bytes. + auto [data, pystr] = consume_pystr(); + PyRef rows = PyList_New(0); + CHECK(rows); + size_t limit = fdp.ConsumeIntegralInRange( + 0, std::min(data.size(), (size_t)10000)); + for (size_t i = 0; i < limit; i++) { + PyRef val = PyLong_FromLong((unsigned char)data[i]); + PyRef tup = PyTuple_Pack(1, (PyObject *)val); + if (tup) PyList_Append(rows, tup); + } + PyRef r = PyObject_CallMethod(conn, "executemany", "sO", + "INSERT INTO t VALUES(?)", + (PyObject *)rows); + break; + } + case SELECT_ALL: { + // SELECT * FROM t — fetch all rows. + PyRef cur = PyObject_CallMethod(conn, "execute", "s", + "SELECT * FROM t"); + if (cur) { + PyRef rows = PyObject_CallMethod(cur, "fetchall", NULL); + } + break; + } + case SELECT_LIKE: { + // SELECT * FROM t WHERE a LIKE ? — parameterized LIKE query. 
+ auto [data, pystr] = consume_pystr(); + if (!pystr) { PyErr_Clear(); break; } + Py_ssize_t sub_len = fdp.ConsumeIntegralInRange( + 0, PyUnicode_GET_LENGTH(pystr)); + PyRef sub = PyUnicode_Substring(pystr, 0, sub_len); + if (!sub) { PyErr_Clear(); break; } + PyRef params = PyTuple_Pack(1, (PyObject *)sub); + CHECK(params); + PyRef r = PyObject_CallMethod(conn, "execute", "sO", + "SELECT * FROM t WHERE a LIKE ?", + (PyObject *)params); + break; + } + case SELECT_AGGREGATES: { + // SELECT count(*), sum(v), avg(v), min(v), max(v) FROM t. + PyRef cur = PyObject_CallMethod(conn, "execute", "s", + "SELECT count(*), sum(v), avg(v), min(v), max(v) FROM t"); + if (cur) { + PyRef row = PyObject_CallMethod(cur, "fetchone", NULL); + } + break; + } + case SELECT_ORDERED: { + // SELECT * FROM t ORDER BY a COLLATE . + // Uses the name from the last CREATE_COLLATION iteration. + if (collation_name.empty()) break; + std::string sql = "SELECT * FROM t ORDER BY a COLLATE \"" + + collation_name + "\""; + PyRef cur = PyObject_CallMethod(conn, "execute", "s", sql.c_str()); + if (cur) { + PyRef rows = PyObject_CallMethod(cur, "fetchall", NULL); + } + break; + } + case SELECT_VIA_FUNC: { + // SELECT (a) FROM t — triggers scalar function callback. + // Uses the name from the last CREATE_FUNCTION iteration. + if (func_name.empty()) break; + std::string sql = "SELECT \"" + func_name + "\"(a) FROM t"; + PyRef cur = PyObject_CallMethod(conn, "execute", "s", sql.c_str()); + if (cur) { + PyRef rows = PyObject_CallMethod(cur, "fetchall", NULL); + } + break; + } + case SELECT_VIA_AGG: { + // SELECT (v) FROM t — triggers step()/finalize() callbacks. + // Uses the name from the last CREATE_AGGREGATE iteration. 
+ if (agg_name.empty()) break; + std::string sql = "SELECT \"" + agg_name + "\"(v) FROM t"; + PyRef cur = PyObject_CallMethod(conn, "execute", "s", sql.c_str()); + if (cur) { + PyRef row = PyObject_CallMethod(cur, "fetchone", NULL); + } + break; + } + case CREATE_FUNCTION: { + // conn.create_function(name, narg, identity_fn) — register scalar fn + // with a fuzz-derived name and argument count. Exercises _sqlite3 + // create_function name handling and narg validation. + auto [data, pystr] = consume_pystr(); + if (!pystr) { PyErr_Clear(); break; } + const char *name = PyUnicode_AsUTF8(pystr); + if (!name) { PyErr_Clear(); break; } + int narg = fdp.ConsumeIntegralInRange(-1, 8); + PyRef r = PyObject_CallMethod(conn, "create_function", "siO", + name, narg, sqlite3_identity_fn); + if (!r) { PyErr_Clear(); break; } + func_name.assign(name); + break; + } + case CREATE_AGGREGATE: { + // conn.create_aggregate(name, narg, _Agg) — register aggregate fn + // with a fuzz-derived name and argument count. Exercises _sqlite3 + // create_aggregate name handling and narg validation. + auto [data, pystr] = consume_pystr(); + if (!pystr) { PyErr_Clear(); break; } + const char *name = PyUnicode_AsUTF8(pystr); + if (!name) { PyErr_Clear(); break; } + int narg = fdp.ConsumeIntegralInRange(-1, 8); + PyRef r = PyObject_CallMethod(conn, "create_aggregate", "siO", + name, narg, sqlite3_Aggregate_cls); + if (!r) { PyErr_Clear(); break; } + agg_name.assign(name); + break; + } + case SET_AUTHORIZER: { + // conn.set_authorizer(auth_fn) — install authorizer callback. + // All subsequent SQL on this connection goes through the authorizer. + PyRef r = PyObject_CallMethod(conn, "set_authorizer", "O", + sqlite3_auth_fn); + break; + } + case CREATE_COLLATION: { + // conn.create_collation(name, collation_fn) — register collation + // with a fuzz-derived name. Exercises _sqlite3 create_collation + // name handling. 
+ auto [data, pystr] = consume_pystr(); + if (!pystr) { PyErr_Clear(); break; } + const char *name = PyUnicode_AsUTF8(pystr); + if (!name) { PyErr_Clear(); break; } + PyRef r = PyObject_CallMethod(conn, "create_collation", "sO", + name, sqlite3_collation_fn); + if (!r) { PyErr_Clear(); break; } + collation_name.assign(name); + break; + } + case SET_ROW_FACTORY: { + // conn.row_factory = sqlite3.Row — switch to Row-based fetch. + // Subsequent SELECT results on this connection return sqlite3.Row + // objects instead of plain tuples. + PyObject_SetAttrString(conn, "row_factory", sqlite3_Row); + break; + } + case REGISTER_ADAPTER: { + // sqlite3.register_adapter(_AdaptMe, adapter_fn) — global registration. + PyRef reg = PyObject_CallFunction(sqlite3_register_adapter, "OO", + sqlite3_adapt_cls, + sqlite3_adapter_fn); + break; + } + case BLOBOPEN: { + // conn.blobopen("main","t","a", rowid) — open incremental I/O handle. + // Requires a table "t" with a BLOB column "a" and at least one row. + // Reads the first rowid, opens the blob, then does one read or write + // (fdp-chosen) before closing. + PyRef cur = PyObject_CallMethod(conn, "execute", "s", + "SELECT rowid FROM t LIMIT 1"); + if (!cur) { PyErr_Clear(); break; } + PyRef row = PyObject_CallMethod(cur, "fetchone", NULL); + if (!row || row.p == Py_None) { PyErr_Clear(); break; } + PyRef rid = PySequence_GetItem(row, 0); + CHECK(rid); + PyRef blob = PyObject_CallMethod(conn, "blobopen", "sssO", + "main", "t", "a", (PyObject *)rid); + if (!blob) { PyErr_Clear(); break; } + if (fdp.ConsumeBool()) { + // Read the blob content. + PyRef rd = PyObject_CallMethod(blob, "read", NULL); + } else { + // Write fuzz-derived bytes into the blob. 
+ auto [data, pystr] = consume_pystr(); + size_t wr_len = fdp.ConsumeIntegralInRange( + 0, std::min(data.size(), (size_t)10000)); + PyRef wr_data = PyBytes_FromStringAndSize(data.data(), wr_len); + if (wr_data) { + PyRef wr = PyObject_CallMethod(blob, "write", "O", + (PyObject *)wr_data); + } + } + { + PyRef cl = PyObject_CallMethod(blob, "close", NULL); + } + break; + } + } + if (PyErr_Occurred()) PyErr_Clear(); + } // end loop + + PyRef cl = PyObject_CallMethod(conn, "close", NULL); + if (PyErr_Occurred()) PyErr_Clear(); +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + assert(Py_IsInitialized()); + init_sqlite3(); + if (size < 1 || size > 0x10000) return 0; + if (PyErr_Occurred()) PyErr_Clear(); + + FuzzedDataProvider fdp(data, size); + op_sqlite3(fdp); + return 0; +} diff --git a/module-fuzzers/fuzz_ssl.cpp b/module-fuzzers/fuzz_ssl.cpp new file mode 100644 index 0000000..a6e4efb --- /dev/null +++ b/module-fuzzers/fuzz_ssl.cpp @@ -0,0 +1,84 @@ +// fuzz_ssl.cpp — Fuzzer for CPython's _ssl C extension module. +// +// This fuzzer exercises the following CPython C extension module via +// its Python API, called through the Python C API from C++: +// +// _ssl — ssl.DER_cert_to_PEM_cert(), then optionally +// SSLContext(PROTOCOL_TLS_CLIENT).load_verify_locations() +// +// Exercises the OpenSSL certificate parsing path in the _ssl C module. +// +// All module functions are imported once during init and cached as static +// PyObject* pointers. PyRef (RAII) prevents reference leaks. +// Max input size: 1 MB. 
+ +#include "fuzz_helpers.h" + +static PyObject *ssl_DER_cert_to_PEM_cert, *ssl_SSLContext; +static long ssl_PROTOCOL_TLS_CLIENT_val; + +static int initialized = 0; + +static void init_ssl(void) { + if (initialized) return; + + ssl_DER_cert_to_PEM_cert = import_attr("ssl", "DER_cert_to_PEM_cert"); + ssl_SSLContext = import_attr("ssl", "SSLContext"); + { + PyObject *v = import_attr("ssl", "PROTOCOL_TLS_CLIENT"); + ssl_PROTOCOL_TLS_CLIENT_val = PyLong_AsLong(v); + Py_DECREF(v); + } + + assert(!PyErr_Occurred()); + initialized = 1; +} + +// op_ssl_cert: Call ssl.DER_cert_to_PEM_cert(data) to attempt DER-to-PEM +// certificate conversion. If successful, create an SSLContext with +// PROTOCOL_TLS_CLIENT and call .load_verify_locations(cadata=pem_string) +// to exercise the OpenSSL certificate parsing path in the _ssl C module. +static void op_ssl_cert(FuzzedDataProvider &fdp) { + std::string data = fdp.ConsumeRemainingBytesAsString(); + PyRef pydata = PyBytes_FromStringAndSize(Y(data)); + CHECK(pydata); + PyRef pem = PyObject_CallFunction(ssl_DER_cert_to_PEM_cert, "O", + (PyObject *)pydata); + if (!pem) { + PyErr_Clear(); + return; + } + + // Optionally try to load into SSLContext. 
+ PyRef ctx = PyObject_CallFunction(ssl_SSLContext, "l", + ssl_PROTOCOL_TLS_CLIENT_val); + if (!ctx) { + PyErr_Clear(); + return; + } + + PyRef kwargs = PyDict_New(); + CHECK(kwargs); + PyDict_SetItemString(kwargs, "cadata", pem); + PyRef empty_args = PyTuple_New(0); + CHECK(empty_args); + PyRef method = PyObject_GetAttrString(ctx, "load_verify_locations"); + if (!method) { + PyErr_Clear(); + return; + } + PyRef r = PyObject_Call(method, empty_args, kwargs); + if (PyErr_Occurred()) PyErr_Clear(); +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + assert(Py_IsInitialized()); + init_ssl(); + if (size < 1 || size > kMaxInputSize) return 0; + if (PyErr_Occurred()) PyErr_Clear(); + + FuzzedDataProvider fdp(data, size); + op_ssl_cert(fdp); + + return 0; +} diff --git a/module-fuzzers/fuzz_time.cpp b/module-fuzzers/fuzz_time.cpp new file mode 100644 index 0000000..d42f936 --- /dev/null +++ b/module-fuzzers/fuzz_time.cpp @@ -0,0 +1,97 @@ +// fuzz_time.cpp — Fuzzer for CPython's time C extension module. +// +// This fuzzer exercises the following CPython C extension module via +// its Python API, called through the Python C API from C++: +// +// time — strftime with fuzz format, strptime with fuzz input, +// strptime with fuzz format +// +// All module functions are imported once during init and cached as static +// PyObject* pointers. PyRef (RAII) prevents reference leaks. +// Max input size: 64 KB. + +#include "fuzz_helpers.h" + +static PyObject *time_strftime, *time_strptime, *time_localtime; + +static int initialized = 0; + +static void init_time(void) { + if (initialized) return; + + time_strftime = import_attr("time", "strftime"); + time_strptime = import_attr("time", "strptime"); + time_localtime = import_attr("time", "localtime"); + assert(!PyErr_Occurred()); + initialized = 1; +} + +// op_time: FDP selects variant — strftime with fuzz format, strptime with +// fuzz input, or strptime with fuzz format. Exercises the time C module. 
+static void op_time(FuzzedDataProvider &fdp) { + int str_enc = fdp.ConsumeIntegralInRange(0, 3); + enum { STRFTIME, STRPTIME_INPUT, STRPTIME_FORMAT, NUM_TARGETS }; + int target_fn = fdp.ConsumeIntegralInRange(0, NUM_TARGETS - 1); + if (fdp.remaining_bytes() == 0) return; + std::string data = fdp.ConsumeRemainingBytesAsString(); + PyRef pystr(fuzz_bytes_to_str(data, str_enc)); + CHECK(pystr); + + switch (target_fn) { + case STRFTIME: { + // time.strftime(str, time.localtime()) + PyRef lt = PyObject_CallFunction(time_localtime, NULL); + CHECK(lt); + // Use non-empty format. + Py_ssize_t slen = PyUnicode_GET_LENGTH(pystr); + PyObject *fmt = slen > 0 ? (PyObject *)pystr : NULL; + if (!fmt) { + PyRef def_fmt = PyUnicode_FromString("%Y"); + CHECK(def_fmt); + PyRef r = PyObject_CallFunction(time_strftime, "OO", + (PyObject *)def_fmt, (PyObject *)lt); + } else { + PyRef r = PyObject_CallFunction(time_strftime, "OO", + fmt, (PyObject *)lt); + } + break; + } + case STRPTIME_INPUT: { + // time.strptime(str, '%Y-%m-%d %H:%M:%S') + PyRef r = PyObject_CallFunction(time_strptime, "Os", + (PyObject *)pystr, + "%Y-%m-%d %H:%M:%S"); + break; + } + case STRPTIME_FORMAT: { + // time.strptime('2024-01-15 12:30:00', str) + // Use non-empty format. + Py_ssize_t slen = PyUnicode_GET_LENGTH(pystr); + PyObject *fmt = slen > 0 ? 
(PyObject *)pystr : NULL; + if (!fmt) { + PyRef def_fmt = PyUnicode_FromString("%Y-%m-%d %H:%M:%S"); + CHECK(def_fmt); + PyRef r = PyObject_CallFunction(time_strptime, "sO", + "2024-01-15 12:30:00", + (PyObject *)def_fmt); + } else { + PyRef r = PyObject_CallFunction(time_strptime, "sO", + "2024-01-15 12:30:00", fmt); + } + break; + } + } + if (PyErr_Occurred()) PyErr_Clear(); +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + assert(Py_IsInitialized()); + init_time(); + if (size < 1 || size > 0x10000) return 0; + if (PyErr_Occurred()) PyErr_Clear(); + + FuzzedDataProvider fdp(data, size); + op_time(fdp); + + return 0; +} diff --git a/module-fuzzers/fuzz_unicodedata.cpp b/module-fuzzers/fuzz_unicodedata.cpp new file mode 100644 index 0000000..5c2eb84 --- /dev/null +++ b/module-fuzzers/fuzz_unicodedata.cpp @@ -0,0 +1,155 @@ +// fuzz_unicodedata.cpp — Fuzzer for CPython's unicodedata C extension module. +// +// This fuzzer exercises the following CPython C extension module via +// its Python API, called through the Python C API from C++: +// +// unicodedata — category, bidirectional, numeric, decimal, +// combining, east_asian_width, mirrored, name, +// decomposition, normalize, is_normalized, lookup, +// ucd_3_2_0.normalize +// +// The first two bytes of fuzz input select string encoding and target +// function (one of 13). Remaining bytes become the input string. +// Each target makes a single call. +// +// All module functions are imported once during init and cached as static +// PyObject* pointers. PyRef (RAII) prevents reference leaks. +// Max input size: 64 KB. 
+ +#include "fuzz_helpers.h" + +// unicodedata +static PyObject *ud_category, *ud_bidirectional, *ud_normalize, *ud_numeric; +static PyObject *ud_lookup, *ud_name, *ud_decomposition, *ud_is_normalized; +static PyObject *ud_east_asian_width, *ud_mirrored, *ud_decimal, *ud_combining; +static PyObject *ud_ucd_3_2_0; + +static int initialized = 0; + +static void init_unicodedata(void) { + if (initialized) return; + + // unicodedata + ud_category = import_attr("unicodedata", "category"); + ud_bidirectional = import_attr("unicodedata", "bidirectional"); + ud_normalize = import_attr("unicodedata", "normalize"); + ud_numeric = import_attr("unicodedata", "numeric"); + ud_lookup = import_attr("unicodedata", "lookup"); + ud_name = import_attr("unicodedata", "name"); + ud_decomposition = import_attr("unicodedata", "decomposition"); + ud_is_normalized = import_attr("unicodedata", "is_normalized"); + ud_east_asian_width = import_attr("unicodedata", "east_asian_width"); + ud_mirrored = import_attr("unicodedata", "mirrored"); + ud_decimal = import_attr("unicodedata", "decimal"); + ud_combining = import_attr("unicodedata", "combining"); + ud_ucd_3_2_0 = import_attr("unicodedata", "ucd_3_2_0"); + assert(!PyErr_Occurred()); + initialized = 1; +} + +// op_unicodedata: the fuzzer selects one of 13 targets — 9 single-character +// functions (category, bidirectional, numeric, decimal, combining, +// east_asian_width, mirrored, name, decomposition) or 4 whole-string +// functions (normalize, is_normalized, ucd_3_2_0.normalize, lookup). +// Each target makes a single call. Exercises the unicodedata C module's +// character-info, normalization, and name-lookup code paths. 
+static void op_unicodedata(FuzzedDataProvider &fdp) { + int str_enc = fdp.ConsumeIntegralInRange(0, 3); + enum { + CATEGORY, BIDIRECTIONAL, NUMERIC, DECIMAL, COMBINING, + EAST_ASIAN_WIDTH, MIRRORED, NAME, DECOMPOSITION, + NORMALIZE, IS_NORMALIZED, UCD_NORMALIZE, LOOKUP, + NUM_TARGETS + }; + int target_fn = fdp.ConsumeIntegralInRange(0, NUM_TARGETS - 1); + if (fdp.remaining_bytes() == 0) return; + std::string data = fdp.ConsumeRemainingBytesAsString(); + PyRef pystr(fuzz_bytes_to_str(data, str_enc)); + CHECK(pystr); + + static const char *kForms[] = {"NFC", "NFD", "NFKC", "NFKD"}; + + switch (target_fn) { + case CATEGORY: { + PyRef r = PyObject_CallFunction(ud_category, "O", (PyObject *)pystr); + break; + } + case BIDIRECTIONAL: { + PyRef r = PyObject_CallFunction(ud_bidirectional, "O", (PyObject *)pystr); + break; + } + case NUMERIC: { + PyRef dflt = PyLong_FromLong(fdp.ConsumeIntegral()); + CHECK(dflt); + PyRef r = PyObject_CallFunction(ud_numeric, "OO", + (PyObject *)pystr, (PyObject *)dflt); + break; + } + case DECIMAL: { + PyRef dflt = PyLong_FromLong(fdp.ConsumeIntegral()); + CHECK(dflt); + PyRef r = PyObject_CallFunction(ud_decimal, "OO", + (PyObject *)pystr, (PyObject *)dflt); + break; + } + case COMBINING: { + PyRef r = PyObject_CallFunction(ud_combining, "O", (PyObject *)pystr); + break; + } + case EAST_ASIAN_WIDTH: { + PyRef r = PyObject_CallFunction(ud_east_asian_width, "O", + (PyObject *)pystr); + break; + } + case MIRRORED: { + PyRef r = PyObject_CallFunction(ud_mirrored, "O", (PyObject *)pystr); + break; + } + case NAME: { + PyRef empty_str = PyUnicode_FromString(""); + CHECK(empty_str); + PyRef r = PyObject_CallFunction(ud_name, "OO", + (PyObject *)pystr, (PyObject *)empty_str); + break; + } + case DECOMPOSITION: { + PyRef r = PyObject_CallFunction(ud_decomposition, "O", + (PyObject *)pystr); + break; + } + case NORMALIZE: { + const char *form = kForms[str_enc & 3]; + PyRef r = PyObject_CallFunction(ud_normalize, "sO", + form, (PyObject *)pystr); + 
break; + } + case IS_NORMALIZED: { + const char *form = kForms[str_enc & 3]; + PyRef r = PyObject_CallFunction(ud_is_normalized, "sO", + form, (PyObject *)pystr); + break; + } + case UCD_NORMALIZE: { + PyRef r = PyObject_CallMethod(ud_ucd_3_2_0, "normalize", "sO", + "NFC", (PyObject *)pystr); + break; + } + case LOOKUP: { + PyRef r = PyObject_CallFunction(ud_lookup, "O", (PyObject *)pystr); + break; + } + } + if (PyErr_Occurred()) PyErr_Clear(); +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + assert(Py_IsInitialized()); + init_unicodedata(); + if (size < 1 || size > 0x10000) return 0; + if (PyErr_Occurred()) PyErr_Clear(); + + FuzzedDataProvider fdp(data, size); + op_unicodedata(fdp); + + return 0; +}