diff --git a/.gitmodules b/.gitmodules index 05927d49d1..b3b0cf963c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,3 +10,9 @@ [submodule "others/mbedtls"] path = others/mbedtls url = https://github.com/Mbed-TLS/mbedtls.git +[submodule "others/simdjson"] + path = others/simdjson + url = https://github.com/simdjson/simdjson.git +[submodule "others/jsoncons"] + path = others/jsoncons + url = https://github.com/danielaparker/jsoncons.git diff --git a/README.md b/README.md index 50b8833319..5676007f46 100644 --- a/README.md +++ b/README.md @@ -253,6 +253,16 @@ It is recommended to use assertions where applicable, and to enable them with The source tree includes a Benchmark tool that can help measure library performance. The tool is located in the `test/benchmark/` directory. The build process also creates the binary here, so you will have the tool after the compilation is finished. +Detailed benchmark test documentation is available in: +* `docs/benchmark-tests.en.md` +* `docs/benchmark-tests.de.md` + +To compare the JSON regression suite across both supported JSON backends with separate build directories, run: + +```shell +$ ./test/run-json-backend-matrix.sh --jobs 4 +``` + To run, just type: ```shell @@ -319,10 +329,12 @@ The tool is a straightforward wrapper application that utilizes the library. It Each transaction is an HTTP/1.1 GET request with some GET parameters. Common headers are added, followed by the response headers and an XML body. Between phases, the tool checks whether an intervention has occurred. All transactions are created with the same data. -Note that the tool does not call the last phase (logging). +Note that the tool calls the logging phase (`processLogging()`) for each transaction. Please remember to reset `basic_rules.conf` if you want to try with a different ruleset. 
+The benchmark directory also includes `json_benchmark`, which targets JSON request-body processing with fixed scenario classes such as large objects, deep nesting, numeric payloads and UTF-8-heavy strings. + ## Reporting Issues If you are facing a configuration issue or something is not working as you diff --git a/build/msc_find_lib.m4 b/build/msc_find_lib.m4 index 77fdb38d38..dc800a9839 100644 --- a/build/msc_find_lib.m4 +++ b/build/msc_find_lib.m4 @@ -10,11 +10,11 @@ dnl Sets and AC_SUBSTs: dnl ${NAME}_CFLAGS, ${NAME}_LDADD, ${NAME}_LDFLAGS, dnl ${NAME}_VERSION, ${NAME}_DISPLAY, ${NAME}_FOUND (0/1/2) dnl -dnl NAME - Variable prefix (e.g., YAJL, CURL, LIBXML2) +dnl NAME - Variable prefix (e.g., CURL, LIBXML2, LMDB) dnl PKG_NAMES - Space-separated pkg-config names to try -dnl HEADER - Header file to look for (e.g., yajl/yajl_parse.h) +dnl HEADER - Header file to look for (e.g., libxml/parser.h) dnl LIB_NAMES - Space-separated library names for -l flags -dnl EXTRA_CFLAGS - Additional CFLAGS when found (e.g., -DWITH_YAJL) +dnl EXTRA_CFLAGS - Additional CFLAGS when found (e.g., -DWITH_LIBXML2) dnl MIN_VERSION - Optional minimum version for pkg-config check dnl WITH_NAME - Optional --with-X name if different from lowercased NAME @@ -208,7 +208,7 @@ if test "${_msc_header_dir}" = "."; then _msc_check_inc_path="$4" fi else - # Header with subdirectory (e.g., "yajl/yajl_parse.h") + # Header with subdirectory (e.g., "libxml/parser.h") if test -e "$4/include/$2"; then _msc_check_inc_path="$4/include" elif test -e "$4/$2"; then diff --git a/build/win32/CMakeLists.txt b/build/win32/CMakeLists.txt index fbf39f08d9..dc6cdeec5c 100644 --- a/build/win32/CMakeLists.txt +++ b/build/win32/CMakeLists.txt @@ -7,6 +7,8 @@ option(WITH_LUA "Include LUA support" ON) option(WITH_LIBXML2 "Include LibXML2 support" ON) option(WITH_MAXMIND "Include MaxMind support" ON) option(WITH_CURL "Include CURL support" ON)
+set(JSON_BACKEND "simdjson" CACHE STRING "Select internal JSON backend (simdjson or jsoncons)") +set_property(CACHE JSON_BACKEND PROPERTY STRINGS simdjson jsoncons) option(USE_ASAN "Build with Address Sanitizer" OFF) @@ -51,6 +53,8 @@ target_compile_definitions(libinjection PRIVATE LIBINJECTION_VERSION="${LIBINJEC project(mbedcrypto C) set(MBEDTLS_DIR ${BASE_DIR}/others/mbedtls) +set(SIMDJSON_DIR ${BASE_DIR}/others/simdjson/singleheader) +set(JSONCONS_DIR ${BASE_DIR}/others/jsoncons/include) add_library(mbedcrypto STATIC ${MBEDTLS_DIR}/library/base64.c ${MBEDTLS_DIR}/library/sha1.c ${MBEDTLS_DIR}/library/md5.c ${MBEDTLS_DIR}/library/platform_util.c ${MBEDTLS_DIR}/library/constant_time.c) @@ -87,7 +91,25 @@ set(PACKAGE_VERSION "${PROJECT_VERSION}") set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") set(PACKAGE_TARNAME "${PACKAGE_NAME}") -set(HAVE_YAJL 1) # should always be one, mandatory dependency +if(NOT JSON_BACKEND STREQUAL "simdjson" AND NOT JSON_BACKEND STREQUAL "jsoncons") + message(FATAL_ERROR "Unsupported JSON_BACKEND '${JSON_BACKEND}'. 
Use simdjson or jsoncons.") +endif() + +unset(MSC_JSON_BACKEND_SIMDJSON) +unset(MSC_JSON_BACKEND_JSONCONS) +set(JSON_BACKEND_SIMDJSON_SOURCE ${BASE_DIR}/src/request_body_processor/json_backend_simdjson.cc) +set(JSON_BACKEND_JSONCONS_SOURCE ${BASE_DIR}/src/request_body_processor/json_backend_jsoncons.cc) + +if(JSON_BACKEND STREQUAL "simdjson") + set(MSC_JSON_BACKEND_SIMDJSON 1) + set(JSON_BACKEND_SOURCES ${JSON_BACKEND_SIMDJSON_SOURCE} ${SIMDJSON_DIR}/simdjson.cpp) + set(JSON_BACKEND_INCLUDE_DIR ${SIMDJSON_DIR}) +else() + set(MSC_JSON_BACKEND_JSONCONS 1) + set(JSON_BACKEND_SOURCES ${JSON_BACKEND_JSONCONS_SOURCE}) + set(JSON_BACKEND_INCLUDE_DIR ${JSONCONS_DIR}) +endif() + set(HAVE_GEOIP 0) # should always be zero, no conan package available set(HAVE_SSDEEP 0) # should always be zero, no conan package available @@ -119,7 +141,6 @@ macro(include_package package flag) endif() endmacro() -include_package(yajl HAVE_YAJL) include_package(libxml2 HAVE_LIBXML2) include_package(lua HAVE_LUA) include_package(CURL HAVE_CURL) @@ -133,11 +154,13 @@ include_package(maxminddb HAVE_MAXMIND) set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) file(GLOB_RECURSE libModSecuritySources ${BASE_DIR}/src/*.cc) +list(REMOVE_ITEM libModSecuritySources ${JSON_BACKEND_SIMDJSON_SOURCE} ${JSON_BACKEND_JSONCONS_SOURCE}) +list(APPEND libModSecuritySources ${JSON_BACKEND_SOURCES}) add_library(libModSecurity SHARED ${libModSecuritySources}) target_compile_definitions(libModSecurity PRIVATE WITH_PCRE2) -target_include_directories(libModSecurity PRIVATE ${BASE_DIR} ${BASE_DIR}/headers ${BASE_DIR}/others ${MBEDTLS_DIR}/include) +target_include_directories(libModSecurity PRIVATE ${BASE_DIR} ${BASE_DIR}/headers ${BASE_DIR}/others ${MBEDTLS_DIR}/include ${JSON_BACKEND_INCLUDE_DIR}) target_link_libraries(libModSecurity PRIVATE pcre2::pcre2 libinjection mbedcrypto Poco::Poco Iphlpapi.lib) macro(add_package_dependency project compile_definition link_library flag) @@ -147,7 +170,6 @@ macro(add_package_dependency project 
compile_definition link_library flag) endif() endmacro() -add_package_dependency(libModSecurity WITH_YAJL yajl::yajl HAVE_YAJL) add_package_dependency(libModSecurity WITH_LIBXML2 LibXml2::LibXml2 HAVE_LIBXML2) add_package_dependency(libModSecurity WITH_LUA lua::lua HAVE_LUA) if(HAVE_LUA) @@ -164,17 +186,28 @@ project(libModSecurityTests) function(setTestTargetProperties executable) target_compile_definitions(${executable} PRIVATE WITH_PCRE2) - target_include_directories(${executable} PRIVATE ${BASE_DIR} ${BASE_DIR}/headers) + target_include_directories(${executable} PRIVATE ${BASE_DIR} ${BASE_DIR}/headers ${JSONCONS_DIR}) target_link_libraries(${executable} PRIVATE libModSecurity pcre2::pcre2 dirent::dirent) - add_package_dependency(${executable} WITH_YAJL yajl::yajl HAVE_YAJL) endfunction() # unit tests file(GLOB unitTestSources ${BASE_DIR}/test/unit/*.cc) + +list(REMOVE_ITEM unitTestSources + ${BASE_DIR}/test/unit/json_backend_depth_tests.cc +) + add_executable(unit_tests ${unitTestSources} ${BASE_DIR}/test/common/custom_debug_log.cc) setTestTargetProperties(unit_tests) target_compile_options(unit_tests PRIVATE /wd4805) +add_executable(json_backend_depth_tests + ${BASE_DIR}/test/unit/json_backend_depth_tests.cc + ${BASE_DIR}/test/common/custom_debug_log.cc +) +setTestTargetProperties(json_backend_depth_tests) +target_compile_options(json_backend_depth_tests PRIVATE /wd4805) + # regression tests file(GLOB regressionTestsSources ${BASE_DIR}/test/regression/*.cc) add_executable(regression_tests ${regressionTestsSources} ${BASE_DIR}/test/common/custom_debug_log.cc) @@ -193,6 +226,12 @@ add_regression_test_capability(WITH_MAXMIND HAVE_MAXMIND) enable_testing() +add_test( + NAME json_backend_depth_tests + COMMAND json_backend_depth_tests + WORKING_DIRECTORY ${BASE_DIR}/test +) + file(READ ${BASE_DIR}/test/test-suite.in TEST_FILES_RAW) string(REPLACE "\n" ";" TEST_FILES ${TEST_FILES_RAW}) @@ -212,15 +251,25 @@ foreach(TEST_FILE ${TEST_FILES}) # test name 
get_filename_component(TEST_NAME ${TEST_FILE} NAME_WE) + # json_backend_depth_tests is built as a standalone executable, + # so skip automatic registration through unit_tests + if(TEST_NAME STREQUAL "json_backend_depth_tests") + continue() + endif() + # determine test runner based on test path prefix string(FIND ${TEST_FILE} "test-cases/regression/" is_regression_test) if(is_regression_test EQUAL 0) - set(TEST_RUNNER "regression_tests") + set(TEST_RUNNER "regression_tests") else() - set(TEST_RUNNER "unit_tests") + set(TEST_RUNNER "unit_tests") endif() - add_test(NAME ${TEST_NAME} COMMAND ${TEST_RUNNER} ${TEST_FILE} WORKING_DIRECTORY ${BASE_DIR}/test) + add_test( + NAME ${TEST_NAME} + COMMAND ${TEST_RUNNER} ${TEST_FILE} + WORKING_DIRECTORY ${BASE_DIR}/test + ) endif() endforeach() @@ -232,7 +281,6 @@ setTestTargetProperties(benchmark) add_executable(rules_optimization ${BASE_DIR}/test/optimization/optimization.cc) setTestTargetProperties(rules_optimization) - # examples # diff --git a/build/win32/conanfile.txt b/build/win32/conanfile.txt index b8f9721d0a..0eddc175e4 100644 --- a/build/win32/conanfile.txt +++ b/build/win32/conanfile.txt @@ -1,5 +1,4 @@ [requires] -yajl/2.1.0 pcre2/10.42 libxml2/2.12.6 lua/5.4.6 diff --git a/build/win32/config.h.cmake b/build/win32/config.h.cmake index 2f6a73085e..d56ce7c56f 100644 --- a/build/win32/config.h.cmake +++ b/build/win32/config.h.cmake @@ -57,12 +57,15 @@ /* Define if SSDEEP is available */ #cmakedefine HAVE_SSDEEP -/* Define if YAJL is available */ -#cmakedefine HAVE_YAJL - /* Define if libcurl is available */ #cmakedefine HAVE_CURL +/* Define if jsoncons is the selected internal JSON backend */ +#cmakedefine MSC_JSON_BACKEND_JSONCONS + +/* Define if simdjson is the selected internal JSON backend */ +#cmakedefine MSC_JSON_BACKEND_SIMDJSON + /* Name of package */ #define PACKAGE "@PACKAGE_NAME@" @@ -89,4 +92,4 @@ #cmakedefine STDC_HEADERS #endif -#endif // ndef MODSECURITY_CONFIG_H \ No newline at end of file +#endif // 
ndef MODSECURITY_CONFIG_H diff --git a/build/yajl.m4 b/build/yajl.m4 deleted file mode 100644 index 06271e1fea..0000000000 --- a/build/yajl.m4 +++ /dev/null @@ -1,33 +0,0 @@ -dnl Check for YAJL Libraries -dnl Sets: -dnl YAJL_CFLAGS -dnl YAJL_LDADD -dnl YAJL_LDFLAGS -dnl YAJL_VERSION -dnl YAJL_DISPLAY -dnl YAJL_FOUND - -AC_DEFUN([PROG_YAJL], [ -MSC_CHECK_LIB([YAJL], [yajl2 yajl], [yajl/yajl_parse.h], [yajl], [-DWITH_YAJL]) - -# FIX: if the include directory in CFLAGS ends with "include/yajl", -# remove the suffix "/yajl". The library header files are included -# using the prefix (e.g., #include <yajl/yajl_parse.h>), and -# this is even the case for the library itself (e.g., -# yajl_tree.h includes yajl/yajl_common.h). -_msc_yajl_new_cflags="" -for _msc_yajl_flag in $YAJL_CFLAGS; do - case "$_msc_yajl_flag" in - -I*/include/yajl) - _msc_yajl_new_flag="${_msc_yajl_flag%/yajl}" - _msc_yajl_new_cflags="$_msc_yajl_new_cflags $_msc_yajl_new_flag" - ;; - *) - _msc_yajl_new_cflags="$_msc_yajl_new_cflags $_msc_yajl_flag" - ;; - esac -done -YAJL_CFLAGS="$_msc_yajl_new_cflags" -YAJL_DISPLAY="${YAJL_LDADD}, ${YAJL_CFLAGS}" - -]) # AC_DEFUN [PROG_YAJL] diff --git a/configure.ac b/configure.ac index 03295be063..679d7284e6 100644 --- a/configure.ac +++ b/configure.ac @@ -27,7 +27,7 @@ m4_define([msc_version_with_patchlevel], [msc_version_major.msc_version_minor.msc_version_patchlevel]) m4_define([msc_version_git], - [m4_esyscmd_s(git describe)]) + [m4_esyscmd_s(git describe --tags --always 2>/dev/null || echo unknown)]) m4_define([msc_version_info], [msc_version_c_plus_a:msc_version_patchlevel:msc_version_minor]) @@ -62,6 +62,23 @@ PKG_PROG_PKG_CONFIG # Set C++ standard version and check if compiler supports it.
AX_CXX_COMPILE_STDCXX(17, noext, mandatory) +AC_ARG_WITH([json-backend], + [AS_HELP_STRING([--with-json-backend=BACKEND], + [Select internal JSON backend: simdjson or jsoncons [default=simdjson]])], + [json_backend="$withval"], + [json_backend="simdjson"]) + +case "$json_backend" in + simdjson|jsoncons) + ;; + *) + AC_MSG_ERROR([Unsupported JSON backend '$json_backend'. Use simdjson or jsoncons.]) + ;; +esac + +JSON_BACKEND="$json_backend" +AC_SUBST([JSON_BACKEND]) + # Check for libinjection if ! test -f "${srcdir}/others/libinjection/src/libinjection_html5.c"; then AC_MSG_ERROR([\ @@ -80,7 +97,7 @@ AC_MSG_ERROR([\ ]) fi # Libinjection version -AC_DEFUN([LIBINJECTION_VERSION], m4_esyscmd_s(cd "others/libinjection" && git describe && cd ../..)) +AC_DEFUN([LIBINJECTION_VERSION], m4_esyscmd_s(cd "others/libinjection" && (git describe --tags --always 2>/dev/null || echo unknown) && cd ../..)) AC_SUBST([LIBINJECTION_VERSION]) # Check for Mbed TLS @@ -101,16 +118,68 @@ AC_MSG_ERROR([\ ]) fi # Mbed TLS version -AC_DEFUN([MBEDTLS_VERSION], m4_esyscmd_s(cd "others/mbedtls" && git describe && cd ../..)) +AC_DEFUN([MBEDTLS_VERSION], m4_esyscmd_s(cd "others/mbedtls" && (git describe --tags --always 2>/dev/null || echo unknown) && cd ../..)) -# SecLang test version -AC_DEFUN([SECLANG_TEST_VERSION], m4_esyscmd_s(cd "test/test-cases/secrules-language-tests" && git log -1 --format="%h" --abbrev-commit && cd ../../..)) +# Check for selected JSON backend +if test "x$json_backend" = "xsimdjson"; then +if ! test -f "${srcdir}/others/simdjson/singleheader/simdjson.h" || \ + ! test -f "${srcdir}/others/simdjson/singleheader/simdjson.cpp"; then +AC_MSG_ERROR([\ + + + simdjson was not found within ModSecurity source directory. + + simdjson code is available as part of ModSecurity source code in a format + of a git-submodule. git-submodule allow us to specify the correct version of + simdjson and still uses the simdjson repository to download it. 
+ + You can download simdjson using git: + + $ git submodule update --init --recursive + + ]) +fi +JSON_BACKEND_VERSION=`cd "${srcdir}/others/simdjson" && git describe --tags --always 2>/dev/null || echo unknown` +AC_DEFINE([MSC_JSON_BACKEND_SIMDJSON], [1], + [Define if simdjson is the selected internal JSON backend]) +elif test "x$json_backend" = "xjsoncons"; then +if ! test -d "${srcdir}/others/jsoncons/include" || \ + ! test -f "${srcdir}/others/jsoncons/include/jsoncons/json.hpp"; then +AC_MSG_ERROR([\ + + + jsoncons was not found within ModSecurity source directory. + + jsoncons code is available as part of ModSecurity source code in a format + of a git-submodule. git-submodule allow us to specify the correct version of + jsoncons and still uses the jsoncons repository to download it. + + You can download jsoncons using git: + + $ git submodule update --init --recursive + ]) +fi +JSON_BACKEND_VERSION=`cd "${srcdir}/others/jsoncons" && git describe --tags --always 2>/dev/null || echo unknown` +AC_DEFINE([MSC_JSON_BACKEND_JSONCONS], [1], + [Define if jsoncons is the selected internal JSON backend]) +fi +AC_SUBST([JSON_BACKEND_VERSION]) + +AC_ARG_ENABLE([json-audit-instrumentation], + [AS_HELP_STRING([--enable-json-audit-instrumentation], + [Enable optional JSON audit instrumentation for benchmark builds [default=no]])], + [enable_json_audit_instrumentation="$enableval"], + [enable_json_audit_instrumentation="no"]) -# Check for yajl -PROG_YAJL +AS_CASE([$enable_json_audit_instrumentation], + [yes], [AC_DEFINE([MSC_JSON_AUDIT_INSTRUMENTATION], [1], + [Define if optional JSON audit instrumentation is enabled])], + [no], [], + [AC_MSG_ERROR([Unsupported value '$enable_json_audit_instrumentation' for --enable-json-audit-instrumentation. 
Use yes or no.])]) -AM_CONDITIONAL([YAJL_VERSION], [test "$YAJL_VERSION" != ""]) +# SecLang test version +AC_DEFUN([SECLANG_TEST_VERSION], m4_esyscmd_s(cd "test/test-cases/secrules-language-tests" && git log -1 --format="%h" --abbrev-commit && cd ../../..)) # Check for LibGeoIP PROG_GEOIP @@ -306,14 +375,7 @@ fi # Decide if we want to build the tests or not. -buildTestUtilities=false -if test "x$YAJL_FOUND" = "x1"; then - # Regression tests will not be able to run without the logging support. - # But we still have the unit tests. - # if test "$debugLogs" = "true"; then - buildTestUtilities=true - # fi -fi +buildTestUtilities=true AM_CONDITIONAL([TEST_UTILITIES], [test $buildTestUtilities = true]) @@ -328,6 +390,8 @@ fi AM_CONDITIONAL([EXAMPLES], [test $buildExamples = true]) AM_CONDITIONAL([BUILD_PARSER], [test $buildParser = true]) AM_CONDITIONAL([USE_MUTEX_ON_PM], [test $mutexPm = true]) +AM_CONDITIONAL([JSON_BACKEND_SIMDJSON], [test "x$json_backend" = "xsimdjson"]) +AM_CONDITIONAL([JSON_BACKEND_JSONCONS], [test "x$json_backend" = "xjsoncons"]) # General link options @@ -422,6 +486,8 @@ AS_ECHO_N(" + libInjection ....") echo LIBINJECTION_VERSION AS_ECHO_N(" + Mbed TLS ....") echo MBEDTLS_VERSION +AS_ECHO_N(" + JSON backend ....") +echo "$JSON_BACKEND ($JSON_BACKEND_VERSION)" AS_ECHO_N(" + SecLang tests ....") echo SECLANG_TEST_VERSION @@ -451,7 +517,6 @@ if test "x$GEOIP_FOUND" = "x2" && test "x$MAXMIND_FOUND" = "x2"; then fi MSC_STATUS_LIB([LibCURL ], [CURL]) -MSC_STATUS_LIB([YAJL ], [YAJL]) MSC_STATUS_LIB([LMDB ], [LMDB]) MSC_STATUS_LIB([LibXML2 ], [LIBXML2]) MSC_STATUS_LIB([SSDEEP ], [SSDEEP]) @@ -532,4 +597,3 @@ if test "$aflFuzzer" = "true"; then echo " $ export CC=afl-clang-fast " echo " " fi - diff --git a/doc/Makefile.am b/doc/Makefile.am index dfada090ff..f6aebff44d 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -6,7 +6,8 @@ ACLOCAL_AMFLAGS = -I build # distribution of the Doxygen configuration file EXTRA_DIST = \ - doxygen.cfg + doxygen.cfg \ + 
jsoncons_number_scan_assessment.md MAINTAINERCLEANFILES = \ @@ -14,4 +15,3 @@ MAINTAINERCLEANFILES = \ doxygen_sqlite3.db \ html \ latex - diff --git a/doc/jsoncons_number_scan_assessment.md b/doc/jsoncons_number_scan_assessment.md new file mode 100644 index 0000000000..95d40ac3e9 --- /dev/null +++ b/doc/jsoncons_number_scan_assessment.md @@ -0,0 +1,159 @@ +# Assessment of the jsoncons Number Path + +# 1. Deutsch + +## 1.1 Zweck + +Dieses Dokument beschreibt den aktuellen Zahlpfad des `jsoncons`-Backends in dieser Repository-Version und die dazu ergaenzten Regression-Tests. Es dokumentiert den sichtbaren Implementierungsstand belegt am Code und an der vendorten `jsoncons`-Dokumentation, ohne Aenderungen an Third-Party-Code vorauszusetzen. + +## 1.2 Zusammenfassung + +Die aktuelle Implementierung ist im gegebenen Scope funktional korrekt und abgeschlossen, auch wenn der zusaetzliche Zahlenscan eine bekannte Performance-Kostenstelle bleibt. + +Der aktuelle Code verarbeitet Zahlenevents im `jsoncons`-Backend ueber `emitEvent(...)` und verwendet dafuer zusaetzlich einen `RawJsonTokenCursor`, der Rohzahlentoken direkt aus dem Original-Input rekonstruiert (`src/request_body_processor/json_backend_jsoncons.cc:242-252`, `src/request_body_processor/json_backend_jsoncons.cc:523-539`, `src/request_body_processor/json_backend_jsoncons.cc:577-705`). + +Eine backend-only Entfernung dieses Scans ist im sichtbaren Scope nicht belastbar belegt, weil der oeffentliche Cursor-Kontext der vendorten `jsoncons`-Version hier keine nutzbaren `begin_position()`- und `end_position()`-Werte bereitstellt (`others/jsoncons/include/jsoncons/json_cursor.hpp:405-457`, `others/jsoncons/include/jsoncons/ser_utils.hpp:18-46`). 
+ +Als Regression-Schutz wurden exakte Lexemtests am `JsonEventSink::on_number(std::string_view)`-Rand und zusaetzliche End-to-End-Regressionsfaelle fuer fehlende Root-Scalar-Zahlen ergaenzt (`test/unit/json_backend_depth_tests.cc:149-316`, `test/test-cases/regression/request-body-parser-json-backend-edgecases.json:47-360`). + +## 1.3 Technische Umsetzung im aktuellen Code + +`JSONAdapter::parse(...)` ist der gemeinsame Eintrittspunkt und ruft bei einem Build mit `MSC_JSON_BACKEND_JSONCONS` den Pfad `parseDocumentWithJsoncons(...)` auf (`src/request_body_processor/json_adapter.cc:59-78`). + +`parseDocumentWithJsoncons(...)` konfiguriert `jsoncons::json_options` mit `max_nesting_depth`, `lossless_number(true)` und `lossless_bignum(true)`, baut einen `jsoncons::json_string_cursor` ueber dem Input auf und initialisiert parallel dazu einen `RawJsonTokenCursor` ueber demselben Eingabestring (`src/request_body_processor/json_backend_jsoncons.cc:716-760`). + +Der zusaetzliche Rohscan ist im lokalen Hilfstyp `RawJsonTokenCursor` implementiert. `consumeNextNumberToken(...)` sucht ab der aktuellen Cursor-Position zuerst mit `skipToNextNumberToken(...)` nach dem naechsten Zahlentoken und liest das Token dann mit `consumeNumberAt(...)` ein (`src/request_body_processor/json_backend_jsoncons.cc:242-252`, `src/request_body_processor/json_backend_jsoncons.cc:425-486`, `src/request_body_processor/json_backend_jsoncons.cc:523-539`). + +`consumeNumberAt(...)` behandelt dabei sichtbar Vorzeichen, Ganzzahlteil, Nachkommateil und Exponent einschliesslich Exponent-Vorzeichen (`src/request_body_processor/json_backend_jsoncons.cc:429-486`). `skipInsignificantAt(...)` ueberspringt Leerraum, Komma und Doppelpunkt, waehrend `isNumberBoundary(...)` Leerraum sowie `,`, `]` und `}` als Zahlgrenzen erkennt (`src/request_body_processor/json_backend_jsoncons.cc:298-315`). + +Im Eventpfad gibt es zwei relevante Zahlzweige. 
+ +- Fuer `string_value`-Events, die intern als numerische Stringevents erkannt werden, versucht der Code zuerst `advanceExactNumber(decoded_number, ...)`. Wenn das gelingt, wird der dekodierte String direkt an `on_number(...)` weitergegeben. Wenn das nicht gelingt, faellt der Code auf `consumeNextNumberToken(...)` und danach auf `rawNumberFromContext(...)` zurueck (`src/request_body_processor/json_backend_jsoncons.cc:622-658`). +- Fuer `int64_value`, `uint64_value`, `double_value` und `half_value` wird immer zuerst `consumeNextNumberToken(...)` aufgerufen. Das Ergebnis wird dann ueber `rawNumberFromContext(...)` materialisiert und an `on_number(...)` weitergereicht (`src/request_body_processor/json_backend_jsoncons.cc:684-705`). + +`rawNumberFromContext(...)` arbeitet in klarer Reihenfolge. Zuerst versucht die Funktion, einen Kandidaten ueber `context.begin_position()` und `context.end_position()` aus dem Original-Input auszuschneiden. Falls dieser Kandidat nicht passt, verwendet sie den bereits gescannten Tokenstring. Fuer numerische Stringevents kann sie zuletzt noch den dekodierten Event-String verwenden, sofern dieser selbst ein gueltiges JSON-Zahllexem ist (`src/request_body_processor/json_backend_jsoncons.cc:545-575`). + +Die aktivierten Optionen beeinflussen die von `jsoncons` gelieferten Eventformen. Laut vendorter Dokumentation liest `lossless_number(true)` Zahlen mit Nachkommateil oder Exponent als String mit `semantic_tag::bigdec`, und `lossless_bignum(true)` behaelt uebergrosse Ganzzahlen als String mit `semantic_tag::bigint` und uebergrosse Gleitkommawerte als `bigdec` (`others/jsoncons/doc/ref/corelib/basic_json_options.md:25-29`). + +Der Parsercode belegt diese Eventformen direkt. 
Bei Integer-Ueberlauf emittiert der Parser `string_value(..., semantic_tag::bigint, ...)`, und bei aktivem `lossless_number_` emittiert er fuer Zahlen mit Nachkommateil oder Exponent `string_value(..., semantic_tag::bigdec, ...)` (`others/jsoncons/include/jsoncons/json_parser.hpp:2458-2554`). + +Die internen Parserpositionsdaten sind im Parser selbst vorhanden. Beim ersten `-`, bei `0` und bei `1` bis `9` setzt der Parser `begin_position_`, `parse_number(...)` verarbeitet Integer-, Fraction- und Exponent-Anteile, und die Parserklasse ueberschreibt `begin_position()` und `end_position()` (`others/jsoncons/include/jsoncons/json_parser.hpp:1096-1126`, `others/jsoncons/include/jsoncons/json_parser.hpp:1720-1940`, `others/jsoncons/include/jsoncons/json_parser.hpp:2380-2392`). + +## 1.4 Warum die aktuelle Implementierung korrekt ist + +Im sichtbaren Scope ist die beobachtbare Vertragsgrenze das an `JsonEventSink::on_number(std::string_view)` weitergereichte Rohzahllexem. Genau an dieser Grenze arbeitet die aktuelle Implementierung: primitive Zahlenevents werden ueber den Rohscan materialisiert, numerische Stringevents werden bevorzugt ueber `advanceExactNumber(...)` synchronisiert und sonst ebenfalls ueber den Rohscan abgesichert (`src/request_body_processor/json_backend_jsoncons.cc:622-705`). + +Der lokale Rohscan bildet die JSON-Zahlsyntax im sichtbaren Code konkret ab. Der Scanner behandelt negatives Vorzeichen, Nachkommateil, Exponent, Exponent-Vorzeichen, Leerraum sowie die in Arrays und Objekten sichtbaren Trenner (`src/request_body_processor/json_backend_jsoncons.cc:298-315`, `src/request_body_processor/json_backend_jsoncons.cc:425-486`, `src/request_body_processor/json_backend_jsoncons.cc:523-539`). 
+ +Die neuen Unit-Tests pruefen genau diese beobachtbare Eigenschaft ohne Normalisierung: `collectNumberLexemes(...)` parst ueber `JSONAdapter::parse(...)`, sammelt jede `on_number(...)`-Nutzlast als String und vergleicht Root-Scalar-Faelle sowie einen Objekt/Array-Fall mit Whitespace und Trennern exakt gegen die erwarteten Lexeme (`test/unit/json_backend_depth_tests.cc:149-316`). + +Die zusaetzlichen Regressionsfaelle im JSON-Testfile sichern denselben Vertrag noch einmal End-to-End ueber `ARGS:json` und den Debug-Log ab, jeweils mit exakter Stringerwartung fuer das urspruengliche Zahllexem (`test/test-cases/regression/request-body-parser-json-backend-edgecases.json:47-360`). + +Diese Bewertung behauptet nicht, dass der Pfad performance-optimal ist. Sie beschreibt, dass die aktuelle Implementierung im gegebenen Scope das rohe Zahllexem funktional korrekt weiterreicht und deshalb als abgeschlossen bewertet werden kann. + +## 1.5 Warum keine backend-only Entfernung des Scans moeglich ist + +Die sichtbare `jsoncons`-Dokumentation beschreibt `basic_json_cursor::context()` als Rueckgabe des aktuellen `ser_context` (`others/jsoncons/doc/ref/corelib/basic_json_cursor.md:117-119`). In der vendorten Implementierung gibt `basic_json_cursor::context()` tatsaechlich `*this` zurueck (`others/jsoncons/include/jsoncons/json_cursor.hpp:405-407`). + +Im selben Cursorcode sind nur `line()` und `column()` ueberschrieben (`others/jsoncons/include/jsoncons/json_cursor.hpp:450-457`). Die Basisklasse `ser_context` liefert fuer `begin_position()` und `end_position()` dagegen standardmaessig `0` (`others/jsoncons/include/jsoncons/ser_utils.hpp:18-46`). + +Die vendorte Dokumentation sagt zu `ser_context`, dass `begin_position()` und `end_position()` derzeit nur fuer den JSON-Parser unterstuetzt werden (`others/jsoncons/doc/ref/corelib/ser_context.md:20-33`). 
Der Parser selbst besitzt diese Positionsdaten auch intern (`others/jsoncons/include/jsoncons/json_parser.hpp:1096-1126`, `others/jsoncons/include/jsoncons/json_parser.hpp:2380-2392`), aber im sichtbaren Backend-Code wird mit `cursor.context()` gearbeitet, nicht mit dem Parserobjekt selbst (`src/request_body_processor/json_backend_jsoncons.cc:758-760`). + +Damit ist ein backend-only Fast Path ueber den oeffentlichen Cursor-Kontext in dieser Repository-Version nicht belastbar belegt. Genau deshalb bleibt der bestehende Zahlenscan im aktuellen Scope funktional notwendig. + +## 1.6 Testabdeckung + +Die ergaenzten Tests in `test/unit/json_backend_depth_tests.cc` fuegen einen kleinen `NumberCollectingSink` hinzu, der nur `on_number(std::string_view)` sammelt, und einen Helper, der `JSONAdapter::parse(...)` ueber beliebigen Input ausfuehrt (`test/unit/json_backend_depth_tests.cc:70-186`). + +Die Root-Scalar-Regressionsfaelle pruefen unveraenderte Rohlexeme fuer `0`, `-0`, `1.0`, `1e3`, `-1.25e-4`, `18446744073709551615`, `18446744073709551616` und `123456789012345678901234567890` (`test/unit/json_backend_depth_tests.cc:229-253`). + +Ein weiterer Test prueft dieselben Lexemtypen in einem gemischten Objekt/Array mit Leerraum und Trennern und vergleicht die gesamte `on_number(...)`-Sequenz exakt (`test/unit/json_backend_depth_tests.cc:256-275`). + +Das bestehende Regressionsfile wurde nur um die bislang fehlenden Root-Scalar-Faelle `0`, `-1.25e-4`, `18446744073709551615` und `18446744073709551616` ergaenzt. Bereits vorhandene Faelle wie `1.0`, `1e3`, `-0` und `123456789012345678901234567890` blieben unveraendert (`test/test-cases/regression/request-body-parser-json-backend-edgecases.json:47-360`, `test/test-cases/regression/request-body-parser-json-backend-edgecases.json:362-405`). + +Die Tests behaupten bewusst nicht, dass interne `jsoncons`-Positionsdaten korrekt nach aussen propagiert werden. 
Sie sichern das beobachtbare Backend-Verhalten am `on_number(...)`-Rand und ueber den bestehenden ModSecurity-Regressionspfad ab. + +## 1.7 Fazit + +Die aktuelle `jsoncons`-Implementierung ist in diesem Repository-Stand im gegebenen Scope funktional korrekt und abgeschlossen. Der zusaetzliche Zahlenscan ist sichtbar vorhanden und bleibt eine bekannte Performance-Kostenstelle, ist im aktuell erlaubten backend-only Rahmen aber weiterhin die massgebliche und notwendige Quelle fuer das Rohzahllexem. + +Die neuen Tests liefern dafuer gezielten Regression-Schutz, ohne Third-Party-Code zu aendern oder unbelegte Aussagen ueber nicht sichtbare Schnittstellen zu treffen. + +# 2. English + +## 2.1 Purpose + +This document records the current number-token path of the `jsoncons` backend in this repository revision and the regression tests that were added around it. It is intentionally limited to what is directly supported by the repository code and the vendored `jsoncons` documentation. + +## 2.2 Summary + +The current implementation is functionally correct and complete within the given scope, even though the additional numeric scan remains a known performance cost. + +In the current code, numeric events are handled by `emitEvent(...)` together with an additional `RawJsonTokenCursor` that reconstructs raw numeric tokens from the original input (`src/request_body_processor/json_backend_jsoncons.cc:242-252`, `src/request_body_processor/json_backend_jsoncons.cc:523-539`, `src/request_body_processor/json_backend_jsoncons.cc:577-705`). + +A backend-only removal of that scan is not currently supported by the visible scope, because the public cursor context exposed by the vendored `jsoncons` version does not provide usable `begin_position()` and `end_position()` values here (`others/jsoncons/include/jsoncons/json_cursor.hpp:405-457`, `others/jsoncons/include/jsoncons/ser_utils.hpp:18-46`). 
+ +As regression protection, exact lexeme comparisons were added at the `JsonEventSink::on_number(std::string_view)` boundary together with additional end-to-end regression cases for the missing root-scalar numeric inputs (`test/unit/json_backend_depth_tests.cc:149-316`, `test/test-cases/regression/request-body-parser-json-backend-edgecases.json:47-360`). + +## 2.3 Current implementation + +`JSONAdapter::parse(...)` is the shared entry point and dispatches to `parseDocumentWithJsoncons(...)` when ModSecurity is built with `MSC_JSON_BACKEND_JSONCONS` (`src/request_body_processor/json_adapter.cc:59-78`). + +`parseDocumentWithJsoncons(...)` configures `jsoncons::json_options` with `max_nesting_depth`, `lossless_number(true)`, and `lossless_bignum(true)`, creates a `jsoncons::json_string_cursor` on the input, and also creates a `RawJsonTokenCursor` over the same input text (`src/request_body_processor/json_backend_jsoncons.cc:716-760`). + +The additional raw scan is implemented in the local `RawJsonTokenCursor`. `consumeNextNumberToken(...)` first locates the next numeric token with `skipToNextNumberToken(...)` and then reads the token with `consumeNumberAt(...)` (`src/request_body_processor/json_backend_jsoncons.cc:242-252`, `src/request_body_processor/json_backend_jsoncons.cc:425-486`, `src/request_body_processor/json_backend_jsoncons.cc:523-539`). + +`consumeNumberAt(...)` explicitly handles sign, integer part, fraction, exponent, and exponent sign (`src/request_body_processor/json_backend_jsoncons.cc:429-486`). `skipInsignificantAt(...)` skips whitespace, commas, and colons, while `isNumberBoundary(...)` treats whitespace together with `,`, `]`, and `}` as numeric boundaries (`src/request_body_processor/json_backend_jsoncons.cc:298-315`). + +There are two relevant numeric event branches. + +- For `string_value` events recognized as numeric string events, the code first attempts `advanceExactNumber(decoded_number, ...)`. 
If that succeeds, the decoded string is sent directly to `on_number(...)`. If it does not, the code falls back to `consumeNextNumberToken(...)` and `rawNumberFromContext(...)` (`src/request_body_processor/json_backend_jsoncons.cc:622-658`). +- For `int64_value`, `uint64_value`, `double_value`, and `half_value`, the code always calls `consumeNextNumberToken(...)` first, then materializes the token through `rawNumberFromContext(...)`, and finally passes it to `on_number(...)` (`src/request_body_processor/json_backend_jsoncons.cc:684-705`). + +`rawNumberFromContext(...)` follows a fixed order. It first attempts to slice a candidate directly from the original input using `context.begin_position()` and `context.end_position()`. If that candidate is not usable, it falls back to the already scanned token. For numeric string events it can finally use the decoded event string, provided that string is itself a valid JSON number lexeme (`src/request_body_processor/json_backend_jsoncons.cc:545-575`). + +The enabled `jsoncons` options change the event shapes delivered by the parser. The vendored documentation states that `lossless_number(true)` reads numbers with fractional parts or exponents as strings tagged `semantic_tag::bigdec`, and that `lossless_bignum(true)` preserves out-of-range integers as strings tagged `semantic_tag::bigint` and out-of-range floating-point values as `bigdec` (`others/jsoncons/doc/ref/corelib/basic_json_options.md:25-29`). + +The parser implementation matches that documentation. On integer overflow it emits `string_value(..., semantic_tag::bigint, ...)`, and when `lossless_number_` is enabled it emits `string_value(..., semantic_tag::bigdec, ...)` for numbers with fractions or exponents (`others/jsoncons/include/jsoncons/json_parser.hpp:2458-2554`). + +The parser also maintains internal position data. 
It sets `begin_position_` when it first sees `-`, `0`, or `1` through `9`, `parse_number(...)` handles integer, fraction, and exponent states, and the parser class overrides `begin_position()` and `end_position()` (`others/jsoncons/include/jsoncons/json_parser.hpp:1096-1126`, `others/jsoncons/include/jsoncons/json_parser.hpp:1720-1940`, `others/jsoncons/include/jsoncons/json_parser.hpp:2380-2392`). + +## 2.4 Why the current implementation is correct + +Within the visible scope, the relevant contract boundary is the raw numeric lexeme passed into `JsonEventSink::on_number(std::string_view)`. That is exactly what the current implementation preserves: primitive numeric events are materialized through the raw scan, and numeric string events are first synchronized through `advanceExactNumber(...)` when possible and otherwise protected by the same raw scan path (`src/request_body_processor/json_backend_jsoncons.cc:622-705`). + +The local raw scanner explicitly models the JSON number syntax visible in this repository. It covers negative signs, fractions, exponents, exponent signs, whitespace, and the delimiters used inside arrays and objects (`src/request_body_processor/json_backend_jsoncons.cc:298-315`, `src/request_body_processor/json_backend_jsoncons.cc:425-486`, `src/request_body_processor/json_backend_jsoncons.cc:523-539`). + +The added unit tests validate that exact observable behavior without normalization. `collectNumberLexemes(...)` parses through `JSONAdapter::parse(...)`, captures every `on_number(...)` payload as a string, and compares both root-scalar cases and a mixed object/array input with whitespace and delimiters against the exact expected lexemes (`test/unit/json_backend_depth_tests.cc:149-316`). 
+ +The added regression cases in the JSON test file check the same contract end-to-end through `ARGS:json` and the debug log, again with exact string expectations for the original numeric lexemes (`test/test-cases/regression/request-body-parser-json-backend-edgecases.json:47-360`). + +This assessment does not claim that the path is performance-optimal. It documents that, within the current scope, the implementation functionally preserves the raw numeric lexeme and can therefore be treated as complete for this repository state. + +## 2.5 Why backend-only removal is not currently supported + +The visible `jsoncons` documentation describes `basic_json_cursor::context()` as returning the current `ser_context` (`others/jsoncons/doc/ref/corelib/basic_json_cursor.md:117-119`). In the vendored implementation, `basic_json_cursor::context()` does in fact return `*this` (`others/jsoncons/include/jsoncons/json_cursor.hpp:405-407`). + +In that same cursor implementation, only `line()` and `column()` are overridden (`others/jsoncons/include/jsoncons/json_cursor.hpp:450-457`). The base `ser_context` implementation returns `0` for `begin_position()` and `end_position()` (`others/jsoncons/include/jsoncons/ser_utils.hpp:18-46`). + +The vendored `ser_context` documentation says that `begin_position()` and `end_position()` are currently only supported for the JSON parser (`others/jsoncons/doc/ref/corelib/ser_context.md:20-33`). The parser itself does have those positions internally (`others/jsoncons/include/jsoncons/json_parser.hpp:1096-1126`, `others/jsoncons/include/jsoncons/json_parser.hpp:2380-2392`), but the visible backend code operates through `cursor.context()`, not through the parser object directly (`src/request_body_processor/json_backend_jsoncons.cc:758-760`). + +For that reason, a backend-only fast path based on the public cursor context is not supported by the currently visible code. 
The existing numeric scan therefore remains the authoritative and necessary source of the raw numeric lexeme in the present scope. + +## 2.6 Test coverage + +The additions in `test/unit/json_backend_depth_tests.cc` introduce a small `NumberCollectingSink` that only records `on_number(std::string_view)` and a helper that executes `JSONAdapter::parse(...)` on arbitrary input (`test/unit/json_backend_depth_tests.cc:70-186`). + +The root-scalar regression cases verify unchanged raw lexemes for `0`, `-0`, `1.0`, `1e3`, `-1.25e-4`, `18446744073709551615`, `18446744073709551616`, and `123456789012345678901234567890` (`test/unit/json_backend_depth_tests.cc:229-253`). + +An additional test validates the same lexeme classes inside a mixed object/array input with whitespace and delimiters and compares the full `on_number(...)` sequence exactly (`test/unit/json_backend_depth_tests.cc:256-275`). + +The existing regression file was extended only with the previously missing root-scalar cases `0`, `-1.25e-4`, `18446744073709551615`, and `18446744073709551616`. Existing cases such as `1.0`, `1e3`, `-0`, and `123456789012345678901234567890` were left in place (`test/test-cases/regression/request-body-parser-json-backend-edgecases.json:47-360`, `test/test-cases/regression/request-body-parser-json-backend-edgecases.json:362-405`). + +The tests intentionally do not claim that internal `jsoncons` position data is propagated outward correctly. They protect the observable backend behavior at the `on_number(...)` boundary and through the established ModSecurity regression path. + +## 2.7 Conclusion + +In this repository state, the current `jsoncons` implementation is functionally correct and complete within the given scope. The additional numeric scan is visibly present and remains a known performance cost, but within the currently allowed backend-only scope it is still the decisive and necessary source for the raw numeric lexeme. 
+ +The added tests provide focused regression protection for that behavior without modifying third-party code or asserting capabilities that are not directly supported by the visible interfaces. diff --git a/docs/benchmark-tests.de.md b/docs/benchmark-tests.de.md new file mode 100644 index 0000000000..34f30ed847 --- /dev/null +++ b/docs/benchmark-tests.de.md @@ -0,0 +1,317 @@ +# Benchmark-Tests (ModSecurity) + +## 1. Überblick + +Dieses Repository enthält zwei Benchmark-Programme in `test/benchmark/`: + +1. `benchmark` (`test/benchmark/benchmark.cc`) + - Zweck: wiederholte Ausführung einer ModSecurity-Transaktion in einer Schleife. +2. `json_benchmark` (`test/benchmark/json_benchmark.cc`) + - Zweck: Benchmarking der JSON-Request-Body-Verarbeitung mit festen Szenarien. + +Relevante Dateien: + +- `test/benchmark/benchmark.cc` +- `test/benchmark/json_benchmark.cc` +- `test/benchmark/run-json-benchmarks.sh` +- `test/benchmark/json_benchmark_rules.conf` +- `test/benchmark/basic_rules.conf` +- `test/benchmark/download-owasp-v3-rules.sh` +- `test/benchmark/download-owasp-v4-rules.sh` +- `test/benchmark/Makefile.am` +- `README.md` (Benchmark-Abschnitt) + +--- + +## 2. Voraussetzungen + +### Benötigte Tools + +- POSIX-Shell (`bash`) +- `make` +- C/C++-Toolchain (`gcc`, `g++`) +- Autotools-Bootstrap-Unterstützung aus diesem Repo (`./build.sh`) +- `git` (für Submodule) + +### Benötigte Repository-Vorbereitung + +Der Benchmark-Build benötigt Submodule. + +Verwendete Befehle: + +```bash +./build.sh +git submodule update --init --recursive +./configure +``` + +### Optionale Voraussetzungen + +- `time` für externe Laufzeitmessung von `benchmark` +- Zwei getrennte Build-Verzeichnisse für Backend-Vergleiche mit `run-json-benchmarks.sh` + +### Beobachtete Einschränkungen in dieser Umgebung + +- `./configure` schlug zunächst fehl, bevor Submodule initialisiert wurden. 
+- Beobachtete Fehlermeldung: + +```text +configure: error: + + libInjection was not found within ModSecurity source directory. + ... + $ git submodule update --init --recursive +``` + +Nach dem Submodule-Update lief `./configure` in dieser Umgebung erfolgreich durch. + +--- + +## 3. Build-Anleitung + +## Standardablauf (so für die Verifikation verwendet) + +```bash +./build.sh +git submodule update --init --recursive +./configure +make -j2 -C others +make -j2 -C src libmodsecurity.la +make -j2 -C test/benchmark benchmark json_benchmark +``` + +## Hinweise + +- `test/benchmark/Makefile.am` definiert beide Binaries (`benchmark`, `json_benchmark`). +- `json_benchmark` benötigt `MSC_JSON_BENCHMARK_RULES_DIR` aus dem Build-System. + +## Typische Prüf-Befehle bei Fehlern + +```bash +git submodule status +ls -l test/benchmark/benchmark test/benchmark/json_benchmark +``` + +--- + +## 4. Ausführung + +## `benchmark` + +```bash +cd test/benchmark +./benchmark +./benchmark 1000 +``` + +Aktuelle Usage aus dem Quelltext: + +```text +Usage: benchmark [num_iterations|-h|-?|--help] +``` + +## `json_benchmark` + +```bash +cd test/benchmark +./json_benchmark --scenario numbers --iterations 100 --target-bytes 1048576 +./json_benchmark --scenario utf8 --iterations 50 --output json +``` + +Aktuelle Usage aus dem Quelltext: + +```text +Usage: json_benchmark --scenario NAME [--iterations N] [--target-bytes N] [--depth N] [--include-invalid] [--output json] +``` + +## Batch-Skript: `run-json-benchmarks.sh` + +```bash +test/benchmark/run-json-benchmarks.sh \ + --simdjson-build /pfad/zum/simdjson-build \ + --jsoncons-build /pfad/zum/jsoncons-build +``` + +Optional: + +```bash +test/benchmark/run-json-benchmarks.sh ... --include-invalid +``` + +Das Skript schreibt JSON-Ausgaben von `json_benchmark` in `json-benchmark-results.jsonl`. + +--- + +## 5. 
Beispiele + +## Beispiel A: verifizierter Lauf (`benchmark`) + +Ausgeführter Befehl: + +```bash +cd test/benchmark && ./benchmark 10 +``` + +Beobachtete Ausgabe: + +```text +Doing 10 transactions... +Summary: + elapsed_seconds: 0.01 + avg_transaction_ns: 917598.70 + throughput_tx_per_sec: 1089.80 +``` + +## Beispiel B: verifizierter Lauf (`json_benchmark`) + +Ausgeführter Befehl: + +```bash +cd test/benchmark && ./json_benchmark --scenario numbers --iterations 5 --target-bytes 256 +``` + +Beobachtete Ausgabe: + +```text +backend: simdjson +scenario: numbers +iterations: 5 +body_bytes: 223 +append_request_body_ns: 4021 +process_request_body_ns: 288499 +total_transaction_ns: 840357 +parse_success_count: 5 +parse_error_count: 0 +ru_maxrss_kb: 33720 +``` + +Auf anderen Systemen unterscheiden sich die absoluten Werte. + +--- + +## 6. Gemessene Metriken + +## `benchmark` + +Aktuelle Summary-Felder: + +- `elapsed_seconds` +- `avg_transaction_ns` +- `throughput_tx_per_sec` + +Diese Werte werden um die gesamte Loop gemessen und am Ende ausgegeben. + +## `json_benchmark` + +Vom Code ausgegebene Metriken: + +- `append_request_body_ns` +- `process_request_body_ns` +- `total_transaction_ns` +- `parse_success_count` +- `parse_error_count` +- `ru_maxrss_kb` + +Optionale Instrumentierungsmetriken sind verfügbar, wenn mit `--enable-json-audit-instrumentation` gebaut wird. + +--- + +## 7. Testdaten und Szenarien + +## `benchmark` + +- Verwendet feste synthetische Request-/Response-Daten aus dem Quelltext. +- Rule-Datei: `test/benchmark/basic_rules.conf`. 
+- Optionale CRS-Erweiterungen über: + - `test/benchmark/download-owasp-v3-rules.sh` + - `test/benchmark/download-owasp-v4-rules.sh` + +## `json_benchmark`-Szenarien + +Im Quelltext unterstützte Szenarien: + +- `large-object` +- `deep-nesting` +- `numbers` +- `utf8` +- `truncated` (benötigt `--include-invalid`) +- `malformed` (benötigt `--include-invalid`) + +Rule-Konfigurationsdatei: + +- `test/benchmark/json_benchmark_rules.conf` + +--- + +## 8. Troubleshooting + +## Problem: `./configure` scheitert wegen fehlender eingebetteter Abhängigkeit + +Prüfen: + +```bash +git submodule status +``` + +Beheben: + +```bash +git submodule update --init --recursive +./configure +``` + +## Problem: Benchmark-Binaries fehlen + +Prüfen: + +```bash +ls -l test/benchmark/benchmark test/benchmark/json_benchmark +``` + +Bauen: + +```bash +make -j2 -C src libmodsecurity.la +make -j2 -C test/benchmark benchmark json_benchmark +``` + +## Problem: Skript meldet „missing benchmark binary“ + +Das Skript erwartet in jedem angegebenen Build-Verzeichnis: + +- `<build-dir>/test/benchmark/json_benchmark` + +Pfad und Executable-Bit prüfen. + +--- + +## 9. Grenzen der Benchmarks + +Was gemessen wird: + +- Kosten der wiederholten Transaction-Ausführung (`benchmark`). +- JSON-Body-Verarbeitung unter festen synthetischen Szenarien (`json_benchmark`). + +Was nicht direkt gemessen wird: + +- Verteiltes Multi-Node-Load-Verhalten. +- Volle Produktions-Traffic-Diversität. +- Latenz-Perzentile (p95/p99) aus externem Lastgenerator. + +Interpretationshinweis: + +- Ergebnisse hängen von Umgebung ab (CPU, Compiler-Flags, Rule-Umfang, Backend-Auswahl, Systemlast). + +--- + +## 10. Dateien und Referenzen + +- `README.md` — Benchmark-Abschnitt und Schnellbefehle. +- `test/benchmark/benchmark.cc` — allgemeiner Transaction-Benchmark. +- `test/benchmark/json_benchmark.cc` — JSON-Szenario-Benchmark. +- `test/benchmark/run-json-benchmarks.sh` — Hilfsskript für Backend-Vergleiche. +- `test/benchmark/json_benchmark_rules.conf` — Rules für JSON-Benchmark. 
+- `test/benchmark/basic_rules.conf` — Baseline-Rules für allgemeinen Benchmark. +- `test/benchmark/download-owasp-v3-rules.sh` — lädt CRS v3 und ergänzt Includes. +- `test/benchmark/download-owasp-v4-rules.sh` — lädt CRS v4 und ergänzt Includes. +- `test/benchmark/Makefile.am` — Benchmark-Targets in Automake. diff --git a/docs/benchmark-tests.en.md b/docs/benchmark-tests.en.md new file mode 100644 index 0000000000..97789f4797 --- /dev/null +++ b/docs/benchmark-tests.en.md @@ -0,0 +1,319 @@ +# Benchmark Tests (ModSecurity) + +## 1. Overview + +This repository contains two benchmark executables in `test/benchmark/`: + +1. `benchmark` (`test/benchmark/benchmark.cc`) + - Purpose: run repeated end-to-end-style ModSecurity transaction processing in a loop. +2. `json_benchmark` (`test/benchmark/json_benchmark.cc`) + - Purpose: benchmark JSON request-body processing with fixed scenarios. + +Relevant files: + +- `test/benchmark/benchmark.cc` +- `test/benchmark/json_benchmark.cc` +- `test/benchmark/run-json-benchmarks.sh` +- `test/benchmark/json_benchmark_rules.conf` +- `test/benchmark/basic_rules.conf` +- `test/benchmark/download-owasp-v3-rules.sh` +- `test/benchmark/download-owasp-v4-rules.sh` +- `test/benchmark/Makefile.am` +- `README.md` (benchmark section) + +--- + +## 2. Prerequisites + +### Required tools + +- POSIX shell (`bash`) +- `make` +- C/C++ toolchain (`gcc`, `g++`) +- Autotools bootstrap support used by this repo (`./build.sh`) +- `git` (required for submodule initialization) + +### Required repository preparation + +The benchmark build requires repository submodules. 
+ +Commands used: + +```bash +./build.sh +git submodule update --init --recursive +./configure +``` + +### Optional prerequisites + +- `time` command for external wall-clock timing of `benchmark` +- Two separate build directories if you want to compare JSON backends with `run-json-benchmarks.sh` + +### Known environment constraints observed during verification + +- Initial `./configure` failed before submodules were initialized. +- Error observed: + +```text +configure: error: + + libInjection was not found within ModSecurity source directory. + ... + $ git submodule update --init --recursive +``` + +After submodule initialization, `./configure` completed successfully in this environment. + +--- + +## 3. Build Instructions + +## Standard build flow used for benchmark verification + +```bash +./build.sh +git submodule update --init --recursive +./configure +make -j2 -C others +make -j2 -C src libmodsecurity.la +make -j2 -C test/benchmark benchmark json_benchmark +``` + +## Notes + +- `test/benchmark/Makefile.am` declares both binaries (`benchmark`, `json_benchmark`). +- `json_benchmark` depends on `MSC_JSON_BENCHMARK_RULES_DIR` provided by the build system. + +## Typical error checks + +If build fails, verify: + +```bash +git submodule status +ls -l test/benchmark/benchmark test/benchmark/json_benchmark +``` + +--- + +## 4. 
Execution + +## `benchmark` + +```bash +cd test/benchmark +./benchmark +./benchmark 1000 +``` + +Current usage string from source: + +```text +Usage: benchmark [num_iterations|-h|-?|--help] +``` + +## `json_benchmark` + +```bash +cd test/benchmark +./json_benchmark --scenario numbers --iterations 100 --target-bytes 1048576 +./json_benchmark --scenario utf8 --iterations 50 --output json +``` + +Current usage string from source: + +```text +Usage: json_benchmark --scenario NAME [--iterations N] [--target-bytes N] [--depth N] [--include-invalid] [--output json] +``` + +## Batch script: `run-json-benchmarks.sh` + +```bash +test/benchmark/run-json-benchmarks.sh \ + --simdjson-build /path/to/simdjson-build \ + --jsoncons-build /path/to/jsoncons-build +``` + +Optional: + +```bash +test/benchmark/run-json-benchmarks.sh ... --include-invalid +``` + +This script appends JSON output lines from `json_benchmark` into `json-benchmark-results.jsonl`. + +--- + +## 5. Examples + +## Example A: verified local run (`benchmark`) + +Command executed: + +```bash +cd test/benchmark && ./benchmark 10 +``` + +Observed output: + +```text +Doing 10 transactions... +Summary: + elapsed_seconds: 0.01 + avg_transaction_ns: 917598.70 + throughput_tx_per_sec: 1089.80 +``` + +## Example B: verified local run (`json_benchmark`) + +Command executed: + +```bash +cd test/benchmark && ./json_benchmark --scenario numbers --iterations 5 --target-bytes 256 +``` + +Observed output: + +```text +backend: simdjson +scenario: numbers +iterations: 5 +body_bytes: 223 +append_request_body_ns: 4021 +process_request_body_ns: 288499 +total_transaction_ns: 840357 +parse_success_count: 5 +parse_error_count: 0 +ru_maxrss_kb: 33720 +``` + +If you run on another machine, absolute numeric values will differ. + +--- + +## 6. 
Measured Metrics + +## `benchmark` + +Current human-readable summary fields: + +- `elapsed_seconds` +- `avg_transaction_ns` +- `throughput_tx_per_sec` + +These are measured around the whole loop and printed at the end. + +## `json_benchmark` + +Metrics printed by code: + +- `append_request_body_ns` +- `process_request_body_ns` +- `total_transaction_ns` +- `parse_success_count` +- `parse_error_count` +- `ru_maxrss_kb` + +Optional instrumentation metrics are available when compiled with `--enable-json-audit-instrumentation`. + +--- + +## 7. Test Data and Scenarios + +## `benchmark` + +- Uses fixed synthetic request/response data from source. +- Rule file: `test/benchmark/basic_rules.conf`. +- Optional CRS additions via: + - `test/benchmark/download-owasp-v3-rules.sh` + - `test/benchmark/download-owasp-v4-rules.sh` + +## `json_benchmark` scenarios + +Supported scenario names in source: + +- `large-object` +- `deep-nesting` +- `numbers` +- `utf8` +- `truncated` (requires `--include-invalid`) +- `malformed` (requires `--include-invalid`) + +Rule configuration file: + +- `test/benchmark/json_benchmark_rules.conf` + +--- + +## 8. Troubleshooting + +## Problem: `./configure` fails with missing embedded dependency + +Check: + +```bash +git submodule status +``` + +Fix: + +```bash +git submodule update --init --recursive +./configure +``` + +## Problem: benchmark binaries missing + +Check: + +```bash +ls -l test/benchmark/benchmark test/benchmark/json_benchmark +``` + +Build: + +```bash +make -j2 -C src libmodsecurity.la +make -j2 -C test/benchmark benchmark json_benchmark +``` + +## Problem: JSON benchmark script reports missing binary + +The script expects, inside each configured build directory: + +- `<build-dir>/test/benchmark/json_benchmark` + +Validate path and executable bit. + +--- + +## 9. Benchmark Limits + +What these benchmarks do measure: + +- Repeated transaction execution path cost (`benchmark`). +- JSON body handling under fixed synthetic scenarios (`json_benchmark`). 
+ +What they do not directly measure: + +- Distributed multi-node load behavior. +- Full production traffic diversity. +- Latency percentiles (p95/p99) across an external load generator. + +Interpretation caution: + +- Results are environment-dependent (CPU, compiler flags, ruleset size, selected backend, system load). + +--- + +## 10. Files and References + +- `README.md` — benchmark section and quick commands. +- `test/benchmark/benchmark.cc` — generic transaction benchmark. +- `test/benchmark/json_benchmark.cc` — JSON scenario benchmark. +- `test/benchmark/run-json-benchmarks.sh` — backend comparison helper script. +- `test/benchmark/json_benchmark_rules.conf` — rules for JSON benchmark. +- `test/benchmark/basic_rules.conf` — baseline rules include for generic benchmark. +- `test/benchmark/download-owasp-v3-rules.sh` — fetch and append CRS v3 includes. +- `test/benchmark/download-owasp-v4-rules.sh` — fetch and append CRS v4 includes. +- `test/benchmark/Makefile.am` — benchmark targets in automake. 
diff --git a/examples/multiprocess_c/Makefile.am b/examples/multiprocess_c/Makefile.am index 726d1d9057..a0011d8c73 100644 --- a/examples/multiprocess_c/Makefile.am +++ b/examples/multiprocess_c/Makefile.am @@ -19,15 +19,12 @@ multi_LDFLAGS = \ -lstdc++ \ $(LUA_LDFLAGS) \ $(SSDEEP_LDFLAGS) \ - $(MAXMIND_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(MAXMIND_LDFLAGS) multi_CFLAGS = \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ -I$(top_builddir) \ $(GLOBAL_CFLAGS) MAINTAINERCLEANFILES = \ Makefile.in - - diff --git a/examples/multithread/Makefile.am b/examples/multithread/Makefile.am index 0871efa1e1..c47c13b51f 100644 --- a/examples/multithread/Makefile.am +++ b/examples/multithread/Makefile.am @@ -15,8 +15,7 @@ multithread_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(PCRE2_LDADD) \ - $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(SSDEEP_LDADD) multithread_LDFLAGS = \ -L$(top_builddir)/src/.libs/ \ @@ -28,12 +27,11 @@ multithread_LDFLAGS = \ $(LMDB_LDFLAGS) \ $(LUA_LDFLAGS) \ $(MAXMIND_LDFLAGS) \ - $(SSDEEP_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(SSDEEP_LDFLAGS) multithread_CPPFLAGS = \ $(GLOBAL_CFLAGS) \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ -I$(top_builddir) \ -g \ -I../others \ @@ -43,7 +41,6 @@ multithread_CPPFLAGS = \ $(GEOIP_CFLAGS) \ $(GLOBAL_CPPFLAGS) \ $(MODSEC_NO_LOGS) \ - $(YAJL_CFLAGS) \ $(LMDB_CFLAGS) \ $(LUA_CFLAGS) \ $(PCRE_CFLAGS) \ @@ -53,5 +50,3 @@ multithread_CPPFLAGS = \ MAINTAINERCLEANFILES = \ Makefile.in - - diff --git a/examples/reading_logs_via_rule_message/Makefile.am b/examples/reading_logs_via_rule_message/Makefile.am index 5a6ba74b2a..384a84e73b 100644 --- a/examples/reading_logs_via_rule_message/Makefile.am +++ b/examples/reading_logs_via_rule_message/Makefile.am @@ -15,8 +15,7 @@ simple_request_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(PCRE2_LDADD) \ - $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(SSDEEP_LDADD) simple_request_LDFLAGS = \ -L$(top_builddir)/src/.libs/ \ @@ -28,12 +27,11 @@ simple_request_LDFLAGS = \ $(LMDB_LDFLAGS) \ 
$(LUA_LDFLAGS) \ $(MAXMIND_LDFLAGS) \ - $(SSDEEP_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(SSDEEP_LDFLAGS) simple_request_CPPFLAGS = \ $(GLOBAL_CFLAGS) \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ -I$(top_builddir) \ -g \ -I../others \ @@ -43,7 +41,6 @@ simple_request_CPPFLAGS = \ $(GEOIP_CFLAGS) \ $(GLOBAL_CPPFLAGS) \ $(MODSEC_NO_LOGS) \ - $(YAJL_CFLAGS) \ $(LMDB_CFLAGS) \ $(LUA_CFLAGS) \ $(PCRE_CFLAGS) \ @@ -53,5 +50,3 @@ simple_request_CPPFLAGS = \ MAINTAINERCLEANFILES = \ Makefile.in - - diff --git a/examples/reading_logs_with_offset/Makefile.am b/examples/reading_logs_with_offset/Makefile.am index a98ed48d0e..95373a4c47 100644 --- a/examples/reading_logs_with_offset/Makefile.am +++ b/examples/reading_logs_with_offset/Makefile.am @@ -15,8 +15,7 @@ read_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(PCRE2_LDADD) \ - $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(SSDEEP_LDADD) read_LDFLAGS = \ -L$(top_builddir)/src/.libs/ \ @@ -27,12 +26,11 @@ read_LDFLAGS = \ $(LMDB_LDFLAGS) \ $(LUA_LDFLAGS) \ $(SSDEEP_LDFLAGS) \ - $(MAXMIND_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(MAXMIND_LDFLAGS) read_CPPFLAGS = \ $(GLOBAL_CFLAGS) \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ -I$(top_builddir) \ -g \ -I../others \ @@ -43,7 +41,6 @@ read_CPPFLAGS = \ $(MAXMIND_CFLAGS) \ $(GLOBAL_CPPFLAGS) \ $(MODSEC_NO_LOGS) \ - $(YAJL_CFLAGS) \ $(LMDB_CFLAGS) \ $(LUA_CFLAGS) \ $(PCRE_CFLAGS) \ @@ -53,5 +50,3 @@ read_CPPFLAGS = \ MAINTAINERCLEANFILES = \ Makefile.in - - diff --git a/examples/simple_example_using_c/Makefile.am b/examples/simple_example_using_c/Makefile.am index b03ab96d48..9bf657ba27 100644 --- a/examples/simple_example_using_c/Makefile.am +++ b/examples/simple_example_using_c/Makefile.am @@ -17,15 +17,12 @@ test_LDFLAGS = \ -lm \ -lstdc++ \ $(LUA_LDFLAGS) \ - $(SSDEEP_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(SSDEEP_LDFLAGS) test_CFLAGS = \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ -I$(top_builddir) \ $(GLOBAL_CFLAGS) MAINTAINERCLEANFILES = \ Makefile.in - - diff --git 
a/examples/using_bodies_in_chunks/Makefile.am b/examples/using_bodies_in_chunks/Makefile.am index 9eb438f368..68c9b34dfa 100644 --- a/examples/using_bodies_in_chunks/Makefile.am +++ b/examples/using_bodies_in_chunks/Makefile.am @@ -15,8 +15,7 @@ simple_request_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(PCRE2_LDADD) \ - $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(SSDEEP_LDADD) simple_request_LDFLAGS = \ -L$(top_builddir)/src/.libs/ \ @@ -27,12 +26,11 @@ simple_request_LDFLAGS = \ $(MAXMIND_LDFLAGS) \ $(LMDB_LDFLAGS) \ $(LUA_LDFLAGS) \ - $(SSDEEP_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(SSDEEP_LDFLAGS) simple_request_CPPFLAGS = \ $(GLOBAL_CFLAGS) \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ -I$(top_builddir) \ -g \ -I../others \ @@ -43,7 +41,6 @@ simple_request_CPPFLAGS = \ $(MAXMIND_CFLAGS) \ $(GLOBAL_CPPFLAGS) \ $(MODSEC_NO_LOGS) \ - $(YAJL_CFLAGS) \ $(LMDB_CFLAGS) \ $(LUA_CFLAGS) \ $(PCRE_CFLAGS) \ @@ -52,5 +49,3 @@ simple_request_CPPFLAGS = \ MAINTAINERCLEANFILES = \ Makefile.in - - diff --git a/headers/modsecurity/audit_log.h b/headers/modsecurity/audit_log.h index ab1e798dd7..dce0175873 100644 --- a/headers/modsecurity/audit_log.h +++ b/headers/modsecurity/audit_log.h @@ -17,6 +17,7 @@ #include #include #include +#include #endif #ifndef HEADERS_MODSECURITY_AUDIT_LOG_H_ diff --git a/headers/modsecurity/rules_set_phases.h b/headers/modsecurity/rules_set_phases.h index 849d8ec1bf..473d939f8e 100644 --- a/headers/modsecurity/rules_set_phases.h +++ b/headers/modsecurity/rules_set_phases.h @@ -20,6 +20,7 @@ #ifdef __cplusplus #include #include +#include #include #include #include @@ -60,4 +61,4 @@ class RulesSetPhases { } // namespace modsecurity #endif -#endif // HEADERS_MODSECURITY_RULES_SET_PHASES_H_ \ No newline at end of file +#endif // HEADERS_MODSECURITY_RULES_SET_PHASES_H_ diff --git a/headers/modsecurity/rules_set_properties.h b/headers/modsecurity/rules_set_properties.h index e16db04665..19a310fbe7 100644 --- a/headers/modsecurity/rules_set_properties.h 
+++ b/headers/modsecurity/rules_set_properties.h @@ -25,6 +25,7 @@ #ifdef __cplusplus #include #include +#include #include #include #include diff --git a/headers/modsecurity/transaction.h b/headers/modsecurity/transaction.h index 3e70caa38e..5cfaff0f13 100644 --- a/headers/modsecurity/transaction.h +++ b/headers/modsecurity/transaction.h @@ -77,25 +77,6 @@ typedef struct Rules_t RulesSet; do { } while (0); #endif - -#define LOGFY_ADD(a, b) \ - yajl_gen_string(g, reinterpret_cast(a), strlen(a)); \ - if (b.data() == NULL) { \ - yajl_gen_string(g, reinterpret_cast(""), \ - strlen("")); \ - } else { \ - yajl_gen_string(g, reinterpret_cast(b.data()), \ - b.length()); \ - } - -#define LOGFY_ADD_INT(a, b) \ - yajl_gen_string(g, reinterpret_cast(a), strlen(a)); \ - yajl_gen_number(g, reinterpret_cast(b), strlen(b)); - -#define LOGFY_ADD_NUM(a, b) \ - yajl_gen_string(g, reinterpret_cast(a), strlen(a)); \ - yajl_gen_integer(g, b); - #ifdef __cplusplus namespace modsecurity { diff --git a/modsecurity.pc.in b/modsecurity.pc.in index d00ad644fa..19d64b70e9 100644 --- a/modsecurity.pc.in +++ b/modsecurity.pc.in @@ -8,4 +8,4 @@ Description: ModSecurity API Version: @MSC_VERSION_WITH_PATCHLEVEL@ Cflags: -I@includedir@ Libs: -L@libdir@ -lmodsecurity -Libs.private: @CURL_LDADD@ @GEOIP_LDADD@ @MAXMIND_LDADD@ @GLOBAL_LDADD@ @LIBXML2_LDADD@ @LMDB_LDADD@ @LUA_LDADD@ @PCRE_LDADD@ @PCRE2_LDADD@ @SSDEEP_LDADD@ @YAJL_LDADD@ +Libs.private: @CURL_LDADD@ @GEOIP_LDADD@ @MAXMIND_LDADD@ @GLOBAL_LDADD@ @LIBXML2_LDADD@ @LMDB_LDADD@ @LUA_LDADD@ @PCRE_LDADD@ @PCRE2_LDADD@ @SSDEEP_LDADD@ diff --git a/others/jsoncons b/others/jsoncons new file mode 160000 index 0000000000..128553c8d1 --- /dev/null +++ b/others/jsoncons @@ -0,0 +1 @@ +Subproject commit 128553c8d1b222c30819656d123590accb60689d diff --git a/others/simdjson b/others/simdjson new file mode 160000 index 0000000000..fb83b114ef --- /dev/null +++ b/others/simdjson @@ -0,0 +1 @@ +Subproject commit fb83b114efcec4544eba8d45e3c7969ca756c086 diff 
--git a/src/Makefile.am b/src/Makefile.am index 14c26697b5..52f24de428 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -231,6 +231,7 @@ UTILS = \ utils/geo_lookup.cc \ utils/https_client.cc \ utils/ip_tree.cc \ + utils/json_writer.cc \ utils/msc_tree.cc \ utils/random.cc \ utils/regex.cc \ @@ -248,10 +249,29 @@ COLLECTION = \ BODY_PROCESSORS = \ request_body_processor/multipart.cc \ request_body_processor/xml.cc \ - request_body_processor/json.cc + request_body_processor/json.cc \ + request_body_processor/json_adapter.cc \ + request_body_processor/json_instrumentation.cc + +if JSON_BACKEND_SIMDJSON +BODY_PROCESSORS += \ + request_body_processor/json_backend_simdjson.cc +JSON_BACKEND_SOURCES = \ + ../others/simdjson/singleheader/simdjson.cpp +JSON_BACKEND_CPPFLAGS = \ + -I$(top_srcdir)/others/simdjson/singleheader +endif + +if JSON_BACKEND_JSONCONS +BODY_PROCESSORS += \ + request_body_processor/json_backend_jsoncons.cc +JSON_BACKEND_CPPFLAGS = \ + -I$(top_srcdir)/others/jsoncons/include +endif libmodsecurity_la_SOURCES = \ + $(JSON_BACKEND_SOURCES) \ parser/seclang-parser.cc \ parser/seclang-scanner.cc \ parser/driver.cc \ @@ -295,6 +315,7 @@ libmodsecurity_la_CPPFLAGS = \ -I$(top_builddir) \ -g \ -I$(top_srcdir)/others \ + $(JSON_BACKEND_CPPFLAGS) \ -I$(top_srcdir)/others/mbedtls/include \ -fPIC \ -O3 \ @@ -303,7 +324,6 @@ libmodsecurity_la_CPPFLAGS = \ $(GEOIP_CFLAGS) \ $(GLOBAL_CPPFLAGS) \ $(MODSEC_NO_LOGS) \ - $(YAJL_CFLAGS) \ $(LMDB_CFLAGS) \ $(PCRE_CFLAGS) \ $(PCRE2_CFLAGS) \ @@ -325,7 +345,6 @@ libmodsecurity_la_LDFLAGS = \ $(PCRE2_LDFLAGS) \ $(SSDEEP_LDFLAGS) \ $(MAXMIND_LDFLAGS) \ - $(YAJL_LDFLAGS) \ -version-info @MSC_VERSION_INFO@ @@ -341,6 +360,4 @@ libmodsecurity_la_LIBADD = \ $(PCRE_LDADD) \ $(PCRE2_LDADD) \ $(MAXMIND_LDADD) \ - $(SSDEEP_LDADD) \ - $(YAJL_LDADD) - + $(SSDEEP_LDADD) diff --git a/src/actions/ctl/rule_remove_by_id.h b/src/actions/ctl/rule_remove_by_id.h index f731db31cc..53cd379e76 100644 --- a/src/actions/ctl/rule_remove_by_id.h 
+++ b/src/actions/ctl/rule_remove_by_id.h @@ -13,7 +13,9 @@ * */ +#include #include +#include #include "modsecurity/actions/action.h" #include "modsecurity/transaction.h" diff --git a/src/actions/transformations/css_decode.cc b/src/actions/transformations/css_decode.cc index 41da9390ea..d999813d87 100644 --- a/src/actions/transformations/css_decode.cc +++ b/src/actions/transformations/css_decode.cc @@ -15,6 +15,8 @@ #include "css_decode.h" +#include + #include "src/utils/string.h" using namespace modsecurity::utils::string; @@ -138,7 +140,17 @@ static inline bool css_decode_inplace(std::string &val) { /* The character after backslash is not a hexadecimal digit, * nor a newline. */ /* Use one character after backslash as is. */ - *d++ = input[i++]; + const auto escaped = input[i++]; + *d++ = escaped; + + /* + * Preserve legacy behaviour for escaped NUL by consuming + * one trailing whitespace character. + */ + if ((escaped == '\0') && (i < input_len) + && std::isspace(input[i])) { + i++; + } } } else { /* No characters after backslash. 
*/ diff --git a/src/actions/transformations/utf8_to_unicode.cc b/src/actions/transformations/utf8_to_unicode.cc index 263c782bf6..8a2cb31e5b 100644 --- a/src/actions/transformations/utf8_to_unicode.cc +++ b/src/actions/transformations/utf8_to_unicode.cc @@ -27,6 +27,42 @@ constexpr int UNICODE_ERROR_INVALID_ENCODING = -2; namespace modsecurity::actions::transformations { +static inline char *appendUnicodeEscape(char *data, + unsigned char (&unicode)[8], unsigned int d) { + int length = 0; + + *data++ = '%'; + *data++ = 'u'; + snprintf(reinterpret_cast(unicode), sizeof(unicode), "%x", d); + length = strlen(reinterpret_cast(unicode)); + + switch (length) { + case 1: + *data++ = '0'; + *data++ = '0'; + *data++ = '0'; + break; + case 2: + *data++ = '0'; + *data++ = '0'; + break; + case 3: + *data++ = '0'; + break; + case 4: + case 5: + break; + } + + for (std::string::size_type j = 0; + j < static_cast(length); j++) { + *data++ = unicode[j]; + } + + return data; +} + + static inline bool encode(std::string &value) { auto input = reinterpret_cast(value.data()); const auto input_len = value.length(); @@ -76,38 +112,9 @@ static inline bool encode(std::string &value) { unicode_len = 2; count += 6; if (count <= len) { - int length = 0; /* compute character number */ d = ((c & 0x1F) << 6) | (*(utf + 1) & 0x3F); - *data++ = '%'; - *data++ = 'u'; - snprintf(reinterpret_cast(unicode), - sizeof(reinterpret_cast(unicode)), - "%x", d); - length = strlen(reinterpret_cast(unicode)); - - switch (length) { - case 1: - *data++ = '0'; - *data++ = '0'; - *data++ = '0'; - break; - case 2: - *data++ = '0'; - *data++ = '0'; - break; - case 3: - *data++ = '0'; - break; - case 4: - case 5: - break; - } - - for (std::string::size_type j = 0; j < length; j++) { - *data++ = unicode[j]; - } - + data = appendUnicodeEscape(data, unicode, d); changed = true; } } @@ -126,40 +133,11 @@ static inline bool encode(std::string &value) { unicode_len = 3; count+=6; if (count <= len) { - int length = 0; /* 
compute character number */ d = ((c & 0x0F) << 12) | ((*(utf + 1) & 0x3F) << 6) | (*(utf + 2) & 0x3F); - *data++ = '%'; - *data++ = 'u'; - snprintf(reinterpret_cast(unicode), - sizeof(reinterpret_cast(unicode)), - "%x", d); - length = strlen(reinterpret_cast(unicode)); - - switch (length) { - case 1: - *data++ = '0'; - *data++ = '0'; - *data++ = '0'; - break; - case 2: - *data++ = '0'; - *data++ = '0'; - break; - case 3: - *data++ = '0'; - break; - case 4: - case 5: - break; - } - - for (std::string::size_type j = 0; j < length; j++) { - *data++ = unicode[j]; - } - + data = appendUnicodeEscape(data, unicode, d); changed = true; } } @@ -187,41 +165,12 @@ static inline bool encode(std::string &value) { unicode_len = 4; count+=7; if (count <= len) { - int length = 0; /* compute character number */ d = ((c & 0x07) << 18) | ((*(utf + 1) & 0x3F) << 12) | ((*(utf + 2) & 0x3F) << 6) | (*(utf + 3) & 0x3F); - *data++ = '%'; - *data++ = 'u'; - snprintf(reinterpret_cast(unicode), - sizeof(reinterpret_cast(unicode)), - "%x", d); - length = strlen(reinterpret_cast(unicode)); - - switch (length) { - case 1: - *data++ = '0'; - *data++ = '0'; - *data++ = '0'; - break; - case 2: - *data++ = '0'; - *data++ = '0'; - break; - case 3: - *data++ = '0'; - break; - case 4: - case 5: - break; - } - - for (std::string::size_type j = 0; j < length; j++) { - *data++ = unicode[j]; - } - + data = appendUnicodeEscape(data, unicode, d); changed = true; } } diff --git a/src/anchored_set_variable.cc b/src/anchored_set_variable.cc index 4c81dab412..eb04adf410 100644 --- a/src/anchored_set_variable.cc +++ b/src/anchored_set_variable.cc @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/src/anchored_variable.cc b/src/anchored_variable.cc index 51860d1fe6..83f4209a3e 100644 --- a/src/anchored_variable.cc +++ b/src/anchored_variable.cc @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/src/audit_log/audit_log.cc 
b/src/audit_log/audit_log.cc index 4115d5f34a..3068cbdd63 100644 --- a/src/audit_log/audit_log.cc +++ b/src/audit_log/audit_log.cc @@ -20,6 +20,7 @@ #include #include +#include #include "modsecurity/transaction.h" #include "modsecurity/rule_message.h" diff --git a/src/modsecurity.cc b/src/modsecurity.cc index 8f943b7f76..f3632f9cf0 100644 --- a/src/modsecurity.cc +++ b/src/modsecurity.cc @@ -17,10 +17,6 @@ #include "modsecurity/modsecurity.h" #include "src/config.h" -#ifdef WITH_YAJL -#include -#include -#endif #ifdef WITH_LIBXML2 #include #include @@ -38,6 +34,7 @@ #include "src/collection/backend/in_memory-per_process.h" #include "src/collection/backend/lmdb.h" #include "src/unique_id.h" +#include "src/utils/json_writer.h" #include "src/utils/regex.h" #include "src/utils/geo_lookup.h" #include "src/actions/transformations/transformation.h" @@ -214,59 +211,34 @@ void ModSecurity::serverLog(void *data, const RuleMessage &rm) { int ModSecurity::processContentOffset(const char *content, size_t len, const char *matchString, std::string *json, const char **err) { -#ifdef WITH_YAJL Utils::Regex variables("v([0-9]+),([0-9]+)"); Utils::Regex operators("o([0-9]+),([0-9]+)"); Utils::Regex transformations("t:(?:(?!t:).)+"); - yajl_gen g; std::string varValue; - const unsigned char *buf; - size_t jsonSize; + utils::JsonWriter writer(false); std::list vars = variables.searchAll(matchString); std::list ops = operators.searchAll(matchString); std::list trans = transformations.searchAll(matchString); - g = yajl_gen_alloc(NULL); - if (g == NULL) { - *err = "Failed to allocate memory for the JSON creation."; - return -1; - } - - yajl_gen_config(g, yajl_gen_beautify, 0); - - yajl_gen_map_open(g); - yajl_gen_string(g, reinterpret_cast("match"), - strlen("match")); - - yajl_gen_array_open(g); - yajl_gen_map_open(g); - - yajl_gen_string(g, reinterpret_cast("variable"), - strlen("variable")); - - yajl_gen_map_open(g); - yajl_gen_string(g, reinterpret_cast("highlight"), - 
strlen("highlight")); - - yajl_gen_array_open(g); + writer.start_object(); + writer.key("match"); + writer.start_array(); + writer.start_object(); + writer.key("variable"); + writer.start_object(); + writer.key("highlight"); + writer.start_array(); for(auto [it, pending] = std::tuple{vars.rbegin(), vars.size()}; pending > 3; pending -= 3) { - yajl_gen_map_open(g); + writer.start_object(); it++; const std::string &startingAt = it->str(); it++; const std::string &size = it->str(); it++; - yajl_gen_string(g, - reinterpret_cast("startingAt"), - strlen("startingAt")); - yajl_gen_string(g, - reinterpret_cast(startingAt.c_str()), - startingAt.size()); - yajl_gen_string(g, reinterpret_cast("size"), - strlen("size")); - yajl_gen_string(g, - reinterpret_cast(size.c_str()), - size.size()); - yajl_gen_map_close(g); + writer.key("startingAt"); + writer.string(startingAt); + writer.key("size"); + writer.string(size); + writer.end_object(); if (stoi(startingAt) >= len) { *err = "Offset is out of the content limits."; @@ -280,109 +252,70 @@ int ModSecurity::processContentOffset(const char *content, size_t len, varValue.append(value); } } - yajl_gen_array_close(g); - - yajl_gen_string(g, reinterpret_cast("value"), - strlen("value")); + writer.end_array(); - yajl_gen_array_open(g); + writer.key("value"); + writer.start_array(); - yajl_gen_map_open(g); - yajl_gen_string(g, reinterpret_cast("value"), - strlen("value")); - yajl_gen_string(g, reinterpret_cast(varValue.c_str()), - varValue.size()); - yajl_gen_map_close(g); + writer.start_object(); + writer.key("value"); + writer.string(varValue); + writer.end_object(); while (!trans.empty()) { modsecurity::actions::transformations::Transformation *t; - yajl_gen_map_open(g); - yajl_gen_string(g, - reinterpret_cast("transformation"), - strlen("transformation")); - - yajl_gen_string(g, - reinterpret_cast(trans.back().str().c_str()), - trans.back().str().size()); + writer.start_object(); + writer.key("transformation"); + 
writer.string(trans.back().str()); t = modsecurity::actions::transformations::Transformation::instantiate( trans.back().str().c_str()); t->transform(varValue, nullptr); trans.pop_back(); - yajl_gen_string(g, reinterpret_cast("value"), - strlen("value")); - yajl_gen_string(g, reinterpret_cast( - varValue.c_str()), - varValue.size()); - yajl_gen_map_close(g); + writer.key("value"); + writer.string(varValue); + writer.end_object(); delete t; } - yajl_gen_array_close(g); - - yajl_gen_string(g, reinterpret_cast("operator"), - strlen("operator")); + writer.end_array(); - yajl_gen_map_open(g); + writer.key("operator"); + writer.start_object(); for(auto [it, pending] = std::tuple{ops.rbegin(), ops.size()}; pending > 3; pending -= 3) { - yajl_gen_string(g, reinterpret_cast("highlight"), - strlen("highlight")); - yajl_gen_map_open(g); + writer.key("highlight"); + writer.start_object(); it++; const std::string &startingAt = it->str(); it++; const std::string &size = ops.back().str(); it++; - yajl_gen_string(g, - reinterpret_cast("startingAt"), - strlen("startingAt")); - yajl_gen_string(g, - reinterpret_cast(startingAt.c_str()), - startingAt.size()); - yajl_gen_string(g, reinterpret_cast("size"), - strlen("size")); - yajl_gen_string(g, - reinterpret_cast(size.c_str()), - size.size()); - yajl_gen_map_close(g); + writer.key("startingAt"); + writer.string(startingAt); + writer.key("size"); + writer.string(size); + writer.end_object(); if (stoi(startingAt) >= varValue.size()) { *err = "Offset is out of the variable limits."; return -1; } - yajl_gen_string(g, - reinterpret_cast("value"), - strlen("value")); const auto value = std::string(varValue, stoi(startingAt), stoi(size)); - yajl_gen_string(g, - reinterpret_cast(value.c_str()), - value.size()); + writer.key("value"); + writer.string(value); } - yajl_gen_map_close(g); - - - yajl_gen_map_close(g); - yajl_gen_array_close(g); - - yajl_gen_map_close(g); - yajl_gen_array_close(g); - yajl_gen_map_close(g); + writer.end_object(); + 
writer.end_object(); + writer.end_array(); + writer.end_object(); - yajl_gen_get_buf(g, &buf, &jsonSize); - - json->assign(reinterpret_cast(buf), jsonSize); + json->assign(writer.to_string()); json->append("\n"); - - yajl_gen_free(g); return 0; -#else - *err = "Without YAJL support, we cannot generate JSON."; - return -1; -#endif } diff --git a/src/operators/pm_from_file.cc b/src/operators/pm_from_file.cc index 52651e95cc..7bd778dc62 100644 --- a/src/operators/pm_from_file.cc +++ b/src/operators/pm_from_file.cc @@ -15,6 +15,7 @@ #include "src/operators/pm_from_file.h" +#include #include #include "src/operators/operator.h" diff --git a/src/operators/validate_byte_range.cc b/src/operators/validate_byte_range.cc index 05d06c7880..2ee7725c70 100644 --- a/src/operators/validate_byte_range.cc +++ b/src/operators/validate_byte_range.cc @@ -15,6 +15,9 @@ #include "src/operators/validate_byte_range.h" +#include +#include +#include #include #include @@ -23,18 +26,73 @@ namespace modsecurity { namespace operators { +namespace { + +std::string trimCopy(const std::string &value) { + std::string::size_type start = 0; + std::string::size_type end = value.size(); + + while (start < end + && std::isspace(static_cast(value[start]))) { + start++; + } + while (end > start + && std::isspace(static_cast(value[end - 1]))) { + end--; + } + + return value.substr(start, end - start); +} + +bool parseStrictInt(const std::string &value, int *result, std::string *error) { + const std::string trimmed = trimCopy(value); + + if (trimmed.empty()) { + error->assign("Not able to convert '" + value + "' into a number"); + return false; + } + + size_t pos = 0; + + try { + *result = std::stoi(trimmed, &pos); + } catch (...) 
{ + error->assign("Not able to convert '" + trimmed + "' into a number"); + return false; + } + + if (pos != trimmed.size()) { + error->assign("Not able to convert '" + trimmed + "' into a number"); + return false; + } + + return true; +} + +inline void allowByte(std::array *table, int value) { + (*table)[value >> 3] = ((*table)[value >> 3] + | (1U << static_cast(value & 0x7))); +} + +} // namespace + + bool ValidateByteRange::getRange(const std::string &rangeRepresentation, std::string *error) { - size_t pos = rangeRepresentation.find_first_of("-"); - int start; - int end; + return getRange(rangeRepresentation, &table, error); +} + + +bool ValidateByteRange::getRange(const std::string &rangeRepresentation, + std::array *targetTable, + std::string *error) const { + const std::string range = trimCopy(rangeRepresentation); + const size_t pos = range.find_first_of("-"); + int start = 0; + int end = 0; if (pos == std::string::npos) { - try { - start = std::stoi(rangeRepresentation); - } catch(...) { - error->assign("Not able to convert '" + rangeRepresentation + - "' into a number"); + if (parseStrictInt(range, &start, error) == false) { return false; } if ((start < 0) || (start > 255)) { @@ -42,26 +100,16 @@ bool ValidateByteRange::getRange(const std::string &rangeRepresentation, std::to_string(start)); return false; } - table[start >> 3] = (table[start >> 3] | (1 << (start & 0x7))); + allowByte(targetTable, start); return true; } - try { - start = std::stoi(std::string(rangeRepresentation, 0, pos)); - } catch (...) { - error->assign("Not able to convert '" + - std::string(rangeRepresentation, 0, pos) + - "' into a number"); + if (parseStrictInt(std::string(range, 0, pos), &start, error) == false) { return false; } - try { - end = std::stoi(std::string(rangeRepresentation, pos + 1, - rangeRepresentation.length() - (pos + 1))); - } catch (...) 
{ - error->assign("Not able to convert '" + std::string(rangeRepresentation, - pos + 1, rangeRepresentation.length() - (pos + 1)) + - "' into a number"); + if (parseStrictInt(std::string(range, pos + 1, + range.length() - (pos + 1)), &end, error) == false) { return false; } @@ -81,7 +129,7 @@ bool ValidateByteRange::getRange(const std::string &rangeRepresentation, } while (start <= end) { - table[start >> 3] = (table[start >> 3] | (1 << (start & 0x7))); + allowByte(targetTable, start); start++; } @@ -91,34 +139,33 @@ bool ValidateByteRange::getRange(const std::string &rangeRepresentation, bool ValidateByteRange::init(const std::string &file, std::string *error) { - size_t pos = m_param.find_first_of(","); - bool rc; - - if (pos == std::string::npos) { - rc = getRange(m_param, error); - } else { - rc = getRange(std::string(m_param, 0, pos), error); - } - - if (rc == false) { - return false; - } - - while (pos != std::string::npos) { - size_t next_pos = m_param.find_first_of(",", pos + 1); - - if (next_pos == std::string::npos) { - rc = getRange(std::string(m_param, pos + 1, m_param.length() - - (pos + 1)), error); - } else { - rc = getRange(std::string(m_param, pos + 1, next_pos - (pos + 1)), error); - } - if (rc == false) { + std::array parsedTable{}; + std::string::size_type pos = 0; + + table.fill('\0'); + + while (true) { + const std::string::size_type nextPos = m_param.find(',', pos); + if (const std::string token = nextPos == std::string::npos + ? m_param.substr(pos) + : m_param.substr(pos, nextPos - pos); + getRange(token, &parsedTable, error) == false) { + /* + * Keep byte 0 allowed on invalid parameters so callers that + * continue after init() failure keep legacy behaviour. 
+ */ + table[0] = table[0] | 1U; return false; } - pos = next_pos; + + if (nextPos == std::string::npos) { + break; + } + + pos = nextPos + 1; } + table = parsedTable; return true; } diff --git a/src/operators/validate_byte_range.h b/src/operators/validate_byte_range.h index 7551171b01..f9d137a18b 100644 --- a/src/operators/validate_byte_range.h +++ b/src/operators/validate_byte_range.h @@ -16,11 +16,9 @@ #ifndef SRC_OPERATORS_VALIDATE_BYTE_RANGE_H_ #define SRC_OPERATORS_VALIDATE_BYTE_RANGE_H_ +#include #include -#include -#include #include -#include #include "src/operators/operator.h" @@ -32,9 +30,7 @@ class ValidateByteRange : public Operator { public: /** @ingroup ModSecurity_Operator */ explicit ValidateByteRange(std::unique_ptr param) - : Operator("ValidateByteRange", std::move(param)) { - std::memset(table, '\0', sizeof(char) * 32); - } + : Operator("ValidateByteRange", std::move(param)) { } ~ValidateByteRange() override { } bool evaluate(Transaction *transaction, RuleWithActions *rule, @@ -42,9 +38,15 @@ class ValidateByteRange : public Operator { RuleMessage &ruleMessage) override; bool getRange(const std::string &rangeRepresentation, std::string *error); bool init(const std::string& file, std::string *error) override; + private: - std::vector ranges; - char table[32]; + static constexpr size_t kTableSize = 32; + + bool getRange(const std::string &rangeRepresentation, + std::array *targetTable, + std::string *error) const; + + std::array table{}; }; } // namespace operators diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am index 685675819f..205e8ba4d2 100644 --- a/src/parser/Makefile.am +++ b/src/parser/Makefile.am @@ -22,7 +22,6 @@ libmodsec_parser_la_CPPFLAGS = \ $(GEOIP_CFLAGS) \ $(GLOBAL_CPPFLAGS) \ $(MODSEC_NO_LOGS) \ - $(YAJL_CFLAGS) \ $(LMDB_CFLAGS) \ $(PCRE_CFLAGS) \ $(PCRE2_CFLAGS) \ diff --git a/src/request_body_processor/json.cc b/src/request_body_processor/json.cc index f56704effa..800377cd54 100644 --- 
a/src/request_body_processor/json.cc +++ b/src/request_body_processor/json.cc @@ -13,95 +13,148 @@ * */ - -#ifdef WITH_YAJL +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif #include "src/request_body_processor/json.h" -#include -#include +#include +#include +#include +#include #include +#include "modsecurity/transaction.h" +#include "src/request_body_processor/json_adapter.h" +#include "src/request_body_processor/json_instrumentation.h" + -namespace modsecurity { -namespace RequestBodyProcessor { +namespace modsecurity::RequestBodyProcessor { static const double json_depth_limit_default = 10000.0; -static const char* json_depth_limit_exceeded_msg = ". Parsing depth limit exceeded"; +static const char *const json_depth_limit_exceeded_msg = + ". Parsing depth limit exceeded"; + +namespace { + +void assignJsonErrorMessage(std::string *err, JsonParseStatus parse_status, + const std::string &detail) { + if (err == nullptr) { + return; + } + + if (!detail.empty()) { + err->assign(detail); + return; + } + + switch (parse_status) { + case JsonParseStatus::ParseError: + err->assign("Invalid JSON body."); + break; + case JsonParseStatus::TruncatedInput: + err->assign("Incomplete JSON body."); + break; + case JsonParseStatus::Utf8Error: + err->assign("Invalid UTF-8 in JSON body."); + break; + case JsonParseStatus::EngineAbort: + err->assign("JSON traversal aborted by ModSecurity."); + break; + case JsonParseStatus::InternalError: + err->assign("Internal JSON backend failure."); + break; + case JsonParseStatus::Ok: + err->clear(); + break; + } +} + +JsonSinkStatus startContainer( + std::deque> *containers, + std::unique_ptr container, int64_t *current_depth, + double max_depth, bool *depth_limit_exceeded) { + containers->push_back(std::move(container)); + (*current_depth)++; + if (*current_depth > max_depth) { + *depth_limit_exceeded = true; + return JsonSinkStatus::DepthLimitExceeded; + } + return JsonSinkStatus::Continue; +} + +JsonSinkStatus endContainer( + std::deque> 
*containers, + int64_t *current_depth) { + if (containers->empty()) { + return JsonSinkStatus::InternalError; + } + + containers->pop_back(); + + if (!containers->empty()) { + auto *array = dynamic_cast(containers->back().get()); + if (array != nullptr) { + array->m_elementCounter++; + } + } + + (*current_depth)--; + if (*current_depth < 0) { + *current_depth = 0; + return JsonSinkStatus::InternalError; + } + + return JsonSinkStatus::Continue; +} + +JsonSinkStatus addArgumentAsSinkStatus(JSON *json, + const std::string &argument_value) { + return json->addArgument(argument_value) != 0 ? JsonSinkStatus::Continue + : JsonSinkStatus::EngineAbort; +} + +JsonSinkStatus addStringViewAsSinkStatus(JSON *json, std::string_view value) { + return addArgumentAsSinkStatus(json, std::string(value.data(), value.size())); +} + +} // namespace JSON::JSON(Transaction *transaction) : m_transaction(transaction), - m_handle(NULL), - m_current_key(""), - m_max_depth(json_depth_limit_default), - m_current_depth(0), - m_depth_limit_exceeded(false) { - /** - * yajl callback functions - * For more information on the function signatures and order, check - * http://lloyd.github.com/yajl/yajl-1.0.12/structyajl__callbacks.html - */ - - /** - * yajl configuration and callbacks - */ - static yajl_callbacks callbacks = { - yajl_null, - yajl_boolean, - NULL /* yajl_integer */, - NULL /* yajl_double */, - yajl_number, - yajl_string, - yajl_start_map, - yajl_map_key, - yajl_end_map, - yajl_start_array, - yajl_end_array - }; - - - /** - * yajl initialization - * - * yajl_parser_config definition: - * http://lloyd.github.io/yajl/yajl-2.0.1/yajl__parse_8h.html#aec816c5518264d2ac41c05469a0f986c - * - * TODO: make UTF8 validation optional, as it depends on Content-Encoding - */ - m_handle = yajl_alloc(&callbacks, NULL, this); - - yajl_config(m_handle, yajl_allow_partial_values, 0); + m_max_depth(json_depth_limit_default) { } JSON::~JSON() { - while (m_containers.size() > 0) { - JSONContainer *a = 
m_containers.back(); - m_containers.pop_back(); - delete a; - } - yajl_free(m_handle); + clearContainers(); } bool JSON::init() { + clearContainers(); + m_current_key.clear(); + m_data.clear(); + m_current_depth = 0; + m_depth_limit_exceeded = false; + return true; } -bool JSON::processChunk(const char *buf, unsigned int size, std::string *err) { - /* Feed our parser and catch any errors */ - m_status = yajl_parse(m_handle, - (const unsigned char *)buf, size); - if (m_status != yajl_status_ok) { - unsigned char *e = yajl_get_error(m_handle, 0, - (const unsigned char *)buf, size); - /* We need to free the yajl error message later, how to do this? */ - err->assign((const char *)e); - if (m_depth_limit_exceeded) { - err->append(json_depth_limit_exceeded_msg); - } - yajl_free_error(m_handle, e); - return false; +bool JSON::processChunk(const char *buf, unsigned int size, + const std::string *err) { + (void) err; + if (buf != nullptr && size > 0) { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + const auto start_time = std::chrono::steady_clock::now(); + m_data.append(buf, size); + recordJsonProcessChunkAppend(size, static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now() - start_time).count())); +#else + m_data.append(buf, size); +#endif } return true; @@ -109,16 +162,20 @@ bool JSON::processChunk(const char *buf, unsigned int size, std::string *err) { bool JSON::complete(std::string *err) { - /* Wrap up the parsing process */ - m_status = yajl_complete_parse(m_handle); - if (m_status != yajl_status_ok) { - unsigned char *e = yajl_get_error(m_handle, 0, NULL, 0); - /* We need to free the yajl error message later, how to do this? 
*/ - err->assign((const char *)e); - if (m_depth_limit_exceeded) { + if (m_data.empty()) { + return true; + } + + JSONAdapter adapter; + if (JsonParseResult result = adapter.parse(m_data, + static_cast(this)); !result.ok()) { + if (result.sink_status == JsonSinkStatus::DepthLimitExceeded) { + m_depth_limit_exceeded = true; + } + assignJsonErrorMessage(err, result.parse_status, result.detail); + if (m_depth_limit_exceeded && err != nullptr) { err->append(json_depth_limit_exceeded_msg); - } - yajl_free_error(m_handle, e); + } return false; } @@ -127,23 +184,23 @@ bool JSON::complete(std::string *err) { int JSON::addArgument(const std::string& value) { - std::string data(""); + std::string data; std::string path; - for (size_t i = 0; i < m_containers.size(); i++) { - const JSONContainerArray *a = dynamic_cast( - m_containers[i]); + for (size_t i = 0; i < m_containers.size(); i++) { + const auto *a = dynamic_cast( + m_containers[i].get()); path = path + m_containers[i]->m_name; - if (a != NULL) { + if (a != nullptr) { path = path + ".array_" + std::to_string(a->m_elementCounter); } else { path = path + "."; } } - if (m_containers.size() > 0) { - JSONContainerArray *a = dynamic_cast( - m_containers.back()); + if (!m_containers.empty()) { + auto *a = dynamic_cast( + m_containers.back().get()); if (a) { a->m_elementCounter++; } else { @@ -163,158 +220,57 @@ int JSON::addArgument(const std::string& value) { } -/** - * Callback for hash key values; we use those to define the variable names - * under ARGS. Whenever we reach a new key, we update the current key value. 
- */ -int JSON::yajl_map_key(void *ctx, const unsigned char *key, size_t length) { - JSON *tthis = reinterpret_cast(ctx); - std::string safe_key; - - /** - * yajl does not provide us with null-terminated strings, but - * rather expects us to copy the data from the key up to the - * length informed; we create a standalone null-termined copy - * in safe_key - */ - safe_key.assign((const char *)key, length); - - tthis->m_current_key = safe_key; - - return 1; +JsonSinkStatus JSON::on_key(std::string_view value) { + m_current_key.assign(value.data(), value.size()); + return JsonSinkStatus::Continue; } -/** - * Callback for null values - * - */ -int JSON::yajl_null(void *ctx) { - JSON *tthis = reinterpret_cast(ctx); - return tthis->addArgument(""); +JsonSinkStatus JSON::on_null() { + return addArgumentAsSinkStatus(this, ""); } -/** - * Callback for boolean values - */ -int JSON::yajl_boolean(void *ctx, int value) { - JSON *tthis = reinterpret_cast(ctx); - if (value) { - return tthis->addArgument("true"); - } - return tthis->addArgument("false"); +JsonSinkStatus JSON::on_boolean(bool value) { + return addArgumentAsSinkStatus(this, value ? 
"true" : "false"); } -/** - * Callback for string values - */ -int JSON::yajl_string(void *ctx, const unsigned char *value, size_t length) { - JSON *tthis = reinterpret_cast(ctx); - std::string v = std::string((const char*)value, length); - return tthis->addArgument(v); +JsonSinkStatus JSON::on_string(std::string_view value) { + return addStringViewAsSinkStatus(this, value); } -/** - * Callback for numbers; YAJL can use separate callbacks for integers/longs and - * float/double values, but since we are not interested in using the numeric - * values here, we use a generic handler which uses numeric strings - */ -int JSON::yajl_number(void *ctx, const char *value, size_t length) { - JSON *tthis = reinterpret_cast(ctx); - std::string v = std::string((const char*)value, length); - return tthis->addArgument(v); +JsonSinkStatus JSON::on_number(std::string_view value) { + return addStringViewAsSinkStatus(this, value); } -/** - * Callback for a new hash, which indicates a new subtree, labeled as the - * current argument name, is being created - */ -int JSON::yajl_start_array(void *ctx) { - JSON *tthis = reinterpret_cast(ctx); - std::string name = tthis->getCurrentKey(); - tthis->m_containers.push_back( - reinterpret_cast(new JSONContainerArray(name))); - tthis->m_current_depth++; - if (tthis->m_current_depth > tthis->m_max_depth) { - tthis->m_depth_limit_exceeded = true; - return 0; - } - return 1; +JsonSinkStatus JSON::on_start_array() { + return startContainer(&m_containers, + std::make_unique(getCurrentKey()), + &m_current_depth, m_max_depth, &m_depth_limit_exceeded); } -int JSON::yajl_end_array(void *ctx) { - JSON *tthis = reinterpret_cast(ctx); - if (tthis->m_containers.empty()) { - tthis->m_current_depth--; - return 1; - } - - JSONContainer *a = tthis->m_containers.back(); - tthis->m_containers.pop_back(); - delete a; - if (tthis->m_containers.size() > 0) { - JSONContainerArray *ja = dynamic_cast( - tthis->m_containers.back()); - if (ja) { - ja->m_elementCounter++; - 
} - } - tthis->m_current_depth--; - - return 1; +JsonSinkStatus JSON::on_end_array() { + return endContainer(&m_containers, &m_current_depth); } -int JSON::yajl_start_map(void *ctx) { - JSON *tthis = reinterpret_cast(ctx); - std::string name(tthis->getCurrentKey()); - tthis->m_containers.push_back( - reinterpret_cast(new JSONContainerMap(name))); - tthis->m_current_depth++; - if (tthis->m_current_depth > tthis->m_max_depth) { - tthis->m_depth_limit_exceeded = true; - return 0; - } - return 1; +JsonSinkStatus JSON::on_start_object() { + return startContainer(&m_containers, + std::make_unique(getCurrentKey()), + &m_current_depth, m_max_depth, &m_depth_limit_exceeded); } -/** - * Callback for end hash, meaning the current subtree is being closed, and that - * we should go back to the parent variable label - */ -int JSON::yajl_end_map(void *ctx) { - JSON *tthis = reinterpret_cast(ctx); - if (tthis->m_containers.empty()) { - tthis->m_current_depth--; - return 1; - } - - JSONContainer *a = tthis->m_containers.back(); - tthis->m_containers.pop_back(); - delete a; - - if (tthis->m_containers.size() > 0) { - JSONContainerArray *ja = dynamic_cast( - tthis->m_containers.back()); - if (ja) { - ja->m_elementCounter++; - } - } - - tthis->m_current_depth--; - return 1; +JsonSinkStatus JSON::on_end_object() { + return endContainer(&m_containers, &m_current_depth); } +void JSON::clearContainers() { + m_containers.clear(); +} -} // namespace RequestBodyProcessor -} // namespace modsecurity - - -#endif // WITH_YAJL - +} // namespace modsecurity::RequestBodyProcessor diff --git a/src/request_body_processor/json.h b/src/request_body_processor/json.h index 961ea94ea8..79c0da958a 100644 --- a/src/request_body_processor/json.h +++ b/src/request_body_processor/json.h @@ -16,85 +16,86 @@ #ifndef SRC_REQUEST_BODY_PROCESSOR_JSON_H_ #define SRC_REQUEST_BODY_PROCESSOR_JSON_H_ - -#ifdef WITH_YAJL - -#include - -#include -#include +#include +#include #include +#include +#include +#include 
-#include "modsecurity/transaction.h" -#include "modsecurity/rules_set.h" - +#include "src/request_body_processor/json_backend.h" namespace modsecurity { -namespace RequestBodyProcessor { +class Transaction; +} + + +namespace modsecurity::RequestBodyProcessor { class JSONContainer { public: explicit JSONContainer(const std::string &name) : m_name(name) { } - virtual ~JSONContainer() { } + virtual ~JSONContainer() = default; std::string m_name; }; class JSONContainerArray : public JSONContainer { public: - explicit JSONContainerArray(const std::string &name) : JSONContainer(name), - m_elementCounter(0) { } - size_t m_elementCounter; + using JSONContainer::JSONContainer; + size_t m_elementCounter = 0; }; class JSONContainerMap : public JSONContainer { public: - explicit JSONContainerMap(const std::string &name) : JSONContainer(name) { } + using JSONContainer::JSONContainer; }; -class JSON { +class JSON : public JsonEventSink { public: explicit JSON(Transaction *transaction); - ~JSON(); - - static bool init(); - bool processChunk(const char *buf, unsigned int size, std::string *err); + ~JSON() override; + JSON(const JSON &) = delete; + JSON &operator=(const JSON &) = delete; + JSON(JSON &&) = delete; + JSON &operator=(JSON &&) = delete; + + bool init(); + bool processChunk(const char *buf, unsigned int size, + const std::string *err); bool complete(std::string *err); int addArgument(const std::string& value); - static int yajl_number(void *ctx, const char *value, size_t length); - static int yajl_string(void *ctx, const unsigned char *value, - size_t length); - static int yajl_boolean(void *ctx, int value); - static int yajl_null(void *ctx); - static int yajl_map_key(void *ctx, const unsigned char *key, - size_t length); - static int yajl_end_map(void *ctx); - static int yajl_start_map(void *ctx); - static int yajl_start_array(void *ctx); - static int yajl_end_array(void *ctx); + JsonSinkStatus on_start_object() override; + JsonSinkStatus on_end_object() override; + 
JsonSinkStatus on_start_array() override; + JsonSinkStatus on_end_array() override; + JsonSinkStatus on_key(std::string_view value) override; + JsonSinkStatus on_string(std::string_view value) override; + JsonSinkStatus on_number(std::string_view value) override; + JsonSinkStatus on_boolean(bool value) override; + JsonSinkStatus on_null() override; bool isPreviousArray() const { - const JSONContainerArray *prev = NULL; - if (m_containers.size() < 1) { + if (m_containers.empty()) { return false; } - prev = dynamic_cast( - m_containers[m_containers.size() - 1]); - return prev != NULL; + const JSONContainerArray *prev = dynamic_cast( + m_containers.back().get()); + return prev != nullptr; } std::string getCurrentKey(bool emptyIsNull = false) { std::string ret(m_current_key); - if (m_containers.size() == 0) { + if (m_containers.empty()) { return "json"; } - if (m_current_key.empty() == true) { - if (isPreviousArray() || emptyIsNull == true) { + if (m_current_key.empty()) { + if (isPreviousArray() || emptyIsNull) { return ""; } return "empty-key"; @@ -108,21 +109,18 @@ class JSON { } private: - std::deque m_containers; - Transaction *m_transaction; - yajl_handle m_handle; - yajl_status m_status; + void clearContainers(); + + std::deque> m_containers; + Transaction *m_transaction = nullptr; std::string m_current_key; - double m_max_depth; - int64_t m_current_depth; - bool m_depth_limit_exceeded; + std::string m_data; + double m_max_depth = 0.0; + int64_t m_current_depth = 0; + bool m_depth_limit_exceeded = false; }; -} // namespace RequestBodyProcessor -} // namespace modsecurity - -#endif // WITH_YAJL +} // namespace modsecurity::RequestBodyProcessor #endif // SRC_REQUEST_BODY_PROCESSOR_JSON_H_ - diff --git a/src/request_body_processor/json_adapter.cc b/src/request_body_processor/json_adapter.cc new file mode 100644 index 0000000000..67bd661c36 --- /dev/null +++ b/src/request_body_processor/json_adapter.cc @@ -0,0 +1,105 @@ +/* + * ModSecurity, 
http://www.modsecurity.org/ + * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. + * + */ + +#include "src/request_body_processor/json_adapter.h" + +#include + +#include "src/config.h" + +namespace modsecurity::RequestBodyProcessor { +namespace { + +JsonParseResult makeResult(JsonParseStatus parse_status, + JsonSinkStatus sink_status = JsonSinkStatus::Continue, + std::string detail = "") { + return JsonParseResult{parse_status, sink_status, std::move(detail)}; +} + +JsonParseResult normalizeResult(JsonParseResult result) { + if (result.parse_status != JsonParseStatus::Ok) { + return result; + } + + switch (result.sink_status) { + case JsonSinkStatus::Continue: + return result; + case JsonSinkStatus::EngineAbort: + result.parse_status = JsonParseStatus::EngineAbort; + return result; + case JsonSinkStatus::DepthLimitExceeded: + result.parse_status = JsonParseStatus::ParseError; + return result; + case JsonSinkStatus::InternalError: + result.parse_status = JsonParseStatus::InternalError; + return result; + } + + result.parse_status = JsonParseStatus::InternalError; + result.sink_status = JsonSinkStatus::InternalError; + result.detail.assign("Unknown JSON sink status."); + return result; +} + +template +JsonParseResult parseImplCommon(InputType &input, JsonEventSink *sink, + const JsonBackendParseOptions &options [[maybe_unused]]) { + if (sink == nullptr) { + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::InternalError, "JSON event sink is null."); + } + + if (input.empty()) { + return makeResult(JsonParseStatus::Ok); + } + +#if 
defined(MSC_JSON_BACKEND_SIMDJSON) + return normalizeResult(parseDocumentWithSimdjson(input, sink, options)); +#elif defined(MSC_JSON_BACKEND_JSONCONS) + return normalizeResult(parseDocumentWithJsoncons(input, sink, options)); +#else + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::InternalError, + "ModSecurity was built without a selected JSON backend."); +#endif +} + +} // namespace + +JsonParseResult JSONAdapter::parseImpl(std::string &input, + JsonEventSink *sink, + const JsonBackendParseOptions &options [[maybe_unused]]) const { + return parseImplCommon(input, sink, options); +} + +JsonParseResult JSONAdapter::parseImpl(const std::string &input, + JsonEventSink *sink, + const JsonBackendParseOptions &options [[maybe_unused]]) const { + return parseImplCommon(input, sink, options); +} + +JsonParseResult JSONAdapter::parse(std::string &input, + JsonEventSink *sink, + const JsonBackendParseOptions &options) const { + return parseImpl(input, sink, options); +} + +JsonParseResult JSONAdapter::parse(const std::string &input, + JsonEventSink *sink, + const JsonBackendParseOptions &options) const { + return parseImpl(input, sink, options); +} + +} // namespace modsecurity::RequestBodyProcessor diff --git a/src/request_body_processor/json_adapter.h b/src/request_body_processor/json_adapter.h new file mode 100644 index 0000000000..134e73042c --- /dev/null +++ b/src/request_body_processor/json_adapter.h @@ -0,0 +1,45 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. 
+ * + */ + +#ifndef SRC_REQUEST_BODY_PROCESSOR_JSON_ADAPTER_H_ +#define SRC_REQUEST_BODY_PROCESSOR_JSON_ADAPTER_H_ + +#include + +#include "src/request_body_processor/json_backend.h" + +namespace modsecurity::RequestBodyProcessor { + +class JSONAdapter { + public: + JsonParseResult parse(std::string &input, JsonEventSink *sink, + const JsonBackendParseOptions &options [[maybe_unused]] + = JsonBackendParseOptions()) const; + + JsonParseResult parse(const std::string &input, JsonEventSink *sink, + const JsonBackendParseOptions &options [[maybe_unused]] + = JsonBackendParseOptions()) const; + + private: + JsonParseResult parseImpl(std::string &input, JsonEventSink *sink, + const JsonBackendParseOptions &options [[maybe_unused]]) const; + + JsonParseResult parseImpl(const std::string &input, JsonEventSink *sink, + const JsonBackendParseOptions &options [[maybe_unused]]) const; +}; + +} // namespace modsecurity::RequestBodyProcessor + +#endif // SRC_REQUEST_BODY_PROCESSOR_JSON_ADAPTER_H_ diff --git a/src/request_body_processor/json_backend.h b/src/request_body_processor/json_backend.h new file mode 100644 index 0000000000..ffad7b47fe --- /dev/null +++ b/src/request_body_processor/json_backend.h @@ -0,0 +1,81 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. 
+ * + */ + +#ifndef SRC_REQUEST_BODY_PROCESSOR_JSON_BACKEND_H_ +#define SRC_REQUEST_BODY_PROCESSOR_JSON_BACKEND_H_ + +#include +#include + +namespace modsecurity::RequestBodyProcessor { + +enum class JsonParseStatus { + Ok, + ParseError, + TruncatedInput, + Utf8Error, + EngineAbort, + InternalError +}; + +enum class JsonSinkStatus { + Continue, + EngineAbort, + DepthLimitExceeded, + InternalError +}; + +struct JsonParseResult { + JsonParseStatus parse_status{JsonParseStatus::Ok}; + JsonSinkStatus sink_status{JsonSinkStatus::Continue}; + std::string detail; + + bool ok() const { + return parse_status == JsonParseStatus::Ok + && sink_status == JsonSinkStatus::Continue; + } +}; + +struct JsonBackendParseOptions { + int technical_max_depth{1048576}; +}; + +class JsonEventSink { + public: + virtual ~JsonEventSink() = default; + + virtual JsonSinkStatus on_start_object() = 0; + virtual JsonSinkStatus on_end_object() = 0; + virtual JsonSinkStatus on_start_array() = 0; + virtual JsonSinkStatus on_end_array() = 0; + virtual JsonSinkStatus on_key(std::string_view value) = 0; + virtual JsonSinkStatus on_string(std::string_view value) = 0; + virtual JsonSinkStatus on_number(std::string_view raw_number) = 0; + virtual JsonSinkStatus on_boolean(bool value) = 0; + virtual JsonSinkStatus on_null() = 0; +}; + +JsonParseResult parseDocumentWithSimdjson(const std::string &input, + JsonEventSink *sink, const JsonBackendParseOptions &options); + +JsonParseResult parseDocumentWithSimdjson(std::string &input, + JsonEventSink *sink, const JsonBackendParseOptions &options); + +JsonParseResult parseDocumentWithJsoncons(const std::string &input, + JsonEventSink *sink, const JsonBackendParseOptions &options); + +} // namespace modsecurity::RequestBodyProcessor + +#endif // SRC_REQUEST_BODY_PROCESSOR_JSON_BACKEND_H_ diff --git a/src/request_body_processor/json_backend_common.h b/src/request_body_processor/json_backend_common.h new file mode 100644 index 0000000000..886acb1f18 --- /dev/null +++ 
b/src/request_body_processor/json_backend_common.h @@ -0,0 +1,55 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. + * + */ + +#ifndef SRC_REQUEST_BODY_PROCESSOR_JSON_BACKEND_COMMON_H_ +#define SRC_REQUEST_BODY_PROCESSOR_JSON_BACKEND_COMMON_H_ + +#include +#include +#include + +#include "src/request_body_processor/json_backend.h" + +namespace modsecurity::RequestBodyProcessor::json_backend_common { + +inline JsonParseResult makeResult(JsonParseStatus parse_status, + JsonSinkStatus sink_status = JsonSinkStatus::Continue, + std::string detail = "") { + return JsonParseResult{parse_status, sink_status, std::move(detail)}; +} + +inline JsonParseResult makeResult(JsonParseStatus parse_status, + std::string detail) { + return makeResult(parse_status, JsonSinkStatus::Continue, std::move(detail)); +} + +inline JsonParseResult stopTraversal(JsonSinkStatus sink_status, + std::string_view location) { + return makeResult(JsonParseStatus::Ok, sink_status, + std::string("JSON traversal stopped while ") + std::string(location) + + "."); +} + +inline JsonParseResult finishSinkCall(JsonSinkStatus sink_status, + std::string_view location) { + if (sink_status != JsonSinkStatus::Continue) { + return stopTraversal(sink_status, location); + } + return makeResult(JsonParseStatus::Ok); +} + +} // namespace modsecurity::RequestBodyProcessor::json_backend_common + +#endif // SRC_REQUEST_BODY_PROCESSOR_JSON_BACKEND_COMMON_H_ diff --git a/src/request_body_processor/json_backend_jsoncons.cc 
b/src/request_body_processor/json_backend_jsoncons.cc new file mode 100644 index 0000000000..098b3952a7 --- /dev/null +++ b/src/request_body_processor/json_backend_jsoncons.cc @@ -0,0 +1,807 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "src/request_body_processor/json_backend.h" +#include "src/request_body_processor/json_backend_common.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "src/request_body_processor/json_instrumentation.h" +#include +#include +#include +#include + +namespace modsecurity::RequestBodyProcessor { +namespace { +using json_backend_common::finishSinkCall; +using json_backend_common::makeResult; + +bool isUtf8RelatedError(const std::error_code &error) { + switch (static_cast(error.value())) { + case jsoncons::json_errc::illegal_character_in_string: + case jsoncons::json_errc::illegal_control_character: + case jsoncons::json_errc::illegal_escaped_character: + case jsoncons::json_errc::expected_codepoint_surrogate_pair: + case jsoncons::json_errc::invalid_hex_escape_sequence: + case jsoncons::json_errc::invalid_unicode_escape_sequence: + case jsoncons::json_errc::expected_continuation_byte: + case jsoncons::json_errc::over_long_utf8_sequence: + case jsoncons::json_errc::illegal_codepoint: + case jsoncons::json_errc::illegal_surrogate_value: + case jsoncons::json_errc::unpaired_high_surrogate: + case jsoncons::json_errc::illegal_unicode_character: + return true; + 
default: + return false; + } +} + +JsonParseResult fromJsonconsError(const std::error_code &error, + const jsoncons::ser_context &context) { + std::string detail = error.message() + " at line " + + std::to_string(context.line()) + ", column " + + std::to_string(context.column()) + "."; + + switch (static_cast(error.value())) { + case jsoncons::json_errc::unexpected_eof: + return makeResult(JsonParseStatus::TruncatedInput, + JsonSinkStatus::Continue, detail); + case jsoncons::json_errc::max_nesting_depth_exceeded: + return makeResult(JsonParseStatus::ParseError, + JsonSinkStatus::Continue, detail); + case jsoncons::json_errc::source_error: + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::Continue, detail); + default: + if (isUtf8RelatedError(error)) { + return makeResult(JsonParseStatus::Utf8Error, + JsonSinkStatus::Continue, detail); + } + return makeResult(JsonParseStatus::ParseError, + JsonSinkStatus::Continue, detail); + } +} + +bool isDigit(char value) { + return std::isdigit(static_cast(value)) != 0; +} + +bool isExponentMarker(char value) { + return value == 'e' || value == 'E'; +} + +bool isSignedExponent(std::string_view token, std::size_t index) { + return index < token.size() && (token[index] == '+' || token[index] == '-'); +} + +bool parseIntegerPart(std::string_view token, std::size_t *index) { + if (token[*index] == '0') { + (*index)++; + return true; + } + + if (!isDigit(token[*index]) || token[*index] == '0') { + return false; + } + + while (*index < token.size() && isDigit(token[*index])) { + (*index)++; + } + return true; +} + +bool parseFractionPart(std::string_view token, std::size_t *index) { + if (*index >= token.size() || token[*index] != '.') { + return true; + } + + (*index)++; + if (*index == token.size() || !isDigit(token[*index])) { + return false; + } + while (*index < token.size() && isDigit(token[*index])) { + (*index)++; + } + return true; +} + +bool parseExponentPart(std::string_view token, std::size_t *index) { 
+ if (*index >= token.size() || !isExponentMarker(token[*index])) { + return true; + } + + (*index)++; + if (isSignedExponent(token, *index)) { + (*index)++; + } + if (*index == token.size() || !isDigit(token[*index])) { + return false; + } + while (*index < token.size() && isDigit(token[*index])) { + (*index)++; + } + return true; +} + +bool isValidJsonNumber(std::string_view token) { + if (token.empty()) { + return false; + } + + std::size_t index = 0; + if (token[index] == '-') { + index++; + if (index == token.size()) { + return false; + } + } + + if (!parseIntegerPart(token, &index)) { + return false; + } + if (!parseFractionPart(token, &index)) { + return false; + } + if (!parseExponentPart(token, &index)) { + return false; + } + + return index == token.size(); +} + +bool tokenMatchesNumericEvent(jsoncons::staj_event_type event_type, + std::string_view token) { + if (!isValidJsonNumber(token)) { + return false; + } + + if (event_type == jsoncons::staj_event_type::int64_value + || event_type == jsoncons::staj_event_type::uint64_value) { + return token.find_first_of(".eE") == std::string_view::npos; + } + + return true; +} + +bool isNumericEventType(jsoncons::staj_event_type event_type) { + switch (event_type) { + case jsoncons::staj_event_type::int64_value: + case jsoncons::staj_event_type::uint64_value: + case jsoncons::staj_event_type::double_value: + case jsoncons::staj_event_type::half_value: + return true; + default: + return false; + } +} + +bool isNumericStringEvent(const jsoncons::staj_event &event) { + return event.event_type() == jsoncons::staj_event_type::string_value + && (event.tag() == jsoncons::semantic_tag::bigint + || event.tag() == jsoncons::semantic_tag::bigdec); +} + +class RawJsonTokenCursor { + public: + explicit RawJsonTokenCursor(std::string_view input) + : m_input(input) { } + + bool consume(const jsoncons::staj_event &event, std::string_view *raw_token, + std::string *detail) { + skipInsignificant(); + + if 
(isNumericEventType(event.event_type()) || isNumericStringEvent(event)) { + return consumeNumber(raw_token, detail); + } + + switch (event.event_type()) { + case jsoncons::staj_event_type::begin_object: + return consumeChar('{', raw_token, detail); + case jsoncons::staj_event_type::end_object: + return consumeChar('}', raw_token, detail); + case jsoncons::staj_event_type::begin_array: + return consumeChar('[', raw_token, detail); + case jsoncons::staj_event_type::end_array: + return consumeChar(']', raw_token, detail); + case jsoncons::staj_event_type::key: + case jsoncons::staj_event_type::string_value: + return consumeString(raw_token, detail); + case jsoncons::staj_event_type::null_value: + return consumeLiteral("null", raw_token, detail); + case jsoncons::staj_event_type::bool_value: { + std::error_code error; + const bool value = event.get(error); + if (error) { + if (detail != nullptr) { + *detail = std::string("Unable to decode boolean event while synchronizing raw token cursor: ") + + error.message(); + } + return false; + } + return consumeLiteral(value ? 
"true" : "false", raw_token, detail); + } + case jsoncons::staj_event_type::byte_string_value: + if (detail != nullptr) { + *detail = "Unsupported byte-string event encountered in jsoncons backend."; + } + return false; + } + + if (detail != nullptr) { + *detail = "Unsupported STAJ event encountered while synchronizing raw JSON tokens."; + } + return false; + } + + bool consumeNextNumberToken(std::string_view *raw_token, + std::string *detail) { + std::size_t probe_offset = m_offset; + if (!skipToNextNumberToken(&probe_offset, detail)) { + return false; + } + if (!consumeNumberAt(&probe_offset, raw_token, detail)) { + return false; + } + m_offset = probe_offset; + return true; + } + + bool advanceExactNumber(std::string_view exact_number, std::string *detail) { + if (!isValidJsonNumber(exact_number)) { + if (detail != nullptr) { + *detail = "Unable to advance raw JSON number cursor using a non-numeric token."; + } + return false; + } + + std::size_t probe_offset = m_offset; + if (!skipToNextNumberToken(&probe_offset, detail)) { + return false; + } + if (probe_offset + exact_number.size() > m_input.size() + || m_input.compare(probe_offset, exact_number.size(), exact_number) + != 0) { + if (detail != nullptr) { + *detail = "Exact raw JSON number token did not match jsoncons numeric lexeme."; + } + return false; + } + + const std::size_t next_offset = probe_offset + exact_number.size(); + if (next_offset < m_input.size() + && !isNumberBoundary(m_input[next_offset])) { + if (detail != nullptr) { + *detail = "Exact raw JSON number token was followed by additional numeric characters."; + } + return false; + } + + m_offset = next_offset; + return true; + } + + private: + static bool setError(std::string *detail, const char *message) { + if (detail != nullptr) { + *detail = message; + } + return false; + } + + static bool isWhitespace(char value) { + return std::isspace(static_cast(value)) != 0; + } + + static bool isHexDigit(char value) { + return 
std::isxdigit(static_cast(value)) != 0; + } + + static bool isNumberBoundary(char value) { + return isWhitespace(value) || value == ',' || value == ']' || value == '}'; + } + + void skipInsignificant() { + skipInsignificantAt(&m_offset); + } + + void skipInsignificantAt(std::size_t *offset) const { + while (*offset < m_input.size()) { + if (char current = m_input[*offset]; + isWhitespace(current) || current == ',' || current == ':') { + (*offset)++; + continue; + } + break; + } + } + + bool consumeChar(char expected, std::string_view *raw_token, + std::string *detail) { + return consumeCharAt(&m_offset, expected, raw_token, detail); + } + + bool consumeCharAt(std::size_t *offset, char expected, + std::string_view *raw_token, std::string *detail) const { + if (*offset >= m_input.size() || m_input[*offset] != expected) { + if (detail != nullptr) { + *detail = std::string("Expected raw JSON token '") + expected + + "' while synchronizing jsoncons events."; + } + return false; + } + + *raw_token = std::string_view(m_input.data() + *offset, 1); + (*offset)++; + return true; + } + + bool consumeLiteral(const char *literal, std::string_view *raw_token, + std::string *detail) { + return consumeLiteralAt(&m_offset, literal, raw_token, detail); + } + + bool consumeLiteralAt(std::size_t *offset, const char *literal, + std::string_view *raw_token, std::string *detail) const { + const std::size_t length = std::char_traits::length(literal); + if (*offset + length > m_input.size() + || m_input.compare(*offset, length, literal) != 0) { + if (detail != nullptr) { + *detail = std::string("Expected raw JSON literal '") + literal + + "' while synchronizing jsoncons events."; + } + return false; + } + + *raw_token = std::string_view(m_input.data() + *offset, length); + *offset += length; + return true; + } + + bool consumeString(std::string_view *raw_token, std::string *detail) { + return consumeStringAt(&m_offset, raw_token, detail); + } + + bool consumeStringAt(std::size_t *offset, 
std::string_view *raw_token, + std::string *detail) const { + const std::size_t start = *offset; + + if (*offset >= m_input.size() || m_input[*offset] != '"') { + return setError(detail, + "Expected raw JSON string token while synchronizing jsoncons events."); + } + + (*offset)++; + while (*offset < m_input.size()) { + char current = m_input[*offset]; + (*offset)++; + if (current == '\\') { + if (!consumeEscapedCharacter(offset, detail)) { + return false; + } + continue; + } + + if (current == '"') { + *raw_token = std::string_view(m_input.data() + start, + *offset - start); + return true; + } + + if (static_cast(current) < 0x20) { + return setError(detail, + "Unexpected control character while synchronizing raw JSON string token."); + } + } + + return setError(detail, + "Unterminated string token while synchronizing jsoncons events."); + } + + bool consumeNumber(std::string_view *raw_token, std::string *detail) { + return consumeNumberAt(&m_offset, raw_token, detail); + } + + bool consumeNumberAt(std::size_t *offset, std::string_view *raw_token, + std::string *detail) const { + const std::size_t start = *offset; + + if (!consumeNumberSign(offset) + || !consumeIntegerComponent(offset, detail) + || !consumeFractionComponent(offset, detail) + || !consumeExponentComponent(offset, detail)) { + return false; + } + + *raw_token = std::string_view(m_input.data() + start, *offset - start); + return true; + } + + bool consumeEscapedCharacter(std::size_t *offset, std::string *detail) const { + if (*offset >= m_input.size()) { + return setError(detail, + "Truncated escape sequence while synchronizing raw JSON string token."); + } + + const char escaped = m_input[*offset]; + (*offset)++; + if (escaped != 'u') { + return true; + } + + for (int i = 0; i < 4; i++) { + if (*offset >= m_input.size() || !isHexDigit(m_input[*offset])) { + return setError(detail, + "Invalid Unicode escape while synchronizing raw JSON string token."); + } + (*offset)++; + } + return true; + } + + bool 
consumeNumberSign(std::size_t *offset) const { + if (*offset < m_input.size() && m_input[*offset] == '-') { + (*offset)++; + } + return true; + } + + bool consumeIntegerComponent(std::size_t *offset, std::string *detail) const { + if (*offset >= m_input.size()) { + return setError(detail, + "Unexpected end of input while synchronizing raw JSON number token."); + } + + if (m_input[*offset] == '0') { + (*offset)++; + return true; + } + + if (!isDigit(m_input[*offset]) || m_input[*offset] == '0') { + return setError(detail, + "Invalid integer component while synchronizing raw JSON number token."); + } + + while (*offset < m_input.size() && isDigit(m_input[*offset])) { + (*offset)++; + } + return true; + } + + bool consumeFractionComponent(std::size_t *offset, std::string *detail) const { + if (*offset >= m_input.size() || m_input[*offset] != '.') { + return true; + } + + (*offset)++; + if (*offset >= m_input.size() || !isDigit(m_input[*offset])) { + return setError(detail, + "Invalid fraction component while synchronizing raw JSON number token."); + } + + while (*offset < m_input.size() && isDigit(m_input[*offset])) { + (*offset)++; + } + return true; + } + + bool consumeExponentComponent(std::size_t *offset, std::string *detail) const { + if (*offset >= m_input.size() || !isExponentMarker(m_input[*offset])) { + return true; + } + + (*offset)++; + if (*offset < m_input.size() + && (m_input[*offset] == '+' || m_input[*offset] == '-')) { + (*offset)++; + } + if (*offset >= m_input.size() || !isDigit(m_input[*offset])) { + return setError(detail, + "Invalid exponent component while synchronizing raw JSON number token."); + } + + while (*offset < m_input.size() && isDigit(m_input[*offset])) { + (*offset)++; + } + return true; + } + + bool skipTokenAt(std::size_t *offset, std::string *detail) const { + std::string_view ignored; + if (*offset >= m_input.size()) { + if (detail != nullptr) { + *detail = "Unexpected end of input while searching for a raw JSON number token."; + 
} + return false; + } + + switch (m_input[*offset]) { + case '{': + return consumeCharAt(offset, '{', &ignored, detail); + case '}': + return consumeCharAt(offset, '}', &ignored, detail); + case '[': + return consumeCharAt(offset, '[', &ignored, detail); + case ']': + return consumeCharAt(offset, ']', &ignored, detail); + case '"': + return consumeStringAt(offset, &ignored, detail); + case 't': + return consumeLiteralAt(offset, "true", &ignored, detail); + case 'f': + return consumeLiteralAt(offset, "false", &ignored, detail); + case 'n': + return consumeLiteralAt(offset, "null", &ignored, detail); + default: + if (detail != nullptr) { + *detail = "Unable to locate the next raw JSON number token while synchronizing jsoncons events."; + } + return false; + } + } + + bool skipToNextNumberToken(std::size_t *offset, std::string *detail) const { + while (true) { + skipInsignificantAt(offset); + if (*offset >= m_input.size()) { + if (detail != nullptr) { + *detail = "Unexpected end of input while searching for a raw JSON number token."; + } + return false; + } + if (m_input[*offset] == '-' || isDigit(m_input[*offset])) { + return true; + } + if (!skipTokenAt(offset, detail)) { + return false; + } + } + } + + std::string_view m_input; + std::size_t m_offset{0}; +}; + +std::string_view rawNumberFromContext(std::string_view input, + jsoncons::staj_event_type event_type, const jsoncons::ser_context &context, + const jsoncons::staj_event &event, std::string_view scanned_token) { + const std::size_t begin = context.begin_position(); + if (const std::size_t end = context.end_position(); + begin < end && end <= input.size()) { + std::string_view candidate(input.data() + begin, end - begin); + if (tokenMatchesNumericEvent(event_type, candidate)) { + return candidate; + } + } + + if (tokenMatchesNumericEvent(event_type, scanned_token)) { + return scanned_token; + } + + if (isNumericStringEvent(event)) { + std::error_code error; + jsoncons::string_view decoded = event.get(error); + 
if (error) { + return std::string_view(); + } + if (isValidJsonNumber(std::string_view(decoded.data(), decoded.size()))) { + return std::string_view(decoded.data(), decoded.size()); + } + return std::string_view(); + } + + return std::string_view(); +} + +JsonParseResult emitNumberFromRawToken(std::string_view input, JsonEventSink *sink, + RawJsonTokenCursor *token_cursor, jsoncons::staj_event_type event_type, + const jsoncons::ser_context &context, const jsoncons::staj_event &event) { + std::string_view raw_token; + std::string sync_detail; + if (!token_cursor->consumeNextNumberToken(&raw_token, &sync_detail)) { + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::Continue, sync_detail); + } + recordJsonconsTokenSyncStep(); + std::string_view raw_number = rawNumberFromContext(input, + event_type, context, event, raw_token); + if (raw_number.empty()) { + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::Continue, + "Unable to materialize numeric JSON token from jsoncons backend."); + } + return finishSinkCall(sink->on_number(raw_number), "handling a number"); +} + +JsonParseResult decodeStringEventValue(const jsoncons::staj_event &event, + const jsoncons::ser_context &context, jsoncons::string_view *decoded) { + std::error_code error; + *decoded = event.get(error); + if (error) { + return fromJsonconsError(error, context); + } + return makeResult(JsonParseStatus::Ok); +} + +JsonParseResult emitEvent(std::string_view input, JsonEventSink *sink, + RawJsonTokenCursor *token_cursor, const jsoncons::staj_event &event, + const jsoncons::ser_context &context) { + std::error_code error; + + switch (event.event_type()) { + case jsoncons::staj_event_type::begin_object: + return finishSinkCall(sink->on_start_object(), "starting an object"); + case jsoncons::staj_event_type::end_object: + return finishSinkCall(sink->on_end_object(), "ending an object"); + case jsoncons::staj_event_type::begin_array: + return 
finishSinkCall(sink->on_start_array(), "starting an array"); + case jsoncons::staj_event_type::end_array: + return finishSinkCall(sink->on_end_array(), "ending an array"); + case jsoncons::staj_event_type::key: { + jsoncons::string_view decoded; + if (JsonParseResult result = decodeStringEventValue(event, context, + &decoded); !result.ok()) { + return result; + } + return finishSinkCall(sink->on_key(std::string_view(decoded.data(), + decoded.size())), "processing an object key"); + } + case jsoncons::staj_event_type::string_value: { + jsoncons::string_view decoded; + if (JsonParseResult result = decodeStringEventValue(event, context, + &decoded); !result.ok()) { + return result; + } + if (isNumericStringEvent(event)) { + std::string sync_detail; + if (const std::string_view decoded_number(decoded.data(), + decoded.size()); isValidJsonNumber(decoded_number) + && token_cursor->advanceExactNumber(decoded_number, + &sync_detail)) { + recordJsonconsTokenExactAdvanceStep(); + return finishSinkCall(sink->on_number(decoded_number), + "handling a number"); + } + return emitNumberFromRawToken(input, sink, token_cursor, + jsoncons::staj_event_type::double_value, context, event); + } + return finishSinkCall(sink->on_string(std::string_view(decoded.data(), + decoded.size())), "handling a string"); + } + case jsoncons::staj_event_type::null_value: + return finishSinkCall(sink->on_null(), "handling a null value"); + case jsoncons::staj_event_type::bool_value: + { + bool boolean_value = event.get(error); + if (error) { + return fromJsonconsError(error, context); + } + return finishSinkCall(sink->on_boolean(boolean_value), + "handling a boolean"); + } + case jsoncons::staj_event_type::int64_value: + case jsoncons::staj_event_type::uint64_value: + case jsoncons::staj_event_type::double_value: + case jsoncons::staj_event_type::half_value: + return emitNumberFromRawToken(input, sink, token_cursor, + event.event_type(), context, event); + case 
jsoncons::staj_event_type::byte_string_value: + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::Continue, + "Unsupported byte-string event encountered in jsoncons backend."); + default: + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::Continue, + "Unsupported JSON token type encountered in jsoncons backend."); + } +} + +} // namespace + +JsonParseResult parseDocumentWithJsoncons(const std::string &input, + JsonEventSink *sink, const JsonBackendParseOptions &options) { + if (sink == nullptr) { + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::InternalError, "JSON event sink is null."); + } + + jsoncons::json_options cursor_options; + cursor_options.max_nesting_depth(options.technical_max_depth); + cursor_options.lossless_number(true); + cursor_options.lossless_bignum(true); + + std::error_code error; +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + const auto cursor_start = std::chrono::steady_clock::now(); + jsoncons::json_string_cursor cursor(input, cursor_options, error); + recordJsonconsCursorInit(static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now() - cursor_start).count())); +#else + jsoncons::json_string_cursor cursor(input, cursor_options, error); +#endif + if (error) { + return fromJsonconsError(error, cursor.context()); + } + +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + const auto token_cursor_start = std::chrono::steady_clock::now(); + RawJsonTokenCursor token_cursor(input); + recordJsonconsTokenCursorInit(static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now() - token_cursor_start).count())); + const auto event_loop_start = std::chrono::steady_clock::now(); + const auto record_event_loop = [&event_loop_start]() { + recordJsonconsEventLoop(static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now() - event_loop_start).count())); + }; + const auto finish_with_event_loop = [&record_event_loop]( + const JsonParseResult &result) { + 
record_event_loop(); + return result; + }; +#else + RawJsonTokenCursor token_cursor(input); +#endif + + while (!cursor.done()) { + if (JsonParseResult result = emitEvent(input, sink, &token_cursor, + cursor.current(), cursor.context()); !result.ok()) { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + return finish_with_event_loop(result); +#else + return result; +#endif + } + + cursor.next(error); + if (error) { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + return finish_with_event_loop( + fromJsonconsError(error, cursor.context())); +#else + return fromJsonconsError(error, cursor.context()); +#endif + } + } + + cursor.check_done(error); + if (error) { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + return finish_with_event_loop(fromJsonconsError(error, + cursor.context())); +#else + return fromJsonconsError(error, cursor.context()); +#endif + } + +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + record_event_loop(); +#endif + return makeResult(JsonParseStatus::Ok); +} + +} // namespace modsecurity::RequestBodyProcessor diff --git a/src/request_body_processor/json_backend_simdjson.cc b/src/request_body_processor/json_backend_simdjson.cc new file mode 100644 index 0000000000..525c1560b3 --- /dev/null +++ b/src/request_body_processor/json_backend_simdjson.cc @@ -0,0 +1,518 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. 
+ * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "src/request_body_processor/json_backend.h" +#include "src/request_body_processor/json_backend_common.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "src/request_body_processor/json_instrumentation.h" +#include "simdjson.h" + +namespace modsecurity::RequestBodyProcessor { +namespace { +using json_backend_common::finishSinkCall; +using json_backend_common::makeResult; + +JsonParseResult fromSimdjsonError(simdjson::error_code error) { + switch (error) { + case simdjson::UTF8_ERROR: + return makeResult(JsonParseStatus::Utf8Error, + std::string("Invalid UTF-8 in JSON body: ") + + simdjson::error_message(error)); + case simdjson::EMPTY: + case simdjson::UNCLOSED_STRING: + case simdjson::INCOMPLETE_ARRAY_OR_OBJECT: + case simdjson::INSUFFICIENT_PADDING: + return makeResult(JsonParseStatus::TruncatedInput, + std::string("Incomplete JSON body: ") + + simdjson::error_message(error)); + case simdjson::DEPTH_ERROR: + case simdjson::TAPE_ERROR: + case simdjson::STRING_ERROR: + case simdjson::T_ATOM_ERROR: + case simdjson::F_ATOM_ERROR: + case simdjson::N_ATOM_ERROR: + case simdjson::NUMBER_ERROR: + case simdjson::BIGINT_ERROR: + case simdjson::UNESCAPED_CHARS: + case simdjson::TRAILING_CONTENT: + return makeResult(JsonParseStatus::ParseError, + std::string("Invalid JSON body: ") + + simdjson::error_message(error)); + case simdjson::CAPACITY: + case simdjson::OUT_OF_CAPACITY: + case simdjson::MEMALLOC: + return makeResult(JsonParseStatus::InternalError, + std::string("JSON parser backend failure: ") + + simdjson::error_message(error)); + default: + return makeResult(JsonParseStatus::InternalError, + std::string("JSON backend failed: ") + + simdjson::error_message(error)); + } +} + +std::size_t effectiveTechnicalMaxDepth( + const JsonBackendParseOptions &options) { + return options.technical_max_depth > 0 + ? 
static_cast(options.technical_max_depth) : 1; +} + +std::string_view trimTrailingJsonWhitespace(std::string_view token) { + while (!token.empty()) { + if (const char tail = token.back(); + tail != ' ' && tail != '\t' && tail != '\n' && tail != '\r') { + break; + } + token.remove_suffix(1); + } + return token; +} + +/* + * The ondemand parser is reused per thread because simdjson benefits from + * keeping its internal buffers warm across parses. thread_local storage keeps + * the parser isolated to the calling thread, so no parser state is shared + * across transactions running on different threads. The parse and full + * document traversal both complete inside parseDocumentWithSimdjson(), so no + * parser-backed state escapes this function. We intentionally do not add an + * automatic release/recreate heuristic here: the vendored simdjson API + * explicitly supports parser reuse, and retained capacity after unusually + * large inputs remains a conscious tradeoff rather than an accidental leak. 
+ */ +simdjson::ondemand::parser &getReusableSimdjsonParser() { + thread_local std::unique_ptr parser; + if (parser == nullptr) { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + const auto parser_start = std::chrono::steady_clock::now(); + parser = std::make_unique(); + recordSimdjsonParserConstruction(static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now() - parser_start).count())); +#else + parser = std::make_unique(); +#endif + } + return *parser; +} + +std::size_t clampRequestedMaxDepth(std::size_t input_size, + const JsonBackendParseOptions &options) { + const std::size_t requested_depth = effectiveTechnicalMaxDepth(options); + const std::size_t max_possible_depth = (input_size / 2) + 1; + return std::min(requested_depth, std::max(1, + max_possible_depth)); +} + +simdjson::error_code prepareParser(simdjson::ondemand::parser *parser, + std::size_t input_size, const JsonBackendParseOptions &options) { + if (parser == nullptr) { + return simdjson::MEMALLOC; + } + + const JsonBackendParseOptions default_options; + std::size_t required_max_depth = parser->max_depth(); + if (options.technical_max_depth != default_options.technical_max_depth) { + required_max_depth = clampRequestedMaxDepth(input_size, options); + } + + if (parser->capacity() >= input_size + && parser->max_depth() == required_max_depth) { + return simdjson::SUCCESS; + } + + // simdjson reuses parser buffers across parses. allocate() can grow the + // per-thread parser to satisfy a larger document or different max-depth, + // but it does not proactively shrink retained capacity for later, smaller + // inputs. In simdjson 4.6.1 the max-depth parameter is only enforced by + // simdjson's development checks, so we keep passing it here for that + // internal guardrail while our own walker enforces technical_max_depth at + // runtime using current_depth(). 
+ return parser->allocate(input_size, required_max_depth); +} + +template +JsonParseResult getResult(ResultType &&result, TargetType *target) { + if (auto error = std::forward(result).get(*target); error) { + return fromSimdjsonError(error); + } + + return makeResult(JsonParseStatus::Ok); +} + +class JsonBackendWalker { + public: + JsonBackendWalker(JsonEventSink *sink, + const JsonBackendParseOptions &options) + : m_sink(sink), + m_technical_max_depth(effectiveTechnicalMaxDepth(options)) { } + + JsonParseResult walk(simdjson::ondemand::document *document) { + bool is_scalar = false; + JsonParseResult result = getResult(document->is_scalar(), &is_scalar); + if (!result.ok()) { + return result; + } + + if (is_scalar) { + return walkDocumentScalar(document); + } + + simdjson::ondemand::value root_value; + result = getResult(document->get_value(), &root_value); + if (!result.ok()) { + return result; + } + + return walkValue(root_value); + } + + private: + JsonParseResult walkDocumentScalar(simdjson::ondemand::document *document) { + simdjson::ondemand::json_type type; + JsonParseResult result = getResult(document->type(), &type); + if (!result.ok()) { + return result; + } + + switch (type) { + case simdjson::ondemand::json_type::string: { + std::string_view decoded; + result = getResult(document->get_string(), &decoded); + if (!result.ok()) { + return result; + } + + return finishSinkCall(m_sink->on_string(decoded), + "handling a root string"); + } + case simdjson::ondemand::json_type::number: { + std::string_view raw_number; + result = getResult(document->raw_json_token(), &raw_number); + if (!result.ok()) { + return result; + } + + return finishSinkCall(m_sink->on_number( + trimTrailingJsonWhitespace(raw_number)), + "handling a root number"); + } + case simdjson::ondemand::json_type::boolean: { + bool boolean_value = false; + result = getResult(document->get_bool(), &boolean_value); + if (!result.ok()) { + return result; + } + + return 
finishSinkCall(m_sink->on_boolean(boolean_value), + "handling a root boolean"); + } + case simdjson::ondemand::json_type::null: { + bool is_null = false; + result = getResult(document->is_null(), &is_null); + if (!result.ok()) { + return result; + } + if (!is_null) { + return makeResult(JsonParseStatus::InternalError, + "Root scalar classified as null but failed validation."); + } + + return finishSinkCall(m_sink->on_null(), + "handling a root null"); + } + case simdjson::ondemand::json_type::unknown: + return makeResult(JsonParseStatus::ParseError, + "Invalid JSON token encountered in simdjson backend."); + case simdjson::ondemand::json_type::object: + case simdjson::ondemand::json_type::array: + return makeResult(JsonParseStatus::InternalError, + "Unexpected root scalar container encountered in simdjson backend."); + } + + return makeResult(JsonParseStatus::InternalError, + "Unsupported root scalar type encountered in simdjson backend."); + } + + JsonParseResult walkValue(simdjson::ondemand::value value) { + simdjson::ondemand::json_type type; + + if (JsonParseResult result = getResult(value.type(), &type); + !result.ok()) { + return result; + } + + switch (type) { + case simdjson::ondemand::json_type::object: + if (auto result = enforceTechnicalDepth(value); !result.ok()) { + return result; + } + return walkObject(value); + case simdjson::ondemand::json_type::array: + if (auto result = enforceTechnicalDepth(value); !result.ok()) { + return result; + } + return walkArray(value); + case simdjson::ondemand::json_type::string: + return walkString(value); + case simdjson::ondemand::json_type::number: + return walkNumber(value); + case simdjson::ondemand::json_type::boolean: + return walkBoolean(value); + case simdjson::ondemand::json_type::null: { + return finishSinkCall(m_sink->on_null(), + "handling a null value"); + } + case simdjson::ondemand::json_type::unknown: + return makeResult(JsonParseStatus::ParseError, + "Invalid JSON token encountered in simdjson 
backend."); + } + + return makeResult(JsonParseStatus::InternalError, + "Unsupported JSON token type encountered."); + } + + JsonParseResult walkObject(simdjson::ondemand::value value) { + simdjson::ondemand::object object; + JsonParseResult result = getResult(value.get_object(), &object); + if (!result.ok()) { + return result; + } + + JsonSinkStatus sink_status = m_sink->on_start_object(); + if (JsonParseResult sink_result = finishSinkCall( + sink_status, "starting an object"); !sink_result.ok()) { + return sink_result; + } + + for (auto field_result : object) { + simdjson::ondemand::field field; + std::string_view key; + simdjson::ondemand::value child; + + result = getResult(std::move(field_result), &field); + if (!result.ok()) { + return result; + } + + result = getResult(field.unescaped_key(), &key); + if (!result.ok()) { + return result; + } + + sink_status = m_sink->on_key(key); + if (JsonParseResult sink_result = finishSinkCall( + sink_status, "processing an object key"); + !sink_result.ok()) { + return sink_result; + } + + child = field.value(); + + result = walkValue(child); + if (!result.ok()) { + return result; + } + } + + return finishSinkCall(m_sink->on_end_object(), "ending an object"); + } + + JsonParseResult walkArray(simdjson::ondemand::value value) { + simdjson::ondemand::array array; + JsonParseResult result = getResult(value.get_array(), &array); + if (!result.ok()) { + return result; + } + + JsonSinkStatus sink_status = m_sink->on_start_array(); + if (JsonParseResult sink_result = finishSinkCall( + sink_status, "starting an array"); !sink_result.ok()) { + return sink_result; + } + + for (auto element_result : array) { + simdjson::ondemand::value element; + + result = getResult(std::move(element_result), &element); + if (!result.ok()) { + return result; + } + + result = walkValue(element); + if (!result.ok()) { + return result; + } + } + + return finishSinkCall(m_sink->on_end_array(), "ending an array"); + } + + JsonParseResult 
walkString(simdjson::ondemand::value value) { + std::string_view decoded; + if (JsonParseResult result = getResult(value.get_string(), &decoded); + !result.ok()) { + return result; + } + + return finishSinkCall(m_sink->on_string(decoded), "handling a string"); + } + + JsonParseResult walkNumber(simdjson::ondemand::value value) { + std::string_view raw_number = trimTrailingJsonWhitespace( + value.raw_json_token()); + return finishSinkCall(m_sink->on_number(raw_number), + "handling a number"); + } + + JsonParseResult walkBoolean(simdjson::ondemand::value value) { + bool boolean_value = false; + if (JsonParseResult result = getResult(value.get_bool(), + &boolean_value); !result.ok()) { + return result; + } + + return finishSinkCall(m_sink->on_boolean(boolean_value), + "handling a boolean"); + } + + JsonParseResult enforceTechnicalDepth(simdjson::ondemand::value value) const { + const int32_t current_depth = value.current_depth(); + if (current_depth <= 0) { + return makeResult(JsonParseStatus::InternalError, + "Invalid current depth reported by simdjson backend."); + } + + if (static_cast(current_depth) > m_technical_max_depth) { + return makeResult(JsonParseStatus::ParseError, + "JSON nesting depth exceeds backend technical max depth."); + } + + return makeResult(JsonParseStatus::Ok); + } + + JsonEventSink *m_sink; + const std::size_t m_technical_max_depth; +}; + +struct PreparedSimdjsonInput { + simdjson::padded_string_view view{}; + simdjson::padded_string owned_copy{}; +}; + +PreparedSimdjsonInput prepareMutableSimdjsonInput(std::string *input) { + PreparedSimdjsonInput prepared; + + // The production request-body path owns a mutable std::string, so we can + // pad that buffer in place and keep the logical JSON length in the + // returned padded_string_view. This removes the extra padded_string copy + // while still satisfying simdjson's padding requirement explicitly. 
+ prepared.view = simdjson::pad(*input); + return prepared; +} + +PreparedSimdjsonInput prepareConstSimdjsonInput(const std::string &input) { + PreparedSimdjsonInput prepared; + prepared.view = simdjson::padded_string_view(input); + + // The const path must not guess about std::string capacity. We only parse + // directly when simdjson itself confirms that the existing allocation + // and/or trailing whitespace provide sufficient padding. + if (prepared.view.has_sufficient_padding()) { + return prepared; + } + +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + const auto padded_start = std::chrono::steady_clock::now(); + prepared.owned_copy = simdjson::padded_string(input); + recordSimdjsonPaddedCopy(input.size(), static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now() - padded_start).count())); +#else + prepared.owned_copy = simdjson::padded_string(input); +#endif + prepared.view = prepared.owned_copy; + return prepared; +} + +JsonParseResult parsePreparedDocumentWithSimdjson( + simdjson::padded_string_view input, JsonEventSink *sink, + const JsonBackendParseOptions &options) { + simdjson::ondemand::parser &parser = getReusableSimdjsonParser(); + // This only prepares parser capacity and max-depth bookkeeping. Buffer + // lifetime and padding must already have been handled by the caller. 
+ if (auto error = prepareParser(&parser, input.length(), options); error) { + return fromSimdjsonError(error); + } + + simdjson::ondemand::document document; +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + const auto iterate_start = std::chrono::steady_clock::now(); + if (auto error = parser.iterate(input).get(document); error) { + recordSimdjsonIterate(static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now() - iterate_start).count())); + return fromSimdjsonError(error); + } + recordSimdjsonIterate(static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now() - iterate_start).count())); +#else + if (auto error = parser.iterate(input).get(document); error) { + return fromSimdjsonError(error); + } +#endif + + JsonBackendWalker walker(sink, options); + return walker.walk(&document); +} + +JsonParseResult validateSinkAndParsePreparedDocument( + simdjson::padded_string_view input, JsonEventSink *sink, + const JsonBackendParseOptions &options) { + if (sink == nullptr) { + return makeResult(JsonParseStatus::InternalError, + JsonSinkStatus::InternalError, "JSON event sink is null."); + } + + return parsePreparedDocumentWithSimdjson(input, sink, options); +} + +} // namespace + +JsonParseResult parseDocumentWithSimdjson(std::string &input, + JsonEventSink *sink, const JsonBackendParseOptions &options) { + PreparedSimdjsonInput prepared = prepareMutableSimdjsonInput(&input); + return validateSinkAndParsePreparedDocument(prepared.view, sink, options); +} + +JsonParseResult parseDocumentWithSimdjson(const std::string &input, + JsonEventSink *sink, const JsonBackendParseOptions &options) { + PreparedSimdjsonInput prepared = prepareConstSimdjsonInput(input); + return validateSinkAndParsePreparedDocument(prepared.view, sink, options); +} + +} // namespace modsecurity::RequestBodyProcessor diff --git a/src/request_body_processor/json_instrumentation.cc b/src/request_body_processor/json_instrumentation.cc new file mode 100644 index 
0000000000..1d241e84d5 --- /dev/null +++ b/src/request_body_processor/json_instrumentation.cc @@ -0,0 +1,127 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "src/request_body_processor/json_instrumentation.h" + +#include +#include +#include + +namespace modsecurity::RequestBodyProcessor { +namespace { + +JsonInstrumentationMetrics &instrumentationMetrics() { + thread_local JsonInstrumentationMetrics metrics; + return metrics; +} + +std::uint64_t elapsedNanos( + std::chrono::steady_clock::time_point start_time) noexcept { + return static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now() - start_time).count()); +} + +} // namespace + +void jsonInstrumentationReset() noexcept { + instrumentationMetrics() = JsonInstrumentationMetrics{}; +} + +JsonInstrumentationMetrics jsonInstrumentationSnapshot() noexcept { + return instrumentationMetrics(); +} + +std::string captureRequestBodySnapshot(const std::ostringstream &request_body) { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + const auto start_time = std::chrono::steady_clock::now(); + std::string snapshot = request_body.str(); + instrumentationMetrics().request_body_snapshot_count++; + instrumentationMetrics().request_body_snapshot_bytes += snapshot.size(); + instrumentationMetrics().request_body_snapshot_ns += elapsedNanos(start_time); + return snapshot; +#else + return request_body.str(); +#endif +} + +void recordJsonProcessChunkAppend(std::size_t bytes, + std::uint64_t elapsed_ns) noexcept { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + instrumentationMetrics().json_process_chunk_calls++; + instrumentationMetrics().json_process_chunk_appended_bytes += bytes; + instrumentationMetrics().json_process_chunk_ns += elapsed_ns; +#else + (void) bytes; + (void) elapsed_ns; +#endif +} + +void recordSimdjsonParserConstruction(std::uint64_t elapsed_ns) noexcept { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + instrumentationMetrics().simdjson_parser_constructions++; + 
instrumentationMetrics().simdjson_parser_construction_ns += elapsed_ns; +#else + (void) elapsed_ns; +#endif +} + +void recordSimdjsonPaddedCopy(std::size_t bytes, + std::uint64_t elapsed_ns) noexcept { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + instrumentationMetrics().simdjson_padded_copy_bytes += bytes; + instrumentationMetrics().simdjson_padded_copy_ns += elapsed_ns; +#else + (void) bytes; + (void) elapsed_ns; +#endif +} + +void recordSimdjsonIterate(std::uint64_t elapsed_ns) noexcept { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + instrumentationMetrics().simdjson_iterate_ns += elapsed_ns; +#else + (void) elapsed_ns; +#endif +} + +void recordJsonconsCursorInit(std::uint64_t elapsed_ns) noexcept { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + instrumentationMetrics().jsoncons_cursor_constructions++; + instrumentationMetrics().jsoncons_cursor_init_ns += elapsed_ns; +#else + (void) elapsed_ns; +#endif +} + +void recordJsonconsTokenCursorInit(std::uint64_t elapsed_ns) noexcept { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + instrumentationMetrics().jsoncons_token_cursor_constructions++; + instrumentationMetrics().jsoncons_token_cursor_init_ns += elapsed_ns; +#else + (void) elapsed_ns; +#endif +} + +void recordJsonconsEventLoop(std::uint64_t elapsed_ns) noexcept { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + instrumentationMetrics().jsoncons_event_loop_ns += elapsed_ns; +#else + (void) elapsed_ns; +#endif +} + +void recordJsonconsTokenSyncStep() noexcept { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + instrumentationMetrics().jsoncons_token_sync_steps++; +#endif +} + +void recordJsonconsTokenExactAdvanceStep() noexcept { +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + instrumentationMetrics().jsoncons_token_exact_advance_steps++; +#endif +} + +} // namespace modsecurity::RequestBodyProcessor diff --git a/src/request_body_processor/json_instrumentation.h b/src/request_body_processor/json_instrumentation.h new file mode 100644 index 0000000000..56c486060e --- /dev/null +++ 
b/src/request_body_processor/json_instrumentation.h @@ -0,0 +1,54 @@ +#ifndef SRC_REQUEST_BODY_PROCESSOR_JSON_INSTRUMENTATION_H_ +#define SRC_REQUEST_BODY_PROCESSOR_JSON_INSTRUMENTATION_H_ + +#include +#include +#include +#include + +namespace modsecurity::RequestBodyProcessor { + +struct JsonInstrumentationMetrics { + std::uint64_t request_body_snapshot_count{0}; + std::uint64_t request_body_snapshot_bytes{0}; + std::uint64_t request_body_snapshot_ns{0}; + + std::uint64_t json_process_chunk_calls{0}; + std::uint64_t json_process_chunk_appended_bytes{0}; + std::uint64_t json_process_chunk_ns{0}; + + std::uint64_t simdjson_parser_constructions{0}; + std::uint64_t simdjson_parser_construction_ns{0}; + std::uint64_t simdjson_padded_copy_bytes{0}; + std::uint64_t simdjson_padded_copy_ns{0}; + std::uint64_t simdjson_iterate_ns{0}; + + std::uint64_t jsoncons_cursor_constructions{0}; + std::uint64_t jsoncons_cursor_init_ns{0}; + std::uint64_t jsoncons_token_cursor_constructions{0}; + std::uint64_t jsoncons_token_cursor_init_ns{0}; + std::uint64_t jsoncons_event_loop_ns{0}; + std::uint64_t jsoncons_token_sync_steps{0}; + std::uint64_t jsoncons_token_exact_advance_steps{0}; +}; + +void jsonInstrumentationReset() noexcept; +JsonInstrumentationMetrics jsonInstrumentationSnapshot() noexcept; + +std::string captureRequestBodySnapshot(const std::ostringstream &request_body); + +void recordJsonProcessChunkAppend(std::size_t bytes, std::uint64_t elapsed_ns) + noexcept; +void recordSimdjsonParserConstruction(std::uint64_t elapsed_ns) noexcept; +void recordSimdjsonPaddedCopy(std::size_t bytes, std::uint64_t elapsed_ns) + noexcept; +void recordSimdjsonIterate(std::uint64_t elapsed_ns) noexcept; +void recordJsonconsCursorInit(std::uint64_t elapsed_ns) noexcept; +void recordJsonconsTokenCursorInit(std::uint64_t elapsed_ns) noexcept; +void recordJsonconsEventLoop(std::uint64_t elapsed_ns) noexcept; +void recordJsonconsTokenSyncStep() noexcept; +void recordJsonconsTokenExactAdvanceStep() 
noexcept; + +} // namespace modsecurity::RequestBodyProcessor + +#endif // SRC_REQUEST_BODY_PROCESSOR_JSON_INSTRUMENTATION_H_ diff --git a/src/request_body_processor/multipart.cc b/src/request_body_processor/multipart.cc index 3ae591671e..02d7f5daf5 100644 --- a/src/request_body_processor/multipart.cc +++ b/src/request_body_processor/multipart.cc @@ -15,6 +15,7 @@ #include "src/request_body_processor/multipart.h" +#include #include #include #include diff --git a/src/request_body_processor/multipart.h b/src/request_body_processor/multipart.h index 08d4ffe920..48c1de304e 100644 --- a/src/request_body_processor/multipart.h +++ b/src/request_body_processor/multipart.h @@ -13,11 +13,14 @@ * */ +#include +#include #include #include #include #include #include +#include #ifndef SRC_REQUEST_BODY_PROCESSOR_MULTIPART_H_ #define SRC_REQUEST_BODY_PROCESSOR_MULTIPART_H_ diff --git a/src/request_body_processor/xml.cc b/src/request_body_processor/xml.cc index cbb7894c9b..a790a43f42 100644 --- a/src/request_body_processor/xml.cc +++ b/src/request_body_processor/xml.cc @@ -15,38 +15,51 @@ #include "src/request_body_processor/xml.h" -#include -#include +#include #include +#include "modsecurity/rules_set.h" +#include "modsecurity/rules_set_properties.h" +#include "modsecurity/transaction.h" -namespace modsecurity { -namespace RequestBodyProcessor { + +namespace modsecurity::RequestBodyProcessor { #ifdef WITH_LIBXML2 +namespace { +bool finalizeArgsParsingContext(xml_data *data, std::string *error) { + if (xmlParseChunk(data->parsing_ctx_arg, nullptr, 0, 1) == 0) { + xmlFreeParserCtxt(data->parsing_ctx_arg); + data->parsing_ctx_arg = nullptr; + return true; + } + + if (!data->xml_error.empty()) { + error->assign(data->xml_error); + } else { + error->assign("XML: Failed to parse document for ARGS."); + } + xmlFreeParserCtxt(data->parsing_ctx_arg); + data->parsing_ctx_arg = nullptr; + return false; +} +} // namespace /* * NodeData for parsing XML into args */ -NodeData::NodeData() { - 
has_child = false; -} +NodeData::NodeData() = default; -NodeData::~NodeData() {}; +NodeData::~NodeData() = default; /* * XMLNodes for parsing XML into args */ XMLNodes::XMLNodes(Transaction *transaction) - : nodes{}, - node_depth(0), - currpath(""), - currval(""), - currval_is_set(false), - m_transaction(transaction) + : m_transaction(transaction) {} -XMLNodes::~XMLNodes() {}; +XMLNodes::~XMLNodes() = default; /* * SAX handler for parsing XML into args @@ -57,59 +70,56 @@ class MSCSAXHandler { std::string name = reinterpret_cast(localname); - XMLNodes* xml_data = static_cast(ctx); + auto *xml_data = static_cast(ctx); xml_data->nodes.push_back(std::make_shared()); xml_data->node_depth++; - // FIXME - later if we want to check the depth of XML tree - /* if (max_depth > 0 && max_depth > xml_data->node_depth) { - std::cout << "Depth of XML tree reached the given maximum value " << xml_data->node_depth << std::endl; - exit(1); - } */ // if it's not the first (root) item, then append a '.' // note, the condition should always be true because there is always a pseudo root element: 'xml' if (xml_data->nodes.size() > 1) { xml_data->currpath.append("."); - xml_data->nodes[xml_data->nodes.size()-2]->has_child = true; + const std::size_t parent_index = xml_data->nodes.size() - 2; + xml_data->nodes[parent_index]->has_child = true; } xml_data->currpath.append(name); // set the current value empty // this is necessary because if there is any text between the tags (new line, etc) // it will be added to the current value - xml_data->currval = ""; + xml_data->currval.clear(); xml_data->currval_is_set = false; } void onEndElement(void * ctx, const xmlChar *localname) { std::string name = reinterpret_cast(localname); - XMLNodes* xml_data = static_cast(ctx); - const std::shared_ptr& nd = xml_data->nodes[xml_data->nodes.size()-1]; - if (nd->has_child == false) { + auto *xml_data = static_cast(ctx); + if (const auto &nd = + xml_data->nodes.back(); + !nd->has_child && 
!xml_data->m_transaction->addArgument( + "XML", xml_data->currpath, xml_data->currval, 0)) { // check the return value // if false, then stop parsing // this means the number of arguments reached the limit - if (xml_data->m_transaction->addArgument("XML", xml_data->currpath, xml_data->currval, 0) == false) { - xmlStopParser(xml_data->parsing_ctx_arg); - } + xmlStopParser(xml_data->parsing_ctx_arg); } - if (xml_data->currpath.length() > 0) { + if (!xml_data->currpath.empty()) { // set an offset to store whether this is the first item, in order to know whether to remove the '.' - int offset = (xml_data->nodes.size() > 1) ? 1 : 0; - xml_data->currpath.erase(xml_data->currpath.length() - (name.length()+offset)); + const std::size_t offset = (xml_data->nodes.size() > 1) ? 1 : 0; + xml_data->currpath.erase( + xml_data->currpath.size() - (name.size() + offset)); } xml_data->nodes.pop_back(); xml_data->node_depth--; - xml_data->currval = ""; + xml_data->currval.clear(); xml_data->currval_is_set = false; } void onCharacters(void *ctx, const xmlChar *ch, int len) { - XMLNodes* xml_data = static_cast(ctx); + auto *xml_data = static_cast(ctx); std::string content(reinterpret_cast(ch), len); // libxml2 SAX parser will call this function multiple times // during the parsing of a single node, if the value has multibyte // characters, so we need to concatenate the values - if (xml_data->currval_is_set == false) { + if (!xml_data->currval_is_set) { xml_data->currval = content; xml_data->currval_is_set = true; } else { @@ -121,64 +131,56 @@ class MSCSAXHandler { extern "C" { void MSC_startElement(void *userData, const xmlChar *name, - const xmlChar *prefix, - const xmlChar *URI, - int nb_namespaces, - const xmlChar **namespaces, - int nb_attributes, - int nb_defaulted, - const xmlChar **attributes) { - - MSCSAXHandler* handler = static_cast(userData); + const xmlChar *, + const xmlChar *, + int, + const xmlChar **, + int, + int, + const xmlChar **) { + + auto *handler = 
static_cast(userData); handler->onStartElement(userData, name); } void MSC_endElement( void *userData, const xmlChar *name, - const xmlChar* prefix, - const xmlChar* URI) { + const xmlChar*, + const xmlChar*) { - MSCSAXHandler* handler = static_cast(userData); + auto *handler = static_cast(userData); handler->onEndElement(userData, name); } void MSC_xmlcharacters(void *userData, const xmlChar *ch, int len) { - MSCSAXHandler* handler = static_cast(userData); + auto *handler = static_cast(userData); handler->onCharacters(userData, ch, len); } } XML::XML(Transaction *transaction) - : m_transaction(transaction) { - m_data.doc = NULL; - m_data.parsing_ctx = NULL; - m_data.sax_handler = NULL; - m_data.xml_error = ""; - m_data.parsing_ctx_arg = NULL; - m_data.xml_parser_state = NULL; -} + : m_transaction(transaction) { } XML::~XML() { - if (m_data.parsing_ctx != NULL) { + if (m_data.parsing_ctx != nullptr) { xmlFreeParserCtxt(m_data.parsing_ctx); - m_data.parsing_ctx = NULL; + m_data.parsing_ctx = nullptr; } - if (m_data.doc != NULL) { + if (m_data.doc != nullptr) { xmlFreeDoc(m_data.doc); - m_data.doc = NULL; + m_data.doc = nullptr; } } bool XML::init() { - //xmlParserInputBufferCreateFilenameFunc entity; if (m_transaction->m_rules->m_secXMLExternalEntity == RulesSetProperties::TrueConfigBoolean) { - /*entity = */xmlParserInputBufferCreateFilenameDefault( + xmlParserInputBufferCreateFilenameDefault( __xmlParserInputBufferCreateFilename); } else { - /*entity = */xmlParserInputBufferCreateFilenameDefault( + xmlParserInputBufferCreateFilenameDefault( this->unloadExternalEntity); } if (m_transaction->m_secXMLParseXmlIntoArgs @@ -198,8 +200,6 @@ bool XML::init() { // set the parser state struct m_data.xml_parser_state = std::make_unique(m_transaction); - m_data.xml_parser_state->node_depth = 0; - m_data.xml_parser_state->currval = ""; // the XML will contain at least one node, which is the pseudo root node 'xml' m_data.xml_parser_state->currpath = "xml."; } @@ -208,9 +208,9 
@@ bool XML::init() { } -xmlParserInputBufferPtr XML::unloadExternalEntity(const char *URI, - xmlCharEncoding enc) { - return NULL; +xmlParserInputBufferPtr XML::unloadExternalEntity(const char *, + xmlCharEncoding) { + return nullptr; } @@ -220,30 +220,17 @@ bool XML::processChunk(const char *buf, unsigned int size, * enable us to pass it the first chunk of data so that * it can attempt to auto-detect the encoding. */ - if (m_data.parsing_ctx == NULL && m_data.parsing_ctx_arg == NULL) { + if (m_data.parsing_ctx == nullptr && m_data.parsing_ctx_arg == nullptr) { /* First invocation. */ ms_dbg_a(m_transaction, 4, "XML: Initialising parser."); - /* NOTE When Sax interface is used libxml will not - * create the document object, but we need it. - - msr->xml->sax_handler = (xmlSAXHandler *)apr_pcalloc(msr->mp, - sizeof(xmlSAXHandler)); - if (msr->xml->sax_handler == NULL) return -1; - msr->xml->sax_handler->error = xml_receive_sax_error; - msr->xml->sax_handler->warning = xml_receive_sax_error; - msr->xml->parsing_ctx = xmlCreatePushParserCtxt(msr->xml->sax_handler, - msr, buf, size, "body.xml"); - - */ - if (m_transaction->m_secXMLParseXmlIntoArgs != RulesSetProperties::OnlyArgsConfigXMLParseXmlIntoArgs) { - m_data.parsing_ctx = xmlCreatePushParserCtxt(NULL, NULL, + m_data.parsing_ctx = xmlCreatePushParserCtxt(nullptr, nullptr, buf, size, "body.xml"); - if (m_data.parsing_ctx == NULL) { + if (m_data.parsing_ctx == nullptr) { ms_dbg_a(m_transaction, 4, "XML: Failed to create parsing context."); error->assign("XML: Failed to create parsing context."); @@ -262,8 +249,8 @@ bool XML::processChunk(const char *buf, unsigned int size, m_data.xml_parser_state.get(), buf, size, - NULL); - if (m_data.parsing_ctx_arg == NULL) { + nullptr); + if (m_data.parsing_ctx_arg == nullptr) { error->assign("XML: Failed to create parsing context for ARGS."); return false; } @@ -275,7 +262,7 @@ bool XML::processChunk(const char *buf, unsigned int size, } /* Not a first invocation. 
*/ - if (m_data.parsing_ctx != NULL && + if (m_data.parsing_ctx != nullptr && m_transaction->m_secXMLParseXmlIntoArgs != RulesSetProperties::OnlyArgsConfigXMLParseXmlIntoArgs) { xmlParseChunk(m_data.parsing_ctx, buf, size, 0); @@ -287,7 +274,7 @@ bool XML::processChunk(const char *buf, unsigned int size, } } - if (m_data.parsing_ctx_arg != NULL && + if (m_data.parsing_ctx_arg != nullptr && ( m_transaction->m_secXMLParseXmlIntoArgs == RulesSetProperties::OnlyArgsConfigXMLParseXmlIntoArgs @@ -309,12 +296,12 @@ bool XML::processChunk(const char *buf, unsigned int size, bool XML::complete(std::string *error) { /* Only if we have a context, meaning we've done some work. */ - if (m_data.parsing_ctx != NULL || m_data.parsing_ctx_arg != NULL) { - if (m_data.parsing_ctx != NULL && + if (m_data.parsing_ctx != nullptr || m_data.parsing_ctx_arg != nullptr) { + if (m_data.parsing_ctx != nullptr && m_transaction->m_secXMLParseXmlIntoArgs != RulesSetProperties::OnlyArgsConfigXMLParseXmlIntoArgs) { /* This is how we signal the end of parsing to libxml. */ - xmlParseChunk(m_data.parsing_ctx, NULL, 0, 1); + xmlParseChunk(m_data.parsing_ctx, nullptr, 0, 1); /* Preserve the results for our reference. */ m_data.well_formed = m_data.parsing_ctx->wellFormed; @@ -322,7 +309,7 @@ bool XML::complete(std::string *error) { /* Clean up everything else. */ xmlFreeParserCtxt(m_data.parsing_ctx); - m_data.parsing_ctx = NULL; + m_data.parsing_ctx = nullptr; ms_dbg_a(m_transaction, 4, "XML: Parsing complete (well_formed " \ + std::to_string(m_data.well_formed) + ")."); @@ -332,7 +319,7 @@ bool XML::complete(std::string *error) { return false; } } - if (m_data.parsing_ctx_arg != NULL && + if (m_data.parsing_ctx_arg != nullptr && ( m_transaction->m_secXMLParseXmlIntoArgs == RulesSetProperties::OnlyArgsConfigXMLParseXmlIntoArgs @@ -341,19 +328,9 @@ bool XML::complete(std::string *error) { == RulesSetProperties::TrueConfigXMLParseXmlIntoArgs) ) { /* This is how we signale the end of parsing to libxml. 
*/ - if (xmlParseChunk(m_data.parsing_ctx_arg, NULL, 0, 1) != 0) { - if (m_data.xml_error != "") { - error->assign(m_data.xml_error); - } - else { - error->assign("XML: Failed to parse document for ARGS."); - } - xmlFreeParserCtxt(m_data.parsing_ctx_arg); - m_data.parsing_ctx_arg = NULL; + if (!finalizeArgsParsingContext(&m_data, error)) { return false; } - xmlFreeParserCtxt(m_data.parsing_ctx_arg); - m_data.parsing_ctx_arg = NULL; } } @@ -362,5 +339,4 @@ bool XML::complete(std::string *error) { #endif -} // namespace RequestBodyProcessor -} // namespace modsecurity +} // namespace modsecurity::RequestBodyProcessor diff --git a/src/request_body_processor/xml.h b/src/request_body_processor/xml.h index df766d03b7..87f614fee2 100644 --- a/src/request_body_processor/xml.h +++ b/src/request_body_processor/xml.h @@ -19,18 +19,19 @@ #include #endif +#include #include -#include - -#include "modsecurity/transaction.h" -#include "modsecurity/rules_set.h" +#include #ifndef SRC_REQUEST_BODY_PROCESSOR_XML_H_ #define SRC_REQUEST_BODY_PROCESSOR_XML_H_ - namespace modsecurity { -namespace RequestBodyProcessor { +class Transaction; +} + + +namespace modsecurity::RequestBodyProcessor { #ifdef WITH_LIBXML2 @@ -42,7 +43,7 @@ class NodeData { explicit NodeData(); ~NodeData(); - bool has_child; + bool has_child = false; }; /* @@ -51,14 +52,14 @@ class NodeData { class XMLNodes { public: std::vector> nodes; - unsigned long int node_depth; + unsigned long int node_depth = 0; std::string currpath; std::string currval; - bool currval_is_set; - Transaction *m_transaction; + bool currval_is_set = false; + Transaction *m_transaction = nullptr; // need to store context - this is the same as in xml_data // need to stop parsing if the number of arguments reached the limit - xmlParserCtxtPtr parsing_ctx_arg; + xmlParserCtxtPtr parsing_ctx_arg = nullptr; explicit XMLNodes (Transaction *); ~XMLNodes(); @@ -66,23 +67,21 @@ class XMLNodes { struct xml_data { std::unique_ptr sax_handler; - 
xmlParserCtxtPtr parsing_ctx; - xmlDocPtr doc; + xmlParserCtxtPtr parsing_ctx = nullptr; + xmlDocPtr doc = nullptr; - unsigned int well_formed; + unsigned int well_formed = 0; /* error reporting and XML array flag */ std::string xml_error; /* additional parser context for arguments */ - xmlParserCtxtPtr parsing_ctx_arg; + xmlParserCtxtPtr parsing_ctx_arg = nullptr; /* parser state for SAX parser */ std::unique_ptr xml_parser_state; }; -typedef struct xml_data xml_data; - class XML { public: explicit XML(Transaction *transaction); @@ -96,13 +95,12 @@ class XML { xml_data m_data; private: - Transaction *m_transaction; + Transaction *m_transaction = nullptr; std::string m_header; }; #endif -} // namespace RequestBodyProcessor -} // namespace modsecurity +} // namespace modsecurity::RequestBodyProcessor #endif // SRC_REQUEST_BODY_PROCESSOR_XML_H_ diff --git a/src/rule_with_actions.cc b/src/rule_with_actions.cc index f6642b67e6..7469be8fe9 100644 --- a/src/rule_with_actions.cc +++ b/src/rule_with_actions.cc @@ -26,6 +26,7 @@ #include #include #include +#include #include "modsecurity/rules_set.h" #include "src/operators/operator.h" diff --git a/src/rule_with_operator.cc b/src/rule_with_operator.cc index 9c356b8fb0..0f8b14c4bb 100644 --- a/src/rule_with_operator.cc +++ b/src/rule_with_operator.cc @@ -18,12 +18,14 @@ #include #include +#include #include #include #include #include #include #include +#include #include "modsecurity/rules_set.h" #include "src/operators/operator.h" diff --git a/src/rules_exceptions.cc b/src/rules_exceptions.cc index 2fb0cf857f..352d3cab83 100644 --- a/src/rules_exceptions.cc +++ b/src/rules_exceptions.cc @@ -15,7 +15,10 @@ #include "modsecurity/rules_exceptions.h" +#include #include +#include +#include #include "src/utils/string.h" #include "src/variables/variable.h" diff --git a/src/rules_set_phases.cc b/src/rules_set_phases.cc index a781930498..88731edda9 100644 --- a/src/rules_set_phases.cc +++ b/src/rules_set_phases.cc @@ -16,6 +16,7 @@ 
#include #include #include +#include #include #include diff --git a/src/run_time_string.cc b/src/run_time_string.cc index 45f298bc3d..48feeef38b 100644 --- a/src/run_time_string.cc +++ b/src/run_time_string.cc @@ -15,6 +15,7 @@ #include +#include #include "src/run_time_string.h" diff --git a/src/transaction.cc b/src/transaction.cc index 408f9b3d40..c9e1401b7a 100644 --- a/src/transaction.cc +++ b/src/transaction.cc @@ -13,39 +13,42 @@ * */ -#include "modsecurity/transaction.h" - -#ifdef WITH_YAJL -#include -#include +#ifdef HAVE_CONFIG_H +#include "config.h" #endif +#include "modsecurity/transaction.h" + #include #include #include +#include #include #include #include #include +#include #include +#include #include +#include #include #include "modsecurity/actions/action.h" #include "src/actions/disruptive/deny.h" #include "modsecurity/intervention.h" #include "modsecurity/modsecurity.h" +#include "src/request_body_processor/json.h" +#include "src/request_body_processor/json_instrumentation.h" #include "src/request_body_processor/multipart.h" #include "src/request_body_processor/xml.h" -#ifdef WITH_YAJL -#include "src/request_body_processor/json.h" -#endif #include "modsecurity/audit_log.h" #include "src/unique_id.h" #include "src/utils/string.h" #include "src/utils/system.h" #include "src/utils/decode.h" +#include "src/utils/json_writer.h" #include "src/utils/random.h" #include "modsecurity/rule.h" #include "modsecurity/rule_message.h" @@ -59,6 +62,7 @@ using modsecurity::actions::Action; +using modsecurity::RequestBodyProcessor::captureRequestBodySnapshot; using modsecurity::RequestBodyProcessor::Multipart; using modsecurity::RequestBodyProcessor::XML; @@ -142,11 +146,7 @@ Transaction::Transaction(ModSecurity *ms, RulesSet *rules, const char *id, #else m_xml(nullptr), #endif -#ifdef WITH_YAJL m_json(new RequestBodyProcessor::JSON(this)), -#else - m_json(nullptr), -#endif m_secRuleEngine(RulesSetProperties::PropertyNotSetRuleEngine), 
m_secXMLParseXmlIntoArgs(rules->m_secXMLParseXmlIntoArgs), m_logCbData(logCbData), @@ -173,9 +173,7 @@ Transaction::~Transaction() { intervention::free(&m_it); intervention::clean(&m_it); -#ifdef WITH_YAJL delete m_json; -#endif #ifdef WITH_LIBXML2 delete m_xml; #endif @@ -693,13 +691,17 @@ int Transaction::processRequestBody() { */ std::unique_ptr a = m_variableRequestHeaders.resolveFirst( "Content-Type"); + const std::string requestBodySnapshot = captureRequestBodySnapshot( + m_requestBody); + const std::size_t requestBodySnapshotSize = requestBodySnapshot.size(); bool requestBodyNoFilesLimitExceeded = false; if ((m_requestBodyType == WWWFormUrlEncoded) || (m_requestBodyProcessor == JSONRequestBody) || (m_requestBodyProcessor == XMLRequestBody)) { if ((m_rules->m_requestBodyNoFilesLimit.m_set) - && (m_requestBody.str().size() > m_rules->m_requestBodyNoFilesLimit.m_value)) { + && (requestBodySnapshotSize + > m_rules->m_requestBodyNoFilesLimit.m_value)) { m_variableReqbodyError.set("1", 0); m_variableReqbodyErrorMsg.set("Request body excluding files is bigger than the maximum expected.", 0); m_variableInboundDataError.set("1", m_variableOffset); @@ -709,72 +711,64 @@ int Transaction::processRequestBody() { } } -#ifdef WITH_LIBXML2 - if (m_requestBodyProcessor == XMLRequestBody) { + if (m_requestBodyProcessor == JSONRequestBody) { // large size might cause issues in the parsing itself; omit if exceeded if (!requestBodyNoFilesLimitExceeded) { std::string error; - if (m_xml->init() == true) { - m_xml->processChunk(m_requestBody.str().c_str(), - m_requestBody.str().size(), + if (m_rules->m_requestBodyJsonDepthLimit.m_set) { + m_json->setMaxDepth(m_rules->m_requestBodyJsonDepthLimit.m_value); + } + if (m_json->init() == true) { + m_json->processChunk(requestBodySnapshot.c_str(), + requestBodySnapshotSize, &error); - m_xml->complete(&error); + m_json->complete(&error); } - if (error.empty() == false) { + if (error.empty() == false && requestBodySnapshotSize > 0) { 
m_variableReqbodyError.set("1", m_variableOffset); - m_variableReqbodyErrorMsg.set("XML parsing error: " + error, + m_variableReqbodyProcessorError.set("1", m_variableOffset); + m_variableReqbodyErrorMsg.set("JSON parsing error: " + error, m_variableOffset); - m_variableReqbodyProcessorErrorMsg.set("XML parsing error: " \ + m_variableReqbodyProcessorErrorMsg.set("JSON parsing error: " \ + error, m_variableOffset); - m_variableReqbodyProcessorError.set("1", m_variableOffset); } else { m_variableReqbodyError.set("0", m_variableOffset); m_variableReqbodyProcessorError.set("0", m_variableOffset); } } -#endif -#if WITH_YAJL + } #ifdef WITH_LIBXML2 - } else if (m_requestBodyProcessor == JSONRequestBody) { -#else - if (m_requestBodyProcessor == JSONRequestBody) { -#endif + else if (m_requestBodyProcessor == XMLRequestBody) { // large size might cause issues in the parsing itself; omit if exceeded if (!requestBodyNoFilesLimitExceeded) { std::string error; - if (m_rules->m_requestBodyJsonDepthLimit.m_set) { - m_json->setMaxDepth(m_rules->m_requestBodyJsonDepthLimit.m_value); - } - if (m_json->init() == true) { - m_json->processChunk(m_requestBody.str().c_str(), - m_requestBody.str().size(), + if (m_xml->init() == true) { + m_xml->processChunk(requestBodySnapshot.c_str(), + requestBodySnapshotSize, &error); - m_json->complete(&error); + m_xml->complete(&error); } - if (error.empty() == false && m_requestBody.str().size() > 0) { + if (error.empty() == false) { m_variableReqbodyError.set("1", m_variableOffset); - m_variableReqbodyProcessorError.set("1", m_variableOffset); - m_variableReqbodyErrorMsg.set("JSON parsing error: " + error, + m_variableReqbodyErrorMsg.set("XML parsing error: " + error, m_variableOffset); - m_variableReqbodyProcessorErrorMsg.set("JSON parsing error: " \ + m_variableReqbodyProcessorErrorMsg.set("XML parsing error: " \ + error, m_variableOffset); + m_variableReqbodyProcessorError.set("1", m_variableOffset); } else { m_variableReqbodyError.set("0", 
m_variableOffset); m_variableReqbodyProcessorError.set("0", m_variableOffset); } } + } #endif -#if defined(WITH_LIBXML2) or defined(WITH_YAJL) - } else if (m_requestBodyType == MultiPartRequestBody) { -#else - if (m_requestBodyType == MultiPartRequestBody) { -#endif + else if (m_requestBodyType == MultiPartRequestBody) { std::string error; int reqbodyNoFilesLength = 0; if (a != NULL) { Multipart m(*a, this); if (m.init(&error) == true) { - m.process(m_requestBody.str(), &error, m_variableOffset); + m.process(requestBodySnapshot, &error, m_variableOffset); } reqbodyNoFilesLength = m.m_reqbody_no_files_length; m.multipart_complete(&error); @@ -801,7 +795,7 @@ int Transaction::processRequestBody() { m_variableOffset++; // large size might cause issues in the parsing itself; omit if exceeded if (!requestBodyNoFilesLimitExceeded) { - extractArguments("POST", m_requestBody.str(), m_variableOffset); + extractArguments("POST", requestBodySnapshot, m_variableOffset); } } else if (m_requestBodyType != UnknownFormat) { /** @@ -855,16 +849,16 @@ int Transaction::processRequestBody() { } fullRequest = fullRequest + "\n\n"; - fullRequest = fullRequest + m_requestBody.str(); + fullRequest = fullRequest + requestBodySnapshot; m_variableFullRequest.set(fullRequest, m_variableOffset); m_variableFullRequestLength.set(std::to_string(fullRequest.size()), m_variableOffset); if (m_requestBody.tellp() > 0) { - m_variableRequestBody.set(m_requestBody.str(), m_variableOffset); + m_variableRequestBody.set(requestBodySnapshot, m_variableOffset); m_variableRequestBodyLength.set(std::to_string( - m_requestBody.str().size()), - m_variableOffset, m_requestBody.str().size()); + requestBodySnapshotSize), + m_variableOffset, requestBodySnapshotSize); } this->m_rules->evaluate(modsecurity::RequestBodyPhase, this); @@ -1564,197 +1558,139 @@ std::string Transaction::toOldAuditLogFormat(int parts, std::string Transaction::toJSON(int parts) { -#ifdef WITH_YAJL - const unsigned char *buf; - size_t len; - 
yajl_gen g; std::string log; std::string ts = utils::string::ascTime(&m_timeStamp); std::string uniqueId = UniqueId::uniqueId(); - - g = yajl_gen_alloc(NULL); - if (g == NULL) { - return ""; - } - yajl_gen_config(g, yajl_gen_beautify, 0); - - /* main */ - yajl_gen_map_open(g); - - /* trasaction */ - yajl_gen_string(g, reinterpret_cast("transaction"), - strlen("transaction")); - - yajl_gen_map_open(g); - /* Part: A (header mandatory) */ - LOGFY_ADD("client_ip", m_clientIpAddress); - LOGFY_ADD("time_stamp", ts); - LOGFY_ADD("server_id", uniqueId); - LOGFY_ADD_NUM("client_port", m_clientPort); - LOGFY_ADD("host_ip", m_serverIpAddress); - LOGFY_ADD_NUM("host_port", m_serverPort); - LOGFY_ADD("unique_id", m_id); - - /* request */ - yajl_gen_string(g, reinterpret_cast("request"), - strlen("request")); - yajl_gen_map_open(g); - - LOGFY_ADD("method", - utils::string::dash_if_empty( - m_variableRequestMethod.evaluate())); - - LOGFY_ADD("http_version", m_httpVersion); - LOGFY_ADD("hostname", m_requestHostName); - LOGFY_ADD("uri", this->m_uri); + utils::JsonWriter writer(false); + + const auto addString = [&writer](std::string_view key, + const std::string &value) { + writer.key(key); + writer.string(value); + }; + const auto addInteger = [&writer](std::string_view key, int64_t value) { + writer.key(key); + writer.integer(value); + }; + + writer.start_object(); + writer.key("transaction"); + writer.start_object(); + + addString("client_ip", m_clientIpAddress); + addString("time_stamp", ts); + addString("server_id", uniqueId); + addInteger("client_port", m_clientPort); + addString("host_ip", m_serverIpAddress); + addInteger("host_port", m_serverPort); + addString("unique_id", m_id); + + writer.key("request"); + writer.start_object(); + addString("method", + utils::string::dash_if_empty(m_variableRequestMethod.evaluate())); + addString("http_version", m_httpVersion); + addString("hostname", m_requestHostName); + addString("uri", this->m_uri); if (parts & 
audit_log::AuditLog::CAuditLogPart) { - // FIXME: check for the binary content size. - LOGFY_ADD("body", utils::string::toHexIfNeeded(this->m_requestBody.str())); + addString("body", utils::string::toHexIfNeeded(this->m_requestBody.str())); } - /* request headers */ if (parts & audit_log::AuditLog::BAuditLogPart) { std::vector l; - yajl_gen_string(g, reinterpret_cast("headers"), - strlen("headers")); - yajl_gen_map_open(g); + writer.key("headers"); + writer.start_object(); m_variableRequestHeaders.resolve(&l); for (auto &h : l) { - LOGFY_ADD(utils::string::toHexIfNeeded(h->getKey().c_str()).c_str(), utils::string::toHexIfNeeded(h->getValue())); + std::string header_name = + utils::string::toHexIfNeeded(h->getKey()); + std::string header_value = + utils::string::toHexIfNeeded(h->getValue()); + addString(header_name, header_value); delete h; } - - /* end: request headers */ - yajl_gen_map_close(g); + writer.end_object(); } + writer.end_object(); - /* end: request */ - yajl_gen_map_close(g); - - /* response */ - yajl_gen_string(g, reinterpret_cast("response"), - strlen("response")); - yajl_gen_map_open(g); - + writer.key("response"); + writer.start_object(); if (parts & audit_log::AuditLog::EAuditLogPart) { - LOGFY_ADD("body", this->m_responseBody.str()); + addString("body", this->m_responseBody.str()); } - LOGFY_ADD_NUM("http_code", m_httpCodeReturned); + addInteger("http_code", m_httpCodeReturned); - /* response headers */ if (parts & audit_log::AuditLog::FAuditLogPart) { std::vector l; - yajl_gen_string(g, reinterpret_cast("headers"), - strlen("headers")); - yajl_gen_map_open(g); + writer.key("headers"); + writer.start_object(); m_variableResponseHeaders.resolve(&l); for (auto &h : l) { - LOGFY_ADD(h->getKey().c_str(), h->getValue()); + addString(h->getKey(), h->getValue()); delete h; } - - /* end: response headers */ - yajl_gen_map_close(g); + writer.end_object(); } - /* end: response */ - yajl_gen_map_close(g); + writer.end_object(); - /* producer */ if (parts & 
audit_log::AuditLog::HAuditLogPart) { - yajl_gen_string(g, reinterpret_cast("producer"), - strlen("producer")); - yajl_gen_map_open(g); - - /* producer > libmodsecurity */ - LOGFY_ADD("modsecurity", m_ms->whoAmI()); - - /* producer > connector */ - LOGFY_ADD("connector", m_ms->getConnectorInformation()); - - /* producer > engine state */ - LOGFY_ADD("secrules_engine", + writer.key("producer"); + writer.start_object(); + addString("modsecurity", m_ms->whoAmI()); + addString("connector", m_ms->getConnectorInformation()); + addString("secrules_engine", RulesSet::ruleEngineStateString( - (RulesSetProperties::RuleEngine) getRuleEngineState())); + (RulesSetProperties::RuleEngine) getRuleEngineState())); - /* producer > components */ - yajl_gen_string(g, - reinterpret_cast("components"), - strlen("components")); - - yajl_gen_array_open(g); + writer.key("components"); + writer.start_array(); for (const auto &a : m_rules->m_components) { - yajl_gen_string(g, - reinterpret_cast - (a.data()), a.length()); + writer.string(a); } - yajl_gen_array_close(g); - - /* end: producer */ - yajl_gen_map_close(g); - - /* messages */ - yajl_gen_string(g, - reinterpret_cast("messages"), - strlen("messages")); - yajl_gen_array_open(g); - for (auto a : m_rulesMessages) { - yajl_gen_map_open(g); - LOGFY_ADD("message", a.m_message); - yajl_gen_string(g, - reinterpret_cast("details"), - strlen("details")); - yajl_gen_map_open(g); - LOGFY_ADD("match", a.m_match); - LOGFY_ADD("reference", a.m_reference); - LOGFY_ADD("ruleId", std::to_string(a.m_rule.m_ruleId)); - LOGFY_ADD("file", a.m_rule.getFileName()); - LOGFY_ADD("lineNumber", std::to_string(a.m_rule.getLineNumber())); - LOGFY_ADD("data", utils::string::toHexIfNeeded(a.m_data)); - LOGFY_ADD("severity", std::to_string(a.m_severity)); - LOGFY_ADD("ver", a.m_rule.m_ver); - LOGFY_ADD("rev", a.m_rule.m_rev); - - yajl_gen_string(g, - reinterpret_cast("tags"), - strlen("tags")); - yajl_gen_array_open(g); - for (auto b : a.m_tags) { - 
yajl_gen_string(g, - reinterpret_cast(b.data()), - b.length()); + writer.end_array(); + writer.end_object(); + + writer.key("messages"); + writer.start_array(); + for (const auto &a : m_rulesMessages) { + writer.start_object(); + addString("message", a.m_message); + writer.key("details"); + writer.start_object(); + addString("match", a.m_match); + addString("reference", a.m_reference); + addString("ruleId", std::to_string(a.m_rule.m_ruleId)); + addString("file", a.m_rule.getFileName()); + addString("lineNumber", std::to_string(a.m_rule.getLineNumber())); + addString("data", utils::string::toHexIfNeeded(a.m_data)); + addString("severity", std::to_string(a.m_severity)); + addString("ver", a.m_rule.m_ver); + addString("rev", a.m_rule.m_rev); + + writer.key("tags"); + writer.start_array(); + for (const auto &b : a.m_tags) { + writer.string(b); } - yajl_gen_array_close(g); + writer.end_array(); - LOGFY_ADD("maturity", std::to_string(a.m_rule.m_maturity)); - LOGFY_ADD("accuracy", std::to_string(a.m_rule.m_accuracy)); - yajl_gen_map_close(g); - yajl_gen_map_close(g); + addString("maturity", std::to_string(a.m_rule.m_maturity)); + addString("accuracy", std::to_string(a.m_rule.m_accuracy)); + writer.end_object(); + writer.end_object(); } - yajl_gen_array_close(g); - /* end: messages */ + writer.end_array(); } - /* end: transaction */ - yajl_gen_map_close(g); - - /* end: main */ - yajl_gen_map_close(g); + writer.end_object(); + writer.end_object(); - yajl_gen_get_buf(g, &buf, &len); - - log.assign(reinterpret_cast(buf), len); + log = writer.to_string(); log.append("\n"); - - yajl_gen_free(g); - return log; -#else - return std::string("{\"error\":\"ModSecurity was " \ - "not compiled with JSON support.\"}"); -#endif } @@ -2326,4 +2262,3 @@ extern "C" int msc_set_request_hostname(Transaction *transaction, } // namespace modsecurity - diff --git a/src/utils/json_writer.cc b/src/utils/json_writer.cc new file mode 100644 index 0000000000..1be155f123 --- /dev/null +++ 
b/src/utils/json_writer.cc @@ -0,0 +1,196 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. + * + */ + +#include "src/utils/json_writer.h" + +#include +#include +#include + +namespace modsecurity::utils { + +JsonWriter::JsonWriter(bool pretty, std::string indent) + : m_pretty(pretty), + m_indent(std::move(indent)) { } + +void JsonWriter::start_object() { + begin_container(ContainerType::Object, '{'); +} + +void JsonWriter::end_object() { + end_container(ContainerType::Object, '}'); +} + +void JsonWriter::start_array() { + begin_container(ContainerType::Array, '['); +} + +void JsonWriter::end_array() { + end_container(ContainerType::Array, ']'); +} + +void JsonWriter::key(std::string_view value) { + Frame &frame = m_stack.back(); + + if (!frame.first) { + m_output.push_back(','); + } + if (m_pretty) { + newline_and_indent(m_stack.size()); + } + + write_escaped_string(value); + m_output.append(m_pretty ? ": " : ":"); + + frame.first = false; + frame.expecting_key = false; +} + +void JsonWriter::string(std::string_view value) { + before_value(); + write_escaped_string(value); +} + +void JsonWriter::number(std::string_view raw) { + before_value(); + m_output.append(raw.data(), raw.size()); +} + +void JsonWriter::integer(int64_t value) { + before_value(); + m_output.append(std::to_string(value)); +} + +void JsonWriter::boolean(bool value) { + before_value(); + m_output.append(value ? 
"true" : "false"); +} + +void JsonWriter::null() { + before_value(); + m_output.append("null"); +} + +const std::string& JsonWriter::str() const { + return m_output; +} + +const std::string& JsonWriter::to_string() const { + return m_output; +} + +void JsonWriter::before_value() { + if (m_stack.empty()) { + return; + } + + Frame &frame = m_stack.back(); + if (frame.type == ContainerType::Array) { + if (!frame.first) { + m_output.push_back(','); + } + if (m_pretty) { + newline_and_indent(m_stack.size()); + } + frame.first = false; + return; + } + + frame.expecting_key = true; +} + +void JsonWriter::after_container_end() { + if (m_stack.empty()) { + return; + } + + Frame &frame = m_stack.back(); + if (frame.type == ContainerType::Object) { + frame.expecting_key = true; + } +} + +void JsonWriter::begin_container(ContainerType type, char token) { + before_value(); + m_output.push_back(token); + m_stack.push_back(Frame{ + type, + true, + type == ContainerType::Object + }); +} + +void JsonWriter::end_container(ContainerType expected, char token) { + Frame frame = m_stack.back(); + m_stack.pop_back(); + + if (frame.type == expected && !frame.first && m_pretty) { + newline_and_indent(m_stack.size()); + } + + m_output.push_back(token); + after_container_end(); +} + +void JsonWriter::newline_and_indent(std::size_t depth) { + m_output.push_back('\n'); + for (std::size_t i = 0; i < depth; ++i) { + m_output.append(m_indent); + } +} + +void JsonWriter::write_escaped_string(std::string_view value) { + static const char *kHex = "0123456789abcdef"; + char unicode_escape[] = {'\\', 'u', '0', '0', '0', '0', '\0'}; + + m_output.push_back('"'); + for (const unsigned char c : value) { + switch (c) { + case '"': + m_output.append(R"(\")"); + break; + case '\\': + m_output.append(R"(\\)"); + break; + case '\b': + m_output.append("\\b"); + break; + case '\f': + m_output.append("\\f"); + break; + case '\n': + m_output.append("\\n"); + break; + case '\r': + m_output.append("\\r"); + break; + 
case '\t': + m_output.append("\\t"); + break; + default: + if (c < 0x20) { + unicode_escape[4] = kHex[(c >> 4) & 0x0f]; + unicode_escape[5] = kHex[c & 0x0f]; + m_output.append(unicode_escape, 6); + } else { + m_output.push_back(static_cast(c)); + } + break; + } + } + m_output.push_back('"'); +} + +} // namespace modsecurity::utils diff --git a/src/utils/json_writer.h b/src/utils/json_writer.h new file mode 100644 index 0000000000..130ec022ea --- /dev/null +++ b/src/utils/json_writer.h @@ -0,0 +1,73 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. 
+ * + */ + +#ifndef SRC_UTILS_JSON_WRITER_H_ +#define SRC_UTILS_JSON_WRITER_H_ + +#include +#include +#include +#include +#include + +namespace modsecurity::utils { + +class JsonWriter { + public: + explicit JsonWriter(bool pretty = false, std::string indent = " "); + + void start_object(); + void end_object(); + void start_array(); + void end_array(); + + void key(std::string_view value); + void string(std::string_view value); + void number(std::string_view raw); + void integer(int64_t value); + void boolean(bool value); + void null(); + + const std::string& str() const; + const std::string& to_string() const; + + private: + enum class ContainerType { + Object, + Array + }; + + struct Frame { + ContainerType type; + bool first; + bool expecting_key; + }; + + void before_value(); + void after_container_end(); + void begin_container(ContainerType type, char token); + void end_container(ContainerType expected, char token); + void newline_and_indent(std::size_t depth); + void write_escaped_string(std::string_view value); + + std::string m_output; + std::vector m_stack; + bool m_pretty; + std::string m_indent; +}; + +} // namespace modsecurity::utils + +#endif // SRC_UTILS_JSON_WRITER_H_ diff --git a/src/utils/msc_tree.cc b/src/utils/msc_tree.cc index ea6c1a4167..0fe42c38fc 100644 --- a/src/utils/msc_tree.cc +++ b/src/utils/msc_tree.cc @@ -980,35 +980,32 @@ int tree_contains_ip(TreeRoot *rtree, -int add_ip_from_param( - const char *param, TreeRoot **rtree, char **error_msg) +static int add_ip_entries_from_param(const char *param, TreeRoot *rtree) { char *param_copy = strdup(param); - char *saved = NULL; - char *str; - TreeNode *tnode = NULL; + char *saved = nullptr; + char *str = nullptr; + TreeNode *tnode = nullptr; str = strtok_r(param_copy, ",", &saved); - while (str != NULL) + while (str != nullptr) { - if (strchr(str, ':') == NULL) + if (strchr(str, ':') == nullptr) { - tnode = TreeAddIP(str, (*rtree)->ipv4_tree, IPV4_TREE); + tnode = TreeAddIP(str, 
rtree->ipv4_tree, IPV4_TREE); } else { - tnode = TreeAddIP(str, (*rtree)->ipv6_tree, IPV6_TREE); + tnode = TreeAddIP(str, rtree->ipv6_tree, IPV6_TREE); } - if (tnode == NULL) + if (tnode == nullptr) { - //*error_msg = apr_psprintf("Could not add entry " \ - // "\"%s\" from: %s.", str, param); free(param_copy); return -1; } - str = strtok_r(NULL, ",", &saved); + str = strtok_r(nullptr, ",", &saved); } free(param_copy); @@ -1016,45 +1013,22 @@ int add_ip_from_param( } -int ip_tree_from_param( +int add_ip_from_param( const char *param, TreeRoot **rtree, char **error_msg) { - char *param_copy = strdup(param); - char *saved = NULL; - char *str = NULL; - TreeNode *tnode = NULL; + return add_ip_entries_from_param(param, *rtree); +} + +int ip_tree_from_param( + const char *param, TreeRoot **rtree, char **error_msg) +{ if (create_radix_tree(rtree, error_msg)) { - free(param_copy); return -1; } - str = strtok_r(param_copy, ",", &saved); - while (str != NULL) - { - if (strchr(str, ':') == NULL) - { - tnode = TreeAddIP(str, (*rtree)->ipv4_tree, IPV4_TREE); - } - else - { - tnode = TreeAddIP(str, (*rtree)->ipv6_tree, IPV6_TREE); - } - - if (tnode == NULL) - { - //*error_msg = apr_psprintf("Could not add entry " \ - // "\"%s\" from: %s.", str, param); - free(param_copy); - return -1; - } - - str = strtok_r(NULL, ",", &saved); - } - free(param_copy); - - return 0; + return add_ip_entries_from_param(param, *rtree); } diff --git a/src/utils/sha1.h b/src/utils/sha1.h index a40d7fa1c8..aa9132c652 100644 --- a/src/utils/sha1.h +++ b/src/utils/sha1.h @@ -16,7 +16,9 @@ #ifndef SRC_UTILS_SHA1_H_ #define SRC_UTILS_SHA1_H_ +#include #include +#include #include #include "src/utils/string.h" diff --git a/src/utils/string.h b/src/utils/string.h index ca2967aa5f..8933a5f884 100644 --- a/src/utils/string.h +++ b/src/utils/string.h @@ -18,6 +18,7 @@ #include #include +#include #include #include #include diff --git a/src/variables/variable.cc b/src/variables/variable.cc index 
caf8f6fd88..ca1eacfe56 100644 --- a/src/variables/variable.cc +++ b/src/variables/variable.cc @@ -16,6 +16,7 @@ #include "src/variables/variable.h" #include +#include #include #include #include diff --git a/src/variables/variable.h b/src/variables/variable.h index 06f407f2c3..0d6f3adb6a 100644 --- a/src/variables/variable.h +++ b/src/variables/variable.h @@ -98,6 +98,59 @@ class n : public Variable { \ } \ }; +#define VARIABLE_MONKEY_SIMPLE_VARIABLES(X) \ + X("RESPONSE_CONTENT_TYPE", m_variableResponseContentType) \ + X("ARGS_COMBINED_SIZE", m_variableARGScombinedSize) \ + X("AUTH_TYPE", m_variableAuthType) \ + X("FILES_COMBINED_SIZE", m_variableFilesCombinedSize) \ + X("FULL_REQUEST", m_variableFullRequest) \ + X("FULL_REQUEST_LENGTH", m_variableFullRequestLength) \ + X("INBOUND_DATA_ERROR", m_variableInboundDataError) \ + X("MATCHED_VAR", m_variableMatchedVar) \ + X("MATCHED_VAR_NAME", m_variableMatchedVarName) \ + X("MSC_PCRE_ERROR", m_variableMscPcreError) \ + X("MSC_PCRE_LIMITS_EXCEEDED", m_variableMscPcreLimitsExceeded) \ + X("MULTIPART_CRLF_LF_LINES", m_variableMultipartCrlfLFLines) \ + X("MULTIPART_DATA_AFTER", m_variableMultipartDataAfter) \ + X("MULTIPART_FILE_LIMIT_EXCEEDED", m_variableMultipartFileLimitExceeded) \ + X("MULTIPART_STRICT_ERROR", m_variableMultipartStrictError) \ + X("MULTIPART_HEADER_FOLDING", m_variableMultipartHeaderFolding) \ + X("MULTIPART_INVALID_QUOTING", m_variableMultipartInvalidQuoting) \ + X("MULTIPART_INVALID_HEADER_FOLDING", m_variableMultipartInvalidHeaderFolding) \ + X("MULTIPART_UNMATCHED_BOUNDARY", m_variableMultipartUnmatchedBoundary) \ + X("OUTBOUND_DATA_ERROR", m_variableOutboundDataError) \ + X("PATH_INFO", m_variablePathInfo) \ + X("QUERY_STRING", m_variableQueryString) \ + X("REMOTE_ADDR", m_variableRemoteAddr) \ + X("REMOTE_HOST", m_variableRemoteHost) \ + X("REMOTE_PORT", m_variableRemotePort) \ + X("REQBODY_ERROR", m_variableReqbodyError) \ + X("REQBODY_ERROR_MSG", m_variableReqbodyErrorMsg) \ + 
X("REQBODY_PROCESSOR_ERROR_MSG", m_variableReqbodyProcessorErrorMsg) \ + X("REQBODY_PROCESSOR_ERROR", m_variableReqbodyProcessorError) \ + X("REQBODY_PROCESSOR", m_variableReqbodyProcessor) \ + X("REQUEST_BASENAME", m_variableRequestBasename) \ + X("REQUEST_BODY", m_variableRequestBody) \ + X("REQUEST_BODY_LENGTH", m_variableRequestBodyLength) \ + X("REQUEST_FILENAME", m_variableRequestFilename) \ + X("REQUEST_LINE", m_variableRequestLine) \ + X("REQUEST_METHOD", m_variableRequestMethod) \ + X("REQUEST_PROTOCOL", m_variableRequestProtocol) \ + X("REQUEST_URI", m_variableRequestURI) \ + X("REQUEST_URI_RAW", m_variableRequestURIRaw) \ + X("RESOURCE", m_variableResource) \ + X("RESPONSE_BODY", m_variableResponseBody) \ + X("RESPONSE_CONTENT_LENGTH", m_variableResponseContentLength) \ + X("RESPONSE_PROTOCOL", m_variableResponseProtocol) \ + X("RESPONSE_STATUS", m_variableResponseStatus) \ + X("SERVER_ADDR", m_variableServerAddr) \ + X("SERVER_NAME", m_variableServerName) \ + X("SERVER_PORT", m_variableServerPort) \ + X("SESSIONID", m_variableSessionID) \ + X("UNIQUE_ID", m_variableUniqueID) \ + X("URLENCODED_ERROR", m_variableUrlEncodedError) \ + X("USERID", m_variableUserID) + namespace modsecurity { @@ -274,109 +327,16 @@ class VariableMonkeyResolution { throw std::invalid_argument("Variable not found."); } - if (comp(variable, "RESPONSE_CONTENT_TYPE")) { - t->m_variableResponseContentType.evaluate(l); - } else if (comp(variable, "ARGS_COMBINED_SIZE")) { - t->m_variableARGScombinedSize.evaluate(l); - } else if (comp(variable, "AUTH_TYPE")) { - t->m_variableAuthType.evaluate(l); - } else if (comp(variable, "FILES_COMBINED_SIZE")) { - t->m_variableFilesCombinedSize.evaluate(l); - } else if (comp(variable, "FULL_REQUEST")) { - t->m_variableFullRequest.evaluate(l); - } else if (comp(variable, "FULL_REQUEST_LENGTH")) { - t->m_variableFullRequestLength.evaluate(l); - } else if (comp(variable, "INBOUND_DATA_ERROR")) { - t->m_variableInboundDataError.evaluate(l); - } else if 
(comp(variable, "MATCHED_VAR")) { - t->m_variableMatchedVar.evaluate(l); - } else if (comp(variable, "MATCHED_VAR_NAME")) { - t->m_variableMatchedVarName.evaluate(l); - } else if (comp(variable, "MSC_PCRE_ERROR")) { - t->m_variableMscPcreError.evaluate(l); - } else if (comp(variable, "MSC_PCRE_LIMITS_EXCEEDED")) { - t->m_variableMscPcreLimitsExceeded.evaluate(l); - } else if (comp(variable, "MULTIPART_CRLF_LF_LINES")) { - t->m_variableMultipartCrlfLFLines.evaluate(l); - } else if (comp(variable, "MULTIPART_DATA_AFTER")) { - t->m_variableMultipartDataAfter.evaluate(l); - } else if (comp(variable, "MULTIPART_FILE_LIMIT_EXCEEDED")) { - t->m_variableMultipartFileLimitExceeded.evaluate(l); - } else if (comp(variable, "MULTIPART_STRICT_ERROR")) { - t->m_variableMultipartStrictError.evaluate(l); - } else if (comp(variable, "MULTIPART_HEADER_FOLDING")) { - t->m_variableMultipartHeaderFolding.evaluate(l); - } else if (comp(variable, "MULTIPART_INVALID_QUOTING")) { - t->m_variableMultipartInvalidQuoting.evaluate(l); - } else if (comp(variable, "MULTIPART_INVALID_HEADER_FOLDING")) { - t->m_variableMultipartInvalidHeaderFolding.evaluate(l); - } else if (comp(variable, "MULTIPART_UNMATCHED_BOUNDARY")) { - t->m_variableMultipartUnmatchedBoundary.evaluate(l); - } else if (comp(variable, "OUTBOUND_DATA_ERROR")) { - t->m_variableOutboundDataError.evaluate(l); - } else if (comp(variable, "PATH_INFO")) { - t->m_variablePathInfo.evaluate(l); - } else if (comp(variable, "QUERY_STRING")) { - t->m_variableQueryString.evaluate(l); - } else if (comp(variable, "REMOTE_ADDR")) { - t->m_variableRemoteAddr.evaluate(l); - } else if (comp(variable, "REMOTE_HOST")) { - t->m_variableRemoteHost.evaluate(l); - } else if (comp(variable, "REMOTE_PORT")) { - t->m_variableRemotePort.evaluate(l); - } else if (comp(variable, "REQBODY_ERROR")) { - t->m_variableReqbodyError.evaluate(l); - } else if (comp(variable, "REQBODY_ERROR_MSG")) { - t->m_variableReqbodyErrorMsg.evaluate(l); - } else if 
(comp(variable, "REQBODY_PROCESSOR_ERROR_MSG")) { - t->m_variableReqbodyProcessorErrorMsg.evaluate(l); - } else if (comp(variable, "REQBODY_PROCESSOR_ERROR")) { - t->m_variableReqbodyProcessorError.evaluate(l); - } else if (comp(variable, "REQBODY_PROCESSOR")) { - t->m_variableReqbodyProcessor.evaluate(l); - } else if (comp(variable, "REQUEST_BASENAME")) { - t->m_variableRequestBasename.evaluate(l); - } else if (comp(variable, "REQUEST_BODY")) { - t->m_variableRequestBody.evaluate(l); - } else if (comp(variable, "REQUEST_BODY_LENGTH")) { - t->m_variableRequestBodyLength.evaluate(l); - } else if (comp(variable, "REQUEST_FILENAME")) { - t->m_variableRequestFilename.evaluate(l); - } else if (comp(variable, "REQUEST_LINE")) { - t->m_variableRequestLine.evaluate(l); - } else if (comp(variable, "REQUEST_METHOD")) { - t->m_variableRequestMethod.evaluate(l); - } else if (comp(variable, "REQUEST_PROTOCOL")) { - t->m_variableRequestProtocol.evaluate(l); - } else if (comp(variable, "REQUEST_URI")) { - t->m_variableRequestURI.evaluate(l); - } else if (comp(variable, "REQUEST_URI_RAW")) { - t->m_variableRequestURIRaw.evaluate(l); - } else if (comp(variable, "RESOURCE")) { - t->m_variableResource.evaluate(l); - } else if (comp(variable, "RESPONSE_BODY")) { - t->m_variableResponseBody.evaluate(l); - } else if (comp(variable, "RESPONSE_CONTENT_LENGTH")) { - t->m_variableResponseContentLength.evaluate(l); - } else if (comp(variable, "RESPONSE_PROTOCOL")) { - t->m_variableResponseProtocol.evaluate(l); - } else if (comp(variable, "RESPONSE_STATUS")) { - t->m_variableResponseStatus.evaluate(l); - } else if (comp(variable, "SERVER_ADDR")) { - t->m_variableServerAddr.evaluate(l); - } else if (comp(variable, "SERVER_NAME")) { - t->m_variableServerName.evaluate(l); - } else if (comp(variable, "SERVER_PORT")) { - t->m_variableServerPort.evaluate(l); - } else if (comp(variable, "SESSIONID")) { - t->m_variableSessionID.evaluate(l); - } else if (comp(variable, "UNIQUE_ID")) { - 
t->m_variableUniqueID.evaluate(l); - } else if (comp(variable, "URLENCODED_ERROR")) { - t->m_variableUrlEncodedError.evaluate(l); - } else if (comp(variable, "USERID")) { - t->m_variableUserID.evaluate(l); - } else { + bool matched = false; +#define VARIABLE_MONKEY_EVALUATE_DISPATCH(name, member) \ + if (matched == false && comp(variable, name)) { \ + t->member.evaluate(l); \ + matched = true; \ + } + VARIABLE_MONKEY_SIMPLE_VARIABLES(VARIABLE_MONKEY_EVALUATE_DISPATCH); +#undef VARIABLE_MONKEY_EVALUATE_DISPATCH + + if (matched == false) { throw std::invalid_argument("Variable not found."); } } @@ -389,126 +349,41 @@ class VariableMonkeyResolution { collection = variable.find(":"); } if (collection == std::string::npos) { - if (comp(variable, "RESPONSE_CONTENT_TYPE")) { - vv = t->m_variableResponseContentType.resolveFirst(); - } else if (comp(variable, "ARGS_COMBINED_SIZE")) { - vv = t->m_variableARGScombinedSize.resolveFirst(); - } else if (comp(variable, "AUTH_TYPE")) { - vv = t->m_variableAuthType.resolveFirst(); - } else if (comp(variable, "FILES_COMBINED_SIZE")) { - vv = t->m_variableFilesCombinedSize.resolveFirst(); - } else if (comp(variable, "FULL_REQUEST")) { - vv = t->m_variableFullRequest.resolveFirst(); - } else if (comp(variable, "FULL_REQUEST_LENGTH")) { - vv = t->m_variableFullRequestLength.resolveFirst(); - } else if (comp(variable, "INBOUND_DATA_ERROR")) { - vv = t->m_variableInboundDataError.resolveFirst(); - } else if (comp(variable, "MATCHED_VAR")) { - vv = t->m_variableMatchedVar.resolveFirst(); - } else if (comp(variable, "MATCHED_VAR_NAME")) { - vv = t->m_variableMatchedVarName.resolveFirst(); - } else if (comp(variable, "MSC_PCRE_ERROR")) { - vv = t->m_variableMscPcreError.resolveFirst(); - } else if (comp(variable, "MSC_PCRE_LIMITS_EXCEEDED")) { - vv = t->m_variableMscPcreLimitsExceeded.resolveFirst(); - } else if (comp(variable, "MULTIPART_CRLF_LF_LINES")) { - vv = t->m_variableMultipartCrlfLFLines.resolveFirst(); - } else if (comp(variable, 
"MULTIPART_DATA_AFTER")) { - vv = t->m_variableMultipartDataAfter.resolveFirst(); - } else if (comp(variable, "MULTIPART_FILE_LIMIT_EXCEEDED")) { - vv = t->m_variableMultipartFileLimitExceeded.resolveFirst(); - } else if (comp(variable, "MULTIPART_STRICT_ERROR")) { - vv = t->m_variableMultipartStrictError.resolveFirst(); - } else if (comp(variable, "MULTIPART_HEADER_FOLDING")) { - vv = t->m_variableMultipartHeaderFolding.resolveFirst(); - } else if (comp(variable, "MULTIPART_INVALID_QUOTING")) { - vv = t->m_variableMultipartInvalidQuoting.resolveFirst(); - } else if (comp(variable, "MULTIPART_INVALID_HEADER_FOLDING")) { - vv = t->m_variableMultipartInvalidHeaderFolding.resolveFirst(); - } else if (comp(variable, "MULTIPART_UNMATCHED_BOUNDARY")) { - vv = t->m_variableMultipartUnmatchedBoundary.resolveFirst(); - } else if (comp(variable, "OUTBOUND_DATA_ERROR")) { - vv = t->m_variableOutboundDataError.resolveFirst(); - } else if (comp(variable, "PATH_INFO")) { - vv = t->m_variablePathInfo.resolveFirst(); - } else if (comp(variable, "QUERY_STRING")) { - vv = t->m_variableQueryString.resolveFirst(); - } else if (comp(variable, "REMOTE_ADDR")) { - vv = t->m_variableRemoteAddr.resolveFirst(); - } else if (comp(variable, "REMOTE_HOST")) { - vv = t->m_variableRemoteHost.resolveFirst(); - } else if (comp(variable, "REMOTE_PORT")) { - vv = t->m_variableRemotePort.resolveFirst(); - } else if (comp(variable, "REQBODY_ERROR")) { - vv = t->m_variableReqbodyError.resolveFirst(); - } else if (comp(variable, "REQBODY_ERROR_MSG")) { - vv = t->m_variableReqbodyErrorMsg.resolveFirst(); - } else if (comp(variable, "REQBODY_PROCESSOR_ERROR_MSG")) { - vv = t->m_variableReqbodyProcessorErrorMsg.resolveFirst(); - } else if (comp(variable, "REQBODY_PROCESSOR_ERROR")) { - vv = t->m_variableReqbodyProcessorError.resolveFirst(); - } else if (comp(variable, "REQBODY_PROCESSOR")) { - vv = t->m_variableReqbodyProcessor.resolveFirst(); - } else if (comp(variable, "REQUEST_BASENAME")) { - vv = 
t->m_variableRequestBasename.resolveFirst(); - } else if (comp(variable, "REQUEST_BODY")) { - vv = t->m_variableRequestBody.resolveFirst(); - } else if (comp(variable, "REQUEST_BODY_LENGTH")) { - vv = t->m_variableRequestBodyLength.resolveFirst(); - } else if (comp(variable, "REQUEST_FILENAME")) { - vv = t->m_variableRequestFilename.resolveFirst(); - } else if (comp(variable, "REQUEST_LINE")) { - vv = t->m_variableRequestLine.resolveFirst(); - } else if (comp(variable, "REQUEST_METHOD")) { - vv = t->m_variableRequestMethod.resolveFirst(); - } else if (comp(variable, "REQUEST_PROTOCOL")) { - vv = t->m_variableRequestProtocol.resolveFirst(); - } else if (comp(variable, "REQUEST_URI")) { - vv = t->m_variableRequestURI.resolveFirst(); - } else if (comp(variable, "REQUEST_URI_RAW")) { - vv = t->m_variableRequestURIRaw.resolveFirst(); - } else if (comp(variable, "RESOURCE")) { - vv = t->m_variableResource.resolveFirst(); - } else if (comp(variable, "RESPONSE_BODY")) { - vv = t->m_variableResponseBody.resolveFirst(); - } else if (comp(variable, "RESPONSE_CONTENT_LENGTH")) { - vv = t->m_variableResponseContentLength.resolveFirst(); - } else if (comp(variable, "RESPONSE_PROTOCOL")) { - vv = t->m_variableResponseProtocol.resolveFirst(); - } else if (comp(variable, "RESPONSE_STATUS")) { - vv = t->m_variableResponseStatus.resolveFirst(); - } else if (comp(variable, "SERVER_ADDR")) { - vv = t->m_variableServerAddr.resolveFirst(); - } else if (comp(variable, "SERVER_NAME")) { - vv = t->m_variableServerName.resolveFirst(); - } else if (comp(variable, "SERVER_PORT")) { - vv = t->m_variableServerPort.resolveFirst(); - } else if (comp(variable, "SESSIONID")) { - vv = t->m_variableSessionID.resolveFirst(); - } else if (comp(variable, "UNIQUE_ID")) { - vv = t->m_variableUniqueID.resolveFirst(); - } else if (comp(variable, "URLENCODED_ERROR")) { - vv = t->m_variableUrlEncodedError.resolveFirst(); - } else if (comp(variable, "USERID")) { - vv = t->m_variableUserID.resolveFirst(); - } 
else if (comp(variable, "TX")) { + bool matched = false; +#define VARIABLE_MONKEY_RESOLVE_DISPATCH(name, member) \ + if (matched == false && comp(variable, name)) { \ + vv = t->member.resolveFirst(); \ + matched = true; \ + } + VARIABLE_MONKEY_SIMPLE_VARIABLES(VARIABLE_MONKEY_RESOLVE_DISPATCH); +#undef VARIABLE_MONKEY_RESOLVE_DISPATCH + + if (matched == false && comp(variable, "TX")) { vv = t->m_collections.m_tx_collection->resolveFirst(""); - } else if (comp(variable, "RESOURCE")) { + matched = true; + } else if (matched == false && comp(variable, "RESOURCE")) { vv = t->m_collections.m_resource_collection->resolveFirst("", t->m_collections.m_resource_collection_key, t->m_rules->m_secWebAppId.m_value); - } else if (comp(variable, "USER")) { + matched = true; + } else if (matched == false && comp(variable, "USER")) { vv = t->m_collections.m_user_collection->resolveFirst("", t->m_collections.m_user_collection_key, t->m_rules->m_secWebAppId.m_value); - } else if (comp(variable, "SESSION")) { + matched = true; + } else if (matched == false && comp(variable, "SESSION")) { vv = t->m_collections.m_session_collection->resolveFirst("", t->m_collections.m_session_collection_key, t->m_rules->m_secWebAppId.m_value); - } else if (comp(variable, "IP")) { + matched = true; + } else if (matched == false && comp(variable, "IP")) { vv = t->m_collections.m_ip_collection->resolveFirst("", t->m_collections.m_ip_collection_key, t->m_rules->m_secWebAppId.m_value); - } else if (comp(variable, "GLOBAL")) { + matched = true; + } else if (matched == false && comp(variable, "GLOBAL")) { vv = t->m_collections.m_global_collection->resolveFirst("", t->m_collections.m_global_collection_key, t->m_rules->m_secWebAppId.m_value); - } else { + matched = true; + } + + if (matched == false) { throw std::invalid_argument("Variable not found."); } } else { @@ -729,4 +604,6 @@ std::string operator+(const std::string &a, const modsecurity::variables::Variab } // namespace variables } // namespace 
modsecurity +#undef VARIABLE_MONKEY_SIMPLE_VARIABLES + #endif // SRC_VARIABLES_VARIABLE_H_ diff --git a/test/Makefile.am b/test/Makefile.am index 2e7e05d614..4d3a469620 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -23,7 +23,8 @@ noinst_PROGRAMS = EXTRA_DIST = \ test-cases/* \ custom-test-driver \ - test-suite.sh + test-suite.sh \ + run-json-backend-matrix.sh # unit_tests @@ -34,12 +35,20 @@ unit_tests_SOURCES = \ unit/unit_test.cc \ common/custom_debug_log.cc +noinst_PROGRAMS += json_backend_depth_tests +json_backend_depth_tests_SOURCES = \ + unit/json_backend_depth_tests.cc + noinst_HEADERS = \ - common/modsecurity_test.cc \ - common/*.h \ - unit/*.h \ - regression/*.h + $(srcdir)/common/colors.h \ + $(srcdir)/common/custom_debug_log.h \ + $(srcdir)/common/json.h \ + $(srcdir)/common/modsecurity_test.h \ + $(srcdir)/common/modsecurity_test_context.h \ + $(srcdir)/common/modsecurity_test_results.h \ + $(srcdir)/unit/unit_test.h \ + $(srcdir)/regression/regression_test.h unit_tests_LDADD = \ @@ -52,8 +61,7 @@ unit_tests_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(PCRE2_LDADD) \ - $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(SSDEEP_LDADD) unit_tests_LDFLAGS = \ @@ -66,15 +74,15 @@ unit_tests_LDFLAGS = \ $(MAXMIND_LDFLAGS) \ $(LMDB_LDFLAGS) \ $(LUA_LDFLAGS) \ - $(SSDEEP_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(SSDEEP_LDFLAGS) unit_tests_CPPFLAGS = \ -Icommon \ -I$(top_srcdir)/ \ + -I$(top_srcdir)/others/jsoncons/include \ -g \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ $(CURL_CFLAGS) \ $(MODSEC_NO_LOGS) \ $(GEOIP_CFLAGS) \ @@ -83,11 +91,19 @@ unit_tests_CPPFLAGS = \ $(LMDB_CFLAGS) \ $(PCRE_CFLAGS) \ $(PCRE2_CFLAGS) \ - $(YAJL_CFLAGS) \ $(LUA_CFLAGS) \ $(SSDEEP_CFLAGS) \ $(LIBXML2_CFLAGS) +json_backend_depth_tests_LDADD = \ + $(unit_tests_LDADD) + +json_backend_depth_tests_LDFLAGS = \ + $(unit_tests_LDFLAGS) + +json_backend_depth_tests_CPPFLAGS = \ + $(unit_tests_CPPFLAGS) + # regression @@ -107,8 +123,7 @@ regression_tests_LDADD = \ $(LUA_LDADD) \ 
$(PCRE_LDADD) \ $(PCRE2_LDADD) \ - $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(SSDEEP_LDADD) regression_tests_LDFLAGS = \ @@ -119,7 +134,6 @@ regression_tests_LDFLAGS = \ -lm \ -lstdc++ \ $(MAXMIND_LDFLAGS) \ - $(YAJL_LDFLAGS) \ $(LMDB_LDFLAGS) \ $(SSDEEP_LDFLAGS) \ $(LUA_LDFLAGS) @@ -128,8 +142,9 @@ regression_tests_LDFLAGS = \ regression_tests_CPPFLAGS = \ -Icommon \ -I$(top_srcdir) \ + -I$(top_srcdir)/others/jsoncons/include \ -g \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ $(CURL_CFLAGS) \ $(MODSEC_NO_LOGS) \ $(GEOIP_CFLAGS) \ @@ -140,7 +155,6 @@ regression_tests_CPPFLAGS = \ $(SSDEEP_CFLAGS) \ $(PCRE_CFLAGS) \ $(PCRE2_CFLAGS) \ - $(YAJL_CFLAGS) \ $(LIBXML2_CFLAGS) @@ -161,8 +175,7 @@ rules_optimization_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(PCRE2_LDADD) \ - $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(SSDEEP_LDADD) rules_optimization_LDFLAGS = \ -L$(top_builddir)/src/.libs/ \ @@ -174,14 +187,13 @@ rules_optimization_LDFLAGS = \ $(MAXMIND_LDFLAGS) \ $(LMDB_LDFLAGS) \ $(LUA_LDFLAGS) \ - $(SSDEEP_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(SSDEEP_LDFLAGS) rules_optimization_CPPFLAGS = \ -Icommon \ -I$(top_srcdir)/ \ -g \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ $(CURL_CFLAGS) \ $(MODSEC_NO_LOGS) \ $(GEOIP_CFLAGS) \ @@ -192,6 +204,4 @@ rules_optimization_CPPFLAGS = \ $(SSDEEP_CFLAGS) \ $(PCRE_CFLAGS) \ $(PCRE2_CFLAGS) \ - $(YAJL_CFLAGS) \ $(LIBXML2_CFLAGS) - diff --git a/test/benchmark/Makefile.am b/test/benchmark/Makefile.am index 2ac9d92111..a7f681484b 100644 --- a/test/benchmark/Makefile.am +++ b/test/benchmark/Makefile.am @@ -1,17 +1,28 @@ -noinst_PROGRAMS = benchmark +noinst_PROGRAMS = \ + benchmark \ + json_benchmark + +EXTRA_DIST = \ + basic_rules.conf \ + json_benchmark_rules.conf \ + run-json-benchmarks.sh \ + download-owasp-v3-rules.sh \ + download-owasp-v4-rules.sh benchmark_SOURCES = \ benchmark.cc +json_benchmark_SOURCES = \ + json_benchmark.cc + benchmark_LDADD = \ $(CURL_LDADD) \ $(GEOIP_LDADD) \ $(MAXMIND_LDADD) \ $(PCRE_LDADD) \ 
$(PCRE2_LDADD) \ - $(YAJL_LDADD) \ $(LMDB_LDADD) \ $(SSDEEP_LDADD) \ $(LUA_LDADD) \ @@ -27,19 +38,29 @@ benchmark_LDFLAGS = \ -lstdc++ \ $(GEOIP_LDFLAGS) \ $(MAXMIND_LDFLAGS) \ - $(YAJL_LDFLAGS) \ $(LMDB_LDFLAGS) \ $(SSDEEP_LDFLAGS) \ $(LUA_LDFLAGS) benchmark_CPPFLAGS = \ - -I$(top_builddir)/headers \ + -I$(top_builddir) \ + -I$(top_srcdir) \ + -I$(top_srcdir)/headers \ $(GLOBAL_CPPFLAGS) \ $(PCRE_CFLAGS) \ $(PCRE2_CFLAGS) \ $(LMDB_CFLAGS) \ $(LIBXML2_CFLAGS) +json_benchmark_LDADD = \ + $(benchmark_LDADD) + +json_benchmark_LDFLAGS = \ + $(benchmark_LDFLAGS) + +json_benchmark_CPPFLAGS = \ + $(benchmark_CPPFLAGS) \ + -DMSC_JSON_BENCHMARK_RULES_DIR=\"$(srcdir)\" + MAINTAINERCLEANFILES = \ Makefile.in - diff --git a/test/benchmark/benchmark.cc b/test/benchmark/benchmark.cc index a502150eaf..c4ce4635b9 100644 --- a/test/benchmark/benchmark.cc +++ b/test/benchmark/benchmark.cc @@ -16,15 +16,24 @@ #include #include +#include #include #include +#include #include "modsecurity/rules_set.h" #include "modsecurity/modsecurity.h" using modsecurity::Transaction; -char request_uri[] = "/test.pl?param1=test¶2=test2"; +namespace { + +constexpr const char *kRequestUri = "/test.pl?param1=test¶2=test2"; +constexpr const char *kClientIp = "198.51.100.10"; // RFC 5737 documentation range +constexpr const char *kServerIp = "198.51.100.20"; // RFC 5737 documentation range +constexpr const char *kRulesFile = "basic_rules.conf"; + +} // namespace unsigned char response_body[] = "" \ "\n\r" \ @@ -38,10 +47,6 @@ unsigned char response_body[] = "" \ " \n\r" \ "\n\r"; -char ip[] = "200.249.12.31"; - -char rules_file[] = "basic_rules.conf"; - const char* const help_message = "Usage: benchmark [num_iterations|-h|-?|--help]"; int main(int argc, const char *argv[]) { @@ -79,23 +84,26 @@ int main(int argc, const char *argv[]) { " (ModSecurity benchmark utility)"); rules = new modsecurity::RulesSet(); - if (rules->loadFromUri(rules_file) < 0) { + if (rules->loadFromUri(kRulesFile) < 0) { std::cout << 
"Problems loading the rules..." << std::endl; std::cout << rules->m_parserError.str() << std::endl; return -1; } + // Start timing after one-time setup to measure only transaction processing. + const auto benchmark_start = std::chrono::steady_clock::now(); + for (unsigned long long i = 0; i < NUM_REQUESTS; i++) { //std::cout << "Proceeding with request " << i << std::endl; Transaction *modsecTransaction = new Transaction(modsec, rules, NULL); - modsecTransaction->processConnection(ip, 12345, "127.0.0.1", 80); + modsecTransaction->processConnection(kClientIp, 12345, kServerIp, 80); if (modsecTransaction->intervention(&it)) { std::cout << "There is an intervention" << std::endl; goto next_request; } - modsecTransaction->processURI(request_uri, "GET", "1.1"); + modsecTransaction->processURI(kRequestUri, "GET", "1.1"); if (modsecTransaction->intervention(&it)) { std::cout << "There is an intervention" << std::endl; goto next_request; @@ -173,4 +181,19 @@ int main(int argc, const char *argv[]) { delete rules; delete modsec; + + const auto elapsed = std::chrono::duration_cast( + std::chrono::steady_clock::now() - benchmark_start); + const long double elapsed_seconds = + static_cast(elapsed.count()) / 1000000000.0L; + const long double avg_tx_ns = static_cast(elapsed.count()) + / static_cast(NUM_REQUESTS); + const long double tx_per_sec = static_cast(NUM_REQUESTS) + / elapsed_seconds; + + std::cout << std::fixed << std::setprecision(2); + std::cout << "Summary:\n"; + std::cout << " elapsed_seconds: " << elapsed_seconds << "\n"; + std::cout << " avg_transaction_ns: " << avg_tx_ns << "\n"; + std::cout << " throughput_tx_per_sec: " << tx_per_sec << "\n"; } diff --git a/test/benchmark/json_benchmark.cc b/test/benchmark/json_benchmark.cc new file mode 100644 index 0000000000..286fd13b41 --- /dev/null +++ b/test/benchmark/json_benchmark.cc @@ -0,0 +1,539 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. 
(http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. + * + */ + +#include "config.h" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "modsecurity/modsecurity.h" +#include "modsecurity/rules_set.h" +#include "modsecurity/transaction.h" +#include "src/request_body_processor/json_instrumentation.h" + +#ifndef MSC_JSON_BENCHMARK_RULES_DIR +#error "MSC_JSON_BENCHMARK_RULES_DIR must be defined by the build system." +#endif + +namespace { + +using Clock = std::chrono::steady_clock; + +constexpr std::size_t kDefaultTargetBytes = 1048576; +constexpr std::size_t kDefaultDepth = 512; +constexpr unsigned long long kDefaultIterations = 100; +constexpr const char *kRulesFileName = "json_benchmark_rules.conf"; +constexpr const char *kClientIp = "198.51.100.10"; // RFC 5737 documentation range +constexpr const char *kServerIp = "198.51.100.20"; // RFC 5737 documentation range + +struct Options { + std::string scenario; + unsigned long long iterations{kDefaultIterations}; + std::size_t target_bytes{kDefaultTargetBytes}; + std::size_t depth{kDefaultDepth}; + bool include_invalid{false}; + bool output_json{false}; +}; + +struct Metrics { + std::uint64_t append_request_body_ns{0}; + std::uint64_t process_request_body_ns{0}; + std::uint64_t total_transaction_ns{0}; + unsigned long long parse_success_count{0}; + unsigned long long parse_error_count{0}; +}; + +class JsonBenchmarkError : public std::runtime_error { + public: + explicit JsonBenchmarkError(const std::string &message) + : std::runtime_error(message) { } +}; + 
+const char *const usage_message = + "Usage: json_benchmark --scenario NAME [--iterations N] " + "[--target-bytes N] [--depth N] [--include-invalid] [--output json]"; + +std::string benchmarkBackend() { +#if defined(MSC_JSON_BACKEND_SIMDJSON) + return "simdjson"; +#elif defined(MSC_JSON_BACKEND_JSONCONS) + return "jsoncons"; +#else +#error "A JSON backend must be selected at build time." +#endif +} + +std::string rulesFilePath() { + return std::string(MSC_JSON_BENCHMARK_RULES_DIR) + "/" + kRulesFileName; +} + +std::uint64_t elapsedNanos(Clock::time_point start_time) { + return static_cast( + std::chrono::duration_cast( + Clock::now() - start_time).count()); +} + +unsigned long long parseUnsignedLongLong(const char *value, + const char *flag_name, bool allow_zero) { + errno = 0; + char *end = nullptr; + const unsigned long long parsed = std::strtoull(value, &end, 10); + if (errno != 0 || end == value || *end != '\0' + || (!allow_zero && parsed == 0)) { + throw JsonBenchmarkError(std::string("invalid numeric value for ") + + flag_name + ": " + value); + } + return parsed; +} + +std::size_t parseSize(const char *value, const char *flag_name) { + const unsigned long long parsed = + parseUnsignedLongLong(value, flag_name, true); + if (parsed > std::numeric_limits::max()) { + throw JsonBenchmarkError(std::string("value too large for ") + + flag_name + ": " + value); + } + return static_cast(parsed); +} + +unsigned long long parseIterations(const char *value) { + return parseUnsignedLongLong(value, "--iterations", false); +} + +const char *requireOptionValue(int argc, const char *argv[], int *index, + const char *option_name) { + if (*index + 1 >= argc) { + throw JsonBenchmarkError(std::string("missing value for ") + + option_name); + } + *index += 1; + return argv[*index]; +} + +Options parseOptions(int argc, const char *argv[]) { + Options options; + + int i = 1; + while (i < argc) { + const std::string current(argv[i]); + if (current == "-h" || current == "-?" 
|| current == "--help") { + std::cout << usage_message << std::endl; + std::exit(0); + } else if (current == "--scenario") { + options.scenario.assign( + requireOptionValue(argc, argv, &i, "--scenario")); + } else if (current == "--iterations") { + options.iterations = parseIterations( + requireOptionValue(argc, argv, &i, "--iterations")); + } else if (current == "--target-bytes") { + options.target_bytes = parseSize( + requireOptionValue(argc, argv, &i, "--target-bytes"), + "--target-bytes"); + } else if (current == "--depth") { + options.depth = parseSize( + requireOptionValue(argc, argv, &i, "--depth"), "--depth"); + } else if (current == "--include-invalid") { + options.include_invalid = true; + } else if (current == "--output") { + if (const std::string output_format( + requireOptionValue(argc, argv, &i, "--output")); + output_format != "json") { + throw JsonBenchmarkError("unsupported output format: " + + output_format); + } + options.output_json = true; + } else { + throw JsonBenchmarkError("unknown option: " + current); + } + i++; + } + + if (options.scenario.empty()) { + throw JsonBenchmarkError("missing required --scenario"); + } + + if (const bool is_invalid_scenario = options.scenario == "truncated" + || options.scenario == "malformed"; + is_invalid_scenario && !options.include_invalid) { + throw JsonBenchmarkError( + "invalid JSON scenarios require --include-invalid"); + } + + return options; +} + +std::string makeLargeObject(std::size_t target_bytes) { + std::string body("{"); + std::size_t index = 0; + + while (body.size() + 32 < target_bytes || index == 0) { + if (index > 0) { + body.push_back(','); + } + body += "\"key"; + body += std::to_string(index); + body += "\":\"value"; + body += std::to_string(index); + body += "\""; + index++; + } + + body.push_back('}'); + return body; +} + +std::string makeUtf8Object(std::size_t target_bytes) { + static const std::array utf8_values{{ + u8"Gr\u00fc\u00dfe", + u8"\u3053\u3093\u306b\u3061\u306f", + 
u8"\u043f\u0440\u0438\u0432\u0435\u0442", + u8"\u0645\u0631\u062d\u0628\u0627", + u8"\U0001F30D" + }}; + + std::string body("{"); + std::size_t index = 0; + + while (body.size() + 48 < target_bytes || index == 0) { + if (index > 0) { + body.push_back(','); + } + body += "\"utf8_"; + body += std::to_string(index); + body += "\":\""; + body += utf8_values[index % utf8_values.size()]; + body += "\""; + index++; + } + + body.push_back('}'); + return body; +} + +std::string makeNumbersArray(std::size_t target_bytes) { + static const std::array numeric_tokens{{ + "0", + "-0", + "1.0", + "1e3", + "-1.25e-4", + "123456789012345678901234567890", + "6.02214076e23", + "3.141592653589793238462643383279" + }}; + + std::string body("["); + std::size_t index = 0; + + while (body.size() + 40 < target_bytes || index == 0) { + if (index > 0) { + body.push_back(','); + } + body += numeric_tokens[index % numeric_tokens.size()]; + index++; + } + + body.push_back(']'); + return body; +} + +std::string makeDeepNesting(std::size_t depth) { + std::string body; + + for (std::size_t i = 0; i < depth; i++) { + if ((i % 2) == 0) { + body += "{\"k\":"; + } else { + body.push_back('['); + } + } + + body += "\"leaf\""; + + for (std::size_t i = depth; i > 0; i--) { + if (((i - 1) % 2) == 0) { + body.push_back('}'); + } else { + body.push_back(']'); + } + } + + return body; +} + +std::string buildScenarioBody(const Options &options) { + if (options.scenario == "large-object") { + return makeLargeObject(options.target_bytes); + } + if (options.scenario == "deep-nesting") { + return makeDeepNesting(options.depth); + } + if (options.scenario == "numbers") { + return makeNumbersArray(options.target_bytes); + } + if (options.scenario == "utf8") { + return makeUtf8Object(options.target_bytes); + } + if (options.scenario == "truncated") { + std::string body = makeLargeObject(options.target_bytes); + body.pop_back(); + return body; + } + if (options.scenario == "malformed") { + std::string body = 
makeLargeObject(options.target_bytes); + body.insert(body.size() - 1, ",]"); + return body; + } + + throw JsonBenchmarkError("unsupported scenario: " + options.scenario); +} + +bool isResolvedZero(const std::unique_ptr &value) { + return value && *value == "0"; +} + +Metrics runBenchmark(modsecurity::ModSecurity *modsec, + modsecurity::RulesSet *rules, const std::string &body, + const Options &options) { + Metrics metrics; + + for (unsigned long long iteration = 0; iteration < options.iterations; + iteration++) { + const auto total_start = Clock::now(); + modsecurity::Transaction transaction(modsec, rules, nullptr); + transaction.processConnection(kClientIp, 12345, kServerIp, 80); + transaction.processURI("/json-benchmark", "POST", "1.1"); + transaction.addRequestHeader("Host", "localhost"); + transaction.addRequestHeader("User-Agent", + "ModSecurity-json-benchmark/1.0"); + transaction.addRequestHeader("Content-Type", "application/json"); + const std::string content_length = std::to_string(body.size()); + transaction.addRequestHeader("Content-Length", content_length); + transaction.processRequestHeaders(); + + const auto append_start = Clock::now(); + const int append_ok = transaction.appendRequestBody( + reinterpret_cast(body.data()), body.size()); + metrics.append_request_body_ns += elapsedNanos(append_start); + if (append_ok == 0) { + throw JsonBenchmarkError( + "appendRequestBody reported partial body processing"); + } + + const auto process_start = Clock::now(); + if (!transaction.processRequestBody()) { + throw JsonBenchmarkError("processRequestBody returned false"); + } + metrics.process_request_body_ns += elapsedNanos(process_start); + metrics.total_transaction_ns += elapsedNanos(total_start); + + const std::unique_ptr reqbody_error = + transaction.m_variableReqbodyError.resolveFirst(); + const std::unique_ptr processor_error = + transaction.m_variableReqbodyProcessorError.resolveFirst(); + + if (!reqbody_error || !processor_error) { + throw 
JsonBenchmarkError( + "unable to resolve JSON parse outcome variables"); + } + + const bool parse_success = isResolvedZero(reqbody_error) + && isResolvedZero(processor_error); + if (const bool parse_error = !isResolvedZero(reqbody_error) + || !isResolvedZero(processor_error); + parse_success == parse_error) { + throw JsonBenchmarkError( + "ambiguous JSON parse outcome observed in benchmark"); + } + + if (parse_success) { + metrics.parse_success_count++; + } else { + metrics.parse_error_count++; + } + } + + return metrics; +} + +long currentMaxRssKb() { + struct rusage usage; + if (getrusage(RUSAGE_SELF, &usage) != 0) { + return -1; + } + return usage.ru_maxrss; +} + +void printJsonStringField(const char *name, const std::string &value, + bool &first) { + if (!first) { + std::cout << ","; + } + std::cout << "\"" << name << "\":\"" << value << "\""; + first = false; +} + +template +void printJsonNumericField(const char *name, T value, bool &first) { + if (!first) { + std::cout << ","; + } + std::cout << "\"" << name << "\":" << value; + first = false; +} + +void printHumanField(const char *name, const std::string &value) { + std::cout << name << ": " << value << "\n"; +} + +template +void printHumanField(const char *name, T value) { + std::cout << name << ": " << value << "\n"; +} + +void printJson(const Options &options, const std::string &body, + const Metrics &metrics) { + bool first = true; + + std::cout << "{"; + printJsonStringField("backend", benchmarkBackend(), first); + printJsonStringField("scenario", options.scenario, first); + printJsonNumericField("iterations", options.iterations, first); + printJsonNumericField("body_bytes", body.size(), first); + printJsonNumericField("append_request_body_ns", + metrics.append_request_body_ns, first); + printJsonNumericField("process_request_body_ns", + metrics.process_request_body_ns, first); + printJsonNumericField("total_transaction_ns", + metrics.total_transaction_ns, first); + 
printJsonNumericField("parse_success_count", + metrics.parse_success_count, first); + printJsonNumericField("parse_error_count", + metrics.parse_error_count, first); + printJsonNumericField("ru_maxrss_kb", currentMaxRssKb(), first); + +#ifdef MSC_JSON_AUDIT_INSTRUMENTATION + const modsecurity::RequestBodyProcessor::JsonInstrumentationMetrics + instrumentation = + modsecurity::RequestBodyProcessor::jsonInstrumentationSnapshot(); + + printJsonNumericField("request_body_snapshot_count", + instrumentation.request_body_snapshot_count, first); + printJsonNumericField("request_body_snapshot_bytes", + instrumentation.request_body_snapshot_bytes, first); + printJsonNumericField("request_body_snapshot_ns", + instrumentation.request_body_snapshot_ns, first); + printJsonNumericField("json_process_chunk_calls", + instrumentation.json_process_chunk_calls, first); + printJsonNumericField("json_process_chunk_appended_bytes", + instrumentation.json_process_chunk_appended_bytes, first); + printJsonNumericField("json_process_chunk_ns", + instrumentation.json_process_chunk_ns, first); + printJsonNumericField("simdjson_parser_constructions", + instrumentation.simdjson_parser_constructions, first); + printJsonNumericField("simdjson_parser_construction_ns", + instrumentation.simdjson_parser_construction_ns, first); + printJsonNumericField("simdjson_padded_copy_bytes", + instrumentation.simdjson_padded_copy_bytes, first); + printJsonNumericField("simdjson_padded_copy_ns", + instrumentation.simdjson_padded_copy_ns, first); + printJsonNumericField("simdjson_iterate_ns", + instrumentation.simdjson_iterate_ns, first); + printJsonNumericField("jsoncons_cursor_constructions", + instrumentation.jsoncons_cursor_constructions, first); + printJsonNumericField("jsoncons_cursor_init_ns", + instrumentation.jsoncons_cursor_init_ns, first); + printJsonNumericField("jsoncons_token_cursor_constructions", + instrumentation.jsoncons_token_cursor_constructions, first); + 
printJsonNumericField("jsoncons_token_cursor_init_ns", + instrumentation.jsoncons_token_cursor_init_ns, first); + printJsonNumericField("jsoncons_event_loop_ns", + instrumentation.jsoncons_event_loop_ns, first); + printJsonNumericField("jsoncons_token_sync_steps", + instrumentation.jsoncons_token_sync_steps, first); + printJsonNumericField("jsoncons_token_exact_advance_steps", + instrumentation.jsoncons_token_exact_advance_steps, first); +#endif + + std::cout << "}" << std::endl; +} + +void printHumanReadable(const Options &options, const std::string &body, + const Metrics &metrics) { + printHumanField("backend", benchmarkBackend()); + printHumanField("scenario", options.scenario); + printHumanField("iterations", options.iterations); + printHumanField("body_bytes", body.size()); + printHumanField("append_request_body_ns", + metrics.append_request_body_ns); + printHumanField("process_request_body_ns", + metrics.process_request_body_ns); + printHumanField("total_transaction_ns", + metrics.total_transaction_ns); + printHumanField("parse_success_count", + metrics.parse_success_count); + printHumanField("parse_error_count", + metrics.parse_error_count); + printHumanField("ru_maxrss_kb", currentMaxRssKb()); +} + +} // namespace + +int main(int argc, const char *argv[]) { + try { + const Options options = parseOptions(argc, argv); + const std::string body = buildScenarioBody(options); + + modsecurity::ModSecurity modsec; + modsec.setConnectorInformation( + "ModSecurity-json-benchmark v0.0.1-alpha"); + + modsecurity::RulesSet rules; + if (const std::string rules_path = rulesFilePath(); + rules.loadFromUri(rules_path.c_str()) < 0) { + std::cerr << "failed to load benchmark rules from " + << rules_path << std::endl; + std::cerr << rules.m_parserError.str() << std::endl; + return 1; + } + + modsecurity::RequestBodyProcessor::jsonInstrumentationReset(); + const Metrics metrics = runBenchmark(&modsec, &rules, body, options); + + if (options.output_json) { + printJson(options, 
body, metrics); + } else { + printHumanReadable(options, body, metrics); + } + return 0; + } catch (const std::exception &error) { + std::cerr << error.what() << std::endl; + std::cerr << usage_message << std::endl; + return 64; + } +} diff --git a/test/benchmark/json_benchmark_rules.conf b/test/benchmark/json_benchmark_rules.conf new file mode 100644 index 0000000000..5e13a15705 --- /dev/null +++ b/test/benchmark/json_benchmark_rules.conf @@ -0,0 +1,9 @@ +SecRuleEngine On +SecRequestBodyAccess On +SecAuditEngine Off +SecDebugLog /dev/null +SecDebugLogLevel 0 +SecRequestBodyLimit 8388608 +SecRequestBodyNoFilesLimit 8388608 +SecRequestBodyJsonDepthLimit 4096 +SecRule REQUEST_HEADERS:Content-Type "^application/json$" "id:901001,phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON" diff --git a/test/benchmark/run-json-benchmarks.sh b/test/benchmark/run-json-benchmarks.sh new file mode 100755 index 0000000000..2bac03bd5e --- /dev/null +++ b/test/benchmark/run-json-benchmarks.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash + +set -u + +usage() { + cat <<'EOF' +Usage: test/benchmark/run-json-benchmarks.sh --simdjson-build DIR --jsoncons-build DIR [--include-invalid] +EOF + return +} + +simdjson_build="" +jsoncons_build="" +include_invalid=0 +output_file="${PWD}/json-benchmark-results.jsonl" + +while [[ "$#" -gt 0 ]]; do + case "$1" in + --simdjson-build) + shift + [[ "$#" -gt 0 ]] || { usage; exit 64; } + simdjson_build="$1" + ;; + --jsoncons-build) + shift + [[ "$#" -gt 0 ]] || { usage; exit 64; } + jsoncons_build="$1" + ;; + --include-invalid) + include_invalid=1 + ;; + -h|--help) + usage + exit 0 + ;; + *) + usage + exit 64 + ;; + esac + shift +done + +[[ -n "${simdjson_build}" ]] || { usage; exit 64; } +[[ -n "${jsoncons_build}" ]] || { usage; exit 64; } + +readonly base_scenarios=( + "large-object" + "deep-nesting" + "numbers" + "utf8" +) + +readonly invalid_scenarios=( + "truncated" + "malformed" +) + +run_scenarios() { + local build_dir="$1" + local 
binary="${build_dir}/test/benchmark/json_benchmark" + local scenario + + if [[ ! -x "${binary}" ]]; then + echo "missing benchmark binary: ${binary}" >&2 + return 1 + fi + + for scenario in "${base_scenarios[@]}"; do + "${binary}" --scenario "${scenario}" --output json >> "${output_file}" + done + + if [[ "${include_invalid}" -ne 0 ]]; then + for scenario in "${invalid_scenarios[@]}"; do + "${binary}" --scenario "${scenario}" --include-invalid --output json >> "${output_file}" + done + fi + + return 0 +} + +: > "${output_file}" +run_scenarios "${simdjson_build}" || exit 1 +run_scenarios "${jsoncons_build}" || exit 1 + +printf 'Wrote %s\n' "${output_file}" diff --git a/test/common/json.h b/test/common/json.h new file mode 100644 index 0000000000..e365edd90a --- /dev/null +++ b/test/common/json.h @@ -0,0 +1,340 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. 
+ * + */ + +#ifndef TEST_COMMON_JSON_H_ +#define TEST_COMMON_JSON_H_ + +#include + +#include +#include +#include +#include + +namespace modsecurity_test::json { + +enum class JsonType { + Object, + Array, + String, + Number, + Boolean, + Null, + Unknown +}; + +using JsonNode = jsoncons::ojson; + +class JsonValue; + +class JsonField { + public: + JsonField() = default; + JsonField(std::string_view key, const JsonNode *value) + : m_key(key), + m_value(value) { } + + bool valid() const { + return m_value != nullptr; + } + + std::string_view unescaped_key() const { + return m_key; + } + + JsonValue value() const; + + private: + std::string_view m_key; + const JsonNode *m_value{nullptr}; +}; + +class JsonArray { + public: + class iterator { + public: + explicit iterator(JsonNode::const_array_iterator iterator) + : m_iterator(iterator) { } + + JsonValue operator*() const; + + iterator &operator++() { + ++m_iterator; + return *this; + } + + bool operator!=(const iterator &other) const { + return m_iterator != other.m_iterator; + } + + private: + JsonNode::const_array_iterator m_iterator; + }; + + JsonArray() = default; + explicit JsonArray(const JsonNode *value) : m_value(value) { } + + bool valid() const { + return m_value != nullptr && m_value->is_array(); + } + + iterator begin() const { + return iterator(m_value->array_range().begin()); + } + + iterator end() const { + return iterator(m_value->array_range().end()); + } + + private: + const JsonNode *m_value{nullptr}; +}; + +class JsonObject { + public: + class iterator { + public: + explicit iterator(JsonNode::const_object_iterator iterator) + : m_iterator(iterator) { } + + JsonField operator*() const { + const auto &member = *m_iterator; + return JsonField(std::string_view(member.key().data(), + member.key().size()), &member.value()); + } + + iterator &operator++() { + ++m_iterator; + return *this; + } + + bool operator!=(const iterator &other) const { + return m_iterator != other.m_iterator; + } + + private: + 
JsonNode::const_object_iterator m_iterator; + }; + + JsonObject() = default; + explicit JsonObject(const JsonNode *value) : m_value(value) { } + + bool valid() const { + return m_value != nullptr && m_value->is_object(); + } + + iterator begin() const { + return iterator(m_value->object_range().begin()); + } + + iterator end() const { + return iterator(m_value->object_range().end()); + } + + private: + const JsonNode *m_value{nullptr}; +}; + +class JsonValue { + public: + JsonValue() = default; + explicit JsonValue(const JsonNode *value) : m_value(value) { } + + bool valid() const { + return m_value != nullptr; + } + + JsonObject get_object() const { + return JsonObject(valid() && m_value->is_object() ? m_value : nullptr); + } + + JsonArray get_array() const { + return JsonArray(valid() && m_value->is_array() ? m_value : nullptr); + } + + JsonType type() const { + if (!valid()) { + return JsonType::Unknown; + } + if (m_value->is_object()) { + return JsonType::Object; + } + if (m_value->is_array()) { + return JsonType::Array; + } + if (m_value->is_string()) { + return JsonType::String; + } + if (m_value->is_bool()) { + return JsonType::Boolean; + } + if (m_value->is_null()) { + return JsonType::Null; + } + return JsonType::Number; + } + + const JsonNode *raw() const { + return m_value; + } + + private: + const JsonNode *m_value{nullptr}; +}; + +inline JsonValue JsonField::value() const { + return JsonValue(m_value); +} + +inline JsonValue JsonArray::iterator::operator*() const { + return JsonValue(&(*m_iterator)); +} + +class JsonDocument { + public: + JsonValue get_value() const { + return JsonValue(&m_root); + } + + JsonArray get_array() const { + return JsonArray(&m_root); + } + + bool parse(const std::string &input, std::string *error = nullptr) { + try { + m_root = JsonNode::parse(input); + return true; + } catch (const std::exception &exception) { + if (error != nullptr) { + error->assign(exception.what()); + } + return false; + } + } + + private: + JsonNode 
m_root; +}; + +inline bool get(JsonArray value, JsonArray *target, + std::string *error = nullptr) { + (void) error; + if (!value.valid()) { + return false; + } + *target = value; + return true; +} + +inline bool get(const JsonField &value, JsonField *target, + std::string *error = nullptr) { + (void) error; + if (!value.valid()) { + return false; + } + *target = value; + return true; +} + +inline bool get(JsonObject value, JsonObject *target, + std::string *error = nullptr) { + (void) error; + if (!value.valid()) { + return false; + } + *target = value; + return true; +} + +inline bool get(JsonType value, JsonType *target, + std::string *error = nullptr) { + (void) error; + *target = value; + return true; +} + +inline bool get(JsonValue value, JsonValue *target, + std::string *error = nullptr) { + (void) error; + if (!value.valid()) { + return false; + } + *target = value; + return true; +} + +inline bool get(std::string_view value, std::string_view *target, + std::string *error = nullptr) { + (void) error; + *target = value; + return true; +} + +inline bool load_document(const std::string &file, JsonDocument *document, + std::string *error) { + std::ifstream input(file.c_str()); + std::string buffer; + + if (input.is_open() == false) { + if (error != nullptr) { + error->assign("Unable to open JSON file."); + } + return false; + } + + buffer.assign((std::istreambuf_iterator(input)), + std::istreambuf_iterator()); + return document->parse(buffer, error); +} + +inline std::string get_string(JsonValue value) { + if (!value.valid()) { + return ""; + } + + try { + return value.raw()->as(); + } catch (const std::exception &) { + return ""; + } +} + +inline std::string get_raw_number(JsonValue value) { + if (!value.valid() || value.raw()->is_number() == false) { + return ""; + } + + try { + return value.raw()->as(); + } catch (const std::exception &) { + return ""; + } +} + +inline int64_t get_integer(JsonValue value) { + if (!value.valid()) { + return 0; + } + + try { + 
return value.raw()->as(); + } catch (const std::exception &) { + return 0; + } +} + +} // namespace modsecurity_test::json + +#endif // TEST_COMMON_JSON_H_ diff --git a/test/common/modsecurity_test.cc b/test/common/modsecurity_test.cc index 23eed49e58..dc3e55a261 100644 --- a/test/common/modsecurity_test.cc +++ b/test/common/modsecurity_test.cc @@ -15,9 +15,6 @@ #include "test/common/modsecurity_test.h" -#ifdef WITH_YAJL -#include -#endif #include #include #include @@ -29,6 +26,7 @@ #include #include "modsecurity/modsecurity.h" +#include "test/common/json.h" namespace modsecurity_test { @@ -47,8 +45,15 @@ std::string ModSecurityTest::header() { template bool ModSecurityTest::load_test_json(const std::string &file) { - char errbuf[1024]; - yajl_val node; + auto reportParsingError = [&file](const std::string &error_message) { + std::cout << "Problems parsing file: " << file << std::endl; + if (error_message.empty() == false) { + std::cout << error_message << std::endl; + } + }; + + std::string error; + modsecurity_test::json::JsonDocument document; std::ifstream myfile; myfile.open(file.c_str()); @@ -56,29 +61,36 @@ bool ModSecurityTest::load_test_json(const std::string &file) { std::cout << "Problems opening file: " << file << std::endl; return false; } + myfile.close(); - std::string str((std::istreambuf_iterator(myfile)), - std::istreambuf_iterator()); - node = yajl_tree_parse((const char *) str.c_str(), errbuf, sizeof(errbuf)); - if (node == NULL) { - std::cout << "Problems parsing file: " << file << std::endl; - if (strlen(errbuf) > 0) { - std::cout << errbuf << std::endl; - } + if (modsecurity_test::json::load_document(file, &document, &error) + == false) { + reportParsingError(error); return false; } if (m_format) { - auto u = T::from_yajl_node(node); + auto u = T::from_json_document(&document); u->filename = file; (*this)[file].push_back(std::move(u)); } else { - size_t num_tests = node->u.array.len; - for ( int i = 0; i < num_tests; i++ ) { - yajl_val obj = 
node->u.array.values[i]; + modsecurity_test::json::JsonArray tests; + if (modsecurity_test::json::get(document.get_array(), &tests, + &error) == false) { + reportParsingError(error); + return false; + } - auto u = T::from_yajl_node(obj); + for (auto test_result : tests) { + modsecurity_test::json::JsonValue value; + if (modsecurity_test::json::get(std::move(test_result), &value, + &error) == false) { + reportParsingError(error); + return false; + } + + auto u = T::from_json_value(value); u->filename = file; const auto key = u->filename + ":" + u->name; @@ -86,8 +98,6 @@ bool ModSecurityTest::load_test_json(const std::string &file) { } } - yajl_tree_free(node); - return true; } diff --git a/test/common/modsecurity_test.h b/test/common/modsecurity_test.h index 6e8a3bbc8f..80ebaefa09 100644 --- a/test/common/modsecurity_test.h +++ b/test/common/modsecurity_test.h @@ -18,6 +18,7 @@ #include #include #include +#include #ifndef TEST_COMMON_MODSECURITY_TEST_H_ #define TEST_COMMON_MODSECURITY_TEST_H_ diff --git a/test/fuzzer/Makefile.am b/test/fuzzer/Makefile.am index eee3a94e23..b1022fdab9 100644 --- a/test/fuzzer/Makefile.am +++ b/test/fuzzer/Makefile.am @@ -19,7 +19,6 @@ afl_fuzzer_LDADD = \ $(GEOIP_LDFLAGS) $(GEOIP_LDADD) \ $(PCRE_LDADD) \ $(PCRE2_LDADD) \ - $(YAJL_LDFLAGS) $(YAJL_LDADD) \ $(LMDB_LDFLAGS) $(LMDB_LDADD) \ $(MAXMIND_LDFLAGS) $(MAXMIND_LDADD) \ $(SSDEEP_LDFLAGS) $(SSDEEP_LDADD) \ @@ -36,13 +35,12 @@ afl_fuzzer_CPPFLAGS = \ -I../../ \ -O0 \ -g \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ $(CURL_CFLAGS) \ $(GEOIP_CFLAGS) \ $(MAXMIND_CFLAGS) \ $(GLOBAL_CPPFLAGS) \ $(MODSEC_NO_LOGS) \ - $(YAJL_CFLAGS) \ $(LMDB_CFLAGS) \ $(PCRE_CFLAGS) \ $(PCRE2_CFLAGS) \ diff --git a/test/regression/regression.cc b/test/regression/regression.cc index 5b1ca514e8..f1d8eab8d7 100644 --- a/test/regression/regression.cc +++ b/test/regression/regression.cc @@ -439,7 +439,6 @@ int main(int argc, char **argv) test.cmd_options(argc, argv); if (test.m_format) { -#ifdef 
WITH_YAJL std::cout << "start formatting test case JSON files" << std::endl; ModSecurityTest test2; test2.cmd_options(argc, argv); @@ -459,11 +458,6 @@ int main(int argc, char **argv) } std::cout << "finished formatting files." << std::endl; return 0; -#else - std::cout << "Test utility cannot format test case JSON files without being built with YAJL." \ - << std::endl; - return 1; -#endif } if (!test.m_automake_output && !test.m_count_all) { diff --git a/test/regression/regression_test.cc b/test/regression/regression_test.cc index 18f61b64dc..04119c5ea1 100644 --- a/test/regression/regression_test.cc +++ b/test/regression/regression_test.cc @@ -22,210 +22,319 @@ #include #include #include +#include -#ifdef WITH_YAJL -#include -#endif +#include "src/utils/json_writer.h" namespace modsecurity_test { +namespace { -std::string RegressionTest::print() { - std::stringstream i; +std::string join_strings(const std::vector &values) { + std::stringstream stream; -#if 0 - i << KRED << "Test failed." 
<< RESET << " From: " \ - i << this->filename << std::endl; - i << "{" << std::endl; - i << " \"ret\": \"" << this->ret << "\"" << std::endl; - i << " \"type\": \"" << this->type << "\"" << std::endl; - i << " \"name\": \"" << this->name << "\"" << std::endl; - i << " \"input\": \"" << this->input << "\"" << std::endl; - i << " \"param\": \"" << this->param << "\"" << std::endl; - i << "}" << std::endl; - i << "Expecting: " << this->ret << " - operator returned: " << \ - this->obtained << std::endl; -#endif - return i.str(); + for (const auto &entry : values) { + stream << entry; + } + + return stream.str(); } +std::vector json_array_to_vec_string( + modsecurity_test::json::JsonValue value) { + modsecurity_test::json::JsonArray array; + std::vector values; -inline std::string RegressionTest::yajl_array_to_str(const yajl_val &node) { - std::stringstream i; - for (int z = 0; z < node->u.array.len; z++) { - yajl_val val3 = node->u.array.values[z]; - const char *key = YAJL_GET_STRING(val3); - i << key; + if (modsecurity_test::json::get(value.get_array(), &array) == false) { + return values; } - return i.str(); + + for (auto entry_result : array) { + modsecurity_test::json::JsonValue entry; + + if (modsecurity_test::json::get(std::move(entry_result), &entry) + == false) { + continue; + } + + values.push_back(modsecurity_test::json::get_string(entry)); + } + + return values; } +std::vector> json_object_to_map( + modsecurity_test::json::JsonValue value) { + modsecurity_test::json::JsonObject object; + std::vector> values; + + if (modsecurity_test::json::get(value.get_object(), &object) == false) { + return values; + } + + for (auto field_result : object) { + modsecurity_test::json::JsonField field; + std::string_view key; + modsecurity_test::json::JsonValue child; -inline std::vector RegressionTest::yajl_array_to_vec_str( - const yajl_val &node) { - std::vector vec; - for (int z = 0; z < node->u.array.len; z++) { - yajl_val val3 = node->u.array.values[z]; - const char 
*key = YAJL_GET_STRING(val3); - vec.push_back(key); + if (modsecurity_test::json::get(field_result, &field) + == false) { + continue; + } + if (modsecurity_test::json::get(field.unescaped_key(), &key) == false) { + continue; + } + child = field.value(); + + values.emplace_back(std::string(key), + modsecurity_test::json::get_string(child)); } - return vec; + + return values; } +template +void for_each_json_field(modsecurity_test::json::JsonValue value, + Callback callback) { + modsecurity_test::json::JsonObject object; -inline std::vector> - RegressionTest::yajl_array_to_map(const yajl_val &node) { - std::vector> vec; - for (int z = 0; z < node->u.object.len; z++) { - const char *key = node->u.object.keys[z]; - yajl_val val3 = node->u.object.values[z]; - const char *value = YAJL_GET_STRING(val3); - std::pair a(key, value); - vec.push_back(a); + if (modsecurity_test::json::get(value.get_object(), &object) == false) { + return; + } + + for (auto field_result : object) { + modsecurity_test::json::JsonField field; + std::string_view key; + modsecurity_test::json::JsonValue child; + + if (modsecurity_test::json::get(field_result, &field) == false) { + continue; + } + if (modsecurity_test::json::get(field.unescaped_key(), &key) == false) { + continue; + } + + child = field.value(); + callback(key, child); } - return vec; } -static inline void set_int_from_yajl(int &dest, std::string_view want_key, std::string_view key, const yajl_val &val) { +void set_int_from_json(int &dest, std::string_view want_key, + std::string_view key, modsecurity_test::json::JsonValue value) { if (key == want_key) { - dest = YAJL_GET_INTEGER(val); + dest = static_cast(modsecurity_test::json::get_integer(value)); } } -static inline void set_opt_int_from_yajl(std::optional &dest, std::string_view want_key, std::string_view key, const yajl_val &val) { +void set_opt_int_from_json(std::optional &dest, std::string_view want_key, + std::string_view key, modsecurity_test::json::JsonValue value) { if (key 
== want_key) { - dest = YAJL_GET_INTEGER(val); + dest = static_cast(modsecurity_test::json::get_integer(value)); } } -static inline void set_string_from_yajl(std::string &dest, std::string_view want_key, std::string_view key, const yajl_val &val) { +void set_string_from_json(std::string &dest, std::string_view want_key, + std::string_view key, modsecurity_test::json::JsonValue value) { if (key == want_key) { - dest = YAJL_GET_STRING(val); + dest = modsecurity_test::json::get_string(value); } } -std::unique_ptr RegressionTest::from_yajl_node(const yajl_val &node) { - size_t nelem = node->u.object.len; - auto u = std::make_unique(); - u->http_code = 200; - - for (int i = 0; i < nelem; i++) { - const char *key = node->u.object.keys[ i ]; - yajl_val val = node->u.object.values[ i ]; - - set_int_from_yajl(u->enabled, "enabled", key, val); - set_int_from_yajl(u->version_min, "version_min", key, val); - set_opt_int_from_yajl(u->version_max, "version_max", key, val); - set_string_from_yajl(u->title, "title", key, val); - set_string_from_yajl(u->url, "url", key, val); - set_string_from_yajl(u->resource, "resource", key, val); - set_opt_int_from_yajl(u->github_issue, "github_issue", key, val); - if (strcmp(key, "client") == 0) { - u->update_client_from_yajl_node(val); - } - if (strcmp(key, "server") == 0) { - u->update_server_from_yajl_node(val); - } - if (strcmp(key, "request") == 0) { - u->update_request_from_yajl_node(val); - } - if (strcmp(key, "response") == 0) { - u->update_response_from_yajl_node(val); - } - if (strcmp(key, "expected") == 0) { - u->update_expected_from_yajl_node(val); - } - if (strcmp(key, "rules") == 0) { - u->update_rules_from_yajl_node(val); - } +std::unique_ptr make_empty_regression_test() { + auto test = std::make_unique(); + test->enabled = 0; + test->version_min = 0; + test->clientPort = 0; + test->serverPort = 0; + test->http_code = 200; + return test; +} + +void append_headers(modsecurity::utils::JsonWriter *writer, + const std::vector> 
&headers) { + writer->start_object(); + for (const auto &[name, value] : headers) { + writer->key(name); + writer->string(value); } + writer->end_object(); +} - u->name = u->title; +void append_string_array(modsecurity::utils::JsonWriter *writer, + std::vector values) { + if (values.empty()) { + values.emplace_back(""); + } - return u; + writer->start_array(); + for (const auto &value : values) { + writer->string(value); + } + writer->end_array(); } -void RegressionTest::update_client_from_yajl_node(const yajl_val &val) { - for (int j = 0; j < val->u.object.len; j++) { - const char *key2 = val->u.object.keys[j]; - yajl_val val2 = val->u.object.values[j]; +} // namespace - set_string_from_yajl(clientIp, "ip", key2, val2); - set_int_from_yajl(clientPort, "port", key2, val2); - } +std::string RegressionTest::print() { + std::stringstream i; + +#if 0 + i << KRED << "Test failed." << RESET << " From: " \ + i << this->filename << std::endl; + i << "{" << std::endl; + i << " \"ret\": \"" << this->ret << "\"" << std::endl; + i << " \"type\": \"" << this->type << "\"" << std::endl; + i << " \"name\": \"" << this->name << "\"" << std::endl; + i << " \"input\": \"" << this->input << "\"" << std::endl; + i << " \"param\": \"" << this->param << "\"" << std::endl; + i << "}" << std::endl; + i << "Expecting: " << this->ret << " - operator returned: " << \ + this->obtained << std::endl; +#endif + return i.str(); } -void RegressionTest::update_server_from_yajl_node(const yajl_val &val) { - for (int j = 0; j < val->u.object.len; j++) { - const char *key2 = val->u.object.keys[j]; - yajl_val val2 = val->u.object.values[j]; +std::unique_ptr RegressionTest::from_json_document( + const modsecurity_test::json::JsonDocument *document) { + modsecurity_test::json::JsonValue root; - set_string_from_yajl(serverIp, "ip", key2, val2); - set_int_from_yajl(serverPort, "port", key2, val2); - set_string_from_yajl(hostname, "hostname", key2, val2); + if 
(modsecurity_test::json::get(document->get_value(), &root) == false) { + return make_empty_regression_test(); } -} -void RegressionTest::update_request_from_yajl_node(const yajl_val &val) { - for (int j = 0; j < val->u.object.len; j++) { - const char *key2 = val->u.object.keys[j]; - yajl_val val2 = val->u.object.values[j]; + modsecurity_test::json::JsonType type; + if (modsecurity_test::json::get(root.type(), &type) == false) { + return make_empty_regression_test(); + } - set_string_from_yajl(uri, "uri", key2, val2); - set_string_from_yajl(method, "method", key2, val2); - if (strcmp(key2, "http_version") == 0) { - httpVersion = YAJL_GET_NUMBER(val2); + if (type == modsecurity_test::json::JsonType::Array) { + modsecurity_test::json::JsonArray tests; + if (modsecurity_test::json::get(root.get_array(), &tests) == false) { + return make_empty_regression_test(); } - if (strcmp(key2, "headers") == 0) { - request_headers = yajl_array_to_map(val2); - } - if (strcmp(key2, "body") == 0) { - request_body = yajl_array_to_str(val2); - request_body_lines = yajl_array_to_vec_str(val2); + + for (auto test_result : tests) { + modsecurity_test::json::JsonValue test; + if (modsecurity_test::json::get(std::move(test_result), &test) + == false) { + continue; + } + + return from_json_value(test); } + + return make_empty_regression_test(); } + + return from_json_value(root); +} + +std::unique_ptr RegressionTest::from_json_value( + modsecurity_test::json::JsonValue value) { + auto test = make_empty_regression_test(); + + for_each_json_field(value, [&test](std::string_view key, + modsecurity_test::json::JsonValue child) { + set_int_from_json(test->enabled, "enabled", key, child); + set_int_from_json(test->version_min, "version_min", key, child); + set_opt_int_from_json(test->version_max, "version_max", key, child); + set_string_from_json(test->title, "title", key, child); + set_string_from_json(test->url, "url", key, child); + set_string_from_json(test->resource, "resource", key, child); + 
set_opt_int_from_json(test->github_issue, "github_issue", key, child); + + if (key == "client") { + test->update_client_from_json_value(child); + } else if (key == "server") { + test->update_server_from_json_value(child); + } else if (key == "request") { + test->update_request_from_json_value(child); + } else if (key == "response") { + test->update_response_from_json_value(child); + } else if (key == "expected") { + test->update_expected_from_json_value(child); + } else if (key == "rules") { + test->update_rules_from_json_value(child); + } + }); + + test->name = test->title; + return test; +} + +void RegressionTest::update_client_from_json_value( + modsecurity_test::json::JsonValue value) { + for_each_json_field(value, [this](std::string_view key, + modsecurity_test::json::JsonValue child) { + set_string_from_json(clientIp, "ip", key, child); + set_int_from_json(clientPort, "port", key, child); + }); } -void RegressionTest::update_response_from_yajl_node(const yajl_val &val) { - for (int j = 0; j < val->u.object.len; j++) { - const char *key2 = val->u.object.keys[j]; - yajl_val val2 = val->u.object.values[j]; +void RegressionTest::update_server_from_json_value( + modsecurity_test::json::JsonValue value) { + for_each_json_field(value, [this](std::string_view key, + modsecurity_test::json::JsonValue child) { + set_string_from_json(serverIp, "ip", key, child); + set_int_from_json(serverPort, "port", key, child); + set_string_from_json(hostname, "hostname", key, child); + }); +} - if (strcmp(key2, "headers") == 0) { - response_headers = yajl_array_to_map(val2); +void RegressionTest::update_request_from_json_value( + modsecurity_test::json::JsonValue value) { + for_each_json_field(value, [this](std::string_view key, + modsecurity_test::json::JsonValue child) { + set_string_from_json(uri, "uri", key, child); + set_string_from_json(method, "method", key, child); + if (key == "http_version") { + httpVersion = modsecurity_test::json::get_raw_number(child); + } else if (key 
== "headers") { + request_headers = json_object_to_map(child); + } else if (key == "body") { + request_body_lines = json_array_to_vec_string(child); + request_body = join_strings(request_body_lines); } - if (strcmp(key2, "body") == 0) { - response_body = yajl_array_to_str(val2); - response_body_lines = yajl_array_to_vec_str(val2); + }); +} + +void RegressionTest::update_response_from_json_value( + modsecurity_test::json::JsonValue value) { + for_each_json_field(value, [this](std::string_view key, + modsecurity_test::json::JsonValue child) { + if (key == "headers") { + response_headers = json_object_to_map(child); + } else if (key == "body") { + response_body_lines = json_array_to_vec_string(child); + response_body = join_strings(response_body_lines); } - set_string_from_yajl(response_protocol, "protocol", key2, val2); - } + set_string_from_json(response_protocol, "protocol", key, child); + }); } -void RegressionTest::update_expected_from_yajl_node(const yajl_val &val) { - for (int j = 0; j < val->u.object.len; j++) { - const char *key2 = val->u.object.keys[j]; - yajl_val val2 = val->u.object.values[j]; - - set_string_from_yajl(audit_log, "audit_log", key2, val2); - set_string_from_yajl(debug_log, "debug_log", key2, val2); - set_string_from_yajl(error_log, "error_log", key2, val2); - set_int_from_yajl(http_code, "http_code", key2, val2); - set_string_from_yajl(redirect_url, "redirect_url", key2, val2); - set_string_from_yajl(parser_error, "parser_error", key2, val2); - } +void RegressionTest::update_expected_from_json_value( + modsecurity_test::json::JsonValue value) { + for_each_json_field(value, [this](std::string_view key, + modsecurity_test::json::JsonValue child) { + set_string_from_json(audit_log, "audit_log", key, child); + set_string_from_json(debug_log, "debug_log", key, child); + set_string_from_json(error_log, "error_log", key, child); + set_int_from_json(http_code, "http_code", key, child); + set_string_from_json(redirect_url, "redirect_url", key, 
child); + set_string_from_json(parser_error, "parser_error", key, child); + }); } -void RegressionTest::update_rules_from_yajl_node(const yajl_val &val) { - std::stringstream si; - for (int j = 0; j < val->u.array.len; j++) { - yajl_val val2 = val->u.array.values[ j ]; - const char *keyj = YAJL_GET_STRING(val2); - si << keyj << "\n"; +void RegressionTest::update_rules_from_json_value( + modsecurity_test::json::JsonValue value) { + std::stringstream stream; + + rules_lines = json_array_to_vec_string(value); + for (const auto &line : rules_lines) { + stream << line << "\n"; } - rules = si.str(); - rules_lines = yajl_array_to_vec_str(val); -} + rules = stream.str(); +} constexpr char ascii_tolower(char c) { return 'A' <= c && c <= 'Z' ? (c + ('a' - 'A')) : c; @@ -239,15 +348,18 @@ bool iequals_ascii(std::string_view a, std::string_view b) { }); } -static bool has_chunked_header(const std::vector> &headers) { +static bool has_chunked_header( + const std::vector> &headers) { return std::any_of(std::begin(headers), std::end(headers), [](const auto &header) { const auto &[name, value]{header}; - return iequals_ascii(name, "Transfer-Encoding") && iequals_ascii(value, "chunked"); + return iequals_ascii(name, "Transfer-Encoding") + && iequals_ascii(value, "chunked"); }); } -static void update_content_length(std::vector> &headers, size_t length) { +static void update_content_length( + std::vector> &headers, size_t length) { if (has_chunked_header(headers)) { return; } @@ -260,7 +372,8 @@ static void update_content_length(std::vector RegressionTests::from_yajl_node(const yajl_val &node) { - auto u = std::make_unique(); - size_t num_tests = node->u.array.len; - for (int i = 0; i < num_tests; i++) { - yajl_val obj = node->u.array.values[i]; - u->tests.emplace_back(std::move(RegressionTest::from_yajl_node(obj))); - } - return u; -} +std::unique_ptr RegressionTests::from_json_document( + const modsecurity_test::json::JsonDocument *document) { + modsecurity_test::json::JsonValue 
root; -void RegressionTests::update_content_lengths() { - for (auto & test : tests) { - test->update_content_lengths(); + if (modsecurity_test::json::get(document->get_value(), &root) == false) { + return std::make_unique(); } -} - -#ifdef WITH_YAJL -static yajl_gen_status gen_string_view(yajl_gen g, std::string_view s) { - return yajl_gen_string(g, reinterpret_cast(s.data()), s.length()); + return from_json_value(root); } -static yajl_gen_status gen_key_str(yajl_gen g, std::string_view key, std::string_view val) { - if (auto s{gen_string_view(g, key)}; s != yajl_gen_status_ok) { - return s; - } - return gen_string_view(g, val); -} +std::unique_ptr RegressionTests::from_json_value( + modsecurity_test::json::JsonValue value) { + auto tests = std::make_unique(); + modsecurity_test::json::JsonType type; -static yajl_gen_status gen_key_str_if_non_empty(yajl_gen g, std::string_view key, std::string_view val) { - if (val.empty()) { - return yajl_gen_status_ok; + if (modsecurity_test::json::get(value.type(), &type) == false) { + return tests; } - return gen_key_str(g, key, val); -} -static yajl_gen_status gen_key_int(yajl_gen g, std::string_view key, int val) { - if (auto s{gen_string_view(g, key)}; s != yajl_gen_status_ok) { - return s; - } - return yajl_gen_integer(g, val); -} + if (type == modsecurity_test::json::JsonType::Array) { + modsecurity_test::json::JsonArray array; -static yajl_gen_status gen_key_opt_int(yajl_gen g, std::string_view key, std::optional val) { - if (!val.has_value()) { - return yajl_gen_status_ok; - } - return gen_key_int(g, key, val.value()); -} + if (modsecurity_test::json::get(value.get_array(), &array) == false) { + return tests; + } -static yajl_gen_status gen_key_int_if_non_zero(yajl_gen g, std::string_view key, int val) { - if (val == 0) { - return yajl_gen_status_ok; + for (auto test_result : array) { + modsecurity_test::json::JsonValue test_value; + if (modsecurity_test::json::get(std::move(test_result), &test_value) + == false) { + 
continue; + } + tests->tests.emplace_back( + RegressionTest::from_json_value(test_value)); + } + return tests; } - return gen_key_int(g, key, val); -} -static yajl_gen_status gen_key_number(yajl_gen g, std::string_view key, std::string_view raw_val) { - if (auto s{gen_string_view(g, key)}; s != yajl_gen_status_ok) { - return s; + if (type == modsecurity_test::json::JsonType::Object) { + tests->tests.emplace_back(RegressionTest::from_json_value(value)); } - return yajl_gen_number(g, reinterpret_cast(raw_val.data()), raw_val.length()); -} -static yajl_gen_status gen_key_str_array(yajl_gen g, std::string_view key, const std::vector &lines) { - if (auto s{gen_string_view(g, key)}; s != yajl_gen_status_ok) { - return s; - } - if (auto s{yajl_gen_array_open(g)}; s != yajl_gen_status_ok) { - return s; - } - for (const auto &line : lines) { - if (auto s{gen_string_view(g, line)}; s != yajl_gen_status_ok) { - return s; - } - } - return yajl_gen_array_close(g); + return tests; } -static yajl_gen_status gen_key_headers(yajl_gen g, std::string_view key, const std::vector> &headers) { - if (auto s{gen_string_view(g, key)}; s != yajl_gen_status_ok) { - return s; - } - if (auto s{yajl_gen_map_open(g)}; s != yajl_gen_status_ok) { - return s; - } - for (const auto &[name, value] : headers) { - if (auto s{gen_key_str(g, name, value)}; s != yajl_gen_status_ok) { - return s; - } +void RegressionTests::update_content_lengths() { + for (auto &test : tests) { + test->update_content_lengths(); } - return yajl_gen_map_close(g); } std::string RegressionTests::toJSON() const { - const unsigned char *buf; - size_t len; - yajl_gen g; - - g = yajl_gen_alloc(NULL); - if (g == NULL) { - return ""; - } - yajl_gen_config(g, yajl_gen_beautify, 1); - yajl_gen_config(g, yajl_gen_indent_string, " "); - - yajl_gen_array_open(g); - for (const auto &t : tests) { - yajl_gen_map_open(g); - gen_key_int(g, "enabled", t->enabled); - gen_key_int(g, "version_min", t->version_min); - gen_key_opt_int(g, 
"version_max", t->version_max); - gen_key_str(g, "title", t->title); - gen_key_str_if_non_empty(g, "url", t->url); - gen_key_str_if_non_empty(g, "resource", t->resource); - gen_key_opt_int(g, "github_issue", t->github_issue); - - gen_string_view(g, "client"); - yajl_gen_map_open(g); - gen_key_str(g, "ip", t->clientIp); - gen_key_int(g, "port", t->clientPort); - yajl_gen_map_close(g); - - gen_string_view(g, "server"); - yajl_gen_map_open(g); - gen_key_str(g, "ip", t->serverIp); - gen_key_int(g, "port", t->serverPort); - yajl_gen_map_close(g); - - gen_string_view(g, "request"); - yajl_gen_map_open(g); - gen_key_headers(g, "headers", t->request_headers); - gen_key_str(g, "uri", t->uri); - gen_key_str(g, "method", t->method); - if (!t->httpVersion.empty()) { - gen_key_number(g, "http_version", t->httpVersion); + modsecurity::utils::JsonWriter writer(true, " "); + + const auto addString = [&writer](std::string_view key, + const std::string &value) { + writer.key(key); + writer.string(value); + }; + const auto addStringIfNonEmpty = [&addString]( + std::string_view key, const std::string &value) { + if (value.empty() == false) { + addString(key, value); } - - auto request_body_lines{t->request_body_lines}; - if (request_body_lines.empty()) { - request_body_lines.emplace_back(""); + }; + const auto addInteger = [&writer](std::string_view key, int value) { + writer.key(key); + writer.integer(value); + }; + const auto addOptionalInteger = [&writer](std::string_view key, + const std::optional &value) { + if (value.has_value()) { + writer.key(key); + writer.integer(value.value()); } - gen_key_str_array(g, "body", request_body_lines); - - yajl_gen_map_close(g); - - gen_string_view(g, "response"); - yajl_gen_map_open(g); - gen_key_headers(g, "headers", t->response_headers); + }; - auto response_body_lines{t->response_body_lines}; - if (response_body_lines.empty()) { - response_body_lines.emplace_back(""); + writer.start_array(); + for (const auto &t : tests) { + 
writer.start_object(); + addInteger("enabled", t->enabled); + addInteger("version_min", t->version_min); + addOptionalInteger("version_max", t->version_max); + addString("title", t->title); + addStringIfNonEmpty("url", t->url); + addStringIfNonEmpty("resource", t->resource); + addOptionalInteger("github_issue", t->github_issue); + + writer.key("client"); + writer.start_object(); + addString("ip", t->clientIp); + addInteger("port", t->clientPort); + writer.end_object(); + + writer.key("server"); + writer.start_object(); + addString("ip", t->serverIp); + addInteger("port", t->serverPort); + writer.end_object(); + + writer.key("request"); + writer.start_object(); + writer.key("headers"); + append_headers(&writer, t->request_headers); + addString("uri", t->uri); + addString("method", t->method); + if (!t->httpVersion.empty()) { + writer.key("http_version"); + writer.number(t->httpVersion); } - gen_key_str_array(g, "body", response_body_lines); - - gen_key_str_if_non_empty(g, "protocol", t->response_protocol); - yajl_gen_map_close(g); - - gen_string_view(g, "expected"); - yajl_gen_map_open(g); - gen_key_str_if_non_empty(g, "audit_log", t->audit_log); - gen_key_str_if_non_empty(g, "debug_log", t->debug_log); - gen_key_str_if_non_empty(g, "error_log", t->error_log); - gen_key_int(g, "http_code", t->http_code); - gen_key_str_if_non_empty(g, "redirect_url", t->redirect_url); - gen_key_str_if_non_empty(g, "parser_error", t->parser_error); - yajl_gen_map_close(g); - gen_key_str_array(g, "rules", t->rules_lines); - - yajl_gen_map_close(g); - } - yajl_gen_array_close(g); - - yajl_gen_get_buf(g, &buf, &len); - std::string s{reinterpret_cast(buf), len}; - yajl_gen_free(g); - return s; + writer.key("body"); + append_string_array(&writer, t->request_body_lines); + writer.end_object(); + + writer.key("response"); + writer.start_object(); + writer.key("headers"); + append_headers(&writer, t->response_headers); + writer.key("body"); + append_string_array(&writer, 
t->response_body_lines); + addStringIfNonEmpty("protocol", t->response_protocol); + writer.end_object(); + + writer.key("expected"); + writer.start_object(); + addStringIfNonEmpty("audit_log", t->audit_log); + addStringIfNonEmpty("debug_log", t->debug_log); + addStringIfNonEmpty("error_log", t->error_log); + addInteger("http_code", t->http_code); + addStringIfNonEmpty("redirect_url", t->redirect_url); + addStringIfNonEmpty("parser_error", t->parser_error); + writer.end_object(); + + writer.key("rules"); + append_string_array(&writer, t->rules_lines); + + writer.end_object(); + } + writer.end_array(); + + return writer.to_string(); } -#endif // WITH_YAJL - } // namespace modsecurity_test diff --git a/test/regression/regression_test.h b/test/regression/regression_test.h index 0271482f96..1ff13ac10d 100644 --- a/test/regression/regression_test.h +++ b/test/regression/regression_test.h @@ -13,8 +13,6 @@ * */ -#include - #include #include #include @@ -24,6 +22,8 @@ #include #include +#include "test/common/json.h" + #ifndef TEST_REGRESSION_REGRESSION_TEST_H_ #define TEST_REGRESSION_REGRESSION_TEST_H_ @@ -32,7 +32,10 @@ namespace modsecurity_test { class RegressionTest { public: - static std::unique_ptr from_yajl_node(const yajl_val &); + static std::unique_ptr from_json_document( + const modsecurity_test::json::JsonDocument *document); + static std::unique_ptr from_json_value( + modsecurity_test::json::JsonValue value); static std::string print(); std::string filename; @@ -69,12 +72,6 @@ class RegressionTest { std::string uri; std::string resource; - static inline std::string yajl_array_to_str(const yajl_val &node); - static inline std::vector yajl_array_to_vec_str( - const yajl_val &node); - static inline std::vector> - yajl_array_to_map(const yajl_val &node); - int http_code; std::string redirect_url; @@ -86,17 +83,20 @@ class RegressionTest { void update_content_lengths(); private: - void update_client_from_yajl_node(const yajl_val &val); - void 
update_server_from_yajl_node(const yajl_val &val); - void update_request_from_yajl_node(const yajl_val &val); - void update_response_from_yajl_node(const yajl_val &val); - void update_expected_from_yajl_node(const yajl_val &val); - void update_rules_from_yajl_node(const yajl_val &val); + void update_client_from_json_value(modsecurity_test::json::JsonValue value); + void update_server_from_json_value(modsecurity_test::json::JsonValue value); + void update_request_from_json_value(modsecurity_test::json::JsonValue value); + void update_response_from_json_value(modsecurity_test::json::JsonValue value); + void update_expected_from_json_value(modsecurity_test::json::JsonValue value); + void update_rules_from_json_value(modsecurity_test::json::JsonValue value); }; class RegressionTests { public: - static std::unique_ptr from_yajl_node(const yajl_val &); + static std::unique_ptr from_json_document( + const modsecurity_test::json::JsonDocument *document); + static std::unique_ptr from_json_value( + modsecurity_test::json::JsonValue value); void update_content_lengths(); std::string toJSON() const; diff --git a/test/run-json-backend-matrix.sh b/test/run-json-backend-matrix.sh new file mode 100755 index 0000000000..0a9c5f64d7 --- /dev/null +++ b/test/run-json-backend-matrix.sh @@ -0,0 +1,206 @@ +#!/usr/bin/env bash + +set -u + +usage() { + cat <<'EOF' +Usage: test/run-json-backend-matrix.sh [--build-root DIR] [--jobs N] [--configure-extra "ARGS"] [--keep-build-dirs] + +Exit codes: + 0 success + 10 configure failure + 11 build failure + 12 test failure + 13 backend result difference + 64 invalid usage +EOF + return +} + +repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +build_root="${repo_root}/build-json-backend-matrix" +jobs="$(getconf _NPROCESSORS_ONLN 2>/dev/null || printf '1')" +configure_extra="" +keep_build_dirs=0 + +while [[ "$#" -gt 0 ]]; do + case "$1" in + --build-root) + shift + [[ "$#" -gt 0 ]] || { usage; exit 64; } + build_root="$1" + ;; + --jobs) + shift + [[ "$#" -gt 0 ]] || { usage; exit 64; } + jobs="$1" + ;; + --configure-extra) + shift + [[ "$#" -gt 0 ]] || { usage; exit 64; } + configure_extra="$1" + ;; + --keep-build-dirs) + keep_build_dirs=1 + ;; + -h|--help) + usage + exit 0 + ;; + *) + usage + exit 64 + ;; + esac + shift +done + +declare -a configure_extra_args=() +if [[ -n "${configure_extra}" ]]; then + read -r -a configure_extra_args <<< "${configure_extra}" +fi + +readonly test_files=( + "test/test-cases/regression/request-body-parser-json.json" + "test/test-cases/regression/request-body-parser-json-backend-edgecases.json" +) + +mkdir -p "${build_root}/logs" + +extract_summary() { + local backend="$1" + local input_log="$2" + local output_tsv="$3" + + awk -v backend="${backend}" ' + match($0, /^:test-result:[[:space:]]+([^[:space:]]+)[[:space:]]+([^:]+):(.*)$/, m) { + print backend "\t" m[2] "\t" m[3] "\t" m[1]; + } + ' "${input_log}" > "${output_tsv}" + return +} + +run_backend() { + local backend="$1" + local build_dir="${build_root}/${backend}" + local raw_log="${build_root}/logs/${backend}.log" + local summary_file="${build_root}/logs/${backend}.summary.tsv" + local configure_status=0 + local build_status=0 + local test_status=0 + + if [[ "${keep_build_dirs}" -eq 0 ]]; then + rm -rf "${build_dir}" + fi + mkdir -p "${build_dir}" + : > "${raw_log}" + + ( + cd "${build_dir}" && \ + "${repo_root}/configure" \ + --with-json-backend="${backend}" \ + "${configure_extra_args[@]}" + ) >> "${raw_log}" 2>&1 + configure_status=$? + if [[ "${configure_status}" -ne 0 ]]; then + return 10 + fi + + ( + cd "${build_dir}" && \ + make -j "${jobs}" -C others + ) >> "${raw_log}" 2>&1 + build_status=$? 
+ if [[ "${build_status}" -ne 0 ]]; then + return 11 + fi + + ( + cd "${build_dir}" && \ + make -j "${jobs}" -C src libmodsecurity.la + ) >> "${raw_log}" 2>&1 + build_status=$? + if [[ "${build_status}" -ne 0 ]]; then + return 11 + fi + + ( + cd "${build_dir}" && \ + make -j "${jobs}" -C test regression_tests json_backend_depth_tests + ) >> "${raw_log}" 2>&1 + build_status=$? + if [[ "${build_status}" -ne 0 ]]; then + return 11 + fi + + for test_file in "${test_files[@]}"; do + ( + cd "${build_dir}/test" && \ + ./regression_tests automake "${repo_root}/${test_file}" + ) >> "${raw_log}" 2>&1 + if [[ "$?" -ne 0 ]]; then + test_status=1 + fi + done + + ( + cd "${build_dir}/test" && \ + ./json_backend_depth_tests + ) >> "${raw_log}" 2>&1 + if [[ "$?" -ne 0 ]]; then + test_status=1 + fi + + extract_summary "${backend}" "${raw_log}" "${summary_file}" + if [[ ! -s "${summary_file}" ]]; then + test_status=1 + fi + if awk -F '\t' '$4 != "PASS" {exit 1}' "${summary_file}"; then + : + else + test_status=1 + fi + + if [[ "${test_status}" -ne 0 ]]; then + return 12 + fi + return 0 +} + +backend_test_failure=0 +for backend in simdjson jsoncons; do + run_backend "${backend}" + status=$? 
+ case "${status}" in + 0) + ;; + 10|11) + exit "${status}" + ;; + 12) + backend_test_failure=1 + ;; + *) + exit "${status}" + ;; + esac +done + +cat "${build_root}/logs/simdjson.summary.tsv" \ + "${build_root}/logs/jsoncons.summary.tsv" \ + > "${build_root}/summary.tsv" + +if diff -u \ + <(cut -f 2- "${build_root}/logs/simdjson.summary.tsv" | sort) \ + <(cut -f 2- "${build_root}/logs/jsoncons.summary.tsv" | sort) \ + > "${build_root}/logs/backend-diff.log"; then + : +else + exit 13 +fi + +if [[ "${backend_test_failure}" -ne 0 ]]; then + exit 12 +fi + +exit 0 diff --git a/test/test-cases/regression/request-body-parser-json-backend-edgecases.json b/test/test-cases/regression/request-body-parser-json-backend-edgecases.json new file mode 100644 index 0000000000..03c87a0348 --- /dev/null +++ b/test/test-cases/regression/request-body-parser-json-backend-edgecases.json @@ -0,0 +1,857 @@ +[ + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar string", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "7" + }, + "uri": "/", + "method": "POST", + "body": [ + "\"hello\"" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"hello\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210101',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^hello$\" \"id:'210102',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar zero", + "client": { + "ip": "200.249.12.31", + "port": 
123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "1" + }, + "uri": "/", + "method": "POST", + "body": [ + "0" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"0\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210131',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^0$\" \"id:'210132',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar decimal number", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "3" + }, + "uri": "/", + "method": "POST", + "body": [ + "1.0" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"1.0\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210103',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^1.0$\" \"id:'210104',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar scientific number", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + 
"User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "3" + }, + "uri": "/", + "method": "POST", + "body": [ + "1e3" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"1e3\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210105',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^1e3$\" \"id:'210106',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar negative fraction with exponent", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "8" + }, + "uri": "/", + "method": "POST", + "body": [ + "-1.25e-4" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"-1.25e-4\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210133',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^-1\\.25e-4$\" \"id:'210134',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar negative zero", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": 
"application/json", + "Content-Length": "2" + }, + "uri": "/", + "method": "POST", + "body": [ + "-0" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"-0\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210107',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^-0$\" \"id:'210108',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar uint64 max", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "20" + }, + "uri": "/", + "method": "POST", + "body": [ + "18446744073709551615" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"18446744073709551615\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210135',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^18446744073709551615$\" \"id:'210136',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar uint64 overflow", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "20" + }, + "uri": "/", + 
"method": "POST", + "body": [ + "18446744073709551616" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"18446744073709551616\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210137',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^18446744073709551616$\" \"id:'210138',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar big integer", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "30" + }, + "uri": "/", + "method": "POST", + "body": [ + "123456789012345678901234567890" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"123456789012345678901234567890\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210109',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^123456789012345678901234567890$\" \"id:'210110',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar boolean", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "5" + }, + "uri": 
"/", + "method": "POST", + "body": [ + "false" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"false\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210121',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^false$\" \"id:'210122',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar null", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "4" + }, + "uri": "/", + "method": "POST", + "body": [ + "null" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210123',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^$\" \"id:'210124',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - nested root array path", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "20" + }, + "uri": "/", + "method": "POST", + "body": [ + "[{\"deep\":[\"x\",\"y\"]}]" + ] + }, + "response": { + "headers": { + "Content-Length": 
"0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"y\" \\(Variable: ARGS:json.array_0.deep.array_1\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210111',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json.array_0.deep.array_1 \"^y$\" \"id:'210112',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - empty root array", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "2" + }, + "uri": "/", + "method": "POST", + "body": [ + "[]" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"0\" .Variable: REQBODY_ERROR.", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210125',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule REQBODY_ERROR \"0\" \"id:'210126',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - empty root object", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "2" + }, + "uri": "/", + "method": "POST", + "body": [ + "{}" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"0\" .Variable: 
REQBODY_ERROR.", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210113',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule REQBODY_ERROR \"0\" \"id:'210114',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root array high index", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "21" + }, + "uri": "/", + "method": "POST", + "body": [ + "[0,1,2,3,4,5,6,7,8,9]" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"9\" \\(Variable: ARGS:json.array_9\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210127',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json.array_9 \"^9$\" \"id:'210128',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - root scalar big exponent", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "6" + }, + "uri": "/", + "method": "POST", + "body": [ + "1e1000" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"1e1000\" \\(Variable: ARGS:json\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + 
"SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210129',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json \"^1e1000$\" \"id:'210130',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - duplicate keys remain addressable", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "30" + }, + "uri": "/", + "method": "POST", + "body": [ + "{\"dup\":\"first\",\"dup\":\"second\"}" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Target value: \"second\" \\(Variable: ARGS:json.dup\\)", + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210115',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule ARGS:json.dup \"^second$\" \"id:'210116',phase:3,log\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - truncated body", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "7" + }, + "uri": "/", + "method": "POST", + "body": [ + "{\"foo\":" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Failed to parse request body", + "http_code": 403 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type 
\"application/json\" \"id:'210117',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule REQBODY_ERROR \"!@eq 0\" \"id:'210118',phase:2,t:none,log,deny,status:403,msg:'Failed to parse request body.',logdata:'%{reqbody_error_msg}'\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "Testing JSON request body parser - invalid unicode escape", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Accept": "*/*", + "Content-Type": "application/json", + "Content-Length": "16" + }, + "uri": "/", + "method": "POST", + "body": [ + "{\"bad\":\"\\uD800\"}" + ] + }, + "response": { + "headers": { + "Content-Length": "0" + }, + "body": [ + "" + ] + }, + "expected": { + "debug_log": "Failed to parse request body", + "http_code": 403 + }, + "rules": [ + "SecRuleEngine On", + "SecRequestBodyAccess On", + "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'210119',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", + "SecRule REQBODY_ERROR \"!@eq 0\" \"id:'210120',phase:2,t:none,log,deny,status:403,msg:'Failed to parse request body.',logdata:'%{reqbody_error_msg}'\"" + ] + } +] diff --git a/test/test-suite.in b/test/test-suite.in index 6e8754254b..f16500eb65 100644 --- a/test/test-suite.in +++ b/test/test-suite.in @@ -96,6 +96,7 @@ TESTS+=test/test-cases/regression/operator-verifycpf.json TESTS+=test/test-cases/regression/operator-verifyssn.json TESTS+=test/test-cases/regression/operator-verifysvnr.json TESTS+=test/test-cases/regression/request-body-parser-json.json +TESTS+=test/test-cases/regression/request-body-parser-json-backend-edgecases.json TESTS+=test/test-cases/regression/request-body-parser-multipart-crlf.json TESTS+=test/test-cases/regression/request-body-parser-multipart.json TESTS+=test/test-cases/regression/request-body-parser-xml.json diff 
--git a/test/unit/json_backend_depth_tests.cc b/test/unit/json_backend_depth_tests.cc new file mode 100644 index 0000000000..ebc76d843a --- /dev/null +++ b/test/unit/json_backend_depth_tests.cc @@ -0,0 +1,487 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2015 - 2024 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. + * + */ + +#include +#include +#include +#include +#include + +#include "src/request_body_processor/json_adapter.h" + +namespace modsecurity::RequestBodyProcessor { +namespace { + +class AcceptAllSink : public JsonEventSink { + public: + JsonSinkStatus on_start_object() override { + return JsonSinkStatus::Continue; + } + + JsonSinkStatus on_end_object() override { + return JsonSinkStatus::Continue; + } + + JsonSinkStatus on_start_array() override { + return JsonSinkStatus::Continue; + } + + JsonSinkStatus on_end_array() override { + return JsonSinkStatus::Continue; + } + + JsonSinkStatus on_key(std::string_view value) override { + (void) value; + return JsonSinkStatus::Continue; + } + + JsonSinkStatus on_string(std::string_view value) override { + (void) value; + return JsonSinkStatus::Continue; + } + + JsonSinkStatus on_number(std::string_view raw_number) override { + (void) raw_number; + return JsonSinkStatus::Continue; + } + + JsonSinkStatus on_boolean(bool value) override { + (void) value; + return JsonSinkStatus::Continue; + } + + JsonSinkStatus on_null() override { + return JsonSinkStatus::Continue; + } +}; + +class NumberCollectingSink : public AcceptAllSink { + public: + JsonSinkStatus on_number(std::string_view raw_number) override { 
+ numbers.emplace_back(raw_number.data(), raw_number.size()); + return JsonSinkStatus::Continue; + } + + std::vector numbers; +}; + +const char *parseStatusName(JsonParseStatus status) { + switch (status) { + case JsonParseStatus::Ok: + return "Ok"; + case JsonParseStatus::ParseError: + return "ParseError"; + case JsonParseStatus::TruncatedInput: + return "TruncatedInput"; + case JsonParseStatus::Utf8Error: + return "Utf8Error"; + case JsonParseStatus::EngineAbort: + return "EngineAbort"; + case JsonParseStatus::InternalError: + return "InternalError"; + } + + return "UnknownParseStatus"; +} + +const char *sinkStatusName(JsonSinkStatus status) { + switch (status) { + case JsonSinkStatus::Continue: + return "Continue"; + case JsonSinkStatus::EngineAbort: + return "EngineAbort"; + case JsonSinkStatus::DepthLimitExceeded: + return "DepthLimitExceeded"; + case JsonSinkStatus::InternalError: + return "InternalError"; + } + + return "UnknownSinkStatus"; +} + +std::string makeNestedArrayJson(std::size_t depth) { + std::string input(depth, '['); + input.push_back('0'); + input.append(depth, ']'); + return input; +} + +std::string describeUnexpectedResult(const JsonParseResult &result, + const char *expectation) { + std::string detail = std::string("Expected ") + expectation + ", got " + + parseStatusName(result.parse_status) + "/" + + sinkStatusName(result.sink_status) + "."; + if (!result.detail.empty()) { + detail.append(" "); + detail.append(result.detail); + } + return detail; +} + +std::string describeStringList(const std::vector &values) { + std::string description = "["; + + for (std::size_t i = 0; i < values.size(); i++) { + if (i != 0) { + description.append(", "); + } + description.push_back('"'); + description.append(values[i]); + description.push_back('"'); + } + + description.push_back(']'); + return description; +} + +bool expectParseResult(const std::string &input, JsonParseStatus parse_status, + JsonSinkStatus sink_status, const char *expectation, + 
std::string *failure_detail) { + AcceptAllSink sink; + JSONAdapter adapter; + if (JsonParseResult result = adapter.parse(input, &sink, + JsonBackendParseOptions()); result.parse_status != parse_status + || result.sink_status != sink_status) { + if (failure_detail != nullptr) { + *failure_detail = describeUnexpectedResult(result, expectation); + failure_detail->append(" Input: "); + failure_detail->append(input); + } + return false; + } + + return true; +} + +bool collectNumberLexemes(const std::string &input, + std::vector *numbers, std::string *failure_detail) { + NumberCollectingSink sink; + JSONAdapter adapter; + if (JsonParseResult result = adapter.parse(input, &sink, + JsonBackendParseOptions()); !result.ok()) { + if (failure_detail != nullptr) { + *failure_detail = describeUnexpectedResult(result, "Ok/Continue"); + failure_detail->append(" Input: "); + failure_detail->append(input); + } + return false; + } + + if (numbers != nullptr) { + *numbers = sink.numbers; + } + return true; +} + +bool collectNumberLexemes(std::string *input, std::vector *numbers, + std::string *failure_detail) { + NumberCollectingSink sink; + JSONAdapter adapter; + if (JsonParseResult result = adapter.parse(*input, &sink, + JsonBackendParseOptions()); !result.ok()) { + if (failure_detail != nullptr) { + *failure_detail = describeUnexpectedResult(result, "Ok/Continue"); + failure_detail->append(" Input: "); + failure_detail->append(*input); + } + return false; + } + + if (numbers != nullptr) { + *numbers = sink.numbers; + } + return true; +} + +bool expectNumberLexemes(const char *case_name, const std::string &input, + const std::vector &expected, std::string *failure_detail) { + std::vector actual; + if (!collectNumberLexemes(input, &actual, failure_detail)) { + return false; + } + + if (actual != expected) { + if (failure_detail != nullptr) { + *failure_detail = std::string("Case '") + case_name + + "' expected " + describeStringList(expected) + + ", got " + describeStringList(actual) 
+ "."; + } + return false; + } + + return true; +} + +bool expectBackendDepthLimitParseError(std::string *failure_detail) { + AcceptAllSink sink; + JSONAdapter adapter; + JsonBackendParseOptions options; + + options.technical_max_depth = 2; + if (JsonParseResult result = adapter.parse(makeNestedArrayJson(8), &sink, + options); result.parse_status != JsonParseStatus::ParseError + || result.sink_status != JsonSinkStatus::Continue) { + if (failure_detail != nullptr) { + *failure_detail = describeUnexpectedResult(result, + "ParseError/Continue"); + } + return false; + } + + return true; +} + +bool expectBackendDepthHeadroomSuccess(std::string *failure_detail) { + AcceptAllSink sink; + JSONAdapter adapter; + JsonBackendParseOptions options; + + options.technical_max_depth = 32; + if (JsonParseResult result = adapter.parse(makeNestedArrayJson(8), &sink, + options); !result.ok()) { + if (failure_detail != nullptr) { + *failure_detail = describeUnexpectedResult(result, "Ok/Continue"); + } + return false; + } + + return true; +} + +bool expectEmptyInputSuccess(std::string *failure_detail) { + return expectParseResult("", JsonParseStatus::Ok, + JsonSinkStatus::Continue, "Ok/Continue", failure_detail); +} + +bool expectMalformedInputParseError(std::string *failure_detail) { + return expectParseResult("a", JsonParseStatus::ParseError, + JsonSinkStatus::Continue, "ParseError/Continue", failure_detail); +} + +bool expectTruncatedInputMapsToTruncatedInput(std::string *failure_detail) { + return expectParseResult("{\"key\":", JsonParseStatus::TruncatedInput, + JsonSinkStatus::Continue, "TruncatedInput/Continue", failure_detail); +} + +bool expectExactRootScalarNumberLexemes(std::string *failure_detail) { + struct NumberLexemeCase { + const char *name; + const char *input; + }; + + const std::array cases{{ + {"zero", "0"}, + {"negative_zero", "-0"}, + {"decimal", "1.0"}, + {"scientific", "1e3"}, + {"negative_fraction_with_exponent", "-1.25e-4"}, + {"uint64_max", 
"18446744073709551615"}, + {"uint64_overflow", "18446744073709551616"}, + {"large_integer", "123456789012345678901234567890"} + }}; + + for (const auto &test_case : cases) { + if (!expectNumberLexemes(test_case.name, test_case.input, + std::vector{test_case.input}, failure_detail)) { + return false; + } + } + + return true; +} + +bool expectExactContainerNumberLexemes(std::string *failure_detail) { + const std::string input = + R"({ "arr" : [ 0 , -0 , 1.0 , 1e3 ], )" + R"("obj" : { "frac" : -1.25e-4 , )" + R"("max" : 18446744073709551615 , )" + R"("over" : 18446744073709551616 , )" + R"("big" : 123456789012345678901234567890 } })"; + + return expectNumberLexemes("container_numbers_with_whitespace_and_boundaries", + input, std::vector{ + "0", + "-0", + "1.0", + "1e3", + "-1.25e-4", + "18446744073709551615", + "18446744073709551616", + "123456789012345678901234567890" + }, failure_detail); +} + +#if defined(MSC_JSON_BACKEND_SIMDJSON) +bool isJsonWhitespace(char value) { + return value == ' ' || value == '\t' || value == '\n' || value == '\r'; +} + +bool expectMutableSimdjsonPathPreservesLogicalInput( + std::string *failure_detail) { + std::string input("{\"n\":1}"); + const std::string original = input; + std::vector numbers; + + input.shrink_to_fit(); + if (!collectNumberLexemes(&input, &numbers, failure_detail)) { + return false; + } + + if (numbers != std::vector{"1"}) { + if (failure_detail != nullptr) { + *failure_detail = std::string("Expected [\"1\"], got ") + + describeStringList(numbers) + "."; + } + return false; + } + + if (input.size() <= original.size()) { + if (failure_detail != nullptr) { + *failure_detail = "Expected mutable simdjson input to grow after " + "in-place padding."; + } + return false; + } + + if (input.compare(0, original.size(), original) != 0) { + if (failure_detail != nullptr) { + *failure_detail = "Mutable simdjson input changed its logical JSON " + "prefix."; + } + return false; + } + + for (std::size_t i = original.size(); i < 
input.size(); i++) { + if (!isJsonWhitespace(input[i])) { + if (failure_detail != nullptr) { + *failure_detail = "Mutable simdjson input appended a non-" + "whitespace padding byte."; + } + return false; + } + } + + return true; +} + +bool expectConstSimdjsonPathLeavesInputUntouched(std::string *failure_detail) { + const std::string original("{\"n\":1}"); + std::string input = original; + std::vector numbers; + + input.shrink_to_fit(); + if (!collectNumberLexemes(static_cast(input), &numbers, + failure_detail)) { + return false; + } + + if (numbers != std::vector{"1"}) { + if (failure_detail != nullptr) { + *failure_detail = std::string("Expected [\"1\"], got ") + + describeStringList(numbers) + "."; + } + return false; + } + + if (input != original) { + if (failure_detail != nullptr) { + *failure_detail = "Const simdjson input was mutated."; + } + return false; + } + + return true; +} +#endif + +bool reportTestResult(const char *name, bool passed, + const std::string &detail) { + std::cout << ":test-result: " << (passed ? 
"PASS " : "FAIL ") + << "json_backend_depth_tests:" << name << std::endl; + if (!passed && !detail.empty()) { + std::cerr << name << ": " << detail << std::endl; + } + return passed; +} + +} // namespace + +int runJsonBackendDepthTests() { + int failures = 0; + std::string detail; + + if (!reportTestResult("technical_depth_limit_returns_parse_error", + expectBackendDepthLimitParseError(&detail), detail)) { + failures++; + } + + detail.clear(); + if (!reportTestResult("technical_depth_with_headroom_succeeds", + expectBackendDepthHeadroomSuccess(&detail), detail)) { + failures++; + } + + detail.clear(); + if (!reportTestResult("empty_input_returns_ok", + expectEmptyInputSuccess(&detail), detail)) { + failures++; + } + + detail.clear(); + if (!reportTestResult("malformed_input_maps_to_parse_error", + expectMalformedInputParseError(&detail), detail)) { + failures++; + } + + detail.clear(); + if (!reportTestResult("truncated_input_maps_to_truncated_input", + expectTruncatedInputMapsToTruncatedInput(&detail), detail)) { + failures++; + } + + detail.clear(); + if (!reportTestResult("number_lexemes_for_root_scalars_remain_exact", + expectExactRootScalarNumberLexemes(&detail), detail)) { + failures++; + } + + detail.clear(); + if (!reportTestResult("number_lexemes_in_containers_remain_exact", + expectExactContainerNumberLexemes(&detail), detail)) { + failures++; + } + +#if defined(MSC_JSON_BACKEND_SIMDJSON) + detail.clear(); + if (!reportTestResult("mutable_simdjson_input_keeps_logical_json_prefix", + expectMutableSimdjsonPathPreservesLogicalInput(&detail), detail)) { + failures++; + } + + detail.clear(); + if (!reportTestResult("const_simdjson_input_is_not_mutated", + expectConstSimdjsonPathLeavesInputUntouched(&detail), detail)) { + failures++; + } +#endif + + return failures == 0 ? 
0 : 1; +} + +} // namespace modsecurity::RequestBodyProcessor + +int main() { + return modsecurity::RequestBodyProcessor::runJsonBackendDepthTests(); +} diff --git a/test/unit/unit_test.cc b/test/unit/unit_test.cc index e67c100523..82a7afbc6c 100644 --- a/test/unit/unit_test.cc +++ b/test/unit/unit_test.cc @@ -29,6 +29,16 @@ namespace modsecurity_test { +namespace { + +std::unique_ptr make_empty_unit_test() { + auto test = std::make_unique(); + test->ret = 0; + test->skipped = false; + return test; +} + +} // namespace void replaceAll(std::string *s, const std::string &search, @@ -106,36 +116,85 @@ std::string UnitTest::print() const { } -std::unique_ptr UnitTest::from_yajl_node(const yajl_val &node) { - size_t num_tests = node->u.object.len; - auto u = std::make_unique(); - - for (int i = 0; i < num_tests; i++) { - const char *key = node->u.object.keys[ i ]; - yajl_val val = node->u.object.values[ i ]; - - u->skipped = false; - if (strcmp(key, "param") == 0) { - u->param = YAJL_GET_STRING(val); - } else if (strcmp(key, "input") == 0) { - u->input = YAJL_GET_STRING(val); - json2bin(&u->input); - } else if (strcmp(key, "resource") == 0) { - u->resource = YAJL_GET_STRING(val); - } else if (strcmp(key, "name") == 0) { - u->name = YAJL_GET_STRING(val); - } else if (strcmp(key, "type") == 0) { - u->type = YAJL_GET_STRING(val); - } else if (strcmp(key, "ret") == 0) { - u->ret = YAJL_GET_INTEGER(val); - } else if (strcmp(key, "output") == 0) { - u->output = std::string(YAJL_GET_STRING(val)); - json2bin(&u->output); - /* - * Converting \\u0000 to \0 due to the following gcc bug: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53690 - * - */ +std::unique_ptr UnitTest::from_json_document( + const modsecurity_test::json::JsonDocument *document) { + modsecurity_test::json::JsonValue root; + + if (modsecurity_test::json::get(document->get_value(), &root) == false) { + return make_empty_unit_test(); + } + + modsecurity_test::json::JsonType type; + if 
(modsecurity_test::json::get(root.type(), &type) == false) { + return make_empty_unit_test(); + } + + if (type == modsecurity_test::json::JsonType::Array) { + modsecurity_test::json::JsonArray tests; + if (modsecurity_test::json::get(root.get_array(), &tests) == false) { + return make_empty_unit_test(); + } + + for (auto test_result : tests) { + modsecurity_test::json::JsonValue test_value; + if (modsecurity_test::json::get(std::move(test_result), + &test_value) == false) { + continue; + } + + return from_json_value(test_value); + } + + return make_empty_unit_test(); + } + + return from_json_value(root); +} + +std::unique_ptr UnitTest::from_json_value( + modsecurity_test::json::JsonValue value) { + modsecurity_test::json::JsonObject object; + auto u = make_empty_unit_test(); + + if (modsecurity_test::json::get(value.get_object(), &object) == false) { + return u; + } + + for (auto field_result : object) { + modsecurity_test::json::JsonField field; + std::string_view key; + modsecurity_test::json::JsonValue child; + + if (modsecurity_test::json::get(field_result, &field) + == false) { + continue; + } + if (modsecurity_test::json::get(field.unescaped_key(), &key) == false) { + continue; + } + child = field.value(); + + if (key == "param") { + u->param = modsecurity_test::json::get_string(child); + } else if (key == "input") { + u->input = modsecurity_test::json::get_string(child); + json2bin(&u->input); + } else if (key == "resource") { + u->resource = modsecurity_test::json::get_string(child); + } else if (key == "name") { + u->name = modsecurity_test::json::get_string(child); + } else if (key == "type") { + u->type = modsecurity_test::json::get_string(child); + } else if (key == "ret") { + u->ret = static_cast(modsecurity_test::json::get_integer(child)); + } else if (key == "output") { + u->output = modsecurity_test::json::get_string(child); + json2bin(&u->output); + /* + * Converting \\u0000 to \0 due to the following gcc bug: + * 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53690 + * + */ } } diff --git a/test/unit/unit_test.h b/test/unit/unit_test.h index ffd776442b..a92565fc6c 100644 --- a/test/unit/unit_test.h +++ b/test/unit/unit_test.h @@ -13,13 +13,13 @@ * */ -#include - #include #include #include #include +#include "test/common/json.h" + #ifndef TEST_UNIT_UNIT_TEST_H_ #define TEST_UNIT_UNIT_TEST_H_ @@ -27,13 +27,16 @@ namespace modsecurity_test { class UnitTestResult { public: - int ret; + int ret = 0; std::string output; }; class UnitTest { public: - static std::unique_ptr from_yajl_node(const yajl_val &); + static std::unique_ptr from_json_document( + const modsecurity_test::json::JsonDocument *document); + static std::unique_ptr from_json_value( + modsecurity_test::json::JsonValue value); std::string print() const; diff --git a/tools/rules-check/Makefile.am b/tools/rules-check/Makefile.am index 8080411716..fc14a6f986 100644 --- a/tools/rules-check/Makefile.am +++ b/tools/rules-check/Makefile.am @@ -16,8 +16,7 @@ modsec_rules_check_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(PCRE2_LDADD) \ - $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(SSDEEP_LDADD) modsec_rules_check_LDFLAGS = \ $(GEOIP_LDFLAGS) \ @@ -26,11 +25,10 @@ modsec_rules_check_LDFLAGS = \ $(LMDB_LDFLAGS) \ $(LUA_LDFLAGS) \ $(SSDEEP_LDFLAGS) \ - $(YAJL_LDFLAGS) \ $(LIBXML2_LDFLAGS) modsec_rules_check_CPPFLAGS = \ - -I$(top_builddir)/headers \ + -I$(top_srcdir)/headers \ $(GLOBAL_CPPFLAGS) \ $(PCRE_CFLAGS) \ $(PCRE2_CFLAGS) \ @@ -40,4 +38,3 @@ modsec_rules_check_CPPFLAGS = \ MAINTAINERCLEANFILES = \ Makefile.in -